├── .gitignore ├── Dockerfile ├── LICENSE.md ├── README.md ├── download_model.sh ├── evs.gif ├── launch_container.sh └── xtreme-view ├── DeepMVS ├── __init__.py └── model.py ├── dataloader ├── __init__.py └── colmap_loader.py ├── run_colmap.sh ├── run_colmap_all.sh ├── run_xtreme_view.py ├── run_xtreme_view_all.sh └── vsynthlib ├── __init__.py ├── core.py ├── deepmvs_wrapper.py ├── depth_util.py ├── refinement.py └── refinet ├── __init__.py └── models.py /.gitignore: -------------------------------------------------------------------------------- 1 | *__pycache__ 2 | data/ 3 | models/ 4 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nvcr.io/nvidia/pytorch:19.06-py3 2 | 3 | # Install COLMAP 4 | RUN apt-get update && apt-get -y install \ 5 | git \ 6 | cmake \ 7 | build-essential \ 8 | libboost-program-options-dev \ 9 | libboost-filesystem-dev \ 10 | libboost-graph-dev \ 11 | libboost-regex-dev \ 12 | libboost-system-dev \ 13 | libboost-test-dev \ 14 | libeigen3-dev \ 15 | libsuitesparse-dev \ 16 | libfreeimage-dev \ 17 | libgoogle-glog-dev \ 18 | libgflags-dev \ 19 | libglew-dev \ 20 | qtbase5-dev \ 21 | libqt5opengl5-dev \ 22 | libcgal-dev \ 23 | libcgal-qt5-dev \ 24 | libatlas-base-dev \ 25 | libsuitesparse-dev \ 26 | libopenblas-dev 27 | 28 | RUN git clone https://ceres-solver.googlesource.com/ceres-solver && \ 29 | cd ceres-solver && \ 30 | git checkout 1.14.0 && \ 31 | mkdir build && \ 32 | cd build && \ 33 | cmake .. -DBUILD_TESTING=OFF -DBUILD_EXAMPLES=OFF && \ 34 | make -j8 && \ 35 | make install 36 | 37 | RUN git clone https://github.com/colmap/colmap.git && \ 38 | cd colmap && \ 39 | git checkout 3.5 && \ 40 | mkdir build && \ 41 | cd build && \ 42 | cmake .. -DCUDA_ARCHS="5.2 6.0 6.1 7.0 7.5+PTX" && \ 43 | make -j8 && \ 44 | make install 45 | 46 | # Install xtreme-view dependencies 47 | RUN pip install pydensecrf \ 48 | pyquaternion \ 49 | imageio 50 | 51 | COPY xtreme-view xtreme-view 52 | WORKDIR xtreme-view 53 | 54 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | ## Nvidia Source Code License 2 | 3 | ### 1. Definitions. 4 | 5 | “Licensor” means any person or entity that distributes its Work. 6 | 7 | “Software” means the original work of authorship made available under this License. 8 | 9 | “Work” means the Software and any additions to or derivative works of the Software that are made available under this License. 10 | 11 | “Nvidia Processors” means any central processing unit (CPU), graphics processing unit (GPU), field-programmable gate array (FPGA), application-specific integrated circuit (ASIC) or any combination thereof designed, made, sold, or provided by Nvidia or its affiliates. 12 | 13 | The terms “reproduce,” “reproduction,” “derivative works,” and “distribution” have the meaning as provided under U.S. copyright law; provided, however, that for the purposes of this License, derivative works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work. 14 | 15 | Works, including the Software, are “made available” under this License by including in or with the Work either (a) a copyright notice referencing the applicability of this License to the Work, or (b) a copy of this License. 16 | 17 | ### 2. License Grants. 18 | 19 | 2.1 Copyright Grant. 
Subject to the terms and conditions of this License, each Licensor grants to you a perpetual, worldwide, non-exclusive, royalty-free, copyright license to reproduce, prepare derivative works of, publicly display, publicly perform, sublicense and distribute its Work and any resulting derivative works in any form. 20 | 21 | 2.2 Patent Grant. Subject to the terms and conditions of this License, each Licensor grants to you a perpetual, worldwide, non-exclusive, royalty-free patent license to make, have made, use, sell, offer for sale, import, and otherwise transfer its Work, in whole or in part. The foregoing license applies only to the patent claims licensable by Licensor that would be infringed by Licensor’s Work (or portion thereof) individually and excluding any combinations with any other materials or technology. 22 | 23 | ### 3. Limitations. 24 | 25 | 3.1 Redistribution. You may reproduce or distribute the Work only if (a) you do so under this License, (b) you include a complete copy of this License with your distribution, and (c) you retain without modification any copyright, patent, trademark, or attribution notices that are present in the Work. 26 | 27 | 3.2 Derivative Works. You may specify that additional or different terms apply to the use, reproduction, and distribution of your derivative works of the Work (“Your Terms”) only if (a) Your Terms provide that the use limitation in Section 3.3 applies to your derivative works, and (b) you identify the specific derivative works that are subject to Your Terms. Notwithstanding Your Terms, this License (including the redistribution requirements in Section 3.1) will continue to apply to the Work itself. 28 | 29 | 3.3 Use Limitation. The Work and any derivative works thereof only may be used or intended for use commercially with Nvidia Processors. 30 | 31 | 3.4 Patent Claims. If you bring or threaten to bring a patent claim against any Licensor (including any claim, cross-claim or counterclaim in a lawsuit) to enforce any patents that you allege are infringed by any Work, then your rights under this License from such Licensor (including the grants in Sections 2.1 and 2.2) will terminate immediately. 32 | 33 | 3.5 Trademarks. This License does not grant any rights to use any Licensor’s or its affiliates’ names, logos, or trademarks, except as necessary to reproduce the notices described in this License. 34 | 35 | 3.6 Termination. If you violate any term of this License, then your rights under this License (including the grants in Sections 2.1 and 2.2) will terminate immediately. 36 | 37 | ### 4. Disclaimer of Warranty. 38 | 39 | THE WORK IS PROVIDED “AS IS” WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WARRANTIES OR CONDITIONS OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE OR NON-INFRINGEMENT. YOU BEAR THE RISK OF UNDERTAKING ANY ACTIVITIES UNDER THIS LICENSE. SOME STATES’ CONSUMER LAWS DO NOT ALLOW EXCLUSION OF AN IMPLIED WARRANTY, SO THIS DISCLAIMER MAY NOT APPLY TO YOU. 40 | 41 | ### 5. Limitation of Liability.
42 | 43 | EXCEPT AS PROHIBITED BY APPLICABLE LAW, IN NO EVENT AND UNDER NO LEGAL THEORY, WHETHER IN TORT (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE SHALL ANY LICENSOR BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF OR RELATED TO THIS LICENSE, THE USE OR INABILITY TO USE THE WORK (INCLUDING BUT NOT LIMITED TO LOSS OF GOODWILL, BUSINESS INTERRUPTION, LOST PROFITS OR DATA, COMPUTER FAILURE OR MALFUNCTION, OR ANY OTHER COMMERCIAL DAMAGES OR LOSSES), EVEN IF THE LICENSOR HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 44 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Extreme View Synthesis 2 | 3 | #### [Paper](https://arxiv.org/abs/1812.04777) | [Extended Presentation at GTC 2019](https://developer.nvidia.com/gtc/2019/video/S9576) (requires free registration) | [Latex citation](#citation) 4 | 5 | Code for the paper: 6 | **Extreme View Synthesis** 7 | [Inchang Choi](http://www.inchangchoi.info/), [Orazio Gallo](https://oraziogallo.github.io/), [Alejandro Troccoli](https://research.nvidia.com/person/alejandro-troccoli), [Min H. Kim](http://vclab.kaist.ac.kr/minhkim/) and [Jan Kautz](http://jankautz.com/), IEEE International Conference on Computer Vision, 2019 (Oral). 8 | 9 | 10 | ## License 11 | 12 | Copyright (C) 2019 NVIDIA Corporation. All rights reserved. 13 | 14 | Licensed under the [NVIDIA Source Code License](LICENSE.md) 15 | 16 | ## Pre-requisites 17 | 18 | For convenience, we provide a Dockerfile to build a container image to run the code. The image will contain the Python dependencies and a build of COLMAP. 19 | 20 | Your system will need: 21 | 22 | 1. Docker (>= 19.03) 23 | 24 | 2. [NVIDIA Docker](https://github.com/NVIDIA/nvidia-docker/wiki) 25 | 26 | 3. NVIDIA GPU driver 418 or later. 27 | 28 | Build the container image: 29 | 30 | ``` 31 | docker build -t xtreme-view . 32 | ``` 33 | 34 | ## Download the models 35 | 36 | You can download the models from the NVIDIA GPU CLOUD registry using: 37 | 38 | 39 | ``` 40 | ./download_model.sh 41 | ``` 42 | 43 | 44 | 45 | ## Running the code 46 | 47 | Place your sequence of images in a directory tree with root ```data```, followed by a directory per sequence, e.g., ```data/0000```, and place all images in the sequence into the ```data/0000/images``` sub-directory. 48 | 49 | Launch the container using the provided script: 50 | 51 | ``` 52 | ./launch_container.sh 53 | ``` 54 | 55 | Run COLMAP on a sequence of images to get the camera parameters: 56 | 57 | ``` 58 | ./run_colmap.sh /data/0000 59 | ``` 60 | 61 | Run the extreme view synthesis code: 62 | 63 | ``` 64 | python run_xtreme_view.py /data/0000 --input_views=6,8 65 | ``` 66 | 67 | This will run the extreme view synthesis code using images 6 and 8 of the sequence /data/0000. You can modify the code to use different virtual cameras (see the sketch at the end of this section). 68 | 69 | You can run COLMAP and the extreme view synthesis on all the sample sequences: 70 | 71 | ``` 72 | ./run_colmap_all.sh 73 | ./run_xtreme_view_all.sh 74 | ``` 75 | 76 | The results are stored in the sequence directory under ```xtreme-view```. For example, for ```data/0000``` you will find the results in the directory ```data/0000/xtreme-view```. The initial view synthesis is located under ```output``` and the refined one under ```refinement```.
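The virtual cameras used for each sample sequence are defined in the ```virtual_cams``` dictionary at the top of ```run_xtreme_view.py```: each entry names a source view (```src_indx```) and a list of 4-vector offsets that are added to the translation column of that view's extrinsic matrix, and sequences without their own entry fall back to the ```default``` entry. As a rough sketch of how a new sequence could be registered (the sequence name ```0100``` and the offset values below are illustrative placeholders, not tuned settings):

```
# Sketch only: virtual cameras for a hypothetical sequence /data/0100.
# Offsets are placeholders; see the existing entries in run_xtreme_view.py for tuned values.
virtual_cams['0100'] = {'src_indx': 0,                               # input view whose pose is offset
                        'view_offsets': [np.array([-3.0, 0, 0, 0]),  # offsets added to extrinsic[:, 3]
                                         np.array([ 3.0, 0, 0, 0])]
                        }
```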
77 | 78 | ## Citation 79 | If you find this code useful in your research or fun project, please consider citing the paper: 80 | ``` 81 | @inproceedings{extremeview, 82 | title={Extreme View Synthesis}, 83 | author={Choi, Inchang and Gallo, Orazio and Troccoli, Alejandro and Kim, Min H and Kautz, Jan}, 84 | booktitle={Proceedings of the IEEE International Conference on Computer Vision}, 85 | pages={7781--7790}, 86 | year={2019} 87 | } 88 | ``` 89 | 90 | ## Open Source licenses 91 | 92 | DeepMVS is Copyright (c) 2018, Po-Han Huang, distributed under the [BSD 2-clause license](https://opensource.org/licenses/BSD-2-Clause) 93 | -------------------------------------------------------------------------------- /download_model.sh: -------------------------------------------------------------------------------- 1 | wget -q --show-progress -O models.zip https://api.ngc.nvidia.com/v2/models/nvidia/xtreme_view/versions/1/zip 2 | unzip models.zip 3 | rm models.zip -------------------------------------------------------------------------------- /evs.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/extreme-view-synth/2820ffdda9f44e70cd2fdd0845ec9145293e4183/evs.gif -------------------------------------------------------------------------------- /launch_container.sh: -------------------------------------------------------------------------------- 1 | docker run --gpus all --rm -it --ipc=host -u $(id -u ${USER}):$(id -g ${USER}) -v /etc/passwd:/etc/passwd -v /etc/group:/etc/group -e TORCH_HOME=/models/torchvision -v $(pwd)/models:/models -v $(pwd)/data:/data xtreme-view 2 | -------------------------------------------------------------------------------- /xtreme-view/DeepMVS/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | -------------------------------------------------------------------------------- /xtreme-view/DeepMVS/model.py: -------------------------------------------------------------------------------- 1 | ''' 2 | BSD 2-Clause License 3 | 4 | Copyright (c) 2018, Po-Han Huang 5 | All rights reserved. 6 | 7 | Redistribution and use in source and binary forms, with or without 8 | modification, are permitted provided that the following conditions are met: 9 | 10 | * Redistributions of source code must retain the above copyright notice, this 11 | list of conditions and the following disclaimer. 12 | 13 | * Redistributions in binary form must reproduce the above copyright notice, 14 | this list of conditions and the following disclaimer in the documentation 15 | and/or other materials provided with the distribution. 16 | 17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 18 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 20 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 21 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 23 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 24 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 25 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
27 | ''' 28 | 29 | import torch 30 | import torch.nn as nn 31 | import torch.nn.functional as F 32 | 33 | 34 | class DeepMVS(nn.Module): 35 | def __init__(self, num_depths, use_gpu = True, gpu_id = 0): 36 | super(DeepMVS, self).__init__() 37 | # Patch Matching 38 | self.layer_0 = nn.Sequential( 39 | nn.Conv2d(3, 64, (5, 5), stride = (1, 1), padding = (2, 2)), 40 | nn.SELU() 41 | ) 42 | self.layer_1 = nn.Sequential( 43 | nn.Conv2d(128, 96, (5, 5), stride = (1, 1), padding = (2, 2)), 44 | nn.SELU(), 45 | nn.Conv2d(96, 32, (5, 5), stride = (1, 1), padding = (2, 2)), 46 | nn.SELU(), 47 | nn.Conv2d(32, 4, (5, 5), stride = (1, 1), padding = (2, 2)), 48 | nn.SELU() 49 | ) 50 | # Encoder 51 | self.layer_2_e1x = nn.Sequential( 52 | nn.Conv2d(4 * num_depths, 200, (3, 3), stride = (1, 1), padding = (1, 1)), 53 | nn.SELU(), 54 | nn.Conv2d(200, 100, (3, 3), stride = (1, 1), padding = (1, 1)), 55 | nn.SELU() 56 | ) 57 | self.layer_2_e2x = nn.Sequential( 58 | nn.Conv2d(100, 100, (2, 2), stride = (2, 2), padding = (0, 0)), 59 | nn.SELU(), 60 | nn.Conv2d(100, 100, (3, 3), stride = (1, 1), padding = (1, 1)), 61 | nn.SELU() 62 | ) 63 | self.layer_2_e4x = nn.Sequential( 64 | nn.Conv2d(100, 100, (2, 2), stride = (2, 2), padding = (0, 0)), 65 | nn.SELU(), 66 | nn.Conv2d(100, 100, (3, 3), stride = (1, 1), padding = (1, 1)), 67 | nn.SELU(), 68 | ) 69 | self.layer_2_e8x = nn.Sequential( 70 | nn.Conv2d(100, 100, (2, 2), stride = (2, 2), padding = (0, 0)), 71 | nn.SELU(), 72 | nn.Conv2d(100, 100, (3, 3), stride = (1, 1), padding = (1, 1)), 73 | nn.SELU(), 74 | ) 75 | self.layer_2_e16x = nn.Sequential( 76 | nn.Conv2d(100, 100, (2, 2), stride = (2, 2), padding = (0, 0)), 77 | nn.SELU(), 78 | nn.Conv2d(100, 100, (3, 3), stride = (1, 1), padding = (1, 1)), 79 | nn.SELU() 80 | ) 81 | # Buffer layers for VGG features 82 | self.layer_b1x = nn.Sequential( 83 | nn.Conv2d(64, 64, (1, 1), stride = (1, 1), padding = (0, 0)), 84 | nn.SELU(), 85 | ) 86 | self.layer_b2x = nn.Sequential( 87 | nn.Conv2d(128, 100, (1, 1), stride = (1, 1), padding = (0, 0)), 88 | nn.SELU(), 89 | ) 90 | self.layer_b4x = nn.Sequential( 91 | nn.Conv2d(256, 100, (1, 1), stride = (1, 1), padding = (0, 0)), 92 | nn.SELU(), 93 | ) 94 | self.layer_b8x = nn.Sequential( 95 | nn.Conv2d(512, 100, (1, 1), stride = (1, 1), padding = (0, 0)), 96 | nn.SELU(), 97 | ) 98 | self.layer_b16x = nn.Sequential( 99 | nn.Conv2d(512, 100, (1, 1), stride = (1, 1), padding = (0, 0)), 100 | nn.SELU(), 101 | ) 102 | # Decoder 103 | self.layer_2_d16x = nn.Sequential( 104 | nn.Conv2d(200, 100, (3, 3), stride = (1, 1), padding = (1, 1)), 105 | nn.SELU(), 106 | nn.Conv2d(100, 100, (3, 3), stride = (1, 1), padding = (1, 1)), 107 | nn.SELU(), 108 | ) 109 | self.layer_2_d8x = nn.Sequential( 110 | nn.Conv2d(300, 100, (3, 3), stride = (1, 1), padding = (1, 1)), 111 | nn.SELU(), 112 | nn.Conv2d(100, 100, (3, 3), stride = (1, 1), padding = (1, 1)), 113 | nn.SELU() 114 | ) 115 | self.layer_2_d4x = nn.Sequential( 116 | nn.Conv2d(300, 100, (3, 3), stride = (1, 1), padding = (1, 1)), 117 | nn.SELU(), 118 | nn.Conv2d(100, 100, (3, 3), stride = (1, 1), padding = (1, 1)), 119 | nn.SELU() 120 | ) 121 | self.layer_2_d2x = nn.Sequential( 122 | nn.Conv2d(300, 100, (3, 3), stride = (1, 1), padding = (1, 1)), 123 | nn.SELU(), 124 | nn.Conv2d(100, 100, (3, 3), stride = (1, 1), padding = (1, 1)), 125 | nn.SELU() 126 | ) 127 | self.layer_2_d1x = nn.Sequential( 128 | nn.Conv2d(264, 400, (3, 3), stride = (1, 1), padding = (1, 1)), 129 | nn.SELU(), 130 | nn.Conv2d(400, 800, (3, 3), stride = (1, 1), padding = 
(1, 1)), 131 | nn.SELU() 132 | ) 133 | # Inter-Volume Aggregation 134 | self.layer_3 = nn.Sequential( 135 | nn.Conv2d(800, 400, (3, 3), stride = (1, 1), padding = (1, 1)), 136 | nn.SELU(), 137 | nn.Conv2d(400, num_depths, (3, 3), stride = (1, 1), padding = (1, 1)) 138 | ) 139 | self.layer_loss = nn.CrossEntropyLoss(ignore_index=-1) 140 | 141 | if use_gpu: 142 | self.layer_0 = self.layer_0.cuda(gpu_id) 143 | self.layer_1 = self.layer_1.cuda(gpu_id) 144 | self.layer_2_e1x = self.layer_2_e1x.cuda(gpu_id) 145 | self.layer_2_e2x = self.layer_2_e2x.cuda(gpu_id) 146 | self.layer_2_e4x = self.layer_2_e4x.cuda(gpu_id) 147 | self.layer_2_e8x = self.layer_2_e8x.cuda(gpu_id) 148 | self.layer_2_e16x = self.layer_2_e16x.cuda(gpu_id) 149 | self.layer_b1x = self.layer_b1x.cuda(gpu_id) 150 | self.layer_b2x = self.layer_b2x.cuda(gpu_id) 151 | self.layer_b4x = self.layer_b4x.cuda(gpu_id) 152 | self.layer_b8x = self.layer_b8x.cuda(gpu_id) 153 | self.layer_b16x = self.layer_b16x.cuda(gpu_id) 154 | self.layer_2_d16x = self.layer_2_d16x.cuda(gpu_id) 155 | self.layer_2_d8x = self.layer_2_d8x.cuda(gpu_id) 156 | self.layer_2_d4x = self.layer_2_d4x.cuda(gpu_id) 157 | self.layer_2_d2x = self.layer_2_d2x.cuda(gpu_id) 158 | self.layer_2_d1x = self.layer_2_d1x.cuda(gpu_id) 159 | self.layer_3 = self.layer_3.cuda(gpu_id) 160 | self.layer_loss = self.layer_loss.cuda(gpu_id) 161 | 162 | # Shape of 'volume_input': batch_size * num_neighbors (or num_sources) * num_depths * 2 * num_channels * height * width 163 | # 'feature_inputs' is a list of five VGG feature tensors, each of shape: batch_size * num_features * height * width 164 | def forward(self, volume_input, feature_inputs): 165 | (aggregated_feature, _) = torch.max(self.forward_feature(volume_input, feature_inputs), 1) 166 | return self.forward_predict(aggregated_feature) 167 | 168 | def forward_feature(self, volume_input, feature_inputs): 169 | if volume_input.dim() != 7 or volume_input.size(3) != 2: 170 | raise ValueError("'volume_input' must be a tensor of shape: batch_size * num_neighbors (or num_sources) * num_depths * 2 * num_channels * height * width") 171 | if len(feature_inputs) != 5: 172 | raise ValueError("'feature_inputs' is a list of five VGG feature tensors of shape: batch_size * num_features * height * width") 173 | for feature in feature_inputs: 174 | if feature.dim() != 4: 175 | raise ValueError("'feature_inputs' is a list of five VGG feature tensors of shape: batch_size * num_features * height * width") 176 | batch_size = volume_input.size(0) 177 | num_neighbors = volume_input.size(1) 178 | num_depths = volume_input.size(2) 179 | num_channels = volume_input.size(4) 180 | height = volume_input.size(5) 181 | width = volume_input.size(6) 182 | layer_0_output = self.layer_0( 183 | volume_input.view(batch_size * num_neighbors * num_depths * 2, num_channels, height, width)) 184 | layer_1_output = self.layer_1( 185 | layer_0_output.view(batch_size * num_neighbors * num_depths, 2 * 64, height, width)) 186 | layer_2_e1x_out = self.layer_2_e1x(layer_1_output.view(batch_size * num_neighbors, num_depths * 4, height, width)) 187 | layer_2_e2x_out = self.layer_2_e2x(layer_2_e1x_out) 188 | layer_2_e4x_out = self.layer_2_e4x(layer_2_e2x_out) 189 | layer_2_e8x_out = self.layer_2_e8x(layer_2_e4x_out) 190 | layer_2_e16x_out = self.layer_2_e16x(layer_2_e8x_out) 191 | layer_b1x_out = self.layer_b1x(feature_inputs[0]) 192 | layer_b2x_out = self.layer_b2x(feature_inputs[1]) 193 | layer_b4x_out = self.layer_b4x(feature_inputs[2]) 194 | layer_b8x_out = 
self.layer_b8x(feature_inputs[3]) 195 | layer_b16x_out = self.layer_b16x(feature_inputs[4]) 196 | if num_neighbors != 1: 197 | # We need to copy the features for each neighbor image. When batch_size = 1, use expand() instead of repeat() to save memory. 198 | if batch_size == 1: 199 | layer_b1x_out = layer_b1x_out.expand(batch_size * num_neighbors, -1, -1, -1) 200 | layer_b2x_out = layer_b2x_out.expand(batch_size * num_neighbors, -1, -1, -1) 201 | layer_b4x_out = layer_b4x_out.expand(batch_size * num_neighbors, -1, -1, -1) 202 | layer_b8x_out = layer_b8x_out.expand(batch_size * num_neighbors, -1, -1, -1) 203 | layer_b16x_out = layer_b16x_out.expand(batch_size * num_neighbors, -1, -1, -1) 204 | else: 205 | layer_b1x_out = layer_b1x_out.repeat(num_neighbors, 1, 1, 1) 206 | layer_b2x_out = layer_b2x_out.repeat(num_neighbors, 1, 1, 1) 207 | layer_b4x_out = layer_b4x_out.repeat(num_neighbors, 1, 1, 1) 208 | layer_b8x_out = layer_b8x_out.repeat(num_neighbors, 1, 1, 1) 209 | layer_b16x_out = layer_b16x_out.repeat(num_neighbors, 1, 1, 1) 210 | layer_2_d16x_out = self.layer_2_d16x(torch.cat((layer_2_e16x_out, layer_b16x_out), 1)) 211 | layer_2_d8x_out = self.layer_2_d8x(torch.cat((layer_2_e8x_out, F.upsample(layer_2_d16x_out, scale_factor=2, mode='bilinear'), layer_b8x_out), 1)) 212 | layer_2_d4x_out = self.layer_2_d4x(torch.cat((layer_2_e4x_out, F.upsample(layer_2_d8x_out, scale_factor=2, mode='bilinear'), layer_b4x_out), 1)) 213 | layer_2_d2x_out = self.layer_2_d2x(torch.cat((layer_2_e2x_out, F.upsample(layer_2_d4x_out, scale_factor=2, mode='bilinear'), layer_b2x_out), 1)) 214 | layer_2_d1x_out = self.layer_2_d1x(torch.cat((layer_2_e1x_out, F.upsample(layer_2_d2x_out, scale_factor=2, mode='bilinear'), layer_b1x_out), 1)) 215 | return layer_2_d1x_out.view(batch_size, num_neighbors, 800, height, width) 216 | 217 | def forward_predict(self, aggregated_feature): 218 | layer_3_output = self.layer_3(aggregated_feature) 219 | return layer_3_output 220 | 221 | def weights_init(m): 222 | if isinstance(m, nn.Conv2d): 223 | nn.init.xavier_normal(m.weight.data) 224 | m.bias.data.fill_(0) -------------------------------------------------------------------------------- /xtreme-view/dataloader/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/extreme-view-synth/2820ffdda9f44e70cd2fdd0845ec9145293e4183/xtreme-view/dataloader/__init__.py -------------------------------------------------------------------------------- /xtreme-view/dataloader/colmap_loader.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (C) 2019 NVIDIA Corporation. All rights reserved. 3 | Licensed under the NVIDIA Source Code License. See LICENSE.md at https://github.com/NVlabs/extreme-view-synth 4 | Authors: Inchang Choi, Orazio Gallo, Alejandro Troccoli, Min H. 
Kim, and Jan Kautz 5 | """ 6 | 7 | 8 | import numpy as np 9 | import cv2 10 | import imageio 11 | import json 12 | import os 13 | import sys 14 | from pyquaternion import Quaternion 15 | 16 | colmap_root = os.getenv('COLMAP_ROOT', '/workspace/colmap') 17 | sys.path.append(os.path.join(colmap_root, 'scripts', 'python')) 18 | import read_model 19 | 20 | 21 | def read_array(path): 22 | with open(path, "rb") as fid: 23 | width, height, channels = np.genfromtxt(fid, delimiter="&", max_rows=1, 24 | usecols=(0, 1, 2), dtype=int) 25 | fid.seek(0) 26 | num_delimiter = 0 27 | byte = fid.read(1) 28 | while True: 29 | if byte == b"&": 30 | num_delimiter += 1 31 | if num_delimiter >= 3: 32 | break 33 | byte = fid.read(1) 34 | array = np.fromfile(fid, np.float32) 35 | array = array.reshape((width, height, channels), order="F") 36 | return np.transpose(array, (1, 0, 2)).squeeze() 37 | 38 | class COLMAPData(): 39 | 40 | @staticmethod 41 | def read_data_to_list(seq_path): 42 | 43 | list_img = [] 44 | list_depth = [] 45 | list_cam_params = [] 46 | 47 | # read camera params 48 | c_cams, c_images, c_points3D = read_model.read_model('%s/dense/0/sparse' % seq_path, '.bin') 49 | 50 | # read image and depth 51 | img_dir = os.path.join(seq_path, 'dense', '0', 'images') 52 | img_list = os.listdir(img_dir) 53 | img_list.sort() 54 | 55 | for idx, img_name in enumerate(img_list): 56 | filename_img = os.path.join(img_dir, img_name) 57 | filename_depth = os.path.join(seq_path, 'dense', '0', 'stereo', 'depth_maps', '%s.geometric.bin' % img_name) 58 | 59 | # read images 60 | img = imageio.imread(filename_img).astype(np.float32) / 255.0 61 | list_img.append(img) 62 | 63 | # read depths 64 | depth = read_array(filename_depth) 65 | 66 | min_depth, max_depth = np.percentile(depth, [5, 90]) 67 | depth[depth < min_depth] = min_depth 68 | depth[depth > max_depth] = max_depth 69 | list_depth.append(depth) 70 | 71 | # fetch the camera params 72 | for key in c_images: 73 | image_key = c_images[key] 74 | image_name = image_key.name 75 | if image_name == img_name: 76 | key_to_fetch_for_cam = image_key.camera_id 77 | key_to_fetch_for_image = key 78 | 79 | params = {} 80 | c_cam = c_cams[key_to_fetch_for_cam] 81 | params['f_x'] = c_cam.params[0] 82 | params['f_y'] = c_cam.params[1] 83 | params['c_x'] = c_cam.params[2] 84 | params['c_y'] = c_cam.params[3] 85 | 86 | c_image = c_images[key_to_fetch_for_image] 87 | q = Quaternion(c_image.qvec) 88 | e = np.zeros(shape=(4, 4)) 89 | e[0:3, 0:3] = q.rotation_matrix 90 | e[0:3, 3] = c_image.tvec 91 | e[3, 3] = 1.0 92 | int_mat = np.array([[params['f_x'], 0.0, params['c_x']], 93 | [0.0, params['f_y'], params['c_y']], 94 | [0.0, 0.0, 1.0]]) 95 | cam = {} 96 | cam['extrinsic'] = e 97 | cam['intrinsic'] = int_mat 98 | list_cam_params.append(cam) 99 | 100 | return list_img, list_depth, list_cam_params 101 | -------------------------------------------------------------------------------- /xtreme-view/run_colmap.sh: -------------------------------------------------------------------------------- 1 | # The project folder must contain a folder "images" with all the images. 
2 | DATASET_PATH=$1 3 | 4 | colmap feature_extractor \ 5 | --database_path $DATASET_PATH/db.db \ 6 | --image_path $DATASET_PATH/images \ 7 | --ImageReader.single_camera=1 8 | 9 | colmap exhaustive_matcher \ 10 | --database_path $DATASET_PATH/db.db 11 | 12 | mkdir $DATASET_PATH/sparse 13 | 14 | colmap mapper \ 15 | --database_path $DATASET_PATH/db.db \ 16 | --image_path $DATASET_PATH/images \ 17 | --output_path $DATASET_PATH/sparse \ 18 | --Mapper.init_min_tri_angle=0.1 \ 19 | --Mapper.tri_min_angle=0.1 \ 20 | --Mapper.filter_min_tri_angle=0.1 \ 21 | --Mapper.init_max_forward_motion=1.0 22 | 23 | mkdir -p $DATASET_PATH/dense/0 24 | 25 | colmap image_undistorter \ 26 | --image_path $DATASET_PATH/images \ 27 | --input_path $DATASET_PATH/sparse/0 \ 28 | --output_path $DATASET_PATH/dense/0 \ 29 | --output_type COLMAP \ 30 | --max_image_size 2000 31 | 32 | colmap patch_match_stereo \ 33 | --workspace_path $DATASET_PATH/dense/0 \ 34 | --workspace_format COLMAP \ 35 | --PatchMatchStereo.geom_consistency true \ 36 | --PatchMatchStereo.min_triangulation_angle=0.1 \ 37 | --PatchMatchStereo.filter_min_triangulation_angle=0.1 38 | 39 | colmap stereo_fusion \ 40 | --workspace_path $DATASET_PATH/dense/0 \ 41 | --workspace_format COLMAP \ 42 | --input_type geometric \ 43 | --output_path $DATASET_PATH/dense/fused.ply 44 | -------------------------------------------------------------------------------- /xtreme-view/run_colmap_all.sh: -------------------------------------------------------------------------------- 1 | ./run_colmap.sh /data/0000 2 | ./run_colmap.sh /data/0005 3 | ./run_colmap.sh /data/0009 4 | ./run_colmap.sh /data/0020 5 | ./run_colmap.sh /data/0027 -------------------------------------------------------------------------------- /xtreme-view/run_xtreme_view.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (C) 2019 NVIDIA Corporation. All rights reserved. 3 | Licensed under the NVIDIA Source Code License. See LICENSE.md at https://github.com/NVlabs/extreme-view-synth. 4 | Authors: Inchang Choi, Orazio Gallo, Alejandro Troccoli, Min H. 
Kim, and Jan Kautz 5 | """ 6 | 7 | from copy import deepcopy 8 | from vsynthlib import deepmvs_wrapper 9 | from vsynthlib import core 10 | from dataloader import colmap_loader 11 | 12 | import os 13 | import sys 14 | import argparse 15 | import numpy as np 16 | 17 | class XtremeViewRunner(): 18 | 19 | virtual_cams = {} 20 | 21 | # scene 0 22 | virtual_cams['0000'] = { 'src_indx' : 0, 23 | 'view_offsets' : [np.array([-6, 0, 0, 0]), 24 | np.array([-4.5,0, 0, 0]), 25 | np.array([-3,0, 0, 0]), 26 | np.array([ 3, 0, 0, 0]), 27 | np.array([ 4.5,0, 0, 0]), 28 | np.array([ 6, 0, 0, 0])] 29 | } 30 | 31 | # Figure 10 - top row 32 | virtual_cams['0005'] = { 'src_indx': 0, 33 | 'view_offsets': [np.array([6, 0, -0.5, 0]), 34 | np.array([-6, 0, -0.5, 0])] 35 | } 36 | 37 | # Figure 10 - second row 38 | virtual_cams['0009'] = { 'src_indx': 1, 39 | 'view_offsets': [np.array([-2, 0, -0.5, 0]), # absent 40 | np.array([-5,0, -0.5, 0]), # absent 41 | np.array([-8, 0, -0.5, 0]), # absent 42 | np.array([-11, 0, -0.5, 0])] 43 | } 44 | 45 | # Figure 10 - third row 46 | virtual_cams['0020'] = {'src_indx': 1, 47 | 'view_offsets' :[ 48 | np.array([0, 0, -3.0, 0]), 49 | np.array([0, 0, -4.0, 0]), 50 | np.array([0, 0, -8.0, 0]), 51 | ] 52 | } 53 | 54 | # Figure 10 - fourth row 55 | virtual_cams['0027'] = { 'src_indx': 1, 56 | 'view_offsets': [np.array([0.25,0, 0 , 0]), 57 | np.array([0.5,0, 0, 0]), 58 | np.array([1.0, 0, 0, 0]), 59 | np.array([1.5, 0, 0, 0])] 60 | } 61 | 62 | virtual_cams['default'] = { 'src_indx': 1, 63 | 'view_offsets': [np.array([0.25,0, 0 , 0]), 64 | np.array([0.5,0, 0, 0]), 65 | np.array([1.0, 0, 0, 0]), 66 | np.array([1.5, 0, 0, 0])] 67 | } 68 | 69 | def __init__(self, args): 70 | 71 | #################################### 72 | # Create a DeepMVS wrapper object 73 | #################################### 74 | filename_DeepMVS = os.path.join(args.models_path, 'DeepMVS_final.model') 75 | self.models_path = args.models_path 76 | self.refine_model_path = os.path.join(args.models_path, 'Model_VNPCAT_E33.pth') 77 | self.deepmvs_obj = deepmvs_wrapper.DeepMVSWrapper(filename_DeepMVS, do_filter=True) 78 | 79 | self.dense_crf_params = {'default': {'sigma_xy': 45.0, 'sigma_rgb': 30.0, 'iteration_num': 5, 'compat': 10.0}} 80 | 81 | 82 | def run(self, colmap_seq_path, input_views=[]): 83 | 84 | print('Processing sequence: ', colmap_seq_path) 85 | seq_name = os.path.basename(os.path.normpath(colmap_seq_path)) 86 | 87 | outDir = os.path.join(colmap_seq_path, 'xtreme-view') 88 | if not os.path.exists(outDir): 89 | os.mkdir(outDir) 90 | 91 | # Adjust the dense crf parameters if needed, keyed by the sequence name 92 | if seq_name in self.dense_crf_params: 93 | self.deepmvs_obj.dict_DenseCRF = self.dense_crf_params[seq_name] 94 | else: 95 | self.deepmvs_obj.dict_DenseCRF = self.dense_crf_params['default'] 96 | 97 | list_img, list_depth, list_cam_params \ 98 | = colmap_loader.COLMAPData.read_data_to_list(colmap_seq_path) 99 | 100 | if len(input_views) > 0: 101 | list_img = [list_img[i] for i in input_views] 102 | list_depth = [list_depth[i] for i in input_views] 103 | list_cam_params = [list_cam_params[i] for i in input_views] 104 | 105 | ############################# 106 | # Create our vsynth object 107 | ############################# 108 | view_synthesizer = core.VSynth(list_img, list_cam_params, outDir, 109 | self.deepmvs_obj, list_depth=list_depth, 110 | mode_colmap=True) 111 | 112 | ##################################################### 113 | # Compute the depth probability ( = perform DeepMVS) 114 | # When the depth probabilities are
stored in the working dir, 115 | # it will skip without performing DeepMVS 116 | ##################################################### 117 | view_synthesizer.compute_depth_probability() 118 | 119 | ############################################# 120 | # Create the virtual cameras 121 | ############################################# 122 | view_synthesizer.list_vcams = [] 123 | 124 | if seq_name in self.virtual_cams: 125 | view_offsets = self.virtual_cams[seq_name]['view_offsets'] 126 | src_indx = self.virtual_cams[seq_name]['src_indx'] 127 | else: 128 | view_offsets = self.virtual_cams['default']['view_offsets'] 129 | src_indx = self.virtual_cams['default']['src_indx'] 130 | 131 | for view_offset in view_offsets: 132 | new_vcam = deepcopy(view_synthesizer.list_src_cams[src_indx]) 133 | new_vcam['extrinsic'][:,3] = new_vcam['extrinsic'][:,3] + view_offset 134 | view_synthesizer.list_vcams.append(new_vcam) 135 | 136 | list_todo_index=[] # generate all the cameras 137 | view_synthesizer.do(MHW_SRCV_WEIGHT=False, list_todo_index=list_todo_index) 138 | view_synthesizer.refine(self.refine_model_path, list_todo_index=list_todo_index) 139 | 140 | if __name__ == '__main__': 141 | parser = argparse.ArgumentParser() 142 | parser.add_argument('seq_path', help='the path to the sequence of images.') 143 | parser.add_argument('--models_path', help='the path where the pre-trained models have been downloaded to.', default='/models') 144 | parser.add_argument('--input_views', help='comma-separated list of the indices in the sequence to use as inputs') 145 | args = parser.parse_args() 146 | print(args) 147 | 148 | runner = XtremeViewRunner(args) 149 | input_views = [] 150 | if args.input_views is not None: 151 | input_views = [int(i) for i in args.input_views.split(',')] 152 | runner.run(args.seq_path, input_views) -------------------------------------------------------------------------------- /xtreme-view/run_xtreme_view_all.sh: -------------------------------------------------------------------------------- 1 | python run_xtreme_view.py /data/0005 --input_views=4,6 2 | python run_xtreme_view.py /data/0009 --input_views=4,6 3 | python run_xtreme_view.py /data/0020 --input_views=9,11 4 | python run_xtreme_view.py /data/0027 --input_views=8,9,11,12 5 | python run_xtreme_view.py /data/0000 --input_views=5,6,8,9 -------------------------------------------------------------------------------- /xtreme-view/vsynthlib/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/extreme-view-synth/2820ffdda9f44e70cd2fdd0845ec9145293e4183/xtreme-view/vsynthlib/__init__.py -------------------------------------------------------------------------------- /xtreme-view/vsynthlib/core.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (C) 2019 NVIDIA Corporation. All rights reserved. 3 | Licensed under the NVIDIA Source Code License. See LICENSE.md at https://github.com/NVlabs/extreme-view-synth. 4 | Authors: Inchang Choi, Orazio Gallo, Alejandro Troccoli, Min H. 
Kim, and Jan Kautz 5 | """ 6 | 7 | import os 8 | import numpy as np 9 | import cv2 10 | from scipy.signal import argrelextrema 11 | import imageio 12 | from shutil import copyfile 13 | import torch 14 | import sys 15 | 16 | from vsynthlib import deepmvs_wrapper 17 | from vsynthlib import depth_util 18 | from vsynthlib import refinement 19 | 20 | class VSynth(object): 21 | 22 | DT_THRESHOLD = 0.075 23 | # DT_THRESHOLD = 0.010 24 | VISIBILITY_TEST_THRESHOLD = 0.10 25 | MHW_THRESHOLD = 0.05 26 | DEPTH_AT_INFINITY = 9999999999.0 27 | 28 | def __init__(self, list_src_img, list_src_cams, out_dir, deepmvs_obj, 29 | list_cam_params_for_vpath=[], 30 | n_virtual_cams=10, 31 | list_src_names=[], list_depth=[], n_depths=100, 32 | vcam_mode='extrap_naive', 33 | write_out=True, 34 | mode_colmap=False): 35 | 36 | self.list_src_img = list_src_img 37 | self.list_src_cams = list_src_cams 38 | self.list_depth = list_depth 39 | self.out_dir = out_dir 40 | 41 | self.depth_estimator = deepmvs_obj 42 | 43 | if len(list_cam_params_for_vpath) == 0: 44 | self.list_src_cams_for_vpath = list_src_cams 45 | else: 46 | self.list_src_cams_for_vpath = list_cam_params_for_vpath 47 | 48 | if len(list_src_names) == 0: 49 | self.list_src_names = [] 50 | for i in range(len(list_src_img)): 51 | self.list_src_names.append('%04d'%i) 52 | else: 53 | self.list_src_names = list_src_names 54 | 55 | self.params = dict() 56 | self.params['n_virtual_cams'] = n_virtual_cams 57 | self.params['n_depths'] = n_depths 58 | height, width, _ = list_src_img[0].shape 59 | self.params['height'] = height 60 | self.params['width'] = width 61 | self.params['vcam_mode'] = vcam_mode 62 | self.params['write_out'] = write_out 63 | self.params['mode_colmap'] = mode_colmap 64 | 65 | if self.params['write_out']: 66 | self.set_out_dirs() 67 | self.save_inputs() 68 | 69 | 70 | def set_out_dirs(self): 71 | if not os.path.exists(self.out_dir): 72 | os.mkdir(self.out_dir) 73 | 74 | self.out_dir_dp = self.out_dir + '/dp' 75 | self.out_dir_input = self.out_dir + '/input' 76 | self.out_dir_output = self.out_dir + '/output' 77 | self.out_dir_synth_obj = self.out_dir + '/synth_obj' 78 | self.out_dir_vcams = self.out_dir + '/vcam' 79 | self.out_dir_refinement = self.out_dir + '/refinement' 80 | self.out_dir_2nd_synth_obj = self.out_dir + '/acc_synth_obj' 81 | self.out_dir_2nd_output = self.out_dir + '/acc_output' 82 | self.out_dir_2nd_refinement = self.out_dir + '/acc_refinement' 83 | self.out_dir_back_to_front_synth = self.out_dir + '/back_to_front' 84 | 85 | if not os.path.exists(self.out_dir_back_to_front_synth): 86 | os.mkdir(self.out_dir_back_to_front_synth) 87 | 88 | 89 | def save_inputs(self): 90 | if not os.path.exists(self.out_dir_input): 91 | os.mkdir(self.out_dir_input) 92 | 93 | for i in range(len(self.list_src_img)): 94 | img_i = self.list_src_img[i] 95 | cam_i = self.list_src_cams[i] 96 | 97 | # save dp_i to a file 98 | f_img_i = '%s/img_%s.png'%(self.out_dir_input, self.list_src_names[i]) 99 | f_cam_i = '%s/cam_%s.npy'%(self.out_dir_input, self.list_src_names[i]) 100 | 101 | np.save(f_cam_i, cam_i) 102 | imageio.imwrite(f_img_i, img_i) 103 | 104 | def save_cam_params(self, vcam_path): 105 | if not os.path.exists(self.out_dir_vcams): 106 | os.mkdir(self.out_dir_vcams) 107 | 108 | for cam_idx, vcam in enumerate(vcam_path): 109 | f_vcam = '%s/%04d.npy' % (self.out_dir_vcams, cam_idx) 110 | np.save(f_vcam, vcam) 111 | 112 | 113 | 114 | 115 | def compute_depth_probability(self, load_if_exists=True, 116 | LF_dataset_obj=None, hint=''): 117 | if 
self.params['write_out']: 118 | if not os.path.exists(self.out_dir_dp): 119 | os.mkdir(self.out_dir_dp) 120 | 121 | # compute the depth range 122 | self.compute_depth_range(hint=hint) 123 | 124 | 125 | if self.params['write_out'] and load_if_exists: 126 | # check if there are precomputed depth probabilities 127 | list_loaded_dp = [] 128 | all_loaded = True 129 | for i in range(len(self.list_src_img)): 130 | filename = '%s/dp_%s.npy' % (self.out_dir_dp, self.list_src_names[i]) 131 | if os.path.exists(filename): 132 | dp_i = np.load(filename) 133 | print(filename) 134 | list_loaded_dp.append(dp_i) 135 | 136 | if i == 0: 137 | _, dmap_color_i, _, color_depth_max = depth_util.generate_depthmap(dp_i, 138 | self.params['min_disp'], 139 | self.params['disp_step'], 140 | self.DEPTH_AT_INFINITY) 141 | self.color_depth_max = color_depth_max 142 | 143 | else: 144 | all_loaded = False 145 | 146 | # if exists, load and return 147 | if all_loaded: 148 | print('The depth probabilities are loaded!') 149 | self.list_depth_prob = list_loaded_dp 150 | return 151 | 152 | self.list_depth_prob = [] 153 | 154 | for i in range(len(self.list_src_img)): 155 | 156 | 157 | import time 158 | start_time = time.time() 159 | 160 | dp_i = self.depth_estimator.compute(self.list_src_img, self.list_src_cams, i, 161 | self.params['min_disp'], 162 | self.params['disp_step'], 163 | self.DEPTH_AT_INFINITY) 164 | 165 | elapsed_time = time.time() - start_time 166 | print('DeepMVS: ' + time.strftime("%H:%M:%S", time.gmtime(elapsed_time))) 167 | 168 | self.list_depth_prob.append(dp_i) 169 | 170 | # save dp_i to a file 171 | if self.params['write_out']: 172 | filename = '%s/dp_%s.npy' % (self.out_dir_dp, self.list_src_names[i]) 173 | print(filename) 174 | np.save(filename, dp_i) 175 | 176 | # gen depth map 177 | if i == 0: 178 | _, dmap_color_i, _, color_depth_max = depth_util.generate_depthmap(dp_i, 179 | self.params['min_disp'], 180 | self.params['disp_step'], 181 | self.DEPTH_AT_INFINITY) 182 | self.color_depth_max = color_depth_max 183 | else: 184 | _, dmap_color_i, _ = depth_util.generate_depthmap(dp_i, 185 | self.params['min_disp'], 186 | self.params['disp_step'], 187 | self.DEPTH_AT_INFINITY, 188 | color_max_val=color_depth_max) 189 | filename = '%s/dmap_%s.png' % (self.out_dir_dp, self.list_src_names[i]) 190 | imageio.imwrite(filename, dmap_color_i) 191 | 192 | 193 | def compute_depth_range(self, hint=''): 194 | max_depth = 0.0 195 | min_depth = 9999999.0 196 | 197 | 198 | list_data = np.array([]) 199 | for img_depth in self.list_depth: 200 | valid_mask = np.logical_not(np.isinf(img_depth)) 201 | valid_mask = np.logical_and(valid_mask, img_depth != 0.0) 202 | list_data = np.append(list_data, img_depth[valid_mask]) 203 | 204 | hist, bins = np.histogram(list_data, bins=100) 205 | n_data = len(list_data) 206 | threshold_max = n_data*0.98 207 | threshold_min = n_data*0.02 208 | sum_hist = 0 209 | min_depth = np.min(list_data) 210 | max_depth = np.max(list_data) 211 | print('min: %f / max: %f (before histogram)'%(min_depth, max_depth)) 212 | 213 | min_found = False 214 | for bin_idx, hist_val in enumerate(hist): 215 | sum_hist += hist_val 216 | if not min_found and sum_hist > threshold_min: 217 | if bin_idx >= 1: 218 | min_depth = bins[bin_idx - 1] 219 | else: 220 | min_depth = bins[bin_idx] 221 | min_found = True 222 | 223 | if sum_hist > threshold_max: 224 | max_depth = bins[bin_idx + 1] 225 | break 226 | 227 | # museum (2.5, 15.0) 228 | # our_0046 (30, 800) 229 | # min_depth = 2.5 230 | # max_depth = 15.0 231 | 232 | if hint == 
'museum1': 233 | min_depth = 2.5 234 | max_depth = 15.0 235 | 236 | print('min: %f / max: %f (after histogram)' % (min_depth, max_depth)) 237 | 238 | 239 | print('max depth: %f' % (max_depth)) 240 | print('min depth: %f' % (min_depth)) 241 | max_disp = 1.0 / (min_depth + 1e-06) 242 | min_disp = 1.0 / (max_depth + 1e-06) 243 | disp_step = (max_disp - min_disp) / (self.params['n_depths'] - 1) 244 | print('disp step: ' + str(disp_step)) 245 | 246 | # save to params 247 | self.params['max_depth'] = max_depth 248 | self.params['min_depth'] = min_depth 249 | self.params['max_disp'] = max_disp 250 | self.params['min_disp'] = min_disp 251 | self.params['disp_step'] = disp_step 252 | 253 | return max_depth, min_depth, max_disp, min_disp, disp_step 254 | 255 | 256 | def do(self, list_idx=None, MHW=False, save_dp=False, MHW_SRCV_WEIGHT=False, list_todo_index=[], 257 | winner_takes_all=False): 258 | 259 | if self.params['write_out']: 260 | # create directories 261 | if not os.path.exists(self.out_dir_output): 262 | os.mkdir(self.out_dir_output) 263 | 264 | if not os.path.exists(self.out_dir_synth_obj): 265 | os.mkdir(self.out_dir_synth_obj) 266 | 267 | 268 | for cam_idx, cam in enumerate(self.list_vcams): 269 | 270 | if list_todo_index != [] and not (cam_idx in list_todo_index): 271 | continue 272 | synth_obj = self.do_single_image(cam, cam_idx, 273 | winner_takes_all) 274 | 275 | if self.params['write_out']: 276 | if list_idx is None: 277 | id = cam_idx 278 | else: 279 | id = list_idx[cam_idx] 280 | 281 | # save 282 | f_synth_obj = '%s/%04d.npz'%(self.out_dir_synth_obj, id) 283 | np.savez_compressed(f_synth_obj, synth_obj) 284 | f_img_synth = '%s/vsynth_%04d.png'%(self.out_dir_output, id) 285 | imageio.imwrite(f_img_synth, synth_obj['img_synth']) 286 | f_depth_synth = '%s/dmap_%04d.png' % (self.out_dir_output, id) 287 | 288 | if not MHW: 289 | imageio.imwrite(f_depth_synth, 290 | depth_util.apply_colormap_to_depth(synth_obj['depth_map'], 291 | self.DEPTH_AT_INFINITY, 292 | max_depth=self.color_depth_max)) 293 | if save_dp: 294 | f_dp_synth = '%s/dp_%04d.npy'%(self.out_dir_output, id) 295 | np.save(f_dp_synth, synth_obj['dp']) 296 | 297 | 298 | def do_single_image(self, dest_cam, idx=0, 299 | winner_takes_all=False, 300 | save_dp=False): 301 | # transform the depth probability 302 | 303 | import time 304 | start = time.time() 305 | 306 | 307 | dp_dest, list_warped_prob\ 308 | = transform_cost_volume_cuda(self.list_src_cams, self.list_depth_prob, 309 | dest_cam, 310 | self.params['n_depths'], 311 | self.params['height'], 312 | self.params['width'], 313 | self.params['min_disp'], 314 | self.params['disp_step'], 315 | self.params['max_depth'], 316 | self.params['min_depth']) 317 | 318 | end = time.time() 319 | print("Transform_cost_volume() took " + str(end - start)) 320 | 321 | 322 | # generate PSV 323 | start = time.time() 324 | PSV_dest = build_PSV(self.list_src_img, self.list_src_cams, 325 | dest_cam, 326 | self.params['n_depths'], 327 | self.params['height'], 328 | self.params['width'], 329 | self.params['min_disp'], 330 | self.params['disp_step'], 331 | self.params['max_depth'], 332 | USE_DICT=True) 333 | end = time.time() 334 | print("build_PSV() took " + str(end - start)) 335 | 336 | # perform view synthesis 337 | start = time.time() 338 | img_synth, list_new_vies, visibility_map,\ 339 | depth_map_P1, depth_map_color_P1,\ 340 | depth_map_P2, depth_map_color_P2\ 341 | = synthesize_a_view(dest_cam, PSV_dest, dp_dest, 342 | self.list_src_img, self.list_src_cams, 343 | self.list_depth_prob, 344 | 
self.params['min_disp'], 345 | self.params['disp_step'], 346 | self.DEPTH_AT_INFINITY, 347 | self.params['height'], 348 | self.params['width'], 349 | self.params['n_depths'], 350 | with_ULR_weight=True, 351 | color_max_depth=self.color_depth_max, 352 | winner_takes_all=winner_takes_all) 353 | 354 | 355 | 356 | end = time.time() 357 | print("synthesize_a_view() took " + str(end - start)) 358 | 359 | 360 | # save 361 | synth_obj = {'img_synth': img_synth, 362 | 'visibility_map': visibility_map, 363 | 'depth_map': depth_map_P1, 364 | 'depth_map_P1': depth_map_P1, 365 | 'depth_map_P2': depth_map_P2, 366 | 'view_idx': idx, 367 | 'dest_cam': dest_cam} 368 | 369 | if save_dp: 370 | synth_obj['dp'] = dp_dest 371 | 372 | return synth_obj 373 | 374 | def refine(self, filename_weight, do_stereo=False, 375 | patch_size=64, list_todo_index=[], 376 | custom_outdir=''): 377 | refiner = refinement.DeepViewRefiner(filename_weight, 378 | self.out_dir, 379 | self.out_dir_refinement, 380 | patch_size=patch_size) 381 | 382 | for cam_idx, cam in enumerate(self.list_vcams): 383 | if list_todo_index != [] and not (cam_idx in list_todo_index): 384 | continue 385 | f_synth_obj = '%s/%04d.npz'%(self.out_dir_synth_obj, cam_idx) 386 | synth_obj = np.load(f_synth_obj, allow_pickle=True) 387 | synth_obj = synth_obj['arr_0'].item() 388 | 389 | refiner.do(synth_obj, self.list_src_img, self.list_src_cams, cam_idx, 390 | do_stereo=do_stereo, custom_outdir=custom_outdir) 391 | 392 | 393 | 394 | def build_PSV(list_src_img, list_src_cams, 395 | cam_dest, num_depths, height, width, 396 | min_disp, disp_step, max_depth, USE_DICT=False): 397 | 398 | n_neighbors = len(list_src_img) 399 | 400 | if USE_DICT: 401 | PSV = {} 402 | else: 403 | PSV = np.zeros(shape=[n_neighbors, num_depths, height, width, 3], dtype=np.float32) 404 | 405 | int_dest = cam_dest['intrinsic'] 406 | fx_dest = int_dest[0, 0] 407 | fy_dest = int_dest[1, 1] 408 | cx_dest = int_dest[0, 2] 409 | cy_dest = int_dest[1, 2] 410 | ext_dest = cam_dest['extrinsic'] 411 | inv_ext_dest = np.linalg.inv(ext_dest) 412 | 413 | # for each neighbor image 414 | counter_img = 0 415 | for i in range(len(list_src_img)): 416 | img_i = list_src_img[i] 417 | cam_i = list_src_cams[i] 418 | # get the parameters 419 | int_i = cam_i['intrinsic'] 420 | fx_i = int_i[0, 0] 421 | fy_i = int_i[1, 1] 422 | cx_i = int_i[0, 2] 423 | cy_i = int_i[1, 2] 424 | ext_i = cam_i['extrinsic'] 425 | 426 | # 4 Corners on the virtual camera to get te 4 rays that intersect with the depth plane 427 | src_pts = np.reshape([0, 0, 428 | width, 0, 429 | width, height, 430 | 0, height], (4, 2)) 431 | 432 | if USE_DICT: 433 | PSV_i = np.zeros(shape=[num_depths, height, width, 3], dtype=np.float32) 434 | 435 | # for each depth plane 436 | for d in range(num_depths): 437 | 438 | disp = d * disp_step + min_disp 439 | if d == 0: 440 | depth = max_depth 441 | else: 442 | depth = 1.0 / disp 443 | 444 | # print(depth) 445 | 446 | # compute dst points 447 | dst_pts = np.zeros((4, 2)) 448 | counter_pt = 0 449 | for p in src_pts: 450 | p_3D_ref = np.asarray([(depth * p[0] - depth * cx_dest) / fx_dest, 451 | (depth * p[1] - depth * cy_dest) / fy_dest, 452 | depth]) 453 | p_4D_ref = np.array([p_3D_ref[0], p_3D_ref[1], p_3D_ref[2], 1.0]) 454 | p_4D_world = inv_ext_dest.dot(p_4D_ref) 455 | p_4D_i = ext_i.dot(p_4D_world) 456 | dst = np.asarray([cx_i + fx_i * p_4D_i[0] / p_4D_i[2], cy_i + fy_i * p_4D_i[1] / p_4D_i[2]]) 457 | dst_pts[counter_pt, :] = dst.squeeze() 458 | counter_pt += 1 459 | 460 | # compute homography 461 | M, mask = 
cv2.findHomography(dst_pts, src_pts) 462 | # warp the image 463 | result = cv2.warpPerspective(img_i, M, (width, height), 464 | flags=cv2.INTER_LINEAR, 465 | borderMode=cv2.BORDER_REPLICATE) 466 | # cv2.imshow("img_ref", img_ref) 467 | # cv2.imshow("PSV of img %02d" % (i), result) 468 | # cv2.waitKey() 469 | if USE_DICT: 470 | PSV_i[d, :, :, :] = result 471 | else: 472 | PSV[counter_img, d, :, :, :] = result 473 | 474 | if USE_DICT: 475 | PSV[i] = PSV_i 476 | counter_img += 1 477 | 478 | return PSV 479 | 480 | 481 | 482 | def transform_cost_volume_cuda(list_src_cams, list_src_DPs, 483 | dest_cam, 484 | num_depths, height, width, 485 | min_disp, disp_step, max_depth, min_depth, 486 | do_normalization=True): 487 | 488 | 489 | int_dest = dest_cam['intrinsic'] 490 | ext_dest = dest_cam['extrinsic'] 491 | inv_int_dest = np.linalg.inv(int_dest) 492 | torch_inv_int_dest = torch.from_numpy(inv_int_dest) 493 | torch_inv_int_dest = torch_inv_int_dest.cuda() 494 | inv_ext_dest = np.linalg.inv(ext_dest) 495 | torch_inv_ext_dest = torch.from_numpy(inv_ext_dest) 496 | torch_inv_ext_dest = torch_inv_ext_dest.cuda() 497 | 498 | list_warped_prob = [] 499 | sum_warped_prob = np.zeros(shape=(num_depths, height, width)) 500 | view_counter = np.zeros(shape=(num_depths, height, width)) 501 | 502 | # define the voxel grid 503 | Z, Y, X = np.meshgrid(np.arange(0, num_depths), np.arange(0, height), np.arange(0, width), indexing='ij') 504 | Z = Z * disp_step 505 | zero_mask = (Z == 0) 506 | Z[Z != 0] += min_disp 507 | Z[Z != 0] = 1.0 / Z[Z != 0] 508 | # Z[Z != 0] += min_disp 509 | Z[zero_mask] = max_depth 510 | X = X * Z 511 | Y = Y * Z 512 | points = np.array([X.reshape(-1), Y.reshape(-1), Z.reshape(-1)]) 513 | points = np.transpose(points) 514 | 515 | torch_points = torch.from_numpy(points) 516 | torch_points = torch_points.cuda() 517 | 518 | 519 | for src_idx, cam_param_src in enumerate(list_src_cams): 520 | import time 521 | start = time.time() 522 | start_multiply = time.time() 523 | print('Transforming the cost volume of %02d' % (src_idx)) 524 | src_prob = list_src_DPs[src_idx] 525 | 526 | # get the parameters 527 | ext_i = cam_param_src['extrinsic'] 528 | torch_ext_i = torch.from_numpy(ext_i) 529 | torch_ext_i = torch_ext_i.cuda() 530 | int_i = cam_param_src['intrinsic'] 531 | torch_int_i = torch.from_numpy(int_i) 532 | torch_int_i = torch_int_i.cuda() 533 | 534 | warped_prob = np.zeros(shape=(num_depths, height, width)) 535 | 536 | transformed_points = torch.matmul(torch_points, torch_inv_int_dest.t()) 537 | transformed_points = torch.matmul(transformed_points, torch_inv_ext_dest[0:3, 0:3].t()) 538 | transformed_points = torch.add(transformed_points, torch_inv_ext_dest[0:3, 3]) 539 | transformed_points = torch.matmul(transformed_points, torch_ext_i[0:3, 0:3].t()) 540 | transformed_points = torch.add(transformed_points, torch_ext_i[0:3, 3]) 541 | # transformed_points = transformed_points[:, 0:3] 542 | transformed_points = torch.matmul(transformed_points, torch_int_i.t()) 543 | X_src = transformed_points[:, 0] / transformed_points[:, 2] 544 | Y_src = transformed_points[:, 1] / transformed_points[:, 2] 545 | Z_src = transformed_points[:, 2] 546 | 547 | X_src = X_src.cpu().numpy() 548 | Y_src = Y_src.cpu().numpy() 549 | Z_src = Z_src.cpu().numpy() 550 | 551 | 552 | end_multipy = time.time() 553 | print('\t- Multiplication Iteration Took: ' + str(end_multipy - start_multiply)) 554 | 555 | start_round = time.time() 556 | 557 | X_src = X_src.reshape((num_depths, height, width)) 558 | Y_src = 
Y_src.reshape((num_depths, height, width)) 559 | Z_src = Z_src.reshape((num_depths, height, width)) 560 | disp_src = 1.0 / Z_src - min_disp 561 | 562 | round_Y_src = np.round(Y_src).astype(np.int) 563 | round_X_src = np.round(X_src).astype(np.int) 564 | round_Z_src = np.round(disp_src / disp_step).astype(np.int) 565 | round_Z_src[Z_src >= max_depth] = 0 566 | round_Z_src[Z_src <= min_depth] = num_depths - 1 567 | 568 | valid_index = np.bitwise_and(round_Y_src >= 0, round_Y_src < height) 569 | valid_index = np.bitwise_and(valid_index, round_X_src >= 0) 570 | valid_index = np.bitwise_and(valid_index, round_X_src < width) 571 | valid_index = np.bitwise_and(valid_index, round_Z_src >= 0) 572 | valid_index = np.bitwise_and(valid_index, round_Z_src < num_depths) 573 | 574 | end_round = time.time() 575 | print('\t- Round Iteration Took: ' + str(end_round - start_round)) 576 | 577 | start_warp = time.time() 578 | warped_prob[valid_index] = src_prob[round_Z_src[valid_index], 579 | round_Y_src[valid_index], 580 | round_X_src[valid_index]] 581 | end_warp = time.time() 582 | 583 | print('\t- Warp Iteration Took: ' + str(end_warp - start_warp)) 584 | 585 | view_counter[valid_index] += 1.0 586 | list_warped_prob.append(warped_prob) 587 | sum_warped_prob += warped_prob 588 | 589 | end = time.time() 590 | print('\t\t- One Iteration Took: ' + str(end - start)) 591 | 592 | # save to a file 593 | # np.save('%s/warped_prob_from_%02d.npy' % (out_dir, prob_src_id), warped_prob) 594 | 595 | # # save the depth probability 596 | # for i in range(num_depths): 597 | # prob = warped_prob[i] 598 | # prob_color = cv2.applyColorMap((prob * 255).astype(np.uint8), cv2.COLORMAP_JET) 599 | # cv2.imshow("prob", prob_color) 600 | # cv2.waitKey() 601 | 602 | if do_normalization: 603 | sum_warped_prob = np.multiply(sum_warped_prob, 1.0/(view_counter + 1e-10)) 604 | dp_sq_sum = np.sqrt(np.sum(np.multiply(sum_warped_prob, sum_warped_prob), axis=0)) + 1e-10 605 | sum_warped_prob = sum_warped_prob / dp_sq_sum 606 | 607 | # ## remove some inconfident ray 608 | # confident_ray = np.sum(sum_warped_prob, axis=0) >= 0.75 609 | # confident_ray = np.expand_dims(confident_ray, axis=0) 610 | # confident_ray = np.tile(confident_ray, [num_depths, 1, 1]) 611 | # # valid_index = np.bitwise_and(valid_index, confident_ray) 612 | # warped_prob[np.logical_not(confident_ray)] = 0.0 613 | # ## 614 | 615 | 616 | 617 | return sum_warped_prob, list_warped_prob 618 | 619 | 620 | def synthesize_a_view(cam_dest, PSV, depth_prob, 621 | list_img, list_cam_params, list_depth_prob, 622 | min_disp, disp_step, depth_at_infinity, 623 | height, width, num_depths, 624 | list_validity_maps=[], with_ULR_weight=False, 625 | color_max_depth=None, 626 | winner_takes_all=False): 627 | my_comparator = depth_util.my_comparator_greater 628 | 629 | int_dest = cam_dest['intrinsic'] 630 | inv_int_dest = np.linalg.inv(int_dest) 631 | ext_dest = cam_dest['extrinsic'] 632 | inv_ext_dest = np.linalg.inv(cam_dest['extrinsic']) 633 | campos_dest = inv_ext_dest[0:3, 3] 634 | camdir_dest = ext_dest[2, 0:3] 635 | 636 | # check zero_prob_idx 637 | # sum_prob = np.sqrt(np.sum(depth_prob*depth_prob, axis=0)) 638 | # nonzero_prob_idx = sum_prob > 0.35 639 | # nonzero_prob_idx = np.expand_dims(nonzero_prob_idx, -1) 640 | # nonzero_prob_idx = np.tile(nonzero_prob_idx, [1, 1, 3]) 641 | abs_max = np.max(depth_prob, axis=0) 642 | valid_prob_1d = abs_max >= 0.10 643 | valid_prob = np.expand_dims(valid_prob_1d, -1) 644 | valid_prob = np.tile(valid_prob, [1, 1, 3]) 645 | if with_ULR_weight: 646 | 
ULR_weight_sum = 0.0 647 | 648 | avg_new_view = np.zeros((height, width, 3), dtype=np.float) 649 | normalizer = np.zeros((height, width, 3), dtype=np.float) 650 | visibility_map = np.zeros((height, width), dtype=np.float) 651 | 652 | # for each PSV_k, perfrom view synthesis 653 | dict_new_views = dict() 654 | list_new_views = [] 655 | 656 | # compute the weight 657 | import time 658 | start_time = time.time() 659 | # weight_volume = np.zeros((num_depths, height, width, 3), dtype=np.float) 660 | # for j in range(height): 661 | # for i in range(width): 662 | # data = depth_prob[:, j, i] 663 | # idx_local_max = argrelextrema(data, my_comparator, order=5) 664 | # max_index = np.argmax(data) 665 | # global_max_value = data[max_index] 666 | # 667 | # if len(idx_local_max) == 0: 668 | # depth_idx = max_index 669 | # else: 670 | # idx_local_max = idx_local_max[0] 671 | # if len(idx_local_max) == 0 or len(idx_local_max) == 1: 672 | # depth_idx = max_index 673 | # else: 674 | # for idx in reversed(idx_local_max): 675 | # local_max_value = data[idx] 676 | # if local_max_value >= global_max_value * VSynth.DT_THRESHOLD: 677 | # depth_idx = idx 678 | # break 679 | # 680 | # weight_volume[depth_idx, j, i, :] = 1.0 681 | 682 | weight_volume = np.zeros((num_depths, height, width, 3), dtype=np.float) 683 | weight_volume_2 = np.zeros((num_depths, height, width, 3), dtype=np.float) 684 | obj_local_max = argrelextrema(depth_prob, my_comparator, order=3, mode='wrap') 685 | obj_global_max = np.argmax(depth_prob, axis=0) 686 | 687 | first_closest_peak = np.ones(shape=(height, width))*(-1) 688 | second_closest_peak = np.ones(shape=(height, width))*(-1) 689 | 690 | for i in reversed(range(len(obj_local_max[0]))): 691 | idx_d = obj_local_max[0][i] 692 | idx_y = obj_local_max[1][i] 693 | idx_x = obj_local_max[2][i] 694 | 695 | local_max_value = depth_prob[idx_d, idx_y, idx_x] 696 | global_max_value = depth_prob[obj_global_max[idx_y, idx_x], idx_y, idx_x] 697 | 698 | if local_max_value >= global_max_value * VSynth.DT_THRESHOLD: 699 | fp_exists = first_closest_peak[idx_y, idx_x] != -1 700 | sp_exists = second_closest_peak[idx_y, idx_x] != -1 701 | if sp_exists: 702 | continue 703 | if not sp_exists and not fp_exists: 704 | first_closest_peak[idx_y, idx_x] = idx_d 705 | weight_volume[idx_d, idx_y, idx_x] = 1.0 706 | elif fp_exists and not sp_exists: 707 | second_closest_peak[idx_y, idx_x] = idx_d 708 | weight_volume_2[idx_d, idx_y, idx_x] = 1.0 709 | 710 | fp_empty = first_closest_peak == -1 711 | sp_empty = second_closest_peak == -1 712 | j_grid, i_grid = np.meshgrid(range(0, height), range(0, width), indexing='ij') 713 | weight_volume[obj_global_max[fp_empty], j_grid[fp_empty], i_grid[fp_empty]] = 1.0 714 | weight_volume_2[obj_global_max[sp_empty], j_grid[sp_empty], i_grid[sp_empty]] = 1.0 715 | 716 | elapsed_time = time.time() - start_time 717 | print(time.strftime("%H:%M:%S", time.gmtime(elapsed_time))) 718 | 719 | # compute the depth map 720 | ref_depth_map, ref_depth_map_colored, zero_disp \ 721 | = depth_util.generate_depthmap(weight_volume[:,:,:,0], 722 | min_disp, disp_step, depth_at_infinity, color_max_val=color_max_depth) 723 | ref_depth_map_2, ref_depth_map_colored_2, _\ 724 | = depth_util.generate_depthmap(weight_volume_2[:,:,:,0], 725 | min_disp, disp_step, depth_at_infinity, color_max_val=color_max_depth) 726 | 727 | # filter the depthmap 728 | invalid_prob_1d = np.logical_not(valid_prob_1d) 729 | invalid_prob = np.logical_not(valid_prob) 730 | ref_depth_map[invalid_prob_1d] = 0.0 731 | 
ref_depth_map_colored[invalid_prob] = 0.0 732 | ref_depth_map_2[invalid_prob_1d] = 0.0 733 | ref_depth_map_colored_2[invalid_prob] = 0.0 734 | 735 | 736 | # imageio.imwrite('./ref_depth_map_colored.png', ref_depth_map_colored) 737 | # imageio.imwrite('./ref_depth_map_colored_2.png', ref_depth_map_colored_2) 738 | 739 | # compute weights 740 | if with_ULR_weight: 741 | 742 | # compute max distance and min distance 743 | max_dist = -9999999.0 744 | min_dist = 9999999.0 745 | min_idx = -1 746 | for src_idx in range(len(list_img)): 747 | # get the parameters ready 748 | cam_param_src = list_cam_params[src_idx] 749 | ext_i = cam_param_src['extrinsic'] 750 | 751 | # positional weight 752 | inv_ext_i = np.linalg.inv(ext_i) 753 | campos_i = inv_ext_i[0:3, 3] 754 | campos_diff = campos_i - campos_dest 755 | campos_dist = np.sqrt(np.sum(campos_diff * campos_diff)) 756 | if campos_dist > max_dist: 757 | max_dist = campos_dist 758 | if campos_dist < min_dist: 759 | min_dist = campos_dist 760 | min_idx = src_idx 761 | 762 | # compute weights 763 | list_ULR_weights = [] 764 | for src_idx in range(len(list_img)): 765 | # get the parameters ready 766 | cam_param_src = list_cam_params[src_idx] 767 | ext_i = cam_param_src['extrinsic'] 768 | 769 | # positional weight 770 | inv_ext_i = np.linalg.inv(ext_i) 771 | campos_i = inv_ext_i[0:3, 3] 772 | campos_diff = campos_i - campos_dest 773 | campos_dist = np.sqrt(np.sum(campos_diff * campos_diff))/max_dist 774 | # campos_weight = np.exp(-campos_dist / (0.40 * 0.40)) 775 | campos_weight = 100*np.exp(-campos_dist / (0.2 * 0.2)) 776 | 777 | 778 | 779 | # directional weight 780 | camdir_i = ext_i[2, 0:3] 781 | camdir_dot = camdir_i * camdir_dest 782 | camdir_dist = np.sqrt(np.sum(camdir_dot * camdir_dot)) 783 | if camdir_dist < 0.5: 784 | camdir_weight = 0.0 785 | else: 786 | # camdir_weight = np.exp(-camdir_dist / (0.8 * 0.8)) 787 | camdir_weight = 100*np.exp(-camdir_dist / (0.4 * 0.4)) 788 | 789 | if winner_takes_all: 790 | if src_idx == min_idx: 791 | campos_weight = 1.0 792 | camdir_weight = 1.0 793 | else: 794 | campos_weight = 0.1 795 | camdir_weight = 0.1 796 | 797 | ULR_weight = campos_weight * camdir_weight 798 | print("%f / %f / %f"%(campos_weight, camdir_weight, ULR_weight)) 799 | list_ULR_weights.append(ULR_weight) 800 | 801 | for src_idx in range(len(list_img)): 802 | # compute the depth map 803 | depth_map_k, _, _, _ \ 804 | = depth_util.generate_depthmap(list_depth_prob[src_idx], min_disp, disp_step, depth_at_infinity) 805 | 806 | # get the parameters ready 807 | cam_param_src = list_cam_params[src_idx] 808 | # get the parameters 809 | ext_i = cam_param_src['extrinsic'] 810 | int_i = cam_param_src['intrinsic'] 811 | 812 | # reproject to 813 | Y, X = np.meshgrid(np.arange(0, height), np.arange(0, width), indexing='ij') 814 | Z = ref_depth_map 815 | X = X * Z 816 | Y = Y * Z 817 | points = np.array([X.reshape(-1), Y.reshape(-1), Z.reshape(-1)]) 818 | points = np.matmul(inv_int_dest, points) 819 | points = np.vstack([points, np.ones((1, height * width))]) 820 | points = np.matmul(inv_ext_dest, points) 821 | points = np.matmul(ext_i, points) 822 | points = points[0:3] 823 | points = np.matmul(int_i, points) 824 | X_src = points[0] / points[2] 825 | Y_src = points[1] / points[2] 826 | Z_src = points[2] 827 | 828 | X_src = X_src.reshape((height, width)) 829 | Y_src = Y_src.reshape((height, width)) 830 | Z_src = Z_src.reshape((height, width)) 831 | # make an exception for sky 832 | Z_src[zero_disp] = depth_at_infinity 833 | Z_src[Z_src > depth_at_infinity] = 
depth_at_infinity 834 | 835 | round_Y_src = np.round(Y_src).astype(np.int) 836 | round_X_src = np.round(X_src).astype(np.int) 837 | valid_index = np.bitwise_and(round_Y_src >= 0, round_Y_src < height) 838 | valid_index = np.bitwise_and(valid_index, round_X_src >= 0) 839 | valid_index = np.bitwise_and(valid_index, round_X_src < width) 840 | 841 | # warped_depth_map_k = np.zeros(shape=(height, width)) 842 | # warped_depth_map_k[valid_index] = depth_map_k[round_Y_src[valid_index], 843 | # round_X_src[valid_index]] 844 | # 845 | # depth_diff = ref_depth_map - warped_depth_map_k 846 | # invalid_depth = depth_diff > ref_depth_map*VISIBILITY_TEST_THRESHOLD 847 | # invalid_depth = depth_diff > 0 848 | depth_diff = np.zeros(shape=(height, width)) 849 | depth_diff[valid_index] = Z_src[valid_index]\ 850 | - depth_map_k[round_Y_src[valid_index], 851 | round_X_src[valid_index]] 852 | invalid_depth = depth_diff > Z_src*VSynth.VISIBILITY_TEST_THRESHOLD 853 | valid_depth = np.logical_not(invalid_depth) 854 | valid_index = np.logical_and(valid_index, valid_depth) 855 | 856 | # get PSV 857 | PSV_k = PSV[src_idx] 858 | 859 | if list_validity_maps != []: 860 | validity_map = list_validity_maps[src_idx] 861 | check_validity = np.zeros(shape=(height, width)) 862 | check_validity[valid_index] = validity_map[round_Y_src[valid_index], 863 | round_X_src[valid_index]] 864 | valid_index = np.logical_and(valid_index, check_validity) 865 | 866 | # perform 867 | valid_index = valid_index.astype(np.float) 868 | valid_index = np.expand_dims(valid_index, -1) 869 | valid_index = np.tile(valid_index, [1, 1, 3]) 870 | # valid_index = np.logical_and(valid_index, nonzero_prob_idx) 871 | valid_index = np.logical_and(valid_index, valid_prob) 872 | 873 | 874 | view_k = np.multiply(PSV_k, weight_volume) 875 | view_k = np.sum(view_k, 0) 876 | view_k = view_k * valid_index 877 | # import imageio 878 | # imageio.imwrite('./view_%04d.png' % src_idx, view_k) 879 | # imageio.imwrite('./view_%04d_mask.png' % src_idx, valid_index.astype(np.float)) 880 | 881 | if with_ULR_weight: 882 | # apply the weight 883 | ULR_weight = list_ULR_weights[src_idx] 884 | ULR_weight_sum += ULR_weight 885 | 886 | avg_new_view += view_k*ULR_weight 887 | normalizer += valid_index*ULR_weight 888 | 889 | else: 890 | avg_new_view += view_k 891 | normalizer += valid_index 892 | 893 | visibility_map += valid_index.astype(np.float)[:,:,0] 894 | dict_new_views[src_idx] = view_k 895 | list_new_views.append(view_k) 896 | 897 | 898 | # fig = plt.figure() 899 | # plt.subplot(121) 900 | # plt.imshow(img_k) 901 | # plt.title("Source Image") 902 | # plt.subplot(122) 903 | # plt.imshow(view_k) 904 | # plt.title("View Synth [%02d]" % (k)) 905 | 906 | zero_pixels = avg_new_view == 0.0 907 | avg_new_view = np.multiply(avg_new_view, 1.0 / (normalizer + 1e-10)) 908 | avg_new_view[zero_pixels] = 0.0 909 | 910 | 911 | # remove area where only one view sees 912 | # one_view_map = (visibility_map == 1.0) 913 | # one_view_map = np.expand_dims(one_view_map, -1) 914 | # one_view_map = np.tile(one_view_map, (1, 1, 3)) 915 | # avg_new_view[one_view_map] = 0.0 916 | 917 | validity_map = visibility_map > 1.0 918 | visibility_map /= float(len(list_img)) 919 | 920 | # remove outliers 921 | avg_new_view[avg_new_view > 1.0] = 1.0 922 | avg_new_view[avg_new_view < 0.0] = 0.0 923 | 924 | # imageio.imwrite('./view_merged.png', avg_new_view) 925 | # fig = plt.figure() 926 | # plt.imshow(visibility_map) 927 | # plt.show() 928 | if list_validity_maps != []: 929 | return avg_new_view, list_new_views, 
visibility_map, validity_map, \ 930 | ref_depth_map, ref_depth_map_colored, \ 931 | ref_depth_map_2, ref_depth_map_colored_2 932 | else: 933 | return avg_new_view, list_new_views, visibility_map,\ 934 | ref_depth_map, ref_depth_map_colored,\ 935 | ref_depth_map_2, ref_depth_map_colored_2 936 | 937 | 938 | -------------------------------------------------------------------------------- /xtreme-view/vsynthlib/deepmvs_wrapper.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (C) 2019 NVIDIA Corporation. All rights reserved. 3 | Licensed under the NVIDIA Source Code License. See LICENSE.md at https://github.com/NVlabs/extreme-view-synth. 4 | Authors: Inchang Choi, Orazio Gallo, Alejandro Troccoli, Min H. Kim, and Jan Kautz 5 | """ 6 | 7 | import torch 8 | import torchvision as vision 9 | import torch.nn.functional as F 10 | from torch.autograd import Variable 11 | import pydensecrf.densecrf as dcrf 12 | import numpy as np 13 | import cv2 14 | 15 | from DeepMVS.model import DeepMVS 16 | 17 | 18 | 19 | 20 | class DeepMVSWrapper(object): 21 | def __init__(self, filename_DeepMVS, 22 | n_depths=100, 23 | enable_CUDA=True, 24 | do_filter=True): 25 | 26 | self.dev_id = 0 27 | if torch.cuda.device_count() > 1: 28 | self.dev_id = 1 29 | 30 | self.model_deepMVS = DeepMVS(n_depths, use_gpu=enable_CUDA, gpu_id=self.dev_id) 31 | self.model_deepMVS.load_state_dict(torch.load(filename_DeepMVS)) 32 | self.model_deepMVS.share_memory() 33 | print('DeepMVS model loaded!', filename_DeepMVS) 34 | 35 | if enable_CUDA: 36 | self.model_VGGNet = vision.models.vgg19(pretrained=True).cuda(self.dev_id) 37 | else: 38 | self.model_VGGNet = vision.models.vgg19(pretrained=True) 39 | 40 | self.model_VGGNet.share_memory() 41 | self.model_VGGNet_normalize\ 42 | = vision.transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 43 | print('VGGNET model loaded!') 44 | 45 | # Constants for DenseCRF. 
46 | self.dict_DenseCRF = dict() 47 | 48 | ###################################### 49 | # default from DeepMVS 50 | ###################################### 51 | # self.dict_DenseCRF['sigma_xy'] = 80.0 52 | # self.dict_DenseCRF['sigma_rgb'] = 15.0 53 | # self.dict_DenseCRF['sigma_d'] = 10.0 54 | # self.dict_DenseCRF['iteration_num'] = 5 55 | # compat = np.zeros((n_depths, n_depths), dtype=np.float32) 56 | # for row in range(0, n_depths): 57 | # for col in range(0, n_depths): 58 | # compat[row, col] = (row - col) ** 2 / self.dict_DenseCRF['sigma_d'] ** 2 / 2 59 | # self.dict_DenseCRF['compat'] = compat 60 | ##################################### 61 | 62 | ###################################### 63 | # For museum and others 64 | ###################################### 65 | self.dict_DenseCRF['sigma_xy'] = 30.0 66 | self.dict_DenseCRF['sigma_rgb'] = 3 67 | self.dict_DenseCRF['iteration_num'] = 20 68 | self.dict_DenseCRF['compat'] = 10.0 69 | 70 | # for high res 71 | # self.dict_DenseCRF['sigma_xy'] = 60 72 | # self.dict_DenseCRF['sigma_rgb'] = 3.0 73 | # self.dict_DenseCRF['iteration_num'] = 20 74 | # self.dict_DenseCRF['compat'] = 10.0 75 | 76 | ###################################### 77 | # For bikes of StereoMagnificiation 78 | ###################################### 79 | # self.dict_DenseCRF['sigma_xy'] = 25.0 80 | # self.dict_DenseCRF['sigma_rgb'] = 10.0 81 | # self.dict_DenseCRF['iteration_num'] = 5 82 | # self.dict_DenseCRF['compat'] = 5.0 83 | 84 | self.n_depths = n_depths 85 | self.patch_size = 128 86 | self.stride = int(self.patch_size/2) 87 | self.do_filter = do_filter 88 | 89 | def build_PSV(self, list_src_img, list_src_cam, ref_idx, 90 | height, width, 91 | min_disp, disp_step, max_depth): 92 | 93 | n_neighbors = len(list_src_img) - 1 94 | 95 | PSV = np.zeros(shape=[n_neighbors, self.n_depths, height, width, 3], dtype=np.float32) 96 | 97 | cam_param_ref = list_src_cam[ref_idx] 98 | int_mat_ref = cam_param_ref['intrinsic'] 99 | fx_ref = int_mat_ref[0, 0] 100 | fy_ref = int_mat_ref[1, 1] 101 | cx_ref = int_mat_ref[0, 2] 102 | cy_ref = int_mat_ref[1, 2] 103 | ext_ref = cam_param_ref['extrinsic'] 104 | inv_ext_ref = np.linalg.inv(ext_ref) 105 | 106 | # for each neighbor image 107 | counter_img = 0 108 | for i in range(len(list_src_img)): 109 | if i == ref_idx: 110 | continue 111 | 112 | img_i = list_src_img[i] 113 | cam_param_i = list_src_cam[i] 114 | # get the parameters 115 | int_mat = cam_param_i['intrinsic'] 116 | fx_i = int_mat[0, 0] 117 | fy_i = int_mat[1, 1] 118 | cx_i = int_mat[0, 2] 119 | cy_i = int_mat[1, 2] 120 | ext_i = cam_param_i['extrinsic'] 121 | 122 | # 4 Corners on the virtual camera to get te 4 rays that intersect with the depth plane 123 | src_pts = np.reshape([0, 0, 124 | width, 0, 125 | width, height, 126 | 0, height], (4, 2)) 127 | 128 | # for each depth plane 129 | for d in range(self.n_depths): 130 | 131 | disp = d * disp_step + min_disp 132 | if d == 0: 133 | depth = max_depth 134 | else: 135 | depth = 1.0 / disp 136 | 137 | # print(depth) 138 | 139 | # compute dst points 140 | dst_pts = np.zeros((4, 2)) 141 | counter_pt = 0 142 | for p in src_pts: 143 | p_3D_ref = np.asarray([(depth * p[0] - depth * cx_ref) / fx_ref, 144 | (depth * p[1] - depth * cy_ref) / fy_ref, 145 | depth]) 146 | p_4D_ref = np.array([p_3D_ref[0], p_3D_ref[1], p_3D_ref[2], 1.0]) 147 | p_4D_world = inv_ext_ref.dot(p_4D_ref) 148 | p_4D_i = ext_i.dot(p_4D_world) 149 | dst = np.asarray([cx_i + fx_i * p_4D_i[0] / p_4D_i[2], cy_i + fy_i * p_4D_i[1] / p_4D_i[2]]) 150 | dst_pts[counter_pt, :] = 
dst.squeeze() 151 | counter_pt += 1 152 | 153 | # compute homography 154 | M, mask = cv2.findHomography(dst_pts, src_pts) 155 | # warp the image 156 | result = cv2.warpPerspective(img_i, M, (width, height), 157 | flags=cv2.INTER_LINEAR, 158 | borderMode=cv2.BORDER_REPLICATE) 159 | # cv2.imshow("img_ref", img_ref) 160 | # cv2.imshow("PSV of img %02d" % (i), result) 161 | # cv2.waitKey() 162 | 163 | PSV[counter_img, d, :, :, :] = result 164 | 165 | counter_img += 1 166 | 167 | return PSV 168 | 169 | def perform_DeepMVS(self, list_img, ref_idx, PSV, 170 | height, width, batch_size=1, use_gpu=True): 171 | 172 | # Generate VGG features. 173 | with torch.no_grad(): 174 | VGG_tensor = Variable( 175 | self.model_VGGNet_normalize(torch.FloatTensor(list_img[ref_idx].copy())).permute(2, 0, 1).unsqueeze(0)) 176 | 177 | if use_gpu: 178 | VGG_tensor = VGG_tensor.cuda(self.dev_id) 179 | VGG_scaling_factor = 0.01 180 | for i in range(0, 4): 181 | VGG_tensor = self.model_VGGNet.features[i].forward(VGG_tensor) 182 | if use_gpu: 183 | feature_input_1x_whole = VGG_tensor.data.cpu().clone() * VGG_scaling_factor 184 | else: 185 | feature_input_1x_whole = VGG_tensor.data.clone() * VGG_scaling_factor 186 | for i in range(4, 9): 187 | VGG_tensor = self.model_VGGNet.features[i].forward(VGG_tensor) 188 | if use_gpu: 189 | feature_input_2x_whole = VGG_tensor.data.cpu().clone() * VGG_scaling_factor 190 | else: 191 | feature_input_2x_whole = VGG_tensor.data.clone() * VGG_scaling_factor 192 | for i in range(9, 14): 193 | VGG_tensor = self.model_VGGNet.features[i].forward(VGG_tensor) 194 | if use_gpu: 195 | feature_input_4x_whole = VGG_tensor.data.cpu().clone() * VGG_scaling_factor 196 | else: 197 | feature_input_4x_whole = VGG_tensor.data.clone() * VGG_scaling_factor 198 | for i in range(14, 23): 199 | VGG_tensor = self.model_VGGNet.features[i].forward(VGG_tensor) 200 | if use_gpu: 201 | feature_input_8x_whole = VGG_tensor.data.cpu().clone() * VGG_scaling_factor 202 | else: 203 | feature_input_8x_whole = VGG_tensor.data.clone() * VGG_scaling_factor 204 | for i in range(23, 32): 205 | VGG_tensor = self.model_VGGNet.features[i].forward(VGG_tensor) 206 | if use_gpu: 207 | feature_input_16x_whole = VGG_tensor.data.cpu().clone() * VGG_scaling_factor 208 | else: 209 | feature_input_16x_whole = VGG_tensor.data.clone() * VGG_scaling_factor 210 | del VGG_tensor 211 | 212 | # Stride through entire reference image. 213 | predict_raw = torch.zeros(self.n_depths, height, width) 214 | border_x = (self.patch_size - self.stride) / 2 215 | border_y = (self.patch_size - self.stride) / 2 216 | col_total = int((width - 2 * border_x - 1) / self.stride) + 1 217 | row_total = int((height - 2 * border_y - 1) / self.stride) + 1 218 | 219 | for row_idx in range(0, row_total): 220 | for col_idx in range(0, col_total): 221 | 222 | # Compute patch location for this patch and next patch. 223 | if col_idx != col_total - 1: 224 | start_x = col_idx * self.stride 225 | else: 226 | start_x = width - self.patch_size 227 | 228 | if row_idx != row_total - 1: 229 | start_y = row_idx * self.stride 230 | else: 231 | start_y = height - self.patch_size 232 | 233 | # Read plane-sweep volume and start next patch. 234 | ref_img = list_img[ref_idx][start_y:(start_y + self.patch_size), start_x:(start_x + self.patch_size), 235 | :].copy() - 0.5 236 | sweep_volume = PSV[:, :, start_y:(start_y + self.patch_size), start_x:(start_x + self.patch_size), 237 | :].copy() - 0.5 238 | num_neighbors = len(list_img) - 1 239 | 240 | # Prepare the inputs. 
241 | data_in_tensor = torch.FloatTensor(batch_size, 1, self.n_depths, 2, 3, self.patch_size, self.patch_size) 242 | ref_img_tensor = torch.FloatTensor(ref_img).permute(2, 0, 1).unsqueeze(0) 243 | data_in_tensor[0, 0, :, 0, ...] = ref_img_tensor.expand(self.n_depths, -1, -1, -1) 244 | with torch.no_grad(): 245 | feature_input_1x \ 246 | = Variable( 247 | feature_input_1x_whole[..., start_y:start_y + self.patch_size, start_x:start_x + self.patch_size]) 248 | feature_input_2x \ 249 | = Variable( 250 | feature_input_2x_whole[..., int(start_y / 2):int(start_y / 2) + int(self.patch_size / 2), 251 | int(start_x / 2):int(start_x / 2) + int(self.patch_size / 2)]) 252 | feature_input_4x \ 253 | = Variable( 254 | feature_input_4x_whole[..., int(start_y / 4):int(start_y / 4) + int(self.patch_size / 4), 255 | int(start_x / 4):int(start_x / 4) + int(self.patch_size / 4)]) 256 | feature_input_8x \ 257 | = Variable( 258 | feature_input_8x_whole[..., int(start_y / 8):int(start_y / 8) + int(self.patch_size / 8), 259 | int(start_x / 8):int(start_x / 8) + int(self.patch_size / 8)]) 260 | feature_input_16x \ 261 | = Variable( 262 | feature_input_16x_whole[..., int(start_y / 16):int(start_y / 16) + int(self.patch_size / 16), 263 | int(start_x / 16):int(start_x / 16) + int(self.patch_size / 16)]) 264 | if use_gpu: 265 | feature_input_1x = feature_input_1x.cuda(self.dev_id) 266 | feature_input_2x = feature_input_2x.cuda(self.dev_id) 267 | feature_input_4x = feature_input_4x.cuda(self.dev_id) 268 | feature_input_8x = feature_input_8x.cuda(self.dev_id) 269 | feature_input_16x = feature_input_16x.cuda(self.dev_id) 270 | # Loop through all neighbor images. 271 | for neighbor_idx in range(0, num_neighbors): 272 | data_in_tensor[0, 0, :, 1, ...] = torch.FloatTensor( 273 | np.moveaxis(sweep_volume[neighbor_idx, ...], -1, -3)) 274 | with torch.no_grad(): 275 | data_in = Variable(data_in_tensor) 276 | if use_gpu: 277 | data_in = data_in.cuda(self.dev_id) 278 | if neighbor_idx == 0: 279 | cost_volume \ 280 | = self.model_deepMVS.forward_feature(data_in, [feature_input_1x, feature_input_2x, feature_input_4x, 281 | feature_input_8x, feature_input_16x]).data[...] 282 | else: 283 | cost_volume \ 284 | = torch.max(cost_volume, self.model_deepMVS.forward_feature(data_in, [feature_input_1x, feature_input_2x, 285 | feature_input_4x, feature_input_8x, 286 | feature_input_16x]).data[...]) 287 | # Make final prediction. 288 | with torch.no_grad(): 289 | predict = self.model_deepMVS.forward_predict(Variable(cost_volume[:, 0, ...])) 290 | 291 | # Compute copy range. 292 | if col_idx == 0: 293 | copy_x_start = 0 294 | copy_x_end = self.patch_size - border_x 295 | elif col_idx == col_total - 1: 296 | copy_x_start = border_x + col_idx * self.stride 297 | copy_x_end = width 298 | else: 299 | copy_x_start = border_x + col_idx * self.stride 300 | copy_x_end = copy_x_start + self.stride 301 | 302 | if row_idx == 0: 303 | copy_y_start = 0 304 | copy_y_end = self.patch_size - border_y 305 | elif row_idx == row_total - 1: 306 | copy_y_start = border_y + row_idx * self.stride 307 | copy_y_end = height 308 | else: 309 | copy_y_start = border_y + row_idx * self.stride 310 | copy_y_end = copy_y_start + self.stride 311 | 312 | # Copy the prediction to buffer. 
313 | copy_x_start = int(copy_x_start) 314 | copy_x_end = int(copy_x_end) 315 | copy_y_start = int(copy_y_start) 316 | copy_y_end = int(copy_y_end) 317 | predict_raw[..., copy_y_start:copy_y_end, copy_x_start:copy_x_end] \ 318 | = predict.data[0, :, copy_y_start - start_y:copy_y_end - start_y, 319 | copy_x_start - start_x:copy_x_end - start_x] 320 | 321 | ###################################################### 322 | # compute the depth probability 323 | ###################################################### 324 | with torch.no_grad(): 325 | depth_prob = F.softmax(Variable(predict_raw), dim=0).data.numpy() 326 | 327 | ###################################################### 328 | # Pass through DenseCRF. 329 | ###################################################### 330 | with torch.no_grad(): 331 | unary_energy = F.log_softmax(Variable(predict_raw), dim=0).data.numpy() 332 | 333 | crf = dcrf.DenseCRF2D(width, height, self.n_depths) 334 | crf.setUnaryEnergy(-unary_energy.reshape(self.n_depths, height * width)) 335 | ref_img_full = (list_img[ref_idx] * 255.0).astype(np.uint8) 336 | crf.addPairwiseBilateral(sxy=(self.dict_DenseCRF['sigma_xy'], self.dict_DenseCRF['sigma_xy']), 337 | srgb=( 338 | self.dict_DenseCRF['sigma_rgb'], self.dict_DenseCRF['sigma_rgb'], self.dict_DenseCRF['sigma_rgb']), 339 | rgbim=ref_img_full, 340 | compat=self.dict_DenseCRF['compat'], 341 | kernel=dcrf.FULL_KERNEL, 342 | normalization=dcrf.NORMALIZE_SYMMETRIC) 343 | new_raw = crf.inference(self.dict_DenseCRF['iteration_num']) 344 | new_raw = np.array(new_raw).reshape(self.n_depths, height, width) 345 | 346 | return new_raw, depth_prob 347 | 348 | def compute(self, list_src_img, list_src_cam, ref_idx, 349 | min_disp, disp_step, max_depth): 350 | 351 | img = list_src_img[0] 352 | height, width, n_channels = img.shape 353 | 354 | # build PSV 355 | PSVs = self.build_PSV(list_src_img, list_src_cam, ref_idx, 356 | height, width, 357 | min_disp, disp_step, max_depth) 358 | 359 | # call deepMVS 360 | dp_refined, dp = self.perform_DeepMVS(list_src_img, ref_idx, PSVs, 361 | height, width) 362 | 363 | if self.do_filter: 364 | return dp_refined 365 | else: 366 | return dp 367 | 368 | 369 | 370 | -------------------------------------------------------------------------------- /xtreme-view/vsynthlib/depth_util.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (C) 2019 NVIDIA Corporation. All rights reserved. 3 | Licensed under the NVIDIA Source Code License. See LICENSE.md at https://github.com/NVlabs/extreme-view-synth. 4 | Authors: Inchang Choi, Orazio Gallo, Alejandro Troccoli, Min H. 
Kim, and Jan Kautz 5 | """ 6 | 7 | import numpy as np 8 | import cv2 9 | from scipy.signal import argrelextrema 10 | 11 | def my_comparator_greater(x1, x2): 12 | res_1 = np.greater_equal(x1, x2) 13 | # res_2 = x1 > DT_THRESHOLD 14 | # res = np.logical_and(res_1, res_2) 15 | return res_1 16 | 17 | 18 | def generate_depthmap(depth_prob, min_disp, disp_step, depht_at_inifinity, 19 | color_max_val=None, use_argmax=True): 20 | if use_argmax: 21 | depth_idx = np.argmax(depth_prob, axis=0) 22 | else: 23 | depth_idx = np.argmin(depth_prob, axis=0) 24 | 25 | 26 | img_depth = depth_idx*disp_step 27 | zero_disp = (depth_idx == 0) 28 | img_depth = 1.0 / (img_depth + min_disp) 29 | img_depth[zero_disp] = depht_at_inifinity 30 | 31 | if color_max_val is None: 32 | img_depth_colored, color_max_val = apply_colormap_to_depth(img_depth, depht_at_inifinity) 33 | return img_depth, img_depth_colored, zero_disp, color_max_val 34 | 35 | else: 36 | img_depth_colored = apply_colormap_to_depth(img_depth, depht_at_inifinity, max_depth=color_max_val) 37 | return img_depth, img_depth_colored, zero_disp 38 | 39 | 40 | 41 | def apply_colormap_to_depth(img_depth, depth_at_infinity, max_depth=None, max_percent=95, RGB=True): 42 | img_depth_colored = img_depth.copy() 43 | m = np.min(img_depth_colored) 44 | M = np.max(img_depth_colored) 45 | 46 | if max_depth is None: 47 | valid_mask = img_depth_colored < depth_at_infinity 48 | valid_mask = np.logical_and(valid_mask, np.logical_not(np.isinf(img_depth))) 49 | valid_mask = np.logical_and(valid_mask, img_depth != 0.0) 50 | list_data = img_depth[valid_mask] 51 | 52 | hist, bins = np.histogram(list_data, bins=20) 53 | n_data = len(list_data) 54 | threshold_max = n_data * float(max_percent)/100.0 55 | sum_hist = 0 56 | 57 | for bin_idx, hist_val in enumerate(hist): 58 | sum_hist += hist_val 59 | if sum_hist > threshold_max: 60 | M = bins[bin_idx + 1] 61 | break 62 | else: 63 | M = max_depth 64 | 65 | img_depth_colored[img_depth_colored > M] = M 66 | img_depth_colored = (img_depth_colored - m) / (M - m) 67 | img_depth_colored = (img_depth_colored * 255).astype(np.uint8) 68 | img_depth_colored = cv2.applyColorMap(img_depth_colored, cv2.COLORMAP_JET) 69 | 70 | if RGB: 71 | img_depth_colored = cv2.cvtColor(img_depth_colored, cv2.COLOR_BGR2RGB) 72 | 73 | if max_depth is None: 74 | return img_depth_colored, M 75 | else: 76 | return img_depth_colored 77 | 78 | 79 | 80 | def fetch_patches_VNP(y, x, p_size, dest_cam, 81 | img_synth, list_src_img, list_src_cam, 82 | depth_map_P1, depth_map_P2, return_None=True): 83 | 84 | ######################### 85 | # define the input and the output 86 | ######################### 87 | # t_input = np.zeros(shape=(p_size, p_size, 27)) 88 | # list_src_cam_IDs_ref = dest_cam['list_src_cam_IDs_ref'] 89 | chs_for_fg_patches = 3*len(list_src_img) 90 | t_input = np.zeros(shape=(p_size, p_size, 3 + 2*chs_for_fg_patches)) 91 | 92 | t_input_synth = np.zeros(shape=(p_size, p_size, 3)) 93 | list_t_candi_patch = [] 94 | 95 | 96 | 97 | ######################### 98 | # set output 99 | ######################### 100 | X_grid, Y_grid = np.meshgrid(np.arange(x, x + p_size), 101 | np.arange(y, y + p_size)) 102 | 103 | 104 | ######################### 105 | # set input 106 | ######################### 107 | synth_patch = img_synth[Y_grid, X_grid] 108 | t_input_synth = synth_patch 109 | # cv2.imshow('synth_patch', synth_patch) 110 | 111 | # get the reference camera params 112 | inv_int_dest = np.linalg.inv(dest_cam['intrinsic']) 113 | inv_ext_dest = 
np.linalg.inv(dest_cam['extrinsic']) 114 | 115 | 116 | for count in range(len(list_src_img)): 117 | # get the target camera params 118 | cam_i = list_src_cam[count] 119 | ext_i = cam_i['extrinsic'] 120 | int_i = cam_i['intrinsic'] 121 | 122 | planar_patch_P1_i = backward_warp_center_depth(y, x, depth_map_P1, list_src_img, 123 | p_size, ext_i, int_i, count, 124 | inv_int_dest, inv_ext_dest) 125 | planar_patch_P2_i = backward_warp_center_depth(y, x, depth_map_P2, list_src_img, 126 | p_size, ext_i, int_i, count, 127 | inv_int_dest, inv_ext_dest) 128 | 129 | if return_None: 130 | if planar_patch_P1_i is None or planar_patch_P2_i is None: 131 | return None, None 132 | 133 | list_t_candi_patch.append(planar_patch_P1_i) 134 | 135 | z_1 = depth_map_P1[y, x] 136 | z_2 = depth_map_P2[y, x] 137 | diff_z = np.abs(z_1 - z_2)/z_1*100 138 | 139 | if diff_z > 2: 140 | list_t_candi_patch.append(planar_patch_P2_i) 141 | 142 | 143 | # change shape and subtract 0.5 144 | t_input_synth = np.moveaxis(t_input_synth, -1, 0) 145 | t_input_synth -= 0.5 146 | 147 | for i in range(len(list_t_candi_patch)): 148 | t_candi_patch = list_t_candi_patch[i] 149 | t_candi_patch = np.moveaxis(t_candi_patch, -1, 0) 150 | t_candi_patch -= 0.5 151 | list_t_candi_patch[i] = t_candi_patch 152 | 153 | ######################### 154 | return t_input_synth, list_t_candi_patch 155 | 156 | 157 | 158 | 159 | def backward_warp_center_depth(y_coord, x_coord, dmap, list_src_img, 160 | patch_size, ext_i, int_i, src_idx, 161 | inv_int_ref, inv_ext_ref): 162 | 163 | z_coord = dmap[int(y_coord + patch_size/2), int(x_coord + patch_size/2)] 164 | # if z_coord == 0.0: 165 | # return None 166 | 167 | height, width = dmap.shape 168 | X_grid, Y_grid = np.meshgrid(np.arange(x_coord, x_coord + patch_size), 169 | np.arange(y_coord, y_coord + patch_size)) 170 | Z_grid = np.ones(shape=X_grid.shape) * z_coord 171 | X_grid = np.multiply(X_grid, Z_grid) 172 | Y_grid = np.multiply(Y_grid, Z_grid) 173 | 174 | points = np.array([X_grid.reshape(-1), Y_grid.reshape(-1), Z_grid.reshape(-1)]) 175 | points = np.matmul(inv_int_ref, points) 176 | points = np.vstack([points, np.ones((1, patch_size *patch_size))]) 177 | points = np.matmul(inv_ext_ref, points) 178 | points = np.matmul(ext_i, points) 179 | points = points[0:3] 180 | points = np.matmul(int_i, points) 181 | Xi = points[0] /points[2] 182 | Yi = points[1] /points[2] 183 | Xi = Xi.reshape((patch_size, patch_size)) 184 | Yi = Yi.reshape((patch_size, patch_size)) 185 | 186 | # handle some exceptions 187 | invalid_Xi_zero = Xi < 0 188 | Xi[invalid_Xi_zero] = 0 189 | invalid_Xi_width = Xi >= width 190 | Xi[invalid_Xi_width] = width - 1 191 | invalid_Xi = np.logical_or(invalid_Xi_zero, invalid_Xi_width) 192 | 193 | invalid_Yi_zero = Yi < 0 194 | Yi[invalid_Yi_zero] = 0 195 | invalid_Yi_height = Yi >= height 196 | Yi[invalid_Yi_height] = height - 1 197 | invalid_Yi = np.logical_or(invalid_Yi_zero, invalid_Yi_height) 198 | invalid_XYi = np.logical_or(invalid_Xi, invalid_Yi) 199 | 200 | # do warping 201 | img_i = list_src_img[src_idx] 202 | Xi = Xi.astype(np.int) 203 | Yi = Yi.astype(np.int) 204 | 205 | 206 | planar_patch_i = img_i[Yi, Xi] 207 | planar_patch_i[invalid_XYi] = 0 208 | 209 | return planar_patch_i 210 | -------------------------------------------------------------------------------- /xtreme-view/vsynthlib/refinement.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (C) 2019 NVIDIA Corporation. All rights reserved. 
3 | Licensed under the NVIDIA Source Code License. See LICENSE.md at https://github.com/NVlabs/extreme-view-synth. 4 | Authors: Inchang Choi, Orazio Gallo, Alejandro Troccoli, Min H. Kim, and Jan Kautz 5 | """ 6 | 7 | import numpy as np 8 | import torch 9 | from torch.autograd import Variable 10 | import os 11 | import time 12 | import imageio 13 | 14 | from vsynthlib.refinet.models import Model_VNPCAT 15 | from vsynthlib import depth_util 16 | 17 | 18 | class DeepViewRefiner(object): 19 | 20 | NUM_INPUT_CHANNELS = 27 21 | 22 | def __init__(self, filename_model_weight, working_dir, out_dir, 23 | patch_size=64, 24 | with_CUDA=True): 25 | 26 | # define the model 27 | self.model = Model_VNPCAT() 28 | 29 | # load weight 30 | self.with_CUDA = with_CUDA 31 | self.model.load_state_dict(torch.load(filename_model_weight)) 32 | if with_CUDA: 33 | self.model.cuda() 34 | 35 | self.working_dir = working_dir 36 | self.out_dir = out_dir 37 | self.patch_size = patch_size 38 | 39 | if not os.path.exists(self.out_dir): 40 | os.mkdir(self.out_dir) 41 | 42 | 43 | 44 | pass 45 | 46 | def do(self, synth_obj, list_src_img, list_src_cam, count=0, 47 | do_stereo=False, return_val=False, custom_outdir=''): 48 | 49 | value = self.do_VNPCAT(synth_obj, list_src_img, list_src_cam, 50 | count=count, return_val=return_val, 51 | without_candi= False, 52 | custom_outdir=custom_outdir) 53 | 54 | if return_val: 55 | return value 56 | 57 | def do_VNPCAT(self, synth_obj, list_src_img, list_src_cam, count=0, 58 | return_val=False, without_candi=False, custom_outdir=''): 59 | 60 | func_fetch_patch = depth_util.fetch_patches_VNP 61 | 62 | # load synth data 63 | img_synth = synth_obj['img_synth'] 64 | depth_map_P1 = synth_obj['depth_map_P1'] 65 | depth_map_P2 = synth_obj['depth_map_P2'] 66 | dest_cam = synth_obj['dest_cam'] 67 | height, width, _ = img_synth.shape 68 | 69 | # perform refinement patch-by-patchy 70 | ############################################################# 71 | # Do Testing 72 | ############################################################# 73 | img_merged = np.zeros(shape=(height, width, 3)) 74 | img_counter = np.zeros(shape=(height, width, 3)) 75 | for j in range(0, height, int(self.patch_size / 4)): 76 | for i in range(0, width, int(self.patch_size / 4)): 77 | 78 | t_start = time.time() 79 | # set the model to the evaluation mode 80 | self.model.eval() 81 | 82 | # get candidate tensor 83 | x_top = i 84 | y_top = j 85 | if x_top + self.patch_size >= width: 86 | x_top = width - self.patch_size 87 | if y_top + self.patch_size >= height: 88 | y_top = height - self.patch_size 89 | 90 | t_input_synth, list_t_candi_patch = func_fetch_patch(y_top, x_top, self.patch_size, dest_cam, 91 | img_synth, list_src_img, list_src_cam, 92 | depth_map_P1, depth_map_P2) 93 | 94 | if t_input_synth is None: 95 | print('None!') 96 | continue 97 | 98 | # check if more than half of input pixels are valid 99 | t_in_slice = t_input_synth[0] 100 | bool_nz = t_in_slice != -0.5 101 | bool_nz = bool_nz.astype(np.float) 102 | sum_nz = np.sum(bool_nz) 103 | if sum_nz < self.patch_size * self.patch_size * 0.6: 104 | continue 105 | 106 | t_input_synth = np.expand_dims(t_input_synth, axis=0) 107 | t_input_synth = t_input_synth.astype(np.float32) 108 | _, chs, _, _ = t_input_synth.shape 109 | n_patches = len(list_t_candi_patch) 110 | t_in_synth = t_input_synth 111 | 112 | input_synth_tensor \ 113 | = torch.from_numpy(t_in_synth) 114 | 115 | if self.with_CUDA: 116 | input_synth_tensor = input_synth_tensor.cuda() 117 | with torch.no_grad(): 118 | 
input_synth_variable = Variable(input_synth_tensor, requires_grad=False) 119 | 120 | list_input_candi_variable = [] 121 | for i in range(n_patches): 122 | candi_patch = list_t_candi_patch[i] 123 | candi_patch = np.expand_dims(candi_patch, axis=0) 124 | candi_patch = candi_patch.astype(np.float32) 125 | 126 | candi_tensor = torch.from_numpy(candi_patch) 127 | 128 | if self.with_CUDA: 129 | candi_tensor = candi_tensor.cuda() 130 | 131 | with torch.no_grad(): 132 | input_candi_variable = Variable(candi_tensor) 133 | 134 | list_input_candi_variable.append(input_candi_variable) 135 | 136 | 137 | # do forward pass 138 | if without_candi: 139 | output_variable = self.model(input_synth_variable) 140 | output_to_show = output_variable[0].cpu().data[0] 141 | else: 142 | output_variable = self.model(input_synth_variable, list_input_candi_variable) 143 | output_to_show = output_variable.cpu().data[0] 144 | 145 | output_to_show = output_to_show + 0.5 146 | output_to_show = output_to_show.permute(1, 2, 0).numpy() 147 | output_to_show[output_to_show < 0.0] = 0.0 148 | output_to_show[output_to_show > 1.0] = 1.0 149 | output_to_show = output_to_show * 255.0 150 | output_to_show = output_to_show.astype(np.uint8) 151 | 152 | img_merged[y_top:(y_top + self.patch_size), x_top:(x_top + self.patch_size), :] += output_to_show 153 | img_counter[y_top:(y_top + self.patch_size), x_top:(x_top + self.patch_size), :] += 1 154 | 155 | t_current = time.time() 156 | t_elapsed_row = t_current - t_start 157 | 158 | # delete variables 159 | del input_synth_variable 160 | for var in list_input_candi_variable: 161 | self.var = var 162 | del self.var 163 | 164 | 165 | img_merged = img_merged / (img_counter + 1e-10) 166 | img_merged /= 255.0 167 | if return_val: 168 | return img_merged[0:height, 0:width] 169 | else: 170 | filename_out_prefix = 'refined_vsynth_%04d' % (count) 171 | if custom_outdir != '': 172 | imageio.imwrite('%s/%s.png' % (custom_outdir, filename_out_prefix), img_merged[0:height, 0:width]) 173 | else: 174 | imageio.imwrite('%s/%s.png' % (self.out_dir, filename_out_prefix), img_merged[0:height, 0:width]) 175 | -------------------------------------------------------------------------------- /xtreme-view/vsynthlib/refinet/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/extreme-view-synth/2820ffdda9f44e70cd2fdd0845ec9145293e4183/xtreme-view/vsynthlib/refinet/__init__.py -------------------------------------------------------------------------------- /xtreme-view/vsynthlib/refinet/models.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (C) 2019 NVIDIA Corporation. All rights reserved. 3 | Licensed under the NVIDIA Source Code License. See LICENSE.md at https://github.com/NVlabs/extreme-view-synth. 4 | Authors: Inchang Choi, Orazio Gallo, Alejandro Troccoli, Min H. 
Kim, and Jan Kautz 5 | """ 6 | 7 | import torch 8 | import torch.nn as nn 9 | import torch.nn.parallel 10 | import torch.utils.data 11 | import torch.nn.init as init 12 | 13 | # 14 | # VNPCAT = Variable Number of Patch using concatenation 15 | # 16 | 17 | class Model_VNPCAT_Encoder(nn.Module): 18 | # Based on Unet and inpainting network 19 | def __init__(self, num_in_features = 3): 20 | super(Model_VNPCAT_Encoder, self).__init__() 21 | self.relu = nn.ReLU() 22 | 23 | self.conv1 = nn.Conv2d(num_in_features, 128, 3, 1, 1) 24 | self.conv2 = nn.Conv2d(128, 128, 3, 1, 1) 25 | self.conv2_bnorm = nn.BatchNorm2d(128) 26 | 27 | self.conv3 = nn.Conv2d(128, 256, 3, 2, 1) 28 | self.conv3_bnorm = nn.BatchNorm2d(256) 29 | self.conv4 = nn.Conv2d(256, 256, 3, 1, 1) 30 | self.conv4_bnorm = nn.BatchNorm2d(256) 31 | 32 | self.conv5 = nn.Conv2d(256, 512, 3, 2, 1) 33 | self.conv5_bnorm = nn.BatchNorm2d(512) 34 | self.conv6 = nn.Conv2d(512, 512, 3, 1, 1) 35 | self.conv6_bnorm = nn.BatchNorm2d(512) 36 | 37 | self.conv7 = nn.Conv2d(512, 512, 3, 2, 1) 38 | self.conv7_bnorm = nn.BatchNorm2d(512) 39 | 40 | self.apply(self.initialize_weight) 41 | 42 | def forward(self, x): 43 | 44 | # encoder 45 | x1 = self.relu(self.conv1(x)) 46 | x2 = self.relu(self.conv2_bnorm(self.conv2(x1))) 47 | 48 | x3 = self.relu(self.conv3_bnorm(self.conv3(x2))) 49 | x4 = self.relu(self.conv4_bnorm(self.conv4(x3))) 50 | 51 | x5 = self.relu(self.conv5_bnorm(self.conv5(x4))) 52 | x6 = self.relu(self.conv6_bnorm(self.conv6(x5))) 53 | 54 | x7 = self.relu(self.conv7_bnorm(self.conv7(x6))) 55 | 56 | return [x2, x4, x6, x7] 57 | 58 | def initialize_weight(self, m): 59 | classname = m.__class__.__name__ 60 | if classname.find('Conv') != -1: 61 | init.xavier_normal_(m.weight) 62 | 63 | elif classname.find('BatchNorm') != -1: 64 | m.weight.data.normal_(1.0, 0.02) 65 | m.bias.data.fill_(0) 66 | 67 | 68 | class Model_VNPCAT_Decoder(nn.Module): 69 | # Based on Unet and inpainting network 70 | def __init__(self): 71 | super(Model_VNPCAT_Decoder, self).__init__() 72 | self.relu = nn.ReLU() 73 | self.upsample = nn.Upsample(scale_factor=2) 74 | 75 | self.conv1 = nn.Conv2d(512*2, 512, 3, 1, 1) 76 | self.conv1_bnorm = nn.BatchNorm2d(512) 77 | self.conv2 = nn.Conv2d(512, 512, 3, 1, 1) 78 | self.conv2_bnorm = nn.BatchNorm2d(512) 79 | self.conv2_up = nn.Conv2d(512, 512, 3, 1, 1) 80 | self.conv2_up_bnorm = nn.BatchNorm2d(512) 81 | 82 | self.conv3 = nn.Conv2d(512*3, 512, 3, 1, 1) 83 | self.conv3_bnorm = nn.BatchNorm2d(512) 84 | self.conv4 = nn.Conv2d(512, 512, 3, 1, 1) 85 | self.conv4_bnorm = nn.BatchNorm2d(512) 86 | self.conv4_up = nn.Conv2d(512, 256, 3, 1, 1) 87 | self.conv4_up_bnorm = nn.BatchNorm2d(256) 88 | 89 | self.conv5 = nn.Conv2d(256*3, 256, 3, 1, 1) 90 | self.conv5_bnorm = nn.BatchNorm2d(256) 91 | self.conv6 = nn.Conv2d(256, 256, 3, 1, 1) 92 | self.conv6_bnorm = nn.BatchNorm2d(256) 93 | self.conv6_up = nn.Conv2d(256, 128, 3, 1, 1) 94 | self.conv6_up_bnorm = nn.BatchNorm2d(128) 95 | 96 | self.conv7 = nn.Conv2d(128*3, 128, 3, 1, 1) 97 | self.conv7_bnorm = nn.BatchNorm2d(128) 98 | self.conv8 = nn.Conv2d(128, 128, 3, 1, 1) 99 | self.conv8_bnorm = nn.BatchNorm2d(128) 100 | self.conv9 = nn.Conv2d(128, 3, 3, 1, 1) 101 | 102 | self.apply(self.initialize_weight) 103 | 104 | def forward(self, list_F_synth, list_F_max): 105 | 106 | # encoder 107 | F_synth_3 = list_F_synth[3] 108 | F_max_3 = list_F_max[3] 109 | x0 = torch.cat((F_synth_3, F_max_3), 1) 110 | x1 = self.relu(self.conv1_bnorm(self.conv1(x0))) 111 | x2 = self.relu(self.conv2_bnorm(self.conv2(x1))) 112 | 
x2_up = self.relu(self.conv2_up_bnorm(self.conv2_up(self.upsample(x2)))) 113 | 114 | F_synth_2 = list_F_synth[2] 115 | F_max_2 = list_F_max[2] 116 | x2_cat = torch.cat((x2_up, F_synth_2, F_max_2), 1) 117 | x3 = self.relu(self.conv3_bnorm(self.conv3(x2_cat))) 118 | x4 = self.relu(self.conv4_bnorm(self.conv4(x3))) 119 | x4_up = self.relu(self.conv4_up_bnorm(self.conv4_up(self.upsample(x4)))) 120 | 121 | F_synth_1 = list_F_synth[1] 122 | F_max_1 = list_F_max[1] 123 | x4_cat = torch.cat((x4_up, F_synth_1, F_max_1), 1) 124 | x5 = self.relu(self.conv5_bnorm(self.conv5(x4_cat))) 125 | x6 = self.relu(self.conv6_bnorm(self.conv6(x5))) 126 | x6_up = self.relu(self.conv6_up_bnorm(self.conv6_up(self.upsample(x6)))) 127 | 128 | F_synth_0 = list_F_synth[0] 129 | F_max_0 = list_F_max[0] 130 | x6_cat = torch.cat((x6_up, F_synth_0, F_max_0), 1) 131 | x7 = self.relu(self.conv7_bnorm(self.conv7(x6_cat))) 132 | x8 = self.relu(self.conv8_bnorm(self.conv8(x7))) 133 | x9 = self.conv9(x8) 134 | 135 | return x9 136 | 137 | def initialize_weight(self, m): 138 | classname = m.__class__.__name__ 139 | if classname.find('Conv') != -1: 140 | init.xavier_normal_(m.weight) 141 | 142 | elif classname.find('BatchNorm') != -1: 143 | m.weight.data.normal_(1.0, 0.02) 144 | m.bias.data.fill_(0) 145 | 146 | 147 | class Model_VNPCAT(nn.Module): 148 | # Based on Unet and inpainting network 149 | def __init__(self): 150 | super(Model_VNPCAT, self).__init__() 151 | self.E = Model_VNPCAT_Encoder() 152 | self.D = Model_VNPCAT_Decoder() 153 | self.apply(self.initialize_weight) 154 | 155 | def forward(self, x_synth, list_x_candi): 156 | 157 | # encoder 158 | list_F_synth = self.E(x_synth) 159 | list_list_F_candi = [] 160 | for x_candi in list_x_candi: 161 | list_F_candi = self.E(x_candi) 162 | list_list_F_candi.append(list_F_candi) 163 | 164 | # do max pool 165 | list_F0 = [] 166 | list_F1 = [] 167 | list_F2 = [] 168 | list_F3 = [] 169 | 170 | for list_F_candi in list_list_F_candi: 171 | list_F0.append(list_F_candi[0][None]) 172 | list_F1.append(list_F_candi[1][None]) 173 | list_F2.append(list_F_candi[2][None]) 174 | list_F3.append(list_F_candi[3][None]) 175 | 176 | concat_F0 = torch.cat(list_F0) 177 | concat_F1 = torch.cat(list_F1) 178 | concat_F2 = torch.cat(list_F2) 179 | concat_F3 = torch.cat(list_F3) 180 | 181 | F0_max, _ = torch.max(concat_F0, dim=0) 182 | F1_max, _ = torch.max(concat_F1, dim=0) 183 | F2_max, _ = torch.max(concat_F2, dim=0) 184 | F3_max, _ = torch.max(concat_F3, dim=0) 185 | 186 | list_F_max = [F0_max, F1_max, F2_max, F3_max] 187 | 188 | # decoder 189 | x_refined = self.D(list_F_synth, list_F_max) 190 | 191 | return x_refined 192 | 193 | def initialize_weight(self, m): 194 | classname = m.__class__.__name__ 195 | if classname.find('Conv') != -1: 196 | init.xavier_normal_(m.weight) 197 | 198 | elif classname.find('BatchNorm') != -1: 199 | m.weight.data.normal_(1.0, 0.02) 200 | m.bias.data.fill_(0) 201 | --------------------------------------------------------------------------------
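A minimal usage sketch for Model_VNPCAT (an editorial illustration under the assumptions stated here, not code from the repository): it assumes the package layout above, uses dummy zero-filled 64x64 patches matching DeepViewRefiner's default patch_size, and exercises the forward pass that takes one synthesized patch plus a variable-length list of candidate patches:

import torch
from vsynthlib.refinet.models import Model_VNPCAT

# Build the refinement network; weights here are the Xavier-initialized defaults,
# whereas DeepViewRefiner loads trained weights via load_state_dict().
model = Model_VNPCAT().eval()

# One zero-centered synthesized patch (batch, channels, height, width).
x_synth = torch.zeros(1, 3, 64, 64)

# A variable number of candidate patches, as produced by fetch_patches_VNP;
# three candidates are used here purely for illustration.
list_x_candi = [torch.zeros(1, 3, 64, 64) for _ in range(3)]

with torch.no_grad():
    # The encoder is shared across the synthesized and candidate patches; the
    # candidate features are max-pooled over the candidate dimension before
    # the U-Net-style decoder produces the refined patch of shape (1, 3, 64, 64).
    x_refined = model(x_synth, list_x_candi)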