├── .gitignore ├── Dockerfile ├── LICENSE.md ├── README.md ├── download_model.sh ├── evs.gif ├── launch_container.sh └── xtreme-view ├── DeepMVS ├── __init__.py └── model.py ├── dataloader ├── __init__.py └── colmap_loader.py ├── run_colmap.sh ├── run_colmap_all.sh ├── run_xtreme_view.py ├── run_xtreme_view_all.sh └── vsynthlib ├── __init__.py ├── core.py ├── deepmvs_wrapper.py ├── depth_util.py ├── refinement.py └── refinet ├── __init__.py └── models.py /.gitignore: -------------------------------------------------------------------------------- 1 | *__pycache__ 2 | data/ 3 | models/ 4 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nvcr.io/nvidia/pytorch:19.06-py3 2 | 3 | # Install COLMAP 4 | RUN apt-get update && apt-get -y install \ 5 | git \ 6 | cmake \ 7 | build-essential \ 8 | libboost-program-options-dev \ 9 | libboost-filesystem-dev \ 10 | libboost-graph-dev \ 11 | libboost-regex-dev \ 12 | libboost-system-dev \ 13 | libboost-test-dev \ 14 | libeigen3-dev \ 15 | libsuitesparse-dev \ 16 | libfreeimage-dev \ 17 | libgoogle-glog-dev \ 18 | libgflags-dev \ 19 | libglew-dev \ 20 | qtbase5-dev \ 21 | libqt5opengl5-dev \ 22 | libcgal-dev \ 23 | libcgal-qt5-dev \ 24 | libatlas-base-dev \ 25 | libsuitesparse-dev \ 26 | libopenblas-dev 27 | 28 | RUN git clone https://ceres-solver.googlesource.com/ceres-solver && \ 29 | cd ceres-solver && \ 30 | git checkout 1.14.0 && \ 31 | mkdir build && \ 32 | cd build && \ 33 | cmake .. -DBUILD_TESTING=OFF -DBUILD_EXAMPLES=OFF && \ 34 | make -j8 && \ 35 | make install 36 | 37 | RUN git clone https://github.com/colmap/colmap.git && \ 38 | cd colmap && \ 39 | git checkout 3.5 && \ 40 | mkdir build && \ 41 | cd build && \ 42 | cmake .. -DCUDA_ARCHS="5.2 6.0 6.1 7.0 7.5+PTX" && \ 43 | make -j8 && \ 44 | make install 45 | 46 | # Install xtreme-view dependencies 47 | RUN pip install pydensecrf \ 48 | pyquaternion \ 49 | imageio 50 | 51 | COPY xtreme-view xtreme-view 52 | WORKDIR xtreme-view 53 | 54 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | ## Nvidia Source Code License 2 | 3 | ### 1. Definitions. 4 | 5 | “Licensor” means any person or entity that distributes its Work. 6 | 7 | “Software” means the original work of authorship made available under this License. 8 | 9 | “Work” means the Software and any additions to or derivative works of the Software that are made available under this License. 10 | 11 | “Nvidia Processors” means any central processing unit (CPU), graphics processing unit (GPU), field-programmable gate array (FPGA), application-specific integrated circuit (ASIC) or any combination thereof designed, made, sold, or provided by Nvidia or its affiliates. 12 | 13 | The terms “reproduce,” “reproduction,” “derivative works,” and “distribution” have the meaning as provided under U.S. copyright law; provided, however, that for the purposes of this License, derivative works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work. 14 | 15 | Works, including the Software, are “made available” under this License by including in or with the Work either (a) a copyright notice referencing the applicability of this License to the Work, or (b) a copy of this License. 16 | 17 | ### 2. License Grants. 18 | 19 | 2.1 Copyright Grant. 
Subject to the terms and conditions of this License, each Licensor grants to you a perpetual, worldwide, non-exclusive, royalty-free, copyright license to reproduce, prepare derivative works of, publicly display, publicly perform, sublicense and distribute its Work and any resulting derivative works in any form. 20 | 21 | 2.2 Patent Grant. Subject to the terms and conditions of this License, each Licensor grants to you a perpetual, worldwide, non-exclusive, royalty-free patent license to make, have made, use, sell, offer for sale, import, and otherwise transfer its Work, in whole or in part. The foregoing license applies only to the patent claims licensable by Licensor that would be infringed by Licensor’s Work (or portion thereof) individually and excluding any combinations with any other materials or technology. 22 | 23 | ### 3. Limitations. 24 | 25 | 3.1 Redistribution. You may reproduce or distribute the Work only if (a) you do so under this License, (b) you include a complete copy of this License with your distribution, and (c) you retain without modification any copyright, patent, trademark, or attribution notices that are present in the Work. 26 | 27 | 3.2 Derivative Works. You may specify that additional or different terms apply to the use, reproduction, and distribution of your derivative works of the Work (“Your Terms”) only if (a) Your Terms provide that the use limitation in Section 3.3 applies to your derivative works, and (b) you identify the specific derivative works that are subject to Your Terms. Notwithstanding Your Terms, this License (including the redistribution requirements in Section 3.1) will continue to apply to the Work itself. 28 | 29 | 3.3 Use Limitation. The Work and any derivative works thereof only may be used or intended for use commercially with Nvidia Processors. 30 | 31 | 3.4 Patent Claims. If you bring or threaten to bring a patent claim against any Licensor (including any claim, cross-claim or counterclaim in a lawsuit) to enforce any patents that you allege are infringed by any Work, then your rights under this License from such Licensor (including the grants in Sections 2.1 and 2.2) will terminate immediately. 32 | 33 | 3.5 Trademarks. This License does not grant any rights to use any Licensor’s or its affiliates’ names, logos, or trademarks, except as necessary to reproduce the notices described in this License. 34 | 35 | 3.6 Termination. If you violate any term of this License, then your rights under this License (including the grants in Sections 2.1 and 2.2) will terminate immediately. 36 | 37 | ### 4. Disclaimer of Warranty. 38 | 39 | THE WORK IS PROVIDED “AS IS” WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WARRANTIES OR CONDITIONS OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE OR NON-INFRINGEMENT. YOU BEAR THE RISK OF UNDERTAKING ANY ACTIVITIES UNDER THIS LICENSE. SOME STATES’ CONSUMER LAWS DO NOT ALLOW EXCLUSION OF AN IMPLIED WARRANTY, SO THIS DISCLAIMER MAY NOT APPLY TO YOU. 40 | 41 | ### 5. Limitation of Liability.
42 | 43 | EXCEPT AS PROHIBITED BY APPLICABLE LAW, IN NO EVENT AND UNDER NO LEGAL THEORY, WHETHER IN TORT (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE SHALL ANY LICENSOR BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF OR RELATED TO THIS LICENSE, THE USE OR INABILITY TO USE THE WORK (INCLUDING BUT NOT LIMITED TO LOSS OF GOODWILL, BUSINESS INTERRUPTION, LOST PROFITS OR DATA, COMPUTER FAILURE OR MALFUNCTION, OR ANY OTHER COMMERCIAL DAMAGES OR LOSSES), EVEN IF THE LICENSOR HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 44 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Extreme View Synthesis 2 | 3 | #### [Paper](https://arxiv.org/abs/1812.04777) | [Extended Presentation at GTC 2019](https://developer.nvidia.com/gtc/2019/video/S9576) (requires free registration) | [Latex citation](#citation) 4 | 5 | Code for the paper: 6 | **Extreme View Synthesis** 7 | [Inchang Choi](http://www.inchangchoi.info/), [Orazio Gallo](https://oraziogallo.github.io/), [Alejandro Troccoli](https://research.nvidia.com/person/alejandro-troccoli), [Min H. Kim](http://vclab.kaist.ac.kr/minhkim/) and [Jan Kautz](http://jankautz.com/), IEEE International Conference on Computer Vision, 2019 (Oral). 8 | 9 | 10 | ## License 11 | 12 | Copyright (C) 2019 NVIDIA Corporation. All rights reserved. 13 | 14 | Licensed under the [NVIDIA Source Code License](LICENSE.md) 15 | 16 | ## Pre-requisites 17 | 18 | For convenience, we provide a Dockerfile to build a container image to run the code. The image will contain the Python dependencies and a build of COLMAP. 19 | 20 | Your system will need: 21 | 22 | 1. Docker (>= 19.03) 23 | 24 | 2. [NVIDIA Docker](https://github.com/NVIDIA/nvidia-docker/wiki) 25 | 26 | 3. NVIDIA GPU driver 418 or later. 27 | 28 | Build the container image: 29 | 30 | ``` 31 | docker build -t xtreme-view . 32 | ``` 33 | 34 | ## Download the models 35 | 36 | You can download the models from the NVIDIA GPU CLOUD registry using: 37 | 38 | 39 | ``` 40 | ./download_model.sh 41 | ``` 42 | 43 | 44 | 45 | ## Running the code 46 | 47 | Place your sequence of images in a directory tree with root ```data```, followed by a directory per sequence, e.g., ```data/0000```, and place all images in the sequence into the ```data/0000/images``` sub-directory. 48 | 49 | Launch the container using the provided script: 50 | 51 | ``` 52 | ./launch_container.sh 53 | ``` 54 | 55 | Run COLMAP on a sequence of images to get the camera parameters: 56 | 57 | ``` 58 | ./run_colmap.sh /data/0000 59 | ``` 60 | 61 | Run the extreme view synthesis code: 62 | 63 | ``` 64 | python run_xtreme_view.py /data/0000 --input_views=6,8 65 | ``` 66 | 67 | This will run the extreme view synthesis code using images 6 and 8 of the sequence /data/0000. You can modify the code to use different virtual cameras (see the sketch at the end of this section). 68 | 69 | You can run COLMAP and the extreme view synthesis on all the sample sequences: 70 | 71 | ``` 72 | ./run_colmap_all.sh 73 | ./run_xtreme_view_all.sh 74 | ``` 75 | 76 | The results are stored in the sequence directory under ```xtreme-view```. For example, for ```data/0000``` you will find the results in the directory ```data/0000/xtreme-view```. The initial view synthesis is located under ```output``` and the refined one under ```refinement```.
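The virtual cameras used for each sample sequence are defined in the ```virtual_cams``` dictionary at the top of ```run_xtreme_view.py```: each entry names a source view (```src_indx```) and a list of 4-vector offsets that are added to the translation column of that view's extrinsic matrix, and sequences without their own entry fall back to the ```default``` entry. As a rough sketch of how a new sequence could be registered (the sequence name ```0100``` and the offset values below are illustrative placeholders, not tuned settings):

```
# Sketch only: virtual cameras for a hypothetical sequence /data/0100.
# Offsets are placeholders; see the existing entries in run_xtreme_view.py for tuned values.
virtual_cams['0100'] = {'src_indx': 0,                               # input view whose pose is offset
                        'view_offsets': [np.array([-3.0, 0, 0, 0]),  # offsets added to extrinsic[:, 3]
                                         np.array([ 3.0, 0, 0, 0])]
                        }
```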
77 | 78 | ## Citation 79 | If you find this code useful in your research or fun project, please consider citing the paper: 80 | ``` 81 | @inproceedings{extremeview, 82 | title={Extreme View Synthesis}, 83 | author={Choi, Inchang and Gallo, Orazio and Troccoli, Alejandro and Kim, Min H and Kautz, Jan}, 84 | booktitle={Proceedings of the IEEE International Conference on Computer Vision}, 85 | pages={7781--7790}, 86 | year={2019} 87 | } 88 | ``` 89 | 90 | ## Open Source licenses 91 | 92 | DeepMVS is Copyright (c) 2018, Po-Han Huang, distributed under the [BSD 2-clause license](https://opensource.org/licenses/BSD-2-Clause) 93 | -------------------------------------------------------------------------------- /download_model.sh: -------------------------------------------------------------------------------- 1 | wget -q --show-progress -O models.zip https://api.ngc.nvidia.com/v2/models/nvidia/xtreme_view/versions/1/zip 2 | unzip models.zip 3 | rm models.zip -------------------------------------------------------------------------------- /evs.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/extreme-view-synth/2820ffdda9f44e70cd2fdd0845ec9145293e4183/evs.gif -------------------------------------------------------------------------------- /launch_container.sh: -------------------------------------------------------------------------------- 1 | docker run --gpus all --rm -it --ipc=host -u $(id -u ${USER}):$(id -g ${USER}) -v /etc/passwd:/etc/passwd -v /etc/group:/etc/group -e TORCH_HOME=/models/torchvision -v $(pwd)/models:/models -v $(pwd)/data:/data xtreme-view 2 | -------------------------------------------------------------------------------- /xtreme-view/DeepMVS/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | -------------------------------------------------------------------------------- /xtreme-view/DeepMVS/model.py: -------------------------------------------------------------------------------- 1 | ''' 2 | BSD 2-Clause License 3 | 4 | Copyright (c) 2018, Po-Han Huang 5 | All rights reserved. 6 | 7 | Redistribution and use in source and binary forms, with or without 8 | modification, are permitted provided that the following conditions are met: 9 | 10 | * Redistributions of source code must retain the above copyright notice, this 11 | list of conditions and the following disclaimer. 12 | 13 | * Redistributions in binary form must reproduce the above copyright notice, 14 | this list of conditions and the following disclaimer in the documentation 15 | and/or other materials provided with the distribution. 16 | 17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 18 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 20 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 21 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 23 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 24 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 25 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
27 | ''' 28 | 29 | import torch 30 | import torch.nn as nn 31 | import torch.nn.functional as F 32 | 33 | 34 | class DeepMVS(nn.Module): 35 | def __init__(self, num_depths, use_gpu = True, gpu_id = 0): 36 | super(DeepMVS, self).__init__() 37 | # Patch Matching 38 | self.layer_0 = nn.Sequential( 39 | nn.Conv2d(3, 64, (5, 5), stride = (1, 1), padding = (2, 2)), 40 | nn.SELU() 41 | ) 42 | self.layer_1 = nn.Sequential( 43 | nn.Conv2d(128, 96, (5, 5), stride = (1, 1), padding = (2, 2)), 44 | nn.SELU(), 45 | nn.Conv2d(96, 32, (5, 5), stride = (1, 1), padding = (2, 2)), 46 | nn.SELU(), 47 | nn.Conv2d(32, 4, (5, 5), stride = (1, 1), padding = (2, 2)), 48 | nn.SELU() 49 | ) 50 | # Encoder 51 | self.layer_2_e1x = nn.Sequential( 52 | nn.Conv2d(4 * num_depths, 200, (3, 3), stride = (1, 1), padding = (1, 1)), 53 | nn.SELU(), 54 | nn.Conv2d(200, 100, (3, 3), stride = (1, 1), padding = (1, 1)), 55 | nn.SELU() 56 | ) 57 | self.layer_2_e2x = nn.Sequential( 58 | nn.Conv2d(100, 100, (2, 2), stride = (2, 2), padding = (0, 0)), 59 | nn.SELU(), 60 | nn.Conv2d(100, 100, (3, 3), stride = (1, 1), padding = (1, 1)), 61 | nn.SELU() 62 | ) 63 | self.layer_2_e4x = nn.Sequential( 64 | nn.Conv2d(100, 100, (2, 2), stride = (2, 2), padding = (0, 0)), 65 | nn.SELU(), 66 | nn.Conv2d(100, 100, (3, 3), stride = (1, 1), padding = (1, 1)), 67 | nn.SELU(), 68 | ) 69 | self.layer_2_e8x = nn.Sequential( 70 | nn.Conv2d(100, 100, (2, 2), stride = (2, 2), padding = (0, 0)), 71 | nn.SELU(), 72 | nn.Conv2d(100, 100, (3, 3), stride = (1, 1), padding = (1, 1)), 73 | nn.SELU(), 74 | ) 75 | self.layer_2_e16x = nn.Sequential( 76 | nn.Conv2d(100, 100, (2, 2), stride = (2, 2), padding = (0, 0)), 77 | nn.SELU(), 78 | nn.Conv2d(100, 100, (3, 3), stride = (1, 1), padding = (1, 1)), 79 | nn.SELU() 80 | ) 81 | # Buffer layers for VGG features 82 | self.layer_b1x = nn.Sequential( 83 | nn.Conv2d(64, 64, (1, 1), stride = (1, 1), padding = (0, 0)), 84 | nn.SELU(), 85 | ) 86 | self.layer_b2x = nn.Sequential( 87 | nn.Conv2d(128, 100, (1, 1), stride = (1, 1), padding = (0, 0)), 88 | nn.SELU(), 89 | ) 90 | self.layer_b4x = nn.Sequential( 91 | nn.Conv2d(256, 100, (1, 1), stride = (1, 1), padding = (0, 0)), 92 | nn.SELU(), 93 | ) 94 | self.layer_b8x = nn.Sequential( 95 | nn.Conv2d(512, 100, (1, 1), stride = (1, 1), padding = (0, 0)), 96 | nn.SELU(), 97 | ) 98 | self.layer_b16x = nn.Sequential( 99 | nn.Conv2d(512, 100, (1, 1), stride = (1, 1), padding = (0, 0)), 100 | nn.SELU(), 101 | ) 102 | # Decoder 103 | self.layer_2_d16x = nn.Sequential( 104 | nn.Conv2d(200, 100, (3, 3), stride = (1, 1), padding = (1, 1)), 105 | nn.SELU(), 106 | nn.Conv2d(100, 100, (3, 3), stride = (1, 1), padding = (1, 1)), 107 | nn.SELU(), 108 | ) 109 | self.layer_2_d8x = nn.Sequential( 110 | nn.Conv2d(300, 100, (3, 3), stride = (1, 1), padding = (1, 1)), 111 | nn.SELU(), 112 | nn.Conv2d(100, 100, (3, 3), stride = (1, 1), padding = (1, 1)), 113 | nn.SELU() 114 | ) 115 | self.layer_2_d4x = nn.Sequential( 116 | nn.Conv2d(300, 100, (3, 3), stride = (1, 1), padding = (1, 1)), 117 | nn.SELU(), 118 | nn.Conv2d(100, 100, (3, 3), stride = (1, 1), padding = (1, 1)), 119 | nn.SELU() 120 | ) 121 | self.layer_2_d2x = nn.Sequential( 122 | nn.Conv2d(300, 100, (3, 3), stride = (1, 1), padding = (1, 1)), 123 | nn.SELU(), 124 | nn.Conv2d(100, 100, (3, 3), stride = (1, 1), padding = (1, 1)), 125 | nn.SELU() 126 | ) 127 | self.layer_2_d1x = nn.Sequential( 128 | nn.Conv2d(264, 400, (3, 3), stride = (1, 1), padding = (1, 1)), 129 | nn.SELU(), 130 | nn.Conv2d(400, 800, (3, 3), stride = (1, 1), padding = 
(1, 1)), 131 | nn.SELU() 132 | ) 133 | # Inter-Volume Aggregation 134 | self.layer_3 = nn.Sequential( 135 | nn.Conv2d(800, 400, (3, 3), stride = (1, 1), padding = (1, 1)), 136 | nn.SELU(), 137 | nn.Conv2d(400, num_depths, (3, 3), stride = (1, 1), padding = (1, 1)) 138 | ) 139 | self.layer_loss = nn.CrossEntropyLoss(ignore_index=-1) 140 | 141 | if use_gpu: 142 | self.layer_0 = self.layer_0.cuda(gpu_id) 143 | self.layer_1 = self.layer_1.cuda(gpu_id) 144 | self.layer_2_e1x = self.layer_2_e1x.cuda(gpu_id) 145 | self.layer_2_e2x = self.layer_2_e2x.cuda(gpu_id) 146 | self.layer_2_e4x = self.layer_2_e4x.cuda(gpu_id) 147 | self.layer_2_e8x = self.layer_2_e8x.cuda(gpu_id) 148 | self.layer_2_e16x = self.layer_2_e16x.cuda(gpu_id) 149 | self.layer_b1x = self.layer_b1x.cuda(gpu_id) 150 | self.layer_b2x = self.layer_b2x.cuda(gpu_id) 151 | self.layer_b4x = self.layer_b4x.cuda(gpu_id) 152 | self.layer_b8x = self.layer_b8x.cuda(gpu_id) 153 | self.layer_b16x = self.layer_b16x.cuda(gpu_id) 154 | self.layer_2_d16x = self.layer_2_d16x.cuda(gpu_id) 155 | self.layer_2_d8x = self.layer_2_d8x.cuda(gpu_id) 156 | self.layer_2_d4x = self.layer_2_d4x.cuda(gpu_id) 157 | self.layer_2_d2x = self.layer_2_d2x.cuda(gpu_id) 158 | self.layer_2_d1x = self.layer_2_d1x.cuda(gpu_id) 159 | self.layer_3 = self.layer_3.cuda(gpu_id) 160 | self.layer_loss = self.layer_loss.cuda(gpu_id) 161 | 162 | # Shape of 'volume_input': batch_size * num_neighbors (or num_sources) * num_depths * 2 * num_channels * height * width 163 | # 'feature_inputs' is a list of five VGG feature tensors, each of shape: batch_size * num_features * height * width 164 | def forward(self, volume_input, feature_inputs): 165 | (aggregated_feature, _) = torch.max(self.forward_feature(volume_input, feature_inputs), 1) 166 | return self.forward_predict(aggregated_feature) 167 | 168 | def forward_feature(self, volume_input, feature_inputs): 169 | if volume_input.dim() != 7 or volume_input.size(3) != 2: 170 | raise ValueError("'volume_input' must be a tensor of shape: batch_size * num_neighbors (or num_sources) * num_depths * 2 * num_channels * height * width") 171 | if len(feature_inputs) != 5: 172 | raise ValueError("'feature_inputs' is a list of five VGG feature tensors of shape: batch_size * num_features * height * width") 173 | for feature in feature_inputs: 174 | if feature.dim() != 4: 175 | raise ValueError("'feature_inputs' is a list of five VGG feature tensors of shape: batch_size * num_features * height * width") 176 | batch_size = volume_input.size(0) 177 | num_neighbors = volume_input.size(1) 178 | num_depths = volume_input.size(2) 179 | num_channels = volume_input.size(4) 180 | height = volume_input.size(5) 181 | width = volume_input.size(6) 182 | layer_0_output = self.layer_0( 183 | volume_input.view(batch_size * num_neighbors * num_depths * 2, num_channels, height, width)) 184 | layer_1_output = self.layer_1( 185 | layer_0_output.view(batch_size * num_neighbors * num_depths, 2 * 64, height, width)) 186 | layer_2_e1x_out = self.layer_2_e1x(layer_1_output.view(batch_size * num_neighbors, num_depths * 4, height, width)) 187 | layer_2_e2x_out = self.layer_2_e2x(layer_2_e1x_out) 188 | layer_2_e4x_out = self.layer_2_e4x(layer_2_e2x_out) 189 | layer_2_e8x_out = self.layer_2_e8x(layer_2_e4x_out) 190 | layer_2_e16x_out = self.layer_2_e16x(layer_2_e8x_out) 191 | layer_b1x_out = self.layer_b1x(feature_inputs[0]) 192 | layer_b2x_out = self.layer_b2x(feature_inputs[1]) 193 | layer_b4x_out = self.layer_b4x(feature_inputs[2]) 194 | layer_b8x_out = 
self.layer_b8x(feature_inputs[3]) 195 | layer_b16x_out = self.layer_b16x(feature_inputs[4]) 196 | if num_neighbors != 1: 197 | # We need to copy the features for each neighbor image. When batch_size = 1, use expand() instead of repeat() to save memory. 198 | if batch_size == 1: 199 | layer_b1x_out = layer_b1x_out.expand(batch_size * num_neighbors, -1, -1, -1) 200 | layer_b2x_out = layer_b2x_out.expand(batch_size * num_neighbors, -1, -1, -1) 201 | layer_b4x_out = layer_b4x_out.expand(batch_size * num_neighbors, -1, -1, -1) 202 | layer_b8x_out = layer_b8x_out.expand(batch_size * num_neighbors, -1, -1, -1) 203 | layer_b16x_out = layer_b16x_out.expand(batch_size * num_neighbors, -1, -1, -1) 204 | else: 205 | layer_b1x_out = layer_b1x_out.repeat(num_neighbors, 1, 1, 1) 206 | layer_b2x_out = layer_b2x_out.repeat(num_neighbors, 1, 1, 1) 207 | layer_b4x_out = layer_b4x_out.repeat(num_neighbors, 1, 1, 1) 208 | layer_b8x_out = layer_b8x_out.repeat(num_neighbors, 1, 1, 1) 209 | layer_b16x_out = layer_b16x_out.repeat(num_neighbors, 1, 1, 1) 210 | layer_2_d16x_out = self.layer_2_d16x(torch.cat((layer_2_e16x_out, layer_b16x_out), 1)) 211 | layer_2_d8x_out = self.layer_2_d8x(torch.cat((layer_2_e8x_out, F.upsample(layer_2_d16x_out, scale_factor=2, mode='bilinear'), layer_b8x_out), 1)) 212 | layer_2_d4x_out = self.layer_2_d4x(torch.cat((layer_2_e4x_out, F.upsample(layer_2_d8x_out, scale_factor=2, mode='bilinear'), layer_b4x_out), 1)) 213 | layer_2_d2x_out = self.layer_2_d2x(torch.cat((layer_2_e2x_out, F.upsample(layer_2_d4x_out, scale_factor=2, mode='bilinear'), layer_b2x_out), 1)) 214 | layer_2_d1x_out = self.layer_2_d1x(torch.cat((layer_2_e1x_out, F.upsample(layer_2_d2x_out, scale_factor=2, mode='bilinear'), layer_b1x_out), 1)) 215 | return layer_2_d1x_out.view(batch_size, num_neighbors, 800, height, width) 216 | 217 | def forward_predict(self, aggregated_feature): 218 | layer_3_output = self.layer_3(aggregated_feature) 219 | return layer_3_output 220 | 221 | def weights_init(m): 222 | if isinstance(m, nn.Conv2d): 223 | nn.init.xavier_normal(m.weight.data) 224 | m.bias.data.fill_(0) -------------------------------------------------------------------------------- /xtreme-view/dataloader/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/extreme-view-synth/2820ffdda9f44e70cd2fdd0845ec9145293e4183/xtreme-view/dataloader/__init__.py -------------------------------------------------------------------------------- /xtreme-view/dataloader/colmap_loader.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (C) 2019 NVIDIA Corporation. All rights reserved. 3 | Licensed under the NVIDIA Source Code License. See LICENSE.md at https://github.com/NVlabs/extreme-view-synth 4 | Authors: Inchang Choi, Orazio Gallo, Alejandro Troccoli, Min H. 
Kim, and Jan Kautz 5 | """ 6 | 7 | 8 | import numpy as np 9 | import cv2 10 | import imageio 11 | import json 12 | import os 13 | import sys 14 | from pyquaternion import Quaternion 15 | 16 | colmap_root = os.getenv('COLMAP_ROOT', '/workspace/colmap') 17 | sys.path.append(os.path.join(colmap_root, 'scripts', 'python')) 18 | import read_model 19 | 20 | 21 | def read_array(path): 22 | with open(path, "rb") as fid: 23 | width, height, channels = np.genfromtxt(fid, delimiter="&", max_rows=1, 24 | usecols=(0, 1, 2), dtype=int) 25 | fid.seek(0) 26 | num_delimiter = 0 27 | byte = fid.read(1) 28 | while True: 29 | if byte == b"&": 30 | num_delimiter += 1 31 | if num_delimiter >= 3: 32 | break 33 | byte = fid.read(1) 34 | array = np.fromfile(fid, np.float32) 35 | array = array.reshape((width, height, channels), order="F") 36 | return np.transpose(array, (1, 0, 2)).squeeze() 37 | 38 | class COLMAPData(): 39 | 40 | @staticmethod 41 | def read_data_to_list(seq_path): 42 | 43 | list_img = [] 44 | list_depth = [] 45 | list_cam_params = [] 46 | 47 | # read camera params 48 | c_cams, c_images, c_points3D = read_model.read_model('%s/dense/0/sparse' % seq_path, '.bin') 49 | 50 | # read image and depth 51 | img_dir = os.path.join(seq_path, 'dense', '0', 'images') 52 | img_list = os.listdir(img_dir) 53 | img_list.sort() 54 | 55 | for idx, img_name in enumerate(img_list): 56 | filename_img = os.path.join(img_dir, img_name) 57 | filename_depth = os.path.join(seq_path, 'dense', '0', 'stereo', 'depth_maps', '%s.geometric.bin' % img_name) 58 | 59 | # read images 60 | img = imageio.imread(filename_img).astype(np.float32) / 255.0 61 | list_img.append(img) 62 | 63 | # read depths 64 | depth = read_array(filename_depth) 65 | 66 | min_depth, max_depth = np.percentile(depth, [5, 90]) 67 | depth[depth < min_depth] = min_depth 68 | depth[depth > max_depth] = max_depth 69 | list_depth.append(depth) 70 | 71 | # fetch the camera params 72 | for key in c_images: 73 | image_key = c_images[key] 74 | image_name = image_key.name 75 | if image_name == img_name: 76 | key_to_fetch_for_cam = image_key.camera_id 77 | key_to_fetch_for_image = key 78 | 79 | params = {} 80 | c_cam = c_cams[key_to_fetch_for_cam] 81 | params['f_x'] = c_cam.params[0] 82 | params['f_y'] = c_cam.params[1] 83 | params['c_x'] = c_cam.params[2] 84 | params['c_y'] = c_cam.params[3] 85 | 86 | c_image = c_images[key_to_fetch_for_image] 87 | q = Quaternion(c_image.qvec) 88 | e = np.zeros(shape=(4, 4)) 89 | e[0:3, 0:3] = q.rotation_matrix 90 | e[0:3, 3] = c_image.tvec 91 | e[3, 3] = 1.0 92 | int_mat = np.array([[params['f_x'], 0.0, params['c_x']], 93 | [0.0, params['f_y'], params['c_y']], 94 | [0.0, 0.0, 1.0]]) 95 | cam = {} 96 | cam['extrinsic'] = e 97 | cam['intrinsic'] = int_mat 98 | list_cam_params.append(cam) 99 | 100 | return list_img, list_depth, list_cam_params 101 | -------------------------------------------------------------------------------- /xtreme-view/run_colmap.sh: -------------------------------------------------------------------------------- 1 | # The project folder must contain a folder "images" with all the images. 
2 | DATASET_PATH=$1 3 | 4 | colmap feature_extractor \ 5 | --database_path $DATASET_PATH/db.db \ 6 | --image_path $DATASET_PATH/images \ 7 | --ImageReader.single_camera=1 8 | 9 | colmap exhaustive_matcher \ 10 | --database_path $DATASET_PATH/db.db 11 | 12 | mkdir $DATASET_PATH/sparse 13 | 14 | colmap mapper \ 15 | --database_path $DATASET_PATH/db.db \ 16 | --image_path $DATASET_PATH/images \ 17 | --output_path $DATASET_PATH/sparse \ 18 | --Mapper.init_min_tri_angle=0.1 \ 19 | --Mapper.tri_min_angle=0.1 \ 20 | --Mapper.filter_min_tri_angle=0.1 \ 21 | --Mapper.init_max_forward_motion=1.0 22 | 23 | mkdir -p $DATASET_PATH/dense/0 24 | 25 | colmap image_undistorter \ 26 | --image_path $DATASET_PATH/images \ 27 | --input_path $DATASET_PATH/sparse/0 \ 28 | --output_path $DATASET_PATH/dense/0 \ 29 | --output_type COLMAP \ 30 | --max_image_size 2000 31 | 32 | colmap patch_match_stereo \ 33 | --workspace_path $DATASET_PATH/dense/0 \ 34 | --workspace_format COLMAP \ 35 | --PatchMatchStereo.geom_consistency true \ 36 | --PatchMatchStereo.min_triangulation_angle=0.1 \ 37 | --PatchMatchStereo.filter_min_triangulation_angle=0.1 38 | 39 | colmap stereo_fusion \ 40 | --workspace_path $DATASET_PATH/dense/0 \ 41 | --workspace_format COLMAP \ 42 | --input_type geometric \ 43 | --output_path $DATASET_PATH/dense/fused.ply 44 | -------------------------------------------------------------------------------- /xtreme-view/run_colmap_all.sh: -------------------------------------------------------------------------------- 1 | ./run_colmap.sh /data/0000 2 | ./run_colmap.sh /data/0005 3 | ./run_colmap.sh /data/0009 4 | ./run_colmap.sh /data/0020 5 | ./run_colmap.sh /data/0027 -------------------------------------------------------------------------------- /xtreme-view/run_xtreme_view.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (C) 2019 NVIDIA Corporation. All rights reserved. 3 | Licensed under the NVIDIA Source Code License. See LICENSE.md at https://github.com/NVlabs/extreme-view-synth. 4 | Authors: Inchang Choi, Orazio Gallo, Alejandro Troccoli, Min H. 
Kim, and Jan Kautz 5 | """ 6 | 7 | from copy import deepcopy 8 | from vsynthlib import deepmvs_wrapper 9 | from vsynthlib import core 10 | from dataloader import colmap_loader 11 | 12 | import os 13 | import sys 14 | import argparse 15 | import numpy as np 16 | 17 | class XtremeViewRunner(): 18 | 19 | virtual_cams = {} 20 | 21 | # scene 0 22 | virtual_cams['0000'] = { 'src_indx' : 0, 23 | 'view_offsets' : [np.array([-6, 0, 0, 0]), 24 | np.array([-4.5,0, 0, 0]), 25 | np.array([-3,0, 0, 0]), 26 | np.array([ 3, 0, 0, 0]), 27 | np.array([ 4.5,0, 0, 0]), 28 | np.array([ 6, 0, 0, 0])] 29 | } 30 | 31 | # Figure 10 - top row 32 | virtual_cams['0005'] = { 'src_indx': 0, 33 | 'view_offsets': [np.array([6, 0, -0.5, 0]), 34 | np.array([-6, 0, -0.5, 0])] 35 | } 36 | 37 | # Figure 10 - second row 38 | virtual_cams['0009'] = { 'src_indx': 1, 39 | 'view_offsets': [np.array([-2, 0, -0.5, 0]), # absent 40 | np.array([-5,0, -0.5, 0]), # absent 41 | np.array([-8, 0, -0.5, 0]), # absent 42 | np.array([-11, 0, -0.5, 0])] 43 | } 44 | 45 | # Figure 10 - third row 46 | virtual_cams['0020'] = {'src_indx': 1, 47 | 'view_offsets' :[ 48 | np.array([0, 0, -3.0, 0]), 49 | np.array([0, 0, -4.0, 0]), 50 | np.array([0, 0, -8.0, 0]), 51 | ] 52 | } 53 | 54 | # Figure 10 - fourth row 55 | virtual_cams['0027'] = { 'src_indx': 1, 56 | 'view_offsets': [np.array([0.25,0, 0 , 0]), 57 | np.array([0.5,0, 0, 0]), 58 | np.array([1.0, 0, 0, 0]), 59 | np.array([1.5, 0, 0, 0])] 60 | } 61 | 62 | virtual_cams['default'] = { 'src_indx': 1, 63 | 'view_offsets': [np.array([0.25,0, 0 , 0]), 64 | np.array([0.5,0, 0, 0]), 65 | np.array([1.0, 0, 0, 0]), 66 | np.array([1.5, 0, 0, 0])] 67 | } 68 | 69 | def __init__(self, args): 70 | 71 | #################################### 72 | # Create a DeepMVS wrapper object 73 | #################################### 74 | filename_DeepMVS = os.path.join(args.models_path, 'DeepMVS_final.model') 75 | self.models_path = args.models_path 76 | self.refine_model_path = os.path.join(args.models_path, 'Model_VNPCAT_E33.pth') 77 | self.deepmvs_obj = deepmvs_wrapper.DeepMVSWrapper(filename_DeepMVS, do_filter=True) 78 | 79 | self.dense_crf_params = {'default': {'sigma_xy': 45.0, 'sigma_rgb': 30.0, 'iteration_num': 5, 'compat': 10.0}} 80 | 81 | 82 | def run(self, colmap_seq_path, input_views=[]): 83 | 84 | print('Processing sequence: ', colmap_seq_path) 85 | seq_name = os.path.basename(os.path.normpath(colmap_seq_path)) 86 | 87 | outDir = os.path.join(colmap_seq_path, 'xtreme-view') 88 | if not os.path.exists(outDir): 89 | os.mkdir(outDir) 90 | 91 | # Adjust the dense crf parameters if needed, keyed by the sequence name 92 | if seq_name in self.dense_crf_params: 93 | self.deepmvs_obj.dict_DenseCRF = self.dense_crf_params[seq_name] 94 | else: 95 | self.deepmvs_obj.dict_DenseCRF = self.dense_crf_params['default'] 96 | 97 | list_img, list_depth, list_cam_params \ 98 | = colmap_loader.COLMAPData.read_data_to_list(colmap_seq_path) 99 | 100 | if len(input_views) > 0: 101 | list_img = [list_img[i] for i in input_views] 102 | list_depth = [list_depth[i] for i in input_views] 103 | list_cam_params = [list_cam_params[i] for i in input_views] 104 | 105 | ############################# 106 | # Create our vsynth object 107 | ############################# 108 | view_synthesizer = core.VSynth(list_img, list_cam_params, outDir, 109 | self.deepmvs_obj, list_depth=list_depth, 110 | mode_colmap=True) 111 | 112 | ##################################################### 113 | # Compute the depth probability ( = perform DeepMVS) 114 | # When the depth probabilities are
stored in the working dir, 115 | # it will skip without performing DeepMVS 116 | ##################################################### 117 | view_synthesizer.compute_depth_probability() 118 | 119 | ############################################# 120 | # Create the virtual cameras 121 | ############################################# 122 | view_synthesizer.list_vcams = [] 123 | 124 | if seq_name in self.virtual_cams: 125 | view_offsets = self.virtual_cams[seq_name]['view_offsets'] 126 | src_indx = self.virtual_cams[seq_name]['src_indx'] 127 | else: 128 | view_offsets = self.virtual_cams['default']['view_offsets'] 129 | src_indx = self.virtual_cams['default']['src_indx'] 130 | 131 | for view_offset in view_offsets: 132 | new_vcam = deepcopy(view_synthesizer.list_src_cams[src_indx]) 133 | new_vcam['extrinsic'][:,3] = new_vcam['extrinsic'][:,3] + view_offset 134 | view_synthesizer.list_vcams.append(new_vcam) 135 | 136 | list_todo_index=[] # generate all the cameras 137 | view_synthesizer.do(MHW_SRCV_WEIGHT=False, list_todo_index=list_todo_index) 138 | view_synthesizer.refine(self.refine_model_path, list_todo_index=list_todo_index) 139 | 140 | if __name__ == '__main__': 141 | parser = argparse.ArgumentParser() 142 | parser.add_argument('seq_path', help='the path to the sequence of images.') 143 | parser.add_argument('--models_path', help='the path where the pre-trained models have been downloaded to.', default='/models') 144 | parser.add_argument('--input_views', help='comma-separated list of the indices in the sequence to use as inputs') 145 | args = parser.parse_args() 146 | print(args) 147 | 148 | runner = XtremeViewRunner(args) 149 | input_views = [] 150 | if args.input_views is not None: 151 | input_views = [int(i) for i in args.input_views.split(',')] 152 | runner.run(args.seq_path, input_views) -------------------------------------------------------------------------------- /xtreme-view/run_xtreme_view_all.sh: -------------------------------------------------------------------------------- 1 | python run_xtreme_view.py /data/0005 --input_views=4,6 2 | python run_xtreme_view.py /data/0009 --input_views=4,6 3 | python run_xtreme_view.py /data/0020 --input_views=9,11 4 | python run_xtreme_view.py /data/0027 --input_views=8,9,11,12 5 | python run_xtreme_view.py /data/0000 --input_views=5,6,8,9 -------------------------------------------------------------------------------- /xtreme-view/vsynthlib/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/extreme-view-synth/2820ffdda9f44e70cd2fdd0845ec9145293e4183/xtreme-view/vsynthlib/__init__.py -------------------------------------------------------------------------------- /xtreme-view/vsynthlib/core.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (C) 2019 NVIDIA Corporation. All rights reserved. 3 | Licensed under the NVIDIA Source Code License. See LICENSE.md at https://github.com/NVlabs/extreme-view-synth. 4 | Authors: Inchang Choi, Orazio Gallo, Alejandro Troccoli, Min H. 
Kim, and Jan Kautz 5 | """ 6 | 7 | import os 8 | import numpy as np 9 | import cv2 10 | from scipy.signal import argrelextrema 11 | import imageio 12 | from shutil import copyfile 13 | import torch 14 | import sys 15 | 16 | from vsynthlib import deepmvs_wrapper 17 | from vsynthlib import depth_util 18 | from vsynthlib import refinement 19 | 20 | class VSynth(object): 21 | 22 | DT_THRESHOLD = 0.075 23 | # DT_THRESHOLD = 0.010 24 | VISIBILITY_TEST_THRESHOLD = 0.10 25 | MHW_THRESHOLD = 0.05 26 | DEPTH_AT_INFINITY = 9999999999.0 27 | 28 | def __init__(self, list_src_img, list_src_cams, out_dir, deepmvs_obj, 29 | list_cam_params_for_vpath=[], 30 | n_virtual_cams=10, 31 | list_src_names=[], list_depth=[], n_depths=100, 32 | vcam_mode='extrap_naive', 33 | write_out=True, 34 | mode_colmap=False): 35 | 36 | self.list_src_img = list_src_img 37 | self.list_src_cams = list_src_cams 38 | self.list_depth = list_depth 39 | self.out_dir = out_dir 40 | 41 | self.depth_estimator = deepmvs_obj 42 | 43 | if len(list_cam_params_for_vpath) == 0: 44 | self.list_src_cams_for_vpath = list_src_cams 45 | else: 46 | self.list_src_cams_for_vpath = list_cam_params_for_vpath 47 | 48 | if len(list_src_names) == 0: 49 | self.list_src_names = [] 50 | for i in range(len(list_src_img)): 51 | self.list_src_names.append('%04d'%i) 52 | else: 53 | self.list_src_names = list_src_names 54 | 55 | self.params = dict() 56 | self.params['n_virtual_cams'] = n_virtual_cams 57 | self.params['n_depths'] = n_depths 58 | height, width, _ = list_src_img[0].shape 59 | self.params['height'] = height 60 | self.params['width'] = width 61 | self.params['vcam_mode'] = vcam_mode 62 | self.params['write_out'] = write_out 63 | self.params['mode_colmap'] = mode_colmap 64 | 65 | if self.params['write_out']: 66 | self.set_out_dirs() 67 | self.save_inputs() 68 | 69 | 70 | def set_out_dirs(self): 71 | if not os.path.exists(self.out_dir): 72 | os.mkdir(self.out_dir) 73 | 74 | self.out_dir_dp = self.out_dir + '/dp' 75 | self.out_dir_input = self.out_dir + '/input' 76 | self.out_dir_output = self.out_dir + '/output' 77 | self.out_dir_synth_obj = self.out_dir + '/synth_obj' 78 | self.out_dir_vcams = self.out_dir + '/vcam' 79 | self.out_dir_refinement = self.out_dir + '/refinement' 80 | self.out_dir_2nd_synth_obj = self.out_dir + '/acc_synth_obj' 81 | self.out_dir_2nd_output = self.out_dir + '/acc_output' 82 | self.out_dir_2nd_refinement = self.out_dir + '/acc_refinement' 83 | self.out_dir_back_to_front_synth = self.out_dir + '/back_to_front' 84 | 85 | if not os.path.exists(self.out_dir_back_to_front_synth): 86 | os.mkdir(self.out_dir_back_to_front_synth) 87 | 88 | 89 | def save_inputs(self): 90 | if not os.path.exists(self.out_dir_input): 91 | os.mkdir(self.out_dir_input) 92 | 93 | for i in range(len(self.list_src_img)): 94 | img_i = self.list_src_img[i] 95 | cam_i = self.list_src_cams[i] 96 | 97 | # save dp_i to a file 98 | f_img_i = '%s/img_%s.png'%(self.out_dir_input, self.list_src_names[i]) 99 | f_cam_i = '%s/cam_%s.npy'%(self.out_dir_input, self.list_src_names[i]) 100 | 101 | np.save(f_cam_i, cam_i) 102 | imageio.imwrite(f_img_i, img_i) 103 | 104 | def save_cam_params(self, vcam_path): 105 | if not os.path.exists(self.out_dir_vcams): 106 | os.mkdir(self.out_dir_vcams) 107 | 108 | for cam_idx, vcam in enumerate(vcam_path): 109 | f_vcam = '%s/%04d.npy' % (self.out_dir_vcams, cam_idx) 110 | np.save(f_vcam, vcam) 111 | 112 | 113 | 114 | 115 | def compute_depth_probability(self, load_if_exists=True, 116 | LF_dataset_obj=None, hint=''): 117 | if 
self.params['write_out']: 118 | if not os.path.exists(self.out_dir_dp): 119 | os.mkdir(self.out_dir_dp) 120 | 121 | # compute the depth range 122 | self.compute_depth_range(hint=hint) 123 | 124 | 125 | if self.params['write_out'] and load_if_exists: 126 | # check if there are precomputed depth probabilities 127 | list_loaded_dp = [] 128 | all_loaded = True 129 | for i in range(len(self.list_src_img)): 130 | filename = '%s/dp_%s.npy' % (self.out_dir_dp, self.list_src_names[i]) 131 | if os.path.exists(filename): 132 | dp_i = np.load(filename) 133 | print(filename) 134 | list_loaded_dp.append(dp_i) 135 | 136 | if i == 0: 137 | _, dmap_color_i, _, color_depth_max = depth_util.generate_depthmap(dp_i, 138 | self.params['min_disp'], 139 | self.params['disp_step'], 140 | self.DEPTH_AT_INFINITY) 141 | self.color_depth_max = color_depth_max 142 | 143 | else: 144 | all_loaded = False 145 | 146 | # if exists, load and return 147 | if all_loaded: 148 | print('The depth probabilities are loaded!') 149 | self.list_depth_prob = list_loaded_dp 150 | return 151 | 152 | self.list_depth_prob = [] 153 | 154 | for i in range(len(self.list_src_img)): 155 | 156 | 157 | import time 158 | start_time = time.time() 159 | 160 | dp_i = self.depth_estimator.compute(self.list_src_img, self.list_src_cams, i, 161 | self.params['min_disp'], 162 | self.params['disp_step'], 163 | self.DEPTH_AT_INFINITY) 164 | 165 | elapsed_time = time.time() - start_time 166 | print('DeepMVS: ' + time.strftime("%H:%M:%S", time.gmtime(elapsed_time))) 167 | 168 | self.list_depth_prob.append(dp_i) 169 | 170 | # save dp_i to a file 171 | if self.params['write_out']: 172 | filename = '%s/dp_%s.npy' % (self.out_dir_dp, self.list_src_names[i]) 173 | print(filename) 174 | np.save(filename, dp_i) 175 | 176 | # gen depth map 177 | if i == 0: 178 | _, dmap_color_i, _, color_depth_max = depth_util.generate_depthmap(dp_i, 179 | self.params['min_disp'], 180 | self.params['disp_step'], 181 | self.DEPTH_AT_INFINITY) 182 | self.color_depth_max = color_depth_max 183 | else: 184 | _, dmap_color_i, _ = depth_util.generate_depthmap(dp_i, 185 | self.params['min_disp'], 186 | self.params['disp_step'], 187 | self.DEPTH_AT_INFINITY, 188 | color_max_val=color_depth_max) 189 | filename = '%s/dmap_%s.png' % (self.out_dir_dp, self.list_src_names[i]) 190 | imageio.imwrite(filename, dmap_color_i) 191 | 192 | 193 | def compute_depth_range(self, hint=''): 194 | max_depth = 0.0 195 | min_depth = 9999999.0 196 | 197 | 198 | list_data = np.array([]) 199 | for img_depth in self.list_depth: 200 | valid_mask = np.logical_not(np.isinf(img_depth)) 201 | valid_mask = np.logical_and(valid_mask, img_depth != 0.0) 202 | list_data = np.append(list_data, img_depth[valid_mask]) 203 | 204 | hist, bins = np.histogram(list_data, bins=100) 205 | n_data = len(list_data) 206 | threshold_max = n_data*0.98 207 | threshold_min = n_data*0.02 208 | sum_hist = 0 209 | min_depth = np.min(list_data) 210 | max_depth = np.max(list_data) 211 | print('min: %f / max: %f (before histogram)'%(min_depth, max_depth)) 212 | 213 | min_found = False 214 | for bin_idx, hist_val in enumerate(hist): 215 | sum_hist += hist_val 216 | if not min_found and sum_hist > threshold_min: 217 | if bin_idx >= 1: 218 | min_depth = bins[bin_idx - 1] 219 | else: 220 | min_depth = bins[bin_idx] 221 | min_found = True 222 | 223 | if sum_hist > threshold_max: 224 | max_depth = bins[bin_idx + 1] 225 | break 226 | 227 | # museum (2.5, 15.0) 228 | # our_0046 (30, 800) 229 | # min_depth = 2.5 230 | # max_depth = 15.0 231 | 232 | if hint == 
'museum1': 233 | min_depth = 2.5 234 | max_depth = 15.0 235 | 236 | print('min: %f / max: %f (after histogram)' % (min_depth, max_depth)) 237 | 238 | 239 | print('max depth: %f' % (max_depth)) 240 | print('min depth: %f' % (min_depth)) 241 | max_disp = 1.0 / (min_depth + 1e-06) 242 | min_disp = 1.0 / (max_depth + 1e-06) 243 | disp_step = (max_disp - min_disp) / (self.params['n_depths'] - 1) 244 | print('disp step: ' + str(disp_step)) 245 | 246 | # save to params 247 | self.params['max_depth'] = max_depth 248 | self.params['min_depth'] = min_depth 249 | self.params['max_disp'] = max_disp 250 | self.params['min_disp'] = min_disp 251 | self.params['disp_step'] = disp_step 252 | 253 | return max_depth, min_depth, max_disp, min_disp, disp_step 254 | 255 | 256 | def do(self, list_idx=None, MHW=False, save_dp=False, MHW_SRCV_WEIGHT=False, list_todo_index=[], 257 | winner_takes_all=False): 258 | 259 | if self.params['write_out']: 260 | # create directories 261 | if not os.path.exists(self.out_dir_output): 262 | os.mkdir(self.out_dir_output) 263 | 264 | if not os.path.exists(self.out_dir_synth_obj): 265 | os.mkdir(self.out_dir_synth_obj) 266 | 267 | 268 | for cam_idx, cam in enumerate(self.list_vcams): 269 | 270 | if list_todo_index != [] and not (cam_idx in list_todo_index): 271 | continue 272 | synth_obj = self.do_single_image(cam, cam_idx, 273 | winner_takes_all) 274 | 275 | if self.params['write_out']: 276 | if list_idx is None: 277 | id = cam_idx 278 | else: 279 | id = list_idx[cam_idx] 280 | 281 | # save 282 | f_synth_obj = '%s/%04d.npz'%(self.out_dir_synth_obj, id) 283 | np.savez_compressed(f_synth_obj, synth_obj) 284 | f_img_synth = '%s/vsynth_%04d.png'%(self.out_dir_output, id) 285 | imageio.imwrite(f_img_synth, synth_obj['img_synth']) 286 | f_depth_synth = '%s/dmap_%04d.png' % (self.out_dir_output, id) 287 | 288 | if not MHW: 289 | imageio.imwrite(f_depth_synth, 290 | depth_util.apply_colormap_to_depth(synth_obj['depth_map'], 291 | self.DEPTH_AT_INFINITY, 292 | max_depth=self.color_depth_max)) 293 | if save_dp: 294 | f_dp_synth = '%s/dp_%04d.npy'%(self.out_dir_output, id) 295 | np.save(f_dp_synth, synth_obj['dp']) 296 | 297 | 298 | def do_single_image(self, dest_cam, idx=0, 299 | winner_takes_all=False, 300 | save_dp=False): 301 | # transform the depth probability 302 | 303 | import time 304 | start = time.time() 305 | 306 | 307 | dp_dest, list_warped_prob\ 308 | = transform_cost_volume_cuda(self.list_src_cams, self.list_depth_prob, 309 | dest_cam, 310 | self.params['n_depths'], 311 | self.params['height'], 312 | self.params['width'], 313 | self.params['min_disp'], 314 | self.params['disp_step'], 315 | self.params['max_depth'], 316 | self.params['min_depth']) 317 | 318 | end = time.time() 319 | print("Transform_cost_volume() took " + str(end - start)) 320 | 321 | 322 | # generate PSV 323 | start = time.time() 324 | PSV_dest = build_PSV(self.list_src_img, self.list_src_cams, 325 | dest_cam, 326 | self.params['n_depths'], 327 | self.params['height'], 328 | self.params['width'], 329 | self.params['min_disp'], 330 | self.params['disp_step'], 331 | self.params['max_depth'], 332 | USE_DICT=True) 333 | end = time.time() 334 | print("build_PSV() took " + str(end - start)) 335 | 336 | # perform view synthesis 337 | start = time.time() 338 | img_synth, list_new_vies, visibility_map,\ 339 | depth_map_P1, depth_map_color_P1,\ 340 | depth_map_P2, depth_map_color_P2\ 341 | = synthesize_a_view(dest_cam, PSV_dest, dp_dest, 342 | self.list_src_img, self.list_src_cams, 343 | self.list_depth_prob, 344 | 
self.params['min_disp'], 345 | self.params['disp_step'], 346 | self.DEPTH_AT_INFINITY, 347 | self.params['height'], 348 | self.params['width'], 349 | self.params['n_depths'], 350 | with_ULR_weight=True, 351 | color_max_depth=self.color_depth_max, 352 | winner_takes_all=winner_takes_all) 353 | 354 | 355 | 356 | end = time.time() 357 | print("synthesize_a_view() took " + str(end - start)) 358 | 359 | 360 | # save 361 | synth_obj = {'img_synth': img_synth, 362 | 'visibility_map': visibility_map, 363 | 'depth_map': depth_map_P1, 364 | 'depth_map_P1': depth_map_P1, 365 | 'depth_map_P2': depth_map_P2, 366 | 'view_idx': idx, 367 | 'dest_cam': dest_cam} 368 | 369 | if save_dp: 370 | synth_obj['dp'] = dp_dest 371 | 372 | return synth_obj 373 | 374 | def refine(self, filename_weight, do_stereo=False, 375 | patch_size=64, list_todo_index=[], 376 | custom_outdir=''): 377 | refiner = refinement.DeepViewRefiner(filename_weight, 378 | self.out_dir, 379 | self.out_dir_refinement, 380 | patch_size=patch_size) 381 | 382 | for cam_idx, cam in enumerate(self.list_vcams): 383 | if list_todo_index != [] and not (cam_idx in list_todo_index): 384 | continue 385 | f_synth_obj = '%s/%04d.npz'%(self.out_dir_synth_obj, cam_idx) 386 | synth_obj = np.load(f_synth_obj, allow_pickle=True) 387 | synth_obj = synth_obj['arr_0'].item() 388 | 389 | refiner.do(synth_obj, self.list_src_img, self.list_src_cams, cam_idx, 390 | do_stereo=do_stereo, custom_outdir=custom_outdir) 391 | 392 | 393 | 394 | def build_PSV(list_src_img, list_src_cams, 395 | cam_dest, num_depths, height, width, 396 | min_disp, disp_step, max_depth, USE_DICT=False): 397 | 398 | n_neighbors = len(list_src_img) 399 | 400 | if USE_DICT: 401 | PSV = {} 402 | else: 403 | PSV = np.zeros(shape=[n_neighbors, num_depths, height, width, 3], dtype=np.float32) 404 | 405 | int_dest = cam_dest['intrinsic'] 406 | fx_dest = int_dest[0, 0] 407 | fy_dest = int_dest[1, 1] 408 | cx_dest = int_dest[0, 2] 409 | cy_dest = int_dest[1, 2] 410 | ext_dest = cam_dest['extrinsic'] 411 | inv_ext_dest = np.linalg.inv(ext_dest) 412 | 413 | # for each neighbor image 414 | counter_img = 0 415 | for i in range(len(list_src_img)): 416 | img_i = list_src_img[i] 417 | cam_i = list_src_cams[i] 418 | # get the parameters 419 | int_i = cam_i['intrinsic'] 420 | fx_i = int_i[0, 0] 421 | fy_i = int_i[1, 1] 422 | cx_i = int_i[0, 2] 423 | cy_i = int_i[1, 2] 424 | ext_i = cam_i['extrinsic'] 425 | 426 | # 4 Corners on the virtual camera to get te 4 rays that intersect with the depth plane 427 | src_pts = np.reshape([0, 0, 428 | width, 0, 429 | width, height, 430 | 0, height], (4, 2)) 431 | 432 | if USE_DICT: 433 | PSV_i = np.zeros(shape=[num_depths, height, width, 3], dtype=np.float32) 434 | 435 | # for each depth plane 436 | for d in range(num_depths): 437 | 438 | disp = d * disp_step + min_disp 439 | if d == 0: 440 | depth = max_depth 441 | else: 442 | depth = 1.0 / disp 443 | 444 | # print(depth) 445 | 446 | # compute dst points 447 | dst_pts = np.zeros((4, 2)) 448 | counter_pt = 0 449 | for p in src_pts: 450 | p_3D_ref = np.asarray([(depth * p[0] - depth * cx_dest) / fx_dest, 451 | (depth * p[1] - depth * cy_dest) / fy_dest, 452 | depth]) 453 | p_4D_ref = np.array([p_3D_ref[0], p_3D_ref[1], p_3D_ref[2], 1.0]) 454 | p_4D_world = inv_ext_dest.dot(p_4D_ref) 455 | p_4D_i = ext_i.dot(p_4D_world) 456 | dst = np.asarray([cx_i + fx_i * p_4D_i[0] / p_4D_i[2], cy_i + fy_i * p_4D_i[1] / p_4D_i[2]]) 457 | dst_pts[counter_pt, :] = dst.squeeze() 458 | counter_pt += 1 459 | 460 | # compute homography 461 | M, mask = 
cv2.findHomography(dst_pts, src_pts) 462 | # warp the image 463 | result = cv2.warpPerspective(img_i, M, (width, height), 464 | flags=cv2.INTER_LINEAR, 465 | borderMode=cv2.BORDER_REPLICATE) 466 | # cv2.imshow("img_ref", img_ref) 467 | # cv2.imshow("PSV of img %02d" % (i), result) 468 | # cv2.waitKey() 469 | if USE_DICT: 470 | PSV_i[d, :, :, :] = result 471 | else: 472 | PSV[counter_img, d, :, :, :] = result 473 | 474 | if USE_DICT: 475 | PSV[i] = PSV_i 476 | counter_img += 1 477 | 478 | return PSV 479 | 480 | 481 | 482 | def transform_cost_volume_cuda(list_src_cams, list_src_DPs, 483 | dest_cam, 484 | num_depths, height, width, 485 | min_disp, disp_step, max_depth, min_depth, 486 | do_normalization=True): 487 | 488 | 489 | int_dest = dest_cam['intrinsic'] 490 | ext_dest = dest_cam['extrinsic'] 491 | inv_int_dest = np.linalg.inv(int_dest) 492 | torch_inv_int_dest = torch.from_numpy(inv_int_dest) 493 | torch_inv_int_dest = torch_inv_int_dest.cuda() 494 | inv_ext_dest = np.linalg.inv(ext_dest) 495 | torch_inv_ext_dest = torch.from_numpy(inv_ext_dest) 496 | torch_inv_ext_dest = torch_inv_ext_dest.cuda() 497 | 498 | list_warped_prob = [] 499 | sum_warped_prob = np.zeros(shape=(num_depths, height, width)) 500 | view_counter = np.zeros(shape=(num_depths, height, width)) 501 | 502 | # define the voxel grid 503 | Z, Y, X = np.meshgrid(np.arange(0, num_depths), np.arange(0, height), np.arange(0, width), indexing='ij') 504 | Z = Z * disp_step 505 | zero_mask = (Z == 0) 506 | Z[Z != 0] += min_disp 507 | Z[Z != 0] = 1.0 / Z[Z != 0] 508 | # Z[Z != 0] += min_disp 509 | Z[zero_mask] = max_depth 510 | X = X * Z 511 | Y = Y * Z 512 | points = np.array([X.reshape(-1), Y.reshape(-1), Z.reshape(-1)]) 513 | points = np.transpose(points) 514 | 515 | torch_points = torch.from_numpy(points) 516 | torch_points = torch_points.cuda() 517 | 518 | 519 | for src_idx, cam_param_src in enumerate(list_src_cams): 520 | import time 521 | start = time.time() 522 | start_multiply = time.time() 523 | print('Transforming the cost volume of %02d' % (src_idx)) 524 | src_prob = list_src_DPs[src_idx] 525 | 526 | # get the parameters 527 | ext_i = cam_param_src['extrinsic'] 528 | torch_ext_i = torch.from_numpy(ext_i) 529 | torch_ext_i = torch_ext_i.cuda() 530 | int_i = cam_param_src['intrinsic'] 531 | torch_int_i = torch.from_numpy(int_i) 532 | torch_int_i = torch_int_i.cuda() 533 | 534 | warped_prob = np.zeros(shape=(num_depths, height, width)) 535 | 536 | transformed_points = torch.matmul(torch_points, torch_inv_int_dest.t()) 537 | transformed_points = torch.matmul(transformed_points, torch_inv_ext_dest[0:3, 0:3].t()) 538 | transformed_points = torch.add(transformed_points, torch_inv_ext_dest[0:3, 3]) 539 | transformed_points = torch.matmul(transformed_points, torch_ext_i[0:3, 0:3].t()) 540 | transformed_points = torch.add(transformed_points, torch_ext_i[0:3, 3]) 541 | # transformed_points = transformed_points[:, 0:3] 542 | transformed_points = torch.matmul(transformed_points, torch_int_i.t()) 543 | X_src = transformed_points[:, 0] / transformed_points[:, 2] 544 | Y_src = transformed_points[:, 1] / transformed_points[:, 2] 545 | Z_src = transformed_points[:, 2] 546 | 547 | X_src = X_src.cpu().numpy() 548 | Y_src = Y_src.cpu().numpy() 549 | Z_src = Z_src.cpu().numpy() 550 | 551 | 552 | end_multipy = time.time() 553 | print('\t- Multiplication Iteration Took: ' + str(end_multipy - start_multiply)) 554 | 555 | start_round = time.time() 556 | 557 | X_src = X_src.reshape((num_depths, height, width)) 558 | Y_src = 
Y_src.reshape((num_depths, height, width)) 559 | Z_src = Z_src.reshape((num_depths, height, width)) 560 | disp_src = 1.0 / Z_src - min_disp 561 | 562 | round_Y_src = np.round(Y_src).astype(np.int) 563 | round_X_src = np.round(X_src).astype(np.int) 564 | round_Z_src = np.round(disp_src / disp_step).astype(np.int) 565 | round_Z_src[Z_src >= max_depth] = 0 566 | round_Z_src[Z_src <= min_depth] = num_depths - 1 567 | 568 | valid_index = np.bitwise_and(round_Y_src >= 0, round_Y_src < height) 569 | valid_index = np.bitwise_and(valid_index, round_X_src >= 0) 570 | valid_index = np.bitwise_and(valid_index, round_X_src < width) 571 | valid_index = np.bitwise_and(valid_index, round_Z_src >= 0) 572 | valid_index = np.bitwise_and(valid_index, round_Z_src < num_depths) 573 | 574 | end_round = time.time() 575 | print('\t- Round Iteration Took: ' + str(end_round - start_round)) 576 | 577 | start_warp = time.time() 578 | warped_prob[valid_index] = src_prob[round_Z_src[valid_index], 579 | round_Y_src[valid_index], 580 | round_X_src[valid_index]] 581 | end_warp = time.time() 582 | 583 | print('\t- Warp Iteration Took: ' + str(end_warp - start_warp)) 584 | 585 | view_counter[valid_index] += 1.0 586 | list_warped_prob.append(warped_prob) 587 | sum_warped_prob += warped_prob 588 | 589 | end = time.time() 590 | print('\t\t- One Iteration Took: ' + str(end - start)) 591 | 592 | # save to a file 593 | # np.save('%s/warped_prob_from_%02d.npy' % (out_dir, prob_src_id), warped_prob) 594 | 595 | # # save the depth probability 596 | # for i in range(num_depths): 597 | # prob = warped_prob[i] 598 | # prob_color = cv2.applyColorMap((prob * 255).astype(np.uint8), cv2.COLORMAP_JET) 599 | # cv2.imshow("prob", prob_color) 600 | # cv2.waitKey() 601 | 602 | if do_normalization: 603 | sum_warped_prob = np.multiply(sum_warped_prob, 1.0/(view_counter + 1e-10)) 604 | dp_sq_sum = np.sqrt(np.sum(np.multiply(sum_warped_prob, sum_warped_prob), axis=0)) + 1e-10 605 | sum_warped_prob = sum_warped_prob / dp_sq_sum 606 | 607 | # ## remove some inconfident ray 608 | # confident_ray = np.sum(sum_warped_prob, axis=0) >= 0.75 609 | # confident_ray = np.expand_dims(confident_ray, axis=0) 610 | # confident_ray = np.tile(confident_ray, [num_depths, 1, 1]) 611 | # # valid_index = np.bitwise_and(valid_index, confident_ray) 612 | # warped_prob[np.logical_not(confident_ray)] = 0.0 613 | # ## 614 | 615 | 616 | 617 | return sum_warped_prob, list_warped_prob 618 | 619 | 620 | def synthesize_a_view(cam_dest, PSV, depth_prob, 621 | list_img, list_cam_params, list_depth_prob, 622 | min_disp, disp_step, depth_at_infinity, 623 | height, width, num_depths, 624 | list_validity_maps=[], with_ULR_weight=False, 625 | color_max_depth=None, 626 | winner_takes_all=False): 627 | my_comparator = depth_util.my_comparator_greater 628 | 629 | int_dest = cam_dest['intrinsic'] 630 | inv_int_dest = np.linalg.inv(int_dest) 631 | ext_dest = cam_dest['extrinsic'] 632 | inv_ext_dest = np.linalg.inv(cam_dest['extrinsic']) 633 | campos_dest = inv_ext_dest[0:3, 3] 634 | camdir_dest = ext_dest[2, 0:3] 635 | 636 | # check zero_prob_idx 637 | # sum_prob = np.sqrt(np.sum(depth_prob*depth_prob, axis=0)) 638 | # nonzero_prob_idx = sum_prob > 0.35 639 | # nonzero_prob_idx = np.expand_dims(nonzero_prob_idx, -1) 640 | # nonzero_prob_idx = np.tile(nonzero_prob_idx, [1, 1, 3]) 641 | abs_max = np.max(depth_prob, axis=0) 642 | valid_prob_1d = abs_max >= 0.10 643 | valid_prob = np.expand_dims(valid_prob_1d, -1) 644 | valid_prob = np.tile(valid_prob, [1, 1, 3]) 645 | if with_ULR_weight: 646 | 
ULR_weight_sum = 0.0 647 | 648 | avg_new_view = np.zeros((height, width, 3), dtype=np.float) 649 | normalizer = np.zeros((height, width, 3), dtype=np.float) 650 | visibility_map = np.zeros((height, width), dtype=np.float) 651 | 652 | # for each PSV_k, perfrom view synthesis 653 | dict_new_views = dict() 654 | list_new_views = [] 655 | 656 | # compute the weight 657 | import time 658 | start_time = time.time() 659 | # weight_volume = np.zeros((num_depths, height, width, 3), dtype=np.float) 660 | # for j in range(height): 661 | # for i in range(width): 662 | # data = depth_prob[:, j, i] 663 | # idx_local_max = argrelextrema(data, my_comparator, order=5) 664 | # max_index = np.argmax(data) 665 | # global_max_value = data[max_index] 666 | # 667 | # if len(idx_local_max) == 0: 668 | # depth_idx = max_index 669 | # else: 670 | # idx_local_max = idx_local_max[0] 671 | # if len(idx_local_max) == 0 or len(idx_local_max) == 1: 672 | # depth_idx = max_index 673 | # else: 674 | # for idx in reversed(idx_local_max): 675 | # local_max_value = data[idx] 676 | # if local_max_value >= global_max_value * VSynth.DT_THRESHOLD: 677 | # depth_idx = idx 678 | # break 679 | # 680 | # weight_volume[depth_idx, j, i, :] = 1.0 681 | 682 | weight_volume = np.zeros((num_depths, height, width, 3), dtype=np.float) 683 | weight_volume_2 = np.zeros((num_depths, height, width, 3), dtype=np.float) 684 | obj_local_max = argrelextrema(depth_prob, my_comparator, order=3, mode='wrap') 685 | obj_global_max = np.argmax(depth_prob, axis=0) 686 | 687 | first_closest_peak = np.ones(shape=(height, width))*(-1) 688 | second_closest_peak = np.ones(shape=(height, width))*(-1) 689 | 690 | for i in reversed(range(len(obj_local_max[0]))): 691 | idx_d = obj_local_max[0][i] 692 | idx_y = obj_local_max[1][i] 693 | idx_x = obj_local_max[2][i] 694 | 695 | local_max_value = depth_prob[idx_d, idx_y, idx_x] 696 | global_max_value = depth_prob[obj_global_max[idx_y, idx_x], idx_y, idx_x] 697 | 698 | if local_max_value >= global_max_value * VSynth.DT_THRESHOLD: 699 | fp_exists = first_closest_peak[idx_y, idx_x] != -1 700 | sp_exists = second_closest_peak[idx_y, idx_x] != -1 701 | if sp_exists: 702 | continue 703 | if not sp_exists and not fp_exists: 704 | first_closest_peak[idx_y, idx_x] = idx_d 705 | weight_volume[idx_d, idx_y, idx_x] = 1.0 706 | elif fp_exists and not sp_exists: 707 | second_closest_peak[idx_y, idx_x] = idx_d 708 | weight_volume_2[idx_d, idx_y, idx_x] = 1.0 709 | 710 | fp_empty = first_closest_peak == -1 711 | sp_empty = second_closest_peak == -1 712 | j_grid, i_grid = np.meshgrid(range(0, height), range(0, width), indexing='ij') 713 | weight_volume[obj_global_max[fp_empty], j_grid[fp_empty], i_grid[fp_empty]] = 1.0 714 | weight_volume_2[obj_global_max[sp_empty], j_grid[sp_empty], i_grid[sp_empty]] = 1.0 715 | 716 | elapsed_time = time.time() - start_time 717 | print(time.strftime("%H:%M:%S", time.gmtime(elapsed_time))) 718 | 719 | # compute the depth map 720 | ref_depth_map, ref_depth_map_colored, zero_disp \ 721 | = depth_util.generate_depthmap(weight_volume[:,:,:,0], 722 | min_disp, disp_step, depth_at_infinity, color_max_val=color_max_depth) 723 | ref_depth_map_2, ref_depth_map_colored_2, _\ 724 | = depth_util.generate_depthmap(weight_volume_2[:,:,:,0], 725 | min_disp, disp_step, depth_at_infinity, color_max_val=color_max_depth) 726 | 727 | # filter the depthmap 728 | invalid_prob_1d = np.logical_not(valid_prob_1d) 729 | invalid_prob = np.logical_not(valid_prob) 730 | ref_depth_map[invalid_prob_1d] = 0.0 731 | 
ref_depth_map_colored[invalid_prob] = 0.0 732 | ref_depth_map_2[invalid_prob_1d] = 0.0 733 | ref_depth_map_colored_2[invalid_prob] = 0.0 734 | 735 | 736 | # imageio.imwrite('./ref_depth_map_colored.png', ref_depth_map_colored) 737 | # imageio.imwrite('./ref_depth_map_colored_2.png', ref_depth_map_colored_2) 738 | 739 | # compute weights 740 | if with_ULR_weight: 741 | 742 | # compute max distance and min distance 743 | max_dist = -9999999.0 744 | min_dist = 9999999.0 745 | min_idx = -1 746 | for src_idx in range(len(list_img)): 747 | # get the parameters ready 748 | cam_param_src = list_cam_params[src_idx] 749 | ext_i = cam_param_src['extrinsic'] 750 | 751 | # positional weight 752 | inv_ext_i = np.linalg.inv(ext_i) 753 | campos_i = inv_ext_i[0:3, 3] 754 | campos_diff = campos_i - campos_dest 755 | campos_dist = np.sqrt(np.sum(campos_diff * campos_diff)) 756 | if campos_dist > max_dist: 757 | max_dist = campos_dist 758 | if campos_dist < min_dist: 759 | min_dist = campos_dist 760 | min_idx = src_idx 761 | 762 | # compute weights 763 | list_ULR_weights = [] 764 | for src_idx in range(len(list_img)): 765 | # get the parameters ready 766 | cam_param_src = list_cam_params[src_idx] 767 | ext_i = cam_param_src['extrinsic'] 768 | 769 | # positional weight 770 | inv_ext_i = np.linalg.inv(ext_i) 771 | campos_i = inv_ext_i[0:3, 3] 772 | campos_diff = campos_i - campos_dest 773 | campos_dist = np.sqrt(np.sum(campos_diff * campos_diff))/max_dist 774 | # campos_weight = np.exp(-campos_dist / (0.40 * 0.40)) 775 | campos_weight = 100*np.exp(-campos_dist / (0.2 * 0.2)) 776 | 777 | 778 | 779 | # directional weight 780 | camdir_i = ext_i[2, 0:3] 781 | camdir_dot = camdir_i * camdir_dest 782 | camdir_dist = np.sqrt(np.sum(camdir_dot * camdir_dot)) 783 | if camdir_dist < 0.5: 784 | camdir_weight = 0.0 785 | else: 786 | # camdir_weight = np.exp(-camdir_dist / (0.8 * 0.8)) 787 | camdir_weight = 100*np.exp(-camdir_dist / (0.4 * 0.4)) 788 | 789 | if winner_takes_all: 790 | if src_idx == min_idx: 791 | campos_weight = 1.0 792 | camdir_weight = 1.0 793 | else: 794 | campos_weight = 0.1 795 | camdir_weight = 0.1 796 | 797 | ULR_weight = campos_weight * camdir_weight 798 | print("%f / %f / %f"%(campos_weight, camdir_weight, ULR_weight)) 799 | list_ULR_weights.append(ULR_weight) 800 | 801 | for src_idx in range(len(list_img)): 802 | # compute the depth map 803 | depth_map_k, _, _, _ \ 804 | = depth_util.generate_depthmap(list_depth_prob[src_idx], min_disp, disp_step, depth_at_infinity) 805 | 806 | # get the parameters ready 807 | cam_param_src = list_cam_params[src_idx] 808 | # get the parameters 809 | ext_i = cam_param_src['extrinsic'] 810 | int_i = cam_param_src['intrinsic'] 811 | 812 | # reproject to 813 | Y, X = np.meshgrid(np.arange(0, height), np.arange(0, width), indexing='ij') 814 | Z = ref_depth_map 815 | X = X * Z 816 | Y = Y * Z 817 | points = np.array([X.reshape(-1), Y.reshape(-1), Z.reshape(-1)]) 818 | points = np.matmul(inv_int_dest, points) 819 | points = np.vstack([points, np.ones((1, height * width))]) 820 | points = np.matmul(inv_ext_dest, points) 821 | points = np.matmul(ext_i, points) 822 | points = points[0:3] 823 | points = np.matmul(int_i, points) 824 | X_src = points[0] / points[2] 825 | Y_src = points[1] / points[2] 826 | Z_src = points[2] 827 | 828 | X_src = X_src.reshape((height, width)) 829 | Y_src = Y_src.reshape((height, width)) 830 | Z_src = Z_src.reshape((height, width)) 831 | # make an exception for sky 832 | Z_src[zero_disp] = depth_at_infinity 833 | Z_src[Z_src > depth_at_infinity] = 
depth_at_infinity 834 | 835 | round_Y_src = np.round(Y_src).astype(np.int) 836 | round_X_src = np.round(X_src).astype(np.int) 837 | valid_index = np.bitwise_and(round_Y_src >= 0, round_Y_src < height) 838 | valid_index = np.bitwise_and(valid_index, round_X_src >= 0) 839 | valid_index = np.bitwise_and(valid_index, round_X_src < width) 840 | 841 | # warped_depth_map_k = np.zeros(shape=(height, width)) 842 | # warped_depth_map_k[valid_index] = depth_map_k[round_Y_src[valid_index], 843 | # round_X_src[valid_index]] 844 | # 845 | # depth_diff = ref_depth_map - warped_depth_map_k 846 | # invalid_depth = depth_diff > ref_depth_map*VISIBILITY_TEST_THRESHOLD 847 | # invalid_depth = depth_diff > 0 848 | depth_diff = np.zeros(shape=(height, width)) 849 | depth_diff[valid_index] = Z_src[valid_index]\ 850 | - depth_map_k[round_Y_src[valid_index], 851 | round_X_src[valid_index]] 852 | invalid_depth = depth_diff > Z_src*VSynth.VISIBILITY_TEST_THRESHOLD 853 | valid_depth = np.logical_not(invalid_depth) 854 | valid_index = np.logical_and(valid_index, valid_depth) 855 | 856 | # get PSV 857 | PSV_k = PSV[src_idx] 858 | 859 | if list_validity_maps != []: 860 | validity_map = list_validity_maps[src_idx] 861 | check_validity = np.zeros(shape=(height, width)) 862 | check_validity[valid_index] = validity_map[round_Y_src[valid_index], 863 | round_X_src[valid_index]] 864 | valid_index = np.logical_and(valid_index, check_validity) 865 | 866 | # perform 867 | valid_index = valid_index.astype(np.float) 868 | valid_index = np.expand_dims(valid_index, -1) 869 | valid_index = np.tile(valid_index, [1, 1, 3]) 870 | # valid_index = np.logical_and(valid_index, nonzero_prob_idx) 871 | valid_index = np.logical_and(valid_index, valid_prob) 872 | 873 | 874 | view_k = np.multiply(PSV_k, weight_volume) 875 | view_k = np.sum(view_k, 0) 876 | view_k = view_k * valid_index 877 | # import imageio 878 | # imageio.imwrite('./view_%04d.png' % src_idx, view_k) 879 | # imageio.imwrite('./view_%04d_mask.png' % src_idx, valid_index.astype(np.float)) 880 | 881 | if with_ULR_weight: 882 | # apply the weight 883 | ULR_weight = list_ULR_weights[src_idx] 884 | ULR_weight_sum += ULR_weight 885 | 886 | avg_new_view += view_k*ULR_weight 887 | normalizer += valid_index*ULR_weight 888 | 889 | else: 890 | avg_new_view += view_k 891 | normalizer += valid_index 892 | 893 | visibility_map += valid_index.astype(np.float)[:,:,0] 894 | dict_new_views[src_idx] = view_k 895 | list_new_views.append(view_k) 896 | 897 | 898 | # fig = plt.figure() 899 | # plt.subplot(121) 900 | # plt.imshow(img_k) 901 | # plt.title("Source Image") 902 | # plt.subplot(122) 903 | # plt.imshow(view_k) 904 | # plt.title("View Synth [%02d]" % (k)) 905 | 906 | zero_pixels = avg_new_view == 0.0 907 | avg_new_view = np.multiply(avg_new_view, 1.0 / (normalizer + 1e-10)) 908 | avg_new_view[zero_pixels] = 0.0 909 | 910 | 911 | # remove area where only one view sees 912 | # one_view_map = (visibility_map == 1.0) 913 | # one_view_map = np.expand_dims(one_view_map, -1) 914 | # one_view_map = np.tile(one_view_map, (1, 1, 3)) 915 | # avg_new_view[one_view_map] = 0.0 916 | 917 | validity_map = visibility_map > 1.0 918 | visibility_map /= float(len(list_img)) 919 | 920 | # remove outliers 921 | avg_new_view[avg_new_view > 1.0] = 1.0 922 | avg_new_view[avg_new_view < 0.0] = 0.0 923 | 924 | # imageio.imwrite('./view_merged.png', avg_new_view) 925 | # fig = plt.figure() 926 | # plt.imshow(visibility_map) 927 | # plt.show() 928 | if list_validity_maps != []: 929 | return avg_new_view, list_new_views, 
visibility_map, validity_map, \ 930 | ref_depth_map, ref_depth_map_colored, \ 931 | ref_depth_map_2, ref_depth_map_colored_2 932 | else: 933 | return avg_new_view, list_new_views, visibility_map,\ 934 | ref_depth_map, ref_depth_map_colored,\ 935 | ref_depth_map_2, ref_depth_map_colored_2 936 | 937 | 938 | -------------------------------------------------------------------------------- /xtreme-view/vsynthlib/deepmvs_wrapper.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (C) 2019 NVIDIA Corporation. All rights reserved. 3 | Licensed under the NVIDIA Source Code License. See LICENSE.md at https://github.com/NVlabs/extreme-view-synth. 4 | Authors: Inchang Choi, Orazio Gallo, Alejandro Troccoli, Min H. Kim, and Jan Kautz 5 | """ 6 | 7 | import torch 8 | import torchvision as vision 9 | import torch.nn.functional as F 10 | from torch.autograd import Variable 11 | import pydensecrf.densecrf as dcrf 12 | import numpy as np 13 | import cv2 14 | 15 | from DeepMVS.model import DeepMVS 16 | 17 | 18 | 19 | 20 | class DeepMVSWrapper(object): 21 | def __init__(self, filename_DeepMVS, 22 | n_depths=100, 23 | enable_CUDA=True, 24 | do_filter=True): 25 | 26 | self.dev_id = 0 27 | if torch.cuda.device_count() > 1: 28 | self.dev_id = 1 29 | 30 | self.model_deepMVS = DeepMVS(n_depths, use_gpu=enable_CUDA, gpu_id=self.dev_id) 31 | self.model_deepMVS.load_state_dict(torch.load(filename_DeepMVS)) 32 | self.model_deepMVS.share_memory() 33 | print('DeepMVS model loaded!', filename_DeepMVS) 34 | 35 | if enable_CUDA: 36 | self.model_VGGNet = vision.models.vgg19(pretrained=True).cuda(self.dev_id) 37 | else: 38 | self.model_VGGNet = vision.models.vgg19(pretrained=True) 39 | 40 | self.model_VGGNet.share_memory() 41 | self.model_VGGNet_normalize\ 42 | = vision.transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 43 | print('VGGNET model loaded!') 44 | 45 | # Constants for DenseCRF. 
46 | self.dict_DenseCRF = dict() 47 | 48 | ###################################### 49 | # default from DeepMVS 50 | ###################################### 51 | # self.dict_DenseCRF['sigma_xy'] = 80.0 52 | # self.dict_DenseCRF['sigma_rgb'] = 15.0 53 | # self.dict_DenseCRF['sigma_d'] = 10.0 54 | # self.dict_DenseCRF['iteration_num'] = 5 55 | # compat = np.zeros((n_depths, n_depths), dtype=np.float32) 56 | # for row in range(0, n_depths): 57 | # for col in range(0, n_depths): 58 | # compat[row, col] = (row - col) ** 2 / self.dict_DenseCRF['sigma_d'] ** 2 / 2 59 | # self.dict_DenseCRF['compat'] = compat 60 | ##################################### 61 | 62 | ###################################### 63 | # For museum and others 64 | ###################################### 65 | self.dict_DenseCRF['sigma_xy'] = 30.0 66 | self.dict_DenseCRF['sigma_rgb'] = 3 67 | self.dict_DenseCRF['iteration_num'] = 20 68 | self.dict_DenseCRF['compat'] = 10.0 69 | 70 | # for high res 71 | # self.dict_DenseCRF['sigma_xy'] = 60 72 | # self.dict_DenseCRF['sigma_rgb'] = 3.0 73 | # self.dict_DenseCRF['iteration_num'] = 20 74 | # self.dict_DenseCRF['compat'] = 10.0 75 | 76 | ###################################### 77 | # For bikes of StereoMagnificiation 78 | ###################################### 79 | # self.dict_DenseCRF['sigma_xy'] = 25.0 80 | # self.dict_DenseCRF['sigma_rgb'] = 10.0 81 | # self.dict_DenseCRF['iteration_num'] = 5 82 | # self.dict_DenseCRF['compat'] = 5.0 83 | 84 | self.n_depths = n_depths 85 | self.patch_size = 128 86 | self.stride = int(self.patch_size/2) 87 | self.do_filter = do_filter 88 | 89 | def build_PSV(self, list_src_img, list_src_cam, ref_idx, 90 | height, width, 91 | min_disp, disp_step, max_depth): 92 | 93 | n_neighbors = len(list_src_img) - 1 94 | 95 | PSV = np.zeros(shape=[n_neighbors, self.n_depths, height, width, 3], dtype=np.float32) 96 | 97 | cam_param_ref = list_src_cam[ref_idx] 98 | int_mat_ref = cam_param_ref['intrinsic'] 99 | fx_ref = int_mat_ref[0, 0] 100 | fy_ref = int_mat_ref[1, 1] 101 | cx_ref = int_mat_ref[0, 2] 102 | cy_ref = int_mat_ref[1, 2] 103 | ext_ref = cam_param_ref['extrinsic'] 104 | inv_ext_ref = np.linalg.inv(ext_ref) 105 | 106 | # for each neighbor image 107 | counter_img = 0 108 | for i in range(len(list_src_img)): 109 | if i == ref_idx: 110 | continue 111 | 112 | img_i = list_src_img[i] 113 | cam_param_i = list_src_cam[i] 114 | # get the parameters 115 | int_mat = cam_param_i['intrinsic'] 116 | fx_i = int_mat[0, 0] 117 | fy_i = int_mat[1, 1] 118 | cx_i = int_mat[0, 2] 119 | cy_i = int_mat[1, 2] 120 | ext_i = cam_param_i['extrinsic'] 121 | 122 | # 4 Corners on the virtual camera to get te 4 rays that intersect with the depth plane 123 | src_pts = np.reshape([0, 0, 124 | width, 0, 125 | width, height, 126 | 0, height], (4, 2)) 127 | 128 | # for each depth plane 129 | for d in range(self.n_depths): 130 | 131 | disp = d * disp_step + min_disp 132 | if d == 0: 133 | depth = max_depth 134 | else: 135 | depth = 1.0 / disp 136 | 137 | # print(depth) 138 | 139 | # compute dst points 140 | dst_pts = np.zeros((4, 2)) 141 | counter_pt = 0 142 | for p in src_pts: 143 | p_3D_ref = np.asarray([(depth * p[0] - depth * cx_ref) / fx_ref, 144 | (depth * p[1] - depth * cy_ref) / fy_ref, 145 | depth]) 146 | p_4D_ref = np.array([p_3D_ref[0], p_3D_ref[1], p_3D_ref[2], 1.0]) 147 | p_4D_world = inv_ext_ref.dot(p_4D_ref) 148 | p_4D_i = ext_i.dot(p_4D_world) 149 | dst = np.asarray([cx_i + fx_i * p_4D_i[0] / p_4D_i[2], cy_i + fy_i * p_4D_i[1] / p_4D_i[2]]) 150 | dst_pts[counter_pt, :] = 
dst.squeeze() 151 | counter_pt += 1 152 | 153 | # compute homography 154 | M, mask = cv2.findHomography(dst_pts, src_pts) 155 | # warp the image 156 | result = cv2.warpPerspective(img_i, M, (width, height), 157 | flags=cv2.INTER_LINEAR, 158 | borderMode=cv2.BORDER_REPLICATE) 159 | # cv2.imshow("img_ref", img_ref) 160 | # cv2.imshow("PSV of img %02d" % (i), result) 161 | # cv2.waitKey() 162 | 163 | PSV[counter_img, d, :, :, :] = result 164 | 165 | counter_img += 1 166 | 167 | return PSV 168 | 169 | def perform_DeepMVS(self, list_img, ref_idx, PSV, 170 | height, width, batch_size=1, use_gpu=True): 171 | 172 | # Generate VGG features. 173 | with torch.no_grad(): 174 | VGG_tensor = Variable( 175 | self.model_VGGNet_normalize(torch.FloatTensor(list_img[ref_idx].copy())).permute(2, 0, 1).unsqueeze(0)) 176 | 177 | if use_gpu: 178 | VGG_tensor = VGG_tensor.cuda(self.dev_id) 179 | VGG_scaling_factor = 0.01 180 | for i in range(0, 4): 181 | VGG_tensor = self.model_VGGNet.features[i].forward(VGG_tensor) 182 | if use_gpu: 183 | feature_input_1x_whole = VGG_tensor.data.cpu().clone() * VGG_scaling_factor 184 | else: 185 | feature_input_1x_whole = VGG_tensor.data.clone() * VGG_scaling_factor 186 | for i in range(4, 9): 187 | VGG_tensor = self.model_VGGNet.features[i].forward(VGG_tensor) 188 | if use_gpu: 189 | feature_input_2x_whole = VGG_tensor.data.cpu().clone() * VGG_scaling_factor 190 | else: 191 | feature_input_2x_whole = VGG_tensor.data.clone() * VGG_scaling_factor 192 | for i in range(9, 14): 193 | VGG_tensor = self.model_VGGNet.features[i].forward(VGG_tensor) 194 | if use_gpu: 195 | feature_input_4x_whole = VGG_tensor.data.cpu().clone() * VGG_scaling_factor 196 | else: 197 | feature_input_4x_whole = VGG_tensor.data.clone() * VGG_scaling_factor 198 | for i in range(14, 23): 199 | VGG_tensor = self.model_VGGNet.features[i].forward(VGG_tensor) 200 | if use_gpu: 201 | feature_input_8x_whole = VGG_tensor.data.cpu().clone() * VGG_scaling_factor 202 | else: 203 | feature_input_8x_whole = VGG_tensor.data.clone() * VGG_scaling_factor 204 | for i in range(23, 32): 205 | VGG_tensor = self.model_VGGNet.features[i].forward(VGG_tensor) 206 | if use_gpu: 207 | feature_input_16x_whole = VGG_tensor.data.cpu().clone() * VGG_scaling_factor 208 | else: 209 | feature_input_16x_whole = VGG_tensor.data.clone() * VGG_scaling_factor 210 | del VGG_tensor 211 | 212 | # Stride through entire reference image. 213 | predict_raw = torch.zeros(self.n_depths, height, width) 214 | border_x = (self.patch_size - self.stride) / 2 215 | border_y = (self.patch_size - self.stride) / 2 216 | col_total = int((width - 2 * border_x - 1) / self.stride) + 1 217 | row_total = int((height - 2 * border_y - 1) / self.stride) + 1 218 | 219 | for row_idx in range(0, row_total): 220 | for col_idx in range(0, col_total): 221 | 222 | # Compute patch location for this patch and next patch. 223 | if col_idx != col_total - 1: 224 | start_x = col_idx * self.stride 225 | else: 226 | start_x = width - self.patch_size 227 | 228 | if row_idx != row_total - 1: 229 | start_y = row_idx * self.stride 230 | else: 231 | start_y = height - self.patch_size 232 | 233 | # Read plane-sweep volume and start next patch. 234 | ref_img = list_img[ref_idx][start_y:(start_y + self.patch_size), start_x:(start_x + self.patch_size), 235 | :].copy() - 0.5 236 | sweep_volume = PSV[:, :, start_y:(start_y + self.patch_size), start_x:(start_x + self.patch_size), 237 | :].copy() - 0.5 238 | num_neighbors = len(list_img) - 1 239 | 240 | # Prepare the inputs. 
241 | data_in_tensor = torch.FloatTensor(batch_size, 1, self.n_depths, 2, 3, self.patch_size, self.patch_size) 242 | ref_img_tensor = torch.FloatTensor(ref_img).permute(2, 0, 1).unsqueeze(0) 243 | data_in_tensor[0, 0, :, 0, ...] = ref_img_tensor.expand(self.n_depths, -1, -1, -1) 244 | with torch.no_grad(): 245 | feature_input_1x \ 246 | = Variable( 247 | feature_input_1x_whole[..., start_y:start_y + self.patch_size, start_x:start_x + self.patch_size]) 248 | feature_input_2x \ 249 | = Variable( 250 | feature_input_2x_whole[..., int(start_y / 2):int(start_y / 2) + int(self.patch_size / 2), 251 | int(start_x / 2):int(start_x / 2) + int(self.patch_size / 2)]) 252 | feature_input_4x \ 253 | = Variable( 254 | feature_input_4x_whole[..., int(start_y / 4):int(start_y / 4) + int(self.patch_size / 4), 255 | int(start_x / 4):int(start_x / 4) + int(self.patch_size / 4)]) 256 | feature_input_8x \ 257 | = Variable( 258 | feature_input_8x_whole[..., int(start_y / 8):int(start_y / 8) + int(self.patch_size / 8), 259 | int(start_x / 8):int(start_x / 8) + int(self.patch_size / 8)]) 260 | feature_input_16x \ 261 | = Variable( 262 | feature_input_16x_whole[..., int(start_y / 16):int(start_y / 16) + int(self.patch_size / 16), 263 | int(start_x / 16):int(start_x / 16) + int(self.patch_size / 16)]) 264 | if use_gpu: 265 | feature_input_1x = feature_input_1x.cuda(self.dev_id) 266 | feature_input_2x = feature_input_2x.cuda(self.dev_id) 267 | feature_input_4x = feature_input_4x.cuda(self.dev_id) 268 | feature_input_8x = feature_input_8x.cuda(self.dev_id) 269 | feature_input_16x = feature_input_16x.cuda(self.dev_id) 270 | # Loop through all neighbor images. 271 | for neighbor_idx in range(0, num_neighbors): 272 | data_in_tensor[0, 0, :, 1, ...] = torch.FloatTensor( 273 | np.moveaxis(sweep_volume[neighbor_idx, ...], -1, -3)) 274 | with torch.no_grad(): 275 | data_in = Variable(data_in_tensor) 276 | if use_gpu: 277 | data_in = data_in.cuda(self.dev_id) 278 | if neighbor_idx == 0: 279 | cost_volume \ 280 | = self.model_deepMVS.forward_feature(data_in, [feature_input_1x, feature_input_2x, feature_input_4x, 281 | feature_input_8x, feature_input_16x]).data[...] 282 | else: 283 | cost_volume \ 284 | = torch.max(cost_volume, self.model_deepMVS.forward_feature(data_in, [feature_input_1x, feature_input_2x, 285 | feature_input_4x, feature_input_8x, 286 | feature_input_16x]).data[...]) 287 | # Make final prediction. 288 | with torch.no_grad(): 289 | predict = self.model_deepMVS.forward_predict(Variable(cost_volume[:, 0, ...])) 290 | 291 | # Compute copy range. 292 | if col_idx == 0: 293 | copy_x_start = 0 294 | copy_x_end = self.patch_size - border_x 295 | elif col_idx == col_total - 1: 296 | copy_x_start = border_x + col_idx * self.stride 297 | copy_x_end = width 298 | else: 299 | copy_x_start = border_x + col_idx * self.stride 300 | copy_x_end = copy_x_start + self.stride 301 | 302 | if row_idx == 0: 303 | copy_y_start = 0 304 | copy_y_end = self.patch_size - border_y 305 | elif row_idx == row_total - 1: 306 | copy_y_start = border_y + row_idx * self.stride 307 | copy_y_end = height 308 | else: 309 | copy_y_start = border_y + row_idx * self.stride 310 | copy_y_end = copy_y_start + self.stride 311 | 312 | # Copy the prediction to buffer. 
313 | copy_x_start = int(copy_x_start) 314 | copy_x_end = int(copy_x_end) 315 | copy_y_start = int(copy_y_start) 316 | copy_y_end = int(copy_y_end) 317 | predict_raw[..., copy_y_start:copy_y_end, copy_x_start:copy_x_end] \ 318 | = predict.data[0, :, copy_y_start - start_y:copy_y_end - start_y, 319 | copy_x_start - start_x:copy_x_end - start_x] 320 | 321 | ###################################################### 322 | # compute the depth probability 323 | ###################################################### 324 | with torch.no_grad(): 325 | depth_prob = F.softmax(Variable(predict_raw), dim=0).data.numpy() 326 | 327 | ###################################################### 328 | # Pass through DenseCRF. 329 | ###################################################### 330 | with torch.no_grad(): 331 | unary_energy = F.log_softmax(Variable(predict_raw), dim=0).data.numpy() 332 | 333 | crf = dcrf.DenseCRF2D(width, height, self.n_depths) 334 | crf.setUnaryEnergy(-unary_energy.reshape(self.n_depths, height * width)) 335 | ref_img_full = (list_img[ref_idx] * 255.0).astype(np.uint8) 336 | crf.addPairwiseBilateral(sxy=(self.dict_DenseCRF['sigma_xy'], self.dict_DenseCRF['sigma_xy']), 337 | srgb=( 338 | self.dict_DenseCRF['sigma_rgb'], self.dict_DenseCRF['sigma_rgb'], self.dict_DenseCRF['sigma_rgb']), 339 | rgbim=ref_img_full, 340 | compat=self.dict_DenseCRF['compat'], 341 | kernel=dcrf.FULL_KERNEL, 342 | normalization=dcrf.NORMALIZE_SYMMETRIC) 343 | new_raw = crf.inference(self.dict_DenseCRF['iteration_num']) 344 | new_raw = np.array(new_raw).reshape(self.n_depths, height, width) 345 | 346 | return new_raw, depth_prob 347 | 348 | def compute(self, list_src_img, list_src_cam, ref_idx, 349 | min_disp, disp_step, max_depth): 350 | 351 | img = list_src_img[0] 352 | height, width, n_channels = img.shape 353 | 354 | # build PSV 355 | PSVs = self.build_PSV(list_src_img, list_src_cam, ref_idx, 356 | height, width, 357 | min_disp, disp_step, max_depth) 358 | 359 | # call deepMVS 360 | dp_refined, dp = self.perform_DeepMVS(list_src_img, ref_idx, PSVs, 361 | height, width) 362 | 363 | if self.do_filter: 364 | return dp_refined 365 | else: 366 | return dp 367 | 368 | 369 | 370 | -------------------------------------------------------------------------------- /xtreme-view/vsynthlib/depth_util.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (C) 2019 NVIDIA Corporation. All rights reserved. 3 | Licensed under the NVIDIA Source Code License. See LICENSE.md at https://github.com/NVlabs/extreme-view-synth. 4 | Authors: Inchang Choi, Orazio Gallo, Alejandro Troccoli, Min H. 
Kim, and Jan Kautz 5 | """ 6 | 7 | import numpy as np 8 | import cv2 9 | from scipy.signal import argrelextrema 10 | 11 | def my_comparator_greater(x1, x2): 12 | res_1 = np.greater_equal(x1, x2) 13 | # res_2 = x1 > DT_THRESHOLD 14 | # res = np.logical_and(res_1, res_2) 15 | return res_1 16 | 17 | 18 | def generate_depthmap(depth_prob, min_disp, disp_step, depht_at_inifinity, 19 | color_max_val=None, use_argmax=True): 20 | if use_argmax: 21 | depth_idx = np.argmax(depth_prob, axis=0) 22 | else: 23 | depth_idx = np.argmin(depth_prob, axis=0) 24 | 25 | 26 | img_depth = depth_idx*disp_step 27 | zero_disp = (depth_idx == 0) 28 | img_depth = 1.0 / (img_depth + min_disp) 29 | img_depth[zero_disp] = depht_at_inifinity 30 | 31 | if color_max_val is None: 32 | img_depth_colored, color_max_val = apply_colormap_to_depth(img_depth, depht_at_inifinity) 33 | return img_depth, img_depth_colored, zero_disp, color_max_val 34 | 35 | else: 36 | img_depth_colored = apply_colormap_to_depth(img_depth, depht_at_inifinity, max_depth=color_max_val) 37 | return img_depth, img_depth_colored, zero_disp 38 | 39 | 40 | 41 | def apply_colormap_to_depth(img_depth, depth_at_infinity, max_depth=None, max_percent=95, RGB=True): 42 | img_depth_colored = img_depth.copy() 43 | m = np.min(img_depth_colored) 44 | M = np.max(img_depth_colored) 45 | 46 | if max_depth is None: 47 | valid_mask = img_depth_colored < depth_at_infinity 48 | valid_mask = np.logical_and(valid_mask, np.logical_not(np.isinf(img_depth))) 49 | valid_mask = np.logical_and(valid_mask, img_depth != 0.0) 50 | list_data = img_depth[valid_mask] 51 | 52 | hist, bins = np.histogram(list_data, bins=20) 53 | n_data = len(list_data) 54 | threshold_max = n_data * float(max_percent)/100.0 55 | sum_hist = 0 56 | 57 | for bin_idx, hist_val in enumerate(hist): 58 | sum_hist += hist_val 59 | if sum_hist > threshold_max: 60 | M = bins[bin_idx + 1] 61 | break 62 | else: 63 | M = max_depth 64 | 65 | img_depth_colored[img_depth_colored > M] = M 66 | img_depth_colored = (img_depth_colored - m) / (M - m) 67 | img_depth_colored = (img_depth_colored * 255).astype(np.uint8) 68 | img_depth_colored = cv2.applyColorMap(img_depth_colored, cv2.COLORMAP_JET) 69 | 70 | if RGB: 71 | img_depth_colored = cv2.cvtColor(img_depth_colored, cv2.COLOR_BGR2RGB) 72 | 73 | if max_depth is None: 74 | return img_depth_colored, M 75 | else: 76 | return img_depth_colored 77 | 78 | 79 | 80 | def fetch_patches_VNP(y, x, p_size, dest_cam, 81 | img_synth, list_src_img, list_src_cam, 82 | depth_map_P1, depth_map_P2, return_None=True): 83 | 84 | ######################### 85 | # define the input and the output 86 | ######################### 87 | # t_input = np.zeros(shape=(p_size, p_size, 27)) 88 | # list_src_cam_IDs_ref = dest_cam['list_src_cam_IDs_ref'] 89 | chs_for_fg_patches = 3*len(list_src_img) 90 | t_input = np.zeros(shape=(p_size, p_size, 3 + 2*chs_for_fg_patches)) 91 | 92 | t_input_synth = np.zeros(shape=(p_size, p_size, 3)) 93 | list_t_candi_patch = [] 94 | 95 | 96 | 97 | ######################### 98 | # set output 99 | ######################### 100 | X_grid, Y_grid = np.meshgrid(np.arange(x, x + p_size), 101 | np.arange(y, y + p_size)) 102 | 103 | 104 | ######################### 105 | # set input 106 | ######################### 107 | synth_patch = img_synth[Y_grid, X_grid] 108 | t_input_synth = synth_patch 109 | # cv2.imshow('synth_patch', synth_patch) 110 | 111 | # get the reference camera params 112 | inv_int_dest = np.linalg.inv(dest_cam['intrinsic']) 113 | inv_ext_dest = 
np.linalg.inv(dest_cam['extrinsic']) 114 | 115 | 116 | for count in range(len(list_src_img)): 117 | # get the target camera params 118 | cam_i = list_src_cam[count] 119 | ext_i = cam_i['extrinsic'] 120 | int_i = cam_i['intrinsic'] 121 | 122 | planar_patch_P1_i = backward_warp_center_depth(y, x, depth_map_P1, list_src_img, 123 | p_size, ext_i, int_i, count, 124 | inv_int_dest, inv_ext_dest) 125 | planar_patch_P2_i = backward_warp_center_depth(y, x, depth_map_P2, list_src_img, 126 | p_size, ext_i, int_i, count, 127 | inv_int_dest, inv_ext_dest) 128 | 129 | if return_None: 130 | if planar_patch_P1_i is None or planar_patch_P2_i is None: 131 | return None, None 132 | 133 | list_t_candi_patch.append(planar_patch_P1_i) 134 | 135 | z_1 = depth_map_P1[y, x] 136 | z_2 = depth_map_P2[y, x] 137 | diff_z = np.abs(z_1 - z_2)/z_1*100 138 | 139 | if diff_z > 2: 140 | list_t_candi_patch.append(planar_patch_P2_i) 141 | 142 | 143 | # change shape and subtract 0.5 144 | t_input_synth = np.moveaxis(t_input_synth, -1, 0) 145 | t_input_synth -= 0.5 146 | 147 | for i in range(len(list_t_candi_patch)): 148 | t_candi_patch = list_t_candi_patch[i] 149 | t_candi_patch = np.moveaxis(t_candi_patch, -1, 0) 150 | t_candi_patch -= 0.5 151 | list_t_candi_patch[i] = t_candi_patch 152 | 153 | ######################### 154 | return t_input_synth, list_t_candi_patch 155 | 156 | 157 | 158 | 159 | def backward_warp_center_depth(y_coord, x_coord, dmap, list_src_img, 160 | patch_size, ext_i, int_i, src_idx, 161 | inv_int_ref, inv_ext_ref): 162 | 163 | z_coord = dmap[int(y_coord + patch_size/2), int(x_coord + patch_size/2)] 164 | # if z_coord == 0.0: 165 | # return None 166 | 167 | height, width = dmap.shape 168 | X_grid, Y_grid = np.meshgrid(np.arange(x_coord, x_coord + patch_size), 169 | np.arange(y_coord, y_coord + patch_size)) 170 | Z_grid = np.ones(shape=X_grid.shape) * z_coord 171 | X_grid = np.multiply(X_grid, Z_grid) 172 | Y_grid = np.multiply(Y_grid, Z_grid) 173 | 174 | points = np.array([X_grid.reshape(-1), Y_grid.reshape(-1), Z_grid.reshape(-1)]) 175 | points = np.matmul(inv_int_ref, points) 176 | points = np.vstack([points, np.ones((1, patch_size *patch_size))]) 177 | points = np.matmul(inv_ext_ref, points) 178 | points = np.matmul(ext_i, points) 179 | points = points[0:3] 180 | points = np.matmul(int_i, points) 181 | Xi = points[0] /points[2] 182 | Yi = points[1] /points[2] 183 | Xi = Xi.reshape((patch_size, patch_size)) 184 | Yi = Yi.reshape((patch_size, patch_size)) 185 | 186 | # handle some exceptions 187 | invalid_Xi_zero = Xi < 0 188 | Xi[invalid_Xi_zero] = 0 189 | invalid_Xi_width = Xi >= width 190 | Xi[invalid_Xi_width] = width - 1 191 | invalid_Xi = np.logical_or(invalid_Xi_zero, invalid_Xi_width) 192 | 193 | invalid_Yi_zero = Yi < 0 194 | Yi[invalid_Yi_zero] = 0 195 | invalid_Yi_height = Yi >= height 196 | Yi[invalid_Yi_height] = height - 1 197 | invalid_Yi = np.logical_or(invalid_Yi_zero, invalid_Yi_height) 198 | invalid_XYi = np.logical_or(invalid_Xi, invalid_Yi) 199 | 200 | # do warping 201 | img_i = list_src_img[src_idx] 202 | Xi = Xi.astype(np.int) 203 | Yi = Yi.astype(np.int) 204 | 205 | 206 | planar_patch_i = img_i[Yi, Xi] 207 | planar_patch_i[invalid_XYi] = 0 208 | 209 | return planar_patch_i 210 | -------------------------------------------------------------------------------- /xtreme-view/vsynthlib/refinement.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (C) 2019 NVIDIA Corporation. All rights reserved. 
3 | Licensed under the NVIDIA Source Code License. See LICENSE.md at https://github.com/NVlabs/extreme-view-synth. 4 | Authors: Inchang Choi, Orazio Gallo, Alejandro Troccoli, Min H. Kim, and Jan Kautz 5 | """ 6 | 7 | import numpy as np 8 | import torch 9 | from torch.autograd import Variable 10 | import os 11 | import time 12 | import imageio 13 | 14 | from vsynthlib.refinet.models import Model_VNPCAT 15 | from vsynthlib import depth_util 16 | 17 | 18 | class DeepViewRefiner(object): 19 | 20 | NUM_INPUT_CHANNELS = 27 21 | 22 | def __init__(self, filename_model_weight, working_dir, out_dir, 23 | patch_size=64, 24 | with_CUDA=True): 25 | 26 | # define the model 27 | self.model = Model_VNPCAT() 28 | 29 | # load weight 30 | self.with_CUDA = with_CUDA 31 | self.model.load_state_dict(torch.load(filename_model_weight)) 32 | if with_CUDA: 33 | self.model.cuda() 34 | 35 | self.working_dir = working_dir 36 | self.out_dir = out_dir 37 | self.patch_size = patch_size 38 | 39 | if not os.path.exists(self.out_dir): 40 | os.mkdir(self.out_dir) 41 | 42 | 43 | 44 | pass 45 | 46 | def do(self, synth_obj, list_src_img, list_src_cam, count=0, 47 | do_stereo=False, return_val=False, custom_outdir=''): 48 | 49 | value = self.do_VNPCAT(synth_obj, list_src_img, list_src_cam, 50 | count=count, return_val=return_val, 51 | without_candi= False, 52 | custom_outdir=custom_outdir) 53 | 54 | if return_val: 55 | return value 56 | 57 | def do_VNPCAT(self, synth_obj, list_src_img, list_src_cam, count=0, 58 | return_val=False, without_candi=False, custom_outdir=''): 59 | 60 | func_fetch_patch = depth_util.fetch_patches_VNP 61 | 62 | # load synth data 63 | img_synth = synth_obj['img_synth'] 64 | depth_map_P1 = synth_obj['depth_map_P1'] 65 | depth_map_P2 = synth_obj['depth_map_P2'] 66 | dest_cam = synth_obj['dest_cam'] 67 | height, width, _ = img_synth.shape 68 | 69 | # perform refinement patch-by-patchy 70 | ############################################################# 71 | # Do Testing 72 | ############################################################# 73 | img_merged = np.zeros(shape=(height, width, 3)) 74 | img_counter = np.zeros(shape=(height, width, 3)) 75 | for j in range(0, height, int(self.patch_size / 4)): 76 | for i in range(0, width, int(self.patch_size / 4)): 77 | 78 | t_start = time.time() 79 | # set the model to the evaluation mode 80 | self.model.eval() 81 | 82 | # get candidate tensor 83 | x_top = i 84 | y_top = j 85 | if x_top + self.patch_size >= width: 86 | x_top = width - self.patch_size 87 | if y_top + self.patch_size >= height: 88 | y_top = height - self.patch_size 89 | 90 | t_input_synth, list_t_candi_patch = func_fetch_patch(y_top, x_top, self.patch_size, dest_cam, 91 | img_synth, list_src_img, list_src_cam, 92 | depth_map_P1, depth_map_P2) 93 | 94 | if t_input_synth is None: 95 | print('None!') 96 | continue 97 | 98 | # check if more than half of input pixels are valid 99 | t_in_slice = t_input_synth[0] 100 | bool_nz = t_in_slice != -0.5 101 | bool_nz = bool_nz.astype(np.float) 102 | sum_nz = np.sum(bool_nz) 103 | if sum_nz < self.patch_size * self.patch_size * 0.6: 104 | continue 105 | 106 | t_input_synth = np.expand_dims(t_input_synth, axis=0) 107 | t_input_synth = t_input_synth.astype(np.float32) 108 | _, chs, _, _ = t_input_synth.shape 109 | n_patches = len(list_t_candi_patch) 110 | t_in_synth = t_input_synth 111 | 112 | input_synth_tensor \ 113 | = torch.from_numpy(t_in_synth) 114 | 115 | if self.with_CUDA: 116 | input_synth_tensor = input_synth_tensor.cuda() 117 | with torch.no_grad(): 118 | 
input_synth_variable = Variable(input_synth_tensor, requires_grad=False) 119 | 120 | list_input_candi_variable = [] 121 | for i in range(n_patches): 122 | candi_patch = list_t_candi_patch[i] 123 | candi_patch = np.expand_dims(candi_patch, axis=0) 124 | candi_patch = candi_patch.astype(np.float32) 125 | 126 | candi_tensor = torch.from_numpy(candi_patch) 127 | 128 | if self.with_CUDA: 129 | candi_tensor = candi_tensor.cuda() 130 | 131 | with torch.no_grad(): 132 | input_candi_variable = Variable(candi_tensor) 133 | 134 | list_input_candi_variable.append(input_candi_variable) 135 | 136 | 137 | # do forward pass 138 | if without_candi: 139 | output_variable = self.model(input_synth_variable) 140 | output_to_show = output_variable[0].cpu().data[0] 141 | else: 142 | output_variable = self.model(input_synth_variable, list_input_candi_variable) 143 | output_to_show = output_variable.cpu().data[0] 144 | 145 | output_to_show = output_to_show + 0.5 146 | output_to_show = output_to_show.permute(1, 2, 0).numpy() 147 | output_to_show[output_to_show < 0.0] = 0.0 148 | output_to_show[output_to_show > 1.0] = 1.0 149 | output_to_show = output_to_show * 255.0 150 | output_to_show = output_to_show.astype(np.uint8) 151 | 152 | img_merged[y_top:(y_top + self.patch_size), x_top:(x_top + self.patch_size), :] += output_to_show 153 | img_counter[y_top:(y_top + self.patch_size), x_top:(x_top + self.patch_size), :] += 1 154 | 155 | t_current = time.time() 156 | t_elapsed_row = t_current - t_start 157 | 158 | # delete variables 159 | del input_synth_variable 160 | for var in list_input_candi_variable: 161 | self.var = var 162 | del self.var 163 | 164 | 165 | img_merged = img_merged / (img_counter + 1e-10) 166 | img_merged /= 255.0 167 | if return_val: 168 | return img_merged[0:height, 0:width] 169 | else: 170 | filename_out_prefix = 'refined_vsynth_%04d' % (count) 171 | if custom_outdir != '': 172 | imageio.imwrite('%s/%s.png' % (custom_outdir, filename_out_prefix), img_merged[0:height, 0:width]) 173 | else: 174 | imageio.imwrite('%s/%s.png' % (self.out_dir, filename_out_prefix), img_merged[0:height, 0:width]) 175 | -------------------------------------------------------------------------------- /xtreme-view/vsynthlib/refinet/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/extreme-view-synth/2820ffdda9f44e70cd2fdd0845ec9145293e4183/xtreme-view/vsynthlib/refinet/__init__.py -------------------------------------------------------------------------------- /xtreme-view/vsynthlib/refinet/models.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (C) 2019 NVIDIA Corporation. All rights reserved. 3 | Licensed under the NVIDIA Source Code License. See LICENSE.md at https://github.com/NVlabs/extreme-view-synth. 4 | Authors: Inchang Choi, Orazio Gallo, Alejandro Troccoli, Min H. 
Kim, and Jan Kautz 5 | """ 6 | 7 | import torch 8 | import torch.nn as nn 9 | import torch.nn.parallel 10 | import torch.utils.data 11 | import torch.nn.init as init 12 | 13 | # 14 | # VNPCAT = Variable Number of Patch using concatenation 15 | # 16 | 17 | class Model_VNPCAT_Encoder(nn.Module): 18 | # Based on Unet and inpainting network 19 | def __init__(self, num_in_features = 3): 20 | super(Model_VNPCAT_Encoder, self).__init__() 21 | self.relu = nn.ReLU() 22 | 23 | self.conv1 = nn.Conv2d(num_in_features, 128, 3, 1, 1) 24 | self.conv2 = nn.Conv2d(128, 128, 3, 1, 1) 25 | self.conv2_bnorm = nn.BatchNorm2d(128) 26 | 27 | self.conv3 = nn.Conv2d(128, 256, 3, 2, 1) 28 | self.conv3_bnorm = nn.BatchNorm2d(256) 29 | self.conv4 = nn.Conv2d(256, 256, 3, 1, 1) 30 | self.conv4_bnorm = nn.BatchNorm2d(256) 31 | 32 | self.conv5 = nn.Conv2d(256, 512, 3, 2, 1) 33 | self.conv5_bnorm = nn.BatchNorm2d(512) 34 | self.conv6 = nn.Conv2d(512, 512, 3, 1, 1) 35 | self.conv6_bnorm = nn.BatchNorm2d(512) 36 | 37 | self.conv7 = nn.Conv2d(512, 512, 3, 2, 1) 38 | self.conv7_bnorm = nn.BatchNorm2d(512) 39 | 40 | self.apply(self.initialize_weight) 41 | 42 | def forward(self, x): 43 | 44 | # encoder 45 | x1 = self.relu(self.conv1(x)) 46 | x2 = self.relu(self.conv2_bnorm(self.conv2(x1))) 47 | 48 | x3 = self.relu(self.conv3_bnorm(self.conv3(x2))) 49 | x4 = self.relu(self.conv4_bnorm(self.conv4(x3))) 50 | 51 | x5 = self.relu(self.conv5_bnorm(self.conv5(x4))) 52 | x6 = self.relu(self.conv6_bnorm(self.conv6(x5))) 53 | 54 | x7 = self.relu(self.conv7_bnorm(self.conv7(x6))) 55 | 56 | return [x2, x4, x6, x7] 57 | 58 | def initialize_weight(self, m): 59 | classname = m.__class__.__name__ 60 | if classname.find('Conv') != -1: 61 | init.xavier_normal_(m.weight) 62 | 63 | elif classname.find('BatchNorm') != -1: 64 | m.weight.data.normal_(1.0, 0.02) 65 | m.bias.data.fill_(0) 66 | 67 | 68 | class Model_VNPCAT_Decoder(nn.Module): 69 | # Based on Unet and inpainting network 70 | def __init__(self): 71 | super(Model_VNPCAT_Decoder, self).__init__() 72 | self.relu = nn.ReLU() 73 | self.upsample = nn.Upsample(scale_factor=2) 74 | 75 | self.conv1 = nn.Conv2d(512*2, 512, 3, 1, 1) 76 | self.conv1_bnorm = nn.BatchNorm2d(512) 77 | self.conv2 = nn.Conv2d(512, 512, 3, 1, 1) 78 | self.conv2_bnorm = nn.BatchNorm2d(512) 79 | self.conv2_up = nn.Conv2d(512, 512, 3, 1, 1) 80 | self.conv2_up_bnorm = nn.BatchNorm2d(512) 81 | 82 | self.conv3 = nn.Conv2d(512*3, 512, 3, 1, 1) 83 | self.conv3_bnorm = nn.BatchNorm2d(512) 84 | self.conv4 = nn.Conv2d(512, 512, 3, 1, 1) 85 | self.conv4_bnorm = nn.BatchNorm2d(512) 86 | self.conv4_up = nn.Conv2d(512, 256, 3, 1, 1) 87 | self.conv4_up_bnorm = nn.BatchNorm2d(256) 88 | 89 | self.conv5 = nn.Conv2d(256*3, 256, 3, 1, 1) 90 | self.conv5_bnorm = nn.BatchNorm2d(256) 91 | self.conv6 = nn.Conv2d(256, 256, 3, 1, 1) 92 | self.conv6_bnorm = nn.BatchNorm2d(256) 93 | self.conv6_up = nn.Conv2d(256, 128, 3, 1, 1) 94 | self.conv6_up_bnorm = nn.BatchNorm2d(128) 95 | 96 | self.conv7 = nn.Conv2d(128*3, 128, 3, 1, 1) 97 | self.conv7_bnorm = nn.BatchNorm2d(128) 98 | self.conv8 = nn.Conv2d(128, 128, 3, 1, 1) 99 | self.conv8_bnorm = nn.BatchNorm2d(128) 100 | self.conv9 = nn.Conv2d(128, 3, 3, 1, 1) 101 | 102 | self.apply(self.initialize_weight) 103 | 104 | def forward(self, list_F_synth, list_F_max): 105 | 106 | # encoder 107 | F_synth_3 = list_F_synth[3] 108 | F_max_3 = list_F_max[3] 109 | x0 = torch.cat((F_synth_3, F_max_3), 1) 110 | x1 = self.relu(self.conv1_bnorm(self.conv1(x0))) 111 | x2 = self.relu(self.conv2_bnorm(self.conv2(x1))) 112 | 
x2_up = self.relu(self.conv2_up_bnorm(self.conv2_up(self.upsample(x2)))) 113 | 114 | F_synth_2 = list_F_synth[2] 115 | F_max_2 = list_F_max[2] 116 | x2_cat = torch.cat((x2_up, F_synth_2, F_max_2), 1) 117 | x3 = self.relu(self.conv3_bnorm(self.conv3(x2_cat))) 118 | x4 = self.relu(self.conv4_bnorm(self.conv4(x3))) 119 | x4_up = self.relu(self.conv4_up_bnorm(self.conv4_up(self.upsample(x4)))) 120 | 121 | F_synth_1 = list_F_synth[1] 122 | F_max_1 = list_F_max[1] 123 | x4_cat = torch.cat((x4_up, F_synth_1, F_max_1), 1) 124 | x5 = self.relu(self.conv5_bnorm(self.conv5(x4_cat))) 125 | x6 = self.relu(self.conv6_bnorm(self.conv6(x5))) 126 | x6_up = self.relu(self.conv6_up_bnorm(self.conv6_up(self.upsample(x6)))) 127 | 128 | F_synth_0 = list_F_synth[0] 129 | F_max_0 = list_F_max[0] 130 | x6_cat = torch.cat((x6_up, F_synth_0, F_max_0), 1) 131 | x7 = self.relu(self.conv7_bnorm(self.conv7(x6_cat))) 132 | x8 = self.relu(self.conv8_bnorm(self.conv8(x7))) 133 | x9 = self.conv9(x8) 134 | 135 | return x9 136 | 137 | def initialize_weight(self, m): 138 | classname = m.__class__.__name__ 139 | if classname.find('Conv') != -1: 140 | init.xavier_normal_(m.weight) 141 | 142 | elif classname.find('BatchNorm') != -1: 143 | m.weight.data.normal_(1.0, 0.02) 144 | m.bias.data.fill_(0) 145 | 146 | 147 | class Model_VNPCAT(nn.Module): 148 | # Based on Unet and inpainting network 149 | def __init__(self): 150 | super(Model_VNPCAT, self).__init__() 151 | self.E = Model_VNPCAT_Encoder() 152 | self.D = Model_VNPCAT_Decoder() 153 | self.apply(self.initialize_weight) 154 | 155 | def forward(self, x_synth, list_x_candi): 156 | 157 | # encoder 158 | list_F_synth = self.E(x_synth) 159 | list_list_F_candi = [] 160 | for x_candi in list_x_candi: 161 | list_F_candi = self.E(x_candi) 162 | list_list_F_candi.append(list_F_candi) 163 | 164 | # do max pool 165 | list_F0 = [] 166 | list_F1 = [] 167 | list_F2 = [] 168 | list_F3 = [] 169 | 170 | for list_F_candi in list_list_F_candi: 171 | list_F0.append(list_F_candi[0][None]) 172 | list_F1.append(list_F_candi[1][None]) 173 | list_F2.append(list_F_candi[2][None]) 174 | list_F3.append(list_F_candi[3][None]) 175 | 176 | concat_F0 = torch.cat(list_F0) 177 | concat_F1 = torch.cat(list_F1) 178 | concat_F2 = torch.cat(list_F2) 179 | concat_F3 = torch.cat(list_F3) 180 | 181 | F0_max, _ = torch.max(concat_F0, dim=0) 182 | F1_max, _ = torch.max(concat_F1, dim=0) 183 | F2_max, _ = torch.max(concat_F2, dim=0) 184 | F3_max, _ = torch.max(concat_F3, dim=0) 185 | 186 | list_F_max = [F0_max, F1_max, F2_max, F3_max] 187 | 188 | # decoder 189 | x_refined = self.D(list_F_synth, list_F_max) 190 | 191 | return x_refined 192 | 193 | def initialize_weight(self, m): 194 | classname = m.__class__.__name__ 195 | if classname.find('Conv') != -1: 196 | init.xavier_normal_(m.weight) 197 | 198 | elif classname.find('BatchNorm') != -1: 199 | m.weight.data.normal_(1.0, 0.02) 200 | m.bias.data.fill_(0) 201 | --------------------------------------------------------------------------------
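A minimal usage sketch for Model_VNPCAT (an editorial illustration under the assumptions stated here, not code from the repository): it assumes the package layout above, uses dummy zero-filled 64x64 patches matching DeepViewRefiner's default patch_size, and exercises the forward pass that takes one synthesized patch plus a variable-length list of candidate patches:

import torch
from vsynthlib.refinet.models import Model_VNPCAT

# Build the refinement network; weights here are the Xavier-initialized defaults,
# whereas DeepViewRefiner loads trained weights via load_state_dict().
model = Model_VNPCAT().eval()

# One zero-centered synthesized patch (batch, channels, height, width).
x_synth = torch.zeros(1, 3, 64, 64)

# A variable number of candidate patches, as produced by fetch_patches_VNP;
# three candidates are used here purely for illustration.
list_x_candi = [torch.zeros(1, 3, 64, 64) for _ in range(3)]

with torch.no_grad():
    # The encoder is shared across the synthesized and candidate patches; the
    # candidate features are max-pooled over the candidate dimension before
    # the U-Net-style decoder produces the refined patch of shape (1, 3, 64, 64).
    x_refined = model(x_synth, list_x_candi)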