├── CoordConv.py
├── LICENSE
├── README.md
├── basic.py
├── criteria.py
├── dataloaders
    ├── __pycache__
    │   ├── kitti_loader.cpython-36.pyc
    │   ├── kitti_loader.cpython-38.pyc
    │   ├── kitti_loader.cpython-39.pyc
    │   ├── pose_estimator.cpython-36.pyc
    │   ├── pose_estimator.cpython-38.pyc
    │   ├── pose_estimator.cpython-39.pyc
    │   ├── transforms.cpython-36.pyc
    │   ├── transforms.cpython-38.pyc
    │   └── transforms.cpython-39.pyc
    ├── calib_cam_to_cam.txt
    ├── kitti_loader.py
    ├── pose_estimator.py
    └── transforms.py
├── demo.gif
├── download
    ├── rgb_train_downloader.sh
    └── rgb_val_downloader.sh
├── helper.py
├── main.py
├── metrics.py
├── model.py
├── results.png
└── vis_utils.py


/CoordConv.py:
--------------------------------------------------------------------------------
 1 | from __future__ import print_function
 2 | 
 3 | import numpy as np
 4 | 
 5 | class AddCoordsNp():
 6 | 	"""Add coords to a tensor"""
 7 | 	def __init__(self, x_dim=64, y_dim=64, with_r=False):
 8 | 		self.x_dim = x_dim
 9 | 		self.y_dim = y_dim
10 | 		self.with_r = with_r
11 | 
12 | 	def call(self):
13 | 		"""
14 | 		input_tensor: (batch, x_dim, y_dim, c)
15 | 		"""
16 | 		#batch_size_tensor = np.shape(input_tensor)[0]
17 | 
18 | 		xx_ones = np.ones([self.x_dim], dtype=np.int32)
19 | 		xx_ones = np.expand_dims(xx_ones, 1)
20 | 
21 | 		#print(xx_ones.shape)
22 | 
23 | 		xx_range = np.expand_dims(np.arange(self.y_dim), 0)
24 | 		#xx_range = np.expand_dims(xx_range, 1)
25 | 
26 | 		#print(xx_range.shape)
27 | 
28 | 		xx_channel = np.matmul(xx_ones, xx_range)
29 | 		xx_channel = np.expand_dims(xx_channel, -1)
30 | 
31 | 		yy_ones = np.ones([self.y_dim], dtype=np.int32)
32 | 		yy_ones = np.expand_dims(yy_ones, 0)
33 | 
34 | 		#print(yy_ones.shape)
35 | 
36 | 		yy_range = np.expand_dims(np.arange(self.x_dim), 1)
37 | 		#yy_range = np.expand_dims(yy_range, -1)
38 | 
39 | 		#print(yy_range.shape)
40 | 
41 | 		yy_channel = np.matmul(yy_range, yy_ones)
42 | 		yy_channel = np.expand_dims(yy_channel, -1)
43 | 
44 | 		xx_channel = xx_channel.astype('float32') / (self.y_dim - 1)
45 | 		yy_channel = yy_channel.astype('float32') / (self.x_dim - 1)
46 | 
47 | 		xx_channel = xx_channel*2 - 1
48 | 		yy_channel = yy_channel*2 - 1
49 | 	
50 | 
51 | 		#xx_channel = xx_channel.repeat(batch_size_tensor, axis=0)
52 | 		#yy_channel = yy_channel.repeat(batch_size_tensor, axis=0)
53 | 
54 | 		ret = np.concatenate([xx_channel, yy_channel], axis=-1)
55 | 
56 | 		if self.with_r:
57 | 			rr = np.sqrt( np.square(xx_channel-0.5) + np.square(yy_channel-0.5))
58 | 			ret = np.concatenate([ret, rr], axis=-1)
59 | 
60 | 		return ret
61 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2018 Fangchang Ma
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # A Concise but High-performing Network for Image Guided Depth Completion in Autonomous Driving
 2 | This repository is the implementation of our paper [A Concise but High-performing Network for Image Guided Depth Completion in Autonomous Driving](https://www.sciencedirect.com/science/article/pii/S0950705124005112).
 3 | 
 4 | ## Demo
 5 | <p align="center">
 6 |   <img src="demo.gif" alt="example input output gif" width="1920" />
 7 | </p>
 8 | 
 9 | ## Results
10 | <p align="center">
11 |   <img src="results.png" alt="example input output gif" width="500" />
12 | </p>
13 | 
14 | ## Dependent Environment
15 | You can refer to the following environment:
16 | + python=3.6.2
17 | + torch==1.9.0+cu111
18 | + torchvision==0.10.0+cu111
19 | ```bash
20 | pip install numpy matplotlib Pillow
21 | pip install scikit-image
22 | pip install opencv-contrib-python
23 | ```
24 | 
25 | ## Data
26 | - Download the [KITTI Depth](http://www.cvlibs.net/datasets/kitti/eval_depth.php?benchmark=depth_completion) Dataset from their website. Use the following scripts to extract corresponding RGB images from the raw dataset. 
27 | ```bash
28 | ./download/rgb_train_downloader.sh
29 | ./download/rgb_val_downloader.sh
30 | ```
31 | The downloaded rgb files will be stored in the `../data/data_rgb` folder. The overall code, data, and results directory is structured as follows.
32 | ```
33 | ├── CHNet
34 | ├── data
35 | |   ├── data_depth_annotated
36 | |   |   ├── train
37 | |   |   ├── val
38 | |   ├── data_depth_velodyne
39 | |   |   ├── train
40 | |   |   ├── val
41 | |   ├── depth_selection
42 | |   |   ├── test_depth_completion_anonymous
43 | |   |   ├── test_depth_prediction_anonymous
44 | |   |   ├── val_selection_cropped
45 | |   └── data_rgb
46 | |   |   ├── train
47 | |   |   ├── val
48 | ├── results
49 | ```
50 | 
51 | ## Train 
52 | You can train the CHNet through the following command:
53 | ```
 54 | python main.py -b 8 (8 is an example batch size)
55 | ```
 56 | ## Evaluation
57 | You can evaluate the CHNet through the following command:
58 | ```
59 | python main.py -b 1 --evaluate [checkpoint-path]
60 | ```
61 | ## Test
62 | You can test the CHNet through the following command for online submission:
63 | ```
64 | python main.py -b 1 --evaluate [checkpoint-path] --test
65 | ```
66 | 
67 | ## Acknowledgement
 68 | Many thanks to these excellent open-source projects:
69 | * [PENet](https://github.com/JUGGHM/PENet_ICRA2021)
70 | * [GuideNet](https://github.com/kakaxi314/GuideNet)
71 | * [self-supervised-depth-completion](https://github.com/fangchangma/self-supervised-depth-completion)
72 | 
73 | ## Citation
74 | Please consider citing my work as follows if it is helpful for you.
75 | ```
76 | @article{liu2024concise,
77 |   title={A concise but high-performing network for image guided depth completion in autonomous driving},
78 |   author={Liu, Moyun and Chen, Bing and Chen, Youping and Xie, Jingming and Yao, Lei and Zhang, Yang and Zhou, Joey Tianyi},
79 |   journal={Knowledge-Based Systems},
80 |   pages={111877},
81 |   year={2024},
82 |   publisher={Elsevier}
83 | }
84 | ```
85 | 
86 | 


--------------------------------------------------------------------------------
/basic.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | import torch.nn.functional as F
  4 | import math
  5 | 
  6 | gks = 5
  7 | pad = [i for i in range(gks*gks)]
  8 | shift = torch.zeros(gks*gks, 4)
  9 | for i in range(gks):
 10 |     for j in range(gks):
 11 |         top = i
 12 |         bottom = gks-1-i
 13 |         left = j
 14 |         right = gks-1-j
 15 |         pad[i*gks + j] = torch.nn.ZeroPad2d((left, right, top, bottom))
 16 |         #shift[i*gks + j, :] = torch.tensor([left, right, top, bottom])
 17 | mid_pad = torch.nn.ZeroPad2d(((gks-1)/2, (gks-1)/2, (gks-1)/2, (gks-1)/2))
 18 | zero_pad = pad[0]
 19 | 
 20 | gks2 = 3     #guide kernel size
 21 | pad2 = [i for i in range(gks2*gks2)]
 22 | shift = torch.zeros(gks2*gks2, 4)
 23 | for i in range(gks2):
 24 |     for j in range(gks2):
 25 |         top = i
 26 |         bottom = gks2-1-i
 27 |         left = j
 28 |         right = gks2-1-j
 29 |         pad2[i*gks2 + j] = torch.nn.ZeroPad2d((left, right, top, bottom))
 30 | 
 31 | gks3 = 7     #guide kernel size
 32 | pad3 = [i for i in range(gks3*gks3)]
 33 | shift = torch.zeros(gks3*gks3, 4)
 34 | for i in range(gks3):
 35 |     for j in range(gks3):
 36 |         top = i
 37 |         bottom = gks3-1-i
 38 |         left = j
 39 |         right = gks3-1-j
 40 |         pad3[i*gks3 + j] = torch.nn.ZeroPad2d((left, right, top, bottom))
 41 | 
 42 | def weights_init(m):
 43 |     # Initialize filters with Gaussian random weights
 44 |     if isinstance(m, nn.Conv2d):
 45 |         n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
 46 |         m.weight.data.normal_(0, math.sqrt(2. / n))
 47 |         if m.bias is not None:
 48 |             m.bias.data.zero_()
 49 |     elif isinstance(m, nn.ConvTranspose2d):
 50 |         n = m.kernel_size[0] * m.kernel_size[1] * m.in_channels
 51 |         m.weight.data.normal_(0, math.sqrt(2. / n))
 52 |         if m.bias is not None:
 53 |             m.bias.data.zero_()
 54 |     elif isinstance(m, nn.BatchNorm2d):
 55 |         m.weight.data.fill_(1)
 56 |         m.bias.data.zero_()
 57 | 
 58 | def convbnrelu(in_channels, out_channels, kernel_size=3,stride=1, padding=1):
 59 |     return nn.Sequential(
 60 | 		nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, stride=stride, padding=padding, bias=False),
 61 | 		nn.BatchNorm2d(out_channels),
 62 | 		nn.ReLU(inplace=True)
 63 | 	)
 64 | 
 65 | def deconvbnrelu(in_channels, out_channels, kernel_size=5, stride=2, padding=2, output_padding=1):
 66 |     return nn.Sequential(
 67 | 		nn.ConvTranspose2d(in_channels, out_channels, kernel_size=kernel_size, stride=stride, padding=padding, output_padding=output_padding, bias=False),
 68 | 		nn.BatchNorm2d(out_channels),
 69 | 		nn.ReLU(inplace=True)
 70 | 	)
 71 | 
 72 | def convbn(in_channels, out_channels, kernel_size=3,stride=1, padding=1):
 73 |     return nn.Sequential(
 74 | 		nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, stride=stride, padding=padding, bias=False),
 75 | 		nn.BatchNorm2d(out_channels)
 76 | 	)
 77 | 
 78 | def deconvbn(in_channels, out_channels, kernel_size=4, stride=2, padding=1, output_padding=0):
 79 |     return nn.Sequential(
 80 | 		nn.ConvTranspose2d(in_channels, out_channels, kernel_size=kernel_size, stride=stride, padding=padding, output_padding=output_padding, bias=False),
 81 | 		nn.BatchNorm2d(out_channels)
 82 | 	)
 83 | 
 84 | class BasicBlock(nn.Module):
 85 |     expansion = 1
 86 |     __constants__ = ['downsample']
 87 | 
 88 |     def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
 89 |                  base_width=64, dilation=1, norm_layer=None):
 90 |         super(BasicBlock, self).__init__()
 91 |         if norm_layer is None:
 92 |             norm_layer = nn.BatchNorm2d
 93 |             #norm_layer = encoding.nn.BatchNorm2d
 94 |         if groups != 1 or base_width != 64:
 95 |             raise ValueError('BasicBlock only supports groups=1 and base_width=64')
 96 |         if dilation > 1:
 97 |             raise NotImplementedError("Dilation > 1 not supported in BasicBlock")
 98 |         # Both self.conv1 and self.downsample layers downsample the input when stride != 1
 99 |         self.conv1 = conv3x3(inplanes, planes, stride)
100 |         self.bn1 = norm_layer(planes)
101 |         self.relu = nn.ReLU(inplace=True)
102 |         self.conv2 = conv3x3(planes, planes)
103 |         self.bn2 = norm_layer(planes)
104 |         if stride != 1 or inplanes != planes:
105 |             downsample = nn.Sequential(
106 |                 conv1x1(inplanes, planes, stride),
107 |                 norm_layer(planes),
108 |             )
109 |         self.downsample = downsample
110 |         self.stride = stride
111 | 
112 |     def forward(self, x):
113 |         identity = x
114 | 
115 |         out = self.conv1(x)
116 |         out = self.bn1(out)
117 |         out = self.relu(out)
118 | 
119 |         out = self.conv2(out)
120 |         out = self.bn2(out)
121 | 
122 |         if self.downsample is not None:
123 |             identity = self.downsample(x)
124 | 
125 |         out += identity
126 |         out = self.relu(out)
127 | 
128 |         return out
129 | 
def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1, bias=False, padding=1):
    """3x3 convolution; any ``padding >= 1`` is replaced by ``dilation``.

    A ``padding`` below 1 (e.g. 0) is passed through unchanged.
    """
    effective_padding = dilation if padding >= 1 else padding
    return nn.Conv2d(
        in_planes,
        out_planes,
        kernel_size=3,
        stride=stride,
        padding=effective_padding,
        groups=groups,
        bias=bias,
        dilation=dilation,
    )
136 | 
def conv1x1(in_planes, out_planes, stride=1, groups=1, bias=False):
    """Pointwise (1x1) convolution with the given stride, groups and bias flag."""
    options = dict(kernel_size=1, stride=stride, groups=groups, bias=bias)
    return nn.Conv2d(in_planes, out_planes, **options)
140 | 
class SparseDownSampleClose(nn.Module):
    """Downsample a sparse map, keeping the smallest valid value per window.

    Positions with ``mask == 0`` are pushed out of the way by a large offset
    before max-pooling the negated values, so each window returns its
    minimum masked-in value, or 0 when the window has no valid position.
    """

    def __init__(self, stride):
        super().__init__()
        self.pooling = nn.MaxPool2d(stride, stride)
        self.large_number = 600

    def forward(self, d, mask):
        """Return ``(pooled_d, pooled_mask)`` for the value map ``d`` and its mask."""
        # Negate so max-pooling selects the minimum; invalid cells sink by large_number.
        negated = - (1-mask)*self.large_number - d
        window_min = - self.pooling(negated)

        pooled_mask = self.pooling(mask)
        # Windows without any valid cell still carry the offset; remove it.
        pooled_d = window_min - (1-pooled_mask)*self.large_number
        return pooled_d, pooled_mask
154 | 
class CSPNGenerate(nn.Module):
    """Predict normalised CSPN weights and pad each one by its kernel offset."""

    def __init__(self, in_channels, kernel_size):
        super().__init__()
        self.kernel_size = kernel_size
        taps = self.kernel_size * self.kernel_size
        # One channel per neighbour; the centre weight is derived in forward().
        self.generate = convbn(in_channels, taps - 1, kernel_size=3, stride=1, padding=1)

    def forward(self, feature):
        """Return the kernel_size**2 padded weight maps concatenated on dim 1."""
        guide = self.generate(feature)

        # Normalise so the absolute neighbour weights sum to 1 per pixel; the
        # centre weight is whatever makes the signed weights sum to 1.
        guide_sum = guide.abs().sum(dim=1, keepdim=True)
        guide = guide / guide_sum
        guide_mid = 1 - guide.sum(dim=1, keepdim=True)

        taps = self.kernel_size * self.kernel_size
        centre = int((taps - 1) / 2)
        # Pad tables exist for kernel sizes 3, 5 and 7 only.
        pads = {3: pad2, 5: pad, 7: pad3}.get(self.kernel_size)

        padded = []
        for t in range(taps):
            zero_pad = pads[t] if pads is not None else 0
            if t < centre:
                source = guide[:, t:t+1, :, :]
            elif t > centre:
                # guide has no centre channel, so later taps sit one index lower.
                source = guide[:, t-1:t, :, :]
            else:
                source = guide_mid
            padded.append(zero_pad(source))

        return torch.cat(padded, dim=1)
189 | 
class CSPN(nn.Module):
    """One CSPN propagation step over zero-padded, shifted copies of the input."""

    def __init__(self, kernel_size):
        super().__init__()
        self.kernel_size = kernel_size

    def forward(self, guide_weight, hn, h0):
        """Weight shifted copies of ``hn`` (``h0`` at the centre tap) and sum them.

        ``guide_weight`` is multiplied element-wise with the stacked padded
        maps, summed over dim 1, cropped by (kernel_size - 1) / 2 on each
        side, and returned with a singleton dim 1 restored.
        """
        size = self.kernel_size
        taps = size * size
        half = int(0.5 * (taps - 1))
        # Pad tables exist for kernel sizes 3, 5 and 7 only.
        pads = {3: pad2, 5: pad, 7: pad3}.get(size)

        shifted = []
        for t in range(taps):
            zero_pad = pads[t] if pads is not None else 0
            shifted.append(zero_pad(h0 if t == half else hn))
        stacked = torch.cat(shifted, dim=1)

        summed = torch.sum(guide_weight.mul(stacked), dim=1)

        # Drop the border that the padding added.
        border = int((size - 1) / 2)
        summed = summed[:, border:-border, border:-border]

        return summed.unsqueeze(dim=1)
219 | 
class CSPNGenerateAccelerate(nn.Module):
    """Predict normalised CSPN weights as one (kernel_size**2)-channel map."""

    def __init__(self, in_channels, kernel_size):
        super().__init__()
        self.kernel_size = kernel_size
        taps = self.kernel_size * self.kernel_size
        # One channel per neighbour; the centre weight is derived in forward().
        self.generate = convbn(in_channels, taps - 1, kernel_size=3, stride=1, padding=1)

    def forward(self, feature):
        """Return neighbour weights with the derived centre weight in the middle."""
        raw = self.generate(feature)

        # Normalisation as in standard CSPN: absolute neighbour weights sum to
        # 1 and the centre weight makes the signed total equal 1.
        scale = raw.abs().sum(dim=1, keepdim=True)
        neighbours = raw / scale
        centre = 1 - neighbours.sum(dim=1, keepdim=True)

        first_half, second_half = torch.chunk(neighbours, 2, dim=1)
        return torch.cat((first_half, centre, second_half), dim=1)
241 | 
def kernel_trans(kernel, weight):
    """Convolve ``kernel`` with ``weight`` using size-preserving padding.

    The padding is (k - 1) / 2, where k is the integer square root of the
    channel count of ``kernel``.
    """
    channels = kernel.size(1)
    side = int(math.sqrt(channels))
    half = int((side - 1) / 2)
    return F.conv2d(kernel, weight, stride=1, padding=half)
246 | 
class CSPNAccelerate(nn.Module):
    """CSPN propagation step implemented with unfold (im2col) and an einsum."""

    def __init__(self, kernel_size, dilation=1, padding=1, stride=1):
        super().__init__()
        self.kernel_size = kernel_size
        self.dilation = dilation
        self.padding = padding
        self.stride = stride

    def forward(self, kernel, input, input0): #with standard CSPN, an addition input0 port is added
        """Return the per-pixel weighted sum of each neighbourhood of ``input``.

        The centre tap of every neighbourhood is taken from ``input0``
        instead of ``input``. The result is reshaped to (batch, 1, h, w),
        with h and w read from ``input``.
        """
        batch = input.size(0)
        height, width = input.size(2), input.size(3)
        pixels = height * width
        taps = self.kernel_size * self.kernel_size

        patches = F.unfold(input, self.kernel_size, self.dilation, self.padding, self.stride)
        weights = kernel.reshape(batch, taps, pixels)

        # standard CSPN: overwrite the centre row with the initial map
        centre = int((taps - 1) / 2)
        patches[:, centre:centre+1, :] = input0.view(batch, 1, pixels)

        propagated = torch.einsum('ijk,ijk->ik', patches, weights)
        return propagated.view(batch, 1, height, width)
269 | 
class GeometryFeature(nn.Module):
    """Stack two scaled coordinate maps with ``z`` along dim 1."""

    def __init__(self):
        super().__init__()

    def forward(self, z, vnorm, unorm, h, w, ch, cw, fh, fw):
        """Return ``cat((x, y, z), 1)``.

        ``x = z * (0.5 * h * (vnorm + 1) - ch) / fh`` and
        ``y = z * (0.5 * w * (unorm + 1) - cw) / fw``.
        NOTE(review): this looks like a pinhole back-projection with
        vnorm/unorm in [-1, 1] and (ch, cw, fh, fw) as principal point and
        focal lengths; confirm against the caller.
        """
        v_offset = 0.5*h*(vnorm+1)-ch
        u_offset = 0.5*w*(unorm+1)-cw
        x = z*v_offset/fh
        y = z*u_offset/fw
        return torch.cat((x, y, z), 1)
278 | 
class BasicBlockGeo(nn.Module):
    """Residual block whose two convolutions also take ``geoplanes`` extra channels.

    A 1x1-conv + norm projection (over inplanes + geoplanes channels)
    replaces any ``downsample`` passed in whenever ``stride != 1`` or
    ``inplanes != planes``.
    """
    expansion = 1
    __constants__ = ['downsample']

    def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
                 base_width=64, dilation=1, norm_layer=None, geoplanes=3):
        super().__init__()

        norm_layer = nn.BatchNorm2d if norm_layer is None else norm_layer
        if not (groups == 1 and base_width == 64):
            raise ValueError('BasicBlock only supports groups=1 and base_width=64')
        if dilation > 1:
            raise NotImplementedError("Dilation > 1 not supported in BasicBlock")

        # conv1 and the shortcut projection both apply the stride.
        self.conv1 = conv3x3(inplanes + geoplanes, planes, stride)
        self.bn1 = norm_layer(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes+geoplanes, planes)
        self.bn2 = norm_layer(planes)

        shape_changes = stride != 1 or inplanes != planes
        if shape_changes:
            projection = conv1x1(inplanes+geoplanes, planes, stride)
            downsample = nn.Sequential(projection, norm_layer(planes))
        self.downsample = downsample
        self.stride = stride

    def forward(self, x, g1=None, g2=None):
        """Run the block, concatenating ``g1`` before conv1 and ``g2`` before conv2.

        Without a projection the shortcut is the original ``x``; with one,
        the projection is applied to ``x`` after ``g1`` has been appended.
        """
        identity = x
        if g1 is not None:
            x = torch.cat((x, g1), 1)
        out = self.relu(self.bn1(self.conv1(x)))

        if g2 is not None:
            out = torch.cat((g2,out), 1)
        out = self.bn2(self.conv2(out))

        if self.downsample is not None:
            identity = self.downsample(x)

        out += identity
        return self.relu(out)


--------------------------------------------------------------------------------
/criteria.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn as nn
 3 | 
 4 | loss_names = ['l1', 'l2']
 5 | 
 6 | class MaskedMSELoss(nn.Module):
 7 |     def __init__(self):
 8 |         super(MaskedMSELoss, self).__init__()
 9 | 
10 |     def forward(self, pred, target):
11 |         assert pred.dim() == target.dim(), "inconsistent dimensions"
12 |         valid_mask = (target > 0).detach()
13 |         diff = target - pred
14 |         diff = diff[valid_mask]
15 |         self.loss = (diff**2).mean()
16 |         return self.loss
17 | 
class MaskedL1Loss(nn.Module):
    """Mean absolute error over the positions where ``target > 0``."""

    def __init__(self):
        super().__init__()

    def forward(self, pred, target, weight=None):
        """Return the masked L1 loss; ``weight`` is accepted but not used.

        The value is also stored on ``self.loss``.
        """
        assert pred.dim() == target.dim(), "inconsistent dimensions"
        # Only strictly positive targets count as valid.
        valid_mask = (target > 0).detach()
        residual = (target - pred)[valid_mask]
        self.loss = residual.abs().mean()
        return self.loss
29 | 
30 | # import torch
31 | # import torch.nn as nn
32 | 
33 | # loss_names = ['l1', 'l2']
34 | 
35 | # def cal_weight(lidar_weight, L1_ratio):
36 | #     # lidar_weight = loss_ori / (loss_extra*L1_ratio + loss_lidar)    
37 | #     extra_weight = lidar_weight * L1_ratio
38 | 
39 | #     return extra_weight, lidar_weight
40 | 
41 | # class MaskedMSELoss(nn.Module):
42 | #     def __init__(self):
43 | #         super(MaskedMSELoss, self).__init__()
44 | 
45 | #     def forward(self, pred, target, lidar_mask):
46 | #         assert pred.dim() == target.dim(), "inconsistent dimensions"
47 | #         lidar_weight = 1
48 |         
49 | #         valid_mask = (target > 0).detach()
50 | #         extra_mask = (valid_mask.int() - (valid_mask * lidar_mask).int()).bool()
51 | #         diff = target - pred
52 |         
53 | #         #############################
54 | #         extra_num = (extra_mask > 0).sum()
55 | #         lidar_num = (valid_mask * lidar_mask > 0).sum()
56 |         
57 | #         extra_diff = (diff[extra_mask]**2).sum()
58 | #         lidar_diff = (diff[valid_mask * lidar_mask]**2).sum()
59 | 
60 | #         loss_extra = (extra_diff) / (extra_num + lidar_num)
61 | #         loss_lidar = (lidar_diff) / (extra_num + lidar_num)
62 | #         extra_diff_L1 = (diff[extra_mask]).abs().sum()
63 | #         lidar_diff_L1 = (diff[valid_mask * lidar_mask]).abs().sum()
64 | #         loss_extra_L1 = (extra_diff_L1) / (extra_num + lidar_num)
65 | #         loss_lidar_L1 = (lidar_diff_L1) / (extra_num + lidar_num)
66 | #         L1_ratio = (loss_extra_L1 / loss_lidar_L1).detach().item()
67 | #         L2_ratio = (loss_extra / loss_lidar).detach().item()
68 | #         num_ratio = (extra_num / lidar_num).detach().item()
69 | #         #############################
70 | 
71 | #         diff = diff[valid_mask]
72 | #         self.loss = (diff**2).mean()
73 | 
74 | #         return self.loss, L1_ratio, L2_ratio, num_ratio, loss_lidar.detach().item()
75 | 
76 | # class MaskedL1Loss(nn.Module):
77 | #     def __init__(self):
78 | #         super(MaskedL1Loss, self).__init__()
79 | 
80 | #     def forward(self, pred, target, weight=None):
81 | #         assert pred.dim() == target.dim(), "inconsistent dimensions"
82 | #         valid_mask = (target > 0).detach()
83 | #         diff = target - pred
84 | #         diff = diff[valid_mask]
85 | #         self.loss = diff.abs().mean()
86 | #         return self.loss


--------------------------------------------------------------------------------
/dataloaders/__pycache__/kitti_loader.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmomoy/CHNet/a4c9ad267f87cafe9fd95e5e3a70e91a882d94f3/dataloaders/__pycache__/kitti_loader.cpython-36.pyc


--------------------------------------------------------------------------------
/dataloaders/__pycache__/kitti_loader.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmomoy/CHNet/a4c9ad267f87cafe9fd95e5e3a70e91a882d94f3/dataloaders/__pycache__/kitti_loader.cpython-38.pyc


--------------------------------------------------------------------------------
/dataloaders/__pycache__/kitti_loader.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmomoy/CHNet/a4c9ad267f87cafe9fd95e5e3a70e91a882d94f3/dataloaders/__pycache__/kitti_loader.cpython-39.pyc


--------------------------------------------------------------------------------
/dataloaders/__pycache__/pose_estimator.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmomoy/CHNet/a4c9ad267f87cafe9fd95e5e3a70e91a882d94f3/dataloaders/__pycache__/pose_estimator.cpython-36.pyc


--------------------------------------------------------------------------------
/dataloaders/__pycache__/pose_estimator.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmomoy/CHNet/a4c9ad267f87cafe9fd95e5e3a70e91a882d94f3/dataloaders/__pycache__/pose_estimator.cpython-38.pyc


--------------------------------------------------------------------------------
/dataloaders/__pycache__/pose_estimator.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmomoy/CHNet/a4c9ad267f87cafe9fd95e5e3a70e91a882d94f3/dataloaders/__pycache__/pose_estimator.cpython-39.pyc


--------------------------------------------------------------------------------
/dataloaders/__pycache__/transforms.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmomoy/CHNet/a4c9ad267f87cafe9fd95e5e3a70e91a882d94f3/dataloaders/__pycache__/transforms.cpython-36.pyc


--------------------------------------------------------------------------------
/dataloaders/__pycache__/transforms.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmomoy/CHNet/a4c9ad267f87cafe9fd95e5e3a70e91a882d94f3/dataloaders/__pycache__/transforms.cpython-38.pyc


--------------------------------------------------------------------------------
/dataloaders/__pycache__/transforms.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmomoy/CHNet/a4c9ad267f87cafe9fd95e5e3a70e91a882d94f3/dataloaders/__pycache__/transforms.cpython-39.pyc


--------------------------------------------------------------------------------
/dataloaders/calib_cam_to_cam.txt:
--------------------------------------------------------------------------------
 1 | calib_time: 09-Jan-2012 13:57:47
 2 | corner_dist: 9.950000e-02
 3 | S_00: 1.392000e+03 5.120000e+02
 4 | K_00: 9.842439e+02 0.000000e+00 6.900000e+02 0.000000e+00 9.808141e+02 2.331966e+02 0.000000e+00 0.000000e+00 1.000000e+00
 5 | D_00: -3.728755e-01 2.037299e-01 2.219027e-03 1.383707e-03 -7.233722e-02
 6 | R_00: 1.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 1.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 1.000000e+00
 7 | T_00: 2.573699e-16 -1.059758e-16 1.614870e-16
 8 | S_rect_00: 1.242000e+03 3.750000e+02
 9 | R_rect_00: 9.999239e-01 9.837760e-03 -7.445048e-03 -9.869795e-03 9.999421e-01 -4.278459e-03 7.402527e-03 4.351614e-03 9.999631e-01
10 | P_rect_00: 7.215377e+02 0.000000e+00 6.095593e+02 0.000000e+00 0.000000e+00 7.215377e+02 1.728540e+02 0.000000e+00 0.000000e+00 0.000000e+00 1.000000e+00 0.000000e+00
11 | S_01: 1.392000e+03 5.120000e+02
12 | K_01: 9.895267e+02 0.000000e+00 7.020000e+02 0.000000e+00 9.878386e+02 2.455590e+02 0.000000e+00 0.000000e+00 1.000000e+00
13 | D_01: -3.644661e-01 1.790019e-01 1.148107e-03 -6.298563e-04 -5.314062e-02
14 | R_01: 9.993513e-01 1.860866e-02 -3.083487e-02 -1.887662e-02 9.997863e-01 -8.421873e-03 3.067156e-02 8.998467e-03 9.994890e-01
15 | T_01: -5.370000e-01 4.822061e-03 -1.252488e-02
16 | S_rect_01: 1.242000e+03 3.750000e+02
17 | R_rect_01: 9.996878e-01 -8.976826e-03 2.331651e-02 8.876121e-03 9.999508e-01 4.418952e-03 -2.335503e-02 -4.210612e-03 9.997184e-01
18 | P_rect_01: 7.215377e+02 0.000000e+00 6.095593e+02 -3.875744e+02 0.000000e+00 7.215377e+02 1.728540e+02 0.000000e+00 0.000000e+00 0.000000e+00 1.000000e+00 0.000000e+00
19 | S_02: 1.392000e+03 5.120000e+02
20 | K_02: 9.597910e+02 0.000000e+00 6.960217e+02 0.000000e+00 9.569251e+02 2.241806e+02 0.000000e+00 0.000000e+00 1.000000e+00
21 | D_02: -3.691481e-01 1.968681e-01 1.353473e-03 5.677587e-04 -6.770705e-02
22 | R_02: 9.999758e-01 -5.267463e-03 -4.552439e-03 5.251945e-03 9.999804e-01 -3.413835e-03 4.570332e-03 3.389843e-03 9.999838e-01
23 | T_02: 5.956621e-02 2.900141e-04 2.577209e-03
24 | S_rect_02: 1.242000e+03 3.750000e+02
25 | R_rect_02: 9.998817e-01 1.511453e-02 -2.841595e-03 -1.511724e-02 9.998853e-01 -9.338510e-04 2.827154e-03 9.766976e-04 9.999955e-01
26 | P_rect_02: 7.215377e+02 0.000000e+00 6.095593e+02 4.485728e+01 0.000000e+00 7.215377e+02 1.728540e+02 2.163791e-01 0.000000e+00 0.000000e+00 1.000000e+00 2.745884e-03
27 | S_03: 1.392000e+03 5.120000e+02
28 | K_03: 9.037596e+02 0.000000e+00 6.957519e+02 0.000000e+00 9.019653e+02 2.242509e+02 0.000000e+00 0.000000e+00 1.000000e+00
29 | D_03: -3.639558e-01 1.788651e-01 6.029694e-04 -3.922424e-04 -5.382460e-02
30 | R_03: 9.995599e-01 1.699522e-02 -2.431313e-02 -1.704422e-02 9.998531e-01 -1.809756e-03 2.427880e-02 2.223358e-03 9.997028e-01
31 | T_03: -4.731050e-01 5.551470e-03 -5.250882e-03
32 | S_rect_03: 1.242000e+03 3.750000e+02
33 | R_rect_03: 9.998321e-01 -7.193136e-03 1.685599e-02 7.232804e-03 9.999712e-01 -2.293585e-03 -1.683901e-02 2.415116e-03 9.998553e-01
34 | P_rect_03: 7.215377e+02 0.000000e+00 6.095593e+02 -3.395242e+02 0.000000e+00 7.215377e+02 1.728540e+02 2.199936e+00 0.000000e+00 0.000000e+00 1.000000e+00 2.729905e-03
35 | 


--------------------------------------------------------------------------------
/dataloaders/kitti_loader.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import os.path
  3 | import glob
  4 | import fnmatch  # pattern matching
  5 | import numpy as np
  6 | from numpy import linalg as LA
  7 | from random import choice
  8 | from PIL import Image
  9 | import torch
 10 | import torch.utils.data as data
 11 | import cv2
 12 | from dataloaders import transforms
 13 | import CoordConv
 14 | 
# Input-modality codes listed for this loader (presumably d = sparse depth,
# rgb = colour image, g = grayscale — TODO confirm against the argument parser).
input_options = ['d', 'rgb', 'rgbd', 'g', 'gd']
 16 | 
 17 | def load_calib():
 18 |     """
 19 |     Temporarily hardcoding the calibration matrix using calib file from 2011_09_26
 20 |     """
 21 |     calib = open("dataloaders/calib_cam_to_cam.txt", "r")
 22 |     lines = calib.readlines()
 23 |     P_rect_line = lines[25]
 24 | 
 25 |     Proj_str = P_rect_line.split(":")[1].split(" ")[1:]
 26 |     Proj = np.reshape(np.array([float(p) for p in Proj_str]),
 27 |                       (3, 4)).astype(np.float32)
 28 |     K = Proj[:3, :3]  # camera matrix
 29 | 
 30 |     # note: we will take the center crop of the images during augmentation
 31 |     # that changes the optical centers, but not focal lengths
 32 |     # K[0, 2] = K[0, 2] - 13  # from width = 1242 to 1216, with a 13-pixel cut on both sides
 33 |     # K[1, 2] = K[1, 2] - 11.5  # from width = 375 to 352, with a 11.5-pixel cut on both sides
 34 |     K[0, 2] = K[0, 2] - 13;
 35 |     K[1, 2] = K[1, 2] - 11.5;
 36 |     return K
 37 | 
 38 | 
 39 | def get_paths_and_transform(split, args):
 40 |     assert (args.use_d or args.use_rgb
 41 |             or args.use_g), 'no proper input selected'
 42 | 
 43 |     if split == "train":
 44 |         transform = train_transform
 45 |         # transform = val_transform
 46 |         glob_d = os.path.join(
 47 |             args.data_folder,
 48 |             'data_depth_velodyne/train/*_sync/proj_depth/velodyne_raw/image_0[2,3]/*.png'
 49 |         )
 50 |         glob_gt = os.path.join(
 51 |             args.data_folder,
 52 |             'data_depth_annotated/train/*_sync/proj_depth/groundtruth/image_0[2,3]/*.png'
 53 |         )
 54 | 
 55 |         # def get_rgb_paths(p):
 56 |         #     ps = p.split('/')
 57 |         #     date_liststr = []
 58 |         #     date_liststr.append(ps[-5][:10])
 59 |         #     # pnew = '/'.join([args.data_folder] + ['data_rgb'] + ps[-6:-4] +
 60 |         #     #                ps[-2:-1] + ['data'] + ps[-1:])
 61 |         #     pnew = '/'.join(date_liststr + ps[-5:-4] + ps[-2:-1] + ['data'] + ps[-1:])
 62 |         #     pnew = os.path.join(args.data_folder_rgb, pnew)
 63 |         #     return pnew
 64 | 
 65 |         def get_rgb_paths(p):
 66 |             ps = p.split('/')
 67 |             pnew = '/'.join([args.data_folder] + ['data_rgb'] + ps[-6:-4] +
 68 |                             ps[-2:-1] + ['data'] + ps[-1:])
 69 |             return pnew
 70 | 
 71 |     elif split == "val":
 72 |         if args.val == "full":
 73 |             transform = val_transform
 74 |             glob_d = os.path.join(
 75 |                 args.data_folder,
 76 |                 'data_depth_velodyne/val/*_sync/proj_depth/velodyne_raw/image_0[2,3]/*.png'
 77 |             )
 78 |             glob_gt = os.path.join(
 79 |                 args.data_folder,
 80 |                 'data_depth_annotated/val/*_sync/proj_depth/groundtruth/image_0[2,3]/*.png'
 81 |             )
 82 | 
 83 |             # def get_rgb_paths(p):
 84 |             #     ps = p.split('/')
 85 |             #     date_liststr = []
 86 |             #     date_liststr.append(ps[-5][:10])
 87 |             #     # pnew = '/'.join(ps[:-7] +
 88 |             #     #   ['data_rgb']+ps[-6:-4]+ps[-2:-1]+['data']+ps[-1:])
 89 |             #     pnew = '/'.join(date_liststr + ps[-5:-4] + ps[-2:-1] + ['data'] + ps[-1:])
 90 |             #     pnew = os.path.join(args.data_folder_rgb, pnew)
 91 |             #     return pnew
 92 | 
 93 |             def get_rgb_paths(p):
 94 |                 ps = p.split('/')
 95 |                 pnew = '/'.join(ps[:-7] +  
 96 |                     ['data_rgb']+ps[-6:-4]+ps[-2:-1]+['data']+ps[-1:])
 97 |                 return pnew
 98 | 
 99 |         elif args.val == "select":
100 |             # transform = no_transform
101 |             transform = val_transform
102 |             glob_d = os.path.join(
103 |                 args.data_folder,
104 |                 "depth_selection/val_selection_cropped/velodyne_raw/*.png")
105 |             glob_gt = os.path.join(
106 |                 args.data_folder,
107 |                 "depth_selection/val_selection_cropped/groundtruth_depth/*.png"
108 |             )
109 | 
110 |             def get_rgb_paths(p):
111 |                 return p.replace("groundtruth_depth", "image")
112 |     elif split == "test_completion":
113 |         transform = no_transform
114 |         glob_d = os.path.join(
115 |             args.data_folder,
116 |             "depth_selection/test_depth_completion_anonymous/velodyne_raw/*.png"
117 |         )
118 |         glob_gt = None  # "test_depth_completion_anonymous/"
119 |         glob_rgb = os.path.join(
120 |             args.data_folder,
121 |             "depth_selection/test_depth_completion_anonymous/image/*.png")
122 |     elif split == "test_prediction":
123 |         transform = no_transform
124 |         glob_d = None
125 |         glob_gt = None  # "test_depth_completion_anonymous/"
126 |         glob_rgb = os.path.join(
127 |             args.data_folder,
128 |             "depth_selection/test_depth_prediction_anonymous/image/*.png")
129 |     else:
130 |         raise ValueError("Unrecognized split " + str(split))
131 | 
132 |     if glob_gt is not None:
133 |         # train or val-full or val-select
134 |         paths_d = sorted(glob.glob(glob_d))
135 |         paths_gt = sorted(glob.glob(glob_gt))
136 |         paths_rgb = [get_rgb_paths(p) for p in paths_gt]
137 |     else:
138 |         # test only has d or rgb
139 |         paths_rgb = sorted(glob.glob(glob_rgb))
140 |         paths_gt = [None] * len(paths_rgb)
141 |         if split == "test_prediction":
142 |             paths_d = [None] * len(
143 |                 paths_rgb)  # test_prediction has no sparse depth
144 |         else:
145 |             paths_d = sorted(glob.glob(glob_d))
146 | 
147 |     if len(paths_d) == 0 and len(paths_rgb) == 0 and len(paths_gt) == 0:
148 |         raise (RuntimeError("Found 0 images under {}".format(glob_gt)))
149 |     if len(paths_d) == 0 and args.use_d:
150 |         raise (RuntimeError("Requested sparse depth but none was found"))
151 |     if len(paths_rgb) == 0 and args.use_rgb:
152 |         raise (RuntimeError("Requested rgb images but none was found"))
153 |     if len(paths_rgb) == 0 and args.use_g:
154 |         raise (RuntimeError("Requested gray images but no rgb was found"))
155 |     if len(paths_rgb) != len(paths_d) or len(paths_rgb) != len(paths_gt):
156 |         print(len(paths_rgb), len(paths_d), len(paths_gt))
157 |         # for i in range(999):
158 |         #    print("#####")
159 |         #    print(paths_rgb[i])
160 |         #    print(paths_d[i])
161 |         #    print(paths_gt[i])
162 |         # raise (RuntimeError("Produced different sizes for datasets"))
163 |     paths = {"rgb": paths_rgb, "d": paths_d, "gt": paths_gt}
164 |     return paths, transform
165 | 
166 | 
def rgb_read(filename):
    """Read an image file into a ``uint8`` numpy array.

    Args:
        filename: path of the image to load.

    Returns:
        numpy.ndarray: the image data with dtype ``uint8`` (values in [0, 255]).

    Raises:
        AssertionError: if ``filename`` does not exist.
    """
    assert os.path.exists(filename), "file not found: {}".format(filename)
    # The context manager closes the file even if the array conversion raises.
    with Image.open(filename) as img_file:
        rgb_png = np.array(img_file, dtype='uint8')  # in the range [0,255]
    return rgb_png
174 | 
175 | 
def depth_read(filename):
    """Load a depth map from a 16-bit png file.

    The raw png values are divided by 256 and a trailing channel axis is
    appended (for details on the encoding see the dataset's readme.txt).

    Args:
        filename: path of the depth png.

    Returns:
        numpy.ndarray: float64 array with one extra trailing axis of size 1.

    Raises:
        AssertionError: if the file does not exist, or if its maximum value
            is not above 255 (i.e. it does not look like a 16-bit depth map).
    """
    assert os.path.exists(filename), "file not found: {}".format(filename)
    # The context manager closes the file even if the array conversion raises.
    with Image.open(filename) as img_file:
        depth_png = np.array(img_file, dtype=int)
    # make sure we have a proper 16bit depth map here.. not 8bit!
    assert np.max(depth_png) > 255, \
        "np.max(depth_png)={}, path={}".format(np.max(depth_png), filename)

    # np.float was only an alias of the builtin float and has been removed
    # from NumPy; np.float64 is the same dtype.
    depth = depth_png.astype(np.float64) / 256.
    depth = np.expand_dims(depth, -1)
    return depth
192 | 
def drop_depth_measurements(depth, prob_keep):
    """Randomly zero out entries of ``depth`` in place.

    Each entry is kept with probability ``prob_keep`` (one Bernoulli draw per
    entry from numpy's global random state).

    Returns:
        The same ``depth`` array, modified in place.
    """
    keep_mask = np.random.binomial(1, prob_keep, depth.shape)
    depth *= keep_mask  # in-place, so the caller's array changes as well
    return depth
197 | 
def train_transform(rgb, sparse, target, position, args):
    """Apply the training-time augmentation to one sample.

    ``rgb``, ``sparse`` and ``target`` are bottom-cropped to
    ``(args.val_h, args.val_w)`` and share one random horizontal flip;
    ``rgb`` additionally gets a colour jitter drawn from ``args.jitter``.
    ``position`` is only bottom-cropped (it is never flipped). Unless
    ``args.not_random_crop`` is set, all inputs are then cut to the same
    random window of ``args.random_crop_height`` x ``args.random_crop_width``.

    Any of ``rgb``, ``sparse``, ``target`` and ``position`` may be ``None``
    and is then passed through untouched.

    Returns:
        tuple: ``(rgb, sparse, target, position)`` after augmentation.
    """
    oheight = args.val_h
    owidth = args.val_w

    do_flip = np.random.uniform(0.0, 1.0) < 0.5  # random horizontal flip

    transform_geometric = transforms.Compose([
        transforms.BottomCrop((oheight, owidth)),
        transforms.HorizontalFlip(do_flip),
    ])

    if sparse is not None:
        sparse = transform_geometric(sparse)
    # Guarded like every other input, so a missing target no longer crashes.
    if target is not None:
        target = transform_geometric(target)
    if rgb is not None:
        brightness = np.random.uniform(max(0, 1 - args.jitter),
                                       1 + args.jitter)
        contrast = np.random.uniform(max(0, 1 - args.jitter), 1 + args.jitter)
        saturation = np.random.uniform(max(0, 1 - args.jitter),
                                       1 + args.jitter)
        transform_rgb = transforms.Compose([
            transforms.ColorJitter(brightness, contrast, saturation, 0),
            transform_geometric
        ])
        rgb = transform_rgb(rgb)

    if position is not None:
        bottom_crop_only = transforms.Compose(
            [transforms.BottomCrop((oheight, owidth))])
        position = bottom_crop_only(position)

    # `== False` is kept on purpose: a value of None must not enable the crop.
    if args.not_random_crop == False:  # noqa: E712
        rheight = args.random_crop_height
        rwidth = args.random_crop_width
        # top-left corner of the window, shared by all inputs
        i = np.random.randint(0, oheight - rheight + 1)
        j = np.random.randint(0, owidth - rwidth + 1)

        def _crop_window(arr):
            # Cut the shared window out of a 2-D or 3-D array; anything else
            # (including None) is returned unchanged.
            if arr is None:
                return arr
            if arr.ndim == 3:
                return arr[i:i + rheight, j:j + rwidth, :]
            if arr.ndim == 2:
                return arr[i:i + rheight, j:j + rwidth]
            return arr

        rgb = _crop_window(rgb)
        sparse = _crop_window(sparse)
        target = _crop_window(target)
        position = _crop_window(position)

    return rgb, sparse, target, position
274 | 
def val_transform(rgb, sparse, target, position, args):
    """Bottom-crop every provided input to ``(args.val_h, args.val_w)``.

    Inputs that are ``None`` are returned as ``None``.

    Returns:
        tuple: ``(rgb, sparse, target, position)`` after cropping.
    """
    crop = transforms.Compose([
        transforms.BottomCrop((args.val_h, args.val_w)),
    ])

    def _cropped(item):
        # Leave missing inputs alone.
        return item if item is None else crop(item)

    rgb = _cropped(rgb)
    sparse = _cropped(sparse)
    target = _cropped(target)
    position = _cropped(position)
    return rgb, sparse, target, position
292 | 
293 | 
def no_transform(rgb, sparse, target, position, args):
    """Identity transform: hand the four inputs back unchanged (``args`` is unused)."""
    untouched = (rgb, sparse, target, position)
    return untouched
296 | 
297 | 
# Shared converter instance used by to_float_tensor below.
to_tensor = transforms.ToTensor()


def to_float_tensor(x):
    """Convert ``x`` with ``to_tensor`` and cast the result with ``.float()``."""
    return to_tensor(x).float()
300 | 
301 | 
def handle_gray(rgb, args):
    """Derive the grayscale input from ``rgb`` when it is requested.

    Returns:
        tuple: ``(rgb_out, gray)``.
        * ``(None, None)`` when ``rgb`` is ``None``;
        * ``(rgb, None)`` when ``args.use_g`` is false;
        * otherwise ``gray`` is the PIL mode-'L' conversion of ``rgb`` with a
          trailing channel axis, and ``rgb_out`` is ``rgb`` only if
          ``args.use_rgb`` is set (else ``None``).
    """
    if rgb is None:
        return None, None
    if not args.use_g:
        return rgb, None

    gray = np.expand_dims(np.array(Image.fromarray(rgb).convert('L')), -1)
    rgb_out = rgb if args.use_rgb else None
    return rgb_out, gray
315 | 
316 | 
def get_rgb_near(path, args):
    """Load a randomly chosen neighbouring frame of the image at ``path``.

    The frame number is taken from the file name (the text before the first
    '.'), a random offset in [-3, 3] excluding 0 is added, and the file
    ``<same directory>/<10-digit number>.png`` is read with ``rgb_read``.

    Args:
        path: path of the current frame; must not be ``None``.
        args: unused.

    Returns:
        The array returned by ``rgb_read`` for the neighbouring frame.

    Raises:
        AssertionError: if ``path`` is ``None``.
        RuntimeError: if no neighbouring frame exists after 20 random trials.
    """
    assert path is not None, "path is None"

    head, tail = os.path.split(path)
    number = int(tail[0:tail.find('.')])

    max_frame_diff = 3
    candidates = [
        offset for offset in range(-max_frame_diff, max_frame_diff + 1)
        if offset != 0
    ]

    # The trial counter used to be checked but never incremented, so a frame
    # without neighbours made this loop spin forever. Bound it explicitly.
    max_trials = 20
    for _ in range(max_trials):
        random_offset = choice(candidates)
        path_near = os.path.join(head, '%010d.png' % (number + random_offset))
        if os.path.exists(path_near):
            return rgb_read(path_near)

    raise RuntimeError(
        "cannot find a nearby frame in 20 trials for {}".format(path_near))
346 | 
347 | 
class KittiDepth(data.Dataset):
    """Dataset wrapper around the KITTI depth files.

    Each item is a dict of float tensors; entries whose value is ``None``
    (modalities that are missing or not requested) are left out.
    """

    def __init__(self, split, args):
        self.args = args
        self.split = split
        self.paths, self.transform = get_paths_and_transform(split, args)
        self.K = load_calib()
        self.threshold_translation = 0.1

    def __getraw__(self, index):
        """Read the untransformed (rgb, sparse, target) triple for ``index``.

        A modality is ``None`` when its path is ``None``; rgb additionally
        requires ``use_rgb`` or ``use_g``, sparse depth requires ``use_d``.
        """
        opts = self.args

        rgb = None
        rgb_path = self.paths['rgb'][index]
        if rgb_path is not None and (opts.use_rgb or opts.use_g):
            rgb = rgb_read(rgb_path)

        sparse = None
        sparse_path = self.paths['d'][index]
        if sparse_path is not None and opts.use_d:
            sparse = depth_read(sparse_path)

        target = None
        target_path = self.paths['gt'][index]
        if target_path is not None:
            target = depth_read(target_path)

        return rgb, sparse, target

    def __getitem__(self, index):
        """Return the transformed sample ``index`` as a dict of float tensors."""
        rgb, sparse, target = self.__getraw__(index)
        coords = CoordConv.AddCoordsNp(self.args.val_h, self.args.val_w).call()
        rgb, sparse, target, coords = self.transform(rgb, sparse, target,
                                                     coords, self.args)

        rgb, gray = handle_gray(rgb, self.args)
        named_values = (
            ("rgb", rgb),
            ("d", sparse),
            ("gt", target),
            ("g", gray),
            ('position', coords),
            ('K', self.K),
        )

        items = {}
        for key, val in named_values:
            if val is not None:
                items[key] = to_float_tensor(val)
        return items

    def __len__(self):
        """Number of samples, taken from the ground-truth path list."""
        gt_paths = self.paths['gt']
        return len(gt_paths)


--------------------------------------------------------------------------------
/dataloaders/pose_estimator.py:
--------------------------------------------------------------------------------
  1 | import cv2
  2 | import numpy as np
  3 | 
  4 | 
  5 | def rgb2gray(rgb):
  6 |     return np.dot(rgb[..., :3], [0.299, 0.587, 0.114])
  7 | 
  8 | 
  9 | def convert_2d_to_3d(u, v, z, K):
 10 |     v0 = K[1][2]
 11 |     u0 = K[0][2]
 12 |     fy = K[1][1]
 13 |     fx = K[0][0]
 14 |     x = (u - u0) * z / fx
 15 |     y = (v - v0) * z / fy
 16 |     return (x, y, z)
 17 | 
 18 | 
 19 | def feature_match(img1, img2):
 20 |     r''' Find features on both images and match them pairwise
 21 |    '''
 22 |     max_n_features = 1000
 23 |     # max_n_features = 500
 24 |     use_flann = False  # better not use flann
 25 | 
 26 |     detector = cv2.xfeatures2d.SIFT_create(max_n_features)
 27 | 
 28 |     # find the keypoints and descriptors with SIFT
 29 |     kp1, des1 = detector.detectAndCompute(img1, None)
 30 |     kp2, des2 = detector.detectAndCompute(img2, None)
 31 |     if (des1 is None) or (des2 is None):
 32 |         return [], []
 33 |     des1 = des1.astype(np.float32)
 34 |     des2 = des2.astype(np.float32)
 35 | 
 36 |     if use_flann:
 37 |         # FLANN parameters
 38 |         FLANN_INDEX_KDTREE = 0
 39 |         index_params = dict(algorithm=FLANN_INDEX_KDTREE, trees=5)
 40 |         search_params = dict(checks=50)
 41 |         flann = cv2.FlannBasedMatcher(index_params, search_params)
 42 |         matches = flann.knnMatch(des1, des2, k=2)
 43 |     else:
 44 |         matcher = cv2.DescriptorMatcher().create('BruteForce')
 45 |         matches = matcher.knnMatch(des1, des2, k=2)
 46 | 
 47 |     good = []
 48 |     pts1 = []
 49 |     pts2 = []
 50 |     # ratio test as per Lowe's paper
 51 |     for i, (m, n) in enumerate(matches):
 52 |         if m.distance < 0.8 * n.distance:
 53 |             good.append(m)
 54 |             pts2.append(kp2[m.trainIdx].pt)
 55 |             pts1.append(kp1[m.queryIdx].pt)
 56 | 
 57 |     pts1 = np.int32(pts1)
 58 |     pts2 = np.int32(pts2)
 59 |     return pts1, pts2
 60 | 
 61 | 
 62 | def get_pose_pnp(rgb_curr, rgb_near, depth_curr, K):
 63 |     gray_curr = rgb2gray(rgb_curr).astype(np.uint8)
 64 |     gray_near = rgb2gray(rgb_near).astype(np.uint8)
 65 |     height, width = gray_curr.shape
 66 | 
 67 |     pts2d_curr, pts2d_near = feature_match(gray_curr,
 68 |                                            gray_near)  # feature matching
 69 | 
 70 |     # dilation of depth
 71 |     kernel = np.ones((4, 4), np.uint8)
 72 |     depth_curr_dilated = cv2.dilate(depth_curr, kernel)
 73 | 
 74 |     # extract 3d pts
 75 |     pts3d_curr = []
 76 |     pts2d_near_filtered = [
 77 |     ]  # keep only feature points with depth in the current frame
 78 |     for i, pt2d in enumerate(pts2d_curr):
 79 |         # print(pt2d)
 80 |         u, v = pt2d[0], pt2d[1]
 81 |         z = depth_curr_dilated[v, u]
 82 |         if z > 0:
 83 |             xyz_curr = convert_2d_to_3d(u, v, z, K)
 84 |             pts3d_curr.append(xyz_curr)
 85 |             pts2d_near_filtered.append(pts2d_near[i])
 86 | 
 87 |     # the minimal number of points accepted by solvePnP is 4:
 88 |     if len(pts3d_curr) >= 4 and len(pts2d_near_filtered) >= 4:
 89 |         pts3d_curr = np.expand_dims(np.array(pts3d_curr).astype(np.float32),
 90 |                                     axis=1)
 91 |         pts2d_near_filtered = np.expand_dims(
 92 |             np.array(pts2d_near_filtered).astype(np.float32), axis=1)
 93 | 
 94 |         # ransac
 95 |         ret = cv2.solvePnPRansac(pts3d_curr,
 96 |                                  pts2d_near_filtered,
 97 |                                  K,
 98 |                                  distCoeffs=None)
 99 |         success = ret[0]
100 |         rotation_vector = ret[1]
101 |         translation_vector = ret[2]
102 |         return (success, rotation_vector, translation_vector)
103 |     else:
104 |         return (0, None, None)
105 | 


--------------------------------------------------------------------------------
/dataloaders/transforms.py:
--------------------------------------------------------------------------------
  1 | from __future__ import division
  2 | import torch
  3 | import math
  4 | import random
  5 | 
  6 | from PIL import Image, ImageOps, ImageEnhance
  7 | try:
  8 |     import accimage
  9 | except ImportError:
 10 |     accimage = None
 11 | 
 12 | import numpy as np
 13 | import numbers
 14 | import types
 15 | import collections
 16 | import warnings
 17 | 
 18 | import scipy.ndimage.interpolation as itpl
 19 | import skimage.transform
 20 | 
 21 | 
 22 | def _is_numpy_image(img):
 23 |     return isinstance(img, np.ndarray) and (img.ndim in {2, 3})
 24 | 
 25 | 
 26 | def _is_pil_image(img):
 27 |     if accimage is not None:
 28 |         return isinstance(img, (Image.Image, accimage.Image))
 29 |     else:
 30 |         return isinstance(img, Image.Image)
 31 | 
 32 | 
 33 | def _is_tensor_image(img):
 34 |     return torch.is_tensor(img) and img.ndimension() == 3
 35 | 
 36 | 
 37 | def adjust_brightness(img, brightness_factor):
 38 |     """Adjust brightness of an Image.
 39 |     Args:
 40 |         img (PIL Image): PIL Image to be adjusted.
 41 |         brightness_factor (float):  How much to adjust the brightness. Can be
 42 |             any non negative number. 0 gives a black image, 1 gives the
 43 |             original image while 2 increases the brightness by a factor of 2.
 44 |     Returns:
 45 |         PIL Image: Brightness adjusted image.
 46 |     """
 47 |     if not _is_pil_image(img):
 48 |         raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
 49 | 
 50 |     enhancer = ImageEnhance.Brightness(img)
 51 |     img = enhancer.enhance(brightness_factor)
 52 |     return img
 53 | 
 54 | 
 55 | def adjust_contrast(img, contrast_factor):
 56 |     """Adjust contrast of an Image.
 57 |     Args:
 58 |         img (PIL Image): PIL Image to be adjusted.
 59 |         contrast_factor (float): How much to adjust the contrast. Can be any
 60 |             non negative number. 0 gives a solid gray image, 1 gives the
 61 |             original image while 2 increases the contrast by a factor of 2.
 62 |     Returns:
 63 |         PIL Image: Contrast adjusted image.
 64 |     """
 65 |     if not _is_pil_image(img):
 66 |         raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
 67 | 
 68 |     enhancer = ImageEnhance.Contrast(img)
 69 |     img = enhancer.enhance(contrast_factor)
 70 |     return img
 71 | 
 72 | 
 73 | def adjust_saturation(img, saturation_factor):
 74 |     """Adjust color saturation of an image.
 75 |     Args:
 76 |         img (PIL Image): PIL Image to be adjusted.
 77 |         saturation_factor (float):  How much to adjust the saturation. 0 will
 78 |             give a black and white image, 1 will give the original image while
 79 |             2 will enhance the saturation by a factor of 2.
 80 |     Returns:
 81 |         PIL Image: Saturation adjusted image.
 82 |     """
 83 |     if not _is_pil_image(img):
 84 |         raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
 85 | 
 86 |     enhancer = ImageEnhance.Color(img)
 87 |     img = enhancer.enhance(saturation_factor)
 88 |     return img
 89 | 
 90 | 
 91 | def adjust_hue(img, hue_factor):
 92 |     """Adjust hue of an image.
 93 |     The image hue is adjusted by converting the image to HSV and
 94 |     cyclically shifting the intensities in the hue channel (H).
 95 |     The image is then converted back to original image mode.
 96 |     `hue_factor` is the amount of shift in H channel and must be in the
 97 |     interval `[-0.5, 0.5]`.
 98 |     See https://en.wikipedia.org/wiki/Hue for more details on Hue.
 99 |     Args:
100 |         img (PIL Image): PIL Image to be adjusted.
101 |         hue_factor (float):  How much to shift the hue channel. Should be in
102 |             [-0.5, 0.5]. 0.5 and -0.5 give complete reversal of hue channel in
103 |             HSV space in positive and negative direction respectively.
104 |             0 means no shift. Therefore, both -0.5 and 0.5 will give an image
105 |             with complementary colors while 0 gives the original image.
106 |     Returns:
107 |         PIL Image: Hue adjusted image.
108 |     """
109 |     if not (-0.5 <= hue_factor <= 0.5):
110 |         raise ValueError(
111 |             'hue_factor is not in [-0.5, 0.5].'.format(hue_factor))
112 | 
113 |     if not _is_pil_image(img):
114 |         raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
115 | 
116 |     input_mode = img.mode
117 |     if input_mode in {'L', '1', 'I', 'F'}:
118 |         return img
119 | 
120 |     h, s, v = img.convert('HSV').split()
121 | 
122 |     np_h = np.array(h, dtype=np.uint8)
123 |     # uint8 addition take cares of rotation across boundaries
124 |     with np.errstate(over='ignore'):
125 |         np_h += np.uint8(hue_factor * 255)
126 |     h = Image.fromarray(np_h, 'L')
127 | 
128 |     img = Image.merge('HSV', (h, s, v)).convert(input_mode)
129 |     return img
130 | 
131 | 
def adjust_gamma(img, gamma, gain=1):
    """Apply gamma correction (power law transform) to a PIL image.

    The image is converted to RGB, each intensity is mapped with
        I_out = 255 * gain * ((I_in / 255) ** gamma)
    clipped to [0, 255], and the result is converted back to the input mode.
    See https://en.wikipedia.org/wiki/Gamma_correction for more details.

    Args:
        img (PIL Image): image to adjust.
        gamma (float): non-negative real number. Values larger than 1 make
            the shadows darker, values smaller than 1 make dark regions
            lighter.
        gain (float): the constant multiplier.
    Returns:
        PIL Image: the gamma-corrected image.
    Raises:
        TypeError: if ``img`` is not a PIL image.
        ValueError: if ``gamma`` is negative.
    """
    if not _is_pil_image(img):
        raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
    if gamma < 0:
        raise ValueError('Gamma should be a non-negative real number')

    original_mode = img.mode
    pixels = np.array(img.convert('RGB'), dtype=np.float32)
    corrected = 255 * gain * ((pixels / 255)**gamma)
    as_bytes = np.uint8(np.clip(corrected, 0, 255))

    return Image.fromarray(as_bytes, 'RGB').convert(original_mode)
160 | 
161 | 
class Compose(object):
    """Chain several transforms into one callable.

    Args:
        transforms (list of callables): transforms applied in list order;
            each one receives the output of the previous one.
    Example:
        >>> transforms.Compose([
        >>>     transforms.CenterCrop(10),
        >>>     transforms.ToTensor(),
        >>> ])
    """
    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, img):
        result = img
        for step in self.transforms:
            result = step(result)
        return result
179 | 
180 | 
class ToTensor(object):
    """Convert a ``numpy.ndarray`` image to a torch tensor.

    A 3-D array (H x W x C) becomes a tensor of shape (C x H x W); a 2-D
    array keeps its shape. The data is copied and handed to
    ``torch.from_numpy``; no dtype conversion is done here.
    """
    def __call__(self, img):
        """Convert ``img`` to a tensor.

        Args:
            img (numpy.ndarray): image with 2 or 3 dimensions.
        Returns:
            Tensor: the converted image.
        Raises:
            TypeError: if ``img`` is not accepted by ``_is_numpy_image``.
        """
        if not (_is_numpy_image(img)):
            raise TypeError('img should be ndarray. Got {}'.format(type(img)))

        if isinstance(img, np.ndarray):
            n_dims = img.ndim
            if n_dims == 3:
                # move the channel axis to the front
                channels_first = img.transpose((2, 0, 1))
                return torch.from_numpy(channels_first.copy())
            if n_dims == 2:
                return torch.from_numpy(img.copy())
            raise RuntimeError(
                'img should be ndarray with 2 or 3 dimensions. Got {}'.
                format(img.ndim))
207 | 
208 | 
class NormalizeNumpyArray(object):
    """Normalize a ``numpy.ndarray`` with mean and standard deviation.
    For each of the first three channels of the input this transform computes
    ``input[channel] = (input[channel] - mean[channel]) / std[channel]``
    Args:
        mean (sequence): Sequence of means for each channel.
        std (sequence): Sequence of standard deviations for each channel.
    """
    def __init__(self, mean, std):
        self.mean = mean
        self.std = std

    def __call__(self, img):
        """
        Args:
            img (numpy.ndarray): Image of size (H, W, C) to be normalized.
        Returns:
            numpy.ndarray: the same array, normalized in place.
        Raises:
            TypeError: if ``img`` is not accepted by ``_is_numpy_image``.
        """
        if not (_is_numpy_image(img)):
            raise TypeError('img should be ndarray. Got {}'.format(type(img)))
        # TODO: make efficient
        # (A leftover debug print of img.shape used to run here on every call.)
        for i in range(3):
            img[:, :, i] = (img[:, :, i] - self.mean[i]) / self.std[i]
        return img
236 | 
237 | 
class NormalizeTensor(object):
    """Normalize a tensor image channel by channel, in place.

    For every channel paired with an entry of ``mean`` and ``std`` this
    computes ``input[channel] = (input[channel] - mean[channel]) / std[channel]``.

    Args:
        mean (sequence): Sequence of means for each channel.
        std (sequence): Sequence of standard deviations for each channel.
    """
    def __init__(self, mean, std):
        self.mean = mean
        self.std = std

    def __call__(self, tensor):
        """
        Args:
            tensor (Tensor): Tensor image of size (C, H, W) to be normalized.
        Returns:
            Tensor: the same tensor, normalized in place.
        Raises:
            TypeError: if ``tensor`` is not accepted by ``_is_tensor_image``.
        """
        if not _is_tensor_image(tensor):
            raise TypeError('tensor is not a torch image.')
        # TODO: make efficient
        per_channel = zip(tensor, self.mean, self.std)
        for channel, channel_mean, channel_std in per_channel:
            # in-place ops on the channel view modify ``tensor`` itself
            channel.sub_(channel_mean)
            channel.div_(channel_std)
        return tensor
264 | 
265 | 
class Rotate(object):
    """Rotate a ``numpy.ndarray`` by a fixed angle.

    Args:
        angle (float): The rotation angle in degrees.
    """
    def __init__(self, angle):
        self.angle = angle

    def __call__(self, img):
        """
        Args:
            img (numpy.ndarray): Image to be rotated.
        Returns:
            numpy.ndarray: Rotated image (``skimage.transform.rotate`` is
            called with ``resize=False``).
        """
        # order=0 means nearest-neighbor type interpolation
        rotated = skimage.transform.rotate(img,
                                           self.angle,
                                           resize=False,
                                           order=0)
        return rotated
284 | 
285 | 
class Resize(object):
    """Rescale the given ``numpy.ndarray`` by a scale factor.

    Args:
        size (float): scale factor handed to ``skimage.transform.rescale``.
            Must be a ``float`` (checked with an assertion).
        interpolation (str, optional): stored on the instance but not used
            by ``__call__``, which always rescales with ``order=0``
            (nearest neighbour).
    """
    def __init__(self, size, interpolation='nearest'):
        assert isinstance(size, float)
        self.size = size
        self.interpolation = interpolation

    def __call__(self, img):
        """
        Args:
            img (numpy.ndarray): Image with 2 or 3 dimensions to be scaled.
        Returns:
            numpy.ndarray: Rescaled image.
        Raises:
            RuntimeError: if ``img`` has neither 2 nor 3 dimensions.
        """
        if img.ndim in (2, 3):
            return skimage.transform.rescale(img, self.size, order=0)
        # The exception used to be constructed without ``raise``, so
        # unsupported input silently produced None.
        raise RuntimeError(
            'img should be ndarray with 2 or 3 dimensions. Got {}'.format(
                img.ndim))
317 | 
318 | 
class CenterCrop(object):
    """Crop a ``numpy.ndarray`` image around its center.

    The first two axes of the image are treated as height and width.

    Args:
        size (sequence or int): Desired output size of the crop. If size is an
            int instead of sequence like (h, w), a square crop (size, size) is
            made.
    """
    def __init__(self, size):
        is_scalar = isinstance(size, numbers.Number)
        self.size = (int(size), int(size)) if is_scalar else size

    @staticmethod
    def get_params(img, output_size):
        """Compute the window of a centered crop.

        Args:
            img (numpy.ndarray (H x W [x C])): Image to be cropped.
            output_size (tuple): Expected output size ``(h, w)`` of the crop.
        Returns:
            tuple: ``(top, left, h, w)`` where ``top``/``left`` are the first
            row/column of the crop and ``h``/``w`` its height/width.
        """
        img_h, img_w = img.shape[0], img.shape[1]
        crop_h, crop_w = output_size
        top = int(round((img_h - crop_h) / 2.))
        left = int(round((img_w - crop_w) / 2.))
        return top, left, crop_h, crop_w

    def __call__(self, img):
        """
        Args:
            img (numpy.ndarray (H x W [x C])): Image to be cropped.
        Returns:
            numpy.ndarray: Cropped image.
        Raises:
            TypeError: If ``_is_numpy_image`` rejects ``img``.
            RuntimeError: If ``img`` has neither 2 nor 3 dimensions.
        """
        top, left, crop_h, crop_w = self.get_params(img, self.size)
        if not (_is_numpy_image(img)):
            raise TypeError('img should be ndarray. Got {}'.format(type(img)))
        rows = slice(top, top + crop_h)
        cols = slice(left, left + crop_w)
        if img.ndim == 3:
            return img[rows, cols, :]
        if img.ndim == 2:
            return img[rows, cols]
        raise RuntimeError(
            'img should be ndarray with 2 or 3 dimensions. Got {}'.format(
                img.ndim))
377 | 
378 | 
class BottomCrop(object):
    """Crop a ``numpy.ndarray`` image at its bottom edge.

    The crop is flush with the last row and horizontally centered. The first
    two axes of the image are treated as height and width.

    Args:
        size (sequence or int): Desired output size of the crop. If size is an
            int instead of sequence like (h, w), a square crop (size, size) is
            made.
    """
    def __init__(self, size):
        if not isinstance(size, numbers.Number):
            self.size = size
            return
        edge = int(size)
        self.size = (edge, edge)

    @staticmethod
    def get_params(img, output_size):
        """Compute the window of a bottom crop.

        Args:
            img (numpy.ndarray (H x W [x C])): Image to be cropped.
            output_size (tuple): Expected output size ``(h, w)`` of the crop.
        Returns:
            tuple: ``(top, left, h, w)`` where ``top``/``left`` are the first
            row/column of the crop and ``h``/``w`` its height/width.
        """
        img_h = img.shape[0]
        img_w = img.shape[1]
        crop_h, crop_w = output_size
        # Rows run up to the last image row; columns are centered.
        top = img_h - crop_h
        left = int(round((img_w - crop_w) / 2.))
        return top, left, crop_h, crop_w

    def __call__(self, img):
        """
        Args:
            img (numpy.ndarray (H x W [x C])): Image to be cropped.
        Returns:
            numpy.ndarray: Cropped image.
        Raises:
            TypeError: If ``_is_numpy_image`` rejects ``img``.
            RuntimeError: If ``img`` has neither 2 nor 3 dimensions.
        """
        top, left, crop_h, crop_w = self.get_params(img, self.size)
        if not (_is_numpy_image(img)):
            raise TypeError('img should be ndarray. Got {}'.format(type(img)))
        bottom = top + crop_h
        right = left + crop_w
        if img.ndim == 3:
            return img[top:bottom, left:right, :]
        if img.ndim == 2:
            return img[top:bottom, left:right]
        raise RuntimeError(
            'img should be ndarray with 2 or 3 dimensions. Got {}'.format(
                img.ndim))
437 | 
438 | 
class RandomCrop(object):
    """Crop a ``numpy.ndarray`` image at a random location.

    The first two axes of the image are treated as height and width.

    Args:
        size (sequence or int): Desired output size of the crop. If size is an
            int instead of sequence like (h, w), a square crop (size, size) is
            made.
    """
    def __init__(self, size):
        is_scalar = isinstance(size, numbers.Number)
        self.size = (int(size), int(size)) if is_scalar else size

    @staticmethod
    def get_params(img, output_size):
        """Draw the window of a random crop.

        Args:
            img (numpy.ndarray (H x W [x C])): Image to be cropped.
            output_size (tuple): Expected output size ``(h, w)`` of the crop.
        Returns:
            tuple: ``(top, left, h, w)`` where ``top``/``left`` are drawn with
            ``np.random.randint`` so that the crop stays inside the image.
        """
        img_h = img.shape[0]
        img_w = img.shape[1]
        crop_h, crop_w = output_size

        # randint's upper bound is exclusive, hence the +1. The row offset is
        # drawn before the column offset.
        top = np.random.randint(0, img_h - crop_h + 1)
        left = np.random.randint(0, img_w - crop_w + 1)
        return top, left, crop_h, crop_w

    def __call__(self, img):
        """
        Args:
            img (numpy.ndarray (H x W [x C])): Image to be cropped.
        Returns:
            numpy.ndarray: Cropped image.
        Raises:
            TypeError: If ``_is_numpy_image`` rejects ``img``.
            RuntimeError: If ``img`` has neither 2 nor 3 dimensions.
        """
        top, left, crop_h, crop_w = self.get_params(img, self.size)
        if not (_is_numpy_image(img)):
            raise TypeError('img should be ndarray. Got {}'.format(type(img)))
        window = (slice(top, top + crop_h), slice(left, left + crop_w))
        if img.ndim == 3:
            return img[window[0], window[1], :]
        if img.ndim == 2:
            return img[window[0], window[1]]
        raise RuntimeError(
            'img should be ndarray with 2 or 3 dimensions. Got {}'.format(
                img.ndim))
495 | 
496 | 
class Crop(object):
    """Crop the given ``numpy.ndarray`` to an explicit rectangle.

    The first two axes of the image are treated as height and width.

    Args:
        crop (sequence): ``(x_l, x_r, y_b, y_t)``. Columns ``x_l:x_r`` and
            rows ``y_b:y_t`` are kept; both ranges are half-open, so ``x_r``
            and ``y_t`` are exclusive and may equal the image width/height.
    """
    def __init__(self, crop):
        self.crop = crop

    @staticmethod
    def get_params(img, crop):
        """Validate the crop rectangle against the image size.

        Args:
            img (numpy.ndarray (H x W [x C])): Image to be cropped.
            crop (sequence): ``(x_l, x_r, y_b, y_t)`` as described on the
                class.
        Returns:
            tuple: The validated ``(x_l, x_r, y_b, y_t)``.
        Raises:
            AssertionError: If the rectangle is empty or leaves the image.
        """
        x_l, x_r, y_b, y_t = crop
        h = img.shape[0]
        w = img.shape[1]
        # Start indices must address an existing pixel. End indices are
        # exclusive slice bounds, so they may be equal to the dimension;
        # requiring them to be strictly smaller made it impossible to keep
        # the last row or column.
        assert 0 <= x_l < w, 'x_l out of range'
        assert 0 <= x_r <= w, 'x_r out of range'
        assert 0 <= y_b < h, 'y_b out of range'
        assert 0 <= y_t <= h, 'y_t out of range'
        assert x_l < x_r and y_b < y_t, 'empty crop'

        return x_l, x_r, y_b, y_t

    def __call__(self, img):
        """
        Args:
            img (numpy.ndarray (H x W [x C])): Image to be cropped.
        Returns:
            numpy.ndarray: ``img[y_b:y_t, x_l:x_r]`` (all channels kept).
        Raises:
            TypeError: If ``_is_numpy_image`` rejects ``img``.
            RuntimeError: If ``img`` has neither 2 nor 3 dimensions.
        """
        # Check the type before get_params reads ``img.shape``.
        if not (_is_numpy_image(img)):
            raise TypeError('img should be ndarray. Got {}'.format(type(img)))
        x_l, x_r, y_b, y_t = self.get_params(img, self.crop)
        if img.ndim == 3:
            return img[y_b:y_t, x_l:x_r, :]
        elif img.ndim == 2:
            return img[y_b:y_t, x_l:x_r]
        else:
            raise RuntimeError(
                'img should be ndarray with 2 or 3 dimensions. Got {}'.format(
                    img.ndim))
551 | 
552 | 
class Lambda(object):
    """Apply a user-defined callable as a transform.

    Args:
        lambd (callable): Lambda/function (or any other callable, such as a
            builtin, a ``functools.partial`` or an object defining
            ``__call__``) to be used for transform.
    """
    def __init__(self, lambd):
        # ``types.LambdaType`` only matches plain Python functions, which
        # needlessly rejected other callables.
        assert callable(lambd), 'lambd should be callable'
        self.lambd = lambd

    def __call__(self, img):
        """Return ``lambd(img)``."""
        return self.lambd(img)
564 | 
565 | 
class HorizontalFlip(object):
    """Optionally mirror a ``numpy.ndarray`` image left to right.

    Args:
        do_flip (boolean): whether or not do horizontal flip.
    """
    def __init__(self, do_flip):
        self.do_flip = do_flip

    def __call__(self, img):
        """
        Args:
            img (numpy.ndarray): Image to be flipped.
        Returns:
            numpy.ndarray: ``np.fliplr(img)`` when ``do_flip`` is truthy,
            otherwise ``img`` itself.
        Raises:
            TypeError: If ``_is_numpy_image`` rejects ``img``.
        """
        if not (_is_numpy_image(img)):
            raise TypeError('img should be ndarray. Got {}'.format(type(img)))

        if not self.do_flip:
            return img
        return np.fliplr(img)
588 | 
589 | 
class ColorJitter(object):
    """Change the brightness, contrast, saturation and hue of an image.

    The four arguments are forwarded unchanged to ``adjust_brightness``,
    ``adjust_contrast``, ``adjust_saturation`` and ``adjust_hue``; this class
    does not sample any factor itself. The only randomness is the order of
    the four adjustments, which is shuffled once when the object is built.

    Args:
        brightness (float): Value passed to ``adjust_brightness``.
        contrast (float): Value passed to ``adjust_contrast``.
        saturation (float): Value passed to ``adjust_saturation``.
        hue (float): Value passed to ``adjust_hue``.
    """
    def __init__(self, brightness=0, contrast=0, saturation=0, hue=0):
        steps = [
            Lambda(lambda img: adjust_brightness(img, brightness)),
            Lambda(lambda img: adjust_contrast(img, contrast)),
            Lambda(lambda img: adjust_saturation(img, saturation)),
            Lambda(lambda img: adjust_hue(img, hue)),
        ]
        # In-place shuffle; the resulting order is fixed for this instance.
        np.random.shuffle(steps)
        self.transform = Compose(steps)

    def __call__(self, img):
        """
        Args:
            img (numpy.ndarray): Input image; it is converted with
                ``Image.fromarray`` before the adjustments run.
        Returns:
            numpy.ndarray: Color jittered image.
        Raises:
            TypeError: If ``_is_numpy_image`` rejects ``img``.
        """
        if not (_is_numpy_image(img)):
            raise TypeError('img should be ndarray. Got {}'.format(type(img)))

        jittered = self.transform(Image.fromarray(img))
        return np.array(jittered)


--------------------------------------------------------------------------------
/demo.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmomoy/CHNet/a4c9ad267f87cafe9fd95e5e3a70e91a882d94f3/demo.gif


--------------------------------------------------------------------------------
/download/rgb_train_downloader.sh:
--------------------------------------------------------------------------------
  1 | #!/bin/bash
  2 | 
  3 | files=(
  4 | # 2011_09_26_calib.zip
  5 | 2011_09_26_drive_0001
  6 | # 2011_09_26_drive_0002
  7 | # 2011_09_26_drive_0005
  8 | 2011_09_26_drive_0009
  9 | 2011_09_26_drive_0011
 10 | # 2011_09_26_drive_0013
 11 | 2011_09_26_drive_0014
 12 | 2011_09_26_drive_0015
 13 | 2011_09_26_drive_0017
 14 | 2011_09_26_drive_0018
 15 | 2011_09_26_drive_0019
 16 | # 2011_09_26_drive_0020
 17 | 2011_09_26_drive_0022
 18 | # 2011_09_26_drive_0023
 19 | 2011_09_26_drive_0027
 20 | 2011_09_26_drive_0028
 21 | 2011_09_26_drive_0029
 22 | 2011_09_26_drive_0032
 23 | 2011_09_26_drive_0035
 24 | # 2011_09_26_drive_0036
 25 | 2011_09_26_drive_0039
 26 | 2011_09_26_drive_0046
 27 | 2011_09_26_drive_0048
 28 | 2011_09_26_drive_0051
 29 | 2011_09_26_drive_0052
 30 | 2011_09_26_drive_0056
 31 | 2011_09_26_drive_0057
 32 | 2011_09_26_drive_0059
 33 | 2011_09_26_drive_0060
 34 | 2011_09_26_drive_0061
 35 | 2011_09_26_drive_0064
 36 | 2011_09_26_drive_0070
 37 | # 2011_09_26_drive_0079
 38 | 2011_09_26_drive_0084
 39 | 2011_09_26_drive_0086
 40 | 2011_09_26_drive_0087
 41 | 2011_09_26_drive_0091
 42 | 2011_09_26_drive_0093
 43 | # 2011_09_26_drive_0095
 44 | 2011_09_26_drive_0096
 45 | 2011_09_26_drive_0101
 46 | 2011_09_26_drive_0104
 47 | 2011_09_26_drive_0106
 48 | # 2011_09_26_drive_0113
 49 | 2011_09_26_drive_0117
 50 | # 2011_09_26_drive_0119
 51 | # 2011_09_28_calib.zip
 52 | 2011_09_28_drive_0001
 53 | 2011_09_28_drive_0002
 54 | 2011_09_28_drive_0016
 55 | 2011_09_28_drive_0021
 56 | 2011_09_28_drive_0034
 57 | 2011_09_28_drive_0035
 58 | # 2011_09_28_drive_0037
 59 | 2011_09_28_drive_0038
 60 | 2011_09_28_drive_0039
 61 | 2011_09_28_drive_0043
 62 | 2011_09_28_drive_0045
 63 | 2011_09_28_drive_0047
 64 | 2011_09_28_drive_0053
 65 | 2011_09_28_drive_0054
 66 | 2011_09_28_drive_0057
 67 | 2011_09_28_drive_0065
 68 | 2011_09_28_drive_0066
 69 | 2011_09_28_drive_0068
 70 | 2011_09_28_drive_0070
 71 | 2011_09_28_drive_0071
 72 | 2011_09_28_drive_0075
 73 | 2011_09_28_drive_0077
 74 | 2011_09_28_drive_0078
 75 | 2011_09_28_drive_0080
 76 | 2011_09_28_drive_0082
 77 | 2011_09_28_drive_0086
 78 | 2011_09_28_drive_0087
 79 | 2011_09_28_drive_0089
 80 | 2011_09_28_drive_0090
 81 | 2011_09_28_drive_0094
 82 | 2011_09_28_drive_0095
 83 | 2011_09_28_drive_0096
 84 | 2011_09_28_drive_0098
 85 | 2011_09_28_drive_0100
 86 | 2011_09_28_drive_0102
 87 | 2011_09_28_drive_0103
 88 | 2011_09_28_drive_0104
 89 | 2011_09_28_drive_0106
 90 | 2011_09_28_drive_0108
 91 | 2011_09_28_drive_0110
 92 | 2011_09_28_drive_0113
 93 | 2011_09_28_drive_0117
 94 | 2011_09_28_drive_0119
 95 | 2011_09_28_drive_0121
 96 | 2011_09_28_drive_0122
 97 | 2011_09_28_drive_0125
 98 | 2011_09_28_drive_0126
 99 | 2011_09_28_drive_0128
100 | 2011_09_28_drive_0132
101 | 2011_09_28_drive_0134
102 | 2011_09_28_drive_0135
103 | 2011_09_28_drive_0136
104 | 2011_09_28_drive_0138
105 | 2011_09_28_drive_0141
106 | 2011_09_28_drive_0143
107 | 2011_09_28_drive_0145
108 | 2011_09_28_drive_0146
109 | 2011_09_28_drive_0149
110 | 2011_09_28_drive_0153
111 | 2011_09_28_drive_0154
112 | 2011_09_28_drive_0155
113 | 2011_09_28_drive_0156
114 | 2011_09_28_drive_0160
115 | 2011_09_28_drive_0161
116 | 2011_09_28_drive_0162
117 | 2011_09_28_drive_0165
118 | 2011_09_28_drive_0166
119 | 2011_09_28_drive_0167
120 | 2011_09_28_drive_0168
121 | 2011_09_28_drive_0171
122 | 2011_09_28_drive_0174
123 | 2011_09_28_drive_0177
124 | 2011_09_28_drive_0179
125 | 2011_09_28_drive_0183
126 | 2011_09_28_drive_0184
127 | 2011_09_28_drive_0185
128 | 2011_09_28_drive_0186
129 | 2011_09_28_drive_0187
130 | 2011_09_28_drive_0191
131 | 2011_09_28_drive_0192
132 | 2011_09_28_drive_0195
133 | 2011_09_28_drive_0198
134 | 2011_09_28_drive_0199
135 | 2011_09_28_drive_0201
136 | 2011_09_28_drive_0204
137 | 2011_09_28_drive_0205
138 | 2011_09_28_drive_0208
139 | 2011_09_28_drive_0209
140 | 2011_09_28_drive_0214
141 | 2011_09_28_drive_0216
142 | 2011_09_28_drive_0220
143 | 2011_09_28_drive_0222
144 | # 2011_09_28_drive_0225
145 | # 2011_09_29_calib.zip
146 | 2011_09_29_drive_0004
147 | # 2011_09_29_drive_0026
148 | 2011_09_29_drive_0071
149 | # 2011_09_29_drive_0108
150 | # 2011_09_30_calib.zip
151 | # 2011_09_30_drive_0016
152 | 2011_09_30_drive_0018
153 | 2011_09_30_drive_0020
154 | 2011_09_30_drive_0027
155 | 2011_09_30_drive_0028
156 | 2011_09_30_drive_0033
157 | 2011_09_30_drive_0034
158 | # 2011_09_30_drive_0072
159 | # 2011_10_03_calib.zip
160 | 2011_10_03_drive_0027
161 | 2011_10_03_drive_0034
162 | 2011_10_03_drive_0042
163 | # 2011_10_03_drive_0047
164 | # 2011_10_03_drive_0058
165 | )
166 | 
# Destination of the extracted drives, relative to the current directory.
basedir='../data/data_rgb/train/'
mkdir -p "$basedir"
echo "Saving to $basedir"
for i in "${files[@]}"; do
  # The recording date is the part of the drive name before "_drive_".
  datadate="${i%%_drive_*}"
  echo "$datadate"
  shortname="${i}_sync.zip"
  fullname="${i}/${i}_sync.zip"
  rm -f "$shortname" # remove zip file left over from an earlier run
  echo "Downloading: $shortname"

  # A failed download or extraction leaves nothing to move, so report it and
  # go on to the next drive instead of running the remaining steps.
  if ! wget "https://s3.eu-central-1.amazonaws.com/avg-kitti/raw_data/$fullname"; then
    echo "Download failed: $shortname" >&2
    rm -f "$shortname"
    continue
  fi
  if ! unzip -o "$shortname"; then
    echo "Extraction failed: $shortname" >&2
    rm -f "$shortname"
    continue
  fi
  mv "$datadate/${i}_sync" "$basedir${i}_sync"
  rmdir "$datadate"
  # Discard the image_00, image_01, velodyne_points and oxts sub-folders of
  # the extracted drive.
  rm -rf "$basedir${i}_sync/image_00" "$basedir${i}_sync/image_01" "$basedir${i}_sync/velodyne_points" "$basedir${i}_sync/oxts"
  rm "$shortname" # remove zip file
done
185 | 
186 | 
187 | 


--------------------------------------------------------------------------------
/download/rgb_val_downloader.sh:
--------------------------------------------------------------------------------
  1 | #!/bin/bash
  2 | 
  3 | files=(
  4 | # 2011_09_26_calib.zip
  5 | # 2011_09_26_drive_0001
  6 | 2011_09_26_drive_0002
  7 | 2011_09_26_drive_0005
  8 | # 2011_09_26_drive_0009
  9 | # 2011_09_26_drive_0011
 10 | 2011_09_26_drive_0013
 11 | # 2011_09_26_drive_0014
 12 | # 2011_09_26_drive_0015
 13 | # 2011_09_26_drive_0017
 14 | # 2011_09_26_drive_0018
 15 | # 2011_09_26_drive_0019
 16 | 2011_09_26_drive_0020
 17 | # 2011_09_26_drive_0022
 18 | 2011_09_26_drive_0023
 19 | # 2011_09_26_drive_0027
 20 | # 2011_09_26_drive_0028
 21 | # 2011_09_26_drive_0029
 22 | # 2011_09_26_drive_0032
 23 | # 2011_09_26_drive_0035
 24 | 2011_09_26_drive_0036
 25 | # 2011_09_26_drive_0039
 26 | # 2011_09_26_drive_0046
 27 | # 2011_09_26_drive_0048
 28 | # 2011_09_26_drive_0051
 29 | # 2011_09_26_drive_0052
 30 | # 2011_09_26_drive_0056
 31 | # 2011_09_26_drive_0057
 32 | # 2011_09_26_drive_0059
 33 | # 2011_09_26_drive_0060
 34 | # 2011_09_26_drive_0061
 35 | # 2011_09_26_drive_0064
 36 | # 2011_09_26_drive_0070
 37 | 2011_09_26_drive_0079
 38 | # 2011_09_26_drive_0084
 39 | # 2011_09_26_drive_0086
 40 | # 2011_09_26_drive_0087
 41 | # 2011_09_26_drive_0091
 42 | # 2011_09_26_drive_0093
 43 | 2011_09_26_drive_0095
 44 | # 2011_09_26_drive_0096
 45 | # 2011_09_26_drive_0101
 46 | # 2011_09_26_drive_0104
 47 | # 2011_09_26_drive_0106
 48 | 2011_09_26_drive_0113
 49 | # 2011_09_26_drive_0117
 50 | 2011_09_26_drive_0119
 51 | # 2011_09_28_calib.zip
 52 | # 2011_09_28_drive_0001
 53 | # 2011_09_28_drive_0002
 54 | # 2011_09_28_drive_0016
 55 | # 2011_09_28_drive_0021
 56 | # 2011_09_28_drive_0034
 57 | # 2011_09_28_drive_0035
 58 | 2011_09_28_drive_0037
 59 | # 2011_09_28_drive_0038
 60 | # 2011_09_28_drive_0039
 61 | # 2011_09_28_drive_0043
 62 | # 2011_09_28_drive_0045
 63 | # 2011_09_28_drive_0047
 64 | # 2011_09_28_drive_0053
 65 | # 2011_09_28_drive_0054
 66 | # 2011_09_28_drive_0057
 67 | # 2011_09_28_drive_0065
 68 | # 2011_09_28_drive_0066
 69 | # 2011_09_28_drive_0068
 70 | # 2011_09_28_drive_0070
 71 | # 2011_09_28_drive_0071
 72 | # 2011_09_28_drive_0075
 73 | # 2011_09_28_drive_0077
 74 | # 2011_09_28_drive_0078
 75 | # 2011_09_28_drive_0080
 76 | # 2011_09_28_drive_0082
 77 | # 2011_09_28_drive_0086
 78 | # 2011_09_28_drive_0087
 79 | # 2011_09_28_drive_0089
 80 | # 2011_09_28_drive_0090
 81 | # 2011_09_28_drive_0094
 82 | # 2011_09_28_drive_0095
 83 | # 2011_09_28_drive_0096
 84 | # 2011_09_28_drive_0098
 85 | # 2011_09_28_drive_0100
 86 | # 2011_09_28_drive_0102
 87 | # 2011_09_28_drive_0103
 88 | # 2011_09_28_drive_0104
 89 | # 2011_09_28_drive_0106
 90 | # 2011_09_28_drive_0108
 91 | # 2011_09_28_drive_0110
 92 | # 2011_09_28_drive_0113
 93 | # 2011_09_28_drive_0117
 94 | # 2011_09_28_drive_0119
 95 | # 2011_09_28_drive_0121
 96 | # 2011_09_28_drive_0122
 97 | # 2011_09_28_drive_0125
 98 | # 2011_09_28_drive_0126
 99 | # 2011_09_28_drive_0128
100 | # 2011_09_28_drive_0132
101 | # 2011_09_28_drive_0134
102 | # 2011_09_28_drive_0135
103 | # 2011_09_28_drive_0136
104 | # 2011_09_28_drive_0138
105 | # 2011_09_28_drive_0141
106 | # 2011_09_28_drive_0143
107 | # 2011_09_28_drive_0145
108 | # 2011_09_28_drive_0146
109 | # 2011_09_28_drive_0149
110 | # 2011_09_28_drive_0153
111 | # 2011_09_28_drive_0154
112 | # 2011_09_28_drive_0155
113 | # 2011_09_28_drive_0156
114 | # 2011_09_28_drive_0160
115 | # 2011_09_28_drive_0161
116 | # 2011_09_28_drive_0162
117 | # 2011_09_28_drive_0165
118 | # 2011_09_28_drive_0166
119 | # 2011_09_28_drive_0167
120 | # 2011_09_28_drive_0168
121 | # 2011_09_28_drive_0171
122 | # 2011_09_28_drive_0174
123 | # 2011_09_28_drive_0177
124 | # 2011_09_28_drive_0179
125 | # 2011_09_28_drive_0183
126 | # 2011_09_28_drive_0184
127 | # 2011_09_28_drive_0185
128 | # 2011_09_28_drive_0186
129 | # 2011_09_28_drive_0187
130 | # 2011_09_28_drive_0191
131 | # 2011_09_28_drive_0192
132 | # 2011_09_28_drive_0195
133 | # 2011_09_28_drive_0198
134 | # 2011_09_28_drive_0199
135 | # 2011_09_28_drive_0201
136 | # 2011_09_28_drive_0204
137 | # 2011_09_28_drive_0205
138 | # 2011_09_28_drive_0208
139 | # 2011_09_28_drive_0209
140 | # 2011_09_28_drive_0214
141 | # 2011_09_28_drive_0216
142 | # 2011_09_28_drive_0220
143 | # 2011_09_28_drive_0222
144 | 2011_09_28_drive_0225
145 | # 2011_09_29_calib.zip
146 | # 2011_09_29_drive_0004
147 | 2011_09_29_drive_0026
148 | # 2011_09_29_drive_0071
149 | 2011_09_29_drive_0108
150 | # 2011_09_30_calib.zip
151 | 2011_09_30_drive_0016
152 | # 2011_09_30_drive_0018
153 | # 2011_09_30_drive_0020
154 | # 2011_09_30_drive_0027
155 | # 2011_09_30_drive_0028
156 | # 2011_09_30_drive_0033
157 | # 2011_09_30_drive_0034
158 | 2011_09_30_drive_0072
159 | # 2011_10_03_calib.zip
160 | # 2011_10_03_drive_0027
161 | # 2011_10_03_drive_0034
162 | # 2011_10_03_drive_0042
163 | 2011_10_03_drive_0047
164 | 2011_10_03_drive_0058
165 | )
166 | 
# Destination of the extracted drives, relative to the current directory.
basedir='../data/data_rgb/val/'
mkdir -p "$basedir"
echo "Saving to $basedir"
for i in "${files[@]}"; do
  # The recording date is the part of the drive name before "_drive_".
  datadate="${i%%_drive_*}"
  echo "$datadate"
  shortname="${i}_sync.zip"
  fullname="${i}/${i}_sync.zip"
  rm -f "$shortname" # remove zip file left over from an earlier run
  echo "Downloading: $shortname"

  # A failed download or extraction leaves nothing to move, so report it and
  # go on to the next drive instead of running the remaining steps.
  if ! wget "https://s3.eu-central-1.amazonaws.com/avg-kitti/raw_data/$fullname"; then
    echo "Download failed: $shortname" >&2
    rm -f "$shortname"
    continue
  fi
  if ! unzip -o "$shortname"; then
    echo "Extraction failed: $shortname" >&2
    rm -f "$shortname"
    continue
  fi
  mv "$datadate/${i}_sync" "$basedir${i}_sync"
  rmdir "$datadate"
  # Discard the image_00, image_01, velodyne_points and oxts sub-folders of
  # the extracted drive.
  rm -rf "$basedir${i}_sync/image_00" "$basedir${i}_sync/image_01" "$basedir${i}_sync/velodyne_points" "$basedir${i}_sync/oxts"
  rm "$shortname" # remove zip file
done
185 | 
186 | 
187 | 


--------------------------------------------------------------------------------
/helper.py:
--------------------------------------------------------------------------------
  1 | import math
  2 | import os, time
  3 | import shutil
  4 | import torch
  5 | import csv
  6 | import vis_utils
  7 | from metrics import Result
  8 | 
  9 | fieldnames = [
 10 |     'epoch', 'rmse', 'photo', 'mae', 'irmse', 'imae', 'mse', 'absrel', 'lg10',
 11 |     'silog', 'squared_rel', 'delta1', 'delta2', 'delta3', 'data_time',
 12 |     'gpu_time'
 13 | ]
 14 | 
 15 | 
 16 | class logger:
 17 |     def __init__(self, args, prepare=True):
 18 |         self.args = args
 19 |         output_directory = get_folder_name(args)
 20 |         self.output_directory = output_directory
 21 |         self.best_result = Result()
 22 |         self.best_result.set_to_worst()
 23 | 
 24 |         if not prepare:
 25 |             return
 26 |         if not os.path.exists(output_directory):
 27 |             os.makedirs(output_directory)
 28 |         self.train_csv = os.path.join(output_directory, 'train.csv')
 29 |         self.val_csv = os.path.join(output_directory, 'val.csv')
 30 |         self.best_txt = os.path.join(output_directory, 'best.txt')
 31 | 
 32 |         # backup the source code
 33 |         if args.resume == '':
 34 |             print("=> creating source code backup ...")
 35 |             backup_directory = os.path.join(output_directory, "code_backup")
 36 |             self.backup_directory = backup_directory
 37 |             backup_source_code(backup_directory)
 38 |             # create new csv files with only header
 39 |             with open(self.train_csv, 'w') as csvfile:
 40 |                 writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
 41 |                 writer.writeheader()
 42 |             with open(self.val_csv, 'w') as csvfile:
 43 |                 writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
 44 |                 writer.writeheader()
 45 |             print("=> finished creating source code backup.")
 46 | 
 47 |     def conditional_print(self, split, i, epoch, lr, n_set, blk_avg_meter,
 48 |                           avg_meter):
 49 |         if (i + 1) % self.args.print_freq == 0:
 50 |             avg = avg_meter.average()
 51 |             blk_avg = blk_avg_meter.average()
 52 |             print('=> output: {}'.format(self.output_directory))
 53 |             print(
 54 |                 '{split} Epoch: {0} [{1}/{2}]\tlr={lr} '
 55 |                 't_Data={blk_avg.data_time:.3f}({average.data_time:.3f}) '
 56 |                 't_GPU={blk_avg.gpu_time:.3f}({average.gpu_time:.3f})\n\t'
 57 |                 'RMSE={blk_avg.rmse:.2f}({average.rmse:.2f}) '
 58 |                 'MAE={blk_avg.mae:.2f}({average.mae:.2f}) '
 59 |                 'iRMSE={blk_avg.irmse:.2f}({average.irmse:.2f}) '
 60 |                 'iMAE={blk_avg.imae:.2f}({average.imae:.2f})\n\t'
 61 |                 'silog={blk_avg.silog:.2f}({average.silog:.2f}) '
 62 |                 'squared_rel={blk_avg.squared_rel:.2f}({average.squared_rel:.2f}) '
 63 |                 'Delta1={blk_avg.delta1:.3f}({average.delta1:.3f}) '
 64 |                 'REL={blk_avg.absrel:.3f}({average.absrel:.3f})\n\t'
 65 |                 'Lg10={blk_avg.lg10:.3f}({average.lg10:.3f}) '
 66 |                 'Photometric={blk_avg.photometric:.3f}({average.photometric:.3f}) '
 67 |                 .format(epoch,
 68 |                         i + 1,
 69 |                         n_set,
 70 |                         lr=lr,
 71 |                         blk_avg=blk_avg,
 72 |                         average=avg,
 73 |                         split=split.capitalize()))
 74 |             blk_avg_meter.reset(False)
 75 | 
 76 |     def conditional_save_info(self, split, average_meter, epoch):
 77 |         avg = average_meter.average()
 78 |         if split == "train":
 79 |             csvfile_name = self.train_csv
 80 |         elif split == "val":
 81 |             csvfile_name = self.val_csv
 82 |         elif split == "eval":
 83 |             eval_filename = os.path.join(self.output_directory, 'eval.txt')
 84 |             self.save_single_txt(eval_filename, avg, epoch)
 85 |             return avg
 86 |         elif "test" in split:
 87 |             return avg
 88 |         else:
 89 |             raise ValueError("wrong split provided to logger")
 90 |         with open(csvfile_name, 'a') as csvfile:
 91 |             writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
 92 |             writer.writerow({
 93 |                 'epoch': epoch,
 94 |                 'rmse': avg.rmse,
 95 |                 'photo': avg.photometric,
 96 |                 'mae': avg.mae,
 97 |                 'irmse': avg.irmse,
 98 |                 'imae': avg.imae,
 99 |                 'mse': avg.mse,
100 |                 'silog': avg.silog,
101 |                 'squared_rel': avg.squared_rel,
102 |                 'absrel': avg.absrel,
103 |                 'lg10': avg.lg10,
104 |                 'delta1': avg.delta1,
105 |                 'delta2': avg.delta2,
106 |                 'delta3': avg.delta3,
107 |                 'gpu_time': avg.gpu_time,
108 |                 'data_time': avg.data_time
109 |             })
110 |         return avg
111 | 
112 |     def save_single_txt(self, filename, result, epoch):
113 |         with open(filename, 'w') as txtfile:
114 |             txtfile.write(
115 |                 ("rank_metric={}\n" + "epoch={}\n" + "rmse={:.3f}\n" +
116 |                  "mae={:.3f}\n" + "silog={:.3f}\n" + "squared_rel={:.3f}\n" +
117 |                  "irmse={:.3f}\n" + "imae={:.3f}\n" + "mse={:.3f}\n" +
118 |                  "absrel={:.3f}\n" + "lg10={:.3f}\n" + "delta1={:.3f}\n" +
119 |                  "t_gpu={:.4f}").format(self.args.rank_metric, epoch,
120 |                                         result.rmse, result.mae, result.silog,
121 |                                         result.squared_rel, result.irmse,
122 |                                         result.imae, result.mse, result.absrel,
123 |                                         result.lg10, result.delta1,
124 |                                         result.gpu_time))
125 | 
126 |     def save_best_txt(self, result, epoch):
127 |         self.save_single_txt(self.best_txt, result, epoch)
128 | 
129 |     def _get_img_comparison_name(self, mode, epoch, is_best=False):
130 |         if mode == 'eval':
131 |             return self.output_directory + '/comparison_eval.png'
132 |         if mode == 'val':
133 |             if is_best:
134 |                 return self.output_directory + '/comparison_best.png'
135 |             else:
136 |                 return self.output_directory + '/comparison_' + str(epoch) + '.png'
137 | 
138 |     def conditional_save_img_comparison(self, mode, i, ele, pred, epoch, predrgb=None, predg=None, extra=None, extra2=None, extrargb=None):
139 |         # save 8 images for visualization
140 |         if mode == 'val' or mode == 'eval':
141 |             skip = 100
142 |             if i == 0:
143 |                 self.img_merge = vis_utils.merge_into_row(ele, pred, predrgb, predg, extra, extra2, extrargb)
144 |             elif i % skip == 0 and i < 8 * skip:
145 |                 row = vis_utils.merge_into_row(ele, pred, predrgb, predg, extra, extra2, extrargb)
146 |                 self.img_merge = vis_utils.add_row(self.img_merge, row)
147 |             elif i == 8 * skip:
148 |                 filename = self._get_img_comparison_name(mode, epoch)
149 |                 vis_utils.save_image(self.img_merge, filename)
150 | 
151 |     def save_img_comparison_as_best(self, mode, epoch):
152 |         if mode == 'val':
153 |             filename = self._get_img_comparison_name(mode, epoch, is_best=True)
154 |             vis_utils.save_image(self.img_merge, filename)
155 | 
156 |     def get_ranking_error(self, result):
157 |         return getattr(result, self.args.rank_metric)
158 | 
159 |     def rank_conditional_save_best(self, mode, result, epoch):
160 |         error = self.get_ranking_error(result)
161 |         best_error = self.get_ranking_error(self.best_result)
162 |         is_best = error < best_error
163 |         if is_best and mode == "val":
164 |             self.old_best_result = self.best_result
165 |             self.best_result = result
166 |             self.save_best_txt(result, epoch)
167 |         return is_best
168 | 
169 |     def conditional_save_pred(self, mode, i, pred, epoch):
170 |         if ("test" in mode or mode == "eval") and self.args.save_pred:
171 | 
172 |             # save images for visualization/ testing
173 |             image_folder = os.path.join(self.output_directory,
174 |                                         mode + "_output")
175 |             if not os.path.exists(image_folder):
176 |                 os.makedirs(image_folder)
177 |             img = torch.squeeze(pred.data.cpu()).numpy()
178 |             filename = os.path.join(image_folder, '{0:010d}.png'.format(i))
179 |             vis_utils.save_depth_as_uint16png(img, filename)
180 | 
181 |     def conditional_summarize(self, mode, avg, is_best):
182 |         print("\n*\nSummary of ", mode, "round")
183 |         print(''
184 |               'RMSE={average.rmse:.3f}\n'
185 |               'MAE={average.mae:.3f}\n'
186 |               'Photo={average.photometric:.3f}\n'
187 |               'iRMSE={average.irmse:.3f}\n'
188 |               'iMAE={average.imae:.3f}\n'
189 |               'squared_rel={average.squared_rel}\n'
190 |               'silog={average.silog}\n'
191 |               'Delta1={average.delta1:.3f}\n'
192 |               'REL={average.absrel:.3f}\n'
193 |               'Lg10={average.lg10:.3f}\n'
194 |               't_GPU={time:.3f}'.format(average=avg, time=avg.gpu_time))
195 |         if is_best and mode == "val":
196 |             print("New best model by %s (was %.3f)" %
197 |                   (self.args.rank_metric,
198 |                    self.get_ranking_error(self.old_best_result)))
199 |         elif mode == "val":
200 |             print("(best %s is %.3f)" %
201 |                   (self.args.rank_metric,
202 |                    self.get_ranking_error(self.best_result)))
203 |         print("*\n")
204 | 
205 | 
# Filter handed to shutil.copytree: skips entries matching these glob patterns.
ignore_hidden = shutil.ignore_patterns(
    ".", "..", ".git*", "*pycache*", "*build", "*.fuse*", "*_drive_*",
)


def backup_source_code(backup_directory):
    """Copy the current working directory into ``backup_directory``.

    An existing ``backup_directory`` is deleted first; entries matching
    ``ignore_hidden`` are not copied.
    """
    already_present = os.path.exists(backup_directory)
    if already_present:
        shutil.rmtree(backup_directory)
    shutil.copytree('.', backup_directory, ignore=ignore_hidden)
214 | 
215 | 
def adjust_learning_rate(lr_init, optimizer, epoch, args):
    """Apply a step learning-rate schedule to ``optimizer`` and return the rate.

    The rate is ``lr_init`` before epoch 10, ``0.5 * lr_init`` from epoch 10,
    ``0.1 * lr_init`` from epoch 15 and ``0.01 * lr_init`` from epoch 20 on.
    It is written to every parameter group. ``args`` is accepted but unused.
    """
    if epoch >= 20:
        lr = lr_init * 0.01
    elif epoch >= 15:
        lr = lr_init * 0.1
    elif epoch >= 10:
        lr = lr_init * 0.5
    else:
        lr = lr_init

    for group in optimizer.param_groups:
        group['lr'] = lr
    return lr
232 | 
def save_checkpoint(state, is_best, epoch, output_directory):
    """Save ``state`` as ``checkpoint-<epoch>.pth.tar`` in ``output_directory``.

    When ``is_best`` is true the saved file is also copied to
    ``model_best.pth.tar`` in the same directory. Checkpoints of earlier
    epochs are left in place.

    Args:
        state: Object serialized with ``torch.save``.
        is_best: Whether to refresh ``model_best.pth.tar`` from this checkpoint.
        epoch: Epoch number used in the checkpoint file name.
        output_directory: Existing directory that receives the files.
    """
    checkpoint_filename = os.path.join(output_directory,
                                       'checkpoint-' + str(epoch) + '.pth.tar')
    torch.save(state, checkpoint_filename)
    if is_best:
        best_filename = os.path.join(output_directory, 'model_best.pth.tar')
        shutil.copyfile(checkpoint_filename, best_filename)
245 | 
246 | 
def get_folder_name(args):
    """Return the result folder path for this run.

    The folder name encodes the input type, criterion, learning rate, batch
    size, weight decay, jitter and the current local time (minute resolution),
    and is joined onto ``args.result``.
    """
    current_time = time.strftime('%Y-%m-%d@%H-%M')
    pattern = 'input={}.criterion={}.lr={}.bs={}.wd={}.jitter={}.time={}'
    folder = pattern.format(
        args.input,
        args.criterion,
        args.lr,
        args.batch_size,
        args.weight_decay,
        args.jitter,
        current_time,
    )
    return os.path.join(args.result, folder)
255 | 
256 | 
# Shared 2x2 average pooling with stride 2, used to build the image pyramid.
avgpool = torch.nn.AvgPool2d(kernel_size=2, stride=2).cuda()


def multiscale(img):
    """Return five successively average-pooled versions of ``img``.

    ``avgpool`` is applied five times in a row; the results are returned as a
    tuple ordered from the most-pooled (fifth) level to the first.
    """
    levels = []
    current = img
    for _ in range(5):
        current = avgpool(current)
        levels.append(current)
    return tuple(reversed(levels))


--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
  1 | import argparse
  2 | import os
  3 | 
  4 | import torch
  5 | import torch.nn.parallel
  6 | import torch.optim
  7 | import torch.utils.data
  8 | import time
  9 | 
 10 | from dataloaders.kitti_loader import load_calib, input_options, KittiDepth
 11 | from metrics import AverageMeter, Result
 12 | import criteria
 13 | import helper
 14 | import vis_utils
 15 | 
 16 | import matplotlib.pyplot as plt
 17 | 
 18 | from model import CHNet
 19 | 
 20 | import matplotlib
 21 |  
 22 | matplotlib.use('AGG')
 23 | 
 24 | parser = argparse.ArgumentParser(description='CHNet')
 25 | parser.add_argument('--workers',
 26 |                     default=4,
 27 |                     type=int,
 28 |                     metavar='N',
 29 |                     help='number of data loading workers (default: 4)')
 30 | parser.add_argument('--epochs',
 31 |                     default=100,
 32 |                     type=int,
 33 |                     metavar='N',
 34 |                     help='number of total epochs to run (default: 100)')
 35 | parser.add_argument('--start-epoch',
 36 |                     default=0,
 37 |                     type=int,
 38 |                     metavar='N',
 39 |                     help='manual epoch number (useful on restarts)')
 40 | parser.add_argument('--start-epoch-bias',
 41 |                     default=0,
 42 |                     type=int,
 43 |                     metavar='N',
 44 |                     help='manual epoch number bias(useful on restarts)')
 45 | parser.add_argument('-c',
 46 |                     '--criterion',
 47 |                     metavar='LOSS',
 48 |                     default='l2',
 49 |                     choices=criteria.loss_names,
 50 |                     help='loss function: | '.join(criteria.loss_names) +
 51 |                     ' (default: l2)')
 52 | parser.add_argument('-b',
 53 |                     '--batch-size',
 54 |                     default=1,
 55 |                     type=int,
 56 |                     help='mini-batch size (default: 1)')
 57 | parser.add_argument('--lr',
 58 |                     '--learning-rate',
 59 |                     default=1e-3,
 60 |                     type=float,
 61 |                     metavar='LR',
 62 |                     help='initial learning rate (default 1e-5)')
 63 | parser.add_argument('--weight-decay',
 64 |                     '--wd',
 65 |                     default=1e-6,
 66 |                     type=float,
 67 |                     metavar='W',
 68 |                     help='weight decay (default: 0)')
 69 | parser.add_argument('--print-freq',
 70 |                     '-p',
 71 |                     default=10,
 72 |                     type=int,
 73 |                     metavar='N',
 74 |                     help='print frequency (default: 10)')
 75 | parser.add_argument('--resume',
 76 |                     default='',
 77 |                     type=str,
 78 |                     metavar='PATH',
 79 |                     help='path to latest checkpoint (default: none)')
 80 | parser.add_argument('--data-folder',
 81 |                     default='../../data',
 82 |                     type=str,
 83 |                     metavar='PATH',
 84 |                     help='data folder (default: none)')
 85 | parser.add_argument('--data-folder-rgb',
 86 |                     default='../data/data_rgb',
 87 |                     type=str,
 88 |                     metavar='PATH',
 89 |                     help='data folder rgb (default: none)')
 90 | parser.add_argument('--data-folder-save',
 91 |                     default='submit_test/',
 92 |                     type=str,
 93 |                     metavar='PATH',
 94 |                     help='data folder test results(default: none)')
 95 | parser.add_argument('-i',
 96 |                     '--input',
 97 |                     type=str,
 98 |                     default='rgbd',
 99 |                     choices=input_options,
100 |                     help='input: | '.join(input_options))
101 | parser.add_argument('--val',
102 |                     type=str,
103 |                     default="select",
104 |                     choices=["select", "full"],
105 |                     help='full or select validation set')
106 | parser.add_argument('--jitter',
107 |                     type=float,
108 |                     default=0.1,
109 |                     help='color jitter for images')
110 | parser.add_argument('--rank-metric',
111 |                     type=str,
112 |                     default='rmse',
113 |                     choices=[m for m in dir(Result()) if not m.startswith('_')],
114 |                     help='metrics for which best result is saved')
115 | 
116 | parser.add_argument('-e', '--evaluate', default='', type=str, metavar='PATH')
117 | parser.add_argument('--test', action="store_true", default=False,
118 |                     help='save result kitti test dataset for submission')
119 | parser.add_argument('--cpu', action="store_true", default=False, help='run on cpu')
120 | 
121 | #random cropping
122 | parser.add_argument('--not-random-crop', action="store_true", default=False,
123 |                     help='prohibit random cropping')
124 | parser.add_argument('-he', '--random-crop-height', default=320, type=int, metavar='N',
125 |                     help='random crop height')
126 | parser.add_argument('-w', '--random-crop-width', default=1216, type=int, metavar='N',
127 |                     help='random crop height')
128 | 
129 | 
args = parser.parse_args()

# Settings derived from the parsed options or fixed for this script.
args.result = os.path.join('..', 'results')
args.use_rgb, args.use_d, args.use_g = (
    'rgb' in args.input,
    'd' in args.input,
    'g' in args.input,
)
args.val_h, args.val_w = 352, 1216
print(args)

# Use the GPU only when one is available and --cpu was not requested.
cuda = torch.cuda.is_available() and not args.cpu
device = torch.device("cuda" if cuda else "cpu")
if cuda:
    import torch.backends.cudnn as cudnn
    cudnn.benchmark = True
print("=> using '{}' for computation.".format(device))

# Loss function: masked MSE for 'l2', masked L1 for any other criterion.
if args.criterion == 'l2':
    depth_criterion = criteria.MaskedMSELoss()
else:
    depth_criterion = criteria.MaskedL1Loss()

# Number of consecutive batches whose gradients are accumulated per optimizer step.
multi_batch_size = 1
154 | 
def iterate(mode, args, loader, model, optimizer, logger, epoch):
    """Run one pass over ``loader`` in the given mode.

    In 'train' mode the model is optimized with ``depth_criterion``; gradients
    are accumulated over ``multi_batch_size`` batches per optimizer step. In
    'test_completion' mode every prediction is written to
    ``args.data_folder_save``. In all modes except the two test modes the
    prediction is scored against ``batch_data['gt']`` and fed to the meters
    and the logger.

    Args:
        mode: One of "train", "val", "eval", "test_prediction",
            "test_completion".
        args: Parsed command-line options.
        loader: Iterable of batches; each batch is a dict whose non-``None``
            values are moved to ``device``.
        model: Network called with the batch dict; it returns three values, of
            which only the first (the prediction) is used here.
        optimizer: Optimizer stepped in 'train' mode (unused otherwise).
        logger: Logger object receiving the per-batch and summary calls.
        epoch: Current epoch number.

    Returns:
        Tuple ``(avg, is_best)`` as produced by the logger.

    Raises:
        AssertionError: If ``mode`` is not one of the supported values.
    """
    block_average_meter = AverageMeter()
    block_average_meter.reset(False)
    average_meter = AverageMeter()
    meters = [block_average_meter, average_meter]

    # switch to appropriate mode
    assert mode in ["train", "val", "eval", "test_prediction", "test_completion"], \
        "unsupported mode: {}".format(mode)
    if mode == 'train':
        model.train()
        lr = helper.adjust_learning_rate(args.lr, optimizer, epoch, args)
    else:
        model.eval()
        lr = 0

    # The test modes have no ground truth to train or score against.
    is_test_mode = mode in ('test_prediction', 'test_completion')

    torch.cuda.empty_cache()
    for i, batch_data in enumerate(loader):
        dstart = time.time()
        batch_data = {
            key: val.to(device)
            for key, val in batch_data.items() if val is not None
        }
        gt = None if is_test_mode else batch_data['gt']
        data_time = time.time() - dstart

        # Synchronize only when running on a GPU: torch.cuda.synchronize()
        # fails on machines without CUDA, which broke --cpu runs.
        if cuda:
            torch.cuda.synchronize()
        start = time.time()
        pred, _pred_ob, _pred_unob = model(batch_data)

        gpu_time = 0
        if args.evaluate:
            # With --evaluate, time only the forward pass.
            if cuda:
                torch.cuda.synchronize()
            gpu_time = time.time() - start

        if mode == 'train':
            loss = depth_criterion(pred, gt)

            if i % multi_batch_size == 0:
                optimizer.zero_grad()
            loss.backward()

            # Step at the end of each accumulation window and on the last batch.
            if i % multi_batch_size == (multi_batch_size - 1) or i == (len(loader) - 1):
                optimizer.step()
            print("loss:", loss, " epoch:", epoch, " ", i, "/", len(loader))

        if mode == "test_completion":
            path = os.path.join(args.data_folder_save, str(i).zfill(10) + '.png')
            vis_utils.save_depth_as_uint16png_upload(pred, path)

        if not args.evaluate:
            # Without --evaluate the measured time also covers the steps above.
            gpu_time = time.time() - start

        # measure accuracy and record loss
        with torch.no_grad():
            mini_batch_size = next(iter(batch_data.values())).size(0)
            result = Result()
            if not is_test_mode:
                # No photometric loss is computed here, so 0 is recorded.
                result.evaluate(pred.data, gt.data, 0)
                for meter in meters:
                    meter.update(result, gpu_time, data_time, mini_batch_size)

                if mode != 'train':
                    logger.conditional_print(mode, i, epoch, lr, len(loader),
                                             block_average_meter, average_meter)
                logger.conditional_save_img_comparison(mode, i, batch_data, pred,
                                                       epoch)
                logger.conditional_save_pred(mode, i, pred, epoch)

    avg = logger.conditional_save_info(mode, average_meter, epoch)
    is_best = logger.rank_conditional_save_best(mode, avg, epoch)
    if is_best and not (mode == "train"):
        logger.save_img_comparison_as_best(mode, epoch)
    logger.conditional_summarize(mode, avg, is_best)

    return avg, is_best
246 | 
def main():
    """Entry point: evaluate, run the test export, or train the model.

    Depending on the parsed options this function
    * loads a checkpoint for ``--evaluate`` / ``--resume``,
    * with ``--test`` runs the 'test_completion' pass and returns,
    * with ``--evaluate`` runs a single validation pass and returns,
    * otherwise trains from ``args.start_epoch`` to ``args.epochs``, validating
      and saving a checkpoint after every epoch.
    """
    checkpoint = None
    is_eval = False
    if args.evaluate:
        is_eval = True
        if os.path.isfile(args.evaluate):
            print("=> loading checkpoint '{}' ... ".format(args.evaluate),
                  end='')
            # NOTE(review): torch.load unpickles the file; only load
            # checkpoints from trusted sources.
            checkpoint = torch.load(args.evaluate, map_location=device)
            args.start_epoch = checkpoint['epoch'] + 1
            print("Completed.")
        else:
            # Evaluation continues with the freshly initialized model.
            print("No model found at '{}'".format(args.evaluate))

    elif args.resume:  # optionally resume from a checkpoint
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}' ... ".format(args.resume),
                  end='')
            # NOTE(review): same trust caveat as above for torch.load.
            checkpoint = torch.load(args.resume, map_location=device)
            args.start_epoch = checkpoint['epoch'] + 1
            print("Completed. Resuming from epoch {}.".format(
                checkpoint['epoch']))
        else:
            print("No checkpoint found at '{}'".format(args.resume))
            return

    print("=> creating model and optimizer ... ", end='')

    model = CHNet().to(device)

    if checkpoint is not None:
        # Only the model weights are restored; the optimizer starts fresh.
        model.load_state_dict(checkpoint['model'], strict=True)
        print("=> checkpoint state loaded.")

    logger = helper.logger(args)
    if checkpoint is not None:
        logger.best_result = checkpoint['best_result']
        del checkpoint
    print("=> logger created.")

    if args.test:
        test_dataset = KittiDepth('test_completion', args)
        test_loader = torch.utils.data.DataLoader(
            test_dataset,
            batch_size=1,
            shuffle=False,
            num_workers=1,
            pin_memory=True)
        iterate("test_completion", args, test_loader, model, None, logger, 0)
        return

    val_dataset = KittiDepth('val', args)
    val_loader = torch.utils.data.DataLoader(
        val_dataset,
        batch_size=1,
        shuffle=False,
        num_workers=2,
        pin_memory=True)  # set batch size to be 1 for validation
    print("\t==> val_loader size:{}".format(len(val_loader)))

    if is_eval:
        for p in model.parameters():
            p.requires_grad = False

        iterate("val", args, val_loader, model, None, logger,
                args.start_epoch - 1)
        return

    trainable_params = [
        p for _, p in model.named_parameters() if p.requires_grad
    ]
    optimizer = torch.optim.Adam(trainable_params, lr=args.lr,
                                 weight_decay=args.weight_decay,
                                 betas=(0.9, 0.99))
    print("completed.")

    model = torch.nn.DataParallel(model)

    # Data loading code
    print("=> creating data loaders ... ")
    train_dataset = KittiDepth('train', args)
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True,
                                               sampler=None)
    print("\t==> train_loader size:{}".format(len(train_loader)))

    print("=> starting main loop ...")
    for epoch in range(args.start_epoch, args.epochs):
        print("=> starting training epoch {} ..".format(epoch))
        iterate("train", args, train_loader, model, optimizer, logger, epoch)  # train for one epoch

        # Gradients are switched off on every parameter for validation and
        # back on afterwards (labelled "validation memory reset" originally;
        # presumably to avoid autograd bookkeeping - confirm).
        for p in model.parameters():
            p.requires_grad = False
        _, is_best = iterate("val", args, val_loader, model, None, logger, epoch)  # evaluate on validation set

        for p in model.parameters():
            p.requires_grad = True

        helper.save_checkpoint({  # save checkpoint
            'epoch': epoch,
            'model': model.module.state_dict(),
            'best_result': logger.best_result,
            'optimizer': optimizer.state_dict(),
            'args': args,
        }, is_best, epoch, logger.output_directory)


if __name__ == '__main__':
    main()


--------------------------------------------------------------------------------
/metrics.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import math
  3 | import numpy as np
  4 | 
  5 | lg_e_10 = math.log(10)
  6 | 
  7 | 
  8 | def log10(x):
  9 |     """Convert a new tensor with the base-10 logarithm of the elements of x. """
 10 |     return torch.log(x) / lg_e_10
 11 | 
 12 | 
 13 | class Result(object):
 14 |     def __init__(self):
 15 |         self.irmse = 0
 16 |         self.imae = 0
 17 |         self.mse = 0
 18 |         self.rmse = 0
 19 |         self.mae = 0
 20 |         self.absrel = 0
 21 |         self.squared_rel = 0
 22 |         self.lg10 = 0
 23 |         self.delta1 = 0
 24 |         self.delta2 = 0
 25 |         self.delta3 = 0
 26 |         self.data_time = 0
 27 |         self.gpu_time = 0
 28 |         self.silog = 0  # Scale invariant logarithmic error [log(m)*100]
 29 |         self.photometric = 0
 30 | 
 31 |     def set_to_worst(self):
 32 |         self.irmse = np.inf
 33 |         self.imae = np.inf
 34 |         self.mse = np.inf
 35 |         self.rmse = np.inf
 36 |         self.mae = np.inf
 37 |         self.absrel = np.inf
 38 |         self.squared_rel = np.inf
 39 |         self.lg10 = np.inf
 40 |         self.silog = np.inf
 41 |         self.delta1 = 0
 42 |         self.delta2 = 0
 43 |         self.delta3 = 0
 44 |         self.data_time = 0
 45 |         self.gpu_time = 0
 46 | 
 47 |     def update(self, irmse, imae, mse, rmse, mae, absrel, squared_rel, lg10, \
 48 |             delta1, delta2, delta3, gpu_time, data_time, silog, photometric=0):
 49 |         self.irmse = irmse
 50 |         self.imae = imae
 51 |         self.mse = mse
 52 |         self.rmse = rmse
 53 |         self.mae = mae
 54 |         self.absrel = absrel
 55 |         self.squared_rel = squared_rel
 56 |         self.lg10 = lg10
 57 |         self.delta1 = delta1
 58 |         self.delta2 = delta2
 59 |         self.delta3 = delta3
 60 |         self.data_time = data_time
 61 |         self.gpu_time = gpu_time
 62 |         self.silog = silog
 63 |         self.photometric = photometric
 64 | 
 65 |     def evaluate(self, output, target, photometric=0):
 66 |         valid_mask = target > 0.1
 67 |         # valid_mask = (valid_mask.int() - (valid_mask * lidar_mask).int()).bool()
 68 |         # valid_mask = valid_mask * lidar_mask
 69 | 
 70 |         # convert from meters to mm
 71 |         output_mm = 1e3 * output[valid_mask]
 72 |         target_mm = 1e3 * target[valid_mask]
 73 | 
 74 |         abs_diff = (output_mm - target_mm).abs()
 75 | 
 76 |         self.mse = float((torch.pow(abs_diff, 2)).mean())
 77 |         self.rmse = math.sqrt(self.mse)
 78 |         self.mae = float(abs_diff.mean())
 79 |         self.lg10 = float((log10(output_mm) - log10(target_mm)).abs().mean())
 80 |         self.absrel = float((abs_diff / target_mm).mean())
 81 |         self.squared_rel = float(((abs_diff / target_mm)**2).mean())
 82 | 
 83 |         maxRatio = torch.max(output_mm / target_mm, target_mm / output_mm)
 84 |         self.delta1 = float((maxRatio < 1.25).float().mean())
 85 |         self.delta2 = float((maxRatio < 1.25**2).float().mean())
 86 |         self.delta3 = float((maxRatio < 1.25**3).float().mean())
 87 |         self.data_time = 0
 88 |         self.gpu_time = 0
 89 | 
 90 |         # silog uses meters
 91 |         err_log = torch.log(target[valid_mask]) - torch.log(output[valid_mask])
 92 |         normalized_squared_log = (err_log**2).mean()
 93 |         log_mean = err_log.mean()
 94 |         self.silog = math.sqrt(normalized_squared_log -
 95 |                                log_mean * log_mean) * 100
 96 | 
 97 |         # convert from meters to km
 98 |         inv_output_km = (1e-3 * output[valid_mask])**(-1)
 99 |         inv_target_km = (1e-3 * target[valid_mask])**(-1)
100 |         abs_inv_diff = (inv_output_km - inv_target_km).abs()
101 |         self.irmse = math.sqrt((torch.pow(abs_inv_diff, 2)).mean())
102 |         self.imae = float(abs_inv_diff.mean())
103 | 
104 |         self.photometric = float(photometric)
105 | 
106 | 
class AverageMeter(object):
    """Running, sample-weighted averages of the metrics in ``Result``.

    With ``time_stable`` enabled, the GPU time of the first
    ``time_stable_counter_init`` updates is excluded from the timing average.
    """

    def __init__(self):
        self.reset(time_stable=True)

    def reset(self, time_stable):
        """Clear all sums and counters.

        Args:
            time_stable: Whether to skip the GPU time of the first
                ``time_stable_counter_init`` updates.
        """
        self.count = 0.0
        self.sum_irmse = 0
        self.sum_imae = 0
        self.sum_mse = 0
        self.sum_rmse = 0
        self.sum_mae = 0
        self.sum_absrel = 0
        self.sum_squared_rel = 0
        self.sum_lg10 = 0
        self.sum_delta1 = 0
        self.sum_delta2 = 0
        self.sum_delta3 = 0
        self.sum_data_time = 0
        self.sum_gpu_time = 0
        # Number of samples that contributed to sum_gpu_time.
        self.gpu_time_count = 0.0
        self.sum_photometric = 0
        self.sum_silog = 0
        self.time_stable = time_stable
        self.time_stable_counter_init = 10
        self.time_stable_counter = self.time_stable_counter_init

    def update(self, result, gpu_time, data_time, n=1):
        """Accumulate the metrics of ``result`` weighted by ``n`` samples.

        Args:
            result: Object exposing the metric attributes of ``Result``.
            gpu_time: GPU time measured for this update.
            data_time: Data loading time measured for this update.
            n: Number of samples the update represents.
        """
        self.count += n
        self.sum_irmse += n * result.irmse
        self.sum_imae += n * result.imae
        self.sum_mse += n * result.mse
        self.sum_rmse += n * result.rmse
        self.sum_mae += n * result.mae
        self.sum_absrel += n * result.absrel
        self.sum_squared_rel += n * result.squared_rel
        self.sum_lg10 += n * result.lg10
        self.sum_delta1 += n * result.delta1
        self.sum_delta2 += n * result.delta2
        self.sum_delta3 += n * result.delta3
        self.sum_data_time += n * data_time
        if self.time_stable and self.time_stable_counter > 0:
            # Warm-up update: its GPU time is not counted.
            self.time_stable_counter = self.time_stable_counter - 1
        else:
            self.sum_gpu_time += n * gpu_time
            self.gpu_time_count += n
        self.sum_silog += n * result.silog
        self.sum_photometric += n * result.photometric

    def average(self):
        """Return a ``Result`` holding the averages accumulated so far.

        All values are 0 when nothing has been recorded. The GPU time is
        averaged over the samples that actually contributed to it, so it stays
        correct when an update represents more than one sample; it is 0 while
        only warm-up updates have been seen.
        """
        avg = Result()
        if self.count > 0:
            if self.gpu_time_count > 0:
                avg_gpu_time = self.sum_gpu_time / self.gpu_time_count
            else:
                avg_gpu_time = 0
            avg.update(
                self.sum_irmse / self.count, self.sum_imae / self.count,
                self.sum_mse / self.count, self.sum_rmse / self.count,
                self.sum_mae / self.count, self.sum_absrel / self.count,
                self.sum_squared_rel / self.count, self.sum_lg10 / self.count,
                self.sum_delta1 / self.count, self.sum_delta2 / self.count,
                self.sum_delta3 / self.count, avg_gpu_time,
                self.sum_data_time / self.count, self.sum_silog / self.count,
                self.sum_photometric / self.count)
        return avg


--------------------------------------------------------------------------------
/model.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | from scipy.stats import truncnorm
  4 | import math
  5 | 
  6 | expansion = 1
  7 | 
  8 | def Conv1x1(in_planes, out_planes, stride=1):
  9 |     """1x1 convolution"""
 10 |     return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)
 11 | 
 12 | 
 13 | def Conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1):
 14 |     """3x3 convolution with padding"""
 15 |     return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
 16 |                      padding=dilation, groups=groups, bias=False, dilation=dilation)
 17 | 
 18 | 
 19 | class Basic2d(nn.Module):
 20 |     def __init__(self, in_channels, out_channels, norm_layer=None, kernel_size=3, padding=1):
 21 |         super().__init__()
 22 |         if norm_layer:
 23 |             conv = nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size,
 24 |                              stride=1, padding=padding, bias=False)
 25 |         else:
 26 |             conv = nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size,
 27 |                              stride=1, padding=padding, bias=True)
 28 |         self.conv = nn.Sequential(conv, )
 29 |         if norm_layer:
 30 |             self.conv.add_module('bn', norm_layer(out_channels))
 31 |         self.conv.add_module('relu', nn.ReLU(inplace=True))
 32 | 
 33 |     def forward(self, x):
 34 |         out = self.conv(x)
 35 |         return out
 36 | 
 37 | 
 38 | class Basic2dTrans(nn.Module):
 39 |     def __init__(self, in_channels, out_channels, norm_layer=None):
 40 |         super().__init__()
 41 |         if norm_layer is None:
 42 |             norm_layer = nn.BatchNorm2d
 43 |         self.conv = nn.ConvTranspose2d(in_channels=in_channels, out_channels=out_channels, kernel_size=3,
 44 |                                        stride=2, padding=1, output_padding=1, bias=False)
 45 |         self.bn = norm_layer(out_channels)
 46 |         self.relu = nn.ReLU(inplace=True)
 47 | 
 48 |     def forward(self, x):
 49 |         out = self.conv(x)
 50 |         out = self.bn(out)
 51 |         out = self.relu(out)
 52 |         return out
 53 | 
 54 | 
 55 | class FastGuide(nn.Module):
 56 |     def __init__(self, input_planes, norm_layer=None):
 57 |         super().__init__()
 58 |         if norm_layer is None:
 59 |             norm_layer = nn.BatchNorm2d
 60 |         self.expansion_ratio = 3
 61 |         self.conv1 = Basic2d(input_planes, input_planes, None)      
 62 |         self.weight_expansion = Basic2d(input_planes, input_planes * self.expansion_ratio, norm_layer, kernel_size=1, padding=0)
 63 | 
 64 |         self.conv2 = Basic2d(input_planes, input_planes, norm_layer, kernel_size=1, padding=0)
 65 |         self.conv3 = Basic2d(input_planes, input_planes)
 66 | 
 67 |     def forward(self, input, weight):
 68 |         weight = self.conv1(weight)
 69 |         weight = self.weight_expansion(weight)
 70 | 
 71 |         kernels = torch.chunk(weight, self.expansion_ratio, 1)
 72 |         splits = []
 73 | 
 74 |         for i in range(self.expansion_ratio):
 75 |             splits.append(input*kernels[i])
 76 |         out = sum(splits)
 77 |         out = self.conv2(out)
 78 | 
 79 |         avg_out = torch.mean(weight, dim=1, keepdim=True)
 80 |         out = self.conv3(out * avg_out)
 81 |         
 82 |         return out
 83 | 
 84 | 
 85 | class BasicBlock(nn.Module):
 86 |     __constants__ = ['downsample']
 87 | 
 88 |     def __init__(self, inplanes, planes, stride=1, downsample=None, norm_layer=None, act=True):
 89 |         super().__init__()
 90 |         if norm_layer is None:
 91 |             norm_layer = nn.BatchNorm2d
 92 |         self.conv1 = Conv3x3(inplanes, planes, stride)
 93 |         self.bn1 = norm_layer(planes)
 94 |         self.relu = nn.ReLU(inplace=True)
 95 |         self.conv2 = Conv3x3(planes, planes)
 96 |         self.bn2 = norm_layer(planes)
 97 |         self.downsample = downsample
 98 |         self.stride = stride
 99 |         self.act = act
100 | 
101 |     def forward(self, x):
102 |         identity = x
103 |         out = self.conv1(x)
104 |         out = self.bn1(out)
105 |         out = self.relu(out)
106 |         out = self.conv2(out)
107 |         out = self.bn2(out)
108 |         if self.downsample is not None:
109 |             identity = self.downsample(x)
110 |         out += identity
111 |         if self.act:
112 |             out = self.relu(out)
113 |         return out
114 | 
115 | 
class CHNet(nn.Module):
    """Two-branch encoder/decoder built from residual stages.

    An image branch (``conv_img`` + ``layer1_img``..``layer4_img``) and a
    lidar branch (``conv_lidar`` + ``layer1_lidar``..``layer4_lidar``) each
    run four stride-2 stages. After every stage the lidar features are passed
    through a ``FastGuide`` module together with the image features of the
    same stage. Four ``Basic2dTrans`` layers upsample the result, each output
    being added to the lidar features of the matching encoder level. Two
    heads (``conv_ob`` and ``conv_unob``) each produce a 1-channel map; the
    final output takes ``conv_ob`` where the lidar input is > 0 and
    ``conv_unob`` elsewhere.

    Args:
        block: residual block class used for every stage and for the heads.
        bc: base channel count; the stem convolutions output ``bc * 2``.
        img_layers: blocks per image stage (indices 0-3 are read here).
        depth_layers: blocks per lidar stage (indices 0-3 are read here).
        norm_layer: normalization layer constructor.
    """

    def __init__(self, block=BasicBlock, bc=16, img_layers=[2, 2, 2, 2, 2],
                 depth_layers=[2, 2, 2, 2, 2], norm_layer=nn.BatchNorm2d):
        super().__init__()
        self._norm_layer = norm_layer

        # --- image branch -------------------------------------------------
        # `self.inplanes` is the channel count _make_layer reads as its input
        # width; it is set explicitly before each stage is built.
        self.conv_img = Basic2d(3, bc * 2, norm_layer=norm_layer, kernel_size=5, padding=2)
        in_channels = bc * 2
        self.inplanes = in_channels
        self.layer1_img = self._make_layer(block, in_channels * 2, img_layers[0], stride=2)

        self.guide1 = FastGuide(in_channels * 2, norm_layer)
        self.inplanes = in_channels * 2 * expansion
        self.layer2_img = self._make_layer(block, in_channels * 4, img_layers[1], stride=2)

        self.guide2 = FastGuide(in_channels * 4, norm_layer)
        self.inplanes = in_channels * 4 * expansion
        self.layer3_img = self._make_layer(block, in_channels * 8, img_layers[2], stride=2)

        self.guide3 = FastGuide(in_channels * 8, norm_layer)
        self.inplanes = in_channels * 8 * expansion
        self.layer4_img = self._make_layer(block, in_channels * 8, img_layers[3], stride=2)

        self.guide4 = FastGuide(in_channels * 8, norm_layer)

        # --- lidar branch: same stage widths, stem without a norm layer ----
        self.conv_lidar = Basic2d(1, bc * 2, norm_layer=None, kernel_size=5, padding=2)

        self.inplanes = in_channels
        self.layer1_lidar = self._make_layer(block, in_channels * 2, depth_layers[0], stride=2)
        self.inplanes = in_channels * 2 * expansion
        self.layer2_lidar = self._make_layer(block, in_channels * 4, depth_layers[1], stride=2)
        self.inplanes = in_channels * 4 * expansion
        self.layer3_lidar = self._make_layer(block, in_channels * 8, depth_layers[2], stride=2)
        self.inplanes = in_channels * 8 * expansion
        self.layer4_lidar = self._make_layer(block, in_channels * 8, depth_layers[3], stride=2)

        # --- decoder: each layer's output width matches the encoder level it
        # is added to in forward() ------------------------------------------
        self.layer1d = Basic2dTrans(in_channels * 2, in_channels, norm_layer) 
        self.layer2d = Basic2dTrans(in_channels * 4, in_channels * 2, norm_layer)
        self.layer3d = Basic2dTrans(in_channels * 8, in_channels * 4, norm_layer)
        self.layer4d = Basic2dTrans(in_channels * 8, in_channels * 8, norm_layer)

        # --- output heads: a block without final activation followed by a
        # 3x3 convolution down to one channel -------------------------------
        self.conv_ob = nn.Sequential(block(bc * 2, bc * 2, norm_layer=norm_layer, act=False),
                                       nn.Conv2d(bc * 2, 1, kernel_size=3, stride=1, padding=1))
        self.conv_unob = nn.Sequential(block(bc * 2, bc * 2, norm_layer=norm_layer, act=False),
                                       nn.Conv2d(bc * 2, 1, kernel_size=3, stride=1, padding=1))
        self.ref = block(bc * 2, bc * 2, norm_layer=norm_layer, act=False)

        self._initialize_weights()

    def forward(self, x):
        """Run the network on a dict holding the image and the lidar map.

        Args:
            x: mapping with key ``'rgb'`` (fed to a 3-input-channel
                convolution) and key ``'d'`` (fed to a 1-input-channel
                convolution).
                NOTE(review): presumably both are (N, C, H, W) with H and W
                divisible by 16 so the skip additions line up - confirm.

        Returns:
            Tuple ``(output, output_ob, output_unob)``: ``output`` equals
            ``output_ob`` where ``x['d'] > 0`` and ``output_unob`` elsewhere.
        """
        img = x['rgb']
        lidar = x['d']

        # Boolean mask of positions with a positive lidar value; detached so
        # it takes no part in autograd.
        lidar_mask = (lidar > 0).detach()

        c0_img = self.conv_img(img)
        c0_lidar = self.conv_lidar(lidar)

        # Encoder: after each stage the lidar features are the FastGuide
        # `input` and the image features of the same stage are its `weight`.
        c1_img = self.layer1_img(c0_img)
        c1_lidar = self.layer1_lidar(c0_lidar)
        c1_lidar = self.guide1(c1_lidar, c1_img)

        c2_img = self.layer2_img(c1_img)
        c2_lidar = self.layer2_lidar(c1_lidar)
        c2_lidar = self.guide2(c2_lidar, c2_img)

        c3_img = self.layer3_img(c2_img)
        c3_lidar = self.layer3_lidar(c2_lidar)
        c3_lidar = self.guide3(c3_lidar, c3_img)

        c4_img = self.layer4_img(c3_img)
        c4_lidar = self.layer4_lidar(c3_lidar)
        c4_lidar = self.guide4(c4_lidar, c4_img)

        # Decoder: upsample, then add the lidar features of the matching
        # encoder level.
        de2 = self.layer4d(c4_lidar)
        de2 = de2 + c3_lidar

        de3 = self.layer3d(de2)
        de3 = de3 + c2_lidar

        de4 = self.layer2d(de3)
        de4 = de4 + c1_lidar

        de5 = self.layer1d(de4)
        de5 = de5 + c0_lidar  

        output = self.ref(de5)

        output_ob = self.conv_ob(output)
        output_unob = self.conv_unob(output)

        # `~` binds tighter than `*`: the second term is (~lidar_mask) * output_unob.
        output = lidar_mask * output_ob + ~lidar_mask * output_unob

        return output, output_ob, output_unob


    def _make_layer(self, block, planes, blocks, stride=1):
        """Build one stage of ``blocks`` residual blocks as an nn.Sequential.

        The first block takes ``self.inplanes`` input channels and applies
        ``stride``; it gets a 1x1-conv + norm shortcut when the stride is not
        1 or the channel count changes. The remaining blocks map
        ``planes * expansion`` channels to ``planes`` with stride 1.

        Side effect: sets ``self.inplanes`` to ``planes * expansion``.
        """
        norm_layer = self._norm_layer
        downsample = None
        if stride != 1 or self.inplanes != planes * expansion:
            downsample = nn.Sequential(
                Conv1x1(self.inplanes, planes * expansion, stride),
                norm_layer(planes * expansion),
            )

        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample, norm_layer))
        self.inplanes = planes * expansion
        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes, norm_layer=norm_layer))

        return nn.Sequential(*layers)

    def _initialize_weights(self):
        """Re-initialize every ``nn.Conv2d`` weight from a truncated normal.

        Weights are drawn with mean 0 and std ``sqrt(1.3 * 2 / n)`` where
        ``n = kernel_h * kernel_w * in_channels``, truncated at +/- 2 std;
        biases, where present, are set to zero.
        NOTE(review): the ``nn.ConvTranspose2d`` layers in Basic2dTrans are
        presumably not matched by the ``isinstance(m, nn.Conv2d)`` check and
        keep their default initialization - confirm.
        """
        def truncated_normal_(num, mean=0., std=1.):
            # Draw `num` samples from a normal(mean, std) restricted to
            # [-2 * std, 2 * std]; returned as a 1-D tensor built from the
            # NumPy samples.
            lower = -2 * std
            upper = 2 * std
            # scipy's truncnorm takes its bounds in units of `scale`,
            # measured from `loc`.
            X = truncnorm((lower - mean) / std, (upper - mean) / std, loc=mean, scale=std)
            samples = X.rvs(num)
            output = torch.from_numpy(samples)
            return output

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.in_channels
                data = truncated_normal_(m.weight.nelement(), mean=0, std=math.sqrt(1.3 * 2. / n))
                # Match the weight's dtype/device type, then restore its shape.
                data = data.type_as(m.weight.data)
                m.weight.data = data.view_as(m.weight.data)
                if m.bias is not None:
                    nn.init.zeros_(m.bias)


--------------------------------------------------------------------------------
/results.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmomoy/CHNet/a4c9ad267f87cafe9fd95e5e3a70e91a882d94f3/results.png


--------------------------------------------------------------------------------
/vis_utils.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | if not ("DISPLAY" in os.environ):
  3 |     import matplotlib as mpl
  4 |     mpl.use('Agg')
  5 | import matplotlib.pyplot as plt
  6 | from PIL import Image
  7 | import numpy as np
  8 | import cv2
  9 | 
# Colormap used by depth_colorize and save_mask_as_uint8colored.
cmap = plt.cm.jet
# Colormap used by feature_colorize.
cmap2 = plt.cm.nipy_spectral
 12 | 
 13 | def validcrop(img):
 14 |     ratio = 256/1216
 15 |     h = img.size()[2]
 16 |     w = img.size()[3]
 17 |     return img[:, :, h-int(ratio*w):, :]
 18 | 
 19 | def depth_colorize(depth):
 20 |     depth = (depth - np.min(depth)) / (np.max(depth) - np.min(depth))
 21 |     depth = 255 * cmap(depth)[:, :, :3]  # H, W, C
 22 |     return depth.astype('uint8')
 23 | 
 24 | def feature_colorize(feature):
 25 |     feature = (feature - np.min(feature)) / ((np.max(feature) - np.min(feature)))
 26 |     feature = 255 * cmap2(feature)[:, :, :3]
 27 |     return feature.astype('uint8')
 28 | 
 29 | def mask_vis(mask):
 30 |     mask = (mask - np.min(mask)) / (np.max(mask) - np.min(mask))
 31 |     mask = 255 * mask
 32 |     return mask.astype('uint8')
 33 | 
 34 | def merge_into_row(ele, pred, predrgb=None, predg=None, extra=None, extra2=None, extrargb=None):
 35 |     def preprocess_depth(x):
 36 |         y = np.squeeze(x.data.cpu().numpy())
 37 |         return depth_colorize(y)
 38 | 
 39 |     # if is gray, transforms to rgb
 40 |     img_list = []
 41 |     if 'rgb' in ele:
 42 |         rgb = np.squeeze(ele['rgb'][0, ...].data.cpu().numpy())
 43 |         rgb = np.transpose(rgb, (1, 2, 0))
 44 |         img_list.append(rgb)
 45 |     elif 'g' in ele:
 46 |         g = np.squeeze(ele['g'][0, ...].data.cpu().numpy())
 47 |         g = np.array(Image.fromarray(g).convert('RGB'))
 48 |         img_list.append(g)
 49 |     if 'd' in ele:
 50 |         img_list.append(preprocess_depth(ele['d'][0, ...]))
 51 |         img_list.append(preprocess_depth(pred[0, ...]))
 52 |     if extrargb is not None:
 53 |         img_list.append(preprocess_depth(extrargb[0, ...]))
 54 |     if predrgb is not None:
 55 |         predrgb = np.squeeze(ele['rgb'][0, ...].data.cpu().numpy())
 56 |         predrgb = np.transpose(predrgb, (1, 2, 0))
 57 |         #predrgb = predrgb.astype('uint8')
 58 |         img_list.append(predrgb)
 59 |     if predg is not None:
 60 |         predg = np.squeeze(predg[0, ...].data.cpu().numpy())
 61 |         predg = mask_vis(predg)
 62 |         predg = np.array(Image.fromarray(predg).convert('RGB'))
 63 |         #predg = predg.astype('uint8')
 64 |         img_list.append(predg)
 65 |     if extra is not None:
 66 |         extra = np.squeeze(extra[0, ...].data.cpu().numpy())
 67 |         extra = mask_vis(extra)
 68 |         extra = np.array(Image.fromarray(extra).convert('RGB'))
 69 |         img_list.append(extra)
 70 |     if extra2 is not None:
 71 |         extra2 = np.squeeze(extra2[0, ...].data.cpu().numpy())
 72 |         extra2 = mask_vis(extra2)
 73 |         extra2 = np.array(Image.fromarray(extra2).convert('RGB'))
 74 |         img_list.append(extra2)
 75 |     if 'gt' in ele:
 76 |         img_list.append(preprocess_depth(ele['gt'][0, ...]))
 77 | 
 78 |     img_merge = np.hstack(img_list)
 79 |     return img_merge.astype('uint8')
 80 | 
 81 | 
 82 | def add_row(img_merge, row):
 83 |     return np.vstack([img_merge, row])
 84 | 
 85 | 
 86 | def save_image(img_merge, filename):
 87 |     image_to_write = cv2.cvtColor(img_merge, cv2.COLOR_RGB2BGR)
 88 |     cv2.imwrite(filename, image_to_write)
 89 | 
 90 | def save_image_torch(rgb, filename):
 91 |     #torch2numpy
 92 |     rgb = validcrop(rgb)
 93 |     rgb = np.squeeze(rgb[0, ...].data.cpu().numpy())
 94 |     #print(rgb.size())
 95 |     rgb = np.transpose(rgb, (1, 2, 0))
 96 |     rgb = rgb.astype('uint8')
 97 |     image_to_write = cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)
 98 |     cv2.imwrite(filename, image_to_write)
 99 | 
def save_depth_as_uint16png(img, filename):
    """Write a depth tensor to ``filename`` as a uint16 image.

    The tensor is moved to NumPy, squeezed, multiplied by 256 and cast to
    uint16 before being handed to ``cv2.imwrite``.
    """
    depth = img.data.cpu().numpy()
    scaled = (np.squeeze(depth) * 256).astype('uint16')
    cv2.imwrite(filename, scaled)
105 | 
def save_depth_as_uint16png_upload(img, filename):
    """Write a depth tensor to ``filename`` through PIL as 16-bit data.

    The tensor is moved to NumPy, squeezed, multiplied by 256.0 and cast to
    uint16; the raw bytes are loaded into a mode ``"I"`` PIL image using the
    ``"I;16"`` raw decoder and saved.
    """
    depth = np.squeeze(img.data.cpu().numpy())
    depth_u16 = (depth * 256.0).astype('uint16')
    raw_bytes = depth_u16.tobytes()
    # The transposed shape is passed as the image size.
    canvas = Image.new("I", depth_u16.T.shape)
    canvas.frombytes(raw_bytes, 'raw', "I;16")
    canvas.save(filename)
114 | 
def save_depth_as_uint8colored(img, filename):
    """Crop a depth tensor, colorize it and write it to ``filename``.

    Pipeline: ``validcrop`` -> NumPy (squeezed) -> ``depth_colorize`` ->
    RGB-to-BGR conversion -> ``cv2.imwrite``.
    """
    cropped = validcrop(img)
    depth = np.squeeze(cropped.data.cpu().numpy())
    colored_bgr = cv2.cvtColor(depth_colorize(depth), cv2.COLOR_RGB2BGR)
    cv2.imwrite(filename, colored_bgr)
122 | 
def save_mask_as_uint8colored(img, filename, colored=True, normalized=True):
    """Crop a mask tensor and write it to ``filename`` as a uint8 image.

    Args:
        img: batched tensor; cropped with ``validcrop`` and squeezed.
        filename: output path handed to ``cv2.imwrite``.
        colored: when true, map the values through ``cmap`` and write a
            3-channel image; otherwise write a single-channel gray image.
        normalized: when false, min-max scale the values first; when true the
            values are used as they are.

    The gray image is written directly: ``cv2.cvtColor`` with
    ``COLOR_RGB2BGR`` requires a 3- or 4-channel input, so it is applied only
    on the colored path. A constant mask (zero value range) is scaled to all
    zeros instead of dividing by zero.
    """
    img = validcrop(img)
    img = np.squeeze(img.data.cpu().numpy())
    if not normalized:
        low = np.min(img)
        span = np.max(img) - low
        if span > 0:
            img = (img - low) / span
        else:
            # Zero (or NaN) range: nothing to scale, avoid the 0/0 division.
            img = np.zeros(np.shape(img), dtype=float)
    if colored:
        img = (255 * cmap(img)[:, :, :3]).astype('uint8')
        img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
    else:
        img = (255 * img).astype('uint8')
    cv2.imwrite(filename, img)
135 | 
def save_feature_as_uint8colored(img, filename):
    """Crop a feature tensor, colorize it and write it to ``filename``.

    Pipeline: ``validcrop`` -> NumPy (squeezed) -> ``feature_colorize`` ->
    RGB-to-BGR conversion -> ``cv2.imwrite``.
    """
    cropped = validcrop(img)
    feature = np.squeeze(cropped.data.cpu().numpy())
    colored_bgr = cv2.cvtColor(feature_colorize(feature), cv2.COLOR_RGB2BGR)
    cv2.imwrite(filename, colored_bgr)
142 | 


--------------------------------------------------------------------------------