`
30 |
31 | ## Results
32 |
33 | Note that you may find the references and more comparisons in the aforementioned paper.
34 |
35 | ### Quantitative results
36 |
37 |
38 | ### Qualitative results
39 |
40 |
41 | ### Citation
42 | Please consider citing the following paper if the code is helpful in your research work:
43 |
44 | @ARTICLE{8624409,
45 | author={Shiyu Zhao and Lin Zhang and Ying Shen and Shengjie Zhao and Huijuan Zhang},
46 | journal={IEEE Access},
47 | title={Super-Resolution for Monocular Depth Estimation With Multi-Scale Sub-Pixel Convolutions and a Smoothness Constraint},
48 | year={2019},
49 | volume={7},
50 | pages={16323-16335}
51 | }
52 |
53 |
--------------------------------------------------------------------------------
/datasets.py:
--------------------------------------------------------------------------------
1 | import torch.utils.data as data
2 | import torch
3 |
4 | import glob
5 | import scipy.io as sio
6 | import numpy as np
7 |
8 | # TODO: data augmentation
class NYUv2DataSet(data.Dataset):
    """Dataset over ``*.mat`` samples with an RGB image and x1/x4/x8 depth maps.

    Every file in ``data_root`` must contain a MATLAB struct named ``data``
    with the fields ``rgb``, ``depth``, ``depthx4``, ``depthx8`` and
    ``imageSize``.  Each item is a ``requiredSize`` crop of the full-resolution
    arrays plus the matching crops of the two coarser depth maps.

    Args:
        data_root: Directory searched (non-recursively) for ``*.mat`` files.
        is_train: If true the crop position is random, otherwise it is centred.
    """

    def __init__(self, data_root, is_train=True):
        super(NYUv2DataSet, self).__init__()

        self.dataRoot = data_root
        # glob returns files in arbitrary order; sort so that an index always
        # refers to the same sample across runs and machines.
        self.dataFiles = sorted(glob.glob('%s/*.mat' % self.dataRoot))
        self.dataNum = len(self.dataFiles)
        self.requiredSize = [240, 320]
        self.reqSizex4 = [60, 80]
        self.reqSizex8 = [30, 40]
        self.isTrain = is_train
        self.leastScale = 8

    def _cropOffsets(self, imageSize):
        """Return the crop origin (rows, cols) in units of ``leastScale`` pixels.

        Raises:
            ValueError: If the stored image is smaller than ``requiredSize``.
        """
        # loadmat yields floating point values for a MATLAB size() result;
        # random.randint needs true integers (enforced since Python 3.12).
        height, width = int(imageSize[0]), int(imageSize[1])
        reqHeight, reqWidth = self.requiredSize
        if height < reqHeight or width < reqWidth:
            raise ValueError('input image size is smaller than {}'.format(self.requiredSize))

        if self.isTrain:
            import random
            rowPixels = random.randint(0, height - reqHeight)
            colPixels = random.randint(0, width - reqWidth)
        else:
            rowPixels = (height - reqHeight) // 2
            colPixels = (width - reqWidth) // 2
        # Snap to the coarsest grid so the crops of all scales stay aligned.
        return rowPixels // self.leastScale, colPixels // self.leastScale

    @staticmethod
    def _crop(array, top, left, size):
        """Cut a ``size`` window out of a 2-D map and prepend a channel axis."""
        return array[np.newaxis, top:top + size[0], left:left + size[1]]

    def __getitem__(self, index):
        """Load sample ``index`` and return its aligned multi-scale crops.

        Returns:
            Tuple ``(rgb, depth, depthx4, depthx8, currFile)``; the first four
            entries are float tensors, the last is the path of the source file.

        Raises:
            ValueError: If the stored image is smaller than ``requiredSize``.
        """
        currFile = self.dataFiles[index]
        record = sio.loadmat(currFile)['data']

        # Move the last axis first (presumably H x W x C -> C x H x W).
        rgb = record['rgb'][0, 0].transpose((2, 0, 1))
        depth = record['depth'][0, 0]
        depthx4 = record['depthx4'][0, 0]
        depthx8 = record['depthx8'][0, 0]
        imageSize = record['imageSize'][0, 0][0]

        offset_x, offset_y = self._cropOffsets(imageSize)
        top, left = self.leastScale * offset_x, self.leastScale * offset_y

        rgb = rgb[:, top:top + self.requiredSize[0], left:left + self.requiredSize[1]]
        depth = self._crop(depth, top, left, self.requiredSize)
        # The same window expressed in the coordinates of the coarser maps.
        depthx4 = self._crop(depthx4, 2 * offset_x, 2 * offset_y, self.reqSizex4)
        depthx8 = self._crop(depthx8, offset_x, offset_y, self.reqSizex8)

        return torch.from_numpy(rgb).float(), torch.from_numpy(depth).float(), \
            torch.from_numpy(depthx4).float(), torch.from_numpy(depthx8).float(), currFile

    def __len__(self):
        """Return the number of ``.mat`` files found in ``data_root``."""
        return self.dataNum
60 |
61 |
class NYUv2FusionSet(data.Dataset):
    """Dataset over ``*.mat`` samples with an RGB image and x1/x2/x4/x8 depth maps.

    Every file in ``data_root`` must contain a MATLAB struct named ``data``
    with the fields ``rgb``, ``depth``, ``depthx2``, ``depthx4``, ``depthx8``
    and ``imageSize``.  Each item is a ``requiredSize`` crop of the
    full-resolution arrays plus the matching crops of the coarser depth maps.

    Args:
        data_root: Directory searched (non-recursively) for ``*.mat`` files.
        is_train: If true the crop position is random, otherwise it is centred.
        rgb_norm: If true the RGB array is divided by 255 before cropping.
    """

    def __init__(self, data_root, is_train=True, rgb_norm=False):
        super(NYUv2FusionSet, self).__init__()

        self.rgb_norm = rgb_norm
        self.dataRoot = data_root
        # glob returns files in arbitrary order; sort so that an index always
        # refers to the same sample across runs and machines.
        self.dataFiles = sorted(glob.glob('%s/*.mat' % self.dataRoot))
        self.dataNum = len(self.dataFiles)
        self.requiredSize = [240, 320]
        self.reqSizex2 = [120, 160]
        self.reqSizex4 = [60, 80]
        self.reqSizex8 = [30, 40]
        ## for make3d
        # self.requiredSize = [230, 172]
        # self.reqSizex2 = [115, 86]
        # self.reqSizex4 = [57, 43]
        # self.reqSizex8 = [28, 21]
        self.isTrain = is_train
        self.leastScale = 8

    def _cropOffsets(self, imageSize):
        """Return the crop origin (rows, cols) in units of ``leastScale`` pixels.

        Raises:
            ValueError: If the stored image is smaller than ``requiredSize``.
        """
        # loadmat yields floating point values for a MATLAB size() result;
        # random.randint needs true integers (enforced since Python 3.12).
        height, width = int(imageSize[0]), int(imageSize[1])
        reqHeight, reqWidth = self.requiredSize
        if height < reqHeight or width < reqWidth:
            raise ValueError('input image size is smaller than {}'.format(self.requiredSize))

        if self.isTrain:
            import random
            rowPixels = random.randint(0, height - reqHeight)
            colPixels = random.randint(0, width - reqWidth)
        else:
            rowPixels = (height - reqHeight) // 2
            colPixels = (width - reqWidth) // 2
        # Snap to the coarsest grid so the crops of all scales stay aligned.
        return rowPixels // self.leastScale, colPixels // self.leastScale

    @staticmethod
    def _crop(array, top, left, size):
        """Cut a ``size`` window out of a 2-D map and prepend a channel axis."""
        return array[np.newaxis, top:top + size[0], left:left + size[1]]

    def __getitem__(self, index):
        """Load sample ``index`` and return its aligned multi-scale crops.

        Returns:
            Tuple ``(rgb, depth, depthx2, depthx4, depthx8, currFile)``; the
            first five entries are float tensors, the last is the source path.

        Raises:
            ValueError: If the stored image is smaller than ``requiredSize``.
        """
        currFile = self.dataFiles[index]
        record = sio.loadmat(currFile)['data']

        # Move the last axis first (presumably H x W x C -> C x H x W).
        rgb = record['rgb'][0, 0].transpose((2, 0, 1))
        if self.rgb_norm:
            rgb = rgb/255.
        depth = record['depth'][0, 0]
        depthx2 = record['depthx2'][0, 0]
        depthx4 = record['depthx4'][0, 0]
        depthx8 = record['depthx8'][0, 0]
        imageSize = record['imageSize'][0, 0][0]

        offset_x, offset_y = self._cropOffsets(imageSize)
        top, left = self.leastScale * offset_x, self.leastScale * offset_y

        rgb = rgb[:, top:top + self.requiredSize[0], left:left + self.requiredSize[1]]
        depth = self._crop(depth, top, left, self.requiredSize)
        # The same window expressed in the coordinates of the coarser maps.
        depthx2 = self._crop(depthx2, 4 * offset_x, 4 * offset_y, self.reqSizex2)
        depthx4 = self._crop(depthx4, 2 * offset_x, 2 * offset_y, self.reqSizex4)
        depthx8 = self._crop(depthx8, offset_x, offset_y, self.reqSizex8)

        return torch.from_numpy(rgb).float(), torch.from_numpy(depth).float(), \
            torch.from_numpy(depthx2).float(), torch.from_numpy(depthx4).float(), \
            torch.from_numpy(depthx8).float(), currFile

    def __len__(self):
        """Return the number of ``.mat`` files found in ``data_root``."""
        return self.dataNum
127 |
128 |
class NYUv2MaskSet(data.Dataset):
    """Dataset over ``*.mat`` samples with RGB, multi-scale depth maps and masks.

    Every file in ``data_root`` must contain a MATLAB struct named ``data``
    with the fields ``rgb``, ``depth``, ``depthx2``, ``depthx4``, ``depthx8``,
    ``dpMask``, ``dpMaskx2``, ``dpMaskx4``, ``dpMaskx8`` and ``imageSize``.
    Each item is a ``requiredSize`` crop of the full-resolution arrays plus the
    matching crops of the coarser depth maps and masks.

    Args:
        data_root: Directory searched (non-recursively) for ``*.mat`` files.
        is_train: If true the crop position is random, otherwise it is centred.
        rgb_norm: If true the RGB array is divided by 255 before cropping.
    """

    def __init__(self, data_root, is_train=True, rgb_norm=False):
        super(NYUv2MaskSet, self).__init__()

        self.rgb_norm = rgb_norm
        self.dataRoot = data_root
        # glob returns files in arbitrary order; sort so that an index always
        # refers to the same sample across runs and machines.
        self.dataFiles = sorted(glob.glob('%s/*.mat' % self.dataRoot))
        self.dataNum = len(self.dataFiles)
        self.requiredSize = [240, 320]
        self.reqSizex2 = [120, 160]
        self.reqSizex4 = [60, 80]
        self.reqSizex8 = [30, 40]
        self.isTrain = is_train
        self.leastScale = 8

    def _cropOffsets(self, imageSize):
        """Return the crop origin (rows, cols) in units of ``leastScale`` pixels.

        Raises:
            ValueError: If the stored image is smaller than ``requiredSize``.
        """
        # loadmat yields floating point values for a MATLAB size() result;
        # random.randint needs true integers (enforced since Python 3.12).
        height, width = int(imageSize[0]), int(imageSize[1])
        reqHeight, reqWidth = self.requiredSize
        if height < reqHeight or width < reqWidth:
            raise ValueError('input image size is smaller than {}'.format(self.requiredSize))

        if self.isTrain:
            import random
            rowPixels = random.randint(0, height - reqHeight)
            colPixels = random.randint(0, width - reqWidth)
        else:
            rowPixels = (height - reqHeight) // 2
            colPixels = (width - reqWidth) // 2
        # Snap to the coarsest grid so the crops of all scales stay aligned.
        return rowPixels // self.leastScale, colPixels // self.leastScale

    @staticmethod
    def _crop(array, top, left, size):
        """Cut a ``size`` window out of a 2-D map and prepend a channel axis."""
        return array[np.newaxis, top:top + size[0], left:left + size[1]]

    def __getitem__(self, index):
        """Load sample ``index`` and return its aligned multi-scale crops.

        Returns:
            Tuple ``(rgb, depth, depthx2, depthx4, depthx8, currFile, mask,
            maskx2, maskx4, maskx8)``; every entry except ``currFile`` (the
            source path) is a float tensor.

        Raises:
            ValueError: If the stored image is smaller than ``requiredSize``.
        """
        currFile = self.dataFiles[index]
        record = sio.loadmat(currFile)['data']

        # Move the last axis first (presumably H x W x C -> C x H x W).
        rgb = record['rgb'][0, 0].transpose((2, 0, 1))
        if self.rgb_norm:
            rgb = rgb/255.
        imageSize = record['imageSize'][0, 0][0]

        offset_x, offset_y = self._cropOffsets(imageSize)
        top, left = self.leastScale * offset_x, self.leastScale * offset_y

        rgb = rgb[:, top:top + self.requiredSize[0], left:left + self.requiredSize[1]]

        # (field suffix, origin multiplier, crop size): the same window
        # expressed in the coordinates of every stored resolution.
        scales = (
            ('', self.leastScale, self.requiredSize),
            ('x2', 4, self.reqSizex2),
            ('x4', 2, self.reqSizex4),
            ('x8', 1, self.reqSizex8),
        )
        depths = []
        masks = []
        for suffix, step, size in scales:
            depthMap = record['depth' + suffix][0, 0]
            maskMap = record['dpMask' + suffix][0, 0]
            depths.append(self._crop(depthMap, step * offset_x, step * offset_y, size))
            masks.append(self._crop(maskMap, step * offset_x, step * offset_y, size))

        depthTensors = tuple(torch.from_numpy(d).float() for d in depths)
        maskTensors = tuple(torch.from_numpy(m).float() for m in masks)
        return (torch.from_numpy(rgb).float(),) + depthTensors + (currFile,) + maskTensors

    def __len__(self):
        """Return the number of ``.mat`` files found in ``data_root``."""
        return self.dataNum
--------------------------------------------------------------------------------
/matlab/gen_test_data_for_mscn.m:
--------------------------------------------------------------------------------
% Generate the test samples used by the network from the labeled NYU Depth v2
% data: every image that is NOT listed in the training split is cropped,
% resized to targetSize and saved together with log-depth maps at full, 1/2,
% 1/4 and 1/8 resolution as <test_root>/nyu_v2_<index>.mat.

NYUv2_data = '';    % path to nyu_depth_v2_labeled.mat
split_file = '';    % path to splits.mat

test_root = '../Dataset/test';

if isempty(NYUv2_data)
    ME = MException('Input:DataNotAssigned',...
        'you should assign the path of nyu_depth_v2_labeled.mat to the variable, NYUv2_data');
    throw(ME);
end
if isempty(split_file)
    ME = MException('Input:DataNotAssigned',...
        'you should assign the path of splits.mat to the variable, split_file');
    throw(ME);
end

if ~exist(test_root, 'dir')
    mkdir(test_root);
end

targetSize = [240, 320];
% targetScale = (480-12)/240;
padding_1 = 6;      % up and down
padding_2 = 8;      % left and right

farPlane = 10;      % largest depth value is 9.9955
nearPlane = 0.7;    % smallest value is 0.7133 (not used below)

% Announce the slow step before starting it, not after it has finished.
disp('loading data.. this may need a minute.');
NYUv2Data = load(NYUv2_data);
images = NYUv2Data.images;
depths = NYUv2Data.depths;
clear NYUv2Data

splitIndx = load(split_file);
trainIndx = splitIndx.trainNdxs;
% testIndx = splitIndx.testNdxs;

[~,~,~,imageNum] = size(images);
if imageNum ~= 1449
    ME = MException('Input:DataNotAssigned',...
        'do not have 1449 images, check the nyu_depth_v2_labeled.mat.');
    throw(ME);
end

% Membership test instead of a running counter, so the result does not depend
% on trainNdxs being sorted in ascending order.
isTrainImage = ismember(1:imageNum, trainIndx);

tic
for indx = 1:imageNum
    if ~isTrainImage(indx)
        % resize image and convert depth data
        RGBImage = images(:,:,:,indx);
        DepthMat = depths(:,:,indx);

        % crop white padding
        RGBImage = RGBImage(padding_1+1:end-padding_1,padding_2+1:end-padding_2,:);
        DepthMat = DepthMat(padding_1+1:end-padding_1,padding_2+1:end-padding_2);

        RGBImage = im2double(RGBImage);
        Depth = DepthMat;
        Depth(Depth > farPlane) = farPlane;     % clamp to the far plane
        Depth = single(Depth);
        % Non-positive depths would break the log below; all of them receive
        % one shared random value drawn from [1, 2).
        Depth(Depth <= 0) = (rand(1)+1);

        % scale back to 480x640 first, then down to the target size
        RGBImage = imresize(RGBImage, [480, 640]);
        Depth = imresize(Depth, [480, 640]);

        RGBImage_rs = imresize(RGBImage, targetSize);
        Depth_rs = imresize(Depth, targetSize);
        Depth_rsx2 = single(imresize(Depth_rs, 1/2));
        Depth_rsx4 = single(imresize(Depth_rs, 1/4));
        Depth_rsx8 = single(imresize(Depth_rs, 1/8));

        % depth maps are stored in log space; realDepth keeps the linear one
        data.rgb = RGBImage_rs;
        data.depth = log(Depth_rs);
        data.depthx2 = log(Depth_rsx2);
        data.depthx4 = log(Depth_rsx4);
        data.depthx8 = log(Depth_rsx8);
        data.realDepth = Depth_rs;
        data.imageSize = size(Depth_rs);

        saveFile = [test_root, '/nyu_v2_', num2str(indx), '.mat'];
        save(saveFile, 'data');
    end

    if mod(indx, 10) == 0
        disp([num2str(indx),' images has been processed!']);
        toc
    end
end

disp([num2str(imageNum),' images has been processed!']);
107 |
108 |
109 |
110 |
--------------------------------------------------------------------------------
/my_models.py:
--------------------------------------------------------------------------------
1 | from collections import OrderedDict
2 |
3 | import torch
4 | import torch.nn as nn
5 | import torch.nn.functional as F
6 | from torch.autograd import Variable
7 | from torch.nn.modules.loss import _Loss
8 |
9 | import scipy.io as sio
10 | import numpy as np
11 |
12 | from tools.densenet import _DenseBlock, _Transition, DenseBlock
13 |
class UpsampleByPS(nn.Module):
    """Two-convolution head that upsamples by pixel shuffle to one channel.

    A 5x5 convolution maps ``in_channels`` to 64 features (followed by ReLU),
    a 3x3 convolution maps those to ``upscale_factor ** 2`` channels, and
    ``nn.PixelShuffle`` rearranges them into a single channel whose height and
    width are ``upscale_factor`` times larger.

    Args:
        upscale_factor: Spatial enlargement factor of the pixel shuffle.
        in_channels: Number of channels of the input tensor.
        is_out_layer: If true a final ReLU is applied to the shuffled output.
    """

    def __init__(self, upscale_factor, in_channels=1, is_out_layer=False):
        super(UpsampleByPS, self).__init__()
        self.is_out_layer = is_out_layer

        # One output channel after shuffling needs upscale_factor^2 channels before it.
        shuffle_channels = 1 * (upscale_factor ** 2)
        self.conv1 = nn.Conv2d(in_channels, 64, (5, 5), (1, 1), (2, 2))
        self.conv2 = nn.Conv2d(64, shuffle_channels, (3, 3), (1, 1), (1, 1))
        self.pixel_shuffle = nn.PixelShuffle(upscale_factor)
        self.initParameters()

    def initParameters(self):
        """Re-initialise both convolution weights with Xavier-normal values."""
        weights = self.state_dict()
        for key in ('conv1.weight', 'conv2.weight'):
            nn.init.xavier_normal(weights[key])

    def forward(self, x):
        """Return the single-channel, ``upscale_factor``-times larger map for ``x``."""
        features = F.relu(self.conv1(x))
        upsampled = self.pixel_shuffle(self.conv2(features))
        if self.is_out_layer:
            return F.relu(upsampled)
        return upsampled
53 |
54 |
class DFCN_PS_FS(nn.Module):
    """DFCN_PS_FS is short for DFCN with pixelshuffle and scale fusion.

    The network consists of
      * a dense-block encoder (``conv0`` ... ``norm5``),
      * three transposed-convolution branches (``bx32_*``, ``bx16_*``,
        ``bx8_*``), each ending in a one-channel score map,
      * a chain of ``UpsampleByPS`` stages (``subconv_to_*``) that consume the
        branch features of the matching stage,
      * a 1x1 convolution (``fs_score_``) fusing the three branch scores with
        the output of the sub-pixel chain, and
      * a small "smooth" branch (``smth*``) fed by the first encoder feature
        map, evaluated only while ``isTrain`` is true.

    After construction every parameter except those of the smooth branch is
    frozen (see ``fixLayer``), and ``parameters()`` yields trainable
    parameters only.

    Args:
        is_Train: Initial value of ``isTrain``; selects whether ``forward``
            also returns the smooth-branch output.
    """

    def __init__(self, is_Train=True):
        super(DFCN_PS_FS, self).__init__()
        self.isTrain = is_Train

        # Encoder.  NOTE(review): layer names and sizes look like a
        # DenseNet-121 style encoder, presumably so pretrained weights can be
        # loaded by name - confirm against the training script.
        self.conv0 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.norm0 = nn.BatchNorm2d(64)
        self.relu0 = nn.ReLU(inplace=True)

        self.pool0 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.denseblock1 = _DenseBlock(num_layers=6, num_input_features=64, bn_size=4, growth_rate=32, drop_rate=0)
        self.transition1 = _Transition(num_input_features=256, num_output_features=256 // 2)

        self.pool1 = nn.AvgPool2d(kernel_size=2, stride=2)
        self.denseblock2 = _DenseBlock(num_layers=12, num_input_features=128, bn_size=4, growth_rate=32, drop_rate=0)
        self.transition2 = _Transition(num_input_features=512, num_output_features=512 // 2)

        self.pool2 = nn.AvgPool2d(kernel_size=2, stride=2)
        self.denseblock3 = _DenseBlock(num_layers=24, num_input_features=256, bn_size=4, growth_rate=32, drop_rate=0)
        self.transition3 = _Transition(num_input_features=1024, num_output_features=1024 // 2)

        self.pool3 = nn.AvgPool2d(kernel_size=2, stride=2)
        self.denseblock4 = _DenseBlock(num_layers=16, num_input_features=512, bn_size=4, growth_rate=32, drop_rate=0)
        self.norm5 = nn.BatchNorm2d(1024)

        # Smooth branch: the only part left trainable by fixLayer().
        self.smthBlock = DenseBlock(inputDim=64, outputDim=128, growthRate=32, blockDepth=6)
        self.smthConv = nn.Conv2d(in_channels=128, out_channels=4, kernel_size=5, padding=2, bias=False)
        self.smthUpsample = nn.PixelShuffle(2)

        self.deconv16_ = self._deconv2x(1024, 512)
        # Replicates one extra row at the top of the upsampled map.
        self.padding16 = nn.ReplicationPad2d((0, 0, 1, 0))

        # Branch fed by the upsampled deepest feature map.
        self.bx32_dconvx8 = self._deconv2x(512, 256)
        self.bx32_dconvx4 = self._deconv2x(256, 128)
        self.bx32_dconvx2 = self._deconv2x(128, 64)
        self.bx32_dconvx1 = self._deconv2x(64, 32)
        self.bx32_score = self._scoreConv()

        # Branch fed by out_16.
        self.bx16_dconvx8 = self._deconv2x(512, 256)
        self.bx16_dconvx4 = self._deconv2x(256, 128)
        self.bx16_dconvx2 = self._deconv2x(128, 64)
        self.bx16_dconvx1 = self._deconv2x(64, 32)
        self.bx16_score = self._scoreConv()

        # Branch fed by out_8.
        self.bx8_dconvx4 = self._deconv2x(256, 128)
        self.bx8_dconvx2 = self._deconv2x(128, 64)
        self.bx8_dconvx1 = self._deconv2x(64, 32)
        self.bx8_score = self._scoreConv()

        # Sub-pixel chain; input widths are 1 (previous stage output) plus the
        # channels of the branch features concatenated in forward().
        self.subconv_to_8 = UpsampleByPS(2, 512, is_out_layer=True)
        self.subconv_to_4 = UpsampleByPS(2, 1+256+256, is_out_layer=True)
        self.subconv_to_2 = UpsampleByPS(2, 1+128+128+128, is_out_layer=True)
        self.subconv_to_1_ = UpsampleByPS(2, 1+64+64+64, is_out_layer=True)

        self.fs_score_ = nn.Conv2d(in_channels=4, out_channels=1, kernel_size=1, padding=0, bias=False)

        self.initParameters()
        self.fixLayer()

    @staticmethod
    def _deconv2x(in_channels, out_channels):
        """Build the 4x4, stride-2 transposed convolution shared by all decoder stages."""
        return nn.ConvTranspose2d(in_channels=in_channels, out_channels=out_channels,
                                  kernel_size=(4, 4), stride=2, padding=(1, 1))

    @staticmethod
    def _scoreConv():
        """Build the bias-free 3x3 convolution mapping 32 features to one score channel."""
        return nn.Conv2d(in_channels=32, out_channels=1, kernel_size=3, padding=1, bias=False)

    def setTrainMode(self, isTrain):
        """Set ``isTrain``, which controls whether ``forward`` runs the smooth branch."""
        self.isTrain = isTrain

    def fixLayer(self):
        """Freeze every parameter, then re-enable gradients for the smooth branch."""
        # named_parameters() is used on purpose: parameters() is overridden
        # below and hides parameters that are already frozen.
        for _, param in self.named_parameters():
            param.requires_grad = False
        for layer in (self.smthBlock, self.smthConv, self.smthUpsample):
            for param in layer.parameters():
                param.requires_grad = True

    def parameters(self, recurse=True):
        """Yield only the parameters that currently require gradients.

        Overrides ``nn.Module.parameters`` so that code receiving
        ``model.parameters()`` only sees the trainable subset.

        Args:
            recurse: Same meaning as in ``nn.Module.parameters``; accepted so
                callers using the standard signature keep working.
        """
        if recurse:
            named = self.named_parameters()
        else:
            named = self.named_parameters(recurse=False)
        for _, param in named:
            if param.requires_grad:
                yield param

    def initParameters(self):
        """Initialise the decoder, score, fusion and smooth-branch weights."""
        stateDict = self.state_dict()
        xavierKeys = (
            'deconv16_.weight',
            'bx32_dconvx8.weight', 'bx32_dconvx4.weight', 'bx32_dconvx2.weight',
            'bx32_dconvx1.weight', 'bx32_score.weight',
            'bx16_dconvx8.weight', 'bx16_dconvx4.weight', 'bx16_dconvx2.weight',
            'bx16_dconvx1.weight', 'bx16_score.weight',
            'bx8_dconvx4.weight', 'bx8_dconvx2.weight', 'bx8_dconvx1.weight',
            'bx8_score.weight',
        )
        # The legacy (non-underscore) initialisers are kept to stay consistent
        # with the PyTorch API used by the rest of this file.
        for key in xavierKeys:
            nn.init.xavier_normal(stateDict[key])
        nn.init.uniform(stateDict['fs_score_.weight'])
        nn.init.xavier_normal(stateDict['smthConv.weight'])

    def alignScale(self, inputData, scaleSize):
        """Return ``inputData`` resized to ``scaleSize`` if it is at most 2 pixels off.

        Args:
            inputData: 4-D tensor whose last two dimensions are compared with
                ``scaleSize``.
            scaleSize: Target ``(height, width)``.

        Raises:
            ValueError: If either dimension differs by more than 2 pixels.
        """
        inputShape = inputData.data.shape
        if scaleSize[0] == inputShape[2] and scaleSize[1] == inputShape[3]:
            return inputData
        if abs(scaleSize[0]-inputShape[2]) <= 2 and abs(scaleSize[1]-inputShape[3]) <= 2:
            return nn.functional.upsample(inputData, size=scaleSize, mode='bilinear')
        raise ValueError('target size[{}, {}] is far from input size[{}, {}]'
                         .format(scaleSize[0], scaleSize[1], inputShape[2], inputShape[3]))

    def forward(self, x):
        """Run the network on ``x``.

        Returns:
            ``(outx1, out_fs, bx4_score, bx8_score, bx16_score, bx32_score,
            outx2, outx4, outx8)`` and, while ``isTrain`` is true, ``out_smth``
            as a tenth element.  ``bx4_score`` is the constant ``0`` because
            that branch is disabled.
        """
        # Encoder.
        out_2 = self.relu0(self.norm0(self.conv0(x)))
        out_4 = self.transition1(self.denseblock1(self.pool0(out_2)))
        out_8 = self.transition2(self.denseblock2(self.pool1(out_4)))
        out_16 = self.transition3(self.denseblock3(self.pool2(out_8)))
        out_32 = self.norm5(self.denseblock4(self.pool3(out_16)))

        # Branch starting from the upsampled deepest features.
        out_up_16 = self.padding16(self.deconv16_(out_32))
        bx32_outx8 = F.relu(self.bx32_dconvx8(out_up_16))
        bx32_outx4 = F.relu(self.bx32_dconvx4(bx32_outx8))
        bx32_outx2 = F.relu(self.bx32_dconvx2(bx32_outx4))
        bx32_outx1 = F.relu(self.bx32_dconvx1(bx32_outx2))
        bx32_score = self.bx32_score(bx32_outx1)

        # Branch starting from out_16.
        bx16_outx8 = F.relu(self.bx16_dconvx8(out_16))
        bx16_outx4 = F.relu(self.bx16_dconvx4(bx16_outx8))
        bx16_outx2 = F.relu(self.bx16_dconvx2(bx16_outx4))
        bx16_outx1 = F.relu(self.bx16_dconvx1(bx16_outx2))
        bx16_score = self.bx16_score(bx16_outx1)

        # Branch starting from out_8.
        bx8_outx4 = F.relu(self.bx8_dconvx4(out_8))
        bx8_outx2 = F.relu(self.bx8_dconvx2(bx8_outx4))
        bx8_outx1 = F.relu(self.bx8_dconvx1(bx8_outx2))
        bx8_score = self.bx8_score(bx8_outx1)

        # The x4 branch is disabled; a placeholder keeps the tuple layout stable.
        bx4_score = 0

        # Sub-pixel chain: each stage sees the previous output plus the branch
        # features of the same stage.
        outx8 = self.subconv_to_8(out_up_16)
        outx4 = self.subconv_to_4(torch.cat([outx8, bx32_outx8, bx16_outx8], 1))
        outx2 = self.subconv_to_2(torch.cat([outx4, bx32_outx4, bx16_outx4, bx8_outx4], 1))
        outx1 = self.subconv_to_1_(torch.cat([outx2, bx32_outx2, bx16_outx2, bx8_outx2], 1))

        out_fs = self.fs_score_(torch.cat([bx32_score, bx16_score, bx8_score, outx1], 1))

        outputs = (outx1, out_fs, bx4_score, bx8_score, bx16_score, bx32_score,
                   outx2, outx4, outx8)
        if self.isTrain:
            out_smth = self.smthUpsample(self.smthConv(self.smthBlock(out_2)))
            return outputs + (out_smth,)
        return outputs

    def computeLoss(self, targets, predictions, with_mask=False, with_smth=False):
        """Combine the per-output MSE losses (and optionally the smoothness term).

        Args:
            targets: Sequence whose items 0-3 are the targets for the x1, x2,
                x4 and x8 outputs; with ``with_mask`` items 4-7 are the
                matching masks.
            predictions: Tuple returned by ``forward``; item 9 (the smooth
                branch output) is required when ``with_smth`` is true.
            with_mask: Multiply predictions and targets by the masks before
                the MSE is taken.
            with_smth: Add ``NerighborSmthLoss(predictions[0], predictions[9])``.

        Returns:
            ``fs + 0.5*(bx8 + bx16 + bx32) + x1 + x2/2 + x4/4 + x8/8`` plus the
            optional smoothness term.
        """
        criterion = nn.MSELoss(size_average=True)

        if with_mask:
            mask, maskx2, maskx4, maskx8 = targets[4], targets[5], targets[6], targets[7]
        else:
            mask = maskx2 = maskx4 = maskx8 = None

        def mse(prediction, target, weight):
            # Without a mask the plain MSE is used; with one, both sides are
            # multiplied by it first.
            if weight is None:
                return criterion(prediction, target)
            return criterion(prediction*weight, target*weight)

        lossx1 = mse(predictions[0], targets[0], mask)
        fs_loss = mse(predictions[1], targets[0], mask)
        # predictions[2] (bx4_score) belongs to the disabled branch and is skipped.
        bx8_loss = mse(predictions[3], targets[0], mask)
        bx16_loss = mse(predictions[4], targets[0], mask)
        bx32_loss = mse(predictions[5], targets[0], mask)

        lossx2 = mse(predictions[6], targets[1], maskx2)
        lossx4 = mse(predictions[7], targets[2], maskx4)
        lossx8 = mse(predictions[8], targets[3], maskx8)

        mainTerm = fs_loss + 0.5*(bx8_loss+bx16_loss+bx32_loss) + lossx1 + lossx2/2 + lossx4/4 + lossx8/8
        if not with_smth:
            return mainTerm

        SmthLoss = NerighborSmthLoss(lamda=0.01, t=2)
        smthTerm = SmthLoss(predictions[0], predictions[9])
        return smthTerm + mainTerm
278 |
279 |
class NerighborSmthLoss(_Loss):
    """Neighbour smoothness penalty on a prediction, weighted by a guidance map.

    Squared differences between horizontally and vertically adjacent values of
    the prediction are summed, each weighted by ``exp(-t * d**2)`` where ``d``
    is the difference of the guidance map at the same pair of positions.

    Args:
        size_average: Forwarded to the ``_Loss`` base class; not read by
            ``forward``.
        lamda: Overall weight of the term.
        t: Decay factor inside the exponential weight.
    """

    def __init__(self, size_average=True, lamda=0.01, t=2):
        super(NerighborSmthLoss, self).__init__(size_average)
        self.lamda = lamda
        self.t = t

    def forward(self, input, target):
        """Return ``lamda / 2`` times the summed weighted squared differences.

        Args:
            input: 4-D prediction tensor.
            target: 4-D guidance ("smooth") map, sliced the same way as ``input``.
        """
        def weightedEnergy(dpDiff, relDiff):
            # .mean() acts on the scalar produced by torch.sum and therefore
            # leaves the value unchanged.
            return torch.sum((dpDiff**2) * torch.exp(-self.t*relDiff**2)).mean()

        # Differences between each element and its right / lower neighbour.
        horSmthLoss = weightedEnergy(input[:, :, :, 0:-1] - input[:, :, :, 1:],
                                     target[:, :, :, 0:-1] - target[:, :, :, 1:])
        verSmthLoss = weightedEnergy(input[:, :, 0:-1, :] - input[:, :, 1:, :],
                                     target[:, :, 0:-1, :] - target[:, :, 1:, :])

        return self.lamda/2*(horSmthLoss+verSmthLoss)
299 |
300 |
class DFCN_PS(nn.Module):
    """Dense-block encoder with a pixel-shuffle decoder and three skip branches.

    The encoder output is scored, merged with a score map computed from an
    earlier encoder stage and upsampled by ``UpsampleByPS``; this is repeated
    for two further stages.

    NOTE(review): the original code passed tuples such as ``(64, 32)`` as
    ``in_channels`` and lists of two tensors as input to ``UpsampleByPS``,
    which the current ``UpsampleByPS`` (a plain ``nn.Conv2d`` on a single
    tensor) cannot accept, so the class could not be instantiated.  The two
    inputs of every stage are now concatenated along the channel axis.  Confirm
    that this matches the intended architecture before relying on results.

    Args:
        is_Train: Initial value of ``isTrain`` (stored; not read by ``forward``).
    """

    def __init__(self, is_Train=True):
        super(DFCN_PS, self).__init__()
        self.isTrain = is_Train

        self.conv_1 = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=7, padding=3, stride=2, bias=False)
        self.bn_1 = nn.BatchNorm2d(64)
        self.pooling_1 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.denseBlock1 = DenseBlock(inputDim=64, outputDim=128, growthRate=32, blockDepth=6)
        self.pooling_2 = nn.AvgPool2d(kernel_size=2, stride=2)
        self.denseBlock2 = DenseBlock(inputDim=128, outputDim=256, growthRate=32, blockDepth=12)
        self.pooling_3 = nn.AvgPool2d(kernel_size=2, stride=2)
        self.denseBlock3 = DenseBlock(inputDim=256, outputDim=512, growthRate=32, blockDepth=24)
        self.pooling_4 = nn.AvgPool2d(kernel_size=2, stride=2)

        self.conv_fc_5_1 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=1, padding=0, bias=False)
        self.drop_5 = nn.Dropout2d(p=0.2)
        self.conv_fc_5_2 = nn.Conv2d(in_channels=512, out_channels=1024, kernel_size=1, padding=0, bias=False)
        self.drop_6 = nn.Dropout2d(p=0.2)

        self.score_32 = nn.Conv2d(in_channels=1024, out_channels=64, kernel_size=3, padding=1, bias=False)

        self.branch_score_16 = nn.Conv2d(in_channels=256, out_channels=32, kernel_size=3, padding=1, bias=False)
        self.branch_score_8 = nn.Conv2d(in_channels=128, out_channels=32, kernel_size=3, padding=1, bias=False)
        self.branch_score_4 = nn.Conv2d(in_channels=64, out_channels=32, kernel_size=3, padding=1, bias=False)

        # Each stage receives the main path and a 32-channel branch score
        # concatenated on the channel axis, hence the summed input widths.
        self.subconv_to_8_ = UpsampleByPS(2, 64 + 32)
        self.subconv_to_4_ = UpsampleByPS(2, 1 + 32)
        self.subconv4x_ = UpsampleByPS(4, 1 + 32, is_out_layer=True)

        self.initParameters()

    def setTrainMode(self, isTrain):
        """Set the ``isTrain`` flag."""
        self.isTrain = isTrain

    def initParameters(self):
        """Xavier-initialise the stem, the two 1x1 convolutions and ``score_32``."""
        stateDict = self.state_dict()
        for key in ('conv_1.weight', 'conv_fc_5_1.weight',
                    'conv_fc_5_2.weight', 'score_32.weight'):
            nn.init.xavier_normal(stateDict[key])

    def forward(self, x):
        """Run the network on ``x``.

        Returns:
            Tuple ``(outx1, outx4, outx8)``: the outputs of the last, second
            and first ``UpsampleByPS`` stage respectively.
        """
        out = self.conv_1(x)
        out = F.relu(self.bn_1(out))

        out_4 = self.pooling_1(out)
        out_8 = self.pooling_2(F.relu(self.denseBlock1(out_4)))
        out_16 = self.pooling_3(F.relu(self.denseBlock2(out_8)))
        out_32 = self.pooling_4(F.relu(self.denseBlock3(out_16)))

        out_32 = F.relu(self.drop_5(self.conv_fc_5_1(out_32)))
        out_32 = F.relu(self.drop_6(self.conv_fc_5_2(out_32)))

        # Upsample to the spatial size of out_16, the map it is concatenated
        # with below.  The original hard-coded (15, 20), which is this size
        # for a 240x320 input.
        sizex16 = tuple(out_16.size()[2:])
        out_up_16 = nn.functional.upsample(out_32, size=sizex16, mode='bilinear')
        score_16 = F.relu(self.score_32(out_up_16))

        score_b_16 = self.branch_score_16(out_16)
        score_b_8 = self.branch_score_8(out_8)
        score_b_4 = self.branch_score_4(out_4)

        outx8 = self.subconv_to_8_(torch.cat([score_16, score_b_16], 1))
        outx4 = self.subconv_to_4_(torch.cat([outx8, score_b_8], 1))
        outx1 = self.subconv4x_(torch.cat([outx4, score_b_4], 1))

        return outx1, outx4, outx8
374 |
375 |
class DFCN_32(nn.Module):
    """Dense-block encoder followed by a sub-pixel (PixelShuffle) decoder.

    The encoder is a 7x7 stem convolution plus three ``DenseBlock`` stages,
    each followed by pooling.  Two 1x1 "fully connected" convolutions and a
    3x3 scoring convolution reduce the features to a single channel, which
    is then enlarged by PixelShuffle stages.

    ``forward`` returns ``(outx1, outx4, outx8)``.  Because the features are
    resized to a fixed 15x20 grid before scoring, the three maps are always
    240x320, 60x80 and 30x40.
    """

    def __init__(self, is_Train=True):
        super(DFCN_32, self).__init__()
        self.isTrain = is_Train

        # --- encoder (creation order is kept so parameter order is stable) ---
        self.conv_1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn_1 = nn.BatchNorm2d(64)
        self.relu1 = nn.ReLU(inplace=True)
        self.pooling_1 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.denseBlock1 = DenseBlock(inputDim=64, outputDim=128, growthRate=32, blockDepth=6)
        self.relu2 = nn.ReLU(inplace=True)
        self.pooling_2 = nn.AvgPool2d(kernel_size=2, stride=2)
        self.denseBlock2 = DenseBlock(inputDim=128, outputDim=256, growthRate=32, blockDepth=12)
        self.relu3 = nn.ReLU(inplace=True)
        self.pooling_3 = nn.AvgPool2d(kernel_size=2, stride=2)
        self.denseBlock3 = DenseBlock(inputDim=256, outputDim=512, growthRate=32, blockDepth=24)
        self.relu4 = nn.ReLU(inplace=True)
        self.pooling_4 = nn.AvgPool2d(kernel_size=2, stride=2)

        # --- 1x1 "fully connected" convolutions with spatial dropout ---
        self.conv_fc_5_1 = nn.Conv2d(512, 512, kernel_size=1, padding=0, bias=False)
        self.relu5 = nn.ReLU(inplace=True)
        self.drop_5 = nn.Dropout2d(p=0.2)
        self.conv_fc_5_2 = nn.Conv2d(512, 1024, kernel_size=1, padding=0, bias=False)
        self.relu6 = nn.ReLU(inplace=True)
        self.drop_6 = nn.Dropout2d(p=0.2)

        # --- single-channel score map ---
        self.score_32 = nn.Conv2d(1024, 1, kernel_size=3, padding=1, bias=False)
        self.relu7 = nn.ReLU(inplace=True)

        def shuffle_stage():
            # 1 -> 4 channels, then PixelShuffle(2) folds them into a 2x larger
            # single-channel map.
            return [
                nn.Conv2d(1, 4, kernel_size=3, padding=1, bias=False),
                nn.PixelShuffle(2),
                nn.LeakyReLU(0.2, inplace=True),
            ]

        self.upsample_to_8 = nn.Sequential(*shuffle_stage())
        self.upsample_to_4 = nn.Sequential(*shuffle_stage())
        # Two stacked 2x stages give the final 4x enlargement.
        self.upsample4x = nn.Sequential(*(shuffle_stage() + shuffle_stage()))

        self.initParameters()

    def setTrainMode(self, isTrain):
        """Store the training flag on the instance (read by callers, not by forward)."""
        self.isTrain = isTrain

    def initParameters(self):
        """Xavier-initialise the stem, FC, score and first up-sampling convolutions."""
        weights = self.state_dict()
        xavier_keys = (
            'conv_1.weight',
            'conv_fc_5_1.weight',
            'conv_fc_5_2.weight',
            'score_32.weight',
            'upsample_to_8.0.weight',
            'upsample_to_4.0.weight',
            'upsample4x.0.weight',
        )
        for key in xavier_keys:
            nn.init.xavier_normal(weights[key])

    def forward(self, x):
        """Run the network on ``x`` and return ``(outx1, outx4, outx8)``."""
        encoder = (
            self.conv_1, self.bn_1, self.relu1, self.pooling_1,
            self.denseBlock1, self.relu2, self.pooling_2,
            self.denseBlock2, self.relu3, self.pooling_3,
            self.denseBlock3, self.relu4, self.pooling_4,
        )
        head = (
            self.conv_fc_5_1, self.drop_5, self.relu5,
            self.conv_fc_5_2, self.drop_6, self.relu6,
        )

        feat = x
        for layer in encoder:
            feat = layer(feat)
        for layer in head:
            feat = layer(feat)

        # Resize to the fixed grid the decoder expects before scoring.
        feat = nn.functional.upsample(feat, size=(15,20), mode='bilinear')
        feat = self.relu7(self.score_32(feat))

        outx8 = self.upsample_to_8(feat)
        outx4 = self.upsample_to_4(outx8)
        outx1 = self.upsample4x(outx4)

        return outx1, outx4, outx8
495 |
496 |
class DFCN_16(DFCN_32):
    """Variant of DFCN_32 that adds a skip prediction taken after ``pooling_3``.

    NOTE(review): ``forward`` calls ``self.upsample_to_16``, but neither this
    class nor the visible ``DFCN_32.__init__`` (where that layer is commented
    out) defines it.  As written, ``forward`` looks like it raises
    ``AttributeError`` unless the attribute is attached elsewhere -- confirm
    before using this class.
    """
    def __init__(self):
        super(DFCN_16, self).__init__()

        # Scores the 256-channel features taken after pooling_3 (has a bias,
        # unlike the parent's score_32).
        self.score_16 = nn.Conv2d(in_channels=256, out_channels=1, kernel_size=3, padding=1)
        # 2x sub-pixel enlargement applied to the skip score.
        self.upsample = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=4, kernel_size=3, padding=1, bias=False),
            nn.PixelShuffle(2),
            nn.LeakyReLU(0.2, inplace=True)
        )
        # Replaces the parent's upsample_to_8 with a 2-channel-input version so
        # it can consume the concatenated [deep, skip] scores.  The parent's
        # initParameters() already ran inside super().__init__(), so this
        # replacement keeps PyTorch's default initialisation.
        self.upsample_to_8 = nn.Sequential(
            nn.Conv2d(in_channels=2, out_channels=4, kernel_size=3, padding=1, bias=False),
            nn.PixelShuffle(2),
            nn.LeakyReLU(0.2, inplace=True)
        )

    def forward(self, x):
        # Unlike DFCN_32.forward, bn_1 is not applied after the stem conv here.
        out = F.relu(self.conv_1(x))
        out = self.pooling_1(out)

        out = F.relu(self.denseBlock1(out))
        out = self.pooling_2(out)
        out = F.relu(self.denseBlock2(out))
        out = self.pooling_3(out)
        # Skip branch: score the pooling_3 features and enlarge them 2x.
        out_16 = out
        out_16 = self.score_16(out_16)
        out_16 = self.upsample(out_16)

        out = self.denseBlock3(out)
        out = self.relu4(out)
        out = self.pooling_4(out)

        # drop_5 / drop_6 and the fixed-size resize used by DFCN_32.forward
        # are not applied on this path.
        out = self.conv_fc_5_1(out)
        out = self.relu5(out)
        out = self.conv_fc_5_2(out)
        out = self.relu6(out)
        out = self.score_32(out)
        out = self.relu7(out)

        # NOTE(review): upsample_to_16 is not defined in the visible code (see
        # class docstring).  Also confirm that `out` and `out_16` share a
        # spatial size here; torch.cat along dim 1 requires it.
        out = self.upsample_to_16(out)
        out_cat = torch.cat([out, out_16], 1)
        out_cat = self.upsample_to_8(out_cat)
        out_cat = self.upsample_to_4(out_cat)
        out_cat = self.upsample4x(out_cat)

        # Returns a single tensor, not the 3-tuple returned by DFCN_32.forward.
        return out_cat
544 |
class RDCN_VGG(nn.Module):
    """VGG-style encoder with a recurrently applied dense block.

    The same ``DenseBlock`` is applied ``rec_num`` times; after every pass a
    single-channel prediction is produced.  The per-pass predictions are
    fused by a learned 1x1 convolution, and the last features are enlarged
    4x by two PixelShuffle stages for the final prediction.

    Args:
        rec_num: number of recurrent passes through the dense block (also the
            number of input channels of the fusion convolution).
    """

    def __init__(self, rec_num):
        super(RDCN_VGG, self).__init__()

        self.recNum = rec_num

        def vgg_unit(tag, in_ch, out_ch):
            # conv -> batch norm -> ReLU, named after the VGG layer it mirrors
            # so that loadConv() can match checkpoint keys by layer name.
            return [
                ('conv%s' % tag, nn.Conv2d(in_channels=in_ch, out_channels=out_ch, kernel_size=3, padding=1)),
                ('conv%s/bn' % tag, nn.BatchNorm2d(out_ch)),
                ('relu%s' % tag, nn.ReLU(inplace=True)),
            ]

        stem = [('data/bn', nn.BatchNorm2d(3))]
        stem += vgg_unit('1_1', 3, 64)
        stem += vgg_unit('1_2', 64, 64)
        stem.append(('pool1', nn.MaxPool2d(kernel_size=2, stride=2)))
        stem += vgg_unit('2_1', 64, 128)
        stem += vgg_unit('2_2', 128, 128)
        stem.append(('pool2', nn.MaxPool2d(kernel_size=2, stride=2)))
        stem += vgg_unit('3_1', 128, 256)
        stem += vgg_unit('3_2', 256, 256)
        stem += vgg_unit('3_3', 256, 256)
        stem += vgg_unit('3_4', 256, 256)
        self.downsample = nn.Sequential(OrderedDict(stem))

        self.denseBlock = DenseBlock(inputDim=256, outputDim=256 ,growthRate=32, blockDepth=8)
        self.predictx4 = nn.Conv2d(in_channels=256, out_channels=1, kernel_size=3, padding=1)
        self.weightedAvg = nn.Conv2d(in_channels=self.recNum, out_channels=1, kernel_size=1, bias=True)

        self.upsample4x = nn.Sequential(
            nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, padding=1, bias=False),
            nn.PixelShuffle(2),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv2d(in_channels=64, out_channels=256, kernel_size=3, padding=1, bias=False),
            nn.PixelShuffle(2),
            nn.LeakyReLU(0.2, inplace=True),
        )
        self.predict = nn.Conv2d(in_channels=64, out_channels=1, kernel_size=3, padding=1, bias=False)

    def loadConv(self, pretrain_model):
        """Copy convolution weights/biases from a ``.mat`` checkpoint.

        For a convolution whose (last) module name is ``NAME`` the checkpoint
        entry ``NAME_0`` is copied into the weight and, when the layer has a
        bias, ``NAME_1`` into the bias.  Missing entries are skipped.

        Args:
            pretrain_model: path passed to ``scipy.io.loadmat``.

        Raises:
            ValueError: if a matching entry has the wrong shape.
        """
        pretrainModel = sio.loadmat(pretrain_model)
        for name, module in self.named_modules():
            if not isinstance(module, nn.Conv2d):
                continue
            last_name = name.split('.')[-1]

            targets = [('%s_0'%last_name, module.weight)]   # weight
            if module.bias is not None:
                targets.append(('%s_1'%last_name, module.bias))   # bias

            # Direct key lookup instead of scanning the whole checkpoint.
            for key, param in targets:
                if key in pretrainModel:
                    print('load %s'%key)
                    self.copyArrayToTensor(pretrainModel[key], param.data)

    def copyArrayToTensor(self, array, tensor):
        """Copy a NumPy array into ``tensor`` in place after a shape check.

        A 2-D array with a leading dimension of 1 (how ``loadmat`` returns
        vectors) is treated as 1-D.  Only that leading axis is dropped, so a
        ``(1, 1)`` array still matches a 1-element tensor.

        Raises:
            ValueError: if the shapes differ.
        """
        array = np.asarray(array)
        if array.ndim == 2 and array.shape[0] == 1:
            array = array[0]

        aShape = array.shape
        tShape = tensor.shape
        if tuple(aShape) != tuple(tShape):
            raise ValueError('array shape:{} mismatches with tensor: {}'.format(aShape, tShape))

        # One bulk copy; values go through float64 like the former
        # per-element float() conversion and are cast to the tensor's dtype.
        source = np.array(array, dtype=np.float64, order='C')
        tensor.copy_(torch.from_numpy(source))

    def forward(self, x):
        """Return ``(predictx4s, predictx4_avg, predict_final)``.

        ``predictx4s`` is the list of per-pass predictions, ``predictx4_avg``
        their learned fusion and ``predict_final`` the prediction made from
        the 4x enlarged features of the last pass.
        """
        out = self.downsample(x)

        predictx4s = []
        for _ in range(self.recNum):
            out = self.denseBlock(out)
            predictx4s.append(self.predictx4(out))

        # Stack the per-pass maps along the channel axis for the 1x1 fusion.
        predictx4_avg = self.weightedAvg(torch.cat(predictx4s, 1))

        out = self.upsample4x(out)
        predict_final = self.predict(out)

        return predictx4s, predictx4_avg, predict_final
675 |
class InvLoss(nn.Module):
    """Per-map squared-error loss with a mean-offset discount.

    For every (sample, channel) map with difference ``d = input - target``
    over ``n = H * W`` pixels the loss is::

        sum(d ** 2) / n  -  lamda * (sum(d)) ** 2 / n ** 2

    and the result is summed over all maps.

    Args:
        lamda: weight of the subtracted squared-mean term.
    """

    def __init__(self, lamda=0.5):
        super(InvLoss, self).__init__()
        self.lamda = lamda

    def forward(self, _input, _target):
        """Return the scalar loss for 4-D ``_input`` / ``_target`` (N, C, H, W)."""
        dArr = _input - _target
        nVal = _input.data.shape[2]*_input.data.shape[3]

        # Flatten the two spatial axes and reduce once.  Reducing dim 2 and
        # then dim 3 fails when reductions drop the reduced dimension, because
        # dim 3 no longer exists after the first sum.
        flat = dArr.contiguous().view(dArr.size(0), dArr.size(1), -1)

        mseLoss = torch.sum(flat*flat, 2)/nVal
        dArrSum = torch.sum(flat, 2)
        mssLoss = -self.lamda*(dArrSum*dArrSum)/(nVal**2)

        loss = mseLoss + mssLoss
        return torch.sum(loss)
692 |
693 |
def copyArrayToTensor(array, tensor):
    """Copy a NumPy array into ``tensor`` in place after a shape check.

    A 2-D array whose first dimension is 1 (the way ``scipy.io.loadmat``
    returns vectors) is treated as 1-D.  Only that leading axis is removed,
    so a ``(1, 1)`` array still matches a 1-element tensor instead of
    collapsing to a 0-d array.

    Args:
        array: source values (anything ``np.asarray`` accepts).
        tensor: destination tensor, modified in place.

    Raises:
        ValueError: if the (possibly reduced) array shape differs from the
            tensor shape.
    """
    array = np.asarray(array)
    if array.ndim == 2 and array.shape[0] == 1:
        array = array[0]

    aShape = array.shape
    tShape = tensor.shape
    if tuple(aShape) != tuple(tShape):
        raise ValueError('array shape:{} mismatches with tensor: {}'.format(aShape, tShape))

    # One bulk copy replaces the per-element Python loops (which also ignored
    # arrays with more than four dimensions).  Values pass through float64,
    # as with the former float() conversion, and are cast to tensor's dtype.
    source = np.array(array, dtype=np.float64, order='C')
    tensor.copy_(torch.from_numpy(source))
727 |
728 |
def copyParametersToModel(params, modules, rule_file):
    """Copy pretrained arrays into model tensors according to a rule file.

    Each line of ``rule_file`` has the form ``<source_key> <target_key>``
    (separated by a single space).  Reading stops at the first empty line or
    at end of file.  For every rule, ``params[source_key]`` is copied into
    ``modules[target_key]`` with ``copyArrayToTensor``.

    Args:
        params: mapping of source keys to arrays (e.g. a loaded checkpoint).
        modules: mapping of target keys to destination tensors.
        rule_file: path of the text file holding the rules.

    Raises:
        ValueError: if a source key named in the rule file is not in
            ``params``; nothing is copied in that case.
    """
    ruleDict = dict()
    # `with` closes the file even when a bad rule raises below.
    with open(rule_file, 'r') as ruleFile:
        for line in ruleFile:
            if line == '\n':
                # A blank line terminates the rule list.
                break
            contents = line.split(' ')
            currSrcLayer = contents[0]
            currTargetLayer = contents[1]
            if currTargetLayer.endswith('\n'):
                currTargetLayer = currTargetLayer[:-1]

            if currSrcLayer not in params.keys():
                raise ValueError('pretrainModel has no key: %s'%currSrcLayer)
            ruleDict[currSrcLayer] = currTargetLayer

    # load parameters
    for key, item in ruleDict.items():
        copyArrayToTensor(params[key], modules[item])
752 |
753 |
--------------------------------------------------------------------------------
/results/figure_nyu.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xiaofeng94/MSCNNS-for-monocular-depth-estimation/71b9bc876688e80be1b646851fc8327c0dccd6e2/results/figure_nyu.png
--------------------------------------------------------------------------------
/results/table_nyu.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xiaofeng94/MSCNNS-for-monocular-depth-estimation/71b9bc876688e80be1b646851fc8327c0dccd6e2/results/table_nyu.png
--------------------------------------------------------------------------------
/test.py:
--------------------------------------------------------------------------------
1 | import argparse
2 |
3 | import torch
4 | from torch.autograd import Variable
5 | from torch.utils.data import DataLoader
6 |
7 | from tools.EvalutateMetrics import myMetrics
8 | from datasets import NYUv2DataSet, NYUv2FusionSet
9 |
10 | import scipy.io as sio
11 | import numpy as np
12 | # from PIL import Image
13 | import glob
14 |
15 | import time
16 |
def loadImage(test_file, in_size=(240, 320)):
    """Load one ``.mat`` sample and return a centre-cropped network input.

    The file must contain a ``data`` struct with the fields ``rgb`` (H, W, 3),
    ``depth`` (H, W) and ``imageSize``.  Both images are cropped around the
    centre to ``in_size``.

    Args:
        test_file: path of the ``.mat`` sample.
        in_size: ``(height, width)`` of the crop.

    Returns:
        ``(inputData, depth_target)``: the RGB crop as a float tensor of shape
        ``(1, 3, height, width)`` (moved to the GPU when one is available) and
        ``np.exp`` of the cropped depth map -- the stored depth is presumably
        log-depth; confirm against the data preparation code.
    """
    data = sio.loadmat(test_file)
    data = data['data']

    rgb = data['rgb'][0,0]
    depth = data['depth'][0,0]

    imageSize = data['imageSize'][0,0][0]
    offset_x = int((imageSize[0] - in_size[0])/2)
    offset_y = int((imageSize[1] - in_size[1])/2)
    rows = slice(offset_x, in_size[0]+offset_x)
    cols = slice(offset_y, in_size[1]+offset_y)

    # HWC -> CHW, add the batch axis, crop.
    rgb_new = rgb.transpose((2, 0, 1))
    rgb_new = torch.from_numpy(rgb_new[np.newaxis, :, rows, cols]).float()
    # Only move to the GPU when there is one, so the loader can also be used
    # on CPU-only machines.
    if torch.cuda.is_available():
        rgb_new = rgb_new.cuda()
    inputData = Variable(rgb_new)
    # Inference-only flag of the legacy autograd API this script targets.
    inputData.volatile = True

    depth_new = depth[rows, cols]
    depth_target = np.exp(depth_new)

    return inputData, depth_target
38 |
39 |
def covert2Array(inData):
    """Return ``exp`` of the first channel of the first batch item as float32 NumPy."""
    first_item = inData.cpu().data[0]
    first_channel = first_item.numpy()[0,...]
    return np.exp(first_channel.astype(np.float32))
43 |
44 |
# Command-line evaluation script: either averages the metrics over a folder
# of .mat samples (--data) or evaluates one sample and dumps all network
# outputs to results.mat (--image).
parser = argparse.ArgumentParser(description="pythorch recusive densely-connected nerual network Test")
parser.add_argument("--model", default=None, type=str, help="model path")
parser.add_argument("--image", default=None, type=str, help="image name")
# parser.add_argument("--cpu", action="store_true", help="Use cpu only")
parser.add_argument("--data", default='', type=str, help='assign dataset for test. when assinged, --image become useless')

opt = parser.parse_args()
print(opt)

print('build model...')
# The checkpoint stores the whole pickled model under the "model" key.
model = torch.load(opt.model)["model"]
model.setTrainMode(False)
model.eval()

model = model.cuda()
model.is_train = False

# print(model)

metrics = myMetrics()
metrics.resetMetrics()

if opt.data:
    dataFiles = glob.glob('%s/*.mat'%opt.data)
    dataNum = len(dataFiles)
    # At most 700 samples are evaluated.
    testNum = min(dataNum, 700)

    detectTime = 0.0
    timedRuns = 0

    for indx in range(testNum):
        inputData, target = loadImage(dataFiles[indx])
        # Untimed warm-up pass.
        predictions = model(inputData)

        # CUDA kernels run asynchronously: wait for pending work before and
        # after the timed pass so the interval covers exactly one forward.
        torch.cuda.synchronize()
        begin = time.time()
        predictions = model(inputData)
        torch.cuda.synchronize()
        end = time.time()
        # print(end-begin)

        # The first sample is excluded from the timing statistics (as before),
        # unless it is the only one.
        if indx >= 1 or testNum == 1:
            detectTime = detectTime + end-begin
            timedRuns += 1

        predictedx1 = predictions[0].cpu()
        predictedx1_np = covert2Array(predictedx1)

        metrics.computeMetrics(predictedx1_np, target, disp=True, image_name=dataFiles[indx])

    metricsVals = metrics.getMetrics()

    print('-- [average metrics] -------')
    print('rel: %f, log10: %f, rms: %f, thr1: %f, thr2: %f, thr3: %f'%(metricsVals[0],metricsVals[1],
        metricsVals[2], metricsVals[3], metricsVals[4], metricsVals[5]))
    # Divide by the number of runs actually timed, not by the number of files
    # found (they differ when more than 700 files exist, and the old
    # denominator was zero for a single file).
    if timedRuns > 0:
        print('average time: %f'%(detectTime/float(timedRuns)) )

else:
    test_file = opt.image
    data = sio.loadmat(test_file)
    data = data['data']

    rgb = data['rgb'][0,0]

    inputData, target = loadImage(test_file)

    # out of the network
    # NOTE(review): the slices below assume the loaded model returns at least
    # nine outputs (final map, ..., three branch maps, three multi-scale
    # predictions) -- confirm against the model class stored in the checkpoint.
    predictions = model(inputData)
    predictedx1 = covert2Array(predictions[0])
    O_8x, O_16x, O_32x = predictions[3:6]
    pred2x, pred4x, pred8x = predictions[6:]
    O_4x = 0 # No O_4x for this version
    O_8x = covert2Array(O_8x)
    O_16x = covert2Array(O_16x)
    O_32x = covert2Array(O_32x)
    pred2x = covert2Array(pred2x)
    pred4x = covert2Array(pred4x)
    pred8x = covert2Array(pred8x)

    currRel = metrics.computeRel(predictedx1, target)
    currRMS = metrics.computeRMS(predictedx1, target)
    currL10 = metrics.computeLog10(predictedx1, target)

    print('rel: %f, rms: %f, log10: %f'%(currRel, currRMS, currL10))

    sio.savemat('results.mat', {'rgb': rgb, 'depth':target,
        'pred1x': predictedx1, 'pred2x': pred2x, 'pred4x': pred4x,
        'pred8x': pred8x, 'bx4': O_4x,'bx8': O_8x, 'bx16': O_16x, 'bx32': O_32x})

print('Done!')
--------------------------------------------------------------------------------
/test_samples/nyu_v2_175.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xiaofeng94/MSCNNS-for-monocular-depth-estimation/71b9bc876688e80be1b646851fc8327c0dccd6e2/test_samples/nyu_v2_175.mat
--------------------------------------------------------------------------------
/test_samples/nyu_v2_9.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xiaofeng94/MSCNNS-for-monocular-depth-estimation/71b9bc876688e80be1b646851fc8327c0dccd6e2/test_samples/nyu_v2_9.mat
--------------------------------------------------------------------------------
/tools/EvalutateMetrics.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
class myMetrics(object):
    """Accumulate per-image depth-estimation errors and report their averages.

    Typical use: ``computeMetrics(prediction, ground_truth)`` once per image,
    then ``getMetrics()``.  ``resetMetrics()`` clears the accumulated values.
    """

    def __init__(self):
        super(myMetrics, self).__init__()

        self.rms = 0  # legacy field; per-image squared errors live in rms2
        self.thrValue1 = 1.25
        self.thrValue2 = 1.25**2
        self.thrValue3 = 1.25**3

        self.exclude_thr = 10 #0.7 for make3d #0.4 for nyn2
        self.rms_avg_by_image = True
        self.min_depth = 0.7
        self.max_depth = 99999

        # Create the list accumulators (rel, rms2, log10Err, thrAcc*) right
        # away so the object is usable without an explicit resetMetrics().
        self.resetMetrics()

    def fineModification(self, depth_predicted, depth_gt, max_depth = 70, clip_value = 80):
        """Set predictions above ``max_depth`` to ``clip_value`` in place.

        Only the region covered by ``depth_gt``'s first two dimensions is
        touched.
        """
        rows, cols = depth_gt.shape[0], depth_gt.shape[1]
        region = depth_predicted[:rows, :cols]   # view: edits hit depth_predicted
        region[region > max_depth] = clip_value

    def computeRel(self, depth_predicted, depth_gt):
        """Mean of ``|pred - gt| / gt`` over the ground-truth pixels."""
        number = depth_gt.shape[0]*depth_gt.shape[1]
        curr_rel_arr = np.abs(depth_predicted-depth_gt)/depth_gt
        curr_rel = np.sum(curr_rel_arr)/number

        return curr_rel

    def computeRMS(self, depth_predicted, depth_gt):
        """Root of the mean squared difference over the ground-truth pixels."""
        number = depth_gt.shape[0]*depth_gt.shape[1]
        curr_rms_arr = (depth_predicted-depth_gt)**2
        curr_rms = np.sum(curr_rms_arr)/number

        return np.sqrt(curr_rms)

    def computeLog10(self, depth_predicted, depth_gt):
        """Mean of ``|log10(pred) - log10(gt)|`` over the ground-truth pixels."""
        number = depth_gt.shape[0]*depth_gt.shape[1]
        curr_log10Err_arr = np.abs(np.log10(depth_predicted)-np.log10(depth_gt))
        curr_log10Err = np.sum(curr_log10Err_arr)/number

        return curr_log10Err

    def setMetricsType(self, typeStr = 'c1'):
        """Select the ground-truth depth cap: 70 for ``'c1'``, otherwise 99999."""
        if typeStr == 'c1':
            self.max_depth = 70
        else:
            self.max_depth = 99999

    def resetMetrics(self):
        """Clear all accumulated per-image values and counters."""
        self.test_count = 0

        self.rel = list() # average relative error
        self.rms2 = list() # squared RMS error per image
        self.log10Err = list() # average log10 error
        self.thrAcc1 = list() # accuracy with threshold
        self.thrAcc2 = list() # accuracy with threshold
        self.thrAcc3 = list() # accuracy with threshold

        self.thrCount1 = 0
        self.thrCount2 = 0
        self.thrCount3 = 0

        self.pointsNum = 0 # valid point number
        self.exclude_list = []

    def fastCompute(self, gt, pred):
        """Return ``(abs_rel, rmse2, log10_err, a1, a2, a3)`` for two arrays.

        ``rmse2`` is the mean *squared* error (no square root); ``a1..a3`` are
        the fractions of elements whose ratio ``max(gt/pred, pred/gt)`` is
        below 1.25, 1.25**2 and 1.25**3.
        """
        thresh = np.maximum((gt / pred), (pred / gt))
        a1 = (thresh < 1.25 ).mean()
        a2 = (thresh < 1.25 ** 2).mean()
        a3 = (thresh < 1.25 ** 3).mean()

        rmse2 = np.mean((gt - pred) ** 2)

        log10_err = np.mean(np.absolute(np.log10(gt) - np.log10(pred)))

        abs_rel = np.mean(np.abs(gt - pred) / gt)

        return abs_rel, rmse2, log10_err, a1, a2, a3

    def computeMetrics(self, pred_depth_real, depth_real, disp=False, image_name=''):
        """Evaluate one image and append its metrics to the accumulators.

        Only pixels whose ground truth lies strictly between ``min_depth``
        and ``max_depth`` are used.

        NOTE(review): in the reviewed copy the statements between the mask
        and the exclusion test were lost (text between ``<`` and ``>`` was
        stripped).  They are reconstructed here from fastCompute()'s return
        order and the lists getMetrics() reads; the quantity compared with
        ``exclude_thr`` (relative error) is an assumption -- confirm against
        the upstream file.
        """
        self.test_count += 1

        mask = np.logical_and(depth_real>self.min_depth, depth_real<self.max_depth)

        currMetrics = self.fastCompute(depth_real[mask], pred_depth_real[mask])
        self.rel.append(currMetrics[0])
        self.rms2.append(currMetrics[1])
        self.log10Err.append(currMetrics[2])
        self.thrAcc1.append(currMetrics[3])
        self.thrAcc2.append(currMetrics[4])
        self.thrAcc3.append(currMetrics[5])

        if currMetrics[0] > self.exclude_thr:
            self.exclude_list.append(self.test_count)

        if disp:
            print('({}){}:'.format(self.test_count, image_name))
            print('rel: {}, rms: {}, log10: {}'.format(currMetrics[0], np.sqrt(currMetrics[1]), currMetrics[2]))
            print('---- file end ----')

    def getMetrics(self):
        """Return ``(rel, log10Err, rms, a1, a2, a3, exclude_list)`` averaged over images.

        ``rms`` is the mean of the per-image RMS values (square root taken
        per image, then averaged).
        """
        rel = np.array(self.rel).mean()
        log10Err = np.array(self.log10Err).mean()
        rms = np.sqrt(self.rms2).mean()
        a1 = np.array(self.thrAcc1).mean()
        a2 = np.array(self.thrAcc2).mean()
        a3 = np.array(self.thrAcc3).mean()

        return rel,log10Err,rms,a1,a2,a3, self.exclude_list
--------------------------------------------------------------------------------
/tools/densenet.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | import torch.utils.model_zoo as model_zoo
5 | from collections import OrderedDict
6 |
7 |
class BaseDenseLayer(nn.Module):
    """One bottleneck dense layer: BN-ReLU-Conv1x1 then BN-ReLU-Conv3x3.

    The layer output (``growth_rate`` channels) is concatenated to its input
    along the channel axis, so the result has ``input_dim + growth_rate``
    channels.
    """

    def __init__(self, input_dim, growth_rate):
        super(BaseDenseLayer, self).__init__()

        bottleneck = 128  # fixed width of the 1x1 bottleneck convolution
        stages = [
            nn.BatchNorm2d(input_dim),
            nn.ReLU(inplace=True),
            nn.Conv2d(input_dim, bottleneck, kernel_size=1, bias=False),
            nn.BatchNorm2d(bottleneck),
            nn.ReLU(inplace=True),
            nn.Conv2d(bottleneck, growth_rate, kernel_size=3, padding=1, bias=False),
        ]
        self.layer = nn.Sequential(*stages)
        self.initParameters()

    def initParameters(self):
        """Xavier-initialise the two convolutions (indices 2 and 5 of ``layer``)."""
        params = self.state_dict()
        for conv_key in ('layer.2.weight', 'layer.5.weight'):
            nn.init.xavier_normal(params[conv_key])

    def forward(self, x):
        """Return ``x`` with the newly computed features appended on dim 1."""
        return torch.cat([x, self.layer(x)], 1)
35 |
class DenseBlock(nn.Module):
    """Stack of ``blockDepth`` dense layers with an optional 1x1 transition.

    Every ``BaseDenseLayer`` adds ``growthRate`` channels, so the stack ends
    with ``inputDim + blockDepth * growthRate`` channels.  With
    ``withTrans=True`` a BN-ReLU-Conv1x1 transition maps them to
    ``outputDim``; otherwise the concatenated features are returned as is.
    """

    def __init__(self, inputDim=256, outputDim=256, growthRate=32, blockDepth=8, withTrans=True):
        super(DenseBlock, self).__init__()

        self.inputDim = inputDim
        self.outputDim = outputDim
        self.blockDepth = blockDepth
        self.growthRate = growthRate
        self.withTrans = withTrans

        # Layer k sees the block input plus the k feature groups added so far.
        self.denseLayer = nn.Sequential(*[
            BaseDenseLayer(self.inputDim + depth*self.growthRate, self.growthRate)
            for depth in range(self.blockDepth)
        ])

        catDim = self.inputDim + self.blockDepth*self.growthRate
        if self.withTrans:
            self.transition = nn.Sequential(
                nn.BatchNorm2d(catDim),
                nn.ReLU(inplace=True),
                nn.Conv2d(catDim, self.outputDim, kernel_size=1, bias=False),
            )
        self.initParameters()

    def initParameters(self):
        """Xavier-initialise the transition convolution when it exists."""
        if not self.withTrans:
            return
        nn.init.xavier_normal(self.state_dict()['transition.2.weight'])

    def forward(self, x):
        """Apply the dense layers and, if enabled, the transition."""
        features = self.denseLayer(x)
        return self.transition(features) if self.withTrans else features
82 |
83 | # def baseLayer(self, indx):
84 | # srcDim = self.inputDim+indx*self.growthRate
85 | # return list([
86 | # nn.BatchNorm2d(srcDim),
87 | # nn.ReLU(inplace=True),
88 | # nn.Conv2d(in_channels=srcDim, out_channels=128, kernel_size=1, bias=False),
89 | # nn.BatchNorm2d(128),
90 | # nn.ReLU(inplace=True),
91 | # nn.Conv2d(in_channels=128, out_channels=self.growthRate, kernel_size=3, padding=1)
92 | # ])
93 |
94 |
95 | ### torch vision implementation
96 |
97 |
def densenet121(pretrained=False, **kwargs):
    r"""Densenet-121 model from
    `"Densely Connected Convolutional Networks" <https://arxiv.org/pdf/1608.06993.pdf>`_

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        **kwargs: forwarded to the ``DenseNet`` constructor.
    """
    model = DenseNet(num_init_features=64, growth_rate=32, block_config=(6, 12, 24, 16),
                     **kwargs)
    if pretrained:
        # No `model_urls` table is defined in this module, so looking the URL
        # up there raised NameError; use the torchvision checkpoint location
        # directly.
        checkpoint_url = 'https://download.pytorch.org/models/densenet121-a639ec97.pth'
        model.load_state_dict(model_zoo.load_url(checkpoint_url))
    return model
109 |
class _DenseLayer(nn.Sequential):
    """Bottleneck dense layer whose output is concatenated to its input.

    Sub-module names ('norm.1', 'conv.1', ...) are kept exactly as they are
    because they determine the state-dict keys.
    """

    def __init__(self, num_input_features, growth_rate, bn_size, drop_rate):
        super(_DenseLayer, self).__init__()
        bottleneck_features = bn_size * growth_rate

        self.add_module('norm.1', nn.BatchNorm2d(num_input_features))
        self.add_module('relu.1', nn.ReLU(inplace=True))
        self.add_module('conv.1', nn.Conv2d(num_input_features, bottleneck_features,
                                            kernel_size=1, stride=1, bias=False))
        self.add_module('norm.2', nn.BatchNorm2d(bottleneck_features))
        self.add_module('relu.2', nn.ReLU(inplace=True))
        self.add_module('conv.2', nn.Conv2d(bottleneck_features, growth_rate,
                                            kernel_size=3, stride=1, padding=1, bias=False))
        self.drop_rate = drop_rate

    def forward(self, x):
        """Compute the new features, apply optional dropout, append them to ``x``."""
        fresh = super(_DenseLayer, self).forward(x)
        if self.drop_rate > 0:
            fresh = F.dropout(fresh, p=self.drop_rate, training=self.training)
        return torch.cat([x, fresh], 1)
128 |
129 |
class _DenseBlock(nn.Sequential):
    """Sequence of ``num_layers`` dense layers named ``denselayer1..N``.

    Each layer receives ``growth_rate`` more input channels than the one
    before it.
    """

    def __init__(self, num_layers, num_input_features, bn_size, growth_rate, drop_rate):
        super(_DenseBlock, self).__init__()
        in_features = num_input_features
        for position in range(1, num_layers + 1):
            self.add_module('denselayer%d' % position,
                            _DenseLayer(in_features, growth_rate, bn_size, drop_rate))
            in_features += growth_rate
136 |
137 |
class _Transition(nn.Sequential):
    """BN-ReLU-Conv1x1 transition that changes the channel count.

    The average pooling of the reference implementation is intentionally
    left out here (it stays commented out below).
    """

    def __init__(self, num_input_features, num_output_features):
        super(_Transition, self).__init__()
        parts = (
            ('norm', nn.BatchNorm2d(num_input_features)),
            ('relu', nn.ReLU(inplace=True)),
            ('conv', nn.Conv2d(num_input_features, num_output_features,
                               kernel_size=1, stride=1, bias=False)),
        )
        for part_name, part in parts:
            self.add_module(part_name, part)
        # self.add_module('pool', nn.AvgPool2d(kernel_size=2, stride=2))
146 |
147 |
class DenseNet(nn.Module):
    r"""Densenet-BC model class, based on
    `"Densely Connected Convolutional Networks" <https://arxiv.org/pdf/1608.06993.pdf>`_

    Args:
        growth_rate (int) - how many filters to add each layer (`k` in paper)
        block_config (list of 4 ints) - how many layers in each pooling block
        num_init_features (int) - the number of filters to learn in the first convolution layer
        bn_size (int) - multiplicative factor for number of bottle neck layers
          (i.e. bn_size * k features in the bottleneck layer)
        drop_rate (float) - dropout rate after each dense layer
        num_classes (int) - number of classification classes
    """
    def __init__(self, growth_rate=32, block_config=(6, 12, 24, 16),
                 num_init_features=64, bn_size=4, drop_rate=0, num_classes=1000):

        super(DenseNet, self).__init__()

        # Stem: 7x7 convolution, batch norm, ReLU, 3x3 max pooling.
        stem = OrderedDict()
        stem['conv0'] = nn.Conv2d(3, num_init_features, kernel_size=7, stride=2, padding=3, bias=False)
        stem['norm0'] = nn.BatchNorm2d(num_init_features)
        stem['relu0'] = nn.ReLU(inplace=True)
        stem['pool0'] = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.features = nn.Sequential(stem)

        # Dense blocks; every block except the last is followed by a
        # transition that halves the channel count.
        channels = num_init_features
        last_stage = len(block_config) - 1
        for stage, depth in enumerate(block_config):
            self.features.add_module(
                'denseblock%d' % (stage + 1),
                _DenseBlock(num_layers=depth, num_input_features=channels,
                            bn_size=bn_size, growth_rate=growth_rate, drop_rate=drop_rate))
            channels = channels + depth * growth_rate
            if stage == last_stage:
                continue
            halved = channels // 2
            self.features.add_module(
                'transition%d' % (stage + 1),
                _Transition(num_input_features=channels, num_output_features=halved))
            channels = halved

        # Final batch norm
        self.features.add_module('norm5', nn.BatchNorm2d(channels))

        # Linear classifier
        self.classifier = nn.Linear(channels, num_classes)

        # Official init from torch repo.
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                nn.init.kaiming_normal(module.weight.data)
            elif isinstance(module, nn.BatchNorm2d):
                module.weight.data.fill_(1)
                module.bias.data.zero_()
            elif isinstance(module, nn.Linear):
                module.bias.data.zero_()

    def forward(self, x):
        """Return class scores: features, ReLU, 7x7 average pooling, linear layer."""
        features = self.features(x)
        pooled = F.avg_pool2d(F.relu(features, inplace=True), kernel_size=7, stride=1)
        flat = pooled.view(features.size(0), -1)
        return self.classifier(flat)
--------------------------------------------------------------------------------
/tools/parse_caffe_model.py:
--------------------------------------------------------------------------------
1 | ### used to parse parameters in .caffemodel to a dict
# Make the local pycaffe build importable; the path is machine specific.
import sys
caffe_root = '/media/xiaofeng/codes/LinuxFiles/caffe'
sys.path.insert(0, caffe_root + '/python')

import caffe

import os
import numpy as np

# Network definition and trained weights to read.
caffe_cfg = './model/DenseNet_121.prototxt'
caffemodel_path = './model/DenseNet_121.caffemodel'

# Output location of the exported .mat file (created if missing).
target_root = './model/pretrain'
target_path = '%s/densenet_121'%(target_root)
if not os.path.exists(target_root):
    os.makedirs(target_root)


caffe.set_mode_cpu()
net = caffe.Net(caffe_cfg, caffemodel_path, caffe.TEST)

# Suffix used for the n-th parameter blob of a layer: '<layer>.weight',
# '<layer>.bias', '<layer>.3'.
indxStr = ['weight', 'bias', '3']
model_dict = dict()
# NOTE(review): indentation was lost in the reviewed copy; the blob loop is
# assumed to be nested under the 'conv' name filter -- confirm.
for key in net.params.keys():
    if 'conv' in key:
        print('-- {}, len: {}'.format(key, len(net.params[key])))
        for indx in range(len(net.params[key])):
            currKey = '%s.%s'%(key, indxStr[indx])
            shape = net.params[key][indx].data.shape  # computed but not used
            data = net.params[key][indx].data

            model_dict[currKey] = data

        # if key == 'conv1_1':
        #     print(net.params[key][0].data)
        #     print(net.params[key][1].data)

import scipy.io as sio

# sio.savemat('conv1_1', {'conv1_1': model_dict['conv1_1'][0]})
print('save model to %s'%target_path)
sio.savemat(target_path, model_dict)
--------------------------------------------------------------------------------
/tools/resnet.py:
--------------------------------------------------------------------------------
1 | # copy from torchvision resnet(https://github.com/pytorch/vision/blob/master/torchvision/models/resnet.py)
2 |
3 | import torch.nn as nn
4 | import torch.utils.model_zoo as model_zoo
5 |
6 |
# Checkpoint download URLs, keyed by architecture name.
model_urls = dict(
    resnet18='https://download.pytorch.org/models/resnet18-5c106cde.pth',
    resnet34='https://download.pytorch.org/models/resnet34-333f7ec4.pth',
    resnet50='https://download.pytorch.org/models/resnet50-19c8e357.pth',
    resnet101='https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
    resnet152='https://download.pytorch.org/models/resnet152-b121ed2d.pth',
)
14 |
15 |
def conv3x3(in_planes, out_planes, stride=1):
    """Build a bias-free 3x3 convolution with one pixel of padding.

    Args:
        in_planes: number of input channels.
        out_planes: number of output channels.
        stride: convolution stride (default 1).

    Returns:
        An ``nn.Conv2d`` module.
    """
    conv_kwargs = dict(kernel_size=3, stride=stride, padding=1, bias=False)
    return nn.Conv2d(in_planes, out_planes, **conv_kwargs)
20 |
21 |
class BasicBlock(nn.Module):
    """Residual block built from two 3x3 convolutions, each followed by batch norm.

    Args:
        inplanes: number of input channels.
        planes: number of channels produced by both convolutions.
        stride: stride of the first convolution (default 1).
        downsample: optional module applied to the input to form the
            shortcut branch; when ``None`` the input itself is the shortcut.
    """

    # Ratio between the block's output channels and ``planes``.
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        # Only the first convolution receives the stride.
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Return ``relu(main_path(x) + shortcut(x))``."""
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))

        # Shortcut branch: identity unless a downsample module was supplied.
        residual = x if self.downsample is None else self.downsample(x)

        out += residual
        return self.relu(out)
52 |
53 |
class Bottleneck(nn.Module):
    """Residual bottleneck block: 1x1 -> 3x3 -> 1x1 convolutions with batch norm.

    The final 1x1 convolution widens the output to ``planes * expansion``
    channels.

    Args:
        inplanes: number of input channels.
        planes: number of channels of the two inner convolutions.
        stride: stride of the 3x3 convolution (default 1).
        downsample: optional module applied to the input to form the
            shortcut branch; when ``None`` the input itself is the shortcut.
    """

    # Ratio between the block's output channels and ``planes``.
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(Bottleneck, self).__init__()
        # Derive the output width from `expansion` rather than a literal 4,
        # so a subclass overriding `expansion` stays consistent with code
        # that sizes layers from `block.expansion`.
        out_planes = planes * self.expansion
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
                               padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, out_planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_planes)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Return ``relu(main_path(x) + shortcut(x))``."""
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        # Shortcut branch: identity unless a downsample module was supplied.
        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out
91 |
92 |
class ResNet(nn.Module):
    """ResNet: convolutional stem, four residual stages, and a linear classifier.

    Args:
        block: residual block class. It must expose an ``expansion`` class
            attribute and accept ``(inplanes, planes, stride, downsample)``.
        layers: sequence of four ints giving the number of blocks per stage.
        num_classes: output size of the final fully connected layer.
    """

    def __init__(self, block, layers, num_classes=1000):
        # `math` is not imported at module level in this file; without this
        # import the weight initialisation below fails with NameError.
        import math

        super(ResNet, self).__init__()
        # Running channel count, updated by `_make_layer` as stages are built.
        self.inplanes = 64
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
        self.avgpool = nn.AvgPool2d(7, stride=1)
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                # Zero-mean normal with std sqrt(2 / (kh * kw * out_channels)).
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

    def _make_layer(self, block, planes, blocks, stride=1):
        """Build one stage of ``blocks`` residual blocks.

        Only the first block receives ``stride`` and, when the shape of the
        shortcut would not match the block output, a 1x1 conv + batch norm
        ``downsample`` module. Updates ``self.inplanes`` to the stage's
        output channel count.
        """
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes * block.expansion),
            )

        layers = [block(self.inplanes, planes, stride, downsample)]
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes))

        return nn.Sequential(*layers)

    def forward(self, x):
        """Run the stem, the four stages, average pooling, and the classifier."""
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        # Flatten everything except the batch dimension for the linear layer.
        x = x.view(x.size(0), -1)
        x = self.fc(x)

        return x
--------------------------------------------------------------------------------