├── .gitignore
├── README.md
├── datasets.py
├── matlab
│   └── gen_test_data_for_mscn.m
├── my_models.py
├── results
│   ├── figure_nyu.png
│   └── table_nyu.png
├── test.py
├── test_samples
│   ├── nyu_v2_175.mat
│   └── nyu_v2_9.mat
└── tools
    ├── EvalutateMetrics.py
    ├── densenet.py
    ├── parse_caffe_model.py
    └── resnet.py

/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache__/
2 | */__pycache__/
3 | *.mat
4 | Dataset/
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Introduction for MSCNNS
2 | MSCNNS (Multi-scale Sub-pixel Convolutional Network with a Neighborhood Smoothness constraint) is a CNN-based approach for monocular depth estimation.
3 | 
4 | For technical details, please see this [paper](https://ieeexplore.ieee.org/document/8624409).
5 | 
6 | ## Prerequisites
7 | * MATLAB R2017a (or another compatible version)
8 | * Python v3.5.x
9 | * PyTorch v0.3.0 (or a later version)
10 | * numpy
11 | * scipy
12 | 
13 | ## How to test
14 | 
15 | ### Quick test
16 | 
17 | You may use the provided model (see the BaiduYun link below) and the bundled test samples to test this approach as follows:
18 | 
19 | `python3 test.py --model <model_path> --image ./test_samples/nyu_v2_175.mat`
20 | 
21 | ### Test on the whole NYU Depth v2 dataset
22 | 
23 | 1. Download [The Dataset](https://cs.nyu.edu/~silberman/datasets/nyu_depth_v2.html) and [The Train/Test Split file](https://cs.nyu.edu/~silberman/projects/indoor_scene_seg_sup.html).
24 | 2. Suppose you have saved the dataset in `<dataset_path>` and the split file in `<split_path>`. Open ```matlab/gen_test_data_for_mscn.m``` and assign `<dataset_path>` to 'NYUv2_data' and `<split_path>` to 'split_file'.
25 | 3. Run ```matlab/gen_test_data_for_mscn.m```; the test data will be generated in '../Dataset/test'. You may change the save root 'test_root' to any location you like.
26 | 4. Download the model from the `BaiduYun disk (Link: https://pan.baidu.com/s/1U0hw58K2M0y5QE4c3hbNng password: qnv3)`.
27 | 5. Test the model as follows:
28 | 
29 | `python3 test.py --model <model_path> --data <test_data_root>`
30 | 
31 | ## Results
32 | 
33 | Note that you may find the references and more comparisons in the aforementioned paper.
34 | 
35 | ### Quantitative results
36 | <img src="./results/table_nyu.png">
37 | 
38 | ### Qualitative results
39 | <img src="./results/figure_nyu.png">
40 | 
41 | ### Citation
42 | Please consider citing the following paper if the code is helpful in your research work:
43 | <pre>
44 | @ARTICLE{8624409, 
45 |   author={Shiyu Zhao and Lin Zhang and Ying Shen and Shengjie Zhao and Huijuan Zhang}, 
46 |   journal={IEEE Access}, 
47 |   title={Super-Resolution for Monocular Depth Estimation With Multi-Scale Sub-Pixel Convolutions and a Smoothness Constraint}, 
48 |   year={2019}, 
49 |   volume={7}, 
50 |   pages={16323-16335}
51 | }
52 | </pre>
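
For programmatic use, the sketch below shows one way to consume a generated test set with the `NYUv2FusionSet` class from `datasets.py` (listed further down). This is a minimal illustration and not part of the original repository: the checkpoint filename is a placeholder for the BaiduYun model, and the data root assumes the default `test_root` of the MATLAB script.

```python
# Minimal sketch. Assumptions: test data under ../Dataset/test, the PyTorch 0.3-style
# API used by this repo, and a checkpoint stored in the {'model': ...} layout that
# test.py expects.
import torch
from torch.autograd import Variable
from torch.utils.data import DataLoader

from datasets import NYUv2FusionSet

testSet = NYUv2FusionSet('../Dataset/test', is_train=False)  # center-crops to 240x320
testLoader = DataLoader(testSet, batch_size=1, shuffle=False)

model = torch.load('mscnns_model.pth')['model']  # placeholder name for the downloaded model
model.setTrainMode(False)
model.eval()
model = model.cuda()

for rgb, depth, depthx2, depthx4, depthx8, fileName in testLoader:
    inputData = Variable(rgb.cuda(), volatile=True)  # inference only (PyTorch 0.3 idiom)
    predictions = model(inputData)
    # predictions[0] is the full-resolution log-depth map; apply exp() to obtain depth in meters.
```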
53 | 
--------------------------------------------------------------------------------
/datasets.py:
--------------------------------------------------------------------------------
1 | import torch.utils.data as data
2 | import torch
3 | 
4 | import glob
5 | import scipy.io as sio
6 | import numpy as np
7 | 
8 | # TODO: data augmentation
9 | class NYUv2DataSet(data.Dataset):
10 |     def __init__(self, data_root, is_train=True):
11 |         super(NYUv2DataSet, self).__init__()
12 | 
13 |         self.dataRoot = data_root
14 |         self.dataFiles = glob.glob('%s/*.mat'%self.dataRoot)
15 |         self.dataNum = len(self.dataFiles)
16 |         self.requiredSize = [240, 320]
17 |         self.reqSizex4 = [60, 80]
18 |         self.reqSizex8 = [30, 40]
19 |         self.isTrain = is_train
20 |         self.leastScale = 8
21 | 
22 |     def __getitem__(self, index):
23 |         currFile = self.dataFiles[index]
24 |         data = sio.loadmat(currFile)
25 |         data = data['data']
26 | 
27 |         rgb = data['rgb'][0,0].transpose((2, 0, 1))
28 |         depth = data['depth'][0,0]
29 |         depthx4 = data['depthx4'][0,0]
30 |         depthx8 = data['depthx8'][0,0]
31 |         imageSize = data['imageSize'][0,0][0]
32 | 
33 |         if imageSize[0] < self.requiredSize[0] or imageSize[1] < self.requiredSize[1]:
34 |             raise ValueError('input image size is smaller than [240, 320]')
35 | 
36 |         if self.isTrain:
37 |             import random
38 |             offset_x = random.randint(0, imageSize[0] - self.requiredSize[0]) // self.leastScale
39 |             offset_y = random.randint(0, imageSize[1] - self.requiredSize[1]) // self.leastScale
40 |         else:
41 |             offset_x = int((imageSize[0] - self.requiredSize[0])/2) // self.leastScale
42 |             offset_y = int((imageSize[1] - self.requiredSize[1])/2) // self.leastScale
43 | 
44 |         rgb = rgb[:, self.leastScale*offset_x:self.leastScale*offset_x+self.requiredSize[0],
45 |             self.leastScale*offset_y:self.leastScale*offset_y+self.requiredSize[1]]
46 | 
47 |         depth = depth[np.newaxis, self.leastScale*offset_x:self.leastScale*offset_x+self.requiredSize[0],
48 |             self.leastScale*offset_y:self.leastScale*offset_y+self.requiredSize[1]]
49 | 
50 |         depthx4 = depthx4[np.newaxis, 2*offset_x:2*offset_x+self.reqSizex4[0],
51 |             2*offset_y:2*offset_y+self.reqSizex4[1]]
52 |         depthx8 = depthx8[np.newaxis, offset_x:offset_x+self.reqSizex8[0],
53 |             offset_y:offset_y+self.reqSizex8[1]]
54 | 
55 |         return torch.from_numpy(rgb).float(), torch.from_numpy(depth).float(), \
56 |             torch.from_numpy(depthx4).float(), torch.from_numpy(depthx8).float(), currFile
57 | 
58 |     def __len__(self):
59 |         return self.dataNum
60 | 
61 | 
62 | class NYUv2FusionSet(data.Dataset):
63 |     def __init__(self, data_root, is_train=True, rgb_norm=False):
64 |         super(NYUv2FusionSet, self).__init__()
65 | 
66 |         self.rgb_norm = rgb_norm
67 |         self.dataRoot = data_root
68 |         self.dataFiles = glob.glob('%s/*.mat'%self.dataRoot)
69 |         self.dataNum = len(self.dataFiles)
70 |         self.requiredSize = [240, 320]
71 |         self.reqSizex2 = [120, 160]
72 |         self.reqSizex4 = [60, 80]
73 |         self.reqSizex8 = [30, 40]
74 |         ## for make3d
75 |         # self.requiredSize = [230, 172]
76 |         # self.reqSizex2 = [115, 86]
77 |         # self.reqSizex4 = [57, 43]
78 |         # self.reqSizex8 = [28, 21]
79 |         self.isTrain = is_train
80 |         self.leastScale = 8
81 | 
82 |     def __getitem__(self, index):
83 |         currFile = self.dataFiles[index]
84 |         data = sio.loadmat(currFile)
85 |         data = data['data']
86 | 
87 |         rgb = data['rgb'][0,0].transpose((2, 0, 1))
88 |         if self.rgb_norm:
89 |             rgb = rgb/255.
90 | depth = data['depth'][0,0] 91 | depthx2 = data['depthx2'][0,0] 92 | depthx4 = data['depthx4'][0,0] 93 | depthx8 = data['depthx8'][0,0] 94 | imageSize = data['imageSize'][0,0][0] 95 | 96 | if imageSize[0] < self.requiredSize[0] or imageSize[1] < self.requiredSize[1]: 97 | raise ValueError('input image size is smaller than [240, 320]') 98 | 99 | if self.isTrain: 100 | import random 101 | offset_x = random.randint(0, imageSize[0] - self.requiredSize[0]) // self.leastScale 102 | offset_y = random.randint(0, imageSize[1] - self.requiredSize[1]) // self.leastScale 103 | else: 104 | offset_x = int((imageSize[0] - self.requiredSize[0])/2) // self.leastScale 105 | offset_y = int((imageSize[1] - self.requiredSize[1])/2) // self.leastScale 106 | 107 | rgb = rgb[:, self.leastScale*offset_x:self.leastScale*offset_x+self.requiredSize[0], 108 | self.leastScale*offset_y:self.leastScale*offset_y+self.requiredSize[1]] 109 | 110 | depth = depth[np.newaxis, self.leastScale*offset_x:self.leastScale*offset_x+self.requiredSize[0], 111 | self.leastScale*offset_y:self.leastScale*offset_y+self.requiredSize[1]] 112 | 113 | depthx2 = depthx2[np.newaxis, 4*offset_x:4*offset_x+self.reqSizex2[0], 114 | 4*offset_y:4*offset_y+self.reqSizex2[1]] 115 | 116 | depthx4 = depthx4[np.newaxis, 2*offset_x:2*offset_x+self.reqSizex4[0], 117 | 2*offset_y:2*offset_y+self.reqSizex4[1]] 118 | depthx8 = depthx8[np.newaxis, offset_x:offset_x+self.reqSizex8[0], 119 | offset_y:offset_y+self.reqSizex8[1]] 120 | 121 | return torch.from_numpy(rgb).float(), torch.from_numpy(depth).float(), \ 122 | torch.from_numpy(depthx2).float(), torch.from_numpy(depthx4).float(), torch.from_numpy(depthx8).float(), \ 123 | currFile 124 | 125 | def __len__(self): 126 | return self.dataNum 127 | 128 | 129 | class NYUv2MaskSet(data.Dataset): 130 | def __init__(self, data_root, is_train=True, rgb_norm=False): 131 | super(NYUv2MaskSet, self).__init__() 132 | 133 | self.rgb_norm = rgb_norm 134 | self.dataRoot = data_root 135 | self.dataFiles = glob.glob('%s/*.mat'%self.dataRoot) 136 | self.dataNum = len(self.dataFiles) 137 | self.requiredSize = [240, 320] 138 | self.reqSizex2 = [120, 160] 139 | self.reqSizex4 = [60, 80] 140 | self.reqSizex8 = [30, 40] 141 | self.isTrain = is_train 142 | self.leastScale = 8 143 | 144 | def __getitem__(self, index): 145 | currFile = self.dataFiles[index] 146 | 147 | # print('load %s'%currFile) 148 | data = sio.loadmat(currFile) 149 | data = data['data'] 150 | 151 | rgb = data['rgb'][0,0].transpose((2, 0, 1)) 152 | if self.rgb_norm: 153 | rgb = rgb/255. 
154 | depth = data['depth'][0,0] 155 | depthx2 = data['depthx2'][0,0] 156 | depthx4 = data['depthx4'][0,0] 157 | depthx8 = data['depthx8'][0,0] 158 | mask = data['dpMask'][0,0] 159 | maskx2 = data['dpMaskx2'][0,0] 160 | maskx4 = data['dpMaskx4'][0,0] 161 | maskx8 = data['dpMaskx8'][0,0] 162 | imageSize = data['imageSize'][0,0][0] 163 | 164 | if imageSize[0] < self.requiredSize[0] or imageSize[1] < self.requiredSize[1]: 165 | raise ValueError('input image size is smaller than [240, 320]') 166 | 167 | if self.isTrain: 168 | import random 169 | offset_x = random.randint(0, imageSize[0] - self.requiredSize[0]) // self.leastScale 170 | offset_y = random.randint(0, imageSize[1] - self.requiredSize[1]) // self.leastScale 171 | else: 172 | offset_x = int((imageSize[0] - self.requiredSize[0])/2) // self.leastScale 173 | offset_y = int((imageSize[1] - self.requiredSize[1])/2) // self.leastScale 174 | 175 | rgb = rgb[:, self.leastScale*offset_x:self.leastScale*offset_x+self.requiredSize[0], 176 | self.leastScale*offset_y:self.leastScale*offset_y+self.requiredSize[1]] 177 | 178 | depth = depth[np.newaxis, self.leastScale*offset_x:self.leastScale*offset_x+self.requiredSize[0], 179 | self.leastScale*offset_y:self.leastScale*offset_y+self.requiredSize[1]] 180 | 181 | depthx2 = depthx2[np.newaxis, 4*offset_x:4*offset_x+self.reqSizex2[0], 182 | 4*offset_y:4*offset_y+self.reqSizex2[1]] 183 | 184 | depthx4 = depthx4[np.newaxis, 2*offset_x:2*offset_x+self.reqSizex4[0], 185 | 2*offset_y:2*offset_y+self.reqSizex4[1]] 186 | depthx8 = depthx8[np.newaxis, offset_x:offset_x+self.reqSizex8[0], 187 | offset_y:offset_y+self.reqSizex8[1]] 188 | 189 | mask = mask[np.newaxis, self.leastScale*offset_x:self.leastScale*offset_x+self.requiredSize[0], 190 | self.leastScale*offset_y:self.leastScale*offset_y+self.requiredSize[1]] 191 | 192 | maskx2 = maskx2[np.newaxis, 4*offset_x:4*offset_x+self.reqSizex2[0], 193 | 4*offset_y:4*offset_y+self.reqSizex2[1]] 194 | 195 | maskx4 = maskx4[np.newaxis, 2*offset_x:2*offset_x+self.reqSizex4[0], 196 | 2*offset_y:2*offset_y+self.reqSizex4[1]] 197 | maskx8 = maskx8[np.newaxis, offset_x:offset_x+self.reqSizex8[0], 198 | offset_y:offset_y+self.reqSizex8[1]] 199 | 200 | return torch.from_numpy(rgb).float(), torch.from_numpy(depth).float(), \ 201 | torch.from_numpy(depthx2).float(), torch.from_numpy(depthx4).float(), torch.from_numpy(depthx8).float(), \ 202 | currFile, torch.from_numpy(mask).float(), torch.from_numpy(maskx2).float(), \ 203 | torch.from_numpy(maskx4).float(), torch.from_numpy(maskx8).float() 204 | 205 | def __len__(self): 206 | return self.dataNum -------------------------------------------------------------------------------- /matlab/gen_test_data_for_mscn.m: -------------------------------------------------------------------------------- 1 | NYUv2_data = ''; % path to nyu_depth_v2_labeled.mat 2 | split_file = ''; % path to splits.mat 3 | 4 | test_root = '../Dataset/test'; 5 | 6 | if isempty(NYUv2_data) == 1 7 | ME = MException('Input:DataNotAssigned',... 8 | 'you should assign the path of nyu_depth_v2_labeled.mat to the variable, NYUv2_data'); 9 | throw(ME); 10 | end 11 | if isempty(split_file) == 1 12 | ME = MException('Input:DataNotAssigned',... 
13 |         'you should assign the path of splits.mat to the variable, split_file');
14 |     throw(ME);
15 | end
16 | 
17 | if ~exist(test_root, 'dir')
18 |     mkdir(test_root);
19 | end
20 | 
21 | targetSize = [240, 320];
22 | % targetScale = (480-12)/240;
23 | padding_1 = 6; % up and down
24 | padding_2 = 8; % left and right
25 | 
26 | farPlane = 10; % largest depth value is 9.9955
27 | nearPlane = 0.7; % smallest value is 0.7133
28 | 
29 | NYUv2Data = load(NYUv2_data);
30 | images = NYUv2Data.images;
31 | depths = NYUv2Data.depths;
32 | clear NYUv2Data
33 | 
34 | disp('loading data... this may take a minute.');
35 | splitIndx = load(split_file);
36 | trainIndx = splitIndx.trainNdxs;
37 | % testIndx = splitIndx.testNdxs;
38 | trainNum = length(trainIndx);
39 | 
40 | [~,~,~,imageNum] = size(images);
41 | if imageNum ~= 1449
42 |     ME = MException('Input:DataNotAssigned',...
43 |         'the dataset does not contain 1449 images, check nyu_depth_v2_labeled.mat.');
44 |     throw(ME);
45 | end
46 | 
47 | train_count = 0;
48 | tic
49 | for indx = 1:imageNum
50 |     isTrain = false;
51 |     if train_count+1 <= trainNum && trainIndx(train_count+1) == indx
52 |         isTrain = true;
53 |         train_count = train_count + 1;
54 |     end
55 | 
56 |     if isTrain == false
57 |         % resize image and convert depth data
58 |         RGBImage = images(:,:,:,indx);
59 |         DepthMat = depths(:,:,indx);
60 | 
61 |         % crop white padding
62 |         RGBImage = RGBImage(padding_1+1:end-padding_1,padding_2+1:end-padding_2,:);
63 |         DepthMat = DepthMat(padding_1+1:end-padding_1,padding_2+1:end-padding_2);
64 | 
65 |         RGBImage = im2double(RGBImage);
66 |         Depth = DepthMat;
67 |         InfPos = find(Depth > farPlane);
68 |         Depth(InfPos) = farPlane;
69 |         Depth = single(Depth);
70 |         zerosPos = find(Depth <= 0);
71 |         Depth(zerosPos) = (rand(1)+1);
72 | 
73 |         % scale to target size
74 |         RGBImage = imresize(RGBImage, [480, 640]);
75 |         Depth = imresize(Depth, [480, 640]);
76 | 
77 |         RGBImage_rs = imresize(RGBImage, targetSize);
78 |         Depth_rs = imresize(Depth, targetSize);
79 |         Depth_rsx2 = single(imresize(Depth_rs, 1/2));
80 |         Depth_rsx4 = single(imresize(Depth_rs, 1/4));
81 |         Depth_rsx8 = single(imresize(Depth_rs, 1/8));
82 | 
83 |         Depth_rs_t = log(Depth_rs);
84 |         Depth_rsx2_t = log(Depth_rsx2);
85 |         Depth_rsx4_t = log(Depth_rsx4);
86 |         Depth_rsx8_t = log(Depth_rsx8);
87 | 
88 |         data.rgb = RGBImage_rs;
89 |         data.depth = Depth_rs_t;
90 |         data.depthx2 = Depth_rsx2_t;
91 |         data.depthx4 = Depth_rsx4_t;
92 |         data.depthx8 = Depth_rsx8_t;
93 |         data.realDepth = Depth_rs;
94 |         data.imageSize = size(Depth_rs);
95 | 
96 |         saveFile = [test_root, '/nyu_v2_', num2str(indx), '.mat'];
97 |         save(saveFile, 'data');
98 |     end
99 | 
100 |     if mod(indx, 10) == 0
101 |         disp([num2str(indx),' images have been processed!']);
102 |         toc
103 |     end
104 | end
105 | 
106 | disp([num2str(imageNum),' images have been processed!']);
107 | 
108 | 
109 | 
110 | 
--------------------------------------------------------------------------------
/my_models.py:
--------------------------------------------------------------------------------
1 | from collections import OrderedDict
2 | 
3 | import torch
4 | import torch.nn as nn
5 | import torch.nn.functional as F
6 | from torch.autograd import Variable
7 | from torch.nn.modules.loss import _Loss
8 | 
9 | import scipy.io as sio
10 | import numpy as np
11 | 
12 | from tools.densenet import _DenseBlock, _Transition, DenseBlock
13 | 
14 | class UpsampleByPS(nn.Module):
15 |     def __init__(self, upscale_factor, in_channels=1, is_out_layer=False):
16 |         super(UpsampleByPS, self).__init__()
17 |         self.is_out_layer = is_out_layer
18 | 
19 |
self.conv1 = nn.Conv2d(in_channels, 64, (5, 5), (1, 1), (2, 2)) 20 | # self.conv2 = nn.Conv2d(64, 32, (3, 3), (1, 1), (1, 1)) 21 | self.conv2 = nn.Conv2d(64, 1 * (upscale_factor ** 2), (3, 3), (1, 1), (1, 1)) 22 | self.pixel_shuffle = nn.PixelShuffle(upscale_factor) 23 | self.initParameters() 24 | 25 | def initParameters(self): 26 | stateDict = self.state_dict() 27 | nn.init.xavier_normal(stateDict['conv1.weight']) 28 | nn.init.xavier_normal(stateDict['conv2.weight']) 29 | # nn.init.xavier_normal(stateDict['conv3.weight']) 30 | # nn.init.xavier_normal(stateDict['conv4.weight']) 31 | 32 | def forward(self, x): 33 | # x = F.leaky_relu(self.conv1(x), negative_slope=0.1, inplace=True) 34 | # x = F.leaky_relu(self.conv2(x), negative_slope=0.1, inplace=True) 35 | # x = F.leaky_relu(self.conv3(x), negative_slope=0.1, inplace=True) 36 | # if self.use_sig: 37 | # x = F.sigmoid(self.pixel_shuffle(self.conv4(x))) 38 | # else: 39 | # x = F.leaky_relu(self.pixel_shuffle(self.conv4(x))) 40 | # return x 41 | 42 | # out = F.relu(self.conv1(x[0]), inplace=True) 43 | # cat_out = torch.cat([out, x[1]], 1) 44 | # out = F.relu(self.conv2(cat_out), inplace=True) 45 | # out = F.relu(self.conv3(out), inplace=True) 46 | out = F.relu(self.conv1(x)) 47 | 48 | if self.is_out_layer: 49 | out = F.relu(self.pixel_shuffle(self.conv2(out))) 50 | else: 51 | out = self.pixel_shuffle(self.conv2(out)) 52 | return out 53 | 54 | 55 | class DFCN_PS_FS(nn.Module): 56 | """DFCN_PS_FS is short for DFCN with pixelshuffle and scale fusion""" 57 | def __init__(self, is_Train=True): 58 | super(DFCN_PS_FS, self).__init__() 59 | self.isTrain = is_Train 60 | 61 | self.conv0 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False) 62 | self.norm0 = nn.BatchNorm2d(64) 63 | self.relu0 = nn.ReLU(inplace=True) 64 | 65 | self.pool0 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 66 | self.denseblock1 = _DenseBlock(num_layers=6, num_input_features=64, bn_size=4, growth_rate=32, drop_rate=0) 67 | self.transition1 = _Transition(num_input_features=256, num_output_features=256 // 2) 68 | 69 | self.pool1 = nn.AvgPool2d(kernel_size=2, stride=2) 70 | self.denseblock2 = _DenseBlock(num_layers=12, num_input_features=128, bn_size=4, growth_rate=32, drop_rate=0) 71 | self.transition2 = _Transition(num_input_features=512, num_output_features=512 // 2) 72 | 73 | self.pool2 = nn.AvgPool2d(kernel_size=2, stride=2) 74 | self.denseblock3 = _DenseBlock(num_layers=24, num_input_features=256, bn_size=4, growth_rate=32, drop_rate=0) 75 | self.transition3 = _Transition(num_input_features=1024, num_output_features=1024 // 2) 76 | 77 | self.pool3 = nn.AvgPool2d(kernel_size=2, stride=2) 78 | self.denseblock4 = _DenseBlock(num_layers=16, num_input_features=512, bn_size=4, growth_rate=32, drop_rate=0) 79 | self.norm5 = nn.BatchNorm2d(1024) 80 | 81 | 82 | self.smthBlock = DenseBlock(inputDim=64, outputDim=128 ,growthRate=32, blockDepth=6) 83 | self.smthConv = nn.Conv2d(in_channels=128, out_channels=4, kernel_size=5, padding=2, bias=False) 84 | self.smthUpsample = nn.PixelShuffle(2) 85 | 86 | self.deconv16_ = nn.ConvTranspose2d(in_channels=1024, out_channels=512, kernel_size=(4, 4), stride=2, padding=(1, 1)) 87 | self.padding16 = nn.ReplicationPad2d((0, 0, 1, 0)) 88 | 89 | self.bx32_dconvx8 = nn.ConvTranspose2d(in_channels=512, out_channels=256, kernel_size=(4, 4), stride=2, padding=(1, 1)) 90 | self.bx32_dconvx4 = nn.ConvTranspose2d(in_channels=256, out_channels=128, kernel_size=(4, 4), stride=2, padding=(1, 1)) 91 | self.bx32_dconvx2 = 
nn.ConvTranspose2d(in_channels=128, out_channels=64, kernel_size=(4, 4), stride=2, padding=(1, 1)) 92 | self.bx32_dconvx1 = nn.ConvTranspose2d(in_channels=64, out_channels=32, kernel_size=(4, 4), stride=2, padding=(1, 1)) 93 | self.bx32_score = nn.Conv2d(in_channels=32, out_channels=1, kernel_size=3, padding=1, bias=False) 94 | 95 | self.bx16_dconvx8 = nn.ConvTranspose2d(in_channels=512, out_channels=256, kernel_size=(4, 4), stride=2, padding=(1, 1)) 96 | self.bx16_dconvx4 = nn.ConvTranspose2d(in_channels=256, out_channels=128, kernel_size=(4, 4), stride=2, padding=(1, 1)) 97 | self.bx16_dconvx2 = nn.ConvTranspose2d(in_channels=128, out_channels=64, kernel_size=(4, 4), stride=2, padding=(1, 1)) 98 | self.bx16_dconvx1 = nn.ConvTranspose2d(in_channels=64, out_channels=32, kernel_size=(4, 4), stride=2, padding=(1, 1)) 99 | self.bx16_score = nn.Conv2d(in_channels=32, out_channels=1, kernel_size=3, padding=1, bias=False) 100 | 101 | self.bx8_dconvx4 = nn.ConvTranspose2d(in_channels=256, out_channels=128, kernel_size=(4, 4), stride=2, padding=(1, 1)) 102 | self.bx8_dconvx2 = nn.ConvTranspose2d(in_channels=128, out_channels=64, kernel_size=(4, 4), stride=2, padding=(1, 1)) 103 | self.bx8_dconvx1 = nn.ConvTranspose2d(in_channels=64, out_channels=32, kernel_size=(4, 4), stride=2, padding=(1, 1)) 104 | self.bx8_score = nn.Conv2d(in_channels=32, out_channels=1, kernel_size=3, padding=1, bias=False) 105 | 106 | # self.bx4_dconvx2 = nn.ConvTranspose2d(in_channels=128, out_channels=64, kernel_size=(4, 4), stride=2, padding=(1, 1)) 107 | # self.bx4_dconvx1 = nn.ConvTranspose2d(in_channels=64, out_channels=32, kernel_size=(4, 4), stride=2, padding=(1, 1)) 108 | # self.bx4_score = nn.Conv2d(in_channels=32, out_channels=1, kernel_size=3, padding=1, bias=False) 109 | 110 | self.subconv_to_8 = UpsampleByPS(2, 512, is_out_layer=True) 111 | self.subconv_to_4 = UpsampleByPS(2, 1+256+256, is_out_layer=True) 112 | self.subconv_to_2 = UpsampleByPS(2, 1+128+128+128, is_out_layer=True) 113 | self.subconv_to_1_ = UpsampleByPS(2, 1+64+64+64, is_out_layer=True) 114 | 115 | self.fs_score_ = nn.Conv2d(in_channels=4, out_channels=1, kernel_size=1, padding=0, bias=False) 116 | 117 | self.initParameters() 118 | self.fixLayer() 119 | 120 | def setTrainMode(self, isTrain): 121 | self.isTrain = isTrain 122 | 123 | def fixLayer(self): 124 | for param in self.parameters(): 125 | if param is not None: 126 | param.requires_grad = False 127 | layerList = [self.smthBlock, self.smthConv, self.smthUpsample] 128 | for layer in layerList: 129 | for param in layer.parameters(): 130 | if param is not None: 131 | param.requires_grad = True 132 | 133 | def parameters(self): 134 | """ 135 | overload Module.parameters 136 | """ 137 | for name, param in self.named_parameters(): 138 | if param.requires_grad: 139 | yield param 140 | 141 | def initParameters(self): 142 | stateDict = self.state_dict() 143 | # nn.init.xavier_normal(stateDict['conv_1.weight']) 144 | 145 | nn.init.xavier_normal(stateDict['deconv16_.weight']) 146 | nn.init.xavier_normal(stateDict['bx32_dconvx8.weight']) 147 | nn.init.xavier_normal(stateDict['bx32_dconvx4.weight']) 148 | nn.init.xavier_normal(stateDict['bx32_dconvx2.weight']) 149 | nn.init.xavier_normal(stateDict['bx32_dconvx1.weight']) 150 | nn.init.xavier_normal(stateDict['bx32_score.weight']) 151 | nn.init.xavier_normal(stateDict['bx16_dconvx8.weight']) 152 | nn.init.xavier_normal(stateDict['bx16_dconvx4.weight']) 153 | nn.init.xavier_normal(stateDict['bx16_dconvx2.weight']) 154 | 
nn.init.xavier_normal(stateDict['bx16_dconvx1.weight']) 155 | nn.init.xavier_normal(stateDict['bx16_score.weight']) 156 | nn.init.xavier_normal(stateDict['bx8_dconvx4.weight']) 157 | nn.init.xavier_normal(stateDict['bx8_dconvx2.weight']) 158 | nn.init.xavier_normal(stateDict['bx8_dconvx1.weight']) 159 | nn.init.xavier_normal(stateDict['bx8_score.weight']) 160 | # nn.init.xavier_normal(stateDict['bx4_dconvx2.weight']) 161 | # nn.init.xavier_normal(stateDict['bx4_dconvx1.weight']) 162 | # nn.init.xavier_normal(stateDict['bx4_score.weight']) 163 | nn.init.uniform(stateDict['fs_score_.weight']) 164 | nn.init.xavier_normal(stateDict['smthConv.weight']) 165 | 166 | def alignScale(self, inputData, scaleSize): 167 | inputShape = inputData.data.shape 168 | if scaleSize[0] == inputShape[2] and scaleSize[1] == inputShape[3]: 169 | return inputData 170 | elif abs(scaleSize[0]-inputShape[2]) <= 2 and abs(scaleSize[1]-inputShape[3]) <= 2: 171 | return nn.functional.upsample(inputData, size=scaleSize, mode='bilinear') 172 | else: 173 | raise ValueError('target size[{}, {}] is far from input size[{}, {}]' 174 | .format(scaleSize[0], scaleSize[1], inputShape[2], inputShape[3])) 175 | 176 | def forward(self, x): 177 | # inputShape = x.data.shape 178 | # sizex1 = (inputShape[2], inputShape[3]) 179 | # sizex2 = (sizex1[0]//2, sizex1[1]//2) 180 | # sizex4 = (sizex2[0]//2, sizex2[1]//2) 181 | # sizex8 = (sizex4[0]//2, sizex4[1]//2) 182 | # sizex16 = (sizex8[0]//2, sizex8[1]//2) 183 | # sizex32 = (sizex16[0]//2, sizex16[1]//2) 184 | 185 | # out_2 = F.relu(self.bn_1(self.conv_1(x))) 186 | # out_4 = self.denseBlock1(self.pooling_1(out_2)) 187 | # out_8 = self.denseBlock2(self.pooling_2(out_4)) 188 | # out_16 = self.denseBlock3(self.pooling_3(out_8)) 189 | # out_32 = F.relu(self.bn_4(self.denseBlock4(self.pooling_4(out_16)))) 190 | # if self.isTrain: 191 | # out.volatile = False 192 | 193 | out_2 = self.relu0(self.norm0(self.conv0(x))) 194 | out_4 = self.transition1(self.denseblock1(self.pool0(out_2))) 195 | out_8 = self.transition2(self.denseblock2(self.pool1(out_4))) 196 | out_16 = self.transition3(self.denseblock3(self.pool2(out_8))) 197 | out_32 = self.norm5(self.denseblock4(self.pool3(out_16))) 198 | 199 | out_up_16 = self.padding16(self.deconv16_(out_32)) 200 | bx32_outx8 = F.relu(self.bx32_dconvx8(out_up_16)) 201 | bx32_outx4 = F.relu(self.bx32_dconvx4(bx32_outx8)) 202 | bx32_outx2 = F.relu(self.bx32_dconvx2(bx32_outx4)) 203 | bx32_outx1 = F.relu(self.bx32_dconvx1(bx32_outx2)) 204 | bx32_score = self.bx32_score(bx32_outx1) 205 | 206 | bx16_outx8 = F.relu(self.bx16_dconvx8(out_16)) 207 | bx16_outx4 = F.relu(self.bx16_dconvx4(bx16_outx8)) 208 | bx16_outx2 = F.relu(self.bx16_dconvx2(bx16_outx4)) 209 | bx16_outx1 = F.relu(self.bx16_dconvx1(bx16_outx2)) 210 | bx16_score = self.bx16_score(bx16_outx1) 211 | 212 | bx8_outx4 = F.relu(self.bx8_dconvx4(out_8)) 213 | bx8_outx2 = F.relu(self.bx8_dconvx2(bx8_outx4)) 214 | bx8_outx1 = F.relu(self.bx8_dconvx1(bx8_outx2)) 215 | bx8_score = self.bx8_score(bx8_outx1) 216 | 217 | # bx4_outx2 = self.alignScale(F.relu(self.bx4_dconvx2(out_4)), sizex2) 218 | # bx4_outx1 = self.alignScale(F.relu(self.bx4_dconvx1(bx4_outx2)), sizex1) 219 | # bx4_score = self.alignScale(self.bx4_score(bx4_outx1), sizex1) 220 | bx4_score = 0 221 | 222 | 223 | outx8 = self.subconv_to_8(out_up_16) 224 | outx4 = self.subconv_to_4(torch.cat([outx8, bx32_outx8, bx16_outx8], 1)) 225 | outx2 = self.subconv_to_2(torch.cat([outx4, bx32_outx4, bx16_outx4, bx8_outx4], 1)) 226 | outx1 = 
self.subconv_to_1_(torch.cat([outx2, bx32_outx2, bx16_outx2, bx8_outx2], 1))
227 | 
228 |         out_fs = self.fs_score_(torch.cat([bx32_score, bx16_score, bx8_score, outx1], 1))
229 | 
230 |         if self.isTrain:
231 |             out_smth = self.smthBlock(out_2)
232 |             out_smth = self.smthConv(out_smth)
233 |             out_smth = self.smthUpsample(out_smth)
234 | 
235 |             return (outx1, out_fs, bx4_score, bx8_score, bx16_score, bx32_score,
236 |                 outx2, outx4, outx8, out_smth)
237 |         else:
238 |             return (outx1, out_fs, bx4_score, bx8_score, bx16_score, bx32_score,
239 |                 outx2, outx4, outx8)
240 |         # return outx2, outx4, outx8, bx8_score, bx16_score, bx32_score
241 | 
242 |     def computeLoss(self, targets, predictions, with_mask=False, with_smth=False):
243 |         criterion = nn.MSELoss(size_average=True)
244 | 
245 |         if with_mask:
246 |             mask, maskx2, maskx4, maskx8 = targets[4], targets[5], targets[6], targets[7]
247 |             lossx1 = criterion(predictions[0]*mask, targets[0]*mask)
248 |             fs_loss = criterion(predictions[1]*mask, targets[0]*mask)
249 |             # bx4_loss = criterion(predictions[2]*mask, targets[0]*mask)
250 |             bx8_loss = criterion(predictions[3]*mask, targets[0]*mask)
251 |             bx16_loss = criterion(predictions[4]*mask, targets[0]*mask)
252 |             bx32_loss = criterion(predictions[5]*mask, targets[0]*mask)
253 | 
254 |             lossx2 = criterion(predictions[6]*maskx2, targets[1]*maskx2)
255 |             lossx4 = criterion(predictions[7]*maskx4, targets[2]*maskx4)
256 |             lossx8 = criterion(predictions[8]*maskx8, targets[3]*maskx8)
257 |         else:
258 |             lossx1 = criterion(predictions[0], targets[0])
259 |             fs_loss = criterion(predictions[1], targets[0])
260 |             # bx4_loss = criterion(predictions[2], targets[0])
261 |             bx8_loss = criterion(predictions[3], targets[0])
262 |             bx16_loss = criterion(predictions[4], targets[0])
263 |             bx32_loss = criterion(predictions[5], targets[0])
264 | 
265 |             lossx2 = criterion(predictions[6], targets[1])
266 |             lossx4 = criterion(predictions[7], targets[2])
267 |             lossx8 = criterion(predictions[8], targets[3])
268 | 
269 |         if with_smth:
270 |             SmthLoss = NeighborSmthLoss(lamda=0.01, t=2)
271 |             smthTerm = SmthLoss(predictions[0], predictions[9])
272 |             mainTerm = fs_loss + 0.5*(bx8_loss+bx16_loss+bx32_loss) + lossx1 + lossx2/2 + lossx4/4 + lossx8/8
273 |             # print('smooth term: %f, main term: %f'%(smthTerm.data[0], mainTerm.data[0]))
274 |             loss = smthTerm + mainTerm
275 |         else:
276 |             loss = fs_loss + 0.5*(bx8_loss+bx16_loss+bx32_loss) + lossx1 + lossx2/2 + lossx4/4 + lossx8/8
277 |         return loss
278 | 
279 | 
280 | class NeighborSmthLoss(_Loss):
281 |     def __init__(self, size_average=True, lamda=0.01, t=2):
282 |         super(NeighborSmthLoss, self).__init__(size_average)
283 |         self.lamda = lamda
284 |         self.t = t
285 | 
286 |     def forward(self, input, target):
287 |         predict = input
288 |         smthMap = target
289 | 
290 |         horRelDiffMap = smthMap[:,:,:,0:-1] - smthMap[:,:,:,1:]
291 |         verRelDiffMap = smthMap[:,:,0:-1,:] - smthMap[:,:,1:,:]
292 |         horDpDiffMap = predict[:,:,:,0:-1] - predict[:,:,:,1:]
293 |         verDpDiffMap = predict[:,:,0:-1,:] - predict[:,:,1:,:]
294 | 
295 |         horSmthLoss = torch.sum((horDpDiffMap**2)* torch.exp(-self.t*horRelDiffMap**2)).mean()
296 |         verSmthLoss = torch.sum((verDpDiffMap**2)* torch.exp(-self.t*verRelDiffMap**2)).mean()
297 | 
298 |         return self.lamda/2*(horSmthLoss+verSmthLoss)
299 | 
300 | 
301 | class DFCN_PS(nn.Module):
302 |     """DFCN_PS is short for DFCN with pixel shuffle."""
303 |     def __init__(self, is_Train=True):
304 |         super(DFCN_PS, self).__init__()
305 |         self.isTrain = is_Train
306 | 
307 |         self.conv_1 = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=7,
padding=3, stride=2, bias=False) 308 | self.bn_1 = nn.BatchNorm2d(64) 309 | self.pooling_1 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 310 | self.denseBlock1 = DenseBlock(inputDim=64, outputDim=128 ,growthRate=32, blockDepth=6) 311 | self.pooling_2 = nn.AvgPool2d(kernel_size=2, stride=2) 312 | self.denseBlock2 = DenseBlock(inputDim=128, outputDim=256 ,growthRate=32, blockDepth=12) 313 | self.pooling_3 = nn.AvgPool2d(kernel_size=2, stride=2) 314 | self.denseBlock3 = DenseBlock(inputDim=256, outputDim=512 ,growthRate=32, blockDepth=24) 315 | self.pooling_4 = nn.AvgPool2d(kernel_size=2, stride=2) 316 | 317 | self.conv_fc_5_1 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=1, padding=0, bias=False) 318 | self.drop_5 = nn.Dropout2d(p=0.2) 319 | self.conv_fc_5_2 = nn.Conv2d(in_channels=512, out_channels=1024, kernel_size=1, padding=0, bias=False) 320 | self.drop_6 = nn.Dropout2d(p=0.2) 321 | 322 | self.score_32 = nn.Conv2d(in_channels=1024, out_channels=64, kernel_size=3, padding=1, bias=False) 323 | 324 | self.branch_score_16 = nn.Conv2d(in_channels=256, out_channels=32, kernel_size=3, padding=1, bias=False) 325 | self.branch_score_8 = nn.Conv2d(in_channels=128, out_channels=32, kernel_size=3, padding=1, bias=False) 326 | self.branch_score_4 = nn.Conv2d(in_channels=64, out_channels=32, kernel_size=3, padding=1, bias=False) 327 | 328 | self.subconv_to_8_ = UpsampleByPS(2, (64, 32)) 329 | self.subconv_to_4_ = UpsampleByPS(2, (1, 32)) 330 | self.subconv4x_ = UpsampleByPS(4, (1, 32), is_out_layer=True) 331 | 332 | self.initParameters() 333 | 334 | def setTrainMode(self, isTrain): 335 | self.isTrain = isTrain 336 | 337 | def initParameters(self): 338 | stateDict = self.state_dict() 339 | nn.init.xavier_normal(stateDict['conv_1.weight']) 340 | # nn.init.constant(stateDict['conv_1.bias'], 0) 341 | nn.init.xavier_normal(stateDict['conv_fc_5_1.weight']) 342 | # nn.init.constant(stateDict['conv_fc_5_1.bias'], 0) 343 | nn.init.xavier_normal(stateDict['conv_fc_5_2.weight']) 344 | # nn.init.constant(stateDict['conv_fc_5_2.bias'], 0) 345 | nn.init.xavier_normal(stateDict['score_32.weight']) 346 | # nn.init.constant(stateDict['score_32.bias'], 0) 347 | 348 | def forward(self, x): 349 | out = self.conv_1(x) 350 | out = F.relu(self.bn_1(out)) 351 | 352 | out_4 = self.pooling_1(out) 353 | out_8 = self.pooling_2(F.relu(self.denseBlock1(out_4))) 354 | out_16 = self.pooling_3(F.relu(self.denseBlock2(out_8))) 355 | out_32 = self.pooling_4(F.relu(self.denseBlock3(out_16))) 356 | # if self.isTrain: 357 | # out.volatile = False 358 | 359 | out_32 = F.relu(self.drop_5(self.conv_fc_5_1(out_32))) 360 | out_32 = F.relu(self.drop_6(self.conv_fc_5_2(out_32))) 361 | 362 | out_up_16 = nn.functional.upsample(out_32, size=(15,20), mode='bilinear') 363 | score_16 = F.relu(self.score_32(out_up_16)) 364 | 365 | score_b_16 = self.branch_score_16(out_16) 366 | score_b_8 = self.branch_score_8(out_8) 367 | score_b_4 = self.branch_score_4(out_4) 368 | 369 | outx8 = self.subconv_to_8_([score_16, score_b_16]) 370 | outx4 = self.subconv_to_4_([outx8, score_b_8]) 371 | outx1 = self.subconv4x_([outx4, score_b_4]) 372 | 373 | return outx1, outx4, outx8 374 | 375 | 376 | class DFCN_32(nn.Module): 377 | """docstring for DFCN_32""" 378 | def __init__(self, is_Train=True): 379 | super(DFCN_32, self).__init__() 380 | self.isTrain = is_Train 381 | 382 | self.conv_1 = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=7, padding=3, stride=2, bias=False) 383 | self.bn_1 = nn.BatchNorm2d(64) 384 | self.relu1 = 
nn.ReLU(inplace=True) 385 | self.pooling_1 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 386 | self.denseBlock1 = DenseBlock(inputDim=64, outputDim=128 ,growthRate=32, blockDepth=6) 387 | self.relu2 = nn.ReLU(inplace=True) 388 | self.pooling_2 = nn.AvgPool2d(kernel_size=2, stride=2) 389 | self.denseBlock2 = DenseBlock(inputDim=128, outputDim=256 ,growthRate=32, blockDepth=12) 390 | self.relu3 = nn.ReLU(inplace=True) 391 | self.pooling_3 = nn.AvgPool2d(kernel_size=2, stride=2) 392 | self.denseBlock3 = DenseBlock(inputDim=256, outputDim=512 ,growthRate=32, blockDepth=24) 393 | self.relu4 = nn.ReLU(inplace=True) 394 | self.pooling_4 = nn.AvgPool2d(kernel_size=2, stride=2) 395 | 396 | self.conv_fc_5_1 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=1, padding=0, bias=False) 397 | self.relu5 = nn.ReLU(inplace=True) 398 | self.drop_5 = nn.Dropout2d(p=0.2) 399 | self.conv_fc_5_2 = nn.Conv2d(in_channels=512, out_channels=1024, kernel_size=1, padding=0, bias=False) 400 | self.relu6 = nn.ReLU(inplace=True) 401 | self.drop_6 = nn.Dropout2d(p=0.2) 402 | 403 | self.score_32 = nn.Conv2d(in_channels=1024, out_channels=1, kernel_size=3, padding=1, bias=False) 404 | self.relu7 = nn.ReLU(inplace=True) 405 | # self.upsample_to_16 = nn.Sequential( 406 | # nn.Conv2d(in_channels=1, out_channels=4, kernel_size=3, padding=1, bias=False), 407 | # nn.PixelShuffle(2), 408 | # nn.LeakyReLU(0.2, inplace=True) 409 | # ) 410 | self.upsample_to_8 = nn.Sequential( 411 | nn.Conv2d(in_channels=1, out_channels=4, kernel_size=3, padding=1, bias=False), 412 | nn.PixelShuffle(2), 413 | nn.LeakyReLU(0.2, inplace=True) 414 | ) 415 | self.upsample_to_4 = nn.Sequential( 416 | nn.Conv2d(in_channels=1, out_channels=4, kernel_size=3, padding=1, bias=False), 417 | nn.PixelShuffle(2), 418 | nn.LeakyReLU(0.2, inplace=True) 419 | ) 420 | self.upsample4x = nn.Sequential( 421 | nn.Conv2d(in_channels=1, out_channels=4, kernel_size=3, padding=1, bias=False), 422 | nn.PixelShuffle(2), 423 | nn.LeakyReLU(0.2, inplace=True), 424 | nn.Conv2d(in_channels=1, out_channels=4, kernel_size=3, padding=1, bias=False), 425 | nn.PixelShuffle(2), 426 | nn.LeakyReLU(0.2, inplace=True) 427 | ) 428 | 429 | self.initParameters() 430 | 431 | def setTrainMode(self, isTrain): 432 | self.isTrain = isTrain 433 | 434 | def initParameters(self): 435 | stateDict = self.state_dict() 436 | nn.init.xavier_normal(stateDict['conv_1.weight']) 437 | # nn.init.constant(stateDict['conv_1.bias'], 0) 438 | nn.init.xavier_normal(stateDict['conv_fc_5_1.weight']) 439 | # nn.init.constant(stateDict['conv_fc_5_1.bias'], 0) 440 | nn.init.xavier_normal(stateDict['conv_fc_5_2.weight']) 441 | # nn.init.constant(stateDict['conv_fc_5_2.bias'], 0) 442 | nn.init.xavier_normal(stateDict['score_32.weight']) 443 | # nn.init.constant(stateDict['score_32.bias'], 0) 444 | 445 | # nn.init.xavier_normal(stateDict['upsample_to_16.0.weight']) 446 | nn.init.xavier_normal(stateDict['upsample_to_8.0.weight']) 447 | nn.init.xavier_normal(stateDict['upsample_to_4.0.weight']) 448 | nn.init.xavier_normal(stateDict['upsample4x.0.weight']) 449 | 450 | def forward(self, x): 451 | out = self.conv_1(x) 452 | out = self.bn_1(out) 453 | out = self.relu1(out) 454 | out = self.pooling_1(out) 455 | 456 | out = self.denseBlock1(out) 457 | out = self.relu2(out) 458 | out = self.pooling_2(out) 459 | 460 | out = self.denseBlock2(out) 461 | out = self.relu3(out) 462 | out = self.pooling_3(out) 463 | out = self.denseBlock3(out) 464 | out = self.relu4(out) 465 | out = self.pooling_4(out) 466 | # if 
self.isTrain: 467 | # out.volatile = False 468 | 469 | out = self.conv_fc_5_1(out) 470 | out = self.drop_5(out) 471 | out = self.relu5(out) 472 | out = self.conv_fc_5_2(out) 473 | out = self.drop_6(out) 474 | out = self.relu6(out) 475 | out = nn.functional.upsample(out, size=(15,20), mode='bilinear') 476 | 477 | out = self.score_32(out) 478 | out = self.relu7(out) 479 | 480 | # out = self.upsample_to_16(out) 481 | 482 | # outSize = out.size() 483 | # marginLeft = Variable(torch.zeros(outSize[0], outSize[1], 1, outSize[3])) 484 | # # marginTop = Variable(torch.zeros(outSize[0], outSize[1], outSize[2]+1, 1)) 485 | # if out.is_cuda: 486 | # marginLeft = marginLeft.cuda() 487 | # # marginTop = marginTop.cuda() 488 | # out = torch.cat([out, marginLeft], 2) 489 | 490 | outx8 = self.upsample_to_8(out) 491 | outx4 = self.upsample_to_4(outx8) 492 | outx1 = self.upsample4x(outx4) 493 | 494 | return outx1, outx4, outx8 495 | 496 | 497 | class DFCN_16(DFCN_32): 498 | """docstring for DFCN_16""" 499 | def __init__(self): 500 | super(DFCN_16, self).__init__() 501 | 502 | self.score_16 = nn.Conv2d(in_channels=256, out_channels=1, kernel_size=3, padding=1) 503 | self.upsample = nn.Sequential( 504 | nn.Conv2d(in_channels=1, out_channels=4, kernel_size=3, padding=1, bias=False), 505 | nn.PixelShuffle(2), 506 | nn.LeakyReLU(0.2, inplace=True) 507 | ) 508 | self.upsample_to_8 = nn.Sequential( 509 | nn.Conv2d(in_channels=2, out_channels=4, kernel_size=3, padding=1, bias=False), 510 | nn.PixelShuffle(2), 511 | nn.LeakyReLU(0.2, inplace=True) 512 | ) 513 | 514 | def forward(self, x): 515 | out = F.relu(self.conv_1(x)) 516 | out = self.pooling_1(out) 517 | 518 | out = F.relu(self.denseBlock1(out)) 519 | out = self.pooling_2(out) 520 | out = F.relu(self.denseBlock2(out)) 521 | out = self.pooling_3(out) 522 | out_16 = out 523 | out_16 = self.score_16(out_16) 524 | out_16 = self.upsample(out_16) 525 | 526 | out = self.denseBlock3(out) 527 | out = self.relu4(out) 528 | out = self.pooling_4(out) 529 | 530 | out = self.conv_fc_5_1(out) 531 | out = self.relu5(out) 532 | out = self.conv_fc_5_2(out) 533 | out = self.relu6(out) 534 | out = self.score_32(out) 535 | out = self.relu7(out) 536 | 537 | out = self.upsample_to_16(out) 538 | out_cat = torch.cat([out, out_16], 1) 539 | out_cat = self.upsample_to_8(out_cat) 540 | out_cat = self.upsample_to_4(out_cat) 541 | out_cat = self.upsample4x(out_cat) 542 | 543 | return out_cat 544 | 545 | class RDCN_VGG(nn.Module): 546 | def __init__(self, rec_num): 547 | super(RDCN_VGG, self).__init__() 548 | 549 | self.recNum = rec_num 550 | self.downsample = nn.Sequential(OrderedDict([ 551 | ('data/bn', nn.BatchNorm2d(3)), 552 | ('conv1_1', nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, padding=1)), 553 | ('conv1_1/bn', nn.BatchNorm2d(64)), 554 | ('relu1_1', nn.ReLU(inplace=True)), 555 | ('conv1_2', nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, padding=1)), 556 | ('conv1_2/bn', nn.BatchNorm2d(64)), 557 | ('relu1_2', nn.ReLU(inplace=True)), 558 | ('pool1', nn.MaxPool2d(kernel_size=2, stride=2)), 559 | ('conv2_1', nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1)), 560 | ('conv2_1/bn', nn.BatchNorm2d(128)), 561 | ('relu2_1', nn.ReLU(inplace=True)), 562 | ('conv2_2', nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, padding=1)), 563 | ('conv2_2/bn', nn.BatchNorm2d(128)), 564 | ('relu2_2', nn.ReLU(inplace=True)), 565 | ('pool2', nn.MaxPool2d(kernel_size=2, stride=2)), 566 | ('conv3_1', nn.Conv2d(in_channels=128, out_channels=256, 
kernel_size=3, padding=1)), 567 | ('conv3_1/bn', nn.BatchNorm2d(256)), 568 | ('relu3_1', nn.ReLU(inplace=True)), 569 | ('conv3_2', nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, padding=1)), 570 | ('conv3_2/bn', nn.BatchNorm2d(256)), 571 | ('relu3_2', nn.ReLU(inplace=True)), 572 | ('conv3_3', nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, padding=1)), 573 | ('conv3_3/bn', nn.BatchNorm2d(256)), 574 | ('relu3_3', nn.ReLU(inplace=True)), 575 | ('conv3_4', nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, padding=1)), 576 | ('conv3_4/bn', nn.BatchNorm2d(256)), 577 | ('relu3_4', nn.ReLU(inplace=True)) 578 | ])) 579 | 580 | self.denseBlock = DenseBlock(inputDim=256, outputDim=256 ,growthRate=32, blockDepth=8) 581 | self.predictx4 = nn.Conv2d(in_channels=256, out_channels=1, kernel_size=3, padding=1) 582 | self.weightedAvg = nn.Conv2d(in_channels=self.recNum, out_channels=1, kernel_size=1, bias=True) 583 | 584 | self.upsample4x = nn.Sequential( 585 | nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, padding=1, bias=False), 586 | nn.PixelShuffle(2), 587 | nn.LeakyReLU(0.2, inplace=True), 588 | nn.Conv2d(in_channels=64, out_channels=256, kernel_size=3, padding=1, bias=False), 589 | nn.PixelShuffle(2), 590 | nn.LeakyReLU(0.2, inplace=True), 591 | ) 592 | self.predict = nn.Conv2d(in_channels=64, out_channels=1, kernel_size=3, padding=1, bias=False) 593 | 594 | def loadConv(self, pretrain_model): 595 | pretrainModel = sio.loadmat(pretrain_model) 596 | for name, module in self.named_modules(): 597 | if isinstance(module, nn.Conv2d): 598 | last_name = name.split('.')[-1] 599 | if module.bias is not None: 600 | for key, value in pretrainModel.items(): 601 | if '%s_0'%last_name == key: # for weight 602 | print('load %s'%key) 603 | self.copyArrayToTensor(value, module.weight.data) 604 | 605 | if '%s_1'%last_name == key: # for weight 606 | print('load %s'%key) 607 | self.copyArrayToTensor(value, module.bias.data) 608 | else: 609 | for key, value in pretrainModel.items(): 610 | if '%s_0'%last_name == key: # for weight 611 | print('load %s'%key) 612 | self.copyArrayToTensor(value, module.weight.data) 613 | 614 | 615 | def copyArrayToTensor(self, array, tensor): 616 | aShape = array.shape 617 | tShape = tensor.shape 618 | 619 | if len(aShape) == 2 and aShape[0] == 1: 620 | array = np.squeeze(array) 621 | aShape = array.shape 622 | 623 | if len(aShape) != len(tShape): 624 | raise ValueError('array shape:{} mismatches with tensor: {}'.format(aShape, tShape)) 625 | 626 | for indx in range(len(aShape)): 627 | if aShape[indx] != tShape[indx]: 628 | raise ValueError('array shape:{} mismatches with tensor: {}'.format(aShape, tShape)) 629 | 630 | if len(aShape) == 1: 631 | for n in range(aShape[0]): 632 | tensor[n] = float(array[n]) 633 | elif len(aShape) == 2: 634 | for n in range(aShape[0]): 635 | for c in range(aShape[1]): 636 | tensor[n, c] = float(array[n, c]) 637 | elif len(aShape) == 3: 638 | for n in range(aShape[0]): 639 | for c in range(aShape[1]): 640 | for h in range(aShape[2]): 641 | tensor[n, c, h] = float(array[n, c, h]) 642 | elif len(aShape) == 4: 643 | for n in range(aShape[0]): 644 | for c in range(aShape[1]): 645 | for h in range(aShape[2]): 646 | for w in range(aShape[3]): 647 | tensor[n, c, h, w] = float(array[n, c, h, w]) 648 | 649 | 650 | def forward(self, x): 651 | out = self.downsample(x) 652 | predictx4s = [None for i in range(self.recNum)] 653 | catFlag = False 654 | predictx4Cat = None 655 | predict_final = None 656 | 657 | # input("RDCN_VGG 
before loop") 658 | for indx in range(self.recNum): 659 | out = self.denseBlock(out) 660 | predictx4s[indx] = self.predictx4(out) 661 | if not catFlag: 662 | catFlag = True 663 | predictx4Cat = predictx4s[indx] 664 | else: 665 | predictx4Cat = torch.cat([predictx4Cat, predictx4s[indx]], 1) 666 | # print(predictx4s[indx]) 667 | 668 | predictx4_avg = self.weightedAvg(predictx4Cat) 669 | # print('-- avg\n', predictx4_avg) 670 | 671 | out = self.upsample4x(out) 672 | predict_final = self.predict(out) 673 | 674 | return predictx4s, predictx4_avg, predict_final 675 | 676 | class InvLoss(nn.Module): 677 | def __init__(self, lamda=0.5): 678 | super(InvLoss, self).__init__() 679 | self.lamda = lamda 680 | 681 | def forward(self, _input, _target): 682 | dArr = _input - _target 683 | nVal = _input.data.shape[2]*_input.data.shape[3] 684 | 685 | mseLoss = torch.sum(torch.sum(dArr*dArr, 2), 3)/nVal 686 | dArrSum = torch.sum(torch.sum(dArr, 2), 3) 687 | mssLoss = -self.lamda*(dArrSum*dArrSum)/(nVal**2) 688 | 689 | loss = mseLoss + mssLoss 690 | loss = torch.sum(loss) 691 | return loss 692 | 693 | 694 | def copyArrayToTensor(array, tensor): 695 | aShape = array.shape 696 | tShape = tensor.shape 697 | 698 | if len(aShape) == 2 and aShape[0] == 1: 699 | array = np.squeeze(array) 700 | aShape = array.shape 701 | 702 | if len(aShape) != len(tShape): 703 | raise ValueError('array shape:{} mismatches with tensor: {}'.format(aShape, tShape)) 704 | 705 | for indx in range(len(aShape)): 706 | if aShape[indx] != tShape[indx]: 707 | raise ValueError('array shape:{} mismatches with tensor: {}'.format(aShape, tShape)) 708 | 709 | if len(aShape) == 1: 710 | for n in range(aShape[0]): 711 | tensor[n] = float(array[n]) 712 | elif len(aShape) == 2: 713 | for n in range(aShape[0]): 714 | for c in range(aShape[1]): 715 | tensor[n, c] = float(array[n, c]) 716 | elif len(aShape) == 3: 717 | for n in range(aShape[0]): 718 | for c in range(aShape[1]): 719 | for h in range(aShape[2]): 720 | tensor[n, c, h] = float(array[n, c, h]) 721 | elif len(aShape) == 4: 722 | for n in range(aShape[0]): 723 | for c in range(aShape[1]): 724 | for h in range(aShape[2]): 725 | for w in range(aShape[3]): 726 | tensor[n, c, h, w] = float(array[n, c, h, w]) 727 | 728 | 729 | def copyParametersToModel(params, modules, rule_file): 730 | ruleDict = dict() 731 | ruleFile = open(rule_file, 'r') 732 | line = ruleFile.readline() 733 | while line != '' and line != '\n': 734 | contents = line.split(' ') 735 | currSrcLayer = contents[0] 736 | if contents[1][-1] == '\n': 737 | currTargetLayer = contents[1][:-1] 738 | else: 739 | currTargetLayer = contents[1] 740 | 741 | if currSrcLayer in params.keys(): 742 | ruleDict[currSrcLayer] = currTargetLayer 743 | else: 744 | raise ValueError('pretrainModel has no key: %s'%currSrcLayer) 745 | line = ruleFile.readline() 746 | 747 | ruleFile.close() 748 | 749 | # load parameters 750 | for key, item in ruleDict.items(): 751 | copyArrayToTensor(params[key], modules[item]) 752 | 753 | -------------------------------------------------------------------------------- /results/figure_nyu.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaofeng94/MSCNNS-for-monocular-depth-estimation/71b9bc876688e80be1b646851fc8327c0dccd6e2/results/figure_nyu.png -------------------------------------------------------------------------------- /results/table_nyu.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/xiaofeng94/MSCNNS-for-monocular-depth-estimation/71b9bc876688e80be1b646851fc8327c0dccd6e2/results/table_nyu.png
--------------------------------------------------------------------------------
/test.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | 
3 | import torch
4 | from torch.autograd import Variable
5 | from torch.utils.data import DataLoader
6 | 
7 | from tools.EvalutateMetrics import myMetrics
8 | from datasets import NYUv2DataSet, NYUv2FusionSet
9 | 
10 | import scipy.io as sio
11 | import numpy as np
12 | # from PIL import Image
13 | import glob
14 | 
15 | import time
16 | 
17 | def loadImage(test_file, in_size=[240, 320]):
18 |     data = sio.loadmat(test_file)
19 |     data = data['data']
20 | 
21 |     rgb = data['rgb'][0,0]
22 |     depth = data['depth'][0,0]
23 | 
24 |     imageSize = data['imageSize'][0,0][0]
25 |     offset_x = int((imageSize[0] - in_size[0])/2)
26 |     offset_y = int((imageSize[1] - in_size[1])/2)
27 | 
28 |     rgb_new = rgb.transpose((2, 0, 1))
29 |     rgb_new = torch.from_numpy(rgb_new[np.newaxis,:,offset_x:in_size[0]+offset_x, offset_y:in_size[1]+offset_y]).float()
30 |     rgb_new = rgb_new.cuda()
31 |     inputData = Variable(rgb_new)
32 |     inputData.volatile = True
33 | 
34 |     depth_new = depth[offset_x:in_size[0]+offset_x, offset_y:in_size[1]+offset_y]
35 |     depth_target = np.exp(depth_new)
36 | 
37 |     return inputData, depth_target
38 | 
39 | 
40 | def convert2Array(inData):
41 |     cpuData = inData.cpu()
42 |     return np.exp( cpuData.data[0].numpy()[0,...].astype(np.float32) )
43 | 
44 | 
45 | parser = argparse.ArgumentParser(description="PyTorch recursive densely-connected neural network test")
46 | parser.add_argument("--model", default=None, type=str, help="model path")
47 | parser.add_argument("--image", default=None, type=str, help="image name")
48 | # parser.add_argument("--cpu", action="store_true", help="Use cpu only")
49 | parser.add_argument("--data", default='', type=str, help='assign a dataset for the test. When assigned, --image is ignored')
50 | 
51 | opt = parser.parse_args()
52 | print(opt)
53 | 
54 | print('build model...')
55 | model = torch.load(opt.model)["model"]
56 | model.setTrainMode(False)
57 | model.eval()
58 | 
59 | model = model.cuda()
60 | model.is_train = False
61 | 
62 | # print(model)
63 | 
64 | metrics = myMetrics()
65 | metrics.resetMetrics()
66 | 
67 | if opt.data:
68 |     dataFiles = glob.glob('%s/*.mat'%opt.data)
69 |     dataNum = len(dataFiles)
70 | 
71 |     for indx in range(min(dataNum,700)):
72 |         inputData, target = loadImage(dataFiles[indx])
73 |         predictions = model(inputData)
74 | 
75 |         begin = time.time()
76 |         predictions = model(inputData)
77 |         end = time.time()
78 |         # print(end-begin)
79 | 
80 |         if indx <= 1:
81 |             detectTime = end-begin
82 |         else:
83 |             detectTime = detectTime + end-begin
84 | 
85 |         predictedx1 = predictions[0].cpu()
86 |         predictedx1_np = convert2Array(predictedx1)
87 | 
88 |         metrics.computeMetrics(predictedx1_np, target, disp=True, image_name=dataFiles[indx])
89 | 
90 |     metricsVals = metrics.getMetrics()
91 | 
92 |     print('-- [average metrics] -------')
93 |     print('rel: %f, log10: %f, rms: %f, thr1: %f, thr2: %f, thr3: %f'%(metricsVals[0],metricsVals[1],
94 |         metricsVals[2], metricsVals[3], metricsVals[4], metricsVals[5]))
95 |     print('average time: %f'%(detectTime/float(dataNum-1)) )
96 | 
97 | else:
98 |     test_file = opt.image
99 |     data = sio.loadmat(test_file)
100 |     data = data['data']
101 | 
102 |     rgb = data['rgb'][0,0]
103 | 
104 |     inputData, target = loadImage(test_file)
105 | 
106 |     # outputs of the network
107 |     predictions = model(inputData)
108 |     predictedx1 = convert2Array(predictions[0])
109 |     O_8x, O_16x, O_32x = predictions[3:6]
110 |     pred2x, pred4x, pred8x = predictions[6:]
111 |     O_4x = 0 # No O_4x for this version
112 |     O_8x = convert2Array(O_8x)
113 |     O_16x = convert2Array(O_16x)
114 |     O_32x = convert2Array(O_32x)
115 |     pred2x = convert2Array(pred2x)
116 |     pred4x = convert2Array(pred4x)
117 |     pred8x = convert2Array(pred8x)
118 | 
119 |     currRel = metrics.computeRel(predictedx1, target)
120 |     currRMS = metrics.computeRMS(predictedx1, target)
121 |     currL10 = metrics.computeLog10(predictedx1, target)
122 | 
123 |     print('rel: %f, rms: %f, log10: %f'%(currRel, currRMS, currL10))
124 | 
125 |     sio.savemat('results.mat', {'rgb': rgb, 'depth':target,
126 |         'pred1x': predictedx1, 'pred2x': pred2x, 'pred4x': pred4x,
127 |         'pred8x': pred8x, 'bx4': O_4x,'bx8': O_8x, 'bx16': O_16x, 'bx32': O_32x})
128 | 
129 | print('Done!')
--------------------------------------------------------------------------------
/test_samples/nyu_v2_175.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xiaofeng94/MSCNNS-for-monocular-depth-estimation/71b9bc876688e80be1b646851fc8327c0dccd6e2/test_samples/nyu_v2_175.mat
--------------------------------------------------------------------------------
/test_samples/nyu_v2_9.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xiaofeng94/MSCNNS-for-monocular-depth-estimation/71b9bc876688e80be1b646851fc8327c0dccd6e2/test_samples/nyu_v2_9.mat
--------------------------------------------------------------------------------
/tools/EvalutateMetrics.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | 
3 | class myMetrics(object):
4 |     """Accumulates the standard monocular depth estimation metrics."""
5 |     def __init__(self):
6 |         super(myMetrics, self).__init__()
7 | 
8 |         self.rel = 0 # average relative error
9 |         self.rms = 0 # root mean squared error
10 |         self.log10Err = 0 # average log10 error
11 |         self.thrAcc1 = 0 # accuracy with threshold
12 |         self.thrAcc2 = 0 # accuracy with threshold
13 |         self.thrAcc3 = 0 # accuracy with threshold
14 |         self.thrCount1 = 0
15 |         self.thrValue1 = 1.25
16 |         self.thrCount2 = 0
17 |         self.thrValue2 = 1.25**2
18 |         self.thrCount3 = 0
19 |         self.thrValue3 = 1.25**3
20 | 
21 |         self.pointsNum = 0 # valid point number
22 |         self.exclude_list = []
23 |         self.exclude_thr = 10 # 0.7 for make3d, 0.4 for nyuv2
24 |         self.rms_avg_by_image = True
25 |         self.min_depth = 0.7
26 |         self.max_depth = 99999
27 |         self.test_count = 0
28 | 
29 | 
30 |     def fineModification(self, depth_predicted, depth_gt, max_depth = 70, clip_value = 80):
31 |         # fine modification
32 |         for indx_x in np.arange(depth_gt.shape[0]):
33 |             for indx_y in np.arange(depth_gt.shape[1]):
34 |                 if depth_predicted[indx_x, indx_y] > max_depth: #70
35 |                     depth_predicted[indx_x, indx_y] = clip_value #80
36 | 
37 |     def computeRel(self, depth_predicted, depth_gt):
38 |         number = depth_gt.shape[0]*depth_gt.shape[1]
39 |         curr_rel_arr = np.abs(depth_predicted-depth_gt)/depth_gt
40 |         curr_rel = np.sum(curr_rel_arr)/number
41 | 
42 |         return curr_rel
43 | 
44 |     def computeRMS(self, depth_predicted, depth_gt):
45 |         number = depth_gt.shape[0]*depth_gt.shape[1]
46 |         curr_rms_arr = (depth_predicted-depth_gt)**2
47 |         curr_rms = np.sum(curr_rms_arr)/number
48 | 
49 |         return np.sqrt(curr_rms)
50 | 
51 |     def computeLog10(self, depth_predicted, depth_gt):
52 |         number = depth_gt.shape[0]*depth_gt.shape[1]
53 |         curr_log10Err_arr = np.abs(np.log10(depth_predicted)-np.log10(depth_gt))
54 |         curr_log10Err = np.sum(curr_log10Err_arr)/number
55 | 
56 |         return curr_log10Err
57 | 
58 |     def setMetricsType(self, typeStr = 'c1'):
59 |         if typeStr == 'c1':
60 |             self.max_depth = 70
61 |         else:
62 |             self.max_depth = 99999
63 | 
64 |     def resetMetrics(self):
65 |         self.test_count = 0
66 | 
67 |         self.rel = list() # average relative error
68 |         self.rms2 = list() # root mean squared error
69 |         self.log10Err = list() # average log10 error
70 |         self.thrAcc1 = list() # accuracy with threshold
71 |         self.thrAcc2 = list() # accuracy with threshold
72 |         self.thrAcc3 = list() # accuracy with threshold
73 | 
74 |         self.thrCount1 = 0
75 |         self.thrCount2 = 0
76 |         self.thrCount3 = 0
77 | 
78 |         self.pointsNum = 0 # valid point number
79 |         self.exclude_list = []
80 |         self.test_count = 0
81 | 
82 |     def fastCompute(self, gt, pred):
83 |         thresh = np.maximum((gt / pred), (pred / gt))
84 |         a1 = (thresh < 1.25).mean()
85 |         a2 = (thresh < 1.25 ** 2).mean()
86 |         a3 = (thresh < 1.25 ** 3).mean()
87 | 
88 |         rmse2 = np.mean((gt - pred) ** 2)
89 | 
90 |         log10_err = np.mean(np.absolute(np.log10(gt) - np.log10(pred)))
91 | 
92 |         abs_rel = np.mean(np.abs(gt - pred) / gt)
93 | 
94 |         return abs_rel, rmse2, log10_err, a1, a2, a3
95 | 
96 | 
97 |     def computeMetrics(self, pred_depth_real, depth_real, disp=False, image_name=''):
98 |         self.test_count += 1
99 | 
100 |         mask = np.logical_and(depth_real > self.min_depth, depth_real < self.max_depth)
101 |         gt = depth_real[mask]
102 |         pred = pred_depth_real[mask]
103 |         currMetrics = self.fastCompute(gt, pred)
104 |         self.rel.append(currMetrics[0])
105 |         self.rms2.append(currMetrics[1])
106 |         self.log10Err.append(currMetrics[2])
107 |         self.thrAcc1.append(currMetrics[3])
108 |         self.thrAcc2.append(currMetrics[4])
109 |         self.thrAcc3.append(currMetrics[5])
110 |         if currMetrics[0] > self.exclude_thr:
111 |             self.exclude_list.append(self.test_count)
112 | 
113 |         if disp:
114 |             print('({}){}:'.format(self.test_count, image_name))
115 |             print('rel: {}, rms: {}, log10: {}'.format(currMetrics[0], np.sqrt(currMetrics[1]), currMetrics[2]))
116 |             print('---- file end ----')
117 | 
118 | 
119 |     def getMetrics(self):
120 | 
121 |         rel = np.array(self.rel).mean()
122 |         log10Err = np.array(self.log10Err).mean()
123 |         rms = np.sqrt(self.rms2).mean()
124 |         a1 = np.array(self.thrAcc1).mean()
125 |         a2 =
np.array(self.thrAcc2).mean() 126 | a3 = np.array(self.thrAcc3).mean() 127 | 128 | return rel,log10Err,rms,a1,a2,a3, self.exclude_list -------------------------------------------------------------------------------- /tools/densenet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import torch.utils.model_zoo as model_zoo 5 | from collections import OrderedDict 6 | 7 | 8 | class BaseDenseLayer(nn.Module): 9 | """docstring for BaseLayer""" 10 | def __init__(self, input_dim, growth_rate): 11 | super(BaseDenseLayer, self).__init__() 12 | 13 | self.layer = nn.Sequential( 14 | nn.BatchNorm2d(input_dim), 15 | nn.ReLU(inplace=True), 16 | nn.Conv2d(in_channels=input_dim, out_channels=128, kernel_size=1, bias=False), 17 | nn.BatchNorm2d(128), 18 | nn.ReLU(inplace=True), 19 | nn.Conv2d(in_channels=128, out_channels=growth_rate, kernel_size=3, padding=1, bias=False) 20 | ) 21 | self.initParameters() 22 | 23 | def initParameters(self): 24 | stateDict = self.state_dict() 25 | # print(stateDict.keys()) 26 | nn.init.xavier_normal(stateDict['layer.2.weight']) 27 | # nn.init.constant(stateDict['layer.2.bias'], 0) 28 | nn.init.xavier_normal(stateDict['layer.5.weight']) 29 | # nn.init.constant(stateDict['layer.5.bias'], 0) 30 | 31 | def forward(self, x): 32 | # input('BaseDenseLayer:forward loop ') 33 | out = self.layer(x) 34 | return torch.cat([x, out], 1) 35 | 36 | class DenseBlock(nn.Module): 37 | def __init__(self, inputDim=256, outputDim=256, growthRate=32, blockDepth=8, withTrans=True): 38 | super(DenseBlock, self).__init__() 39 | 40 | self.inputDim = inputDim 41 | self.outputDim = outputDim 42 | self.blockDepth = blockDepth 43 | self.growthRate = growthRate 44 | self.withTrans = withTrans 45 | 46 | 47 | layers = [] 48 | for indx in range(self.blockDepth): 49 | srcDim = self.inputDim + indx*self.growthRate 50 | layers.append(BaseDenseLayer(srcDim, self.growthRate)) 51 | 52 | self.denseLayer = nn.Sequential(*layers) 53 | 54 | catDim = self.inputDim+self.blockDepth*self.growthRate 55 | if self.withTrans: 56 | self.transition = nn.Sequential( 57 | nn.BatchNorm2d(catDim), 58 | nn.ReLU(inplace=True), 59 | nn.Conv2d(in_channels=catDim, out_channels=self.outputDim, kernel_size=1, bias=False) 60 | ) 61 | self.initParameters() 62 | 63 | def initParameters(self): 64 | stateDict = self.state_dict() 65 | if self.withTrans: 66 | nn.init.xavier_normal(stateDict['transition.2.weight']) 67 | # nn.init.constant(stateDict['transition.2.bias'], 0) 68 | 69 | 70 | def forward(self, x): 71 | # input('DenseBlock:forward') 72 | # catOut = x 73 | # for indx in range(self.blockDepth): 74 | # input('DenseBlock:forward loop %d'%indx) 75 | # currOut = self.layers[indx](catOut) 76 | # catOut = torch.cat([catOut, currOut], 1) 77 | out = self.denseLayer(x) 78 | if self.withTrans: 79 | out = self.transition(out) 80 | # print(out) 81 | return out 82 | 83 | # def baseLayer(self, indx): 84 | # srcDim = self.inputDim+indx*self.growthRate 85 | # return list([ 86 | # nn.BatchNorm2d(srcDim), 87 | # nn.ReLU(inplace=True), 88 | # nn.Conv2d(in_channels=srcDim, out_channels=128, kernel_size=1, bias=False), 89 | # nn.BatchNorm2d(128), 90 | # nn.ReLU(inplace=True), 91 | # nn.Conv2d(in_channels=128, out_channels=self.growthRate, kernel_size=3, padding=1) 92 | # ]) 93 | 94 | 95 | ### torch vision implementation 96 | 97 | 98 | def densenet121(pretrained=False, **kwargs): 99 | r"""Densenet-121 model from 100 | `"Densely Connected 
Convolutional Networks" `_ 101 | Args: 102 | pretrained (bool): If True, returns a model pre-trained on ImageNet 103 | """ 104 | model = DenseNet(num_init_features=64, growth_rate=32, block_config=(6, 12, 24, 16), 105 | **kwargs) 106 | if pretrained: 107 | model.load_state_dict(model_zoo.load_url(model_urls['densenet121'])) 108 | return model 109 | 110 | class _DenseLayer(nn.Sequential): 111 | def __init__(self, num_input_features, growth_rate, bn_size, drop_rate): 112 | super(_DenseLayer, self).__init__() 113 | self.add_module('norm.1', nn.BatchNorm2d(num_input_features)), 114 | self.add_module('relu.1', nn.ReLU(inplace=True)), 115 | self.add_module('conv.1', nn.Conv2d(num_input_features, bn_size * 116 | growth_rate, kernel_size=1, stride=1, bias=False)), 117 | self.add_module('norm.2', nn.BatchNorm2d(bn_size * growth_rate)), 118 | self.add_module('relu.2', nn.ReLU(inplace=True)), 119 | self.add_module('conv.2', nn.Conv2d(bn_size * growth_rate, growth_rate, 120 | kernel_size=3, stride=1, padding=1, bias=False)), 121 | self.drop_rate = drop_rate 122 | 123 | def forward(self, x): 124 | new_features = super(_DenseLayer, self).forward(x) 125 | if self.drop_rate > 0: 126 | new_features = F.dropout(new_features, p=self.drop_rate, training=self.training) 127 | return torch.cat([x, new_features], 1) 128 | 129 | 130 | class _DenseBlock(nn.Sequential): 131 | def __init__(self, num_layers, num_input_features, bn_size, growth_rate, drop_rate): 132 | super(_DenseBlock, self).__init__() 133 | for i in range(num_layers): 134 | layer = _DenseLayer(num_input_features + i * growth_rate, growth_rate, bn_size, drop_rate) 135 | self.add_module('denselayer%d' % (i + 1), layer) 136 | 137 | 138 | class _Transition(nn.Sequential): 139 | def __init__(self, num_input_features, num_output_features): 140 | super(_Transition, self).__init__() 141 | self.add_module('norm', nn.BatchNorm2d(num_input_features)) 142 | self.add_module('relu', nn.ReLU(inplace=True)) 143 | self.add_module('conv', nn.Conv2d(num_input_features, num_output_features, 144 | kernel_size=1, stride=1, bias=False)) 145 | # self.add_module('pool', nn.AvgPool2d(kernel_size=2, stride=2)) 146 | 147 | 148 | class DenseNet(nn.Module): 149 | r"""Densenet-BC model class, based on 150 | `"Densely Connected Convolutional Networks" `_ 151 | Args: 152 | growth_rate (int) - how many filters to add each layer (`k` in paper) 153 | block_config (list of 4 ints) - how many layers in each pooling block 154 | num_init_features (int) - the number of filters to learn in the first convolution layer 155 | bn_size (int) - multiplicative factor for number of bottle neck layers 156 | (i.e. 
148 | class DenseNet(nn.Module):
149 |     r"""Densenet-BC model class, based on
150 |     `"Densely Connected Convolutional Networks" <https://arxiv.org/pdf/1608.06993.pdf>`_
151 |     Args:
152 |         growth_rate (int) - how many filters to add each layer (`k` in paper)
153 |         block_config (list of 4 ints) - how many layers in each pooling block
154 |         num_init_features (int) - the number of filters to learn in the first convolution layer
155 |         bn_size (int) - multiplicative factor for the number of bottleneck layers
156 |             (i.e. bn_size * k features in the bottleneck layer)
157 |         drop_rate (float) - dropout rate after each dense layer
158 |         num_classes (int) - number of classification classes
159 |     """
160 |     def __init__(self, growth_rate=32, block_config=(6, 12, 24, 16),
161 |                  num_init_features=64, bn_size=4, drop_rate=0, num_classes=1000):
162 | 
163 |         super(DenseNet, self).__init__()
164 | 
165 |         # First convolution
166 |         self.features = nn.Sequential(OrderedDict([
167 |             ('conv0', nn.Conv2d(3, num_init_features, kernel_size=7, stride=2, padding=3, bias=False)),
168 |             ('norm0', nn.BatchNorm2d(num_init_features)),
169 |             ('relu0', nn.ReLU(inplace=True)),
170 |             ('pool0', nn.MaxPool2d(kernel_size=3, stride=2, padding=1)),
171 |         ]))
172 | 
173 |         # Each denseblock
174 |         num_features = num_init_features
175 |         for i, num_layers in enumerate(block_config):
176 |             block = _DenseBlock(num_layers=num_layers, num_input_features=num_features,
177 |                                 bn_size=bn_size, growth_rate=growth_rate, drop_rate=drop_rate)
178 |             self.features.add_module('denseblock%d' % (i + 1), block)
179 |             num_features = num_features + num_layers * growth_rate
180 |             if i != len(block_config) - 1:
181 |                 trans = _Transition(num_input_features=num_features, num_output_features=num_features // 2)
182 |                 self.features.add_module('transition%d' % (i + 1), trans)
183 |                 num_features = num_features // 2
184 | 
185 |         # Final batch norm
186 |         self.features.add_module('norm5', nn.BatchNorm2d(num_features))
187 | 
188 |         # Linear layer
189 |         self.classifier = nn.Linear(num_features, num_classes)
190 | 
191 |         # Official init from torch repo.
192 |         for m in self.modules():
193 |             if isinstance(m, nn.Conv2d):
194 |                 nn.init.kaiming_normal(m.weight.data)
195 |             elif isinstance(m, nn.BatchNorm2d):
196 |                 m.weight.data.fill_(1)
197 |                 m.bias.data.zero_()
198 |             elif isinstance(m, nn.Linear):
199 |                 m.bias.data.zero_()
200 | 
201 |     def forward(self, x):
202 |         features = self.features(x)
203 |         out = F.relu(features, inplace=True)
204 |         out = F.avg_pool2d(out, kernel_size=7, stride=1).view(features.size(0), -1)
205 |         out = self.classifier(out)
206 |         return out
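
A hedged usage sketch for the constructor above (assumed input sizes). Because `_Transition` no longer pools, the dense blocks all run at 1/4 input resolution, so the fixed 7x7 average pool in `DenseNet.forward` no longer matches the classifier for 224x224 inputs; the network appears intended to be used as a feature extractor via `net.features`:

```python
import torch
from tools.densenet import densenet121

net = densenet121(pretrained=False)
net.eval()
feats = net.features(torch.randn(1, 3, 224, 224))
print(feats.shape)  # torch.Size([1, 1024, 56, 56]): only conv0 and pool0 downsample
```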
42 | print('save model to %s' % target_path)
43 | sio.savemat(target_path, model_dict)
--------------------------------------------------------------------------------
/tools/parse_caffe_model.py:
--------------------------------------------------------------------------------
1 | ### Parse the parameters in a .caffemodel into a dict and save them as a .mat file
2 | import sys
3 | caffe_root = '/media/xiaofeng/codes/LinuxFiles/caffe'  # adjust to your local caffe build
4 | sys.path.insert(0, caffe_root + '/python')
5 | 
6 | import caffe
7 | 
8 | import os
9 | import numpy as np
10 | 
11 | caffe_cfg = './model/DenseNet_121.prototxt'
12 | caffemodel_path = './model/DenseNet_121.caffemodel'
13 | 
14 | target_root = './model/pretrain'
15 | target_path = '%s/densenet_121'%(target_root)
16 | if not os.path.exists(target_root):
17 |     os.makedirs(target_root)
18 | 
19 | 
20 | caffe.set_mode_cpu()
21 | net = caffe.Net(caffe_cfg, caffemodel_path, caffe.TEST)
22 | 
23 | indxStr = ['weight', 'bias', '3']  # blob index -> key suffix; '3' is a fallback for layers with a third blob
24 | model_dict = dict()
25 | for key in net.params.keys():
26 |     if 'conv' in key:
27 |         print('-- {}, len: {}'.format(key, len(net.params[key])))
28 |         for indx in range(len(net.params[key])):
29 |             currKey = '%s.%s'%(key, indxStr[indx])
30 |             shape = net.params[key][indx].data.shape
31 |             data = net.params[key][indx].data
32 | 
33 |             model_dict[currKey] = data
34 | 
35 | # if key == 'conv1_1':
36 | #     print(net.params[key][0].data)
37 | #     print(net.params[key][1].data)
38 | 
39 | import scipy.io as sio
40 | 
41 | # sio.savemat('conv1_1', {'conv1_1': model_dict['conv1_1'][0]})
--------------------------------------------------------------------------------
/tools/resnet.py:
--------------------------------------------------------------------------------
1 | # copied from torchvision resnet (https://github.com/pytorch/vision/blob/master/torchvision/models/resnet.py)
2 | import math  # needed below for the He weight init; missing from the original file
3 | import torch.nn as nn
4 | import torch.utils.model_zoo as model_zoo
5 | 
6 | 
7 | model_urls = {
8 |     'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
9 |     'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
10 |     'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
11 |     'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
12 |     'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
13 | }
14 | 
15 | 
16 | def conv3x3(in_planes, out_planes, stride=1):
17 |     """3x3 convolution with padding"""
18 |     return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
19 |                      padding=1, bias=False)
20 | 
21 | 
22 | class BasicBlock(nn.Module):
23 |     expansion = 1
24 | 
25 |     def __init__(self, inplanes, planes, stride=1, downsample=None):
26 |         super(BasicBlock, self).__init__()
27 |         self.conv1 = conv3x3(inplanes, planes, stride)
28 |         self.bn1 = nn.BatchNorm2d(planes)
29 |         self.relu = nn.ReLU(inplace=True)
30 |         self.conv2 = conv3x3(planes, planes)
31 |         self.bn2 = nn.BatchNorm2d(planes)
32 |         self.downsample = downsample
33 |         self.stride = stride
34 | 
35 |     def forward(self, x):
36 |         residual = x
37 | 
38 |         out = self.conv1(x)
39 |         out = self.bn1(out)
40 |         out = self.relu(out)
41 | 
42 |         out = self.conv2(out)
43 |         out = self.bn2(out)
44 | 
45 |         if self.downsample is not None:
46 |             residual = self.downsample(x)
47 | 
48 |         out += residual
49 |         out = self.relu(out)
50 | 
51 |         return out
52 | 
53 | 
54 | class Bottleneck(nn.Module):
55 |     expansion = 4
56 | 
57 |     def __init__(self, inplanes, planes, stride=1, downsample=None):
58 |         super(Bottleneck, self).__init__()
59 |         self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
60 |         self.bn1 = nn.BatchNorm2d(planes)
61 |         self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
62 |                                padding=1, bias=False)
63 |         self.bn2 = nn.BatchNorm2d(planes)
64 |         self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
65 |         self.bn3 = nn.BatchNorm2d(planes * 4)
66 |         self.relu = nn.ReLU(inplace=True)
67 |         self.downsample = downsample
68 |         self.stride = stride
69 | 
70 |     def forward(self, x):
71 |         residual = x
72 | 
73 |         out = self.conv1(x)
74 |         out = self.bn1(out)
75 |         out = self.relu(out)
76 | 
77 |         out = self.conv2(out)
78 |         out = self.bn2(out)
79 |         out = self.relu(out)
80 | 
81 |         out = self.conv3(out)
82 |         out = self.bn3(out)
83 | 
84 |         if self.downsample is not None:
85 |             residual = self.downsample(x)
86 | 
87 |         out += residual
88 |         out = self.relu(out)
89 | 
90 |         return out
91 | 
92 | 
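
A minimal shape sketch for `Bottleneck` (assumed sizes): a stride-2 block needs a matching `downsample` branch so the residual addition lines up, since the main path multiplies channels by `expansion = 4` and halves the spatial size:

```python
import torch
import torch.nn as nn
from tools.resnet import Bottleneck

downsample = nn.Sequential(
    nn.Conv2d(64, 256, kernel_size=1, stride=2, bias=False),
    nn.BatchNorm2d(256),
)
block = Bottleneck(inplanes=64, planes=64, stride=2, downsample=downsample)
y = block(torch.randn(1, 64, 56, 56))
print(y.shape)  # torch.Size([1, 256, 28, 28]): 64 * expansion channels, spatial size halved
```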
93 | class ResNet(nn.Module):
94 | 
95 |     def __init__(self, block, layers, num_classes=1000):
96 |         self.inplanes = 64
97 |         super(ResNet, self).__init__()
98 |         self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
99 |                                bias=False)
100 |         self.bn1 = nn.BatchNorm2d(64)
101 |         self.relu = nn.ReLU(inplace=True)
102 |         self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
103 |         self.layer1 = self._make_layer(block, 64, layers[0])
104 |         self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
105 |         self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
106 |         self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
107 |         self.avgpool = nn.AvgPool2d(7, stride=1)
108 |         self.fc = nn.Linear(512 * block.expansion, num_classes)
109 | 
110 |         for m in self.modules():
111 |             if isinstance(m, nn.Conv2d):
112 |                 n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
113 |                 m.weight.data.normal_(0, math.sqrt(2. / n))  # He initialisation; uses the math import added above
114 |             elif isinstance(m, nn.BatchNorm2d):
115 |                 m.weight.data.fill_(1)
116 |                 m.bias.data.zero_()
117 | 
118 |     def _make_layer(self, block, planes, blocks, stride=1):
119 |         downsample = None
120 |         if stride != 1 or self.inplanes != planes * block.expansion:
121 |             downsample = nn.Sequential(
122 |                 nn.Conv2d(self.inplanes, planes * block.expansion,
123 |                           kernel_size=1, stride=stride, bias=False),
124 |                 nn.BatchNorm2d(planes * block.expansion),
125 |             )
126 | 
127 |         layers = []
128 |         layers.append(block(self.inplanes, planes, stride, downsample))
129 |         self.inplanes = planes * block.expansion
130 |         for i in range(1, blocks):
131 |             layers.append(block(self.inplanes, planes))
132 | 
133 |         return nn.Sequential(*layers)
134 | 
135 |     def forward(self, x):
136 |         x = self.conv1(x)
137 |         x = self.bn1(x)
138 |         x = self.relu(x)
139 |         x = self.maxpool(x)
140 | 
141 |         x = self.layer1(x)
142 |         x = self.layer2(x)
143 |         x = self.layer3(x)
144 |         x = self.layer4(x)
145 | 
146 |         x = self.avgpool(x)
147 |         x = x.view(x.size(0), -1)
148 |         x = self.fc(x)
149 | 
150 |         return x
--------------------------------------------------------------------------------
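
The file defines `model_urls` but, as excerpted, no constructor helpers. A minimal sketch (assuming torchvision-style factories) of how a ResNet-50 would be built from these classes and pretrained weights loaded from the URL table above:

```python
import torch.utils.model_zoo as model_zoo
from tools.resnet import ResNet, Bottleneck, model_urls

def resnet50(pretrained=False, **kwargs):
    # ResNet-50 = Bottleneck blocks in a 3-4-6-3 layout, as in torchvision
    model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs)
    if pretrained:
        model.load_state_dict(model_zoo.load_url(model_urls['resnet50']))
    return model
```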