├── .idea ├── dictionaries │ └── lxt.xml ├── fuse_seg.iml ├── inspectionProfiles │ └── profiles_settings.xml ├── misc.xml ├── modules.xml ├── vcs.xml └── workspace.xml ├── README.md └── model ├── __init__.py ├── deeplab_resnet.py ├── large_kernel.py ├── large_kernel_exfuse.py ├── seg_resnet.py └── seg_resnext.py /.idea/dictionaries/lxt.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | -------------------------------------------------------------------------------- /.idea/fuse_seg.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 12 | -------------------------------------------------------------------------------- /.idea/inspectionProfiles/profiles_settings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 7 | -------------------------------------------------------------------------------- /.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | BashSupport 14 | 15 | 16 | 17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- /.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /.idea/workspace.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 20 | 21 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 115 | 116 | 117 | 118 | SynchronizedBatchNorm2d 119 | 120 | 121 | 122 | 124 | 125 | 135 | 136 | 137 | 138 | 139 | true 140 | DEFINITION_ORDER 141 | 142 | 143 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 171 | 172 | 175 | 176 | 177 | 178 | 181 | 182 | 185 | 186 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | 206 | 207 | 208 | 218 | 219 | 238 | 239 | 240 | 241 | 242 | 243 | 244 | 245 | 246 | 247 | 248 | 261 | 262 | 275 | 276 | 293 | 294 | 306 | 307 | 308 | 309 | 310 | 311 | 312 | 313 | 314 | 315 | 316 | 317 | 336 | 337 | 356 | 357 | 378 | 379 | 401 | 402 | 426 | 427 | 428 | 430 | 431 | 432 | 433 | 1526212737885 434 | 438 | 439 | 440 | 441 | 442 | 443 | 444 | 445 | 446 | 447 | 448 | 449 | 450 | 451 | 452 | 453 | 454 | 455 | 456 | 457 | 458 | 459 | 460 | 461 | 462 | 463 | 464 | 465 | 466 | 468 | 469 | 471 | 472 | 473 | 475 | 476 | 477 | 478 | 479 | 480 | 481 | 482 | 483 | 484 | 485 | 486 | 487 | 488 | 489 | 490 | 491 | 492 | 493 | 494 | 495 | 496 | 497 | 498 | 499 | 500 | 501 | 502 | 503 | 504 | 505 | 506 | 507 | 508 | 509 | 510 | 511 | 512 | 513 | 514 | 515 | 516 | 517 | 518 | 519 | 520 | 521 | 522 | 523 | 524 | 525 | 526 | 527 | 528 | 529 | 530 | 531 | 532 | 533 | 534 | 535 | 536 | 537 | 538 | 539 | 540 | 541 | 542 | 543 | 544 | 545 | 546 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # fuse_seg 2 | Pytorch Implementation of Paper:
3 | 1, Enhancing Feature Fusion for Semantic Segmentation (face++) [paper](https://arxiv.org/abs/1804.03821)
4 | 2, Large kernel matters (face++) [paper](https://arxiv.org/abs/1703.02719)
5 | All models are based on resnet101 (MIT pretrained models)
6 | -------------------------------------------------------------------------------- /model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lxtGH/fuse_seg_pytorch/942f1cd34cc5d0864d91327595ebcbdef1c628d5/model/__init__.py -------------------------------------------------------------------------------- /model/deeplab_resnet.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | # Author: Xiangtai(lxtpku@pku.edu.cn) 4 | # this file contains the baseline resnet(encoder), baseline pspnet(decoder) segmentation models 5 | # and mul grid bottleneck modules which can be used for deeplab models 6 | import torch 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | 10 | import model.seg_resnet as resnet 11 | import model.seg_resnext as resnext 12 | 13 | 14 | # this is for encoder part 15 | # resnet encoder 16 | class Resnet(nn.Module): 17 | def __init__(self, orig_resnet): 18 | super(Resnet, self).__init__() 19 | 20 | # take pretrained resnet, except AvgPool and FC 21 | self.conv1 = orig_resnet.conv1 22 | self.bn1 = orig_resnet.bn1 23 | self.relu1 = orig_resnet.relu1 24 | self.conv2 = orig_resnet.conv2 25 | self.bn2 = orig_resnet.bn2 26 | self.relu2 = orig_resnet.relu2 27 | self.conv3 = orig_resnet.conv3 28 | self.bn3 = orig_resnet.bn3 29 | self.relu3 = orig_resnet.relu3 30 | self.maxpool = orig_resnet.maxpool 31 | self.layer1 = orig_resnet.layer1 32 | self.layer2 = orig_resnet.layer2 33 | self.layer3 = orig_resnet.layer3 34 | self.layer4 = orig_resnet.layer4 35 | 36 | def forward(self, x, return_feature_maps=False): 37 | conv_out = [] 38 | 39 | x = self.relu1(self.bn1(self.conv1(x))) 40 | x = self.relu2(self.bn2(self.conv2(x))) 41 | x = self.relu3(self.bn3(self.conv3(x))) 42 | x = self.maxpool(x) 43 | 44 | x = self.layer1(x); conv_out.append(x); 45 | x = self.layer2(x); conv_out.append(x); 46 | x = self.layer3(x); conv_out.append(x); 47 | x = self.layer4(x); conv_out.append(x); 48 | 49 | if return_feature_maps: 50 | return conv_out 51 | return x 52 | 53 | # dilated resnet encoder 54 | class ResnetDilated(nn.Module): 55 | def __init__(self, orig_resnet, dilate_scale=8): 56 | super(ResnetDilated, self).__init__() 57 | from functools import partial 58 | 59 | if dilate_scale == 8: 60 | orig_resnet.layer3.apply( 61 | partial(self._nostride_dilate, dilate=2)) 62 | orig_resnet.layer4.apply( 63 | partial(self._nostride_dilate, dilate=4)) 64 | elif dilate_scale == 16: 65 | orig_resnet.layer4.apply( 66 | partial(self._nostride_dilate, dilate=2)) 67 | 68 | # take pretrained resnet, except AvgPool and FC 69 | self.conv1 = orig_resnet.conv1 70 | self.bn1 = orig_resnet.bn1 71 | self.relu1 = orig_resnet.relu1 72 | self.conv2 = orig_resnet.conv2 73 | self.bn2 = orig_resnet.bn2 74 | self.relu2 = orig_resnet.relu2 75 | self.conv3 = orig_resnet.conv3 76 | self.bn3 = orig_resnet.bn3 77 | self.relu3 = orig_resnet.relu3 78 | self.maxpool = orig_resnet.maxpool 79 | self.layer1 = orig_resnet.layer1 80 | self.layer2 = orig_resnet.layer2 81 | self.layer3 = orig_resnet.layer3 82 | self.layer4 = orig_resnet.layer4 83 | 84 | def _nostride_dilate(self, m, dilate): 85 | classname = m.__class__.__name__ 86 | if classname.find('Conv') != -1: 87 | # the convolution with stride 88 | if m.stride == (2, 2): 89 | m.stride = (1, 1) 90 | if m.kernel_size == (3, 3): 91 | m.dilation = (dilate // 2, dilate // 2) 92 | m.padding = (dilate // 2, dilate // 2) 93 | # other convoluions 94 | else: 95 | if m.kernel_size == (3, 3): 96 | m.dilation = (dilate, dilate) 97 | m.padding = (dilate, dilate) 98 | 99 | def forward(self, x, return_feature_maps=False): 100 | conv_out = [] 101 | 102 | x = self.relu1(self.bn1(self.conv1(x))) 103 | x = self.relu2(self.bn2(self.conv2(x))) 104 | x = self.relu3(self.bn3(self.conv3(x))) 105 | x = self.maxpool(x) 106 | 107 | x = self.layer1(x); conv_out.append(x); 108 | x = self.layer2(x); conv_out.append(x); 109 | x = self.layer3(x); conv_out.append(x); 110 | x = self.layer4(x); conv_out.append(x); 111 | 112 | if return_feature_maps: 113 | return conv_out 114 | return x 115 | 116 | 117 | # this is for decoder part 118 | # last conv, bilinear upsample 119 | class C1BilinearDeepSup(nn.Module): 120 | def __init__(self, num_class=150, fc_dim=2048, use_softmax=False): 121 | super(C1BilinearDeepSup, self).__init__() 122 | self.use_softmax = use_softmax 123 | 124 | self.cbr = conv3x3_bn_relu(fc_dim, fc_dim // 4, 1) 125 | self.cbr_deepsup = conv3x3_bn_relu(fc_dim // 2, fc_dim // 4, 1) 126 | 127 | # last conv 128 | self.conv_last = nn.Conv2d(fc_dim // 4, num_class, 1, 1, 0) 129 | self.conv_last_deepsup = nn.Conv2d(fc_dim // 4, num_class, 1, 1, 0) 130 | 131 | def forward(self, conv_out, segSize=None): 132 | conv5 = conv_out[-1] 133 | 134 | x = self.cbr(conv5) 135 | x = self.conv_last(x) 136 | 137 | if self.use_softmax: # is True during inference 138 | x = nn.functional.upsample(x, size=segSize, mode='bilinear') 139 | x = nn.functional.softmax(x, dim=1) 140 | return x 141 | 142 | # deep sup 143 | conv4 = conv_out[-2] 144 | _ = self.cbr_deepsup(conv4) 145 | _ = self.conv_last_deepsup(_) 146 | 147 | x = nn.functional.log_softmax(x, dim=1) 148 | _ = nn.functional.log_softmax(_, dim=1) 149 | 150 | return (x, _) 151 | 152 | 153 | # last conv, bilinear upsample 154 | class C1Bilinear(nn.Module): 155 | def __init__(self, num_class=150, fc_dim=2048, use_softmax=False): 156 | super(C1Bilinear, self).__init__() 157 | self.use_softmax = use_softmax 158 | 159 | self.cbr = conv3x3_bn_relu(fc_dim, fc_dim // 4, 1) 160 | 161 | # last conv 162 | self.conv_last = nn.Conv2d(fc_dim // 4, num_class, 1, 1, 0) 163 | 164 | def forward(self, conv_out, segSize=None): 165 | conv5 = conv_out[-1] 166 | x = self.cbr(conv5) 167 | x = self.conv_last(x) 168 | 169 | if self.use_softmax: # is True during inference 170 | x = nn.functional.upsample(x, size=segSize, mode='bilinear') 171 | x = nn.functional.softmax(x, dim=1) 172 | else: 173 | x = nn.functional.log_softmax(x, dim=1) 174 | 175 | return x 176 | 177 | 178 | # pyramid pooling, bilinear upsample 179 | class PPMBilinear(nn.Module): 180 | def __init__(self, num_class=150, fc_dim=4096, 181 | use_softmax=False, pool_scales=(1, 2, 3, 6)): 182 | super(PPMBilinear, self).__init__() 183 | self.use_softmax = use_softmax 184 | 185 | self.ppm = [] 186 | for scale in pool_scales: 187 | self.ppm.append(nn.Sequential( 188 | nn.AdaptiveAvgPool2d(scale), 189 | nn.Conv2d(fc_dim, 512, kernel_size=1, bias=False), 190 | nn.BatchNorm2d(512), 191 | nn.ReLU(inplace=True) 192 | )) 193 | self.ppm = nn.ModuleList(self.ppm) 194 | 195 | self.conv_last = nn.Sequential( 196 | nn.Conv2d(fc_dim+len(pool_scales)*512, 512, 197 | kernel_size=3, padding=1, bias=False), 198 | nn.BatchNorm2d(512), 199 | nn.ReLU(inplace=True), 200 | nn.Dropout2d(0.1), 201 | nn.Conv2d(512, num_class, kernel_size=1) 202 | ) 203 | 204 | def forward(self, conv_out, segSize=None): 205 | conv5 = conv_out[-1] 206 | 207 | input_size = conv5.size() 208 | ppm_out = [conv5] 209 | for pool_scale in self.ppm: 210 | ppm_out.append(nn.functional.upsample( 211 | pool_scale(conv5), 212 | (input_size[2], input_size[3]), 213 | mode='bilinear')) 214 | ppm_out = torch.cat(ppm_out, 1) 215 | 216 | x = self.conv_last(ppm_out) 217 | 218 | if self.use_softmax: # is True during inference 219 | x = nn.functional.upsample(x, size=segSize, mode='bilinear') 220 | x = nn.functional.softmax(x, dim=1) 221 | else: 222 | x = nn.functional.log_softmax(x, dim=1) 223 | return x 224 | 225 | 226 | # pyramid pooling, bilinear upsample 227 | class PPMBilinearDeepsup(nn.Module): 228 | def __init__(self, num_class=150, fc_dim=4096, 229 | use_softmax=False, pool_scales=(1, 2, 3, 6)): 230 | super(PPMBilinearDeepsup, self).__init__() 231 | self.use_softmax = use_softmax 232 | 233 | self.ppm = [] 234 | for scale in pool_scales: 235 | self.ppm.append(nn.Sequential( 236 | nn.AdaptiveAvgPool2d(scale), 237 | nn.Conv2d(fc_dim, 512, kernel_size=1, bias=False), 238 | nn.BatchNorm2d(512), 239 | nn.ReLU(inplace=True) 240 | )) 241 | self.ppm = nn.ModuleList(self.ppm) 242 | self.cbr_deepsup = conv3x3_bn_relu(fc_dim // 2, fc_dim // 4, 1) 243 | 244 | self.conv_last = nn.Sequential( 245 | nn.Conv2d(fc_dim+len(pool_scales)*512, 512, 246 | kernel_size=3, padding=1, bias=False), 247 | nn.BatchNorm2d(512), 248 | nn.ReLU(inplace=True), 249 | nn.Dropout2d(0.1), 250 | nn.Conv2d(512, num_class, kernel_size=1) 251 | ) 252 | self.conv_last_deepsup = nn.Conv2d(fc_dim // 4, num_class, 1, 1, 0) 253 | self.dropout_deepsup = nn.Dropout2d(0.1) 254 | 255 | def forward(self, conv_out, segSize=None): 256 | conv5 = conv_out[-1] 257 | 258 | input_size = conv5.size() 259 | ppm_out = [conv5] 260 | for pool_scale in self.ppm: 261 | ppm_out.append(nn.functional.upsample( 262 | pool_scale(conv5), 263 | (input_size[2], input_size[3]), 264 | mode='bilinear')) 265 | ppm_out = torch.cat(ppm_out, 1) 266 | 267 | x = self.conv_last(ppm_out) 268 | 269 | if self.use_softmax: # is True during inference 270 | x = nn.functional.upsample(x, size=segSize, mode='bilinear') 271 | x = nn.functional.softmax(x, dim=1) 272 | return x 273 | 274 | # deep sup 275 | conv4 = conv_out[-2] 276 | _ = self.cbr_deepsup(conv4) 277 | _ = self.dropout_deepsup(_) 278 | _ = self.conv_last_deepsup(_) 279 | 280 | x = nn.functional.log_softmax(x, dim=1) 281 | _ = nn.functional.log_softmax(_, dim=1) 282 | 283 | return (x, _) 284 | 285 | 286 | # upernet 287 | class UPerNet(nn.Module): 288 | def __init__(self, num_class=150, fc_dim=4096, 289 | use_softmax=False, pool_scales=(1, 2, 3, 6), 290 | fpn_inplanes=(256,512,1024,2048), fpn_dim=256): 291 | super(UPerNet, self).__init__() 292 | self.use_softmax = use_softmax 293 | 294 | # PPM Module 295 | self.ppm_pooling = [] 296 | self.ppm_conv = [] 297 | 298 | for scale in pool_scales: 299 | self.ppm_pooling.append(nn.AdaptiveAvgPool2d(scale)) 300 | self.ppm_conv.append(nn.Sequential( 301 | nn.Conv2d(fc_dim, 512, kernel_size=1, bias=False), 302 | nn.BatchNorm2d(512), 303 | nn.ReLU(inplace=True) 304 | )) 305 | self.ppm_pooling = nn.ModuleList(self.ppm_pooling) 306 | self.ppm_conv = nn.ModuleList(self.ppm_conv) 307 | self.ppm_last_conv = conv3x3_bn_relu(fc_dim + len(pool_scales)*512, fpn_dim, 1) 308 | 309 | # FPN Module 310 | self.fpn_in = [] 311 | for fpn_inplane in fpn_inplanes[:-1]: # skip the top layer 312 | self.fpn_in.append(nn.Sequential( 313 | nn.Conv2d(fpn_inplane, fpn_dim, kernel_size=1, bias=False), 314 | nn.BatchNorm2d(fpn_dim), 315 | nn.ReLU(inplace=True) 316 | )) 317 | self.fpn_in = nn.ModuleList(self.fpn_in) 318 | 319 | self.fpn_out = [] 320 | for i in range(len(fpn_inplanes) - 1): # skip the top layer 321 | self.fpn_out.append(nn.Sequential( 322 | conv3x3_bn_relu(fpn_dim, fpn_dim, 1), 323 | )) 324 | self.fpn_out = nn.ModuleList(self.fpn_out) 325 | 326 | self.conv_last = nn.Sequential( 327 | conv3x3_bn_relu(len(fpn_inplanes) * fpn_dim, fpn_dim, 1), 328 | nn.Conv2d(fpn_dim, num_class, kernel_size=1) 329 | ) 330 | 331 | def forward(self, conv_out, segSize=None): 332 | conv5 = conv_out[-1] 333 | 334 | input_size = conv5.size() 335 | ppm_out = [conv5] 336 | for pool_scale, pool_conv in zip(self.ppm_pooling, self.ppm_conv): 337 | ppm_out.append(pool_conv(nn.functional.upsample( 338 | pool_scale(conv5), 339 | (input_size[2], input_size[3]), 340 | mode='bilinear'))) 341 | ppm_out = torch.cat(ppm_out, 1) 342 | f = self.ppm_last_conv(ppm_out) 343 | 344 | fpn_feature_list = [f] 345 | for i in reversed(range(len(conv_out) - 1)): 346 | conv_x = conv_out[i] 347 | conv_x = self.fpn_in[i](conv_x) # lateral branch 348 | 349 | f = nn.functional.upsample(f, size=conv_x.size()[2:], mode='bilinear') # top-down branch 350 | f = conv_x + f 351 | 352 | fpn_feature_list.append(self.fpn_out[i](f)) 353 | 354 | fpn_feature_list.reverse() # [P2 - P5] 355 | output_size = fpn_feature_list[0].size()[2:] 356 | fusion_list = [fpn_feature_list[0]] 357 | for i in range(1, len(fpn_feature_list)): 358 | fusion_list.append(nn.functional.upsample( 359 | fpn_feature_list[i], 360 | output_size, 361 | mode='bilinear')) 362 | fusion_out = torch.cat(fusion_list, 1) 363 | x = self.conv_last(fusion_out) 364 | 365 | if self.use_softmax: # is True during inference 366 | x = nn.functional.upsample(x, size=segSize, mode='bilinear') 367 | x = nn.functional.softmax(x, dim=1) 368 | return x 369 | 370 | x = nn.functional.log_softmax(x, dim=1) 371 | 372 | return x 373 | 374 | 375 | 376 | def conv3x3(in_planes, out_planes, stride=1, has_bias=False): 377 | "3x3 convolution with padding" 378 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 379 | padding=1, bias=has_bias) 380 | 381 | 382 | def conv3x3_bn_relu(in_planes, out_planes, stride=1): 383 | return nn.Sequential( 384 | conv3x3(in_planes, out_planes, stride), 385 | nn.BatchNorm2d(out_planes), 386 | nn.ReLU(inplace=True), 387 | ) 388 | 389 | # this is used to build the different models, both encoder and decoder 390 | class ModelBuilder(): 391 | # custom weights initialization 392 | def weights_init(self, m): 393 | classname = m.__class__.__name__ 394 | if classname.find('Conv') != -1: 395 | nn.init.kaiming_normal(m.weight.data) 396 | elif classname.find('BatchNorm') != -1: 397 | m.weight.data.fill_(1.) 398 | m.bias.data.fill_(1e-4) 399 | elif classname.find('Linear') != -1: 400 | m.weight.data.normal_(0.0, 0.0001) 401 | 402 | def build_encoder(self, arch='resnet50_dilated8', fc_dim=512, weights=''): 403 | pretrained = True if len(weights) == 0 else False 404 | if arch == 'resnet34': 405 | raise NotImplementedError 406 | orig_resnet = resnet.__dict__['resnet34'](pretrained=pretrained) 407 | net_encoder = Resnet(orig_resnet) 408 | elif arch == 'resnet34_dilated8': 409 | raise NotImplementedError 410 | orig_resnet = resnet.__dict__['resnet34'](pretrained=pretrained) 411 | net_encoder = ResnetDilated(orig_resnet, 412 | dilate_scale=8) 413 | elif arch == 'resnet34_dilated16': 414 | raise NotImplementedError 415 | orig_resnet = resnet.__dict__['resnet34'](pretrained=pretrained) 416 | net_encoder = ResnetDilated(orig_resnet, 417 | dilate_scale=16) 418 | elif arch == 'resnet50': 419 | orig_resnet = resnet.__dict__['resnet50'](pretrained=pretrained) 420 | net_encoder = Resnet(orig_resnet) 421 | elif arch == 'resnet50_dilated8': 422 | orig_resnet = resnet.__dict__['resnet50'](pretrained=pretrained) 423 | net_encoder = ResnetDilated(orig_resnet, 424 | dilate_scale=8) 425 | elif arch == 'resnet50_dilated16': 426 | orig_resnet = resnet.__dict__['resnet50'](pretrained=pretrained) 427 | net_encoder = ResnetDilated(orig_resnet, 428 | dilate_scale=16) 429 | elif arch == 'resnet101': 430 | orig_resnet = resnet.__dict__['resnet101'](pretrained=pretrained) 431 | net_encoder = Resnet(orig_resnet) 432 | elif arch == 'resnet101_dilated8': 433 | orig_resnet = resnet.__dict__['resnet101'](pretrained=pretrained) 434 | net_encoder = ResnetDilated(orig_resnet, 435 | dilate_scale=8) 436 | elif arch == 'resnet101_dilated16': 437 | orig_resnet = resnet.__dict__['resnet101'](pretrained=pretrained) 438 | net_encoder = ResnetDilated(orig_resnet, 439 | dilate_scale=16) 440 | elif arch == 'resnext101': 441 | orig_resnext = resnext.__dict__['resnext101'](pretrained=pretrained) 442 | net_encoder = Resnet(orig_resnext) # we can still use class Resnet 443 | else: 444 | raise Exception('Architecture undefined!') 445 | 446 | # net_encoder.apply(self.weights_init) 447 | if len(weights) > 0: 448 | print('Loading weights for net_encoder') 449 | net_encoder.load_state_dict( 450 | torch.load(weights, map_location=lambda storage, loc: storage), strict=False) 451 | return net_encoder 452 | 453 | def build_decoder(self, arch='ppm_bilinear_deepsup', 454 | fc_dim=512, num_class=150, 455 | weights='', use_softmax=False): 456 | if arch == 'c1_bilinear_deepsup': 457 | net_decoder = C1BilinearDeepSup( 458 | num_class=num_class, 459 | fc_dim=fc_dim, 460 | use_softmax=use_softmax) 461 | elif arch == 'c1_bilinear': 462 | net_decoder = C1Bilinear( 463 | num_class=num_class, 464 | fc_dim=fc_dim, 465 | use_softmax=use_softmax) 466 | elif arch == 'ppm_bilinear': 467 | net_decoder = PPMBilinear( 468 | num_class=num_class, 469 | fc_dim=fc_dim, 470 | use_softmax=use_softmax) 471 | elif arch == 'ppm_bilinear_deepsup': 472 | net_decoder = PPMBilinearDeepsup( 473 | num_class=num_class, 474 | fc_dim=fc_dim, 475 | use_softmax=use_softmax) 476 | elif arch == 'upernet_lite': 477 | net_decoder = UPerNet( 478 | num_class=num_class, 479 | fc_dim=fc_dim, 480 | use_softmax=use_softmax, 481 | fpn_dim=256) 482 | elif arch == 'upernet': 483 | net_decoder = UPerNet( 484 | num_class=num_class, 485 | fc_dim=fc_dim, 486 | use_softmax=use_softmax, 487 | fpn_dim=512) 488 | else: 489 | raise Exception('Architecture undefined!') 490 | 491 | net_decoder.apply(self.weights_init) 492 | if len(weights) > 0: 493 | print('Loading weights for net_decoder') 494 | net_decoder.load_state_dict( 495 | torch.load(weights, map_location=lambda storage, loc: storage), strict=False) 496 | return net_decoder 497 | 498 | 499 | 500 | # this is used to build deeplabv3, deeplabv3+ 501 | 502 | class _ConvBatchNormReluBlock(nn.Sequential): 503 | def __init__(self, inplanes, outplanes, kernel_size, stride, padding, dilation, relu=True): 504 | super(_ConvBatchNormReluBlock, self).__init__() 505 | self.add_module("cov", nn.Conv2d(in_channels=inplanes,out_channels=outplanes, 506 | kernel_size=kernel_size, stride=stride, padding = padding, 507 | dilation = dilation, bias=False)) 508 | self.add_module("bn", nn.BatchNorm2d(num_features=outplanes, momentum=0.999, affine=True)) 509 | if relu: 510 | self.add_module("relu", nn.ReLU()) 511 | def forward(self, x): 512 | return super(_ConvBatchNormReluBlock, self).forward(x) 513 | 514 | class _ResidualBlockMulGrid(nn.Sequential): 515 | """ 516 | Residual Block with multi-grid , note: best model-> (1, 2, 1) 517 | """ 518 | def __init__(self, layers, inplanes, midplanes, outplanes, stride, dilation, mulgrid=[1,2,1]): 519 | super(_ResidualBlockMulGrid, self).__init__() 520 | self.add_module("block1", _Bottleneck(inplanes, midplanes, outplanes, stride, dilation * mulgrid[0], True)) 521 | self.add_module("block2", _Bottleneck(outplanes, midplanes, outplanes, stride, dilation * mulgrid[1], False)) 522 | self.add_module("block3", _Bottleneck(outplanes, midplanes, outplanes, stride, dilation * mulgrid[2], False)) 523 | def forward(self, x): 524 | return super(_ResidualBlockMulGrid, self).forward(x) 525 | 526 | class _Bottleneck(nn.Sequential): 527 | def __init__(self, inplanes, midplanes, outplanes, stride, dilation, downsample): 528 | super(_Bottleneck, self).__init__() 529 | self.reduce = _ConvBatchNormReluBlock(inplanes, midplanes, 1, stride, 0, 1) 530 | self.conv3x3 = _ConvBatchNormReluBlock(midplanes, midplanes, 3, 1, dilation, dilation) 531 | self.increase = _ConvBatchNormReluBlock(midplanes, outplanes, 1, 1, 0, 1, relu=False) 532 | self.downsample = downsample 533 | if self.downsample: 534 | self.proj = _ConvBatchNormReluBlock(inplanes, outplanes, 1, stride, 0, 1, relu=False) 535 | def forward(self, x): 536 | h = self.reduce(x) 537 | h = self.conv3x3(h) 538 | h = self.increase(h) 539 | if self.downsample: 540 | h += self.proj(x) 541 | else: 542 | h += x 543 | return F.relu(h) 544 | 545 | 546 | 547 | 548 | 549 | if __name__ == '__main__': 550 | # test for model builder 551 | builder = ModelBuilder() 552 | net_encoder = builder.build_encoder( 553 | arch ="resnet101_dilated8" 554 | ).cuda() 555 | test_input = torch.autograd.Variable(torch.randn(1, 3, 1024, 512), volatile=True).cuda() 556 | out = net_encoder.forward(test_input) 557 | print (out[0].size()) -------------------------------------------------------------------------------- /model/large_kernel.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | # Implementation of Large Kernel Matters Paper (face++) 4 | # Author: Xiangtai(lxtpku@pku.edu.cn) 5 | 6 | import torch 7 | from torch import nn 8 | 9 | 10 | from model.deeplab_resnet import ModelBuilder 11 | 12 | 13 | class _BoundaryRefineModule(nn.Module): 14 | def __init__(self, dim): 15 | super(_BoundaryRefineModule, self).__init__() 16 | self.relu = nn.ReLU(inplace=True) 17 | self.conv1 = nn.Conv2d(dim, dim, kernel_size=3, padding=1) 18 | self.conv2 = nn.Conv2d(dim, dim, kernel_size=3, padding=1) 19 | 20 | def forward(self, x): 21 | residual = self.conv1(x) 22 | residual = self.relu(residual) 23 | residual = self.conv2(residual) 24 | out = x + residual 25 | return out 26 | 27 | 28 | class _GlobalConvModule(nn.Module): 29 | def __init__(self, in_dim, out_dim, kernel_size): 30 | super(_GlobalConvModule, self).__init__() 31 | pad0 = (kernel_size[0] - 1) / 2 32 | pad1 = (kernel_size[1] - 1) / 2 33 | # kernel size had better be odd number so as to avoid alignment error 34 | super(_GlobalConvModule, self).__init__() 35 | self.conv_l1 = nn.Conv2d(in_dim, out_dim, kernel_size=(kernel_size[0], 1), 36 | padding=(pad0, 0)) 37 | self.conv_l2 = nn.Conv2d(out_dim, out_dim, kernel_size=(1, kernel_size[1]), 38 | padding=(0, pad1)) 39 | self.conv_r1 = nn.Conv2d(in_dim, out_dim, kernel_size=(1, kernel_size[1]), 40 | padding=(0, pad1)) 41 | self.conv_r2 = nn.Conv2d(out_dim, out_dim, kernel_size=(kernel_size[0], 1), 42 | padding=(pad0, 0)) 43 | 44 | def forward(self, x): 45 | x_l = self.conv_l1(x) 46 | x_l = self.conv_l2(x_l) 47 | x_r = self.conv_r1(x) 48 | x_r = self.conv_r2(x_r) 49 | x = x_l + x_r 50 | return x 51 | 52 | 53 | class GCN(nn.Module): 54 | def __init__(self, num_classes, kernel_size=7): 55 | super(GCN, self).__init__() 56 | self.resnet_features = ModelBuilder().build_encoder("resnet101") 57 | self.layer0 = nn.Sequential(self.resnet_features.conv1, self.resnet_features.bn1, 58 | self.resnet_features.relu1, self.resnet_features.conv3, 59 | self.resnet_features.bn3, self.resnet_features.relu3 60 | ) 61 | self.layer1 = nn.Sequential(self.resnet_features.maxpool, self.resnet_features.layer1) 62 | self.layer2 = self.resnet_features.layer2 63 | self.layer3 = self.resnet_features.layer3 64 | self.layer4 = self.resnet_features.layer4 65 | 66 | self.gcm1 = _GlobalConvModule(2048, num_classes, (kernel_size, kernel_size)) 67 | self.gcm2 = _GlobalConvModule(1024, num_classes, (kernel_size, kernel_size)) 68 | self.gcm3 = _GlobalConvModule(512, num_classes, (kernel_size, kernel_size)) 69 | self.gcm4 = _GlobalConvModule(256, num_classes, (kernel_size, kernel_size)) 70 | 71 | self.brm1 = _BoundaryRefineModule(num_classes) 72 | self.brm2 = _BoundaryRefineModule(num_classes) 73 | self.brm3 = _BoundaryRefineModule(num_classes) 74 | self.brm4 = _BoundaryRefineModule(num_classes) 75 | self.brm5 = _BoundaryRefineModule(num_classes) 76 | self.brm6 = _BoundaryRefineModule(num_classes) 77 | self.brm7 = _BoundaryRefineModule(num_classes) 78 | self.brm8 = _BoundaryRefineModule(num_classes) 79 | self.brm9 = _BoundaryRefineModule(num_classes) 80 | 81 | self.deconv1 = nn.ConvTranspose2d(num_classes, num_classes, kernel_size=4, stride=2, padding=1, bias=False) 82 | self.deconv2 = nn.ConvTranspose2d(num_classes, num_classes, kernel_size=4, stride=2, padding=1, bias=False) 83 | self.deconv3 = nn.ConvTranspose2d(num_classes, num_classes, kernel_size=4, stride=2, padding=1, bias=False) 84 | self.deconv4 = nn.ConvTranspose2d(num_classes, num_classes, kernel_size=4, stride=2, padding=1, bias=False) 85 | self.deconv5 = nn.ConvTranspose2d(num_classes, num_classes, kernel_size=4, stride=2, padding=1, bias=False) 86 | 87 | def forward(self, x): 88 | # suppose input = x , if x 512 89 | f0 = self.layer0(x) # 256 90 | f1 = self.layer1(f0) # 128 91 | f2 = self.layer2(f1) # 64 92 | f3 = self.layer3(f2) # 32 93 | f4 = self.layer4(f3) # 16 94 | 95 | gcfm1 = self.brm1(self.gcm1(f4)) # 16 96 | gcfm2 = self.brm2(self.gcm2(f3)) # 32 97 | gcfm3 = self.brm3(self.gcm3(f2)) # 64 98 | gcfm4 = self.brm4(self.gcm4(f1)) # 128 99 | 100 | fs1 = self.brm5(self.deconv1(gcfm1) + gcfm2) # 32 101 | fs2 = self.brm6(self.deconv2(fs1) + gcfm3) # 64 102 | fs3 = self.brm7(self.deconv3(fs2) + gcfm4) # 128 103 | fs4 = self.brm8(self.deconv4(fs3)) # 256 104 | out = self.brm9(self.deconv5(fs4)) 105 | 106 | return out 107 | 108 | def freeze_bn(self): 109 | for m in self.modules(): 110 | if isinstance(m, nn.BatchNorm2d): 111 | m.eval() 112 | 113 | if __name__ == '__main__': 114 | model = GCN(20).cuda() 115 | model.freeze_bn() 116 | model.eval() 117 | image = torch.autograd.Variable(torch.randn(1, 3, 512, 512), volatile=True).cuda() 118 | print (model(image).size()) -------------------------------------------------------------------------------- /model/large_kernel_exfuse.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | # Implementation of ExFuse: Enhancing Feature Fusion for Semantic Segmentation Paper (face++) 4 | # Author: Xiangtai(lxtpku@pku.edu.cn) 5 | # ########### 6 | # backbone GCN framework(large_kernel.py) and ResNext101 (Resnet) as pretrained model 7 | # Layer Rearrangement (LR) (0.8%): re-arrange the layer in the resnet model 8 | # Semantic Supervision (SS) (1.1%): used when training the model on the ImageNet 9 | # assign auxiliary supervisions directly to the early stages of the encoder network 10 | # Semantic Embedding Branch (SEB) (0.7%) 11 | # Explicit Channel Resolution Embedding (ECRE) (0.5%) 12 | # Densely Adjacent Prediction (0.6%) 13 | 14 | # ########### 15 | 16 | import torch 17 | from torch import nn 18 | 19 | from model.deeplab_resnet import ModelBuilder 20 | 21 | from .large_kernel import _GlobalConvModule 22 | 23 | 24 | class SEB(nn.Module): 25 | def __init__(self, in_channels, out_channels): 26 | super(SEB, self).__init__() 27 | self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1,padding=1) 28 | self.upsample = nn.Upsample(scale_factor=2, mode="bilinear") 29 | 30 | def forward(self, x): 31 | x1, x2 = x 32 | return x1 * self.upsample(self.conv(x2)) 33 | 34 | 35 | 36 | class GCNFuse(nn.Module): 37 | def __init__(self, configer=None,kernel_size=7, dap_k=3): 38 | super(GCNFuse, self).__init__() 39 | self.num_classes =20 40 | num_classes = self.num_classes 41 | self.resnet_features = ModelBuilder().build_encoder("resnet101") 42 | self.layer0 = nn.Sequential(self.resnet_features.conv1, self.resnet_features.bn1, 43 | self.resnet_features.relu1, self.resnet_features.conv3, 44 | self.resnet_features.bn3, self.resnet_features.relu3 45 | ) 46 | self.layer1 = nn.Sequential(self.resnet_features.maxpool, self.resnet_features.layer1) 47 | self.layer2 = self.resnet_features.layer2 48 | self.layer3 = self.resnet_features.layer3 49 | self.layer4 = self.resnet_features.layer4 50 | 51 | self.gcm1 = _GlobalConvModule(2048, num_classes * 4, (kernel_size, kernel_size)) 52 | self.gcm2 = _GlobalConvModule(1024, num_classes, (kernel_size, kernel_size)) 53 | self.gcm3 = _GlobalConvModule(512, num_classes * dap_k**2, (kernel_size, kernel_size)) 54 | self.gcm4 = _GlobalConvModule(256, num_classes * dap_k**2, (kernel_size, kernel_size)) 55 | 56 | self.deconv1 = nn.ConvTranspose2d(num_classes, num_classes * dap_k**2, kernel_size=4, stride=2, padding=1, bias=False) 57 | self.deconv2 = nn.ConvTranspose2d(num_classes, num_classes * dap_k**2, kernel_size=4, stride=2, padding=1, bias=False) 58 | self.deconv3 = nn.ConvTranspose2d(num_classes * dap_k**2, num_classes * dap_k**2, kernel_size=4, stride=2, padding=1, bias=False) 59 | self.deconv4 = nn.ConvTranspose2d(num_classes * dap_k**2, num_classes * dap_k**2, kernel_size=4, stride=2, padding=1, bias=False) 60 | self.deconv5 = nn.ConvTranspose2d(num_classes * dap_k**2, num_classes * dap_k**2, kernel_size=4, stride=2, padding=1, bias=False) 61 | 62 | self.ecre = nn.PixelShuffle(2) 63 | 64 | self.seb1 = SEB(2048, 1024) 65 | self.seb2 = SEB(3072, 512) 66 | self.seb3 = SEB(3584, 256) 67 | 68 | self.upsample2 = nn.Upsample(scale_factor=2, mode="bilinear") 69 | self.upsample4 = nn.Upsample(scale_factor=4, mode="bilinear") 70 | 71 | self.DAP = nn.Sequential( 72 | nn.PixelShuffle(dap_k), 73 | nn.AvgPool2d((dap_k,dap_k)) 74 | ) 75 | 76 | def forward(self, x): 77 | # suppose input = x , if x 512 78 | f0 = self.layer0(x) # 256 79 | f1 = self.layer1(f0) # 128 80 | print (f1.size()) 81 | f2 = self.layer2(f1) # 64 82 | print (f2.size()) 83 | f3 = self.layer3(f2) # 32 84 | print (f3.size()) 85 | f4 = self.layer4(f3) # 16 86 | print (f4.size()) 87 | x = self.gcm1(f4) 88 | out1 = self.ecre(x) 89 | seb1 = self.seb1([f3, f4]) 90 | gcn1 = self.gcm2(seb1) 91 | 92 | seb2 = self.seb2([f2, torch.cat([f3, self.upsample2(f4)], dim=1)]) 93 | gcn2 = self.gcm3(seb2) 94 | 95 | seb3 = self.seb3([f1, torch.cat([f2, self.upsample2(f3), self.upsample4(f4)], dim=1)]) 96 | gcn3 = self.gcm4(seb3) 97 | 98 | y = self.deconv2(gcn1 + out1) 99 | y = self.deconv3(gcn2 + y) 100 | y = self.deconv4(gcn3 + y) 101 | y = self.deconv5(y) 102 | y = self.DAP(y) 103 | return y 104 | 105 | def freeze_bn(self): 106 | for m in self.modules(): 107 | if isinstance(m, nn.BatchNorm2d): 108 | m.eval() 109 | 110 | 111 | if __name__ == '__main__': 112 | model = GCNFuse(20).cuda() 113 | model.freeze_bn() 114 | model.eval() 115 | image = torch.autograd.Variable(torch.randn(1, 3, 512, 512), volatile=True).cuda() 116 | res1, res2 = model(image) 117 | print (res1.size(), res2.size()) -------------------------------------------------------------------------------- /model/seg_resnet.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | #resnet50 and resnet 101 4 | import os 5 | import sys 6 | import torch 7 | import torch.nn as nn 8 | import math 9 | 10 | try: 11 | from urllib import urlretrieve 12 | except ImportError: 13 | from urllib.request import urlretrieve 14 | 15 | 16 | __all__ = ['ResNet', 'resnet50', 'resnet101'] # resnet101 is coming soon! 17 | 18 | 19 | model_urls = { 20 | 'resnet50': 'http://sceneparsing.csail.mit.edu/model/pretrained_resnet/resnet50-imagenet.pth', 21 | 'resnet101': 'http://sceneparsing.csail.mit.edu/model/pretrained_resnet/resnet101-imagenet.pth' 22 | } 23 | 24 | 25 | def conv3x3(in_planes, out_planes, stride=1): 26 | "3x3 convolution with padding" 27 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 28 | padding=1, bias=False) 29 | 30 | 31 | class BasicBlock(nn.Module): 32 | expansion = 1 33 | 34 | def __init__(self, inplanes, planes, stride=1, downsample=None): 35 | super(BasicBlock, self).__init__() 36 | self.conv1 = conv3x3(inplanes, planes, stride) 37 | self.bn1 = nn.BatchNorm2d(planes) 38 | self.relu = nn.ReLU(inplace=True) 39 | self.conv2 = conv3x3(planes, planes) 40 | self.bn2 = nn.BatchNorm2d(planes) 41 | self.downsample = downsample 42 | self.stride = stride 43 | 44 | def forward(self, x): 45 | residual = x 46 | 47 | out = self.conv1(x) 48 | out = self.bn1(out) 49 | out = self.relu(out) 50 | 51 | out = self.conv2(out) 52 | out = self.bn2(out) 53 | 54 | if self.downsample is not None: 55 | residual = self.downsample(x) 56 | 57 | out += residual 58 | out = self.relu(out) 59 | 60 | return out 61 | 62 | 63 | class Bottleneck(nn.Module): 64 | expansion = 4 65 | 66 | def __init__(self, inplanes, planes, stride=1, downsample=None): 67 | super(Bottleneck, self).__init__() 68 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 69 | self.bn1 = nn.BatchNorm2d(planes) 70 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 71 | padding=1, bias=False) 72 | self.bn2 = nn.BatchNorm2d(planes) 73 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) 74 | self.bn3 = nn.BatchNorm2d(planes * 4) 75 | self.relu = nn.ReLU(inplace=True) 76 | self.downsample = downsample 77 | self.stride = stride 78 | 79 | def forward(self, x): 80 | residual = x 81 | 82 | out = self.conv1(x) 83 | out = self.bn1(out) 84 | out = self.relu(out) 85 | 86 | out = self.conv2(out) 87 | out = self.bn2(out) 88 | out = self.relu(out) 89 | 90 | out = self.conv3(out) 91 | out = self.bn3(out) 92 | 93 | if self.downsample is not None: 94 | residual = self.downsample(x) 95 | 96 | out += residual 97 | out = self.relu(out) 98 | 99 | return out 100 | 101 | 102 | class ResNet(nn.Module): 103 | 104 | def __init__(self, block, layers, num_classes=1000): 105 | self.inplanes = 128 106 | super(ResNet, self).__init__() 107 | self.conv1 = conv3x3(3, 64, stride=2) 108 | self.bn1 = nn.BatchNorm2d(64) 109 | self.relu1 = nn.ReLU(inplace=True) 110 | self.conv2 = conv3x3(64, 64) 111 | self.bn2 = nn.BatchNorm2d(64) 112 | self.relu2 = nn.ReLU(inplace=True) 113 | self.conv3 = conv3x3(64, 128) 114 | self.bn3 = nn.BatchNorm2d(128) 115 | self.relu3 = nn.ReLU(inplace=True) 116 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 117 | 118 | self.layer1 = self._make_layer(block, 64, layers[0]) 119 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 120 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 121 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 122 | self.avgpool = nn.AvgPool2d(7, stride=1) 123 | self.fc = nn.Linear(512 * block.expansion, num_classes) 124 | 125 | for m in self.modules(): 126 | if isinstance(m, nn.Conv2d): 127 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 128 | m.weight.data.normal_(0, math.sqrt(2. / n)) 129 | elif isinstance(m, nn.BatchNorm2d): 130 | m.weight.data.fill_(1) 131 | m.bias.data.zero_() 132 | 133 | def _make_layer(self, block, planes, blocks, stride=1): 134 | downsample = None 135 | if stride != 1 or self.inplanes != planes * block.expansion: 136 | downsample = nn.Sequential( 137 | nn.Conv2d(self.inplanes, planes * block.expansion, 138 | kernel_size=1, stride=stride, bias=False), 139 | nn.BatchNorm2d(planes * block.expansion), 140 | ) 141 | 142 | layers = [] 143 | layers.append(block(self.inplanes, planes, stride, downsample)) 144 | self.inplanes = planes * block.expansion 145 | for i in range(1, blocks): 146 | layers.append(block(self.inplanes, planes)) 147 | 148 | return nn.Sequential(*layers) 149 | 150 | def forward(self, x): 151 | x = self.relu1(self.bn1(self.conv1(x))) 152 | x = self.relu2(self.bn2(self.conv2(x))) 153 | x = self.relu3(self.bn3(self.conv3(x))) 154 | x = self.maxpool(x) 155 | 156 | x = self.layer1(x) 157 | x = self.layer2(x) 158 | x = self.layer3(x) 159 | x = self.layer4(x) 160 | 161 | x = self.avgpool(x) 162 | x = x.view(x.size(0), -1) 163 | x = self.fc(x) 164 | 165 | return x 166 | 167 | 168 | 169 | def resnet50(pretrained=False, **kwargs): 170 | """Constructs a ResNet-50 model. 171 | 172 | Args: 173 | pretrained (bool): If True, returns a model pre-trained on Places 174 | """ 175 | model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs) 176 | if pretrained: 177 | model.load_state_dict(load_url(model_urls['resnet50']), strict=False) 178 | return model 179 | 180 | 181 | def resnet101(pretrained=False, **kwargs): 182 | """Constructs a ResNet-101 model. 183 | 184 | Args: 185 | pretrained (bool): If True, returns a model pre-trained on Places 186 | """ 187 | model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs) 188 | if pretrained: 189 | model.load_state_dict(load_url(model_urls['resnet101']), strict=False) 190 | return model 191 | 192 | 193 | def load_url(url, model_dir='/home/xiangtai/projec/pretrained', map_location=None): 194 | if not os.path.exists(model_dir): 195 | os.makedirs(model_dir) 196 | filename = url.split('/')[-1] 197 | cached_file = os.path.join(model_dir, filename) 198 | if not os.path.exists(cached_file): 199 | sys.stderr.write('Downloading: "{}" to {}\n'.format(url, cached_file)) 200 | urlretrieve(url, cached_file) 201 | return torch.load(cached_file, map_location=map_location) 202 | 203 | 204 | if __name__ == '__main__': 205 | res = resnet101(pretrained=True) 206 | print (res) -------------------------------------------------------------------------------- /model/seg_resnext.py: -------------------------------------------------------------------------------- 1 | # synchronized batchnorm version of resnext101 2 | import os 3 | import sys 4 | import torch 5 | import torch.nn as nn 6 | import math 7 | 8 | try: 9 | from urllib import urlretrieve 10 | except ImportError: 11 | from urllib.request import urlretrieve 12 | 13 | 14 | __all__ = ['ResNeXt', 'resnext101'] # support resnext 101 15 | 16 | # can not used for now 17 | model_urls = { 18 | 'resnext101': 'http://sceneparsing.csail.mit.edu/model/pretrained_resnet/resnext101-imagenet.pth' 19 | } 20 | 21 | 22 | def conv3x3(in_planes, out_planes, stride=1): 23 | "3x3 convolution with padding" 24 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 25 | padding=1, bias=False) 26 | 27 | 28 | class GroupBottleneck(nn.Module): 29 | expansion = 2 30 | 31 | def __init__(self, inplanes, planes, stride=1, groups=1, downsample=None): 32 | super(GroupBottleneck, self).__init__() 33 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 34 | self.bn1 = nn.BatchNorm2d(planes) 35 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 36 | padding=1, groups=groups, bias=False) 37 | self.bn2 = nn.BatchNorm2d(planes) 38 | self.conv3 = nn.Conv2d(planes, planes * 2, kernel_size=1, bias=False) 39 | self.bn3 = nn.BatchNorm2d(planes * 2) 40 | self.relu = nn.ReLU(inplace=True) 41 | self.downsample = downsample 42 | self.stride = stride 43 | 44 | def forward(self, x): 45 | residual = x 46 | 47 | out = self.conv1(x) 48 | out = self.bn1(out) 49 | out = self.relu(out) 50 | 51 | out = self.conv2(out) 52 | out = self.bn2(out) 53 | out = self.relu(out) 54 | 55 | out = self.conv3(out) 56 | out = self.bn3(out) 57 | 58 | if self.downsample is not None: 59 | residual = self.downsample(x) 60 | 61 | out += residual 62 | out = self.relu(out) 63 | 64 | return out 65 | 66 | 67 | class ResNeXt(nn.Module): 68 | 69 | def __init__(self, block, layers, groups=32, num_classes=1000): 70 | self.inplanes = 128 71 | super(ResNeXt, self).__init__() 72 | self.conv1 = conv3x3(3, 64, stride=2) 73 | self.bn1 = nn.BatchNorm2d(64) 74 | self.relu1 = nn.ReLU(inplace=True) 75 | self.conv2 = conv3x3(64, 64) 76 | self.bn2 = nn.BatchNorm2d(64) 77 | self.relu2 = nn.ReLU(inplace=True) 78 | self.conv3 = conv3x3(64, 128) 79 | self.bn3 = nn.BatchNorm2d(128) 80 | self.relu3 = nn.ReLU(inplace=True) 81 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 82 | 83 | self.layer1 = self._make_layer(block, 128, layers[0], groups=groups) 84 | self.layer2 = self._make_layer(block, 256, layers[1], stride=2, groups=groups) 85 | self.layer3 = self._make_layer(block, 512, layers[2], stride=2, groups=groups) 86 | self.layer4 = self._make_layer(block, 1024, layers[3], stride=2, groups=groups) 87 | self.avgpool = nn.AvgPool2d(7, stride=1) 88 | self.fc = nn.Linear(1024 * block.expansion, num_classes) 89 | 90 | for m in self.modules(): 91 | if isinstance(m, nn.Conv2d): 92 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels // m.groups 93 | m.weight.data.normal_(0, math.sqrt(2. / n)) 94 | elif isinstance(m, nn.BatchNorm2d): 95 | m.weight.data.fill_(1) 96 | m.bias.data.zero_() 97 | 98 | def _make_layer(self, block, planes, blocks, stride=1, groups=1): 99 | downsample = None 100 | if stride != 1 or self.inplanes != planes * block.expansion: 101 | downsample = nn.Sequential( 102 | nn.Conv2d(self.inplanes, planes * block.expansion, 103 | kernel_size=1, stride=stride, bias=False), 104 | nn.BatchNorm2d(planes * block.expansion), 105 | ) 106 | 107 | layers = [] 108 | layers.append(block(self.inplanes, planes, stride, groups, downsample)) 109 | self.inplanes = planes * block.expansion 110 | for i in range(1, blocks): 111 | layers.append(block(self.inplanes, planes, groups=groups)) 112 | 113 | return nn.Sequential(*layers) 114 | 115 | def forward(self, x): 116 | x = self.relu1(self.bn1(self.conv1(x))) 117 | x = self.relu2(self.bn2(self.conv2(x))) 118 | x = self.relu3(self.bn3(self.conv3(x))) 119 | x = self.maxpool(x) 120 | 121 | x = self.layer1(x) 122 | x = self.layer2(x) 123 | x = self.layer3(x) 124 | x = self.layer4(x) 125 | 126 | x = self.avgpool(x) 127 | x = x.view(x.size(0), -1) 128 | x = self.fc(x) 129 | 130 | return x 131 | 132 | 133 | def resnext101(pretrained=False, **kwargs): 134 | """Constructs a ResNet-101 model. 135 | 136 | Args: 137 | pretrained (bool): If True, returns a model pre-trained on Places 138 | """ 139 | model = ResNeXt(GroupBottleneck, [3, 4, 23, 3], **kwargs) 140 | if pretrained: 141 | model.load_state_dict(load_url(model_urls['resnext101']), strict=False) 142 | return model 143 | 144 | 145 | def load_url(url, model_dir='./models/backbones/pretrained', map_location=None): 146 | if not os.path.exists(model_dir): 147 | os.makedirs(model_dir) 148 | filename = url.split('/')[-1] 149 | cached_file = os.path.join(model_dir, filename) 150 | if not os.path.exists(cached_file): 151 | sys.stderr.write('Downloading: "{}" to {}\n'.format(url, cached_file)) 152 | urlretrieve(url, cached_file) 153 | return torch.load(cached_file, map_location=map_location) 154 | 155 | if __name__ == '__main__': 156 | res = resnext101(pretrained=True) 157 | print (res) --------------------------------------------------------------------------------