├── .gitignore ├── DeepLab_v2_res.py ├── DeepLab_v2_vgg.py ├── DeepLab_v3.py ├── DeepLab_v3_plus.py ├── MIT-LICENSE.txt ├── README.md └── poly_scheduler.py /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | -------------------------------------------------------------------------------- /DeepLab_v2_res.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import torch.utils.model_zoo as model_zoo 5 | import torchvision.models as models 6 | import math 7 | 8 | model_url = 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth' 9 | 10 | class Atrous_Bottleneck(nn.Module): 11 | expansion = 4 12 | 13 | def __init__(self, inplanes, planes, stride=1, rate=1, downsample=None): 14 | super(Atrous_Bottleneck, self).__init__() 15 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 16 | self.bn1 = nn.BatchNorm2d(planes) 17 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 18 | dilation=rate, padding=rate, bias=False) 19 | self.bn2 = nn.BatchNorm2d(planes) 20 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) 21 | self.bn3 = nn.BatchNorm2d(planes * 4) 22 | self.relu = nn.ReLU(inplace=True) 23 | self.downsample = downsample 24 | self.stride = stride 25 | 26 | def forward(self, x): 27 | residual = x 28 | 29 | out = self.conv1(x) 30 | out = self.bn1(out) 31 | out = self.relu(out) 32 | 33 | out = self.conv2(out) 34 | out = self.bn2(out) 35 | out = self.relu(out) 36 | 37 | out = self.conv3(out) 38 | out = self.bn3(out) 39 | 40 | if self.downsample is not None: 41 | residual = self.downsample(x) 42 | 43 | out += residual 44 | out = self.relu(out) 45 | 46 | return out 47 | 48 | class Atrous_ResNet_features(nn.Module): 49 | 50 | def __init__(self, block, layers, pretrained=False): 51 | super(Atrous_ResNet_features, self).__init__() 52 | self.inplanes = 64 53 | 54 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, 55 | bias=False) 56 | self.bn1 = nn.BatchNorm2d(64) 57 | self.relu = nn.ReLU(inplace=True) 58 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 59 | self.layer1 = self._make_layer(block, 64, layers[0], stride=1, rate=1) 60 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2, rate=1) 61 | self.layer3 = self._make_layer(block, 256, layers[2], stride=1, rate=2) 62 | self.layer4 = self._make_layer(block, 512, layers[3], stride=1, rate=4) 63 | 64 | for m in self.modules(): 65 | if isinstance(m, nn.Conv2d): 66 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 67 | m.weight.data.normal_(0, math.sqrt(2. / n)) 68 | elif isinstance(m, nn.BatchNorm2d): 69 | m.weight.data.fill_(1) 70 | m.bias.data.zero_() 71 | 72 | if pretrained: 73 | print('load the pre-trained model.') 74 | resnet = models.resnet101(pretrained) 75 | self.conv1 = resnet.conv1 76 | self.bn1 = resnet.bn1 77 | self.layer1 = resnet.layer1 78 | self.layer2 = resnet.layer2 79 | 80 | def _make_layer(self, block, planes, blocks, stride=1, rate=1): 81 | downsample = None 82 | if stride != 1 or self.inplanes != planes * block.expansion: 83 | downsample = nn.Sequential( 84 | nn.Conv2d(self.inplanes, planes * block.expansion, 85 | kernel_size=1, stride=stride, bias=False), 86 | nn.BatchNorm2d(planes * block.expansion), 87 | ) 88 | 89 | layers = [] 90 | layers.append(block(self.inplanes, planes, stride, rate, downsample)) 91 | self.inplanes = planes * block.expansion 92 | for i in range(1, blocks): 93 | layers.append(block(self.inplanes, planes, stride=1, rate=rate)) 94 | 95 | return nn.Sequential(*layers) 96 | 97 | def forward(self, x): 98 | x = self.conv1(x) 99 | x = self.bn1(x) 100 | x = self.relu(x) 101 | x = self.maxpool(x) 102 | 103 | x = self.layer1(x) 104 | x = self.layer2(x) 105 | x = self.layer3(x) 106 | x = self.layer4(x) 107 | 108 | return x 109 | 110 | class Atrous_module(nn.Module): 111 | def __init__(self, inplanes, num_classes, rate): 112 | super(Atrous_module, self).__init__() 113 | planes = inplanes 114 | self.atrous_convolution = nn.Conv2d(inplanes, planes, kernel_size=3, 115 | stride=1, padding=rate, dilation=rate) 116 | self.fc1 = nn.Conv2d(planes, planes, kernel_size=1, stride=1) 117 | self.fc2 = nn.Conv2d(planes, num_classes, kernel_size=1, stride=1) 118 | 119 | def forward(self, x): 120 | x = self.atrous_convolution(x) 121 | x = self.fc1(x) 122 | x = self.fc2(x) 123 | 124 | return x 125 | 126 | class DeepLabv2_ASPP(nn.Module): 127 | def __init__(self, num_classes, small=True, pretrained=False): 128 | super(DeepLabv2_ASPP, self).__init__() 129 | block = Atrous_Bottleneck 130 | self.resnet_features = Atrous_ResNet_features(block, [3, 4, 23, 3], pretrained) 131 | 132 | if small: 133 | rates = [2, 4, 8, 12] 134 | else: 135 | rates = [6, 12, 18, 24] 136 | self.aspp1 = Atrous_module(2048 , num_classes, rate=rates[0]) 137 | self.aspp2 = Atrous_module(2048 , num_classes, rate=rates[1]) 138 | self.aspp3 = Atrous_module(2048 , num_classes, rate=rates[2]) 139 | self.aspp4 = Atrous_module(2048 , num_classes, rate=rates[3]) 140 | 141 | def forward(self, x): 142 | x = self.resnet_features(x) 143 | x1 = self.aspp1(x) 144 | x2 = self.aspp2(x) 145 | x3 = self.aspp3(x) 146 | x4 = self.aspp4(x) 147 | 148 | x = x1 + x2 + x3 + x4 149 | x = F.upsample(x, scale_factor=8, mode='bilinear') 150 | 151 | return x 152 | 153 | class DeepLabv2_FOV(nn.Module): 154 | def __init__(self, num_classes, pretrained=True): 155 | super(DeepLabv2_FOV, self).__init__() 156 | block = Atrous_Bottleneck 157 | self.resnet_features = Atrous_ResNet_features(block, [3, 4, 23, 3], pretrained) 158 | 159 | self.atrous = Atrous_module(2048 , num_classes, rate=12) 160 | 161 | def forward(self, x): 162 | x = self.resnet_features(x) 163 | x = self.atrous(x) 164 | x = F.upsample(x, scale_factor=8, mode='bilinear') 165 | 166 | return x 167 | -------------------------------------------------------------------------------- /DeepLab_v2_vgg.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import torch.utils.model_zoo as model_zoo 5 | import torchvision.models as models 6 | import math 7 | from collections import OrderedDict 8 | 9 | def conv3x3_relu(inplanes, planes, rate=1): 10 | conv3x3_relu = nn.Sequential(nn.Conv2d(inplanes, planes, kernel_size=3, 11 | stride=1, padding=rate, dilation=rate), 12 | nn.ReLU()) 13 | return conv3x3_relu 14 | 15 | class VGG16_feature(nn.Module): 16 | def __init__(self, pretrained=False): 17 | super(VGG16_feature, self).__init__() 18 | 19 | self.features = nn.Sequential(conv3x3_relu(3, 64), 20 | conv3x3_relu(64, 64), 21 | nn.MaxPool2d(2, stride=2), 22 | conv3x3_relu(64, 128), 23 | conv3x3_relu(128, 128), 24 | nn.MaxPool2d(2, stride=2), 25 | conv3x3_relu(128, 256), 26 | conv3x3_relu(256, 256), 27 | conv3x3_relu(256, 256), 28 | nn.MaxPool2d(2, stride=2), 29 | conv3x3_relu(256, 512), 30 | conv3x3_relu(512, 512), 31 | conv3x3_relu(512, 512), 32 | nn.MaxPool2d(3, stride=1, padding=1)) 33 | self.features2 = nn.Sequential(conv3x3_relu(512, 512, rate=2), 34 | conv3x3_relu(512, 512, rate=2), 35 | conv3x3_relu(512, 512, rate=2), 36 | nn.MaxPool2d(3, stride=1, padding=1)) 37 | 38 | """ 39 | if pretrained: 40 | url = 'https://download.pytorch.org/models/vgg16-397923af.pth' 41 | weight = model_zoo.load_url(url) 42 | weight2 = OrderedDict() 43 | for key in list(weight.keys())[:20]: 44 | weight2[key] = weight[key] 45 | 46 | self.features.load_state_dict(weight2) 47 | """ 48 | 49 | def forward(self, x): 50 | x = self.features(x) 51 | x = self.features2(x) 52 | 53 | return x 54 | 55 | 56 | class Atrous_module(nn.Module): 57 | def __init__(self, inplanes, num_classes, rate): 58 | super(Atrous_module, self).__init__() 59 | planes = inplanes 60 | self.atrous_convolution = nn.Conv2d(inplanes, planes, kernel_size=3, 61 | stride=1, padding=rate, dilation=rate) 62 | self.fc1 = nn.Conv2d(planes, planes, kernel_size=1, stride=1) 63 | self.fc2 = nn.Conv2d(planes, num_classes, kernel_size=1, stride=1) 64 | 65 | def forward(self, x): 66 | x = self.atrous_convolution(x) 67 | x = self.fc1(x) 68 | x = self.fc2(x) 69 | 70 | return x 71 | 72 | 73 | class DeepLabv1_ASPP(nn.Module): 74 | def __init__(self, num_classes, small=True, pretrained=False): 75 | super(DeepLabv2_ASPP, self).__init__() 76 | self.vgg_feature = VGG16_feature(pretrained) 77 | 78 | if small: 79 | rates = [2, 4, 8, 12] 80 | else: 81 | rates = [6, 12, 18, 24] 82 | self.aspp1 = Atrous_module(2048 , num_classes, rate=rates[0]) 83 | self.aspp2 = Atrous_module(2048 , num_classes, rate=rates[1]) 84 | self.aspp3 = Atrous_module(2048 , num_classes, rate=rates[2]) 85 | self.aspp4 = Atrous_module(2048 , num_classes, rate=rates[3]) 86 | 87 | def forward(self, x): 88 | x = self.vgg_feature(x) 89 | x1 = self.aspp1(x) 90 | x2 = self.aspp2(x) 91 | x3 = self.aspp3(x) 92 | x4 = self.aspp4(x) 93 | 94 | x = x1 + x2 + x3 + x4 95 | x = F.upsample(x, scale_factor=8, mode='bilinear') 96 | 97 | return x 98 | 99 | class DeepLabv1_FOV(nn.Module): 100 | def __init__(self, num_classes, pretrained=True): 101 | super(DeepLabv2_FOV, self).__init__() 102 | self.vgg_feature = VGG16_feature(pretrained) 103 | 104 | self.atrous = Atrous_module(2048 , num_classes, rate=12) 105 | 106 | def forward(self, x): 107 | x = self.vgg_feature(x) 108 | x = self.atrous(x) 109 | x = F.upsample(x, scale_factor=8, mode='bilinear') 110 | 111 | return x 112 | -------------------------------------------------------------------------------- /DeepLab_v3.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import torch.utils.model_zoo as model_zoo 5 | import torchvision.models as models 6 | import numpy as np 7 | import math 8 | 9 | model_url = 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth' 10 | 11 | class Atrous_Bottleneck(nn.Module): 12 | expansion = 4 13 | 14 | def __init__(self, inplanes, planes, stride=1, rate=1, downsample=None): 15 | super(Atrous_Bottleneck, self).__init__() 16 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 17 | self.bn1 = nn.BatchNorm2d(planes) 18 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 19 | dilation=rate, padding=rate, bias=False) 20 | self.bn2 = nn.BatchNorm2d(planes) 21 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) 22 | self.bn3 = nn.BatchNorm2d(planes * 4) 23 | self.relu = nn.ReLU(inplace=True) 24 | self.downsample = downsample 25 | self.stride = stride 26 | 27 | def forward(self, x): 28 | residual = x 29 | 30 | out = self.conv1(x) 31 | out = self.bn1(out) 32 | out = self.relu(out) 33 | 34 | out = self.conv2(out) 35 | out = self.bn2(out) 36 | out = self.relu(out) 37 | 38 | out = self.conv3(out) 39 | out = self.bn3(out) 40 | 41 | if self.downsample is not None: 42 | residual = self.downsample(x) 43 | 44 | out += residual 45 | out = self.relu(out) 46 | 47 | return out 48 | 49 | class Atrous_ResNet_features(nn.Module): 50 | 51 | def __init__(self, block, layers, pretrained=False): 52 | super(Atrous_ResNet_features, self).__init__() 53 | self.inplanes = 64 54 | 55 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, 56 | bias=False) 57 | self.bn1 = nn.BatchNorm2d(64) 58 | self.relu = nn.ReLU(inplace=True) 59 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 60 | self.layer1 = self._make_layer(block, 64, layers[0], stride=1, rate=1) 61 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2, rate=1) 62 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2, rate=1) 63 | self.layer4 = self._make_MG_unit(block, 512, stride=1, rate=2) 64 | 65 | for m in self.modules(): 66 | if isinstance(m, nn.Conv2d): 67 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 68 | m.weight.data.normal_(0, math.sqrt(2. / n)) 69 | elif isinstance(m, nn.BatchNorm2d): 70 | m.weight.data.fill_(1) 71 | m.bias.data.zero_() 72 | 73 | if pretrained: 74 | print('load the pre-trained model.') 75 | resnet = models.resnet101(pretrained) 76 | self.conv1 = resnet.conv1 77 | self.bn1 = resnet.bn1 78 | self.layer1 = resnet.layer1 79 | self.layer2 = resnet.layer2 80 | 81 | def _make_layer(self, block, planes, blocks, stride=1, rate=1): 82 | downsample = None 83 | if stride != 1 or self.inplanes != planes * block.expansion: 84 | downsample = nn.Sequential( 85 | nn.Conv2d(self.inplanes, planes * block.expansion, 86 | kernel_size=1, stride=stride, bias=False), 87 | nn.BatchNorm2d(planes * block.expansion), 88 | ) 89 | 90 | layers = [] 91 | layers.append(block(self.inplanes, planes, stride, rate, downsample)) 92 | self.inplanes = planes * block.expansion 93 | for i in range(1, blocks): 94 | layers.append(block(self.inplanes, planes)) 95 | 96 | return nn.Sequential(*layers) 97 | 98 | def _make_MG_unit(self, block, planes, blocks=[1,2,4], stride=1, rate=1): 99 | downsample = None 100 | if stride != 1 or self.inplanes != planes * block.expansion: 101 | downsample = nn.Sequential( 102 | nn.Conv2d(self.inplanes, planes * block.expansion, 103 | kernel_size=1, stride=stride, bias=False), 104 | nn.BatchNorm2d(planes * block.expansion), 105 | ) 106 | 107 | layers = [] 108 | layers.append(block(self.inplanes, planes, stride, rate=blocks[0]*rate, downsample=downsample)) 109 | self.inplanes = planes * block.expansion 110 | for i in range(1, len(blocks)): 111 | layers.append(block(self.inplanes, planes, stride=1, rate=blocks[i]*rate)) 112 | 113 | return nn.Sequential(*layers) 114 | 115 | def forward(self, x): 116 | x = self.conv1(x) 117 | x = self.bn1(x) 118 | x = self.relu(x) 119 | x = self.maxpool(x) 120 | 121 | x = self.layer1(x) 122 | x = self.layer2(x) 123 | x = self.layer3(x) 124 | x = self.layer4(x) 125 | 126 | return x 127 | 128 | class Atrous_module(nn.Module): 129 | def __init__(self, inplanes, planes, rate): 130 | super(Atrous_module, self).__init__() 131 | self.atrous_convolution = nn.Conv2d(inplanes, planes, kernel_size=3, 132 | stride=1, padding=rate, dilation=rate) 133 | self.batch_norm = nn.BatchNorm2d(planes) 134 | 135 | def forward(self, x): 136 | x = self.atrous_convolution(x) 137 | x = self.batch_norm(x) 138 | 139 | return x 140 | 141 | class DeepLabv3(nn.Module): 142 | def __init__(self, num_classes, small=True, pretrained=False): 143 | super(DeepLabv3, self).__init__() 144 | block = Atrous_Bottleneck 145 | self.resnet_features = Atrous_ResNet_features(block, [3, 4, 23], pretrained) 146 | 147 | rates = [1, 6, 12, 18] 148 | self.aspp1 = Atrous_module(2048 , 256, rate=rates[0]) 149 | self.aspp2 = Atrous_module(2048 , 256, rate=rates[1]) 150 | self.aspp3 = Atrous_module(2048 , 256, rate=rates[2]) 151 | self.aspp4 = Atrous_module(2048 , 256, rate=rates[3]) 152 | self.image_pool = nn.Sequential(nn.AdaptiveMaxPool2d(1), 153 | nn.Conv2d(2048, 256, kernel_size=1)) 154 | 155 | self.fc1 = nn.Sequential(nn.Conv2d(1280, 256, kernel_size=1), 156 | nn.BatchNorm2d(256)) 157 | self.fc2 = nn.Conv2d(256, num_classes, kernel_size=1) 158 | 159 | def forward(self, x): 160 | x = self.resnet_features(x) 161 | x1 = self.aspp1(x) 162 | x2 = self.aspp2(x) 163 | x3 = self.aspp3(x) 164 | x4 = self.aspp4(x) 165 | x5 = self.image_pool(x) 166 | x5 = F.upsample(x5, size=x4.size()[2:], mode='nearest') 167 | 168 | x = torch.cat((x1, x2, x3, x4, x5), dim=1) 169 | x = self.fc1(x) 170 | x = self.fc2(x) 171 | x = F.upsample(x, scale_factor=(16,16), mode='bilinear') 172 | 173 | return x 174 | -------------------------------------------------------------------------------- /DeepLab_v3_plus.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import torch.utils.model_zoo as model_zoo 5 | import torchvision.models as models 6 | import numpy as np 7 | import math 8 | 9 | model_url = 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth' 10 | 11 | class Atrous_Bottleneck(nn.Module): 12 | expansion = 4 13 | 14 | def __init__(self, inplanes, planes, stride=1, rate=1, downsample=None): 15 | super(Atrous_Bottleneck, self).__init__() 16 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 17 | self.bn1 = nn.BatchNorm2d(planes) 18 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 19 | dilation=rate, padding=rate, bias=False) 20 | self.bn2 = nn.BatchNorm2d(planes) 21 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) 22 | self.bn3 = nn.BatchNorm2d(planes * 4) 23 | self.relu = nn.ReLU(inplace=True) 24 | self.downsample = downsample 25 | self.stride = stride 26 | 27 | def forward(self, x): 28 | residual = x 29 | 30 | out = self.conv1(x) 31 | out = self.bn1(out) 32 | out = self.relu(out) 33 | 34 | out = self.conv2(out) 35 | out = self.bn2(out) 36 | out = self.relu(out) 37 | 38 | out = self.conv3(out) 39 | out = self.bn3(out) 40 | 41 | if self.downsample is not None: 42 | residual = self.downsample(x) 43 | 44 | out += residual 45 | out = self.relu(out) 46 | 47 | return out 48 | 49 | class Atrous_ResNet_features(nn.Module): 50 | 51 | def __init__(self, block, layers, pretrained=False): 52 | super(Atrous_ResNet_features, self).__init__() 53 | self.inplanes = 64 54 | 55 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, 56 | bias=False) 57 | self.bn1 = nn.BatchNorm2d(64) 58 | self.relu = nn.ReLU(inplace=True) 59 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 60 | self.layer1 = self._make_layer(block, 64, layers[0], stride=1, rate=1) 61 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2, rate=1) 62 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2, rate=1) 63 | self.layer4 = self._make_MG_unit(block, 512, stride=1, rate=2) 64 | 65 | for m in self.modules(): 66 | if isinstance(m, nn.Conv2d): 67 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 68 | m.weight.data.normal_(0, math.sqrt(2. / n)) 69 | elif isinstance(m, nn.BatchNorm2d): 70 | m.weight.data.fill_(1) 71 | m.bias.data.zero_() 72 | 73 | if pretrained: 74 | print('load the pre-trained model.') 75 | resnet = models.resnet101(pretrained) 76 | self.conv1 = resnet.conv1 77 | self.bn1 = resnet.bn1 78 | self.layer1 = resnet.layer1 79 | self.layer2 = resnet.layer2 80 | 81 | def _make_layer(self, block, planes, blocks, stride=1, rate=1): 82 | downsample = None 83 | if stride != 1 or self.inplanes != planes * block.expansion: 84 | downsample = nn.Sequential( 85 | nn.Conv2d(self.inplanes, planes * block.expansion, 86 | kernel_size=1, stride=stride, bias=False), 87 | nn.BatchNorm2d(planes * block.expansion), 88 | ) 89 | 90 | layers = [] 91 | layers.append(block(self.inplanes, planes, stride, rate, downsample)) 92 | self.inplanes = planes * block.expansion 93 | for i in range(1, blocks): 94 | layers.append(block(self.inplanes, planes)) 95 | 96 | return nn.Sequential(*layers) 97 | 98 | def _make_MG_unit(self, block, planes, blocks=[1,2,4], stride=1, rate=1): 99 | downsample = None 100 | if stride != 1 or self.inplanes != planes * block.expansion: 101 | downsample = nn.Sequential( 102 | nn.Conv2d(self.inplanes, planes * block.expansion, 103 | kernel_size=1, stride=stride, bias=False), 104 | nn.BatchNorm2d(planes * block.expansion), 105 | ) 106 | 107 | layers = [] 108 | layers.append(block(self.inplanes, planes, stride, rate=blocks[0]*rate, downsample=downsample)) 109 | self.inplanes = planes * block.expansion 110 | for i in range(1, len(blocks)): 111 | layers.append(block(self.inplanes, planes, stride=1, rate=blocks[i]*rate)) 112 | 113 | return nn.Sequential(*layers) 114 | 115 | def forward(self, x): 116 | x = self.conv1(x) 117 | x = self.bn1(x) 118 | x = self.relu(x) 119 | x = self.maxpool(x) 120 | 121 | x = self.layer1(x) 122 | conv2 = x 123 | x = self.layer2(x) 124 | x = self.layer3(x) 125 | x = self.layer4(x) 126 | 127 | return x, conv2 128 | 129 | class Atrous_module(nn.Module): 130 | def __init__(self, inplanes, planes, rate): 131 | super(Atrous_module, self).__init__() 132 | self.atrous_convolution = nn.Conv2d(inplanes, planes, kernel_size=3, 133 | stride=1, padding=rate, dilation=rate) 134 | self.batch_norm = nn.BatchNorm2d(planes) 135 | 136 | def forward(self, x): 137 | x = self.atrous_convolution(x) 138 | x = self.batch_norm(x) 139 | 140 | return x 141 | 142 | class DeepLabv3_plus(nn.Module): 143 | def __init__(self, num_classes, small=True, pretrained=False): 144 | super(DeepLabv3_plus, self).__init__() 145 | block = Atrous_Bottleneck 146 | self.resnet_features = Atrous_ResNet_features(block, [3, 4, 23], pretrained) 147 | 148 | rates = [1, 6, 12, 18] 149 | self.aspp1 = Atrous_module(2048 , 256, rate=rates[0]) 150 | self.aspp2 = Atrous_module(2048 , 256, rate=rates[1]) 151 | self.aspp3 = Atrous_module(2048 , 256, rate=rates[2]) 152 | self.aspp4 = Atrous_module(2048 , 256, rate=rates[3]) 153 | self.image_pool = nn.Sequential(nn.AdaptiveMaxPool2d(1), 154 | nn.Conv2d(2048, 256, kernel_size=1)) 155 | 156 | self.fc1 = nn.Sequential(nn.Conv2d(1280, 256, kernel_size=1), 157 | nn.BatchNorm2d(256)) 158 | 159 | self.reduce_conv2 = nn.Sequential(nn.Conv2d(256, 48, kernel_size=1), 160 | nn.BatchNorm2d(48)) 161 | self.last_conv = nn.Sequential(nn.Conv2d(304, 256, kernel_size=3, stride=1, padding=1), 162 | nn.BatchNorm2d(256), 163 | nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1), 164 | nn.BatchNorm2d(256), 165 | nn.Conv2d(256, num_classes, kernel_size=1, stride=1)) 166 | 167 | 168 | def forward(self, x): 169 | x, conv2 = self.resnet_features(x) 170 | x1 = self.aspp1(x) 171 | x2 = self.aspp2(x) 172 | x3 = self.aspp3(x) 173 | x4 = self.aspp4(x) 174 | x5 = self.image_pool(x) 175 | x5 = F.upsample(x5, size=x4.size()[2:], mode='nearest') 176 | 177 | x = torch.cat((x1, x2, x3, x4, x5), dim=1) 178 | x = self.fc1(x) 179 | x = F.upsample(x, scale_factor=(4,4), mode='bilinear') 180 | 181 | low_lebel_features = self.reduce_conv2(conv2) 182 | 183 | x = torch.cat((x, low_lebel_features), dim=1) 184 | x = self.last_conv(x) 185 | x = F.upsample(x, scale_factor=(4, 4), mode='bilinear') 186 | 187 | return x 188 | -------------------------------------------------------------------------------- /MIT-LICENSE.txt: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) [2018] [Doi Kento] 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 6 | 7 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 8 | 9 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 10 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DeepLab family 2 | 3 | ## Papers 4 | 5 | * DeepLab v2 [DeepLab: Semantic Image Segmentation with Deep Convolutional Nets, Atrous Convolution, and Fully Connected CRFs](https://arxiv.org/abs/1606.00915) 6 | * DeepLab v3 [Rethinking Atrous Convolution for Semantic Image Segmentation](https://arxiv.org/abs/1706.05587) 7 | * DeepLab v3+ [Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation](https://arxiv.org/abs/1802.02611) 8 | 9 | ## Contents 10 | 11 | * DeepLab v2 (VGG, ResNet101) 12 | * DeepLab v3 (ResNet101) 13 | * DeepLab v3+ (ResNet101) 14 | 15 | (DeepLab v2 (VGG16) is a little different from original implementation!!) 16 | 17 | ## description 18 | 19 | Network | description 20 | :-- | :-- 21 | DeepLab v2 (VGG, FOV)| VGG16 + atrous convolution 22 | DeepLab v2 (VGG, ASPP) | VGG16 + atrous spatial pyramid pooling 23 | DeepLab v2 (ResNet, FOV)| ResNet101 + atrous convolution 24 | DeepLab v2 (ResNet, ASPP) | ResNet101 + atrous spatial pyramid pooling 25 | DeepLab v3 | ResNet101 + atrous convolution in cascadea and in parallel 26 | DeepLab v3+ | DeepLab v3 + good decoder (and Xception) 27 | 28 | ## LICENSE 29 | 30 | * MIT 31 | * see LICENSE 32 | -------------------------------------------------------------------------------- /poly_scheduler.py: -------------------------------------------------------------------------------- 1 | def poly_lr_scheduler(optimizer, init_lr, iter, lr_decay_iter=1, 2 | max_iter=100, power=0.9): 3 | """Polynomial decay of learning rate 4 | :param init_lr is base learning rate 5 | :param iter is a current iteration 6 | :param lr_decay_iter how frequently decay occurs, default is 1 7 | :param max_iter is number of maximum iterations 8 | :param power is a polymomial power 9 | 10 | """ 11 | if iter % lr_decay_iter or iter > max_iter: 12 | return optimizer 13 | 14 | lr = init_lr*(1 - iter/max_iter)**power 15 | for param_group in optimizer.param_groups: 16 | param_group['lr'] = lr 17 | 18 | return lr 19 | --------------------------------------------------------------------------------