├── Attention
│   ├── AFF.py
│   ├── ANN.py
│   ├── CBAM.py
│   ├── CCNet.py
│   ├── ECA-Net.py
│   ├── GAM.py
│   ├── GlobalContextBlock.py
│   ├── NAM.py
│   ├── NonLocalBlock.py
│   ├── README.md
│   ├── SENet.py
│   ├── SEvariants.py
│   └── TripletAttention.py
├── ClassicNetwork
│   ├── AlexNet.py
│   ├── DenseNet.py
│   ├── Efficientnet.py
│   ├── InceptionV1.py
│   ├── InceptionV2.py
│   ├── InceptionV3.py
│   ├── InceptionV4.py
│   ├── README.md
│   ├── ResNeXt.py
│   ├── ResNet.py
│   ├── VGGNet.py
│   └── repVGGNet.py
├── FaceDetectorAndRecognition
│   ├── FaceBoxes.py
│   ├── LFFD.py
│   ├── README.md
│   └── VarGFaceNet.py
├── HumanPoseEstimation
│   ├── Hourglass.py
│   ├── LPN.py
│   ├── README.md
│   ├── SimpleBaseline.py
│   └── context_block.py
├── InstanceSegmentation
│   ├── PolarMask.py
│   └── README.md
├── Lightweight
│   ├── GhostNet.py
│   ├── MixNet.py
│   ├── MobileNetV1.py
│   ├── MobileNetV2.py
│   ├── MobileNetV3.py
│   ├── MobileNetXt.py
│   ├── README.md
│   ├── ShuffleNet.py
│   ├── ShuffleNetV2.py
│   ├── SqueezeNet.py
│   └── Xception.py
├── ObjectDetection
│   ├── ASFF.py
│   ├── CenterNet.py
│   ├── CornerNet.py
│   ├── FCOS.py
│   ├── FPN.py
│   ├── FSAF.py
│   ├── FisheyeMODNet.py
│   ├── FoveaBox.py
│   ├── README.md
│   ├── RetinaNet.py
│   ├── SSD.py
│   ├── VoVNet.py
│   ├── VoVNetV2.py
│   ├── YOLO.py
│   ├── YOLO_Nano.py
│   ├── YOLOv2.py
│   └── YOLOv3.py
├── Others
│   ├── DynamicReLU.py
│   └── PyramidalConvolution.py
├── PortraitSegmentation
│   └── SINet.py
├── README.md
├── SemanticSegmentation
│   ├── DeeplabV3Plus.py
│   ├── ENet.py
│   ├── FCN.py
│   ├── FastSCNN.py
│   ├── FisheyeMODNet.py
│   ├── ICNet.py
│   ├── LEDnet.py
│   ├── LRNnet.py
│   ├── LWnet.py
│   ├── README.md
│   ├── SegNet.py
│   └── Unet.py
├── Utils
│   └── utils.py
└── requirements.txt

/Attention/AFF.py:
--------------------------------------------------------------------------------
 1 | # !/usr/bin/env python
 2 | # -- coding: utf-8 --
 3 | # @Time : 2021/11/17 10:29
 4 | # @Author : liumin
 5 | # @File : AFF.py
 6 | 
 7 | import torch
 8 | import torch.nn as nn
 9 | 
10 | 
11 | class MS_CAM(nn.Module):
12 |     def __init__(self, channel, ratio = 16):
13 |         super(MS_CAM, self).__init__()
14 |         mid_channel = channel // ratio
15 |         self.global_att = nn.Sequential(
16 |             nn.AdaptiveAvgPool2d(1),
17 |             nn.Conv2d(in_channels=channel, out_channels=mid_channel, kernel_size=1, stride=1, padding=0),
18 |             nn.BatchNorm2d(mid_channel),
19 |             nn.ReLU(inplace=True),
20 |             nn.Conv2d(in_channels=mid_channel, out_channels=channel, kernel_size=1, stride=1, padding=0),
21 |             nn.BatchNorm2d(channel),
22 |         )
23 | 
24 |         self.local_att = nn.Sequential(
25 |             nn.Conv2d(in_channels=channel, out_channels=mid_channel, kernel_size=1, stride=1, padding=0),
26 |             nn.BatchNorm2d(mid_channel),
27 |             nn.ReLU(inplace=True),
28 |             nn.Conv2d(in_channels=mid_channel, out_channels=channel, kernel_size=1, stride=1, padding=0),
29 |             nn.BatchNorm2d(channel),
30 |         )
31 | 
32 |         self.sigmoid = nn.Sigmoid()
33 | 
34 |     def attention(self, x):
35 |         # the AFF paper fuses the global (1x1) and local (HxW) contexts by addition
36 |         # (broadcast over H x W), then gates with a sigmoid
37 |         g_x = self.global_att(x)
38 |         l_x = self.local_att(x)
39 |         return self.sigmoid(l_x + g_x)
40 | 
41 |     def forward(self, x):
42 |         return self.attention(x) * x
43 | 
44 | 
45 | class AFF(nn.Module):
46 |     def __init__(self, channel, ratio = 16):
47 |         super(AFF, self).__init__()
48 |         self.ms_cam = MS_CAM(channel, ratio)
49 | 
50 |     def forward(self, x, residual):
51 |         # attention weights come from the initial additive fusion of the two inputs;
52 |         # the output is a soft, position-wise weighted average of the two branches
53 |         w = self.ms_cam.attention(x + residual)
54 |         return 2 * x * w + 2 * residual * (1 - w)
55 | 
56 | 
57 | if __name__=='__main__':
58 |     model = MS_CAM(16)
59 |     print(model)
60 | 
61 |     input = torch.randn(2, 16, 64, 64)
62 |     out = model(input)
63 |     print(out.shape)
--------------------------------------------------------------------------------
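Usage note (added): unlike the single-input modules in this folder, AFF fuses two feature maps of the same shape, e.g. a skip connection with the main branch. A minimal sketch with made-up tensor sizes:

    aff = AFF(channel=16)
    x = torch.randn(2, 16, 64, 64)      # main branch
    skip = torch.randn(2, 16, 64, 64)   # residual / skip branch
    fused = aff(x, skip)                # -> torch.Size([2, 16, 64, 64])
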
/Attention/ANN.py:
--------------------------------------------------------------------------------
 1 | # !/usr/bin/env python
 2 | # -- coding: utf-8 --
 3 | # @Time : 2020/6/3 15:08
 4 | # @Author : liumin
 5 | # @File : ANN.py
 6 | 
 7 | import torch
 8 | import torch.nn as nn
 9 | import torchvision
10 | import numpy as np
11 | 
12 | class SpatialPyramidPooling(nn.Module):
13 |     def __init__(self, output_sizes = [1, 3, 6, 8]):
14 |         super(SpatialPyramidPooling, self).__init__()
15 | 
16 |         self.pool_layers = nn.ModuleList()
17 |         for output_size in output_sizes:
18 |             self.pool_layers.append(nn.AdaptiveMaxPool2d(output_size=output_size))
19 | 
20 |     def forward(self, x):
21 |         outputs = []
22 |         for pool_layer in self.pool_layers:
23 |             outputs.append(pool_layer(x).flatten(start_dim=2))  # keep batch and channel dims: [N, C, s*s]
24 |         out = torch.cat(outputs, dim=2)  # [N, C, S], S = sum of s*s over the output sizes
25 |         return out
26 | 
27 | class APNB(nn.Module):
28 |     def __init__(self, channel):
29 |         super(APNB, self).__init__()
30 |         self.inter_channel = channel // 2
31 |         self.conv_phi = nn.Conv2d(in_channels=channel, out_channels=self.inter_channel, kernel_size=1, stride=1, padding=0, bias=False)
32 |         self.conv_theta = nn.Conv2d(in_channels=channel, out_channels=self.inter_channel, kernel_size=1, stride=1, padding=0, bias=False)
33 |         self.conv_g = nn.Conv2d(in_channels=channel, out_channels=self.inter_channel, kernel_size=1, stride=1, padding=0, bias=False)
34 |         self.softmax = nn.Softmax(dim=-1)  # normalize each query row over the key positions
35 |         self.conv_mask = nn.Conv2d(in_channels=self.inter_channel, out_channels=channel, kernel_size=1, stride=1, padding=0, bias=False)
36 | 
37 |     def forward(self, x):
38 |         # [N, C, H , W]
39 |         b, c, h, w = x.size()
40 |         # [N, C/2, H * W]
41 |         x_phi = self.conv_phi(x).view(b, self.inter_channel, -1)
42 |         # [N, H * W, C/2]
43 |         x_theta = self.conv_theta(x).view(b, self.inter_channel, -1).permute(0, 2, 1).contiguous()
44 |         x_g = self.conv_g(x).view(b, self.inter_channel, -1).permute(0, 2, 1).contiguous()
45 |         # [N, H * W, H * W]
46 |         mul_theta_phi = torch.matmul(x_theta, x_phi)
47 |         mul_theta_phi = self.softmax(mul_theta_phi)
48 |         # [N, H * W, C/2]
49 |         mul_theta_phi_g = torch.matmul(mul_theta_phi, x_g)
50 |         # [N, C/2, H, W]
51 |         mul_theta_phi_g = mul_theta_phi_g.permute(0,2,1).contiguous().view(b,self.inter_channel, h, w)
52 |         # [N, C, H , W]
53 |         mask = self.conv_mask(mul_theta_phi_g)
54 |         out = mask + x
55 |         return out
56 | 
57 | 
58 | class AFNB(nn.Module):
59 |     def __init__(self, channel):
60 |         super(AFNB, self).__init__()
61 |         self.inter_channel = channel // 2
62 |         self.output_sizes = [1, 3, 6, 8]
63 |         self.sample_dim = np.sum([size*size for size in self.output_sizes])
64 |         self.conv_phi = nn.Conv2d(in_channels=channel, out_channels=self.inter_channel, kernel_size=1, stride=1, padding=0, bias=False)
65 |         self.conv_phi_spp = SpatialPyramidPooling(self.output_sizes)
66 |         self.conv_theta = nn.Conv2d(in_channels=channel, out_channels=self.inter_channel, kernel_size=1, stride=1, padding=0, bias=False)
67 |         self.conv_g = nn.Conv2d(in_channels=channel, out_channels=self.inter_channel, kernel_size=1, stride=1, padding=0, bias=False)
68 |         self.conv_g_spp = SpatialPyramidPooling(self.output_sizes)
69 |         self.softmax = nn.Softmax(dim=-1)  # normalize each query row over the S sampled positions
70 |         self.conv_mask = nn.Conv2d(in_channels=self.inter_channel, out_channels=channel, kernel_size=1, stride=1, padding=0, bias=False)
71 | 
72 |     def forward(self, x):
73 |         # [N, C, H , W]
74 |         b, c, h, w = x.size()
75 |         # query, kept at full resolution: [N, H * W, C/2]
76 |         x_theta = self.conv_theta(x).view(b, self.inter_channel, -1).permute(0, 2, 1).contiguous()
77 |         # key, pyramid-sampled down to S = self.sample_dim = 110 positions: [N, C/2, S]
78 |         x_phi = self.conv_phi_spp(self.conv_phi(x))
79 |         # value, sampled the same way: [N, S, C/2]
80 |         x_g = self.conv_g_spp(self.conv_g(x)).permute(0, 2, 1).contiguous()
81 |         # attention from every pixel to the S sampled positions: [N, H * W, S]
82 |         mul_theta_phi = torch.matmul(x_theta, x_phi)
83 |         mul_theta_phi = self.softmax(mul_theta_phi)
84 |         # gather the sampled values with the attention weights
85 |         # 
[N, H * W, C/2] 86 | mul_theta_phi_g = torch.matmul(mul_theta_phi, x_g) 87 | # [N, C/2, H, W] 88 | mul_theta_phi_g = mul_theta_phi_g.permute(0,2,1).contiguous().view(b,self.inter_channel, h, w) 89 | # [N, C, H , W] 90 | mask = self.conv_mask(mul_theta_phi_g) 91 | out = mask + x 92 | return out 93 | 94 | if __name__=='__main__': 95 | model = AFNB(channel=16) 96 | print(model) 97 | 98 | input = torch.randn(1, 16, 64, 64) 99 | out = model(input) 100 | print(out.shape) -------------------------------------------------------------------------------- /Attention/CBAM.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torchvision 4 | 5 | 6 | class ChannelAttentionModule(nn.Module): 7 | def __init__(self, channel, ratio=16): 8 | super(ChannelAttentionModule, self).__init__() 9 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 10 | self.max_pool = nn.AdaptiveMaxPool2d(1) 11 | 12 | self.shared_MLP = nn.Sequential( 13 | nn.Conv2d(channel, channel // ratio, 1, bias=False), 14 | nn.ReLU(), 15 | nn.Conv2d(channel // ratio, channel, 1, bias=False) 16 | ) 17 | self.sigmoid = nn.Sigmoid() 18 | 19 | def forward(self, x): 20 | avgout = self.shared_MLP(self.avg_pool(x)) 21 | maxout = self.shared_MLP(self.max_pool(x)) 22 | return self.sigmoid(avgout + maxout) 23 | 24 | 25 | class SpatialAttentionModule(nn.Module): 26 | def __init__(self): 27 | super(SpatialAttentionModule, self).__init__() 28 | self.conv2d = nn.Conv2d(in_channels=2, out_channels=1, kernel_size=7, stride=1, padding=3) 29 | self.sigmoid = nn.Sigmoid() 30 | 31 | def forward(self, x): 32 | avgout = torch.mean(x, dim=1, keepdim=True) 33 | maxout, _ = torch.max(x, dim=1, keepdim=True) 34 | out = torch.cat([avgout, maxout], dim=1) 35 | out = self.sigmoid(self.conv2d(out)) 36 | return out 37 | 38 | 39 | class CBAM(nn.Module): 40 | def __init__(self, channel): 41 | super(CBAM, self).__init__() 42 | self.channel_attention = ChannelAttentionModule(channel) 43 | self.spatial_attention = SpatialAttentionModule() 44 | 45 | def forward(self, x): 46 | out = self.channel_attention(x) * x 47 | out = self.spatial_attention(out) * out 48 | return out 49 | 50 | 51 | class ResBlock_CBAM(nn.Module): 52 | def __init__(self,in_places, places, stride=1,downsampling=False, expansion = 4): 53 | super(ResBlock_CBAM,self).__init__() 54 | self.expansion = expansion 55 | self.downsampling = downsampling 56 | 57 | self.bottleneck = nn.Sequential( 58 | nn.Conv2d(in_channels=in_places,out_channels=places,kernel_size=1,stride=1, bias=False), 59 | nn.BatchNorm2d(places), 60 | nn.ReLU(inplace=True), 61 | nn.Conv2d(in_channels=places, out_channels=places, kernel_size=3, stride=stride, padding=1, bias=False), 62 | nn.BatchNorm2d(places), 63 | nn.ReLU(inplace=True), 64 | nn.Conv2d(in_channels=places, out_channels=places*self.expansion, kernel_size=1, stride=1, bias=False), 65 | nn.BatchNorm2d(places*self.expansion), 66 | ) 67 | self.cbam = CBAM(channel=places*self.expansion) 68 | 69 | if self.downsampling: 70 | self.downsample = nn.Sequential( 71 | nn.Conv2d(in_channels=in_places, out_channels=places*self.expansion, kernel_size=1, stride=stride, bias=False), 72 | nn.BatchNorm2d(places*self.expansion) 73 | ) 74 | self.relu = nn.ReLU(inplace=True) 75 | 76 | def forward(self, x): 77 | residual = x 78 | out = self.bottleneck(x) 79 | out = self.cbam(out) 80 | if self.downsampling: 81 | residual = self.downsample(x) 82 | 83 | out += residual 84 | out = self.relu(out) 85 | return out 86 | 87 | if 
__name__=='__main__': 88 | model = ResBlock_CBAM(in_places=16, places=4) 89 | print(model) 90 | 91 | input = torch.randn(1, 16, 64, 64) 92 | out = model(input) 93 | print(out.shape) 94 | 95 | 96 | 97 | 98 | -------------------------------------------------------------------------------- /Attention/CCNet.py: -------------------------------------------------------------------------------- 1 | # !/usr/bin/env python 2 | # -- coding: utf-8 -- 3 | # @Time : 2020/6/3 9:56 4 | # @Author : liumin 5 | # @File : CCNet.py 6 | 7 | import torch 8 | import torch.nn as nn 9 | 10 | 11 | def INF(B, H, W): 12 | return -torch.diag(torch.tensor(float("inf")).repeat(H), 0).unsqueeze(0).repeat(B * W, 1, 1) 13 | 14 | 15 | class CrissCrossAttention(nn.Module): 16 | """ Criss-Cross Attention Module""" 17 | 18 | def __init__(self, in_dim): 19 | super(CrissCrossAttention, self).__init__() 20 | self.query_conv = nn.Conv2d(in_channels=in_dim, out_channels=in_dim // 8, kernel_size=1) 21 | self.key_conv = nn.Conv2d(in_channels=in_dim, out_channels=in_dim // 8, kernel_size=1) 22 | self.value_conv = nn.Conv2d(in_channels=in_dim, out_channels=in_dim, kernel_size=1) 23 | self.softmax = nn.Softmax(dim=3) 24 | self.INF = INF 25 | self.gamma = nn.Parameter(torch.zeros(1)) 26 | 27 | def forward(self, x): 28 | m_batchsize, _, height, width = x.size() 29 | proj_query = self.query_conv(x) 30 | proj_query_H = proj_query.permute(0, 3, 1, 2).contiguous().view(m_batchsize * width, -1, height).permute(0, 2, 1) 31 | proj_query_W = proj_query.permute(0, 2, 1, 3).contiguous().view(m_batchsize * height, -1, width).permute(0, 2, 1) 32 | proj_key = self.key_conv(x) 33 | proj_key_H = proj_key.permute(0, 3, 1, 2).contiguous().view(m_batchsize * width, -1, height) 34 | proj_key_W = proj_key.permute(0, 2, 1, 3).contiguous().view(m_batchsize * height, -1, width) 35 | proj_value = self.value_conv(x) 36 | proj_value_H = proj_value.permute(0, 3, 1, 2).contiguous().view(m_batchsize * width, -1, height) 37 | proj_value_W = proj_value.permute(0, 2, 1, 3).contiguous().view(m_batchsize * height, -1, width) 38 | energy_H = (torch.bmm(proj_query_H, proj_key_H) + self.INF(m_batchsize, height, width)).view(m_batchsize, width, 39 | height, height).permute(0, 2, 1, 3) 40 | energy_W = torch.bmm(proj_query_W, proj_key_W).view(m_batchsize, height, width, width) 41 | concate = self.softmax(torch.cat([energy_H, energy_W], 3)) 42 | 43 | att_H = concate[:, :, :, 0:height].permute(0, 2, 1, 3).contiguous().view(m_batchsize * width, height, height) 44 | 45 | att_W = concate[:, :, :, height:height + width].contiguous().view(m_batchsize * height, width, width) 46 | out_H = torch.bmm(proj_value_H, att_H.permute(0, 2, 1)).view(m_batchsize, width, -1, height).permute(0, 2, 3, 1) 47 | out_W = torch.bmm(proj_value_W, att_W.permute(0, 2, 1)).view(m_batchsize, height, -1, width).permute(0, 2, 1, 3) 48 | return self.gamma * (out_H + out_W) + x 49 | 50 | 51 | if __name__=='__main__': 52 | model = CrissCrossAttention(16) 53 | print(model) 54 | 55 | input = torch.randn(1, 16, 64, 64) 56 | out = model(input) 57 | print(out.shape) -------------------------------------------------------------------------------- /Attention/ECA-Net.py: -------------------------------------------------------------------------------- 1 | # !/usr/bin/env python 2 | # -- coding: utf-8 -- 3 | # @Time : 2020/10/10 16:45 4 | # @Author : liumin 5 | # @File : ECA-Net.py 6 | 7 | import torch 8 | import torch.nn as nn 9 | import torchvision 10 | from math import log 11 | 12 | 13 | def Conv1(in_planes, places, 
stride=2):
14 |     return nn.Sequential(
15 |         nn.Conv2d(in_channels=in_planes,out_channels=places,kernel_size=7,stride=stride,padding=3, bias=False),
16 |         nn.BatchNorm2d(places),
17 |         nn.ReLU(inplace=True),
18 |         nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
19 |     )
20 | 
21 | class SE_Module(nn.Module):
22 |     def __init__(self, channel,ratio = 16):
23 |         super(SE_Module, self).__init__()
24 |         self.squeeze = nn.AdaptiveAvgPool2d(1)
25 |         self.excitation = nn.Sequential(
26 |             nn.Linear(in_features=channel, out_features=channel // ratio),
27 |             nn.ReLU(inplace=True),
28 |             nn.Linear(in_features=channel // ratio, out_features=channel),
29 |             nn.Sigmoid()
30 |         )
31 |     def forward(self, x):
32 |         b, c, _, _ = x.size()
33 |         y = self.squeeze(x).view(b, c)
34 |         z = self.excitation(y).view(b, c, 1, 1)
35 |         return x * z.expand_as(x)
36 | 
37 | class ECA_Module(nn.Module):
38 |     def __init__(self, channel,gamma=2, b=1):
39 |         super(ECA_Module, self).__init__()
40 |         self.gamma = gamma
41 |         self.b = b
42 |         t = int(abs(log(channel, 2) + self.b) / self.gamma)
43 |         k = t if t % 2 else t + 1  # adaptive 1D-conv kernel size, forced odd
44 | 
45 |         self.avg_pool = nn.AdaptiveAvgPool2d(1)
46 |         self.conv = nn.Conv1d(1, 1, kernel_size=k, padding=k//2, bias=False)
47 |         self.sigmoid = nn.Sigmoid()
48 | 
49 |     def forward(self, x):
50 |         b, c, _, _ = x.size()
51 |         y = self.avg_pool(x)
52 |         y = self.conv(y.squeeze(-1).transpose(-1,-2))
53 |         y = y.transpose(-1,-2).unsqueeze(-1)
54 |         y = self.sigmoid(y)
55 |         return x * y.expand_as(x)
56 | 
57 | class ECA_ResNetBlock(nn.Module):
58 |     def __init__(self,in_places,places, stride=1,downsampling=False, expansion = 4):
59 |         super(ECA_ResNetBlock,self).__init__()
60 |         self.expansion = expansion
61 |         self.downsampling = downsampling
62 | 
63 |         self.bottleneck = nn.Sequential(
64 |             nn.Conv2d(in_channels=in_places,out_channels=places,kernel_size=1,stride=1, bias=False),
65 |             nn.BatchNorm2d(places),
66 |             nn.ReLU(inplace=True),
67 |             nn.Conv2d(in_channels=places, out_channels=places, kernel_size=3, stride=stride, padding=1, bias=False),
68 |             nn.BatchNorm2d(places),
69 |             nn.ReLU(inplace=True),
70 |             nn.Conv2d(in_channels=places, out_channels=places*self.expansion, kernel_size=1, stride=1, bias=False),
71 |             nn.BatchNorm2d(places*self.expansion),
72 |         )
73 |         self.eca = ECA_Module(channel=places*self.expansion)  # channel attention on the residual branch (defined above but previously never applied)
74 | 
75 |         if self.downsampling:
76 |             self.downsample = nn.Sequential(
77 |                 nn.Conv2d(in_channels=in_places, out_channels=places*self.expansion, kernel_size=1, stride=stride, bias=False),
78 |                 nn.BatchNorm2d(places*self.expansion)
79 |             )
80 |         self.relu = nn.ReLU(inplace=True)
81 | 
82 |     def forward(self, x):
83 |         residual = x
84 |         out = self.bottleneck(x)
85 |         out = self.eca(out)
86 |         if self.downsampling:
87 |             residual = self.downsample(x)
88 | 
89 |         out += residual
90 |         out = self.relu(out)
91 |         return out
92 | 
93 | class ECA_ResNet(nn.Module):
94 |     def __init__(self,blocks, num_classes=1000, expansion = 4):
95 |         super(ECA_ResNet,self).__init__()
96 |         self.expansion = expansion
97 | 
98 |         self.conv1 = Conv1(in_planes = 3, places= 64)
99 | 
100 |         self.layer1 = self.make_layer(in_places = 64, places= 64, block=blocks[0], stride=1)
101 |         self.layer2 = self.make_layer(in_places = 256,places=128, block=blocks[1], stride=2)
102 |         self.layer3 = self.make_layer(in_places=512,places=256, block=blocks[2], stride=2)
103 |         self.layer4 = self.make_layer(in_places=1024,places=512, block=blocks[3], stride=2)
104 | 
105 |         self.avgpool = nn.AvgPool2d(7, stride=1)
106 |         self.fc = nn.Linear(2048,num_classes)
107 | 
108 |         for m in self.modules():
109 |             if isinstance(m, nn.Conv2d):
110 |                 nn.init.kaiming_normal_(m.weight, mode='fan_out', 
nonlinearity='relu')
111 |             elif isinstance(m, nn.BatchNorm2d):
112 |                 nn.init.constant_(m.weight, 1)
113 |                 nn.init.constant_(m.bias, 0)
114 | 
115 |     def make_layer(self, in_places, places, block, stride):
116 |         layers = []
117 |         layers.append(ECA_ResNetBlock(in_places, places,stride, downsampling =True))
118 |         for i in range(1, block):
119 |             layers.append(ECA_ResNetBlock(places*self.expansion, places))
120 | 
121 |         return nn.Sequential(*layers)
122 | 
123 | 
124 |     def forward(self, x):
125 |         x = self.conv1(x)
126 | 
127 |         x = self.layer1(x)
128 |         x = self.layer2(x)
129 |         x = self.layer3(x)
130 |         x = self.layer4(x)
131 | 
132 |         x = self.avgpool(x)
133 |         x = x.view(x.size(0), -1)
134 |         x = self.fc(x)
135 |         return x
136 | 
137 | def ECA_ResNet50():
138 |     return ECA_ResNet([3, 4, 6, 3])
139 | 
140 | if __name__=='__main__':
141 |     model = ECA_ResNet50()
142 |     print(model)
143 | 
144 |     input = torch.randn(1, 3, 224, 224)
145 |     out = model(input)
146 |     print(out.shape)
147 | 
--------------------------------------------------------------------------------
/Attention/GAM.py:
--------------------------------------------------------------------------------
 1 | # !/usr/bin/env python
 2 | # -- coding: utf-8 --
 3 | # @Time : 2022/1/17 14:18
 4 | # @Author : liumin
 5 | # @File : GAM.py
 6 | 
 7 | import torch
 8 | import torch.nn as nn
 9 | 
10 | 
11 | class GAM(nn.Module):
12 |     def __init__(self, channels, rate=4):
13 |         super(GAM, self).__init__()
14 |         mid_channels = channels // rate
15 | 
16 |         self.channel_attention = nn.Sequential(
17 |             nn.Linear(channels, mid_channels),
18 |             nn.ReLU(inplace=True),
19 |             nn.Linear(mid_channels, channels)
20 |         )
21 | 
22 |         self.spatial_attention = nn.Sequential(
23 |             nn.Conv2d(channels, mid_channels, kernel_size=7, stride=1, padding=3),
24 |             nn.BatchNorm2d(mid_channels),
25 |             nn.ReLU(inplace=True),
26 |             nn.Conv2d(mid_channels, channels, kernel_size=7, stride=1, padding=3),
27 |             nn.BatchNorm2d(channels)
28 |         )
29 | 
30 |     def forward(self, x):
31 |         b, c, h, w = x.shape
32 |         # channel attention
33 |         x_permute = x.permute(0, 2, 3, 1).view(b, -1, c)
34 |         x_att_permute = self.channel_attention(x_permute).view(b, h, w, c)
35 |         x_channel_att = x_att_permute.permute(0, 3, 1, 2)
36 | 
37 |         x = x * x_channel_att
38 |         # spatial attention
39 |         x_spatial_att = self.spatial_attention(x).sigmoid()
40 |         out = x * x_spatial_att
41 |         return out
42 | 
43 | 
44 | if __name__ == '__main__':
45 |     x = torch.randn(1, 16, 64, 64)
46 |     b, c, h, w = x.shape
47 |     net = GAM(channels=c)
48 |     out = net(x)
49 |     print(out.shape)
--------------------------------------------------------------------------------
/Attention/GlobalContextBlock.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn as nn
 3 | import torchvision
 4 | 
 5 | 
 6 | class GlobalContextBlock(nn.Module):
 7 |     def __init__(self,
 8 |                  inplanes,
 9 |                  ratio,
10 |                  pooling_type='att',
11 |                  fusion_types=('channel_add', )):
12 |         super(GlobalContextBlock, self).__init__()
13 |         assert pooling_type in ['avg', 'att']
14 |         assert isinstance(fusion_types, (list, tuple))
15 |         valid_fusion_types = ['channel_add', 'channel_mul']
16 |         assert all([f in valid_fusion_types for f in fusion_types])
17 |         assert len(fusion_types) > 0, 'at least one fusion should be used'
18 |         self.inplanes = inplanes
19 |         self.ratio = ratio
20 |         self.planes = int(inplanes * ratio)
21 |         self.pooling_type = pooling_type
22 |         self.fusion_types = fusion_types
23 |         if pooling_type == 'att':
24 |             self.conv_mask = nn.Conv2d(inplanes, 1, 
kernel_size=1) 25 | self.softmax = nn.Softmax(dim=2) 26 | else: 27 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 28 | if 'channel_add' in fusion_types: 29 | self.channel_add_conv = nn.Sequential( 30 | nn.Conv2d(self.inplanes, self.planes, kernel_size=1), 31 | nn.LayerNorm([self.planes, 1, 1]), 32 | nn.ReLU(inplace=True), # yapf: disable 33 | nn.Conv2d(self.planes, self.inplanes, kernel_size=1)) 34 | else: 35 | self.channel_add_conv = None 36 | if 'channel_mul' in fusion_types: 37 | self.channel_mul_conv = nn.Sequential( 38 | nn.Conv2d(self.inplanes, self.planes, kernel_size=1), 39 | nn.LayerNorm([self.planes, 1, 1]), 40 | nn.ReLU(inplace=True), # yapf: disable 41 | nn.Conv2d(self.planes, self.inplanes, kernel_size=1)) 42 | else: 43 | self.channel_mul_conv = None 44 | 45 | def spatial_pool(self, x): 46 | batch, channel, height, width = x.size() 47 | if self.pooling_type == 'att': 48 | input_x = x 49 | # [N, C, H * W] 50 | input_x = input_x.view(batch, channel, height * width) 51 | # [N, 1, C, H * W] 52 | input_x = input_x.unsqueeze(1) 53 | # [N, 1, H, W] 54 | context_mask = self.conv_mask(x) 55 | # [N, 1, H * W] 56 | context_mask = context_mask.view(batch, 1, height * width) 57 | # [N, 1, H * W] 58 | context_mask = self.softmax(context_mask) 59 | # [N, 1, H * W, 1] 60 | context_mask = context_mask.unsqueeze(-1) 61 | # [N, 1, C, 1] 62 | context = torch.matmul(input_x, context_mask) 63 | # [N, C, 1, 1] 64 | context = context.view(batch, channel, 1, 1) 65 | else: 66 | # [N, C, 1, 1] 67 | context = self.avg_pool(x) 68 | 69 | return context 70 | 71 | def forward(self, x): 72 | # [N, C, 1, 1] 73 | context = self.spatial_pool(x) 74 | 75 | out = x 76 | if self.channel_mul_conv is not None: 77 | # [N, C, 1, 1] 78 | channel_mul_term = torch.sigmoid(self.channel_mul_conv(context)) 79 | out = out * channel_mul_term 80 | if self.channel_add_conv is not None: 81 | # [N, C, 1, 1] 82 | channel_add_term = self.channel_add_conv(context) 83 | out = out + channel_add_term 84 | 85 | return out 86 | 87 | 88 | if __name__=='__main__': 89 | model = GlobalContextBlock(inplanes=16, ratio=0.25) 90 | print(model) 91 | 92 | input = torch.randn(1, 16, 64, 64) 93 | out = model(input) 94 | print(out.shape) -------------------------------------------------------------------------------- /Attention/NAM.py: -------------------------------------------------------------------------------- 1 | # !/usr/bin/env python 2 | # -- coding: utf-8 -- 3 | # @Time : 2021/12/7 11:06 4 | # @Author : liumin 5 | # @File : NAM.py 6 | 7 | import torch 8 | import torch.nn as nn 9 | 10 | """ 11 | NAM: Normalization-based Attention Module 12 | PDF: https://arxiv.org/pdf/2111.12419.pdf 13 | """ 14 | 15 | class NAM(nn.Module): 16 | def __init__(self, channel): 17 | super(NAM, self).__init__() 18 | self.channel = channel 19 | self.bn2 = nn.BatchNorm2d(self.channel, affine=True) 20 | self.sigmoid = nn.Sigmoid() 21 | 22 | def forward(self, x): 23 | residual = x 24 | x = self.bn2(x) 25 | weight_bn = self.bn2.weight.data.abs() / torch.sum(self.bn2.weight.data.abs()) 26 | x = x.permute(0, 2, 3, 1).contiguous() 27 | x = torch.mul(weight_bn, x) 28 | x = x.permute(0, 3, 1, 2).contiguous() 29 | out = self.sigmoid(x) * residual # 30 | return out 31 | 32 | 33 | if __name__=='__main__': 34 | model = NAM(channel=16) 35 | print(model) 36 | 37 | input = torch.randn(1, 16, 64, 64) 38 | out = model(input) 39 | print(out.shape) -------------------------------------------------------------------------------- /Attention/NonLocalBlock.py: 
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn as nn
 3 | import torchvision
 4 | 
 5 | 
 6 | class NonLocalBlock(nn.Module):
 7 |     def __init__(self, channel):
 8 |         super(NonLocalBlock, self).__init__()
 9 |         self.inter_channel = channel // 2
10 |         self.conv_phi = nn.Conv2d(in_channels=channel, out_channels=self.inter_channel, kernel_size=1, stride=1, padding=0, bias=False)
11 |         self.conv_theta = nn.Conv2d(in_channels=channel, out_channels=self.inter_channel, kernel_size=1, stride=1, padding=0, bias=False)
12 |         self.conv_g = nn.Conv2d(in_channels=channel, out_channels=self.inter_channel, kernel_size=1, stride=1, padding=0, bias=False)
13 |         self.softmax = nn.Softmax(dim=-1)  # normalize each query row over the key positions
14 |         self.conv_mask = nn.Conv2d(in_channels=self.inter_channel, out_channels=channel, kernel_size=1, stride=1, padding=0, bias=False)
15 | 
16 |     def forward(self, x):
17 |         # [N, C, H , W]
18 |         b, c, h, w = x.size()
19 |         # [N, C/2, H * W]
20 |         x_phi = self.conv_phi(x).view(b, self.inter_channel, -1)
21 |         # [N, H * W, C/2]
22 |         x_theta = self.conv_theta(x).view(b, self.inter_channel, -1).permute(0, 2, 1).contiguous()
23 |         x_g = self.conv_g(x).view(b, self.inter_channel, -1).permute(0, 2, 1).contiguous()
24 |         # [N, H * W, H * W]
25 |         mul_theta_phi = torch.matmul(x_theta, x_phi)
26 |         mul_theta_phi = self.softmax(mul_theta_phi)
27 |         # [N, H * W, C/2]
28 |         mul_theta_phi_g = torch.matmul(mul_theta_phi, x_g)
29 |         # [N, C/2, H, W]
30 |         mul_theta_phi_g = mul_theta_phi_g.permute(0,2,1).contiguous().view(b,self.inter_channel, h, w)
31 |         # [N, C, H , W]
32 |         mask = self.conv_mask(mul_theta_phi_g)
33 |         out = mask + x
34 |         return out
35 | 
36 | 
37 | if __name__=='__main__':
38 |     model = NonLocalBlock(channel=16)
39 |     print(model)
40 | 
41 |     input = torch.randn(1, 16, 64, 64)
42 |     out = model(input)
43 |     print(out.shape)
--------------------------------------------------------------------------------
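Note (added): the [N, H * W, H * W] attention map above grows quadratically with resolution, which is exactly what the APNB/AFNB blocks in ANN.py mitigate by pyramid-sampling keys and values down to S = 110 positions. Rough numbers for the 64 x 64 demo input:

    # full non-local block:   (64*64)**2    = 16,777,216 attention entries per batch element
    # asymmetric block (ANN): (64*64) * 110 =    450,560 entries (~37x fewer)
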
/Attention/README.md:
--------------------------------------------------------------------------------
 1 | # Attention
 2 | 
 3 | 
 4 | 
 5 | **SE Net**
 6 | 
 7 | Squeeze-and-Excitation Networks, 2017
 8 | 
 9 | https://arxiv.org/pdf/1709.01507.pdf
10 | 
11 | https://liumin.blog.csdn.net/article/details/104370739
12 | 
13 | 
14 | 
15 | **scSE**
16 | 
17 | Concurrent Spatial and Channel Squeeze & Excitation in Fully Convolutional Networks, 2018
18 | https://arxiv.org/pdf/1803.02579v2.pdf
19 | 
20 | https://liumin.blog.csdn.net/article/details/104371065
21 | 
22 | 
23 | 
24 | **NL Net**
25 | 
26 | Non-local Neural Networks, 2018
27 | https://arxiv.org/pdf/1711.07971.pdf
28 | 
29 | https://liumin.blog.csdn.net/article/details/104371212
30 | 
31 | 
32 | 
33 | **GCNet**
34 | 
35 | GCNet: Non-local Networks Meet Squeeze-Excitation Networks and Beyond, 2019
36 | https://arxiv.org/pdf/1904.11492.pdf
37 | 
38 | https://liumin.blog.csdn.net/article/details/104375585
39 | 
40 | 
41 | 
42 | **CBAM**
43 | 
44 | CBAM: Convolutional Block Attention Module, 2018
45 | https://arxiv.org/pdf/1807.06521.pdf
46 | 
47 | https://liumin.blog.csdn.net/article/details/104371273
48 | 
49 | 
50 | 
51 | 
--------------------------------------------------------------------------------
/Attention/SENet.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn as nn
 3 | import torchvision
 4 | 
 5 | 
 6 | def Conv1(in_planes, places, stride=2):
 7 |     return nn.Sequential(
 8 |         nn.Conv2d(in_channels=in_planes,out_channels=places,kernel_size=7,stride=stride,padding=3, bias=False),
 9 |         nn.BatchNorm2d(places),
10 |         nn.ReLU(inplace=True),
11 |         nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
12 |     )
13 | 
14 | class SE_Module(nn.Module):
15 |     def __init__(self, channel,ratio = 16):
16 |         super(SE_Module, self).__init__()
17 |         self.squeeze = nn.AdaptiveAvgPool2d(1)
18 |         self.excitation = nn.Sequential(
19 |             nn.Linear(in_features=channel, out_features=channel // ratio),
20 |             nn.ReLU(inplace=True),
21 |             nn.Linear(in_features=channel // ratio, out_features=channel),
22 |             nn.Sigmoid()
23 |         )
24 |     def forward(self, x):
25 |         b, c, _, _ = x.size()
26 |         y = self.squeeze(x).view(b, c)
27 |         z = self.excitation(y).view(b, c, 1, 1)
28 |         return x * z.expand_as(x)
29 | 
30 | 
31 | class SE_ResNetBlock(nn.Module):
32 |     def __init__(self,in_places,places, stride=1,downsampling=False, expansion = 4):
33 |         super(SE_ResNetBlock,self).__init__()
34 |         self.expansion = expansion
35 |         self.downsampling = downsampling
36 | 
37 |         self.bottleneck = nn.Sequential(
38 |             nn.Conv2d(in_channels=in_places,out_channels=places,kernel_size=1,stride=1, bias=False),
39 |             nn.BatchNorm2d(places),
40 |             nn.ReLU(inplace=True),
41 |             nn.Conv2d(in_channels=places, out_channels=places, kernel_size=3, stride=stride, padding=1, bias=False),
42 |             nn.BatchNorm2d(places),
43 |             nn.ReLU(inplace=True),
44 |             nn.Conv2d(in_channels=places, out_channels=places*self.expansion, kernel_size=1, stride=1, bias=False),
45 |             nn.BatchNorm2d(places*self.expansion),
46 |         )
47 |         self.se = SE_Module(channel=places*self.expansion)  # squeeze-excitation gate on the residual branch (defined above but previously never applied)
48 |         if self.downsampling:
49 |             self.downsample = nn.Sequential(
50 |                 nn.Conv2d(in_channels=in_places, out_channels=places*self.expansion, kernel_size=1, stride=stride, bias=False),
51 |                 nn.BatchNorm2d(places*self.expansion)
52 |             )
53 |         self.relu = nn.ReLU(inplace=True)
54 | 
55 |     def forward(self, x):
56 |         residual = x
57 |         out = self.bottleneck(x)
58 |         out = self.se(out)
59 |         if self.downsampling:
60 |             residual = self.downsample(x)
61 | 
62 |         out += residual
63 |         out = self.relu(out)
64 |         return out
65 | 
66 | class SE_ResNet(nn.Module):
67 |     def __init__(self,blocks, num_classes=1000, expansion = 4):
68 |         super(SE_ResNet,self).__init__()
69 |         self.expansion = expansion
70 | 
71 |         self.conv1 = Conv1(in_planes = 3, places= 64)
72 | 
73 |         self.layer1 = self.make_layer(in_places = 64, places= 64, block=blocks[0], stride=1)
74 |         self.layer2 = self.make_layer(in_places = 256,places=128, block=blocks[1], stride=2)
75 |         self.layer3 = self.make_layer(in_places=512,places=256, block=blocks[2], stride=2)
76 |         self.layer4 = self.make_layer(in_places=1024,places=512, block=blocks[3], stride=2)
77 | 
78 |         self.avgpool = nn.AvgPool2d(7, stride=1)
79 |         self.fc = nn.Linear(2048,num_classes)
80 | 
81 |         for m in self.modules():
82 |             if isinstance(m, nn.Conv2d):
83 |                 nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
84 |             elif isinstance(m, nn.BatchNorm2d):
85 |                 nn.init.constant_(m.weight, 1)
86 |                 nn.init.constant_(m.bias, 0)
87 | 
88 |     def make_layer(self, in_places, places, block, stride):
89 |         layers = []
90 |         layers.append(SE_ResNetBlock(in_places, places,stride, downsampling =True))
91 |         for i in range(1, block):
92 |             layers.append(SE_ResNetBlock(places*self.expansion, places))
93 | 
94 |         return nn.Sequential(*layers)
95 | 
96 | 
97 |     def forward(self, x):
98 |         x = self.conv1(x)
99 | 
100 |         x = self.layer1(x)
101 |         x = self.layer2(x)
102 |         x = self.layer3(x)
103 |         x = self.layer4(x)
104 | 
105 |         x = self.avgpool(x)
106 |         x = x.view(x.size(0), -1)
107 |         x = self.fc(x)
108 |         return x
109 | 
110 | def SE_ResNet50():
111 |     return SE_ResNet([3, 4, 6, 3])
112 | 
113 | if __name__=='__main__':
114 |     model = SE_ResNet50()
115 |     print(model)
116 | 
117 |     input = torch.randn(1, 3, 224, 224)
118 |     out = model(input)
119 |     print(out.shape)
120 | 
121 | 
122 | 
123 | 
124 | 
125 | 
--------------------------------------------------------------------------------
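Shape walk-through (added), with example sizes: SE_Module squeezes an input of [2, 256, 56, 56] to [2, 256] by global average pooling, runs it through the 256 -> 16 -> 256 bottleneck MLP (ratio=16), and rescales the input channels with the resulting sigmoid weights:

    se = SE_Module(channel=256)
    y = se(torch.randn(2, 256, 56, 56))   # y.shape == torch.Size([2, 256, 56, 56])
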
/Attention/SEvariants.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn as nn
 3 | import torchvision
 4 | 
 5 | 
 6 | class cSE_Module(nn.Module):
 7 |     def __init__(self, channel,ratio = 16):
 8 |         super(cSE_Module, self).__init__()
 9 |         self.squeeze = nn.AdaptiveAvgPool2d(1)
10 |         self.excitation = nn.Sequential(
11 |             nn.Linear(in_features=channel, out_features=channel // ratio),
12 |             nn.ReLU(inplace=True),
13 |             nn.Linear(in_features=channel // ratio, out_features=channel),
14 |             nn.Sigmoid()
15 |         )
16 |     def forward(self, x):
17 |         b, c, _, _ = x.size()
18 |         y = self.squeeze(x).view(b, c)
19 |         z = self.excitation(y).view(b, c, 1, 1)
20 |         return x * z.expand_as(x)
21 | 
22 | 
23 | class sSE_Module(nn.Module):
24 |     def __init__(self, channel):
25 |         super(sSE_Module, self).__init__()
26 |         self.spatial_excitation = nn.Sequential(
27 |             nn.Conv2d(in_channels=channel, out_channels=1, kernel_size=1,stride=1,padding=0),
28 |             nn.Sigmoid()
29 |         )
30 |     def forward(self, x):
31 |         z = self.spatial_excitation(x)
32 |         return x * z.expand_as(x)
33 | 
34 | 
35 | class scSE_Module(nn.Module):
36 |     def __init__(self, channel,ratio = 16):
37 |         super(scSE_Module, self).__init__()
38 |         self.cSE = cSE_Module(channel,ratio)
39 |         self.sSE = sSE_Module(channel)
40 | 
41 |     def forward(self, x):
42 |         return self.cSE(x) + self.sSE(x)  # aggregate the two recalibrated maps by element-wise addition
43 | 
44 | 
45 | if __name__=='__main__':
46 |     # model = cSE_Module(channel=16)
47 |     # model = sSE_Module(channel=16)
48 |     model = scSE_Module(channel=16)
49 |     print(model)
50 | 
51 |     input = torch.randn(1, 16, 64, 64)
52 |     out = model(input)
53 |     print(out.shape)
--------------------------------------------------------------------------------
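Note (added): the scSE paper also discusses element-wise max, multiplication and concatenation as alternative ways of aggregating the two branches; addition is what this file implements. A max-out variant would be a one-line change to scSE_Module.forward (hypothetical, not part of the original file):

    # return torch.max(self.cSE(x), self.sSE(x))
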
/Attention/TripletAttention.py:
--------------------------------------------------------------------------------
 1 | # !/usr/bin/env python
 2 | # -- coding: utf-8 --
 3 | # @Time : 2020/10/30 14:30
 4 | # @Author : liumin
 5 | # @File : TripletAttention.py
 6 | 
 7 | import torch
 8 | import torch.nn as nn
 9 | import torchvision
10 | 
11 | 
12 | class ChannelPool(nn.Module):
13 |     def forward(self, x):
14 |         return torch.cat( (torch.max(x,1)[0].unsqueeze(1), torch.mean(x,1).unsqueeze(1)), dim=1 )
15 | 
16 | 
17 | class SpatialGate(nn.Module):
18 |     def __init__(self):
19 |         super(SpatialGate, self).__init__()
20 | 
21 |         self.channel_pool = ChannelPool()
22 |         self.conv = nn.Sequential(
23 |             nn.Conv2d(in_channels=2, out_channels=1, kernel_size=7, stride=1, padding=3),
24 |             nn.BatchNorm2d(1)
25 |         )
26 |         self.sigmoid = nn.Sigmoid()
27 | 
28 |     def forward(self, x):
29 |         out = self.conv(self.channel_pool(x))
30 |         return x * self.sigmoid(out)  # gate the input features (out itself has only one channel)
31 | 
32 | 
33 | class TripletAttention(nn.Module):
34 |     def __init__(self, spatial=True):
35 |         super(TripletAttention, self).__init__()
36 |         self.spatial = spatial
37 |         self.height_gate = SpatialGate()
38 |         self.width_gate = SpatialGate()
39 |         if self.spatial:
40 |             self.spatial_gate = SpatialGate()
41 | 
42 |     def forward(self, x):
43 |         x_perm1 = x.permute(0, 2, 1, 3).contiguous()
44 |         x_out1 = self.height_gate(x_perm1)
45 |         x_out1 = x_out1.permute(0, 2, 1, 3).contiguous()
46 | 
47 |         x_perm2 = x.permute(0, 3, 2, 1).contiguous()
48 |         x_out2 = self.width_gate(x_perm2)
49 |         x_out2 = x_out2.permute(0, 3, 2, 1).contiguous()
50 | 
51 |         if self.spatial:
52 |             x_out3 = self.spatial_gate(x)
53 |             return (1/3) * (x_out1 + x_out2 + x_out3)
54 |         else:
55 |             return (1/2) * (x_out1 + x_out2)
56 | 
57 | 
58 | 
59 | if __name__=='__main__':
60 |     model = TripletAttention()
61 |     print(model)
62 | 
63 |     input = torch.randn(1, 16, 256, 256)
64 |     out = model(input)
65 |     print(out.shape)
--------------------------------------------------------------------------------
/ClassicNetwork/AlexNet.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn as nn
 3 | import torchvision
 4 | 
 5 | class AlexNet(nn.Module):
 6 |     def __init__(self,num_classes=1000):
 7 |         super(AlexNet,self).__init__()
 8 |         self.feature_extraction = nn.Sequential(
 9 |             nn.Conv2d(in_channels=3,out_channels=96,kernel_size=11,stride=4,padding=2,bias=False),
10 |             nn.ReLU(inplace=True),
11 |             nn.MaxPool2d(kernel_size=3,stride=2,padding=0),
12 |             nn.Conv2d(in_channels=96,out_channels=192,kernel_size=5,stride=1,padding=2,bias=False),
13 |             nn.ReLU(inplace=True),
14 |             nn.MaxPool2d(kernel_size=3,stride=2,padding=0),
15 |             nn.Conv2d(in_channels=192,out_channels=384,kernel_size=3,stride=1,padding=1,bias=False),
16 |             nn.ReLU(inplace=True),
17 |             nn.Conv2d(in_channels=384,out_channels=256,kernel_size=3,stride=1,padding=1,bias=False),
18 |             nn.ReLU(inplace=True),
19 |             nn.Conv2d(in_channels=256,out_channels=256,kernel_size=3,stride=1,padding=1,bias=False),
20 |             nn.ReLU(inplace=True),
21 |             nn.MaxPool2d(kernel_size=3, stride=2, padding=0),
22 |         )
23 |         self.classifier = nn.Sequential(
24 |             nn.Dropout(p=0.5),
25 |             nn.Linear(in_features=256*6*6,out_features=4096),
26 |             nn.ReLU(inplace=True),  # the fully-connected layers need non-linearities between them
27 |             nn.Dropout(p=0.5),
28 |             nn.Linear(in_features=4096, out_features=4096),
29 |             nn.ReLU(inplace=True),
30 |             nn.Linear(in_features=4096, out_features=num_classes),
31 |         )
32 |     def forward(self,x):
33 |         x = self.feature_extraction(x)
34 |         x = x.view(x.size(0),256*6*6)
35 |         x = self.classifier(x)
36 |         return x
37 | 
38 | 
39 | if __name__ =='__main__':
40 |     # model = torchvision.models.AlexNet()
41 |     model = AlexNet()
42 |     print(model)
43 | 
44 |     input = torch.randn(8,3,224,224)
45 |     out = model(input)
46 |     print(out.shape)
47 | 
48 | 
--------------------------------------------------------------------------------
/ClassicNetwork/DenseNet.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn as nn
 3 | import torchvision
 4 | 
 5 | print("PyTorch Version: ",torch.__version__)
 6 | print("Torchvision Version: ",torchvision.__version__)
 7 | 
 8 | __all__ = ['DenseNet121', 'DenseNet169','DenseNet201','DenseNet264']
 9 | 
10 | def Conv1(in_planes, places, stride=2):
11 |     return nn.Sequential(
12 |         nn.Conv2d(in_channels=in_planes,out_channels=places,kernel_size=7,stride=stride,padding=3, bias=False),
13 |         nn.BatchNorm2d(places),
14 |         nn.ReLU(inplace=True),
15 |         nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
16 |     )
17 | 
18 | class _TransitionLayer(nn.Module):
19 |     def __init__(self, inplace, plance):
20 |         super(_TransitionLayer, self).__init__()
21 |         self.transition_layer = nn.Sequential(
22 |             nn.BatchNorm2d(inplace),
23 |             nn.ReLU(inplace=True),
24 |             nn.Conv2d(in_channels=inplace,out_channels=plance,kernel_size=1,stride=1,padding=0,bias=False),
25 |             nn.AvgPool2d(kernel_size=2,stride=2),
26 |         )
27 | 
28 |     def forward(self, x):
29 |         return self.transition_layer(x)
30 | 
31 | 
32 | class _DenseLayer(nn.Module):
33 |     def __init__(self, inplace, growth_rate, bn_size, drop_rate=0):
34 |         super(_DenseLayer, self).__init__()
35 |         self.drop_rate = drop_rate
36 |         self.dense_layer = nn.Sequential(
37 |             nn.BatchNorm2d(inplace),
38 |             
nn.ReLU(inplace=True), 39 | nn.Conv2d(in_channels=inplace, out_channels=bn_size * growth_rate, kernel_size=1, stride=1, padding=0, bias=False), 40 | nn.BatchNorm2d(bn_size * growth_rate), 41 | nn.ReLU(inplace=True), 42 | nn.Conv2d(in_channels=bn_size * growth_rate, out_channels=growth_rate, kernel_size=3, stride=1, padding=1, bias=False), 43 | ) 44 | self.dropout = nn.Dropout(p=self.drop_rate) 45 | 46 | def forward(self, x): 47 | y = self.dense_layer(x) 48 | if self.drop_rate > 0: 49 | y = self.dropout(y) 50 | return torch.cat([x, y], 1) 51 | 52 | 53 | class DenseBlock(nn.Module): 54 | def __init__(self, num_layers, inplances, growth_rate, bn_size , drop_rate=0): 55 | super(DenseBlock, self).__init__() 56 | layers = [] 57 | for i in range(num_layers): 58 | layers.append(_DenseLayer(inplances + i * growth_rate, growth_rate, bn_size, drop_rate)) 59 | self.layers = nn.Sequential(*layers) 60 | 61 | def forward(self, x): 62 | return self.layers(x) 63 | 64 | 65 | class DenseNet(nn.Module): 66 | def __init__(self, init_channels=64, growth_rate=32, blocks=[6, 12, 24, 16],num_classes=1000): 67 | super(DenseNet, self).__init__() 68 | bn_size = 4 69 | drop_rate = 0 70 | self.conv1 = Conv1(in_planes=3, places=init_channels) 71 | 72 | num_features = init_channels 73 | self.layer1 = DenseBlock(num_layers=blocks[0], inplances=num_features, growth_rate=growth_rate, bn_size=bn_size, drop_rate=drop_rate) 74 | num_features = num_features + blocks[0] * growth_rate 75 | self.transition1 = _TransitionLayer(inplace=num_features, plance=num_features // 2) 76 | num_features = num_features // 2 77 | self.layer2 = DenseBlock(num_layers=blocks[1], inplances=num_features, growth_rate=growth_rate, bn_size=bn_size, drop_rate=drop_rate) 78 | num_features = num_features + blocks[1] * growth_rate 79 | self.transition2 = _TransitionLayer(inplace=num_features, plance=num_features // 2) 80 | num_features = num_features // 2 81 | self.layer3 = DenseBlock(num_layers=blocks[2], inplances=num_features, growth_rate=growth_rate, bn_size=bn_size, drop_rate=drop_rate) 82 | num_features = num_features + blocks[2] * growth_rate 83 | self.transition3 = _TransitionLayer(inplace=num_features, plance=num_features // 2) 84 | num_features = num_features // 2 85 | self.layer4 = DenseBlock(num_layers=blocks[3], inplances=num_features, growth_rate=growth_rate, bn_size=bn_size, drop_rate=drop_rate) 86 | num_features = num_features + blocks[3] * growth_rate 87 | 88 | self.avgpool = nn.AvgPool2d(7, stride=1) 89 | self.fc = nn.Linear(num_features, num_classes) 90 | 91 | def forward(self, x): 92 | x = self.conv1(x) 93 | 94 | x = self.layer1(x) 95 | x = self.transition1(x) 96 | x = self.layer2(x) 97 | x = self.transition2(x) 98 | x = self.layer3(x) 99 | x = self.transition3(x) 100 | x = self.layer4(x) 101 | 102 | x = self.avgpool(x) 103 | x = x.view(x.size(0), -1) 104 | x = self.fc(x) 105 | return x 106 | 107 | def DenseNet121(): 108 | return DenseNet(init_channels=64, growth_rate=32, blocks=[6, 12, 24, 16]) 109 | 110 | def DenseNet169(): 111 | return DenseNet(init_channels=64, growth_rate=32, blocks=[6, 12, 32, 32]) 112 | 113 | def DenseNet201(): 114 | return DenseNet(init_channels=64, growth_rate=32, blocks=[6, 12, 48, 32]) 115 | 116 | def DenseNet264(): 117 | return DenseNet(init_channels=64, growth_rate=32, blocks=[6, 12, 64, 48]) 118 | 119 | if __name__=='__main__': 120 | # model = torchvision.models.densenet121() 121 | model = DenseNet121() 122 | print(model) 123 | 124 | input = torch.randn(1, 3, 224, 224) 125 | out = model(input) 126 | 
print(out.shape) -------------------------------------------------------------------------------- /ClassicNetwork/InceptionV1.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torchvision 4 | 5 | def ConvBNReLU(in_channels,out_channels,kernel_size): 6 | return nn.Sequential( 7 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=1,padding=kernel_size//2), 8 | nn.BatchNorm2d(out_channels), 9 | nn.ReLU6(inplace=True) 10 | ) 11 | 12 | class InceptionV1Module(nn.Module): 13 | def __init__(self, in_channels,out_channels1, out_channels2reduce,out_channels2, out_channels3reduce, out_channels3, out_channels4): 14 | super(InceptionV1Module, self).__init__() 15 | 16 | self.branch1_conv = ConvBNReLU(in_channels=in_channels,out_channels=out_channels1,kernel_size=1) 17 | 18 | self.branch2_conv1 = ConvBNReLU(in_channels=in_channels,out_channels=out_channels2reduce,kernel_size=1) 19 | self.branch2_conv2 = ConvBNReLU(in_channels=out_channels2reduce,out_channels=out_channels2,kernel_size=3) 20 | 21 | self.branch3_conv1 = ConvBNReLU(in_channels=in_channels, out_channels=out_channels3reduce, kernel_size=1) 22 | self.branch3_conv2 = ConvBNReLU(in_channels=out_channels3reduce, out_channels=out_channels3, kernel_size=5) 23 | 24 | self.branch4_pool = nn.MaxPool2d(kernel_size=3,stride=1,padding=1) 25 | self.branch4_conv1 = ConvBNReLU(in_channels=in_channels, out_channels=out_channels4, kernel_size=1) 26 | 27 | def forward(self,x): 28 | out1 = self.branch1_conv(x) 29 | out2 = self.branch2_conv2(self.branch2_conv1(x)) 30 | out3 = self.branch3_conv2(self.branch3_conv1(x)) 31 | out4 = self.branch4_conv1(self.branch4_pool(x)) 32 | out = torch.cat([out1, out2, out3, out4], dim=1) 33 | return out 34 | 35 | class InceptionAux(nn.Module): 36 | def __init__(self, in_channels,out_channels): 37 | super(InceptionAux, self).__init__() 38 | 39 | self.auxiliary_avgpool = nn.AvgPool2d(kernel_size=5, stride=3) 40 | self.auxiliary_conv1 = ConvBNReLU(in_channels=in_channels, out_channels=128, kernel_size=1) 41 | self.auxiliary_linear1 = nn.Linear(in_features=128 * 4 * 4, out_features=1024) 42 | self.auxiliary_relu = nn.ReLU6(inplace=True) 43 | self.auxiliary_dropout = nn.Dropout(p=0.7) 44 | self.auxiliary_linear2 = nn.Linear(in_features=1024, out_features=out_channels) 45 | 46 | def forward(self, x): 47 | x = self.auxiliary_conv1(self.auxiliary_avgpool(x)) 48 | x = x.view(x.size(0), -1) 49 | x= self.auxiliary_relu(self.auxiliary_linear1(x)) 50 | out = self.auxiliary_linear2(self.auxiliary_dropout(x)) 51 | return out 52 | 53 | class InceptionV1(nn.Module): 54 | def __init__(self, num_classes=1000, stage='train'): 55 | super(InceptionV1, self).__init__() 56 | self.stage = stage 57 | 58 | self.block1 = nn.Sequential( 59 | nn.Conv2d(in_channels=3,out_channels=64,kernel_size=7,stride=2,padding=3), 60 | nn.BatchNorm2d(64), 61 | nn.MaxPool2d(kernel_size=3,stride=2, padding=1), 62 | nn.Conv2d(in_channels=64, out_channels=64, kernel_size=1, stride=1), 63 | nn.BatchNorm2d(64), 64 | ) 65 | self.block2 = nn.Sequential( 66 | nn.Conv2d(in_channels=64, out_channels=192, kernel_size=3, stride=1, padding=1), 67 | nn.BatchNorm2d(192), 68 | nn.MaxPool2d(kernel_size=3, stride=2, padding=1), 69 | ) 70 | 71 | self.block3 = nn.Sequential( 72 | InceptionV1Module(in_channels=192,out_channels1=64, out_channels2reduce=96, out_channels2=128, out_channels3reduce = 16, out_channels3=32, out_channels4=32), 73 | 
InceptionV1Module(in_channels=256, out_channels1=128, out_channels2reduce=128, out_channels2=192,out_channels3reduce=32, out_channels3=96, out_channels4=64),
74 |             nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
75 |         )
76 | 
77 |         self.block4_1 = InceptionV1Module(in_channels=480, out_channels1=192, out_channels2reduce=96, out_channels2=208,out_channels3reduce=16, out_channels3=48, out_channels4=64)
78 | 
79 |         if self.stage == 'train':
80 |             self.aux_logits1 = InceptionAux(in_channels=512,out_channels=num_classes)
81 | 
82 |         self.block4_2 = nn.Sequential(
83 |             InceptionV1Module(in_channels=512, out_channels1=160, out_channels2reduce=112, out_channels2=224,
84 |                               out_channels3reduce=24, out_channels3=64, out_channels4=64),
85 |             InceptionV1Module(in_channels=512, out_channels1=128, out_channels2reduce=128, out_channels2=256,
86 |                               out_channels3reduce=24, out_channels3=64, out_channels4=64),
87 |             InceptionV1Module(in_channels=512, out_channels1=112, out_channels2reduce=144, out_channels2=288,
88 |                               out_channels3reduce=32, out_channels3=64, out_channels4=64),
89 |         )
90 | 
91 |         if self.stage == 'train':
92 |             self.aux_logits2 = InceptionAux(in_channels=528,out_channels=num_classes)
93 | 
94 |         self.block4_3 = nn.Sequential(
95 |             InceptionV1Module(in_channels=528, out_channels1=256, out_channels2reduce=160, out_channels2=320,
96 |                               out_channels3reduce=32, out_channels3=128, out_channels4=128),
97 |             nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
98 |         )
99 | 
100 |         self.block5 = nn.Sequential(
101 |             InceptionV1Module(in_channels=832, out_channels1=256, out_channels2reduce=160, out_channels2=320,out_channels3reduce=32, out_channels3=128, out_channels4=128),
102 |             InceptionV1Module(in_channels=832, out_channels1=384, out_channels2reduce=192, out_channels2=384,out_channels3reduce=48, out_channels3=128, out_channels4=128),
103 |         )
104 | 
105 |         self.avgpool = nn.AvgPool2d(kernel_size=7,stride=1)
106 |         self.dropout = nn.Dropout(p=0.4)
107 |         self.linear = nn.Linear(in_features=1024,out_features=num_classes)
108 | 
109 |     def forward(self, x):
110 |         x = self.block1(x)
111 |         x = self.block2(x)
112 |         x = self.block3(x)
113 |         aux1 = x = self.block4_1(x)
114 |         aux2 = x = self.block4_2(x)
115 |         x = self.block4_3(x)
116 |         out = self.block5(x)
117 |         out = self.avgpool(out)
118 |         out = self.dropout(out)
119 |         out = out.view(out.size(0), -1)
120 |         out = self.linear(out)
121 |         if self.stage == 'train':
122 |             aux1 = self.aux_logits1(aux1)
123 |             aux2 = self.aux_logits2(aux2)
124 |             return aux1, aux2, out
125 |         else:
126 |             return out
127 | 
128 | if __name__=='__main__':
129 |     model = InceptionV1()
130 |     print(model)
131 | 
132 |     input = torch.randn(1, 3, 224, 224)
133 |     aux1, aux2, out = model(input)
134 |     print(aux1.shape)
135 |     print(aux2.shape)
136 |     print(out.shape)
137 | 
--------------------------------------------------------------------------------
/ClassicNetwork/InceptionV4.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn as nn
 3 | import torchvision
 4 | 
 5 | class InceptionV4(nn.Module):
 6 |     """Placeholder: the Inception-v4 architecture is not implemented here yet."""
 7 |     def __init__(self):
 8 |         super(InceptionV4, self).__init__()
 9 | 
10 |     def forward(self, x):
11 |         # stub: the original forward took no input and returned an undefined `out`
12 |         raise NotImplementedError('InceptionV4 is a stub; see InceptionV1/V2/V3 for working models')
13 | 
14 | if __name__=='__main__':
15 |     model = InceptionV4()
16 |     print(model)
17 | 
18 |     # the forward pass is not implemented yet:
19 |     # input = torch.randn(1, 3, 224, 224)
20 |     # out = model(input)
21 |     # print(out.shape)
--------------------------------------------------------------------------------
/ClassicNetwork/README.md:
--------------------------------------------------------------------------------
1 | # 
ClassicNetwork 2 | Classical network implemented by pytorch 3 | 4 | 5 | 6 | **AlexNet:** 7 | 8 | - ImageNet Classification with Deep Convolutional Neural Networks, Alex Krizhevsky, 2012 9 | 10 | 11 | 12 | **VGG:** 13 | 14 | - Very Deep Convolutional Networks for Large-Scale Image Recognition,Karen Simonyan,2014 15 | 16 | 17 | 18 | **ResNet:** 19 | 20 | - Deep Residual Learning for Image Recognition, He-Kaiming, 2015 21 | 22 | 23 | 24 | **InceptionV1:** 25 | 26 | - Going deeper with convolutions , Christian Szegedy , 2014 27 | 28 | 29 | 30 | **InceptionV2 and InceptionV3:** 31 | 32 | - Rethinking the Inception Architecture for Computer Vision , Christian Szegedy ,2015 33 | 34 | 35 | 36 | **InceptionV4 and Inception-ResNet:** 37 | 38 | - Inception-v4, Inception-ResNet and the Impact of Residual Connections on Learning , Christian Szegedy ,2016 39 | 40 | 41 | 42 | **DenseNet:** 43 | 44 | Densely Connected Convolutional Networks, 2017 45 | 46 | 47 | 48 | **ResNeXt:** 49 | 50 | Aggregated Residual Transformations for Deep Neural Networks,2017 -------------------------------------------------------------------------------- /ClassicNetwork/ResNeXt.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torchvision 4 | 5 | 6 | class ResNeXtBlock(nn.Module): 7 | def __init__(self,in_places,places, stride=1,downsampling=False, expansion = 2, cardinality=32): 8 | super(ResNeXtBlock,self).__init__() 9 | self.expansion = expansion 10 | self.downsampling = downsampling 11 | 12 | self.bottleneck = nn.Sequential( 13 | nn.Conv2d(in_channels=in_places, out_channels=places, kernel_size=1, stride=1, bias=False), 14 | nn.BatchNorm2d(places), 15 | nn.ReLU(inplace=True), 16 | nn.Conv2d(in_channels=places, out_channels=places, kernel_size=3, stride=stride, padding=1, bias=False, groups=cardinality), 17 | nn.BatchNorm2d(places), 18 | nn.ReLU(inplace=True), 19 | nn.Conv2d(in_channels=places, out_channels=places * self.expansion, kernel_size=1, stride=1, bias=False), 20 | nn.BatchNorm2d(places * self.expansion), 21 | ) 22 | 23 | if self.downsampling: 24 | self.downsample = nn.Sequential( 25 | nn.Conv2d(in_channels=in_places, out_channels=places * self.expansion, kernel_size=1, stride=stride,bias=False), 26 | nn.BatchNorm2d(places * self.expansion) 27 | ) 28 | self.relu = nn.ReLU(inplace=True) 29 | 30 | def forward(self, x): 31 | residual = x 32 | out = self.bottleneck(x) 33 | 34 | if self.downsampling: 35 | residual = self.downsample(x) 36 | 37 | out += residual 38 | out = self.relu(out) 39 | return out 40 | 41 | 42 | if __name__ =='__main__': 43 | model = ResNeXtBlock(in_places=256, places=128) 44 | print(model) 45 | 46 | input = torch.randn(1,256,64,64) 47 | out = model(input) 48 | print(out.shape) -------------------------------------------------------------------------------- /ClassicNetwork/ResNet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torchvision 4 | 5 | print("PyTorch Version: ",torch.__version__) 6 | print("Torchvision Version: ",torchvision.__version__) 7 | 8 | __all__ = ['ResNet50', 'ResNet101','ResNet152'] 9 | 10 | def Conv1(in_planes, places, stride=2): 11 | return nn.Sequential( 12 | nn.Conv2d(in_channels=in_planes,out_channels=places,kernel_size=7,stride=stride,padding=3, bias=False), 13 | nn.BatchNorm2d(places), 14 | nn.ReLU(inplace=True), 15 | nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 16 | ) 17 | 18 | class 
Bottleneck(nn.Module): 19 | def __init__(self,in_places,places, stride=1,downsampling=False, expansion = 4): 20 | super(Bottleneck,self).__init__() 21 | self.expansion = expansion 22 | self.downsampling = downsampling 23 | 24 | self.bottleneck = nn.Sequential( 25 | nn.Conv2d(in_channels=in_places,out_channels=places,kernel_size=1,stride=1, bias=False), 26 | nn.BatchNorm2d(places), 27 | nn.ReLU(inplace=True), 28 | nn.Conv2d(in_channels=places, out_channels=places, kernel_size=3, stride=stride, padding=1, bias=False), 29 | nn.BatchNorm2d(places), 30 | nn.ReLU(inplace=True), 31 | nn.Conv2d(in_channels=places, out_channels=places*self.expansion, kernel_size=1, stride=1, bias=False), 32 | nn.BatchNorm2d(places*self.expansion), 33 | ) 34 | 35 | if self.downsampling: 36 | self.downsample = nn.Sequential( 37 | nn.Conv2d(in_channels=in_places, out_channels=places*self.expansion, kernel_size=1, stride=stride, bias=False), 38 | nn.BatchNorm2d(places*self.expansion) 39 | ) 40 | self.relu = nn.ReLU(inplace=True) 41 | 42 | def forward(self, x): 43 | residual = x 44 | out = self.bottleneck(x) 45 | 46 | if self.downsampling: 47 | residual = self.downsample(x) 48 | 49 | out += residual 50 | out = self.relu(out) 51 | return out 52 | 53 | class ResNet(nn.Module): 54 | def __init__(self,blocks, num_classes=1000, expansion = 4): 55 | super(ResNet,self).__init__() 56 | self.expansion = expansion 57 | 58 | self.conv1 = Conv1(in_planes = 3, places= 64) 59 | 60 | self.layer1 = self.make_layer(in_places = 64, places= 64, block=blocks[0], stride=1) 61 | self.layer2 = self.make_layer(in_places = 256,places=128, block=blocks[1], stride=2) 62 | self.layer3 = self.make_layer(in_places=512,places=256, block=blocks[2], stride=2) 63 | self.layer4 = self.make_layer(in_places=1024,places=512, block=blocks[3], stride=2) 64 | 65 | self.avgpool = nn.AvgPool2d(7, stride=1) 66 | self.fc = nn.Linear(2048,num_classes) 67 | 68 | for m in self.modules(): 69 | if isinstance(m, nn.Conv2d): 70 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 71 | elif isinstance(m, nn.BatchNorm2d): 72 | nn.init.constant_(m.weight, 1) 73 | nn.init.constant_(m.bias, 0) 74 | 75 | def make_layer(self, in_places, places, block, stride): 76 | layers = [] 77 | layers.append(Bottleneck(in_places, places,stride, downsampling =True)) 78 | for i in range(1, block): 79 | layers.append(Bottleneck(places*self.expansion, places)) 80 | 81 | return nn.Sequential(*layers) 82 | 83 | 84 | def forward(self, x): 85 | x = self.conv1(x) 86 | 87 | x = self.layer1(x) 88 | x = self.layer2(x) 89 | x = self.layer3(x) 90 | x = self.layer4(x) 91 | 92 | x = self.avgpool(x) 93 | x = x.view(x.size(0), -1) 94 | x = self.fc(x) 95 | return x 96 | 97 | def ResNet50(): 98 | return ResNet([3, 4, 6, 3]) 99 | 100 | def ResNet101(): 101 | return ResNet([3, 4, 23, 3]) 102 | 103 | def ResNet152(): 104 | return ResNet([3, 8, 36, 3]) 105 | 106 | 107 | if __name__=='__main__': 108 | #model = torchvision.models.resnet50() 109 | model = ResNet50() 110 | print(model) 111 | 112 | input = torch.randn(1, 3, 224, 224) 113 | out = model(input) 114 | print(out.shape) 115 | -------------------------------------------------------------------------------- /ClassicNetwork/VGGNet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torchvision 4 | 5 | def Conv3x3BNReLU(in_channels,out_channels): 6 | return nn.Sequential( 7 | 
nn.Conv2d(in_channels=in_channels,out_channels=out_channels,kernel_size=3,stride=1,padding=1), 8 | nn.BatchNorm2d(out_channels), 9 | nn.ReLU6(inplace=True) 10 | ) 11 | 12 | class VGG(nn.Module): 13 | def __init__(self, block_nums,num_classes=1000): 14 | super(VGG, self).__init__() 15 | 16 | self.stage1 = self._make_layers(in_channels=3, out_channels=64, block_num=block_nums[0]) 17 | self.stage2 = self._make_layers(in_channels=64, out_channels=128, block_num=block_nums[1]) 18 | self.stage3 = self._make_layers(in_channels=128, out_channels=256, block_num=block_nums[2]) 19 | self.stage4 = self._make_layers(in_channels=256, out_channels=512, block_num=block_nums[3]) 20 | self.stage5 = self._make_layers(in_channels=512, out_channels=512, block_num=block_nums[4]) 21 | 22 | self.classifier = nn.Sequential( 23 | nn.Linear(in_features=512*7*7,out_features=4096), 24 | nn.Dropout(p=0.2), 25 | nn.Linear(in_features=4096, out_features=4096), 26 | nn.Dropout(p=0.2), 27 | nn.Linear(in_features=4096, out_features=num_classes) 28 | ) 29 | 30 | self._init_params() 31 | 32 | def _make_layers(self, in_channels, out_channels, block_num): 33 | layers = [] 34 | layers.append(Conv3x3BNReLU(in_channels,out_channels)) 35 | for i in range(1,block_num): 36 | layers.append(Conv3x3BNReLU(out_channels,out_channels)) 37 | layers.append(nn.MaxPool2d(kernel_size=2,stride=2, ceil_mode=False)) 38 | return nn.Sequential(*layers) 39 | 40 | def _init_params(self): 41 | for m in self.modules(): 42 | if isinstance(m, nn.Conv2d): 43 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 44 | elif isinstance(m, nn.BatchNorm2d): 45 | nn.init.constant_(m.weight, 1) 46 | nn.init.constant_(m.bias, 0) 47 | 48 | def forward(self, x): 49 | x = self.stage1(x) 50 | x = self.stage2(x) 51 | x = self.stage3(x) 52 | x = self.stage4(x) 53 | x = self.stage5(x) 54 | x = x.view(x.size(0),-1) 55 | out = self.classifier(x) 56 | return out 57 | 58 | def VGG16(): 59 | block_nums = [2, 2, 3, 3, 3] 60 | model = VGG(block_nums) 61 | return model 62 | 63 | def VGG19(): 64 | block_nums = [2, 2, 4, 4, 4] 65 | model = VGG(block_nums) 66 | return model 67 | 68 | if __name__ == '__main__': 69 | model = VGG16() 70 | print(model) 71 | torchvision.models.vgg16_bn() 72 | 73 | input = torch.randn(1,3,224,224) 74 | out = model(input) 75 | print(out.shape) 76 | 77 | -------------------------------------------------------------------------------- /FaceDetectorAndRecognition/FaceBoxes.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torchvision 4 | 5 | class Conv2dCReLU(nn.Module): 6 | def __init__(self,in_channels,out_channels,kernel_size,stride,padding): 7 | super(Conv2dCReLU, self).__init__() 8 | self.conv = nn.Conv2d(in_channels=in_channels,out_channels=out_channels,kernel_size=kernel_size,stride=stride,padding=padding) 9 | self.bn = nn.BatchNorm2d(out_channels) 10 | self.relu = nn.ReLU6(inplace=True) 11 | 12 | def forward(self, x): 13 | x = self.bn(self.conv(x)) 14 | out = torch.cat([x, -x], dim=1) 15 | return self.relu(out) 16 | 17 | 18 | class InceptionModules(nn.Module): 19 | def __init__(self): 20 | super(InceptionModules, self).__init__() 21 | 22 | self.branch1_conv1 = nn.Conv2d(in_channels=128,out_channels=32,kernel_size=1,stride=1) 23 | self.branch1_conv1_bn = nn.BatchNorm2d(32) 24 | 25 | self.branch2_pool = nn.MaxPool2d(kernel_size=3,stride=1,padding=1) 26 | self.branch2_conv1 = nn.Conv2d(in_channels=128, out_channels=32, kernel_size=1, stride=1) 27 | 
self.branch2_conv1_bn = nn.BatchNorm2d(32) 28 | 29 | self.branch3_conv1 = nn.Conv2d(in_channels=128, out_channels=24, kernel_size=1, stride=1) 30 | self.branch3_conv1_bn = nn.BatchNorm2d(24) 31 | self.branch3_conv2 = nn.Conv2d(in_channels=24, out_channels=32, kernel_size=3, stride=1, padding=1) 32 | self.branch3_conv2_bn = nn.BatchNorm2d(32) 33 | 34 | self.branch4_conv1 = nn.Conv2d(in_channels=128, out_channels=24, kernel_size=1, stride=1) 35 | self.branch4_conv1_bn = nn.BatchNorm2d(24) 36 | self.branch4_conv2 = nn.Conv2d(in_channels=24, out_channels=32, kernel_size=3, stride=1, padding=1) 37 | self.branch4_conv2_bn = nn.BatchNorm2d(32) 38 | self.branch4_conv3 = nn.Conv2d(in_channels=32, out_channels=32, kernel_size=3, stride=1, padding=1) 39 | self.branch4_conv3_bn = nn.BatchNorm2d(32) 40 | 41 | 42 | def forward(self, x): 43 | x1 = self.branch1_conv1_bn(self.branch1_conv1(x)) 44 | x2 = self.branch2_conv1_bn(self.branch2_conv1(self.branch2_pool(x))) 45 | x3 = self.branch3_conv2_bn(self.branch3_conv2(self.branch3_conv1_bn(self.branch3_conv1(x)))) 46 | x4 = self.branch4_conv3_bn(self.branch4_conv3(self.branch4_conv2_bn(self.branch4_conv2(self.branch4_conv1_bn(self.branch4_conv1(x)))))) 47 | out = torch.cat([x1, x2, x3, x4],dim=1) 48 | return out 49 | 50 | class FaceBoxes(nn.Module): 51 | def __init__(self, num_classes, phase): 52 | super(FaceBoxes, self).__init__() 53 | self.phase = phase 54 | self.num_classes = num_classes 55 | 56 | self.RapidlyDigestedConvolutionalLayers = nn.Sequential( 57 | Conv2dCReLU(in_channels=3,out_channels=24,kernel_size=7,stride=4,padding=3), 58 | nn.MaxPool2d(kernel_size=3,stride=2,padding=1), 59 | Conv2dCReLU(in_channels=48,out_channels=64,kernel_size=5,stride=2,padding=2), 60 | nn.MaxPool2d(kernel_size=3, stride=2,padding=1) 61 | ) 62 | 63 | self.MultipleScaleConvolutionalLayers = nn.Sequential( 64 | InceptionModules(), 65 | InceptionModules(), 66 | InceptionModules(), 67 | ) 68 | 69 | self.conv3_1 = nn.Conv2d(in_channels=128,out_channels=128,kernel_size=1,stride=1) 70 | self.conv3_2 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=2, padding=1) 71 | self.conv4_1 = nn.Conv2d(in_channels=256, out_channels=128, kernel_size=1, stride=1) 72 | self.conv4_2 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=2, padding=1) 73 | 74 | self.loc_layer1 = nn.Conv2d(in_channels=128, out_channels=21*4, kernel_size=3, stride=1, padding=1) 75 | self.conf_layer1 = nn.Conv2d(in_channels=128, out_channels=21*num_classes, kernel_size=3, stride=1, padding=1) 76 | 77 | self.loc_layer2 = nn.Conv2d(in_channels=256, out_channels=4, kernel_size=3, stride=1, padding=1) 78 | self.conf_layer2 = nn.Conv2d(in_channels=256, out_channels=num_classes, kernel_size=3, stride=1, padding=1) 79 | 80 | self.loc_layer3 = nn.Conv2d(in_channels=256, out_channels=4, kernel_size=3, stride=1, padding=1) 81 | self.conf_layer3 = nn.Conv2d(in_channels=256, out_channels=num_classes, kernel_size=3, stride=1, padding=1) 82 | 83 | if self.phase == 'test': 84 | self.softmax = nn.Softmax(dim=-1) 85 | elif self.phase == 'train': 86 | for m in self.modules(): 87 | if isinstance(m, nn.Conv2d): 88 | if m.bias is not None: 89 | nn.init.xavier_normal_(m.weight.data) 90 | nn.init.constant_(m.bias, 0) 91 | else: 92 | nn.init.xavier_normal_(m.weight.data) 93 | 94 | elif isinstance(m, nn.BatchNorm2d): 95 | nn.init.constant_(m.weight, 1) 96 | nn.init.constant_(m.bias, 0) 97 | 98 | def forward(self, x): 99 | x = self.RapidlyDigestedConvolutionalLayers(x) 100 | out1 = 
self.MultipleScaleConvolutionalLayers(x) 101 | out2 = self.conv3_2(self.conv3_1(out1)) 102 | out3 = self.conv4_2(self.conv4_1(out2)) 103 | 104 | loc1 = self.loc_layer1(out1) 105 | conf1 = self.conf_layer1(out1) 106 | 107 | loc2 = self.loc_layer2(out2) 108 | conf2 = self.conf_layer2(out2) 109 | 110 | loc3 = self.loc_layer3(out3) 111 | conf3 = self.conf_layer3(out3) 112 | 113 | locs = torch.cat([loc1.permute(0, 2, 3, 1).contiguous().view(loc1.size(0), -1), 114 | loc2.permute(0, 2, 3, 1).contiguous().view(loc2.size(0), -1), 115 | loc3.permute(0, 2, 3, 1).contiguous().view(loc3.size(0), -1)], dim=1) 116 | confs = torch.cat([conf1.permute(0, 2, 3, 1).contiguous().view(conf1.size(0), -1), 117 | conf2.permute(0, 2, 3, 1).contiguous().view(conf2.size(0), -1), 118 | conf3.permute(0, 2, 3, 1).contiguous().view(conf3.size(0), -1)], dim=1) 119 | 120 | if self.phase == 'test': 121 | out = (locs.view(locs.size(0), -1, 4), 122 | self.softmax(confs.view(-1, self.num_classes))) 123 | else: 124 | out = (locs.view(locs.size(0), -1, 4), 125 | confs.view(-1, self.num_classes)) 126 | return out 127 | 128 | 129 | if __name__ == '__main__': 130 | model = FaceBoxes(num_classes=2, phase='train') 131 | print(model) 132 | 133 | input = torch.randn(1, 3, 1024, 1024) 134 | out = model(input) 135 | print(out[0].shape) 136 | print(out[1].shape) 137 | 138 | -------------------------------------------------------------------------------- /FaceDetectorAndRecognition/LFFD.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | def Conv1x1ReLU(in_channels,out_channels): 5 | return nn.Sequential( 6 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=1), 7 | nn.ReLU6(inplace=True) 8 | ) 9 | 10 | def Conv3x3ReLU(in_channels,out_channels,stride,padding): 11 | return nn.Sequential( 12 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=3, stride=stride, padding=padding), 13 | nn.ReLU6(inplace=True) 14 | ) 15 | 16 | class LossBranch(nn.Module): 17 | def __init__(self,in_channels, mid_channels=64): 18 | super(LossBranch, self).__init__() 19 | self.conv1 = Conv1x1ReLU(in_channels, mid_channels) 20 | 21 | self.conv2_score = Conv1x1ReLU(mid_channels, mid_channels) 22 | self.classify = nn.Conv2d(in_channels=mid_channels, out_channels=2, kernel_size=1, stride=1) 23 | 24 | self.conv2_bbox = Conv1x1ReLU(mid_channels, mid_channels) 25 | self.regress = nn.Conv2d(in_channels=mid_channels, out_channels=4, kernel_size=1, stride=1) 26 | 27 | def forward(self, x): 28 | x = self.conv1(x) 29 | cls = self.classify(self.conv2_score(x)) 30 | reg = self.regress(self.conv2_bbox(x)) 31 | return cls,reg 32 | 33 | class LFFDBlock(nn.Module): 34 | def __init__(self, in_channels, out_channels, stride): 35 | super(LFFDBlock, self).__init__() 36 | mid_channels = out_channels 37 | self.downsampling = True if stride == 2 else False 38 | 39 | if self.downsampling: 40 | self.conv = nn.Conv2d(in_channels=in_channels, out_channels=mid_channels, kernel_size=3, stride=stride, padding=0) 41 | 42 | self.branch1_relu1 = nn.ReLU6(inplace=True) 43 | self.branch1_conv1 = Conv3x3ReLU(in_channels=mid_channels, out_channels=mid_channels, stride=1, padding=1) 44 | self.branch1_conv2 = nn.Conv2d(in_channels=mid_channels, out_channels=out_channels, kernel_size=3, stride=1, padding=1) 45 | 46 | self.relu = nn.ReLU6(inplace=True) 47 | 48 | def forward(self, x): 49 | if self.downsampling: 50 | x = self.conv(x) 51 | out = 
self.branch1_conv2(self.branch1_conv1(self.branch1_relu1(x))) 52 | return self.relu(out+x) 53 | 54 | class LFFD(nn.Module): 55 | def __init__(self, classes_num = 2): 56 | super(LFFD, self).__init__() 57 | 58 | self.tiny_part1 = nn.Sequential( 59 | Conv3x3ReLU(in_channels=3, out_channels=64, stride=2, padding = 0), 60 | LFFDBlock(in_channels=64, out_channels=64, stride=2), 61 | LFFDBlock(in_channels=64, out_channels=64, stride=1), 62 | LFFDBlock(in_channels=64, out_channels=64, stride=1), 63 | ) 64 | self.tiny_part2 = LFFDBlock(in_channels=64, out_channels=64, stride=1) 65 | 66 | self.small_part1 = LFFDBlock(in_channels=64, out_channels=64, stride=2) 67 | self.small_part2 = LFFDBlock(in_channels=64, out_channels=64, stride=1) 68 | 69 | self.medium_part = nn.Sequential( 70 | LFFDBlock(in_channels=64, out_channels=128, stride=2), 71 | LFFDBlock(in_channels=128, out_channels=128, stride=1), 72 | ) 73 | 74 | self.large_part1 = LFFDBlock(in_channels=128, out_channels=128, stride=2) 75 | self.large_part2 = LFFDBlock(in_channels=128, out_channels=128, stride=1) 76 | self.large_part3 = LFFDBlock(in_channels=128, out_channels=128, stride=1) 77 | 78 | self.loss_branch1 = LossBranch(in_channels=64) 79 | self.loss_branch2 = LossBranch(in_channels=64) 80 | self.loss_branch3 = LossBranch(in_channels=64) 81 | self.loss_branch4 = LossBranch(in_channels=64) 82 | self.loss_branch5 = LossBranch(in_channels=128) 83 | self.loss_branch6 = LossBranch(in_channels=128) 84 | self.loss_branch7 = LossBranch(in_channels=128) 85 | self.loss_branch8 = LossBranch(in_channels=128) 86 | 87 | def forward(self, x): 88 | branch1 = self.tiny_part1(x) 89 | branch2 = self.tiny_part2(branch1) 90 | branch3 = self.small_part1(branch2) 91 | branch4 = self.small_part2(branch3) 92 | branch5 = self.medium_part(branch4) 93 | branch6 = self.large_part1(branch5) 94 | branch7 = self.large_part2(branch6) 95 | branch8 = self.large_part3(branch7) 96 | 97 | cls1,loc1 = self.loss_branch1(branch1) 98 | cls2,loc2 = self.loss_branch2(branch2) 99 | cls3,loc3 = self.loss_branch3(branch3) 100 | cls4,loc4 = self.loss_branch4(branch4) 101 | cls5,loc5 = self.loss_branch5(branch5) 102 | cls6,loc6 = self.loss_branch6(branch6) 103 | cls7,loc7 = self.loss_branch7(branch7) 104 | cls8,loc8 = self.loss_branch8(branch8) 105 | 106 | cls = torch.cat([cls1.permute(0, 2, 3, 1).contiguous().view(loc1.size(0), -1), 107 | cls2.permute(0, 2, 3, 1).contiguous().view(loc1.size(0), -1), 108 | cls3.permute(0, 2, 3, 1).contiguous().view(loc1.size(0), -1), 109 | cls4.permute(0, 2, 3, 1).contiguous().view(loc1.size(0), -1), 110 | cls5.permute(0, 2, 3, 1).contiguous().view(loc1.size(0), -1), 111 | cls6.permute(0, 2, 3, 1).contiguous().view(loc1.size(0), -1), 112 | cls7.permute(0, 2, 3, 1).contiguous().view(loc1.size(0), -1), 113 | cls8.permute(0, 2, 3, 1).contiguous().view(loc1.size(0), -1)], dim=1) 114 | loc = torch.cat([loc1.permute(0, 2, 3, 1).contiguous().view(loc1.size(0), -1), 115 | loc2.permute(0, 2, 3, 1).contiguous().view(loc1.size(0), -1), 116 | loc3.permute(0, 2, 3, 1).contiguous().view(loc1.size(0), -1), 117 | loc4.permute(0, 2, 3, 1).contiguous().view(loc1.size(0), -1), 118 | loc5.permute(0, 2, 3, 1).contiguous().view(loc1.size(0), -1), 119 | loc6.permute(0, 2, 3, 1).contiguous().view(loc1.size(0), -1), 120 | loc7.permute(0, 2, 3, 1).contiguous().view(loc1.size(0), -1), 121 | loc8.permute(0, 2, 3, 1).contiguous().view(loc1.size(0), -1)], dim=1) 122 | out = (cls,loc) 123 | return out 124 | 125 | if __name__ == '__main__': 126 | net = LFFD() 127 | print(net) 128 | 
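# note (comment added for clarity): with an H x W input, the 8 loss branches tap the backbone at
# strides 4, 4, 8, 8, 16, 32, 32, 32; per the LFFD paper each branch covers one continuous band of
# face scales (roughly 10-560 px overall), which is why no anchors are needed.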
129 | input = torch.randn(1,3,480,640) 130 | output = net(input) 131 | print(output[0].shape) 132 | print(output[1].shape) 133 | 134 | -------------------------------------------------------------------------------- /FaceDetectorAndRecognition/README.md: -------------------------------------------------------------------------------- 1 | # FaceDetectorAndRecognition 2 | 3 | 4 | 5 | **FaceBoxes** 6 | 7 | FaceBoxes: A CPU Real-time Face Detector with High Accuracy, 2018 8 | 9 | https://arxiv.org/pdf/1708.05234.pdf 10 | 11 | https://liumin.blog.csdn.net/article/details/97698853 12 | 13 | 14 | 15 | **LFFD** 16 | 17 | LFFD: A Light and Fast Face Detector for Edge Devices, 2019 18 | 19 | https://arxiv.org/pdf/1904.10633.pdf 20 | 21 | https://liumin.blog.csdn.net/article/details/100181190 -------------------------------------------------------------------------------- /HumanPoseEstimation/Hourglass.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torchvision 4 | 5 | def ConvBNReLU(in_channels,out_channels,kernel_size,stride,padding=0): 6 | return nn.Sequential( 7 | nn.Conv2d(in_channels=in_channels,out_channels=out_channels,kernel_size=kernel_size,stride=stride,padding=padding), 8 | nn.BatchNorm2d(out_channels), 9 | nn.ReLU6(inplace=True) 10 | ) 11 | 12 | class ResidualBlock(nn.Module): 13 | def __init__(self, in_channels, out_channels): 14 | super(ResidualBlock, self).__init__() 15 | mid_channels = out_channels//2 16 | 17 | self.bottleneck = nn.Sequential( 18 | ConvBNReLU(in_channels=in_channels, out_channels=mid_channels, kernel_size=1, stride=1), 19 | ConvBNReLU(in_channels=mid_channels, out_channels=mid_channels, kernel_size=3, stride=1, padding=1), 20 | ConvBNReLU(in_channels=mid_channels, out_channels=out_channels, kernel_size=1, stride=1), 21 | ) 22 | self.shortcut = ConvBNReLU(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=1) 23 | 24 | def forward(self, x): 25 | out = self.bottleneck(x) 26 | return out+self.shortcut(x) 27 | 28 | 29 | class HourglassModule(nn.Module): 30 | def __init__(self, nChannels=256, nModules=2, numReductions = 4): 31 | super(HourglassModule, self).__init__() 32 | self.nChannels = nChannels 33 | self.nModules = nModules 34 | self.numReductions = numReductions 35 | 36 | self.residual_block = self._make_residual_layer(self.nModules, self.nChannels) 37 | self.max_pool = nn.MaxPool2d(kernel_size=2, stride=2) 38 | self.after_pool_block = self._make_residual_layer(self.nModules, self.nChannels) 39 | 40 | if numReductions > 1: 41 | self.hourglass_module = HourglassModule(self.nChannels, self.nModules, self.numReductions - 1) 42 | else: 43 | self.num1res_block = self._make_residual_layer(self.nModules, self.nChannels) 44 | 45 | self.lowres_block = self._make_residual_layer(self.nModules, self.nChannels) 46 | 47 | self.upsample = nn.Upsample(scale_factor=2) 48 | 49 | def _make_residual_layer(self, nModules, nChannels): 50 | _residual_blocks = [] 51 | for _ in range(nModules): 52 | _residual_blocks.append(ResidualBlock(in_channels=nChannels, out_channels=nChannels)) 53 | return nn.Sequential(*_residual_blocks) 54 | 55 | def forward(self, x): 56 | out1 = self.residual_block(x) 57 | 58 | out2 = self.max_pool(x) 59 | out2 = self.after_pool_block(out2) 60 | 61 | if self.numReductions > 1: 62 | out2 = self.hourglass_module(out2) 63 | else: 64 | out2 = self.num1res_block(out2) 65 | out2 = self.lowres_block(out2) 66 | out2 = self.upsample(out2) 67 | 68 | return out1 + 
out2 69 | 70 | class Hourglass(nn.Module): 71 | def __init__(self, nJoints): 72 | super(Hourglass, self).__init__() 73 | 74 | self.first_conv = ConvBNReLU(in_channels=3, out_channels=64, kernel_size=7, stride=2, padding=3) 75 | self.residual_block1 = ResidualBlock(in_channels=64, out_channels=128) 76 | self.max_pool = nn.MaxPool2d(kernel_size=2, stride=2) 77 | self.residual_block2 = ResidualBlock(in_channels=128, out_channels=128) 78 | self.residual_block3 = ResidualBlock(in_channels=128, out_channels=256) 79 | 80 | self.hourglass_module1 = HourglassModule(nChannels=256, nModules=2, numReductions = 4) 81 | self.hourglass_module2 = HourglassModule(nChannels=256, nModules=2, numReductions = 4) 82 | 83 | self.after_hourglass_conv1 = ConvBNReLU(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1) 84 | self.proj_conv1 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=1, stride=1) 85 | self.out_conv1 = nn.Conv2d(in_channels=256,out_channels=nJoints,kernel_size=1,stride=1) 86 | self.remap_conv1 = nn.Conv2d(in_channels=nJoints, out_channels=256, kernel_size=1, stride=1) 87 | 88 | self.after_hourglass_conv2 = ConvBNReLU(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1) 89 | self.proj_conv2 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=1, stride=1) 90 | self.out_conv2 = nn.Conv2d(in_channels=256, out_channels=nJoints, kernel_size=1, stride=1) 91 | self.remap_conv2 = nn.Conv2d(in_channels=nJoints, out_channels=256, kernel_size=1, stride=1) 92 | 93 | def forward(self, x): 94 | x = self.max_pool(self.residual_block1(self.first_conv(x))) 95 | x = self.residual_block3(self.residual_block2(x)) 96 | 97 | x = self.hourglass_module1(x) 98 | residual1= x = self.after_hourglass_conv1(x) 99 | out1 = self.out_conv1(x) 100 | residual2 = x = residual1 + self.remap_conv1(out1)+self.proj_conv1(x) 101 | 102 | x = self.hourglass_module2(x) 103 | x = self.after_hourglass_conv2(x) 104 | out2 = self.out_conv2(x) 105 | x = residual2 + self.remap_conv2(out2) + self.proj_conv2(x) 106 | 107 | return out1, out2 108 | 109 | if __name__ == '__main__': 110 | model = Hourglass(nJoints=16) 111 | print(model) 112 | 113 | data = torch.randn(1,3,256,256) 114 | out1, out2 = model(data) 115 | print(out1.shape) 116 | print(out2.shape) 117 | 118 | -------------------------------------------------------------------------------- /HumanPoseEstimation/LPN.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torchvision 4 | from context_block import ContextBlock 5 | 6 | class LBwithGCBlock(nn.Module): 7 | expansion = 1 8 | def __init__(self, inplanes, planes, stride=1, downsample=None): 9 | super(LBwithGCBlock, self).__init__() 10 | self.downsample = downsample 11 | self.conv1 = nn.Conv2d(in_channels=inplanes,out_channels=planes,kernel_size=1,stride=1,padding=0) 12 | self.conv1_bn = nn.BatchNorm2d(planes) 13 | self.conv1_bn_relu = nn.ReLU(inplace=True) 14 | self.conv2 = nn.Conv2d(in_channels=planes, out_channels=planes, kernel_size=3, stride=stride, padding=1) 15 | self.conv2_bn = nn.BatchNorm2d(planes) 16 | self.conv2_bn_relu = nn.ReLU(inplace=True) 17 | self.conv3 = nn.Conv2d(in_channels=planes, out_channels=planes * self.expansion, kernel_size=1, stride=1, padding=0) 18 | self.conv3_bn = nn.BatchNorm2d(planes * self.expansion) 19 | self.gcb = ContextBlock(planes * self.expansion,ratio=2) 20 | self.relu = nn.ReLU(inplace=True) 21 | 22 | def forward(self, x): 23 | residual = x 24 | out = 
self.conv1_bn_relu(self.conv1_bn(self.conv1(x))) 25 | out = self.conv2_bn_relu(self.conv2_bn(self.conv2(out))) 26 | out = self.conv3_bn(self.conv3(out)) 27 | out = self.gcb(out) 28 | if self.downsample is not None: 29 | residual = self.downsample(x) 30 | out += residual 31 | return self.relu(out) 32 | 33 | def computeGCD(a,b): 34 | while a != b: 35 | if a > b: 36 | a = a - b 37 | else: 38 | b = b - a 39 | return b 40 | 41 | def GroupDeconv(inplanes, planes, kernel_size, stride, padding, output_padding): 42 | groups = computeGCD(inplanes, planes) 43 | return nn.Sequential( 44 | nn.ConvTranspose2d(in_channels=inplanes, out_channels=2*planes, kernel_size=kernel_size, 45 | stride=stride, padding=padding, output_padding=output_padding, groups=groups), 46 | nn.Conv2d(2*planes, planes, kernel_size=1, stride=1, padding=0) 47 | ) 48 | 49 | class LPN(nn.Module): 50 | def __init__(self, nJoints): 51 | super(LPN, self).__init__() 52 | self.inplanes = 64 53 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3) 54 | self.bn1 = nn.BatchNorm2d(64) 55 | self.relu = nn.ReLU(inplace=True) 56 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 57 | self.layer1 = self._make_layer(LBwithGCBlock, 64, 3) 58 | self.layer2 = self._make_layer(LBwithGCBlock, 128, 4, stride=2) 59 | self.layer3 = self._make_layer(LBwithGCBlock, 256, 6, stride=2) 60 | self.layer4 = self._make_layer(LBwithGCBlock, 512, 3, stride=1) 61 | 62 | self.deconv_layers = self._make_deconv_group_layer() 63 | self.final_layer = nn.Conv2d(in_channels=self.inplanes,out_channels=nJoints,kernel_size=1,stride=1,padding=0) 64 | 65 | def _make_layer(self, block, planes, blocks, stride=1): 66 | downsample = None 67 | if stride != 1 or self.inplanes != planes * block.expansion: 68 | downsample = nn.Sequential( 69 | nn.Conv2d(self.inplanes, planes * block.expansion,kernel_size=1, stride=stride), 70 | nn.BatchNorm2d(planes * block.expansion), 71 | ) 72 | layers = [] 73 | layers.append(block(self.inplanes, planes, stride, downsample)) 74 | self.inplanes = planes * block.expansion 75 | for i in range(1, blocks): 76 | layers.append(block(self.inplanes, planes)) 77 | return nn.Sequential(*layers) 78 | 79 | def _make_deconv_group_layer(self): 80 | layers = [] 81 | planes = 256 82 | for i in range(2): 83 | planes = planes//2 84 | # layers.append(nn.ConvTranspose2d(in_channels=self.inplanes,out_channels=256,kernel_size=4,stride=2,padding=1,output_padding=0,groups=computeGCD(self.inplanes,256))) 85 | layers.append(GroupDeconv(inplanes=self.inplanes, planes=planes, kernel_size=4, stride=2, padding=1, output_padding=0)) 86 | layers.append(nn.BatchNorm2d(planes)) 87 | layers.append(nn.ReLU(inplace=True)) 88 | self.inplanes = planes 89 | return nn.Sequential(*layers) 90 | 91 | def forward(self, x): 92 | x = self.conv1(x) 93 | x = self.bn1(x) 94 | x = self.relu(x) 95 | x = self.maxpool(x) 96 | 97 | x = self.layer1(x) 98 | x = self.layer2(x) 99 | x = self.layer3(x) 100 | x = self.layer4(x) 101 | 102 | x = self.deconv_layers(x) 103 | x = self.final_layer(x) 104 | return x 105 | 106 | if __name__ == '__main__': 107 | model = LPN(nJoints=16) 108 | print(model) 109 | 110 | data = torch.randn(1,3,256,192) 111 | out = model(data) 112 | print(out.shape) 113 | -------------------------------------------------------------------------------- /HumanPoseEstimation/README.md: -------------------------------------------------------------------------------- 1 | # HumanPoseEstimation-network 2 | Pytorch implementation of HumanPoseEstimation-network 3 | 4 | 5 | 
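All three networks below regress per-joint heatmaps at 1/4 of the input resolution (e.g. `[N, nJoints, 64, 48]` for a 256x192 input). A minimal decoding sketch, added here for illustration only — the `decode_heatmaps` helper and the plain-argmax convention are assumptions, not part of this repo:

```python
import torch

def decode_heatmaps(heatmaps, stride=4):
    # hypothetical helper: heatmaps [N, K, H, W] -> (x, y) pixel coordinates per joint, [N, K, 2]
    n, k, h, w = heatmaps.shape
    idx = heatmaps.view(n, k, -1).argmax(dim=2)                      # flat argmax per joint
    xs = (idx % w).float() * stride                                  # column -> input x
    ys = torch.div(idx, w, rounding_mode='floor').float() * stride   # row -> input y
    return torch.stack([xs, ys], dim=2)
```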
6 | **StackedHG:** 7 | 8 | Stacked Hourglass Networks for Human Pose Estimation ,2016 9 | 10 | https://arxiv.org/pdf/1603.06937.pdf 11 | 12 | https://liumin.blog.csdn.net/article/details/101484455 13 | 14 | 15 | 16 | **Simple Baselines** 17 | 18 | Simple Baselines for Human Pose Estimation and Tracking 19 | 20 | https://arxiv.org/pdf/1804.06208.pdf 21 | 22 | https://liumin.blog.csdn.net/article/details/103447040 23 | 24 | 25 | 26 | **LPN:** 27 | 28 | Simple and Lightweight Human Pose Estimation 29 | 30 | https://arxiv.org/pdf/1911.10346v1.pdf 31 | 32 | https://liumin.blog.csdn.net/article/details/103448034 -------------------------------------------------------------------------------- /HumanPoseEstimation/SimpleBaseline.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torchvision 4 | 5 | 6 | class ResBlock(nn.Module): 7 | expansion = 4 8 | def __init__(self, inplanes, planes, stride=1, downsample=None): 9 | super(ResBlock, self).__init__() 10 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 11 | self.bn1 = nn.BatchNorm2d(planes) 12 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,padding=1, bias=False) 13 | self.bn2 = nn.BatchNorm2d(planes) 14 | self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1,bias=False) 15 | self.bn3 = nn.BatchNorm2d(planes * self.expansion) 16 | self.relu = nn.ReLU(inplace=True) 17 | self.downsample = downsample 18 | self.stride = stride 19 | 20 | def forward(self, x): 21 | residual = x 22 | out = self.relu(self.bn1(self.conv1(x))) 23 | out = self.relu(self.bn2(self.conv2(out))) 24 | out = self.bn3(self.conv3(out)) 25 | if self.downsample is not None: 26 | residual = self.downsample(x) 27 | out += residual 28 | return self.relu(out) 29 | 30 | 31 | class SimpleBaseline(nn.Module): 32 | def __init__(self, nJoints): 33 | super(SimpleBaseline, self).__init__() 34 | self.inplanes = 64 35 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,bias=False) 36 | self.bn1 = nn.BatchNorm2d(64) 37 | self.relu = nn.ReLU(inplace=True) 38 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 39 | self.layer1 = self._make_layer(ResBlock, 64, 3) 40 | self.layer2 = self._make_layer(ResBlock, 128, 4, stride=2) 41 | self.layer3 = self._make_layer(ResBlock, 256, 6, stride=2) 42 | self.layer4 = self._make_layer(ResBlock, 512, 3, stride=2) 43 | 44 | self.deconv_layers = self._make_deconv_layer() 45 | self.final_layer = nn.Conv2d(in_channels=256,out_channels=nJoints,kernel_size=1,stride=1,padding=0) 46 | 47 | def _make_layer(self, block, planes, blocks, stride=1): 48 | downsample = None 49 | if stride != 1 or self.inplanes != planes * block.expansion: 50 | downsample = nn.Sequential( 51 | nn.Conv2d(self.inplanes, planes * block.expansion,kernel_size=1, stride=stride, bias=False), 52 | nn.BatchNorm2d(planes * block.expansion), 53 | ) 54 | 55 | layers = [] 56 | layers.append(block(self.inplanes, planes, stride, downsample)) 57 | self.inplanes = planes * block.expansion 58 | for i in range(1, blocks): 59 | layers.append(block(self.inplanes, planes)) 60 | return nn.Sequential(*layers) 61 | 62 | 63 | def _make_deconv_layer(self): 64 | layers = [] 65 | for i in range(3): 66 | layers.append(nn.ConvTranspose2d(in_channels=self.inplanes,out_channels=256,kernel_size=4, 67 | stride=2,padding=1,output_padding=0,bias=False)) 68 | layers.append(nn.BatchNorm2d(256)) 69 | layers.append(nn.ReLU(inplace=True)) 70 | self.inplanes = 256 
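# note (comment added for clarity): each ConvTranspose2d(kernel_size=4, stride=2, padding=1)
# exactly doubles H and W, so the three stages turn the 8x6 ResNet-50 C5 map of a
# 256x192 input into 64x48 heatmaps.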
71 | return nn.Sequential(*layers) 72 | 73 | 74 | def forward(self, x): 75 | x = self.conv1(x) 76 | x = self.bn1(x) 77 | x = self.relu(x) 78 | x = self.maxpool(x) 79 | 80 | x = self.layer1(x) 81 | x = self.layer2(x) 82 | x = self.layer3(x) 83 | x = self.layer4(x) 84 | 85 | x = self.deconv_layers(x) 86 | x = self.final_layer(x) 87 | return x 88 | 89 | if __name__ == '__main__': 90 | model = SimpleBaseline(nJoints=16) 91 | print(model) 92 | 93 | data = torch.randn(1,3,256,192) 94 | out = model(data) 95 | print(out.shape) 96 | -------------------------------------------------------------------------------- /HumanPoseEstimation/context_block.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | class ContextBlock(nn.Module): 5 | def __init__(self, 6 | inplanes, 7 | ratio, 8 | pooling_type='att', 9 | fusion_types=('channel_add', )): 10 | super(ContextBlock, self).__init__() 11 | assert pooling_type in ['avg', 'att'] 12 | assert isinstance(fusion_types, (list, tuple)) 13 | valid_fusion_types = ['channel_add', 'channel_mul'] 14 | assert all([f in valid_fusion_types for f in fusion_types]) 15 | assert len(fusion_types) > 0, 'at least one fusion should be used' 16 | self.inplanes = inplanes 17 | self.ratio = ratio 18 | self.planes = int(inplanes * ratio) 19 | self.pooling_type = pooling_type 20 | self.fusion_types = fusion_types 21 | if pooling_type == 'att': 22 | self.conv_mask = nn.Conv2d(inplanes, 1, kernel_size=1) 23 | self.softmax = nn.Softmax(dim=2) 24 | else: 25 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 26 | if 'channel_add' in fusion_types: 27 | self.channel_add_conv = nn.Sequential( 28 | nn.Conv2d(self.inplanes, self.planes, kernel_size=1), 29 | nn.LayerNorm([self.planes, 1, 1]), 30 | nn.ReLU(inplace=True), # yapf: disable 31 | nn.Conv2d(self.planes, self.inplanes, kernel_size=1)) 32 | else: 33 | self.channel_add_conv = None 34 | if 'channel_mul' in fusion_types: 35 | self.channel_mul_conv = nn.Sequential( 36 | nn.Conv2d(self.inplanes, self.planes, kernel_size=1), 37 | nn.LayerNorm([self.planes, 1, 1]), 38 | nn.ReLU(inplace=True), # yapf: disable 39 | nn.Conv2d(self.planes, self.inplanes, kernel_size=1)) 40 | else: 41 | self.channel_mul_conv = None 42 | 43 | def spatial_pool(self, x): 44 | batch, channel, height, width = x.size() 45 | if self.pooling_type == 'att': 46 | input_x = x 47 | # [N, C, H * W] 48 | input_x = input_x.view(batch, channel, height * width) 49 | # [N, 1, C, H * W] 50 | input_x = input_x.unsqueeze(1) 51 | # [N, 1, H, W] 52 | context_mask = self.conv_mask(x) 53 | # [N, 1, H * W] 54 | context_mask = context_mask.view(batch, 1, height * width) 55 | # [N, 1, H * W] 56 | context_mask = self.softmax(context_mask) 57 | # [N, 1, H * W, 1] 58 | context_mask = context_mask.unsqueeze(-1) 59 | # [N, 1, C, 1] 60 | context = torch.matmul(input_x, context_mask) 61 | # [N, C, 1, 1] 62 | context = context.view(batch, channel, 1, 1) 63 | else: 64 | # [N, C, 1, 1] 65 | context = self.avg_pool(x) 66 | 67 | return context 68 | 69 | def forward(self, x): 70 | # [N, C, 1, 1] 71 | context = self.spatial_pool(x) 72 | 73 | out = x 74 | if self.channel_mul_conv is not None: 75 | # [N, C, 1, 1] 76 | channel_mul_term = torch.sigmoid(self.channel_mul_conv(context)) 77 | out = out * channel_mul_term 78 | if self.channel_add_conv is not None: 79 | # [N, C, 1, 1] 80 | channel_add_term = self.channel_add_conv(context) 81 | out = out + channel_add_term 82 | 83 | return out 84 | 
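# Added for illustration (not part of the original file): a quick shape check in the
# style of the repo's other modules; ContextBlock is shape-preserving, values are arbitrary.
if __name__ == '__main__':
    model = ContextBlock(inplanes=64, ratio=2)
    input = torch.randn(1, 64, 32, 24)
    out = model(input)
    print(out.shape)  # expected: torch.Size([1, 64, 32, 24])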
-------------------------------------------------------------------------------- /InstanceSegmentation/PolarMask.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torchvision 4 | 5 | def Conv3x3ReLU(in_channels,out_channels): 6 | return nn.Sequential( 7 | nn.Conv2d(in_channels=in_channels,out_channels=out_channels,kernel_size=3,stride=1,padding=1), 8 | nn.ReLU6(inplace=True) 9 | ) 10 | 11 | def locLayer(in_channels,out_channels): 12 | return nn.Sequential( 13 | Conv3x3ReLU(in_channels=in_channels, out_channels=in_channels), 14 | Conv3x3ReLU(in_channels=in_channels, out_channels=in_channels), 15 | Conv3x3ReLU(in_channels=in_channels, out_channels=in_channels), 16 | Conv3x3ReLU(in_channels=in_channels, out_channels=in_channels), 17 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=3, stride=1, padding=1), 18 | ) 19 | 20 | def conf_centernessLayer(in_channels,out_channels): 21 | return nn.Sequential( 22 | Conv3x3ReLU(in_channels=in_channels, out_channels=in_channels), 23 | Conv3x3ReLU(in_channels=in_channels, out_channels=in_channels), 24 | Conv3x3ReLU(in_channels=in_channels, out_channels=in_channels), 25 | Conv3x3ReLU(in_channels=in_channels, out_channels=in_channels), 26 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=3, stride=1, padding=1), 27 | ) 28 | 29 | class PolarMask(nn.Module): 30 | def __init__(self, num_classes=21): 31 | super(PolarMask, self).__init__() 32 | self.num_classes = num_classes 33 | resnet = torchvision.models.resnet50() 34 | layers = list(resnet.children()) 35 | 36 | self.layer1 = nn.Sequential(*layers[:5]) 37 | self.layer2 = nn.Sequential(*layers[5]) 38 | self.layer3 = nn.Sequential(*layers[6]) 39 | self.layer4 = nn.Sequential(*layers[7]) 40 | 41 | self.lateral5 = nn.Conv2d(in_channels=2048, out_channels=256, kernel_size=1) 42 | self.lateral4 = nn.Conv2d(in_channels=1024, out_channels=256, kernel_size=1) 43 | self.lateral3 = nn.Conv2d(in_channels=512, out_channels=256, kernel_size=1) 44 | 45 | self.upsample4 = nn.ConvTranspose2d(in_channels=256, out_channels=256, kernel_size=4, stride=2, padding=1) 46 | self.upsample3 = nn.ConvTranspose2d(in_channels=256, out_channels=256, kernel_size=4, stride=2, padding=1) 47 | 48 | self.downsample6 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=2, padding=1) 49 | self.downsample5 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=2, padding=1) 50 | 51 | self.loc_layer3 = locLayer(in_channels=256,out_channels=36) 52 | self.conf_centerness_layer3 = conf_centernessLayer(in_channels=256,out_channels=self.num_classes + 1)  # num_classes scores + 1 centerness channel, matching the split() in forward 53 | 54 | self.loc_layer4 = locLayer(in_channels=256, out_channels=36) 55 | self.conf_centerness_layer4 = conf_centernessLayer(in_channels=256, out_channels=self.num_classes + 1) 56 | 57 | self.loc_layer5 = locLayer(in_channels=256, out_channels=36) 58 | self.conf_centerness_layer5 = conf_centernessLayer(in_channels=256, out_channels=self.num_classes + 1) 59 | 60 | self.loc_layer6 = locLayer(in_channels=256, out_channels=36) 61 | self.conf_centerness_layer6 = conf_centernessLayer(in_channels=256, out_channels=self.num_classes + 1) 62 | 63 | self.loc_layer7 = locLayer(in_channels=256, out_channels=36) 64 | self.conf_centerness_layer7 = conf_centernessLayer(in_channels=256, out_channels=self.num_classes + 1) 65 | 66 | self.init_params() 67 | 68 | def init_params(self): 69 | for m in self.modules(): 70 | if isinstance(m, nn.Conv2d): 71 | 
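# He/Kaiming initialization matches the ReLU-family activations used throughout the FPN and heads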
nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 72 | elif isinstance(m, nn.BatchNorm2d): 73 | nn.init.constant_(m.weight, 1) 74 | nn.init.constant_(m.bias, 0) 75 | 76 | def forward(self, x): 77 | x = self.layer1(x) 78 | c3 =x = self.layer2(x) 79 | c4 =x = self.layer3(x) 80 | c5 = x = self.layer4(x) 81 | 82 | p5 = self.lateral5(c5) 83 | p4 = self.upsample4(p5) + self.lateral4(c4) 84 | p3 = self.upsample3(p4) + self.lateral3(c3) 85 | 86 | p6 = self.downsample5(p5) 87 | p7 = self.downsample6(p6) 88 | 89 | loc3 = self.loc_layer3(p3) 90 | conf_centerness3 = self.conf_centerness_layer3(p3) 91 | conf3, centerness3 = conf_centerness3.split([self.num_classes, 1], dim=1) 92 | 93 | loc4 = self.loc_layer4(p4) 94 | conf_centerness4 = self.conf_centerness_layer4(p4) 95 | conf4, centerness4 = conf_centerness4.split([self.num_classes, 1], dim=1) 96 | 97 | loc5 = self.loc_layer5(p5) 98 | conf_centerness5 = self.conf_centerness_layer5(p5) 99 | conf5, centerness5 = conf_centerness5.split([self.num_classes, 1], dim=1) 100 | 101 | loc6 = self.loc_layer6(p6) 102 | conf_centerness6 = self.conf_centerness_layer6(p6) 103 | conf6, centerness6 = conf_centerness6.split([self.num_classes, 1], dim=1) 104 | 105 | loc7 = self.loc_layer7(p7) 106 | conf_centerness7 = self.conf_centerness_layer7(p7) 107 | conf7, centerness7 = conf_centerness7.split([self.num_classes, 1], dim=1) 108 | 109 | locs = torch.cat([loc3.permute(0, 2, 3, 1).contiguous().view(loc3.size(0), -1), 110 | loc4.permute(0, 2, 3, 1).contiguous().view(loc4.size(0), -1), 111 | loc5.permute(0, 2, 3, 1).contiguous().view(loc5.size(0), -1), 112 | loc6.permute(0, 2, 3, 1).contiguous().view(loc6.size(0), -1), 113 | loc7.permute(0, 2, 3, 1).contiguous().view(loc7.size(0), -1)],dim=1) 114 | 115 | confs = torch.cat([conf3.permute(0, 2, 3, 1).contiguous().view(conf3.size(0), -1), 116 | conf4.permute(0, 2, 3, 1).contiguous().view(conf4.size(0), -1), 117 | conf5.permute(0, 2, 3, 1).contiguous().view(conf5.size(0), -1), 118 | conf6.permute(0, 2, 3, 1).contiguous().view(conf6.size(0), -1), 119 | conf7.permute(0, 2, 3, 1).contiguous().view(conf7.size(0), -1),], dim=1) 120 | 121 | centernesses = torch.cat([centerness3.permute(0, 2, 3, 1).contiguous().view(centerness3.size(0), -1), 122 | centerness4.permute(0, 2, 3, 1).contiguous().view(centerness4.size(0), -1), 123 | centerness5.permute(0, 2, 3, 1).contiguous().view(centerness5.size(0), -1), 124 | centerness6.permute(0, 2, 3, 1).contiguous().view(centerness6.size(0), -1), 125 | centerness7.permute(0, 2, 3, 1).contiguous().view(centerness7.size(0), -1), ], dim=1) 126 | 127 | out = (locs, confs, centernesses) 128 | return out 129 | 130 | if __name__ == '__main__': 131 | model = PolarMask() 132 | print(model) 133 | 134 | input = torch.randn(1, 3, 800, 1024) 135 | out = model(input) 136 | print(out[0].shape) 137 | print(out[1].shape) 138 | print(out[2].shape) -------------------------------------------------------------------------------- /InstanceSegmentation/README.md: -------------------------------------------------------------------------------- 1 | # InstanceSegmentation-network 2 | Pytorch implementation of InstanceSegmentation-network 3 | 4 | 5 | 6 | **PolarMask :** 7 | 8 | PolarMask: Single Shot Instance Segmentation with Polar Representation ,2019 9 | 10 | https://arxiv.org/pdf/1909.13226.pdf 11 | 12 | https://liumin.blog.csdn.net/article/details/101975085 13 | 14 | -------------------------------------------------------------------------------- /Lightweight/MobileNetV1.py: 
-------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torchvision 4 | 5 | def BottleneckV1(in_channels, out_channels, stride): 6 | return nn.Sequential( 7 | nn.Conv2d(in_channels=in_channels,out_channels=in_channels,kernel_size=3,stride=stride,padding=1,groups=in_channels), 8 | nn.BatchNorm2d(in_channels), 9 | nn.ReLU6(inplace=True), 10 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=1), 11 | nn.BatchNorm2d(out_channels), 12 | nn.ReLU6(inplace=True) 13 | ) 14 | 15 | class MobileNetV1(nn.Module): 16 | def __init__(self, num_classes=1000): 17 | super(MobileNetV1, self).__init__() 18 | 19 | self.first_conv = nn.Sequential( 20 | nn.Conv2d(in_channels=3,out_channels=32,kernel_size=3,stride=2,padding=1), 21 | nn.BatchNorm2d(32), 22 | nn.ReLU6(inplace=True), 23 | ) 24 | 25 | self.bottleneck = nn.Sequential( 26 | BottleneckV1(32, 64, stride=1), 27 | BottleneckV1(64, 128, stride=2), 28 | BottleneckV1(128, 128, stride=1), 29 | BottleneckV1(128, 256, stride=2), 30 | BottleneckV1(256, 256, stride=1), 31 | BottleneckV1(256, 512, stride=2), 32 | BottleneckV1(512, 512, stride=1), 33 | BottleneckV1(512, 512, stride=1), 34 | BottleneckV1(512, 512, stride=1), 35 | BottleneckV1(512, 512, stride=1), 36 | BottleneckV1(512, 512, stride=1), 37 | BottleneckV1(512, 1024, stride=2), 38 | BottleneckV1(1024, 1024, stride=1), 39 | ) 40 | 41 | self.avg_pool = nn.AvgPool2d(kernel_size=7,stride=1) 42 | self.linear = nn.Linear(in_features=1024,out_features=num_classes) 43 | self.dropout = nn.Dropout(p=0.2) 44 | self.softmax = nn.Softmax(dim=1) 45 | 46 | self.init_params() 47 | 48 | def init_params(self): 49 | for m in self.modules(): 50 | if isinstance(m, nn.Conv2d): 51 | nn.init.kaiming_normal_(m.weight) 52 | nn.init.constant_(m.bias,0) 53 | elif isinstance(m, nn.Linear) or isinstance(m, nn.BatchNorm2d): 54 | nn.init.constant_(m.weight, 1) 55 | nn.init.constant_(m.bias, 0) 56 | 57 | def forward(self, x): 58 | x = self.first_conv(x) 59 | x = self.bottleneck(x) 60 | x = self.avg_pool(x) 61 | x = x.view(x.size(0),-1) 62 | x = self.dropout(x) 63 | x = self.linear(x) 64 | out = self.softmax(x) 65 | return out 66 | 67 | if __name__=='__main__': 68 | model = MobileNetV1() 69 | print(model) 70 | 71 | input = torch.randn(1, 3, 224, 224) 72 | out = model(input) 73 | print(out.shape) 74 | -------------------------------------------------------------------------------- /Lightweight/MobileNetV2.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torchvision 4 | from functools import reduce 5 | 6 | 7 | def Conv3x3BNReLU(in_channels,out_channels,stride,groups): 8 | return nn.Sequential( 9 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=3, stride=stride, padding=1, groups=groups), 10 | nn.BatchNorm2d(out_channels), 11 | nn.ReLU6(inplace=True) 12 | ) 13 | 14 | def Conv1x1BNReLU(in_channels,out_channels): 15 | return nn.Sequential( 16 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=1), 17 | nn.BatchNorm2d(out_channels), 18 | nn.ReLU6(inplace=True) 19 | ) 20 | 21 | def Conv1x1BN(in_channels,out_channels): 22 | return nn.Sequential( 23 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=1), 24 | nn.BatchNorm2d(out_channels) 25 | ) 26 | 27 | class InvertedResidual(nn.Module): 28 | def __init__(self, in_channels, out_channels, stride, 
expansion_factor=6): 29 | super(InvertedResidual, self).__init__() 30 | self.stride = stride 31 | mid_channels = (in_channels * expansion_factor) 32 | 33 | self.bottleneck = nn.Sequential( 34 | Conv1x1BNReLU(in_channels, mid_channels), 35 | Conv3x3BNReLU(mid_channels, mid_channels, stride,groups=mid_channels), 36 | Conv1x1BN(mid_channels, out_channels) 37 | ) 38 | 39 | if self.stride == 1: 40 | self.shortcut = Conv1x1BN(in_channels, out_channels) 41 | 42 | def forward(self, x): 43 | out = self.bottleneck(x) 44 | out = (out+self.shortcut(x)) if self.stride==1 else out 45 | return out 46 | 47 | class MobileNetV2(nn.Module): 48 | def __init__(self, num_classes=1000): 49 | super(MobileNetV2,self).__init__() 50 | 51 | self.first_conv = Conv3x3BNReLU(3,32,2,groups=1) 52 | 53 | self.layer1 = self.make_layer(in_channels=32, out_channels=16, stride=1, block_num=1) 54 | self.layer2 = self.make_layer(in_channels=16, out_channels=24, stride=2, block_num=2) 55 | self.layer3 = self.make_layer(in_channels=24, out_channels=32, stride=2, block_num=3) 56 | self.layer4 = self.make_layer(in_channels=32, out_channels=64, stride=2, block_num=4) 57 | self.layer5 = self.make_layer(in_channels=64, out_channels=96, stride=1, block_num=3) 58 | self.layer6 = self.make_layer(in_channels=96, out_channels=160, stride=2, block_num=3) 59 | self.layer7 = self.make_layer(in_channels=160, out_channels=320, stride=1, block_num=1) 60 | 61 | self.last_conv = Conv1x1BNReLU(320,1280) 62 | self.avgpool = nn.AvgPool2d(kernel_size=7,stride=1) 63 | self.dropout = nn.Dropout(p=0.2) 64 | self.linear = nn.Linear(in_features=1280,out_features=num_classes) 65 | 66 | def make_layer(self, in_channels, out_channels, stride, block_num): 67 | layers = [] 68 | layers.append(InvertedResidual(in_channels, out_channels, stride)) 69 | for i in range(1, block_num): 70 | layers.append(InvertedResidual(out_channels,out_channels,1)) 71 | return nn.Sequential(*layers) 72 | 73 | def init_params(self): 74 | for m in self.modules(): 75 | if isinstance(m, nn.Conv2d): 76 | nn.init.kaiming_normal_(m.weight) 77 | nn.init.constant_(m.bias, 0) 78 | elif isinstance(m, nn.Linear) or isinstance(m, nn.BatchNorm2d): 79 | nn.init.constant_(m.weight, 1) 80 | nn.init.constant_(m.bias, 0) 81 | 82 | def forward(self, x): 83 | x = self.first_conv(x) 84 | x = self.layer1(x) 85 | x = self.layer2(x) 86 | x = self.layer3(x) 87 | x = self.layer4(x) 88 | x = self.layer5(x) 89 | x = self.layer6(x) 90 | x = self.layer7(x) 91 | x = self.last_conv(x) 92 | x = self.avgpool(x) 93 | x = x.view(x.size(0),-1) 94 | x = self.dropout(x) 95 | out = self.linear(x) 96 | return out 97 | 98 | 99 | if __name__=='__main__': 100 | model = MobileNetV2() 101 | # model = torchvision.models.MobileNetV2() 102 | print(model) 103 | 104 | input = torch.randn(1, 3, 224, 224) 105 | out = model(input) 106 | print(out.shape) 107 | -------------------------------------------------------------------------------- /Lightweight/MobileNetXt.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torchvision 4 | from functools import reduce 5 | 6 | 7 | def Conv3x3BN(in_channels,out_channels,stride=1,groups=1): 8 | return nn.Sequential( 9 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=3, stride=stride, padding=1, groups=groups), 10 | nn.BatchNorm2d(out_channels) 11 | ) 12 | 13 | def Conv3x3BNReLU(in_channels,out_channels,stride=1,groups=1): 14 | return nn.Sequential( 15 | nn.Conv2d(in_channels=in_channels, 
out_channels=out_channels, kernel_size=3, stride=stride, padding=1, groups=groups), 16 | nn.BatchNorm2d(out_channels), 17 | nn.ReLU6(inplace=True) 18 | ) 19 | 20 | def Conv1x1BN(in_channels,out_channels): 21 | return nn.Sequential( 22 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=1), 23 | nn.BatchNorm2d(out_channels) 24 | ) 25 | 26 | def Conv1x1BNReLU(in_channels,out_channels): 27 | return nn.Sequential( 28 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=1), 29 | nn.BatchNorm2d(out_channels), 30 | nn.ReLU6(inplace=True) 31 | ) 32 | 33 | class SandglassBlock(nn.Module): 34 | def __init__(self, in_channels, out_channels, stride, expansion_factor=6): 35 | super(SandglassBlock, self).__init__() 36 | self.stride = stride 37 | mid_channels = in_channels // expansion_factor 38 | self.identity = stride == 1 and in_channels == out_channels 39 | 40 | self.bottleneck = nn.Sequential( 41 | Conv3x3BNReLU(in_channels, in_channels, 1, groups=in_channels), 42 | Conv1x1BN(in_channels, mid_channels), 43 | Conv1x1BNReLU(mid_channels, out_channels), 44 | Conv3x3BN(out_channels, out_channels, stride, groups=out_channels), 45 | ) 46 | 47 | def forward(self, x): 48 | out = self.bottleneck(x) 49 | if self.identity: 50 | return out + x 51 | else: 52 | return out 53 | 54 | 55 | class MobileNetXt(nn.Module): 56 | def __init__(self, num_classes=1000): 57 | super(MobileNetXt,self).__init__() 58 | 59 | self.first_conv = Conv3x3BNReLU(3,32,2,groups=1) 60 | 61 | self.layer1 = self.make_layer(in_channels=32, out_channels=96, stride=2, expansion_factor=2, block_num=1) 62 | self.layer2 = self.make_layer(in_channels=96, out_channels=144, stride=1, expansion_factor=6, block_num=1) 63 | self.layer3 = self.make_layer(in_channels=144, out_channels=192, stride=2, expansion_factor=6, block_num=3) 64 | self.layer4 = self.make_layer(in_channels=192, out_channels=288, stride=2, expansion_factor=6, block_num=3) 65 | self.layer5 = self.make_layer(in_channels=288, out_channels=384, stride=1, expansion_factor=6, block_num=4) 66 | self.layer6 = self.make_layer(in_channels=384, out_channels=576, stride=2, expansion_factor=6, block_num=4) 67 | self.layer7 = self.make_layer(in_channels=576, out_channels=960, stride=1, expansion_factor=6, block_num=2) 68 | self.layer8 = self.make_layer(in_channels=960, out_channels=1280, stride=1, expansion_factor=6, block_num=1) 69 | 70 | self.avgpool = nn.AvgPool2d(kernel_size=7,stride=1) 71 | self.dropout = nn.Dropout(p=0.2) 72 | self.linear = nn.Linear(in_features=1280,out_features=num_classes) 73 | 74 | def make_layer(self, in_channels, out_channels, stride, expansion_factor, block_num): 75 | layers = [] 76 | layers.append(SandglassBlock(in_channels, out_channels, stride,expansion_factor)) 77 | for i in range(1, block_num): 78 | layers.append(SandglassBlock(out_channels,out_channels,1,expansion_factor)) 79 | return nn.Sequential(*layers) 80 | 81 | def init_params(self): 82 | for m in self.modules(): 83 | if isinstance(m, nn.Conv2d): 84 | nn.init.kaiming_normal_(m.weight) 85 | nn.init.constant_(m.bias, 0) 86 | elif isinstance(m, nn.Linear) or isinstance(m, nn.BatchNorm2d): 87 | nn.init.constant_(m.weight, 1) 88 | nn.init.constant_(m.bias, 0) 89 | 90 | def forward(self, x): 91 | x = self.first_conv(x) 92 | x = self.layer1(x) 93 | x = self.layer2(x) 94 | x = self.layer3(x) 95 | x = self.layer4(x) 96 | x = self.layer5(x) 97 | x = self.layer6(x) 98 | x = self.layer7(x) 99 | x = self.layer8(x) 100 | x = self.avgpool(x) 101 | x = 
x.view(x.size(0),-1) 102 | x = self.dropout(x) 103 | out = self.linear(x) 104 | return out 105 | 106 | 107 | if __name__=='__main__': 108 | model = MobileNetXt() 109 | print(model) 110 | 111 | input = torch.randn(1, 3, 224, 224) 112 | out = model(input) 113 | print(out.shape) 114 | -------------------------------------------------------------------------------- /Lightweight/README.md: -------------------------------------------------------------------------------- 1 | # Lightweight-network 2 | 3 | PyTorch implementation of lightweight networks 4 | 5 | 6 | 7 | ## MobileNets: 8 | 9 | **MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications** 10 | 11 | 12 | 13 | ## MobileNetV2: 14 | 15 | **MobileNetV2: Inverted Residuals and Linear Bottlenecks** 16 | 17 | 18 | 19 | ## MobileNetV3: 20 | 21 | **Searching for MobileNetV3** 22 | 23 | 24 | ## ShuffleNet: 25 | 26 | **ShuffleNet: An Extremely Efficient Convolutional Neural Network for Mobile Devices** 27 | 28 | 29 | ## ShuffleNet V2: 30 | 31 | **ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design** 32 | 33 | 34 | 35 | ## SqueezeNet 36 | 37 | **SqueezeNet: AlexNet-level accuracy with 50x fewer parameters and < 0.5MB Model Size** 38 | 39 | ## Xception 40 | 41 | **Xception: Deep Learning with Depthwise Separable Convolutions** 42 | 43 | 44 | 45 | ## MixNet 46 | 47 | **MixNet: Mixed Depthwise Convolutional Kernels** -------------------------------------------------------------------------------- /Lightweight/ShuffleNet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torchvision 4 | 5 | def Conv3x3BNReLU(in_channels,out_channels,stride,groups): 6 | return nn.Sequential( 7 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=3, stride=stride, padding=1,groups=groups), 8 | nn.BatchNorm2d(out_channels), 9 | nn.ReLU6(inplace=True) 10 | ) 11 | 12 | def Conv1x1BNReLU(in_channels,out_channels,groups): 13 | return nn.Sequential( 14 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=1,groups=groups), 15 | nn.BatchNorm2d(out_channels), 16 | nn.ReLU6(inplace=True) 17 | ) 18 | 19 | def Conv1x1BN(in_channels,out_channels,groups): 20 | return nn.Sequential( 21 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=1,groups=groups), 22 | nn.BatchNorm2d(out_channels) 23 | ) 24 | 25 | class ChannelShuffle(nn.Module): 26 | def __init__(self, groups): 27 | super(ChannelShuffle, self).__init__() 28 | self.groups = groups 29 | 30 | def forward(self, x): 31 | '''Channel shuffle: [N,C,H,W] -> [N,g,C/g,H,W] -> [N,C/g,g,H,w] -> [N,C,H,W]''' 32 | N, C, H, W = x.size() 33 | g = self.groups 34 | return x.view(N, g, int(C / g), H, W).permute(0, 2, 1, 3, 4).contiguous().view(N, C, H, W) 35 | 36 | 37 | class ShuffleNetUnits(nn.Module): 38 | def __init__(self, in_channels, out_channels, stride,groups): 39 | super(ShuffleNetUnits, self).__init__() 40 | self.stride = stride 41 | out_channels = out_channels - in_channels if self.stride >1 else out_channels 42 | mid_channels = out_channels // 4 43 | 44 | self.bottleneck = nn.Sequential( 45 | Conv1x1BNReLU(in_channels, mid_channels,groups), 46 | ChannelShuffle(groups), 47 | Conv3x3BNReLU(mid_channels, mid_channels, stride,groups), 48 | Conv1x1BN(mid_channels, out_channels,groups) 49 | ) 50 | if self.stride>1: 51 | self.shortcut = nn.MaxPool2d(kernel_size=3,stride=2,padding=1) 52 | 53 | self.relu = nn.ReLU6(inplace=True) 54 | 55 | 
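# note (comment added for clarity): when stride > 1 the max-pooled shortcut is concatenated, so the
# residual branch only produces out_channels - in_channels new channels; when stride == 1 the unit
# is a plain additive residual, which requires in_channels == out_channels.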
def forward(self, x): 56 | out = self.bottleneck(x) 57 | out = torch.cat([self.shortcut(x),out],dim=1) if self.stride >1 else (out + x) 58 | return self.relu(out) 59 | 60 | class ShuffleNet(nn.Module): 61 | def __init__(self, planes, layers, groups, num_classes=1000): 62 | super(ShuffleNet, self).__init__() 63 | 64 | self.stage1 = nn.Sequential( 65 | Conv3x3BNReLU(in_channels=3,out_channels=24,stride=2, groups=1), 66 | nn.MaxPool2d(kernel_size=3,stride=2,padding=1) 67 | ) 68 | 69 | self.stage2 = self._make_layer(24,planes[0], groups, layers[0], True) 70 | self.stage3 = self._make_layer(planes[0],planes[1], groups, layers[1], False) 71 | self.stage4 = self._make_layer(planes[1],planes[2], groups, layers[2], False) 72 | 73 | self.global_pool = nn.AvgPool2d(kernel_size=7, stride=1) 74 | self.dropout = nn.Dropout(p=0.2) 75 | self.linear = nn.Linear(in_features=planes[2], out_features=num_classes) 76 | 77 | self.init_params() 78 | 79 | def _make_layer(self, in_channels,out_channels, groups, block_num, is_stage2): 80 | layers = [] 81 | layers.append(ShuffleNetUnits(in_channels=in_channels, out_channels=out_channels, stride=2, groups=1 if is_stage2 else groups)) 82 | for idx in range(1, block_num): 83 | layers.append(ShuffleNetUnits(in_channels=out_channels, out_channels=out_channels, stride=1, groups=groups)) 84 | return nn.Sequential(*layers) 85 | 86 | def init_params(self): 87 | for m in self.modules(): 88 | if isinstance(m,nn.Conv2d): 89 | nn.init.kaiming_normal_(m.weight) 90 | nn.init.constant_(m.bias,0) 91 | elif isinstance(m, nn.BatchNorm2d) or isinstance(m, nn.Linear): 92 | nn.init.constant_(m.weight,1) 93 | nn.init.constant_(m.bias, 0) 94 | 95 | def forward(self, x): 96 | x = self.stage1(x) 97 | x = self.stage2(x) 98 | x = self.stage3(x) 99 | x = self.stage4(x) 100 | x = self.global_pool(x) 101 | x = x.view(x.size(0), -1) 102 | x = self.dropout(x) 103 | out = self.linear(x) 104 | return out 105 | 106 | def shufflenet_g8(**kwargs): 107 | planes = [384, 768, 1536] 108 | layers = [4, 8, 4] 109 | model = ShuffleNet(planes, layers, groups=8) 110 | return model 111 | 112 | def shufflenet_g4(**kwargs): 113 | planes = [272, 544, 1088] 114 | layers = [4, 8, 4] 115 | model = ShuffleNet(planes, layers, groups=4) 116 | return model 117 | 118 | def shufflenet_g3(**kwargs): 119 | planes = [240, 480, 960] 120 | layers = [4, 8, 4] 121 | model = ShuffleNet(planes, layers, groups=3) 122 | return model 123 | 124 | def shufflenet_g2(**kwargs): 125 | planes = [200, 400, 800] 126 | layers = [4, 8, 4] 127 | model = ShuffleNet(planes, layers, groups=2) 128 | return model 129 | 130 | def shufflenet_g1(**kwargs): 131 | planes = [144, 288, 576] 132 | layers = [4, 8, 4] 133 | model = ShuffleNet(planes, layers, groups=1) 134 | return model 135 | 136 | if __name__ == '__main__': 137 | model = shufflenet_g1() 138 | print(model) 139 | 140 | input = torch.randn(1, 3, 224, 224) 141 | out = model(input) 142 | print(out.shape) -------------------------------------------------------------------------------- /Lightweight/ShuffleNetV2.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torchvision 4 | 5 | def Conv3x3BNReLU(in_channels,out_channels,stride,groups): 6 | return nn.Sequential( 7 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=3, stride=stride, padding=1,groups=groups), 8 | nn.BatchNorm2d(out_channels), 9 | nn.ReLU6(inplace=True) 10 | ) 11 | 12 | def Conv3x3BN(in_channels,out_channels,stride,groups): 13 | return 
nn.Sequential( 14 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=3, stride=stride, padding=1,groups=groups), 15 | nn.BatchNorm2d(out_channels) 16 | ) 17 | 18 | def Conv1x1BNReLU(in_channels,out_channels): 19 | return nn.Sequential( 20 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=1), 21 | nn.BatchNorm2d(out_channels), 22 | nn.ReLU6(inplace=True) 23 | ) 24 | 25 | def Conv1x1BN(in_channels,out_channels): 26 | return nn.Sequential( 27 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=1), 28 | nn.BatchNorm2d(out_channels) 29 | ) 30 | 31 | class HalfSplit(nn.Module): 32 | def __init__(self, dim=0, first_half=True): 33 | super(HalfSplit, self).__init__() 34 | self.first_half = first_half 35 | self.dim = dim 36 | 37 | def forward(self, input): 38 | splits = torch.chunk(input, 2, dim=self.dim) 39 | return splits[0] if self.first_half else splits[1] 40 | 41 | class ChannelShuffle(nn.Module): 42 | def __init__(self, groups): 43 | super(ChannelShuffle, self).__init__() 44 | self.groups = groups 45 | 46 | def forward(self, x): 47 | '''Channel shuffle: [N,C,H,W] -> [N,g,C/g,H,W] -> [N,C/g,g,H,w] -> [N,C,H,W]''' 48 | N, C, H, W = x.size() 49 | g = self.groups 50 | return x.view(N, g, int(C / g), H, W).permute(0, 2, 1, 3, 4).contiguous().view(N, C, H, W) 51 | 52 | class ShuffleNetUnits(nn.Module): 53 | def __init__(self, in_channels, out_channels, stride, groups): 54 | super(ShuffleNetUnits, self).__init__() 55 | self.stride = stride 56 | if self.stride > 1: 57 | mid_channels = out_channels - in_channels 58 | else: 59 | mid_channels = out_channels // 2 60 | in_channels = mid_channels 61 | self.first_half = HalfSplit(dim=1, first_half=True) 62 | self.second_split = HalfSplit(dim=1, first_half=False) 63 | 64 | self.bottleneck = nn.Sequential( 65 | Conv1x1BNReLU(in_channels, in_channels), 66 | Conv3x3BN(in_channels, mid_channels, stride, groups), 67 | Conv1x1BNReLU(mid_channels, mid_channels) 68 | ) 69 | 70 | if self.stride > 1: 71 | self.shortcut = nn.Sequential( 72 | Conv3x3BN(in_channels=in_channels, out_channels=in_channels, stride=stride, groups=groups), 73 | Conv1x1BNReLU(in_channels, in_channels) 74 | ) 75 | 76 | self.channel_shuffle = ChannelShuffle(groups) 77 | 78 | def forward(self, x): 79 | if self.stride > 1: 80 | x1 = self.bottleneck(x) 81 | x2 = self.shortcut(x) 82 | else: 83 | x1 = self.first_half(x) 84 | x2 = self.second_split(x) 85 | x1 = self.bottleneck(x1) 86 | 87 | out = torch.cat([x1, x2], dim=1) 88 | out = self.channel_shuffle(out) 89 | return out 90 | 91 | class ShuffleNetV2(nn.Module): 92 | def __init__(self, planes, layers, groups, num_classes=1000): 93 | super(ShuffleNetV2, self).__init__() 94 | self.groups = groups 95 | self.stage1 = nn.Sequential( 96 | Conv3x3BNReLU(in_channels=3, out_channels=24, stride=2, groups=1), 97 | nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 98 | ) 99 | 100 | self.stage2 = self._make_layer(24, planes[0], layers[0], True) 101 | self.stage3 = self._make_layer(planes[0], planes[1], layers[1], False) 102 | self.stage4 = self._make_layer(planes[1], planes[2], layers[2], False) 103 | 104 | self.global_pool = nn.AdaptiveAvgPool2d(1) 105 | self.dropout = nn.Dropout(p=0.2) 106 | self.linear = nn.Linear(in_features=planes[2], out_features=num_classes) 107 | 108 | self.init_params() 109 | 110 | def _make_layer(self, in_channels, out_channels, block_num, is_stage2): 111 | layers = [] 112 | layers.append(ShuffleNetUnits(in_channels=in_channels, 
out_channels=out_channels, stride= 2, groups=1 if is_stage2 else self.groups)) 113 | for idx in range(1, block_num): 114 | layers.append(ShuffleNetUnits(in_channels=out_channels, out_channels=out_channels, stride=1, groups=self.groups)) 115 | return nn.Sequential(*layers) 116 | 117 | def init_params(self): 118 | for m in self.modules(): 119 | if isinstance(m, nn.Conv2d): 120 | nn.init.kaiming_normal_(m.weight) 121 | nn.init.constant_(m.bias, 0) 122 | elif isinstance(m, nn.BatchNorm2d) or isinstance(m, nn.Linear): 123 | nn.init.constant_(m.weight, 1) 124 | nn.init.constant_(m.bias, 0) 125 | 126 | def forward(self, x): 127 | x = self.stage1(x) 128 | x = self.stage2(x) 129 | x = self.stage3(x) 130 | x = self.stage4(x) 131 | x = self.global_pool(x) 132 | x = x.view(x.size(0), -1) 133 | x = self.dropout(x) 134 | out = self.linear(x) 135 | return out 136 | 137 | def shufflenet_v2_x2_0(**kwargs): 138 | planes = [244, 488, 976] 139 | layers = [4, 8, 4] 140 | model = ShuffleNetV2(planes, layers, 1) 141 | return model 142 | 143 | def shufflenet_v2_x1_5(**kwargs): 144 | planes = [176, 352, 704] 145 | layers = [4, 8, 4] 146 | model = ShuffleNetV2(planes, layers, 1) 147 | return model 148 | 149 | def shufflenet_v2_x1_0(**kwargs): 150 | planes = [116, 232, 464] 151 | layers = [4, 8, 4] 152 | model = ShuffleNetV2(planes, layers, 1) 153 | return model 154 | 155 | def shufflenet_v2_x0_5(**kwargs): 156 | planes = [48, 96, 192] 157 | layers = [4, 8, 4] 158 | model = ShuffleNetV2(planes, layers, 1) 159 | return model 160 | 161 | if __name__ == '__main__': 162 | model = shufflenet_v2_x2_0() 163 | print(model) 164 | 165 | input = torch.randn(1, 3, 224, 224) 166 | out = model(input) 167 | print(out.shape) -------------------------------------------------------------------------------- /Lightweight/SqueezeNet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torchvision 4 | 5 | class FireModule(nn.Module): 6 | def __init__(self, in_channels, out_channels, mid_channels=None): 7 | super(FireModule, self).__init__() 8 | mid_channels = out_channels//4 9 | 10 | self.squeeze = nn.Conv2d(in_channels=in_channels,out_channels=mid_channels,kernel_size=1,stride=1) 11 | self.squeeze_relu = nn.ReLU6(inplace=True) 12 | 13 | self.expand3x3 = nn.Conv2d(in_channels=mid_channels, out_channels=out_channels, kernel_size=3, stride=1,padding=1) 14 | self.expand3x3_relu = nn.ReLU6(inplace=True) 15 | 16 | self.expand1x1 = nn.Conv2d(in_channels=mid_channels, out_channels=out_channels, kernel_size=1, stride=1) 17 | self.expand1x1_relu = nn.ReLU6(inplace=True) 18 | 19 | def forward(self, x): 20 | x = self.squeeze_relu(self.squeeze(x)) 21 | y = self.expand3x3_relu(self.expand3x3(x)) 22 | z = self.expand1x1_relu(self.expand1x1(x)) 23 | out = torch.cat([y, z],dim=1) 24 | return out 25 | 26 | class SqueezeNet(nn.Module): 27 | def __init__(self, num_classes = 1000): 28 | super(SqueezeNet, self).__init__() 29 | 30 | self.bottleneck = nn.Sequential( 31 | nn.Conv2d(in_channels=3, out_channels=96,kernel_size=7,stride=2,padding=3), 32 | nn.BatchNorm2d(96), 33 | nn.ReLU6(inplace=True), 34 | nn.MaxPool2d(kernel_size=3,stride=2), 35 | 36 | FireModule(in_channels=96, out_channels=64), 37 | FireModule(in_channels=128, out_channels=64), 38 | FireModule(in_channels=128, out_channels=128), 39 | nn.MaxPool2d(kernel_size=3,stride=2), 40 | 41 | FireModule(in_channels=256, out_channels=128), 42 | FireModule(in_channels=256, out_channels=192), 43 | 
FireModule(in_channels=384, out_channels=192), 44 | FireModule(in_channels=384, out_channels=256), 45 | nn.MaxPool2d(kernel_size=3, stride=2), 46 | 47 | FireModule(in_channels=512, out_channels=256), 48 | nn.Dropout(p=0.5), 49 | nn.Conv2d(in_channels=512, out_channels=num_classes, kernel_size=1, stride=1), 50 | nn.ReLU(inplace=True), 51 | nn.AvgPool2d(kernel_size=13, stride=1), 52 | ) 53 | 54 | def forward(self, x): 55 | out = self.bottleneck(x) 56 | return out.view(out.size(0), -1) 57 | 58 | if __name__ == '__main__': 59 | model = SqueezeNet() 60 | print(model) 61 | 62 | input = torch.rand(1,3,224,224) 63 | out = model(input) 64 | print(out.shape) 65 | 66 | -------------------------------------------------------------------------------- /Lightweight/Xception.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torchvision 4 | 5 | def ConvBN(in_channels,out_channels,kernel_size,stride): 6 | return nn.Sequential( 7 | nn.Conv2d(in_channels=in_channels,out_channels=out_channels,kernel_size=kernel_size,stride=stride,padding=0 if kernel_size==1 else (kernel_size-1)//2), 8 | nn.BatchNorm2d(out_channels), 9 | ) 10 | 11 | def ConvBNRelu(in_channels,out_channels,kernel_size,stride): 12 | return nn.Sequential( 13 | ConvBN(in_channels, out_channels, kernel_size, stride), 14 | nn.ReLU6(inplace=True), 15 | ) 16 | 17 | def SeparableConvolution(in_channels, out_channels): 18 | return nn.Sequential( 19 | nn.Conv2d(in_channels=in_channels, out_channels=in_channels, kernel_size=3, stride=1,padding=1,groups=in_channels), 20 | nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=0), 21 | ) 22 | def SeparableConvolutionRelu(in_channels, out_channels): 23 | return nn.Sequential( 24 | SeparableConvolution(in_channels, out_channels), 25 | nn.ReLU6(inplace=True), 26 | ) 27 | 28 | def ReluSeparableConvolution(in_channels, out_channels): 29 | return nn.Sequential( 30 | nn.ReLU6(inplace=True), 31 | SeparableConvolution(in_channels, out_channels) 32 | ) 33 | 34 | class EntryBottleneck(nn.Module): 35 | def __init__(self, in_channels, out_channels, first_relu=True): 36 | super(EntryBottleneck, self).__init__() 37 | mid_channels = out_channels 38 | 39 | self.shortcut = ConvBN(in_channels=in_channels,out_channels=out_channels,kernel_size=1,stride=2) 40 | 41 | self.bottleneck = nn.Sequential( 42 | ReluSeparableConvolution(in_channels=in_channels,out_channels=mid_channels) if first_relu else SeparableConvolution(in_channels=in_channels,out_channels=mid_channels), 43 | ReluSeparableConvolution(in_channels=mid_channels, out_channels=out_channels), 44 | nn.MaxPool2d(kernel_size=3,stride=2,padding=1) 45 | ) 46 | 47 | def forward(self, x): 48 | out = self.shortcut(x) 49 | x = self.bottleneck(x) 50 | return out+x 51 | 52 | 53 | class MiddleBottleneck(nn.Module): 54 | def __init__(self, in_channels, out_channels): 55 | super(MiddleBottleneck, self).__init__() 56 | mid_channels = out_channels 57 | 58 | self.bottleneck = nn.Sequential( 59 | ReluSeparableConvolution(in_channels=in_channels,out_channels=mid_channels), 60 | ReluSeparableConvolution(in_channels=mid_channels, out_channels=mid_channels), 61 | ReluSeparableConvolution(in_channels=mid_channels, out_channels=out_channels), 62 | ) 63 | 64 | def forward(self, x): 65 | out = self.bottleneck(x) 66 | return out+x 67 | 68 | class ExitBottleneck(nn.Module): 69 | def __init__(self, in_channels, out_channels): 70 | super(ExitBottleneck, self).__init__() 71 | mid_channels = 
in_channels 72 | 73 | self.shortcut = ConvBN(in_channels=in_channels,out_channels=out_channels,kernel_size=1,stride=2) 74 | 75 | self.bottleneck = nn.Sequential( 76 | ReluSeparableConvolution(in_channels=in_channels,out_channels=mid_channels), 77 | ReluSeparableConvolution(in_channels=mid_channels, out_channels=out_channels), 78 | nn.MaxPool2d(kernel_size=3,stride=2,padding=1) 79 | ) 80 | 81 | def forward(self, x): 82 | out = self.shortcut(x) 83 | x = self.bottleneck(x) 84 | return out+x 85 | 86 | class Xception(nn.Module): 87 | def __init__(self, num_classes=1000): 88 | super(Xception, self).__init__() 89 | 90 | self.entryFlow = nn.Sequential( 91 | ConvBNRelu(in_channels=3, out_channels=32, kernel_size=3, stride=2), 92 | ConvBNRelu(in_channels=32, out_channels=64, kernel_size=3, stride=1), 93 | EntryBottleneck(in_channels=64, out_channels=128, first_relu=False), 94 | EntryBottleneck(in_channels=128, out_channels=256, first_relu=True), 95 | EntryBottleneck(in_channels=256, out_channels=728, first_relu=True), 96 | ) 97 | self.middleFlow = nn.Sequential( 98 | MiddleBottleneck(in_channels=728,out_channels=728), 99 | MiddleBottleneck(in_channels=728, out_channels=728), 100 | MiddleBottleneck(in_channels=728, out_channels=728), 101 | MiddleBottleneck(in_channels=728, out_channels=728), 102 | MiddleBottleneck(in_channels=728, out_channels=728), 103 | MiddleBottleneck(in_channels=728, out_channels=728), 104 | MiddleBottleneck(in_channels=728, out_channels=728), 105 | MiddleBottleneck(in_channels=728, out_channels=728), 106 | ) 107 | self.exitFlow = nn.Sequential( 108 | ExitBottleneck(in_channels=728, out_channels=1024), 109 | SeparableConvolutionRelu(in_channels=1024, out_channels=1536), 110 | SeparableConvolutionRelu(in_channels=1536, out_channels=2048), 111 | nn.AdaptiveAvgPool2d((1,1)), 112 | ) 113 | 114 | self.linear = nn.Linear(2048, num_classes) 115 | 116 | def forward(self, x): 117 | x = self.entryFlow(x) 118 | x = self.middleFlow(x) 119 | x = self.exitFlow(x) 120 | x = x.view(x.size(0), -1) 121 | out = self.linear(x) 122 | return out 123 | 124 | 125 | if __name__ == '__main__': 126 | model = Xception() 127 | print(model) 128 | 129 | input = torch.randn(1,3,299,299) 130 | output = model(input) 131 | print(output.shape) -------------------------------------------------------------------------------- /ObjectDetection/ASFF.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torchvision 4 | 5 | def Conv1x1BnRelu(in_channels,out_channels): 6 | return nn.Sequential( 7 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=1, padding=0, bias=False), 8 | nn.BatchNorm2d(out_channels), 9 | nn.ReLU6(inplace=True), 10 | ) 11 | 12 | def upSampling1(in_channels,out_channels): 13 | return nn.Sequential( 14 | nn.Conv2d(in_channels=in_channels,out_channels=out_channels,kernel_size=1,stride=1,padding=0,bias=False), 15 | nn.BatchNorm2d(out_channels), 16 | nn.ReLU6(inplace=True), 17 | nn.Upsample(scale_factor=2, mode='nearest') 18 | ) 19 | 20 | def upSampling2(in_channels,out_channels): 21 | return nn.Sequential( 22 | upSampling1(in_channels,out_channels), 23 | nn.Upsample(scale_factor=2, mode='nearest'), 24 | ) 25 | 26 | def downSampling1(in_channels,out_channels): 27 | return nn.Sequential( 28 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=3, stride=2, padding=1, bias=False), 29 | nn.BatchNorm2d(out_channels), 30 | nn.ReLU6(inplace=True), 31 | ) 32 | 33 | def 
downSampling2(in_channels,out_channels): 34 | return nn.Sequential( 35 | nn.MaxPool2d(kernel_size=3, stride=2,padding=1), 36 | downSampling1(in_channels=in_channels, out_channels=out_channels), 37 | ) 38 | 39 | class ASFF(nn.Module): 40 | def __init__(self, level, channel1, channel2, channel3, out_channel): 41 | super(ASFF, self).__init__() 42 | self.level = level 43 | fused_channel = 8 44 | 45 | if self.level == 1: 46 | # level = 1: 47 | self.level2_1 = downSampling1(channel2,channel1) 48 | self.level3_1 = downSampling2(channel3,channel1) 49 | 50 | self.weight1 = Conv1x1BnRelu(channel1, fused_channel) 51 | self.weight2 = Conv1x1BnRelu(channel1, fused_channel) 52 | self.weight3 = Conv1x1BnRelu(channel1, fused_channel) 53 | 54 | self.expand_conv = Conv1x1BnRelu(channel1,out_channel) 55 | 56 | if self.level == 2: 57 | # level = 2: 58 | self.level1_2 = upSampling1(channel1,channel2) 59 | self.level3_2 = downSampling1(channel3,channel2) 60 | 61 | self.weight1 = Conv1x1BnRelu(channel2, fused_channel) 62 | self.weight2 = Conv1x1BnRelu(channel2, fused_channel) 63 | self.weight3 = Conv1x1BnRelu(channel2, fused_channel) 64 | 65 | self.expand_conv = Conv1x1BnRelu(channel2, out_channel) 66 | 67 | if self.level == 3: 68 | # level = 3: 69 | self.level1_3 = upSampling2(channel1,channel3) 70 | self.level2_3 = upSampling1(channel2,channel3) 71 | 72 | self.weight1 = Conv1x1BnRelu(channel3, fused_channel) 73 | self.weight2 = Conv1x1BnRelu(channel3, fused_channel) 74 | self.weight3 = Conv1x1BnRelu(channel3, fused_channel) 75 | 76 | self.expand_conv = Conv1x1BnRelu(channel3, out_channel) 77 | 78 | self.weight_level = nn.Conv2d(fused_channel * 3, 3, kernel_size=1, stride=1, padding=0) 79 | 80 | self.softmax = nn.Softmax(dim=1) 81 | 82 | 83 | def forward(self, x, y, z): 84 | if self.level == 1: 85 | level_x = x 86 | level_y = self.level2_1(y) 87 | level_z = self.level3_1(z) 88 | 89 | if self.level == 2: 90 | level_x = self.level1_2(x) 91 | level_y = y 92 | level_z = self.level3_2(z) 93 | 94 | if self.level == 3: 95 | level_x = self.level1_3(x) 96 | level_y = self.level2_3(y) 97 | level_z = z 98 | 99 | weight1 = self.weight1(level_x) 100 | weight2 = self.weight2(level_y) 101 | weight3 = self.weight3(level_z) 102 | 103 | level_weight = torch.cat((weight1, weight2, weight3), 1) 104 | weight_level = self.weight_level(level_weight) 105 | weight_level = self.softmax(weight_level) 106 | 107 | fused_level = level_x * weight_level[:, 0:1, :, :] + level_y * weight_level[:, 1:2, :, :] + level_z * weight_level[:, 2:3, :, :] 108 | out = self.expand_conv(fused_level) 109 | return out 110 | 111 | if __name__ == '__main__': 112 | model = ASFF(level=3, channel1=512, channel2=256, channel3=128, out_channel=128) 113 | print(model) 114 | 115 | x = torch.randn(1, 512, 16, 16) 116 | y = torch.randn(1, 256, 32, 32) 117 | z = torch.randn(1, 128, 64, 64) 118 | out = model(x,y,z) 119 | print(out.shape) -------------------------------------------------------------------------------- /ObjectDetection/CenterNet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torchvision 4 | 5 | 6 | 7 | 8 | 9 | 10 | if __name__ == '__main__': 11 | # NOTE: this file is still a placeholder; no CenterNet model is implemented here yet, 12 | # so there is nothing to instantiate or run. 13 | pass -------------------------------------------------------------------------------- /ObjectDetection/CornerNet.py: -------------------------------------------------------------------------------- 1 | import 
torch 2 | import torch.nn as nn 3 | 4 | def ConvBNReLU(in_channels,out_channels,kernel_size,stride,padding=1): 5 | return nn.Sequential( 6 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=stride, padding=kernel_size//2), 7 | nn.BatchNorm2d(out_channels), 8 | nn.ReLU6(inplace=True) 9 | ) 10 | 11 | class ResidualBlock(nn.Module): 12 | def __init__(self, in_channels, out_channels): 13 | super(ResidualBlock, self).__init__() 14 | mid_channels = out_channels//2 15 | 16 | self.bottleneck = nn.Sequential( 17 | ConvBNReLU(in_channels=in_channels, out_channels=mid_channels, kernel_size=1, stride=1), 18 | ConvBNReLU(in_channels=mid_channels, out_channels=mid_channels, kernel_size=3, stride=1, padding=1), 19 | ConvBNReLU(in_channels=mid_channels, out_channels=out_channels, kernel_size=1, stride=1), 20 | ) 21 | self.shortcut = ConvBNReLU(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=1) 22 | 23 | def forward(self, x): 24 | out = self.bottleneck(x) 25 | return out+self.shortcut(x) 26 | 27 | 28 | class left_pool(torch.autograd.Function): 29 | '''Left corner pooling: out[..., j] = max(x[..., j:]), a running max scanned right-to-left along the width axis. The original version used the legacy autograd.Function API and called an undefined comp() helper in backward(); this rewrite keeps the same forward semantics and routes each output gradient back to the input position that produced its running maximum.''' 30 | @staticmethod 31 | def forward(ctx, input_): 32 | # flip the width axis so the right-to-left scan becomes a plain cummax, then flip back 33 | output, indices = input_.flip(3).cummax(dim=3) 34 | ctx.save_for_backward(indices) 35 | return output.flip(3) 36 | 37 | @staticmethod 38 | def backward(ctx, grad_output): 39 | indices, = ctx.saved_tensors 40 | # every output element copies its value from the argmax position recorded by cummax, 41 | # so its incoming gradient is accumulated back onto that source position 42 | grad_input = torch.zeros_like(grad_output) 43 | grad_input.scatter_add_(3, indices, grad_output.flip(3)) 44 | return grad_input.flip(3) 45 | 46 | class HourglassNetwork(nn.Module): 47 | def __init__(self): 48 | super(HourglassNetwork, self).__init__() 49 | 50 | def forward(self, x): 51 | # stacked hourglass backbone is not implemented in this file yet 52 | raise NotImplementedError 53 | 54 | class PredictionModule(nn.Module): 55 | def __init__(self): 56 | super(PredictionModule, self).__init__() 57 | 58 | def forward(self, x): 59 | # corner heatmap / embedding / offset heads are not implemented yet 60 | raise NotImplementedError 61 | 62 | 63 | class CornerNet(nn.Module): 64 | def __init__(self): 65 | super(CornerNet, self).__init__() 66 | 67 | def forward(self, x): 68 | # full detector (backbone + two corner prediction modules) is not implemented yet 69 | raise NotImplementedError 70 | 71 | 72 | if __name__ == '__main__': 73 | # NOTE: CornerNet above is still a skeleton, so running it raises NotImplementedError. 74 | model = CornerNet() 75 | print(model) 76 | 77 | data = torch.randn(1,3,511,511) 78 | output = model(data) 79 | print(output.shape) -------------------------------------------------------------------------------- 
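A quick numeric check of the rewritten left_pool (a standalone sketch, assuming the class above is in scope; torch.autograd.gradcheck wants float64 inputs):

import torch

pool = left_pool.apply
x = torch.randn(2, 3, 4, 5, dtype=torch.float64, requires_grad=True)
print(torch.autograd.gradcheck(pool, (x,)))  # True: the custom backward matches numeric gradients
print(pool(x).shape)                         # torch.Size([2, 3, 4, 5]); corner pooling preserves shape

/ObjectDetection/FCOS.py: 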
-------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torchvision 4 | 5 | def Conv3x3ReLU(in_channels,out_channels): 6 | return nn.Sequential( 7 | nn.Conv2d(in_channels=in_channels,out_channels=out_channels,kernel_size=3,stride=1,padding=1), 8 | nn.ReLU6(inplace=True) 9 | ) 10 | 11 | def locLayer(in_channels,out_channels): 12 | return nn.Sequential( 13 | Conv3x3ReLU(in_channels=in_channels, out_channels=in_channels), 14 | Conv3x3ReLU(in_channels=in_channels, out_channels=in_channels), 15 | Conv3x3ReLU(in_channels=in_channels, out_channels=in_channels), 16 | Conv3x3ReLU(in_channels=in_channels, out_channels=in_channels), 17 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=3, stride=1, padding=1), 18 | ) 19 | 20 | def conf_centernessLayer(in_channels,out_channels): 21 | return nn.Sequential( 22 | Conv3x3ReLU(in_channels=in_channels, out_channels=in_channels), 23 | Conv3x3ReLU(in_channels=in_channels, out_channels=in_channels), 24 | Conv3x3ReLU(in_channels=in_channels, out_channels=in_channels), 25 | Conv3x3ReLU(in_channels=in_channels, out_channels=in_channels), 26 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=3, stride=1, padding=1), 27 | ) 28 | 29 | class FCOS(nn.Module): 30 | def __init__(self, num_classes=21): 31 | super(FCOS, self).__init__() 32 | self.num_classes = num_classes 33 | resnet = torchvision.models.resnet50() 34 | layers = list(resnet.children()) 35 | 36 | self.layer1 = nn.Sequential(*layers[:5]) 37 | self.layer2 = nn.Sequential(*layers[5]) 38 | self.layer3 = nn.Sequential(*layers[6]) 39 | self.layer4 = nn.Sequential(*layers[7]) 40 | 41 | self.lateral5 = nn.Conv2d(in_channels=2048, out_channels=256, kernel_size=1) 42 | self.lateral4 = nn.Conv2d(in_channels=1024, out_channels=256, kernel_size=1) 43 | self.lateral3 = nn.Conv2d(in_channels=512, out_channels=256, kernel_size=1) 44 | 45 | self.upsample4 = nn.ConvTranspose2d(in_channels=256, out_channels=256, kernel_size=4, stride=2, padding=1) 46 | self.upsample3 = nn.ConvTranspose2d(in_channels=256, out_channels=256, kernel_size=4, stride=2, padding=1) 47 | 48 | self.downsample6 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=2, padding=1) 49 | self.downsample5 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=2, padding=1) 50 | 51 | self.loc_layer3 = locLayer(in_channels=256,out_channels=4) 52 | self.conf_centerness_layer3 = conf_centernessLayer(in_channels=256,out_channels=self.num_classes+1) 53 | 54 | self.loc_layer4 = locLayer(in_channels=256, out_channels=4) 55 | self.conf_centerness_layer4 = conf_centernessLayer(in_channels=256, out_channels=self.num_classes + 1) 56 | 57 | self.loc_layer5 = locLayer(in_channels=256, out_channels=4) 58 | self.conf_centerness_layer5 = conf_centernessLayer(in_channels=256, out_channels=self.num_classes + 1) 59 | 60 | self.loc_layer6 = locLayer(in_channels=256, out_channels=4) 61 | self.conf_centerness_layer6 = conf_centernessLayer(in_channels=256, out_channels=self.num_classes + 1) 62 | 63 | self.loc_layer7 = locLayer(in_channels=256, out_channels=4) 64 | self.conf_centerness_layer7 = conf_centernessLayer(in_channels=256, out_channels=self.num_classes + 1) 65 | 66 | self.init_params() 67 | 68 | def init_params(self): 69 | for m in self.modules(): 70 | if isinstance(m, nn.Conv2d): 71 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 72 | elif isinstance(m, nn.BatchNorm2d): 73 | 
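# start every BatchNorm as the identity transform: unit scale, zero shift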
nn.init.constant_(m.weight, 1) 74 | nn.init.constant_(m.bias, 0) 75 | 76 | def forward(self, x): 77 | x = self.layer1(x) 78 | c3 =x = self.layer2(x) 79 | c4 =x = self.layer3(x) 80 | c5 = x = self.layer4(x) 81 | 82 | p5 = self.lateral5(c5) 83 | p4 = self.upsample4(p5) + self.lateral4(c4) 84 | p3 = self.upsample3(p4) + self.lateral3(c3) 85 | 86 | p6 = self.downsample5(p5) 87 | p7 = self.downsample6(p6) 88 | 89 | loc3 = self.loc_layer3(p3) 90 | conf_centerness3 = self.conf_centerness_layer3(p3) 91 | conf3, centerness3 = conf_centerness3.split([self.num_classes, 1], dim=1) 92 | 93 | loc4 = self.loc_layer4(p4) 94 | conf_centerness4 = self.conf_centerness_layer4(p4) 95 | conf4, centerness4 = conf_centerness4.split([self.num_classes, 1], dim=1) 96 | 97 | loc5 = self.loc_layer5(p5) 98 | conf_centerness5 = self.conf_centerness_layer5(p5) 99 | conf5, centerness5 = conf_centerness5.split([self.num_classes, 1], dim=1) 100 | 101 | loc6 = self.loc_layer6(p6) 102 | conf_centerness6 = self.conf_centerness_layer6(p6) 103 | conf6, centerness6 = conf_centerness6.split([self.num_classes, 1], dim=1) 104 | 105 | loc7 = self.loc_layer7(p7) 106 | conf_centerness7 = self.conf_centerness_layer7(p7) 107 | conf7, centerness7 = conf_centerness7.split([self.num_classes, 1], dim=1) 108 | 109 | locs = torch.cat([loc3.permute(0, 2, 3, 1).contiguous().view(loc3.size(0), -1), 110 | loc4.permute(0, 2, 3, 1).contiguous().view(loc4.size(0), -1), 111 | loc5.permute(0, 2, 3, 1).contiguous().view(loc5.size(0), -1), 112 | loc6.permute(0, 2, 3, 1).contiguous().view(loc6.size(0), -1), 113 | loc7.permute(0, 2, 3, 1).contiguous().view(loc7.size(0), -1)],dim=1) 114 | 115 | confs = torch.cat([conf3.permute(0, 2, 3, 1).contiguous().view(conf3.size(0), -1), 116 | conf4.permute(0, 2, 3, 1).contiguous().view(conf4.size(0), -1), 117 | conf5.permute(0, 2, 3, 1).contiguous().view(conf5.size(0), -1), 118 | conf6.permute(0, 2, 3, 1).contiguous().view(conf6.size(0), -1), 119 | conf7.permute(0, 2, 3, 1).contiguous().view(conf7.size(0), -1),], dim=1) 120 | 121 | centernesses = torch.cat([centerness3.permute(0, 2, 3, 1).contiguous().view(centerness3.size(0), -1), 122 | centerness4.permute(0, 2, 3, 1).contiguous().view(centerness4.size(0), -1), 123 | centerness5.permute(0, 2, 3, 1).contiguous().view(centerness5.size(0), -1), 124 | centerness6.permute(0, 2, 3, 1).contiguous().view(centerness6.size(0), -1), 125 | centerness7.permute(0, 2, 3, 1).contiguous().view(centerness7.size(0), -1), ], dim=1) 126 | 127 | out = (locs, confs, centernesses) 128 | return out 129 | 130 | if __name__ == '__main__': 131 | model = FCOS() 132 | print(model) 133 | 134 | input = torch.randn(1, 3, 800, 1024) 135 | out = model(input) 136 | print(out[0].shape) 137 | print(out[1].shape) 138 | print(out[2].shape) -------------------------------------------------------------------------------- /ObjectDetection/FPN.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torchvision 4 | 5 | class FPN(nn.Module): 6 | def __init__(self): 7 | super(FPN, self).__init__() 8 | resnet = torchvision.models.resnet50() 9 | layers = list(resnet.children()) 10 | 11 | self.layer1 = nn.Sequential(*layers[:5]) 12 | self.layer2 = nn.Sequential(*layers[5]) 13 | self.layer3 = nn.Sequential(*layers[6]) 14 | self.layer4 = nn.Sequential(*layers[7]) 15 | 16 | self.lateral5 = nn.Conv2d(in_channels=2048,out_channels=256,kernel_size=1) 17 | self.lateral4 = nn.Conv2d(in_channels=1024, out_channels=256, kernel_size=1) 18 | 
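# the 1x1 lateral convs project the ResNet-50 stage outputs (256/512/1024/2048 channels
# for C2-C5) into the common 256-channel space of the pyramid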
self.lateral3 = nn.Conv2d(in_channels=512, out_channels=256, kernel_size=1) 19 | self.lateral2 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=1) 20 | 21 | self.upsample2 = nn.ConvTranspose2d(in_channels=256, out_channels=256, kernel_size=4, stride=2, padding=1) 22 | self.upsample3 = nn.ConvTranspose2d(in_channels=256, out_channels=256, kernel_size=4, stride=2, padding=1) 23 | self.upsample4 = nn.ConvTranspose2d(in_channels=256,out_channels=256, kernel_size=4, stride=2, padding=1) 24 | 25 | self.smooth2 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1) 26 | self.smooth3 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1) 27 | self.smooth4 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1) 28 | 29 | self.init_params() 30 | 31 | def init_params(self): 32 | for m in self.modules(): 33 | if isinstance(m, nn.Conv2d): 34 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 35 | elif isinstance(m, nn.BatchNorm2d): 36 | nn.init.constant_(m.weight, 1) 37 | nn.init.constant_(m.bias, 0) 38 | 39 | def forward(self, x): 40 | c2 = x = self.layer1(x) 41 | c3 = x = self.layer2(x) 42 | c4 = x = self.layer3(x) 43 | c5 = x = self.layer4(x) 44 | 45 | p5 = self.lateral5(c5) 46 | p4 = self.upsample4(p5)+ self.lateral4(c4) 47 | p3 = self.upsample3(p4)+ self.lateral3(c3) 48 | p2 = self.upsample2(p3)+ self.lateral2(c2) 49 | 50 | p4 = self.smooth4(p4) 51 | p3 = self.smooth3(p3) 52 | p2 = self.smooth2(p2) 53 | return p2,p3,p4,p5 54 | 55 | if __name__ == '__main__': 56 | model = FPN() 57 | print(model) 58 | 59 | input = torch.randn(1, 3, 224, 224) 60 | p2, p3, p4, p5 = model(input) 61 | print(p2.shape) 62 | print(p3.shape) 63 | print(p4.shape) 64 | print(p5.shape) -------------------------------------------------------------------------------- /ObjectDetection/FSAF.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shanglianlm0525/PyTorch-Networks/a6b6dd4b7876ba8473a08e116485a0492a88cd48/ObjectDetection/FSAF.py -------------------------------------------------------------------------------- /ObjectDetection/FisheyeMODNet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | def Conv3x3BNReLU(in_channels,out_channels,stride,groups): 5 | return nn.Sequential( 6 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=3, stride=stride, padding=1,groups=groups), 7 | nn.BatchNorm2d(out_channels), 8 | nn.ReLU6(inplace=True) 9 | ) 10 | 11 | def Conv1x1BNReLU(in_channels,out_channels,groups): 12 | return nn.Sequential( 13 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=1,groups=groups), 14 | nn.BatchNorm2d(out_channels), 15 | nn.ReLU6(inplace=True) 16 | ) 17 | 18 | def Conv1x1BN(in_channels,out_channels,groups): 19 | return nn.Sequential( 20 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=1,groups=groups), 21 | nn.BatchNorm2d(out_channels) 22 | ) 23 | 24 | class ChannelShuffle(nn.Module): 25 | def __init__(self, groups): 26 | super(ChannelShuffle, self).__init__() 27 | self.groups = groups 28 | 29 | def forward(self, x): 30 | '''Channel shuffle: [N,C,H,W] -> [N,g,C/g,H,W] -> [N,C/g,g,H,W] -> [N,C,H,W]''' 31 | N, C, H, W = x.size() 32 | g = self.groups 33 | return x.view(N, g, int(C / g), H, W).permute(0, 2, 1, 3, 4).contiguous().view(N, C, H, W) 34 | 35 | 36 | 
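# --- aside (not part of the original file): a concrete view of what ChannelShuffle does.
# With g groups, output channel k reads input channel (k % g) * (C // g) + k // g, which is
# what lets information cross group boundaries between consecutive grouped convolutions.
if __name__ == '__main__':
    _x = torch.arange(6.).view(1, 6, 1, 1)
    print(ChannelShuffle(groups=2)(_x).view(-1).tolist())  # [0.0, 3.0, 1.0, 4.0, 2.0, 5.0]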
class ShuffleNetUnits(nn.Module): 37 | def __init__(self, in_channels, out_channels, stride, groups): 38 | super(ShuffleNetUnits, self).__init__() 39 | self.stride = stride 40 | out_channels = out_channels - in_channels if self.stride>1 else out_channels 41 | mid_channels = out_channels // 4 42 | 43 | self.bottleneck = nn.Sequential( 44 | Conv1x1BNReLU(in_channels, mid_channels,groups), 45 | ChannelShuffle(groups), 46 | Conv3x3BNReLU(mid_channels, mid_channels, stride,groups), 47 | Conv1x1BN(mid_channels, out_channels,groups) 48 | ) 49 | if self.stride>1: 50 | self.shortcut = nn.MaxPool2d(kernel_size=3,stride=2,padding=1) 51 | 52 | self.relu = nn.ReLU6(inplace=True) 53 | 54 | def forward(self, x): 55 | out = self.bottleneck(x) 56 | out = torch.cat([self.shortcut(x), out], dim=1) if self.stride > 1 else (out + x) 57 | return self.relu(out) 58 | 59 | class FisheyeMODNet(nn.Module): 60 | def __init__(self, groups=1, num_classes=2): 61 | super(FisheyeMODNet, self).__init__() 62 | layers = [4, 8, 4] 63 | 64 | self.stage1a = nn.Sequential( 65 | nn.Conv2d(in_channels=3, out_channels=24, kernel_size=3,stride=2, padding=1), 66 | nn.MaxPool2d(kernel_size=2,stride=2), 67 | ) 68 | self.stage2a = self._make_layer(24, 120, groups, layers[0]) 69 | 70 | self.stage1b = nn.Sequential( 71 | nn.Conv2d(in_channels=3, out_channels=24, kernel_size=3, stride=2, padding=1), 72 | nn.MaxPool2d(kernel_size=2, stride=2), 73 | ) 74 | self.stage2b = self._make_layer(24, 120, groups, layers[0]) 75 | 76 | self.stage3 = self._make_layer(240, 480, groups, layers[1]) 77 | self.stage4 = self._make_layer(480, 960, groups, layers[2]) 78 | 79 | self.adapt_conv3 = nn.Conv2d(960, num_classes, kernel_size=1) 80 | self.adapt_conv2 = nn.Conv2d(480, num_classes, kernel_size=1) 81 | self.adapt_conv1 = nn.Conv2d(240, num_classes, kernel_size=1) 82 | 83 | self.up_sampling3 = nn.ConvTranspose2d(in_channels=num_classes, out_channels=num_classes, kernel_size=4, stride=2, padding=1) 84 | self.up_sampling2 = nn.ConvTranspose2d(in_channels=num_classes, out_channels=num_classes, kernel_size=4, stride=2, padding=1) 85 | self.up_sampling1 = nn.ConvTranspose2d(in_channels=num_classes, out_channels=num_classes, kernel_size=16, stride=8, padding=4) 86 | 87 | self.softmax = nn.Softmax(dim=1) 88 | 89 | self.init_params() 90 | 91 | def _make_layer(self, in_channels, out_channels, groups, block_num): 92 | layers = [] 93 | layers.append(ShuffleNetUnits(in_channels=in_channels, out_channels=out_channels, stride=2, groups=groups)) 94 | for idx in range(1, block_num): 95 | layers.append(ShuffleNetUnits(in_channels=out_channels, out_channels=out_channels, stride=1, groups=groups)) 96 | return nn.Sequential(*layers) 97 | 98 | def init_params(self): 99 | for m in self.modules(): 100 | if isinstance(m, nn.Conv2d): 101 | nn.init.kaiming_normal_(m.weight) 102 | nn.init.constant_(m.bias, 0) 103 | elif isinstance(m, nn.BatchNorm2d) or isinstance(m, nn.Linear): 104 | nn.init.constant_(m.weight, 1) 105 | nn.init.constant_(m.bias, 0) 106 | 107 | def forward(self, x, y): 108 | x = self.stage2a(self.stage1a(x)) 109 | y = self.stage2b(self.stage1b(y)) 110 | feature1 = torch.cat([x, y], dim=1) 111 | feature2 = self.stage3(feature1) 112 | feature3 = self.stage4(feature2) 113 | 114 | out3 = self.up_sampling3(self.adapt_conv3(feature3)) 115 | out2 = self.up_sampling2(self.adapt_conv2(feature2) + out3) 116 | out1 = self.up_sampling1(self.adapt_conv1(feature1) + out2) 117 | 118 | out = self.softmax(out1) 119 | return out 120 | 121 | 122 | if __name__ == '__main__': 123 | model 
= FisheyeMODNet() 124 | 125 | input1 = torch.randn(1, 3, 640, 640) 126 | input2 = torch.randn(1, 3, 640, 640) 127 | 128 | out = model(input1, input2) 129 | print(out.shape) -------------------------------------------------------------------------------- /ObjectDetection/FoveaBox.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torchvision 4 | 5 | def Conv3x3ReLU(in_channels,out_channels): 6 | return nn.Sequential( 7 | nn.Conv2d(in_channels=in_channels,out_channels=out_channels,kernel_size=3,stride=1,padding=1), 8 | nn.ReLU6(inplace=True) 9 | ) 10 | 11 | def locLayer(in_channels,out_channels): 12 | return nn.Sequential( 13 | Conv3x3ReLU(in_channels=in_channels, out_channels=in_channels), 14 | Conv3x3ReLU(in_channels=in_channels, out_channels=in_channels), 15 | Conv3x3ReLU(in_channels=in_channels, out_channels=in_channels), 16 | Conv3x3ReLU(in_channels=in_channels, out_channels=in_channels), 17 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=3, stride=1, padding=1), 18 | ) 19 | 20 | def confLayer(in_channels,out_channels): 21 | return nn.Sequential( 22 | Conv3x3ReLU(in_channels=in_channels, out_channels=in_channels), 23 | Conv3x3ReLU(in_channels=in_channels, out_channels=in_channels), 24 | Conv3x3ReLU(in_channels=in_channels, out_channels=in_channels), 25 | Conv3x3ReLU(in_channels=in_channels, out_channels=in_channels), 26 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=3, stride=1, padding=1), 27 | ) 28 | 29 | class FoveaBox(nn.Module): 30 | def __init__(self, num_classes=80): 31 | super(FoveaBox, self).__init__() 32 | self.num_classes = num_classes 33 | resnet = torchvision.models.resnet50() 34 | layers = list(resnet.children()) 35 | 36 | self.layer1 = nn.Sequential(*layers[:5]) 37 | self.layer2 = nn.Sequential(*layers[5]) 38 | self.layer3 = nn.Sequential(*layers[6]) 39 | self.layer4 = nn.Sequential(*layers[7]) 40 | 41 | self.lateral5 = nn.Conv2d(in_channels=2048, out_channels=256, kernel_size=1) 42 | self.lateral4 = nn.Conv2d(in_channels=1024, out_channels=256, kernel_size=1) 43 | self.lateral3 = nn.Conv2d(in_channels=512, out_channels=256, kernel_size=1) 44 | 45 | self.upsample4 = nn.ConvTranspose2d(in_channels=256, out_channels=256, kernel_size=4, stride=2, padding=1) 46 | self.upsample3 = nn.ConvTranspose2d(in_channels=256, out_channels=256, kernel_size=4, stride=2, padding=1) 47 | 48 | self.downsample6 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=2, padding=1) 49 | self.downsample6_relu = nn.ReLU6(inplace=True) 50 | self.downsample5 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=2, padding=1) 51 | 52 | self.loc_layer3 = locLayer(in_channels=256,out_channels=4) 53 | self.conf_layer3 = confLayer(in_channels=256,out_channels=self.num_classes) 54 | 55 | self.loc_layer4 = locLayer(in_channels=256, out_channels=4) 56 | self.conf_layer4 = confLayer(in_channels=256, out_channels=self.num_classes) 57 | 58 | self.loc_layer5 = locLayer(in_channels=256, out_channels=4) 59 | self.conf_layer5 = confLayer(in_channels=256, out_channels=self.num_classes) 60 | 61 | self.loc_layer6 = locLayer(in_channels=256, out_channels=4) 62 | self.conf_layer6 = confLayer(in_channels=256, out_channels=self.num_classes) 63 | 64 | self.loc_layer7 = locLayer(in_channels=256, out_channels=4) 65 | self.conf_layer7 = confLayer(in_channels=256, out_channels=self.num_classes) 66 | 67 | self.init_params() 68 | 69 | def init_params(self): 
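# He initialization for convs and identity for BatchNorm, the same scheme the other
# detection heads in this repository use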
70 | for m in self.modules(): 71 | if isinstance(m, nn.Conv2d): 72 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 73 | elif isinstance(m, nn.BatchNorm2d): 74 | nn.init.constant_(m.weight, 1) 75 | nn.init.constant_(m.bias, 0) 76 | 77 | def forward(self, x): 78 | x = self.layer1(x) 79 | c3 = x = self.layer2(x) 80 | c4 = x = self.layer3(x) 81 | c5 = x = self.layer4(x) 82 | 83 | p5 = self.lateral5(c5) 84 | p4 = self.upsample4(p5) + self.lateral4(c4) 85 | p3 = self.upsample3(p4) + self.lateral3(c3) 86 | 87 | p6 = self.downsample5(p5) 88 | p7 = self.downsample6_relu(self.downsample6(p6)) 89 | 90 | loc3 = self.loc_layer3(p3) 91 | conf3 = self.conf_layer3(p3) 92 | 93 | loc4 = self.loc_layer4(p4) 94 | conf4 = self.conf_layer4(p4) 95 | 96 | loc5 = self.loc_layer5(p5) 97 | conf5 = self.conf_layer5(p5) 98 | 99 | loc6 = self.loc_layer6(p6) 100 | conf6 = self.conf_layer6(p6) 101 | 102 | loc7 = self.loc_layer7(p7) 103 | conf7 = self.conf_layer7(p7) 104 | 105 | locs = torch.cat([loc3.permute(0, 2, 3, 1).contiguous().view(loc3.size(0), -1), 106 | loc4.permute(0, 2, 3, 1).contiguous().view(loc4.size(0), -1), 107 | loc5.permute(0, 2, 3, 1).contiguous().view(loc5.size(0), -1), 108 | loc6.permute(0, 2, 3, 1).contiguous().view(loc6.size(0), -1), 109 | loc7.permute(0, 2, 3, 1).contiguous().view(loc7.size(0), -1)],dim=1) 110 | 111 | confs = torch.cat([conf3.permute(0, 2, 3, 1).contiguous().view(conf3.size(0), -1), 112 | conf4.permute(0, 2, 3, 1).contiguous().view(conf4.size(0), -1), 113 | conf5.permute(0, 2, 3, 1).contiguous().view(conf5.size(0), -1), 114 | conf6.permute(0, 2, 3, 1).contiguous().view(conf6.size(0), -1), 115 | conf7.permute(0, 2, 3, 1).contiguous().view(conf7.size(0), -1),], dim=1) 116 | 117 | out = (locs, confs) 118 | return out 119 | 120 | if __name__ == '__main__': 121 | model = FoveaBox() 122 | print(model) 123 | 124 | input = torch.randn(1, 3, 800, 800) 125 | out = model(input) 126 | print(out[0].shape) 127 | print(out[1].shape) 128 | -------------------------------------------------------------------------------- /ObjectDetection/README.md: -------------------------------------------------------------------------------- 1 | # ObjectDetection-network 2 | Pytorch implementation of ObjectDetection-network 3 | 4 | 5 | 6 | **SSD:** 7 | 8 | SSD: Single Shot MultiBox Detector, 2016 9 | 10 | https://arxiv.org/pdf/1512.02325.pdf 11 | 12 | https://liumin.blog.csdn.net/article/details/100530275 13 | 14 | 15 | 16 | **YOLO:** 17 | 18 | You Only Look Once: Unified, Real-Time Object Detection, 2016 19 | 20 | https://arxiv.org/pdf/1506.02640.pdf 21 | 22 | https://liumin.blog.csdn.net/article/details/100904605 23 | 24 | 25 | 26 | **YOLOv2:** 27 | 28 | YOLO9000: Better, Faster, Stronger, 2017 29 | 30 | https://arxiv.org/pdf/1612.08242.pdf 31 | 32 | https://liumin.blog.csdn.net/article/details/100904645 33 | 34 | 35 | 36 | **YOLOv3:** 37 | 38 | YOLOv3: An Incremental Improvement, 2018 39 | 40 | https://arxiv.org/pdf/1804.02767.pdf 41 | 42 | https://liumin.blog.csdn.net/article/details/100904663 43 | 44 | 45 | 46 | **FCOS:** 47 | 48 | FCOS: Fully Convolutional One-Stage Object Detection, 2019 49 | 50 | https://arxiv.org/pdf/1904.01355.pdf 51 | 52 | https://liumin.blog.csdn.net/article/details/89007219 53 | 54 | 55 | 56 | **FPN:** 57 | 58 | Feature Pyramid Networks for Object Detection, 2017 59 | 60 | https://arxiv.org/pdf/1612.03144v2.pdf 61 | 62 | https://liumin.blog.csdn.net/article/details/100864158 63 | 64 | 65 | 66 | **RetinaNet:** 67 | 68 | 
Focal Loss for Dense Object Detection, 2017 69 | 70 | https://arxiv.org/pdf/1708.02002.pdf 71 | 72 | https://liumin.blog.csdn.net/article/details/102135318 73 | 74 | 75 | 76 | **Objects as Points:** 77 | 78 | Objects as Points, 2019 79 | 80 | https://arxiv.org/pdf/1904.07850v1.pdf 81 | 82 | https://liumin.blog.csdn.net/article/details/100867545 83 | 84 | 85 | 86 | **FSAF:** 87 | 88 | Feature Selective Anchor-Free Module for Single-Shot Object Detection, 2019 89 | 90 | https://arxiv.org/pdf/1903.00621.pdf 91 | 92 | https://liumin.blog.csdn.net/article/details/100942317 93 | 94 | 95 | 96 | **CenterNet** 97 | 98 | CenterNet: Keypoint Triplets for Object Detection, 2019 99 | 100 | https://arxiv.org/pdf/1904.08189.pdf 101 | 102 | https://liumin.blog.csdn.net/article/details/100942259 103 | 104 | 105 | 106 | **FoveaBox** 107 | 108 | FoveaBox: Beyond Anchor-based Object Detector, 2019 109 | 110 | https://arxiv.org/pdf/1904.03797v1.pdf 111 | 112 | https://liumin.blog.csdn.net/article/details/100941880 -------------------------------------------------------------------------------- /ObjectDetection/RetinaNet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torchvision 4 | 5 | def Conv3x3ReLU(in_channels,out_channels): 6 | return nn.Sequential( 7 | nn.Conv2d(in_channels=in_channels,out_channels=out_channels,kernel_size=3,stride=1,padding=1), 8 | nn.ReLU6(inplace=True) 9 | ) 10 | 11 | def locLayer(in_channels,out_channels): 12 | return nn.Sequential( 13 | Conv3x3ReLU(in_channels=in_channels, out_channels=in_channels), 14 | Conv3x3ReLU(in_channels=in_channels, out_channels=in_channels), 15 | Conv3x3ReLU(in_channels=in_channels, out_channels=in_channels), 16 | Conv3x3ReLU(in_channels=in_channels, out_channels=in_channels), 17 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=3, stride=1, padding=1), 18 | ) 19 | 20 | def confLayer(in_channels,out_channels): 21 | return nn.Sequential( 22 | Conv3x3ReLU(in_channels=in_channels, out_channels=in_channels), 23 | Conv3x3ReLU(in_channels=in_channels, out_channels=in_channels), 24 | Conv3x3ReLU(in_channels=in_channels, out_channels=in_channels), 25 | Conv3x3ReLU(in_channels=in_channels, out_channels=in_channels), 26 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=3, stride=1, padding=1), 27 | ) 28 | 29 | class RetinaNet(nn.Module): 30 | def __init__(self, num_classes=80, num_anchores = 9): 31 | super(RetinaNet, self).__init__() 32 | self.num_classes = num_classes 33 | resnet = torchvision.models.resnet50() 34 | layers = list(resnet.children()) 35 | 36 | self.layer1 = nn.Sequential(*layers[:5]) 37 | self.layer2 = nn.Sequential(*layers[5]) 38 | self.layer3 = nn.Sequential(*layers[6]) 39 | self.layer4 = nn.Sequential(*layers[7]) 40 | 41 | self.lateral5 = nn.Conv2d(in_channels=2048, out_channels=256, kernel_size=1) 42 | self.lateral4 = nn.Conv2d(in_channels=1024, out_channels=256, kernel_size=1) 43 | self.lateral3 = nn.Conv2d(in_channels=512, out_channels=256, kernel_size=1) 44 | 45 | self.upsample4 = nn.ConvTranspose2d(in_channels=256, out_channels=256, kernel_size=4, stride=2, padding=1) 46 | self.upsample3 = nn.ConvTranspose2d(in_channels=256, out_channels=256, kernel_size=4, stride=2, padding=1) 47 | 48 | self.downsample6 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=2, padding=1) 49 | self.downsample6_relu = nn.ReLU6(inplace=True) 50 | self.downsample5 = nn.Conv2d(in_channels=256, 
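# the extra pyramid levels P6 and P7 come from stride-2 3x3 convs stacked on top of P5,
# matching the RetinaNet paper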
out_channels=256, kernel_size=3, stride=2, padding=1) 51 | 52 | self.loc_layer3 = locLayer(in_channels=256,out_channels=4*num_anchores) 53 | self.conf_layer3 = confLayer(in_channels=256,out_channels=self.num_classes*num_anchores) 54 | 55 | self.loc_layer4 = locLayer(in_channels=256, out_channels=4*num_anchores) 56 | self.conf_layer4 = confLayer(in_channels=256, out_channels=self.num_classes*num_anchores) 57 | 58 | self.loc_layer5 = locLayer(in_channels=256, out_channels=4*num_anchores) 59 | self.conf_layer5 = confLayer(in_channels=256, out_channels=self.num_classes*num_anchores) 60 | 61 | self.loc_layer6 = locLayer(in_channels=256, out_channels=4*num_anchores) 62 | self.conf_layer6 = confLayer(in_channels=256, out_channels=self.num_classes*num_anchores) 63 | 64 | self.loc_layer7 = locLayer(in_channels=256, out_channels=4*num_anchores) 65 | self.conf_layer7 = confLayer(in_channels=256, out_channels=self.num_classes*num_anchores) 66 | 67 | self.init_params() 68 | 69 | def init_params(self): 70 | for m in self.modules(): 71 | if isinstance(m, nn.Conv2d): 72 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 73 | elif isinstance(m, nn.BatchNorm2d): 74 | nn.init.constant_(m.weight, 1) 75 | nn.init.constant_(m.bias, 0) 76 | 77 | def forward(self, x): 78 | x = self.layer1(x) 79 | c3 =x = self.layer2(x) 80 | c4 =x = self.layer3(x) 81 | c5 = x = self.layer4(x) 82 | 83 | p5 = self.lateral5(c5) 84 | p4 = self.upsample4(p5) + self.lateral4(c4) 85 | p3 = self.upsample3(p4) + self.lateral3(c3) 86 | 87 | p6 = self.downsample5(p5) 88 | p7 = self.downsample6_relu(self.downsample6(p6)) 89 | 90 | loc3 = self.loc_layer3(p3) 91 | conf3 = self.conf_layer3(p3) 92 | 93 | loc4 = self.loc_layer4(p4) 94 | conf4 = self.conf_layer4(p4) 95 | 96 | loc5 = self.loc_layer5(p5) 97 | conf5 = self.conf_layer5(p5) 98 | 99 | loc6 = self.loc_layer6(p6) 100 | conf6 = self.conf_layer6(p6) 101 | 102 | loc7 = self.loc_layer7(p7) 103 | conf7 = self.conf_layer7(p7) 104 | 105 | locs = torch.cat([loc3.permute(0, 2, 3, 1).contiguous().view(loc3.size(0), -1), 106 | loc4.permute(0, 2, 3, 1).contiguous().view(loc4.size(0), -1), 107 | loc5.permute(0, 2, 3, 1).contiguous().view(loc5.size(0), -1), 108 | loc6.permute(0, 2, 3, 1).contiguous().view(loc6.size(0), -1), 109 | loc7.permute(0, 2, 3, 1).contiguous().view(loc7.size(0), -1)],dim=1) 110 | 111 | confs = torch.cat([conf3.permute(0, 2, 3, 1).contiguous().view(conf3.size(0), -1), 112 | conf4.permute(0, 2, 3, 1).contiguous().view(conf4.size(0), -1), 113 | conf5.permute(0, 2, 3, 1).contiguous().view(conf5.size(0), -1), 114 | conf6.permute(0, 2, 3, 1).contiguous().view(conf6.size(0), -1), 115 | conf7.permute(0, 2, 3, 1).contiguous().view(conf7.size(0), -1),], dim=1) 116 | 117 | out = (locs, confs) 118 | return out 119 | 120 | if __name__ == '__main__': 121 | model = RetinaNet() 122 | print(model) 123 | 124 | input = torch.randn(1, 3, 800, 800) 125 | out = model(input) 126 | print(out[0].shape) 127 | print(out[1].shape) 128 | -------------------------------------------------------------------------------- /ObjectDetection/VoVNet.py: -------------------------------------------------------------------------------- 1 | # !/usr/bin/env python 2 | # -- coding: utf-8 -- 3 | # @Time : 2020/6/1 14:40 4 | # @Author : liumin 5 | # @File : VoVNet.py 6 | 7 | import torch 8 | import torch.nn as nn 9 | import torchvision 10 | 11 | __all__ = ['VoVNet', 'vovnet27_slim', 'vovnet39', 'vovnet57'] 12 | 13 | from PIL.Image import Image 14 | 15 | 16 | def 
Conv3x3BNReLU(in_channels,out_channels,stride,groups=1): 17 | return nn.Sequential( 18 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=3, stride=stride, padding=1,groups=groups, bias=False), 19 | nn.BatchNorm2d(out_channels), 20 | nn.ReLU6(inplace=True) 21 | ) 22 | 23 | 24 | def Conv3x3BN(in_channels,out_channels,stride,groups): 25 | return nn.Sequential( 26 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=3, stride=stride, padding=1,groups=groups, bias=False), 27 | nn.BatchNorm2d(out_channels) 28 | ) 29 | 30 | 31 | def Conv1x1BNReLU(in_channels,out_channels): 32 | return nn.Sequential( 33 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=1, bias=False), 34 | nn.BatchNorm2d(out_channels), 35 | nn.ReLU6(inplace=True) 36 | ) 37 | 38 | 39 | def Conv1x1BN(in_channels,out_channels): 40 | return nn.Sequential( 41 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=1, bias=False), 42 | nn.BatchNorm2d(out_channels) 43 | ) 44 | 45 | class OSA_module(nn.Module): 46 | def __init__(self, in_channels,mid_channels, out_channels, block_nums=5): 47 | super(OSA_module, self).__init__() 48 | 49 | self._layers = nn.ModuleList() 50 | self._layers.append(Conv3x3BNReLU(in_channels=in_channels, out_channels=mid_channels, stride=1)) 51 | for idx in range(block_nums-1): 52 | self._layers.append(Conv3x3BNReLU(in_channels=mid_channels, out_channels=mid_channels, stride=1)) 53 | 54 | self.conv1x1 = Conv1x1BNReLU(in_channels+mid_channels*block_nums,out_channels) 55 | 56 | def forward(self, x): 57 | outputs = [] 58 | outputs.append(x) 59 | for _layer in self._layers: 60 | x = _layer(x) 61 | outputs.append(x) 62 | out = torch.cat(outputs, dim=1) 63 | out = self.conv1x1(out) 64 | return out 65 | 66 | 67 | class VoVNet(nn.Module): 68 | def __init__(self, planes, layers, num_classes=2): 69 | super(VoVNet, self).__init__() 70 | 71 | self.groups = 1 72 | self.stage1 = nn.Sequential( 73 | Conv3x3BNReLU(in_channels=3, out_channels=64, stride=2, groups=self.groups), 74 | Conv3x3BNReLU(in_channels=64, out_channels=64, stride=1, groups=self.groups), 75 | Conv3x3BNReLU(in_channels=64, out_channels=128, stride=1, groups=self.groups), 76 | ) 77 | 78 | self.stage2 = self._make_layer(planes[0][0],planes[0][1],planes[0][2],layers[0]) 79 | 80 | self.stage3 = self._make_layer(planes[1][0],planes[1][1],planes[1][2],layers[1]) 81 | 82 | self.stage4 = self._make_layer(planes[2][0],planes[2][1],planes[2][2],layers[2]) 83 | 84 | self.stage5 = self._make_layer(planes[3][0],planes[3][1],planes[3][2],layers[3]) 85 | 86 | self.avgpool = nn.AdaptiveAvgPool2d(output_size=1) 87 | self.flatten = nn.Flatten() 88 | self.dropout = nn.Dropout(p=0.2) 89 | self.linear = nn.Linear(in_features=planes[3][2], out_features=num_classes) 90 | 91 | def _make_layer(self, in_channels, mid_channels,out_channels, block_num): 92 | layers = [] 93 | layers.append(nn.MaxPool2d(kernel_size=3, stride=2, padding=1)) 94 | for idx in range(block_num): 95 | layers.append(OSA_module(in_channels=in_channels, mid_channels=mid_channels, out_channels=out_channels)) 96 | in_channels = out_channels 97 | return nn.Sequential(*layers) 98 | 99 | def init_params(self): 100 | for m in self.modules(): 101 | if isinstance(m, nn.Conv2d): 102 | nn.init.kaiming_normal_(m.weight) 103 | if m.bias is not None: 104 | nn.init.constant_(m.bias, 0) 105 | elif isinstance(m, nn.BatchNorm2d) or isinstance(m, nn.Linear): 106 | nn.init.constant_(m.weight, 1) 107 | 
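# note: a constant weight of 1 for Linear layers gives every output unit identical
# gradients (no symmetry breaking); kept as written, but a normal init is the usual choice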
nn.init.constant_(m.bias, 0) 108 | 109 | def forward(self, x): 110 | x = self.stage1(x) 111 | x = self.stage2(x) 112 | x = self.stage3(x) 113 | x = self.stage4(x) 114 | x = self.stage5(x) 115 | x = self.avgpool(x) 116 | x = self.flatten(x) 117 | x = self.dropout(x) 118 | out = self.linear(x) 119 | return out 120 | 121 | def vovnet27_slim(**kwargs): 122 | planes = [[128, 64, 128], 123 | [128, 80, 256], 124 | [256, 96, 384], 125 | [384, 112, 512]] 126 | layers = [1, 1, 1, 1] 127 | model = VoVNet(planes, layers) 128 | return model 129 | 130 | def vovnet39(**kwargs): 131 | planes = [[128, 128, 256], 132 | [256, 160, 512], 133 | [512, 192, 768], 134 | [768, 224, 1024]] 135 | layers = [1, 1, 2, 2] 136 | model = VoVNet(planes, layers) 137 | return model 138 | 139 | def vovnet57(**kwargs): 140 | planes = [[128, 128, 256], 141 | [256, 160, 512], 142 | [512, 192, 768], 143 | [768, 224, 1024]] 144 | layers = [1, 1, 4, 3] 145 | model = VoVNet(planes, layers) 146 | return model 147 | 148 | 149 | if __name__=='__main__': 150 | model = vovnet27_slim() 151 | print(model) 152 | 153 | input = torch.randn(1, 3, 64, 64) 154 | out = model(input) 155 | print(out.shape) 156 | -------------------------------------------------------------------------------- /ObjectDetection/YOLO.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | def Conv1x1BNReLU(in_channels,out_channels): 5 | return nn.Sequential( 6 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=1), 7 | nn.BatchNorm2d(out_channels), 8 | nn.ReLU6(inplace=True) 9 | ) 10 | 11 | def Conv3x3BNReLU(in_channels,out_channels, stride=1): 12 | return nn.Sequential( 13 | nn.Conv2d(in_channels=in_channels,out_channels=out_channels,kernel_size=3,stride=stride,padding=1), 14 | nn.BatchNorm2d(out_channels), 15 | nn.ReLU6(inplace=True) 16 | ) 17 | 18 | class YOLO(nn.Module): 19 | def __init__(self): 20 | super(YOLO, self).__init__() 21 | 22 | self.features = nn.Sequential( 23 | nn.Conv2d(in_channels=3,out_channels=64, kernel_size=7,stride=2,padding=3), 24 | nn.MaxPool2d(kernel_size=2,stride=2), 25 | Conv3x3BNReLU(in_channels=64, out_channels=192), 26 | nn.MaxPool2d(kernel_size=2, stride=2), 27 | Conv1x1BNReLU(in_channels=192, out_channels=128), 28 | Conv3x3BNReLU(in_channels=128, out_channels=256), 29 | Conv1x1BNReLU(in_channels=256, out_channels=256), 30 | Conv3x3BNReLU(in_channels=256, out_channels=512), 31 | nn.MaxPool2d(kernel_size=2, stride=2), 32 | Conv1x1BNReLU(in_channels=512, out_channels=256), 33 | Conv3x3BNReLU(in_channels=256, out_channels=512), 34 | Conv1x1BNReLU(in_channels=512, out_channels=256), 35 | Conv3x3BNReLU(in_channels=256, out_channels=512), 36 | Conv1x1BNReLU(in_channels=512, out_channels=256), 37 | Conv3x3BNReLU(in_channels=256, out_channels=512), 38 | Conv1x1BNReLU(in_channels=512, out_channels=256), 39 | Conv3x3BNReLU(in_channels=256, out_channels=512), 40 | Conv1x1BNReLU(in_channels=512, out_channels=512), 41 | Conv3x3BNReLU(in_channels=512, out_channels=1024), 42 | nn.MaxPool2d(kernel_size=2, stride=2), 43 | Conv1x1BNReLU(in_channels=1024, out_channels=512), 44 | Conv3x3BNReLU(in_channels=512, out_channels= 1024), 45 | Conv1x1BNReLU(in_channels=1024, out_channels=512), 46 | Conv3x3BNReLU(in_channels=512, out_channels=1024), 47 | Conv3x3BNReLU(in_channels=1024, out_channels=1024), 48 | Conv3x3BNReLU(in_channels=1024, out_channels=1024, stride=2), 49 | Conv3x3BNReLU(in_channels=1024, out_channels=1024), 50 | 
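# by here a 448x448 input has been reduced 64x to a 7x7x1024 map, matching the
# 1024*7*7 flatten below; the final 1470-dim output reshapes to YOLOv1's 7x7x30 grid
# (2 boxes * 5 values + 20 VOC classes per cell)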
Conv3x3BNReLU(in_channels=1024, out_channels=1024), 51 | ) 52 | 53 | self.classifier = nn.Sequential( 54 | nn.Linear(1024 * 7 * 7, 4096), 55 | nn.ReLU(True), 56 | nn.Dropout(), 57 | nn.Linear(4096, 1470), 58 | ) 59 | 60 | def forward(self, x): 61 | x = self.features(x) 62 | x = x.view(x.size(0), -1) 63 | out = self.classifier(x) 64 | return out 65 | 66 | 67 | if __name__ == '__main__': 68 | model = YOLO() 69 | print(model) 70 | 71 | data = torch.randn(1,3,448,448) 72 | output = model(data) 73 | print(output.shape) -------------------------------------------------------------------------------- /ObjectDetection/YOLOv2.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | def Conv3x3BNReLU(in_channels,out_channels,padding=0): 5 | return nn.Sequential( 6 | nn.Conv2d(in_channels=in_channels,out_channels=out_channels,kernel_size=3,stride=1,padding=1), 7 | nn.BatchNorm2d(out_channels), 8 | nn.ReLU6(inplace=True) 9 | ) 10 | 11 | def Conv1x1BNReLU(in_channels,out_channels): 12 | return nn.Sequential( 13 | nn.Conv2d(in_channels=in_channels,out_channels=out_channels,kernel_size=1,stride=1,padding=0), 14 | nn.BatchNorm2d(out_channels), 15 | nn.ReLU6(inplace=True) 16 | ) 17 | 18 | class Darknet19(nn.Module): 19 | def __init__(self, num_classes=1000): 20 | super(Darknet19, self).__init__() 21 | 22 | self.feature = nn.Sequential( 23 | Conv3x3BNReLU(in_channels=3, out_channels=32), 24 | nn.MaxPool2d(kernel_size=2,stride=2), 25 | Conv3x3BNReLU(in_channels=32, out_channels=64), 26 | nn.MaxPool2d(kernel_size=2, stride=2), 27 | Conv3x3BNReLU(in_channels=64, out_channels=128), 28 | Conv1x1BNReLU(in_channels=128, out_channels=64), 29 | Conv3x3BNReLU(in_channels=64, out_channels=128), 30 | nn.MaxPool2d(kernel_size=2, stride=2), 31 | Conv3x3BNReLU(in_channels=128, out_channels=256), 32 | Conv1x1BNReLU(in_channels=256, out_channels=128), 33 | Conv3x3BNReLU(in_channels=128, out_channels=256), 34 | nn.MaxPool2d(kernel_size=2, stride=2), 35 | Conv3x3BNReLU(in_channels=256, out_channels=512), 36 | Conv1x1BNReLU(in_channels=512, out_channels=256), 37 | Conv3x3BNReLU(in_channels=256, out_channels=512), 38 | Conv1x1BNReLU(in_channels=512, out_channels=256), 39 | Conv3x3BNReLU(in_channels=256, out_channels=512), 40 | nn.MaxPool2d(kernel_size=2, stride=2), 41 | Conv3x3BNReLU(in_channels=512, out_channels=1024), 42 | Conv1x1BNReLU(in_channels=1024, out_channels=512), 43 | Conv3x3BNReLU(in_channels=512, out_channels=1024), 44 | Conv1x1BNReLU(in_channels=1024, out_channels=512), 45 | Conv3x3BNReLU(in_channels=512, out_channels=1024), 46 | ) 47 | 48 | self.classifier = nn.Sequential( 49 | Conv1x1BNReLU(in_channels=1024, out_channels=num_classes), 50 | nn.AvgPool2d(kernel_size=7,stride=1), 51 | ) 52 | self.softmax = nn.Softmax(dim=1) 53 | 54 | def forward(self, x): 55 | x = self.feature(x) 56 | x = self.classifier(x) 57 | x = torch.squeeze(x, dim=3).contiguous() 58 | x = torch.squeeze(x, dim=2).contiguous() 59 | out = self.softmax(x) 60 | return out 61 | 62 | if __name__ == '__main__': 63 | model = Darknet19() 64 | print(model) 65 | 66 | input = torch.randn(1,3,224,224) 67 | out = model(input) 68 | print(out.shape) -------------------------------------------------------------------------------- /ObjectDetection/YOLOv3.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | def Conv3x3BNReLU(in_channels,out_channels,stride=1): 5 | return nn.Sequential( 6 | 
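# 3x3 conv + BN + ReLU6; the stride-2 version of this block replaces pooling for all
# downsampling in the Darknet-53 backbone below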
nn.Conv2d(in_channels=in_channels,out_channels=out_channels,kernel_size=3,stride=stride,padding=1), 7 | nn.BatchNorm2d(out_channels), 8 | nn.ReLU6(inplace=True) 9 | ) 10 | 11 | def Conv1x1BNReLU(in_channels,out_channels): 12 | return nn.Sequential( 13 | nn.Conv2d(in_channels=in_channels,out_channels=out_channels,kernel_size=1,stride=1,padding=0), 14 | nn.BatchNorm2d(out_channels), 15 | nn.ReLU6(inplace=True) 16 | ) 17 | 18 | class Residual(nn.Module): 19 | def __init__(self, nchannels): 20 | super(Residual, self).__init__() 21 | mid_channels = nchannels // 2 22 | self.conv1x1 = Conv1x1BNReLU(in_channels=nchannels, out_channels=mid_channels) 23 | self.conv3x3 = Conv3x3BNReLU(in_channels=mid_channels, out_channels=nchannels) 24 | 25 | def forward(self, x): 26 | out = self.conv3x3(self.conv1x1(x)) 27 | return out + x 28 | 29 | class Darknet53(nn.Module): # Darknet-53 backbone used by YOLOv3 (residual blocks laid out as [1, 2, 8, 8, 4]) 30 | def __init__(self, num_classes=1000): 31 | super(Darknet53, self).__init__() 32 | self.first_conv = Conv3x3BNReLU(in_channels=3, out_channels=32) 33 | 34 | self.block1 = self._make_layers(in_channels=32,out_channels=64, block_num=1) 35 | self.block2 = self._make_layers(in_channels=64,out_channels=128, block_num=2) 36 | self.block3 = self._make_layers(in_channels=128,out_channels=256, block_num=8) 37 | self.block4 = self._make_layers(in_channels=256,out_channels=512, block_num=8) 38 | self.block5 = self._make_layers(in_channels=512,out_channels=1024, block_num=4) 39 | 40 | self.avg_pool = nn.AvgPool2d(kernel_size=8,stride=1) 41 | self.linear = nn.Linear(in_features=1024,out_features=num_classes) 42 | self.softmax = nn.Softmax(dim=1) 43 | 44 | def _make_layers(self, in_channels,out_channels, block_num): 45 | _layers = [] 46 | _layers.append(Conv3x3BNReLU(in_channels=in_channels, out_channels=out_channels, stride=2)) 47 | for _ in range(block_num): 48 | _layers.append(Residual(nchannels=out_channels)) 49 | return nn.Sequential(*_layers) 50 | 51 | def forward(self, x): 52 | x = self.first_conv(x) 53 | x = self.block1(x) 54 | x = self.block2(x) 55 | x = self.block3(x) 56 | x = self.block4(x) 57 | x = self.block5(x) 58 | 59 | x = self.avg_pool(x) 60 | x = x.view(x.size(0),-1) 61 | x = self.linear(x) 62 | out = self.softmax(x) 63 | return out 64 | 65 | if __name__ == '__main__': 66 | model = Darknet53() 67 | print(model) 68 | 69 | input = torch.randn(1,3,256,256) 70 | out = model(input) 71 | print(out.shape) -------------------------------------------------------------------------------- /Others/DynamicReLU.py: -------------------------------------------------------------------------------- 1 | # !/usr/bin/env python 2 | # -- coding: utf-8 -- 3 | # @Time : 2020/9/11 13:57 4 | # @Author : liumin 5 | # @File : DynamicReLU.py 6 | 7 | import torch 8 | import torch.nn as nn 9 | import torchvision 10 | import torch.nn.functional as F 11 | 12 | class BatchNorm(nn.Module): 13 | def forward(self, x): 14 | return 2 * x - 1  # fixed affine rescale of the sigmoid output from [0,1] to [-1,1]; despite the name, this is not batch normalization 15 | 16 | 17 | class DynamicReLU_A(nn.Module): 18 | def __init__(self, channels, K=2,ratio=6): 19 | super(DynamicReLU_A, self).__init__() 20 | mid_channels = 2*K 21 | 22 | self.K = K 23 | self.lambdas = torch.Tensor([1.]*K + [0.5]*K).float() 24 | self.init_v = torch.Tensor([1.] 
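# init_v biases the 2K coefficients toward (a1 = 1, everything else 0), so the activation
# starts out close to a plain ReLU; the lambdas above scale how far the learned residuals
# can move the slopes and intercepts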
+ [0.]*(2*K - 1)).float() 25 | 26 | self.avg_pool = nn.AdaptiveAvgPool2d(output_size=1) 27 | self.dynamic = nn.Sequential( 28 | nn.Linear(in_features=channels,out_features=channels // ratio), 29 | nn.ReLU(inplace=True), 30 | nn.Linear(in_features=channels // ratio, out_features=mid_channels), 31 | nn.Sigmoid(), 32 | BatchNorm() 33 | ) 34 | 35 | def forward(self, x): 36 | b, c, _, _ = x.size() 37 | y = self.avg_pool(x).view(b, c) 38 | z = self.dynamic(y) 39 | 40 | relu_coefs = z.view(-1, 2 * self.K) * self.lambdas + self.init_v 41 | x_perm = x.transpose(0, -1).unsqueeze(-1) 42 | output = x_perm * relu_coefs[:, :self.K] + relu_coefs[:, self.K:] 43 | 44 | output = torch.max(output, dim=-1)[0].transpose(0, -1) 45 | return output 46 | 47 | 48 | class DynamicReLU_B(nn.Module): 49 | def __init__(self, channels, K=2,ratio=6): 50 | super(DynamicReLU_B, self).__init__() 51 | mid_channels = 2*K*channels 52 | 53 | self.K = K 54 | self.channels = channels 55 | self.lambdas = torch.Tensor([1.]*K + [0.5]*K).float() 56 | self.init_v = torch.Tensor([1.] + [0.]*(2*K - 1)).float() 57 | 58 | self.avg_pool = nn.AdaptiveAvgPool2d(output_size=1) 59 | self.dynamic = nn.Sequential( 60 | nn.Linear(in_features=channels,out_features=channels // ratio), 61 | nn.ReLU(inplace=True), 62 | nn.Linear(in_features=channels // ratio, out_features=mid_channels), 63 | nn.Sigmoid(), 64 | BatchNorm() 65 | ) 66 | 67 | def forward(self, x): 68 | b, c, _, _ = x.size() 69 | y = self.avg_pool(x).view(b, c) 70 | z = self.dynamic(y) 71 | 72 | relu_coefs = z.view(-1, self.channels, 2 * self.K) * self.lambdas + self.init_v 73 | x_perm = x.permute(2, 3, 0, 1).unsqueeze(-1) 74 | output = x_perm * relu_coefs[:, :, :self.K] + relu_coefs[:, :, self.K:] 75 | output = torch.max(output, dim=-1)[0].permute(2, 3, 0, 1) 76 | return output 77 | 78 | if __name__=='__main__': 79 | model = DynamicReLU_B(64) 80 | print(model) 81 | 82 | input = torch.randn(1, 64, 56, 56) 83 | out = model(input) 84 | print(out.shape) -------------------------------------------------------------------------------- /Others/PyramidalConvolution.py: -------------------------------------------------------------------------------- 1 | # !/usr/bin/env python 2 | # -- coding: utf-8 -- 3 | # @Time : 2020/6/28 13:40 4 | # @Author : liumin 5 | # @File : PyramidalConvolution.py -------------------------------------------------------------------------------- /PortraitSegmentation/SINet.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shanglianlm0525/PyTorch-Networks/a6b6dd4b7876ba8473a08e116485a0492a88cd48/PortraitSegmentation/SINet.py -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PyTorch-Models 2 | Pytorch implementation of cnn network 3 | 4 | 5 | 6 | 7 | **Classical network** 8 | 9 | - **AlexNet:** 10 | 11 | - **VGG:** 12 | 13 | - **ResNet:** 14 | 15 | - **InceptionV1:** 16 | 17 | - **InceptionV2 and InceptionV3:** 18 | 19 | - **InceptionV4 and Inception-ResNet:** 20 | 21 | 22 | 23 | **Lightweight network** 24 | 25 | - **MobileNets:** 26 | - **MobileNetV2:** 27 | - **MobileNetV3:** 28 | - **ShuffleNet:** 29 | - **ShuffleNet V2:** 30 | - **SqueezeNet** 31 | - **Xception** 32 | - **MixNet** 33 | - **GhostNet** 34 | 35 | 36 | 37 | **ObjectDetection-network** 38 | 39 | - **SSD:** 40 | - **YOLO:** 41 | - **YOLOv2:** 42 | - **YOLOv3:** 43 | - **FCOS:** 44 | - **FPN:** 45 | - **RetinaNet** 
46 | - **Objects as Points:** 47 | - **FSAF:** 48 | - **CenterNet** 49 | - **FoveaBox** 50 | 51 | 52 | 53 | **Semantic Segmentation** 54 | 55 | - **FCN** 56 | 57 | - **Fast-SCNN** 58 | 59 | - **LEDNet:** 60 | 61 | - **LRNNet** 62 | 63 | - **FisheyeMODNet:** 64 | 65 | 66 | 67 | **Instance Segmentation** 68 | 69 | - **PolarMask** 70 | 71 | 72 | 73 | **FaceDetectorAndRecognition** 74 | 75 | - **FaceBoxes** 76 | - **LFFD** 77 | - **VarGFaceNet** 78 | 79 | 80 | 81 | **HumanPoseEstimation** 82 | 83 | - **Stacked Hourglass Networks** 84 | - **Simple Baselines** 85 | - **LPN** 86 | 87 | 88 | 89 | ## Star History 90 | 91 | [![Star History Chart](https://api.star-history.com/svg?repos=shanglianlm0525/PyTorch-Networks&type=Date)](https://star-history.com/#shanglianlm0525/PyTorch-Networks&Date) 92 | 93 | 94 | -------------------------------------------------------------------------------- /SemanticSegmentation/FCN.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torchvision 4 | 5 | class FCN8s(nn.Module): 6 | def __init__(self, num_classes): 7 | super(FCN8s, self).__init__() 8 | vgg = torchvision.models.vgg16() 9 | 10 | features = list(vgg.features.children()) 11 | 12 | self.padd = nn.ZeroPad2d([100,100,100,100]) 13 | 14 | self.pool3 = nn.Sequential(*features[:17]) 15 | self.pool4 = nn.Sequential(*features[17:24]) 16 | self.pool5 = nn.Sequential(*features[24:]) 17 | 18 | self.pool3_conv1x1 = nn.Conv2d(256, num_classes, kernel_size=1) 19 | self.pool4_conv1x1 = nn.Conv2d(512, num_classes, kernel_size=1) 20 | 21 | self.output5 = nn.Sequential( 22 | nn.Conv2d(512, 4096, kernel_size=7), 23 | nn.ReLU(inplace=True), 24 | nn.Dropout(), 25 | nn.Conv2d(4096, 4096, kernel_size=1), 26 | nn.ReLU(inplace=True), 27 | nn.Dropout(), 28 | nn.Conv2d(4096, num_classes, kernel_size=1), 29 | ) 30 | 31 | self.up_pool3_out = nn.ConvTranspose2d(num_classes, num_classes, kernel_size=16, stride=8) 32 | self.up_pool4_out = nn.ConvTranspose2d(num_classes, num_classes, kernel_size=4, stride=2) 33 | self.up_pool5_out = nn.ConvTranspose2d(num_classes, num_classes, kernel_size=4, stride=2) 34 | 35 | def forward(self, x): 36 | _, _, h, w = x.size()  # x is [N, C, H, W] 37 | 38 | x = self.padd(x) 39 | pool3 = self.pool3(x) 40 | pool4 = self.pool4(pool3) 41 | pool5 = self.pool5(pool4) 42 | 43 | output5 = self.up_pool5_out(self.output5(pool5)) 44 | 45 | pool4_out = self.pool4_conv1x1(0.01 * pool4) 46 | output4 = self.up_pool4_out(pool4_out[:,:,5:(5 + output5.size()[2]) ,5:(5 + output5.size()[3])]+output5) 47 | 48 | pool3_out = self.pool3_conv1x1(0.0001 * pool3) 49 | output3 = pool3_out[:, :, 9:(9 + output4.size()[2]), 9:(9 + output4.size()[3])] + output4  # fuse with the pool3 features at 1/8 resolution 50 | 51 | out = self.up_pool3_out(output3)  # a single 8x upsampling back to the padded input size 52 | 53 | out = out[:, :, 31: (31 + h), 31: (31 + w)].contiguous() 54 | return out 55 | 56 | 57 | if __name__ == '__main__': 58 | model = FCN8s(num_classes=20) 59 | print(model) 60 | 61 | input = torch.randn(1,3,224,224) 62 | output = model(input) 63 | print(output.shape) 64 | 65 | -------------------------------------------------------------------------------- /SemanticSegmentation/FisheyeMODNet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | def Conv3x3BNReLU(in_channels,out_channels,stride,groups): 5 | return nn.Sequential( 6 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=3, stride=stride, padding=1,groups=groups), 7 | 
nn.BatchNorm2d(out_channels), 8 | nn.ReLU6(inplace=True) 9 | ) 10 | 11 | def Conv1x1BNReLU(in_channels,out_channels,groups): 12 | return nn.Sequential( 13 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=1,groups=groups), 14 | nn.BatchNorm2d(out_channels), 15 | nn.ReLU6(inplace=True) 16 | ) 17 | 18 | def Conv1x1BN(in_channels,out_channels,groups): 19 | return nn.Sequential( 20 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=1,groups=groups), 21 | nn.BatchNorm2d(out_channels) 22 | ) 23 | 24 | class ChannelShuffle(nn.Module): 25 | def __init__(self, groups): 26 | super(ChannelShuffle, self).__init__() 27 | self.groups = groups 28 | 29 | def forward(self, x): 30 | '''Channel shuffle: [N,C,H,W] -> [N,g,C/g,H,W] -> [N,C/g,g,H,W] -> [N,C,H,W]''' 31 | N, C, H, W = x.size() 32 | g = self.groups 33 | return x.view(N, g, C // g, H, W).permute(0, 2, 1, 3, 4).contiguous().view(N, C, H, W) 34 | 35 | 36 | class ShuffleNetUnits(nn.Module): 37 | def __init__(self, in_channels, out_channels, stride, groups): 38 | super(ShuffleNetUnits, self).__init__() 39 | self.stride = stride 40 | out_channels = out_channels - in_channels if self.stride>1 else out_channels 41 | mid_channels = out_channels // 4 42 | 43 | self.bottleneck = nn.Sequential( 44 | Conv1x1BNReLU(in_channels, mid_channels,groups), 45 | ChannelShuffle(groups), 46 | Conv3x3BNReLU(mid_channels, mid_channels, stride,groups), 47 | Conv1x1BN(mid_channels, out_channels,groups) 48 | ) 49 | if self.stride>1: 50 | self.shortcut = nn.MaxPool2d(kernel_size=3,stride=2,padding=1) 51 | 52 | self.relu = nn.ReLU6(inplace=True) 53 | 54 | def forward(self, x): 55 | out = self.bottleneck(x) 56 | out = torch.cat([self.shortcut(x), out], dim=1) if self.stride > 1 else (out + x) 57 | return self.relu(out) 58 | 59 | class FisheyeMODNet(nn.Module): 60 | def __init__(self, groups=1, num_classes=2): 61 | super(FisheyeMODNet, self).__init__() 62 | layers = [4, 8, 4] 63 | 64 | self.stage1a = nn.Sequential( 65 | nn.Conv2d(in_channels=3, out_channels=24, kernel_size=3,stride=2, padding=1), 66 | nn.MaxPool2d(kernel_size=2,stride=2), 67 | ) 68 | self.stage2a = self._make_layer(24, 120, groups, layers[0]) 69 | 70 | self.stage1b = nn.Sequential( 71 | nn.Conv2d(in_channels=3, out_channels=24, kernel_size=3, stride=2, padding=1), 72 | nn.MaxPool2d(kernel_size=2, stride=2), 73 | ) 74 | self.stage2b = self._make_layer(24, 120, groups, layers[0]) 75 | 76 | self.stage3 = self._make_layer(240, 480, groups, layers[1]) 77 | self.stage4 = self._make_layer(480, 960, groups, layers[2]) 78 | 79 | self.adapt_conv3 = nn.Conv2d(960, num_classes, kernel_size=1) 80 | self.adapt_conv2 = nn.Conv2d(480, num_classes, kernel_size=1) 81 | self.adapt_conv1 = nn.Conv2d(240, num_classes, kernel_size=1) 82 | 83 | self.up_sampling3 = nn.ConvTranspose2d(in_channels=num_classes, out_channels=num_classes, kernel_size=4, stride=2, padding=1) 84 | self.up_sampling2 = nn.ConvTranspose2d(in_channels=num_classes, out_channels=num_classes, kernel_size=4, stride=2, padding=1) 85 | self.up_sampling1 = nn.ConvTranspose2d(in_channels=num_classes, out_channels=num_classes, kernel_size=16, stride=8, padding=4) 86 | 87 | self.softmax = nn.Softmax(dim=1) 88 | 89 | self.init_params() 90 | 91 | def _make_layer(self, in_channels, out_channels, groups, block_num): 92 | layers = [] 93 | layers.append(ShuffleNetUnits(in_channels=in_channels, out_channels=out_channels, stride=2, groups=groups)) 94 | for idx in range(1, block_num): 95 | 
layers.append(ShuffleNetUnits(in_channels=out_channels, out_channels=out_channels, stride=1, groups=groups)) 96 | return nn.Sequential(*layers) 97 | 98 | def init_params(self): 99 | for m in self.modules(): 100 | if isinstance(m, nn.Conv2d) or isinstance(m, nn.ConvTranspose2d): 101 | nn.init.kaiming_normal_(m.weight) 102 | if m.bias is not None: nn.init.constant_(m.bias, 0) 103 | elif isinstance(m, nn.BatchNorm2d):  # constant-1 weights only make sense for BN; this model has no nn.Linear layers 104 | nn.init.constant_(m.weight, 1) 105 | nn.init.constant_(m.bias, 0) 106 | 107 | def forward(self, x, y): 108 | x = self.stage2a(self.stage1a(x)) 109 | y = self.stage2b(self.stage1b(y)) 110 | feature1 = torch.cat([x, y], dim=1) 111 | feature2 = self.stage3(feature1) 112 | feature3 = self.stage4(feature2) 113 | 114 | out3 = self.up_sampling3(self.adapt_conv3(feature3)) 115 | out2 = self.up_sampling2(self.adapt_conv2(feature2) + out3) 116 | out1 = self.up_sampling1(self.adapt_conv1(feature1) + out2) 117 | 118 | out = self.softmax(out1) 119 | return out 120 | 121 | 122 | if __name__ == '__main__': 123 | model = FisheyeMODNet() 124 | 125 | input1 = torch.randn(1, 3, 640, 640) 126 | input2 = torch.randn(1, 3, 640, 640) 127 | 128 | out = model(input1, input2) 129 | print(out.shape) -------------------------------------------------------------------------------- /SemanticSegmentation/ICNet.py: -------------------------------------------------------------------------------- 1 | # !/usr/bin/env python 2 | # -- coding: utf-8 -- 3 | # @Time : 2020/10/28 16:41 4 | # @Author : liumin 5 | # @File : ICNet.py 6 | 7 | import torch 8 | import torch.nn as nn 9 | import torch.nn.functional as F 10 | import torchvision 11 | 12 | __all__ = ["ICNet"] 13 | 14 | 15 | def Conv1x1BN(in_channels,out_channels): 16 | return nn.Sequential( 17 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=1, bias=False), 18 | nn.BatchNorm2d(out_channels) 19 | ) 20 | 21 | def Conv1x1BNReLU(in_channels,out_channels): 22 | return nn.Sequential( 23 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=1, bias=False), 24 | nn.BatchNorm2d(out_channels), 25 | nn.ReLU(inplace=True) 26 | ) 27 | 28 | 29 | def Conv3x3BN(in_channels,out_channels,stride,dilation=1): 30 | return nn.Sequential( 31 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=3, stride=stride, padding=dilation,dilation=dilation, bias=False), 32 | nn.BatchNorm2d(out_channels) 33 | ) 34 | 35 | def Conv3x3BNReLU(in_channels,out_channels,stride,dilation=1): 36 | return nn.Sequential( 37 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=3, stride=stride, padding=dilation,dilation=dilation, bias=False), 38 | nn.BatchNorm2d(out_channels), 39 | nn.ReLU(inplace=True) 40 | ) 41 | 42 | 43 | class CascadeFeatureFusion(nn.Module): 44 | def __init__(self,low_channels, high_channels, out_channels, num_classes): 45 | super(CascadeFeatureFusion, self).__init__() 46 | 47 | self.conv_low = Conv3x3BNReLU(low_channels,out_channels,1,dilation=2) 48 | self.conv_high = Conv3x3BNReLU(high_channels,out_channels,1,dilation=1) 49 | self.relu = nn.ReLU(inplace=True) 50 | self.conv_low_cls = nn.Conv2d(out_channels, num_classes, 1, bias=False) 51 | 52 | def forward(self, x_low, x_high): 53 | x_low = F.interpolate(x_low, size=x_high.size()[2:], mode='bilinear', align_corners=True) 54 | x_low = self.conv_low(x_low) 55 | x_high = self.conv_high(x_high) 56 | out = self.relu(x_low + x_high) 57 | x_low_cls = self.conv_low_cls(x_low) 58 | return out, x_low_cls 59 | 60 | 61 | class Backbone(nn.Module): 62 | def 
__init__(self, pyramids=[1,2,3,6]): 63 | super(Backbone, self).__init__() 64 | self.pretrained = torchvision.models.resnet50(pretrained=True) 65 | 66 | def forward(self, x): 67 | x = self.pretrained.conv1(x) 68 | x = self.pretrained.bn1(x) 69 | x = self.pretrained.relu(x) 70 | x = self.pretrained.maxpool(x) 71 | c1 = self.pretrained.layer1(x) 72 | c2 = self.pretrained.layer2(c1) 73 | c3 = self.pretrained.layer3(c2) 74 | c4 = self.pretrained.layer4(c3) 75 | return c1, c2, c3, c4 76 | 77 | class PyramidPoolingModule(nn.Module): 78 | def __init__(self, pyramids=[1,2,3,6]): 79 | super(PyramidPoolingModule, self).__init__() 80 | self.pyramids = pyramids 81 | 82 | def forward(self, x): 83 | feat = x 84 | height, width = x.shape[2:] 85 | for bin_size in self.pyramids: 86 | feat_x = F.adaptive_avg_pool2d(x, output_size=bin_size) 87 | feat_x = F.interpolate(feat_x, size=(height, width), mode='bilinear', align_corners=True) 88 | feat = feat + feat_x 89 | return feat 90 | 91 | 92 | class ICNet(nn.Module): 93 | def __init__(self, num_classes): 94 | super(ICNet, self).__init__() 95 | 96 | self.conv_sub1 = nn.Sequential( 97 | Conv3x3BNReLU(3, 32, 2), 98 | Conv3x3BNReLU(32, 32, 2), 99 | Conv3x3BNReLU(32, 64, 2) 100 | ) 101 | self.backbone = Backbone() 102 | self.ppm = PyramidPoolingModule() 103 | 104 | self.cff_12 = CascadeFeatureFusion(128, 64, 128, num_classes) 105 | self.cff_24 = CascadeFeatureFusion(2048, 512, 128, num_classes) 106 | 107 | self.conv_cls = nn.Conv2d(128, num_classes, 1, bias=False) 108 | 109 | def forward(self, x): 110 | # sub 1 111 | x_sub1 = self.conv_sub1(x) 112 | # sub 2 113 | x_sub2 = F.interpolate(x, scale_factor=0.5, mode='bilinear') 114 | _, x_sub2, _, _ = self.backbone(x_sub2) 115 | # sub 4 116 | x_sub4 = F.interpolate(x, scale_factor=0.25, mode='bilinear') 117 | _, _, _, x_sub4 = self.backbone(x_sub4) 118 | 119 | # add PyramidPoolingModule 120 | x_sub4 = self.ppm(x_sub4) 121 | 122 | outs = list() 123 | x_cff_24, x_24_cls = self.cff_24(x_sub4, x_sub2) 124 | outs.append(x_24_cls) 125 | # x_cff_12, x_12_cls = self.cff_12(x_sub2, x_sub1) 126 | x_cff_12, x_12_cls = self.cff_12(x_cff_24, x_sub1) 127 | outs.append(x_12_cls) 128 | 129 | up_x2 = F.interpolate(x_cff_12, scale_factor=2, mode='bilinear') 130 | up_x2 = self.conv_cls(up_x2) 131 | outs.append(up_x2) 132 | up_x8 = F.interpolate(up_x2, scale_factor=4, mode='bilinear') 133 | outs.append(up_x8) 134 | # 1 -> 1/4 -> 1/8 -> 1/16 135 | outs.reverse() 136 | 137 | return outs 138 | 139 | 140 | if __name__ == '__main__': 141 | model = ICNet(num_classes=19) 142 | print(model) 143 | 144 | input = torch.randn(1,3,512,512) 145 | output = model(input) 146 | print(output[0].shape) 147 | print(output[1].shape) 148 | print(output[2].shape) 149 | print(output[3].shape) -------------------------------------------------------------------------------- /SemanticSegmentation/LRNnet.py: -------------------------------------------------------------------------------- 1 | # !/usr/bin/env python 2 | # -- coding: utf-8 -- 3 | # @Time : 2020/6/8 15:31 4 | # @Author : liumin 5 | # @File : LRNnet.py -------------------------------------------------------------------------------- /SemanticSegmentation/LWnet.py: -------------------------------------------------------------------------------- 1 | # !/usr/bin/env python 2 | # -- coding: utf-8 -- 3 | # @Time : 2020/6/28 18:04 4 | # @Author : liumin 5 | # @File : LWnet.py 6 | 7 | import torch 8 | import torch.nn as nn 9 | import torchvision 10 | import torch.nn.functional as F 11 | 12 | def 
ConvBNReLU(in_channels,out_channels,kernel_size,stride,padding,dilation=1,groups=1): 13 | return nn.Sequential( 14 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=stride, padding=padding,dilation=dilation,groups=groups, bias=False), 15 | nn.BatchNorm2d(out_channels), 16 | nn.ReLU6(inplace=True) 17 | ) 18 | 19 | 20 | def ConvBN(in_channels,out_channels,kernel_size,stride,padding,dilation=1,groups=1): 21 | return nn.Sequential( 22 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=stride, padding=padding,dilation=dilation,groups=groups, bias=False), 23 | nn.BatchNorm2d(out_channels) 24 | ) 25 | 26 | 27 | def Conv1x1BNReLU(in_channels,out_channels): 28 | return nn.Sequential( 29 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=1, bias=False), 30 | nn.BatchNorm2d(out_channels), 31 | nn.ReLU6(inplace=True) 32 | ) 33 | 34 | 35 | def Conv1x1BN(in_channels,out_channels): 36 | return nn.Sequential( 37 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=1, bias=False), 38 | nn.BatchNorm2d(out_channels) 39 | ) 40 | 41 | class LWbottleneck(nn.Module): 42 | def __init__(self, in_channels,out_channels,stride): 43 | super(LWbottleneck, self).__init__() 44 | self.stride = stride 45 | self.pyramid_list = nn.ModuleList() 46 | self.pyramid_list.append(ConvBNReLU(in_channels, in_channels, kernel_size=[5,1], stride=stride, padding=[2,0])) 47 | self.pyramid_list.append(ConvBNReLU(in_channels, in_channels, kernel_size=[1,5], stride=stride, padding=[0,2])) 48 | self.pyramid_list.append(ConvBNReLU(in_channels, in_channels, kernel_size=[3,1], stride=stride, padding=[1,0])) 49 | self.pyramid_list.append(ConvBNReLU(in_channels, in_channels, kernel_size=[1,3], stride=stride, padding=[0,1])) 50 | self.pyramid_list.append(ConvBNReLU(in_channels, in_channels, kernel_size=[2,1], stride=stride, padding=[1,0])) 51 | self.pyramid_list.append(ConvBNReLU(in_channels, in_channels, kernel_size=[1,2], stride=stride, padding=[0,1])) 52 | self.pyramid_list.append(ConvBNReLU(in_channels, in_channels, kernel_size=2, stride=stride, padding=1)) 53 | self.pyramid_list.append(ConvBNReLU(in_channels, in_channels, kernel_size=3, stride=stride, padding=1)) 54 | 55 | self.shrink = Conv1x1BN(in_channels*8,out_channels) 56 | 57 | def forward(self, x): 58 | b,c,w,h = x.shape 59 | if self.stride>1: 60 | w, h = w//self.stride,h//self.stride 61 | outputs = [] 62 | for pyconv in self.pyramid_list: 63 | pyconv_x = pyconv(x) 64 | if x.shape[2:] != pyconv_x.shape[2:]: 65 | pyconv_x = pyconv_x[:,:,:w,:h] 66 | outputs.append(pyconv_x) 67 | out = torch.cat(outputs, 1) 68 | return self.shrink(out) 69 | 70 | 71 | class Encoder(nn.Module): 72 | def __init__(self): 73 | super(Encoder, self).__init__() 74 | 75 | self.stage1 = nn.Sequential( 76 | ConvBNReLU(in_channels=3, out_channels=32, kernel_size=3, stride=2, padding=1), 77 | Conv1x1BN(in_channels=32, out_channels=16), 78 | ) 79 | self.stage2 = nn.Sequential( 80 | LWbottleneck(in_channels=16,out_channels=24,stride=2), 81 | LWbottleneck(in_channels=24, out_channels=24, stride=1), 82 | ) 83 | self.stage3 = nn.Sequential( 84 | LWbottleneck(in_channels=24, out_channels=32, stride=2), 85 | LWbottleneck(in_channels=32, out_channels=32, stride=1), 86 | ) 87 | self.stage4 = nn.Sequential( 88 | LWbottleneck(in_channels=32, out_channels=32, stride=2) 89 | ) 90 | self.stage5 = nn.Sequential( 91 | LWbottleneck(in_channels=32, out_channels=64, stride=2), 92 
| LWbottleneck(in_channels=64, out_channels=64, stride=1), 93 | LWbottleneck(in_channels=64, out_channels=64, stride=1), 94 | LWbottleneck(in_channels=64, out_channels=64, stride=1), 95 | ) 96 | 97 | self.conv1 = Conv1x1BN(in_channels=64, out_channels=320) 98 | 99 | def forward(self, x): 100 | x = self.stage1(x) 101 | x = self.stage2(x) 102 | x = F.pad(x,pad=(0,1,0,1),mode='constant',value=0) 103 | out1 = x = self.stage3(x) 104 | x = self.stage4(x) 105 | x = F.pad(x, pad=(0, 1, 0, 1), mode='constant', value=0) 106 | x = self.stage5(x) 107 | out2 = self.conv1(x) 108 | return out1,out2 109 | 110 | class ASPP(nn.Module): 111 | def __init__(self, in_channels, out_channels): 112 | super(ASPP, self).__init__() 113 | self.depthwise1 = ConvBNReLU(in_channels, out_channels, 3, 1, 6, dilation=6)  # despite the names, these are ordinary (non-grouped) dilated 3x3 convs 114 | self.depthwise2 = ConvBNReLU(in_channels, out_channels, 3, 1, 12, dilation=12) 115 | self.depthwise3 = ConvBNReLU(in_channels, out_channels, 3, 1, 18, dilation=18) 116 | self.pointconv = Conv1x1BN(in_channels, out_channels) 117 | 118 | def forward(self, x): 119 | x1 = self.depthwise1(x) 120 | x2 = self.depthwise2(x) 121 | x3 = self.depthwise3(x) 122 | x4 = self.pointconv(x) 123 | return torch.cat([x1,x2,x3,x4], dim=1) 124 | 125 | class Decoder(nn.Module): 126 | def __init__(self,num_classes=2): 127 | super(Decoder, self).__init__() 128 | self.aspp = ASPP(320, 128) 129 | self.pconv1 = Conv1x1BN(128*4, 512) 130 | 131 | self.pconv2 = Conv1x1BN(512+32, 128) 132 | self.pconv3 = Conv1x1BN(128, num_classes) 133 | 134 | def forward(self, x, y): 135 | x = self.pconv1(self.aspp(x)) 136 | x = F.interpolate(x,y.shape[2:],align_corners=True,mode='bilinear') 137 | x = torch.cat([x,y], dim=1) 138 | out = self.pconv3(self.pconv2(x)) 139 | return out 140 | 141 | class LW_Network(nn.Module): 142 | def __init__(self, num_classes=2): 143 | super(LW_Network, self).__init__() 144 | self.encoder = Encoder() 145 | self.decoder = Decoder(num_classes) 146 | def forward(self, x): 147 | x1,x2 = self.encoder(x) 148 | out = self.decoder(x2,x1) 149 | return out 150 | 151 | 152 | 153 | if __name__ == '__main__': 154 | model = LW_Network() 155 | print(model) 156 | 157 | input = torch.randn(1, 3, 331, 331) 158 | output = model(input) 159 | print(output.shape) -------------------------------------------------------------------------------- /SemanticSegmentation/README.md: -------------------------------------------------------------------------------- 1 | # SemanticSegmentation-network 2 | PyTorch implementations of semantic segmentation networks 3 | 4 | 5 | 6 | **FCN:** 7 | Fully Convolutional Networks for Semantic Segmentation 8 | 9 | https://arxiv.org/pdf/1411.4038.pdf 10 | 11 | 12 | 13 | **Fast-SCNN:** 14 | 15 | Fast-SCNN: Fast Semantic Segmentation Network 16 | 17 | https://arxiv.org/pdf/1902.04502.pdf 18 | 19 | 20 | 21 | **LEDNet:** 22 | 23 | LEDNet: A Lightweight Encoder-Decoder Network for Real-time Semantic Segmentation 24 | 25 | https://arxiv.org/pdf/1905.02423.pdf 26 | 27 | 28 | 29 | **LRNNet:** 30 | 31 | LRNNet: A Light-Weighted Network with Efficient Reduced Non-Local Operation for Real-Time Semantic Segmentation 32 | 33 | https://arxiv.org/pdf/2006.02706.pdf 34 | 35 | 36 | 37 | **FisheyeMODNet:** 38 | 39 | FisheyeMODNet: Moving Object detection on Surround-view Cameras for Autonomous Driving (2019) 40 | 41 | https://arxiv.org/pdf/1908.11789v1.pdf -------------------------------------------------------------------------------- /SemanticSegmentation/SegNet.py: 
-------------------------------------------------------------------------------- 1 | # !/usr/bin/env python 2 | # -- coding: utf-8 -- 3 | # @Time : 2020/10/13 8:52 4 | # @Author : liumin 5 | # @File : segnet.py 6 | 7 | import torch 8 | import torch.nn as nn 9 | import torch.nn.functional as F 10 | from torchsummary import summary 11 | 12 | 13 | __all__ = ["SegNet"] 14 | 15 | 16 | def Conv3x3BNReLU(in_channels,out_channels,stride,groups=1): 17 | return nn.Sequential( 18 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=3, stride=stride, padding=1, groups=groups), 19 | nn.BatchNorm2d(out_channels), 20 | nn.ReLU(inplace=True) 21 | ) 22 | 23 | class DoubleConv(nn.Module): 24 | """(convolution => [BN] => ReLU) * 2""" 25 | def __init__(self, in_channels, out_channels, reverse=False): 26 | super().__init__() 27 | if reverse: 28 | self.double_conv = nn.Sequential( 29 | Conv3x3BNReLU(in_channels, in_channels, stride=1), 30 | Conv3x3BNReLU(in_channels, out_channels, stride=1) 31 | ) 32 | else: 33 | self.double_conv = nn.Sequential( 34 | Conv3x3BNReLU(in_channels, out_channels,stride=1), 35 | Conv3x3BNReLU(out_channels, out_channels, stride=1) 36 | ) 37 | 38 | def forward(self, x): 39 | return self.double_conv(x) 40 | 41 | 42 | class TripleConv(nn.Module): 43 | """(convolution => [BN] => ReLU) * 3""" 44 | def __init__(self, in_channels, out_channels, reverse=False): 45 | super().__init__() 46 | if reverse: 47 | self.triple_conv = nn.Sequential( 48 | Conv3x3BNReLU(in_channels, in_channels, stride=1), 49 | Conv3x3BNReLU(in_channels, in_channels, stride=1), 50 | Conv3x3BNReLU(in_channels, out_channels, stride=1) 51 | ) 52 | else: 53 | self.triple_conv = nn.Sequential( 54 | Conv3x3BNReLU(in_channels, out_channels,stride=1), 55 | Conv3x3BNReLU(out_channels, out_channels, stride=1), 56 | Conv3x3BNReLU(out_channels, out_channels, stride=1) 57 | ) 58 | 59 | def forward(self, x): 60 | return self.triple_conv(x) 61 | 62 | 63 | class SegNet(nn.Module): 64 | """ 65 | SegNet: A Deep Convolutional Encoder-Decoder Architecture for Image Segmentation 66 | https://arxiv.org/pdf/1511.00561.pdf 67 | """ 68 | def __init__(self,classes= 19): 69 | super(SegNet, self).__init__() 70 | 71 | self.conv_down1 = DoubleConv(3, 64) 72 | self.conv_down2 = DoubleConv(64, 128) 73 | self.conv_down3 = TripleConv(128, 256) 74 | self.conv_down4 = TripleConv(256, 512) 75 | self.conv_down5 = TripleConv(512, 512) 76 | 77 | self.conv_up5 = TripleConv(512, 512, reverse=True) 78 | self.conv_up4 = TripleConv(512, 256, reverse=True) 79 | self.conv_up3 = TripleConv(256, 128, reverse=True) 80 | self.conv_up2 = DoubleConv(128, 64, reverse=True) 81 | self.conv_up1 = Conv3x3BNReLU(64, 64, stride=1) 82 | 83 | self.outconv = nn.Conv2d(64, classes, kernel_size=3, padding=1) 84 | 85 | def forward(self, x): 86 | 87 | # Stage 1 88 | x1 = self.conv_down1(x) 89 | x1_size = x1.size() 90 | x1p, id1 = F.max_pool2d(x1, kernel_size=2, stride=2, return_indices=True) 91 | 92 | # Stage 2 93 | x2 = self.conv_down2(x1p) 94 | x2_size = x2.size() 95 | x2p, id2 = F.max_pool2d(x2, kernel_size=2, stride=2, return_indices=True) 96 | 97 | # Stage 3 98 | x3 = self.conv_down3(x2p) 99 | x3_size = x3.size() 100 | x3p, id3 = F.max_pool2d(x3, kernel_size=2, stride=2, return_indices=True) 101 | 102 | # Stage 4 103 | x4 = self.conv_down4(x3p) 104 | x4_size = x4.size() 105 | x4p, id4 = F.max_pool2d(x4, kernel_size=2, stride=2, return_indices=True) 106 | 107 | # Stage 5 108 | x5 = self.conv_down5(x4p) 109 | x5_size = x5.size() 110 | x5p, id5 = 
F.max_pool2d(x5, kernel_size=2, stride=2, return_indices=True) 111 | 112 | # Stage 5d 113 | x5d = F.max_unpool2d(x5p, id5, kernel_size=2, stride=2, output_size=x5_size) 114 | x5d = self.conv_up5(x5d) 115 | 116 | # Stage 4d 117 | x4d = F.max_unpool2d(x5d, id4, kernel_size=2, stride=2, output_size=x4_size) 118 | x4d = self.conv_up4(x4d) 119 | 120 | # Stage 3d 121 | x3d = F.max_unpool2d(x4d, id3, kernel_size=2, stride=2, output_size=x3_size) 122 | x3d = self.conv_up3(x3d) 123 | 124 | # Stage 2d 125 | x2d = F.max_unpool2d(x3d, id2, kernel_size=2, stride=2, output_size=x2_size) 126 | x2d = self.conv_up2(x2d) 127 | 128 | # Stage 1d 129 | x1d = F.max_unpool2d(x2d, id1, kernel_size=2, stride=2, output_size=x1_size) 130 | x1d = self.conv_up1(x1d) 131 | 132 | out = self.outconv(x1d) 133 | 134 | return out 135 | 136 | 137 | 138 | """print layers and params of network""" 139 | if __name__ == '__main__': 140 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 141 | model = SegNet(classes=19).to(device) 142 | summary(model,(3,800,600)) -------------------------------------------------------------------------------- /SemanticSegmentation/Unet.py: -------------------------------------------------------------------------------- 1 | # !/usr/bin/env python 2 | # -- coding: utf-8 -- 3 | # @Time : 2020/7/8 13:51 4 | # @Author : liumin 5 | # @File : unet.py 6 | 7 | import torch 8 | import torch.nn as nn 9 | import torch.nn.functional as F 10 | 11 | 12 | def Conv3x3BNReLU(in_channels,out_channels,stride,groups=1): 13 | return nn.Sequential( 14 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=3, stride=stride, padding=1, groups=groups), 15 | nn.BatchNorm2d(out_channels), 16 | nn.ReLU(inplace=True) 17 | ) 18 | 19 | 20 | def Conv1x1BNReLU(in_channels,out_channels): 21 | return nn.Sequential( 22 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=1), 23 | nn.BatchNorm2d(out_channels), 24 | nn.ReLU(inplace=True) 25 | ) 26 | 27 | 28 | def Conv1x1BN(in_channels,out_channels): 29 | return nn.Sequential( 30 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=1), 31 | nn.BatchNorm2d(out_channels) 32 | ) 33 | 34 | 35 | class DoubleConv(nn.Module): 36 | """(convolution => [BN] => ReLU) * 2""" 37 | def __init__(self, in_channels, out_channels): 38 | super().__init__() 39 | self.double_conv = nn.Sequential( 40 | Conv3x3BNReLU(in_channels, out_channels,stride=1), 41 | Conv3x3BNReLU(out_channels, out_channels, stride=1) 42 | ) 43 | 44 | def forward(self, x): 45 | return self.double_conv(x) 46 | 47 | 48 | class DownConv(nn.Module): 49 | """DoubleConv followed by 2x2 max-pooling (one downsampling step)""" 50 | def __init__(self, in_channels, out_channels,stride=2): 51 | super().__init__() 52 | self.pool = nn.MaxPool2d(kernel_size=2,stride=stride) 53 | self.double_conv = DoubleConv(in_channels, out_channels) 54 | 55 | def forward(self, x): 56 | return self.pool(self.double_conv(x)) 57 | 58 | 59 | class UpConv(nn.Module): 60 | def __init__(self, in_channels, out_channels,bilinear=True): 61 | super().__init__() 62 | self.reduce = Conv1x1BNReLU(in_channels, in_channels//2) 63 | # if bilinear, use the normal convolutions to reduce the number of channels 64 | if bilinear: 65 | self.up = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True) 66 | else: 67 | self.up = nn.ConvTranspose2d(in_channels // 2, in_channels // 2, kernel_size=2, stride=2) 68 | self.conv = DoubleConv(in_channels, out_channels) 69 | 70 | def forward(self, x1, x2): 71 | 
x1 = self.up(self.reduce(x1)) 72 | _, channel1, height1, width1 = x1.size() 73 | _, channel2, height2, width2 = x2.size() 74 | 75 | # tensors are NCHW; pad x1 so its (H, W) matches x2 before concatenation 76 | diffY = height2 - height1 77 | diffX = width2 - width1 78 | 79 | x1 = F.pad(x1, [diffX // 2, diffX - diffX // 2, diffY // 2, diffY - diffY // 2]) 80 | x = torch.cat([x2, x1], dim=1) 81 | return self.conv(x) 82 | 83 | 84 | class UNet(nn.Module): 85 | def __init__(self, num_classes): 86 | super(UNet, self).__init__() 87 | bilinear = True 88 | 89 | self.conv = DoubleConv(3, 64) 90 | self.down1 = DownConv(64, 128) 91 | self.down2 = DownConv(128, 256) 92 | self.down3 = DownConv(256, 512) 93 | self.down4 = DownConv(512, 1024) 94 | self.up1 = UpConv(1024, 512, bilinear) 95 | self.up2 = UpConv(512, 256, bilinear) 96 | self.up3 = UpConv(256, 128, bilinear) 97 | self.up4 = UpConv(128, 64, bilinear) 98 | self.outconv = nn.Conv2d(64, num_classes, kernel_size=1) 99 | 100 | def forward(self, x): 101 | x1 = self.conv(x) 102 | x2 = self.down1(x1) 103 | x3 = self.down2(x2) 104 | x4 = self.down3(x3) 105 | x5 = self.down4(x4) 106 | xx = self.up1(x5, x4) 107 | xx = self.up2(xx, x3) 108 | xx = self.up3(xx, x2) 109 | xx = self.up4(xx, x1) 110 | outputs = self.outconv(xx) 111 | return outputs 112 | 113 | 114 | if __name__ == '__main__': 115 | model = UNet(19) 116 | print(model) 117 | 118 | input = torch.randn(1,3,572,572) 119 | out = model(input) 120 | print(out.shape) -------------------------------------------------------------------------------- /Utils/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | def Conv3x3BNReLU(in_channels,out_channels,stride,padding=1): 5 | return nn.Sequential( 6 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=3, stride=stride, padding=padding), 7 | nn.BatchNorm2d(out_channels), 8 | nn.ReLU6(inplace=True) 9 | ) 10 | 11 | def Conv1x1BNReLU(in_channels,out_channels): 12 | return nn.Sequential( 13 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=1, padding=0), 14 | nn.BatchNorm2d(out_channels), 15 | nn.ReLU6(inplace=True) 16 | ) 17 | 18 | def ConvBNReLU(in_channels,out_channels,kernel_size,stride,padding=1): 19 | return nn.Sequential( 20 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=stride, padding=padding), 21 | nn.BatchNorm2d(out_channels), 22 | nn.ReLU6(inplace=True) 23 | ) 24 | 25 | def ConvBN(in_channels,out_channels,kernel_size,stride,padding=1): 26 | return nn.Sequential( 27 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=stride, padding=padding), 28 | nn.BatchNorm2d(out_channels) 29 | ) 30 | 31 | class ResidualBlock(nn.Module): 32 | def __init__(self, in_channels, out_channels): 33 | super(ResidualBlock, self).__init__() 34 | mid_channels = out_channels//2 35 | 36 | self.bottleneck = nn.Sequential( 37 | ConvBNReLU(in_channels=in_channels, out_channels=mid_channels, kernel_size=1, stride=1, padding=0),  # 1x1 convs need padding=0 so spatial dims stay aligned with the shortcut branch 38 | ConvBNReLU(in_channels=mid_channels, out_channels=mid_channels, kernel_size=3, stride=1, padding=1), 39 | ConvBNReLU(in_channels=mid_channels, out_channels=out_channels, kernel_size=1, stride=1, padding=0), 40 | ) 41 | self.shortcut = ConvBNReLU(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=1, padding=0) 42 | 43 | def forward(self, x): 44 | out = self.bottleneck(x) 45 | return out+self.shortcut(x) --------------------------------------------------------------------------------
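As a quick sanity check for the shared building blocks in Utils/utils.py above, a minimal usage sketch (it assumes the repository root is on PYTHONPATH so that Utils.utils is importable; the 224x224 input and the 64/128 channel widths are illustrative choices, not values taken from the repository):

import torch
from Utils.utils import Conv3x3BNReLU, ResidualBlock

# stem: 3x3 conv with stride 2 halves the spatial size, [1, 3, 224, 224] -> [1, 64, 112, 112]
stem = Conv3x3BNReLU(in_channels=3, out_channels=64, stride=2)
# bottleneck residual block: 1x1 reduce -> 3x3 -> 1x1 expand, added to a 1x1 projection shortcut
block = ResidualBlock(in_channels=64, out_channels=128)

x = torch.randn(1, 3, 224, 224)
y = block(stem(x))
print(y.shape)  # torch.Size([1, 128, 112, 112])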