├── Attention
│   ├── AFF.py
│   ├── ANN.py
│   ├── CBAM.py
│   ├── CCNet.py
│   ├── ECA-Net.py
│   ├── GAM.py
│   ├── GlobalContextBlock.py
│   ├── NAM.py
│   ├── NonLocalBlock.py
│   ├── README.md
│   ├── SENet.py
│   ├── SEvariants.py
│   └── TripletAttention.py
├── ClassicNetwork
│   ├── AlexNet.py
│   ├── DenseNet.py
│   ├── Efficientnet.py
│   ├── InceptionV1.py
│   ├── InceptionV2.py
│   ├── InceptionV3.py
│   ├── InceptionV4.py
│   ├── README.md
│   ├── ResNeXt.py
│   ├── ResNet.py
│   ├── VGGNet.py
│   └── repVGGNet.py
├── FaceDetectorAndRecognition
│   ├── FaceBoxes.py
│   ├── LFFD.py
│   ├── README.md
│   └── VarGFaceNet.py
├── HumanPoseEstimation
│   ├── Hourglass.py
│   ├── LPN.py
│   ├── README.md
│   ├── SimpleBaseline.py
│   └── context_block.py
├── InstanceSegmentation
│   ├── PolarMask.py
│   └── README.md
├── Lightweight
│   ├── GhostNet.py
│   ├── MixNet.py
│   ├── MobileNetV1.py
│   ├── MobileNetV2.py
│   ├── MobileNetV3.py
│   ├── MobileNetXt.py
│   ├── README.md
│   ├── ShuffleNet.py
│   ├── ShuffleNetV2.py
│   ├── SqueezeNet.py
│   └── Xception.py
├── ObjectDetection
│   ├── ASFF.py
│   ├── CenterNet.py
│   ├── CornerNet.py
│   ├── FCOS.py
│   ├── FPN.py
│   ├── FSAF.py
│   ├── FisheyeMODNet.py
│   ├── FoveaBox.py
│   ├── README.md
│   ├── RetinaNet.py
│   ├── SSD.py
│   ├── VoVNet.py
│   ├── VoVNetV2.py
│   ├── YOLO.py
│   ├── YOLO_Nano.py
│   ├── YOLOv2.py
│   └── YOLOv3.py
├── Others
│   ├── DynamicReLU.py
│   └── PyramidalConvolution.py
├── PortraitSegmentation
│   └── SINet.py
├── README.md
├── SemanticSegmentation
│   ├── DeeplabV3Plus.py
│   ├── ENet.py
│   ├── FCN.py
│   ├── FastSCNN.py
│   ├── FisheyeMODNet.py
│   ├── ICNet.py
│   ├── LEDnet.py
│   ├── LRNnet.py
│   ├── LWnet.py
│   ├── README.md
│   ├── SegNet.py
│   └── Unet.py
├── Utils
│   └── utils.py
└── requirements.txt

/Attention/AFF.py:
--------------------------------------------------------------------------------
 1 | # !/usr/bin/env python
 2 | # -- coding: utf-8 --
 3 | # @Time : 2021/11/17 10:29
 4 | # @Author : liumin
 5 | # @File : AFF.py
 6 | 
 7 | import torch
 8 | import torch.nn as nn
 9 | 
10 | 
11 | class MS_CAM(nn.Module):
12 |     def __init__(self, channel, ratio = 16):
13 |         super(MS_CAM, self).__init__()
14 |         mid_channel = channel // ratio
15 |         self.global_att = nn.Sequential(
16 |             nn.AdaptiveAvgPool2d(1),
17 |             nn.Conv2d(in_channels=channel, out_channels=mid_channel, kernel_size=1, stride=1, padding=0),
18 |             nn.BatchNorm2d(mid_channel),
19 |             nn.ReLU(inplace=True),
20 |             nn.Conv2d(in_channels=mid_channel, out_channels=channel, kernel_size=1, stride=1, padding=0),
21 |             nn.BatchNorm2d(channel),
22 |         )
23 | 
24 |         self.local_att = nn.Sequential(
25 |             nn.Conv2d(in_channels=channel, out_channels=mid_channel, kernel_size=1, stride=1, padding=0),
26 |             nn.BatchNorm2d(mid_channel),
27 |             nn.ReLU(inplace=True),
28 |             nn.Conv2d(in_channels=mid_channel, out_channels=channel, kernel_size=1, stride=1, padding=0),
29 |             nn.BatchNorm2d(channel),
30 |         )
31 | 
32 |         self.sigmoid = nn.Sigmoid()
33 | 
34 |     def attention(self, x):
35 |         # the AFF paper fuses the global (1x1) and local (HxW) contexts by addition
36 |         # (broadcast over H x W), then gates with a sigmoid
37 |         g_x = self.global_att(x)
38 |         l_x = self.local_att(x)
39 |         return self.sigmoid(l_x + g_x)
40 | 
41 |     def forward(self, x):
42 |         return self.attention(x) * x
43 | 
44 | 
45 | class AFF(nn.Module):
46 |     def __init__(self, channel, ratio = 16):
47 |         super(AFF, self).__init__()
48 |         self.ms_cam = MS_CAM(channel, ratio)
49 | 
50 |     def forward(self, x, residual):
51 |         # attention weights come from the initial additive fusion of the two inputs;
52 |         # the output is a soft, position-wise weighted average of the two branches
53 |         w = self.ms_cam.attention(x + residual)
54 |         return 2 * x * w + 2 * residual * (1 - w)
55 | 
56 | 
57 | if __name__=='__main__':
58 |     model = MS_CAM(16)
59 |     print(model)
60 | 
61 |     input = torch.randn(2, 16, 64, 64)
62 |     out = model(input)
63 |     print(out.shape)
--------------------------------------------------------------------------------
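Usage note (added): unlike the single-input modules in this folder, AFF fuses two feature maps of the same shape, e.g. a skip connection with the main branch. A minimal sketch with made-up tensor sizes:

    aff = AFF(channel=16)
    x = torch.randn(2, 16, 64, 64)      # main branch
    skip = torch.randn(2, 16, 64, 64)   # residual / skip branch
    fused = aff(x, skip)                # -> torch.Size([2, 16, 64, 64])
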
/Attention/ANN.py:
--------------------------------------------------------------------------------
 1 | # !/usr/bin/env python
 2 | # -- coding: utf-8 --
 3 | # @Time : 2020/6/3 15:08
 4 | # @Author : liumin
 5 | # @File : ANN.py
 6 | 
 7 | import torch
 8 | import torch.nn as nn
 9 | import torchvision
10 | import numpy as np
11 | 
12 | class SpatialPyramidPooling(nn.Module):
13 |     def __init__(self, output_sizes = [1, 3, 6, 8]):
14 |         super(SpatialPyramidPooling, self).__init__()
15 | 
16 |         self.pool_layers = nn.ModuleList()
17 |         for output_size in output_sizes:
18 |             self.pool_layers.append(nn.AdaptiveMaxPool2d(output_size=output_size))
19 | 
20 |     def forward(self, x):
21 |         outputs = []
22 |         for pool_layer in self.pool_layers:
23 |             outputs.append(pool_layer(x).flatten(start_dim=2))  # keep batch and channel dims: [N, C, s*s]
24 |         out = torch.cat(outputs, dim=2)  # [N, C, S], S = sum of s*s over the output sizes
25 |         return out
26 | 
27 | class APNB(nn.Module):
28 |     def __init__(self, channel):
29 |         super(APNB, self).__init__()
30 |         self.inter_channel = channel // 2
31 |         self.conv_phi = nn.Conv2d(in_channels=channel, out_channels=self.inter_channel, kernel_size=1, stride=1, padding=0, bias=False)
32 |         self.conv_theta = nn.Conv2d(in_channels=channel, out_channels=self.inter_channel, kernel_size=1, stride=1, padding=0, bias=False)
33 |         self.conv_g = nn.Conv2d(in_channels=channel, out_channels=self.inter_channel, kernel_size=1, stride=1, padding=0, bias=False)
34 |         self.softmax = nn.Softmax(dim=-1)  # normalize each query row over the key positions
35 |         self.conv_mask = nn.Conv2d(in_channels=self.inter_channel, out_channels=channel, kernel_size=1, stride=1, padding=0, bias=False)
36 | 
37 |     def forward(self, x):
38 |         # [N, C, H , W]
39 |         b, c, h, w = x.size()
40 |         # [N, C/2, H * W]
41 |         x_phi = self.conv_phi(x).view(b, self.inter_channel, -1)
42 |         # [N, H * W, C/2]
43 |         x_theta = self.conv_theta(x).view(b, self.inter_channel, -1).permute(0, 2, 1).contiguous()
44 |         x_g = self.conv_g(x).view(b, self.inter_channel, -1).permute(0, 2, 1).contiguous()
45 |         # [N, H * W, H * W]
46 |         mul_theta_phi = torch.matmul(x_theta, x_phi)
47 |         mul_theta_phi = self.softmax(mul_theta_phi)
48 |         # [N, H * W, C/2]
49 |         mul_theta_phi_g = torch.matmul(mul_theta_phi, x_g)
50 |         # [N, C/2, H, W]
51 |         mul_theta_phi_g = mul_theta_phi_g.permute(0,2,1).contiguous().view(b,self.inter_channel, h, w)
52 |         # [N, C, H , W]
53 |         mask = self.conv_mask(mul_theta_phi_g)
54 |         out = mask + x
55 |         return out
56 | 
57 | 
58 | class AFNB(nn.Module):
59 |     def __init__(self, channel):
60 |         super(AFNB, self).__init__()
61 |         self.inter_channel = channel // 2
62 |         self.output_sizes = [1, 3, 6, 8]
63 |         self.sample_dim = np.sum([size*size for size in self.output_sizes])
64 |         self.conv_phi = nn.Conv2d(in_channels=channel, out_channels=self.inter_channel, kernel_size=1, stride=1, padding=0, bias=False)
65 |         self.conv_phi_spp = SpatialPyramidPooling(self.output_sizes)
66 |         self.conv_theta = nn.Conv2d(in_channels=channel, out_channels=self.inter_channel, kernel_size=1, stride=1, padding=0, bias=False)
67 |         self.conv_g = nn.Conv2d(in_channels=channel, out_channels=self.inter_channel, kernel_size=1, stride=1, padding=0, bias=False)
68 |         self.conv_g_spp = SpatialPyramidPooling(self.output_sizes)
69 |         self.softmax = nn.Softmax(dim=-1)  # normalize each query row over the S sampled positions
70 |         self.conv_mask = nn.Conv2d(in_channels=self.inter_channel, out_channels=channel, kernel_size=1, stride=1, padding=0, bias=False)
71 | 
72 |     def forward(self, x):
73 |         # [N, C, H , W]
74 |         b, c, h, w = x.size()
75 |         # query, kept at full resolution: [N, H * W, C/2]
76 |         x_theta = self.conv_theta(x).view(b, self.inter_channel, -1).permute(0, 2, 1).contiguous()
77 |         # key, pyramid-sampled down to S = self.sample_dim = 110 positions: [N, C/2, S]
78 |         x_phi = self.conv_phi_spp(self.conv_phi(x))
79 |         # value, sampled the same way: [N, S, C/2]
80 |         x_g = self.conv_g_spp(self.conv_g(x)).permute(0, 2, 1).contiguous()
81 |         # attention from every pixel to the S sampled positions: [N, H * W, S]
82 |         mul_theta_phi = torch.matmul(x_theta, x_phi)
83 |         mul_theta_phi = self.softmax(mul_theta_phi)
84 |         # gather the sampled values with the attention weights
85 |         # 
[N, H * W, C/2] 86 | mul_theta_phi_g = torch.matmul(mul_theta_phi, x_g) 87 | # [N, C/2, H, W] 88 | mul_theta_phi_g = mul_theta_phi_g.permute(0,2,1).contiguous().view(b,self.inter_channel, h, w) 89 | # [N, C, H , W] 90 | mask = self.conv_mask(mul_theta_phi_g) 91 | out = mask + x 92 | return out 93 | 94 | if __name__=='__main__': 95 | model = AFNB(channel=16) 96 | print(model) 97 | 98 | input = torch.randn(1, 16, 64, 64) 99 | out = model(input) 100 | print(out.shape) -------------------------------------------------------------------------------- /Attention/CBAM.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torchvision 4 | 5 | 6 | class ChannelAttentionModule(nn.Module): 7 | def __init__(self, channel, ratio=16): 8 | super(ChannelAttentionModule, self).__init__() 9 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 10 | self.max_pool = nn.AdaptiveMaxPool2d(1) 11 | 12 | self.shared_MLP = nn.Sequential( 13 | nn.Conv2d(channel, channel // ratio, 1, bias=False), 14 | nn.ReLU(), 15 | nn.Conv2d(channel // ratio, channel, 1, bias=False) 16 | ) 17 | self.sigmoid = nn.Sigmoid() 18 | 19 | def forward(self, x): 20 | avgout = self.shared_MLP(self.avg_pool(x)) 21 | maxout = self.shared_MLP(self.max_pool(x)) 22 | return self.sigmoid(avgout + maxout) 23 | 24 | 25 | class SpatialAttentionModule(nn.Module): 26 | def __init__(self): 27 | super(SpatialAttentionModule, self).__init__() 28 | self.conv2d = nn.Conv2d(in_channels=2, out_channels=1, kernel_size=7, stride=1, padding=3) 29 | self.sigmoid = nn.Sigmoid() 30 | 31 | def forward(self, x): 32 | avgout = torch.mean(x, dim=1, keepdim=True) 33 | maxout, _ = torch.max(x, dim=1, keepdim=True) 34 | out = torch.cat([avgout, maxout], dim=1) 35 | out = self.sigmoid(self.conv2d(out)) 36 | return out 37 | 38 | 39 | class CBAM(nn.Module): 40 | def __init__(self, channel): 41 | super(CBAM, self).__init__() 42 | self.channel_attention = ChannelAttentionModule(channel) 43 | self.spatial_attention = SpatialAttentionModule() 44 | 45 | def forward(self, x): 46 | out = self.channel_attention(x) * x 47 | out = self.spatial_attention(out) * out 48 | return out 49 | 50 | 51 | class ResBlock_CBAM(nn.Module): 52 | def __init__(self,in_places, places, stride=1,downsampling=False, expansion = 4): 53 | super(ResBlock_CBAM,self).__init__() 54 | self.expansion = expansion 55 | self.downsampling = downsampling 56 | 57 | self.bottleneck = nn.Sequential( 58 | nn.Conv2d(in_channels=in_places,out_channels=places,kernel_size=1,stride=1, bias=False), 59 | nn.BatchNorm2d(places), 60 | nn.ReLU(inplace=True), 61 | nn.Conv2d(in_channels=places, out_channels=places, kernel_size=3, stride=stride, padding=1, bias=False), 62 | nn.BatchNorm2d(places), 63 | nn.ReLU(inplace=True), 64 | nn.Conv2d(in_channels=places, out_channels=places*self.expansion, kernel_size=1, stride=1, bias=False), 65 | nn.BatchNorm2d(places*self.expansion), 66 | ) 67 | self.cbam = CBAM(channel=places*self.expansion) 68 | 69 | if self.downsampling: 70 | self.downsample = nn.Sequential( 71 | nn.Conv2d(in_channels=in_places, out_channels=places*self.expansion, kernel_size=1, stride=stride, bias=False), 72 | nn.BatchNorm2d(places*self.expansion) 73 | ) 74 | self.relu = nn.ReLU(inplace=True) 75 | 76 | def forward(self, x): 77 | residual = x 78 | out = self.bottleneck(x) 79 | out = self.cbam(out) 80 | if self.downsampling: 81 | residual = self.downsample(x) 82 | 83 | out += residual 84 | out = self.relu(out) 85 | return out 86 | 87 | if 
__name__=='__main__': 88 | model = ResBlock_CBAM(in_places=16, places=4) 89 | print(model) 90 | 91 | input = torch.randn(1, 16, 64, 64) 92 | out = model(input) 93 | print(out.shape) 94 | 95 | 96 | 97 | 98 | -------------------------------------------------------------------------------- /Attention/CCNet.py: -------------------------------------------------------------------------------- 1 | # !/usr/bin/env python 2 | # -- coding: utf-8 -- 3 | # @Time : 2020/6/3 9:56 4 | # @Author : liumin 5 | # @File : CCNet.py 6 | 7 | import torch 8 | import torch.nn as nn 9 | 10 | 11 | def INF(B, H, W): 12 | return -torch.diag(torch.tensor(float("inf")).repeat(H), 0).unsqueeze(0).repeat(B * W, 1, 1) 13 | 14 | 15 | class CrissCrossAttention(nn.Module): 16 | """ Criss-Cross Attention Module""" 17 | 18 | def __init__(self, in_dim): 19 | super(CrissCrossAttention, self).__init__() 20 | self.query_conv = nn.Conv2d(in_channels=in_dim, out_channels=in_dim // 8, kernel_size=1) 21 | self.key_conv = nn.Conv2d(in_channels=in_dim, out_channels=in_dim // 8, kernel_size=1) 22 | self.value_conv = nn.Conv2d(in_channels=in_dim, out_channels=in_dim, kernel_size=1) 23 | self.softmax = nn.Softmax(dim=3) 24 | self.INF = INF 25 | self.gamma = nn.Parameter(torch.zeros(1)) 26 | 27 | def forward(self, x): 28 | m_batchsize, _, height, width = x.size() 29 | proj_query = self.query_conv(x) 30 | proj_query_H = proj_query.permute(0, 3, 1, 2).contiguous().view(m_batchsize * width, -1, height).permute(0, 2, 1) 31 | proj_query_W = proj_query.permute(0, 2, 1, 3).contiguous().view(m_batchsize * height, -1, width).permute(0, 2, 1) 32 | proj_key = self.key_conv(x) 33 | proj_key_H = proj_key.permute(0, 3, 1, 2).contiguous().view(m_batchsize * width, -1, height) 34 | proj_key_W = proj_key.permute(0, 2, 1, 3).contiguous().view(m_batchsize * height, -1, width) 35 | proj_value = self.value_conv(x) 36 | proj_value_H = proj_value.permute(0, 3, 1, 2).contiguous().view(m_batchsize * width, -1, height) 37 | proj_value_W = proj_value.permute(0, 2, 1, 3).contiguous().view(m_batchsize * height, -1, width) 38 | energy_H = (torch.bmm(proj_query_H, proj_key_H) + self.INF(m_batchsize, height, width)).view(m_batchsize, width, 39 | height, height).permute(0, 2, 1, 3) 40 | energy_W = torch.bmm(proj_query_W, proj_key_W).view(m_batchsize, height, width, width) 41 | concate = self.softmax(torch.cat([energy_H, energy_W], 3)) 42 | 43 | att_H = concate[:, :, :, 0:height].permute(0, 2, 1, 3).contiguous().view(m_batchsize * width, height, height) 44 | 45 | att_W = concate[:, :, :, height:height + width].contiguous().view(m_batchsize * height, width, width) 46 | out_H = torch.bmm(proj_value_H, att_H.permute(0, 2, 1)).view(m_batchsize, width, -1, height).permute(0, 2, 3, 1) 47 | out_W = torch.bmm(proj_value_W, att_W.permute(0, 2, 1)).view(m_batchsize, height, -1, width).permute(0, 2, 1, 3) 48 | return self.gamma * (out_H + out_W) + x 49 | 50 | 51 | if __name__=='__main__': 52 | model = CrissCrossAttention(16) 53 | print(model) 54 | 55 | input = torch.randn(1, 16, 64, 64) 56 | out = model(input) 57 | print(out.shape) -------------------------------------------------------------------------------- /Attention/ECA-Net.py: -------------------------------------------------------------------------------- 1 | # !/usr/bin/env python 2 | # -- coding: utf-8 -- 3 | # @Time : 2020/10/10 16:45 4 | # @Author : liumin 5 | # @File : ECA-Net.py 6 | 7 | import torch 8 | import torch.nn as nn 9 | import torchvision 10 | from math import log 11 | 12 | 13 | def Conv1(in_planes, places, 
stride=2):
14 |     return nn.Sequential(
15 |         nn.Conv2d(in_channels=in_planes,out_channels=places,kernel_size=7,stride=stride,padding=3, bias=False),
16 |         nn.BatchNorm2d(places),
17 |         nn.ReLU(inplace=True),
18 |         nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
19 |     )
20 | 
21 | class SE_Module(nn.Module):
22 |     def __init__(self, channel,ratio = 16):
23 |         super(SE_Module, self).__init__()
24 |         self.squeeze = nn.AdaptiveAvgPool2d(1)
25 |         self.excitation = nn.Sequential(
26 |             nn.Linear(in_features=channel, out_features=channel // ratio),
27 |             nn.ReLU(inplace=True),
28 |             nn.Linear(in_features=channel // ratio, out_features=channel),
29 |             nn.Sigmoid()
30 |         )
31 |     def forward(self, x):
32 |         b, c, _, _ = x.size()
33 |         y = self.squeeze(x).view(b, c)
34 |         z = self.excitation(y).view(b, c, 1, 1)
35 |         return x * z.expand_as(x)
36 | 
37 | class ECA_Module(nn.Module):
38 |     def __init__(self, channel,gamma=2, b=1):
39 |         super(ECA_Module, self).__init__()
40 |         self.gamma = gamma
41 |         self.b = b
42 |         t = int(abs(log(channel, 2) + self.b) / self.gamma)
43 |         k = t if t % 2 else t + 1  # adaptive 1D-conv kernel size, forced odd
44 | 
45 |         self.avg_pool = nn.AdaptiveAvgPool2d(1)
46 |         self.conv = nn.Conv1d(1, 1, kernel_size=k, padding=k//2, bias=False)
47 |         self.sigmoid = nn.Sigmoid()
48 | 
49 |     def forward(self, x):
50 |         b, c, _, _ = x.size()
51 |         y = self.avg_pool(x)
52 |         y = self.conv(y.squeeze(-1).transpose(-1,-2))
53 |         y = y.transpose(-1,-2).unsqueeze(-1)
54 |         y = self.sigmoid(y)
55 |         return x * y.expand_as(x)
56 | 
57 | class ECA_ResNetBlock(nn.Module):
58 |     def __init__(self,in_places,places, stride=1,downsampling=False, expansion = 4):
59 |         super(ECA_ResNetBlock,self).__init__()
60 |         self.expansion = expansion
61 |         self.downsampling = downsampling
62 | 
63 |         self.bottleneck = nn.Sequential(
64 |             nn.Conv2d(in_channels=in_places,out_channels=places,kernel_size=1,stride=1, bias=False),
65 |             nn.BatchNorm2d(places),
66 |             nn.ReLU(inplace=True),
67 |             nn.Conv2d(in_channels=places, out_channels=places, kernel_size=3, stride=stride, padding=1, bias=False),
68 |             nn.BatchNorm2d(places),
69 |             nn.ReLU(inplace=True),
70 |             nn.Conv2d(in_channels=places, out_channels=places*self.expansion, kernel_size=1, stride=1, bias=False),
71 |             nn.BatchNorm2d(places*self.expansion),
72 |         )
73 |         self.eca = ECA_Module(channel=places*self.expansion)  # channel attention on the residual branch (defined above but previously never applied)
74 | 
75 |         if self.downsampling:
76 |             self.downsample = nn.Sequential(
77 |                 nn.Conv2d(in_channels=in_places, out_channels=places*self.expansion, kernel_size=1, stride=stride, bias=False),
78 |                 nn.BatchNorm2d(places*self.expansion)
79 |             )
80 |         self.relu = nn.ReLU(inplace=True)
81 | 
82 |     def forward(self, x):
83 |         residual = x
84 |         out = self.bottleneck(x)
85 |         out = self.eca(out)
86 |         if self.downsampling:
87 |             residual = self.downsample(x)
88 | 
89 |         out += residual
90 |         out = self.relu(out)
91 |         return out
92 | 
93 | class ECA_ResNet(nn.Module):
94 |     def __init__(self,blocks, num_classes=1000, expansion = 4):
95 |         super(ECA_ResNet,self).__init__()
96 |         self.expansion = expansion
97 | 
98 |         self.conv1 = Conv1(in_planes = 3, places= 64)
99 | 
100 |         self.layer1 = self.make_layer(in_places = 64, places= 64, block=blocks[0], stride=1)
101 |         self.layer2 = self.make_layer(in_places = 256,places=128, block=blocks[1], stride=2)
102 |         self.layer3 = self.make_layer(in_places=512,places=256, block=blocks[2], stride=2)
103 |         self.layer4 = self.make_layer(in_places=1024,places=512, block=blocks[3], stride=2)
104 | 
105 |         self.avgpool = nn.AvgPool2d(7, stride=1)
106 |         self.fc = nn.Linear(2048,num_classes)
107 | 
108 |         for m in self.modules():
109 |             if isinstance(m, nn.Conv2d):
110 |                 nn.init.kaiming_normal_(m.weight, mode='fan_out', 
nonlinearity='relu')
111 |             elif isinstance(m, nn.BatchNorm2d):
112 |                 nn.init.constant_(m.weight, 1)
113 |                 nn.init.constant_(m.bias, 0)
114 | 
115 |     def make_layer(self, in_places, places, block, stride):
116 |         layers = []
117 |         layers.append(ECA_ResNetBlock(in_places, places,stride, downsampling =True))
118 |         for i in range(1, block):
119 |             layers.append(ECA_ResNetBlock(places*self.expansion, places))
120 | 
121 |         return nn.Sequential(*layers)
122 | 
123 | 
124 |     def forward(self, x):
125 |         x = self.conv1(x)
126 | 
127 |         x = self.layer1(x)
128 |         x = self.layer2(x)
129 |         x = self.layer3(x)
130 |         x = self.layer4(x)
131 | 
132 |         x = self.avgpool(x)
133 |         x = x.view(x.size(0), -1)
134 |         x = self.fc(x)
135 |         return x
136 | 
137 | def ECA_ResNet50():
138 |     return ECA_ResNet([3, 4, 6, 3])
139 | 
140 | if __name__=='__main__':
141 |     model = ECA_ResNet50()
142 |     print(model)
143 | 
144 |     input = torch.randn(1, 3, 224, 224)
145 |     out = model(input)
146 |     print(out.shape)
147 | 
--------------------------------------------------------------------------------
/Attention/GAM.py:
--------------------------------------------------------------------------------
 1 | # !/usr/bin/env python
 2 | # -- coding: utf-8 --
 3 | # @Time : 2022/1/17 14:18
 4 | # @Author : liumin
 5 | # @File : GAM.py
 6 | 
 7 | import torch
 8 | import torch.nn as nn
 9 | 
10 | 
11 | class GAM(nn.Module):
12 |     def __init__(self, channels, rate=4):
13 |         super(GAM, self).__init__()
14 |         mid_channels = channels // rate
15 | 
16 |         self.channel_attention = nn.Sequential(
17 |             nn.Linear(channels, mid_channels),
18 |             nn.ReLU(inplace=True),
19 |             nn.Linear(mid_channels, channels)
20 |         )
21 | 
22 |         self.spatial_attention = nn.Sequential(
23 |             nn.Conv2d(channels, mid_channels, kernel_size=7, stride=1, padding=3),
24 |             nn.BatchNorm2d(mid_channels),
25 |             nn.ReLU(inplace=True),
26 |             nn.Conv2d(mid_channels, channels, kernel_size=7, stride=1, padding=3),
27 |             nn.BatchNorm2d(channels)
28 |         )
29 | 
30 |     def forward(self, x):
31 |         b, c, h, w = x.shape
32 |         # channel attention
33 |         x_permute = x.permute(0, 2, 3, 1).view(b, -1, c)
34 |         x_att_permute = self.channel_attention(x_permute).view(b, h, w, c)
35 |         x_channel_att = x_att_permute.permute(0, 3, 1, 2)
36 | 
37 |         x = x * x_channel_att
38 |         # spatial attention
39 |         x_spatial_att = self.spatial_attention(x).sigmoid()
40 |         out = x * x_spatial_att
41 |         return out
42 | 
43 | 
44 | if __name__ == '__main__':
45 |     x = torch.randn(1, 16, 64, 64)
46 |     b, c, h, w = x.shape
47 |     net = GAM(channels=c)
48 |     out = net(x)
49 |     print(out.shape)
--------------------------------------------------------------------------------
/Attention/GlobalContextBlock.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn as nn
 3 | import torchvision
 4 | 
 5 | 
 6 | class GlobalContextBlock(nn.Module):
 7 |     def __init__(self,
 8 |                  inplanes,
 9 |                  ratio,
10 |                  pooling_type='att',
11 |                  fusion_types=('channel_add', )):
12 |         super(GlobalContextBlock, self).__init__()
13 |         assert pooling_type in ['avg', 'att']
14 |         assert isinstance(fusion_types, (list, tuple))
15 |         valid_fusion_types = ['channel_add', 'channel_mul']
16 |         assert all([f in valid_fusion_types for f in fusion_types])
17 |         assert len(fusion_types) > 0, 'at least one fusion should be used'
18 |         self.inplanes = inplanes
19 |         self.ratio = ratio
20 |         self.planes = int(inplanes * ratio)
21 |         self.pooling_type = pooling_type
22 |         self.fusion_types = fusion_types
23 |         if pooling_type == 'att':
24 |             self.conv_mask = nn.Conv2d(inplanes, 1, 
kernel_size=1) 25 | self.softmax = nn.Softmax(dim=2) 26 | else: 27 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 28 | if 'channel_add' in fusion_types: 29 | self.channel_add_conv = nn.Sequential( 30 | nn.Conv2d(self.inplanes, self.planes, kernel_size=1), 31 | nn.LayerNorm([self.planes, 1, 1]), 32 | nn.ReLU(inplace=True), # yapf: disable 33 | nn.Conv2d(self.planes, self.inplanes, kernel_size=1)) 34 | else: 35 | self.channel_add_conv = None 36 | if 'channel_mul' in fusion_types: 37 | self.channel_mul_conv = nn.Sequential( 38 | nn.Conv2d(self.inplanes, self.planes, kernel_size=1), 39 | nn.LayerNorm([self.planes, 1, 1]), 40 | nn.ReLU(inplace=True), # yapf: disable 41 | nn.Conv2d(self.planes, self.inplanes, kernel_size=1)) 42 | else: 43 | self.channel_mul_conv = None 44 | 45 | def spatial_pool(self, x): 46 | batch, channel, height, width = x.size() 47 | if self.pooling_type == 'att': 48 | input_x = x 49 | # [N, C, H * W] 50 | input_x = input_x.view(batch, channel, height * width) 51 | # [N, 1, C, H * W] 52 | input_x = input_x.unsqueeze(1) 53 | # [N, 1, H, W] 54 | context_mask = self.conv_mask(x) 55 | # [N, 1, H * W] 56 | context_mask = context_mask.view(batch, 1, height * width) 57 | # [N, 1, H * W] 58 | context_mask = self.softmax(context_mask) 59 | # [N, 1, H * W, 1] 60 | context_mask = context_mask.unsqueeze(-1) 61 | # [N, 1, C, 1] 62 | context = torch.matmul(input_x, context_mask) 63 | # [N, C, 1, 1] 64 | context = context.view(batch, channel, 1, 1) 65 | else: 66 | # [N, C, 1, 1] 67 | context = self.avg_pool(x) 68 | 69 | return context 70 | 71 | def forward(self, x): 72 | # [N, C, 1, 1] 73 | context = self.spatial_pool(x) 74 | 75 | out = x 76 | if self.channel_mul_conv is not None: 77 | # [N, C, 1, 1] 78 | channel_mul_term = torch.sigmoid(self.channel_mul_conv(context)) 79 | out = out * channel_mul_term 80 | if self.channel_add_conv is not None: 81 | # [N, C, 1, 1] 82 | channel_add_term = self.channel_add_conv(context) 83 | out = out + channel_add_term 84 | 85 | return out 86 | 87 | 88 | if __name__=='__main__': 89 | model = GlobalContextBlock(inplanes=16, ratio=0.25) 90 | print(model) 91 | 92 | input = torch.randn(1, 16, 64, 64) 93 | out = model(input) 94 | print(out.shape) -------------------------------------------------------------------------------- /Attention/NAM.py: -------------------------------------------------------------------------------- 1 | # !/usr/bin/env python 2 | # -- coding: utf-8 -- 3 | # @Time : 2021/12/7 11:06 4 | # @Author : liumin 5 | # @File : NAM.py 6 | 7 | import torch 8 | import torch.nn as nn 9 | 10 | """ 11 | NAM: Normalization-based Attention Module 12 | PDF: https://arxiv.org/pdf/2111.12419.pdf 13 | """ 14 | 15 | class NAM(nn.Module): 16 | def __init__(self, channel): 17 | super(NAM, self).__init__() 18 | self.channel = channel 19 | self.bn2 = nn.BatchNorm2d(self.channel, affine=True) 20 | self.sigmoid = nn.Sigmoid() 21 | 22 | def forward(self, x): 23 | residual = x 24 | x = self.bn2(x) 25 | weight_bn = self.bn2.weight.data.abs() / torch.sum(self.bn2.weight.data.abs()) 26 | x = x.permute(0, 2, 3, 1).contiguous() 27 | x = torch.mul(weight_bn, x) 28 | x = x.permute(0, 3, 1, 2).contiguous() 29 | out = self.sigmoid(x) * residual # 30 | return out 31 | 32 | 33 | if __name__=='__main__': 34 | model = NAM(channel=16) 35 | print(model) 36 | 37 | input = torch.randn(1, 16, 64, 64) 38 | out = model(input) 39 | print(out.shape) -------------------------------------------------------------------------------- /Attention/NonLocalBlock.py: 
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn as nn
 3 | import torchvision
 4 | 
 5 | 
 6 | class NonLocalBlock(nn.Module):
 7 |     def __init__(self, channel):
 8 |         super(NonLocalBlock, self).__init__()
 9 |         self.inter_channel = channel // 2
10 |         self.conv_phi = nn.Conv2d(in_channels=channel, out_channels=self.inter_channel, kernel_size=1, stride=1, padding=0, bias=False)
11 |         self.conv_theta = nn.Conv2d(in_channels=channel, out_channels=self.inter_channel, kernel_size=1, stride=1, padding=0, bias=False)
12 |         self.conv_g = nn.Conv2d(in_channels=channel, out_channels=self.inter_channel, kernel_size=1, stride=1, padding=0, bias=False)
13 |         self.softmax = nn.Softmax(dim=-1)  # normalize each query row over the key positions
14 |         self.conv_mask = nn.Conv2d(in_channels=self.inter_channel, out_channels=channel, kernel_size=1, stride=1, padding=0, bias=False)
15 | 
16 |     def forward(self, x):
17 |         # [N, C, H , W]
18 |         b, c, h, w = x.size()
19 |         # [N, C/2, H * W]
20 |         x_phi = self.conv_phi(x).view(b, self.inter_channel, -1)
21 |         # [N, H * W, C/2]
22 |         x_theta = self.conv_theta(x).view(b, self.inter_channel, -1).permute(0, 2, 1).contiguous()
23 |         x_g = self.conv_g(x).view(b, self.inter_channel, -1).permute(0, 2, 1).contiguous()
24 |         # [N, H * W, H * W]
25 |         mul_theta_phi = torch.matmul(x_theta, x_phi)
26 |         mul_theta_phi = self.softmax(mul_theta_phi)
27 |         # [N, H * W, C/2]
28 |         mul_theta_phi_g = torch.matmul(mul_theta_phi, x_g)
29 |         # [N, C/2, H, W]
30 |         mul_theta_phi_g = mul_theta_phi_g.permute(0,2,1).contiguous().view(b,self.inter_channel, h, w)
31 |         # [N, C, H , W]
32 |         mask = self.conv_mask(mul_theta_phi_g)
33 |         out = mask + x
34 |         return out
35 | 
36 | 
37 | if __name__=='__main__':
38 |     model = NonLocalBlock(channel=16)
39 |     print(model)
40 | 
41 |     input = torch.randn(1, 16, 64, 64)
42 |     out = model(input)
43 |     print(out.shape)
--------------------------------------------------------------------------------
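Note (added): the [N, H * W, H * W] attention map above grows quadratically with resolution, which is exactly what the APNB/AFNB blocks in ANN.py mitigate by pyramid-sampling keys and values down to S = 110 positions. Rough numbers for the 64 x 64 demo input:

    # full non-local block:   (64*64)**2    = 16,777,216 attention entries per batch element
    # asymmetric block (ANN): (64*64) * 110 =    450,560 entries (~37x fewer)
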
/Attention/README.md:
--------------------------------------------------------------------------------
 1 | # Attention
 2 | 
 3 | 
 4 | 
 5 | **SE Net**
 6 | 
 7 | Squeeze-and-Excitation Networks, 2017
 8 | 
 9 | https://arxiv.org/pdf/1709.01507.pdf
10 | 
11 | https://liumin.blog.csdn.net/article/details/104370739
12 | 
13 | 
14 | 
15 | **scSE**
16 | 
17 | Concurrent Spatial and Channel Squeeze & Excitation in Fully Convolutional Networks, 2018
18 | https://arxiv.org/pdf/1803.02579v2.pdf
19 | 
20 | https://liumin.blog.csdn.net/article/details/104371065
21 | 
22 | 
23 | 
24 | **NL Net**
25 | 
26 | Non-local Neural Networks, 2018
27 | https://arxiv.org/pdf/1711.07971.pdf
28 | 
29 | https://liumin.blog.csdn.net/article/details/104371212
30 | 
31 | 
32 | 
33 | **GCNet**
34 | 
35 | GCNet: Non-local Networks Meet Squeeze-Excitation Networks and Beyond, 2019
36 | https://arxiv.org/pdf/1904.11492.pdf
37 | 
38 | https://liumin.blog.csdn.net/article/details/104375585
39 | 
40 | 
41 | 
42 | **CBAM**
43 | 
44 | CBAM: Convolutional Block Attention Module, 2018
45 | https://arxiv.org/pdf/1807.06521.pdf
46 | 
47 | https://liumin.blog.csdn.net/article/details/104371273
48 | 
49 | 
50 | 
51 | 
--------------------------------------------------------------------------------
/Attention/SENet.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn as nn
 3 | import torchvision
 4 | 
 5 | 
 6 | def Conv1(in_planes, places, stride=2):
 7 |     return nn.Sequential(
 8 |         nn.Conv2d(in_channels=in_planes,out_channels=places,kernel_size=7,stride=stride,padding=3, bias=False),
 9 |         nn.BatchNorm2d(places),
10 |         nn.ReLU(inplace=True),
11 |         nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
12 |     )
13 | 
14 | class SE_Module(nn.Module):
15 |     def __init__(self, channel,ratio = 16):
16 |         super(SE_Module, self).__init__()
17 |         self.squeeze = nn.AdaptiveAvgPool2d(1)
18 |         self.excitation = nn.Sequential(
19 |             nn.Linear(in_features=channel, out_features=channel // ratio),
20 |             nn.ReLU(inplace=True),
21 |             nn.Linear(in_features=channel // ratio, out_features=channel),
22 |             nn.Sigmoid()
23 |         )
24 |     def forward(self, x):
25 |         b, c, _, _ = x.size()
26 |         y = self.squeeze(x).view(b, c)
27 |         z = self.excitation(y).view(b, c, 1, 1)
28 |         return x * z.expand_as(x)
29 | 
30 | 
31 | class SE_ResNetBlock(nn.Module):
32 |     def __init__(self,in_places,places, stride=1,downsampling=False, expansion = 4):
33 |         super(SE_ResNetBlock,self).__init__()
34 |         self.expansion = expansion
35 |         self.downsampling = downsampling
36 | 
37 |         self.bottleneck = nn.Sequential(
38 |             nn.Conv2d(in_channels=in_places,out_channels=places,kernel_size=1,stride=1, bias=False),
39 |             nn.BatchNorm2d(places),
40 |             nn.ReLU(inplace=True),
41 |             nn.Conv2d(in_channels=places, out_channels=places, kernel_size=3, stride=stride, padding=1, bias=False),
42 |             nn.BatchNorm2d(places),
43 |             nn.ReLU(inplace=True),
44 |             nn.Conv2d(in_channels=places, out_channels=places*self.expansion, kernel_size=1, stride=1, bias=False),
45 |             nn.BatchNorm2d(places*self.expansion),
46 |         )
47 |         self.se = SE_Module(channel=places*self.expansion)  # squeeze-excitation gate on the residual branch (defined above but previously never applied)
48 |         if self.downsampling:
49 |             self.downsample = nn.Sequential(
50 |                 nn.Conv2d(in_channels=in_places, out_channels=places*self.expansion, kernel_size=1, stride=stride, bias=False),
51 |                 nn.BatchNorm2d(places*self.expansion)
52 |             )
53 |         self.relu = nn.ReLU(inplace=True)
54 | 
55 |     def forward(self, x):
56 |         residual = x
57 |         out = self.bottleneck(x)
58 |         out = self.se(out)
59 |         if self.downsampling:
60 |             residual = self.downsample(x)
61 | 
62 |         out += residual
63 |         out = self.relu(out)
64 |         return out
65 | 
66 | class SE_ResNet(nn.Module):
67 |     def __init__(self,blocks, num_classes=1000, expansion = 4):
68 |         super(SE_ResNet,self).__init__()
69 |         self.expansion = expansion
70 | 
71 |         self.conv1 = Conv1(in_planes = 3, places= 64)
72 | 
73 |         self.layer1 = self.make_layer(in_places = 64, places= 64, block=blocks[0], stride=1)
74 |         self.layer2 = self.make_layer(in_places = 256,places=128, block=blocks[1], stride=2)
75 |         self.layer3 = self.make_layer(in_places=512,places=256, block=blocks[2], stride=2)
76 |         self.layer4 = self.make_layer(in_places=1024,places=512, block=blocks[3], stride=2)
77 | 
78 |         self.avgpool = nn.AvgPool2d(7, stride=1)
79 |         self.fc = nn.Linear(2048,num_classes)
80 | 
81 |         for m in self.modules():
82 |             if isinstance(m, nn.Conv2d):
83 |                 nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
84 |             elif isinstance(m, nn.BatchNorm2d):
85 |                 nn.init.constant_(m.weight, 1)
86 |                 nn.init.constant_(m.bias, 0)
87 | 
88 |     def make_layer(self, in_places, places, block, stride):
89 |         layers = []
90 |         layers.append(SE_ResNetBlock(in_places, places,stride, downsampling =True))
91 |         for i in range(1, block):
92 |             layers.append(SE_ResNetBlock(places*self.expansion, places))
93 | 
94 |         return nn.Sequential(*layers)
95 | 
96 | 
97 |     def forward(self, x):
98 |         x = self.conv1(x)
99 | 
100 |         x = self.layer1(x)
101 |         x = self.layer2(x)
102 |         x = self.layer3(x)
103 |         x = self.layer4(x)
104 | 
105 |         x = self.avgpool(x)
106 |         x = x.view(x.size(0), -1)
107 |         x = self.fc(x)
108 |         return x
109 | 
110 | def SE_ResNet50():
111 |     return SE_ResNet([3, 4, 6, 3])
112 | 
113 | if __name__=='__main__':
114 |     model = SE_ResNet50()
115 |     print(model)
116 | 
117 |     input = torch.randn(1, 3, 224, 224)
118 |     out = model(input)
119 |     print(out.shape)
120 | 
121 | 
122 | 
123 | 
124 | 
125 | 
--------------------------------------------------------------------------------
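Shape walk-through (added), with example sizes: SE_Module squeezes an input of [2, 256, 56, 56] to [2, 256] by global average pooling, runs it through the 256 -> 16 -> 256 bottleneck MLP (ratio=16), and rescales the input channels with the resulting sigmoid weights:

    se = SE_Module(channel=256)
    y = se(torch.randn(2, 256, 56, 56))   # y.shape == torch.Size([2, 256, 56, 56])
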
/Attention/SEvariants.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn as nn
 3 | import torchvision
 4 | 
 5 | 
 6 | class cSE_Module(nn.Module):
 7 |     def __init__(self, channel,ratio = 16):
 8 |         super(cSE_Module, self).__init__()
 9 |         self.squeeze = nn.AdaptiveAvgPool2d(1)
10 |         self.excitation = nn.Sequential(
11 |             nn.Linear(in_features=channel, out_features=channel // ratio),
12 |             nn.ReLU(inplace=True),
13 |             nn.Linear(in_features=channel // ratio, out_features=channel),
14 |             nn.Sigmoid()
15 |         )
16 |     def forward(self, x):
17 |         b, c, _, _ = x.size()
18 |         y = self.squeeze(x).view(b, c)
19 |         z = self.excitation(y).view(b, c, 1, 1)
20 |         return x * z.expand_as(x)
21 | 
22 | 
23 | class sSE_Module(nn.Module):
24 |     def __init__(self, channel):
25 |         super(sSE_Module, self).__init__()
26 |         self.spatial_excitation = nn.Sequential(
27 |             nn.Conv2d(in_channels=channel, out_channels=1, kernel_size=1,stride=1,padding=0),
28 |             nn.Sigmoid()
29 |         )
30 |     def forward(self, x):
31 |         z = self.spatial_excitation(x)
32 |         return x * z.expand_as(x)
33 | 
34 | 
35 | class scSE_Module(nn.Module):
36 |     def __init__(self, channel,ratio = 16):
37 |         super(scSE_Module, self).__init__()
38 |         self.cSE = cSE_Module(channel,ratio)
39 |         self.sSE = sSE_Module(channel)
40 | 
41 |     def forward(self, x):
42 |         return self.cSE(x) + self.sSE(x)  # aggregate the two recalibrated maps by element-wise addition
43 | 
44 | 
45 | if __name__=='__main__':
46 |     # model = cSE_Module(channel=16)
47 |     # model = sSE_Module(channel=16)
48 |     model = scSE_Module(channel=16)
49 |     print(model)
50 | 
51 |     input = torch.randn(1, 16, 64, 64)
52 |     out = model(input)
53 |     print(out.shape)
--------------------------------------------------------------------------------
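Note (added): the scSE paper also discusses element-wise max, multiplication and concatenation as alternative ways of aggregating the two branches; addition is what this file implements. A max-out variant would be a one-line change to scSE_Module.forward (hypothetical, not part of the original file):

    # return torch.max(self.cSE(x), self.sSE(x))
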
/Attention/TripletAttention.py:
--------------------------------------------------------------------------------
 1 | # !/usr/bin/env python
 2 | # -- coding: utf-8 --
 3 | # @Time : 2020/10/30 14:30
 4 | # @Author : liumin
 5 | # @File : TripletAttention.py
 6 | 
 7 | import torch
 8 | import torch.nn as nn
 9 | import torchvision
10 | 
11 | 
12 | class ChannelPool(nn.Module):
13 |     def forward(self, x):
14 |         return torch.cat( (torch.max(x,1)[0].unsqueeze(1), torch.mean(x,1).unsqueeze(1)), dim=1 )
15 | 
16 | 
17 | class SpatialGate(nn.Module):
18 |     def __init__(self):
19 |         super(SpatialGate, self).__init__()
20 | 
21 |         self.channel_pool = ChannelPool()
22 |         self.conv = nn.Sequential(
23 |             nn.Conv2d(in_channels=2, out_channels=1, kernel_size=7, stride=1, padding=3),
24 |             nn.BatchNorm2d(1)
25 |         )
26 |         self.sigmoid = nn.Sigmoid()
27 | 
28 |     def forward(self, x):
29 |         out = self.conv(self.channel_pool(x))
30 |         return x * self.sigmoid(out)  # gate the input features (out itself has only one channel)
31 | 
32 | 
33 | class TripletAttention(nn.Module):
34 |     def __init__(self, spatial=True):
35 |         super(TripletAttention, self).__init__()
36 |         self.spatial = spatial
37 |         self.height_gate = SpatialGate()
38 |         self.width_gate = SpatialGate()
39 |         if self.spatial:
40 |             self.spatial_gate = SpatialGate()
41 | 
42 |     def forward(self, x):
43 |         x_perm1 = x.permute(0, 2, 1, 3).contiguous()
44 |         x_out1 = self.height_gate(x_perm1)
45 |         x_out1 = x_out1.permute(0, 2, 1, 3).contiguous()
46 | 
47 |         x_perm2 = x.permute(0, 3, 2, 1).contiguous()
48 |         x_out2 = self.width_gate(x_perm2)
49 |         x_out2 = x_out2.permute(0, 3, 2, 1).contiguous()
50 | 
51 |         if self.spatial:
52 |             x_out3 = self.spatial_gate(x)
53 |             return (1/3) * (x_out1 + x_out2 + x_out3)
54 |         else:
55 |             return (1/2) * (x_out1 + x_out2)
56 | 
57 | 
58 | 
59 | if __name__=='__main__':
60 |     model = TripletAttention()
61 |     print(model)
62 | 
63 |     input = torch.randn(1, 16, 256, 256)
64 |     out = model(input)
65 |     print(out.shape)
--------------------------------------------------------------------------------
/ClassicNetwork/AlexNet.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn as nn
 3 | import torchvision
 4 | 
 5 | class AlexNet(nn.Module):
 6 |     def __init__(self,num_classes=1000):
 7 |         super(AlexNet,self).__init__()
 8 |         self.feature_extraction = nn.Sequential(
 9 |             nn.Conv2d(in_channels=3,out_channels=96,kernel_size=11,stride=4,padding=2,bias=False),
10 |             nn.ReLU(inplace=True),
11 |             nn.MaxPool2d(kernel_size=3,stride=2,padding=0),
12 |             nn.Conv2d(in_channels=96,out_channels=192,kernel_size=5,stride=1,padding=2,bias=False),
13 |             nn.ReLU(inplace=True),
14 |             nn.MaxPool2d(kernel_size=3,stride=2,padding=0),
15 |             nn.Conv2d(in_channels=192,out_channels=384,kernel_size=3,stride=1,padding=1,bias=False),
16 |             nn.ReLU(inplace=True),
17 |             nn.Conv2d(in_channels=384,out_channels=256,kernel_size=3,stride=1,padding=1,bias=False),
18 |             nn.ReLU(inplace=True),
19 |             nn.Conv2d(in_channels=256,out_channels=256,kernel_size=3,stride=1,padding=1,bias=False),
20 |             nn.ReLU(inplace=True),
21 |             nn.MaxPool2d(kernel_size=3, stride=2, padding=0),
22 |         )
23 |         self.classifier = nn.Sequential(
24 |             nn.Dropout(p=0.5),
25 |             nn.Linear(in_features=256*6*6,out_features=4096),
26 |             nn.ReLU(inplace=True),  # the fully-connected layers need non-linearities between them
27 |             nn.Dropout(p=0.5),
28 |             nn.Linear(in_features=4096, out_features=4096),
29 |             nn.ReLU(inplace=True),
30 |             nn.Linear(in_features=4096, out_features=num_classes),
31 |         )
32 |     def forward(self,x):
33 |         x = self.feature_extraction(x)
34 |         x = x.view(x.size(0),256*6*6)
35 |         x = self.classifier(x)
36 |         return x
37 | 
38 | 
39 | if __name__ =='__main__':
40 |     # model = torchvision.models.AlexNet()
41 |     model = AlexNet()
42 |     print(model)
43 | 
44 |     input = torch.randn(8,3,224,224)
45 |     out = model(input)
46 |     print(out.shape)
47 | 
48 | 
--------------------------------------------------------------------------------
/ClassicNetwork/DenseNet.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn as nn
 3 | import torchvision
 4 | 
 5 | print("PyTorch Version: ",torch.__version__)
 6 | print("Torchvision Version: ",torchvision.__version__)
 7 | 
 8 | __all__ = ['DenseNet121', 'DenseNet169','DenseNet201','DenseNet264']
 9 | 
10 | def Conv1(in_planes, places, stride=2):
11 |     return nn.Sequential(
12 |         nn.Conv2d(in_channels=in_planes,out_channels=places,kernel_size=7,stride=stride,padding=3, bias=False),
13 |         nn.BatchNorm2d(places),
14 |         nn.ReLU(inplace=True),
15 |         nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
16 |     )
17 | 
18 | class _TransitionLayer(nn.Module):
19 |     def __init__(self, inplace, plance):
20 |         super(_TransitionLayer, self).__init__()
21 |         self.transition_layer = nn.Sequential(
22 |             nn.BatchNorm2d(inplace),
23 |             nn.ReLU(inplace=True),
24 |             nn.Conv2d(in_channels=inplace,out_channels=plance,kernel_size=1,stride=1,padding=0,bias=False),
25 |             nn.AvgPool2d(kernel_size=2,stride=2),
26 |         )
27 | 
28 |     def forward(self, x):
29 |         return self.transition_layer(x)
30 | 
31 | 
32 | class _DenseLayer(nn.Module):
33 |     def __init__(self, inplace, growth_rate, bn_size, drop_rate=0):
34 |         super(_DenseLayer, self).__init__()
35 |         self.drop_rate = drop_rate
36 |         self.dense_layer = nn.Sequential(
37 |             nn.BatchNorm2d(inplace),
38 |             
nn.ReLU(inplace=True), 39 | nn.Conv2d(in_channels=inplace, out_channels=bn_size * growth_rate, kernel_size=1, stride=1, padding=0, bias=False), 40 | nn.BatchNorm2d(bn_size * growth_rate), 41 | nn.ReLU(inplace=True), 42 | nn.Conv2d(in_channels=bn_size * growth_rate, out_channels=growth_rate, kernel_size=3, stride=1, padding=1, bias=False), 43 | ) 44 | self.dropout = nn.Dropout(p=self.drop_rate) 45 | 46 | def forward(self, x): 47 | y = self.dense_layer(x) 48 | if self.drop_rate > 0: 49 | y = self.dropout(y) 50 | return torch.cat([x, y], 1) 51 | 52 | 53 | class DenseBlock(nn.Module): 54 | def __init__(self, num_layers, inplances, growth_rate, bn_size , drop_rate=0): 55 | super(DenseBlock, self).__init__() 56 | layers = [] 57 | for i in range(num_layers): 58 | layers.append(_DenseLayer(inplances + i * growth_rate, growth_rate, bn_size, drop_rate)) 59 | self.layers = nn.Sequential(*layers) 60 | 61 | def forward(self, x): 62 | return self.layers(x) 63 | 64 | 65 | class DenseNet(nn.Module): 66 | def __init__(self, init_channels=64, growth_rate=32, blocks=[6, 12, 24, 16],num_classes=1000): 67 | super(DenseNet, self).__init__() 68 | bn_size = 4 69 | drop_rate = 0 70 | self.conv1 = Conv1(in_planes=3, places=init_channels) 71 | 72 | num_features = init_channels 73 | self.layer1 = DenseBlock(num_layers=blocks[0], inplances=num_features, growth_rate=growth_rate, bn_size=bn_size, drop_rate=drop_rate) 74 | num_features = num_features + blocks[0] * growth_rate 75 | self.transition1 = _TransitionLayer(inplace=num_features, plance=num_features // 2) 76 | num_features = num_features // 2 77 | self.layer2 = DenseBlock(num_layers=blocks[1], inplances=num_features, growth_rate=growth_rate, bn_size=bn_size, drop_rate=drop_rate) 78 | num_features = num_features + blocks[1] * growth_rate 79 | self.transition2 = _TransitionLayer(inplace=num_features, plance=num_features // 2) 80 | num_features = num_features // 2 81 | self.layer3 = DenseBlock(num_layers=blocks[2], inplances=num_features, growth_rate=growth_rate, bn_size=bn_size, drop_rate=drop_rate) 82 | num_features = num_features + blocks[2] * growth_rate 83 | self.transition3 = _TransitionLayer(inplace=num_features, plance=num_features // 2) 84 | num_features = num_features // 2 85 | self.layer4 = DenseBlock(num_layers=blocks[3], inplances=num_features, growth_rate=growth_rate, bn_size=bn_size, drop_rate=drop_rate) 86 | num_features = num_features + blocks[3] * growth_rate 87 | 88 | self.avgpool = nn.AvgPool2d(7, stride=1) 89 | self.fc = nn.Linear(num_features, num_classes) 90 | 91 | def forward(self, x): 92 | x = self.conv1(x) 93 | 94 | x = self.layer1(x) 95 | x = self.transition1(x) 96 | x = self.layer2(x) 97 | x = self.transition2(x) 98 | x = self.layer3(x) 99 | x = self.transition3(x) 100 | x = self.layer4(x) 101 | 102 | x = self.avgpool(x) 103 | x = x.view(x.size(0), -1) 104 | x = self.fc(x) 105 | return x 106 | 107 | def DenseNet121(): 108 | return DenseNet(init_channels=64, growth_rate=32, blocks=[6, 12, 24, 16]) 109 | 110 | def DenseNet169(): 111 | return DenseNet(init_channels=64, growth_rate=32, blocks=[6, 12, 32, 32]) 112 | 113 | def DenseNet201(): 114 | return DenseNet(init_channels=64, growth_rate=32, blocks=[6, 12, 48, 32]) 115 | 116 | def DenseNet264(): 117 | return DenseNet(init_channels=64, growth_rate=32, blocks=[6, 12, 64, 48]) 118 | 119 | if __name__=='__main__': 120 | # model = torchvision.models.densenet121() 121 | model = DenseNet121() 122 | print(model) 123 | 124 | input = torch.randn(1, 3, 224, 224) 125 | out = model(input) 126 | 
print(out.shape) -------------------------------------------------------------------------------- /ClassicNetwork/InceptionV1.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torchvision 4 | 5 | def ConvBNReLU(in_channels,out_channels,kernel_size): 6 | return nn.Sequential( 7 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=1,padding=kernel_size//2), 8 | nn.BatchNorm2d(out_channels), 9 | nn.ReLU6(inplace=True) 10 | ) 11 | 12 | class InceptionV1Module(nn.Module): 13 | def __init__(self, in_channels,out_channels1, out_channels2reduce,out_channels2, out_channels3reduce, out_channels3, out_channels4): 14 | super(InceptionV1Module, self).__init__() 15 | 16 | self.branch1_conv = ConvBNReLU(in_channels=in_channels,out_channels=out_channels1,kernel_size=1) 17 | 18 | self.branch2_conv1 = ConvBNReLU(in_channels=in_channels,out_channels=out_channels2reduce,kernel_size=1) 19 | self.branch2_conv2 = ConvBNReLU(in_channels=out_channels2reduce,out_channels=out_channels2,kernel_size=3) 20 | 21 | self.branch3_conv1 = ConvBNReLU(in_channels=in_channels, out_channels=out_channels3reduce, kernel_size=1) 22 | self.branch3_conv2 = ConvBNReLU(in_channels=out_channels3reduce, out_channels=out_channels3, kernel_size=5) 23 | 24 | self.branch4_pool = nn.MaxPool2d(kernel_size=3,stride=1,padding=1) 25 | self.branch4_conv1 = ConvBNReLU(in_channels=in_channels, out_channels=out_channels4, kernel_size=1) 26 | 27 | def forward(self,x): 28 | out1 = self.branch1_conv(x) 29 | out2 = self.branch2_conv2(self.branch2_conv1(x)) 30 | out3 = self.branch3_conv2(self.branch3_conv1(x)) 31 | out4 = self.branch4_conv1(self.branch4_pool(x)) 32 | out = torch.cat([out1, out2, out3, out4], dim=1) 33 | return out 34 | 35 | class InceptionAux(nn.Module): 36 | def __init__(self, in_channels,out_channels): 37 | super(InceptionAux, self).__init__() 38 | 39 | self.auxiliary_avgpool = nn.AvgPool2d(kernel_size=5, stride=3) 40 | self.auxiliary_conv1 = ConvBNReLU(in_channels=in_channels, out_channels=128, kernel_size=1) 41 | self.auxiliary_linear1 = nn.Linear(in_features=128 * 4 * 4, out_features=1024) 42 | self.auxiliary_relu = nn.ReLU6(inplace=True) 43 | self.auxiliary_dropout = nn.Dropout(p=0.7) 44 | self.auxiliary_linear2 = nn.Linear(in_features=1024, out_features=out_channels) 45 | 46 | def forward(self, x): 47 | x = self.auxiliary_conv1(self.auxiliary_avgpool(x)) 48 | x = x.view(x.size(0), -1) 49 | x= self.auxiliary_relu(self.auxiliary_linear1(x)) 50 | out = self.auxiliary_linear2(self.auxiliary_dropout(x)) 51 | return out 52 | 53 | class InceptionV1(nn.Module): 54 | def __init__(self, num_classes=1000, stage='train'): 55 | super(InceptionV1, self).__init__() 56 | self.stage = stage 57 | 58 | self.block1 = nn.Sequential( 59 | nn.Conv2d(in_channels=3,out_channels=64,kernel_size=7,stride=2,padding=3), 60 | nn.BatchNorm2d(64), 61 | nn.MaxPool2d(kernel_size=3,stride=2, padding=1), 62 | nn.Conv2d(in_channels=64, out_channels=64, kernel_size=1, stride=1), 63 | nn.BatchNorm2d(64), 64 | ) 65 | self.block2 = nn.Sequential( 66 | nn.Conv2d(in_channels=64, out_channels=192, kernel_size=3, stride=1, padding=1), 67 | nn.BatchNorm2d(192), 68 | nn.MaxPool2d(kernel_size=3, stride=2, padding=1), 69 | ) 70 | 71 | self.block3 = nn.Sequential( 72 | InceptionV1Module(in_channels=192,out_channels1=64, out_channels2reduce=96, out_channels2=128, out_channels3reduce = 16, out_channels3=32, out_channels4=32), 73 | 
InceptionV1Module(in_channels=256, out_channels1=128, out_channels2reduce=128, out_channels2=192,out_channels3reduce=32, out_channels3=96, out_channels4=64),
74 |             nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
75 |         )
76 | 
77 |         self.block4_1 = InceptionV1Module(in_channels=480, out_channels1=192, out_channels2reduce=96, out_channels2=208,out_channels3reduce=16, out_channels3=48, out_channels4=64)
78 | 
79 |         if self.stage == 'train':
80 |             self.aux_logits1 = InceptionAux(in_channels=512,out_channels=num_classes)
81 | 
82 |         self.block4_2 = nn.Sequential(
83 |             InceptionV1Module(in_channels=512, out_channels1=160, out_channels2reduce=112, out_channels2=224,
84 |                               out_channels3reduce=24, out_channels3=64, out_channels4=64),
85 |             InceptionV1Module(in_channels=512, out_channels1=128, out_channels2reduce=128, out_channels2=256,
86 |                               out_channels3reduce=24, out_channels3=64, out_channels4=64),
87 |             InceptionV1Module(in_channels=512, out_channels1=112, out_channels2reduce=144, out_channels2=288,
88 |                               out_channels3reduce=32, out_channels3=64, out_channels4=64),
89 |         )
90 | 
91 |         if self.stage == 'train':
92 |             self.aux_logits2 = InceptionAux(in_channels=528,out_channels=num_classes)
93 | 
94 |         self.block4_3 = nn.Sequential(
95 |             InceptionV1Module(in_channels=528, out_channels1=256, out_channels2reduce=160, out_channels2=320,
96 |                               out_channels3reduce=32, out_channels3=128, out_channels4=128),
97 |             nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
98 |         )
99 | 
100 |         self.block5 = nn.Sequential(
101 |             InceptionV1Module(in_channels=832, out_channels1=256, out_channels2reduce=160, out_channels2=320,out_channels3reduce=32, out_channels3=128, out_channels4=128),
102 |             InceptionV1Module(in_channels=832, out_channels1=384, out_channels2reduce=192, out_channels2=384,out_channels3reduce=48, out_channels3=128, out_channels4=128),
103 |         )
104 | 
105 |         self.avgpool = nn.AvgPool2d(kernel_size=7,stride=1)
106 |         self.dropout = nn.Dropout(p=0.4)
107 |         self.linear = nn.Linear(in_features=1024,out_features=num_classes)
108 | 
109 |     def forward(self, x):
110 |         x = self.block1(x)
111 |         x = self.block2(x)
112 |         x = self.block3(x)
113 |         aux1 = x = self.block4_1(x)
114 |         aux2 = x = self.block4_2(x)
115 |         x = self.block4_3(x)
116 |         out = self.block5(x)
117 |         out = self.avgpool(out)
118 |         out = self.dropout(out)
119 |         out = out.view(out.size(0), -1)
120 |         out = self.linear(out)
121 |         if self.stage == 'train':
122 |             aux1 = self.aux_logits1(aux1)
123 |             aux2 = self.aux_logits2(aux2)
124 |             return aux1, aux2, out
125 |         else:
126 |             return out
127 | 
128 | if __name__=='__main__':
129 |     model = InceptionV1()
130 |     print(model)
131 | 
132 |     input = torch.randn(1, 3, 224, 224)
133 |     aux1, aux2, out = model(input)
134 |     print(aux1.shape)
135 |     print(aux2.shape)
136 |     print(out.shape)
137 | 
--------------------------------------------------------------------------------
/ClassicNetwork/InceptionV4.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn as nn
 3 | import torchvision
 4 | 
 5 | class InceptionV4(nn.Module):
 6 |     """Placeholder: the Inception-v4 architecture is not implemented here yet."""
 7 |     def __init__(self):
 8 |         super(InceptionV4, self).__init__()
 9 | 
10 |     def forward(self, x):
11 |         # stub: the original forward took no input and returned an undefined `out`
12 |         raise NotImplementedError('InceptionV4 is a stub; see InceptionV1/V2/V3 for working models')
13 | 
14 | if __name__=='__main__':
15 |     model = InceptionV4()
16 |     print(model)
17 | 
18 |     # the forward pass is not implemented yet:
19 |     # input = torch.randn(1, 3, 224, 224)
20 |     # out = model(input)
21 |     # print(out.shape)
--------------------------------------------------------------------------------
/ClassicNetwork/README.md:
--------------------------------------------------------------------------------
1 | # 
ClassicNetwork 2 | Classical network implemented by pytorch 3 | 4 | 5 | 6 | **AlexNet:** 7 | 8 | - ImageNet Classification with Deep Convolutional Neural Networks, Alex Krizhevsky, 2012 9 | 10 | 11 | 12 | **VGG:** 13 | 14 | - Very Deep Convolutional Networks for Large-Scale Image Recognition,Karen Simonyan,2014 15 | 16 | 17 | 18 | **ResNet:** 19 | 20 | - Deep Residual Learning for Image Recognition, He-Kaiming, 2015 21 | 22 | 23 | 24 | **InceptionV1:** 25 | 26 | - Going deeper with convolutions , Christian Szegedy , 2014 27 | 28 | 29 | 30 | **InceptionV2 and InceptionV3:** 31 | 32 | - Rethinking the Inception Architecture for Computer Vision , Christian Szegedy ,2015 33 | 34 | 35 | 36 | **InceptionV4 and Inception-ResNet:** 37 | 38 | - Inception-v4, Inception-ResNet and the Impact of Residual Connections on Learning , Christian Szegedy ,2016 39 | 40 | 41 | 42 | **DenseNet:** 43 | 44 | Densely Connected Convolutional Networks, 2017 45 | 46 | 47 | 48 | **ResNeXt:** 49 | 50 | Aggregated Residual Transformations for Deep Neural Networks,2017 -------------------------------------------------------------------------------- /ClassicNetwork/ResNeXt.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torchvision 4 | 5 | 6 | class ResNeXtBlock(nn.Module): 7 | def __init__(self,in_places,places, stride=1,downsampling=False, expansion = 2, cardinality=32): 8 | super(ResNeXtBlock,self).__init__() 9 | self.expansion = expansion 10 | self.downsampling = downsampling 11 | 12 | self.bottleneck = nn.Sequential( 13 | nn.Conv2d(in_channels=in_places, out_channels=places, kernel_size=1, stride=1, bias=False), 14 | nn.BatchNorm2d(places), 15 | nn.ReLU(inplace=True), 16 | nn.Conv2d(in_channels=places, out_channels=places, kernel_size=3, stride=stride, padding=1, bias=False, groups=cardinality), 17 | nn.BatchNorm2d(places), 18 | nn.ReLU(inplace=True), 19 | nn.Conv2d(in_channels=places, out_channels=places * self.expansion, kernel_size=1, stride=1, bias=False), 20 | nn.BatchNorm2d(places * self.expansion), 21 | ) 22 | 23 | if self.downsampling: 24 | self.downsample = nn.Sequential( 25 | nn.Conv2d(in_channels=in_places, out_channels=places * self.expansion, kernel_size=1, stride=stride,bias=False), 26 | nn.BatchNorm2d(places * self.expansion) 27 | ) 28 | self.relu = nn.ReLU(inplace=True) 29 | 30 | def forward(self, x): 31 | residual = x 32 | out = self.bottleneck(x) 33 | 34 | if self.downsampling: 35 | residual = self.downsample(x) 36 | 37 | out += residual 38 | out = self.relu(out) 39 | return out 40 | 41 | 42 | if __name__ =='__main__': 43 | model = ResNeXtBlock(in_places=256, places=128) 44 | print(model) 45 | 46 | input = torch.randn(1,256,64,64) 47 | out = model(input) 48 | print(out.shape) -------------------------------------------------------------------------------- /ClassicNetwork/ResNet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torchvision 4 | 5 | print("PyTorch Version: ",torch.__version__) 6 | print("Torchvision Version: ",torchvision.__version__) 7 | 8 | __all__ = ['ResNet50', 'ResNet101','ResNet152'] 9 | 10 | def Conv1(in_planes, places, stride=2): 11 | return nn.Sequential( 12 | nn.Conv2d(in_channels=in_planes,out_channels=places,kernel_size=7,stride=stride,padding=3, bias=False), 13 | nn.BatchNorm2d(places), 14 | nn.ReLU(inplace=True), 15 | nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 16 | ) 17 | 18 | class 
Bottleneck(nn.Module): 19 | def __init__(self,in_places,places, stride=1,downsampling=False, expansion = 4): 20 | super(Bottleneck,self).__init__() 21 | self.expansion = expansion 22 | self.downsampling = downsampling 23 | 24 | self.bottleneck = nn.Sequential( 25 | nn.Conv2d(in_channels=in_places,out_channels=places,kernel_size=1,stride=1, bias=False), 26 | nn.BatchNorm2d(places), 27 | nn.ReLU(inplace=True), 28 | nn.Conv2d(in_channels=places, out_channels=places, kernel_size=3, stride=stride, padding=1, bias=False), 29 | nn.BatchNorm2d(places), 30 | nn.ReLU(inplace=True), 31 | nn.Conv2d(in_channels=places, out_channels=places*self.expansion, kernel_size=1, stride=1, bias=False), 32 | nn.BatchNorm2d(places*self.expansion), 33 | ) 34 | 35 | if self.downsampling: 36 | self.downsample = nn.Sequential( 37 | nn.Conv2d(in_channels=in_places, out_channels=places*self.expansion, kernel_size=1, stride=stride, bias=False), 38 | nn.BatchNorm2d(places*self.expansion) 39 | ) 40 | self.relu = nn.ReLU(inplace=True) 41 | 42 | def forward(self, x): 43 | residual = x 44 | out = self.bottleneck(x) 45 | 46 | if self.downsampling: 47 | residual = self.downsample(x) 48 | 49 | out += residual 50 | out = self.relu(out) 51 | return out 52 | 53 | class ResNet(nn.Module): 54 | def __init__(self,blocks, num_classes=1000, expansion = 4): 55 | super(ResNet,self).__init__() 56 | self.expansion = expansion 57 | 58 | self.conv1 = Conv1(in_planes = 3, places= 64) 59 | 60 | self.layer1 = self.make_layer(in_places = 64, places= 64, block=blocks[0], stride=1) 61 | self.layer2 = self.make_layer(in_places = 256,places=128, block=blocks[1], stride=2) 62 | self.layer3 = self.make_layer(in_places=512,places=256, block=blocks[2], stride=2) 63 | self.layer4 = self.make_layer(in_places=1024,places=512, block=blocks[3], stride=2) 64 | 65 | self.avgpool = nn.AvgPool2d(7, stride=1) 66 | self.fc = nn.Linear(2048,num_classes) 67 | 68 | for m in self.modules(): 69 | if isinstance(m, nn.Conv2d): 70 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 71 | elif isinstance(m, nn.BatchNorm2d): 72 | nn.init.constant_(m.weight, 1) 73 | nn.init.constant_(m.bias, 0) 74 | 75 | def make_layer(self, in_places, places, block, stride): 76 | layers = [] 77 | layers.append(Bottleneck(in_places, places,stride, downsampling =True)) 78 | for i in range(1, block): 79 | layers.append(Bottleneck(places*self.expansion, places)) 80 | 81 | return nn.Sequential(*layers) 82 | 83 | 84 | def forward(self, x): 85 | x = self.conv1(x) 86 | 87 | x = self.layer1(x) 88 | x = self.layer2(x) 89 | x = self.layer3(x) 90 | x = self.layer4(x) 91 | 92 | x = self.avgpool(x) 93 | x = x.view(x.size(0), -1) 94 | x = self.fc(x) 95 | return x 96 | 97 | def ResNet50(): 98 | return ResNet([3, 4, 6, 3]) 99 | 100 | def ResNet101(): 101 | return ResNet([3, 4, 23, 3]) 102 | 103 | def ResNet152(): 104 | return ResNet([3, 8, 36, 3]) 105 | 106 | 107 | if __name__=='__main__': 108 | #model = torchvision.models.resnet50() 109 | model = ResNet50() 110 | print(model) 111 | 112 | input = torch.randn(1, 3, 224, 224) 113 | out = model(input) 114 | print(out.shape) 115 | -------------------------------------------------------------------------------- /ClassicNetwork/VGGNet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torchvision 4 | 5 | def Conv3x3BNReLU(in_channels,out_channels): 6 | return nn.Sequential( 7 | 
nn.Conv2d(in_channels=in_channels,out_channels=out_channels,kernel_size=3,stride=1,padding=1), 8 | nn.BatchNorm2d(out_channels), 9 | nn.ReLU6(inplace=True) 10 | ) 11 | 12 | class VGG(nn.Module): 13 | def __init__(self, block_nums,num_classes=1000): 14 | super(VGG, self).__init__() 15 | 16 | self.stage1 = self._make_layers(in_channels=3, out_channels=64, block_num=block_nums[0]) 17 | self.stage2 = self._make_layers(in_channels=64, out_channels=128, block_num=block_nums[1]) 18 | self.stage3 = self._make_layers(in_channels=128, out_channels=256, block_num=block_nums[2]) 19 | self.stage4 = self._make_layers(in_channels=256, out_channels=512, block_num=block_nums[3]) 20 | self.stage5 = self._make_layers(in_channels=512, out_channels=512, block_num=block_nums[4]) 21 | 22 | self.classifier = nn.Sequential( 23 | nn.Linear(in_features=512*7*7,out_features=4096), 24 | nn.Dropout(p=0.2), 25 | nn.Linear(in_features=4096, out_features=4096), 26 | nn.Dropout(p=0.2), 27 | nn.Linear(in_features=4096, out_features=num_classes) 28 | ) 29 | 30 | self._init_params() 31 | 32 | def _make_layers(self, in_channels, out_channels, block_num): 33 | layers = [] 34 | layers.append(Conv3x3BNReLU(in_channels,out_channels)) 35 | for i in range(1,block_num): 36 | layers.append(Conv3x3BNReLU(out_channels,out_channels)) 37 | layers.append(nn.MaxPool2d(kernel_size=2,stride=2, ceil_mode=False)) 38 | return nn.Sequential(*layers) 39 | 40 | def _init_params(self): 41 | for m in self.modules(): 42 | if isinstance(m, nn.Conv2d): 43 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 44 | elif isinstance(m, nn.BatchNorm2d): 45 | nn.init.constant_(m.weight, 1) 46 | nn.init.constant_(m.bias, 0) 47 | 48 | def forward(self, x): 49 | x = self.stage1(x) 50 | x = self.stage2(x) 51 | x = self.stage3(x) 52 | x = self.stage4(x) 53 | x = self.stage5(x) 54 | x = x.view(x.size(0),-1) 55 | out = self.classifier(x) 56 | return out 57 | 58 | def VGG16(): 59 | block_nums = [2, 2, 3, 3, 3] 60 | model = VGG(block_nums) 61 | return model 62 | 63 | def VGG19(): 64 | block_nums = [2, 2, 4, 4, 4] 65 | model = VGG(block_nums) 66 | return model 67 | 68 | if __name__ == '__main__': 69 | model = VGG16() 70 | print(model) 71 | torchvision.models.vgg16_bn() 72 | 73 | input = torch.randn(1,3,224,224) 74 | out = model(input) 75 | print(out.shape) 76 | 77 | -------------------------------------------------------------------------------- /FaceDetectorAndRecognition/FaceBoxes.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torchvision 4 | 5 | class Conv2dCReLU(nn.Module): 6 | def __init__(self,in_channels,out_channels,kernel_size,stride,padding): 7 | super(Conv2dCReLU, self).__init__() 8 | self.conv = nn.Conv2d(in_channels=in_channels,out_channels=out_channels,kernel_size=kernel_size,stride=stride,padding=padding) 9 | self.bn = nn.BatchNorm2d(out_channels) 10 | self.relu = nn.ReLU6(inplace=True) 11 | 12 | def forward(self, x): 13 | x = self.bn(self.conv(x)) 14 | out = torch.cat([x, -x], dim=1) 15 | return self.relu(out) 16 | 17 | 18 | class InceptionModules(nn.Module): 19 | def __init__(self): 20 | super(InceptionModules, self).__init__() 21 | 22 | self.branch1_conv1 = nn.Conv2d(in_channels=128,out_channels=32,kernel_size=1,stride=1) 23 | self.branch1_conv1_bn = nn.BatchNorm2d(32) 24 | 25 | self.branch2_pool = nn.MaxPool2d(kernel_size=3,stride=1,padding=1) 26 | self.branch2_conv1 = nn.Conv2d(in_channels=128, out_channels=32, kernel_size=1, stride=1) 27 | 
self.branch2_conv1_bn = nn.BatchNorm2d(32) 28 | 29 | self.branch3_conv1 = nn.Conv2d(in_channels=128, out_channels=24, kernel_size=1, stride=1) 30 | self.branch3_conv1_bn = nn.BatchNorm2d(24) 31 | self.branch3_conv2 = nn.Conv2d(in_channels=24, out_channels=32, kernel_size=3, stride=1, padding=1) 32 | self.branch3_conv2_bn = nn.BatchNorm2d(32) 33 | 34 | self.branch4_conv1 = nn.Conv2d(in_channels=128, out_channels=24, kernel_size=1, stride=1) 35 | self.branch4_conv1_bn = nn.BatchNorm2d(24) 36 | self.branch4_conv2 = nn.Conv2d(in_channels=24, out_channels=32, kernel_size=3, stride=1, padding=1) 37 | self.branch4_conv2_bn = nn.BatchNorm2d(32) 38 | self.branch4_conv3 = nn.Conv2d(in_channels=32, out_channels=32, kernel_size=3, stride=1, padding=1) 39 | self.branch4_conv3_bn = nn.BatchNorm2d(32) 40 | 41 | 42 | def forward(self, x): 43 | x1 = self.branch1_conv1_bn(self.branch1_conv1(x)) 44 | x2 = self.branch2_conv1_bn(self.branch2_conv1(self.branch2_pool(x))) 45 | x3 = self.branch3_conv2_bn(self.branch3_conv2(self.branch3_conv1_bn(self.branch3_conv1(x)))) 46 | x4 = self.branch4_conv3_bn(self.branch4_conv3(self.branch4_conv2_bn(self.branch4_conv2(self.branch4_conv1_bn(self.branch4_conv1(x)))))) 47 | out = torch.cat([x1, x2, x3, x4],dim=1) 48 | return out 49 | 50 | class FaceBoxes(nn.Module): 51 | def __init__(self, num_classes, phase): 52 | super(FaceBoxes, self).__init__() 53 | self.phase = phase 54 | self.num_classes = num_classes 55 | 56 | self.RapidlyDigestedConvolutionalLayers = nn.Sequential( 57 | Conv2dCReLU(in_channels=3,out_channels=24,kernel_size=7,stride=4,padding=3), 58 | nn.MaxPool2d(kernel_size=3,stride=2,padding=1), 59 | Conv2dCReLU(in_channels=48,out_channels=64,kernel_size=5,stride=2,padding=2), 60 | nn.MaxPool2d(kernel_size=3, stride=2,padding=1) 61 | ) 62 | 63 | self.MultipleScaleConvolutionalLayers = nn.Sequential( 64 | InceptionModules(), 65 | InceptionModules(), 66 | InceptionModules(), 67 | ) 68 | 69 | self.conv3_1 = nn.Conv2d(in_channels=128,out_channels=128,kernel_size=1,stride=1) 70 | self.conv3_2 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=2, padding=1) 71 | self.conv4_1 = nn.Conv2d(in_channels=256, out_channels=128, kernel_size=1, stride=1) 72 | self.conv4_2 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=2, padding=1) 73 | 74 | self.loc_layer1 = nn.Conv2d(in_channels=128, out_channels=21*4, kernel_size=3, stride=1, padding=1) 75 | self.conf_layer1 = nn.Conv2d(in_channels=128, out_channels=21*num_classes, kernel_size=3, stride=1, padding=1) 76 | 77 | self.loc_layer2 = nn.Conv2d(in_channels=256, out_channels=4, kernel_size=3, stride=1, padding=1) 78 | self.conf_layer2 = nn.Conv2d(in_channels=256, out_channels=num_classes, kernel_size=3, stride=1, padding=1) 79 | 80 | self.loc_layer3 = nn.Conv2d(in_channels=256, out_channels=4, kernel_size=3, stride=1, padding=1) 81 | self.conf_layer3 = nn.Conv2d(in_channels=256, out_channels=num_classes, kernel_size=3, stride=1, padding=1) 82 | 83 | if self.phase == 'test': 84 | self.softmax = nn.Softmax(dim=-1) 85 | elif self.phase == 'train': 86 | for m in self.modules(): 87 | if isinstance(m, nn.Conv2d): 88 | if m.bias is not None: 89 | nn.init.xavier_normal_(m.weight.data) 90 | nn.init.constant_(m.bias, 0) 91 | else: 92 | nn.init.xavier_normal_(m.weight.data) 93 | 94 | elif isinstance(m, nn.BatchNorm2d): 95 | nn.init.constant_(m.weight, 1) 96 | nn.init.constant_(m.bias, 0) 97 | 98 | def forward(self, x): 99 | x = self.RapidlyDigestedConvolutionalLayers(x) 100 | out1 = 
self.MultipleScaleConvolutionalLayers(x) 101 | out2 = self.conv3_2(self.conv3_1(out1)) 102 | out3 = self.conv4_2(self.conv4_1(out2)) 103 | 104 | loc1 = self.loc_layer1(out1) 105 | conf1 = self.conf_layer1(out1) 106 | 107 | loc2 = self.loc_layer2(out2) 108 | conf2 = self.conf_layer2(out2) 109 | 110 | loc3 = self.loc_layer3(out3) 111 | conf3 = self.conf_layer3(out3) 112 | 113 | locs = torch.cat([loc1.permute(0, 2, 3, 1).contiguous().view(loc1.size(0), -1), 114 | loc2.permute(0, 2, 3, 1).contiguous().view(loc2.size(0), -1), 115 | loc3.permute(0, 2, 3, 1).contiguous().view(loc3.size(0), -1)], dim=1) 116 | confs = torch.cat([conf1.permute(0, 2, 3, 1).contiguous().view(conf1.size(0), -1), 117 | conf2.permute(0, 2, 3, 1).contiguous().view(conf2.size(0), -1), 118 | conf3.permute(0, 2, 3, 1).contiguous().view(conf3.size(0), -1)], dim=1) 119 | 120 | if self.phase == 'test': 121 | out = (locs.view(locs.size(0), -1, 4), 122 | self.softmax(confs.view(-1, self.num_classes))) 123 | else: 124 | out = (locs.view(locs.size(0), -1, 4), 125 | confs.view(-1, self.num_classes)) 126 | return out 127 | 128 | 129 | if __name__ == '__main__': 130 | model = FaceBoxes(num_classes=2, phase='train') 131 | print(model) 132 | 133 | input = torch.randn(1, 3, 1024, 1024) 134 | out = model(input) 135 | print(out[0].shape) 136 | print(out[1].shape) 137 | 138 | -------------------------------------------------------------------------------- /FaceDetectorAndRecognition/LFFD.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | def Conv1x1ReLU(in_channels,out_channels): 5 | return nn.Sequential( 6 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=1), 7 | nn.ReLU6(inplace=True) 8 | ) 9 | 10 | def Conv3x3ReLU(in_channels,out_channels,stride,padding): 11 | return nn.Sequential( 12 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=3, stride=stride, padding=padding), 13 | nn.ReLU6(inplace=True) 14 | ) 15 | 16 | class LossBranch(nn.Module): 17 | def __init__(self,in_channels, mid_channels=64): 18 | super(LossBranch, self).__init__() 19 | self.conv1 = Conv1x1ReLU(in_channels, mid_channels) 20 | 21 | self.conv2_score = Conv1x1ReLU(mid_channels, mid_channels) 22 | self.classify = nn.Conv2d(in_channels=mid_channels, out_channels=2, kernel_size=1, stride=1) 23 | 24 | self.conv2_bbox = Conv1x1ReLU(mid_channels, mid_channels) 25 | self.regress = nn.Conv2d(in_channels=mid_channels, out_channels=4, kernel_size=1, stride=1) 26 | 27 | def forward(self, x): 28 | x = self.conv1(x) 29 | cls = self.classify(self.conv2_score(x)) 30 | reg = self.regress(self.conv2_bbox(x)) 31 | return cls,reg 32 | 33 | class LFFDBlock(nn.Module): 34 | def __init__(self, in_channels, out_channels, stride): 35 | super(LFFDBlock, self).__init__() 36 | mid_channels = out_channels 37 | self.downsampling = True if stride == 2 else False 38 | 39 | if self.downsampling: 40 | self.conv = nn.Conv2d(in_channels=in_channels, out_channels=mid_channels, kernel_size=3, stride=stride, padding=0) 41 | 42 | self.branch1_relu1 = nn.ReLU6(inplace=True) 43 | self.branch1_conv1 = Conv3x3ReLU(in_channels=mid_channels, out_channels=mid_channels, stride=1, padding=1) 44 | self.branch1_conv2 = nn.Conv2d(in_channels=mid_channels, out_channels=out_channels, kernel_size=3, stride=1, padding=1) 45 | 46 | self.relu = nn.ReLU6(inplace=True) 47 | 48 | def forward(self, x): 49 | if self.downsampling: 50 | x = self.conv(x) 51 | out = 
self.branch1_conv2(self.branch1_conv1(self.branch1_relu1(x))) 52 | return self.relu(out+x) 53 | 54 | class LFFD(nn.Module): 55 | def __init__(self, classes_num = 2): 56 | super(LFFD, self).__init__() 57 | 58 | self.tiny_part1 = nn.Sequential( 59 | Conv3x3ReLU(in_channels=3, out_channels=64, stride=2, padding = 0), 60 | LFFDBlock(in_channels=64, out_channels=64, stride=2), 61 | LFFDBlock(in_channels=64, out_channels=64, stride=1), 62 | LFFDBlock(in_channels=64, out_channels=64, stride=1), 63 | ) 64 | self.tiny_part2 = LFFDBlock(in_channels=64, out_channels=64, stride=1) 65 | 66 | self.small_part1 = LFFDBlock(in_channels=64, out_channels=64, stride=2) 67 | self.small_part2 = LFFDBlock(in_channels=64, out_channels=64, stride=1) 68 | 69 | self.medium_part = nn.Sequential( 70 | LFFDBlock(in_channels=64, out_channels=128, stride=2), 71 | LFFDBlock(in_channels=128, out_channels=128, stride=1), 72 | ) 73 | 74 | self.large_part1 = LFFDBlock(in_channels=128, out_channels=128, stride=2) 75 | self.large_part2 = LFFDBlock(in_channels=128, out_channels=128, stride=1) 76 | self.large_part3 = LFFDBlock(in_channels=128, out_channels=128, stride=1) 77 | 78 | self.loss_branch1 = LossBranch(in_channels=64) 79 | self.loss_branch2 = LossBranch(in_channels=64) 80 | self.loss_branch3 = LossBranch(in_channels=64) 81 | self.loss_branch4 = LossBranch(in_channels=64) 82 | self.loss_branch5 = LossBranch(in_channels=128) 83 | self.loss_branch6 = LossBranch(in_channels=128) 84 | self.loss_branch7 = LossBranch(in_channels=128) 85 | self.loss_branch8 = LossBranch(in_channels=128) 86 | 87 | def forward(self, x): 88 | branch1 = self.tiny_part1(x) 89 | branch2 = self.tiny_part2(branch1) 90 | branch3 = self.small_part1(branch2) 91 | branch4 = self.small_part2(branch3) 92 | branch5 = self.medium_part(branch4) 93 | branch6 = self.large_part1(branch5) 94 | branch7 = self.large_part2(branch6) 95 | branch8 = self.large_part3(branch7) 96 | 97 | cls1,loc1 = self.loss_branch1(branch1) 98 | cls2,loc2 = self.loss_branch2(branch2) 99 | cls3,loc3 = self.loss_branch3(branch3) 100 | cls4,loc4 = self.loss_branch4(branch4) 101 | cls5,loc5 = self.loss_branch5(branch5) 102 | cls6,loc6 = self.loss_branch6(branch6) 103 | cls7,loc7 = self.loss_branch7(branch7) 104 | cls8,loc8 = self.loss_branch8(branch8) 105 | 106 | cls = torch.cat([cls1.permute(0, 2, 3, 1).contiguous().view(loc1.size(0), -1), 107 | cls2.permute(0, 2, 3, 1).contiguous().view(loc1.size(0), -1), 108 | cls3.permute(0, 2, 3, 1).contiguous().view(loc1.size(0), -1), 109 | cls4.permute(0, 2, 3, 1).contiguous().view(loc1.size(0), -1), 110 | cls5.permute(0, 2, 3, 1).contiguous().view(loc1.size(0), -1), 111 | cls6.permute(0, 2, 3, 1).contiguous().view(loc1.size(0), -1), 112 | cls7.permute(0, 2, 3, 1).contiguous().view(loc1.size(0), -1), 113 | cls8.permute(0, 2, 3, 1).contiguous().view(loc1.size(0), -1)], dim=1) 114 | loc = torch.cat([loc1.permute(0, 2, 3, 1).contiguous().view(loc1.size(0), -1), 115 | loc2.permute(0, 2, 3, 1).contiguous().view(loc1.size(0), -1), 116 | loc3.permute(0, 2, 3, 1).contiguous().view(loc1.size(0), -1), 117 | loc4.permute(0, 2, 3, 1).contiguous().view(loc1.size(0), -1), 118 | loc5.permute(0, 2, 3, 1).contiguous().view(loc1.size(0), -1), 119 | loc6.permute(0, 2, 3, 1).contiguous().view(loc1.size(0), -1), 120 | loc7.permute(0, 2, 3, 1).contiguous().view(loc1.size(0), -1), 121 | loc8.permute(0, 2, 3, 1).contiguous().view(loc1.size(0), -1)], dim=1) 122 | out = (cls,loc) 123 | return out 124 | 125 | if __name__ == '__main__': 126 | net = LFFD() 127 | print(net) 128 | 
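# note (comment added for clarity): with an H x W input, the 8 loss branches tap the backbone at
# strides 4, 4, 8, 8, 16, 32, 32, 32; per the LFFD paper each branch covers one continuous band of
# face scales (roughly 10-560 px overall), which is why no anchors are needed.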
129 | input = torch.randn(1,3,480,640) 130 | output = net(input) 131 | print(output[0].shape) 132 | print(output[1].shape) 133 | 134 | -------------------------------------------------------------------------------- /FaceDetectorAndRecognition/README.md: -------------------------------------------------------------------------------- 1 | # FaceDetectorAndRecognition 2 | 3 | 4 | 5 | **FaceBoxes** 6 | 7 | FaceBoxes: A CPU Real-time Face Detector with High Accuracy, 2018 8 | 9 | https://arxiv.org/pdf/1708.05234.pdf 10 | 11 | https://liumin.blog.csdn.net/article/details/97698853 12 | 13 | 14 | 15 | **LFFD** 16 | 17 | LFFD: A Light and Fast Face Detector for Edge Devices, 2019 18 | 19 | https://arxiv.org/pdf/1904.10633.pdf 20 | 21 | https://liumin.blog.csdn.net/article/details/100181190 -------------------------------------------------------------------------------- /HumanPoseEstimation/Hourglass.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torchvision 4 | 5 | def ConvBNReLU(in_channels,out_channels,kernel_size,stride,padding=0): 6 | return nn.Sequential( 7 | nn.Conv2d(in_channels=in_channels,out_channels=out_channels,kernel_size=kernel_size,stride=stride,padding=padding), 8 | nn.BatchNorm2d(out_channels), 9 | nn.ReLU6(inplace=True) 10 | ) 11 | 12 | class ResidualBlock(nn.Module): 13 | def __init__(self, in_channels, out_channels): 14 | super(ResidualBlock, self).__init__() 15 | mid_channels = out_channels//2 16 | 17 | self.bottleneck = nn.Sequential( 18 | ConvBNReLU(in_channels=in_channels, out_channels=mid_channels, kernel_size=1, stride=1), 19 | ConvBNReLU(in_channels=mid_channels, out_channels=mid_channels, kernel_size=3, stride=1, padding=1), 20 | ConvBNReLU(in_channels=mid_channels, out_channels=out_channels, kernel_size=1, stride=1), 21 | ) 22 | self.shortcut = ConvBNReLU(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=1) 23 | 24 | def forward(self, x): 25 | out = self.bottleneck(x) 26 | return out+self.shortcut(x) 27 | 28 | 29 | class HourglassModule(nn.Module): 30 | def __init__(self, nChannels=256, nModules=2, numReductions = 4): 31 | super(HourglassModule, self).__init__() 32 | self.nChannels = nChannels 33 | self.nModules = nModules 34 | self.numReductions = numReductions 35 | 36 | self.residual_block = self._make_residual_layer(self.nModules, self.nChannels) 37 | self.max_pool = nn.MaxPool2d(kernel_size=2, stride=2) 38 | self.after_pool_block = self._make_residual_layer(self.nModules, self.nChannels) 39 | 40 | if numReductions > 1: 41 | self.hourglass_module = HourglassModule(self.nChannels, self.nModules, self.numReductions - 1) 42 | else: 43 | self.num1res_block = self._make_residual_layer(self.nModules, self.nChannels) 44 | 45 | self.lowres_block = self._make_residual_layer(self.nModules, self.nChannels) 46 | 47 | self.upsample = nn.Upsample(scale_factor=2) 48 | 49 | def _make_residual_layer(self, nModules, nChannels): 50 | _residual_blocks = [] 51 | for _ in range(nModules): 52 | _residual_blocks.append(ResidualBlock(in_channels=nChannels, out_channels=nChannels)) 53 | return nn.Sequential(*_residual_blocks) 54 | 55 | def forward(self, x): 56 | out1 = self.residual_block(x) 57 | 58 | out2 = self.max_pool(x) 59 | out2 = self.after_pool_block(out2) 60 | 61 | if self.numReductions > 1: 62 | out2 = self.hourglass_module(out2) 63 | else: 64 | out2 = self.num1res_block(out2) 65 | out2 = self.lowres_block(out2) 66 | out2 = self.upsample(out2) 67 | 68 | return out1 + 
out2 69 | 70 | class Hourglass(nn.Module): 71 | def __init__(self, nJoints): 72 | super(Hourglass, self).__init__() 73 | 74 | self.first_conv = ConvBNReLU(in_channels=3, out_channels=64, kernel_size=7, stride=2, padding=3) 75 | self.residual_block1 = ResidualBlock(in_channels=64, out_channels=128) 76 | self.max_pool = nn.MaxPool2d(kernel_size=2, stride=2) 77 | self.residual_block2 = ResidualBlock(in_channels=128, out_channels=128) 78 | self.residual_block3 = ResidualBlock(in_channels=128, out_channels=256) 79 | 80 | self.hourglass_module1 = HourglassModule(nChannels=256, nModules=2, numReductions = 4) 81 | self.hourglass_module2 = HourglassModule(nChannels=256, nModules=2, numReductions = 4) 82 | 83 | self.after_hourglass_conv1 = ConvBNReLU(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1) 84 | self.proj_conv1 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=1, stride=1) 85 | self.out_conv1 = nn.Conv2d(in_channels=256,out_channels=nJoints,kernel_size=1,stride=1) 86 | self.remap_conv1 = nn.Conv2d(in_channels=nJoints, out_channels=256, kernel_size=1, stride=1) 87 | 88 | self.after_hourglass_conv2 = ConvBNReLU(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1) 89 | self.proj_conv2 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=1, stride=1) 90 | self.out_conv2 = nn.Conv2d(in_channels=256, out_channels=nJoints, kernel_size=1, stride=1) 91 | self.remap_conv2 = nn.Conv2d(in_channels=nJoints, out_channels=256, kernel_size=1, stride=1) 92 | 93 | def forward(self, x): 94 | x = self.max_pool(self.residual_block1(self.first_conv(x))) 95 | x = self.residual_block3(self.residual_block2(x)) 96 | 97 | x = self.hourglass_module1(x) 98 | residual1= x = self.after_hourglass_conv1(x) 99 | out1 = self.out_conv1(x) 100 | residual2 = x = residual1 + self.remap_conv1(out1)+self.proj_conv1(x) 101 | 102 | x = self.hourglass_module2(x) 103 | x = self.after_hourglass_conv2(x) 104 | out2 = self.out_conv2(x) 105 | x = residual2 + self.remap_conv2(out2) + self.proj_conv2(x) 106 | 107 | return out1, out2 108 | 109 | if __name__ == '__main__': 110 | model = Hourglass(nJoints=16) 111 | print(model) 112 | 113 | data = torch.randn(1,3,256,256) 114 | out1, out2 = model(data) 115 | print(out1.shape) 116 | print(out2.shape) 117 | 118 | -------------------------------------------------------------------------------- /HumanPoseEstimation/LPN.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torchvision 4 | from context_block import ContextBlock 5 | 6 | class LBwithGCBlock(nn.Module): 7 | expansion = 1 8 | def __init__(self, inplanes, planes, stride=1, downsample=None): 9 | super(LBwithGCBlock, self).__init__() 10 | self.downsample = downsample 11 | self.conv1 = nn.Conv2d(in_channels=inplanes,out_channels=planes,kernel_size=1,stride=1,padding=0) 12 | self.conv1_bn = nn.BatchNorm2d(planes) 13 | self.conv1_bn_relu = nn.ReLU(inplace=True) 14 | self.conv2 = nn.Conv2d(in_channels=planes, out_channels=planes, kernel_size=3, stride=stride, padding=1) 15 | self.conv2_bn = nn.BatchNorm2d(planes) 16 | self.conv2_bn_relu = nn.ReLU(inplace=True) 17 | self.conv3 = nn.Conv2d(in_channels=planes, out_channels=planes * self.expansion, kernel_size=1, stride=1, padding=0) 18 | self.conv3_bn = nn.BatchNorm2d(planes * self.expansion) 19 | self.gcb = ContextBlock(planes * self.expansion,ratio=2) 20 | self.relu = nn.ReLU(inplace=True) 21 | 22 | def forward(self, x): 23 | residual = x 24 | out = 
self.conv1_bn_relu(self.conv1_bn(self.conv1(x))) 25 | out = self.conv2_bn_relu(self.conv2_bn(self.conv2(out))) 26 | out = self.conv3_bn(self.conv3(out)) 27 | out = self.gcb(out) 28 | if self.downsample is not None: 29 | residual = self.downsample(x) 30 | out += residual 31 | return self.relu(out) 32 | 33 | def computeGCD(a,b): 34 | while a != b: 35 | if a > b: 36 | a = a - b 37 | else: 38 | b = b - a 39 | return b 40 | 41 | def GroupDeconv(inplanes, planes, kernel_size, stride, padding, output_padding): 42 | groups = computeGCD(inplanes, planes) 43 | return nn.Sequential( 44 | nn.ConvTranspose2d(in_channels=inplanes, out_channels=2*planes, kernel_size=kernel_size, 45 | stride=stride, padding=padding, output_padding=output_padding, groups=groups), 46 | nn.Conv2d(2*planes, planes, kernel_size=1, stride=1, padding=0) 47 | ) 48 | 49 | class LPN(nn.Module): 50 | def __init__(self, nJoints): 51 | super(LPN, self).__init__() 52 | self.inplanes = 64 53 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3) 54 | self.bn1 = nn.BatchNorm2d(64) 55 | self.relu = nn.ReLU(inplace=True) 56 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 57 | self.layer1 = self._make_layer(LBwithGCBlock, 64, 3) 58 | self.layer2 = self._make_layer(LBwithGCBlock, 128, 4, stride=2) 59 | self.layer3 = self._make_layer(LBwithGCBlock, 256, 6, stride=2) 60 | self.layer4 = self._make_layer(LBwithGCBlock, 512, 3, stride=1) 61 | 62 | self.deconv_layers = self._make_deconv_group_layer() 63 | self.final_layer = nn.Conv2d(in_channels=self.inplanes,out_channels=nJoints,kernel_size=1,stride=1,padding=0) 64 | 65 | def _make_layer(self, block, planes, blocks, stride=1): 66 | downsample = None 67 | if stride != 1 or self.inplanes != planes * block.expansion: 68 | downsample = nn.Sequential( 69 | nn.Conv2d(self.inplanes, planes * block.expansion,kernel_size=1, stride=stride), 70 | nn.BatchNorm2d(planes * block.expansion), 71 | ) 72 | layers = [] 73 | layers.append(block(self.inplanes, planes, stride, downsample)) 74 | self.inplanes = planes * block.expansion 75 | for i in range(1, blocks): 76 | layers.append(block(self.inplanes, planes)) 77 | return nn.Sequential(*layers) 78 | 79 | def _make_deconv_group_layer(self): 80 | layers = [] 81 | planes = 256 82 | for i in range(2): 83 | planes = planes//2 84 | # layers.append(nn.ConvTranspose2d(in_channels=self.inplanes,out_channels=256,kernel_size=4,stride=2,padding=1,output_padding=0,groups=computeGCD(self.inplanes,256))) 85 | layers.append(GroupDeconv(inplanes=self.inplanes, planes=planes, kernel_size=4, stride=2, padding=1, output_padding=0)) 86 | layers.append(nn.BatchNorm2d(planes)) 87 | layers.append(nn.ReLU(inplace=True)) 88 | self.inplanes = planes 89 | return nn.Sequential(*layers) 90 | 91 | def forward(self, x): 92 | x = self.conv1(x) 93 | x = self.bn1(x) 94 | x = self.relu(x) 95 | x = self.maxpool(x) 96 | 97 | x = self.layer1(x) 98 | x = self.layer2(x) 99 | x = self.layer3(x) 100 | x = self.layer4(x) 101 | 102 | x = self.deconv_layers(x) 103 | x = self.final_layer(x) 104 | return x 105 | 106 | if __name__ == '__main__': 107 | model = LPN(nJoints=16) 108 | print(model) 109 | 110 | data = torch.randn(1,3,256,192) 111 | out = model(data) 112 | print(out.shape) 113 | -------------------------------------------------------------------------------- /HumanPoseEstimation/README.md: -------------------------------------------------------------------------------- 1 | # HumanPoseEstimation-network 2 | Pytorch implementation of HumanPoseEstimation-network 3 | 4 | 5 | 
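All three networks below regress per-joint heatmaps at 1/4 of the input resolution (e.g. `[N, nJoints, 64, 48]` for a 256x192 input). A minimal decoding sketch, added here for illustration only — the `decode_heatmaps` helper and the plain-argmax convention are assumptions, not part of this repo:

```python
import torch

def decode_heatmaps(heatmaps, stride=4):
    # hypothetical helper: heatmaps [N, K, H, W] -> (x, y) pixel coordinates per joint, [N, K, 2]
    n, k, h, w = heatmaps.shape
    idx = heatmaps.view(n, k, -1).argmax(dim=2)                      # flat argmax per joint
    xs = (idx % w).float() * stride                                  # column -> input x
    ys = torch.div(idx, w, rounding_mode='floor').float() * stride   # row -> input y
    return torch.stack([xs, ys], dim=2)
```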
6 | **StackedHG:** 7 | 8 | Stacked Hourglass Networks for Human Pose Estimation ,2016 9 | 10 | https://arxiv.org/pdf/1603.06937.pdf 11 | 12 | https://liumin.blog.csdn.net/article/details/101484455 13 | 14 | 15 | 16 | **Simple Baselines** 17 | 18 | Simple Baselines for Human Pose Estimation and Tracking 19 | 20 | https://arxiv.org/pdf/1804.06208.pdf 21 | 22 | https://liumin.blog.csdn.net/article/details/103447040 23 | 24 | 25 | 26 | **LPN:** 27 | 28 | Simple and Lightweight Human Pose Estimation 29 | 30 | https://arxiv.org/pdf/1911.10346v1.pdf 31 | 32 | https://liumin.blog.csdn.net/article/details/103448034 -------------------------------------------------------------------------------- /HumanPoseEstimation/SimpleBaseline.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torchvision 4 | 5 | 6 | class ResBlock(nn.Module): 7 | expansion = 4 8 | def __init__(self, inplanes, planes, stride=1, downsample=None): 9 | super(ResBlock, self).__init__() 10 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 11 | self.bn1 = nn.BatchNorm2d(planes) 12 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,padding=1, bias=False) 13 | self.bn2 = nn.BatchNorm2d(planes) 14 | self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1,bias=False) 15 | self.bn3 = nn.BatchNorm2d(planes * self.expansion) 16 | self.relu = nn.ReLU(inplace=True) 17 | self.downsample = downsample 18 | self.stride = stride 19 | 20 | def forward(self, x): 21 | residual = x 22 | out = self.relu(self.bn1(self.conv1(x))) 23 | out = self.relu(self.bn2(self.conv2(out))) 24 | out = self.bn3(self.conv3(out)) 25 | if self.downsample is not None: 26 | residual = self.downsample(x) 27 | out += residual 28 | return self.relu(out) 29 | 30 | 31 | class SimpleBaseline(nn.Module): 32 | def __init__(self, nJoints): 33 | super(SimpleBaseline, self).__init__() 34 | self.inplanes = 64 35 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,bias=False) 36 | self.bn1 = nn.BatchNorm2d(64) 37 | self.relu = nn.ReLU(inplace=True) 38 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 39 | self.layer1 = self._make_layer(ResBlock, 64, 3) 40 | self.layer2 = self._make_layer(ResBlock, 128, 4, stride=2) 41 | self.layer3 = self._make_layer(ResBlock, 256, 6, stride=2) 42 | self.layer4 = self._make_layer(ResBlock, 512, 3, stride=2) 43 | 44 | self.deconv_layers = self._make_deconv_layer() 45 | self.final_layer = nn.Conv2d(in_channels=256,out_channels=nJoints,kernel_size=1,stride=1,padding=0) 46 | 47 | def _make_layer(self, block, planes, blocks, stride=1): 48 | downsample = None 49 | if stride != 1 or self.inplanes != planes * block.expansion: 50 | downsample = nn.Sequential( 51 | nn.Conv2d(self.inplanes, planes * block.expansion,kernel_size=1, stride=stride, bias=False), 52 | nn.BatchNorm2d(planes * block.expansion), 53 | ) 54 | 55 | layers = [] 56 | layers.append(block(self.inplanes, planes, stride, downsample)) 57 | self.inplanes = planes * block.expansion 58 | for i in range(1, blocks): 59 | layers.append(block(self.inplanes, planes)) 60 | return nn.Sequential(*layers) 61 | 62 | 63 | def _make_deconv_layer(self): 64 | layers = [] 65 | for i in range(3): 66 | layers.append(nn.ConvTranspose2d(in_channels=self.inplanes,out_channels=256,kernel_size=4, 67 | stride=2,padding=1,output_padding=0,bias=False)) 68 | layers.append(nn.BatchNorm2d(256)) 69 | layers.append(nn.ReLU(inplace=True)) 70 | self.inplanes = 256 
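# note (comment added for clarity): each ConvTranspose2d(kernel_size=4, stride=2, padding=1)
# exactly doubles H and W, so the three stages turn the 8x6 ResNet-50 C5 map of a
# 256x192 input into 64x48 heatmaps.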
71 | return nn.Sequential(*layers) 72 | 73 | 74 | def forward(self, x): 75 | x = self.conv1(x) 76 | x = self.bn1(x) 77 | x = self.relu(x) 78 | x = self.maxpool(x) 79 | 80 | x = self.layer1(x) 81 | x = self.layer2(x) 82 | x = self.layer3(x) 83 | x = self.layer4(x) 84 | 85 | x = self.deconv_layers(x) 86 | x = self.final_layer(x) 87 | return x 88 | 89 | if __name__ == '__main__': 90 | model = SimpleBaseline(nJoints=16) 91 | print(model) 92 | 93 | data = torch.randn(1,3,256,192) 94 | out = model(data) 95 | print(out.shape) 96 | -------------------------------------------------------------------------------- /HumanPoseEstimation/context_block.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | class ContextBlock(nn.Module): 5 | def __init__(self, 6 | inplanes, 7 | ratio, 8 | pooling_type='att', 9 | fusion_types=('channel_add', )): 10 | super(ContextBlock, self).__init__() 11 | assert pooling_type in ['avg', 'att'] 12 | assert isinstance(fusion_types, (list, tuple)) 13 | valid_fusion_types = ['channel_add', 'channel_mul'] 14 | assert all([f in valid_fusion_types for f in fusion_types]) 15 | assert len(fusion_types) > 0, 'at least one fusion should be used' 16 | self.inplanes = inplanes 17 | self.ratio = ratio 18 | self.planes = int(inplanes * ratio) 19 | self.pooling_type = pooling_type 20 | self.fusion_types = fusion_types 21 | if pooling_type == 'att': 22 | self.conv_mask = nn.Conv2d(inplanes, 1, kernel_size=1) 23 | self.softmax = nn.Softmax(dim=2) 24 | else: 25 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 26 | if 'channel_add' in fusion_types: 27 | self.channel_add_conv = nn.Sequential( 28 | nn.Conv2d(self.inplanes, self.planes, kernel_size=1), 29 | nn.LayerNorm([self.planes, 1, 1]), 30 | nn.ReLU(inplace=True), # yapf: disable 31 | nn.Conv2d(self.planes, self.inplanes, kernel_size=1)) 32 | else: 33 | self.channel_add_conv = None 34 | if 'channel_mul' in fusion_types: 35 | self.channel_mul_conv = nn.Sequential( 36 | nn.Conv2d(self.inplanes, self.planes, kernel_size=1), 37 | nn.LayerNorm([self.planes, 1, 1]), 38 | nn.ReLU(inplace=True), # yapf: disable 39 | nn.Conv2d(self.planes, self.inplanes, kernel_size=1)) 40 | else: 41 | self.channel_mul_conv = None 42 | 43 | def spatial_pool(self, x): 44 | batch, channel, height, width = x.size() 45 | if self.pooling_type == 'att': 46 | input_x = x 47 | # [N, C, H * W] 48 | input_x = input_x.view(batch, channel, height * width) 49 | # [N, 1, C, H * W] 50 | input_x = input_x.unsqueeze(1) 51 | # [N, 1, H, W] 52 | context_mask = self.conv_mask(x) 53 | # [N, 1, H * W] 54 | context_mask = context_mask.view(batch, 1, height * width) 55 | # [N, 1, H * W] 56 | context_mask = self.softmax(context_mask) 57 | # [N, 1, H * W, 1] 58 | context_mask = context_mask.unsqueeze(-1) 59 | # [N, 1, C, 1] 60 | context = torch.matmul(input_x, context_mask) 61 | # [N, C, 1, 1] 62 | context = context.view(batch, channel, 1, 1) 63 | else: 64 | # [N, C, 1, 1] 65 | context = self.avg_pool(x) 66 | 67 | return context 68 | 69 | def forward(self, x): 70 | # [N, C, 1, 1] 71 | context = self.spatial_pool(x) 72 | 73 | out = x 74 | if self.channel_mul_conv is not None: 75 | # [N, C, 1, 1] 76 | channel_mul_term = torch.sigmoid(self.channel_mul_conv(context)) 77 | out = out * channel_mul_term 78 | if self.channel_add_conv is not None: 79 | # [N, C, 1, 1] 80 | channel_add_term = self.channel_add_conv(context) 81 | out = out + channel_add_term 82 | 83 | return out 84 | 
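# Added for illustration (not part of the original file): a quick shape check in the
# style of the repo's other modules; ContextBlock is shape-preserving, values are arbitrary.
if __name__ == '__main__':
    model = ContextBlock(inplanes=64, ratio=2)
    input = torch.randn(1, 64, 32, 24)
    out = model(input)
    print(out.shape)  # expected: torch.Size([1, 64, 32, 24])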
-------------------------------------------------------------------------------- /InstanceSegmentation/PolarMask.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torchvision 4 | 5 | def Conv3x3ReLU(in_channels,out_channels): 6 | return nn.Sequential( 7 | nn.Conv2d(in_channels=in_channels,out_channels=out_channels,kernel_size=3,stride=1,padding=1), 8 | nn.ReLU6(inplace=True) 9 | ) 10 | 11 | def locLayer(in_channels,out_channels): 12 | return nn.Sequential( 13 | Conv3x3ReLU(in_channels=in_channels, out_channels=in_channels), 14 | Conv3x3ReLU(in_channels=in_channels, out_channels=in_channels), 15 | Conv3x3ReLU(in_channels=in_channels, out_channels=in_channels), 16 | Conv3x3ReLU(in_channels=in_channels, out_channels=in_channels), 17 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=3, stride=1, padding=1), 18 | ) 19 | 20 | def conf_centernessLayer(in_channels,out_channels): 21 | return nn.Sequential( 22 | Conv3x3ReLU(in_channels=in_channels, out_channels=in_channels), 23 | Conv3x3ReLU(in_channels=in_channels, out_channels=in_channels), 24 | Conv3x3ReLU(in_channels=in_channels, out_channels=in_channels), 25 | Conv3x3ReLU(in_channels=in_channels, out_channels=in_channels), 26 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=3, stride=1, padding=1), 27 | ) 28 | 29 | class PolarMask(nn.Module): 30 | def __init__(self, num_classes=21): 31 | super(PolarMask, self).__init__() 32 | self.num_classes = num_classes 33 | resnet = torchvision.models.resnet50() 34 | layers = list(resnet.children()) 35 | 36 | self.layer1 = nn.Sequential(*layers[:5]) 37 | self.layer2 = nn.Sequential(*layers[5]) 38 | self.layer3 = nn.Sequential(*layers[6]) 39 | self.layer4 = nn.Sequential(*layers[7]) 40 | 41 | self.lateral5 = nn.Conv2d(in_channels=2048, out_channels=256, kernel_size=1) 42 | self.lateral4 = nn.Conv2d(in_channels=1024, out_channels=256, kernel_size=1) 43 | self.lateral3 = nn.Conv2d(in_channels=512, out_channels=256, kernel_size=1) 44 | 45 | self.upsample4 = nn.ConvTranspose2d(in_channels=256, out_channels=256, kernel_size=4, stride=2, padding=1) 46 | self.upsample3 = nn.ConvTranspose2d(in_channels=256, out_channels=256, kernel_size=4, stride=2, padding=1) 47 | 48 | self.downsample6 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=2, padding=1) 49 | self.downsample5 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=2, padding=1) 50 | 51 | self.loc_layer3 = locLayer(in_channels=256,out_channels=36) 52 | self.conf_centerness_layer3 = conf_centernessLayer(in_channels=256,out_channels=self.num_classes + 1)  # num_classes scores + 1 centerness channel, matching the split() in forward 53 | 54 | self.loc_layer4 = locLayer(in_channels=256, out_channels=36) 55 | self.conf_centerness_layer4 = conf_centernessLayer(in_channels=256, out_channels=self.num_classes + 1) 56 | 57 | self.loc_layer5 = locLayer(in_channels=256, out_channels=36) 58 | self.conf_centerness_layer5 = conf_centernessLayer(in_channels=256, out_channels=self.num_classes + 1) 59 | 60 | self.loc_layer6 = locLayer(in_channels=256, out_channels=36) 61 | self.conf_centerness_layer6 = conf_centernessLayer(in_channels=256, out_channels=self.num_classes + 1) 62 | 63 | self.loc_layer7 = locLayer(in_channels=256, out_channels=36) 64 | self.conf_centerness_layer7 = conf_centernessLayer(in_channels=256, out_channels=self.num_classes + 1) 65 | 66 | self.init_params() 67 | 68 | def init_params(self): 69 | for m in self.modules(): 70 | if isinstance(m, nn.Conv2d): 71 | 
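# He/Kaiming initialization matches the ReLU-family activations used throughout the FPN and heads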
nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 72 | elif isinstance(m, nn.BatchNorm2d): 73 | nn.init.constant_(m.weight, 1) 74 | nn.init.constant_(m.bias, 0) 75 | 76 | def forward(self, x): 77 | x = self.layer1(x) 78 | c3 =x = self.layer2(x) 79 | c4 =x = self.layer3(x) 80 | c5 = x = self.layer4(x) 81 | 82 | p5 = self.lateral5(c5) 83 | p4 = self.upsample4(p5) + self.lateral4(c4) 84 | p3 = self.upsample3(p4) + self.lateral3(c3) 85 | 86 | p6 = self.downsample5(p5) 87 | p7 = self.downsample6(p6) 88 | 89 | loc3 = self.loc_layer3(p3) 90 | conf_centerness3 = self.conf_centerness_layer3(p3) 91 | conf3, centerness3 = conf_centerness3.split([self.num_classes, 1], dim=1) 92 | 93 | loc4 = self.loc_layer4(p4) 94 | conf_centerness4 = self.conf_centerness_layer4(p4) 95 | conf4, centerness4 = conf_centerness4.split([self.num_classes, 1], dim=1) 96 | 97 | loc5 = self.loc_layer5(p5) 98 | conf_centerness5 = self.conf_centerness_layer5(p5) 99 | conf5, centerness5 = conf_centerness5.split([self.num_classes, 1], dim=1) 100 | 101 | loc6 = self.loc_layer6(p6) 102 | conf_centerness6 = self.conf_centerness_layer6(p6) 103 | conf6, centerness6 = conf_centerness6.split([self.num_classes, 1], dim=1) 104 | 105 | loc7 = self.loc_layer7(p7) 106 | conf_centerness7 = self.conf_centerness_layer7(p7) 107 | conf7, centerness7 = conf_centerness7.split([self.num_classes, 1], dim=1) 108 | 109 | locs = torch.cat([loc3.permute(0, 2, 3, 1).contiguous().view(loc3.size(0), -1), 110 | loc4.permute(0, 2, 3, 1).contiguous().view(loc4.size(0), -1), 111 | loc5.permute(0, 2, 3, 1).contiguous().view(loc5.size(0), -1), 112 | loc6.permute(0, 2, 3, 1).contiguous().view(loc6.size(0), -1), 113 | loc7.permute(0, 2, 3, 1).contiguous().view(loc7.size(0), -1)],dim=1) 114 | 115 | confs = torch.cat([conf3.permute(0, 2, 3, 1).contiguous().view(conf3.size(0), -1), 116 | conf4.permute(0, 2, 3, 1).contiguous().view(conf4.size(0), -1), 117 | conf5.permute(0, 2, 3, 1).contiguous().view(conf5.size(0), -1), 118 | conf6.permute(0, 2, 3, 1).contiguous().view(conf6.size(0), -1), 119 | conf7.permute(0, 2, 3, 1).contiguous().view(conf7.size(0), -1),], dim=1) 120 | 121 | centernesses = torch.cat([centerness3.permute(0, 2, 3, 1).contiguous().view(centerness3.size(0), -1), 122 | centerness4.permute(0, 2, 3, 1).contiguous().view(centerness4.size(0), -1), 123 | centerness5.permute(0, 2, 3, 1).contiguous().view(centerness5.size(0), -1), 124 | centerness6.permute(0, 2, 3, 1).contiguous().view(centerness6.size(0), -1), 125 | centerness7.permute(0, 2, 3, 1).contiguous().view(centerness7.size(0), -1), ], dim=1) 126 | 127 | out = (locs, confs, centernesses) 128 | return out 129 | 130 | if __name__ == '__main__': 131 | model = PolarMask() 132 | print(model) 133 | 134 | input = torch.randn(1, 3, 800, 1024) 135 | out = model(input) 136 | print(out[0].shape) 137 | print(out[1].shape) 138 | print(out[2].shape) -------------------------------------------------------------------------------- /InstanceSegmentation/README.md: -------------------------------------------------------------------------------- 1 | # InstanceSegmentation-network 2 | Pytorch implementation of InstanceSegmentation-network 3 | 4 | 5 | 6 | **PolarMask :** 7 | 8 | PolarMask: Single Shot Instance Segmentation with Polar Representation ,2019 9 | 10 | https://arxiv.org/pdf/1909.13226.pdf 11 | 12 | https://liumin.blog.csdn.net/article/details/101975085 13 | 14 | -------------------------------------------------------------------------------- /Lightweight/MobileNetV1.py: 
-------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torchvision 4 | 5 | def BottleneckV1(in_channels, out_channels, stride): 6 | return nn.Sequential( 7 | nn.Conv2d(in_channels=in_channels,out_channels=in_channels,kernel_size=3,stride=stride,padding=1,groups=in_channels), 8 | nn.BatchNorm2d(in_channels), 9 | nn.ReLU6(inplace=True), 10 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=1), 11 | nn.BatchNorm2d(out_channels), 12 | nn.ReLU6(inplace=True) 13 | ) 14 | 15 | class MobileNetV1(nn.Module): 16 | def __init__(self, num_classes=1000): 17 | super(MobileNetV1, self).__init__() 18 | 19 | self.first_conv = nn.Sequential( 20 | nn.Conv2d(in_channels=3,out_channels=32,kernel_size=3,stride=2,padding=1), 21 | nn.BatchNorm2d(32), 22 | nn.ReLU6(inplace=True), 23 | ) 24 | 25 | self.bottleneck = nn.Sequential( 26 | BottleneckV1(32, 64, stride=1), 27 | BottleneckV1(64, 128, stride=2), 28 | BottleneckV1(128, 128, stride=1), 29 | BottleneckV1(128, 256, stride=2), 30 | BottleneckV1(256, 256, stride=1), 31 | BottleneckV1(256, 512, stride=2), 32 | BottleneckV1(512, 512, stride=1), 33 | BottleneckV1(512, 512, stride=1), 34 | BottleneckV1(512, 512, stride=1), 35 | BottleneckV1(512, 512, stride=1), 36 | BottleneckV1(512, 512, stride=1), 37 | BottleneckV1(512, 1024, stride=2), 38 | BottleneckV1(1024, 1024, stride=1), 39 | ) 40 | 41 | self.avg_pool = nn.AvgPool2d(kernel_size=7,stride=1) 42 | self.linear = nn.Linear(in_features=1024,out_features=num_classes) 43 | self.dropout = nn.Dropout(p=0.2) 44 | self.softmax = nn.Softmax(dim=1) 45 | 46 | self.init_params() 47 | 48 | def init_params(self): 49 | for m in self.modules(): 50 | if isinstance(m, nn.Conv2d): 51 | nn.init.kaiming_normal_(m.weight) 52 | nn.init.constant_(m.bias,0) 53 | elif isinstance(m, nn.Linear) or isinstance(m, nn.BatchNorm2d): 54 | nn.init.constant_(m.weight, 1) 55 | nn.init.constant_(m.bias, 0) 56 | 57 | def forward(self, x): 58 | x = self.first_conv(x) 59 | x = self.bottleneck(x) 60 | x = self.avg_pool(x) 61 | x = x.view(x.size(0),-1) 62 | x = self.dropout(x) 63 | x = self.linear(x) 64 | out = self.softmax(x) 65 | return out 66 | 67 | if __name__=='__main__': 68 | model = MobileNetV1() 69 | print(model) 70 | 71 | input = torch.randn(1, 3, 224, 224) 72 | out = model(input) 73 | print(out.shape) 74 | -------------------------------------------------------------------------------- /Lightweight/MobileNetV2.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torchvision 4 | from functools import reduce 5 | 6 | 7 | def Conv3x3BNReLU(in_channels,out_channels,stride,groups): 8 | return nn.Sequential( 9 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=3, stride=stride, padding=1, groups=groups), 10 | nn.BatchNorm2d(out_channels), 11 | nn.ReLU6(inplace=True) 12 | ) 13 | 14 | def Conv1x1BNReLU(in_channels,out_channels): 15 | return nn.Sequential( 16 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=1), 17 | nn.BatchNorm2d(out_channels), 18 | nn.ReLU6(inplace=True) 19 | ) 20 | 21 | def Conv1x1BN(in_channels,out_channels): 22 | return nn.Sequential( 23 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=1), 24 | nn.BatchNorm2d(out_channels) 25 | ) 26 | 27 | class InvertedResidual(nn.Module): 28 | def __init__(self, in_channels, out_channels, stride, 
expansion_factor=6): 29 | super(InvertedResidual, self).__init__() 30 | self.stride = stride 31 | mid_channels = (in_channels * expansion_factor) 32 | 33 | self.bottleneck = nn.Sequential( 34 | Conv1x1BNReLU(in_channels, mid_channels), 35 | Conv3x3BNReLU(mid_channels, mid_channels, stride,groups=mid_channels), 36 | Conv1x1BN(mid_channels, out_channels) 37 | ) 38 | 39 | if self.stride == 1: 40 | self.shortcut = Conv1x1BN(in_channels, out_channels) 41 | 42 | def forward(self, x): 43 | out = self.bottleneck(x) 44 | out = (out+self.shortcut(x)) if self.stride==1 else out 45 | return out 46 | 47 | class MobileNetV2(nn.Module): 48 | def __init__(self, num_classes=1000): 49 | super(MobileNetV2,self).__init__() 50 | 51 | self.first_conv = Conv3x3BNReLU(3,32,2,groups=1) 52 | 53 | self.layer1 = self.make_layer(in_channels=32, out_channels=16, stride=1, block_num=1) 54 | self.layer2 = self.make_layer(in_channels=16, out_channels=24, stride=2, block_num=2) 55 | self.layer3 = self.make_layer(in_channels=24, out_channels=32, stride=2, block_num=3) 56 | self.layer4 = self.make_layer(in_channels=32, out_channels=64, stride=2, block_num=4) 57 | self.layer5 = self.make_layer(in_channels=64, out_channels=96, stride=1, block_num=3) 58 | self.layer6 = self.make_layer(in_channels=96, out_channels=160, stride=2, block_num=3) 59 | self.layer7 = self.make_layer(in_channels=160, out_channels=320, stride=1, block_num=1) 60 | 61 | self.last_conv = Conv1x1BNReLU(320,1280) 62 | self.avgpool = nn.AvgPool2d(kernel_size=7,stride=1) 63 | self.dropout = nn.Dropout(p=0.2) 64 | self.linear = nn.Linear(in_features=1280,out_features=num_classes) 65 | 66 | def make_layer(self, in_channels, out_channels, stride, block_num): 67 | layers = [] 68 | layers.append(InvertedResidual(in_channels, out_channels, stride)) 69 | for i in range(1, block_num): 70 | layers.append(InvertedResidual(out_channels,out_channels,1)) 71 | return nn.Sequential(*layers) 72 | 73 | def init_params(self): 74 | for m in self.modules(): 75 | if isinstance(m, nn.Conv2d): 76 | nn.init.kaiming_normal_(m.weight) 77 | nn.init.constant_(m.bias, 0) 78 | elif isinstance(m, nn.Linear) or isinstance(m, nn.BatchNorm2d): 79 | nn.init.constant_(m.weight, 1) 80 | nn.init.constant_(m.bias, 0) 81 | 82 | def forward(self, x): 83 | x = self.first_conv(x) 84 | x = self.layer1(x) 85 | x = self.layer2(x) 86 | x = self.layer3(x) 87 | x = self.layer4(x) 88 | x = self.layer5(x) 89 | x = self.layer6(x) 90 | x = self.layer7(x) 91 | x = self.last_conv(x) 92 | x = self.avgpool(x) 93 | x = x.view(x.size(0),-1) 94 | x = self.dropout(x) 95 | out = self.linear(x) 96 | return out 97 | 98 | 99 | if __name__=='__main__': 100 | model = MobileNetV2() 101 | # model = torchvision.models.MobileNetV2() 102 | print(model) 103 | 104 | input = torch.randn(1, 3, 224, 224) 105 | out = model(input) 106 | print(out.shape) 107 | -------------------------------------------------------------------------------- /Lightweight/MobileNetXt.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torchvision 4 | from functools import reduce 5 | 6 | 7 | def Conv3x3BN(in_channels,out_channels,stride=1,groups=1): 8 | return nn.Sequential( 9 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=3, stride=stride, padding=1, groups=groups), 10 | nn.BatchNorm2d(out_channels) 11 | ) 12 | 13 | def Conv3x3BNReLU(in_channels,out_channels,stride=1,groups=1): 14 | return nn.Sequential( 15 | nn.Conv2d(in_channels=in_channels, 
out_channels=out_channels, kernel_size=3, stride=stride, padding=1, groups=groups), 16 | nn.BatchNorm2d(out_channels), 17 | nn.ReLU6(inplace=True) 18 | ) 19 | 20 | def Conv1x1BN(in_channels,out_channels): 21 | return nn.Sequential( 22 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=1), 23 | nn.BatchNorm2d(out_channels) 24 | ) 25 | 26 | def Conv1x1BNReLU(in_channels,out_channels): 27 | return nn.Sequential( 28 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=1), 29 | nn.BatchNorm2d(out_channels), 30 | nn.ReLU6(inplace=True) 31 | ) 32 | 33 | class SandglassBlock(nn.Module): 34 | def __init__(self, in_channels, out_channels, stride, expansion_factor=6): 35 | super(SandglassBlock, self).__init__() 36 | self.stride = stride 37 | mid_channels = in_channels // expansion_factor 38 | self.identity = stride == 1 and in_channels == out_channels 39 | 40 | self.bottleneck = nn.Sequential( 41 | Conv3x3BNReLU(in_channels, in_channels, 1, groups=in_channels), 42 | Conv1x1BN(in_channels, mid_channels), 43 | Conv1x1BNReLU(mid_channels, out_channels), 44 | Conv3x3BN(out_channels, out_channels, stride, groups=out_channels), 45 | ) 46 | 47 | def forward(self, x): 48 | out = self.bottleneck(x) 49 | if self.identity: 50 | return out + x 51 | else: 52 | return out 53 | 54 | 55 | class MobileNetXt(nn.Module): 56 | def __init__(self, num_classes=1000): 57 | super(MobileNetXt,self).__init__() 58 | 59 | self.first_conv = Conv3x3BNReLU(3,32,2,groups=1) 60 | 61 | self.layer1 = self.make_layer(in_channels=32, out_channels=96, stride=2, expansion_factor=2, block_num=1) 62 | self.layer2 = self.make_layer(in_channels=96, out_channels=144, stride=1, expansion_factor=6, block_num=1) 63 | self.layer3 = self.make_layer(in_channels=144, out_channels=192, stride=2, expansion_factor=6, block_num=3) 64 | self.layer4 = self.make_layer(in_channels=192, out_channels=288, stride=2, expansion_factor=6, block_num=3) 65 | self.layer5 = self.make_layer(in_channels=288, out_channels=384, stride=1, expansion_factor=6, block_num=4) 66 | self.layer6 = self.make_layer(in_channels=384, out_channels=576, stride=2, expansion_factor=6, block_num=4) 67 | self.layer7 = self.make_layer(in_channels=576, out_channels=960, stride=1, expansion_factor=6, block_num=2) 68 | self.layer8 = self.make_layer(in_channels=960, out_channels=1280, stride=1, expansion_factor=6, block_num=1) 69 | 70 | self.avgpool = nn.AvgPool2d(kernel_size=7,stride=1) 71 | self.dropout = nn.Dropout(p=0.2) 72 | self.linear = nn.Linear(in_features=1280,out_features=num_classes) 73 | 74 | def make_layer(self, in_channels, out_channels, stride, expansion_factor, block_num): 75 | layers = [] 76 | layers.append(SandglassBlock(in_channels, out_channels, stride,expansion_factor)) 77 | for i in range(1, block_num): 78 | layers.append(SandglassBlock(out_channels,out_channels,1,expansion_factor)) 79 | return nn.Sequential(*layers) 80 | 81 | def init_params(self): 82 | for m in self.modules(): 83 | if isinstance(m, nn.Conv2d): 84 | nn.init.kaiming_normal_(m.weight) 85 | nn.init.constant_(m.bias, 0) 86 | elif isinstance(m, nn.Linear) or isinstance(m, nn.BatchNorm2d): 87 | nn.init.constant_(m.weight, 1) 88 | nn.init.constant_(m.bias, 0) 89 | 90 | def forward(self, x): 91 | x = self.first_conv(x) 92 | x = self.layer1(x) 93 | x = self.layer2(x) 94 | x = self.layer3(x) 95 | x = self.layer4(x) 96 | x = self.layer5(x) 97 | x = self.layer6(x) 98 | x = self.layer7(x) 99 | x = self.layer8(x) 100 | x = self.avgpool(x) 101 | x = 
x.view(x.size(0),-1) 102 | x = self.dropout(x) 103 | out = self.linear(x) 104 | return out 105 | 106 | 107 | if __name__=='__main__': 108 | model = MobileNetXt() 109 | print(model) 110 | 111 | input = torch.randn(1, 3, 224, 224) 112 | out = model(input) 113 | print(out.shape) 114 | -------------------------------------------------------------------------------- /Lightweight/README.md: -------------------------------------------------------------------------------- 1 | # Lightweight-network 2 | 3 | PyTorch implementation of lightweight networks 4 | 5 | 6 | 7 | ## MobileNets: 8 | 9 | **MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications** 10 | 11 | 12 | 13 | ## MobileNetV2: 14 | 15 | **MobileNetV2: Inverted Residuals and Linear Bottlenecks** 16 | 17 | 18 | 19 | ## MobileNetV3: 20 | 21 | **Searching for MobileNetV3** 22 | 23 | 24 | ## ShuffleNet: 25 | 26 | **ShuffleNet: An Extremely Efficient Convolutional Neural Network for Mobile Devices** 27 | 28 | 29 | ## ShuffleNet V2: 30 | 31 | **ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design** 32 | 33 | 34 | 35 | ## SqueezeNet 36 | 37 | **SqueezeNet: AlexNet-level accuracy with 50x fewer parameters and < 0.5MB Model Size** 38 | 39 | ## Xception 40 | 41 | **Xception: Deep Learning with Depthwise Separable Convolutions** 42 | 43 | 44 | 45 | ## MixNet 46 | 47 | **MixNet: Mixed Depthwise Convolutional Kernels** -------------------------------------------------------------------------------- /Lightweight/ShuffleNet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torchvision 4 | 5 | def Conv3x3BNReLU(in_channels,out_channels,stride,groups): 6 | return nn.Sequential( 7 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=3, stride=stride, padding=1,groups=groups), 8 | nn.BatchNorm2d(out_channels), 9 | nn.ReLU6(inplace=True) 10 | ) 11 | 12 | def Conv1x1BNReLU(in_channels,out_channels,groups): 13 | return nn.Sequential( 14 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=1,groups=groups), 15 | nn.BatchNorm2d(out_channels), 16 | nn.ReLU6(inplace=True) 17 | ) 18 | 19 | def Conv1x1BN(in_channels,out_channels,groups): 20 | return nn.Sequential( 21 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=1,groups=groups), 22 | nn.BatchNorm2d(out_channels) 23 | ) 24 | 25 | class ChannelShuffle(nn.Module): 26 | def __init__(self, groups): 27 | super(ChannelShuffle, self).__init__() 28 | self.groups = groups 29 | 30 | def forward(self, x): 31 | '''Channel shuffle: [N,C,H,W] -> [N,g,C/g,H,W] -> [N,C/g,g,H,w] -> [N,C,H,W]''' 32 | N, C, H, W = x.size() 33 | g = self.groups 34 | return x.view(N, g, int(C / g), H, W).permute(0, 2, 1, 3, 4).contiguous().view(N, C, H, W) 35 | 36 | 37 | class ShuffleNetUnits(nn.Module): 38 | def __init__(self, in_channels, out_channels, stride,groups): 39 | super(ShuffleNetUnits, self).__init__() 40 | self.stride = stride 41 | out_channels = out_channels - in_channels if self.stride >1 else out_channels 42 | mid_channels = out_channels // 4 43 | 44 | self.bottleneck = nn.Sequential( 45 | Conv1x1BNReLU(in_channels, mid_channels,groups), 46 | ChannelShuffle(groups), 47 | Conv3x3BNReLU(mid_channels, mid_channels, stride,groups), 48 | Conv1x1BN(mid_channels, out_channels,groups) 49 | ) 50 | if self.stride>1: 51 | self.shortcut = nn.MaxPool2d(kernel_size=3,stride=2,padding=1) 52 | 53 | self.relu = nn.ReLU6(inplace=True) 54 | 55 | 
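# note (comment added for clarity): when stride > 1 the max-pooled shortcut is concatenated, so the
# residual branch only produces out_channels - in_channels new channels; when stride == 1 the unit
# is a plain additive residual, which requires in_channels == out_channels.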
def forward(self, x): 56 | out = self.bottleneck(x) 57 | out = torch.cat([self.shortcut(x),out],dim=1) if self.stride >1 else (out + x) 58 | return self.relu(out) 59 | 60 | class ShuffleNet(nn.Module): 61 | def __init__(self, planes, layers, groups, num_classes=1000): 62 | super(ShuffleNet, self).__init__() 63 | 64 | self.stage1 = nn.Sequential( 65 | Conv3x3BNReLU(in_channels=3,out_channels=24,stride=2, groups=1), 66 | nn.MaxPool2d(kernel_size=3,stride=2,padding=1) 67 | ) 68 | 69 | self.stage2 = self._make_layer(24,planes[0], groups, layers[0], True) 70 | self.stage3 = self._make_layer(planes[0],planes[1], groups, layers[1], False) 71 | self.stage4 = self._make_layer(planes[1],planes[2], groups, layers[2], False) 72 | 73 | self.global_pool = nn.AvgPool2d(kernel_size=7, stride=1) 74 | self.dropout = nn.Dropout(p=0.2) 75 | self.linear = nn.Linear(in_features=planes[2], out_features=num_classes) 76 | 77 | self.init_params() 78 | 79 | def _make_layer(self, in_channels,out_channels, groups, block_num, is_stage2): 80 | layers = [] 81 | layers.append(ShuffleNetUnits(in_channels=in_channels, out_channels=out_channels, stride=2, groups=1 if is_stage2 else groups)) 82 | for idx in range(1, block_num): 83 | layers.append(ShuffleNetUnits(in_channels=out_channels, out_channels=out_channels, stride=1, groups=groups)) 84 | return nn.Sequential(*layers) 85 | 86 | def init_params(self): 87 | for m in self.modules(): 88 | if isinstance(m,nn.Conv2d): 89 | nn.init.kaiming_normal_(m.weight) 90 | nn.init.constant_(m.bias,0) 91 | elif isinstance(m, nn.BatchNorm2d) or isinstance(m, nn.Linear): 92 | nn.init.constant_(m.weight,1) 93 | nn.init.constant_(m.bias, 0) 94 | 95 | def forward(self, x): 96 | x = self.stage1(x) 97 | x = self.stage2(x) 98 | x = self.stage3(x) 99 | x = self.stage4(x) 100 | x = self.global_pool(x) 101 | x = x.view(x.size(0), -1) 102 | x = self.dropout(x) 103 | out = self.linear(x) 104 | return out 105 | 106 | def shufflenet_g8(**kwargs): 107 | planes = [384, 768, 1536] 108 | layers = [4, 8, 4] 109 | model = ShuffleNet(planes, layers, groups=8) 110 | return model 111 | 112 | def shufflenet_g4(**kwargs): 113 | planes = [272, 544, 1088] 114 | layers = [4, 8, 4] 115 | model = ShuffleNet(planes, layers, groups=4) 116 | return model 117 | 118 | def shufflenet_g3(**kwargs): 119 | planes = [240, 480, 960] 120 | layers = [4, 8, 4] 121 | model = ShuffleNet(planes, layers, groups=3) 122 | return model 123 | 124 | def shufflenet_g2(**kwargs): 125 | planes = [200, 400, 800] 126 | layers = [4, 8, 4] 127 | model = ShuffleNet(planes, layers, groups=2) 128 | return model 129 | 130 | def shufflenet_g1(**kwargs): 131 | planes = [144, 288, 576] 132 | layers = [4, 8, 4] 133 | model = ShuffleNet(planes, layers, groups=1) 134 | return model 135 | 136 | if __name__ == '__main__': 137 | model = shufflenet_g1() 138 | print(model) 139 | 140 | input = torch.randn(1, 3, 224, 224) 141 | out = model(input) 142 | print(out.shape) -------------------------------------------------------------------------------- /Lightweight/ShuffleNetV2.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torchvision 4 | 5 | def Conv3x3BNReLU(in_channels,out_channels,stride,groups): 6 | return nn.Sequential( 7 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=3, stride=stride, padding=1,groups=groups), 8 | nn.BatchNorm2d(out_channels), 9 | nn.ReLU6(inplace=True) 10 | ) 11 | 12 | def Conv3x3BN(in_channels,out_channels,stride,groups): 13 | return 
nn.Sequential( 14 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=3, stride=stride, padding=1,groups=groups), 15 | nn.BatchNorm2d(out_channels) 16 | ) 17 | 18 | def Conv1x1BNReLU(in_channels,out_channels): 19 | return nn.Sequential( 20 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=1), 21 | nn.BatchNorm2d(out_channels), 22 | nn.ReLU6(inplace=True) 23 | ) 24 | 25 | def Conv1x1BN(in_channels,out_channels): 26 | return nn.Sequential( 27 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=1), 28 | nn.BatchNorm2d(out_channels) 29 | ) 30 | 31 | class HalfSplit(nn.Module): 32 | def __init__(self, dim=0, first_half=True): 33 | super(HalfSplit, self).__init__() 34 | self.first_half = first_half 35 | self.dim = dim 36 | 37 | def forward(self, input): 38 | splits = torch.chunk(input, 2, dim=self.dim) 39 | return splits[0] if self.first_half else splits[1] 40 | 41 | class ChannelShuffle(nn.Module): 42 | def __init__(self, groups): 43 | super(ChannelShuffle, self).__init__() 44 | self.groups = groups 45 | 46 | def forward(self, x): 47 | '''Channel shuffle: [N,C,H,W] -> [N,g,C/g,H,W] -> [N,C/g,g,H,w] -> [N,C,H,W]''' 48 | N, C, H, W = x.size() 49 | g = self.groups 50 | return x.view(N, g, int(C / g), H, W).permute(0, 2, 1, 3, 4).contiguous().view(N, C, H, W) 51 | 52 | class ShuffleNetUnits(nn.Module): 53 | def __init__(self, in_channels, out_channels, stride, groups): 54 | super(ShuffleNetUnits, self).__init__() 55 | self.stride = stride 56 | if self.stride > 1: 57 | mid_channels = out_channels - in_channels 58 | else: 59 | mid_channels = out_channels // 2 60 | in_channels = mid_channels 61 | self.first_half = HalfSplit(dim=1, first_half=True) 62 | self.second_split = HalfSplit(dim=1, first_half=False) 63 | 64 | self.bottleneck = nn.Sequential( 65 | Conv1x1BNReLU(in_channels, in_channels), 66 | Conv3x3BN(in_channels, mid_channels, stride, groups), 67 | Conv1x1BNReLU(mid_channels, mid_channels) 68 | ) 69 | 70 | if self.stride > 1: 71 | self.shortcut = nn.Sequential( 72 | Conv3x3BN(in_channels=in_channels, out_channels=in_channels, stride=stride, groups=groups), 73 | Conv1x1BNReLU(in_channels, in_channels) 74 | ) 75 | 76 | self.channel_shuffle = ChannelShuffle(groups) 77 | 78 | def forward(self, x): 79 | if self.stride > 1: 80 | x1 = self.bottleneck(x) 81 | x2 = self.shortcut(x) 82 | else: 83 | x1 = self.first_half(x) 84 | x2 = self.second_split(x) 85 | x1 = self.bottleneck(x1) 86 | 87 | out = torch.cat([x1, x2], dim=1) 88 | out = self.channel_shuffle(out) 89 | return out 90 | 91 | class ShuffleNetV2(nn.Module): 92 | def __init__(self, planes, layers, groups, num_classes=1000): 93 | super(ShuffleNetV2, self).__init__() 94 | self.groups = groups 95 | self.stage1 = nn.Sequential( 96 | Conv3x3BNReLU(in_channels=3, out_channels=24, stride=2, groups=1), 97 | nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 98 | ) 99 | 100 | self.stage2 = self._make_layer(24, planes[0], layers[0], True) 101 | self.stage3 = self._make_layer(planes[0], planes[1], layers[1], False) 102 | self.stage4 = self._make_layer(planes[1], planes[2], layers[2], False) 103 | 104 | self.global_pool = nn.AdaptiveAvgPool2d(1) 105 | self.dropout = nn.Dropout(p=0.2) 106 | self.linear = nn.Linear(in_features=planes[2], out_features=num_classes) 107 | 108 | self.init_params() 109 | 110 | def _make_layer(self, in_channels, out_channels, block_num, is_stage2): 111 | layers = [] 112 | layers.append(ShuffleNetUnits(in_channels=in_channels, 
out_channels=out_channels, stride= 2, groups=1 if is_stage2 else self.groups)) 113 | for idx in range(1, block_num): 114 | layers.append(ShuffleNetUnits(in_channels=out_channels, out_channels=out_channels, stride=1, groups=self.groups)) 115 | return nn.Sequential(*layers) 116 | 117 | def init_params(self): 118 | for m in self.modules(): 119 | if isinstance(m, nn.Conv2d): 120 | nn.init.kaiming_normal_(m.weight) 121 | nn.init.constant_(m.bias, 0) 122 | elif isinstance(m, nn.BatchNorm2d) or isinstance(m, nn.Linear): 123 | nn.init.constant_(m.weight, 1) 124 | nn.init.constant_(m.bias, 0) 125 | 126 | def forward(self, x): 127 | x = self.stage1(x) 128 | x = self.stage2(x) 129 | x = self.stage3(x) 130 | x = self.stage4(x) 131 | x = self.global_pool(x) 132 | x = x.view(x.size(0), -1) 133 | x = self.dropout(x) 134 | out = self.linear(x) 135 | return out 136 | 137 | def shufflenet_v2_x2_0(**kwargs): 138 | planes = [244, 488, 976] 139 | layers = [4, 8, 4] 140 | model = ShuffleNetV2(planes, layers, 1) 141 | return model 142 | 143 | def shufflenet_v2_x1_5(**kwargs): 144 | planes = [176, 352, 704] 145 | layers = [4, 8, 4] 146 | model = ShuffleNetV2(planes, layers, 1) 147 | return model 148 | 149 | def shufflenet_v2_x1_0(**kwargs): 150 | planes = [116, 232, 464] 151 | layers = [4, 8, 4] 152 | model = ShuffleNetV2(planes, layers, 1) 153 | return model 154 | 155 | def shufflenet_v2_x0_5(**kwargs): 156 | planes = [48, 96, 192] 157 | layers = [4, 8, 4] 158 | model = ShuffleNetV2(planes, layers, 1) 159 | return model 160 | 161 | if __name__ == '__main__': 162 | model = shufflenet_v2_x2_0() 163 | print(model) 164 | 165 | input = torch.randn(1, 3, 224, 224) 166 | out = model(input) 167 | print(out.shape) -------------------------------------------------------------------------------- /Lightweight/SqueezeNet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torchvision 4 | 5 | class FireModule(nn.Module): 6 | def __init__(self, in_channels, out_channels, mid_channels=None): 7 | super(FireModule, self).__init__() 8 | mid_channels = out_channels//4 9 | 10 | self.squeeze = nn.Conv2d(in_channels=in_channels,out_channels=mid_channels,kernel_size=1,stride=1) 11 | self.squeeze_relu = nn.ReLU6(inplace=True) 12 | 13 | self.expand3x3 = nn.Conv2d(in_channels=mid_channels, out_channels=out_channels, kernel_size=3, stride=1,padding=1) 14 | self.expand3x3_relu = nn.ReLU6(inplace=True) 15 | 16 | self.expand1x1 = nn.Conv2d(in_channels=mid_channels, out_channels=out_channels, kernel_size=1, stride=1) 17 | self.expand1x1_relu = nn.ReLU6(inplace=True) 18 | 19 | def forward(self, x): 20 | x = self.squeeze_relu(self.squeeze(x)) 21 | y = self.expand3x3_relu(self.expand3x3(x)) 22 | z = self.expand1x1_relu(self.expand1x1(x)) 23 | out = torch.cat([y, z],dim=1) 24 | return out 25 | 26 | class SqueezeNet(nn.Module): 27 | def __init__(self, num_classes = 1000): 28 | super(SqueezeNet, self).__init__() 29 | 30 | self.bottleneck = nn.Sequential( 31 | nn.Conv2d(in_channels=3, out_channels=96,kernel_size=7,stride=2,padding=3), 32 | nn.BatchNorm2d(96), 33 | nn.ReLU6(inplace=True), 34 | nn.MaxPool2d(kernel_size=3,stride=2), 35 | 36 | FireModule(in_channels=96, out_channels=64), 37 | FireModule(in_channels=128, out_channels=64), 38 | FireModule(in_channels=128, out_channels=128), 39 | nn.MaxPool2d(kernel_size=3,stride=2), 40 | 41 | FireModule(in_channels=256, out_channels=128), 42 | FireModule(in_channels=256, out_channels=192), 43 | 
FireModule(in_channels=384, out_channels=192), 44 | FireModule(in_channels=384, out_channels=256), 45 | nn.MaxPool2d(kernel_size=3, stride=2), 46 | 47 | FireModule(in_channels=512, out_channels=256), 48 | nn.Dropout(p=0.5), 49 | nn.Conv2d(in_channels=512, out_channels=num_classes, kernel_size=1, stride=1), 50 | nn.ReLU(inplace=True), 51 | nn.AvgPool2d(kernel_size=13, stride=1), 52 | ) 53 | 54 | def forward(self, x): 55 | out = self.bottleneck(x) 56 | return out.view(out.size(0), -1) 57 | 58 | if __name__ == '__main__': 59 | model = SqueezeNet() 60 | print(model) 61 | 62 | input = torch.rand(1,3,224,224) 63 | out = model(input) 64 | print(out.shape) 65 | 66 | -------------------------------------------------------------------------------- /Lightweight/Xception.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torchvision 4 | 5 | def ConvBN(in_channels,out_channels,kernel_size,stride): 6 | return nn.Sequential( 7 | nn.Conv2d(in_channels=in_channels,out_channels=out_channels,kernel_size=kernel_size,stride=stride,padding=0 if kernel_size==1 else (kernel_size-1)//2), 8 | nn.BatchNorm2d(out_channels), 9 | ) 10 | 11 | def ConvBNRelu(in_channels,out_channels,kernel_size,stride): 12 | return nn.Sequential( 13 | ConvBN(in_channels, out_channels, kernel_size, stride), 14 | nn.ReLU6(inplace=True), 15 | ) 16 | 17 | def SeparableConvolution(in_channels, out_channels): 18 | return nn.Sequential( 19 | nn.Conv2d(in_channels=in_channels, out_channels=in_channels, kernel_size=3, stride=1,padding=1,groups=in_channels), 20 | nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=0), 21 | ) 22 | def SeparableConvolutionRelu(in_channels, out_channels): 23 | return nn.Sequential( 24 | SeparableConvolution(in_channels, out_channels), 25 | nn.ReLU6(inplace=True), 26 | ) 27 | 28 | def ReluSeparableConvolution(in_channels, out_channels): 29 | return nn.Sequential( 30 | nn.ReLU6(inplace=True), 31 | SeparableConvolution(in_channels, out_channels) 32 | ) 33 | 34 | class EntryBottleneck(nn.Module): 35 | def __init__(self, in_channels, out_channels, first_relu=True): 36 | super(EntryBottleneck, self).__init__() 37 | mid_channels = out_channels 38 | 39 | self.shortcut = ConvBN(in_channels=in_channels,out_channels=out_channels,kernel_size=1,stride=2) 40 | 41 | self.bottleneck = nn.Sequential( 42 | ReluSeparableConvolution(in_channels=in_channels,out_channels=mid_channels) if first_relu else SeparableConvolution(in_channels=in_channels,out_channels=mid_channels), 43 | ReluSeparableConvolution(in_channels=mid_channels, out_channels=out_channels), 44 | nn.MaxPool2d(kernel_size=3,stride=2,padding=1) 45 | ) 46 | 47 | def forward(self, x): 48 | out = self.shortcut(x) 49 | x = self.bottleneck(x) 50 | return out+x 51 | 52 | 53 | class MiddleBottleneck(nn.Module): 54 | def __init__(self, in_channels, out_channels): 55 | super(MiddleBottleneck, self).__init__() 56 | mid_channels = out_channels 57 | 58 | self.bottleneck = nn.Sequential( 59 | ReluSeparableConvolution(in_channels=in_channels,out_channels=mid_channels), 60 | ReluSeparableConvolution(in_channels=mid_channels, out_channels=mid_channels), 61 | ReluSeparableConvolution(in_channels=mid_channels, out_channels=out_channels), 62 | ) 63 | 64 | def forward(self, x): 65 | out = self.bottleneck(x) 66 | return out+x 67 | 68 | class ExitBottleneck(nn.Module): 69 | def __init__(self, in_channels, out_channels): 70 | super(ExitBottleneck, self).__init__() 71 | mid_channels = 
in_channels 72 | 73 | self.shortcut = ConvBN(in_channels=in_channels,out_channels=out_channels,kernel_size=1,stride=2) 74 | 75 | self.bottleneck = nn.Sequential( 76 | ReluSeparableConvolution(in_channels=in_channels,out_channels=mid_channels), 77 | ReluSeparableConvolution(in_channels=mid_channels, out_channels=out_channels), 78 | nn.MaxPool2d(kernel_size=3,stride=2,padding=1) 79 | ) 80 | 81 | def forward(self, x): 82 | out = self.shortcut(x) 83 | x = self.bottleneck(x) 84 | return out+x 85 | 86 | class Xception(nn.Module): 87 | def __init__(self, num_classes=1000): 88 | super(Xception, self).__init__() 89 | 90 | self.entryFlow = nn.Sequential( 91 | ConvBNRelu(in_channels=3, out_channels=32, kernel_size=3, stride=2), 92 | ConvBNRelu(in_channels=32, out_channels=64, kernel_size=3, stride=1), 93 | EntryBottleneck(in_channels=64, out_channels=128, first_relu=False), 94 | EntryBottleneck(in_channels=128, out_channels=256, first_relu=True), 95 | EntryBottleneck(in_channels=256, out_channels=728, first_relu=True), 96 | ) 97 | self.middleFlow = nn.Sequential( 98 | MiddleBottleneck(in_channels=728,out_channels=728), 99 | MiddleBottleneck(in_channels=728, out_channels=728), 100 | MiddleBottleneck(in_channels=728, out_channels=728), 101 | MiddleBottleneck(in_channels=728, out_channels=728), 102 | MiddleBottleneck(in_channels=728, out_channels=728), 103 | MiddleBottleneck(in_channels=728, out_channels=728), 104 | MiddleBottleneck(in_channels=728, out_channels=728), 105 | MiddleBottleneck(in_channels=728, out_channels=728), 106 | ) 107 | self.exitFlow = nn.Sequential( 108 | ExitBottleneck(in_channels=728, out_channels=1024), 109 | SeparableConvolutionRelu(in_channels=1024, out_channels=1536), 110 | SeparableConvolutionRelu(in_channels=1536, out_channels=2048), 111 | nn.AdaptiveAvgPool2d((1,1)), 112 | ) 113 | 114 | self.linear = nn.Linear(2048, num_classes) 115 | 116 | def forward(self, x): 117 | x = self.entryFlow(x) 118 | x = self.middleFlow(x) 119 | x = self.exitFlow(x) 120 | x = x.view(x.size(0), -1) 121 | out = self.linear(x) 122 | return out 123 | 124 | 125 | if __name__ == '__main__': 126 | model = Xception() 127 | print(model) 128 | 129 | input = torch.randn(1,3,299,299) 130 | output = model(input) 131 | print(output.shape) -------------------------------------------------------------------------------- /ObjectDetection/ASFF.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torchvision 4 | 5 | def Conv1x1BnRelu(in_channels,out_channels): 6 | return nn.Sequential( 7 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=1, padding=0, bias=False), 8 | nn.BatchNorm2d(out_channels), 9 | nn.ReLU6(inplace=True), 10 | ) 11 | 12 | def upSampling1(in_channels,out_channels): 13 | return nn.Sequential( 14 | nn.Conv2d(in_channels=in_channels,out_channels=out_channels,kernel_size=1,stride=1,padding=0,bias=False), 15 | nn.BatchNorm2d(out_channels), 16 | nn.ReLU6(inplace=True), 17 | nn.Upsample(scale_factor=2, mode='nearest') 18 | ) 19 | 20 | def upSampling2(in_channels,out_channels): 21 | return nn.Sequential( 22 | upSampling1(in_channels,out_channels), 23 | nn.Upsample(scale_factor=2, mode='nearest'), 24 | ) 25 | 26 | def downSampling1(in_channels,out_channels): 27 | return nn.Sequential( 28 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=3, stride=2, padding=1, bias=False), 29 | nn.BatchNorm2d(out_channels), 30 | nn.ReLU6(inplace=True), 31 | ) 32 | 33 | def 
downSampling2(in_channels,out_channels): 34 | return nn.Sequential( 35 | nn.MaxPool2d(kernel_size=3, stride=2,padding=1), 36 | downSampling1(in_channels=in_channels, out_channels=out_channels), 37 | ) 38 | 39 | class ASFF(nn.Module): 40 | def __init__(self, level, channel1, channel2, channel3, out_channel): 41 | super(ASFF, self).__init__() 42 | self.level = level 43 | fused_channel = 8 44 | 45 | if self.level == 1: 46 | # level = 1: 47 | self.level2_1 = downSampling1(channel2,channel1) 48 | self.level3_1 = downSampling2(channel3,channel1) 49 | 50 | self.weight1 = Conv1x1BnRelu(channel1, fused_channel) 51 | self.weight2 = Conv1x1BnRelu(channel1, fused_channel) 52 | self.weight3 = Conv1x1BnRelu(channel1, fused_channel) 53 | 54 | self.expand_conv = Conv1x1BnRelu(channel1,out_channel) 55 | 56 | if self.level == 2: 57 | # level = 2: 58 | self.level1_2 = upSampling1(channel1,channel2) 59 | self.level3_2 = downSampling1(channel3,channel2) 60 | 61 | self.weight1 = Conv1x1BnRelu(channel2, fused_channel) 62 | self.weight2 = Conv1x1BnRelu(channel2, fused_channel) 63 | self.weight3 = Conv1x1BnRelu(channel2, fused_channel) 64 | 65 | self.expand_conv = Conv1x1BnRelu(channel2, out_channel) 66 | 67 | if self.level == 3: 68 | # level = 3: 69 | self.level1_3 = upSampling2(channel1,channel3) 70 | self.level2_3 = upSampling1(channel2,channel3) 71 | 72 | self.weight1 = Conv1x1BnRelu(channel3, fused_channel) 73 | self.weight2 = Conv1x1BnRelu(channel3, fused_channel) 74 | self.weight3 = Conv1x1BnRelu(channel3, fused_channel) 75 | 76 | self.expand_conv = Conv1x1BnRelu(channel3, out_channel) 77 | 78 | self.weight_level = nn.Conv2d(fused_channel * 3, 3, kernel_size=1, stride=1, padding=0) 79 | 80 | self.softmax = nn.Softmax(dim=1) 81 | 82 | 83 | def forward(self, x, y, z): 84 | if self.level == 1: 85 | level_x = x 86 | level_y = self.level2_1(y) 87 | level_z = self.level3_1(z) 88 | 89 | if self.level == 2: 90 | level_x = self.level1_2(x) 91 | level_y = y 92 | level_z = self.level3_2(z) 93 | 94 | if self.level == 3: 95 | level_x = self.level1_3(x) 96 | level_y = self.level2_3(y) 97 | level_z = z 98 | 99 | weight1 = self.weight1(level_x) 100 | weight2 = self.weight2(level_y) 101 | weight3 = self.weight3(level_z) 102 | 103 | level_weight = torch.cat((weight1, weight2, weight3), 1) 104 | weight_level = self.weight_level(level_weight) 105 | weight_level = self.softmax(weight_level) 106 | 107 | fused_level = level_x * weight_level[:, 0:1, :, :] + level_y * weight_level[:, 1:2, :, :] + level_z * weight_level[:, 2:3, :, :] 108 | out = self.expand_conv(fused_level) 109 | return out 110 | 111 | if __name__ == '__main__': 112 | model = ASFF(level=3, channel1=512, channel2=256, channel3=128, out_channel=128) 113 | print(model) 114 | 115 | x = torch.randn(1, 512, 16, 16) 116 | y = torch.randn(1, 256, 32, 32) 117 | z = torch.randn(1, 128, 64, 64) 118 | out = model(x,y,z) 119 | print(out.shape) -------------------------------------------------------------------------------- /ObjectDetection/CenterNet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torchvision 4 | 5 | 6 | 7 | 8 | 9 | 10 | if __name__ == '__main__': 11 | # NOTE: this file is still a placeholder; no CenterNet model is implemented here yet, 12 | # so there is nothing to instantiate or run. 13 | pass -------------------------------------------------------------------------------- /ObjectDetection/CornerNet.py: -------------------------------------------------------------------------------- 1 | import 
torch 2 | import torch.nn as nn 3 | 4 | def ConvBNReLU(in_channels,out_channels,kernel_size,stride,padding=1): 5 | return nn.Sequential( 6 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=stride, padding=kernel_size//2), 7 | nn.BatchNorm2d(out_channels), 8 | nn.ReLU6(inplace=True) 9 | ) 10 | 11 | class ResidualBlock(nn.Module): 12 | def __init__(self, in_channels, out_channels): 13 | super(ResidualBlock, self).__init__() 14 | mid_channels = out_channels//2 15 | 16 | self.bottleneck = nn.Sequential( 17 | ConvBNReLU(in_channels=in_channels, out_channels=mid_channels, kernel_size=1, stride=1), 18 | ConvBNReLU(in_channels=mid_channels, out_channels=mid_channels, kernel_size=3, stride=1, padding=1), 19 | ConvBNReLU(in_channels=mid_channels, out_channels=out_channels, kernel_size=1, stride=1), 20 | ) 21 | self.shortcut = ConvBNReLU(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=1) 22 | 23 | def forward(self, x): 24 | out = self.bottleneck(x) 25 | return out+self.shortcut(x) 26 | 27 | 28 | class left_pool(torch.autograd.Function): 29 | '''Left corner pooling: out[..., j] = max(x[..., j:]), a running max scanned right-to-left along the width axis. The original version used the legacy autograd.Function API and called an undefined comp() helper in backward(); this rewrite keeps the same forward semantics and routes each output gradient back to the input position that produced its running maximum.''' 30 | @staticmethod 31 | def forward(ctx, input_): 32 | # flip the width axis so the right-to-left scan becomes a plain cummax, then flip back 33 | output, indices = input_.flip(3).cummax(dim=3) 34 | ctx.save_for_backward(indices) 35 | return output.flip(3) 36 | 37 | @staticmethod 38 | def backward(ctx, grad_output): 39 | indices, = ctx.saved_tensors 40 | # every output element copies its value from the argmax position recorded by cummax, 41 | # so its incoming gradient is accumulated back onto that source position 42 | grad_input = torch.zeros_like(grad_output) 43 | grad_input.scatter_add_(3, indices, grad_output.flip(3)) 44 | return grad_input.flip(3) 45 | 46 | class HourglassNetwork(nn.Module): 47 | def __init__(self): 48 | super(HourglassNetwork, self).__init__() 49 | 50 | def forward(self, x): 51 | # stacked hourglass backbone is not implemented in this file yet 52 | raise NotImplementedError 53 | 54 | class PredictionModule(nn.Module): 55 | def __init__(self): 56 | super(PredictionModule, self).__init__() 57 | 58 | def forward(self, x): 59 | # corner heatmap / embedding / offset heads are not implemented yet 60 | raise NotImplementedError 61 | 62 | 63 | class CornerNet(nn.Module): 64 | def __init__(self): 65 | super(CornerNet, self).__init__() 66 | 67 | def forward(self, x): 68 | # full detector (backbone + two corner prediction modules) is not implemented yet 69 | raise NotImplementedError 70 | 71 | 72 | if __name__ == '__main__': 73 | # NOTE: CornerNet above is still a skeleton, so running it raises NotImplementedError. 74 | model = CornerNet() 75 | print(model) 76 | 77 | data = torch.randn(1,3,511,511) 78 | output = model(data) 79 | print(output.shape) -------------------------------------------------------------------------------- 
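A quick numeric check of the rewritten left_pool (a standalone sketch, assuming the class above is in scope; torch.autograd.gradcheck wants float64 inputs):

import torch

pool = left_pool.apply
x = torch.randn(2, 3, 4, 5, dtype=torch.float64, requires_grad=True)
print(torch.autograd.gradcheck(pool, (x,)))  # True: the custom backward matches numeric gradients
print(pool(x).shape)                         # torch.Size([2, 3, 4, 5]); corner pooling preserves shape

/ObjectDetection/FCOS.py: 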
-------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torchvision 4 | 5 | def Conv3x3ReLU(in_channels,out_channels): 6 | return nn.Sequential( 7 | nn.Conv2d(in_channels=in_channels,out_channels=out_channels,kernel_size=3,stride=1,padding=1), 8 | nn.ReLU6(inplace=True) 9 | ) 10 | 11 | def locLayer(in_channels,out_channels): 12 | return nn.Sequential( 13 | Conv3x3ReLU(in_channels=in_channels, out_channels=in_channels), 14 | Conv3x3ReLU(in_channels=in_channels, out_channels=in_channels), 15 | Conv3x3ReLU(in_channels=in_channels, out_channels=in_channels), 16 | Conv3x3ReLU(in_channels=in_channels, out_channels=in_channels), 17 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=3, stride=1, padding=1), 18 | ) 19 | 20 | def conf_centernessLayer(in_channels,out_channels): 21 | return nn.Sequential( 22 | Conv3x3ReLU(in_channels=in_channels, out_channels=in_channels), 23 | Conv3x3ReLU(in_channels=in_channels, out_channels=in_channels), 24 | Conv3x3ReLU(in_channels=in_channels, out_channels=in_channels), 25 | Conv3x3ReLU(in_channels=in_channels, out_channels=in_channels), 26 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=3, stride=1, padding=1), 27 | ) 28 | 29 | class FCOS(nn.Module): 30 | def __init__(self, num_classes=21): 31 | super(FCOS, self).__init__() 32 | self.num_classes = num_classes 33 | resnet = torchvision.models.resnet50() 34 | layers = list(resnet.children()) 35 | 36 | self.layer1 = nn.Sequential(*layers[:5]) 37 | self.layer2 = nn.Sequential(*layers[5]) 38 | self.layer3 = nn.Sequential(*layers[6]) 39 | self.layer4 = nn.Sequential(*layers[7]) 40 | 41 | self.lateral5 = nn.Conv2d(in_channels=2048, out_channels=256, kernel_size=1) 42 | self.lateral4 = nn.Conv2d(in_channels=1024, out_channels=256, kernel_size=1) 43 | self.lateral3 = nn.Conv2d(in_channels=512, out_channels=256, kernel_size=1) 44 | 45 | self.upsample4 = nn.ConvTranspose2d(in_channels=256, out_channels=256, kernel_size=4, stride=2, padding=1) 46 | self.upsample3 = nn.ConvTranspose2d(in_channels=256, out_channels=256, kernel_size=4, stride=2, padding=1) 47 | 48 | self.downsample6 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=2, padding=1) 49 | self.downsample5 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=2, padding=1) 50 | 51 | self.loc_layer3 = locLayer(in_channels=256,out_channels=4) 52 | self.conf_centerness_layer3 = conf_centernessLayer(in_channels=256,out_channels=self.num_classes+1) 53 | 54 | self.loc_layer4 = locLayer(in_channels=256, out_channels=4) 55 | self.conf_centerness_layer4 = conf_centernessLayer(in_channels=256, out_channels=self.num_classes + 1) 56 | 57 | self.loc_layer5 = locLayer(in_channels=256, out_channels=4) 58 | self.conf_centerness_layer5 = conf_centernessLayer(in_channels=256, out_channels=self.num_classes + 1) 59 | 60 | self.loc_layer6 = locLayer(in_channels=256, out_channels=4) 61 | self.conf_centerness_layer6 = conf_centernessLayer(in_channels=256, out_channels=self.num_classes + 1) 62 | 63 | self.loc_layer7 = locLayer(in_channels=256, out_channels=4) 64 | self.conf_centerness_layer7 = conf_centernessLayer(in_channels=256, out_channels=self.num_classes + 1) 65 | 66 | self.init_params() 67 | 68 | def init_params(self): 69 | for m in self.modules(): 70 | if isinstance(m, nn.Conv2d): 71 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 72 | elif isinstance(m, nn.BatchNorm2d): 73 | 
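# start every BatchNorm as the identity transform: unit scale, zero shift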
nn.init.constant_(m.weight, 1) 74 | nn.init.constant_(m.bias, 0) 75 | 76 | def forward(self, x): 77 | x = self.layer1(x) 78 | c3 =x = self.layer2(x) 79 | c4 =x = self.layer3(x) 80 | c5 = x = self.layer4(x) 81 | 82 | p5 = self.lateral5(c5) 83 | p4 = self.upsample4(p5) + self.lateral4(c4) 84 | p3 = self.upsample3(p4) + self.lateral3(c3) 85 | 86 | p6 = self.downsample5(p5) 87 | p7 = self.downsample6(p6) 88 | 89 | loc3 = self.loc_layer3(p3) 90 | conf_centerness3 = self.conf_centerness_layer3(p3) 91 | conf3, centerness3 = conf_centerness3.split([self.num_classes, 1], dim=1) 92 | 93 | loc4 = self.loc_layer4(p4) 94 | conf_centerness4 = self.conf_centerness_layer4(p4) 95 | conf4, centerness4 = conf_centerness4.split([self.num_classes, 1], dim=1) 96 | 97 | loc5 = self.loc_layer5(p5) 98 | conf_centerness5 = self.conf_centerness_layer5(p5) 99 | conf5, centerness5 = conf_centerness5.split([self.num_classes, 1], dim=1) 100 | 101 | loc6 = self.loc_layer6(p6) 102 | conf_centerness6 = self.conf_centerness_layer6(p6) 103 | conf6, centerness6 = conf_centerness6.split([self.num_classes, 1], dim=1) 104 | 105 | loc7 = self.loc_layer7(p7) 106 | conf_centerness7 = self.conf_centerness_layer7(p7) 107 | conf7, centerness7 = conf_centerness7.split([self.num_classes, 1], dim=1) 108 | 109 | locs = torch.cat([loc3.permute(0, 2, 3, 1).contiguous().view(loc3.size(0), -1), 110 | loc4.permute(0, 2, 3, 1).contiguous().view(loc4.size(0), -1), 111 | loc5.permute(0, 2, 3, 1).contiguous().view(loc5.size(0), -1), 112 | loc6.permute(0, 2, 3, 1).contiguous().view(loc6.size(0), -1), 113 | loc7.permute(0, 2, 3, 1).contiguous().view(loc7.size(0), -1)],dim=1) 114 | 115 | confs = torch.cat([conf3.permute(0, 2, 3, 1).contiguous().view(conf3.size(0), -1), 116 | conf4.permute(0, 2, 3, 1).contiguous().view(conf4.size(0), -1), 117 | conf5.permute(0, 2, 3, 1).contiguous().view(conf5.size(0), -1), 118 | conf6.permute(0, 2, 3, 1).contiguous().view(conf6.size(0), -1), 119 | conf7.permute(0, 2, 3, 1).contiguous().view(conf7.size(0), -1),], dim=1) 120 | 121 | centernesses = torch.cat([centerness3.permute(0, 2, 3, 1).contiguous().view(centerness3.size(0), -1), 122 | centerness4.permute(0, 2, 3, 1).contiguous().view(centerness4.size(0), -1), 123 | centerness5.permute(0, 2, 3, 1).contiguous().view(centerness5.size(0), -1), 124 | centerness6.permute(0, 2, 3, 1).contiguous().view(centerness6.size(0), -1), 125 | centerness7.permute(0, 2, 3, 1).contiguous().view(centerness7.size(0), -1), ], dim=1) 126 | 127 | out = (locs, confs, centernesses) 128 | return out 129 | 130 | if __name__ == '__main__': 131 | model = FCOS() 132 | print(model) 133 | 134 | input = torch.randn(1, 3, 800, 1024) 135 | out = model(input) 136 | print(out[0].shape) 137 | print(out[1].shape) 138 | print(out[2].shape) -------------------------------------------------------------------------------- /ObjectDetection/FPN.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torchvision 4 | 5 | class FPN(nn.Module): 6 | def __init__(self): 7 | super(FPN, self).__init__() 8 | resnet = torchvision.models.resnet50() 9 | layers = list(resnet.children()) 10 | 11 | self.layer1 = nn.Sequential(*layers[:5]) 12 | self.layer2 = nn.Sequential(*layers[5]) 13 | self.layer3 = nn.Sequential(*layers[6]) 14 | self.layer4 = nn.Sequential(*layers[7]) 15 | 16 | self.lateral5 = nn.Conv2d(in_channels=2048,out_channels=256,kernel_size=1) 17 | self.lateral4 = nn.Conv2d(in_channels=1024, out_channels=256, kernel_size=1) 18 | 
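# the 1x1 lateral convs project the ResNet-50 stage outputs (256/512/1024/2048 channels
# for C2-C5) into the common 256-channel space of the pyramid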
self.lateral3 = nn.Conv2d(in_channels=512, out_channels=256, kernel_size=1) 19 | self.lateral2 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=1) 20 | 21 | self.upsample2 = nn.ConvTranspose2d(in_channels=256, out_channels=256, kernel_size=4, stride=2, padding=1) 22 | self.upsample3 = nn.ConvTranspose2d(in_channels=256, out_channels=256, kernel_size=4, stride=2, padding=1) 23 | self.upsample4 = nn.ConvTranspose2d(in_channels=256,out_channels=256, kernel_size=4, stride=2, padding=1) 24 | 25 | self.smooth2 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1) 26 | self.smooth3 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1) 27 | self.smooth4 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1) 28 | 29 | self.init_params() 30 | 31 | def init_params(self): 32 | for m in self.modules(): 33 | if isinstance(m, nn.Conv2d): 34 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 35 | elif isinstance(m, nn.BatchNorm2d): 36 | nn.init.constant_(m.weight, 1) 37 | nn.init.constant_(m.bias, 0) 38 | 39 | def forward(self, x): 40 | c2 = x = self.layer1(x) 41 | c3 = x = self.layer2(x) 42 | c4 = x = self.layer3(x) 43 | c5 = x = self.layer4(x) 44 | 45 | p5 = self.lateral5(c5) 46 | p4 = self.upsample4(p5)+ self.lateral4(c4) 47 | p3 = self.upsample3(p4)+ self.lateral3(c3) 48 | p2 = self.upsample2(p3)+ self.lateral2(c2) 49 | 50 | p4 = self.smooth4(p4) 51 | p3 = self.smooth3(p3) 52 | p2 = self.smooth2(p2) 53 | return p2,p3,p4,p5 54 | 55 | if __name__ == '__main__': 56 | model = FPN() 57 | print(model) 58 | 59 | input = torch.randn(1, 3, 224, 224) 60 | p2, p3, p4, p5 = model(input) 61 | print(p2.shape) 62 | print(p3.shape) 63 | print(p4.shape) 64 | print(p5.shape) -------------------------------------------------------------------------------- /ObjectDetection/FSAF.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shanglianlm0525/PyTorch-Networks/a6b6dd4b7876ba8473a08e116485a0492a88cd48/ObjectDetection/FSAF.py -------------------------------------------------------------------------------- /ObjectDetection/FisheyeMODNet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | def Conv3x3BNReLU(in_channels,out_channels,stride,groups): 5 | return nn.Sequential( 6 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=3, stride=stride, padding=1,groups=groups), 7 | nn.BatchNorm2d(out_channels), 8 | nn.ReLU6(inplace=True) 9 | ) 10 | 11 | def Conv1x1BNReLU(in_channels,out_channels,groups): 12 | return nn.Sequential( 13 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=1,groups=groups), 14 | nn.BatchNorm2d(out_channels), 15 | nn.ReLU6(inplace=True) 16 | ) 17 | 18 | def Conv1x1BN(in_channels,out_channels,groups): 19 | return nn.Sequential( 20 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=1,groups=groups), 21 | nn.BatchNorm2d(out_channels) 22 | ) 23 | 24 | class ChannelShuffle(nn.Module): 25 | def __init__(self, groups): 26 | super(ChannelShuffle, self).__init__() 27 | self.groups = groups 28 | 29 | def forward(self, x): 30 | '''Channel shuffle: [N,C,H,W] -> [N,g,C/g,H,W] -> [N,C/g,g,H,W] -> [N,C,H,W]''' 31 | N, C, H, W = x.size() 32 | g = self.groups 33 | return x.view(N, g, int(C / g), H, W).permute(0, 2, 1, 3, 4).contiguous().view(N, C, H, W) 34 | 35 | 36 | 
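# --- aside (not part of the original file): a concrete view of what ChannelShuffle does.
# With g groups, output channel k reads input channel (k % g) * (C // g) + k // g, which is
# what lets information cross group boundaries between consecutive grouped convolutions.
if __name__ == '__main__':
    _x = torch.arange(6.).view(1, 6, 1, 1)
    print(ChannelShuffle(groups=2)(_x).view(-1).tolist())  # [0.0, 3.0, 1.0, 4.0, 2.0, 5.0]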
class ShuffleNetUnits(nn.Module): 37 | def __init__(self, in_channels, out_channels, stride, groups): 38 | super(ShuffleNetUnits, self).__init__() 39 | self.stride = stride 40 | out_channels = out_channels - in_channels if self.stride>1 else out_channels 41 | mid_channels = out_channels // 4 42 | 43 | self.bottleneck = nn.Sequential( 44 | Conv1x1BNReLU(in_channels, mid_channels,groups), 45 | ChannelShuffle(groups), 46 | Conv3x3BNReLU(mid_channels, mid_channels, stride,groups), 47 | Conv1x1BN(mid_channels, out_channels,groups) 48 | ) 49 | if self.stride>1: 50 | self.shortcut = nn.MaxPool2d(kernel_size=3,stride=2,padding=1) 51 | 52 | self.relu = nn.ReLU6(inplace=True) 53 | 54 | def forward(self, x): 55 | out = self.bottleneck(x) 56 | out = torch.cat([self.shortcut(x), out], dim=1) if self.stride > 1 else (out + x) 57 | return self.relu(out) 58 | 59 | class FisheyeMODNet(nn.Module): 60 | def __init__(self, groups=1, num_classes=2): 61 | super(FisheyeMODNet, self).__init__() 62 | layers = [4, 8, 4] 63 | 64 | self.stage1a = nn.Sequential( 65 | nn.Conv2d(in_channels=3, out_channels=24, kernel_size=3,stride=2, padding=1), 66 | nn.MaxPool2d(kernel_size=2,stride=2), 67 | ) 68 | self.stage2a = self._make_layer(24, 120, groups, layers[0]) 69 | 70 | self.stage1b = nn.Sequential( 71 | nn.Conv2d(in_channels=3, out_channels=24, kernel_size=3, stride=2, padding=1), 72 | nn.MaxPool2d(kernel_size=2, stride=2), 73 | ) 74 | self.stage2b = self._make_layer(24, 120, groups, layers[0]) 75 | 76 | self.stage3 = self._make_layer(240, 480, groups, layers[1]) 77 | self.stage4 = self._make_layer(480, 960, groups, layers[2]) 78 | 79 | self.adapt_conv3 = nn.Conv2d(960, num_classes, kernel_size=1) 80 | self.adapt_conv2 = nn.Conv2d(480, num_classes, kernel_size=1) 81 | self.adapt_conv1 = nn.Conv2d(240, num_classes, kernel_size=1) 82 | 83 | self.up_sampling3 = nn.ConvTranspose2d(in_channels=num_classes, out_channels=num_classes, kernel_size=4, stride=2, padding=1) 84 | self.up_sampling2 = nn.ConvTranspose2d(in_channels=num_classes, out_channels=num_classes, kernel_size=4, stride=2, padding=1) 85 | self.up_sampling1 = nn.ConvTranspose2d(in_channels=num_classes, out_channels=num_classes, kernel_size=16, stride=8, padding=4) 86 | 87 | self.softmax = nn.Softmax(dim=1) 88 | 89 | self.init_params() 90 | 91 | def _make_layer(self, in_channels, out_channels, groups, block_num): 92 | layers = [] 93 | layers.append(ShuffleNetUnits(in_channels=in_channels, out_channels=out_channels, stride=2, groups=groups)) 94 | for idx in range(1, block_num): 95 | layers.append(ShuffleNetUnits(in_channels=out_channels, out_channels=out_channels, stride=1, groups=groups)) 96 | return nn.Sequential(*layers) 97 | 98 | def init_params(self): 99 | for m in self.modules(): 100 | if isinstance(m, nn.Conv2d): 101 | nn.init.kaiming_normal_(m.weight) 102 | nn.init.constant_(m.bias, 0) 103 | elif isinstance(m, nn.BatchNorm2d) or isinstance(m, nn.Linear): 104 | nn.init.constant_(m.weight, 1) 105 | nn.init.constant_(m.bias, 0) 106 | 107 | def forward(self, x, y): 108 | x = self.stage2a(self.stage1a(x)) 109 | y = self.stage2b(self.stage1b(y)) 110 | feature1 = torch.cat([x, y], dim=1) 111 | feature2 = self.stage3(feature1) 112 | feature3 = self.stage4(feature2) 113 | 114 | out3 = self.up_sampling3(self.adapt_conv3(feature3)) 115 | out2 = self.up_sampling2(self.adapt_conv2(feature2) + out3) 116 | out1 = self.up_sampling1(self.adapt_conv1(feature1) + out2) 117 | 118 | out = self.softmax(out1) 119 | return out 120 | 121 | 122 | if __name__ == '__main__': 123 | model 
= FisheyeMODNet() 124 | 125 | input1 = torch.randn(1, 3, 640, 640) 126 | input2 = torch.randn(1, 3, 640, 640) 127 | 128 | out = model(input1, input2) 129 | print(out.shape) -------------------------------------------------------------------------------- /ObjectDetection/FoveaBox.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torchvision 4 | 5 | def Conv3x3ReLU(in_channels,out_channels): 6 | return nn.Sequential( 7 | nn.Conv2d(in_channels=in_channels,out_channels=out_channels,kernel_size=3,stride=1,padding=1), 8 | nn.ReLU6(inplace=True) 9 | ) 10 | 11 | def locLayer(in_channels,out_channels): 12 | return nn.Sequential( 13 | Conv3x3ReLU(in_channels=in_channels, out_channels=in_channels), 14 | Conv3x3ReLU(in_channels=in_channels, out_channels=in_channels), 15 | Conv3x3ReLU(in_channels=in_channels, out_channels=in_channels), 16 | Conv3x3ReLU(in_channels=in_channels, out_channels=in_channels), 17 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=3, stride=1, padding=1), 18 | ) 19 | 20 | def confLayer(in_channels,out_channels): 21 | return nn.Sequential( 22 | Conv3x3ReLU(in_channels=in_channels, out_channels=in_channels), 23 | Conv3x3ReLU(in_channels=in_channels, out_channels=in_channels), 24 | Conv3x3ReLU(in_channels=in_channels, out_channels=in_channels), 25 | Conv3x3ReLU(in_channels=in_channels, out_channels=in_channels), 26 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=3, stride=1, padding=1), 27 | ) 28 | 29 | class FoveaBox(nn.Module): 30 | def __init__(self, num_classes=80): 31 | super(FoveaBox, self).__init__() 32 | self.num_classes = num_classes 33 | resnet = torchvision.models.resnet50() 34 | layers = list(resnet.children()) 35 | 36 | self.layer1 = nn.Sequential(*layers[:5]) 37 | self.layer2 = nn.Sequential(*layers[5]) 38 | self.layer3 = nn.Sequential(*layers[6]) 39 | self.layer4 = nn.Sequential(*layers[7]) 40 | 41 | self.lateral5 = nn.Conv2d(in_channels=2048, out_channels=256, kernel_size=1) 42 | self.lateral4 = nn.Conv2d(in_channels=1024, out_channels=256, kernel_size=1) 43 | self.lateral3 = nn.Conv2d(in_channels=512, out_channels=256, kernel_size=1) 44 | 45 | self.upsample4 = nn.ConvTranspose2d(in_channels=256, out_channels=256, kernel_size=4, stride=2, padding=1) 46 | self.upsample3 = nn.ConvTranspose2d(in_channels=256, out_channels=256, kernel_size=4, stride=2, padding=1) 47 | 48 | self.downsample6 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=2, padding=1) 49 | self.downsample6_relu = nn.ReLU6(inplace=True) 50 | self.downsample5 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=2, padding=1) 51 | 52 | self.loc_layer3 = locLayer(in_channels=256,out_channels=4) 53 | self.conf_layer3 = confLayer(in_channels=256,out_channels=self.num_classes) 54 | 55 | self.loc_layer4 = locLayer(in_channels=256, out_channels=4) 56 | self.conf_layer4 = confLayer(in_channels=256, out_channels=self.num_classes) 57 | 58 | self.loc_layer5 = locLayer(in_channels=256, out_channels=4) 59 | self.conf_layer5 = confLayer(in_channels=256, out_channels=self.num_classes) 60 | 61 | self.loc_layer6 = locLayer(in_channels=256, out_channels=4) 62 | self.conf_layer6 = confLayer(in_channels=256, out_channels=self.num_classes) 63 | 64 | self.loc_layer7 = locLayer(in_channels=256, out_channels=4) 65 | self.conf_layer7 = confLayer(in_channels=256, out_channels=self.num_classes) 66 | 67 | self.init_params() 68 | 69 | def init_params(self): 
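# He initialization for convs and identity for BatchNorm, the same scheme the other
# detection heads in this repository use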
70 | for m in self.modules(): 71 | if isinstance(m, nn.Conv2d): 72 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 73 | elif isinstance(m, nn.BatchNorm2d): 74 | nn.init.constant_(m.weight, 1) 75 | nn.init.constant_(m.bias, 0) 76 | 77 | def forward(self, x): 78 | x = self.layer1(x) 79 | c3 = x = self.layer2(x) 80 | c4 = x = self.layer3(x) 81 | c5 = x = self.layer4(x) 82 | 83 | p5 = self.lateral5(c5) 84 | p4 = self.upsample4(p5) + self.lateral4(c4) 85 | p3 = self.upsample3(p4) + self.lateral3(c3) 86 | 87 | p6 = self.downsample5(p5) 88 | p7 = self.downsample6_relu(self.downsample6(p6)) 89 | 90 | loc3 = self.loc_layer3(p3) 91 | conf3 = self.conf_layer3(p3) 92 | 93 | loc4 = self.loc_layer4(p4) 94 | conf4 = self.conf_layer4(p4) 95 | 96 | loc5 = self.loc_layer5(p5) 97 | conf5 = self.conf_layer5(p5) 98 | 99 | loc6 = self.loc_layer6(p6) 100 | conf6 = self.conf_layer6(p6) 101 | 102 | loc7 = self.loc_layer7(p7) 103 | conf7 = self.conf_layer7(p7) 104 | 105 | locs = torch.cat([loc3.permute(0, 2, 3, 1).contiguous().view(loc3.size(0), -1), 106 | loc4.permute(0, 2, 3, 1).contiguous().view(loc4.size(0), -1), 107 | loc5.permute(0, 2, 3, 1).contiguous().view(loc5.size(0), -1), 108 | loc6.permute(0, 2, 3, 1).contiguous().view(loc6.size(0), -1), 109 | loc7.permute(0, 2, 3, 1).contiguous().view(loc7.size(0), -1)],dim=1) 110 | 111 | confs = torch.cat([conf3.permute(0, 2, 3, 1).contiguous().view(conf3.size(0), -1), 112 | conf4.permute(0, 2, 3, 1).contiguous().view(conf4.size(0), -1), 113 | conf5.permute(0, 2, 3, 1).contiguous().view(conf5.size(0), -1), 114 | conf6.permute(0, 2, 3, 1).contiguous().view(conf6.size(0), -1), 115 | conf7.permute(0, 2, 3, 1).contiguous().view(conf7.size(0), -1),], dim=1) 116 | 117 | out = (locs, confs) 118 | return out 119 | 120 | if __name__ == '__main__': 121 | model = FoveaBox() 122 | print(model) 123 | 124 | input = torch.randn(1, 3, 800, 800) 125 | out = model(input) 126 | print(out[0].shape) 127 | print(out[1].shape) 128 | -------------------------------------------------------------------------------- /ObjectDetection/README.md: -------------------------------------------------------------------------------- 1 | # ObjectDetection-network 2 | Pytorch implementation of ObjectDetection-network 3 | 4 | 5 | 6 | **SSD:** 7 | 8 | SSD: Single Shot MultiBox Detector, 2016 9 | 10 | https://arxiv.org/pdf/1512.02325.pdf 11 | 12 | https://liumin.blog.csdn.net/article/details/100530275 13 | 14 | 15 | 16 | **YOLO:** 17 | 18 | You Only Look Once: Unified, Real-Time Object Detection, 2016 19 | 20 | https://arxiv.org/pdf/1506.02640.pdf 21 | 22 | https://liumin.blog.csdn.net/article/details/100904605 23 | 24 | 25 | 26 | **YOLOv2:** 27 | 28 | YOLO9000: Better, Faster, Stronger, 2017 29 | 30 | https://arxiv.org/pdf/1612.08242.pdf 31 | 32 | https://liumin.blog.csdn.net/article/details/100904645 33 | 34 | 35 | 36 | **YOLOv3:** 37 | 38 | YOLOv3: An Incremental Improvement, 2018 39 | 40 | https://arxiv.org/pdf/1804.02767.pdf 41 | 42 | https://liumin.blog.csdn.net/article/details/100904663 43 | 44 | 45 | 46 | **FCOS:** 47 | 48 | FCOS: Fully Convolutional One-Stage Object Detection, 2019 49 | 50 | https://arxiv.org/pdf/1904.01355.pdf 51 | 52 | https://liumin.blog.csdn.net/article/details/89007219 53 | 54 | 55 | 56 | **FPN:** 57 | 58 | Feature Pyramid Networks for Object Detection, 2017 59 | 60 | https://arxiv.org/pdf/1612.03144v2.pdf 61 | 62 | https://liumin.blog.csdn.net/article/details/100864158 63 | 64 | 65 | 66 | **RetinaNet:** 67 | 68 | 
Focal Loss for Dense Object Detection, 2017 69 | 70 | https://arxiv.org/pdf/1708.02002.pdf 71 | 72 | https://liumin.blog.csdn.net/article/details/102135318 73 | 74 | 75 | 76 | **Objects as Points:** 77 | 78 | Objects as Points, 2019 79 | 80 | https://arxiv.org/pdf/1904.07850v1.pdf 81 | 82 | https://liumin.blog.csdn.net/article/details/100867545 83 | 84 | 85 | 86 | **FSAF:** 87 | 88 | Feature Selective Anchor-Free Module for Single-Shot Object Detection, 2019 89 | 90 | https://arxiv.org/pdf/1903.00621.pdf 91 | 92 | https://liumin.blog.csdn.net/article/details/100942317 93 | 94 | 95 | 96 | **CenterNet** 97 | 98 | CenterNet: Keypoint Triplets for Object Detection, 2019 99 | 100 | https://arxiv.org/pdf/1904.08189.pdf 101 | 102 | https://liumin.blog.csdn.net/article/details/100942259 103 | 104 | 105 | 106 | **FoveaBox** 107 | 108 | FoveaBox: Beyond Anchor-based Object Detector, 2019 109 | 110 | https://arxiv.org/pdf/1904.03797v1.pdf 111 | 112 | https://liumin.blog.csdn.net/article/details/100941880 -------------------------------------------------------------------------------- /ObjectDetection/RetinaNet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torchvision 4 | 5 | def Conv3x3ReLU(in_channels,out_channels): 6 | return nn.Sequential( 7 | nn.Conv2d(in_channels=in_channels,out_channels=out_channels,kernel_size=3,stride=1,padding=1), 8 | nn.ReLU6(inplace=True) 9 | ) 10 | 11 | def locLayer(in_channels,out_channels): 12 | return nn.Sequential( 13 | Conv3x3ReLU(in_channels=in_channels, out_channels=in_channels), 14 | Conv3x3ReLU(in_channels=in_channels, out_channels=in_channels), 15 | Conv3x3ReLU(in_channels=in_channels, out_channels=in_channels), 16 | Conv3x3ReLU(in_channels=in_channels, out_channels=in_channels), 17 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=3, stride=1, padding=1), 18 | ) 19 | 20 | def confLayer(in_channels,out_channels): 21 | return nn.Sequential( 22 | Conv3x3ReLU(in_channels=in_channels, out_channels=in_channels), 23 | Conv3x3ReLU(in_channels=in_channels, out_channels=in_channels), 24 | Conv3x3ReLU(in_channels=in_channels, out_channels=in_channels), 25 | Conv3x3ReLU(in_channels=in_channels, out_channels=in_channels), 26 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=3, stride=1, padding=1), 27 | ) 28 | 29 | class RetinaNet(nn.Module): 30 | def __init__(self, num_classes=80, num_anchores = 9): 31 | super(RetinaNet, self).__init__() 32 | self.num_classes = num_classes 33 | resnet = torchvision.models.resnet50() 34 | layers = list(resnet.children()) 35 | 36 | self.layer1 = nn.Sequential(*layers[:5]) 37 | self.layer2 = nn.Sequential(*layers[5]) 38 | self.layer3 = nn.Sequential(*layers[6]) 39 | self.layer4 = nn.Sequential(*layers[7]) 40 | 41 | self.lateral5 = nn.Conv2d(in_channels=2048, out_channels=256, kernel_size=1) 42 | self.lateral4 = nn.Conv2d(in_channels=1024, out_channels=256, kernel_size=1) 43 | self.lateral3 = nn.Conv2d(in_channels=512, out_channels=256, kernel_size=1) 44 | 45 | self.upsample4 = nn.ConvTranspose2d(in_channels=256, out_channels=256, kernel_size=4, stride=2, padding=1) 46 | self.upsample3 = nn.ConvTranspose2d(in_channels=256, out_channels=256, kernel_size=4, stride=2, padding=1) 47 | 48 | self.downsample6 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=2, padding=1) 49 | self.downsample6_relu = nn.ReLU6(inplace=True) 50 | self.downsample5 = nn.Conv2d(in_channels=256, 
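# the extra pyramid levels P6 and P7 come from stride-2 3x3 convs stacked on top of P5,
# matching the RetinaNet paper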
out_channels=256, kernel_size=3, stride=2, padding=1) 51 | 52 | self.loc_layer3 = locLayer(in_channels=256,out_channels=4*num_anchores) 53 | self.conf_layer3 = confLayer(in_channels=256,out_channels=self.num_classes*num_anchores) 54 | 55 | self.loc_layer4 = locLayer(in_channels=256, out_channels=4*num_anchores) 56 | self.conf_layer4 = confLayer(in_channels=256, out_channels=self.num_classes*num_anchores) 57 | 58 | self.loc_layer5 = locLayer(in_channels=256, out_channels=4*num_anchores) 59 | self.conf_layer5 = confLayer(in_channels=256, out_channels=self.num_classes*num_anchores) 60 | 61 | self.loc_layer6 = locLayer(in_channels=256, out_channels=4*num_anchores) 62 | self.conf_layer6 = confLayer(in_channels=256, out_channels=self.num_classes*num_anchores) 63 | 64 | self.loc_layer7 = locLayer(in_channels=256, out_channels=4*num_anchores) 65 | self.conf_layer7 = confLayer(in_channels=256, out_channels=self.num_classes*num_anchores) 66 | 67 | self.init_params() 68 | 69 | def init_params(self): 70 | for m in self.modules(): 71 | if isinstance(m, nn.Conv2d): 72 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 73 | elif isinstance(m, nn.BatchNorm2d): 74 | nn.init.constant_(m.weight, 1) 75 | nn.init.constant_(m.bias, 0) 76 | 77 | def forward(self, x): 78 | x = self.layer1(x) 79 | c3 =x = self.layer2(x) 80 | c4 =x = self.layer3(x) 81 | c5 = x = self.layer4(x) 82 | 83 | p5 = self.lateral5(c5) 84 | p4 = self.upsample4(p5) + self.lateral4(c4) 85 | p3 = self.upsample3(p4) + self.lateral3(c3) 86 | 87 | p6 = self.downsample5(p5) 88 | p7 = self.downsample6_relu(self.downsample6(p6)) 89 | 90 | loc3 = self.loc_layer3(p3) 91 | conf3 = self.conf_layer3(p3) 92 | 93 | loc4 = self.loc_layer4(p4) 94 | conf4 = self.conf_layer4(p4) 95 | 96 | loc5 = self.loc_layer5(p5) 97 | conf5 = self.conf_layer5(p5) 98 | 99 | loc6 = self.loc_layer6(p6) 100 | conf6 = self.conf_layer6(p6) 101 | 102 | loc7 = self.loc_layer7(p7) 103 | conf7 = self.conf_layer7(p7) 104 | 105 | locs = torch.cat([loc3.permute(0, 2, 3, 1).contiguous().view(loc3.size(0), -1), 106 | loc4.permute(0, 2, 3, 1).contiguous().view(loc4.size(0), -1), 107 | loc5.permute(0, 2, 3, 1).contiguous().view(loc5.size(0), -1), 108 | loc6.permute(0, 2, 3, 1).contiguous().view(loc6.size(0), -1), 109 | loc7.permute(0, 2, 3, 1).contiguous().view(loc7.size(0), -1)],dim=1) 110 | 111 | confs = torch.cat([conf3.permute(0, 2, 3, 1).contiguous().view(conf3.size(0), -1), 112 | conf4.permute(0, 2, 3, 1).contiguous().view(conf4.size(0), -1), 113 | conf5.permute(0, 2, 3, 1).contiguous().view(conf5.size(0), -1), 114 | conf6.permute(0, 2, 3, 1).contiguous().view(conf6.size(0), -1), 115 | conf7.permute(0, 2, 3, 1).contiguous().view(conf7.size(0), -1),], dim=1) 116 | 117 | out = (locs, confs) 118 | return out 119 | 120 | if __name__ == '__main__': 121 | model = RetinaNet() 122 | print(model) 123 | 124 | input = torch.randn(1, 3, 800, 800) 125 | out = model(input) 126 | print(out[0].shape) 127 | print(out[1].shape) 128 | -------------------------------------------------------------------------------- /ObjectDetection/VoVNet.py: -------------------------------------------------------------------------------- 1 | # !/usr/bin/env python 2 | # -- coding: utf-8 -- 3 | # @Time : 2020/6/1 14:40 4 | # @Author : liumin 5 | # @File : VoVNet.py 6 | 7 | import torch 8 | import torch.nn as nn 9 | import torchvision 10 | 11 | __all__ = ['VoVNet', 'vovnet27_slim', 'vovnet39', 'vovnet57'] 12 | 13 | from PIL.Image import Image 14 | 15 | 16 | def 
Conv3x3BNReLU(in_channels,out_channels,stride,groups=1): 17 | return nn.Sequential( 18 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=3, stride=stride, padding=1,groups=groups, bias=False), 19 | nn.BatchNorm2d(out_channels), 20 | nn.ReLU6(inplace=True) 21 | ) 22 | 23 | 24 | def Conv3x3BN(in_channels,out_channels,stride,groups): 25 | return nn.Sequential( 26 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=3, stride=stride, padding=1,groups=groups, bias=False), 27 | nn.BatchNorm2d(out_channels) 28 | ) 29 | 30 | 31 | def Conv1x1BNReLU(in_channels,out_channels): 32 | return nn.Sequential( 33 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=1, bias=False), 34 | nn.BatchNorm2d(out_channels), 35 | nn.ReLU6(inplace=True) 36 | ) 37 | 38 | 39 | def Conv1x1BN(in_channels,out_channels): 40 | return nn.Sequential( 41 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=1, bias=False), 42 | nn.BatchNorm2d(out_channels) 43 | ) 44 | 45 | class OSA_module(nn.Module): 46 | def __init__(self, in_channels,mid_channels, out_channels, block_nums=5): 47 | super(OSA_module, self).__init__() 48 | 49 | self._layers = nn.ModuleList() 50 | self._layers.append(Conv3x3BNReLU(in_channels=in_channels, out_channels=mid_channels, stride=1)) 51 | for idx in range(block_nums-1): 52 | self._layers.append(Conv3x3BNReLU(in_channels=mid_channels, out_channels=mid_channels, stride=1)) 53 | 54 | self.conv1x1 = Conv1x1BNReLU(in_channels+mid_channels*block_nums,out_channels) 55 | 56 | def forward(self, x): 57 | outputs = [] 58 | outputs.append(x) 59 | for _layer in self._layers: 60 | x = _layer(x) 61 | outputs.append(x) 62 | out = torch.cat(outputs, dim=1) 63 | out = self.conv1x1(out) 64 | return out 65 | 66 | 67 | class VoVNet(nn.Module): 68 | def __init__(self, planes, layers, num_classes=2): 69 | super(VoVNet, self).__init__() 70 | 71 | self.groups = 1 72 | self.stage1 = nn.Sequential( 73 | Conv3x3BNReLU(in_channels=3, out_channels=64, stride=2, groups=self.groups), 74 | Conv3x3BNReLU(in_channels=64, out_channels=64, stride=1, groups=self.groups), 75 | Conv3x3BNReLU(in_channels=64, out_channels=128, stride=1, groups=self.groups), 76 | ) 77 | 78 | self.stage2 = self._make_layer(planes[0][0],planes[0][1],planes[0][2],layers[0]) 79 | 80 | self.stage3 = self._make_layer(planes[1][0],planes[1][1],planes[1][2],layers[1]) 81 | 82 | self.stage4 = self._make_layer(planes[2][0],planes[2][1],planes[2][2],layers[2]) 83 | 84 | self.stage5 = self._make_layer(planes[3][0],planes[3][1],planes[3][2],layers[3]) 85 | 86 | self.avgpool = nn.AdaptiveAvgPool2d(output_size=1) 87 | self.flatten = nn.Flatten() 88 | self.dropout = nn.Dropout(p=0.2) 89 | self.linear = nn.Linear(in_features=planes[3][2], out_features=num_classes) 90 | 91 | def _make_layer(self, in_channels, mid_channels,out_channels, block_num): 92 | layers = [] 93 | layers.append(nn.MaxPool2d(kernel_size=3, stride=2, padding=1)) 94 | for idx in range(block_num): 95 | layers.append(OSA_module(in_channels=in_channels, mid_channels=mid_channels, out_channels=out_channels)) 96 | in_channels = out_channels 97 | return nn.Sequential(*layers) 98 | 99 | def init_params(self): 100 | for m in self.modules(): 101 | if isinstance(m, nn.Conv2d): 102 | nn.init.kaiming_normal_(m.weight) 103 | if m.bias is not None: 104 | nn.init.constant_(m.bias, 0) 105 | elif isinstance(m, nn.BatchNorm2d) or isinstance(m, nn.Linear): 106 | nn.init.constant_(m.weight, 1) 107 | 
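# note: a constant weight of 1 for Linear layers gives every output unit identical
# gradients (no symmetry breaking); kept as written, but a normal init is the usual choice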
nn.init.constant_(m.bias, 0) 108 | 109 | def forward(self, x): 110 | x = self.stage1(x) 111 | x = self.stage2(x) 112 | x = self.stage3(x) 113 | x = self.stage4(x) 114 | x = self.stage5(x) 115 | x = self.avgpool(x) 116 | x = self.flatten(x) 117 | x = self.dropout(x) 118 | out = self.linear(x) 119 | return out 120 | 121 | def vovnet27_slim(**kwargs): 122 | planes = [[128, 64, 128], 123 | [128, 80, 256], 124 | [256, 96, 384], 125 | [384, 112, 512]] 126 | layers = [1, 1, 1, 1] 127 | model = VoVNet(planes, layers) 128 | return model 129 | 130 | def vovnet39(**kwargs): 131 | planes = [[128, 128, 256], 132 | [256, 160, 512], 133 | [512, 192, 768], 134 | [768, 224, 1024]] 135 | layers = [1, 1, 2, 2] 136 | model = VoVNet(planes, layers) 137 | return model 138 | 139 | def vovnet57(**kwargs): 140 | planes = [[128, 128, 256], 141 | [256, 160, 512], 142 | [512, 192, 768], 143 | [768, 224, 1024]] 144 | layers = [1, 1, 4, 3] 145 | model = VoVNet(planes, layers) 146 | return model 147 | 148 | 149 | if __name__=='__main__': 150 | model = vovnet27_slim() 151 | print(model) 152 | 153 | input = torch.randn(1, 3, 64, 64) 154 | out = model(input) 155 | print(out.shape) 156 | -------------------------------------------------------------------------------- /ObjectDetection/YOLO.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | def Conv1x1BNReLU(in_channels,out_channels): 5 | return nn.Sequential( 6 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=1), 7 | nn.BatchNorm2d(out_channels), 8 | nn.ReLU6(inplace=True) 9 | ) 10 | 11 | def Conv3x3BNReLU(in_channels,out_channels, stride=1): 12 | return nn.Sequential( 13 | nn.Conv2d(in_channels=in_channels,out_channels=out_channels,kernel_size=3,stride=stride,padding=1), 14 | nn.BatchNorm2d(out_channels), 15 | nn.ReLU6(inplace=True) 16 | ) 17 | 18 | class YOLO(nn.Module): 19 | def __init__(self): 20 | super(YOLO, self).__init__() 21 | 22 | self.features = nn.Sequential( 23 | nn.Conv2d(in_channels=3,out_channels=64, kernel_size=7,stride=2,padding=3), 24 | nn.MaxPool2d(kernel_size=2,stride=2), 25 | Conv3x3BNReLU(in_channels=64, out_channels=192), 26 | nn.MaxPool2d(kernel_size=2, stride=2), 27 | Conv1x1BNReLU(in_channels=192, out_channels=128), 28 | Conv3x3BNReLU(in_channels=128, out_channels=256), 29 | Conv1x1BNReLU(in_channels=256, out_channels=256), 30 | Conv3x3BNReLU(in_channels=256, out_channels=512), 31 | nn.MaxPool2d(kernel_size=2, stride=2), 32 | Conv1x1BNReLU(in_channels=512, out_channels=256), 33 | Conv3x3BNReLU(in_channels=256, out_channels=512), 34 | Conv1x1BNReLU(in_channels=512, out_channels=256), 35 | Conv3x3BNReLU(in_channels=256, out_channels=512), 36 | Conv1x1BNReLU(in_channels=512, out_channels=256), 37 | Conv3x3BNReLU(in_channels=256, out_channels=512), 38 | Conv1x1BNReLU(in_channels=512, out_channels=256), 39 | Conv3x3BNReLU(in_channels=256, out_channels=512), 40 | Conv1x1BNReLU(in_channels=512, out_channels=512), 41 | Conv3x3BNReLU(in_channels=512, out_channels=1024), 42 | nn.MaxPool2d(kernel_size=2, stride=2), 43 | Conv1x1BNReLU(in_channels=1024, out_channels=512), 44 | Conv3x3BNReLU(in_channels=512, out_channels= 1024), 45 | Conv1x1BNReLU(in_channels=1024, out_channels=512), 46 | Conv3x3BNReLU(in_channels=512, out_channels=1024), 47 | Conv3x3BNReLU(in_channels=1024, out_channels=1024), 48 | Conv3x3BNReLU(in_channels=1024, out_channels=1024, stride=2), 49 | Conv3x3BNReLU(in_channels=1024, out_channels=1024), 50 | 
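# by here a 448x448 input has been reduced 64x to a 7x7x1024 map, matching the
# 1024*7*7 flatten below; the final 1470-dim output reshapes to YOLOv1's 7x7x30 grid
# (2 boxes * 5 values + 20 VOC classes per cell)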
Conv3x3BNReLU(in_channels=1024, out_channels=1024), 51 | ) 52 | 53 | self.classifier = nn.Sequential( 54 | nn.Linear(1024 * 7 * 7, 4096), 55 | nn.ReLU(True), 56 | nn.Dropout(), 57 | nn.Linear(4096, 1470), 58 | ) 59 | 60 | def forward(self, x): 61 | x = self.features(x) 62 | x = x.view(x.size(0), -1) 63 | out = self.classifier(x) 64 | return out 65 | 66 | 67 | if __name__ == '__main__': 68 | model = YOLO() 69 | print(model) 70 | 71 | data = torch.randn(1,3,448,448) 72 | output = model(data) 73 | print(output.shape) -------------------------------------------------------------------------------- /ObjectDetection/YOLOv2.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | def Conv3x3BNReLU(in_channels,out_channels,padding=0): 5 | return nn.Sequential( 6 | nn.Conv2d(in_channels=in_channels,out_channels=out_channels,kernel_size=3,stride=1,padding=1), 7 | nn.BatchNorm2d(out_channels), 8 | nn.ReLU6(inplace=True) 9 | ) 10 | 11 | def Conv1x1BNReLU(in_channels,out_channels): 12 | return nn.Sequential( 13 | nn.Conv2d(in_channels=in_channels,out_channels=out_channels,kernel_size=1,stride=1,padding=0), 14 | nn.BatchNorm2d(out_channels), 15 | nn.ReLU6(inplace=True) 16 | ) 17 | 18 | class Darknet19(nn.Module): 19 | def __init__(self, num_classes=1000): 20 | super(Darknet19, self).__init__() 21 | 22 | self.feature = nn.Sequential( 23 | Conv3x3BNReLU(in_channels=3, out_channels=32), 24 | nn.MaxPool2d(kernel_size=2,stride=2), 25 | Conv3x3BNReLU(in_channels=32, out_channels=64), 26 | nn.MaxPool2d(kernel_size=2, stride=2), 27 | Conv3x3BNReLU(in_channels=64, out_channels=128), 28 | Conv1x1BNReLU(in_channels=128, out_channels=64), 29 | Conv3x3BNReLU(in_channels=64, out_channels=128), 30 | nn.MaxPool2d(kernel_size=2, stride=2), 31 | Conv3x3BNReLU(in_channels=128, out_channels=256), 32 | Conv1x1BNReLU(in_channels=256, out_channels=128), 33 | Conv3x3BNReLU(in_channels=128, out_channels=256), 34 | nn.MaxPool2d(kernel_size=2, stride=2), 35 | Conv3x3BNReLU(in_channels=256, out_channels=512), 36 | Conv1x1BNReLU(in_channels=512, out_channels=256), 37 | Conv3x3BNReLU(in_channels=256, out_channels=512), 38 | Conv1x1BNReLU(in_channels=512, out_channels=256), 39 | Conv3x3BNReLU(in_channels=256, out_channels=512), 40 | nn.MaxPool2d(kernel_size=2, stride=2), 41 | Conv3x3BNReLU(in_channels=512, out_channels=1024), 42 | Conv1x1BNReLU(in_channels=1024, out_channels=512), 43 | Conv3x3BNReLU(in_channels=512, out_channels=1024), 44 | Conv1x1BNReLU(in_channels=1024, out_channels=512), 45 | Conv3x3BNReLU(in_channels=512, out_channels=1024), 46 | ) 47 | 48 | self.classifier = nn.Sequential( 49 | Conv1x1BNReLU(in_channels=1024, out_channels=num_classes), 50 | nn.AvgPool2d(kernel_size=7,stride=1), 51 | ) 52 | self.softmax = nn.Softmax(dim=1) 53 | 54 | def forward(self, x): 55 | x = self.feature(x) 56 | x = self.classifier(x) 57 | x = torch.squeeze(x, dim=3).contiguous() 58 | x = torch.squeeze(x, dim=2).contiguous() 59 | out = self.softmax(x) 60 | return out 61 | 62 | if __name__ == '__main__': 63 | model = Darknet19() 64 | print(model) 65 | 66 | input = torch.randn(1,3,224,224) 67 | out = model(input) 68 | print(out.shape) -------------------------------------------------------------------------------- /ObjectDetection/YOLOv3.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | def Conv3x3BNReLU(in_channels,out_channels,stride=1): 5 | return nn.Sequential( 6 | 
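# 3x3 conv + BN + ReLU6; the stride-2 version of this block replaces pooling for all
# downsampling in the Darknet-53 backbone below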
nn.Conv2d(in_channels=in_channels,out_channels=out_channels,kernel_size=3,stride=stride,padding=1), 7 | nn.BatchNorm2d(out_channels), 8 | nn.ReLU6(inplace=True) 9 | ) 10 | 11 | def Conv1x1BNReLU(in_channels,out_channels): 12 | return nn.Sequential( 13 | nn.Conv2d(in_channels=in_channels,out_channels=out_channels,kernel_size=1,stride=1,padding=0), 14 | nn.BatchNorm2d(out_channels), 15 | nn.ReLU6(inplace=True) 16 | ) 17 | 18 | class Residual(nn.Module): 19 | def __init__(self, nchannels): 20 | super(Residual, self).__init__() 21 | mid_channels = nchannels // 2 22 | self.conv1x1 = Conv1x1BNReLU(in_channels=nchannels, out_channels=mid_channels) 23 | self.conv3x3 = Conv3x3BNReLU(in_channels=mid_channels, out_channels=nchannels) 24 | 25 | def forward(self, x): 26 | out = self.conv3x3(self.conv1x1(x)) 27 | return out + x 28 | 29 | class Darknet53(nn.Module): # Darknet-53 backbone used by YOLOv3 (residual blocks laid out as [1, 2, 8, 8, 4]) 30 | def __init__(self, num_classes=1000): 31 | super(Darknet53, self).__init__() 32 | self.first_conv = Conv3x3BNReLU(in_channels=3, out_channels=32) 33 | 34 | self.block1 = self._make_layers(in_channels=32,out_channels=64, block_num=1) 35 | self.block2 = self._make_layers(in_channels=64,out_channels=128, block_num=2) 36 | self.block3 = self._make_layers(in_channels=128,out_channels=256, block_num=8) 37 | self.block4 = self._make_layers(in_channels=256,out_channels=512, block_num=8) 38 | self.block5 = self._make_layers(in_channels=512,out_channels=1024, block_num=4) 39 | 40 | self.avg_pool = nn.AvgPool2d(kernel_size=8,stride=1) 41 | self.linear = nn.Linear(in_features=1024,out_features=num_classes) 42 | self.softmax = nn.Softmax(dim=1) 43 | 44 | def _make_layers(self, in_channels,out_channels, block_num): 45 | _layers = [] 46 | _layers.append(Conv3x3BNReLU(in_channels=in_channels, out_channels=out_channels, stride=2)) 47 | for _ in range(block_num): 48 | _layers.append(Residual(nchannels=out_channels)) 49 | return nn.Sequential(*_layers) 50 | 51 | def forward(self, x): 52 | x = self.first_conv(x) 53 | x = self.block1(x) 54 | x = self.block2(x) 55 | x = self.block3(x) 56 | x = self.block4(x) 57 | x = self.block5(x) 58 | 59 | x = self.avg_pool(x) 60 | x = x.view(x.size(0),-1) 61 | x = self.linear(x) 62 | out = self.softmax(x) 63 | return out 64 | 65 | if __name__ == '__main__': 66 | model = Darknet53() 67 | print(model) 68 | 69 | input = torch.randn(1,3,256,256) 70 | out = model(input) 71 | print(out.shape) -------------------------------------------------------------------------------- /Others/DynamicReLU.py: -------------------------------------------------------------------------------- 1 | # !/usr/bin/env python 2 | # -- coding: utf-8 -- 3 | # @Time : 2020/9/11 13:57 4 | # @Author : liumin 5 | # @File : DynamicReLU.py 6 | 7 | import torch 8 | import torch.nn as nn 9 | import torchvision 10 | import torch.nn.functional as F 11 | 12 | class BatchNorm(nn.Module): 13 | def forward(self, x): 14 | return 2 * x - 1  # fixed affine rescale of the sigmoid output from [0,1] to [-1,1]; despite the name, this is not batch normalization 15 | 16 | 17 | class DynamicReLU_A(nn.Module): 18 | def __init__(self, channels, K=2,ratio=6): 19 | super(DynamicReLU_A, self).__init__() 20 | mid_channels = 2*K 21 | 22 | self.K = K 23 | self.lambdas = torch.Tensor([1.]*K + [0.5]*K).float() 24 | self.init_v = torch.Tensor([1.] 
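# init_v biases the 2K coefficients toward (a1 = 1, everything else 0), so the activation
# starts out close to a plain ReLU; the lambdas above scale how far the learned residuals
# can move the slopes and intercepts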
+ [0.]*(2*K - 1)).float() 25 | 26 | self.avg_pool = nn.AdaptiveAvgPool2d(output_size=1) 27 | self.dynamic = nn.Sequential( 28 | nn.Linear(in_features=channels,out_features=channels // ratio), 29 | nn.ReLU(inplace=True), 30 | nn.Linear(in_features=channels // ratio, out_features=mid_channels), 31 | nn.Sigmoid(), 32 | BatchNorm() 33 | ) 34 | 35 | def forward(self, x): 36 | b, c, _, _ = x.size() 37 | y = self.avg_pool(x).view(b, c) 38 | z = self.dynamic(y) 39 | 40 | relu_coefs = z.view(-1, 2 * self.K) * self.lambdas + self.init_v 41 | x_perm = x.transpose(0, -1).unsqueeze(-1) 42 | output = x_perm * relu_coefs[:, :self.K] + relu_coefs[:, self.K:] 43 | 44 | output = torch.max(output, dim=-1)[0].transpose(0, -1) 45 | return output 46 | 47 | 48 | class DynamicReLU_B(nn.Module): 49 | def __init__(self, channels, K=2,ratio=6): 50 | super(DynamicReLU_B, self).__init__() 51 | mid_channels = 2*K*channels 52 | 53 | self.K = K 54 | self.channels = channels 55 | self.lambdas = torch.Tensor([1.]*K + [0.5]*K).float() 56 | self.init_v = torch.Tensor([1.] + [0.]*(2*K - 1)).float() 57 | 58 | self.avg_pool = nn.AdaptiveAvgPool2d(output_size=1) 59 | self.dynamic = nn.Sequential( 60 | nn.Linear(in_features=channels,out_features=channels // ratio), 61 | nn.ReLU(inplace=True), 62 | nn.Linear(in_features=channels // ratio, out_features=mid_channels), 63 | nn.Sigmoid(), 64 | BatchNorm() 65 | ) 66 | 67 | def forward(self, x): 68 | b, c, _, _ = x.size() 69 | y = self.avg_pool(x).view(b, c) 70 | z = self.dynamic(y) 71 | 72 | relu_coefs = z.view(-1, self.channels, 2 * self.K) * self.lambdas + self.init_v 73 | x_perm = x.permute(2, 3, 0, 1).unsqueeze(-1) 74 | output = x_perm * relu_coefs[:, :, :self.K] + relu_coefs[:, :, self.K:] 75 | output = torch.max(output, dim=-1)[0].permute(2, 3, 0, 1) 76 | return output 77 | 78 | if __name__=='__main__': 79 | model = DynamicReLU_B(64) 80 | print(model) 81 | 82 | input = torch.randn(1, 64, 56, 56) 83 | out = model(input) 84 | print(out.shape) -------------------------------------------------------------------------------- /Others/PyramidalConvolution.py: -------------------------------------------------------------------------------- 1 | # !/usr/bin/env python 2 | # -- coding: utf-8 -- 3 | # @Time : 2020/6/28 13:40 4 | # @Author : liumin 5 | # @File : PyramidalConvolution.py -------------------------------------------------------------------------------- /PortraitSegmentation/SINet.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shanglianlm0525/PyTorch-Networks/a6b6dd4b7876ba8473a08e116485a0492a88cd48/PortraitSegmentation/SINet.py -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PyTorch-Models 2 | Pytorch implementation of cnn network 3 | 4 | 5 | 6 | 7 | **Classical network** 8 | 9 | - **AlexNet:** 10 | 11 | - **VGG:** 12 | 13 | - **ResNet:** 14 | 15 | - **InceptionV1:** 16 | 17 | - **InceptionV2 and InceptionV3:** 18 | 19 | - **InceptionV4 and Inception-ResNet:** 20 | 21 | 22 | 23 | **Lightweight network** 24 | 25 | - **MobileNets:** 26 | - **MobileNetV2:** 27 | - **MobileNetV3:** 28 | - **ShuffleNet:** 29 | - **ShuffleNet V2:** 30 | - **SqueezeNet** 31 | - **Xception** 32 | - **MixNet** 33 | - **GhostNet** 34 | 35 | 36 | 37 | **ObjectDetection-network** 38 | 39 | - **SSD:** 40 | - **YOLO:** 41 | - **YOLOv2:** 42 | - **YOLOv3:** 43 | - **FCOS:** 44 | - **FPN:** 45 | - **RetinaNet** 
46 | - **Objects as Points:** 47 | - **FSAF:** 48 | - **CenterNet** 49 | - **FoveaBox** 50 | 51 | 52 | 53 | **Semantic Segmentation** 54 | 55 | - **FCN** 56 | 57 | - **Fast-SCNN** 58 | 59 | - **LEDNet:** 60 | 61 | - **LRNNet** 62 | 63 | - **FisheyeMODNet:** 64 | 65 | 66 | 67 | **Instance Segmentation** 68 | 69 | - **PolarMask** 70 | 71 | 72 | 73 | **FaceDetectorAndRecognition** 74 | 75 | - **FaceBoxes** 76 | - **LFFD** 77 | - **VarGFaceNet** 78 | 79 | 80 | 81 | **HumanPoseEstimation** 82 | 83 | - **Stacked Hourglass Networks** 84 | - **Simple Baselines** 85 | - **LPN** 86 | 87 | 88 | 89 | ## Star History 90 | 91 | [![Star History Chart](https://api.star-history.com/svg?repos=shanglianlm0525/PyTorch-Networks&type=Date)](https://star-history.com/#shanglianlm0525/PyTorch-Networks&Date) 92 | 93 | 94 | -------------------------------------------------------------------------------- /SemanticSegmentation/FCN.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torchvision 4 | 5 | class FCN8s(nn.Module): 6 | def __init__(self, num_classes): 7 | super(FCN8s, self).__init__() 8 | vgg = torchvision.models.vgg16() 9 | 10 | features = list(vgg.features.children()) 11 | 12 | self.padd = nn.ZeroPad2d([100,100,100,100]) 13 | 14 | self.pool3 = nn.Sequential(*features[:17]) 15 | self.pool4 = nn.Sequential(*features[17:24]) 16 | self.pool5 = nn.Sequential(*features[24:]) 17 | 18 | self.pool3_conv1x1 = nn.Conv2d(256, num_classes, kernel_size=1) 19 | self.pool4_conv1x1 = nn.Conv2d(512, num_classes, kernel_size=1) 20 | 21 | self.output5 = nn.Sequential( 22 | nn.Conv2d(512, 4096, kernel_size=7), 23 | nn.ReLU(inplace=True), 24 | nn.Dropout(), 25 | nn.Conv2d(4096, 4096, kernel_size=1), 26 | nn.ReLU(inplace=True), 27 | nn.Dropout(), 28 | nn.Conv2d(4096, num_classes, kernel_size=1), 29 | ) 30 | 31 | self.up_pool3_out = nn.ConvTranspose2d(num_classes, num_classes, kernel_size=16, stride=8) 32 | self.up_pool4_out = nn.ConvTranspose2d(num_classes, num_classes, kernel_size=4, stride=2) 33 | self.up_pool5_out = nn.ConvTranspose2d(num_classes, num_classes, kernel_size=4, stride=2) 34 | 35 | def forward(self, x): 36 | _, _, h, w = x.size()  # x is [N, C, H, W] 37 | 38 | x = self.padd(x) 39 | pool3 = self.pool3(x) 40 | pool4 = self.pool4(pool3) 41 | pool5 = self.pool5(pool4) 42 | 43 | output5 = self.up_pool5_out(self.output5(pool5)) 44 | 45 | pool4_out = self.pool4_conv1x1(0.01 * pool4) 46 | output4 = self.up_pool4_out(pool4_out[:,:,5:(5 + output5.size()[2]) ,5:(5 + output5.size()[3])]+output5) 47 | 48 | pool3_out = self.pool3_conv1x1(0.0001 * pool3) 49 | output3 = pool3_out[:, :, 9:(9 + output4.size()[2]), 9:(9 + output4.size()[3])] + output4  # fuse with the pool3 features at 1/8 resolution 50 | 51 | out = self.up_pool3_out(output3)  # a single 8x upsampling back to the padded input size 52 | 53 | out = out[:, :, 31: (31 + h), 31: (31 + w)].contiguous() 54 | return out 55 | 56 | 57 | if __name__ == '__main__': 58 | model = FCN8s(num_classes=20) 59 | print(model) 60 | 61 | input = torch.randn(1,3,224,224) 62 | output = model(input) 63 | print(output.shape) 64 | 65 | -------------------------------------------------------------------------------- /SemanticSegmentation/FisheyeMODNet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | def Conv3x3BNReLU(in_channels,out_channels,stride,groups): 5 | return nn.Sequential( 6 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=3, stride=stride, padding=1,groups=groups), 7 | 
nn.BatchNorm2d(out_channels), 8 | nn.ReLU6(inplace=True) 9 | ) 10 | 11 | def Conv1x1BNReLU(in_channels,out_channels,groups): 12 | return nn.Sequential( 13 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=1,groups=groups), 14 | nn.BatchNorm2d(out_channels), 15 | nn.ReLU6(inplace=True) 16 | ) 17 | 18 | def Conv1x1BN(in_channels,out_channels,groups): 19 | return nn.Sequential( 20 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=1,groups=groups), 21 | nn.BatchNorm2d(out_channels) 22 | ) 23 | 24 | class ChannelShuffle(nn.Module): 25 | def __init__(self, groups): 26 | super(ChannelShuffle, self).__init__() 27 | self.groups = groups 28 | 29 | def forward(self, x): 30 | '''Channel shuffle: [N,C,H,W] -> [N,g,C/g,H,W] -> [N,C/g,g,H,W] -> [N,C,H,W]''' 31 | N, C, H, W = x.size() 32 | g = self.groups 33 | return x.view(N, g, C // g, H, W).permute(0, 2, 1, 3, 4).contiguous().view(N, C, H, W) 34 | 35 | 36 | class ShuffleNetUnits(nn.Module): 37 | def __init__(self, in_channels, out_channels, stride, groups): 38 | super(ShuffleNetUnits, self).__init__() 39 | self.stride = stride 40 | out_channels = out_channels - in_channels if self.stride>1 else out_channels 41 | mid_channels = out_channels // 4 42 | 43 | self.bottleneck = nn.Sequential( 44 | Conv1x1BNReLU(in_channels, mid_channels,groups), 45 | ChannelShuffle(groups), 46 | Conv3x3BNReLU(mid_channels, mid_channels, stride,groups), 47 | Conv1x1BN(mid_channels, out_channels,groups) 48 | ) 49 | if self.stride>1: 50 | self.shortcut = nn.MaxPool2d(kernel_size=3,stride=2,padding=1) 51 | 52 | self.relu = nn.ReLU6(inplace=True) 53 | 54 | def forward(self, x): 55 | out = self.bottleneck(x) 56 | out = torch.cat([self.shortcut(x), out], dim=1) if self.stride > 1 else (out + x) 57 | return self.relu(out) 58 | 59 | class FisheyeMODNet(nn.Module): 60 | def __init__(self, groups=1, num_classes=2): 61 | super(FisheyeMODNet, self).__init__() 62 | layers = [4, 8, 4] 63 | 64 | self.stage1a = nn.Sequential( 65 | nn.Conv2d(in_channels=3, out_channels=24, kernel_size=3,stride=2, padding=1), 66 | nn.MaxPool2d(kernel_size=2,stride=2), 67 | ) 68 | self.stage2a = self._make_layer(24, 120, groups, layers[0]) 69 | 70 | self.stage1b = nn.Sequential( 71 | nn.Conv2d(in_channels=3, out_channels=24, kernel_size=3, stride=2, padding=1), 72 | nn.MaxPool2d(kernel_size=2, stride=2), 73 | ) 74 | self.stage2b = self._make_layer(24, 120, groups, layers[0]) 75 | 76 | self.stage3 = self._make_layer(240, 480, groups, layers[1]) 77 | self.stage4 = self._make_layer(480, 960, groups, layers[2]) 78 | 79 | self.adapt_conv3 = nn.Conv2d(960, num_classes, kernel_size=1) 80 | self.adapt_conv2 = nn.Conv2d(480, num_classes, kernel_size=1) 81 | self.adapt_conv1 = nn.Conv2d(240, num_classes, kernel_size=1) 82 | 83 | self.up_sampling3 = nn.ConvTranspose2d(in_channels=num_classes, out_channels=num_classes, kernel_size=4, stride=2, padding=1) 84 | self.up_sampling2 = nn.ConvTranspose2d(in_channels=num_classes, out_channels=num_classes, kernel_size=4, stride=2, padding=1) 85 | self.up_sampling1 = nn.ConvTranspose2d(in_channels=num_classes, out_channels=num_classes, kernel_size=16, stride=8, padding=4) 86 | 87 | self.softmax = nn.Softmax(dim=1) 88 | 89 | self.init_params() 90 | 91 | def _make_layer(self, in_channels, out_channels, groups, block_num): 92 | layers = [] 93 | layers.append(ShuffleNetUnits(in_channels=in_channels, out_channels=out_channels, stride=2, groups=groups)) 94 | for idx in range(1, block_num): 95 | 
layers.append(ShuffleNetUnits(in_channels=out_channels, out_channels=out_channels, stride=1, groups=groups)) 96 | return nn.Sequential(*layers) 97 | 98 | def init_params(self): 99 | for m in self.modules(): 100 | if isinstance(m, nn.Conv2d) or isinstance(m, nn.ConvTranspose2d): 101 | nn.init.kaiming_normal_(m.weight) 102 | if m.bias is not None: nn.init.constant_(m.bias, 0) 103 | elif isinstance(m, nn.BatchNorm2d):  # constant-1 weights only make sense for BN; this model has no nn.Linear layers 104 | nn.init.constant_(m.weight, 1) 105 | nn.init.constant_(m.bias, 0) 106 | 107 | def forward(self, x, y): 108 | x = self.stage2a(self.stage1a(x)) 109 | y = self.stage2b(self.stage1b(y)) 110 | feature1 = torch.cat([x, y], dim=1) 111 | feature2 = self.stage3(feature1) 112 | feature3 = self.stage4(feature2) 113 | 114 | out3 = self.up_sampling3(self.adapt_conv3(feature3)) 115 | out2 = self.up_sampling2(self.adapt_conv2(feature2) + out3) 116 | out1 = self.up_sampling1(self.adapt_conv1(feature1) + out2) 117 | 118 | out = self.softmax(out1) 119 | return out 120 | 121 | 122 | if __name__ == '__main__': 123 | model = FisheyeMODNet() 124 | 125 | input1 = torch.randn(1, 3, 640, 640) 126 | input2 = torch.randn(1, 3, 640, 640) 127 | 128 | out = model(input1, input2) 129 | print(out.shape) -------------------------------------------------------------------------------- /SemanticSegmentation/ICNet.py: -------------------------------------------------------------------------------- 1 | # !/usr/bin/env python 2 | # -- coding: utf-8 -- 3 | # @Time : 2020/10/28 16:41 4 | # @Author : liumin 5 | # @File : ICNet.py 6 | 7 | import torch 8 | import torch.nn as nn 9 | import torch.nn.functional as F 10 | import torchvision 11 | 12 | __all__ = ["ICNet"] 13 | 14 | 15 | def Conv1x1BN(in_channels,out_channels): 16 | return nn.Sequential( 17 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=1, bias=False), 18 | nn.BatchNorm2d(out_channels) 19 | ) 20 | 21 | def Conv1x1BNReLU(in_channels,out_channels): 22 | return nn.Sequential( 23 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=1, bias=False), 24 | nn.BatchNorm2d(out_channels), 25 | nn.ReLU(inplace=True) 26 | ) 27 | 28 | 29 | def Conv3x3BN(in_channels,out_channels,stride,dilation=1): 30 | return nn.Sequential( 31 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=3, stride=stride, padding=dilation,dilation=dilation, bias=False), 32 | nn.BatchNorm2d(out_channels) 33 | ) 34 | 35 | def Conv3x3BNReLU(in_channels,out_channels,stride,dilation=1): 36 | return nn.Sequential( 37 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=3, stride=stride, padding=dilation,dilation=dilation, bias=False), 38 | nn.BatchNorm2d(out_channels), 39 | nn.ReLU(inplace=True) 40 | ) 41 | 42 | 43 | class CascadeFeatureFusion(nn.Module): 44 | def __init__(self,low_channels, high_channels, out_channels, num_classes): 45 | super(CascadeFeatureFusion, self).__init__() 46 | 47 | self.conv_low = Conv3x3BNReLU(low_channels,out_channels,1,dilation=2) 48 | self.conv_high = Conv3x3BNReLU(high_channels,out_channels,1,dilation=1) 49 | self.relu = nn.ReLU(inplace=True) 50 | self.conv_low_cls = nn.Conv2d(out_channels, num_classes, 1, bias=False) 51 | 52 | def forward(self, x_low, x_high): 53 | x_low = F.interpolate(x_low, size=x_high.size()[2:], mode='bilinear', align_corners=True) 54 | x_low = self.conv_low(x_low) 55 | x_high = self.conv_high(x_high) 56 | out = self.relu(x_low + x_high) 57 | x_low_cls = self.conv_low_cls(x_low) 58 | return out, x_low_cls 59 | 60 | 61 | class Backbone(nn.Module): 62 | def 
__init__(self, pyramids=[1,2,3,6]): 63 | super(Backbone, self).__init__() 64 | self.pretrained = torchvision.models.resnet50(pretrained=True) 65 | 66 | def forward(self, x): 67 | x = self.pretrained.conv1(x) 68 | x = self.pretrained.bn1(x) 69 | x = self.pretrained.relu(x) 70 | x = self.pretrained.maxpool(x) 71 | c1 = self.pretrained.layer1(x) 72 | c2 = self.pretrained.layer2(c1) 73 | c3 = self.pretrained.layer3(c2) 74 | c4 = self.pretrained.layer4(c3) 75 | return c1, c2, c3, c4 76 | 77 | class PyramidPoolingModule(nn.Module): 78 | def __init__(self, pyramids=[1,2,3,6]): 79 | super(PyramidPoolingModule, self).__init__() 80 | self.pyramids = pyramids 81 | 82 | def forward(self, x): 83 | feat = x 84 | height, width = x.shape[2:] 85 | for bin_size in self.pyramids: 86 | feat_x = F.adaptive_avg_pool2d(x, output_size=bin_size) 87 | feat_x = F.interpolate(feat_x, size=(height, width), mode='bilinear', align_corners=True) 88 | feat = feat + feat_x 89 | return feat 90 | 91 | 92 | class ICNet(nn.Module): 93 | def __init__(self, num_classes): 94 | super(ICNet, self).__init__() 95 | 96 | self.conv_sub1 = nn.Sequential( 97 | Conv3x3BNReLU(3, 32, 2), 98 | Conv3x3BNReLU(32, 32, 2), 99 | Conv3x3BNReLU(32, 64, 2) 100 | ) 101 | self.backbone = Backbone() 102 | self.ppm = PyramidPoolingModule() 103 | 104 | self.cff_12 = CascadeFeatureFusion(128, 64, 128, num_classes) 105 | self.cff_24 = CascadeFeatureFusion(2048, 512, 128, num_classes) 106 | 107 | self.conv_cls = nn.Conv2d(128, num_classes, 1, bias=False) 108 | 109 | def forward(self, x): 110 | # sub 1 111 | x_sub1 = self.conv_sub1(x) 112 | # sub 2 113 | x_sub2 = F.interpolate(x, scale_factor=0.5, mode='bilinear') 114 | _, x_sub2, _, _ = self.backbone(x_sub2) 115 | # sub 4 116 | x_sub4 = F.interpolate(x, scale_factor=0.25, mode='bilinear') 117 | _, _, _, x_sub4 = self.backbone(x_sub4) 118 | 119 | # add PyramidPoolingModule 120 | x_sub4 = self.ppm(x_sub4) 121 | 122 | outs = list() 123 | x_cff_24, x_24_cls = self.cff_24(x_sub4, x_sub2) 124 | outs.append(x_24_cls) 125 | # x_cff_12, x_12_cls = self.cff_12(x_sub2, x_sub1) 126 | x_cff_12, x_12_cls = self.cff_12(x_cff_24, x_sub1) 127 | outs.append(x_12_cls) 128 | 129 | up_x2 = F.interpolate(x_cff_12, scale_factor=2, mode='bilinear') 130 | up_x2 = self.conv_cls(up_x2) 131 | outs.append(up_x2) 132 | up_x8 = F.interpolate(up_x2, scale_factor=4, mode='bilinear') 133 | outs.append(up_x8) 134 | # 1 -> 1/4 -> 1/8 -> 1/16 135 | outs.reverse() 136 | 137 | return outs 138 | 139 | 140 | if __name__ == '__main__': 141 | model = ICNet(num_classes=19) 142 | print(model) 143 | 144 | input = torch.randn(1,3,512,512) 145 | output = model(input) 146 | print(output[0].shape) 147 | print(output[1].shape) 148 | print(output[2].shape) 149 | print(output[3].shape) -------------------------------------------------------------------------------- /SemanticSegmentation/LRNnet.py: -------------------------------------------------------------------------------- 1 | # !/usr/bin/env python 2 | # -- coding: utf-8 -- 3 | # @Time : 2020/6/8 15:31 4 | # @Author : liumin 5 | # @File : LRNnet.py -------------------------------------------------------------------------------- /SemanticSegmentation/LWnet.py: -------------------------------------------------------------------------------- 1 | # !/usr/bin/env python 2 | # -- coding: utf-8 -- 3 | # @Time : 2020/6/28 18:04 4 | # @Author : liumin 5 | # @File : LWnet.py 6 | 7 | import torch 8 | import torch.nn as nn 9 | import torchvision 10 | import torch.nn.functional as F 11 | 12 | def 
ConvBNReLU(in_channels,out_channels,kernel_size,stride,padding,dilation=1,groups=1): 13 | return nn.Sequential( 14 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=stride, padding=padding,dilation=dilation,groups=groups, bias=False), 15 | nn.BatchNorm2d(out_channels), 16 | nn.ReLU6(inplace=True) 17 | ) 18 | 19 | 20 | def ConvBN(in_channels,out_channels,kernel_size,stride,padding,dilation=1,groups=1): 21 | return nn.Sequential( 22 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=stride, padding=padding,dilation=dilation,groups=groups, bias=False), 23 | nn.BatchNorm2d(out_channels) 24 | ) 25 | 26 | 27 | def Conv1x1BNReLU(in_channels,out_channels): 28 | return nn.Sequential( 29 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=1, bias=False), 30 | nn.BatchNorm2d(out_channels), 31 | nn.ReLU6(inplace=True) 32 | ) 33 | 34 | 35 | def Conv1x1BN(in_channels,out_channels): 36 | return nn.Sequential( 37 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=1, bias=False), 38 | nn.BatchNorm2d(out_channels) 39 | ) 40 | 41 | class LWbottleneck(nn.Module): 42 | def __init__(self, in_channels,out_channels,stride): 43 | super(LWbottleneck, self).__init__() 44 | self.stride = stride 45 | self.pyramid_list = nn.ModuleList() 46 | self.pyramid_list.append(ConvBNReLU(in_channels, in_channels, kernel_size=[5,1], stride=stride, padding=[2,0])) 47 | self.pyramid_list.append(ConvBNReLU(in_channels, in_channels, kernel_size=[1,5], stride=stride, padding=[0,2])) 48 | self.pyramid_list.append(ConvBNReLU(in_channels, in_channels, kernel_size=[3,1], stride=stride, padding=[1,0])) 49 | self.pyramid_list.append(ConvBNReLU(in_channels, in_channels, kernel_size=[1,3], stride=stride, padding=[0,1])) 50 | self.pyramid_list.append(ConvBNReLU(in_channels, in_channels, kernel_size=[2,1], stride=stride, padding=[1,0])) 51 | self.pyramid_list.append(ConvBNReLU(in_channels, in_channels, kernel_size=[1,2], stride=stride, padding=[0,1])) 52 | self.pyramid_list.append(ConvBNReLU(in_channels, in_channels, kernel_size=2, stride=stride, padding=1)) 53 | self.pyramid_list.append(ConvBNReLU(in_channels, in_channels, kernel_size=3, stride=stride, padding=1)) 54 | 55 | self.shrink = Conv1x1BN(in_channels*8,out_channels) 56 | 57 | def forward(self, x): 58 | b,c,w,h = x.shape 59 | if self.stride>1: 60 | w, h = w//self.stride,h//self.stride 61 | outputs = [] 62 | for pyconv in self.pyramid_list: 63 | pyconv_x = pyconv(x) 64 | if x.shape[2:] != pyconv_x.shape[2:]: 65 | pyconv_x = pyconv_x[:,:,:w,:h] 66 | outputs.append(pyconv_x) 67 | out = torch.cat(outputs, 1) 68 | return self.shrink(out) 69 | 70 | 71 | class Encoder(nn.Module): 72 | def __init__(self): 73 | super(Encoder, self).__init__() 74 | 75 | self.stage1 = nn.Sequential( 76 | ConvBNReLU(in_channels=3, out_channels=32, kernel_size=3, stride=2, padding=1), 77 | Conv1x1BN(in_channels=32, out_channels=16), 78 | ) 79 | self.stage2 = nn.Sequential( 80 | LWbottleneck(in_channels=16,out_channels=24,stride=2), 81 | LWbottleneck(in_channels=24, out_channels=24, stride=1), 82 | ) 83 | self.stage3 = nn.Sequential( 84 | LWbottleneck(in_channels=24, out_channels=32, stride=2), 85 | LWbottleneck(in_channels=32, out_channels=32, stride=1), 86 | ) 87 | self.stage4 = nn.Sequential( 88 | LWbottleneck(in_channels=32, out_channels=32, stride=2) 89 | ) 90 | self.stage5 = nn.Sequential( 91 | LWbottleneck(in_channels=32, out_channels=64, stride=2), 92 
| LWbottleneck(in_channels=64, out_channels=64, stride=1), 93 | LWbottleneck(in_channels=64, out_channels=64, stride=1), 94 | LWbottleneck(in_channels=64, out_channels=64, stride=1), 95 | ) 96 | 97 | self.conv1 = Conv1x1BN(in_channels=64, out_channels=320) 98 | 99 | def forward(self, x): 100 | x = self.stage1(x) 101 | x = self.stage2(x) 102 | x = F.pad(x,pad=(0,1,0,1),mode='constant',value=0) 103 | out1 = x = self.stage3(x) 104 | x = self.stage4(x) 105 | x = F.pad(x, pad=(0, 1, 0, 1), mode='constant', value=0) 106 | x = self.stage5(x) 107 | out2 = self.conv1(x) 108 | return out1,out2 109 | 110 | class ASPP(nn.Module): 111 | def __init__(self, in_channels, out_channels): 112 | super(ASPP, self).__init__() 113 | self.depthwise1 = ConvBNReLU(in_channels, out_channels, 3, 1, 6, dilation=6)  # despite the names, these are ordinary (non-grouped) dilated 3x3 convs 114 | self.depthwise2 = ConvBNReLU(in_channels, out_channels, 3, 1, 12, dilation=12) 115 | self.depthwise3 = ConvBNReLU(in_channels, out_channels, 3, 1, 18, dilation=18) 116 | self.pointconv = Conv1x1BN(in_channels, out_channels) 117 | 118 | def forward(self, x): 119 | x1 = self.depthwise1(x) 120 | x2 = self.depthwise2(x) 121 | x3 = self.depthwise3(x) 122 | x4 = self.pointconv(x) 123 | return torch.cat([x1,x2,x3,x4], dim=1) 124 | 125 | class Decoder(nn.Module): 126 | def __init__(self,num_classes=2): 127 | super(Decoder, self).__init__() 128 | self.aspp = ASPP(320, 128) 129 | self.pconv1 = Conv1x1BN(128*4, 512) 130 | 131 | self.pconv2 = Conv1x1BN(512+32, 128) 132 | self.pconv3 = Conv1x1BN(128, num_classes) 133 | 134 | def forward(self, x, y): 135 | x = self.pconv1(self.aspp(x)) 136 | x = F.interpolate(x,y.shape[2:],align_corners=True,mode='bilinear') 137 | x = torch.cat([x,y], dim=1) 138 | out = self.pconv3(self.pconv2(x)) 139 | return out 140 | 141 | class LW_Network(nn.Module): 142 | def __init__(self, num_classes=2): 143 | super(LW_Network, self).__init__() 144 | self.encoder = Encoder() 145 | self.decoder = Decoder(num_classes) 146 | def forward(self, x): 147 | x1,x2 = self.encoder(x) 148 | out = self.decoder(x2,x1) 149 | return out 150 | 151 | 152 | 153 | if __name__ == '__main__': 154 | model = LW_Network() 155 | print(model) 156 | 157 | input = torch.randn(1, 3, 331, 331) 158 | output = model(input) 159 | print(output.shape) -------------------------------------------------------------------------------- /SemanticSegmentation/README.md: -------------------------------------------------------------------------------- 1 | # SemanticSegmentation-network 2 | PyTorch implementations of semantic segmentation networks 3 | 4 | 5 | 6 | **FCN:** 7 | Fully Convolutional Networks for Semantic Segmentation 8 | 9 | https://arxiv.org/pdf/1411.4038.pdf 10 | 11 | 12 | 13 | **Fast-SCNN:** 14 | 15 | Fast-SCNN: Fast Semantic Segmentation Network 16 | 17 | https://arxiv.org/pdf/1902.04502.pdf 18 | 19 | 20 | 21 | **LEDNet:** 22 | 23 | LEDNet: A Lightweight Encoder-Decoder Network for Real-time Semantic Segmentation 24 | 25 | https://arxiv.org/pdf/1905.02423.pdf 26 | 27 | 28 | 29 | **LRNNet:** 30 | 31 | LRNNet: A Light-Weighted Network with Efficient Reduced Non-Local Operation for Real-Time Semantic Segmentation 32 | 33 | https://arxiv.org/pdf/2006.02706.pdf 34 | 35 | 36 | 37 | **FisheyeMODNet:** 38 | 39 | FisheyeMODNet: Moving Object detection on Surround-view Cameras for Autonomous Driving (2019) 40 | 41 | https://arxiv.org/pdf/1908.11789v1.pdf -------------------------------------------------------------------------------- /SemanticSegmentation/SegNet.py: 
-------------------------------------------------------------------------------- 1 | # !/usr/bin/env python 2 | # -- coding: utf-8 -- 3 | # @Time : 2020/10/13 8:52 4 | # @Author : liumin 5 | # @File : segnet.py 6 | 7 | import torch 8 | import torch.nn as nn 9 | import torch.nn.functional as F 10 | from torchsummary import summary 11 | 12 | 13 | __all__ = ["SegNet"] 14 | 15 | 16 | def Conv3x3BNReLU(in_channels,out_channels,stride,groups=1): 17 | return nn.Sequential( 18 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=3, stride=stride, padding=1, groups=groups), 19 | nn.BatchNorm2d(out_channels), 20 | nn.ReLU(inplace=True) 21 | ) 22 | 23 | class DoubleConv(nn.Module): 24 | """(convolution => [BN] => ReLU) * 2""" 25 | def __init__(self, in_channels, out_channels, reverse=False): 26 | super().__init__() 27 | if reverse: 28 | self.double_conv = nn.Sequential( 29 | Conv3x3BNReLU(in_channels, in_channels, stride=1), 30 | Conv3x3BNReLU(in_channels, out_channels, stride=1) 31 | ) 32 | else: 33 | self.double_conv = nn.Sequential( 34 | Conv3x3BNReLU(in_channels, out_channels,stride=1), 35 | Conv3x3BNReLU(out_channels, out_channels, stride=1) 36 | ) 37 | 38 | def forward(self, x): 39 | return self.double_conv(x) 40 | 41 | 42 | class TripleConv(nn.Module): 43 | """(convolution => [BN] => ReLU) * 3""" 44 | def __init__(self, in_channels, out_channels, reverse=False): 45 | super().__init__() 46 | if reverse: 47 | self.triple_conv = nn.Sequential( 48 | Conv3x3BNReLU(in_channels, in_channels, stride=1), 49 | Conv3x3BNReLU(in_channels, in_channels, stride=1), 50 | Conv3x3BNReLU(in_channels, out_channels, stride=1) 51 | ) 52 | else: 53 | self.triple_conv = nn.Sequential( 54 | Conv3x3BNReLU(in_channels, out_channels,stride=1), 55 | Conv3x3BNReLU(out_channels, out_channels, stride=1), 56 | Conv3x3BNReLU(out_channels, out_channels, stride=1) 57 | ) 58 | 59 | def forward(self, x): 60 | return self.triple_conv(x) 61 | 62 | 63 | class SegNet(nn.Module): 64 | """ 65 | SegNet: A Deep Convolutional Encoder-Decoder Architecture for Image Segmentation 66 | https://arxiv.org/pdf/1511.00561.pdf 67 | """ 68 | def __init__(self,classes= 19): 69 | super(SegNet, self).__init__() 70 | 71 | self.conv_down1 = DoubleConv(3, 64) 72 | self.conv_down2 = DoubleConv(64, 128) 73 | self.conv_down3 = TripleConv(128, 256) 74 | self.conv_down4 = TripleConv(256, 512) 75 | self.conv_down5 = TripleConv(512, 512) 76 | 77 | self.conv_up5 = TripleConv(512, 512, reverse=True) 78 | self.conv_up4 = TripleConv(512, 256, reverse=True) 79 | self.conv_up3 = TripleConv(256, 128, reverse=True) 80 | self.conv_up2 = DoubleConv(128, 64, reverse=True) 81 | self.conv_up1 = Conv3x3BNReLU(64, 64, stride=1) 82 | 83 | self.outconv = nn.Conv2d(64, classes, kernel_size=3, padding=1) 84 | 85 | def forward(self, x): 86 | 87 | # Stage 1 88 | x1 = self.conv_down1(x) 89 | x1_size = x1.size() 90 | x1p, id1 = F.max_pool2d(x1, kernel_size=2, stride=2, return_indices=True) 91 | 92 | # Stage 2 93 | x2 = self.conv_down2(x1p) 94 | x2_size = x2.size() 95 | x2p, id2 = F.max_pool2d(x2, kernel_size=2, stride=2, return_indices=True) 96 | 97 | # Stage 3 98 | x3 = self.conv_down3(x2p) 99 | x3_size = x3.size() 100 | x3p, id3 = F.max_pool2d(x3, kernel_size=2, stride=2, return_indices=True) 101 | 102 | # Stage 4 103 | x4 = self.conv_down4(x3p) 104 | x4_size = x4.size() 105 | x4p, id4 = F.max_pool2d(x4, kernel_size=2, stride=2, return_indices=True) 106 | 107 | # Stage 5 108 | x5 = self.conv_down5(x4p) 109 | x5_size = x5.size() 110 | x5p, id5 = 
F.max_pool2d(x5, kernel_size=2, stride=2, return_indices=True) 111 | 112 | # Stage 5d 113 | x5d = F.max_unpool2d(x5p, id5, kernel_size=2, stride=2, output_size=x5_size) 114 | x5d = self.conv_up5(x5d) 115 | 116 | # Stage 4d 117 | x4d = F.max_unpool2d(x5d, id4, kernel_size=2, stride=2, output_size=x4_size) 118 | x4d = self.conv_up4(x4d) 119 | 120 | # Stage 3d 121 | x3d = F.max_unpool2d(x4d, id3, kernel_size=2, stride=2, output_size=x3_size) 122 | x3d = self.conv_up3(x3d) 123 | 124 | # Stage 2d 125 | x2d = F.max_unpool2d(x3d, id2, kernel_size=2, stride=2, output_size=x2_size) 126 | x2d = self.conv_up2(x2d) 127 | 128 | # Stage 1d 129 | x1d = F.max_unpool2d(x2d, id1, kernel_size=2, stride=2, output_size=x1_size) 130 | x1d = self.conv_up1(x1d) 131 | 132 | out = self.outconv(x1d) 133 | 134 | return out 135 | 136 | 137 | 138 | """print layers and params of network""" 139 | if __name__ == '__main__': 140 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 141 | model = SegNet(classes=19).to(device) 142 | summary(model,(3,800,600)) -------------------------------------------------------------------------------- /SemanticSegmentation/Unet.py: -------------------------------------------------------------------------------- 1 | # !/usr/bin/env python 2 | # -- coding: utf-8 -- 3 | # @Time : 2020/7/8 13:51 4 | # @Author : liumin 5 | # @File : unet.py 6 | 7 | import torch 8 | import torch.nn as nn 9 | import torch.nn.functional as F 10 | 11 | 12 | def Conv3x3BNReLU(in_channels,out_channels,stride,groups=1): 13 | return nn.Sequential( 14 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=3, stride=stride, padding=1, groups=groups), 15 | nn.BatchNorm2d(out_channels), 16 | nn.ReLU(inplace=True) 17 | ) 18 | 19 | 20 | def Conv1x1BNReLU(in_channels,out_channels): 21 | return nn.Sequential( 22 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=1), 23 | nn.BatchNorm2d(out_channels), 24 | nn.ReLU(inplace=True) 25 | ) 26 | 27 | 28 | def Conv1x1BN(in_channels,out_channels): 29 | return nn.Sequential( 30 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=1), 31 | nn.BatchNorm2d(out_channels) 32 | ) 33 | 34 | 35 | class DoubleConv(nn.Module): 36 | """(convolution => [BN] => ReLU) * 2""" 37 | def __init__(self, in_channels, out_channels): 38 | super().__init__() 39 | self.double_conv = nn.Sequential( 40 | Conv3x3BNReLU(in_channels, out_channels,stride=1), 41 | Conv3x3BNReLU(out_channels, out_channels, stride=1) 42 | ) 43 | 44 | def forward(self, x): 45 | return self.double_conv(x) 46 | 47 | 48 | class DownConv(nn.Module): 49 | """DoubleConv followed by 2x2 max-pooling (one downsampling step)""" 50 | def __init__(self, in_channels, out_channels,stride=2): 51 | super().__init__() 52 | self.pool = nn.MaxPool2d(kernel_size=2,stride=stride) 53 | self.double_conv = DoubleConv(in_channels, out_channels) 54 | 55 | def forward(self, x): 56 | return self.pool(self.double_conv(x)) 57 | 58 | 59 | class UpConv(nn.Module): 60 | def __init__(self, in_channels, out_channels,bilinear=True): 61 | super().__init__() 62 | self.reduce = Conv1x1BNReLU(in_channels, in_channels//2) 63 | # if bilinear, use the normal convolutions to reduce the number of channels 64 | if bilinear: 65 | self.up = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True) 66 | else: 67 | self.up = nn.ConvTranspose2d(in_channels // 2, in_channels // 2, kernel_size=2, stride=2) 68 | self.conv = DoubleConv(in_channels, out_channels) 69 | 70 | def forward(self, x1, x2): 71 | 
x1 = self.up(self.reduce(x1)) 72 | _, channel1, height1, width1 = x1.size() 73 | _, channel2, height2, width2 = x2.size() 74 | 75 | # tensors are NCHW; pad x1 so its (H, W) matches x2 before concatenation 76 | diffY = height2 - height1 77 | diffX = width2 - width1 78 | 79 | x1 = F.pad(x1, [diffX // 2, diffX - diffX // 2, diffY // 2, diffY - diffY // 2]) 80 | x = torch.cat([x2, x1], dim=1) 81 | return self.conv(x) 82 | 83 | 84 | class UNet(nn.Module): 85 | def __init__(self, num_classes): 86 | super(UNet, self).__init__() 87 | bilinear = True 88 | 89 | self.conv = DoubleConv(3, 64) 90 | self.down1 = DownConv(64, 128) 91 | self.down2 = DownConv(128, 256) 92 | self.down3 = DownConv(256, 512) 93 | self.down4 = DownConv(512, 1024) 94 | self.up1 = UpConv(1024, 512, bilinear) 95 | self.up2 = UpConv(512, 256, bilinear) 96 | self.up3 = UpConv(256, 128, bilinear) 97 | self.up4 = UpConv(128, 64, bilinear) 98 | self.outconv = nn.Conv2d(64, num_classes, kernel_size=1) 99 | 100 | def forward(self, x): 101 | x1 = self.conv(x) 102 | x2 = self.down1(x1) 103 | x3 = self.down2(x2) 104 | x4 = self.down3(x3) 105 | x5 = self.down4(x4) 106 | xx = self.up1(x5, x4) 107 | xx = self.up2(xx, x3) 108 | xx = self.up3(xx, x2) 109 | xx = self.up4(xx, x1) 110 | outputs = self.outconv(xx) 111 | return outputs 112 | 113 | 114 | if __name__ == '__main__': 115 | model = UNet(19) 116 | print(model) 117 | 118 | input = torch.randn(1,3,572,572) 119 | out = model(input) 120 | print(out.shape) -------------------------------------------------------------------------------- /Utils/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | def Conv3x3BNReLU(in_channels,out_channels,stride,padding=1): 5 | return nn.Sequential( 6 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=3, stride=stride, padding=padding), 7 | nn.BatchNorm2d(out_channels), 8 | nn.ReLU6(inplace=True) 9 | ) 10 | 11 | def Conv1x1BNReLU(in_channels,out_channels): 12 | return nn.Sequential( 13 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=1, padding=0), 14 | nn.BatchNorm2d(out_channels), 15 | nn.ReLU6(inplace=True) 16 | ) 17 | 18 | def ConvBNReLU(in_channels,out_channels,kernel_size,stride,padding=1): 19 | return nn.Sequential( 20 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=stride, padding=padding), 21 | nn.BatchNorm2d(out_channels), 22 | nn.ReLU6(inplace=True) 23 | ) 24 | 25 | def ConvBN(in_channels,out_channels,kernel_size,stride,padding=1): 26 | return nn.Sequential( 27 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=stride, padding=padding), 28 | nn.BatchNorm2d(out_channels) 29 | ) 30 | 31 | class ResidualBlock(nn.Module): 32 | def __init__(self, in_channels, out_channels): 33 | super(ResidualBlock, self).__init__() 34 | mid_channels = out_channels//2 35 | 36 | self.bottleneck = nn.Sequential( 37 | ConvBNReLU(in_channels=in_channels, out_channels=mid_channels, kernel_size=1, stride=1, padding=0),  # 1x1 convs need padding=0 so spatial dims stay aligned with the shortcut branch 38 | ConvBNReLU(in_channels=mid_channels, out_channels=mid_channels, kernel_size=3, stride=1, padding=1), 39 | ConvBNReLU(in_channels=mid_channels, out_channels=out_channels, kernel_size=1, stride=1, padding=0), 40 | ) 41 | self.shortcut = ConvBNReLU(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=1, padding=0) 42 | 43 | def forward(self, x): 44 | out = self.bottleneck(x) 45 | return out+self.shortcut(x) --------------------------------------------------------------------------------
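As a quick sanity check for the shared building blocks in Utils/utils.py above, a minimal usage sketch (it assumes the repository root is on PYTHONPATH so that Utils.utils is importable; the 224x224 input and the 64/128 channel widths are illustrative choices, not values taken from the repository):

import torch
from Utils.utils import Conv3x3BNReLU, ResidualBlock

# stem: 3x3 conv with stride 2 halves the spatial size, [1, 3, 224, 224] -> [1, 64, 112, 112]
stem = Conv3x3BNReLU(in_channels=3, out_channels=64, stride=2)
# bottleneck residual block: 1x1 reduce -> 3x3 -> 1x1 expand, added to a 1x1 projection shortcut
block = ResidualBlock(in_channels=64, out_channels=128)

x = torch.randn(1, 3, 224, 224)
y = block(stem(x))
print(y.shape)  # torch.Size([1, 128, 112, 112])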