├── README.md
├── fpn.py
└── retina_fpn.py

/README.md:
--------------------------------------------------------------------------------
# PyTorch-FPN
_Feature Pyramid Networks_ in PyTorch.

References:
[1] [Feature Pyramid Networks for Object Detection](https://arxiv.org/abs/1612.03144)
[2] [Focal Loss for Dense Object Detection](https://arxiv.org/abs/1708.02002)
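## Usage

A minimal usage sketch (assuming PyTorch is installed and you run it from the
repo root); `FPN101()` builds the lightweight `[2,2,2,2]` test backbone and
returns the `(p2, p3, p4, p5)` pyramid:

```python
import torch
from fpn import FPN101

net = FPN101()
feature_maps = net(torch.randn(1, 3, 600, 900))  # dummy image batch
for fm in feature_maps:
    print(fm.size())
```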
--------------------------------------------------------------------------------
/fpn.py:
--------------------------------------------------------------------------------
'''FPN in PyTorch.

See the paper "Feature Pyramid Networks for Object Detection" for more details.
'''
import torch
import torch.nn as nn
import torch.nn.functional as F


class Bottleneck(nn.Module):
    expansion = 4

    def __init__(self, in_planes, planes, stride=1):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, self.expansion*planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(self.expansion*planes)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion*planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion*planes)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = F.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out


class FPN(nn.Module):
    def __init__(self, block, num_blocks):
        super(FPN, self).__init__()
        self.in_planes = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)

        # Bottom-up layers
        self.layer1 = self._make_layer(block,  64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)

        # Top layer
        self.toplayer = nn.Conv2d(2048, 256, kernel_size=1, stride=1, padding=0)  # Reduce channels

        # Smooth layers
        self.smooth1 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1)
        self.smooth2 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1)
        self.smooth3 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1)

        # Lateral layers
        self.latlayer1 = nn.Conv2d(1024, 256, kernel_size=1, stride=1, padding=0)
        self.latlayer2 = nn.Conv2d( 512, 256, kernel_size=1, stride=1, padding=0)
        self.latlayer3 = nn.Conv2d( 256, 256, kernel_size=1, stride=1, padding=0)

    def _make_layer(self, block, planes, num_blocks, stride):
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes, planes, stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def _upsample_add(self, x, y):
        '''Upsample and add two feature maps.

        Args:
          x: (Tensor) top feature map to be upsampled.
          y: (Tensor) lateral feature map.

        Returns:
          (Tensor) added feature map.

        Note: in PyTorch, when the input size is odd, the feature map upsampled
        with `F.interpolate(..., scale_factor=2, mode='nearest')`
        may not match the size of the lateral feature map.

        e.g.
        original input size: [N,_,15,15] ->
        conv2d feature map size: [N,_,8,8] ->
        upsampled feature map size: [N,_,16,16]

        So we use bilinear upsampling with an explicit output size, which
        supports arbitrary sizes.
        '''
        _,_,H,W = y.size()
        return F.interpolate(x, size=(H,W), mode='bilinear', align_corners=False) + y

    def forward(self, x):
        # Bottom-up
        c1 = F.relu(self.bn1(self.conv1(x)))
        c1 = F.max_pool2d(c1, kernel_size=3, stride=2, padding=1)
        c2 = self.layer1(c1)
        c3 = self.layer2(c2)
        c4 = self.layer3(c3)
        c5 = self.layer4(c4)
        # Top-down
        p5 = self.toplayer(c5)
        p4 = self._upsample_add(p5, self.latlayer1(c4))
        p3 = self._upsample_add(p4, self.latlayer2(c3))
        p2 = self._upsample_add(p3, self.latlayer3(c2))
        # Smooth
        p4 = self.smooth1(p4)
        p3 = self.smooth2(p3)
        p2 = self.smooth3(p2)
        return p2, p3, p4, p5


def FPN101():
    # A true ResNet-101 backbone would use [3,4,23,3] blocks:
    # return FPN(Bottleneck, [3,4,23,3])
    # We use a lightweight [2,2,2,2] configuration for quick testing.
    return FPN(Bottleneck, [2,2,2,2])


def test():
    net = FPN101()
    fms = net(torch.randn(1,3,600,900))
    for fm in fms:
        print(fm.size())


if __name__ == '__main__':
    test()
--------------------------------------------------------------------------------
/retina_fpn.py:
--------------------------------------------------------------------------------
'''RetinaFPN in PyTorch.

See the paper "Focal Loss for Dense Object Detection" for more details.
'''
import torch
import torch.nn as nn
import torch.nn.functional as F


class Bottleneck(nn.Module):
    expansion = 4

    def __init__(self, in_planes, planes, stride=1):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, self.expansion*planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(self.expansion*planes)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion*planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion*planes)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = F.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out


class RetinaFPN(nn.Module):
    def __init__(self, block, num_blocks):
        super(RetinaFPN, self).__init__()
        self.in_planes = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)

        # Bottom-up layers
        self.layer2 = self._make_layer(block,  64, num_blocks[0], stride=1)
        self.layer3 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer4 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer5 = self._make_layer(block, 512, num_blocks[3], stride=2)

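        # Extra pyramid levels for RetinaNet: p6 is computed from c5 with a
        # stride-2 3x3 conv, and p7 from p6 (after a ReLU) with another
        # stride-2 3x3 conv, as described in "Focal Loss for Dense Object
        # Detection".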
        self.conv6 = nn.Conv2d(2048, 256, kernel_size=3, stride=2, padding=1)
        self.conv7 = nn.Conv2d( 256, 256, kernel_size=3, stride=2, padding=1)

        # Top layer
        self.toplayer = nn.Conv2d(2048, 256, kernel_size=1, stride=1, padding=0)  # Reduce channels

        # Smooth layers
        self.smooth1 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1)
        self.smooth2 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1)

        # Lateral layers
        self.latlayer1 = nn.Conv2d(1024, 256, kernel_size=1, stride=1, padding=0)
        self.latlayer2 = nn.Conv2d( 512, 256, kernel_size=1, stride=1, padding=0)

    def _make_layer(self, block, planes, num_blocks, stride):
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes, planes, stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def _upsample_add(self, x, y):
        '''Upsample and add two feature maps.

        Args:
          x: (Tensor) top feature map to be upsampled.
          y: (Tensor) lateral feature map.

        Returns:
          (Tensor) added feature map.

        Note: in PyTorch, when the input size is odd, the feature map upsampled
        with `F.interpolate(..., scale_factor=2, mode='nearest')`
        may not match the size of the lateral feature map.

        e.g.
        original input size: [N,_,15,15] ->
        conv2d feature map size: [N,_,8,8] ->
        upsampled feature map size: [N,_,16,16]

        So we use bilinear upsampling with an explicit output size, which
        supports arbitrary sizes.
        '''
        _,_,H,W = y.size()
        return F.interpolate(x, size=(H,W), mode='bilinear', align_corners=False) + y

    def forward(self, x):
        # Bottom-up
        c1 = F.relu(self.bn1(self.conv1(x)))
        c1 = F.max_pool2d(c1, kernel_size=3, stride=2, padding=1)
        c2 = self.layer2(c1)
        c3 = self.layer3(c2)
        c4 = self.layer4(c3)
        c5 = self.layer5(c4)
        p6 = self.conv6(c5)
        p7 = self.conv7(F.relu(p6))
        # Top-down
        p5 = self.toplayer(c5)
        p4 = self._upsample_add(p5, self.latlayer1(c4))
        p3 = self._upsample_add(p4, self.latlayer2(c3))
        # Smooth
        p4 = self.smooth1(p4)
        p3 = self.smooth2(p3)
        return p3, p4, p5, p6, p7


def RetinaFPN101():
    # A true ResNet-101 backbone would use [3,4,23,3] blocks:
    # return RetinaFPN(Bottleneck, [3,4,23,3])
    # We use a lightweight [2,2,2,2] configuration for quick testing.
    return RetinaFPN(Bottleneck, [2,2,2,2])


def test():
    net = RetinaFPN101()
    fms = net(torch.randn(1,3,600,900))
    for fm in fms:
        print(fm.size())


if __name__ == '__main__':
    test()
--------------------------------------------------------------------------------
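A quick standalone sketch (not part of the repo) of the odd-size mismatch that
`_upsample_add` works around; the shapes mirror the docstring example:

import torch
import torch.nn.functional as F

y = torch.randn(1, 1, 15, 15)                          # lateral map, odd size
w = torch.randn(1, 1, 3, 3)                            # dummy 3x3 kernel
x = F.conv2d(y, w, stride=2, padding=1)                # -> [1,1,8,8]
up = F.interpolate(x, scale_factor=2, mode='nearest')  # -> [1,1,16,16]: mismatch
fixed = F.interpolate(x, size=y.shape[2:], mode='bilinear',
                      align_corners=False)             # -> [1,1,15,15]: matches y
print(up.shape, fixed.shape)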