├── models
    ├── __init__.py
    ├── lenet.py
    ├── vgg.py
    ├── mobilenet.py
    ├── mobilenetv2.py
    ├── googlenet.py
    ├── resnext.py
    ├── dpn.py
    ├── densenet.py
    ├── shufflenet.py
    ├── senet.py
    ├── preact_resnet.py
    ├── dla_simple.py
    ├── pnasnet.py
    ├── resnet.py
    ├── dla.py
    ├── regnet.py
    ├── shufflenetv2.py
    └── efficientnet.py
├── LICENSE
├── README.md
├── utils.py
└── main.py


/models/__init__.py:
--------------------------------------------------------------------------------
 1 | from .vgg import *
 2 | from .dpn import *
 3 | from .lenet import *
 4 | from .senet import *
 5 | from .pnasnet import *
 6 | from .densenet import *
 7 | from .googlenet import *
 8 | from .shufflenet import *
 9 | from .shufflenetv2 import *
10 | from .resnet import *
11 | from .resnext import *
12 | from .preact_resnet import *
13 | from .mobilenet import *
14 | from .mobilenetv2 import *
15 | from .efficientnet import *
16 | from .regnet import *
17 | from .dla_simple import *
18 | from .dla import *
19 | 


--------------------------------------------------------------------------------
/models/lenet.py:
--------------------------------------------------------------------------------
 1 | '''LeNet in PyTorch.'''
 2 | import torch.nn as nn
 3 | import torch.nn.functional as F
 4 | 
 5 | class LeNet(nn.Module):
 6 |     def __init__(self):
 7 |         super(LeNet, self).__init__()
 8 |         self.conv1 = nn.Conv2d(3, 6, 5)
 9 |         self.conv2 = nn.Conv2d(6, 16, 5)
10 |         self.fc1   = nn.Linear(16*5*5, 120)
11 |         self.fc2   = nn.Linear(120, 84)
12 |         self.fc3   = nn.Linear(84, 10)
13 | 
14 |     def forward(self, x):
15 |         out = F.relu(self.conv1(x))
16 |         out = F.max_pool2d(out, 2)
17 |         out = F.relu(self.conv2(out))
18 |         out = F.max_pool2d(out, 2)
19 |         out = out.view(out.size(0), -1)
20 |         out = F.relu(self.fc1(out))
21 |         out = F.relu(self.fc2(out))
22 |         out = self.fc3(out)
23 |         return out
24 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2017 liukuang
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Train CIFAR10 with PyTorch
 2 | 
 3 | I'm playing with [PyTorch](http://pytorch.org/) on the CIFAR10 dataset.
 4 | 
 5 | ## Prerequisites
 6 | - Python 3.6+
 7 | - PyTorch 1.0+
 8 | 
 9 | ## Training
10 | ```
11 | # Start training with: 
12 | python main.py
13 | 
14 | # You can manually resume the training with: 
15 | python main.py --resume --lr=0.01
16 | ```
17 | 
18 | ## Accuracy
19 | | Model             | Acc.        |
20 | | ----------------- | ----------- |
21 | | [VGG16](https://arxiv.org/abs/1409.1556)              | 92.64%      |
22 | | [ResNet18](https://arxiv.org/abs/1512.03385)          | 93.02%      |
23 | | [ResNet50](https://arxiv.org/abs/1512.03385)          | 93.62%      |
24 | | [ResNet101](https://arxiv.org/abs/1512.03385)         | 93.75%      |
25 | | [RegNetX_200MF](https://arxiv.org/abs/2003.13678)     | 94.24%      |
26 | | [RegNetY_400MF](https://arxiv.org/abs/2003.13678)     | 94.29%      |
27 | | [MobileNetV2](https://arxiv.org/abs/1801.04381)       | 94.43%      |
28 | | [ResNeXt29(32x4d)](https://arxiv.org/abs/1611.05431)  | 94.73%      |
29 | | [ResNeXt29(2x64d)](https://arxiv.org/abs/1611.05431)  | 94.82%      |
30 | | [SimpleDLA](https://arxiv.org/abs/1707.06484)         | 94.89%      |
31 | | [DenseNet121](https://arxiv.org/abs/1608.06993)       | 95.04%      |
32 | | [PreActResNet18](https://arxiv.org/abs/1603.05027)    | 95.11%      |
33 | | [DPN92](https://arxiv.org/abs/1707.01629)             | 95.16%      |
34 | | [DLA](https://arxiv.org/pdf/1707.06484.pdf)           | 95.47%      |
35 | 
36 | 


--------------------------------------------------------------------------------
/models/vgg.py:
--------------------------------------------------------------------------------
 1 | '''VGG11/13/16/19 in Pytorch.'''
 2 | import torch
 3 | import torch.nn as nn
 4 | 
 5 | 
 6 | cfg = {
 7 |     'VGG11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
 8 |     'VGG13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
 9 |     'VGG16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
10 |     'VGG19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],
11 | }
12 | 
13 | 
14 | class VGG(nn.Module):
15 |     def __init__(self, vgg_name):
16 |         super(VGG, self).__init__()
17 |         self.features = self._make_layers(cfg[vgg_name])
18 |         self.classifier = nn.Linear(512, 10)
19 | 
20 |     def forward(self, x):
21 |         out = self.features(x)
22 |         out = out.view(out.size(0), -1)
23 |         out = self.classifier(out)
24 |         return out
25 | 
26 |     def _make_layers(self, cfg):
27 |         layers = []
28 |         in_channels = 3
29 |         for x in cfg:
30 |             if x == 'M':
31 |                 layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
32 |             else:
33 |                 layers += [nn.Conv2d(in_channels, x, kernel_size=3, padding=1),
34 |                            nn.BatchNorm2d(x),
35 |                            nn.ReLU(inplace=True)]
36 |                 in_channels = x
37 |         layers += [nn.AvgPool2d(kernel_size=1, stride=1)]
38 |         return nn.Sequential(*layers)
39 | 
40 | 
def test():
    '''Smoke test: run a random batch of two 3x32x32 inputs through VGG11 and print the output size.'''
    model = VGG('VGG11')
    batch = torch.randn(2,3,32,32)
    print(model(batch).size())
46 | 
47 | # test()
48 | 


--------------------------------------------------------------------------------
/models/mobilenet.py:
--------------------------------------------------------------------------------
 1 | '''MobileNet in PyTorch.
 2 | 
 3 | See the paper "MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications"
 4 | for more details.
 5 | '''
 6 | import torch
 7 | import torch.nn as nn
 8 | import torch.nn.functional as F
 9 | 
10 | 
class Block(nn.Module):
    '''Depthwise conv + Pointwise conv'''
    def __init__(self, in_planes, out_planes, stride=1):
        super().__init__()
        # Depthwise stage: groups == in_planes gives one 3x3 filter per channel.
        self.conv1 = nn.Conv2d(in_planes, in_planes, 3, stride, 1,
                               groups=in_planes, bias=False)
        self.bn1 = nn.BatchNorm2d(in_planes)
        # Pointwise stage: a 1x1 conv that changes the channel count.
        self.conv2 = nn.Conv2d(in_planes, out_planes, 1, 1, 0, bias=False)
        self.bn2 = nn.BatchNorm2d(out_planes)

    def forward(self, x):
        depthwise = F.relu(self.bn1(self.conv1(x)))
        return F.relu(self.bn2(self.conv2(depthwise)))
24 | 
25 | 
class MobileNet(nn.Module):
    '''Stem conv followed by a stack of depthwise-separable Blocks and a linear head.'''
    # A bare int is a Block's output width with stride 1;
    # a (width, stride) tuple, e.g. (128,2), sets the stride explicitly.
    cfg = [64, (128,2), 128, (256,2), 256, (512,2), 512, 512, 512, 512, 512, (1024,2), 1024]

    def __init__(self, num_classes=10):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(32)
        self.layers = self._make_layers(in_planes=32)
        self.linear = nn.Linear(1024, num_classes)

    def _make_layers(self, in_planes):
        '''Build one Block per cfg entry, chaining each output width into the next input.'''
        blocks = []
        for spec in self.cfg:
            if isinstance(spec, int):
                width, stride = spec, 1
            else:
                width, stride = spec[0], spec[1]
            blocks.append(Block(in_planes, width, stride))
            in_planes = width
        return nn.Sequential(*blocks)

    def forward(self, x):
        stem = F.relu(self.bn1(self.conv1(x)))
        features = self.layers(stem)
        pooled = F.avg_pool2d(features, 2)
        flat = pooled.view(pooled.size(0), -1)
        return self.linear(flat)
53 | 
54 | 
def test():
    '''Smoke test: run one random 3x32x32 input through MobileNet and print the output size.'''
    model = MobileNet()
    sample = torch.randn(1,3,32,32)
    print(model(sample).size())
60 | 
61 | # test()
62 | 


--------------------------------------------------------------------------------
/models/mobilenetv2.py:
--------------------------------------------------------------------------------
 1 | '''MobileNetV2 in PyTorch.
 2 | 
 3 | See the paper "Inverted Residuals and Linear Bottlenecks:
 4 | Mobile Networks for Classification, Detection and Segmentation" for more details.
 5 | '''
 6 | import torch
 7 | import torch.nn as nn
 8 | import torch.nn.functional as F
 9 | 
10 | 
class Block(nn.Module):
    '''expand + depthwise + pointwise'''
    def __init__(self, in_planes, out_planes, expansion, stride):
        super().__init__()
        self.stride = stride

        hidden = expansion * in_planes
        # 1x1 expansion -> 3x3 depthwise (groups == channels) -> 1x1 projection.
        self.conv1 = nn.Conv2d(in_planes, hidden, kernel_size=1, stride=1, padding=0, bias=False)
        self.bn1 = nn.BatchNorm2d(hidden)
        self.conv2 = nn.Conv2d(hidden, hidden, kernel_size=3, stride=stride, padding=1, groups=hidden, bias=False)
        self.bn2 = nn.BatchNorm2d(hidden)
        self.conv3 = nn.Conv2d(hidden, out_planes, kernel_size=1, stride=1, padding=0, bias=False)
        self.bn3 = nn.BatchNorm2d(out_planes)

        # The residual is only added when stride == 1 (see forward); a 1x1
        # projection is needed only if the channel counts differ as well.
        if stride == 1 and in_planes != out_planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False),
                nn.BatchNorm2d(out_planes),
            )
        else:
            self.shortcut = nn.Sequential()

    def forward(self, x):
        expanded = F.relu(self.bn1(self.conv1(x)))
        filtered = F.relu(self.bn2(self.conv2(expanded)))
        projected = self.bn3(self.conv3(filtered))
        if self.stride == 1:
            return projected + self.shortcut(x)
        return projected
38 | 
39 | 
class MobileNetV2(nn.Module):
    '''Stem conv, a stack of inverted-residual Blocks, a 1x1 conv to 1280 channels and a linear head.'''
    # (expansion, out_planes, num_blocks, stride)
    cfg = [(1,  16, 1, 1),
           (6,  24, 2, 1),  # NOTE: change stride 2 -> 1 for CIFAR10
           (6,  32, 3, 2),
           (6,  64, 4, 2),
           (6,  96, 3, 1),
           (6, 160, 3, 2),
           (6, 320, 1, 1)]

    def __init__(self, num_classes=10):
        super().__init__()
        # NOTE: change conv1 stride 2 -> 1 for CIFAR10
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(32)
        self.layers = self._make_layers(in_planes=32)
        self.conv2 = nn.Conv2d(320, 1280, kernel_size=1, stride=1, padding=0, bias=False)
        self.bn2 = nn.BatchNorm2d(1280)
        self.linear = nn.Linear(1280, num_classes)

    def _make_layers(self, in_planes):
        '''Expand each cfg row into its Blocks; only the first Block of a row uses the row's stride.'''
        blocks = []
        for expansion, out_planes, num_blocks, first_stride in self.cfg:
            for block_stride in [first_stride] + [1]*(num_blocks-1):
                blocks.append(Block(in_planes, out_planes, expansion, block_stride))
                in_planes = out_planes
        return nn.Sequential(*blocks)

    def forward(self, x):
        stem = F.relu(self.bn1(self.conv1(x)))
        features = self.layers(stem)
        features = F.relu(self.bn2(self.conv2(features)))
        # NOTE: change pooling kernel_size 7 -> 4 for CIFAR10
        pooled = F.avg_pool2d(features, 4)
        flat = pooled.view(pooled.size(0), -1)
        return self.linear(flat)
78 | 
79 | 
def test():
    '''Smoke test: run a random batch of two 3x32x32 inputs through MobileNetV2 and print the output size.'''
    model = MobileNetV2()
    batch = torch.randn(2,3,32,32)
    print(model(batch).size())
85 | 
86 | # test()
87 | 


--------------------------------------------------------------------------------
/models/googlenet.py:
--------------------------------------------------------------------------------
  1 | '''GoogLeNet with PyTorch.'''
  2 | import torch
  3 | import torch.nn as nn
  4 | import torch.nn.functional as F
  5 | 
  6 | 
  7 | class Inception(nn.Module):
  8 |     def __init__(self, in_planes, n1x1, n3x3red, n3x3, n5x5red, n5x5, pool_planes):
  9 |         super(Inception, self).__init__()
 10 |         # 1x1 conv branch
 11 |         self.b1 = nn.Sequential(
 12 |             nn.Conv2d(in_planes, n1x1, kernel_size=1),
 13 |             nn.BatchNorm2d(n1x1),
 14 |             nn.ReLU(True),
 15 |         )
 16 | 
 17 |         # 1x1 conv -> 3x3 conv branch
 18 |         self.b2 = nn.Sequential(
 19 |             nn.Conv2d(in_planes, n3x3red, kernel_size=1),
 20 |             nn.BatchNorm2d(n3x3red),
 21 |             nn.ReLU(True),
 22 |             nn.Conv2d(n3x3red, n3x3, kernel_size=3, padding=1),
 23 |             nn.BatchNorm2d(n3x3),
 24 |             nn.ReLU(True),
 25 |         )
 26 | 
 27 |         # 1x1 conv -> 5x5 conv branch
 28 |         self.b3 = nn.Sequential(
 29 |             nn.Conv2d(in_planes, n5x5red, kernel_size=1),
 30 |             nn.BatchNorm2d(n5x5red),
 31 |             nn.ReLU(True),
 32 |             nn.Conv2d(n5x5red, n5x5, kernel_size=3, padding=1),
 33 |             nn.BatchNorm2d(n5x5),
 34 |             nn.ReLU(True),
 35 |             nn.Conv2d(n5x5, n5x5, kernel_size=3, padding=1),
 36 |             nn.BatchNorm2d(n5x5),
 37 |             nn.ReLU(True),
 38 |         )
 39 | 
 40 |         # 3x3 pool -> 1x1 conv branch
 41 |         self.b4 = nn.Sequential(
 42 |             nn.MaxPool2d(3, stride=1, padding=1),
 43 |             nn.Conv2d(in_planes, pool_planes, kernel_size=1),
 44 |             nn.BatchNorm2d(pool_planes),
 45 |             nn.ReLU(True),
 46 |         )
 47 | 
 48 |     def forward(self, x):
 49 |         y1 = self.b1(x)
 50 |         y2 = self.b2(x)
 51 |         y3 = self.b3(x)
 52 |         y4 = self.b4(x)
 53 |         return torch.cat([y1,y2,y3,y4], 1)
 54 | 
 55 | 
 56 | class GoogLeNet(nn.Module):
 57 |     def __init__(self):
 58 |         super(GoogLeNet, self).__init__()
 59 |         self.pre_layers = nn.Sequential(
 60 |             nn.Conv2d(3, 192, kernel_size=3, padding=1),
 61 |             nn.BatchNorm2d(192),
 62 |             nn.ReLU(True),
 63 |         )
 64 | 
 65 |         self.a3 = Inception(192,  64,  96, 128, 16, 32, 32)
 66 |         self.b3 = Inception(256, 128, 128, 192, 32, 96, 64)
 67 | 
 68 |         self.maxpool = nn.MaxPool2d(3, stride=2, padding=1)
 69 | 
 70 |         self.a4 = Inception(480, 192,  96, 208, 16,  48,  64)
 71 |         self.b4 = Inception(512, 160, 112, 224, 24,  64,  64)
 72 |         self.c4 = Inception(512, 128, 128, 256, 24,  64,  64)
 73 |         self.d4 = Inception(512, 112, 144, 288, 32,  64,  64)
 74 |         self.e4 = Inception(528, 256, 160, 320, 32, 128, 128)
 75 | 
 76 |         self.a5 = Inception(832, 256, 160, 320, 32, 128, 128)
 77 |         self.b5 = Inception(832, 384, 192, 384, 48, 128, 128)
 78 | 
 79 |         self.avgpool = nn.AvgPool2d(8, stride=1)
 80 |         self.linear = nn.Linear(1024, 10)
 81 | 
 82 |     def forward(self, x):
 83 |         out = self.pre_layers(x)
 84 |         out = self.a3(out)
 85 |         out = self.b3(out)
 86 |         out = self.maxpool(out)
 87 |         out = self.a4(out)
 88 |         out = self.b4(out)
 89 |         out = self.c4(out)
 90 |         out = self.d4(out)
 91 |         out = self.e4(out)
 92 |         out = self.maxpool(out)
 93 |         out = self.a5(out)
 94 |         out = self.b5(out)
 95 |         out = self.avgpool(out)
 96 |         out = out.view(out.size(0), -1)
 97 |         out = self.linear(out)
 98 |         return out
 99 | 
100 | 
def test():
    '''Smoke test: run one random 3x32x32 input through GoogLeNet and print the output size.'''
    model = GoogLeNet()
    sample = torch.randn(1,3,32,32)
    print(model(sample).size())
106 | 
107 | # test()
108 | 


--------------------------------------------------------------------------------
/models/resnext.py:
--------------------------------------------------------------------------------
 1 | '''ResNeXt in PyTorch.
 2 | 
 3 | See the paper "Aggregated Residual Transformations for Deep Neural Networks" for more details.
 4 | '''
 5 | import torch
 6 | import torch.nn as nn
 7 | import torch.nn.functional as F
 8 | 
 9 | 
class Block(nn.Module):
    '''Grouped convolution block.'''
    expansion = 2

    def __init__(self, in_planes, cardinality=32, bottleneck_width=4, stride=1):
        super().__init__()
        width = cardinality * bottleneck_width
        out_planes = self.expansion * width

        # 1x1 reduce -> grouped 3x3 (one group per cardinality path) -> 1x1 expand.
        self.conv1 = nn.Conv2d(in_planes, width, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(width)
        self.conv2 = nn.Conv2d(width, width, kernel_size=3, stride=stride, padding=1, groups=cardinality, bias=False)
        self.bn2 = nn.BatchNorm2d(width)
        self.conv3 = nn.Conv2d(width, out_planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_planes)

        # Identity shortcut unless the spatial size or channel count changes.
        if stride == 1 and in_planes == out_planes:
            self.shortcut = nn.Sequential()
        else:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_planes)
            )

    def forward(self, x):
        y = F.relu(self.bn1(self.conv1(x)))
        y = F.relu(self.bn2(self.conv2(y)))
        y = self.bn3(self.conv3(y))
        y += self.shortcut(x)
        return F.relu(y)
38 | 
39 | 
class ResNeXt(nn.Module):
    '''Three-stage ResNeXt: 1x1 stem conv, three stacks of grouped Blocks, 8x8 average pool, linear head.'''
    def __init__(self, num_blocks, cardinality, bottleneck_width, num_classes=10):
        super().__init__()
        self.cardinality = cardinality
        # Mutated by _make_layer: doubles after every stage that gets built.
        self.bottleneck_width = bottleneck_width
        self.in_planes = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(num_blocks[0], 1)
        self.layer2 = self._make_layer(num_blocks[1], 2)
        self.layer3 = self._make_layer(num_blocks[2], 2)
        # self.layer4 = self._make_layer(num_blocks[3], 2)
        # Uses the constructor argument, not the (by now doubled) attribute.
        head_features = cardinality*bottleneck_width*8
        self.linear = nn.Linear(head_features, num_classes)

    def _make_layer(self, num_blocks, stride):
        '''Build one stage; only its first Block uses the given stride, the rest use 1.'''
        blocks = []
        for block_stride in [stride] + [1]*(num_blocks-1):
            blocks.append(Block(self.in_planes, self.cardinality, self.bottleneck_width, block_stride))
            self.in_planes = Block.expansion * self.cardinality * self.bottleneck_width
        # Increase bottleneck_width by 2 after each stage.
        self.bottleneck_width *= 2
        return nn.Sequential(*blocks)

    def forward(self, x):
        y = F.relu(self.bn1(self.conv1(x)))
        for stage in (self.layer1, self.layer2, self.layer3):
            y = stage(y)
        # out = self.layer4(out)
        y = F.avg_pool2d(y, 8)
        y = y.view(y.size(0), -1)
        return self.linear(y)
75 | 
76 | 
def ResNeXt29_2x64d():
    '''Build a ResNeXt with three blocks per stage, cardinality 2 and bottleneck width 64.'''
    return ResNeXt([3,3,3], 2, 64)
79 | 
def ResNeXt29_4x64d():
    '''Build a ResNeXt with three blocks per stage, cardinality 4 and bottleneck width 64.'''
    return ResNeXt([3,3,3], 4, 64)
82 | 
def ResNeXt29_8x64d():
    '''Build a ResNeXt with three blocks per stage, cardinality 8 and bottleneck width 64.'''
    return ResNeXt([3,3,3], 8, 64)
85 | 
def ResNeXt29_32x4d():
    '''Build a ResNeXt with three blocks per stage, cardinality 32 and bottleneck width 4.'''
    return ResNeXt([3,3,3], 32, 4)
88 | 
def test_resnext():
    '''Smoke test: run one random 3x32x32 input through ResNeXt29_2x64d and print the output size.'''
    model = ResNeXt29_2x64d()
    sample = torch.randn(1,3,32,32)
    print(model(sample).size())
94 | 
95 | # test_resnext()
96 | 


--------------------------------------------------------------------------------
/models/dpn.py:
--------------------------------------------------------------------------------
 1 | '''Dual Path Networks in PyTorch.'''
 2 | import torch
 3 | import torch.nn as nn
 4 | import torch.nn.functional as F
 5 | 
 6 | 
 7 | class Bottleneck(nn.Module):
 8 |     def __init__(self, last_planes, in_planes, out_planes, dense_depth, stride, first_layer):
 9 |         super(Bottleneck, self).__init__()
10 |         self.out_planes = out_planes
11 |         self.dense_depth = dense_depth
12 | 
13 |         self.conv1 = nn.Conv2d(last_planes, in_planes, kernel_size=1, bias=False)
14 |         self.bn1 = nn.BatchNorm2d(in_planes)
15 |         self.conv2 = nn.Conv2d(in_planes, in_planes, kernel_size=3, stride=stride, padding=1, groups=32, bias=False)
16 |         self.bn2 = nn.BatchNorm2d(in_planes)
17 |         self.conv3 = nn.Conv2d(in_planes, out_planes+dense_depth, kernel_size=1, bias=False)
18 |         self.bn3 = nn.BatchNorm2d(out_planes+dense_depth)
19 | 
20 |         self.shortcut = nn.Sequential()
21 |         if first_layer:
22 |             self.shortcut = nn.Sequential(
23 |                 nn.Conv2d(last_planes, out_planes+dense_depth, kernel_size=1, stride=stride, bias=False),
24 |                 nn.BatchNorm2d(out_planes+dense_depth)
25 |             )
26 | 
27 |     def forward(self, x):
28 |         out = F.relu(self.bn1(self.conv1(x)))
29 |         out = F.relu(self.bn2(self.conv2(out)))
30 |         out = self.bn3(self.conv3(out))
31 |         x = self.shortcut(x)
32 |         d = self.out_planes
33 |         out = torch.cat([x[:,:d,:,:]+out[:,:d,:,:], x[:,d:,:,:], out[:,d:,:,:]], 1)
34 |         out = F.relu(out)
35 |         return out
36 | 
37 | 
class DPN(nn.Module):
    '''Dual Path Network: 3x3 stem conv, four stages of Bottleneck blocks, 4x4 average pool, linear head.

    Args:
        cfg: dict with the keys 'in_planes', 'out_planes', 'num_blocks' and
            'dense_depth', each holding one value per stage (indices 0..3 are read).
        num_classes: number of output logits. Defaults to 10, the value that
            used to be hard-coded.
    '''
    def __init__(self, cfg, num_classes=10):
        super(DPN, self).__init__()
        in_planes, out_planes = cfg['in_planes'], cfg['out_planes']
        num_blocks, dense_depth = cfg['num_blocks'], cfg['dense_depth']

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        # Channel count fed into the next Bottleneck; updated by _make_layer.
        self.last_planes = 64
        self.layer1 = self._make_layer(in_planes[0], out_planes[0], num_blocks[0], dense_depth[0], stride=1)
        self.layer2 = self._make_layer(in_planes[1], out_planes[1], num_blocks[1], dense_depth[1], stride=2)
        self.layer3 = self._make_layer(in_planes[2], out_planes[2], num_blocks[2], dense_depth[2], stride=2)
        self.layer4 = self._make_layer(in_planes[3], out_planes[3], num_blocks[3], dense_depth[3], stride=2)
        # Same formula _make_layer uses for last_planes after the final block of stage 4.
        self.linear = nn.Linear(out_planes[3]+(num_blocks[3]+1)*dense_depth[3], num_classes)

    def _make_layer(self, in_planes, out_planes, num_blocks, dense_depth, stride):
        '''Build one stage; only its first block uses the given stride and a projection shortcut.'''
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        for i,stride in enumerate(strides):
            layers.append(Bottleneck(self.last_planes, in_planes, out_planes, dense_depth, stride, i==0))
            # Every block appends dense_depth channels; the first one emits
            # out_planes + 2*dense_depth because its shortcut is projected too.
            self.last_planes = out_planes + (i+2) * dense_depth
        return nn.Sequential(*layers)

    def forward(self, x):
        '''Return the raw class scores, one row per input sample.'''
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = F.avg_pool2d(out, 4)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out
71 | 
72 | 
def DPN26():
    '''Build a DPN with two Bottleneck blocks in each of its four stages.'''
    return DPN(dict(
        in_planes=(96,192,384,768),
        out_planes=(256,512,1024,2048),
        num_blocks=(2,2,2,2),
        dense_depth=(16,32,24,128),
    ))
81 | 
def DPN92():
    '''Build a DPN with 3, 4, 20 and 3 Bottleneck blocks in its four stages.'''
    return DPN(dict(
        in_planes=(96,192,384,768),
        out_planes=(256,512,1024,2048),
        num_blocks=(3,4,20,3),
        dense_depth=(16,32,24,128),
    ))
90 | 
91 | 
def test():
    '''Smoke test: run one random 3x32x32 input through DPN92 and print the resulting tensor.'''
    model = DPN92()
    sample = torch.randn(1,3,32,32)
    print(model(sample))
97 | 
98 | # test()
99 | 


--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
  1 | '''Some helper functions for PyTorch, including:
  2 |     - get_mean_and_std: calculate the mean and std value of dataset.
  3 |     - init_params: net parameter initialization.
  4 |     - progress_bar: progress bar mimic xlua.progress.
  5 | '''
  6 | import os
  7 | import sys
  8 | import time
  9 | import math
 10 | 
 11 | import torch.nn as nn
 12 | import torch.nn.init as init
 13 | 
 14 | 
 15 | def get_mean_and_std(dataset):
 16 |     '''Compute the mean and std value of dataset.'''
 17 |     dataloader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=True, num_workers=2)
 18 |     mean = torch.zeros(3)
 19 |     std = torch.zeros(3)
 20 |     print('==> Computing mean and std..')
 21 |     for inputs, targets in dataloader:
 22 |         for i in range(3):
 23 |             mean[i] += inputs[:,i,:,:].mean()
 24 |             std[i] += inputs[:,i,:,:].std()
 25 |     mean.div_(len(dataset))
 26 |     std.div_(len(dataset))
 27 |     return mean, std
 28 | 
 29 | def init_params(net):
 30 |     '''Init layer parameters.'''
 31 |     for m in net.modules():
 32 |         if isinstance(m, nn.Conv2d):
 33 |             init.kaiming_normal(m.weight, mode='fan_out')
 34 |             if m.bias:
 35 |                 init.constant(m.bias, 0)
 36 |         elif isinstance(m, nn.BatchNorm2d):
 37 |             init.constant(m.weight, 1)
 38 |             init.constant(m.bias, 0)
 39 |         elif isinstance(m, nn.Linear):
 40 |             init.normal(m.weight, std=1e-3)
 41 |             if m.bias:
 42 |                 init.constant(m.bias, 0)
 43 | 
 44 | 
 45 | _, term_width = os.popen('stty size', 'r').read().split()
 46 | term_width = int(term_width)
 47 | 
 48 | TOTAL_BAR_LENGTH = 65.
 49 | last_time = time.time()
 50 | begin_time = last_time
 51 | def progress_bar(current, total, msg=None):
 52 |     global last_time, begin_time
 53 |     if current == 0:
 54 |         begin_time = time.time()  # Reset for new bar.
 55 | 
 56 |     cur_len = int(TOTAL_BAR_LENGTH*current/total)
 57 |     rest_len = int(TOTAL_BAR_LENGTH - cur_len) - 1
 58 | 
 59 |     sys.stdout.write(' [')
 60 |     for i in range(cur_len):
 61 |         sys.stdout.write('=')
 62 |     sys.stdout.write('>')
 63 |     for i in range(rest_len):
 64 |         sys.stdout.write('.')
 65 |     sys.stdout.write(']')
 66 | 
 67 |     cur_time = time.time()
 68 |     step_time = cur_time - last_time
 69 |     last_time = cur_time
 70 |     tot_time = cur_time - begin_time
 71 | 
 72 |     L = []
 73 |     L.append('  Step: %s' % format_time(step_time))
 74 |     L.append(' | Tot: %s' % format_time(tot_time))
 75 |     if msg:
 76 |         L.append(' | ' + msg)
 77 | 
 78 |     msg = ''.join(L)
 79 |     sys.stdout.write(msg)
 80 |     for i in range(term_width-int(TOTAL_BAR_LENGTH)-len(msg)-3):
 81 |         sys.stdout.write(' ')
 82 | 
 83 |     # Go back to the center of the bar.
 84 |     for i in range(term_width-int(TOTAL_BAR_LENGTH/2)+2):
 85 |         sys.stdout.write('\b')
 86 |     sys.stdout.write(' %d/%d ' % (current+1, total))
 87 | 
 88 |     if current < total-1:
 89 |         sys.stdout.write('\r')
 90 |     else:
 91 |         sys.stdout.write('\n')
 92 |     sys.stdout.flush()
 93 | 
 94 | def format_time(seconds):
 95 |     days = int(seconds / 3600/24)
 96 |     seconds = seconds - days*3600*24
 97 |     hours = int(seconds / 3600)
 98 |     seconds = seconds - hours*3600
 99 |     minutes = int(seconds / 60)
100 |     seconds = seconds - minutes*60
101 |     secondsf = int(seconds)
102 |     seconds = seconds - secondsf
103 |     millis = int(seconds*1000)
104 | 
105 |     f = ''
106 |     i = 1
107 |     if days > 0:
108 |         f += str(days) + 'D'
109 |         i += 1
110 |     if hours > 0 and i <= 2:
111 |         f += str(hours) + 'h'
112 |         i += 1
113 |     if minutes > 0 and i <= 2:
114 |         f += str(minutes) + 'm'
115 |         i += 1
116 |     if secondsf > 0 and i <= 2:
117 |         f += str(secondsf) + 's'
118 |         i += 1
119 |     if millis > 0 and i <= 2:
120 |         f += str(millis) + 'ms'
121 |         i += 1
122 |     if f == '':
123 |         f = '0ms'
124 |     return f
125 | 


--------------------------------------------------------------------------------
/models/densenet.py:
--------------------------------------------------------------------------------
  1 | '''DenseNet in PyTorch.'''
  2 | import math
  3 | 
  4 | import torch
  5 | import torch.nn as nn
  6 | import torch.nn.functional as F
  7 | 
  8 | 
  9 | class Bottleneck(nn.Module):
 10 |     def __init__(self, in_planes, growth_rate):
 11 |         super(Bottleneck, self).__init__()
 12 |         self.bn1 = nn.BatchNorm2d(in_planes)
 13 |         self.conv1 = nn.Conv2d(in_planes, 4*growth_rate, kernel_size=1, bias=False)
 14 |         self.bn2 = nn.BatchNorm2d(4*growth_rate)
 15 |         self.conv2 = nn.Conv2d(4*growth_rate, growth_rate, kernel_size=3, padding=1, bias=False)
 16 | 
 17 |     def forward(self, x):
 18 |         out = self.conv1(F.relu(self.bn1(x)))
 19 |         out = self.conv2(F.relu(self.bn2(out)))
 20 |         out = torch.cat([out,x], 1)
 21 |         return out
 22 | 
 23 | 
 24 | class Transition(nn.Module):
 25 |     def __init__(self, in_planes, out_planes):
 26 |         super(Transition, self).__init__()
 27 |         self.bn = nn.BatchNorm2d(in_planes)
 28 |         self.conv = nn.Conv2d(in_planes, out_planes, kernel_size=1, bias=False)
 29 | 
 30 |     def forward(self, x):
 31 |         out = self.conv(F.relu(self.bn(x)))
 32 |         out = F.avg_pool2d(out, 2)
 33 |         return out
 34 | 
 35 | 
 36 | class DenseNet(nn.Module):
 37 |     def __init__(self, block, nblocks, growth_rate=12, reduction=0.5, num_classes=10):
 38 |         super(DenseNet, self).__init__()
 39 |         self.growth_rate = growth_rate
 40 | 
 41 |         num_planes = 2*growth_rate
 42 |         self.conv1 = nn.Conv2d(3, num_planes, kernel_size=3, padding=1, bias=False)
 43 | 
 44 |         self.dense1 = self._make_dense_layers(block, num_planes, nblocks[0])
 45 |         num_planes += nblocks[0]*growth_rate
 46 |         out_planes = int(math.floor(num_planes*reduction))
 47 |         self.trans1 = Transition(num_planes, out_planes)
 48 |         num_planes = out_planes
 49 | 
 50 |         self.dense2 = self._make_dense_layers(block, num_planes, nblocks[1])
 51 |         num_planes += nblocks[1]*growth_rate
 52 |         out_planes = int(math.floor(num_planes*reduction))
 53 |         self.trans2 = Transition(num_planes, out_planes)
 54 |         num_planes = out_planes
 55 | 
 56 |         self.dense3 = self._make_dense_layers(block, num_planes, nblocks[2])
 57 |         num_planes += nblocks[2]*growth_rate
 58 |         out_planes = int(math.floor(num_planes*reduction))
 59 |         self.trans3 = Transition(num_planes, out_planes)
 60 |         num_planes = out_planes
 61 | 
 62 |         self.dense4 = self._make_dense_layers(block, num_planes, nblocks[3])
 63 |         num_planes += nblocks[3]*growth_rate
 64 | 
 65 |         self.bn = nn.BatchNorm2d(num_planes)
 66 |         self.linear = nn.Linear(num_planes, num_classes)
 67 | 
 68 |     def _make_dense_layers(self, block, in_planes, nblock):
 69 |         layers = []
 70 |         for i in range(nblock):
 71 |             layers.append(block(in_planes, self.growth_rate))
 72 |             in_planes += self.growth_rate
 73 |         return nn.Sequential(*layers)
 74 | 
 75 |     def forward(self, x):
 76 |         out = self.conv1(x)
 77 |         out = self.trans1(self.dense1(out))
 78 |         out = self.trans2(self.dense2(out))
 79 |         out = self.trans3(self.dense3(out))
 80 |         out = self.dense4(out)
 81 |         out = F.avg_pool2d(F.relu(self.bn(out)), 4)
 82 |         out = out.view(out.size(0), -1)
 83 |         out = self.linear(out)
 84 |         return out
 85 | 
 86 | def DenseNet121():
 87 |     return DenseNet(Bottleneck, [6,12,24,16], growth_rate=32)
 88 | 
 89 | def DenseNet169():
 90 |     return DenseNet(Bottleneck, [6,12,32,32], growth_rate=32)
 91 | 
 92 | def DenseNet201():
 93 |     return DenseNet(Bottleneck, [6,12,48,32], growth_rate=32)
 94 | 
 95 | def DenseNet161():
 96 |     return DenseNet(Bottleneck, [6,12,36,24], growth_rate=48)
 97 | 
 98 | def densenet_cifar():
 99 |     return DenseNet(Bottleneck, [6,12,24,16], growth_rate=12)
100 | 
def test():
    '''Smoke test: run densenet_cifar() on one random 3x32x32 input and print the output.'''
    model = densenet_cifar()
    sample = torch.randn(1, 3, 32, 32)
    print(model(sample))
106 | 
107 | # test()
108 | 


--------------------------------------------------------------------------------
/models/shufflenet.py:
--------------------------------------------------------------------------------
  1 | '''ShuffleNet in PyTorch.
  2 | 
  3 | See the paper "ShuffleNet: An Extremely Efficient Convolutional Neural Network for Mobile Devices" for more details.
  4 | '''
  5 | import torch
  6 | import torch.nn as nn
  7 | import torch.nn.functional as F
  8 | 
  9 | 
 10 | class ShuffleBlock(nn.Module):
 11 |     def __init__(self, groups):
 12 |         super(ShuffleBlock, self).__init__()
 13 |         self.groups = groups
 14 | 
 15 |     def forward(self, x):
 16 |         '''Channel shuffle: [N,C,H,W] -> [N,g,C/g,H,W] -> [N,C/g,g,H,w] -> [N,C,H,W]'''
 17 |         N,C,H,W = x.size()
 18 |         g = self.groups
 19 |         return x.view(N,g,C//g,H,W).permute(0,2,1,3,4).reshape(N,C,H,W)
 20 | 
 21 | 
 22 | class Bottleneck(nn.Module):
 23 |     def __init__(self, in_planes, out_planes, stride, groups):
 24 |         super(Bottleneck, self).__init__()
 25 |         self.stride = stride
 26 | 
 27 |         mid_planes = out_planes/4
 28 |         g = 1 if in_planes==24 else groups
 29 |         self.conv1 = nn.Conv2d(in_planes, mid_planes, kernel_size=1, groups=g, bias=False)
 30 |         self.bn1 = nn.BatchNorm2d(mid_planes)
 31 |         self.shuffle1 = ShuffleBlock(groups=g)
 32 |         self.conv2 = nn.Conv2d(mid_planes, mid_planes, kernel_size=3, stride=stride, padding=1, groups=mid_planes, bias=False)
 33 |         self.bn2 = nn.BatchNorm2d(mid_planes)
 34 |         self.conv3 = nn.Conv2d(mid_planes, out_planes, kernel_size=1, groups=groups, bias=False)
 35 |         self.bn3 = nn.BatchNorm2d(out_planes)
 36 | 
 37 |         self.shortcut = nn.Sequential()
 38 |         if stride == 2:
 39 |             self.shortcut = nn.Sequential(nn.AvgPool2d(3, stride=2, padding=1))
 40 | 
 41 |     def forward(self, x):
 42 |         out = F.relu(self.bn1(self.conv1(x)))
 43 |         out = self.shuffle1(out)
 44 |         out = F.relu(self.bn2(self.conv2(out)))
 45 |         out = self.bn3(self.conv3(out))
 46 |         res = self.shortcut(x)
 47 |         out = F.relu(torch.cat([out,res], 1)) if self.stride==2 else F.relu(out+res)
 48 |         return out
 49 | 
 50 | 
 51 | class ShuffleNet(nn.Module):
 52 |     def __init__(self, cfg):
 53 |         super(ShuffleNet, self).__init__()
 54 |         out_planes = cfg['out_planes']
 55 |         num_blocks = cfg['num_blocks']
 56 |         groups = cfg['groups']
 57 | 
 58 |         self.conv1 = nn.Conv2d(3, 24, kernel_size=1, bias=False)
 59 |         self.bn1 = nn.BatchNorm2d(24)
 60 |         self.in_planes = 24
 61 |         self.layer1 = self._make_layer(out_planes[0], num_blocks[0], groups)
 62 |         self.layer2 = self._make_layer(out_planes[1], num_blocks[1], groups)
 63 |         self.layer3 = self._make_layer(out_planes[2], num_blocks[2], groups)
 64 |         self.linear = nn.Linear(out_planes[2], 10)
 65 | 
 66 |     def _make_layer(self, out_planes, num_blocks, groups):
 67 |         layers = []
 68 |         for i in range(num_blocks):
 69 |             stride = 2 if i == 0 else 1
 70 |             cat_planes = self.in_planes if i == 0 else 0
 71 |             layers.append(Bottleneck(self.in_planes, out_planes-cat_planes, stride=stride, groups=groups))
 72 |             self.in_planes = out_planes
 73 |         return nn.Sequential(*layers)
 74 | 
 75 |     def forward(self, x):
 76 |         out = F.relu(self.bn1(self.conv1(x)))
 77 |         out = self.layer1(out)
 78 |         out = self.layer2(out)
 79 |         out = self.layer3(out)
 80 |         out = F.avg_pool2d(out, 4)
 81 |         out = out.view(out.size(0), -1)
 82 |         out = self.linear(out)
 83 |         return out
 84 | 
 85 | 
 86 | def ShuffleNetG2():
 87 |     cfg = {
 88 |         'out_planes': [200,400,800],
 89 |         'num_blocks': [4,8,4],
 90 |         'groups': 2
 91 |     }
 92 |     return ShuffleNet(cfg)
 93 | 
 94 | def ShuffleNetG3():
 95 |     cfg = {
 96 |         'out_planes': [240,480,960],
 97 |         'num_blocks': [4,8,4],
 98 |         'groups': 3
 99 |     }
100 |     return ShuffleNet(cfg)
101 | 
102 | 
def test():
    '''Smoke test: run ShuffleNetG2() on one random 3x32x32 input and print the output.'''
    model = ShuffleNetG2()
    sample = torch.randn(1, 3, 32, 32)
    print(model(sample))
108 | 
109 | # test()
110 | 


--------------------------------------------------------------------------------
/models/senet.py:
--------------------------------------------------------------------------------
  1 | '''SENet in PyTorch.
  2 | 
  3 | SENet is the winner of ImageNet-2017. The paper is not released yet.
  4 | '''
  5 | import torch
  6 | import torch.nn as nn
  7 | import torch.nn.functional as F
  8 | 
  9 | 
 10 | class BasicBlock(nn.Module):
 11 |     def __init__(self, in_planes, planes, stride=1):
 12 |         super(BasicBlock, self).__init__()
 13 |         self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
 14 |         self.bn1 = nn.BatchNorm2d(planes)
 15 |         self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
 16 |         self.bn2 = nn.BatchNorm2d(planes)
 17 | 
 18 |         self.shortcut = nn.Sequential()
 19 |         if stride != 1 or in_planes != planes:
 20 |             self.shortcut = nn.Sequential(
 21 |                 nn.Conv2d(in_planes, planes, kernel_size=1, stride=stride, bias=False),
 22 |                 nn.BatchNorm2d(planes)
 23 |             )
 24 | 
 25 |         # SE layers
 26 |         self.fc1 = nn.Conv2d(planes, planes//16, kernel_size=1)  # Use nn.Conv2d instead of nn.Linear
 27 |         self.fc2 = nn.Conv2d(planes//16, planes, kernel_size=1)
 28 | 
 29 |     def forward(self, x):
 30 |         out = F.relu(self.bn1(self.conv1(x)))
 31 |         out = self.bn2(self.conv2(out))
 32 | 
 33 |         # Squeeze
 34 |         w = F.avg_pool2d(out, out.size(2))
 35 |         w = F.relu(self.fc1(w))
 36 |         w = F.sigmoid(self.fc2(w))
 37 |         # Excitation
 38 |         out = out * w  # New broadcasting feature from v0.2!
 39 | 
 40 |         out += self.shortcut(x)
 41 |         out = F.relu(out)
 42 |         return out
 43 | 
 44 | 
 45 | class PreActBlock(nn.Module):
 46 |     def __init__(self, in_planes, planes, stride=1):
 47 |         super(PreActBlock, self).__init__()
 48 |         self.bn1 = nn.BatchNorm2d(in_planes)
 49 |         self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
 50 |         self.bn2 = nn.BatchNorm2d(planes)
 51 |         self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
 52 | 
 53 |         if stride != 1 or in_planes != planes:
 54 |             self.shortcut = nn.Sequential(
 55 |                 nn.Conv2d(in_planes, planes, kernel_size=1, stride=stride, bias=False)
 56 |             )
 57 | 
 58 |         # SE layers
 59 |         self.fc1 = nn.Conv2d(planes, planes//16, kernel_size=1)
 60 |         self.fc2 = nn.Conv2d(planes//16, planes, kernel_size=1)
 61 | 
 62 |     def forward(self, x):
 63 |         out = F.relu(self.bn1(x))
 64 |         shortcut = self.shortcut(out) if hasattr(self, 'shortcut') else x
 65 |         out = self.conv1(out)
 66 |         out = self.conv2(F.relu(self.bn2(out)))
 67 | 
 68 |         # Squeeze
 69 |         w = F.avg_pool2d(out, out.size(2))
 70 |         w = F.relu(self.fc1(w))
 71 |         w = F.sigmoid(self.fc2(w))
 72 |         # Excitation
 73 |         out = out * w
 74 | 
 75 |         out += shortcut
 76 |         return out
 77 | 
 78 | 
 79 | class SENet(nn.Module):
 80 |     def __init__(self, block, num_blocks, num_classes=10):
 81 |         super(SENet, self).__init__()
 82 |         self.in_planes = 64
 83 | 
 84 |         self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
 85 |         self.bn1 = nn.BatchNorm2d(64)
 86 |         self.layer1 = self._make_layer(block,  64, num_blocks[0], stride=1)
 87 |         self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
 88 |         self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
 89 |         self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
 90 |         self.linear = nn.Linear(512, num_classes)
 91 | 
 92 |     def _make_layer(self, block, planes, num_blocks, stride):
 93 |         strides = [stride] + [1]*(num_blocks-1)
 94 |         layers = []
 95 |         for stride in strides:
 96 |             layers.append(block(self.in_planes, planes, stride))
 97 |             self.in_planes = planes
 98 |         return nn.Sequential(*layers)
 99 | 
100 |     def forward(self, x):
101 |         out = F.relu(self.bn1(self.conv1(x)))
102 |         out = self.layer1(out)
103 |         out = self.layer2(out)
104 |         out = self.layer3(out)
105 |         out = self.layer4(out)
106 |         out = F.avg_pool2d(out, 4)
107 |         out = out.view(out.size(0), -1)
108 |         out = self.linear(out)
109 |         return out
110 | 
111 | 
def SENet18():
    '''Build an SENet of PreActBlock units with two blocks in each of the four stages.'''
    return SENet(block=PreActBlock, num_blocks=[2, 2, 2, 2])
114 | 
115 | 
def test():
    '''Smoke test: run SENet18() on one random 3x32x32 input and print the output size.'''
    model = SENet18()
    sample = torch.randn(1, 3, 32, 32)
    print(model(sample).size())
120 | 
121 | # test()
122 | 


--------------------------------------------------------------------------------
/models/preact_resnet.py:
--------------------------------------------------------------------------------
  1 | '''Pre-activation ResNet in PyTorch.
  2 | 
  3 | Reference:
  4 | [1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
  5 |     Identity Mappings in Deep Residual Networks. arXiv:1603.05027
  6 | '''
  7 | import torch
  8 | import torch.nn as nn
  9 | import torch.nn.functional as F
 10 | 
 11 | 
 12 | class PreActBlock(nn.Module):
 13 |     '''Pre-activation version of the BasicBlock.'''
 14 |     expansion = 1
 15 | 
 16 |     def __init__(self, in_planes, planes, stride=1):
 17 |         super(PreActBlock, self).__init__()
 18 |         self.bn1 = nn.BatchNorm2d(in_planes)
 19 |         self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
 20 |         self.bn2 = nn.BatchNorm2d(planes)
 21 |         self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
 22 | 
 23 |         if stride != 1 or in_planes != self.expansion*planes:
 24 |             self.shortcut = nn.Sequential(
 25 |                 nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False)
 26 |             )
 27 | 
 28 |     def forward(self, x):
 29 |         out = F.relu(self.bn1(x))
 30 |         shortcut = self.shortcut(out) if hasattr(self, 'shortcut') else x
 31 |         out = self.conv1(out)
 32 |         out = self.conv2(F.relu(self.bn2(out)))
 33 |         out += shortcut
 34 |         return out
 35 | 
 36 | 
 37 | class PreActBottleneck(nn.Module):
 38 |     '''Pre-activation version of the original Bottleneck module.'''
 39 |     expansion = 4
 40 | 
 41 |     def __init__(self, in_planes, planes, stride=1):
 42 |         super(PreActBottleneck, self).__init__()
 43 |         self.bn1 = nn.BatchNorm2d(in_planes)
 44 |         self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
 45 |         self.bn2 = nn.BatchNorm2d(planes)
 46 |         self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
 47 |         self.bn3 = nn.BatchNorm2d(planes)
 48 |         self.conv3 = nn.Conv2d(planes, self.expansion*planes, kernel_size=1, bias=False)
 49 | 
 50 |         if stride != 1 or in_planes != self.expansion*planes:
 51 |             self.shortcut = nn.Sequential(
 52 |                 nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False)
 53 |             )
 54 | 
 55 |     def forward(self, x):
 56 |         out = F.relu(self.bn1(x))
 57 |         shortcut = self.shortcut(out) if hasattr(self, 'shortcut') else x
 58 |         out = self.conv1(out)
 59 |         out = self.conv2(F.relu(self.bn2(out)))
 60 |         out = self.conv3(F.relu(self.bn3(out)))
 61 |         out += shortcut
 62 |         return out
 63 | 
 64 | 
 65 | class PreActResNet(nn.Module):
 66 |     def __init__(self, block, num_blocks, num_classes=10):
 67 |         super(PreActResNet, self).__init__()
 68 |         self.in_planes = 64
 69 | 
 70 |         self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
 71 |         self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
 72 |         self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
 73 |         self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
 74 |         self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
 75 |         self.linear = nn.Linear(512*block.expansion, num_classes)
 76 | 
 77 |     def _make_layer(self, block, planes, num_blocks, stride):
 78 |         strides = [stride] + [1]*(num_blocks-1)
 79 |         layers = []
 80 |         for stride in strides:
 81 |             layers.append(block(self.in_planes, planes, stride))
 82 |             self.in_planes = planes * block.expansion
 83 |         return nn.Sequential(*layers)
 84 | 
 85 |     def forward(self, x):
 86 |         out = self.conv1(x)
 87 |         out = self.layer1(out)
 88 |         out = self.layer2(out)
 89 |         out = self.layer3(out)
 90 |         out = self.layer4(out)
 91 |         out = F.avg_pool2d(out, 4)
 92 |         out = out.view(out.size(0), -1)
 93 |         out = self.linear(out)
 94 |         return out
 95 | 
 96 | 
 97 | def PreActResNet18():
 98 |     return PreActResNet(PreActBlock, [2,2,2,2])
 99 | 
def PreActResNet34():
    '''Build a PreActResNet of PreActBlock units with stage depths (3, 4, 6, 3).'''
    return PreActResNet(block=PreActBlock, num_blocks=[3, 4, 6, 3])
102 | 
def PreActResNet50():
    '''Build a PreActResNet of PreActBottleneck units with stage depths (3, 4, 6, 3).'''
    return PreActResNet(block=PreActBottleneck, num_blocks=[3, 4, 6, 3])
105 | 
def PreActResNet101():
    '''Build a PreActResNet of PreActBottleneck units with stage depths (3, 4, 23, 3).'''
    return PreActResNet(block=PreActBottleneck, num_blocks=[3, 4, 23, 3])
108 | 
def PreActResNet152():
    '''Build a PreActResNet of PreActBottleneck units with stage depths (3, 8, 36, 3).'''
    return PreActResNet(block=PreActBottleneck, num_blocks=[3, 8, 36, 3])
111 | 
112 | 
def test():
    '''Smoke test: run PreActResNet18() on one random 3x32x32 input and print the output size.'''
    model = PreActResNet18()
    sample = torch.randn(1, 3, 32, 32)
    print(model(sample).size())
117 | 
118 | # test()
119 | 


--------------------------------------------------------------------------------
/models/dla_simple.py:
--------------------------------------------------------------------------------
  1 | '''Simplified version of DLA in PyTorch.
  2 | 
  3 | Note this implementation is not identical to the original paper version.
  4 | But it seems to work fine.
  5 | 
  6 | See dla.py for the original paper version.
  7 | 
  8 | Reference:
  9 |     Deep Layer Aggregation. https://arxiv.org/abs/1707.06484
 10 | '''
 11 | import torch
 12 | import torch.nn as nn
 13 | import torch.nn.functional as F
 14 | 
 15 | 
 16 | class BasicBlock(nn.Module):
 17 |     expansion = 1
 18 | 
 19 |     def __init__(self, in_planes, planes, stride=1):
 20 |         super(BasicBlock, self).__init__()
 21 |         self.conv1 = nn.Conv2d(
 22 |             in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
 23 |         self.bn1 = nn.BatchNorm2d(planes)
 24 |         self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
 25 |                                stride=1, padding=1, bias=False)
 26 |         self.bn2 = nn.BatchNorm2d(planes)
 27 | 
 28 |         self.shortcut = nn.Sequential()
 29 |         if stride != 1 or in_planes != self.expansion*planes:
 30 |             self.shortcut = nn.Sequential(
 31 |                 nn.Conv2d(in_planes, self.expansion*planes,
 32 |                           kernel_size=1, stride=stride, bias=False),
 33 |                 nn.BatchNorm2d(self.expansion*planes)
 34 |             )
 35 | 
 36 |     def forward(self, x):
 37 |         out = F.relu(self.bn1(self.conv1(x)))
 38 |         out = self.bn2(self.conv2(out))
 39 |         out += self.shortcut(x)
 40 |         out = F.relu(out)
 41 |         return out
 42 | 
 43 | 
 44 | class Root(nn.Module):
 45 |     def __init__(self, in_channels, out_channels, kernel_size=1):
 46 |         super(Root, self).__init__()
 47 |         self.conv = nn.Conv2d(
 48 |             in_channels, out_channels, kernel_size,
 49 |             stride=1, padding=(kernel_size - 1) // 2, bias=False)
 50 |         self.bn = nn.BatchNorm2d(out_channels)
 51 | 
 52 |     def forward(self, xs):
 53 |         x = torch.cat(xs, 1)
 54 |         out = F.relu(self.bn(self.conv(x)))
 55 |         return out
 56 | 
 57 | 
 58 | class Tree(nn.Module):
 59 |     def __init__(self, block, in_channels, out_channels, level=1, stride=1):
 60 |         super(Tree, self).__init__()
 61 |         self.root = Root(2*out_channels, out_channels)
 62 |         if level == 1:
 63 |             self.left_tree = block(in_channels, out_channels, stride=stride)
 64 |             self.right_tree = block(out_channels, out_channels, stride=1)
 65 |         else:
 66 |             self.left_tree = Tree(block, in_channels,
 67 |                                   out_channels, level=level-1, stride=stride)
 68 |             self.right_tree = Tree(block, out_channels,
 69 |                                    out_channels, level=level-1, stride=1)
 70 | 
 71 |     def forward(self, x):
 72 |         out1 = self.left_tree(x)
 73 |         out2 = self.right_tree(out1)
 74 |         out = self.root([out1, out2])
 75 |         return out
 76 | 
 77 | 
 78 | class SimpleDLA(nn.Module):
 79 |     def __init__(self, block=BasicBlock, num_classes=10):
 80 |         super(SimpleDLA, self).__init__()
 81 |         self.base = nn.Sequential(
 82 |             nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1, bias=False),
 83 |             nn.BatchNorm2d(16),
 84 |             nn.ReLU(True)
 85 |         )
 86 | 
 87 |         self.layer1 = nn.Sequential(
 88 |             nn.Conv2d(16, 16, kernel_size=3, stride=1, padding=1, bias=False),
 89 |             nn.BatchNorm2d(16),
 90 |             nn.ReLU(True)
 91 |         )
 92 | 
 93 |         self.layer2 = nn.Sequential(
 94 |             nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1, bias=False),
 95 |             nn.BatchNorm2d(32),
 96 |             nn.ReLU(True)
 97 |         )
 98 | 
 99 |         self.layer3 = Tree(block,  32,  64, level=1, stride=1)
100 |         self.layer4 = Tree(block,  64, 128, level=2, stride=2)
101 |         self.layer5 = Tree(block, 128, 256, level=2, stride=2)
102 |         self.layer6 = Tree(block, 256, 512, level=1, stride=2)
103 |         self.linear = nn.Linear(512, num_classes)
104 | 
105 |     def forward(self, x):
106 |         out = self.base(x)
107 |         out = self.layer1(out)
108 |         out = self.layer2(out)
109 |         out = self.layer3(out)
110 |         out = self.layer4(out)
111 |         out = self.layer5(out)
112 |         out = self.layer6(out)
113 |         out = F.avg_pool2d(out, 4)
114 |         out = out.view(out.size(0), -1)
115 |         out = self.linear(out)
116 |         return out
117 | 
118 | 
def test():
    '''Smoke test: print SimpleDLA, run it on one random 3x32x32 input, print the output size.'''
    model = SimpleDLA()
    print(model)
    sample = torch.randn(1, 3, 32, 32)
    print(model(sample).size())


# Run the smoke test when this file is executed as a script.
if __name__ == '__main__':
    test()
129 | 


--------------------------------------------------------------------------------
/models/pnasnet.py:
--------------------------------------------------------------------------------
  1 | '''PNASNet in PyTorch.
  2 | 
  3 | Paper: Progressive Neural Architecture Search
  4 | '''
  5 | import torch
  6 | import torch.nn as nn
  7 | import torch.nn.functional as F
  8 | 
  9 | 
 10 | class SepConv(nn.Module):
 11 |     '''Separable Convolution.'''
 12 |     def __init__(self, in_planes, out_planes, kernel_size, stride):
 13 |         super(SepConv, self).__init__()
 14 |         self.conv1 = nn.Conv2d(in_planes, out_planes,
 15 |                                kernel_size, stride,
 16 |                                padding=(kernel_size-1)//2,
 17 |                                bias=False, groups=in_planes)
 18 |         self.bn1 = nn.BatchNorm2d(out_planes)
 19 | 
 20 |     def forward(self, x):
 21 |         return self.bn1(self.conv1(x))
 22 | 
 23 | 
 24 | class CellA(nn.Module):
 25 |     def __init__(self, in_planes, out_planes, stride=1):
 26 |         super(CellA, self).__init__()
 27 |         self.stride = stride
 28 |         self.sep_conv1 = SepConv(in_planes, out_planes, kernel_size=7, stride=stride)
 29 |         if stride==2:
 30 |             self.conv1 = nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False)
 31 |             self.bn1 = nn.BatchNorm2d(out_planes)
 32 | 
 33 |     def forward(self, x):
 34 |         y1 = self.sep_conv1(x)
 35 |         y2 = F.max_pool2d(x, kernel_size=3, stride=self.stride, padding=1)
 36 |         if self.stride==2:
 37 |             y2 = self.bn1(self.conv1(y2))
 38 |         return F.relu(y1+y2)
 39 | 
 40 | class CellB(nn.Module):
 41 |     def __init__(self, in_planes, out_planes, stride=1):
 42 |         super(CellB, self).__init__()
 43 |         self.stride = stride
 44 |         # Left branch
 45 |         self.sep_conv1 = SepConv(in_planes, out_planes, kernel_size=7, stride=stride)
 46 |         self.sep_conv2 = SepConv(in_planes, out_planes, kernel_size=3, stride=stride)
 47 |         # Right branch
 48 |         self.sep_conv3 = SepConv(in_planes, out_planes, kernel_size=5, stride=stride)
 49 |         if stride==2:
 50 |             self.conv1 = nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False)
 51 |             self.bn1 = nn.BatchNorm2d(out_planes)
 52 |         # Reduce channels
 53 |         self.conv2 = nn.Conv2d(2*out_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False)
 54 |         self.bn2 = nn.BatchNorm2d(out_planes)
 55 | 
 56 |     def forward(self, x):
 57 |         # Left branch
 58 |         y1 = self.sep_conv1(x)
 59 |         y2 = self.sep_conv2(x)
 60 |         # Right branch
 61 |         y3 = F.max_pool2d(x, kernel_size=3, stride=self.stride, padding=1)
 62 |         if self.stride==2:
 63 |             y3 = self.bn1(self.conv1(y3))
 64 |         y4 = self.sep_conv3(x)
 65 |         # Concat & reduce channels
 66 |         b1 = F.relu(y1+y2)
 67 |         b2 = F.relu(y3+y4)
 68 |         y = torch.cat([b1,b2], 1)
 69 |         return F.relu(self.bn2(self.conv2(y)))
 70 | 
 71 | class PNASNet(nn.Module):
 72 |     def __init__(self, cell_type, num_cells, num_planes):
 73 |         super(PNASNet, self).__init__()
 74 |         self.in_planes = num_planes
 75 |         self.cell_type = cell_type
 76 | 
 77 |         self.conv1 = nn.Conv2d(3, num_planes, kernel_size=3, stride=1, padding=1, bias=False)
 78 |         self.bn1 = nn.BatchNorm2d(num_planes)
 79 | 
 80 |         self.layer1 = self._make_layer(num_planes, num_cells=6)
 81 |         self.layer2 = self._downsample(num_planes*2)
 82 |         self.layer3 = self._make_layer(num_planes*2, num_cells=6)
 83 |         self.layer4 = self._downsample(num_planes*4)
 84 |         self.layer5 = self._make_layer(num_planes*4, num_cells=6)
 85 | 
 86 |         self.linear = nn.Linear(num_planes*4, 10)
 87 | 
 88 |     def _make_layer(self, planes, num_cells):
 89 |         layers = []
 90 |         for _ in range(num_cells):
 91 |             layers.append(self.cell_type(self.in_planes, planes, stride=1))
 92 |             self.in_planes = planes
 93 |         return nn.Sequential(*layers)
 94 | 
 95 |     def _downsample(self, planes):
 96 |         layer = self.cell_type(self.in_planes, planes, stride=2)
 97 |         self.in_planes = planes
 98 |         return layer
 99 | 
100 |     def forward(self, x):
101 |         out = F.relu(self.bn1(self.conv1(x)))
102 |         out = self.layer1(out)
103 |         out = self.layer2(out)
104 |         out = self.layer3(out)
105 |         out = self.layer4(out)
106 |         out = self.layer5(out)
107 |         out = F.avg_pool2d(out, 8)
108 |         out = self.linear(out.view(out.size(0), -1))
109 |         return out
110 | 
111 | 
def PNASNetA():
    '''Build a PNASNet of CellA cells with num_cells=6 and a base width of 44 channels.'''
    return PNASNet(cell_type=CellA, num_cells=6, num_planes=44)
114 | 
def PNASNetB():
    '''Build a PNASNet of CellB cells with num_cells=6 and a base width of 32 channels.'''
    return PNASNet(cell_type=CellB, num_cells=6, num_planes=32)
117 | 
118 | 
def test():
    '''Smoke test: run PNASNetB() on one random 3x32x32 input and print the output.'''
    model = PNASNetB()
    sample = torch.randn(1, 3, 32, 32)
    print(model(sample))
124 | 
125 | # test()
126 | 


--------------------------------------------------------------------------------
/models/resnet.py:
--------------------------------------------------------------------------------
  1 | '''ResNet in PyTorch.
  2 | 
  3 | For Pre-activation ResNet, see 'preact_resnet.py'.
  4 | 
  5 | Reference:
  6 | [1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
  7 |     Deep Residual Learning for Image Recognition. arXiv:1512.03385
  8 | '''
  9 | import torch
 10 | import torch.nn as nn
 11 | import torch.nn.functional as F
 12 | 
 13 | 
 14 | class BasicBlock(nn.Module):
 15 |     expansion = 1
 16 | 
 17 |     def __init__(self, in_planes, planes, stride=1):
 18 |         super(BasicBlock, self).__init__()
 19 |         self.conv1 = nn.Conv2d(
 20 |             in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
 21 |         self.bn1 = nn.BatchNorm2d(planes)
 22 |         self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
 23 |                                stride=1, padding=1, bias=False)
 24 |         self.bn2 = nn.BatchNorm2d(planes)
 25 | 
 26 |         self.shortcut = nn.Sequential()
 27 |         if stride != 1 or in_planes != self.expansion*planes:
 28 |             self.shortcut = nn.Sequential(
 29 |                 nn.Conv2d(in_planes, self.expansion*planes,
 30 |                           kernel_size=1, stride=stride, bias=False),
 31 |                 nn.BatchNorm2d(self.expansion*planes)
 32 |             )
 33 | 
 34 |     def forward(self, x):
 35 |         out = F.relu(self.bn1(self.conv1(x)))
 36 |         out = self.bn2(self.conv2(out))
 37 |         out += self.shortcut(x)
 38 |         out = F.relu(out)
 39 |         return out
 40 | 
 41 | 
 42 | class Bottleneck(nn.Module):
 43 |     expansion = 4
 44 | 
 45 |     def __init__(self, in_planes, planes, stride=1):
 46 |         super(Bottleneck, self).__init__()
 47 |         self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
 48 |         self.bn1 = nn.BatchNorm2d(planes)
 49 |         self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
 50 |                                stride=stride, padding=1, bias=False)
 51 |         self.bn2 = nn.BatchNorm2d(planes)
 52 |         self.conv3 = nn.Conv2d(planes, self.expansion *
 53 |                                planes, kernel_size=1, bias=False)
 54 |         self.bn3 = nn.BatchNorm2d(self.expansion*planes)
 55 | 
 56 |         self.shortcut = nn.Sequential()
 57 |         if stride != 1 or in_planes != self.expansion*planes:
 58 |             self.shortcut = nn.Sequential(
 59 |                 nn.Conv2d(in_planes, self.expansion*planes,
 60 |                           kernel_size=1, stride=stride, bias=False),
 61 |                 nn.BatchNorm2d(self.expansion*planes)
 62 |             )
 63 | 
 64 |     def forward(self, x):
 65 |         out = F.relu(self.bn1(self.conv1(x)))
 66 |         out = F.relu(self.bn2(self.conv2(out)))
 67 |         out = self.bn3(self.conv3(out))
 68 |         out += self.shortcut(x)
 69 |         out = F.relu(out)
 70 |         return out
 71 | 
 72 | 
 73 | class ResNet(nn.Module):
 74 |     def __init__(self, block, num_blocks, num_classes=10):
 75 |         super(ResNet, self).__init__()
 76 |         self.in_planes = 64
 77 | 
 78 |         self.conv1 = nn.Conv2d(3, 64, kernel_size=3,
 79 |                                stride=1, padding=1, bias=False)
 80 |         self.bn1 = nn.BatchNorm2d(64)
 81 |         self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
 82 |         self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
 83 |         self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
 84 |         self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
 85 |         self.linear = nn.Linear(512*block.expansion, num_classes)
 86 | 
 87 |     def _make_layer(self, block, planes, num_blocks, stride):
 88 |         strides = [stride] + [1]*(num_blocks-1)
 89 |         layers = []
 90 |         for stride in strides:
 91 |             layers.append(block(self.in_planes, planes, stride))
 92 |             self.in_planes = planes * block.expansion
 93 |         return nn.Sequential(*layers)
 94 | 
 95 |     def forward(self, x):
 96 |         out = F.relu(self.bn1(self.conv1(x)))
 97 |         out = self.layer1(out)
 98 |         out = self.layer2(out)
 99 |         out = self.layer3(out)
100 |         out = self.layer4(out)
101 |         out = F.avg_pool2d(out, 4)
102 |         out = out.view(out.size(0), -1)
103 |         out = self.linear(out)
104 |         return out
105 | 
106 | 
def ResNet18():
    '''Build a ResNet of BasicBlock units with stage depths (2, 2, 2, 2).'''
    return ResNet(block=BasicBlock, num_blocks=[2, 2, 2, 2])
109 | 
110 | 
def ResNet34():
    '''Build a ResNet of BasicBlock units with stage depths (3, 4, 6, 3).'''
    return ResNet(block=BasicBlock, num_blocks=[3, 4, 6, 3])
113 | 
114 | 
def ResNet50():
    '''Build a ResNet of Bottleneck units with stage depths (3, 4, 6, 3).'''
    return ResNet(block=Bottleneck, num_blocks=[3, 4, 6, 3])
117 | 
118 | 
def ResNet101():
    '''Build a ResNet of Bottleneck units with stage depths (3, 4, 23, 3).'''
    return ResNet(block=Bottleneck, num_blocks=[3, 4, 23, 3])
121 | 
122 | 
def ResNet152():
    '''Build a ResNet of Bottleneck units with stage depths (3, 8, 36, 3).'''
    return ResNet(block=Bottleneck, num_blocks=[3, 8, 36, 3])
125 | 
126 | 
def test():
    '''Smoke test: run ResNet18 on one random 3x32x32 input and print the output size.'''
    model = ResNet18()
    sample = torch.randn(1, 3, 32, 32)
    logits = model(sample)
    print(logits.size())
131 | 
132 | # test()
133 | 


--------------------------------------------------------------------------------
/models/dla.py:
--------------------------------------------------------------------------------
  1 | '''DLA in PyTorch.
  2 | 
  3 | Reference:
  4 |     Deep Layer Aggregation. https://arxiv.org/abs/1707.06484
  5 | '''
  6 | import torch
  7 | import torch.nn as nn
  8 | import torch.nn.functional as F
  9 | 
 10 | 
 11 | class BasicBlock(nn.Module):
 12 |     expansion = 1
 13 | 
 14 |     def __init__(self, in_planes, planes, stride=1):
 15 |         super(BasicBlock, self).__init__()
 16 |         self.conv1 = nn.Conv2d(
 17 |             in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
 18 |         self.bn1 = nn.BatchNorm2d(planes)
 19 |         self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
 20 |                                stride=1, padding=1, bias=False)
 21 |         self.bn2 = nn.BatchNorm2d(planes)
 22 | 
 23 |         self.shortcut = nn.Sequential()
 24 |         if stride != 1 or in_planes != self.expansion*planes:
 25 |             self.shortcut = nn.Sequential(
 26 |                 nn.Conv2d(in_planes, self.expansion*planes,
 27 |                           kernel_size=1, stride=stride, bias=False),
 28 |                 nn.BatchNorm2d(self.expansion*planes)
 29 |             )
 30 | 
 31 |     def forward(self, x):
 32 |         out = F.relu(self.bn1(self.conv1(x)))
 33 |         out = self.bn2(self.conv2(out))
 34 |         out += self.shortcut(x)
 35 |         out = F.relu(out)
 36 |         return out
 37 | 
 38 | 
 39 | class Root(nn.Module):
 40 |     def __init__(self, in_channels, out_channels, kernel_size=1):
 41 |         super(Root, self).__init__()
 42 |         self.conv = nn.Conv2d(
 43 |             in_channels, out_channels, kernel_size,
 44 |             stride=1, padding=(kernel_size - 1) // 2, bias=False)
 45 |         self.bn = nn.BatchNorm2d(out_channels)
 46 | 
 47 |     def forward(self, xs):
 48 |         x = torch.cat(xs, 1)
 49 |         out = F.relu(self.bn(self.conv(x)))
 50 |         return out
 51 | 
 52 | 
 53 | class Tree(nn.Module):
 54 |     def __init__(self, block, in_channels, out_channels, level=1, stride=1):
 55 |         super(Tree, self).__init__()
 56 |         self.level = level
 57 |         if level == 1:
 58 |             self.root = Root(2*out_channels, out_channels)
 59 |             self.left_node = block(in_channels, out_channels, stride=stride)
 60 |             self.right_node = block(out_channels, out_channels, stride=1)
 61 |         else:
 62 |             self.root = Root((level+2)*out_channels, out_channels)
 63 |             for i in reversed(range(1, level)):
 64 |                 subtree = Tree(block, in_channels, out_channels,
 65 |                                level=i, stride=stride)
 66 |                 self.__setattr__('level_%d' % i, subtree)
 67 |             self.prev_root = block(in_channels, out_channels, stride=stride)
 68 |             self.left_node = block(out_channels, out_channels, stride=1)
 69 |             self.right_node = block(out_channels, out_channels, stride=1)
 70 | 
 71 |     def forward(self, x):
 72 |         xs = [self.prev_root(x)] if self.level > 1 else []
 73 |         for i in reversed(range(1, self.level)):
 74 |             level_i = self.__getattr__('level_%d' % i)
 75 |             x = level_i(x)
 76 |             xs.append(x)
 77 |         x = self.left_node(x)
 78 |         xs.append(x)
 79 |         x = self.right_node(x)
 80 |         xs.append(x)
 81 |         out = self.root(xs)
 82 |         return out
 83 | 
 84 | 
 85 | class DLA(nn.Module):
 86 |     def __init__(self, block=BasicBlock, num_classes=10):
 87 |         super(DLA, self).__init__()
 88 |         self.base = nn.Sequential(
 89 |             nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1, bias=False),
 90 |             nn.BatchNorm2d(16),
 91 |             nn.ReLU(True)
 92 |         )
 93 | 
 94 |         self.layer1 = nn.Sequential(
 95 |             nn.Conv2d(16, 16, kernel_size=3, stride=1, padding=1, bias=False),
 96 |             nn.BatchNorm2d(16),
 97 |             nn.ReLU(True)
 98 |         )
 99 | 
100 |         self.layer2 = nn.Sequential(
101 |             nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1, bias=False),
102 |             nn.BatchNorm2d(32),
103 |             nn.ReLU(True)
104 |         )
105 | 
106 |         self.layer3 = Tree(block,  32,  64, level=1, stride=1)
107 |         self.layer4 = Tree(block,  64, 128, level=2, stride=2)
108 |         self.layer5 = Tree(block, 128, 256, level=2, stride=2)
109 |         self.layer6 = Tree(block, 256, 512, level=1, stride=2)
110 |         self.linear = nn.Linear(512, num_classes)
111 | 
112 |     def forward(self, x):
113 |         out = self.base(x)
114 |         out = self.layer1(out)
115 |         out = self.layer2(out)
116 |         out = self.layer3(out)
117 |         out = self.layer4(out)
118 |         out = self.layer5(out)
119 |         out = self.layer6(out)
120 |         out = F.avg_pool2d(out, 4)
121 |         out = out.view(out.size(0), -1)
122 |         out = self.linear(out)
123 |         return out
124 | 
125 | 
def test():
    '''Smoke test: print the DLA model and its output size for one random 3x32x32 input.'''
    model = DLA()
    print(model)
    sample = torch.randn(1, 3, 32, 32)
    logits = model(sample)
    print(logits.size())


# Run the smoke test only when this file is executed as a script.
if __name__ == '__main__':
    test()
136 | 


--------------------------------------------------------------------------------
/models/regnet.py:
--------------------------------------------------------------------------------
  1 | '''RegNet in PyTorch.
  2 | 
  3 | Paper: "Designing Network Design Spaces".
  4 | 
  5 | Reference: https://github.com/facebookresearch/pycls
  6 | '''
  7 | import torch
  8 | import torch.nn as nn
  9 | import torch.nn.functional as F
 10 | 
 11 | 
 12 | class SE(nn.Module):
 13 |     '''Squeeze-and-Excitation block.'''
 14 | 
 15 |     def __init__(self, in_planes, se_planes):
 16 |         super(SE, self).__init__()
 17 |         self.se1 = nn.Conv2d(in_planes, se_planes, kernel_size=1, bias=True)
 18 |         self.se2 = nn.Conv2d(se_planes, in_planes, kernel_size=1, bias=True)
 19 | 
 20 |     def forward(self, x):
 21 |         out = F.adaptive_avg_pool2d(x, (1, 1))
 22 |         out = F.relu(self.se1(out))
 23 |         out = self.se2(out).sigmoid()
 24 |         out = x * out
 25 |         return out
 26 | 
 27 | 
 28 | class Block(nn.Module):
 29 |     def __init__(self, w_in, w_out, stride, group_width, bottleneck_ratio, se_ratio):
 30 |         super(Block, self).__init__()
 31 |         # 1x1
 32 |         w_b = int(round(w_out * bottleneck_ratio))
 33 |         self.conv1 = nn.Conv2d(w_in, w_b, kernel_size=1, bias=False)
 34 |         self.bn1 = nn.BatchNorm2d(w_b)
 35 |         # 3x3
 36 |         num_groups = w_b // group_width
 37 |         self.conv2 = nn.Conv2d(w_b, w_b, kernel_size=3,
 38 |                                stride=stride, padding=1, groups=num_groups, bias=False)
 39 |         self.bn2 = nn.BatchNorm2d(w_b)
 40 |         # se
 41 |         self.with_se = se_ratio > 0
 42 |         if self.with_se:
 43 |             w_se = int(round(w_in * se_ratio))
 44 |             self.se = SE(w_b, w_se)
 45 |         # 1x1
 46 |         self.conv3 = nn.Conv2d(w_b, w_out, kernel_size=1, bias=False)
 47 |         self.bn3 = nn.BatchNorm2d(w_out)
 48 | 
 49 |         self.shortcut = nn.Sequential()
 50 |         if stride != 1 or w_in != w_out:
 51 |             self.shortcut = nn.Sequential(
 52 |                 nn.Conv2d(w_in, w_out,
 53 |                           kernel_size=1, stride=stride, bias=False),
 54 |                 nn.BatchNorm2d(w_out)
 55 |             )
 56 | 
 57 |     def forward(self, x):
 58 |         out = F.relu(self.bn1(self.conv1(x)))
 59 |         out = F.relu(self.bn2(self.conv2(out)))
 60 |         if self.with_se:
 61 |             out = self.se(out)
 62 |         out = self.bn3(self.conv3(out))
 63 |         out += self.shortcut(x)
 64 |         out = F.relu(out)
 65 |         return out
 66 | 
 67 | 
 68 | class RegNet(nn.Module):
 69 |     def __init__(self, cfg, num_classes=10):
 70 |         super(RegNet, self).__init__()
 71 |         self.cfg = cfg
 72 |         self.in_planes = 64
 73 |         self.conv1 = nn.Conv2d(3, 64, kernel_size=3,
 74 |                                stride=1, padding=1, bias=False)
 75 |         self.bn1 = nn.BatchNorm2d(64)
 76 |         self.layer1 = self._make_layer(0)
 77 |         self.layer2 = self._make_layer(1)
 78 |         self.layer3 = self._make_layer(2)
 79 |         self.layer4 = self._make_layer(3)
 80 |         self.linear = nn.Linear(self.cfg['widths'][-1], num_classes)
 81 | 
 82 |     def _make_layer(self, idx):
 83 |         depth = self.cfg['depths'][idx]
 84 |         width = self.cfg['widths'][idx]
 85 |         stride = self.cfg['strides'][idx]
 86 |         group_width = self.cfg['group_width']
 87 |         bottleneck_ratio = self.cfg['bottleneck_ratio']
 88 |         se_ratio = self.cfg['se_ratio']
 89 | 
 90 |         layers = []
 91 |         for i in range(depth):
 92 |             s = stride if i == 0 else 1
 93 |             layers.append(Block(self.in_planes, width,
 94 |                                 s, group_width, bottleneck_ratio, se_ratio))
 95 |             self.in_planes = width
 96 |         return nn.Sequential(*layers)
 97 | 
 98 |     def forward(self, x):
 99 |         out = F.relu(self.bn1(self.conv1(x)))
100 |         out = self.layer1(out)
101 |         out = self.layer2(out)
102 |         out = self.layer3(out)
103 |         out = self.layer4(out)
104 |         out = F.adaptive_avg_pool2d(out, (1, 1))
105 |         out = out.view(out.size(0), -1)
106 |         out = self.linear(out)
107 |         return out
108 | 
109 | 
def RegNetX_200MF(num_classes=10):
    '''Build the RegNetX_200MF configuration (no squeeze-and-excitation).

    Args:
        num_classes: output size of the classifier; the default of 10
            matches what RegNet used when this argument did not exist.
    '''
    cfg = {
        'depths': [1, 1, 4, 7],
        'widths': [24, 56, 152, 368],
        'strides': [1, 1, 2, 2],
        'group_width': 8,
        'bottleneck_ratio': 1,
        'se_ratio': 0,
    }
    return RegNet(cfg, num_classes=num_classes)
120 | 
121 | 
def RegNetX_400MF(num_classes=10):
    '''Build the RegNetX_400MF configuration (no squeeze-and-excitation).

    Args:
        num_classes: output size of the classifier; the default of 10
            matches what RegNet used when this argument did not exist.
    '''
    cfg = {
        'depths': [1, 2, 7, 12],
        'widths': [32, 64, 160, 384],
        'strides': [1, 1, 2, 2],
        'group_width': 16,
        'bottleneck_ratio': 1,
        'se_ratio': 0,
    }
    return RegNet(cfg, num_classes=num_classes)
132 | 
133 | 
def RegNetY_400MF(num_classes=10):
    '''Build the RegNetY_400MF configuration (se_ratio of 0.25).

    Args:
        num_classes: output size of the classifier; the default of 10
            matches what RegNet used when this argument did not exist.
    '''
    cfg = {
        'depths': [1, 2, 7, 12],
        'widths': [32, 64, 160, 384],
        'strides': [1, 1, 2, 2],
        'group_width': 16,
        'bottleneck_ratio': 1,
        'se_ratio': 0.25,
    }
    return RegNet(cfg, num_classes=num_classes)
144 | 
145 | 
def test():
    '''Smoke test: print RegNetX_200MF and its output shape for two random 3x32x32 inputs.'''
    model = RegNetX_200MF()
    print(model)
    batch = torch.randn(2, 3, 32, 32)
    logits = model(batch)
    print(logits.shape)


# Run the smoke test only when this file is executed as a script.
if __name__ == '__main__':
    test()
156 | 


--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
  1 | '''Train CIFAR10 with PyTorch.'''
  2 | import torch
  3 | import torch.nn as nn
  4 | import torch.optim as optim
  5 | import torch.nn.functional as F
  6 | import torch.backends.cudnn as cudnn
  7 | 
  8 | import torchvision
  9 | import torchvision.transforms as transforms
 10 | 
 11 | import os
 12 | import argparse
 13 | 
 14 | from models import *
 15 | from utils import progress_bar
 16 | 
 17 | 
 18 | parser = argparse.ArgumentParser(description='PyTorch CIFAR10 Training')
 19 | parser.add_argument('--lr', default=0.1, type=float, help='learning rate')
 20 | parser.add_argument('--resume', '-r', action='store_true',
 21 |                     help='resume from checkpoint')
 22 | args = parser.parse_args()
 23 | 
 24 | device = 'cuda' if torch.cuda.is_available() else 'cpu'
 25 | best_acc = 0  # best test accuracy
 26 | start_epoch = 0  # start from epoch 0 or last checkpoint epoch
 27 | 
 28 | # Data
 29 | print('==> Preparing data..')
 30 | transform_train = transforms.Compose([
 31 |     transforms.RandomCrop(32, padding=4),
 32 |     transforms.RandomHorizontalFlip(),
 33 |     transforms.ToTensor(),
 34 |     transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
 35 | ])
 36 | 
 37 | transform_test = transforms.Compose([
 38 |     transforms.ToTensor(),
 39 |     transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
 40 | ])
 41 | 
 42 | trainset = torchvision.datasets.CIFAR10(
 43 |     root='./data', train=True, download=True, transform=transform_train)
 44 | trainloader = torch.utils.data.DataLoader(
 45 |     trainset, batch_size=128, shuffle=True, num_workers=2)
 46 | 
 47 | testset = torchvision.datasets.CIFAR10(
 48 |     root='./data', train=False, download=True, transform=transform_test)
 49 | testloader = torch.utils.data.DataLoader(
 50 |     testset, batch_size=100, shuffle=False, num_workers=2)
 51 | 
 52 | classes = ('plane', 'car', 'bird', 'cat', 'deer',
 53 |            'dog', 'frog', 'horse', 'ship', 'truck')
 54 | 
 55 | # Model
 56 | print('==> Building model..')
 57 | # net = VGG('VGG19')
 58 | # net = ResNet18()
 59 | # net = PreActResNet18()
 60 | # net = GoogLeNet()
 61 | # net = DenseNet121()
 62 | # net = ResNeXt29_2x64d()
 63 | # net = MobileNet()
 64 | # net = MobileNetV2()
 65 | # net = DPN92()
 66 | # net = ShuffleNetG2()
 67 | # net = SENet18()
 68 | # net = ShuffleNetV2(1)
 69 | # net = EfficientNetB0()
 70 | # net = RegNetX_200MF()
 71 | net = SimpleDLA()
 72 | net = net.to(device)
 73 | if device == 'cuda':
 74 |     net = torch.nn.DataParallel(net)
 75 |     cudnn.benchmark = True
 76 | 
 77 | if args.resume:
 78 |     # Load checkpoint.
 79 |     print('==> Resuming from checkpoint..')
 80 |     assert os.path.isdir('checkpoint'), 'Error: no checkpoint directory found!'
 81 |     checkpoint = torch.load('./checkpoint/ckpt.pth')
 82 |     net.load_state_dict(checkpoint['net'])
 83 |     best_acc = checkpoint['acc']
 84 |     start_epoch = checkpoint['epoch']
 85 | 
 86 | criterion = nn.CrossEntropyLoss()
 87 | optimizer = optim.SGD(net.parameters(), lr=args.lr,
 88 |                       momentum=0.9, weight_decay=5e-4)
 89 | scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=200)
 90 | 
 91 | 
 92 | # Training
 93 | def train(epoch):
 94 |     print('\nEpoch: %d' % epoch)
 95 |     net.train()
 96 |     train_loss = 0
 97 |     correct = 0
 98 |     total = 0
 99 |     for batch_idx, (inputs, targets) in enumerate(trainloader):
100 |         inputs, targets = inputs.to(device), targets.to(device)
101 |         optimizer.zero_grad()
102 |         outputs = net(inputs)
103 |         loss = criterion(outputs, targets)
104 |         loss.backward()
105 |         optimizer.step()
106 | 
107 |         train_loss += loss.item()
108 |         _, predicted = outputs.max(1)
109 |         total += targets.size(0)
110 |         correct += predicted.eq(targets).sum().item()
111 | 
112 |         progress_bar(batch_idx, len(trainloader), 'Loss: %.3f | Acc: %.3f%% (%d/%d)'
113 |                      % (train_loss/(batch_idx+1), 100.*correct/total, correct, total))
114 | 
115 | 
def test(epoch):
    '''Evaluate `net` on `testloader` and checkpoint it on a new best accuracy.

    Args:
        epoch: epoch index stored in the checkpoint.

    Side effects:
        Updates the module-level `best_acc` and writes
        './checkpoint/ckpt.pth' whenever the accuracy improves.
    '''
    global best_acc
    net.eval()
    test_loss = 0
    correct = 0
    total = 0
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(testloader):
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = net(inputs)
            loss = criterion(outputs, targets)

            test_loss += loss.item()
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()

            progress_bar(batch_idx, len(testloader), 'Loss: %.3f | Acc: %.3f%% (%d/%d)'
                         % (test_loss/(batch_idx+1), 100.*correct/total, correct, total))

    # Save checkpoint.
    acc = 100.*correct/total
    if acc > best_acc:
        print('Saving..')
        state = {
            'net': net.state_dict(),
            'acc': acc,
            'epoch': epoch,
        }
        # exist_ok avoids the check-then-create race of isdir() + mkdir().
        os.makedirs('checkpoint', exist_ok=True)
        torch.save(state, './checkpoint/ckpt.pth')
        best_acc = acc
149 | 
150 | 
# Train and evaluate for 200 epochs counted from start_epoch, advancing the
# learning-rate schedule once per epoch.
final_epoch = start_epoch + 200
for epoch in range(start_epoch, final_epoch):
    train(epoch)
    test(epoch)
    scheduler.step()
155 | 


--------------------------------------------------------------------------------
/models/shufflenetv2.py:
--------------------------------------------------------------------------------
  1 | '''ShuffleNetV2 in PyTorch.
  2 | 
  3 | See the paper "ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design" for more details.
  4 | '''
  5 | import torch
  6 | import torch.nn as nn
  7 | import torch.nn.functional as F
  8 | 
  9 | 
 10 | class ShuffleBlock(nn.Module):
 11 |     def __init__(self, groups=2):
 12 |         super(ShuffleBlock, self).__init__()
 13 |         self.groups = groups
 14 | 
 15 |     def forward(self, x):
 16 |         '''Channel shuffle: [N,C,H,W] -> [N,g,C/g,H,W] -> [N,C/g,g,H,w] -> [N,C,H,W]'''
 17 |         N, C, H, W = x.size()
 18 |         g = self.groups
 19 |         return x.view(N, g, C//g, H, W).permute(0, 2, 1, 3, 4).reshape(N, C, H, W)
 20 | 
 21 | 
 22 | class SplitBlock(nn.Module):
 23 |     def __init__(self, ratio):
 24 |         super(SplitBlock, self).__init__()
 25 |         self.ratio = ratio
 26 | 
 27 |     def forward(self, x):
 28 |         c = int(x.size(1) * self.ratio)
 29 |         return x[:, :c, :, :], x[:, c:, :, :]
 30 | 
 31 | 
 32 | class BasicBlock(nn.Module):
 33 |     def __init__(self, in_channels, split_ratio=0.5):
 34 |         super(BasicBlock, self).__init__()
 35 |         self.split = SplitBlock(split_ratio)
 36 |         in_channels = int(in_channels * split_ratio)
 37 |         self.conv1 = nn.Conv2d(in_channels, in_channels,
 38 |                                kernel_size=1, bias=False)
 39 |         self.bn1 = nn.BatchNorm2d(in_channels)
 40 |         self.conv2 = nn.Conv2d(in_channels, in_channels,
 41 |                                kernel_size=3, stride=1, padding=1, groups=in_channels, bias=False)
 42 |         self.bn2 = nn.BatchNorm2d(in_channels)
 43 |         self.conv3 = nn.Conv2d(in_channels, in_channels,
 44 |                                kernel_size=1, bias=False)
 45 |         self.bn3 = nn.BatchNorm2d(in_channels)
 46 |         self.shuffle = ShuffleBlock()
 47 | 
 48 |     def forward(self, x):
 49 |         x1, x2 = self.split(x)
 50 |         out = F.relu(self.bn1(self.conv1(x2)))
 51 |         out = self.bn2(self.conv2(out))
 52 |         out = F.relu(self.bn3(self.conv3(out)))
 53 |         out = torch.cat([x1, out], 1)
 54 |         out = self.shuffle(out)
 55 |         return out
 56 | 
 57 | 
 58 | class DownBlock(nn.Module):
 59 |     def __init__(self, in_channels, out_channels):
 60 |         super(DownBlock, self).__init__()
 61 |         mid_channels = out_channels // 2
 62 |         # left
 63 |         self.conv1 = nn.Conv2d(in_channels, in_channels,
 64 |                                kernel_size=3, stride=2, padding=1, groups=in_channels, bias=False)
 65 |         self.bn1 = nn.BatchNorm2d(in_channels)
 66 |         self.conv2 = nn.Conv2d(in_channels, mid_channels,
 67 |                                kernel_size=1, bias=False)
 68 |         self.bn2 = nn.BatchNorm2d(mid_channels)
 69 |         # right
 70 |         self.conv3 = nn.Conv2d(in_channels, mid_channels,
 71 |                                kernel_size=1, bias=False)
 72 |         self.bn3 = nn.BatchNorm2d(mid_channels)
 73 |         self.conv4 = nn.Conv2d(mid_channels, mid_channels,
 74 |                                kernel_size=3, stride=2, padding=1, groups=mid_channels, bias=False)
 75 |         self.bn4 = nn.BatchNorm2d(mid_channels)
 76 |         self.conv5 = nn.Conv2d(mid_channels, mid_channels,
 77 |                                kernel_size=1, bias=False)
 78 |         self.bn5 = nn.BatchNorm2d(mid_channels)
 79 | 
 80 |         self.shuffle = ShuffleBlock()
 81 | 
 82 |     def forward(self, x):
 83 |         # left
 84 |         out1 = self.bn1(self.conv1(x))
 85 |         out1 = F.relu(self.bn2(self.conv2(out1)))
 86 |         # right
 87 |         out2 = F.relu(self.bn3(self.conv3(x)))
 88 |         out2 = self.bn4(self.conv4(out2))
 89 |         out2 = F.relu(self.bn5(self.conv5(out2)))
 90 |         # concat
 91 |         out = torch.cat([out1, out2], 1)
 92 |         out = self.shuffle(out)
 93 |         return out
 94 | 
 95 | 
 96 | class ShuffleNetV2(nn.Module):
 97 |     def __init__(self, net_size):
 98 |         super(ShuffleNetV2, self).__init__()
 99 |         out_channels = configs[net_size]['out_channels']
100 |         num_blocks = configs[net_size]['num_blocks']
101 | 
102 |         self.conv1 = nn.Conv2d(3, 24, kernel_size=3,
103 |                                stride=1, padding=1, bias=False)
104 |         self.bn1 = nn.BatchNorm2d(24)
105 |         self.in_channels = 24
106 |         self.layer1 = self._make_layer(out_channels[0], num_blocks[0])
107 |         self.layer2 = self._make_layer(out_channels[1], num_blocks[1])
108 |         self.layer3 = self._make_layer(out_channels[2], num_blocks[2])
109 |         self.conv2 = nn.Conv2d(out_channels[2], out_channels[3],
110 |                                kernel_size=1, stride=1, padding=0, bias=False)
111 |         self.bn2 = nn.BatchNorm2d(out_channels[3])
112 |         self.linear = nn.Linear(out_channels[3], 10)
113 | 
114 |     def _make_layer(self, out_channels, num_blocks):
115 |         layers = [DownBlock(self.in_channels, out_channels)]
116 |         for i in range(num_blocks):
117 |             layers.append(BasicBlock(out_channels))
118 |             self.in_channels = out_channels
119 |         return nn.Sequential(*layers)
120 | 
121 |     def forward(self, x):
122 |         out = F.relu(self.bn1(self.conv1(x)))
123 |         # out = F.max_pool2d(out, 3, stride=2, padding=1)
124 |         out = self.layer1(out)
125 |         out = self.layer2(out)
126 |         out = self.layer3(out)
127 |         out = F.relu(self.bn2(self.conv2(out)))
128 |         out = F.avg_pool2d(out, 4)
129 |         out = out.view(out.size(0), -1)
130 |         out = self.linear(out)
131 |         return out
132 | 
133 | 
# ShuffleNetV2 settings keyed by `net_size`: the channel widths of the three
# stages plus the final 1x1 conv, and the BasicBlock count of each stage.
configs = {
    net_size: {'out_channels': widths, 'num_blocks': (3, 7, 3)}
    for net_size, widths in (
        (0.5, (48, 96, 192, 1024)),
        (1, (116, 232, 464, 1024)),
        (1.5, (176, 352, 704, 1024)),
        (2, (224, 488, 976, 2048)),
    )
}
153 | 
154 | 
def test():
    '''Smoke test: run the 0.5 configuration on three random 3x32x32 inputs and print the output shape.'''
    model = ShuffleNetV2(net_size=0.5)
    batch = torch.randn(3, 3, 32, 32)
    logits = model(batch)
    print(logits.shape)
160 | 
161 | 
162 | # test()
163 | 


--------------------------------------------------------------------------------
/models/efficientnet.py:
--------------------------------------------------------------------------------
  1 | '''EfficientNet in PyTorch.
  2 | 
  3 | Paper: "EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks".
  4 | 
  5 | Reference: https://github.com/keras-team/keras-applications/blob/master/keras_applications/efficientnet.py
  6 | '''
  7 | import torch
  8 | import torch.nn as nn
  9 | import torch.nn.functional as F
 10 | 
 11 | 
 12 | def swish(x):
 13 |     return x * x.sigmoid()
 14 | 
 15 | 
 16 | def drop_connect(x, drop_ratio):
 17 |     keep_ratio = 1.0 - drop_ratio
 18 |     mask = torch.empty([x.shape[0], 1, 1, 1], dtype=x.dtype, device=x.device)
 19 |     mask.bernoulli_(keep_ratio)
 20 |     x.div_(keep_ratio)
 21 |     x.mul_(mask)
 22 |     return x
 23 | 
 24 | 
 25 | class SE(nn.Module):
 26 |     '''Squeeze-and-Excitation block with Swish.'''
 27 | 
 28 |     def __init__(self, in_channels, se_channels):
 29 |         super(SE, self).__init__()
 30 |         self.se1 = nn.Conv2d(in_channels, se_channels,
 31 |                              kernel_size=1, bias=True)
 32 |         self.se2 = nn.Conv2d(se_channels, in_channels,
 33 |                              kernel_size=1, bias=True)
 34 | 
 35 |     def forward(self, x):
 36 |         out = F.adaptive_avg_pool2d(x, (1, 1))
 37 |         out = swish(self.se1(out))
 38 |         out = self.se2(out).sigmoid()
 39 |         out = x * out
 40 |         return out
 41 | 
 42 | 
 43 | class Block(nn.Module):
 44 |     '''expansion + depthwise + pointwise + squeeze-excitation'''
 45 | 
 46 |     def __init__(self,
 47 |                  in_channels,
 48 |                  out_channels,
 49 |                  kernel_size,
 50 |                  stride,
 51 |                  expand_ratio=1,
 52 |                  se_ratio=0.,
 53 |                  drop_rate=0.):
 54 |         super(Block, self).__init__()
 55 |         self.stride = stride
 56 |         self.drop_rate = drop_rate
 57 |         self.expand_ratio = expand_ratio
 58 | 
 59 |         # Expansion
 60 |         channels = expand_ratio * in_channels
 61 |         self.conv1 = nn.Conv2d(in_channels,
 62 |                                channels,
 63 |                                kernel_size=1,
 64 |                                stride=1,
 65 |                                padding=0,
 66 |                                bias=False)
 67 |         self.bn1 = nn.BatchNorm2d(channels)
 68 | 
 69 |         # Depthwise conv
 70 |         self.conv2 = nn.Conv2d(channels,
 71 |                                channels,
 72 |                                kernel_size=kernel_size,
 73 |                                stride=stride,
 74 |                                padding=(1 if kernel_size == 3 else 2),
 75 |                                groups=channels,
 76 |                                bias=False)
 77 |         self.bn2 = nn.BatchNorm2d(channels)
 78 | 
 79 |         # SE layers
 80 |         se_channels = int(in_channels * se_ratio)
 81 |         self.se = SE(channels, se_channels)
 82 | 
 83 |         # Output
 84 |         self.conv3 = nn.Conv2d(channels,
 85 |                                out_channels,
 86 |                                kernel_size=1,
 87 |                                stride=1,
 88 |                                padding=0,
 89 |                                bias=False)
 90 |         self.bn3 = nn.BatchNorm2d(out_channels)
 91 | 
 92 |         # Skip connection if in and out shapes are the same (MV-V2 style)
 93 |         self.has_skip = (stride == 1) and (in_channels == out_channels)
 94 | 
 95 |     def forward(self, x):
 96 |         out = x if self.expand_ratio == 1 else swish(self.bn1(self.conv1(x)))
 97 |         out = swish(self.bn2(self.conv2(out)))
 98 |         out = self.se(out)
 99 |         out = self.bn3(self.conv3(out))
100 |         if self.has_skip:
101 |             if self.training and self.drop_rate > 0:
102 |                 out = drop_connect(out, self.drop_rate)
103 |             out = out + x
104 |         return out
105 | 
106 | 
class EfficientNet(nn.Module):
    '''EfficientNet classifier assembled from a configuration dict.

    `cfg` must provide the per-stage lists 'expansion', 'out_channels',
    'num_blocks', 'kernel_size' and 'stride', plus the scalars
    'dropout_rate' and 'drop_connect_rate'.

    Args:
        cfg: configuration dict described above.
        num_classes: output size of the final linear layer.
    '''

    def __init__(self, cfg, num_classes=10):
        super(EfficientNet, self).__init__()
        self.cfg = cfg
        self.conv1 = nn.Conv2d(3,
                               32,
                               kernel_size=3,
                               stride=1,
                               padding=1,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(32)
        self.layers = self._make_layers(in_channels=32)
        self.linear = nn.Linear(cfg['out_channels'][-1], num_classes)

    def _make_layers(self, in_channels):
        '''Build all blocks; the drop-connect rate grows linearly with depth.'''
        layers = []
        cfg = [self.cfg[k] for k in ['expansion', 'out_channels', 'num_blocks', 'kernel_size',
                                     'stride']]
        total_blocks = sum(self.cfg['num_blocks'])
        block_idx = 0
        for expansion, out_channels, num_blocks, kernel_size, stride in zip(*cfg):
            # Only the first block of a stage uses the stage stride.
            for block_stride in [stride] + [1] * (num_blocks - 1):
                drop_rate = self.cfg['drop_connect_rate'] * block_idx / total_blocks
                layers.append(
                    Block(in_channels,
                          out_channels,
                          kernel_size,
                          block_stride,
                          expansion,
                          se_ratio=0.25,
                          drop_rate=drop_rate))
                in_channels = out_channels
                # The counter used to stay at 0, which pinned drop_rate to 0
                # for every block and disabled drop-connect entirely.
                block_idx += 1
        return nn.Sequential(*layers)

    def forward(self, x):
        '''Return the logits produced for the input batch `x`.'''
        out = swish(self.bn1(self.conv1(x)))
        out = self.layers(out)
        out = F.adaptive_avg_pool2d(out, 1)
        out = out.view(out.size(0), -1)
        dropout_rate = self.cfg['dropout_rate']
        if self.training and dropout_rate > 0:
            out = F.dropout(out, p=dropout_rate)
        out = self.linear(out)
        return out
152 | 
153 | 
def EfficientNetB0(num_classes=10):
    '''Build the EfficientNetB0 configuration.

    Args:
        num_classes: output size of the classifier; the default of 10
            matches what EfficientNet used when this argument did not exist.
    '''
    cfg = {
        'num_blocks': [1, 2, 2, 3, 3, 4, 1],
        'expansion': [1, 6, 6, 6, 6, 6, 6],
        'out_channels': [16, 24, 40, 80, 112, 192, 320],
        'kernel_size': [3, 3, 5, 3, 5, 5, 3],
        'stride': [1, 2, 2, 2, 1, 2, 1],
        'dropout_rate': 0.2,
        'drop_connect_rate': 0.2,
    }
    return EfficientNet(cfg, num_classes=num_classes)
165 | 
166 | 
def test():
    '''Smoke test: run EfficientNetB0 on two random 3x32x32 inputs and print the output shape.'''
    model = EfficientNetB0()
    batch = torch.randn(2, 3, 32, 32)
    logits = model(batch)
    print(logits.shape)


# Run the smoke test only when this file is executed as a script.
if __name__ == '__main__':
    test()
176 | 


--------------------------------------------------------------------------------