├── .gitignore
├── LICENSE
├── ModelFiles
    ├── FaceBoxes
    │   └── FaceBoxes.py
    ├── MobileNet
    │   └── MobileNet.py
    ├── ResNet
    │   └── resnet.py
    ├── UNet
    │   └── UNet.py
    └── _netG_1
    │   ├── build_face_dataset.py
    │   ├── main.py
    │   └── models.py
├── README.md
├── TestData
    ├── 2008_000536.jpg
    ├── 2008_001171.jpg
    ├── 2008_001601.jpg
    ├── 2008_001841.jpg
    ├── 227-2.jpg
    ├── 227-3.jpg
    ├── 227.jpg
    └── ImageNetLabels.txt
└── code
    ├── ConvertLayer_caffe.py
    ├── ConvertLayer_ncnn.py
    ├── ConvertModel.py
    ├── ReplaceDenormals.py
    ├── caffe.proto
    ├── caffe_pb2.py
    ├── run.py
    └── test.py


/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | BSD 2-Clause License
 2 | 
 3 | Copyright (c) 2017, 
 4 | All rights reserved.
 5 | 
 6 | Redistribution and use in source and binary forms, with or without
 7 | modification, are permitted provided that the following conditions are met:
 8 | 
 9 | * Redistributions of source code must retain the above copyright notice, this
10 |   list of conditions and the following disclaimer.
11 | 
12 | * Redistributions in binary form must reproduce the above copyright notice,
13 |   this list of conditions and the following disclaimer in the documentation
14 |   and/or other materials provided with the distribution.
15 | 
16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
20 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 | 


--------------------------------------------------------------------------------
/ModelFiles/FaceBoxes/FaceBoxes.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | import torch.nn.functional as F
  4 | 
  5 | 
  6 | class CReLUM(nn.Module):
  7 |     def __init__(self):
  8 |         super(CReLUM, self).__init__()
  9 | 
 10 |     def forward(self, x):
 11 |         return F.relu(torch.cat((x, -x), 1))
 12 | 
 13 | 
 14 | CRelu = CReLUM()
 15 | 
 16 | 
 17 | class BasicConv2d(nn.Module):
 18 | 
 19 |     def __init__(self, in_channels, out_channels, **kwargs):
 20 |         super(BasicConv2d, self).__init__()
 21 |         self.conv = nn.Conv2d(in_channels, out_channels, **kwargs)
 22 |         self.bn = nn.BatchNorm2d(out_channels, eps=0.001)
 23 | 
 24 |     def forward(self, x):
 25 |         x = self.conv(x)
 26 |         x = self.bn(x)
 27 |         return F.relu(x, inplace=True)
 28 | 
 29 | 
 30 | class Inception(nn.Module):
 31 |     def __init__(self, in_planes, n1x1down, n1x1up, n3x3):
 32 |         super(Inception, self).__init__()
 33 | 
 34 |         self.conv1 = BasicConv2d(in_planes, n1x1down, kernel_size=1)
 35 | 
 36 |         self.pool2_1 = nn.MaxPool2d(3, stride=1, padding=1, ceil_mode=True)
 37 |         self.conv2_2 = BasicConv2d(in_planes, n1x1down, kernel_size=1)
 38 | 
 39 |         self.conv3_1 = BasicConv2d(in_planes, n1x1up, kernel_size=1)
 40 |         self.conv3_2 = BasicConv2d(n1x1up, n3x3, kernel_size=3, padding=1)
 41 | 
 42 |         self.conv4_1 = BasicConv2d(in_planes, n1x1up, kernel_size=1)
 43 |         self.conv4_2 = BasicConv2d(n1x1up, n3x3, kernel_size=3, padding=1)
 44 |         self.conv4_3 = BasicConv2d(n3x3, n3x3, kernel_size=3, padding=1)
 45 | 
 46 |     def forward(self, x):
 47 |         y1 = self.conv1(x)
 48 | 
 49 |         y2 = self.pool2_1(x)
 50 |         y2 = self.conv2_2(y2)
 51 | 
 52 |         y3 = self.conv3_1(x)
 53 |         y3 = self.conv3_2(y3)
 54 | 
 55 |         y4 = self.conv4_1(x)
 56 |         y4 = self.conv4_2(y4)
 57 |         y4 = self.conv4_3(y4)
 58 | 
 59 |         return torch.cat([y1, y2, y3, y4], 1)
 60 | 
 61 | 
# Per-head anchor counts used by FaceBoxes: head i is built with
# 2 * anchors[i] score channels and 4 * anchors[i] bbox channels.
anchors = (21, 1, 1)
 63 | 
 64 | 
 65 | class FaceBoxes(nn.Module):
 66 |     def __init__(self):
 67 |         super(FaceBoxes, self).__init__()
 68 | 
 69 |         self.conv1 = nn.Conv2d(3, 16, kernel_size=7, stride=4, padding=3)
 70 |         self.bn1 = nn.BatchNorm2d(16, eps=0.001)
 71 |         self.conv2 = nn.Conv2d(32, 64, kernel_size=5, stride=2, padding=2)
 72 |         self.bn2 = nn.BatchNorm2d(64, eps=0.001)
 73 |         self.inception1 = Inception(128, 32, 16, 32)
 74 |         self.inception2 = Inception(128, 32, 16, 32)
 75 |         self.inception3 = Inception(128, 32, 16, 32)
 76 |         self.conv3_1 = nn.Conv2d(128, 128, kernel_size=1, stride=1)
 77 |         self.conv3_2 = nn.Conv2d(128, 256, kernel_size=3, stride=2, padding=1)
 78 |         self.conv4_1 = nn.Conv2d(256, 128, kernel_size=1, stride=1)
 79 |         self.conv4_2 = nn.Conv2d(128, 256, kernel_size=3, stride=2, padding=1)
 80 | 
 81 |         self.score_conv1 = nn.Conv2d(
 82 |             128, 2 * anchors[0], kernel_size=3, stride=1, padding=1)
 83 |         self.bbox_conv1 = nn.Conv2d(
 84 |             128, 4 * anchors[0], kernel_size=3, stride=1, padding=1)
 85 |         self.score_conv2 = nn.Conv2d(
 86 |             256, 2 * anchors[1], kernel_size=3, stride=1, padding=1)
 87 |         self.bbox_conv2 = nn.Conv2d(
 88 |             256, 4 * anchors[1], kernel_size=3, stride=1, padding=1)
 89 |         self.score_conv3 = nn.Conv2d(
 90 |             256, 2 * anchors[2], kernel_size=3, stride=1, padding=1)
 91 |         self.bbox_conv3 = nn.Conv2d(
 92 |             256, 4 * anchors[2], kernel_size=3, stride=1, padding=1)
 93 | 
 94 |     def forward(self, x):
 95 |         x = self.conv1(x)
 96 |         x = self.bn1(x)
 97 |         x = F.max_pool2d(CRelu(x), kernel_size=3, stride=2, ceil_mode=True)
 98 | 
 99 |         x = self.conv2(x)
100 |         x = self.bn2(x)
101 |         x = F.max_pool2d(CRelu(x), kernel_size=3, stride=2, ceil_mode=True)
102 | 
103 |         x = self.inception1(x)
104 |         x = self.inception2(x)
105 |         x = self.inception3(x)
106 | 
107 |         score1 = self.score_conv1(x)
108 |         bbox1 = self.bbox_conv1(x)
109 | 
110 |         x = F.relu(self.conv3_1(x), inplace=True)
111 |         x = F.relu(self.conv3_2(x), inplace=True)
112 | 
113 |         score2 = self.score_conv2(x)
114 |         bbox2 = self.bbox_conv2(x)
115 | 
116 |         x = F.relu(self.conv4_1(x), inplace=True)
117 |         x = F.relu(self.conv4_2(x), inplace=True)
118 | 
119 |         score3 = self.score_conv3(x)
120 |         bbox3 = self.bbox_conv3(x)
121 | 
122 |         scorelist = list()
123 |         bboxlist = list()
124 |         scorelist.append(score1.permute(0, 2, 3, 1).contiguous())
125 |         scorelist.append(score2.permute(0, 2, 3, 1).contiguous())
126 |         scorelist.append(score3.permute(0, 2, 3, 1).contiguous())
127 |         bboxlist.append(bbox1.permute(0, 2, 3, 1).contiguous())
128 |         bboxlist.append(bbox2.permute(0, 2, 3, 1).contiguous())
129 |         bboxlist.append(bbox3.permute(0, 2, 3, 1).contiguous())
130 |         pscore = torch.cat([o.view(o.size(0), -1) for o in scorelist], 1)
131 |         pbbox = torch.cat([o.view(o.size(0), -1) for o in bboxlist], 1)
132 | 
133 |         return pscore, pbbox
134 | 


--------------------------------------------------------------------------------
/ModelFiles/MobileNet/MobileNet.py:
--------------------------------------------------------------------------------
 1 | import torch.nn as nn
 2 | 
 3 | 
 4 | class MobileNet(nn.Module):
 5 |     def __init__(self):
 6 |         super(MobileNet, self).__init__()
 7 | 
 8 |         def conv_bn(inp, oup, stride):
 9 |             return nn.Sequential(
10 |                 nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
11 |                 nn.BatchNorm2d(oup),
12 |                 nn.ReLU(inplace=True)
13 |             )
14 | 
15 |         def conv_dw(inp, oup, stride):
16 |             return nn.Sequential(
17 |                 nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False),
18 |                 nn.BatchNorm2d(inp),
19 |                 nn.ReLU(inplace=True),
20 |     
21 |                 nn.Conv2d(inp, oup, 1, 1, 0, bias=False),
22 |                 nn.BatchNorm2d(oup),
23 |                 nn.ReLU(inplace=True),
24 |             )
25 | 
26 |         self.model = nn.Sequential(
27 |             conv_bn(  3,  32, 2), 
28 |             conv_dw( 32,  64, 1),
29 |             conv_dw( 64, 128, 2),
30 |             conv_dw(128, 128, 1),
31 |             conv_dw(128, 256, 2),
32 |             conv_dw(256, 256, 1),
33 |             conv_dw(256, 512, 2),
34 |             conv_dw(512, 512, 1),
35 |             conv_dw(512, 512, 1),
36 |             conv_dw(512, 512, 1),
37 |             conv_dw(512, 512, 1),
38 |             conv_dw(512, 512, 1),
39 |             conv_dw(512, 1024, 2),
40 |             conv_dw(1024, 1024, 1),
41 |             nn.AvgPool2d(7, ceil_mode=True),
42 |         )
43 |         self.fc = nn.Linear(1024, 1000)
44 | 
45 |     def forward(self, x):
46 |         x = self.model(x)
47 |         x = x.view(-1, 1024)
48 |         x = self.fc(x)
49 |         return x
50 | 


--------------------------------------------------------------------------------
/ModelFiles/ResNet/resnet.py:
--------------------------------------------------------------------------------
  1 | import torch.nn as nn
  2 | import math
  3 | import torch.utils.model_zoo as model_zoo
  4 | import torch
  5 | 
  6 | 
# Public names of this module (what ``from <module> import *`` exports).
__all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101',
           'resnet152']


# Checkpoint URLs, keyed by the name of the factory function that loads them.
model_urls = {
    'resnet18': 'https://s3.amazonaws.com/pytorch/models/resnet18-5c106cde.pth',
    'resnet34': 'https://s3.amazonaws.com/pytorch/models/resnet34-333f7ec4.pth',
    'resnet50': 'https://s3.amazonaws.com/pytorch/models/resnet50-19c8e357.pth',
    'resnet101': 'https://s3.amazonaws.com/pytorch/models/resnet101-5d3b4d8f.pth',
    'resnet152': 'https://s3.amazonaws.com/pytorch/models/resnet152-b121ed2d.pth',
}
 18 | 
 19 | 
 20 | def conv3x3(in_planes, out_planes, stride=1):
 21 |     "3x3 convolution with padding"
 22 |     return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
 23 |                      padding=1, bias=False)
 24 | 
 25 | 
 26 | class BasicBlock(nn.Module):
 27 |     expansion = 1
 28 | 
 29 |     def __init__(self, inplanes, planes, stride=1, downsample=None):
 30 |         super(BasicBlock, self).__init__()
 31 |         self.conv1 = conv3x3(inplanes, planes, stride)
 32 |         self.bn1 = nn.BatchNorm2d(planes)
 33 |         self.relu = nn.ReLU(inplace=True)
 34 |         self.conv2 = conv3x3(planes, planes)
 35 |         self.bn2 = nn.BatchNorm2d(planes)
 36 |         self.downsample = model_urls
 37 |         self.stride = stride
 38 | 
 39 |     def forward(self, x):
 40 |         residual = x
 41 | 
 42 |         out = self.conv1(x)
 43 |         out = self.bn1(out)
 44 |         out = self.relu(out)
 45 | 
 46 |         out = self.conv2(out)
 47 |         out = self.bn2(out)
 48 | 
 49 |         if self.downsample is not None:
 50 |             residual = self.downsample(x)
 51 | 
 52 |         out += residual
 53 |         out = self.relu(out)
 54 | 
 55 |         return out
 56 | 
 57 | 
 58 | class Bottleneck(nn.Module):
 59 |     expansion = 4
 60 | 
 61 |     def __init__(self, inplanes, planes, stride=1, downsample=None):
 62 |         super(Bottleneck, self).__init__()
 63 |         self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, stride=stride, bias=False) # change
 64 |         self.bn1 = nn.BatchNorm2d(planes)
 65 |         self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, # change
 66 |                                padding=1, bias=False)
 67 |         self.bn2 = nn.BatchNorm2d(planes)
 68 |         self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
 69 |         self.bn3 = nn.BatchNorm2d(planes * 4)
 70 |         self.relu = nn.ReLU(inplace=True)
 71 |         self.downsample = downsample
 72 |         self.stride = stride
 73 | 
 74 |     def forward(self, x):
 75 |         residual = x
 76 | 
 77 |         out = self.conv1(x)
 78 |         out = self.bn1(out)
 79 |         out = self.relu(out)
 80 | 
 81 |         out = self.conv2(out)
 82 |         out = self.bn2(out)
 83 |         out = self.relu(out)
 84 | 
 85 |         out = self.conv3(out)
 86 |         out = self.bn3(out)
 87 | 
 88 |         if self.downsample is not None:
 89 |             residual = self.downsample(x)
 90 | 
 91 |         out += residual
 92 |         out = self.relu(out)
 93 | 
 94 |         return out
 95 | 
 96 | 
 97 | class ResNet(nn.Module):
 98 |     def __init__(self, block, layers, num_classes=1000):
 99 |         self.inplanes = 64
100 |         super(ResNet, self).__init__()
101 |         self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
102 |                                bias=False)
103 |         self.bn1 = nn.BatchNorm2d(64)
104 |         self.relu = nn.ReLU(inplace=True)
105 |         self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=0, ceil_mode=True) # change
106 |         self.layer1 = self._make_layer(block, 64, layers[0])
107 |         self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
108 |         self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
109 |         self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
110 |         self.avgpool = nn.AvgPool2d(7)
111 |         self.fc = nn.Linear(512 * block.expansion, num_classes)
112 | 
113 |         for m in self.modules():
114 |             if isinstance(m, nn.Conv2d):
115 |                 n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
116 |                 m.weight.data.normal_(0, math.sqrt(2. / n))
117 |             elif isinstance(m, nn.BatchNorm2d):
118 |                 m.weight.data.fill_(1)
119 |                 m.bias.data.zero_()
120 | 
121 |     def _make_layer(self, block, planes, blocks, stride=1):
122 |         downsample = None
123 |         if stride != 1 or self.inplanes != planes * block.expansion:
124 |             downsample = nn.Sequential(
125 |                 nn.Conv2d(self.inplanes, planes * block.expansion,
126 |                           kernel_size=1, stride=stride, bias=False),
127 |                 nn.BatchNorm2d(planes * block.expansion),
128 |             )
129 | 
130 |         layers = []
131 |         layers.append(block(self.inplanes, planes, stride, downsample))
132 |         self.inplanes = planes * block.expansion
133 |         for i in range(1, blocks):
134 |             layers.append(block(self.inplanes, planes))
135 | 
136 |         return nn.Sequential(*layers)
137 | 
138 |     def forward(self, x):
139 |         x = self.conv1(x)
140 |         x = self.bn1(x)
141 |         x = self.relu(x)
142 |         x = self.maxpool(x)
143 | 
144 |         x = self.layer1(x)
145 |         x = self.layer2(x)
146 |         x = self.layer3(x)
147 |         x = self.layer4(x)
148 | 
149 |         x = self.avgpool(x)
150 |         x = x.view(x.size(0), -1)
151 |         x = self.fc(x)
152 | 
153 |         return x
154 | 
155 | 
def resnet18(pretrained=False):
    """Build a ResNet-18 (``BasicBlock`` stages of depth [2, 2, 2, 2]).

    Args:
        pretrained (bool): if True, load the weights fetched from
            ``model_urls['resnet18']`` into the model before returning it.
    """
    net = ResNet(BasicBlock, [2, 2, 2, 2])
    if not pretrained:
        return net
    weights = model_zoo.load_url(model_urls['resnet18'])
    net.load_state_dict(weights)
    return net
165 | 
166 | 
def resnet34(pretrained=False):
    """Build a ResNet-34 (``BasicBlock`` stages of depth [3, 4, 6, 3]).

    Args:
        pretrained (bool): if True, load the weights fetched from
            ``model_urls['resnet34']`` into the model before returning it.
    """
    net = ResNet(BasicBlock, [3, 4, 6, 3])
    if not pretrained:
        return net
    weights = model_zoo.load_url(model_urls['resnet34'])
    net.load_state_dict(weights)
    return net
176 | 
177 | 
def resnet50(pretrained=False):
    """Build a ResNet-50 (``Bottleneck`` stages of depth [3, 4, 6, 3]).

    Args:
        pretrained (bool): if True, load weights from the local checkpoint
            ``../ModelFiles/ResNet/resnet50.pth`` before returning the model.
    """
    net = ResNet(Bottleneck, [3, 4, 6, 3])
    if not pretrained:
        return net
    # Unlike the other factories this one reads a checkpoint from disk instead
    # of downloading model_urls['resnet50']; the relative path is resolved
    # against the current working directory.
    weights = torch.load('../ModelFiles/ResNet/resnet50.pth')
    net.load_state_dict(weights)
    return net
188 | 
189 | 
def resnet101(pretrained=False):
    """Build a ResNet-101 (``Bottleneck`` stages of depth [3, 4, 23, 3]).

    Args:
        pretrained (bool): if True, load the weights fetched from
            ``model_urls['resnet101']`` into the model before returning it.
    """
    net = ResNet(Bottleneck, [3, 4, 23, 3])
    if not pretrained:
        return net
    weights = model_zoo.load_url(model_urls['resnet101'])
    net.load_state_dict(weights)
    return net
199 | 
200 | 
def resnet152(pretrained=False):
    """Build a ResNet-152 (``Bottleneck`` stages of depth [3, 8, 36, 3]).

    Args:
        pretrained (bool): if True, load the weights fetched from
            ``model_urls['resnet152']`` into the model before returning it.
    """
    net = ResNet(Bottleneck, [3, 8, 36, 3])
    if not pretrained:
        return net
    weights = model_zoo.load_url(model_urls['resnet152'])
    net.load_state_dict(weights)
    return net
210 | 


--------------------------------------------------------------------------------
/ModelFiles/UNet/UNet.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn as nn
 3 | import torch.nn.init as init
 4 | import torch.nn.functional as F
 5 | 
 6 | from torch.utils import model_zoo
 7 | from torchvision import models
 8 | 
 9 | 
class UNetEnc(nn.Module):
    """Two 3x3 conv + ReLU layers, then a 2x2 stride-2 transposed conv + ReLU.

    Channels go ``in_channels -> features -> features -> out_channels``.
    """

    def __init__(self, in_channels, features, out_channels):
        super(UNetEnc, self).__init__()

        stages = [
            nn.Conv2d(in_channels, features, 3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(features, features, 3, padding=1),
            nn.ReLU(inplace=True),
            # Stride-2 transposed conv: doubles height and width.
            nn.ConvTranspose2d(features, out_channels, 2, stride=2),
            nn.ReLU(inplace=True),
        ]
        self.up = nn.Sequential(*stages)

    def forward(self, x):
        return self.up(x)
26 | 
27 | 
class UNetDec(nn.Module):
    """Two 3x3 conv + ReLU layers, optional Dropout(0.5), then 2x2 max-pool.

    The max-pool uses stride 2 with ``ceil_mode=True``.
    """

    def __init__(self, in_channels, out_channels, dropout=False):
        super(UNetDec, self).__init__()

        convs = [
            nn.Conv2d(in_channels, out_channels, 3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_channels, out_channels, 3, padding=1),
            nn.ReLU(inplace=True),
        ]
        # Dropout, when requested, sits between the convs and the pooling.
        tail = [nn.Dropout(.5)] if dropout else []
        tail.append(nn.MaxPool2d(2, stride=2, ceil_mode=True))

        self.down = nn.Sequential(*(convs + tail))

    def forward(self, x):
        return self.down(x)
47 | 
48 | 
class UNet(nn.Module):
    """U-shaped network built from UNetDec (pooling) and UNetEnc (upsampling) blocks.

    Four ``dec`` blocks reduce resolution, ``center`` and the ``enc`` blocks
    restore it, and a final 1x1 conv maps 64 channels to ``num_classes``.
    Each ``enc`` stage receives the previous output concatenated (dim 1) with
    the bilinearly rescaled output of the matching ``dec`` block.
    """

    def __init__(self, num_classes):
        super(UNet, self).__init__()

        # Pooling path: 3 -> 64 -> 128 -> 256 -> 512 channels.
        self.dec1 = UNetDec(3, 64)
        self.dec2 = UNetDec(64, 128)
        self.dec3 = UNetDec(128, 256)
        self.dec4 = UNetDec(256, 512, dropout=True)

        self.center = nn.Sequential(
            nn.Conv2d(512, 1024, 3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(1024, 1024, 3, padding=1),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.ConvTranspose2d(1024, 512, 2, stride=2),
            nn.ReLU(inplace=True),
        )

        # Upsampling path; each input is the concatenation of two tensors,
        # hence the doubled input channel counts.
        self.enc4 = UNetEnc(1024, 512, 256)
        self.enc3 = UNetEnc(512, 256, 128)
        self.enc2 = UNetEnc(256, 128, 64)
        self.enc1 = nn.Sequential(
            nn.Conv2d(128, 64, 3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 64, 3, padding=1),
            nn.ReLU(inplace=True),
        )
        self.final = nn.Conv2d(64, num_classes, 1)

    def forward(self, x):
        def rescaled(skip, ref):
            # Bilinearly rescale `skip` by the ratio of the dim-2 sizes ref/skip.
            return F.upsample_bilinear(
                skip, scale_factor=ref.size()[2] / skip.size()[2])

        dec1 = self.dec1(x)
        dec2 = self.dec2(dec1)
        dec3 = self.dec3(dec2)
        dec4 = self.dec4(dec3)
        center = self.center(dec4)

        enc4 = self.enc4(torch.cat([center, rescaled(dec4, center)], 1))
        enc3 = self.enc3(torch.cat([enc4, rescaled(dec3, enc4)], 1))
        enc2 = self.enc2(torch.cat([enc3, rescaled(dec2, enc3)], 1))
        enc1 = self.enc1(torch.cat([enc2, rescaled(dec1, enc2)], 1))

        return self.final(enc1)
95 | 


--------------------------------------------------------------------------------
/ModelFiles/_netG_1/build_face_dataset.py:
--------------------------------------------------------------------------------
 1 | from __future__ import print_function
 2 | from multiprocessing import Pool
 3 | from PIL import Image
 4 | import numpy as np
 5 | import animeface
 6 | import sys
 7 | import os
 8 | 
 9 | 
10 | # im from PIL.Image.open, face_pos position object, margin
def faceCrop(im, face_pos, m):
    """Crop ``im`` to the face box enlarged by the relative margin ``m``.

    The box ``(x, y, width, height)`` read from ``face_pos`` is extended by
    ``m * width`` / ``m * height`` on every side, clamped to the image
    bounds given by ``im.size``, truncated to integers, and passed to
    ``im.crop`` as ``(left, top, right, bottom)``.
    """
    box_w, box_h = face_pos.width, face_pos.height
    img_w, img_h = im.size

    # Move the top-left corner out by the margin without leaving the image.
    left = max(0, face_pos.x - m * box_w)
    top = max(0, face_pos.y - m * box_h)

    # Use the enlarged extent when it fits, otherwise stop at the image edge.
    if img_w > (left + box_w + 2 * m * box_w):
        crop_w = box_w + 2 * m * box_w
    else:
        crop_w = img_w - left
    if img_h > (top + box_h + 2 * m * box_h):
        crop_h = box_h + 2 * m * box_h
    else:
        crop_h = img_h - top

    left, top, crop_w, crop_h = [int(v) for v in (left, top, crop_w, crop_h)]
    return im.crop((left, top, left + crop_w, top + crop_h))
22 |     
def min_resize_crop(im, min_side):
    """Resize ``im`` so its shorter side equals ``min_side``, then crop a square.

    The aspect ratio is kept (the longer side is scaled with integer floor
    division), and the returned image is the top-left
    ``min_side x min_side`` region of the resized image.
    """
    sizeX, sizeY = im.size
    # Floor division keeps the target size integral on Python 3 as well;
    # with "/" the size becomes a float there, while Python 2 already floored.
    # NOTE(review): Image.ANTIALIAS was removed in Pillow 10 (Image.LANCZOS is
    # the same filter) - confirm the Pillow version this script runs against.
    if sizeX > sizeY:
        im = im.resize((min_side * sizeX // sizeY, min_side), Image.ANTIALIAS)
    else:
        im = im.resize((min_side, sizeY * min_side // sizeX), Image.ANTIALIAS)
    return im.crop((0, 0, min_side, min_side))
31 | 
def load_detect(img_path):
    """Open an image and return its most likely face as a 96x96 crop.

    Input: a string with the image path.
    Output: the face crop in size 96x96, or the integer 0 when
    ``animeface.detect`` finds no face.

    When several faces are detected, the one with the highest likelihood
    is used; it is cropped with a relative margin of 0.5.
    """
    im = Image.open(img_path)
    faces = animeface.detect(im)
    if len(faces) == 0:
        return 0

    # Index of the detection with the highest likelihood.
    likelihoods = np.array([candidate.likelihood for candidate in faces])
    best = np.argmax(likelihoods)

    cropped = faceCrop(im, faces[best].face.pos, 0.5)
    return min_resize_crop(cropped, 96)
54 | 
def process_img(img_path):
    """Detect the face in ``img_path`` and save it under ``faces/<class>/<name>``.

    The class and file names are the last two ``/``-separated components of
    ``img_path``; the output directory is relative to the working directory.
    Returns 0 when the output file already exists or no face is found,
    otherwise saves the crop as JPEG and returns None.
    """
    parts = img_path.split('/')
    count = len(parts)
    cls_name = parts[count - 2]
    img_name = parts[count - 1]

    new_dir_path = os.path.join('faces', cls_name)
    try:
        os.makedirs(new_dir_path)
    except OSError as err:
        # Reported but not fatal; processing continues.
        print("OS error: {0}".format(err))

    new_img_path = os.path.join(new_dir_path, img_name)
    # Skip images that were already processed.
    if os.path.exists(new_img_path):
        return 0

    face = load_detect(img_path)
    # load_detect returns 0 when the image contains no face.
    if face == 0:
        return 0
    face.save(new_img_path, 'JPEG')
75 | 
def try_process_img(img_path):
    """Run ``process_img`` on ``img_path``, reporting errors instead of raising.

    Any ``Exception`` is caught and its type is printed, so one bad image
    does not abort a batch. KeyboardInterrupt and SystemExit are no longer
    swallowed, so the job can still be interrupted.
    """
    try:
        process_img(img_path)
    except Exception:
        e = sys.exc_info()[0]
        print('Err: %s \n' % e)
82 | 
83 | # multiprocessing version
def multi_construct_face_dataset(base_dir):
    """Process every .jpg/.png under ``base_dir/<class>/`` with 12 workers.

    Each entry of ``base_dir`` is treated as a class directory; the images
    found in it are passed to ``try_process_img`` through a process pool.
    """
    cls_dirs = os.listdir(base_dir)
    imgs = []
    for cls_dir in cls_dirs:
        sub_dir = os.path.join(base_dir, cls_dir)
        # extend() avoids rebuilding the whole list for every class directory.
        imgs.extend(os.path.join(sub_dir, f)
                    for f in os.listdir(sub_dir)
                    if f.endswith(('.jpg', '.png')))
    print('There are %d classes, %d images in total. \n' % (len(cls_dirs), len(imgs)))
    pool = Pool(12) # 12 workers
    try:
        pool.map(try_process_img, imgs)
    finally:
        # Release the worker processes even if map() raises.
        pool.close()
        pool.join()
94 | 
95 | 
# Root directory whose sub-directories (one per class) hold the source images.
base_dir = '/home/jielei/gallery-dl/danbooru'

# Run only as a script: multiprocessing may re-import this module in worker
# processes, and an unguarded call would start the whole job again there.
if __name__ == '__main__':
    multi_construct_face_dataset(base_dir)


--------------------------------------------------------------------------------
/ModelFiles/_netG_1/main.py:
--------------------------------------------------------------------------------
  1 | from __future__ import print_function
  2 | import os
  3 | import time
  4 | import random
  5 | import argparse
  6 | import torch
  7 | import torch.nn as nn
  8 | import torch.nn.parallel
  9 | import torch.backends.cudnn as cudnn
 10 | import torch.optim as optim
 11 | import torch.utils.data
 12 | import torchvision.datasets as dset
 13 | import torchvision.transforms as transforms
 14 | import torchvision.utils as vutils
 15 | from torch.autograd import Variable
 16 | 
 17 | ### load project files
 18 | import models
 19 | from models import weights_init
 20 | 
 21 | parser = argparse.ArgumentParser()
 22 | parser.add_argument('--dataRoot', required=True, help='path to dataset')
 23 | parser.add_argument('--workers', type=int, default=2, help='number of data loading workers')
 24 | parser.add_argument('--batchSize', type=int, default=64, help='input batch size')
 25 | parser.add_argument('--imageSize', type=int, default=64, help='the height / width of the input image to network')
 26 | parser.add_argument('--nz', type=int, default=100, help='size of the latent z vector')
 27 | parser.add_argument('--ngf', type=int, default=64)
 28 | parser.add_argument('--ndf', type=int, default=64)
 29 | parser.add_argument('--niter', type=int, default=25, help='number of epochs to train for')
 30 | parser.add_argument('--lr', type=float, default=0.0002, help='learning rate, default=0.0002')
 31 | parser.add_argument('--beta1', type=float, default=0.5, help='beta1 for adam. default=0.5')
 32 | parser.add_argument('--cuda'  , action='store_true', help='enables cuda')
 33 | parser.add_argument('--ngpu'  , type=int, default=1, help='number of GPUs to use')
 34 | parser.add_argument('--netG', default='', help="path to netG (to continue training)")
 35 | parser.add_argument('--netD', default='', help="path to netD (to continue training)")
 36 | parser.add_argument('--outDir', default='.', help='folder to output images and model checkpoints')
 37 | parser.add_argument('--model', type=int, default=1, help='1 for dcgan, 2 for illustrationGAN-like-GAN')
 38 | parser.add_argument('--d_labelSmooth', type=float, default=0, help='for D, use soft label "1-labelSmooth" for real samples')
 39 | parser.add_argument('--n_extra_layers_d', type=int, default=0, help='number of extra conv layers in D')
 40 | parser.add_argument('--n_extra_layers_g', type=int, default=1, help='number of extra conv layers in G')
 41 | parser.add_argument('--binary', action='store_true', help='z from bernoulli distribution, with prob=0.5')
 42 | 
 43 | # simply prefer this way
 44 | # arg_list = [
 45 | #     '--dataRoot', '/home/jielei/data/danbooru-faces',
 46 | #     '--workers', '12',
 47 | #     '--batchSize', '128',
 48 | #     '--imageSize', '64',
 49 | #     '--nz', '100',
 50 | #     '--ngf', '64',
 51 | #     '--ndf', '64',
 52 | #     '--niter', '80',
 53 | #     '--lr', '0.0002',
 54 | #     '--beta1', '0.5',
 55 | #     '--cuda', 
 56 | #     '--ngpu', '1',
 57 | #     '--netG', '',
 58 | #     '--netD', '',
 59 | #     '--outDir', './results',
 60 | #     '--model', '1',
 61 | #     '--d_labelSmooth', '0.1', # 0.25 from imporved-GAN paper 
 62 | #     '--n_extra_layers_d', '0',
 63 | #     '--n_extra_layers_g', '1', # in the sense that generator should be more powerful
 64 | # ]
 65 | 
 66 | args = parser.parse_args()
 67 | # opt = parser.parse_args(arg_list)
 68 | print(opt)
 69 | 
# Create the output directory; an OSError (e.g. it already exists) is ignored.
try:
    os.makedirs(opt.outDir)
except OSError:
    pass

# Draw a seed in [1, 10000] and use it for both Python's and torch's RNGs.
opt.manualSeed = random.randint(1,10000) # fix seed, a scalar
random.seed(opt.manualSeed)
torch.manual_seed(opt.manualSeed)

cudnn.benchmark = True

if torch.cuda.is_available() and not opt.cuda:
    print("WARNING: You have a CUDA device, so you should probably run with --cuda")
    
# Short aliases for the options passed to the model constructors below.
nc = 3  # channel count handed to the models; matches the 3-channel Normalize below
ngpu = opt.ngpu
nz = opt.nz
ngf = opt.ngf
ndf = opt.ndf
n_extra_d = opt.n_extra_layers_d
n_extra_g = opt.n_extra_layers_g

# Images are read from class sub-folders of opt.dataRoot, scaled, converted to
# tensors and normalized with mean 0.5 / std 0.5 per channel.
dataset = dset.ImageFolder(
    root=opt.dataRoot,
    transform=transforms.Compose([
            transforms.Scale(opt.imageSize),
            # transforms.CenterCrop(opt.imageSize),
            transforms.ToTensor(),
            transforms.Normalize((0.5,0.5,0.5), (0.5,0.5,0.5)), # bring images to (-1,1)
        ])
)
dataloader = torch.utils.data.DataLoader(dataset, batch_size=opt.batchSize,
                                         shuffle=True, num_workers=opt.workers)

# load models
# NOTE(review): any --model value other than 1 or 2 leaves netG/netD undefined
# and the next statement raises NameError.
if opt.model == 1:
    netG = models._netG_1(ngpu, nz, nc, ngf, n_extra_g)
    netD = models._netD_1(ngpu, nz, nc, ndf, n_extra_d)
elif opt.model == 2:
    netG = models._netG_2(ngpu, nz, nc, ngf)
    netD = models._netD_2(ngpu, nz, nc, ndf)

# Apply weights_init first; a checkpoint given via --netG then overrides it.
netG.apply(weights_init)
if opt.netG != '':
    netG.load_state_dict(torch.load(opt.netG))
print(netG)

# Same for the discriminator and --netD.
netD.apply(weights_init)
if opt.netD != '':
    netD.load_state_dict(torch.load(opt.netD))
print(netD)

criterion = nn.BCELoss()
criterion_MSE = nn.MSELoss()

# Pre-allocated buffers; the training loop resizes and refills them per batch.
input = torch.FloatTensor(opt.batchSize, 3, opt.imageSize, opt.imageSize)
noise = torch.FloatTensor(opt.batchSize, nz, 1, 1)
# Fixed latent batch: Bernoulli(0.5) samples with --binary, else N(0, 1).
if opt.binary:
    bernoulli_prob = torch.FloatTensor(opt.batchSize, nz, 1, 1).fill_(0.5)
    fixed_noise = torch.bernoulli(bernoulli_prob)
else:
    fixed_noise = torch.FloatTensor(opt.batchSize, nz, 1, 1).normal_(0, 1)
label = torch.FloatTensor(opt.batchSize)
# Target values written into `label` for real and generated samples.
real_label = 1
fake_label = 0

# Move networks, losses and buffers to the GPU when --cuda is given.
if opt.cuda:
    netD.cuda()
    netG.cuda()
    criterion.cuda()
    criterion_MSE.cuda()
    input, label = input.cuda(), label.cuda()
    noise, fixed_noise = noise.cuda(), fixed_noise.cuda()
    
# Wrap the buffers in autograd Variables (updated in place through `.data`).
input = Variable(input)
label = Variable(label)
noise = Variable(noise)
fixed_noise = Variable(fixed_noise)

# setup optimizer: one Adam optimizer per network, sharing lr and beta1
optimizerD = optim.Adam(netD.parameters(), lr = opt.lr, betas = (opt.beta1, 0.999))
optimizerG = optim.Adam(netG.parameters(), lr = opt.lr, betas = (opt.beta1, 0.999))
152 | 
153 | for epoch in range(opt.niter):
154 |     for i, data in enumerate(dataloader, 0):
155 |         start_iter = time.time()
156 |         ############################
157 |         # (1) Update D network: maximize log(D(x)) + log(1 - D(G(z)))
158 |         ###########################
159 |         # train with real
160 |         netD.zero_grad()
161 |         real_cpu, _ = data
162 |         batch_size = real_cpu.size(0)
163 |         input.data.resize_(real_cpu.size()).copy_(real_cpu)
164 |         label.data.resize_(batch_size).fill_(real_label - opt.d_labelSmooth) # use smooth label for discriminator
165 | 
166 |         output = netD(input)
167 |         errD_real = criterion(output, label)
168 |         errD_real.backward()
169 |         D_x = output.data.mean()
170 |         # train with fake
171 |         noise.data.resize_(batch_size, nz, 1, 1)
172 |         if opt.binary:
173 |             bernoulli_prob.resize_(noise.data.size())
174 |             noise.data.copy_(2*(torch.bernoulli(bernoulli_prob)-0.5))
175 |         else:
176 |             noise.data.normal_(0, 1)
177 |         fake,z_prediction = netG(noise)
178 |         label.data.fill_(fake_label)
179 |         output = netD(fake.detach()) # add ".detach()" to avoid backprop through G
180 |         errD_fake = criterion(output, label)
181 |         errD_fake.backward() # gradients for fake/real will be accumulated
182 |         D_G_z1 = output.data.mean()
183 |         errD = errD_real + errD_fake
184 |         optimizerD.step() # .step() can be called once the gradients are computed
185 | 
186 |         ############################
187 |         # (2) Update G network: maximize log(D(G(z)))
188 |         ###########################
189 |         netG.zero_grad()
190 |         label.data.fill_(real_label) # fake labels are real for generator cost
191 |         output = netD(fake)
192 |         errG = criterion(output, label)
193 |         errG.backward(retain_variables=True) # True if backward through the graph for the second time
194 |         if opt.model == 2: # with z predictor
195 |             errG_z = criterion_MSE(z_prediction, noise)
196 |             errG_z.backward()
197 |         D_G_z2 = output.data.mean()
198 |         optimizerG.step()
199 |         
200 |         end_iter = time.time()
201 |         print('[%d/%d][%d/%d] Loss_D: %.4f Loss_G: %.4f D(x): %.4f D(G(z)): %.4f / %.4f Elapsed %.2f s'
202 |               % (epoch, opt.niter, i, len(dataloader),
203 |                  errD.data[0], errG.data[0], D_x, D_G_z1, D_G_z2, end_iter-start_iter))
204 |         if i % 100 == 0:
205 |             # the first 64 samples from the mini-batch are saved.
206 |             vutils.save_image(real_cpu[0:64,:,:,:],
207 |                     '%s/real_samples.png' % opt.outDir, nrow=8)
208 |             fake,_ = netG(fixed_noise)
209 |             vutils.save_image(fake.data[0:64,:,:,:],
210 |                     '%s/fake_samples_epoch_%03d.png' % (opt.outDir, epoch), nrow=8)
211 |     if epoch % 1 == 0:
212 |         # do checkpointing
213 |         torch.save(netG.state_dict(), '%s/netG_epoch_%d.pth' % (opt.outDir, epoch))
214 |         torch.save(netD.state_dict(), '%s/netD_epoch_%d.pth' % (opt.outDir, epoch))
215 | 


--------------------------------------------------------------------------------
/ModelFiles/_netG_1/models.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | import torch.nn.parallel
  4 | 
  5 | 
  6 | 
  7 | def weights_init(m):
  8 |     classname = m.__class__.__name__
  9 |     if classname.find('Conv') != -1:
 10 |         m.weight.data.normal_(0.0, 0.02)
 11 |     elif classname.find('BatchNorm') != -1:
 12 |         m.weight.data.normal_(1.0, 0.02)
 13 |         m.bias.data.fill_(0)
 14 | 
 15 | # DCGAN generator, fully convolutional: turns a latent tensor into an image through stacked transposed convolutions
 16 | class _netG_1(nn.Module):
 17 |     def __init__(self, ngpu, nz, nc , ngf, n_extra_layers_g):
 18 |         super(_netG_1, self).__init__()
 19 |         self.ngpu = ngpu  # stored only; the active forward() below does not read it
 20 |         #self.nz = nz
 21 |         #self.nc = nc
 22 |         #self.ngf = ngf
 23 |         main = nn.Sequential(
 24 |             # input is Z, going into a transposed convolution
 25 |             # state size. nz x 1 x 1
 26 |             nn.ConvTranspose2d(     nz, ngf * 8, 4, 1, 0, bias=False),
 27 |             nn.BatchNorm2d(ngf * 8),
 28 |             nn.LeakyReLU(0.2, inplace=True),
 29 |             # state size. (ngf*8) x 4 x 4
 30 |             nn.ConvTranspose2d(ngf * 8, ngf * 4, 4, 2, 1, bias=False),
 31 |             nn.BatchNorm2d(ngf * 4),
 32 |             nn.LeakyReLU(0.2, inplace=True),
 33 |             # state size. (ngf*4) x 8 x 8
 34 |             nn.ConvTranspose2d(ngf * 4, ngf * 2, 4, 2, 1, bias=False),
 35 |             nn.BatchNorm2d(ngf * 2),
 36 |             nn.LeakyReLU(0.2, inplace=True),
 37 |             # state size. (ngf*2) x 16 x 16
 38 |             nn.ConvTranspose2d(ngf * 2,     ngf, 4, 2, 1, bias=False),
 39 |             nn.BatchNorm2d(ngf),
 40 |             nn.LeakyReLU(0.2, inplace=True),
 41 |             # state size. (ngf) x 32 x 32
 42 |         )
 43 | 
 44 |         # Extra layers: n_extra_layers_g blocks of 3x3 conv (stride 1, pad 1) + batch norm + LeakyReLU; spatial size is unchanged
 45 |         for t in range(n_extra_layers_g):
 46 |             main.add_module('extra-layers-{0}.{1}.conv'.format(t, ngf),
 47 |                             nn.Conv2d(ngf, ngf, 3, 1, 1, bias=False))
 48 |             main.add_module('extra-layers-{0}.{1}.batchnorm'.format(t, ngf),
 49 |                             nn.BatchNorm2d(ngf))
 50 |             main.add_module('extra-layers-{0}.{1}.relu'.format(t, ngf),
 51 |                             nn.LeakyReLU(0.2, inplace=True))
 52 | 
 53 |         main.add_module('final_layer.deconv', 
 54 |         	             nn.ConvTranspose2d(ngf, nc, 4, 2, 1, bias=False)) # 4,2,1 doubles 32x32 to 64x64; use 5,3,1 for 96x96
 55 |         main.add_module('final_layer.tanh', 
 56 |         	             nn.Tanh())
 57 |             # state size. (nc) x 64 x 64 with the 4,2,1 deconv above (96 x 96 only with 5,3,1)
 58 | 
 59 |         self.main = main
 60 | 
 61 | 
 62 |     def forward(self, input):  # runs the whole stack on the input's own device; the data_parallel path is disabled
 63 |         # gpu_ids = None
 64 |         # if isinstance(input.data, torch.cuda.FloatTensor) and self.ngpu > 1:
 65 |         #     gpu_ids = range(self.ngpu)
 66 |         # return nn.parallel.data_parallel(self.main, input, gpu_ids), 0
 67 |         return self.main(input)  # image tensor only (the disabled variant also returned 0); NOTE(review): main.py unpacks two values from netG(noise) - confirm callers
 68 | 
 69 | class _netD_1(nn.Module):  # DCGAN discriminator: strided convolutions ending in a 1-channel conv + sigmoid
 70 |     def __init__(self, ngpu, nz, nc, ndf,  n_extra_layers_d):
 71 |         super(_netD_1, self).__init__()
 72 |         self.ngpu = ngpu  # read by forward() to decide on multi-GPU execution; nz is accepted but never used here
 73 |         main = nn.Sequential(
 74 |             # input is (nc) x 64 x 64 with the 4,2,1 conv below (a 96 x 96 input needs 5,3,1 to reach 32 x 32)
 75 |             nn.Conv2d(nc, ndf, 4, 2, 1, bias=False), # 5,3,1 for 96x96
 76 |             nn.LeakyReLU(0.2, inplace=True),
 77 |             # state size. (ndf) x 32 x 32
 78 |             nn.Conv2d(ndf, ndf * 2, 4, 2, 1, bias=False),
 79 |             nn.BatchNorm2d(ndf * 2),
 80 |             nn.LeakyReLU(0.2, inplace=True),
 81 |             # state size. (ndf*2) x 16 x 16
 82 |             nn.Conv2d(ndf * 2, ndf * 4, 4, 2, 1, bias=False),
 83 |             nn.BatchNorm2d(ndf * 4),
 84 |             nn.LeakyReLU(0.2, inplace=True),
 85 |             # state size. (ndf*4) x 8 x 8
 86 |             nn.Conv2d(ndf * 4, ndf * 8, 4, 2, 1, bias=False),
 87 |             nn.BatchNorm2d(ndf * 8),
 88 |             nn.LeakyReLU(0.2, inplace=True),
 89 |             # state size. (ndf*8) x 4 x 4
 90 |         )
 91 | 
 92 |         # Extra layers: n_extra_layers_d blocks of 3x3 conv (stride 1, pad 1) + batch norm + LeakyReLU; spatial size is unchanged
 93 |         for t in range(n_extra_layers_d):
 94 |             main.add_module('extra-layers-{0}.{1}.conv'.format(t, ndf * 8),
 95 |                             nn.Conv2d(ndf * 8, ndf * 8, 3, 1, 1, bias=False))
 96 |             main.add_module('extra-layers-{0}.{1}.batchnorm'.format(t, ndf * 8),
 97 |                             nn.BatchNorm2d(ndf * 8))
 98 |             main.add_module('extra-layers-{0}.{1}.relu'.format(t, ndf * 8),
 99 |                             nn.LeakyReLU(0.2, inplace=True))
100 | 
101 | 
102 |         main.add_module('final_layers.conv', nn.Conv2d(ndf * 8, 1, 4, 1, 0, bias=False))
103 |         main.add_module('final_layers.sigmoid', nn.Sigmoid())
104 |         # state size. 1 x 1 x 1
105 |         self.main = main
106 | 
107 |     def forward(self, input):
108 |         gpu_ids = None  # stays None unless the multi-GPU condition below holds
109 |         if isinstance(input.data, torch.cuda.FloatTensor) and self.ngpu > 1:
110 |             gpu_ids = range(self.ngpu)  # device ids 0 .. ngpu-1
111 |         output = nn.parallel.data_parallel(self.main, input, gpu_ids)
112 |         return output.view(-1, 1)  # flatten the sigmoid outputs into a single column
113 | 
114 | 
115 | 
116 | # DCGAN discriminator with fc layers: conv stack down to a 1024-vector, then two linear layers and a sigmoid
117 | class _netD_2(nn.Module):
118 |     def __init__(self, ngpu, nz, nc , ndf):
119 |         super(_netD_2, self).__init__()
120 |         self.ngpu = ngpu  # read by forward() to decide on multi-GPU execution; nz is accepted but never used here
121 |         self.convs = nn.Sequential(
122 |             # input is (nc) x 64 x 64 for the sizes noted below (each 4,2,1 conv halves the spatial size)
123 |             nn.Conv2d(nc, ndf, 4, 2, 1, bias=False),
124 |             nn.LeakyReLU(0.2, inplace=True),
125 |             # state size. (ndf) x 32 x 32
126 |             nn.Conv2d(ndf, ndf * 2, 4, 2, 1, bias=False),
127 |             nn.BatchNorm2d(ndf * 2),
128 |             nn.LeakyReLU(0.2, inplace=True),
129 |             # state size. (ndf*2) x 16 x 16
130 |             nn.Conv2d(ndf * 2, ndf * 4, 4, 2, 1, bias=False),
131 |             nn.BatchNorm2d(ndf * 4),
132 |             nn.LeakyReLU(0.2, inplace=True),
133 |             # state size. (ndf*4) x 8 x 8
134 |             nn.Conv2d(ndf * 4, ndf * 8, 4, 2, 1, bias=False),
135 |             nn.BatchNorm2d(ndf * 8),
136 |             nn.LeakyReLU(0.2, inplace=True),
137 |             # state size. (ndf*8) x 4 x 4
138 |             nn.Conv2d(ndf * 8, 1024, 4, 1, 0, bias=False),
139 |             nn.LeakyReLU(inplace=True),  # no slope given here, so the library default applies rather than the 0.2 used above
140 |             nn.Dropout(0.5),
141 |             # state size. 1024 x 1 x 1
142 |         )
143 |         self.fcs = nn.Sequential(
144 |             nn.Linear(1024, 1024),
145 |             nn.LeakyReLU(inplace=True),
146 |             nn.Dropout(0.5),
147 |             nn.Linear(1024, 1),            
148 |             nn.Sigmoid()
149 |             )
150 |     def forward(self, input):
151 |         gpu_ids = None  # stays None unless the multi-GPU condition below holds
152 |         if isinstance(input.data, torch.cuda.FloatTensor) and self.ngpu > 1:
153 |             gpu_ids = range(self.ngpu)  # device ids 0 .. ngpu-1
154 |         output = nn.parallel.data_parallel(self.convs, input, gpu_ids)  # only the conv stack goes through data_parallel
155 |         output = self.fcs(output.view(-1,1024))  # flatten 1024 x 1 x 1 to 1024 and run the fc head directly
156 |         return output.view(-1, 1)
157 | 
158 | # DCGAN generator with fc layers in front and a z decoder that predicts the latent code back from the fc features
159 | class _netG_2(nn.Module):
160 |     def __init__(self, ngpu, nz, nc , ngf):
161 |         super(_netG_2, self).__init__()
162 |         self.ngpu = ngpu  # read by forward() to decide on multi-GPU execution
163 |         self.nz = nz  # forward() uses it to flatten the latent input
164 |         self.fcs = nn.Sequential(
165 |             # input is Z flattened to (batch, nz) by forward(), going into a linear layer
166 |             # state size. nz
167 |             nn.Linear(nz, 1024),
168 |             nn.ReLU(inplace=True),
169 |             nn.Dropout(0.5),
170 |             nn.Linear(1024, 1024),
171 |             nn.ReLU(inplace=True),
172 |             nn.Dropout(0.5),
173 |             )
174 |         
175 |         self.decode_fcs = nn.Sequential(
176 |             nn.Linear(1024, 1024),
177 |             nn.ReLU(inplace=True),
178 |             nn.Dropout(0.5),
179 |             nn.Linear(1024, nz),
180 |             )
181 | 
182 |         self.convs = nn.Sequential(
183 |             # 1024x1x1
184 |             nn.ConvTranspose2d(1024, ngf * 8, 4, 1, 0, bias=False),
185 |             nn.BatchNorm2d(ngf * 8),
186 |             nn.ReLU(inplace=True),
187 |             # state size. (ngf*8) x 4 x 4
188 |             nn.ConvTranspose2d(ngf * 8, ngf * 4, 4, 2, 1, bias=False),
189 |             nn.BatchNorm2d(ngf * 4),
190 |             nn.ReLU(inplace=True),
191 |             # state size. (ngf*4) x 8 x 8
192 |             nn.ConvTranspose2d(ngf * 4, ngf * 2, 4, 2, 1, bias=False),
193 |             nn.BatchNorm2d(ngf * 2),
194 |             nn.ReLU(inplace=True),
195 |             # state size. (ngf*2) x 16 x 16
196 |             nn.ConvTranspose2d(ngf * 2,     ngf, 4, 2, 1, bias=False),
197 |             nn.BatchNorm2d(ngf),
198 |             nn.ReLU(inplace=True),
199 |             # state size. (ngf) x 32 x 32
200 |             nn.ConvTranspose2d(    ngf,      nc, 4, 2, 1, bias=False),
201 |             nn.Tanh()
202 |             # state size. (nc) x 64 x 64 with the 4,2,1 deconv above (96 x 96 would need 5,3,1)
203 |         )
204 |     def forward(self, input):
205 |         input = self.fcs(input.view(-1,self.nz))  # flatten the latent input to (batch, nz) and run the fc stack
206 |         gpu_ids = None  # stays None unless the multi-GPU condition below holds
207 |         if isinstance(input.data, torch.cuda.FloatTensor) and self.ngpu > 1:
208 |             gpu_ids = range(self.ngpu)  # device ids 0 .. ngpu-1
209 |         z_prediction = self.decode_fcs(input)  # (batch, nz) reconstruction of the latent code from the fc features
210 |         input = input.view(-1,1024,1,1)  # reshape the fc features into a 1024 x 1 x 1 map for the deconv stack
211 |         output = nn.parallel.data_parallel(self.convs, input, gpu_ids)
212 |         return output, z_prediction  # main.py feeds z_prediction to an MSE loss against the noise when opt.model == 2
213 | 
214 | 
215 | # DCGAN model with fc layers
216 | class _netG_3(nn.Module):
217 |     def __init__(self, ngpu, nz, nc , ngf):
218 |         super(_netG_3, self).__init__()
219 |         self.ngpu = ngpu
220 |         self.fcs = nn.Sequential(
221 |             # input is Z, going into a convolution
222 |             # state size. nz x 1 x 1
223 |             nn.Linear(nz, 1024),
224 |             nn.ReLU(inplace=True),
225 |             nn.Dropout(0.5),
226 |             nn.Linear(1024, 1024),
227 |             nn.ReLU(inplace=True),
228 |             nn.Dropout(0.5),
229 |             )
230 |         self.convs = nn.Sequential(
231 |             # 1024x1x1
232 |             nn.ConvTranspose2d(1024, ngf * 8, 4, 1, 0, bias=False),
233 |             nn.BatchNorm2d(ngf * 8),
234 |             nn.ReLU(inplace=True),
235 |             # state size. (ngf*8) x 4 x 4
236 |             nn.ConvTranspose2d(ngf * 8, ngf * 4, 4, 2, 1, bias=False),
237 |             nn.BatchNorm2d(ngf * 4),
238 |             nn.ReLU(inplace=True),
239 |             # state size. (ngf*4) x 8 x 8
240 |             nn.ConvTranspose2d(ngf * 4, ngf * 2, 4, 2, 1, bias=False),
241 |             nn.BatchNorm2d(ngf * 2),
242 |             nn.ReLU(inplace=True),
243 |             # state size. (ngf*2) x 16 x 16
244 |             nn.ConvTranspose2d(ngf * 2,     ngf, 4, 2, 1, bias=False),
245 |             nn.BatchNorm2d(ngf),
246 |             nn.ReLU(inplace=True),
247 |             # state size. (ngf) x 32 x 32
248 |             nn.ConvTranspose2d(    ngf,      nc, 4, 2, 1, bias=False),
249 |             nn.Tanh()
250 |             # state size. (nc) x 96 x 96
251 |         )
252 |     def forward(self, input):
253 |         input = self.fcs(input.view(-1,nz))
254 |         gpu_ids = None
255 |         if isinstance(input.data, torch.cuda.FloatTensor) and self.ngpu > 1:
256 |             gpu_ids = range(self.ngpu)
257 |         input = input.view(-1,1024,1,1)
258 |         return nn.parallel.data_parallel(self.convs, input, gpu_ids)
259 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Pytorch Converter
 2 | Pytorch model to Caffe & [ncnn](https://github.com/Tencent/ncnn)
 3 | 
 4 | ## Model Examples
 5 |   - SqueezeNet from torchvision
 6 |   - DenseNet from torchvision
 7 |   - [ResNet50](https://drive.google.com/file/d/0B5B31rlbCRZfcS1rY3BtVWhDREk/view?usp=sharing) (with ceil_mode=True)
 8 |   - MobileNet
 9 |   - AnimeGAN pretrained model from author (https://github.com/jayleicn/animeGAN)
10 |   - SSD-like object detection net(for ncnn)
11 |   - UNet (no pretrained model yet, just default initialization)
12 |         
13 | ## Attentions
14 |   - **Mind the difference in the pooling layers' ceil_mode between Pytorch and Caffe / ncnn**
15 |     - You can convert Pytorch models with all pooling layer's ceil_mode=True.
16 |     - Or compile a custom version of Caffe/ncnn with floor() replaced by ceil() in pooling layer inference.
17 | 
18 |   - **Python Errors: Use Pytorch 0.2.0 Only to Convert Your Model**
19 |     - Later Pytorch versions (0.3.0, 0.3.1, 0.4.0) seem to block third-party model conversion.
20 |     - Note that you can still TRAIN your model on Pytorch 0.3.0~0.4.0; the converter running on 0.2.0 can still load models saved by those later versions correctly.
21 | 
22 |   - **Other Python packages requirements:**
23 |     - to Caffe: numpy, protobuf (to gen caffe proto)
24 |     - to ncnn: numpy
25 |     - for testing Caffe result: pycaffe, cv2
26 | 
27 |   - **Model Loading Error**
28 |     - Use compatible model saving & loading method, e.g.    
29 | 
30 |       ```
31 |       # Saving, notice the difference on DataParallel
32 |       net_for_saving = net.module if use_nn_DataParallel else net
33 |       torch.save(net_for_saving.state_dict(), path)
34 |       
35 |       # Loading
36 |       net.load_state_dict(torch.load(path, map_location=lambda storage, loc: storage))
37 |       ```
38 | 


--------------------------------------------------------------------------------
/TestData/2008_000536.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/starimeL/PytorchConverter/75fbdb3d52da9ee64db509ecdf221dd102402579/TestData/2008_000536.jpg


--------------------------------------------------------------------------------
/TestData/2008_001171.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/starimeL/PytorchConverter/75fbdb3d52da9ee64db509ecdf221dd102402579/TestData/2008_001171.jpg


--------------------------------------------------------------------------------
/TestData/2008_001601.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/starimeL/PytorchConverter/75fbdb3d52da9ee64db509ecdf221dd102402579/TestData/2008_001601.jpg


--------------------------------------------------------------------------------
/TestData/2008_001841.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/starimeL/PytorchConverter/75fbdb3d52da9ee64db509ecdf221dd102402579/TestData/2008_001841.jpg


--------------------------------------------------------------------------------
/TestData/227-2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/starimeL/PytorchConverter/75fbdb3d52da9ee64db509ecdf221dd102402579/TestData/227-2.jpg


--------------------------------------------------------------------------------
/TestData/227-3.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/starimeL/PytorchConverter/75fbdb3d52da9ee64db509ecdf221dd102402579/TestData/227-3.jpg


--------------------------------------------------------------------------------
/TestData/227.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/starimeL/PytorchConverter/75fbdb3d52da9ee64db509ecdf221dd102402579/TestData/227.jpg


--------------------------------------------------------------------------------
/TestData/ImageNetLabels.txt:
--------------------------------------------------------------------------------
   1 | 0: 'tench, Tinca tinca',
   2 | 1: 'goldfish, Carassius auratus',
   3 | 2: 'great white shark, white shark, man-eater, man-eating shark, Carcharodon carcharias',
   4 | 3: 'tiger shark, Galeocerdo cuvieri',
   5 | 4: 'hammerhead, hammerhead shark',
   6 | 5: 'electric ray, crampfish, numbfish, torpedo',
   7 | 6: 'stingray',
   8 | 7: 'cock',
   9 | 8: 'hen',
  10 | 9: 'ostrich, Struthio camelus',
  11 | 10: 'brambling, Fringilla montifringilla',
  12 | 11: 'goldfinch, Carduelis carduelis',
  13 | 12: 'house finch, linnet, Carpodacus mexicanus',
  14 | 13: 'junco, snowbird',
  15 | 14: 'indigo bunting, indigo finch, indigo bird, Passerina cyanea',
  16 | 15: 'robin, American robin, Turdus migratorius',
  17 | 16: 'bulbul',
  18 | 17: 'jay',
  19 | 18: 'magpie',
  20 | 19: 'chickadee',
  21 | 20: 'water ouzel, dipper',
  22 | 21: 'kite',
  23 | 22: 'bald eagle, American eagle, Haliaeetus leucocephalus',
  24 | 23: 'vulture',
  25 | 24: 'great grey owl, great gray owl, Strix nebulosa',
  26 | 25: 'European fire salamander, Salamandra salamandra',
  27 | 26: 'common newt, Triturus vulgaris',
  28 | 27: 'eft',
  29 | 28: 'spotted salamander, Ambystoma maculatum',
  30 | 29: 'axolotl, mud puppy, Ambystoma mexicanum',
  31 | 30: 'bullfrog, Rana catesbeiana',
  32 | 31: 'tree frog, tree-frog',
  33 | 32: 'tailed frog, bell toad, ribbed toad, tailed toad, Ascaphus trui',
  34 | 33: 'loggerhead, loggerhead turtle, Caretta caretta',
  35 | 34: 'leatherback turtle, leatherback, leathery turtle, Dermochelys coriacea',
  36 | 35: 'mud turtle',
  37 | 36: 'terrapin',
  38 | 37: 'box turtle, box tortoise',
  39 | 38: 'banded gecko',
  40 | 39: 'common iguana, iguana, Iguana iguana',
  41 | 40: 'American chameleon, anole, Anolis carolinensis',
  42 | 41: 'whiptail, whiptail lizard',
  43 | 42: 'agama',
  44 | 43: 'frilled lizard, Chlamydosaurus kingi',
  45 | 44: 'alligator lizard',
  46 | 45: 'Gila monster, Heloderma suspectum',
  47 | 46: 'green lizard, Lacerta viridis',
  48 | 47: 'African chameleon, Chamaeleo chamaeleon',
  49 | 48: 'Komodo dragon, Komodo lizard, dragon lizard, giant lizard, Varanus komodoensis',
  50 | 49: 'African crocodile, Nile crocodile, Crocodylus niloticus',
  51 | 50: 'American alligator, Alligator mississipiensis',
  52 | 51: 'triceratops',
  53 | 52: 'thunder snake, worm snake, Carphophis amoenus',
  54 | 53: 'ringneck snake, ring-necked snake, ring snake',
  55 | 54: 'hognose snake, puff adder, sand viper',
  56 | 55: 'green snake, grass snake',
  57 | 56: 'king snake, kingsnake',
  58 | 57: 'garter snake, grass snake',
  59 | 58: 'water snake',
  60 | 59: 'vine snake',
  61 | 60: 'night snake, Hypsiglena torquata',
  62 | 61: 'boa constrictor, Constrictor constrictor',
  63 | 62: 'rock python, rock snake, Python sebae',
  64 | 63: 'Indian cobra, Naja naja',
  65 | 64: 'green mamba',
  66 | 65: 'sea snake',
  67 | 66: 'horned viper, cerastes, sand viper, horned asp, Cerastes cornutus',
  68 | 67: 'diamondback, diamondback rattlesnake, Crotalus adamanteus',
  69 | 68: 'sidewinder, horned rattlesnake, Crotalus cerastes',
  70 | 69: 'trilobite',
  71 | 70: 'harvestman, daddy longlegs, Phalangium opilio',
  72 | 71: 'scorpion',
  73 | 72: 'black and gold garden spider, Argiope aurantia',
  74 | 73: 'barn spider, Araneus cavaticus',
  75 | 74: 'garden spider, Aranea diademata',
  76 | 75: 'black widow, Latrodectus mactans',
  77 | 76: 'tarantula',
  78 | 77: 'wolf spider, hunting spider',
  79 | 78: 'tick',
  80 | 79: 'centipede',
  81 | 80: 'black grouse',
  82 | 81: 'ptarmigan',
  83 | 82: 'ruffed grouse, partridge, Bonasa umbellus',
  84 | 83: 'prairie chicken, prairie grouse, prairie fowl',
  85 | 84: 'peacock',
  86 | 85: 'quail',
  87 | 86: 'partridge',
  88 | 87: 'African grey, African gray, Psittacus erithacus',
  89 | 88: 'macaw',
  90 | 89: 'sulphur-crested cockatoo, Kakatoe galerita, Cacatua galerita',
  91 | 90: 'lorikeet',
  92 | 91: 'coucal',
  93 | 92: 'bee eater',
  94 | 93: 'hornbill',
  95 | 94: 'hummingbird',
  96 | 95: 'jacamar',
  97 | 96: 'toucan',
  98 | 97: 'drake',
  99 | 98: 'red-breasted merganser, Mergus serrator',
 100 | 99: 'goose',
 101 | 100: 'black swan, Cygnus atratus',
 102 | 101: 'tusker',
 103 | 102: 'echidna, spiny anteater, anteater',
 104 | 103: 'platypus, duckbill, duckbilled platypus, duck-billed platypus, Ornithorhynchus anatinus',
 105 | 104: 'wallaby, brush kangaroo',
 106 | 105: 'koala, koala bear, kangaroo bear, native bear, Phascolarctos cinereus',
 107 | 106: 'wombat',
 108 | 107: 'jellyfish',
 109 | 108: 'sea anemone, anemone',
 110 | 109: 'brain coral',
 111 | 110: 'flatworm, platyhelminth',
 112 | 111: 'nematode, nematode worm, roundworm',
 113 | 112: 'conch',
 114 | 113: 'snail',
 115 | 114: 'slug',
 116 | 115: 'sea slug, nudibranch',
 117 | 116: 'chiton, coat-of-mail shell, sea cradle, polyplacophore',
 118 | 117: 'chambered nautilus, pearly nautilus, nautilus',
 119 | 118: 'Dungeness crab, Cancer magister',
 120 | 119: 'rock crab, Cancer irroratus',
 121 | 120: 'fiddler crab',
 122 | 121: 'king crab, Alaska crab, Alaskan king crab, Alaska king crab, Paralithodes camtschatica',
 123 | 122: 'American lobster, Northern lobster, Maine lobster, Homarus americanus',
 124 | 123: 'spiny lobster, langouste, rock lobster, crawfish, crayfish, sea crawfish',
 125 | 124: 'crayfish, crawfish, crawdad, crawdaddy',
 126 | 125: 'hermit crab',
 127 | 126: 'isopod',
 128 | 127: 'white stork, Ciconia ciconia',
 129 | 128: 'black stork, Ciconia nigra',
 130 | 129: 'spoonbill',
 131 | 130: 'flamingo',
 132 | 131: 'little blue heron, Egretta caerulea',
 133 | 132: 'American egret, great white heron, Egretta albus',
 134 | 133: 'bittern',
 135 | 134: 'crane',
 136 | 135: 'limpkin, Aramus pictus',
 137 | 136: 'European gallinule, Porphyrio porphyrio',
 138 | 137: 'American coot, marsh hen, mud hen, water hen, Fulica americana',
 139 | 138: 'bustard',
 140 | 139: 'ruddy turnstone, Arenaria interpres',
 141 | 140: 'red-backed sandpiper, dunlin, Erolia alpina',
 142 | 141: 'redshank, Tringa totanus',
 143 | 142: 'dowitcher',
 144 | 143: 'oystercatcher, oyster catcher',
 145 | 144: 'pelican',
 146 | 145: 'king penguin, Aptenodytes patagonica',
 147 | 146: 'albatross, mollymawk',
 148 | 147: 'grey whale, gray whale, devilfish, Eschrichtius gibbosus, Eschrichtius robustus',
 149 | 148: 'killer whale, killer, orca, grampus, sea wolf, Orcinus orca',
 150 | 149: 'dugong, Dugong dugon',
 151 | 150: 'sea lion',
 152 | 151: 'Chihuahua',
 153 | 152: 'Japanese spaniel',
 154 | 153: 'Maltese dog, Maltese terrier, Maltese',
 155 | 154: 'Pekinese, Pekingese, Peke',
 156 | 155: 'Shih-Tzu',
 157 | 156: 'Blenheim spaniel',
 158 | 157: 'papillon',
 159 | 158: 'toy terrier',
 160 | 159: 'Rhodesian ridgeback',
 161 | 160: 'Afghan hound, Afghan',
 162 | 161: 'basset, basset hound',
 163 | 162: 'beagle',
 164 | 163: 'bloodhound, sleuthhound',
 165 | 164: 'bluetick',
 166 | 165: 'black-and-tan coonhound',
 167 | 166: 'Walker hound, Walker foxhound',
 168 | 167: 'English foxhound',
 169 | 168: 'redbone',
 170 | 169: 'borzoi, Russian wolfhound',
 171 | 170: 'Irish wolfhound',
 172 | 171: 'Italian greyhound',
 173 | 172: 'whippet',
 174 | 173: 'Ibizan hound, Ibizan Podenco',
 175 | 174: 'Norwegian elkhound, elkhound',
 176 | 175: 'otterhound, otter hound',
 177 | 176: 'Saluki, gazelle hound',
 178 | 177: 'Scottish deerhound, deerhound',
 179 | 178: 'Weimaraner',
 180 | 179: 'Staffordshire bullterrier, Staffordshire bull terrier',
 181 | 180: 'American Staffordshire terrier, Staffordshire terrier, American pit bull terrier, pit bull terrier',
 182 | 181: 'Bedlington terrier',
 183 | 182: 'Border terrier',
 184 | 183: 'Kerry blue terrier',
 185 | 184: 'Irish terrier',
 186 | 185: 'Norfolk terrier',
 187 | 186: 'Norwich terrier',
 188 | 187: 'Yorkshire terrier',
 189 | 188: 'wire-haired fox terrier',
 190 | 189: 'Lakeland terrier',
 191 | 190: 'Sealyham terrier, Sealyham',
 192 | 191: 'Airedale, Airedale terrier',
 193 | 192: 'cairn, cairn terrier',
 194 | 193: 'Australian terrier',
 195 | 194: 'Dandie Dinmont, Dandie Dinmont terrier',
 196 | 195: 'Boston bull, Boston terrier',
 197 | 196: 'miniature schnauzer',
 198 | 197: 'giant schnauzer',
 199 | 198: 'standard schnauzer',
 200 | 199: 'Scotch terrier, Scottish terrier, Scottie',
 201 | 200: 'Tibetan terrier, chrysanthemum dog',
 202 | 201: 'silky terrier, Sydney silky',
 203 | 202: 'soft-coated wheaten terrier',
 204 | 203: 'West Highland white terrier',
 205 | 204: 'Lhasa, Lhasa apso',
 206 | 205: 'flat-coated retriever',
 207 | 206: 'curly-coated retriever',
 208 | 207: 'golden retriever',
 209 | 208: 'Labrador retriever',
 210 | 209: 'Chesapeake Bay retriever',
 211 | 210: 'German short-haired pointer',
 212 | 211: 'vizsla, Hungarian pointer',
 213 | 212: 'English setter',
 214 | 213: 'Irish setter, red setter',
 215 | 214: 'Gordon setter',
 216 | 215: 'Brittany spaniel',
 217 | 216: 'clumber, clumber spaniel',
 218 | 217: 'English springer, English springer spaniel',
 219 | 218: 'Welsh springer spaniel',
 220 | 219: 'cocker spaniel, English cocker spaniel, cocker',
 221 | 220: 'Sussex spaniel',
 222 | 221: 'Irish water spaniel',
 223 | 222: 'kuvasz',
 224 | 223: 'schipperke',
 225 | 224: 'groenendael',
 226 | 225: 'malinois',
 227 | 226: 'briard',
 228 | 227: 'kelpie',
 229 | 228: 'komondor',
 230 | 229: 'Old English sheepdog, bobtail',
 231 | 230: 'Shetland sheepdog, Shetland sheep dog, Shetland',
 232 | 231: 'collie',
 233 | 232: 'Border collie',
 234 | 233: 'Bouvier des Flandres, Bouviers des Flandres',
 235 | 234: 'Rottweiler',
 236 | 235: 'German shepherd, German shepherd dog, German police dog, alsatian',
 237 | 236: 'Doberman, Doberman pinscher',
 238 | 237: 'miniature pinscher',
 239 | 238: 'Greater Swiss Mountain dog',
 240 | 239: 'Bernese mountain dog',
 241 | 240: 'Appenzeller',
 242 | 241: 'EntleBucher',
 243 | 242: 'boxer',
 244 | 243: 'bull mastiff',
 245 | 244: 'Tibetan mastiff',
 246 | 245: 'French bulldog',
 247 | 246: 'Great Dane',
 248 | 247: 'Saint Bernard, St Bernard',
 249 | 248: 'Eskimo dog, husky',
 250 | 249: 'malamute, malemute, Alaskan malamute',
 251 | 250: 'Siberian husky',
 252 | 251: 'dalmatian, coach dog, carriage dog',
 253 | 252: 'affenpinscher, monkey pinscher, monkey dog',
 254 | 253: 'basenji',
 255 | 254: 'pug, pug-dog',
 256 | 255: 'Leonberg',
 257 | 256: 'Newfoundland, Newfoundland dog',
 258 | 257: 'Great Pyrenees',
 259 | 258: 'Samoyed, Samoyede',
 260 | 259: 'Pomeranian',
 261 | 260: 'chow, chow chow',
 262 | 261: 'keeshond',
 263 | 262: 'Brabancon griffon',
 264 | 263: 'Pembroke, Pembroke Welsh corgi',
 265 | 264: 'Cardigan, Cardigan Welsh corgi',
 266 | 265: 'toy poodle',
 267 | 266: 'miniature poodle',
 268 | 267: 'standard poodle',
 269 | 268: 'Mexican hairless',
 270 | 269: 'timber wolf, grey wolf, gray wolf, Canis lupus',
 271 | 270: 'white wolf, Arctic wolf, Canis lupus tundrarum',
 272 | 271: 'red wolf, maned wolf, Canis rufus, Canis niger',
 273 | 272: 'coyote, prairie wolf, brush wolf, Canis latrans',
 274 | 273: 'dingo, warrigal, warragal, Canis dingo',
 275 | 274: 'dhole, Cuon alpinus',
 276 | 275: 'African hunting dog, hyena dog, Cape hunting dog, Lycaon pictus',
 277 | 276: 'hyena, hyaena',
 278 | 277: 'red fox, Vulpes vulpes',
 279 | 278: 'kit fox, Vulpes macrotis',
 280 | 279: 'Arctic fox, white fox, Alopex lagopus',
 281 | 280: 'grey fox, gray fox, Urocyon cinereoargenteus',
 282 | 281: 'tabby, tabby cat',
 283 | 282: 'tiger cat',
 284 | 283: 'Persian cat',
 285 | 284: 'Siamese cat, Siamese',
 286 | 285: 'Egyptian cat',
 287 | 286: 'cougar, puma, catamount, mountain lion, painter, panther, Felis concolor',
 288 | 287: 'lynx, catamount',
 289 | 288: 'leopard, Panthera pardus',
 290 | 289: 'snow leopard, ounce, Panthera uncia',
 291 | 290: 'jaguar, panther, Panthera onca, Felis onca',
 292 | 291: 'lion, king of beasts, Panthera leo',
 293 | 292: 'tiger, Panthera tigris',
 294 | 293: 'cheetah, chetah, Acinonyx jubatus',
 295 | 294: 'brown bear, bruin, Ursus arctos',
 296 | 295: 'American black bear, black bear, Ursus americanus, Euarctos americanus',
 297 | 296: 'ice bear, polar bear, Ursus Maritimus, Thalarctos maritimus',
 298 | 297: 'sloth bear, Melursus ursinus, Ursus ursinus',
 299 | 298: 'mongoose',
 300 | 299: 'meerkat, mierkat',
 301 | 300: 'tiger beetle',
 302 | 301: 'ladybug, ladybeetle, lady beetle, ladybird, ladybird beetle',
 303 | 302: 'ground beetle, carabid beetle',
 304 | 303: 'long-horned beetle, longicorn, longicorn beetle',
 305 | 304: 'leaf beetle, chrysomelid',
 306 | 305: 'dung beetle',
 307 | 306: 'rhinoceros beetle',
 308 | 307: 'weevil',
 309 | 308: 'fly',
 310 | 309: 'bee',
 311 | 310: 'ant, emmet, pismire',
 312 | 311: 'grasshopper, hopper',
 313 | 312: 'cricket',
 314 | 313: 'walking stick, walkingstick, stick insect',
 315 | 314: 'cockroach, roach',
 316 | 315: 'mantis, mantid',
 317 | 316: 'cicada, cicala',
 318 | 317: 'leafhopper',
 319 | 318: 'lacewing, lacewing fly',
 320 | 319: "dragonfly, darning needle, devil's darning needle, sewing needle, snake feeder, snake doctor, mosquito hawk, skeeter hawk",
 321 | 320: 'damselfly',
 322 | 321: 'admiral',
 323 | 322: 'ringlet, ringlet butterfly',
 324 | 323: 'monarch, monarch butterfly, milkweed butterfly, Danaus plexippus',
 325 | 324: 'cabbage butterfly',
 326 | 325: 'sulphur butterfly, sulfur butterfly',
 327 | 326: 'lycaenid, lycaenid butterfly',
 328 | 327: 'starfish, sea star',
 329 | 328: 'sea urchin',
 330 | 329: 'sea cucumber, holothurian',
 331 | 330: 'wood rabbit, cottontail, cottontail rabbit',
 332 | 331: 'hare',
 333 | 332: 'Angora, Angora rabbit',
 334 | 333: 'hamster',
 335 | 334: 'porcupine, hedgehog',
 336 | 335: 'fox squirrel, eastern fox squirrel, Sciurus niger',
 337 | 336: 'marmot',
 338 | 337: 'beaver',
 339 | 338: 'guinea pig, Cavia cobaya',
 340 | 339: 'sorrel',
 341 | 340: 'zebra',
 342 | 341: 'hog, pig, grunter, squealer, Sus scrofa',
 343 | 342: 'wild boar, boar, Sus scrofa',
 344 | 343: 'warthog',
 345 | 344: 'hippopotamus, hippo, river horse, Hippopotamus amphibius',
 346 | 345: 'ox',
 347 | 346: 'water buffalo, water ox, Asiatic buffalo, Bubalus bubalis',
 348 | 347: 'bison',
 349 | 348: 'ram, tup',
 350 | 349: 'bighorn, bighorn sheep, cimarron, Rocky Mountain bighorn, Rocky Mountain sheep, Ovis canadensis',
 351 | 350: 'ibex, Capra ibex',
 352 | 351: 'hartebeest',
 353 | 352: 'impala, Aepyceros melampus',
 354 | 353: 'gazelle',
 355 | 354: 'Arabian camel, dromedary, Camelus dromedarius',
 356 | 355: 'llama',
 357 | 356: 'weasel',
 358 | 357: 'mink',
 359 | 358: 'polecat, fitch, foulmart, foumart, Mustela putorius',
 360 | 359: 'black-footed ferret, ferret, Mustela nigripes',
 361 | 360: 'otter',
 362 | 361: 'skunk, polecat, wood pussy',
 363 | 362: 'badger',
 364 | 363: 'armadillo',
 365 | 364: 'three-toed sloth, ai, Bradypus tridactylus',
 366 | 365: 'orangutan, orang, orangutang, Pongo pygmaeus',
 367 | 366: 'gorilla, Gorilla gorilla',
 368 | 367: 'chimpanzee, chimp, Pan troglodytes',
 369 | 368: 'gibbon, Hylobates lar',
 370 | 369: 'siamang, Hylobates syndactylus, Symphalangus syndactylus',
 371 | 370: 'guenon, guenon monkey',
 372 | 371: 'patas, hussar monkey, Erythrocebus patas',
 373 | 372: 'baboon',
 374 | 373: 'macaque',
 375 | 374: 'langur',
 376 | 375: 'colobus, colobus monkey',
 377 | 376: 'proboscis monkey, Nasalis larvatus',
 378 | 377: 'marmoset',
 379 | 378: 'capuchin, ringtail, Cebus capucinus',
 380 | 379: 'howler monkey, howler',
 381 | 380: 'titi, titi monkey',
 382 | 381: 'spider monkey, Ateles geoffroyi',
 383 | 382: 'squirrel monkey, Saimiri sciureus',
 384 | 383: 'Madagascar cat, ring-tailed lemur, Lemur catta',
 385 | 384: 'indri, indris, Indri indri, Indri brevicaudatus',
 386 | 385: 'Indian elephant, Elephas maximus',
 387 | 386: 'African elephant, Loxodonta africana',
 388 | 387: 'lesser panda, red panda, panda, bear cat, cat bear, Ailurus fulgens',
 389 | 388: 'giant panda, panda, panda bear, coon bear, Ailuropoda melanoleuca',
 390 | 389: 'barracouta, snoek',
 391 | 390: 'eel',
 392 | 391: 'coho, cohoe, coho salmon, blue jack, silver salmon, Oncorhynchus kisutch',
 393 | 392: 'rock beauty, Holocanthus tricolor',
 394 | 393: 'anemone fish',
 395 | 394: 'sturgeon',
 396 | 395: 'gar, garfish, garpike, billfish, Lepisosteus osseus',
 397 | 396: 'lionfish',
 398 | 397: 'puffer, pufferfish, blowfish, globefish',
 399 | 398: 'abacus',
 400 | 399: 'abaya',
 401 | 400: "academic gown, academic robe, judge's robe",
 402 | 401: 'accordion, piano accordion, squeeze box',
 403 | 402: 'acoustic guitar',
 404 | 403: 'aircraft carrier, carrier, flattop, attack aircraft carrier',
 405 | 404: 'airliner',
 406 | 405: 'airship, dirigible',
 407 | 406: 'altar',
 408 | 407: 'ambulance',
 409 | 408: 'amphibian, amphibious vehicle',
 410 | 409: 'analog clock',
 411 | 410: 'apiary, bee house',
 412 | 411: 'apron',
 413 | 412: 'ashcan, trash can, garbage can, wastebin, ash bin, ash-bin, ashbin, dustbin, trash barrel, trash bin',
 414 | 413: 'assault rifle, assault gun',
 415 | 414: 'backpack, back pack, knapsack, packsack, rucksack, haversack',
 416 | 415: 'bakery, bakeshop, bakehouse',
 417 | 416: 'balance beam, beam',
 418 | 417: 'balloon',
 419 | 418: 'ballpoint, ballpoint pen, ballpen, Biro',
 420 | 419: 'Band Aid',
 421 | 420: 'banjo',
 422 | 421: 'bannister, banister, balustrade, balusters, handrail',
 423 | 422: 'barbell',
 424 | 423: 'barber chair',
 425 | 424: 'barbershop',
 426 | 425: 'barn',
 427 | 426: 'barometer',
 428 | 427: 'barrel, cask',
 429 | 428: 'barrow, garden cart, lawn cart, wheelbarrow',
 430 | 429: 'baseball',
 431 | 430: 'basketball',
 432 | 431: 'bassinet',
 433 | 432: 'bassoon',
 434 | 433: 'bathing cap, swimming cap',
 435 | 434: 'bath towel',
 436 | 435: 'bathtub, bathing tub, bath, tub',
 437 | 436: 'beach wagon, station wagon, wagon, estate car, beach waggon, station waggon, waggon',
 438 | 437: 'beacon, lighthouse, beacon light, pharos',
 439 | 438: 'beaker',
 440 | 439: 'bearskin, busby, shako',
 441 | 440: 'beer bottle',
 442 | 441: 'beer glass',
 443 | 442: 'bell cote, bell cot',
 444 | 443: 'bib',
 445 | 444: 'bicycle-built-for-two, tandem bicycle, tandem',
 446 | 445: 'bikini, two-piece',
 447 | 446: 'binder, ring-binder',
 448 | 447: 'binoculars, field glasses, opera glasses',
 449 | 448: 'birdhouse',
 450 | 449: 'boathouse',
 451 | 450: 'bobsled, bobsleigh, bob',
 452 | 451: 'bolo tie, bolo, bola tie, bola',
 453 | 452: 'bonnet, poke bonnet',
 454 | 453: 'bookcase',
 455 | 454: 'bookshop, bookstore, bookstall',
 456 | 455: 'bottlecap',
 457 | 456: 'bow',
 458 | 457: 'bow tie, bow-tie, bowtie',
 459 | 458: 'brass, memorial tablet, plaque',
 460 | 459: 'brassiere, bra, bandeau',
 461 | 460: 'breakwater, groin, groyne, mole, bulwark, seawall, jetty',
 462 | 461: 'breastplate, aegis, egis',
 463 | 462: 'broom',
 464 | 463: 'bucket, pail',
 465 | 464: 'buckle',
 466 | 465: 'bulletproof vest',
 467 | 466: 'bullet train, bullet',
 468 | 467: 'butcher shop, meat market',
 469 | 468: 'cab, hack, taxi, taxicab',
 470 | 469: 'caldron, cauldron',
 471 | 470: 'candle, taper, wax light',
 472 | 471: 'cannon',
 473 | 472: 'canoe',
 474 | 473: 'can opener, tin opener',
 475 | 474: 'cardigan',
 476 | 475: 'car mirror',
 477 | 476: 'carousel, carrousel, merry-go-round, roundabout, whirligig',
 478 | 477: "carpenter's kit, tool kit",
 479 | 478: 'carton',
 480 | 479: 'car wheel',
 481 | 480: 'cash machine, cash dispenser, automated teller machine, automatic teller machine, automated teller, automatic teller, ATM',
 482 | 481: 'cassette',
 483 | 482: 'cassette player',
 484 | 483: 'castle',
 485 | 484: 'catamaran',
 486 | 485: 'CD player',
 487 | 486: 'cello, violoncello',
 488 | 487: 'cellular telephone, cellular phone, cellphone, cell, mobile phone',
 489 | 488: 'chain',
 490 | 489: 'chainlink fence',
 491 | 490: 'chain mail, ring mail, mail, chain armor, chain armour, ring armor, ring armour',
 492 | 491: 'chain saw, chainsaw',
 493 | 492: 'chest',
 494 | 493: 'chiffonier, commode',
 495 | 494: 'chime, bell, gong',
 496 | 495: 'china cabinet, china closet',
 497 | 496: 'Christmas stocking',
 498 | 497: 'church, church building',
 499 | 498: 'cinema, movie theater, movie theatre, movie house, picture palace',
 500 | 499: 'cleaver, meat cleaver, chopper',
 501 | 500: 'cliff dwelling',
 502 | 501: 'cloak',
 503 | 502: 'clog, geta, patten, sabot',
 504 | 503: 'cocktail shaker',
 505 | 504: 'coffee mug',
 506 | 505: 'coffeepot',
 507 | 506: 'coil, spiral, volute, whorl, helix',
 508 | 507: 'combination lock',
 509 | 508: 'computer keyboard, keypad',
 510 | 509: 'confectionery, confectionary, candy store',
 511 | 510: 'container ship, containership, container vessel',
 512 | 511: 'convertible',
 513 | 512: 'corkscrew, bottle screw',
 514 | 513: 'cornet, horn, trumpet, trump',
 515 | 514: 'cowboy boot',
 516 | 515: 'cowboy hat, ten-gallon hat',
 517 | 516: 'cradle',
 518 | 517: 'crane',
 519 | 518: 'crash helmet',
 520 | 519: 'crate',
 521 | 520: 'crib, cot',
 522 | 521: 'Crock Pot',
 523 | 522: 'croquet ball',
 524 | 523: 'crutch',
 525 | 524: 'cuirass',
 526 | 525: 'dam, dike, dyke',
 527 | 526: 'desk',
 528 | 527: 'desktop computer',
 529 | 528: 'dial telephone, dial phone',
 530 | 529: 'diaper, nappy, napkin',
 531 | 530: 'digital clock',
 532 | 531: 'digital watch',
 533 | 532: 'dining table, board',
 534 | 533: 'dishrag, dishcloth',
 535 | 534: 'dishwasher, dish washer, dishwashing machine',
 536 | 535: 'disk brake, disc brake',
 537 | 536: 'dock, dockage, docking facility',
 538 | 537: 'dogsled, dog sled, dog sleigh',
 539 | 538: 'dome',
 540 | 539: 'doormat, welcome mat',
 541 | 540: 'drilling platform, offshore rig',
 542 | 541: 'drum, membranophone, tympan',
 543 | 542: 'drumstick',
 544 | 543: 'dumbbell',
 545 | 544: 'Dutch oven',
 546 | 545: 'electric fan, blower',
 547 | 546: 'electric guitar',
 548 | 547: 'electric locomotive',
 549 | 548: 'entertainment center',
 550 | 549: 'envelope',
 551 | 550: 'espresso maker',
 552 | 551: 'face powder',
 553 | 552: 'feather boa, boa',
 554 | 553: 'file, file cabinet, filing cabinet',
 555 | 554: 'fireboat',
 556 | 555: 'fire engine, fire truck',
 557 | 556: 'fire screen, fireguard',
 558 | 557: 'flagpole, flagstaff',
 559 | 558: 'flute, transverse flute',
 560 | 559: 'folding chair',
 561 | 560: 'football helmet',
 562 | 561: 'forklift',
 563 | 562: 'fountain',
 564 | 563: 'fountain pen',
 565 | 564: 'four-poster',
 566 | 565: 'freight car',
 567 | 566: 'French horn, horn',
 568 | 567: 'frying pan, frypan, skillet',
 569 | 568: 'fur coat',
 570 | 569: 'garbage truck, dustcart',
 571 | 570: 'gasmask, respirator, gas helmet',
 572 | 571: 'gas pump, gasoline pump, petrol pump, island dispenser',
 573 | 572: 'goblet',
 574 | 573: 'go-kart',
 575 | 574: 'golf ball',
 576 | 575: 'golfcart, golf cart',
 577 | 576: 'gondola',
 578 | 577: 'gong, tam-tam',
 579 | 578: 'gown',
 580 | 579: 'grand piano, grand',
 581 | 580: 'greenhouse, nursery, glasshouse',
 582 | 581: 'grille, radiator grille',
 583 | 582: 'grocery store, grocery, food market, market',
 584 | 583: 'guillotine',
 585 | 584: 'hair slide',
 586 | 585: 'hair spray',
 587 | 586: 'half track',
 588 | 587: 'hammer',
 589 | 588: 'hamper',
 590 | 589: 'hand blower, blow dryer, blow drier, hair dryer, hair drier',
 591 | 590: 'hand-held computer, hand-held microcomputer',
 592 | 591: 'handkerchief, hankie, hanky, hankey',
 593 | 592: 'hard disc, hard disk, fixed disk',
 594 | 593: 'harmonica, mouth organ, harp, mouth harp',
 595 | 594: 'harp',
 596 | 595: 'harvester, reaper',
 597 | 596: 'hatchet',
 598 | 597: 'holster',
 599 | 598: 'home theater, home theatre',
 600 | 599: 'honeycomb',
 601 | 600: 'hook, claw',
 602 | 601: 'hoopskirt, crinoline',
 603 | 602: 'horizontal bar, high bar',
 604 | 603: 'horse cart, horse-cart',
 605 | 604: 'hourglass',
 606 | 605: 'iPod',
 607 | 606: 'iron, smoothing iron',
 608 | 607: "jack-o'-lantern",
 609 | 608: 'jean, blue jean, denim',
 610 | 609: 'jeep, landrover',
 611 | 610: 'jersey, T-shirt, tee shirt',
 612 | 611: 'jigsaw puzzle',
 613 | 612: 'jinrikisha, ricksha, rickshaw',
 614 | 613: 'joystick',
 615 | 614: 'kimono',
 616 | 615: 'knee pad',
 617 | 616: 'knot',
 618 | 617: 'lab coat, laboratory coat',
 619 | 618: 'ladle',
 620 | 619: 'lampshade, lamp shade',
 621 | 620: 'laptop, laptop computer',
 622 | 621: 'lawn mower, mower',
 623 | 622: 'lens cap, lens cover',
 624 | 623: 'letter opener, paper knife, paperknife',
 625 | 624: 'library',
 626 | 625: 'lifeboat',
 627 | 626: 'lighter, light, igniter, ignitor',
 628 | 627: 'limousine, limo',
 629 | 628: 'liner, ocean liner',
 630 | 629: 'lipstick, lip rouge',
 631 | 630: 'Loafer',
 632 | 631: 'lotion',
 633 | 632: 'loudspeaker, speaker, speaker unit, loudspeaker system, speaker system',
 634 | 633: "loupe, jeweler's loupe",
 635 | 634: 'lumbermill, sawmill',
 636 | 635: 'magnetic compass',
 637 | 636: 'mailbag, postbag',
 638 | 637: 'mailbox, letter box',
 639 | 638: 'maillot',
 640 | 639: 'maillot, tank suit',
 641 | 640: 'manhole cover',
 642 | 641: 'maraca',
 643 | 642: 'marimba, xylophone',
 644 | 643: 'mask',
 645 | 644: 'matchstick',
 646 | 645: 'maypole',
 647 | 646: 'maze, labyrinth',
 648 | 647: 'measuring cup',
 649 | 648: 'medicine chest, medicine cabinet',
 650 | 649: 'megalith, megalithic structure',
 651 | 650: 'microphone, mike',
 652 | 651: 'microwave, microwave oven',
 653 | 652: 'military uniform',
 654 | 653: 'milk can',
 655 | 654: 'minibus',
 656 | 655: 'miniskirt, mini',
 657 | 656: 'minivan',
 658 | 657: 'missile',
 659 | 658: 'mitten',
 660 | 659: 'mixing bowl',
 661 | 660: 'mobile home, manufactured home',
 662 | 661: 'Model T',
 663 | 662: 'modem',
 664 | 663: 'monastery',
 665 | 664: 'monitor',
 666 | 665: 'moped',
 667 | 666: 'mortar',
 668 | 667: 'mortarboard',
 669 | 668: 'mosque',
 670 | 669: 'mosquito net',
 671 | 670: 'motor scooter, scooter',
 672 | 671: 'mountain bike, all-terrain bike, off-roader',
 673 | 672: 'mountain tent',
 674 | 673: 'mouse, computer mouse',
 675 | 674: 'mousetrap',
 676 | 675: 'moving van',
 677 | 676: 'muzzle',
 678 | 677: 'nail',
 679 | 678: 'neck brace',
 680 | 679: 'necklace',
 681 | 680: 'nipple',
 682 | 681: 'notebook, notebook computer',
 683 | 682: 'obelisk',
 684 | 683: 'oboe, hautboy, hautbois',
 685 | 684: 'ocarina, sweet potato',
 686 | 685: 'odometer, hodometer, mileometer, milometer',
 687 | 686: 'oil filter',
 688 | 687: 'organ, pipe organ',
 689 | 688: 'oscilloscope, scope, cathode-ray oscilloscope, CRO',
 690 | 689: 'overskirt',
 691 | 690: 'oxcart',
 692 | 691: 'oxygen mask',
 693 | 692: 'packet',
 694 | 693: 'paddle, boat paddle',
 695 | 694: 'paddlewheel, paddle wheel',
 696 | 695: 'padlock',
 697 | 696: 'paintbrush',
 698 | 697: "pajama, pyjama, pj's, jammies",
 699 | 698: 'palace',
 700 | 699: 'panpipe, pandean pipe, syrinx',
 701 | 700: 'paper towel',
 702 | 701: 'parachute, chute',
 703 | 702: 'parallel bars, bars',
 704 | 703: 'park bench',
 705 | 704: 'parking meter',
 706 | 705: 'passenger car, coach, carriage',
 707 | 706: 'patio, terrace',
 708 | 707: 'pay-phone, pay-station',
 709 | 708: 'pedestal, plinth, footstall',
 710 | 709: 'pencil box, pencil case',
 711 | 710: 'pencil sharpener',
 712 | 711: 'perfume, essence',
 713 | 712: 'Petri dish',
 714 | 713: 'photocopier',
 715 | 714: 'pick, plectrum, plectron',
 716 | 715: 'pickelhaube',
 717 | 716: 'picket fence, paling',
 718 | 717: 'pickup, pickup truck',
 719 | 718: 'pier',
 720 | 719: 'piggy bank, penny bank',
 721 | 720: 'pill bottle',
 722 | 721: 'pillow',
 723 | 722: 'ping-pong ball',
 724 | 723: 'pinwheel',
 725 | 724: 'pirate, pirate ship',
 726 | 725: 'pitcher, ewer',
 727 | 726: "plane, carpenter's plane, woodworking plane",
 728 | 727: 'planetarium',
 729 | 728: 'plastic bag',
 730 | 729: 'plate rack',
 731 | 730: 'plow, plough',
 732 | 731: "plunger, plumber's helper",
 733 | 732: 'Polaroid camera, Polaroid Land camera',
 734 | 733: 'pole',
 735 | 734: 'police van, police wagon, paddy wagon, patrol wagon, wagon, black Maria',
 736 | 735: 'poncho',
 737 | 736: 'pool table, billiard table, snooker table',
 738 | 737: 'pop bottle, soda bottle',
 739 | 738: 'pot, flowerpot',
 740 | 739: "potter's wheel",
 741 | 740: 'power drill',
 742 | 741: 'prayer rug, prayer mat',
 743 | 742: 'printer',
 744 | 743: 'prison, prison house',
 745 | 744: 'projectile, missile',
 746 | 745: 'projector',
 747 | 746: 'puck, hockey puck',
 748 | 747: 'punching bag, punch bag, punching ball, punchball',
 749 | 748: 'purse',
 750 | 749: 'quill, quill pen',
 751 | 750: 'quilt, comforter, comfort, puff',
 752 | 751: 'racer, race car, racing car',
 753 | 752: 'racket, racquet',
 754 | 753: 'radiator',
 755 | 754: 'radio, wireless',
 756 | 755: 'radio telescope, radio reflector',
 757 | 756: 'rain barrel',
 758 | 757: 'recreational vehicle, RV, R.V.',
 759 | 758: 'reel',
 760 | 759: 'reflex camera',
 761 | 760: 'refrigerator, icebox',
 762 | 761: 'remote control, remote',
 763 | 762: 'restaurant, eating house, eating place, eatery',
 764 | 763: 'revolver, six-gun, six-shooter',
 765 | 764: 'rifle',
 766 | 765: 'rocking chair, rocker',
 767 | 766: 'rotisserie',
 768 | 767: 'rubber eraser, rubber, pencil eraser',
 769 | 768: 'rugby ball',
 770 | 769: 'rule, ruler',
 771 | 770: 'running shoe',
 772 | 771: 'safe',
 773 | 772: 'safety pin',
 774 | 773: 'saltshaker, salt shaker',
 775 | 774: 'sandal',
 776 | 775: 'sarong',
 777 | 776: 'sax, saxophone',
 778 | 777: 'scabbard',
 779 | 778: 'scale, weighing machine',
 780 | 779: 'school bus',
 781 | 780: 'schooner',
 782 | 781: 'scoreboard',
 783 | 782: 'screen, CRT screen',
 784 | 783: 'screw',
 785 | 784: 'screwdriver',
 786 | 785: 'seat belt, seatbelt',
 787 | 786: 'sewing machine',
 788 | 787: 'shield, buckler',
 789 | 788: 'shoe shop, shoe-shop, shoe store',
 790 | 789: 'shoji',
 791 | 790: 'shopping basket',
 792 | 791: 'shopping cart',
 793 | 792: 'shovel',
 794 | 793: 'shower cap',
 795 | 794: 'shower curtain',
 796 | 795: 'ski',
 797 | 796: 'ski mask',
 798 | 797: 'sleeping bag',
 799 | 798: 'slide rule, slipstick',
 800 | 799: 'sliding door',
 801 | 800: 'slot, one-armed bandit',
 802 | 801: 'snorkel',
 803 | 802: 'snowmobile',
 804 | 803: 'snowplow, snowplough',
 805 | 804: 'soap dispenser',
 806 | 805: 'soccer ball',
 807 | 806: 'sock',
 808 | 807: 'solar dish, solar collector, solar furnace',
 809 | 808: 'sombrero',
 810 | 809: 'soup bowl',
 811 | 810: 'space bar',
 812 | 811: 'space heater',
 813 | 812: 'space shuttle',
 814 | 813: 'spatula',
 815 | 814: 'speedboat',
 816 | 815: "spider web, spider's web",
 817 | 816: 'spindle',
 818 | 817: 'sports car, sport car',
 819 | 818: 'spotlight, spot',
 820 | 819: 'stage',
 821 | 820: 'steam locomotive',
 822 | 821: 'steel arch bridge',
 823 | 822: 'steel drum',
 824 | 823: 'stethoscope',
 825 | 824: 'stole',
 826 | 825: 'stone wall',
 827 | 826: 'stopwatch, stop watch',
 828 | 827: 'stove',
 829 | 828: 'strainer',
 830 | 829: 'streetcar, tram, tramcar, trolley, trolley car',
 831 | 830: 'stretcher',
 832 | 831: 'studio couch, day bed',
 833 | 832: 'stupa, tope',
 834 | 833: 'submarine, pigboat, sub, U-boat',
 835 | 834: 'suit, suit of clothes',
 836 | 835: 'sundial',
 837 | 836: 'sunglass',
 838 | 837: 'sunglasses, dark glasses, shades',
 839 | 838: 'sunscreen, sunblock, sun blocker',
 840 | 839: 'suspension bridge',
 841 | 840: 'swab, swob, mop',
 842 | 841: 'sweatshirt',
 843 | 842: 'swimming trunks, bathing trunks',
 844 | 843: 'swing',
 845 | 844: 'switch, electric switch, electrical switch',
 846 | 845: 'syringe',
 847 | 846: 'table lamp',
 848 | 847: 'tank, army tank, armored combat vehicle, armoured combat vehicle',
 849 | 848: 'tape player',
 850 | 849: 'teapot',
 851 | 850: 'teddy, teddy bear',
 852 | 851: 'television, television system',
 853 | 852: 'tennis ball',
 854 | 853: 'thatch, thatched roof',
 855 | 854: 'theater curtain, theatre curtain',
 856 | 855: 'thimble',
 857 | 856: 'thresher, thrasher, threshing machine',
 858 | 857: 'throne',
 859 | 858: 'tile roof',
 860 | 859: 'toaster',
 861 | 860: 'tobacco shop, tobacconist shop, tobacconist',
 862 | 861: 'toilet seat',
 863 | 862: 'torch',
 864 | 863: 'totem pole',
 865 | 864: 'tow truck, tow car, wrecker',
 866 | 865: 'toyshop',
 867 | 866: 'tractor',
 868 | 867: 'trailer truck, tractor trailer, trucking rig, rig, articulated lorry, semi',
 869 | 868: 'tray',
 870 | 869: 'trench coat',
 871 | 870: 'tricycle, trike, velocipede',
 872 | 871: 'trimaran',
 873 | 872: 'tripod',
 874 | 873: 'triumphal arch',
 875 | 874: 'trolleybus, trolley coach, trackless trolley',
 876 | 875: 'trombone',
 877 | 876: 'tub, vat',
 878 | 877: 'turnstile',
 879 | 878: 'typewriter keyboard',
 880 | 879: 'umbrella',
 881 | 880: 'unicycle, monocycle',
 882 | 881: 'upright, upright piano',
 883 | 882: 'vacuum, vacuum cleaner',
 884 | 883: 'vase',
 885 | 884: 'vault',
 886 | 885: 'velvet',
 887 | 886: 'vending machine',
 888 | 887: 'vestment',
 889 | 888: 'viaduct',
 890 | 889: 'violin, fiddle',
 891 | 890: 'volleyball',
 892 | 891: 'waffle iron',
 893 | 892: 'wall clock',
 894 | 893: 'wallet, billfold, notecase, pocketbook',
 895 | 894: 'wardrobe, closet, press',
 896 | 895: 'warplane, military plane',
 897 | 896: 'washbasin, handbasin, washbowl, lavabo, wash-hand basin',
 898 | 897: 'washer, automatic washer, washing machine',
 899 | 898: 'water bottle',
 900 | 899: 'water jug',
 901 | 900: 'water tower',
 902 | 901: 'whiskey jug',
 903 | 902: 'whistle',
 904 | 903: 'wig',
 905 | 904: 'window screen',
 906 | 905: 'window shade',
 907 | 906: 'Windsor tie',
 908 | 907: 'wine bottle',
 909 | 908: 'wing',
 910 | 909: 'wok',
 911 | 910: 'wooden spoon',
 912 | 911: 'wool, woolen, woollen',
 913 | 912: 'worm fence, snake fence, snake-rail fence, Virginia fence',
 914 | 913: 'wreck',
 915 | 914: 'yawl',
 916 | 915: 'yurt',
 917 | 916: 'web site, website, internet site, site',
 918 | 917: 'comic book',
 919 | 918: 'crossword puzzle, crossword',
 920 | 919: 'street sign',
 921 | 920: 'traffic light, traffic signal, stoplight',
 922 | 921: 'book jacket, dust cover, dust jacket, dust wrapper',
 923 | 922: 'menu',
 924 | 923: 'plate',
 925 | 924: 'guacamole',
 926 | 925: 'consomme',
 927 | 926: 'hot pot, hotpot',
 928 | 927: 'trifle',
 929 | 928: 'ice cream, icecream',
 930 | 929: 'ice lolly, lolly, lollipop, popsicle',
 931 | 930: 'French loaf',
 932 | 931: 'bagel, beigel',
 933 | 932: 'pretzel',
 934 | 933: 'cheeseburger',
 935 | 934: 'hotdog, hot dog, red hot',
 936 | 935: 'mashed potato',
 937 | 936: 'head cabbage',
 938 | 937: 'broccoli',
 939 | 938: 'cauliflower',
 940 | 939: 'zucchini, courgette',
 941 | 940: 'spaghetti squash',
 942 | 941: 'acorn squash',
 943 | 942: 'butternut squash',
 944 | 943: 'cucumber, cuke',
 945 | 944: 'artichoke, globe artichoke',
 946 | 945: 'bell pepper',
 947 | 946: 'cardoon',
 948 | 947: 'mushroom',
 949 | 948: 'Granny Smith',
 950 | 949: 'strawberry',
 951 | 950: 'orange',
 952 | 951: 'lemon',
 953 | 952: 'fig',
 954 | 953: 'pineapple, ananas',
 955 | 954: 'banana',
 956 | 955: 'jackfruit, jak, jack',
 957 | 956: 'custard apple',
 958 | 957: 'pomegranate',
 959 | 958: 'hay',
 960 | 959: 'carbonara',
 961 | 960: 'chocolate sauce, chocolate syrup',
 962 | 961: 'dough',
 963 | 962: 'meat loaf, meatloaf',
 964 | 963: 'pizza, pizza pie',
 965 | 964: 'potpie',
 966 | 965: 'burrito',
 967 | 966: 'red wine',
 968 | 967: 'espresso',
 969 | 968: 'cup',
 970 | 969: 'eggnog',
 971 | 970: 'alp',
 972 | 971: 'bubble',
 973 | 972: 'cliff, drop, drop-off',
 974 | 973: 'coral reef',
 975 | 974: 'geyser',
 976 | 975: 'lakeside, lakeshore',
 977 | 976: 'promontory, headland, head, foreland',
 978 | 977: 'sandbar, sand bar',
 979 | 978: 'seashore, coast, seacoast, sea-coast',
 980 | 979: 'valley, vale',
 981 | 980: 'volcano',
 982 | 981: 'ballplayer, baseball player',
 983 | 982: 'groom, bridegroom',
 984 | 983: 'scuba diver',
 985 | 984: 'rapeseed',
 986 | 985: 'daisy',
 987 | 986: "yellow lady's slipper, yellow lady-slipper, Cypripedium calceolus, Cypripedium parviflorum",
 988 | 987: 'corn',
 989 | 988: 'acorn',
 990 | 989: 'hip, rose hip, rosehip',
 991 | 990: 'buckeye, horse chestnut, conker',
 992 | 991: 'coral fungus',
 993 | 992: 'agaric',
 994 | 993: 'gyromitra',
 995 | 994: 'stinkhorn, carrion fungus',
 996 | 995: 'earthstar',
 997 | 996: 'hen-of-the-woods, hen of the woods, Polyporus frondosus, Grifola frondosa',
 998 | 997: 'bolete',
 999 | 998: 'ear, spike, capitulum',
1000 | 999: 'toilet tissue, toilet paper, bathroom tissue'


--------------------------------------------------------------------------------
/code/ConvertLayer_caffe.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Copyright (c) 2017-present, starime.
  3 | All rights reserved.
  4 | 
  5 | This source code is licensed under the BSD-style license found in the
  6 | LICENSE file in the root directory of this source tree. An additional grant
  7 | of patent rights can be found in the PATENTS file in the same directory.
  8 | """
  9 | 
 10 | import math
 11 | import numpy as np
 12 | import caffe_pb2 as pb2
 13 | 
 14 | 
 15 | def as_blob(array):
 16 |     blob = pb2.BlobProto()
 17 |     blob.shape.dim.extend(array.shape)
 18 |     blob.data.extend(array.astype(float).flat)
 19 |     return blob
 20 | 
 21 | 
 22 | def CopyTuple(param):
 23 |     if isinstance(param, tuple):
 24 |         return param
 25 |     elif isinstance(param, int):
 26 |         return param, param
 27 |     else:
 28 |         assert type(param)
 29 | 
 30 | 
 31 | def ty(caffe_type):
 32 |     def f(_):
 33 |         layer = pb2.LayerParameter()
 34 |         layer.type = caffe_type
 35 |         return layer
 36 |     return f
 37 | 
 38 | 
 39 | def data(inputs):
 40 |     layer = pb2.LayerParameter()
 41 |     layer.type = 'Input'
 42 |     input_shape = pb2.BlobShape()
 43 |     input_shape.dim.extend(inputs.data.numpy().shape)
 44 |     layer.input_param.shape.extend([input_shape])
 45 |     return layer
 46 | 
 47 | 
 48 | def Slice(pytorch_layer):
 49 |     layer = pb2.LayerParameter()
 50 |     layer.type = "Slice"
 51 | 
 52 |     layer.slice_param.axis = pytorch_layer.axis
 53 |     layer.slice_param.slice_point.extend(pytorch_layer.slice_point)
 54 |     return layer
 55 | 
 56 | 
 57 | def inner_product(pytorch_layer):
 58 |     layer = pb2.LayerParameter()
 59 |     layer.type = "InnerProduct"
 60 | 
 61 |     blobs_weight = pytorch_layer.next_functions[2][0].next_functions[0][0].variable.data.numpy()
 62 |     num_output = pytorch_layer.next_functions[2][0].next_functions[0][0].variable.size(0)
 63 |     layer.inner_product_param.num_output = num_output
 64 | 
 65 |     if pytorch_layer.next_functions[0][0]:
 66 |         layer.inner_product_param.bias_term = True
 67 |         bias = pytorch_layer.next_functions[0][0].variable.data.numpy()
 68 |         layer.blobs.extend([as_blob(blobs_weight), as_blob(bias)])
 69 |     else:
 70 |         layer.inner_product_param.bias_term = False
 71 |         layer.blobs.extend([as_blob(blobs_weight)])
 72 | 
 73 |     return layer
 74 | 
 75 | 
 76 | def concat(pytorch_layer):
 77 |     layer = pb2.LayerParameter()
 78 |     layer.type = "Concat"
 79 |     layer.concat_param.axis = int(pytorch_layer.dim)
 80 |     return layer
 81 | 
 82 | 
 83 | def flatten(pytorch_layer):
 84 |     """ Only support flatten view """
 85 |     total = 1
 86 |     for dim in pytorch_layer.old_size:
 87 |         total *= dim
 88 |     assert ((pytorch_layer.new_sizes[1] == total) or (pytorch_layer.new_sizes[1] == -1))
 89 | 
 90 |     layer = pb2.LayerParameter()
 91 |     layer.type = "Flatten"
 92 |     return layer
 93 | 
 94 | 
 95 | def spatial_convolution(pytorch_layer):
 96 |     layer = pb2.LayerParameter()
 97 |     blobs_weight = pytorch_layer.next_functions[1][0].variable.data.numpy()
 98 |     assert len(blobs_weight.shape) == 4, blobs_weight.shape
 99 |     (nOutputPlane, nInputPlane, kH, kW) = blobs_weight.shape
100 | 
101 |     padH = pytorch_layer.padding[0]
102 |     padW = pytorch_layer.padding[1]
103 |     dH = pytorch_layer.stride[0]
104 |     dW = pytorch_layer.stride[1]
105 |     dilation = pytorch_layer.dilation[0]
106 | 
107 |     if pytorch_layer.transposed:
108 |         layer.type = "Deconvolution"
109 |         layer.convolution_param.num_output = nInputPlane
110 |     else:
111 |         layer.type = "Convolution"
112 |         layer.convolution_param.num_output = nOutputPlane
113 | 
114 |     if kH == kW:
115 |         layer.convolution_param.kernel_size.extend([kH])
116 |     else:
117 |         layer.convolution_param.kernel_h = kH
118 |         layer.convolution_param.kernel_w = kW
119 |     if dH == dW:
120 |         layer.convolution_param.stride.extend([dH])
121 |     else:
122 |         layer.convolution_param.stride_h = dH
123 |         layer.convolution_param.stride_w = dW
124 |     if padH == padW:
125 |         layer.convolution_param.pad.extend([padH])
126 |     else:
127 |         layer.convolution_param.pad_h = padH
128 |         layer.convolution_param.pad_w = padW
129 |     layer.convolution_param.dilation.extend([dilation])
130 |     layer.convolution_param.group = pytorch_layer.groups
131 | 
132 |     if pytorch_layer.next_functions[2][0]:
133 |         layer.convolution_param.bias_term = True
134 |         bias = pytorch_layer.next_functions[2][0].variable.data.numpy()
135 |         layer.blobs.extend([as_blob(blobs_weight), as_blob(bias)])
136 |     else:
137 |         layer.convolution_param.bias_term = False
138 |         layer.blobs.extend([as_blob(blobs_weight)])
139 | 
140 |     return layer
141 | 
142 | 
def FillBilinear(ch, k):
    """Build a bilinear-interpolation weight blob of shape ``(ch, 1, k, k)``.

    Every channel receives the same ``k x k`` kernel, which is the outer
    product of a 1-D triangular profile centred on the kernel.

    Args:
        ch: Number of channels (first dimension of the blob).
        k: Spatial kernel size.

    Returns:
        A float64 numpy array of shape ``(ch, 1, k, k)`` whose values are the
        kernel weights rounded through float32.
    """
    scale_factor = (k + 1) // 2
    if k % 2 == 1:
        center = scale_factor - 1
    else:
        center = scale_factor - 0.5

    # float() guarantees true division even where ``/`` on two ints floors
    # (odd k makes both the distance and scale_factor integers).
    profile = 1 - np.abs(np.arange(k) - center) / float(scale_factor)
    # The kernel is separable, so the 2-D weights are the outer product of
    # the 1-D profile; keep the float32 rounding of the original kernel.
    bilinear_kernel = np.outer(profile, profile).astype(np.float32)

    blob = np.zeros(shape=(ch, 1, k, k))
    blob[:, 0, :, :] = bilinear_kernel
    return blob
160 | 
161 | 
def UpsampleBilinear(pytorch_layer):
    """Express bilinear upsampling as a per-channel Caffe ``Deconvolution``.

    Requires equal scale factors on both axes. The layer uses one group per
    input channel, has no bias, is given zero learning-rate and decay
    multipliers, and ships with a precomputed bilinear weight blob.
    """
    proto = pb2.LayerParameter()
    proto.type = "Deconvolution"

    assert pytorch_layer.scale_factor[0] == pytorch_layer.scale_factor[1]
    factor = int(pytorch_layer.scale_factor[0])
    channels = int(pytorch_layer.input_size[1])
    kernel = 2 * factor - factor % 2
    padding = int(math.ceil((factor - 1) / 2.))

    conv = proto.convolution_param
    conv.num_output = channels
    conv.group = channels
    conv.kernel_size.extend([kernel])
    conv.stride.extend([factor])
    conv.pad.extend([padding])
    conv.bias_term = False
    conv.weight_filler.type = 'bilinear'

    # Zero multipliers so the interpolation weights are not updated.
    frozen = pb2.ParamSpec()
    frozen.lr_mult = 0
    frozen.decay_mult = 0
    proto.param.extend([frozen])

    # Ship the filter kernel explicitly as the layer's weight blob.
    proto.blobs.extend([as_blob(FillBilinear(channels, kernel))])

    return proto
189 | 
190 | 
def CopyPoolingParameter(pytorch_layer, layer):
    """Copy kernel, stride and padding from ``pytorch_layer`` into ``layer``.

    Each quantity is written to the single square field when both axes agree
    and to the ``*_h``/``*_w`` fields otherwise. When ``ceil_mode`` is exactly
    ``False``, a positive pad on an axis with stride > 1 is reduced by one;
    for any other ``ceil_mode`` value the pads are left untouched.
    NOTE(review): the pad reduction presumably compensates for a difference
    in output-size rounding between the two frameworks - confirm.
    """
    kernel_h, kernel_w = CopyTuple(pytorch_layer.kernel_size)
    stride_h, stride_w = CopyTuple(pytorch_layer.stride)
    pad_h, pad_w = CopyTuple(pytorch_layer.padding)

    pooling = layer.pooling_param
    square_pad = pad_h == pad_w

    if kernel_h == kernel_w:
        pooling.kernel_size = kernel_h
    else:
        pooling.kernel_h = kernel_h
        pooling.kernel_w = kernel_w

    if stride_h == stride_w:
        pooling.stride = stride_h
    else:
        pooling.stride_h = stride_h
        pooling.stride_w = stride_w

    if square_pad:
        pooling.pad = pad_h
    else:
        pooling.pad_h = pad_h
        pooling.pad_w = pad_w

    # Only an explicit ``False`` triggers the pad adjustment below.
    if pytorch_layer.ceil_mode is not False:
        return

    if square_pad:
        if stride_h > 1 and pad_h > 0:
            pooling.pad = pad_h - 1
        return

    if stride_h > 1 and pad_h > 0:
        pooling.pad_h = pad_h - 1
    if stride_w > 1 and pad_w > 0:
        pooling.pad_w = pad_w - 1
225 | 
226 | 
def MaxPooling(pytorch_layer):
    """Build a Caffe ``Pooling`` layer in MAX mode from a PyTorch max-pool node."""
    pool_layer = pb2.LayerParameter()
    pool_layer.type = "Pooling"
    pool_layer.pooling_param.pool = pb2.PoolingParameter.MAX
    # Kernel/stride/pad are shared with AvgPooling via the common helper.
    CopyPoolingParameter(pytorch_layer, pool_layer)
    return pool_layer
233 | 
234 | 
def AvgPooling(pytorch_layer):
    """Build a Caffe ``Pooling`` layer in AVE mode from a PyTorch avg-pool node."""
    pool_layer = pb2.LayerParameter()
    pool_layer.type = "Pooling"
    pool_layer.pooling_param.pool = pb2.PoolingParameter.AVE
    # Kernel/stride/pad are shared with MaxPooling via the common helper.
    CopyPoolingParameter(pytorch_layer, pool_layer)
    return pool_layer
241 | 
242 | 
def dropout(pytorch_layer):
    """Build a Caffe ``Dropout`` layer carrying an exclude rule for the TEST phase.

    The drop ratio is taken from ``pytorch_layer.p``.
    """
    drop_layer = pb2.LayerParameter()
    drop_layer.type = "Dropout"
    drop_layer.dropout_param.dropout_ratio = float(pytorch_layer.p)

    # The rule is placed on the layer's ``exclude`` list with phase TEST.
    test_phase_rule = pb2.NetStateRule()
    test_phase_rule.phase = pb2.TEST
    drop_layer.exclude.extend([test_phase_rule])
    return drop_layer
251 | 
252 | 
def elu(pytorch_layer):
    """Build a Caffe ``ELU`` layer; alpha is ``pytorch_layer.additional_args[0]``."""
    alpha = pytorch_layer.additional_args[0]
    elu_layer = pb2.LayerParameter()
    elu_layer.type = "ELU"
    elu_layer.elu_param.alpha = alpha
    return elu_layer
258 | 
259 | 
def leaky_ReLU(pytorch_layer):
    """Build a Caffe ``ReLU`` layer with a negative slope.

    The slope is ``pytorch_layer.additional_args[0]`` converted to float.
    """
    slope = float(pytorch_layer.additional_args[0])
    relu_layer = pb2.LayerParameter()
    relu_layer.type = "ReLU"
    relu_layer.relu_param.negative_slope = slope
    return relu_layer
265 | 
266 | 
def PReLU(pytorch_layer):
    """Build a Caffe ``PReLU`` layer with its learned slope blob.

    ``channel_shared`` is set when the PyTorch node has a single parameter.
    The slope values are read from the variable behind
    ``next_functions[1][0]``.
    """
    prelu_layer = pb2.LayerParameter()
    prelu_layer.type = "PReLU"
    prelu_layer.prelu_param.channel_shared = int(pytorch_layer.num_parameters) == 1

    slope_node = pytorch_layer.next_functions[1][0]
    slopes = slope_node.variable.data.numpy()
    prelu_layer.blobs.extend([as_blob(slopes)])
    return prelu_layer
276 | 
277 | 
def MulConst(pytorch_layer):
    """Express multiplication by a constant as a Caffe ``Power`` layer.

    The layer is configured with power=1, scale=``pytorch_layer.constant``
    and shift=0.
    """
    factor = float(pytorch_layer.constant)
    power_layer = pb2.LayerParameter()
    power_layer.type = "Power"
    power = power_layer.power_param
    power.power = 1
    power.scale = factor
    power.shift = 0
    return power_layer
285 | 
286 | 
def AddConst(pytorch_layer):
    """Express addition of a constant as a Caffe ``Power`` layer.

    The layer is configured with power=1 and scale=1. The shift is written
    as ``inf`` as a placeholder: the constant to add should be filled in by
    hand, since it is not visible in autograd.
    """
    power_layer = pb2.LayerParameter()
    power_layer.type = "Power"
    power = power_layer.power_param
    power.power = 1
    power.scale = 1
    # Placeholder value; must be replaced manually in the generated model.
    power.shift = float('inf')
    return power_layer
295 | 
296 | 
def softmax(pytorch_layer):
    """Build a parameter-less Caffe ``Softmax`` layer (the node itself is unused)."""
    softmax_layer = pb2.LayerParameter()
    softmax_layer.type = 'Softmax'
    return softmax_layer
301 | 
302 | 
def eltwise(pytorch_layer):
    """Build a Caffe ``Eltwise`` layer with no explicit operation (the node is unused)."""
    eltwise_layer = pb2.LayerParameter()
    eltwise_layer.type = "Eltwise"
    return eltwise_layer
307 | 
308 | 
def eltwise_max(pytorch_layer):
    """Build a Caffe ``Eltwise`` layer whose operation field is set to 2."""
    eltwise_layer = pb2.LayerParameter()
    eltwise_layer.type = "Eltwise"
    # NOTE(review): 2 is presumably the MAX member of EltwiseParameter's
    # operation enum -- confirm against caffe.proto.
    eltwise_layer.eltwise_param.operation = 2
    return eltwise_layer
314 | 
315 | 
def batchnorm(pytorch_layer):
    """Convert a PyTorch batch-norm node into a Caffe BatchNorm + Scale pair.

    Returns:
        ``[layer_bn, layer_scale]``. The BatchNorm layer carries the running
        mean, the running variance and a one-element blob ``[1.]``; the Scale
        layer carries the weight from ``next_functions[1][0]`` and, when
        ``next_functions[2][0]`` is present, the bias.
    """
    bn = pb2.LayerParameter()
    bn.type = "BatchNorm"
    bn.batch_norm_param.use_global_stats = 1
    bn.batch_norm_param.eps = pytorch_layer.eps

    statistics = [
        pytorch_layer.running_mean.numpy(),
        pytorch_layer.running_var.numpy(),
        np.array([1.]),
    ]
    bn.blobs.extend([as_blob(values) for values in statistics])

    scale = pb2.LayerParameter()
    scale.type = "Scale"

    weight_node = pytorch_layer.next_functions[1][0]
    bias_node = pytorch_layer.next_functions[2][0]

    scale_blobs = [as_blob(weight_node.variable.data.numpy())]
    has_bias = bool(bias_node)
    scale.scale_param.bias_term = has_bias
    if has_bias:
        scale_blobs.append(as_blob(bias_node.variable.data.numpy()))
    scale.blobs.extend(scale_blobs)

    return [bn, scale]
342 | 
343 | 
def build_converter(opts):
    """Return the table mapping PyTorch autograd type names to Caffe converters.

    ``opts`` is accepted but not consulted. Each value is a callable taking
    the PyTorch node and returning the converted layer(s).
    """
    return dict(
        data=data,
        Addmm=inner_product,
        Threshold=ty('ReLU'),
        ConvNd=spatial_convolution,
        MaxPool2d=MaxPooling,
        AvgPool2d=AvgPooling,
        Add=eltwise,
        Cmax=eltwise_max,
        BatchNorm=batchnorm,
        Concat=concat,
        Dropout=dropout,
        UpsamplingBilinear2d=UpsampleBilinear,
        MulConstant=MulConst,
        AddConstant=AddConst,
        Softmax=softmax,
        Sigmoid=ty('Sigmoid'),
        Tanh=ty('TanH'),
        ELU=elu,
        LeakyReLU=leaky_ReLU,
        PReLU=PReLU,
        Slice=Slice,
        View=flatten,
    )
369 | 
370 | 
def convert_caffe(opts, typename, pytorch_layer):
    """Convert one PyTorch node to its Caffe layer(s).

    Raises:
        ValueError: if ``typename`` has no registered converter.
    """
    converter = build_converter(opts)
    if typename in converter:
        return converter[typename](pytorch_layer)
    raise ValueError("Unknown layer type: {}, known types: {}".format(
        typename, converter.keys()))
377 | 


--------------------------------------------------------------------------------
/code/ConvertLayer_ncnn.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Copyright (c) 2017-present, starime.
  3 | All rights reserved.
  4 | 
  5 | This source code is licensed under the BSD-style license found in the
  6 | LICENSE file in the root directory of this source tree. An additional grant
  7 | of patent rights can be found in the PATENTS file in the same directory.
  8 | """
  9 | 
 10 | import math
 11 | import numpy as np
 12 | 
 13 | 
 14 | class LayerParameter_ncnn(object):
 15 | 
 16 |     def __init__(self):
 17 |         self.type = ''
 18 |         self.param = []
 19 |         self.weights = []
 20 | 
 21 | 
 22 | def CopyTuple(param):
 23 |     if isinstance(param, tuple):
 24 |         return param
 25 |     elif isinstance(param, int):
 26 |         return param, param
 27 |     else:
 28 |         assert type(param)
 29 | 
 30 | 
 31 | def ty(ncnn_type):
 32 |     def f(_):
 33 |         layer = LayerParameter_ncnn()
 34 |         layer.type = ncnn_type
 35 |         return layer
 36 |     return f
 37 | 
 38 | 
 39 | def data(inputs):
 40 |     layer = LayerParameter_ncnn()
 41 |     layer.type = 'Input'
 42 | 
 43 |     input_shape = inputs.data.numpy().shape
 44 |     for dim in range(1, 4):
 45 |         if dim - 1 < len(input_shape):
 46 |             size = input_shape[dim]
 47 |         else:
 48 |             size = -233
 49 |         layer.param.append('%ld' % size)
 50 |     return layer
 51 | 
 52 | 
 53 | def Slice(pytorch_layer):
 54 |     layer = LayerParameter_ncnn()
 55 |     layer.type = 'Slice'
 56 | 
 57 |     # """ ncnn only support slicing on channel dimension """
 58 |     # assert pytorch_layer.axis == 1
 59 | 
 60 |     layer.param = {}
 61 |     num_slice = len(pytorch_layer.slice_point) + 1
 62 |     slice_param = ('%d' % num_slice)
 63 |     prev_offset = 0
 64 |     for p in pytorch_layer.slice_point:
 65 |         offset = p
 66 |         slice_param += (',%d' % (offset - prev_offset))
 67 |         prev_offset = offset
 68 |     slice_param += (',%d' % -233)
 69 | 
 70 |     layer.param['-23300'] = slice_param
 71 |     layer.param['1'] = ('%d' % (pytorch_layer.axis - 1))
 72 | 
 73 |     return layer
 74 | 
 75 | 
 76 | def Split(pytorch_layer):
 77 |     layer = LayerParameter_ncnn()
 78 |     layer.type = 'Split'
 79 | 
 80 |     return layer
 81 | 
 82 | 
 83 | def permute(pytorch_layer):
 84 |     layer = LayerParameter_ncnn()
 85 |     layer.type = 'Permute'
 86 |     assert len(pytorch_layer.rev_dim_indices) == 4, len(pytorch_layer.rev_dim_indices)
 87 |     assert pytorch_layer.rev_dim_indices[0] == 0, pytorch_layer.rev_dim_indices[0]
 88 | 
 89 |     """ order_type details at src/layer/permute.cpp """
 90 |     h, w, c = pytorch_layer.rev_dim_indices[1], pytorch_layer.rev_dim_indices[2], pytorch_layer.rev_dim_indices[3]
 91 |     order_type = 0
 92 |     if c == 1 and h == 2 and w == 3:
 93 |         order_type = 0
 94 |     elif c == 1 and h == 3 and w == 2:
 95 |         order_type = 1
 96 |     elif c == 2 and h == 1 and w == 3:
 97 |         order_type = 2
 98 |     elif c == 2 and h == 3 and w == 1:
 99 |         order_type = 3
100 |     elif c == 3 and h == 1 and w == 2:
101 |         order_type = 4
102 |     elif c == 3 and h == 2 and w == 1:
103 |         order_type = 5
104 | 
105 |     layer.param.append('%d' % order_type)
106 |     return layer
107 | 
108 | 
def flatten(pytorch_layer):
    """Build an ncnn ``Flatten`` layer; only flattening views are supported.

    Asserts that ``new_sizes[1]`` is either ``-1`` or the product of all
    entries of ``old_size``.
    """
    element_count = 1
    for extent in pytorch_layer.old_size:
        element_count *= extent
    target = pytorch_layer.new_sizes[1]
    assert (target == element_count) or (target == -1)

    flat_layer = LayerParameter_ncnn()
    flat_layer.type = "Flatten"
    return flat_layer
119 | 
120 | 
def inner_product(pytorch_layer):
    """Build an ncnn ``InnerProduct`` layer from an Addmm node.

    Parameters written, in order: number of outputs (size of weight dim 0),
    bias flag, number of weight elements. Weights written: a one-element
    ``[0.]`` array, the weight matrix and, if present, the bias.
    """
    weight_variable = pytorch_layer.next_functions[2][0].next_functions[0][0].variable
    bias_node = pytorch_layer.next_functions[0][0]

    weights = weight_variable.data.numpy()
    has_bias = bool(bias_node)

    layer = LayerParameter_ncnn()
    layer.type = 'InnerProduct'
    layer.param.append('%d' % weight_variable.size(0))
    layer.param.append('%d' % has_bias)
    layer.param.append('%d' % weights.size)

    # NOTE(review): the leading [0.] is presumably the flag word that
    # precedes raw weight data in the ncnn model file -- confirm.
    layer.weights.append(np.array([0.]))
    layer.weights.append(weights)
    if has_bias:
        layer.weights.append(bias_node.variable.data.numpy())

    return layer
143 | 
144 | 
def concat(pytorch_layer):
    """Build an ncnn ``Concat`` layer.

    No parameter is written when the PyTorch dim is 1. Otherwise the
    parameter is ``dim - 1``, or 0 when ``dim`` is below 1.
    """
    axis = int(pytorch_layer.dim)
    layer = LayerParameter_ncnn()
    layer.type = 'Concat'
    if axis != 1:
        layer.param.append('%d' % (axis - 1 if axis >= 1 else 0))
    return layer
155 | 
156 | 
def spatial_convolution(pytorch_layer):
    """Build an ncnn (de)convolution layer from a ConvNd node.

    Parameters written, in order: number of outputs, kernel size, dilation,
    stride, padding, bias flag, number of weight elements and, only when
    ``groups != 1``, the group count (the type name then gets a
    ``DepthWise`` suffix). Kernel, stride and padding must be square.
    """
    layer = LayerParameter_ncnn()

    weights = pytorch_layer.next_functions[1][0].variable.data.numpy()
    assert len(weights.shape) == 4, weights.shape
    dim0, dim1, kernel_h, kernel_w = weights.shape

    pad_h, pad_w = pytorch_layer.padding[0], pytorch_layer.padding[1]
    stride_h, stride_w = pytorch_layer.stride[0], pytorch_layer.stride[1]
    # Only the first dilation entry is exported.
    dilation = pytorch_layer.dilation[0]
    groups = pytorch_layer.groups

    if pytorch_layer.transposed:
        layer.type = 'Deconvolution'
        num_output = dim1
        # ncnn: the input and output dimensions of the weights are swapped.
        weights = np.swapaxes(weights, 0, 1)
    else:
        layer.type = 'Convolution'
        num_output = dim0
    layer.param.append('%d' % num_output)

    assert kernel_h == kernel_w, [kernel_h, kernel_w]
    assert stride_h == stride_w, [stride_h, stride_w]
    assert pad_h == pad_w, [pad_h, pad_w]
    for value in (kernel_h, dilation, stride_h, pad_h):
        layer.param.append('%d' % value)

    bias_node = pytorch_layer.next_functions[2][0]
    has_bias = bool(bias_node)
    layer.param.append('%d' % has_bias)
    layer.param.append('%d' % weights.size)
    layer.weights.append(np.array([0.]))
    layer.weights.append(weights)
    if has_bias:
        layer.weights.append(bias_node.variable.data.numpy())

    if groups != 1:
        layer.param.append('%d' % groups)
        layer.type += "DepthWise"

    return layer
207 | 
208 | 
def FillBilinear(ch, k):
    """Create per-channel bilinear interpolation weights.

    Args:
        ch: number of channels.
        k: kernel size (the kernel is ``k`` x ``k``).

    Returns:
        A float64 array of shape ``(ch, 1, k, k)`` in which every channel
        holds the same bilinear kernel. Kernel values are rounded through
        float32 before being stored.
    """
    scale_factor = (k + 1) // 2
    if k % 2 == 1:
        center = scale_factor - 1
    else:
        center = scale_factor - 0.5

    # Divide by an explicit float: for odd k both ``center`` and
    # ``scale_factor`` are ints, and a plain ``/`` truncates under Python 2.
    profile = 1 - np.abs(np.arange(k) - center) / float(scale_factor)
    # The 2-D kernel is the outer product of the 1-D triangular profile.
    bilinear_kernel = np.outer(profile, profile).astype(np.float32)

    blob = np.zeros(shape=(ch, 1, k, k))
    # Broadcasting copies the same kernel into every channel.
    blob[:, 0, :, :] = bilinear_kernel
    return blob
226 | 
227 | 
def UpsampleBilinear(pytorch_layer):
    """Express bilinear upsampling as an ncnn deconvolution with fixed weights.

    The kernel is ``2 * factor - factor % 2`` wide, the stride equals the
    scale factor and the padding is ``ceil((factor - 1) / 2)``. Weights come
    from ``FillBilinear`` with one single-channel kernel per input channel.

    Because those weights have ``c * k * k`` elements (one filter per
    channel), the layer is emitted as ``DeconvolutionDepthWise`` with
    ``group = c`` whenever there is more than one channel, the same way
    ``spatial_convolution`` marks grouped convolutions. A plain
    ``Deconvolution`` with ``num_output = c`` would not match the weight
    count written here.

    Args:
        pytorch_layer: node exposing ``scale_factor`` (two equal entries)
            and ``input_size`` (channel count at index 1).

    Returns:
        The configured ``LayerParameter_ncnn``.
    """
    assert pytorch_layer.scale_factor[0] == pytorch_layer.scale_factor[1]
    factor = int(pytorch_layer.scale_factor[0])
    channels = int(pytorch_layer.input_size[1])
    kernel_size = 2 * factor - factor % 2
    pad = int(math.ceil((factor - 1) / 2.))
    dilation = 1
    bias_term = False

    # Fixed (non-learned) bilinear filter, one per channel.
    blobs_weight = FillBilinear(channels, kernel_size)

    layer = LayerParameter_ncnn()
    layer.type = 'Deconvolution'
    for value in (channels, kernel_size, dilation, factor, pad, bias_term,
                  blobs_weight.size):
        layer.param.append('%d' % value)

    layer.weights.append(np.array([0.]))
    layer.weights.append(blobs_weight)

    if channels != 1:
        # Same convention as spatial_convolution: append the group count
        # and switch to the depth-wise variant of the layer.
        layer.param.append('%d' % channels)
        layer.type += "DepthWise"

    return layer
265 | 
266 | 
def CopyPoolingParameter(pytorch_layer, layer):
    """Append kernel size, stride, padding and a trailing 0 to ``layer.param``.

    Kernel, stride and padding must each be square. ``ceil_mode`` is not
    consulted.
    """
    pad_h, pad_w = CopyTuple(pytorch_layer.padding)
    kernel_h, kernel_w = CopyTuple(pytorch_layer.kernel_size)
    stride_h, stride_w = CopyTuple(pytorch_layer.stride)

    assert kernel_h == kernel_w, [kernel_h, kernel_w]
    assert stride_h == stride_w, [stride_h, stride_w]
    assert pad_h == pad_w, [pad_h, pad_w]

    # The final 0 is the slot marked "TODO: global_pooling?" by the author.
    for value in (kernel_h, stride_h, pad_h, 0):
        layer.param.append('%d' % value)
284 | 
285 | 
def MaxPooling(pytorch_layer):
    """Build an ncnn ``Pooling`` layer whose first parameter is 0 (used for max pooling)."""
    pool_layer = LayerParameter_ncnn()
    pool_layer.type = 'Pooling'
    # The pooling-mode parameter comes first; geometry follows.
    pool_layer.param.append('%d' % 0)
    CopyPoolingParameter(pytorch_layer, pool_layer)
    return pool_layer
292 | 
293 | 
def AvgPooling(pytorch_layer):
    """Build an ncnn ``Pooling`` layer whose first parameter is 1 (used for average pooling)."""
    pool_layer = LayerParameter_ncnn()
    pool_layer.type = 'Pooling'
    # The pooling-mode parameter comes first; geometry follows.
    pool_layer.param.append('%d' % 1)
    CopyPoolingParameter(pytorch_layer, pool_layer)
    return pool_layer
300 | 
301 | 
def dropout(pytorch_layer):
    """Build an ncnn ``Dropout`` layer.

    No parameter is written when the drop ratio is within 1e-3 of 0.5;
    otherwise ``1 - ratio`` is written as the scale.
    """
    ratio = float(pytorch_layer.p)
    layer = LayerParameter_ncnn()
    layer.type = 'Dropout'
    # NOTE(review): PyTorch dropout is presumably an identity at inference
    # time, so a scale other than 1 (and the 0.5 special case) looks
    # suspicious -- confirm against the ncnn Dropout layer.
    if not abs(ratio - 0.5) < 1e-3:
        layer.param.append('%f' % (1.0 - ratio))
    return layer
312 | 
313 | 
def elu(pytorch_layer):
    """Build an ncnn ``ELU`` layer; alpha is ``pytorch_layer.additional_args[0]``."""
    elu_layer = LayerParameter_ncnn()
    elu_layer.type = 'ELU'
    elu_layer.param.append('%f' % pytorch_layer.additional_args[0])
    return elu_layer
320 | 
321 | 
def ReLU(pytorch_layer):
    """Build an ncnn ``ReLU`` layer with its single parameter fixed to 0.0.

    ``leaky_ReLU`` writes the negative slope into the same slot.
    """
    relu_layer = LayerParameter_ncnn()
    relu_layer.type = 'ReLU'
    relu_layer.param.append('%f' % 0.0)
    return relu_layer
327 | 
328 | 
def leaky_ReLU(pytorch_layer):
    """Build an ncnn ``ReLU`` layer with a negative slope.

    The slope is ``pytorch_layer.additional_args[0]`` converted to float.
    """
    slope = float(pytorch_layer.additional_args[0])
    relu_layer = LayerParameter_ncnn()
    relu_layer.type = 'ReLU'
    relu_layer.param.append('%f' % slope)
    return relu_layer
335 | 
336 | 
def PReLU(pytorch_layer):
    """Build an ncnn ``PReLU`` layer with its learned slopes.

    The single parameter is the number of slope values; the slopes are read
    from the variable behind ``next_functions[1][0]``.
    """
    slopes = pytorch_layer.next_functions[1][0].variable.data.numpy()

    prelu_layer = LayerParameter_ncnn()
    prelu_layer.type = 'PReLU'
    prelu_layer.param.append('%d' % slopes.size)
    prelu_layer.weights.append(slopes)
    return prelu_layer
345 | 
346 | 
def MulConst(pytorch_layer):
    """Express multiplication by a constant as an ncnn ``Power`` layer.

    Parameters written, in order: power 1, scale ``pytorch_layer.constant``,
    shift 0.
    """
    power_layer = LayerParameter_ncnn()
    power_layer.type = 'Power'
    for value in (1, float(pytorch_layer.constant), 0):
        power_layer.param.append('%f' % value)
    return power_layer
354 | 
355 | 
def AddConst(pytorch_layer):
    """Express addition of a constant as an ncnn ``Power`` layer.

    Parameters written, in order: power 1, scale 1 and a shift of ``inf``.
    The shift is a placeholder: the constant to add should be filled in by
    hand, since it is not visible in autograd.
    """
    power_layer = LayerParameter_ncnn()
    power_layer.type = 'Power'
    power_layer.param.append('%f' % 1)
    power_layer.param.append('%f' % 1)
    # Placeholder value; must be replaced manually in the generated file.
    power_layer.param.append('%f' % float('inf'))
    return power_layer
364 | 
365 | 
def softmax(pytorch_layer):
    """Build an ncnn ``Softmax`` layer with its single parameter fixed to 0."""
    softmax_layer = LayerParameter_ncnn()
    softmax_layer.type = 'Softmax'
    # TODO: axis (the parameter is always written as 0 for now).
    softmax_layer.param.append('%d' % 0)
    return softmax_layer
373 | 
374 | 
def eltwise(pytorch_layer):
    """Build an ncnn ``Eltwise`` layer performing addition."""
    eltwise_layer = LayerParameter_ncnn()
    eltwise_layer.type = 'Eltwise'
    # operation: 0=mul 1=add 2=max
    eltwise_layer.param.append('%d' % 1)
    # TODO: coefficient
    return eltwise_layer
382 | 
383 | 
def eltwise_max(pytorch_layer):
    """Build an ncnn ``Eltwise`` layer performing an element-wise maximum."""
    eltwise_layer = LayerParameter_ncnn()
    eltwise_layer.type = 'Eltwise'
    # operation: 0=mul 1=add 2=max
    eltwise_layer.param.append('%d' % 2)
    # TODO: coefficient
    return eltwise_layer
391 | 
392 | 
def negate(pytorch_layer):
    """Build an ncnn ``UnaryOp`` layer configured for negation."""
    unary_layer = LayerParameter_ncnn()
    unary_layer.type = 'UnaryOp'
    # Operation_NEG=1, more op details at src/layer/unaryop.h
    unary_layer.param.append('%d' % 1)
    return unary_layer
399 | 
400 | 
def batchnorm(pytorch_layer):
    """Convert a PyTorch batch-norm node into an ncnn BatchNorm + Scale pair.

    The BatchNorm layer gets four weight arrays: ones, the running mean,
    the running variance with ``eps`` already added, and zeros. The Scale
    layer carries the weight from ``next_functions[1][0]`` and, when
    ``next_functions[2][0]`` is present, the bias.

    Returns:
        ``[layer_bn, layer_scale]``.
    """
    bn = LayerParameter_ncnn()
    bn.type = 'BatchNorm'

    stat_shape = pytorch_layer.running_mean.numpy().shape
    bn.param.append('%d' % pytorch_layer.running_mean.numpy().size)

    # eps is folded into the variance by hand for ncnn.
    variance = pytorch_layer.running_var.numpy() + pytorch_layer.eps
    bn.weights.append(np.ones(stat_shape))
    bn.weights.append(pytorch_layer.running_mean.numpy())
    bn.weights.append(variance)
    bn.weights.append(np.zeros(stat_shape))

    scale = LayerParameter_ncnn()
    scale.type = 'Scale'

    gamma = pytorch_layer.next_functions[1][0].variable.data.numpy()
    bias_node = pytorch_layer.next_functions[2][0]
    has_bias = bool(bias_node)

    scale.param.append('%d' % gamma.size)
    scale.param.append('%d' % has_bias)
    scale.weights.append(gamma)
    if has_bias:
        scale.weights.append(bias_node.variable.data.numpy())

    return [bn, scale]
433 | 
434 | 
def build_converter(opts):
    """Return the table mapping PyTorch autograd type names to ncnn converters.

    ``opts`` is accepted but not consulted. Each value is a callable taking
    the PyTorch node and returning the converted layer(s).
    """
    return dict(
        data=data,
        Addmm=inner_product,
        Threshold=ReLU,
        ConvNd=spatial_convolution,
        MaxPool2d=MaxPooling,
        AvgPool2d=AvgPooling,
        Add=eltwise,
        Cmax=eltwise_max,
        BatchNorm=batchnorm,
        Concat=concat,
        Dropout=dropout,
        UpsamplingBilinear2d=UpsampleBilinear,
        MulConstant=MulConst,
        AddConstant=AddConst,
        Softmax=softmax,
        Sigmoid=ty('Sigmoid'),
        Tanh=ty('TanH'),
        ELU=elu,
        LeakyReLU=leaky_ReLU,
        PReLU=PReLU,
        Slice=Slice,
        MultiCopy=Split,
        Negate=negate,
        Permute=permute,
        View=flatten,
    )
463 | 
464 | 
def convert_ncnn(opts, typename, pytorch_layer):
    """Convert one PyTorch node to its ncnn layer(s).

    Raises:
        ValueError: if ``typename`` has no registered converter.
    """
    converter = build_converter(opts)
    if typename in converter:
        return converter[typename](pytorch_layer)
    raise ValueError("Unknown layer type: {}, known types: {}".format(
        typename, converter.keys()))
471 | 


--------------------------------------------------------------------------------
/code/ConvertModel.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Copyright (c) 2017-present, starime.
  3 | All rights reserved.
  4 | 
  5 | This source code is licensed under the BSD-style license found in the
  6 | LICENSE file in the root directory of this source tree. An additional grant
  7 | of patent rights can be found in the PATENTS file in the same directory.
  8 | """
  9 | 
 10 | import torch
 11 | from torch.autograd import Variable
 12 | 
 13 | 
 14 | def link_caffe(layer, name, bottom, top):
 15 |     layer.name = name
 16 |     for b in bottom:
 17 |         layer.bottom.append(b)
 18 |     for t in top:
 19 |         layer.top.append(t)
 20 | 
 21 |     caffe_net.append(layer)
 22 | 
 23 | 
 24 | def link_ncnn(layer, name, bottom, top):
 25 |     layer_type = layer.type
 26 |     layer_param = layer.param
 27 |     if isinstance(layer_param, list):
 28 |         for ind, param in enumerate(layer_param):
 29 |             layer_param[ind] = str(ind) + '=' + param
 30 |     elif isinstance(layer_param, dict):
 31 |         param_dict = layer_param
 32 |         layer_param = []
 33 |         for key, param in param_dict.iteritems():
 34 |             layer_param.append(key + '=' + param)
 35 | 
 36 |     pp = []
 37 |     pp.append('%-16s' % layer_type)
 38 |     pp.append('%-16s %d %d' % (name, len(bottom), len(top)))
 39 |     for b in bottom:
 40 |         pp.append('%s' % b)
 41 |         if b not in blob_set:
 42 |             blob_set.add(b)
 43 |     for t in top:
 44 |         pp.append('%s' % t)
 45 |         if t not in blob_set:
 46 |             blob_set.add(t)
 47 |     layer_param = pp + layer_param
 48 | 
 49 |     ncnn_net.append(' '.join(layer_param))
 50 | 
 51 |     for w in layer.weights:
 52 |         ncnn_weights.append(w)
 53 | 
 54 | 
 55 | def GetLayerParam_Index(func):
 56 |     for axis, slice_param in enumerate(func.index):
 57 |         if isinstance(slice_param, int):
 58 |             start = slice_param
 59 |             stop = slice_param + 1
 60 |         else:
 61 |             start = slice_param.start
 62 |             stop = slice_param.stop
 63 |             step = slice_param.step
 64 |         if (start or stop or step) is not None:
 65 |             break
 66 |     shape = func.input_size
 67 |     dim_size = shape[axis]
 68 |     return start, stop, dim_size, axis
 69 | 
 70 | 
def DFS(func):
    """Recursively convert the autograd graph rooted at ``func`` and emit layers.

    Children (``func.next_functions``) are visited first, so layers are
    linked in forward order. For each node a name ``<Type>_<count>`` is
    generated, the node is converted with the module-level ``convert`` and
    registered with the module-level ``link``.

    Relies on module-level state defined outside this function:
    ``visited``, ``tops_dict``, ``layer_type_count``, ``multi_tops``,
    ``slice_point``, ``axis_dict``, ``dst``, ``inputs``, ``convert`` and
    ``link``.

    Returns:
        The top (output blob) name recorded for ``func`` in ``tops_dict``.
    """
    # Already converted: reuse the recorded top name.
    if func in visited:
        return tops_dict[func]

    visited.add(func)
    layer_type = str(type(func).__name__)
    bottoms = []

    # Last child that was recursed into; used below to name 'Index' tops.
    father_func = None
    if hasattr(func, 'next_functions'):
        for u in func.next_functions:
            if u[0] is not None:
                child_type = str(type(u[0]).__name__)
                # Skip AccumulateGrad children, and the TransposeBackward
                # child of an AddmmBackward node.
                if child_type != 'AccumulateGrad' and (layer_type != 'AddmmBackward' or child_type != 'TransposeBackward'):
                    child_name = DFS(u[0])
                    bottoms.append(child_name)
                    father_func = u[0]

    """ Gen layer name """
    layer_type_name = layer_type.replace('Backward', '')
    if layer_type_name in layer_type_count:
        layer_type_count[layer_type_name] += 1
    else:
        layer_type_count[layer_type_name] = 1

    name = layer_type_name + '_' + str(layer_type_count[layer_type_name])

    """ Reaching the root node """
    """  TODO: multi data input """
    if len(bottoms) == 0:
        # The data layer is generated only once, the first time a node
        # without bottoms is reached.
        if 'data' not in layer_type_count:
            layer_type_count['data'] = 1
            """ Gen data layer """
            layer_data = convert('', 'data', inputs)
            link(layer_data, 'data', [], ['data'])

        """ Link it with data input """
        bottoms.append('data')

    """  Skip some pytorch layers  """
    if dst == 'caffe':
        # For these types the node's top is simply its first bottom.
        if layer_type_name in ['Clone', 'Threshold', 'Dropout', 'SetItem']:
            tops_dict[func] = bottoms[0]
        elif (layer_type_name == 'Index') and (not isinstance(func.index, tuple)):
            tops_dict[func] = bottoms[0]
        else:
            tops_dict[func] = name
            if layer_type_name == 'Index':
                """ Change layer name only for 'Slice' """
                tops_dict[func] = tops_dict[father_func] + '_' + tops_dict[func]
    elif dst == 'ncnn':
        if layer_type_name in ['Clone', 'SetItem']:
            tops_dict[func] = bottoms[0]
        elif (layer_type_name == 'Index') and (not isinstance(func.index, tuple)):
            tops_dict[func] = bottoms[0]
        else:
            tops_dict[func] = name
            if layer_type_name == 'Index':
                """ Chane layer name for 'Slice' """
                tops_dict[func] = tops_dict[father_func] + '_' + tops_dict[func]
            elif hasattr(func, 'next_functions'):
                """ Change bottom layers name for other multi top layers cases """
                for u in func.next_functions:
                    if u[0] is not None:
                        child_type = str(type(u[0]).__name__)
                        if child_type != 'AccumulateGrad' and (layer_type != 'AddmmBackward' or child_type != 'TransposeBackward'):
                            father_func = u[0]
                            # A producer with several consumers exposes one
                            # blob per consumer, named '<producer>_<consumer>'.
                            if (father_func in multi_tops) and (len(multi_tops[father_func]) > 1):
                                for i in range(len(bottoms)):
                                    if bottoms[i] == tops_dict[father_func]:
                                        bottoms[i] = tops_dict[father_func] + '_' + tops_dict[func]

    """ Split to BatchNorm and Scale """
    if layer_type_name == 'BatchNorm':
        # convert() returns a two-element list for BatchNorm.
        layer_double = convert('', layer_type_name, func)
        scale_name = name + '_' + 'scale'
        if dst == 'caffe':
            # caffe: the Scale layer reads and writes the BatchNorm top.
            link(layer_double[0], name, bottoms, [tops_dict[func]])
            link(layer_double[1], scale_name, [tops_dict[func]], [tops_dict[func]])
        elif dst == 'ncnn':
            # ncnn: the Scale layer gets its own top, which becomes the
            # node's recorded top.
            link(layer_double[0], name, bottoms, [tops_dict[func]])
            link(layer_double[1], scale_name, [tops_dict[func]], [scale_name])
            tops_dict[func] = scale_name

    elif layer_type_name not in ['Index', 'Clone', 'SetItem']:
            """ Debug """
            # if layer_type_name != 'Cmax':
            #     return tops_dict[func]

            layer = convert('', layer_type_name, func)
            link(layer, name, bottoms, [tops_dict[func]])

    """ If func layer has multiple top layers """
    if (func in multi_tops) and (len(multi_tops[func]) > 1):
        if func in slice_point:
            """ Make an extra dummy layer type 'Slice' after func layer, which not exist in pytorch """
            # NOTE(review): torch.autograd.function appears to be a module
            # object used here as an ad-hoc attribute carrier; the
            # attributes set below are therefore shared across calls.
            slice_func = torch.autograd.function
            slice_func.axis = axis_dict[func]
            slice_func.slice_point = slice_point[func]
            slice_layer = convert('', 'Slice', slice_func)
            link(slice_layer, tops_dict[func] + '_slicer', [tops_dict[func]], multi_tops[func])
        elif dst == 'ncnn':
            """
            Make 'Split' copy for each top layer respectively
            (only in ncnn, caffe will automatically handle this case)
            """
            copy_func = torch.autograd.function
            split_layer = convert('', 'MultiCopy', copy_func)
            link(split_layer, tops_dict[func] + '_copyer', [tops_dict[func]], multi_tops[func])

    return tops_dict[func]
182 | 
183 | 
def FindMultiTops(func):
    """
        Pre-pass over the autograd graph that records, for every node, the
        blob names its consumers will read (``multi_tops``).

        Nodes with more than one top (in-degree > 1) later become a Slice,
        or a Split (only in ncnn, for making multiple copies). For tuple
        'Index' consumers the slice start and axis are also recorded in
        ``slice_point`` and ``axis_dict``.

        Relies on module-level state defined outside this function:
        ``visited``, ``tops_dict``, ``layer_type_count``, ``multi_tops``,
        ``slice_point``, ``axis_dict`` and ``dst``.

        Returns the top name recorded for ``func`` in ``tops_dict``.
    """
    # Already visited: reuse the recorded top name.
    if func in visited:
        return tops_dict[func]

    visited.add(func)
    layer_type = str(type(func).__name__)
    bottoms = []

    if hasattr(func, 'next_functions'):
        for u in func.next_functions:
            if u[0] is not None:
                child_type = str(type(u[0]).__name__)
                # Skip AccumulateGrad children, and the TransposeBackward
                # child of an AddmmBackward node.
                if child_type != 'AccumulateGrad' and (layer_type != 'AddmmBackward' or child_type != 'TransposeBackward'):
                    child_name = FindMultiTops(u[0])
                    bottoms.append(child_name)

    """ Gen layer name """
    layer_type_name = layer_type.replace('Backward', '')
    if layer_type_name in layer_type_count:
        layer_type_count[layer_type_name] += 1
    else:
        layer_type_count[layer_type_name] = 1

    name = layer_type_name + '_' + str(layer_type_count[layer_type_name])

    """  Skip some pytorch layers  """
    if dst == 'caffe':
        # For these types the node's top is simply its first bottom.
        if layer_type_name in ['Clone', 'Threshold', 'Dropout', 'SetItem']:
            tops_dict[func] = bottoms[0]
        elif (layer_type_name == 'Index') and (not isinstance(func.index, tuple)):
            tops_dict[func] = bottoms[0]
        else:
            tops_dict[func] = name
    elif dst == 'ncnn':
        if layer_type_name in ['Clone', 'SetItem']:
            tops_dict[func] = bottoms[0]
        elif (layer_type_name == 'Index') and (not isinstance(func.index, tuple)):
            tops_dict[func] = bottoms[0]
        elif layer_type_name == 'BatchNorm':
            # BatchNorm nodes are recorded under the '<name>_scale' top.
            tops_dict[func] = name + '_' + 'scale'
        else:
            tops_dict[func] = name

    if hasattr(func, 'next_functions'):
        for u in func.next_functions:
            if u[0] is not None:
                child_type = str(type(u[0]).__name__)
                if child_type != 'AccumulateGrad' and (layer_type != 'AddmmBackward' or child_type != 'TransposeBackward'):
                    father_func = u[0]
                    # Register this node as one consumer of father_func,
                    # under the blob name '<producer>_<consumer>'.
                    if father_func not in multi_tops:
                        multi_tops[father_func] = []
                    multi_tops[father_func].append(tops_dict[father_func] + '_' + tops_dict[func])

                    if (layer_type == 'IndexBackward') and isinstance(func.index, tuple):
                        if father_func not in slice_point:
                            slice_point[father_func] = []
                        start, stop, dim_size, axis = GetLayerParam_Index(func)

                        """ Persume the visit of Index layers will be ascending """
                        # Only non-zero starts are recorded as slice points.
                        if start > 0:
                            slice_point[father_func].append(start)
                            axis_dict[father_func] = axis

                            """ Last slice """
                            # if stop == dim_size

    return tops_dict[func]
255 | 
256 | 
def ConvertModel_ncnn(pytorch_net, InputShape, softmax=False):
    """Convert a PyTorch network to ncnn (single tensor input only).

    One forward pass is run on random data so that an autograd graph exists.
    The graph is then walked twice from the outputs: first with
    ``FindMultiTops`` and then with ``DFS``. Both helpers exchange state with
    this function through module-level globals, which are (re)initialised here.

    Args:
        pytorch_net: network to convert; it is switched to eval mode.
        InputShape: four values, unpacked as ``(n, c, h, w)``.
        softmax: when true, an ``nn.Softmax`` is applied to the network output
            before the graph is traversed.

    Returns:
        Tuple ``(text_net, ncnn_weights)``. ``text_net`` is the magic-number
        line, a "<len(ncnn_net)> <len(blob_set)>" line, and then the entries
        of ``ncnn_net`` joined by newlines; ``ncnn_weights`` is the global
        weight list.
    """
    global inputs
    global visited, tops_dict, layer_type_count, dst
    global multi_tops, slice_point, axis_dict
    global ncnn_net, ncnn_weights, blob_set
    global convert, link

    from ConvertLayer_ncnn import convert_ncnn

    # A forward pass is needed to build the graph reachable via grad_fn.
    pytorch_net.eval()
    batch, channels, height, width = InputShape
    inputs = Variable(torch.rand(batch, channels, height, width), requires_grad=True)
    outputs = pytorch_net(inputs)

    if softmax:
        from torch.nn import Softmax
        outputs = Softmax()(outputs)

    # First backward-order traversal: count the tops (indegree) of all nodes.
    dst = 'ncnn'
    visited, tops_dict, layer_type_count = set(), {}, {}
    multi_tops, slice_point, axis_dict = {}, {}, {}
    for out in outputs:
        FindMultiTops(out.grad_fn)

    # Second backward-order traversal: emit the layers. The traversal
    # bookkeeping is reset; multi_tops / slice_point / axis_dict are kept.
    convert, link = convert_ncnn, link_ncnn
    ncnn_net, ncnn_weights = [], []
    visited, tops_dict, layer_type_count = set(), {}, {}
    blob_set = set()
    for out in outputs:
        DFS(out.grad_fn)

    # ncnn magic number first, then the layer count and blob count.
    header = '7767517\n' + '%d %d\n' % (len(ncnn_net), len(blob_set))
    return header + '\n'.join(ncnn_net), ncnn_weights
312 | 
313 | 
def ConvertModel_caffe(pytorch_net, InputShape, softmax=False):
    """Convert a PyTorch network to Caffe (single tensor input only).

    One forward pass is run on random data so that an autograd graph exists.
    The graph is then walked twice from the outputs: first with
    ``FindMultiTops`` and then with ``DFS``. Both helpers exchange state with
    this function through module-level globals, which are (re)initialised here.

    Args:
        pytorch_net: network to convert; it is switched to eval mode.
        InputShape: four values, unpacked as ``(n, c, h, w)``.
        softmax: when true, an ``nn.Softmax`` is applied to the network output
            before the graph is traversed.

    Returns:
        Tuple ``(text_net, binary_weights)`` of ``NetParameter`` messages.
        Both hold every layer collected in ``caffe_net``; the layers in
        ``text_net`` have their ``blobs`` removed, the layers in
        ``binary_weights`` keep them.
    """
    global inputs
    global visited, tops_dict, layer_type_count, dst
    global slice_point, multi_tops, axis_dict
    global caffe_net
    global convert, link

    import os
    import caffe_pb2 as pb2
    from ConvertLayer_caffe import convert_caffe

    # A forward pass is needed to build the graph reachable via grad_fn.
    pytorch_net.eval()
    batch, channels, height, width = InputShape
    inputs = Variable(torch.rand(batch, channels, height, width), requires_grad=True)
    outputs = pytorch_net(inputs)

    if softmax:
        from torch.nn import Softmax
        outputs = Softmax()(outputs)

    # First backward-order traversal: count the tops (indegree) of all nodes.
    dst = 'caffe'
    visited, tops_dict, layer_type_count = set(), {}, {}
    slice_point, multi_tops, axis_dict = {}, {}, {}
    for out in outputs:
        FindMultiTops(out.grad_fn)

    # Second backward-order traversal: emit the layers. The traversal
    # bookkeeping is reset; slice_point / multi_tops / axis_dict are kept.
    convert, link = convert_caffe, link_caffe
    caffe_net = []
    visited, tops_dict, layer_type_count = set(), {}, {}
    for out in outputs:
        DFS(out.grad_fn)

    # Net-level settings live on text_net and are copied to the weights
    # message before any layer is appended.
    text_net = pb2.NetParameter()
    if os.environ.get("T2C_DEBUG"):
        text_net.debug_info = True

    binary_weights = pb2.NetParameter()
    binary_weights.CopyFrom(text_net)
    for full_layer in caffe_net:
        binary_weights.layer.extend([full_layer])

        # The text description gets a copy of the layer without its blobs.
        weightless = pb2.LayerParameter()
        weightless.CopyFrom(full_layer)
        del weightless.blobs[:]
        text_net.layer.extend([weightless])

    return text_net, binary_weights
378 | 


--------------------------------------------------------------------------------
/code/ReplaceDenormals.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import torch
 3 | 
 4 | 
 5 | def ReplaceDenormals(net):
 6 |     for name, param in net.named_parameters():
 7 |         np_arr = param.data.numpy()
 8 |         for x in np.nditer(np_arr, op_flags=['readwrite']):
 9 |             if abs(x) < 1e-30:
10 |                 x[...] = 1e-30
11 |         param.data = torch.from_numpy(np_arr)
12 | 


--------------------------------------------------------------------------------
/code/caffe.proto:
--------------------------------------------------------------------------------
   1 | syntax = "proto2";
   2 | 
   3 | package caffe;
   4 | 
// Specifies the shape (dimensions) of a Blob.
message BlobShape {
  // One entry per dimension of the blob.
  repeated int64 dim = 1 [packed = true];
}
   9 | 
// Serialized form of a blob: an optional shape plus its "data" and "diff"
// arrays, each of which can be stored as float or as double.
message BlobProto {
  // Dimensions of the blob; replaces the deprecated 4D fields below.
  optional BlobShape shape = 7;
  // Single-precision payloads.
  repeated float data = 5 [packed = true];
  repeated float diff = 6 [packed = true];
  // Double-precision payloads.
  repeated double double_data = 8 [packed = true];
  repeated double double_diff = 9 [packed = true];

  // 4D dimensions -- deprecated.  Use "shape" instead.
  optional int32 num = 1 [default = 0];
  optional int32 channels = 2 [default = 0];
  optional int32 height = 3 [default = 0];
  optional int32 width = 4 [default = 0];
}
  23 | 
// The BlobProtoVector is simply a way to pass multiple BlobProto instances
// around.
message BlobProtoVector {
  // The wrapped blobs.
  repeated BlobProto blobs = 1;
}
  29 | 
// A single data record: its dimensions, its payload (raw bytes and/or float
// values) and an integer label.
message Datum {
  optional int32 channels = 1;
  optional int32 height = 2;
  optional int32 width = 3;
  // the actual image data, in bytes
  optional bytes data = 4;
  optional int32 label = 5;
  // Optionally, the datum could also hold float data.
  repeated float float_data = 6;
  // If true, data contains an encoded image that needs to be decoded
  optional bool encoded = 7 [default = false];
}
  42 | 
// Describes how a blob is filled. Which of the numeric fields below are
// meaningful depends on the chosen filler `type` (see the per-field notes).
message FillerParameter {
  // The filler type.
  optional string type = 1 [default = 'constant'];
  optional float value = 2 [default = 0]; // the value in constant filler
  optional float min = 3 [default = 0]; // the min value in uniform filler
  optional float max = 4 [default = 1]; // the max value in uniform filler
  optional float mean = 5 [default = 0]; // the mean value in Gaussian filler
  optional float std = 6 [default = 1]; // the std value in Gaussian filler
  // The expected number of non-zero output weights for a given input in
  // Gaussian filler -- the default -1 means don't perform sparsification.
  optional int32 sparse = 7 [default = -1];
  // Normalize the filler variance by fan_in, fan_out, or their average.
  // Applies to 'xavier' and 'msra' fillers.
  enum VarianceNorm {
    FAN_IN = 0;
    FAN_OUT = 1;
    AVERAGE = 2;
  }
  optional VarianceNorm variance_norm = 8 [default = FAN_IN];
}
  63 | 
// Top-level description of a network: its name, the (deprecated) inline input
// declarations, its state, debugging switches and the list of layers.
message NetParameter {
  optional string name = 1; // consider giving the network a name
  // DEPRECATED. See InputParameter. The input blobs to the network.
  repeated string input = 3;
  // DEPRECATED. See InputParameter. The shape of the input blobs.
  repeated BlobShape input_shape = 8;

  // 4D input dimensions -- deprecated.  Use "input_shape" instead.
  // If specified, for each input blob there should be four
  // values specifying the num, channels, height and width of the input blob.
  // Thus, there should be a total of (4 * #input) numbers.
  repeated int32 input_dim = 4;

  // Whether the network will force every layer to carry out backward operation.
  // If set False, then whether to carry out backward is determined
  // automatically according to the net structure and learning rates.
  optional bool force_backward = 5 [default = false];
  // The current "state" of the network, including the phase, level, and stage.
  // Some layers may be included/excluded depending on this state and the states
  // specified in the layers' include and exclude fields.
  optional NetState state = 6;

  // Print debugging information about results while running Net::Forward,
  // Net::Backward, and Net::Update.
  optional bool debug_info = 7 [default = false];

  // The layers that make up the net.  Each of their configurations, including
  // connectivity and behavior, is specified as a LayerParameter.
  repeated LayerParameter layer = 100;  // ID 100 so layers are printed last.

  // DEPRECATED: use 'layer' instead.
  repeated V1LayerParameter layers = 2;
}
  97 | 
// NOTE
// Update the next available ID when you add a new SolverParameter field.
//
// SolverParameter next available ID: 41 (last added: type)
//
// Settings for a solver: which nets to train and test, the iteration and
// learning-rate schedule, snapshotting, execution mode and solver type.
message SolverParameter {
  //////////////////////////////////////////////////////////////////////////////
  // Specifying the train and test networks
  //
  // Exactly one train net must be specified using one of the following fields:
  //     train_net_param, train_net, net_param, net
  // One or more test nets may be specified using any of the following fields:
  //     test_net_param, test_net, net_param, net
  // If more than one test net field is specified (e.g., both net and
  // test_net are specified), they will be evaluated in the field order given
  // above: (1) test_net_param, (2) test_net, (3) net_param/net.
  // A test_iter must be specified for each test_net.
  // A test_level and/or a test_stage may also be specified for each test_net.
  //////////////////////////////////////////////////////////////////////////////

  // Proto filename for the train net, possibly combined with one or more
  // test nets.
  optional string net = 24;
  // Inline train net param, possibly combined with one or more test nets.
  optional NetParameter net_param = 25;

  optional string train_net = 1; // Proto filename for the train net.
  repeated string test_net = 2; // Proto filenames for the test nets.
  optional NetParameter train_net_param = 21; // Inline train net params.
  repeated NetParameter test_net_param = 22; // Inline test net params.

  // The states for the train/test nets. Must be unspecified or
  // specified once per net.
  //
  // By default, all states will have solver = true;
  // train_state will have phase = TRAIN,
  // and all test_state's will have phase = TEST.
  // Other defaults are set according to the NetState defaults.
  optional NetState train_state = 26;
  repeated NetState test_state = 27;

  // The number of iterations for each test net.
  repeated int32 test_iter = 3;

  // The number of iterations between two testing phases.
  optional int32 test_interval = 4 [default = 0];
  optional bool test_compute_loss = 19 [default = false];
  // If true, run an initial test pass before the first iteration,
  // ensuring memory availability and printing the starting value of the loss.
  optional bool test_initialization = 32 [default = true];
  optional float base_lr = 5; // The base learning rate
  // the number of iterations between displaying info. If display = 0, no info
  // will be displayed.
  optional int32 display = 6;
  // Display the loss averaged over the last average_loss iterations
  optional int32 average_loss = 33 [default = 1];
  optional int32 max_iter = 7; // the maximum number of iterations
  // accumulate gradients over `iter_size` x `batch_size` instances
  optional int32 iter_size = 36 [default = 1];

  // The learning rate decay policy. The currently implemented learning rate
  // policies are as follows:
  //    - fixed: always return base_lr.
  //    - step: return base_lr * gamma ^ (floor(iter / step))
  //    - exp: return base_lr * gamma ^ iter
  //    - inv: return base_lr * (1 + gamma * iter) ^ (- power)
  //    - multistep: similar to step but it allows non uniform steps defined by
  //      stepvalue
  //    - poly: the effective learning rate follows a polynomial decay, to be
  //      zero by the max_iter. return base_lr (1 - iter/max_iter) ^ (power)
  //    - sigmoid: the effective learning rate follows a sigmoid decay
  //      return base_lr ( 1/(1 + exp(-gamma * (iter - stepsize))))
  //
  // where base_lr, max_iter, gamma, step, stepvalue and power are defined
  // in the solver parameter protocol buffer, and iter is the current iteration.
  optional string lr_policy = 8;
  optional float gamma = 9; // The parameter to compute the learning rate.
  optional float power = 10; // The parameter to compute the learning rate.
  optional float momentum = 11; // The momentum value.
  optional float weight_decay = 12; // The weight decay.
  // regularization types supported: L1 and L2
  // controlled by weight_decay
  optional string regularization_type = 29 [default = "L2"];
  // the stepsize for learning rate policy "step"
  optional int32 stepsize = 13;
  // the stepsize for learning rate policy "multistep"
  repeated int32 stepvalue = 34;

  // Set clip_gradients to >= 0 to clip parameter gradients to that L2 norm,
  // whenever their actual L2 norm is larger.
  optional float clip_gradients = 35 [default = -1];

  optional int32 snapshot = 14 [default = 0]; // The snapshot interval
  optional string snapshot_prefix = 15; // The prefix for the snapshot.
  // whether to snapshot diff in the results or not. Snapshotting diff will help
  // debugging but the final protocol buffer size will be much larger.
  optional bool snapshot_diff = 16 [default = false];
  enum SnapshotFormat {
    HDF5 = 0;
    BINARYPROTO = 1;
  }
  optional SnapshotFormat snapshot_format = 37 [default = BINARYPROTO];
  // the mode the solver will use: 0 for CPU and 1 for GPU. GPU is the default.
  enum SolverMode {
    CPU = 0;
    GPU = 1;
  }
  optional SolverMode solver_mode = 17 [default = GPU];
  // the device_id that will be used in GPU mode. device_id = 0 is the default.
  optional int32 device_id = 18 [default = 0];
  // If non-negative, the seed with which the Solver will initialize the Caffe
  // random number generator -- useful for reproducible results. Otherwise,
  // (and by default) initialize using a seed derived from the system clock.
  optional int64 random_seed = 20 [default = -1];

  // type of the solver
  optional string type = 40 [default = "SGD"];

  // numerical stability for RMSProp, AdaGrad and AdaDelta and Adam
  optional float delta = 31 [default = 1e-8];
  // parameters for the Adam solver
  optional float momentum2 = 39 [default = 0.999];

  // RMSProp decay value
  // MeanSquare(t) = rms_decay*MeanSquare(t-1) + (1-rms_decay)*SquareGradient(t)
  optional float rms_decay = 38;

  // If true, print information about the state of the net that may help with
  // debugging learning problems.
  optional bool debug_info = 23 [default = false];

  // If false, don't save a snapshot after training finishes.
  optional bool snapshot_after_train = 28 [default = true];

  // DEPRECATED: old solver enum types, use string instead
  enum SolverType {
    SGD = 0;
    NESTEROV = 1;
    ADAGRAD = 2;
    RMSPROP = 3;
    ADADELTA = 4;
    ADAM = 5;
  }
  // DEPRECATED: use type instead of solver_type
  optional SolverType solver_type = 30 [default = SGD];
}
 243 | 
// A message that stores a solver snapshot: the iteration reached, where the
// learned net is stored, the solver history and the learning-rate step.
message SolverState {
  optional int32 iter = 1; // The current iteration
  optional string learned_net = 2; // The file that stores the learned net.
  repeated BlobProto history = 3; // The history for sgd solvers
  optional int32 current_step = 4 [default = 0]; // The current step for learning rate
}
 251 | 
// Train / test phase. Used by NetState, NetStateRule and LayerParameter.
enum Phase {
   TRAIN = 0;
   TEST = 1;
}
 256 | 
// The state of a network: its phase, level and stages. NetStateRule
// messages are written in terms of these three values.
message NetState {
  optional Phase phase = 1 [default = TEST];
  optional int32 level = 2 [default = 0];
  repeated string stage = 3;
}
 262 | 
// A rule that a NetState either meets or does not meet; LayerParameter uses
// lists of these rules in its include / exclude fields.
message NetStateRule {
  // Set phase to require the NetState have a particular phase (TRAIN or TEST)
  // to meet this rule.
  optional Phase phase = 1;

  // Set the minimum and/or maximum levels in which the layer should be used.
  // Leave undefined to meet the rule regardless of level.
  optional int32 min_level = 2;
  optional int32 max_level = 3;

  // Customizable sets of stages to include or exclude.
  // The net must have ALL of the specified stages and NONE of the specified
  // "not_stage"s to meet the rule.
  // (Use multiple NetStateRules to specify conjunctions of stages.)
  repeated string stage = 4;
  repeated string not_stage = 5;
}
 280 | 
// Specifies training parameters (multipliers on global learning constants,
// and the name and other settings used for weight sharing).
message ParamSpec {
  // The name of the parameter blob -- useful for sharing parameters among
  // layers, but never required otherwise.  To share a parameter between two
  // layers, give it a (non-empty) name.
  optional string name = 1;

  // Whether to require shared weights to have the same shape, or just the same
  // count -- defaults to STRICT if unspecified.
  optional DimCheckMode share_mode = 2;
  // How strictly the dimensions of shared parameters are compared.
  enum DimCheckMode {
    // STRICT (default) requires that num, channels, height, width each match.
    STRICT = 0;
    // PERMISSIVE requires only the count (num*channels*height*width) to match.
    PERMISSIVE = 1;
  }

  // The multiplier on the global learning rate for this parameter.
  optional float lr_mult = 3 [default = 1.0];

  // The multiplier on the global weight decay for this parameter.
  optional float decay_mult = 4 [default = 1.0];
}
 305 | 
// NOTE
// Update the next available ID when you add a new LayerParameter field.
//
// LayerParameter next available layer-specific ID: 151 (last added: box_annotator_ohem_param)
//
// Describes one layer of a net: its name and type, the blobs it connects to,
// its learned blobs, the rules deciding whether it is included, and one
// optional parameter message per layer type.
message LayerParameter {
  optional string name = 1; // the layer name
  optional string type = 2; // the layer type
  repeated string bottom = 3; // the name of each bottom blob
  repeated string top = 4; // the name of each top blob

  // The train / test phase for computation.
  optional Phase phase = 10;

  // The amount of weight to assign each top blob in the objective.
  // Each layer assigns a default value, usually of either 0 or 1,
  // to each top blob.
  repeated float loss_weight = 5;

  // Specifies training parameters (multipliers on global learning constants,
  // and the name and other settings used for weight sharing).
  repeated ParamSpec param = 6;

  // The blobs containing the numeric parameters of the layer.
  repeated BlobProto blobs = 7;

  // Specifies whether to backpropagate to each bottom. If unspecified,
  // Caffe will automatically infer whether each input needs backpropagation
  // to compute parameter gradients. If set to true for some inputs,
  // backpropagation to those inputs is forced; if set false for some inputs,
  // backpropagation to those inputs is skipped.
  //
  // The size must be either 0 or equal to the number of bottoms.
  repeated bool propagate_down = 11;

  // Rules controlling whether and when a layer is included in the network,
  // based on the current NetState.  You may specify a non-zero number of rules
  // to include OR exclude, but not both.  If no include or exclude rules are
  // specified, the layer is always included.  If the current NetState meets
  // ANY (i.e., one or more) of the specified rules, the layer is
  // included/excluded.
  repeated NetStateRule include = 8;
  repeated NetStateRule exclude = 9;

  // Parameters for data pre-processing.
  optional TransformationParameter transform_param = 100;

  // Parameters shared by loss layers.
  optional LossParameter loss_param = 101;

  // Layer type-specific parameters.
  //
  // Note: certain layers may have more than one computational engine
  // for their implementation. These layers include an Engine type and
  // engine parameter for selecting the implementation.
  // The default for the engine is set by the ENGINE switch at compile-time.
  optional AccuracyParameter accuracy_param = 102;
  optional ArgMaxParameter argmax_param = 103;
  optional BatchNormParameter batch_norm_param = 139;
  optional BoxAnnotatorOHEMParameter box_annotator_ohem_param = 150;
  optional BiasParameter bias_param = 141;
  optional ConcatParameter concat_param = 104;
  optional ContrastiveLossParameter contrastive_loss_param = 105;
  optional ConvolutionParameter convolution_param = 106;
  optional CropParameter crop_param = 144;
  optional DataParameter data_param = 107;
  optional DropoutParameter dropout_param = 108;
  optional DummyDataParameter dummy_data_param = 109;
  optional EltwiseParameter eltwise_param = 110;
  optional ELUParameter elu_param = 140;
  optional EmbedParameter embed_param = 137;
  optional ExpParameter exp_param = 111;
  optional FlattenParameter flatten_param = 135;
  optional HDF5DataParameter hdf5_data_param = 112;
  optional HDF5OutputParameter hdf5_output_param = 113;
  optional HingeLossParameter hinge_loss_param = 114;
  optional ImageDataParameter image_data_param = 115;
  optional InfogainLossParameter infogain_loss_param = 116;
  optional InnerProductParameter inner_product_param = 117;
  optional InputParameter input_param = 143;
  optional LogParameter log_param = 134;
  optional LRNParameter lrn_param = 118;
  optional MemoryDataParameter memory_data_param = 119;
  optional MVNParameter mvn_param = 120;
  optional ParameterParameter parameter_param = 145;
  optional PoolingParameter pooling_param = 121;
  optional PowerParameter power_param = 122;
  optional PReLUParameter prelu_param = 131;
  optional PSROIPoolingParameter psroi_pooling_param = 149;
  optional PythonParameter python_param = 130;
  optional RecurrentParameter recurrent_param = 146;
  optional ReductionParameter reduction_param = 136;
  optional ReLUParameter relu_param = 123;
  optional ReshapeParameter reshape_param = 133;
  optional ROIPoolingParameter roi_pooling_param = 147;
  optional ScaleParameter scale_param = 142;
  optional SigmoidParameter sigmoid_param = 124;
  optional SmoothL1LossParameter smooth_l1_loss_param = 148;
  optional SoftmaxParameter softmax_param = 125;
  optional SPPParameter spp_param = 132;
  optional SliceParameter slice_param = 126;
  optional TanHParameter tanh_param = 127;
  optional ThresholdParameter threshold_param = 128;
  optional TileParameter tile_param = 138;
  optional WindowDataParameter window_data_param = 129;
  // The two field numbers below spell the quoted tag in ASCII
  // (0x4d 0x49 0x44 = "MID", 0x4d 0x49 0x4c = "MIL").
  optional MILDataParameter mil_data_param = 0x004d4944; //"MID"
  optional MILParameter mil_param = 0x004d494c; //"MIL"
}
 413 | 
// Message that stores parameters used to apply transformation
// to the data layer's data
message TransformationParameter {
  // For data pre-processing, we can do simple scaling and subtracting the
  // data mean, if provided. Note that the mean subtraction is always carried
  // out before scaling.
  optional float scale = 1 [default = 1];
  // Specify if we want to randomly mirror data.
  optional bool mirror = 2 [default = false];
  // Specify if we would like to randomly crop an image.
  optional uint32 crop_size = 3 [default = 0];
  // mean_file and mean_value cannot be specified at the same time
  optional string mean_file = 4;
  // if specified can be repeated once (would subtract it from all the channels)
  // or can be repeated the same number of times as channels
  // (would subtract them from the corresponding channel)
  repeated float mean_value = 5;
  // Force the decoded image to have 3 color channels.
  optional bool force_color = 6 [default = false];
  // Force the decoded image to have 1 color channel.
  optional bool force_gray = 7 [default = false];
}
 436 | 
// Message that stores parameters shared by loss layers
message LossParameter {
  // If specified, ignore instances with the given label.
  optional int32 ignore_label = 1;
  // How to normalize the loss for loss layers that aggregate across batches,
  // spatial dimensions, or other dimensions.  Currently only implemented in
  // SoftmaxWithLoss layer.
  enum NormalizationMode {
    // Divide by the number of examples in the batch times spatial dimensions.
    // Outputs that receive the ignore label will NOT be ignored in computing
    // the normalization factor.
    FULL = 0;
    // Divide by the total number of output locations that do not take the
    // ignore_label.  If ignore_label is not set, this behaves like FULL.
    VALID = 1;
    // Divide by the batch size.
    BATCH_SIZE = 2;
    // Divide by pre-fixed normalizer
    PRE_FIXED = 3;
    // Do not normalize the loss.
    NONE = 4;
  }
  optional NormalizationMode normalization = 3 [default = VALID];
  // Deprecated.  Ignored if normalization is specified.  If normalization
  // is not specified, then setting this to false will be equivalent to
  // normalization = BATCH_SIZE to be consistent with previous behavior.
  optional bool normalize = 2;
  // Pre-fixed normalizer (presumably the divisor used by the PRE_FIXED
  // mode above -- confirm against the layer implementation).
  optional float pre_fixed_normalizer = 4 [default = 1];
}
 467 | 
 468 | // Messages that store parameters used by individual layer types follow, in
 469 | // alphabetical order.
 470 | 
// Parameters for accuracy computation (LayerParameter.accuracy_param).
message AccuracyParameter {
  // When computing accuracy, count as correct by comparing the true label to
  // the top k scoring classes.  By default, only compare to the top scoring
  // class (i.e. argmax).
  optional uint32 top_k = 1 [default = 1];

  // The "label" axis of the prediction blob, whose argmax corresponds to the
  // predicted label -- may be negative to index from the end (e.g., -1 for the
  // last axis).  For example, if axis == 1 and the predictions are
  // (N x C x H x W), the label blob is expected to contain N*H*W ground truth
  // labels with integer values in {0, 1, ..., C-1}.
  optional int32 axis = 2 [default = 1];

  // If specified, ignore instances with the given label.
  optional int32 ignore_label = 3;
}
 487 | 
// Parameters for ArgMaxLayer (LayerParameter.argmax_param).
message ArgMaxParameter {
  // If true produce pairs (argmax, maxval)
  optional bool out_max_val = 1 [default = false];
  optional uint32 top_k = 2 [default = 1];
  // The axis along which to maximise -- may be negative to index from the
  // end (e.g., -1 for the last axis).
  // By default ArgMaxLayer maximizes over the flattened trailing dimensions
  // for each index of the first / num dimension.
  optional int32 axis = 3;
}
 498 | 
// Parameters for ConcatLayer (LayerParameter.concat_param).
message ConcatParameter {
  // The axis along which to concatenate -- may be negative to index from the
  // end (e.g., -1 for the last axis).  Other axes must have the
  // same dimension for all the bottom blobs.
  // By default, ConcatLayer concatenates blobs along the "channels" axis (1).
  optional int32 axis = 2 [default = 1];

  // DEPRECATED: alias for "axis" -- does not support negative indexing.
  optional uint32 concat_dim = 1 [default = 1];
}
 509 | 
// Parameters for batch normalization (LayerParameter.batch_norm_param):
// whether to use accumulated statistics, the moving-average decay, and the
// epsilon added to the variance.
message BatchNormParameter {
  // If false, accumulate global mean/variance values via a moving average. If
  // true, use those accumulated values instead of computing mean/variance
  // across the batch.
  optional bool use_global_stats = 1;
  // How much does the moving average decay each iteration?
  optional float moving_average_fraction = 2 [default = .999];
  // Small value to add to the variance estimate so that we don't divide by
  // zero.
  optional float eps = 3 [default = 1e-5];
}
 521 | 
// Parameters referenced as LayerParameter.box_annotator_ohem_param.
// roi_per_img is a `required` field, so it must be set whenever this message
// is used.
message BoxAnnotatorOHEMParameter {
  required uint32 roi_per_img = 1; // number of rois for training
  optional int32 ignore_label = 2 [default = -1]; // ignore_label in scoring
}
 526 | 
// Parameters for BiasLayer (LayerParameter.bias_param): where the bias is
// applied, how many axes it covers, and how a learned bias is initialized.
message BiasParameter {
  // The first axis of bottom[0] (the first input Blob) along which to apply
  // bottom[1] (the second input Blob).  May be negative to index from the end
  // (e.g., -1 for the last axis).
  //
  // For example, if bottom[0] is 4D with shape 100x3x40x60, the output
  // top[0] will have the same shape, and bottom[1] may have any of the
  // following shapes (for the given value of axis):
  //    (axis == 0 == -4) 100; 100x3; 100x3x40; 100x3x40x60
  //    (axis == 1 == -3)          3;     3x40;     3x40x60
  //    (axis == 2 == -2)                   40;       40x60
  //    (axis == 3 == -1)                                60
  // Furthermore, bottom[1] may have the empty shape (regardless of the value of
  // "axis") -- a scalar bias.
  optional int32 axis = 1 [default = 1];

  // (num_axes is ignored unless just one bottom is given and the bias is
  // a learned parameter of the layer.  Otherwise, num_axes is determined by the
  // number of axes by the second bottom.)
  // The number of axes of the input (bottom[0]) covered by the bias
  // parameter, or -1 to cover all axes of bottom[0] starting from `axis`.
  // Set num_axes := 0, to add a zero-axis Blob: a scalar.
  optional int32 num_axes = 2 [default = 1];

  // (filler is ignored unless just one bottom is given and the bias is
  // a learned parameter of the layer.)
  // The initialization for the learned bias parameter.
  // Default is the zero (0) initialization, resulting in the BiasLayer
  // initially performing the identity operation.
  optional FillerParameter filler = 3;
}
 558 | 
 559 | message ContrastiveLossParameter { // Parameters for the contrastive loss (Hadsell et al. 2006)
 560 |   // Margin for dissimilar pairs.
 561 |   optional float margin = 1 [default = 1.0];
 562 |   // The first implementation of this cost did not exactly match the cost of
 563 |   // Hadsell et al 2006 -- using (margin - d^2) instead of (margin - d)^2.
 564 |   // legacy_version = false (the default) uses (margin - d)^2 as proposed in the
 565 |   // Hadsell paper. New models should probably use this version.
 566 |   // legacy_version = true uses (margin - d^2). This is kept to support /
 567 |   // reproduce existing models and results.
 568 |   optional bool legacy_version = 2 [default = false];
 569 | }
 570 | 
 571 | message ConvolutionParameter { // Parameters describing a convolution (N-D, with 2D shortcuts)
 572 |   optional uint32 num_output = 1; // The number of outputs for the layer
 573 |   optional bool bias_term = 2 [default = true]; // whether to have bias terms
 574 | 
 575 |   // Pad, kernel size, and stride are all given as a single value for equal
 576 |   // dimensions in all spatial dimensions, or once per spatial dimension.
 577 |   repeated uint32 pad = 3; // The padding size; defaults to 0
 578 |   repeated uint32 kernel_size = 4; // The kernel size
 579 |   repeated uint32 stride = 6; // The stride; defaults to 1
 580 |   // Factor used to dilate the kernel, (implicitly) zero-filling the resulting
 581 |   // holes. (Kernel dilation is sometimes referred to by its use in the
 582 |   // algorithme à trous from Holschneider et al. 1987.)
 583 |   repeated uint32 dilation = 18; // The dilation; defaults to 1
 584 | 
 585 |   // For 2D convolution only, the *_h and *_w versions may also be used to
 586 |   // specify both spatial dimensions.
 587 |   optional uint32 pad_h = 9 [default = 0]; // The padding height (2D only)
 588 |   optional uint32 pad_w = 10 [default = 0]; // The padding width (2D only)
 589 |   optional uint32 kernel_h = 11; // The kernel height (2D only)
 590 |   optional uint32 kernel_w = 12; // The kernel width (2D only)
 591 |   optional uint32 stride_h = 13; // The stride height (2D only)
 592 |   optional uint32 stride_w = 14; // The stride width (2D only)
 593 | 
 594 |   optional uint32 group = 5 [default = 1]; // The group size for group conv
 595 | 
 596 |   optional FillerParameter weight_filler = 7; // The filler for the weight
 597 |   optional FillerParameter bias_filler = 8; // The filler for the bias
 598 |   enum Engine { // available implementations; how DEFAULT is resolved is not visible here
 599 |     DEFAULT = 0;
 600 |     CAFFE = 1;
 601 |     CUDNN = 2;
 602 |   }
 603 |   optional Engine engine = 15 [default = DEFAULT];
 604 | 
 605 |   // The axis to interpret as "channels" when performing convolution.
 606 |   // Preceding dimensions are treated as independent inputs;
 607 |   // succeeding dimensions are treated as "spatial".
 608 |   // With (N, C, H, W) inputs, and axis == 1 (the default), we perform
 609 |   // N independent 2D convolutions, sliding C-channel (or (C/g)-channels, for
 610 |   // groups g>1) filters across the spatial axes (H, W) of the input.
 611 |   // With (N, C, D, H, W) inputs, and axis == 1, we perform
 612 |   // N independent 3D convolutions, sliding (C/g)-channels
 613 |   // filters across the spatial axes (D, H, W) of the input.
 614 |   optional int32 axis = 16 [default = 1];
 615 | 
 616 |   // Whether to force use of the general ND convolution, even if a specific
 617 |   // implementation for blobs of the appropriate number of spatial dimensions
 618 |   // is available. (Currently, there is only a 2D-specific convolution
 619 |   // implementation; for input blobs with num_axes != 2, this option is
 620 |   // ignored and the ND implementation will be used.)
 621 |   optional bool force_nd_im2col = 17 [default = false];
 622 | }
 623 | 
 624 | message CropParameter { // Parameters for cropping one bottom to the size of a reference bottom
 625 |   // To crop, elements of the first bottom are selected to fit the dimensions
 626 |   // of the second, reference bottom. The crop is configured by
 627 |   // - the crop `axis` to pick the dimensions for cropping
 628 |   // - the crop `offset` to set the shift for all/each dimension
 629 |   // to align the cropped bottom with the reference bottom.
 630 |   // All dimensions up to but excluding `axis` are preserved, while
 631 |   // the dimensions including and trailing `axis` are cropped.
 632 |   // If only one `offset` is set, then all dimensions are offset by this amount.
 633 |   // Otherwise, the number of offsets must equal the number of cropped axes to
 634 |   // shift the crop in each dimension accordingly.
 635 |   // Note: standard dimensions are N,C,H,W so the default is a spatial crop,
 636 |   // and `axis` may be negative to index from the end (e.g., -1 for the last
 637 |   // axis).
 638 |   optional int32 axis = 1 [default = 2];
 639 |   repeated uint32 offset = 2; // one value for all cropped axes, or one value per cropped axis
 640 | }
 641 | 
 642 | message DataParameter { // Parameters for a database-backed data layer
 643 |   enum DB { // supported database backends
 644 |     LEVELDB = 0;
 645 |     LMDB = 1;
 646 |   }
 647 |   // Specify the data source.
 648 |   optional string source = 1;
 649 |   // Specify the batch size.
 650 |   optional uint32 batch_size = 4;
 651 |   // The rand_skip variable is for the data layer to skip a few data points
 652 |   // so that asynchronous SGD clients do not all start at the same point. The skip
 653 |   // point would be set as rand_skip * rand(0,1). Note that rand_skip should not
 654 |   // be larger than the number of keys in the database.
 655 |   // DEPRECATED. Each solver accesses a different subset of the database.
 656 |   optional uint32 rand_skip = 7 [default = 0];
 657 |   optional DB backend = 8 [default = LEVELDB];
 658 |   // DEPRECATED. See TransformationParameter. For data pre-processing, we can do
 659 |   // simple scaling and subtracting the data mean, if provided. Note that the
 660 |   // mean subtraction is always carried out before scaling.
 661 |   optional float scale = 2 [default = 1];
 662 |   optional string mean_file = 3;
 663 |   // DEPRECATED. See TransformationParameter. Specify if we would like to randomly
 664 |   // crop an image.
 665 |   optional uint32 crop_size = 5 [default = 0];
 666 |   // DEPRECATED. See TransformationParameter. Specify if we want to randomly mirror
 667 |   // data.
 668 |   optional bool mirror = 6 [default = false];
 669 |   // Force the encoded image to have 3 color channels.
 670 |   optional bool force_encoded_color = 9 [default = false];
 671 |   // Prefetch queue (Number of batches to prefetch to host memory, increase if
 672 |   // data access bandwidth varies).
 673 |   optional uint32 prefetch = 10 [default = 4];
 674 | }
 675 | 
 676 | message DropoutParameter { // Parameters for dropout
 677 |   optional float dropout_ratio = 1 [default = 0.5]; // dropout ratio
 678 |   optional bool scale_train = 2 [default = true];  // scale in train phase if true; presumably in test phase otherwise -- confirm
 679 | }
 680 | 
 681 | // DummyDataLayer fills any number of arbitrarily shaped blobs with random
 682 | // (or constant) data generated by "Fillers" (see "message FillerParameter").
 683 | message DummyDataParameter {
 684 |   // This layer produces N >= 1 top blobs.  DummyDataParameter must specify 1 or N
 685 |   // shape fields, and 0, 1 or N data_fillers.
 686 |   //
 687 |   // If 0 data_fillers are specified, ConstantFiller with a value of 0 is used.
 688 |   // If 1 data_filler is specified, it is applied to all top blobs.  If N are
 689 |   // specified, the ith is applied to the ith top blob.
 690 |   repeated FillerParameter data_filler = 1; // 0, 1 or N entries (see above)
 691 |   repeated BlobShape shape = 6; // 1 or N entries (see above)
 692 | 
 693 |   // 4D dimensions -- deprecated.  Use "shape" instead.
 694 |   repeated uint32 num = 2;
 695 |   repeated uint32 channels = 3;
 696 |   repeated uint32 height = 4;
 697 |   repeated uint32 width = 5;
 698 | }
 699 | 
 700 | message EltwiseParameter { // Parameters for the element-wise operation layer
 701 |   enum EltwiseOp { // element-wise product, sum, or maximum
 702 |     PROD = 0;
 703 |     SUM = 1;
 704 |     MAX = 2;
 705 |   }
 706 |   optional EltwiseOp operation = 1 [default = SUM]; // element-wise operation
 707 |   repeated float coeff = 2; // blob-wise coefficient for SUM operation
 708 | 
 709 |   // Whether to use an asymptotically slower (for >2 inputs) but stabler method
 710 |   // of computing the gradient for the PROD operation. (No effect for SUM op.)
 711 |   optional bool stable_prod_grad = 3 [default = true];
 712 | }
 713 | 
 714 | // Message that stores parameters used by ELULayer
 715 | message ELUParameter {
 716 |   // ELUs are described in:
 717 |   // Clevert, D.-A., Unterthiner, T., & Hochreiter, S. (2015). Fast and Accurate
 718 |   // Deep Network Learning by Exponential Linear Units (ELUs). arXiv
 719 |   optional float alpha = 1 [default = 1]; // NOTE(review): presumably the ELU alpha from the paper above -- confirm
 720 | }
 721 | 
 722 | // Message that stores parameters used by EmbedLayer
 723 | message EmbedParameter {
 724 |   optional uint32 num_output = 1; // The number of outputs for the layer
 725 |   // The input is given as integers to be interpreted as one-hot
 726 |   // vector indices with dimension input_dim.  Hence input_dim should be
 727 |   // 1 greater than the maximum possible input value.
 728 |   optional uint32 input_dim = 2;
 729 | 
 730 |   optional bool bias_term = 3 [default = true]; // Whether to use a bias term
 731 |   optional FillerParameter weight_filler = 4; // The filler for the weight
 732 |   optional FillerParameter bias_filler = 5; // The filler for the bias
 733 | 
 734 | }
 735 | 
 736 | // Message that stores parameters used by ExpLayer
 737 | message ExpParameter {
 738 |   // ExpLayer computes outputs y = base ^ (shift + scale * x), for base > 0.
 739 |   // Or if base is set to the default (-1), base is set to e,
 740 |   // so y = exp(shift + scale * x).
 741 |   optional float base = 1 [default = -1.0]; // -1 selects base e (see above)
 742 |   optional float scale = 2 [default = 1.0]; // multiplier applied to x
 743 |   optional float shift = 3 [default = 0.0]; // offset added to scale * x
 744 | }
 745 | 
 746 | // Message that stores parameters used by FlattenLayer
 747 | message FlattenParameter {
 748 |   // The first axis to flatten: all preceding axes are retained in the output.
 749 |   // May be negative to index from the end (e.g., -1 for the last axis).
 750 |   optional int32 axis = 1 [default = 1];
 751 | 
 752 |   // The last axis to flatten: all following axes are retained in the output.
 753 |   // May be negative to index from the end (e.g., the default -1 for the last
 754 |   // axis).
 755 |   optional int32 end_axis = 2 [default = -1];
 756 | }
 757 | 
 758 | // Message that stores parameters used by HDF5DataLayer
 759 | message HDF5DataParameter {
 760 |   // Specify the data source.
 761 |   optional string source = 1;
 762 |   // Specify the batch size.
 763 |   optional uint32 batch_size = 2;
 764 | 
 765 |   // Specify whether to shuffle the data.
 766 |   // If shuffle == true, the ordering of the HDF5 files is shuffled,
 767 |   // and the ordering of data within any given HDF5 file is shuffled,
 768 |   // but data from different files are not interleaved; all of a file's
 769 |   // data are output (in a random order) before moving on to another file.
 770 |   optional bool shuffle = 3 [default = false];
 771 | }
 772 | 
 773 | message HDF5OutputParameter { // Holds a single optional file name
 774 |   optional string file_name = 1; // NOTE(review): presumably the HDF5 file to write -- confirm
 775 | }
 776 | 
 777 | message HingeLossParameter { // Parameters for the hinge loss
 778 |   enum Norm { // note: values start at 1, not 0
 779 |     L1 = 1;
 780 |     L2 = 2;
 781 |   }
 782 |   // Specify the norm to use: L1 or L2.
 783 |   optional Norm norm = 1 [default = L1];
 784 | }
 785 | 
 786 | message ImageDataParameter { // Parameters for a data layer that reads image files
 787 |   // Specify the data source.
 788 |   optional string source = 1;
 789 |   // Specify the batch size.
 790 |   optional uint32 batch_size = 4 [default = 1];
 791 |   // The rand_skip variable is for the data layer to skip a few data points
 792 |   // so that asynchronous SGD clients do not all start at the same point. The skip
 793 |   // point would be set as rand_skip * rand(0,1). Note that rand_skip should not
 794 |   // be larger than the number of keys in the database.
 795 |   optional uint32 rand_skip = 7 [default = 0];
 796 |   // Whether or not ImageLayer should shuffle the list of files at every epoch.
 797 |   optional bool shuffle = 8 [default = false];
 798 |   // It will also resize images if new_height or new_width are not zero.
 799 |   optional uint32 new_height = 9 [default = 0];
 800 |   optional uint32 new_width = 10 [default = 0];
 801 |   // Specify if the images are color or gray.
 802 |   optional bool is_color = 11 [default = true];
 803 |   // DEPRECATED. See TransformationParameter. For data pre-processing, we can do
 804 |   // simple scaling and subtracting the data mean, if provided. Note that the
 805 |   // mean subtraction is always carried out before scaling.
 806 |   optional float scale = 2 [default = 1];
 807 |   optional string mean_file = 3;
 808 |   // DEPRECATED. See TransformationParameter. Specify if we would like to randomly
 809 |   // crop an image.
 810 |   optional uint32 crop_size = 5 [default = 0];
 811 |   // DEPRECATED. See TransformationParameter. Specify if we want to randomly mirror
 812 |   // data.
 813 |   optional bool mirror = 6 [default = false];
 814 |   optional string root_folder = 12 [default = ""]; // NOTE(review): presumably a base directory for the listed image paths -- confirm
 815 | }
 816 | 
 817 | message InfogainLossParameter { // Parameters for the infogain loss
 818 |   // Specify the infogain matrix source.
 819 |   optional string source = 1; // NOTE(review): presumably a file path -- confirm
 820 | }
 821 | 
 822 | message InnerProductParameter { // Parameters for the inner product computation
 823 |   optional uint32 num_output = 1; // The number of outputs for the layer
 824 |   optional bool bias_term = 2 [default = true]; // whether to have bias terms
 825 |   optional FillerParameter weight_filler = 3; // The filler for the weight
 826 |   optional FillerParameter bias_filler = 4; // The filler for the bias
 827 | 
 828 |   // The first axis to be lumped into a single inner product computation;
 829 |   // all preceding axes are retained in the output.
 830 |   // May be negative to index from the end (e.g., -1 for the last axis).
 831 |   optional int32 axis = 5 [default = 1];
 832 |   // Specify whether to transpose the weight matrix or not.
 833 |   // If transpose == true, any operations will be performed on the transpose
 834 |   // of the weight matrix. The weight matrix itself is not going to be transposed
 835 |   // but rather the transfer flag of operations will be toggled accordingly.
 836 |   optional bool transpose = 6 [default = false];
 837 | }
 838 | 
 839 | message InputParameter { // Shapes for the top blob(s) of an input layer
 840 |   // This layer produces N >= 1 top blob(s) to be assigned manually.
 841 |   // Define N shapes to set a shape for each top.
 842 |   // Define 1 shape to set the same shape for every top.
 843 |   // Define no shape to defer to reshaping manually.
 844 |   repeated BlobShape shape = 1;
 845 | }
 846 | 
 847 | // Message that stores parameters used by LogLayer
 848 | message LogParameter {
 849 |   // LogLayer computes outputs y = log_base(shift + scale * x), for base > 0.
 850 |   // Or if base is set to the default (-1), base is set to e,
 851 |   // so y = ln(shift + scale * x) = log_e(shift + scale * x)
 852 |   optional float base = 1 [default = -1.0]; // -1 selects base e (see above)
 853 |   optional float scale = 2 [default = 1.0]; // multiplier applied to x
 854 |   optional float shift = 3 [default = 0.0]; // offset added to scale * x
 855 | }
 856 | 
 857 | // Message that stores parameters used by LRNLayer
 858 | message LRNParameter {
 859 |   optional uint32 local_size = 1 [default = 5];
 860 |   optional float alpha = 2 [default = 1.];
 861 |   optional float beta = 3 [default = 0.75];
 862 |   enum NormRegion { // normalize across channels or within a single channel
 863 |     ACROSS_CHANNELS = 0;
 864 |     WITHIN_CHANNEL = 1;
 865 |   }
 866 |   optional NormRegion norm_region = 4 [default = ACROSS_CHANNELS];
 867 |   optional float k = 5 [default = 1.];
 868 |   enum Engine { // available implementations; how DEFAULT is resolved is not visible here
 869 |     DEFAULT = 0;
 870 |     CAFFE = 1;
 871 |     CUDNN = 2;
 872 |   }
 873 |   optional Engine engine = 6 [default = DEFAULT];
 874 | }
 875 | 
 876 | message MemoryDataParameter { // Batch size and per-item dimensions; no defaults are declared
 877 |   optional uint32 batch_size = 1;
 878 |   optional uint32 channels = 2;
 879 |   optional uint32 height = 3;
 880 |   optional uint32 width = 4;
 881 | }
 882 | 
 883 | message MVNParameter { // Parameters for mean (and optionally variance) normalization
 884 |   // This parameter can be set to false to normalize mean only.
 885 |   optional bool normalize_variance = 1 [default = true];
 886 | 
 887 |   // This parameter can be set to true to perform DNN-like MVN.
 888 |   optional bool across_channels = 2 [default = false];
 889 | 
 890 |   // Epsilon for not dividing by zero while normalizing variance.
 891 |   optional float eps = 3 [default = 1e-9];
 892 | }
 893 | 
 894 | message ParameterParameter { // Holds a single optional blob shape
 895 |   optional BlobShape shape = 1;
 896 | }
 897 | 
 898 | message PoolingParameter { // Parameters for 2D pooling (height/width)
 899 |   enum PoolMethod { // maximum, average, or stochastic pooling
 900 |     MAX = 0;
 901 |     AVE = 1;
 902 |     STOCHASTIC = 2;
 903 |   }
 904 |   optional PoolMethod pool = 1 [default = MAX]; // The pooling method
 905 |   // Pad, kernel size, and stride are all given as a single value for equal
 906 |   // dimensions in height and width or as Y, X pairs.
 907 |   optional uint32 pad = 4 [default = 0]; // The padding size (equal in Y, X)
 908 |   optional uint32 pad_h = 9 [default = 0]; // The padding height
 909 |   optional uint32 pad_w = 10 [default = 0]; // The padding width
 910 |   optional uint32 kernel_size = 2; // The kernel size (square)
 911 |   optional uint32 kernel_h = 5; // The kernel height
 912 |   optional uint32 kernel_w = 6; // The kernel width
 913 |   optional uint32 stride = 3 [default = 1]; // The stride (equal in Y, X)
 914 |   optional uint32 stride_h = 7; // The stride height
 915 |   optional uint32 stride_w = 8; // The stride width
 916 |   enum Engine { // available implementations; how DEFAULT is resolved is not visible here
 917 |     DEFAULT = 0;
 918 |     CAFFE = 1;
 919 |     CUDNN = 2;
 920 |   }
 921 |   optional Engine engine = 11 [default = DEFAULT];
 922 |   // If global_pooling is set, pooling covers the whole bottom, i.e.
 923 |   // kernel_h = bottom->height and kernel_w = bottom->width.
 924 |   optional bool global_pooling = 12 [default = false];
 925 | }
 926 | 
 927 | message PowerParameter { // Message that stores parameters used by PowerLayer
 928 |   // PowerLayer computes outputs y = (shift + scale * x) ^ power.
 929 |   optional float power = 1 [default = 1.0]; // exponent
 930 |   optional float scale = 2 [default = 1.0]; // multiplier applied to x
 931 |   optional float shift = 3 [default = 0.0]; // offset added to scale * x
 932 | }
 933 | 
 934 | message PSROIPoolingParameter { // All three fields are required
 935 |    required float spatial_scale = 1; // NOTE(review): presumably scales ROI coordinates to the feature-map scale -- confirm
 936 |    required int32 output_dim = 2; // output channel number
 937 |    required int32 group_size = 3; // number of groups to encode position-sensitive score maps
 938 |  }
 939 | 
 940 | message PythonParameter { // Parameters identifying and configuring a Python-implemented layer
 941 |   optional string module = 1; // NOTE(review): presumably the Python module that defines the layer -- confirm
 942 |   optional string layer = 2; // NOTE(review): presumably the layer class name inside `module` -- confirm
 943 |   // This value is set to the attribute `param_str` of the `PythonLayer` object
 944 |   // in Python before calling the `setup()` method. This could be a number,
 945 |   // string, dictionary in Python dict format, JSON, etc. You may parse this
 946 |   // string in the `setup` method and use it in `forward` and `backward`.
 947 |   optional string param_str = 3 [default = ''];
 948 |   // Whether this PythonLayer is shared among worker solvers during data parallelism.
 949 |   // If true, each worker solver runs forward from this layer sequentially.
 950 |   // This value should be set to true if you are using it as a data layer.
 951 |   optional bool share_in_parallel = 4 [default = false];
 952 | }
 953 | 
 954 | // Message that stores parameters used by RecurrentLayer
 955 | message RecurrentParameter {
 956 |   // The dimension of the output (and usually hidden state) representation --
 957 |   // must be explicitly set to non-zero (the declared default of 0 is not a usable value).
 958 |   optional uint32 num_output = 1 [default = 0];
 959 | 
 960 |   optional FillerParameter weight_filler = 2; // The filler for the weight
 961 |   optional FillerParameter bias_filler = 3; // The filler for the bias
 962 | 
 963 |   // Whether to enable displaying debug_info in the unrolled recurrent net.
 964 |   optional bool debug_info = 4 [default = false];
 965 | 
 966 |   // Whether to add as additional inputs (bottoms) the initial hidden state
 967 |   // blobs, and add as additional outputs (tops) the final timestep hidden state
 968 |   // blobs.  The number of additional bottom/top blobs required depends on the
 969 |   // recurrent architecture -- e.g., 1 for RNNs, 2 for LSTMs.
 970 |   optional bool expose_hidden = 5 [default = false];
 971 | }
 972 | 
 973 | // Message that stores parameters used by ReductionLayer
 974 | message ReductionParameter {
 975 |   enum ReductionOp { // note: values start at 1, not 0
 976 |     SUM = 1;
 977 |     ASUM = 2; // NOTE(review): presumably sum of absolute values -- confirm
 978 |     SUMSQ = 3; // NOTE(review): presumably sum of squares -- confirm
 979 |     MEAN = 4;
 980 |   }
 981 | 
 982 |   optional ReductionOp operation = 1 [default = SUM]; // reduction operation
 983 | 
 984 |   // The first axis to reduce to a scalar -- may be negative to index from the
 985 |   // end (e.g., -1 for the last axis).
 986 |   // (Currently, only reduction along ALL "tail" axes is supported; reduction
 987 |   // of axis M through N, where N < num_axes - 1, is unsupported.)
 988 |   // Suppose we have an n-axis bottom Blob with shape:
 989 |   //     (d0, d1, d2, ..., d(m-1), dm, d(m+1), ..., d(n-1)).
 990 |   // If axis == m, the output Blob will have shape
 991 |   //     (d0, d1, d2, ..., d(m-1)),
 992 |   // and the ReductionOp operation is performed (d0 * d1 * d2 * ... * d(m-1))
 993 |   // times, each including (dm * d(m+1) * ... * d(n-1)) individual data.
 994 |   // If axis == 0 (the default), the output Blob always has the empty shape
 995 |   // (count 1), performing reduction across the entire input --
 996 |   // often useful for creating new loss functions.
 997 |   optional int32 axis = 2 [default = 0];
 998 | 
 999 |   optional float coeff = 3 [default = 1.0]; // coefficient for output
1000 | }
1001 | 
1002 | // Message that stores parameters used by ReLULayer
1003 | message ReLUParameter {
1004 |   // Allow non-zero slope for negative inputs to speed up optimization.
1005 |   // Described in:
1006 |   // Maas, A. L., Hannun, A. Y., & Ng, A. Y. (2013). Rectifier nonlinearities
1007 |   // improve neural network acoustic models. In ICML Workshop on Deep Learning
1008 |   // for Audio, Speech, and Language Processing.
1009 |   optional float negative_slope = 1 [default = 0];
1010 |   enum Engine { // available implementations; how DEFAULT is resolved is not visible here
1011 |     DEFAULT = 0;
1012 |     CAFFE = 1;
1013 |     CUDNN = 2;
1014 |   }
1015 |   optional Engine engine = 2 [default = DEFAULT];
1016 | }
1017 | 
1018 | message ReshapeParameter { // Message that stores parameters used by ReshapeLayer
1019 |   // Specify the output dimensions. If some of the dimensions are set to 0,
1020 |   // the corresponding dimension from the bottom layer is used (unchanged).
1021 |   // Exactly one dimension may be set to -1, in which case its value is
1022 |   // inferred from the count of the bottom blob and the remaining dimensions.
1023 |   // For example, suppose we want to reshape a 2D blob "input" with shape 2 x 8:
1024 |   //
1025 |   //   layer {
1026 |   //     type: "Reshape" bottom: "input" top: "output"
1027 |   //     reshape_param { ... }
1028 |   //   }
1029 |   //
1030 |   // If "input" is 2D with shape 2 x 8, then the following reshape_param
1031 |   // specifications are all equivalent, producing a 3D blob "output" with shape
1032 |   // 2 x 2 x 4:
1033 |   //
1034 |   //   reshape_param { shape { dim:  2  dim: 2  dim:  4 } }
1035 |   //   reshape_param { shape { dim:  0  dim: 2  dim:  4 } }
1036 |   //   reshape_param { shape { dim:  0  dim: 2  dim: -1 } }
1037 |   //   reshape_param { shape { dim:  0  dim:-1  dim:  4 } }
1038 |   //
1039 |   optional BlobShape shape = 1;
1040 | 
1041 |   // axis and num_axes control the portion of the bottom blob's shape that is
1042 |   // replaced by (included in) the reshape. By default (axis == 0 and
1043 |   // num_axes == -1), the entire bottom blob shape is included in the reshape,
1044 |   // and hence the shape field must specify the entire output shape.
1045 |   //
1046 |   // axis may be non-zero to retain some portion of the beginning of the input
1047 |   // shape (and may be negative to index from the end; e.g., -1 to begin the
1048 |   // reshape after the last axis, including nothing in the reshape,
1049 |   // -2 to include only the last axis, etc.).
1050 |   //
1051 |   // For example, suppose "input" is a 2D blob with shape 2 x 8.
1052 |   // Then the following ReshapeLayer specifications are all equivalent,
1053 |   // producing a blob "output" with shape 2 x 2 x 4:
1054 |   //
1055 |   //   reshape_param { shape { dim: 2  dim: 2  dim: 4 } }
1056 |   //   reshape_param { shape { dim: 2  dim: 4 } axis:  1 }
1057 |   //   reshape_param { shape { dim: 2  dim: 4 } axis: -3 }
1058 |   //
1059 |   // num_axes specifies the extent of the reshape.
1060 |   // If num_axes >= 0 (and axis >= 0), the reshape will be performed only on
1061 |   // input axes in the half-open range [axis, axis+num_axes) (see examples below).
1062 |   // num_axes may also be -1, the default, to include all remaining axes
1063 |   // (starting from axis).
1064 |   //
1065 |   // For example, suppose "input" is a 2D blob with shape 2 x 8.
1066 |   // Then the following ReshapeLayer specifications are equivalent,
1067 |   // producing a blob "output" with shape 1 x 2 x 8.
1068 |   //
1069 |   //   reshape_param { shape { dim:  1  dim: 2  dim:  8 } }
1070 |   //   reshape_param { shape { dim:  1  dim: 2  }  num_axes: 1 }
1071 |   //   reshape_param { shape { dim:  1  }  num_axes: 0 }
1072 |   //
1073 |   // On the other hand, these would produce output blob shape 2 x 1 x 8:
1074 |   //
1075 |   //   reshape_param { shape { dim: 2  dim: 1  dim: 8  }  }
1076 |   //   reshape_param { shape { dim: 1 }  axis: 1  num_axes: 0 }
1077 |   //
1078 |   optional int32 axis = 2 [default = 0];
1079 |   optional int32 num_axes = 3 [default = -1];
1080 | }
1081 | 
1082 | // Message that stores parameters used by ROIPoolingLayer
1083 | message ROIPoolingParameter {
1084 |   // The size of the pooled output is given separately for height and width
1085 |   // (pooled_h, pooled_w); both default to 0.
1086 |   optional uint32 pooled_h = 1 [default = 0]; // The pooled output height
1087 |   optional uint32 pooled_w = 2 [default = 0]; // The pooled output width
1088 |   // Multiplicative spatial scale factor to translate ROI coords from their
1089 |   // input scale to the scale used when pooling
1090 |   optional float spatial_scale = 3 [default = 1];
1091 | }
1092 | 
1093 | message ScaleParameter { // Message that stores parameters used by ScaleLayer
1094 |   // The first axis of bottom[0] (the first input Blob) along which to apply
1095 |   // bottom[1] (the second input Blob).  May be negative to index from the end
1096 |   // (e.g., -1 for the last axis).
1097 |   //
1098 |   // For example, if bottom[0] is 4D with shape 100x3x40x60, the output
1099 |   // top[0] will have the same shape, and bottom[1] may have any of the
1100 |   // following shapes (for the given value of axis):
1101 |   //    (axis == 0 == -4) 100; 100x3; 100x3x40; 100x3x40x60
1102 |   //    (axis == 1 == -3)          3;     3x40;     3x40x60
1103 |   //    (axis == 2 == -2)                   40;       40x60
1104 |   //    (axis == 3 == -1)                                60
1105 |   // Furthermore, bottom[1] may have the empty shape (regardless of the value of
1106 |   // "axis") -- a scalar multiplier.
1107 |   optional int32 axis = 1 [default = 1];
1108 | 
1109 |   // (num_axes is ignored unless just one bottom is given and the scale is
1110 |   // a learned parameter of the layer.  Otherwise, num_axes is determined by the
1111 |   // number of axes of the second bottom.)
1112 |   // The number of axes of the input (bottom[0]) covered by the scale
1113 |   // parameter, or -1 to cover all axes of bottom[0] starting from `axis`.
1114 |   // Set num_axes := 0, to multiply with a zero-axis Blob: a scalar.
1115 |   optional int32 num_axes = 2 [default = 1];
1116 | 
1117 |   // (filler is ignored unless just one bottom is given and the scale is
1118 |   // a learned parameter of the layer.)
1119 |   // The initialization for the learned scale parameter.
1120 |   // Default is the unit (1) initialization, resulting in the ScaleLayer
1121 |   // initially performing the identity operation.
1122 |   optional FillerParameter filler = 3;
1123 | 
1124 |   // Whether to also learn a bias (equivalent to a ScaleLayer+BiasLayer, but
1125 |   // may be more efficient).  Initialized with bias_filler (defaults to 0).
1126 |   optional bool bias_term = 4 [default = false];
1127 |   optional FillerParameter bias_filler = 5;
1128 | }
1129 | 
1130 | message SigmoidParameter { // Only carries the engine choice
1131 |   enum Engine { // available implementations; how DEFAULT is resolved is not visible here
1132 |     DEFAULT = 0;
1133 |     CAFFE = 1;
1134 |     CUDNN = 2;
1135 |   }
1136 |   optional Engine engine = 1 [default = DEFAULT];
1137 | }
1138 | 
1139 | message SliceParameter { // Message that stores parameters used by SliceLayer
1140 |   // The axis along which to slice -- may be negative to index from the end
1141 |   // (e.g., -1 for the last axis).
1142 |   // By default, SliceLayer slices blobs along the "channels" axis (1).
1143 |   optional int32 axis = 3 [default = 1];
1144 |   repeated uint32 slice_point = 2;
1145 | 
1146 |   // DEPRECATED: alias for "axis" -- unsigned, so it does not support negative indexing.
1147 |   optional uint32 slice_dim = 1 [default = 1];
1148 | }
1149 | 
1150 | message SmoothL1LossParameter { // Parameter of the smooth L1 loss defined below
1151 |   // SmoothL1Loss(x) =
1152 |   //   0.5 * (sigma * x) ** 2    -- if |x| < 1.0 / sigma / sigma
1153 |   //   |x| - 0.5 / sigma / sigma -- otherwise
1154 |   optional float sigma = 1 [default = 1];
1155 | }
1156 | 
1157 | // Message that stores parameters used by SoftmaxLayer, SoftmaxWithLossLayer
1158 | message SoftmaxParameter {
1159 |   enum Engine { // available implementations; how DEFAULT is resolved is not visible here
1160 |     DEFAULT = 0;
1161 |     CAFFE = 1;
1162 |     CUDNN = 2;
1163 |   }
1164 |   optional Engine engine = 1 [default = DEFAULT];
1165 | 
1166 |   // The axis along which to perform the softmax -- may be negative to index
1167 |   // from the end (e.g., -1 for the last axis).
1168 |   // Each position along the other axes is evaluated as an independent softmax.
1169 |   optional int32 axis = 2 [default = 1];
1170 | }
1171 | 
1172 | message TanHParameter { // Only carries the engine choice
1173 |   enum Engine { // available implementations; how DEFAULT is resolved is not visible here
1174 |     DEFAULT = 0;
1175 |     CAFFE = 1;
1176 |     CUDNN = 2;
1177 |   }
1178 |   optional Engine engine = 1 [default = DEFAULT];
1179 | }
1180 | 
1181 | // Message that stores parameters used by TileLayer
1182 | message TileParameter {
1183 |   // The index of the axis to tile (defaults to 1).
1184 |   optional int32 axis = 1 [default = 1];
1185 | 
1186 |   // The number of copies (tiles) of the blob to output; no default is declared.
1187 |   optional int32 tiles = 2;
1188 | }
1189 | 
1190 | // Message that stores parameters used by ThresholdLayer
1191 | message ThresholdParameter {
1192 |   optional float threshold = 1 [default = 0]; // Strictly positive values (NOTE(review): default is 0 -- confirm intended meaning)
1193 | }
1194 | 
1195 | // Message that stores parameters used by MILLayer
1196 | message MILParameter {
1197 |   enum MILType {
1198 |     MAX = 0;
1199 |     NOR = 1; // NOTE(review): presumably noisy-OR -- confirm
1200 |   }
1201 |   optional MILType type = 1 [default = MAX]; // The MIL method
1202 | }
1203 | 
1204 | 
// Message that stores parameters for the window data layer; referenced as
// V1LayerParameter.window_data_param.
message WindowDataParameter {
  // Specify the data source.
  optional string source = 1;
  // For data pre-processing, we can do simple scaling and subtracting the
  // data mean, if provided. Note that the mean subtraction is always carried
  // out before scaling.
  optional float scale = 2 [default = 1];
  optional string mean_file = 3;
  // Specify the batch size.
  optional uint32 batch_size = 4;
  // Specify if we would like to randomly crop an image.
  // The default of 0 presumably disables cropping -- TODO confirm.
  optional uint32 crop_size = 5 [default = 0];
  // Specify if we want to randomly mirror data.
  optional bool mirror = 6 [default = false];
  // Foreground (object) overlap threshold
  optional float fg_threshold = 7 [default = 0.5];
  // Background (non-object) overlap threshold
  optional float bg_threshold = 8 [default = 0.5];
  // Fraction of batch that should be foreground objects
  optional float fg_fraction = 9 [default = 0.25];
  // Amount of contextual padding to add around a window
  // (used only by the window_data_layer)
  optional uint32 context_pad = 10 [default = 0];
  // Mode for cropping out a detection window
  // warp: cropped window is warped to a fixed size and aspect ratio
  // square: the tightest square around the window is cropped
  optional string crop_mode = 11 [default = "warp"];
  // cache_images: will load all images in memory for faster access
  optional bool cache_images = 12 [default = false];
  // append root_folder to locate images
  optional string root_folder = 13 [default = ""];
}
1237 | 
// Message that stores parameters for the MIL data layer.
// Field numbers are not assigned in declaration order and 8-10 are unused;
// keep the existing numbers when editing.
message MILDataParameter {
  // Specify the data source.
  optional string source = 1;
  
  // Number of scales for each image
  optional uint32 num_scales = 2 [default = 1];
  
  // Side length ratio between neighbouring scales
  optional float scale_factor = 6 [default = 1];
  
  // Number of channels in the image
  optional uint32 channels = 4 [default = 3];
  
  // Specify the number of images per batch
  optional uint32 images_per_batch = 3;
  // Specify the number of classes
  optional uint32 n_classes = 5;
  // Specify the label file.
  // NOTE(review): the original comment mentioned "box_dir and label_dir",
  // which does not match this single label_file field -- confirm what the
  // layer expects here.
  optional string label_file = 7;
  
  // Root directory which contains all the images
  optional string root_dir = 11;
  // Extension for the file
  optional string ext = 12;
  
  // To randomize or not
  optional bool randomize = 13 [default = true];
}
1266 | 
1267 | 
1268 | 
// Message that stores parameters for the SPP layer.
// NOTE(review): SPP presumably stands for spatial pyramid pooling, judging by
// the pyramid_height and pool fields -- confirm.
message SPPParameter {
  // Pooling methods selectable through the pool field.
  enum PoolMethod {
    MAX = 0;
    AVE = 1;
    STOCHASTIC = 2;
  }
  // Height of the pyramid; no default is declared.
  optional uint32 pyramid_height = 1;
  optional PoolMethod pool = 2 [default = MAX]; // The pooling method
  // Selects which implementation runs the layer.
  enum Engine {
    DEFAULT = 0;
    CAFFE = 1;
    CUDNN = 2;
  }
  // Uses field number 6; numbers 3-5 are not used by this message.
  optional Engine engine = 6 [default = DEFAULT];
}
1284 | 
// DEPRECATED: use LayerParameter.
// Legacy (V1) layer description. The layer kind is selected with the LayerType
// enum below, and each kind has its own optional *_param message. Field and
// enum numbers are not sequential; they identify the entries in serialized
// data, so keep them unchanged when editing.
message V1LayerParameter {
  // Names of the bottom and top blobs (inputs and outputs in Caffe's
  // terminology) and the name of the layer.
  repeated string bottom = 2;
  repeated string top = 3;
  optional string name = 4;
  // Rules of type NetStateRule deciding whether the layer is included or
  // excluded.
  repeated NetStateRule include = 32;
  repeated NetStateRule exclude = 33;
  // Layer kinds selectable through the type field. Entries are listed roughly
  // alphabetically, not by numeric value.
  enum LayerType {
    NONE = 0;
    ABSVAL = 35;
    ACCURACY = 1;
    ARGMAX = 30;
    BNLL = 2;
    CONCAT = 3;
    CONTRASTIVE_LOSS = 37;
    CONVOLUTION = 4;
    DATA = 5;
    DECONVOLUTION = 39;
    DROPOUT = 6;
    DUMMY_DATA = 32;
    EUCLIDEAN_LOSS = 7;
    ELTWISE = 25;
    EXP = 38;
    FLATTEN = 8;
    HDF5_DATA = 9;
    HDF5_OUTPUT = 10;
    HINGE_LOSS = 28;
    IM2COL = 11;
    IMAGE_DATA = 12;
    INFOGAIN_LOSS = 13;
    INNER_PRODUCT = 14;
    LRN = 15;
    MEMORY_DATA = 29;
    MULTINOMIAL_LOGISTIC_LOSS = 16;
    MVN = 34;
    POOLING = 17;
    POWER = 26;
    RELU = 18;
    SIGMOID = 19;
    SIGMOID_CROSS_ENTROPY_LOSS = 27;
    SILENCE = 36;
    SOFTMAX = 20;
    SOFTMAX_LOSS = 21;
    SPLIT = 22;
    SLICE = 33;
    TANH = 23;
    WINDOW_DATA = 24;
    THRESHOLD = 31;
  }
  optional LayerType type = 5;
  // Numeric parameter blobs of the layer.
  repeated BlobProto blobs = 6;
  repeated string param = 1001;
  // DimCheckMode is declared just below its first use here.
  repeated DimCheckMode blob_share_mode = 1002;
  enum DimCheckMode {
    STRICT = 0;
    PERMISSIVE = 1;
  }
  // Per-blob learning-rate and weight-decay values, and loss weights.
  // NOTE(review): presumably multipliers on the global solver settings, as
  // documented on the same-named V0LayerParameter fields -- confirm.
  repeated float blobs_lr = 7;
  repeated float weight_decay = 8;
  repeated float loss_weight = 35;
  // Layer-kind-specific parameter messages; all optional.
  optional AccuracyParameter accuracy_param = 27;
  optional ArgMaxParameter argmax_param = 23;
  optional ConcatParameter concat_param = 9;
  optional ContrastiveLossParameter contrastive_loss_param = 40;
  optional ConvolutionParameter convolution_param = 10;
  optional DataParameter data_param = 11;
  optional DropoutParameter dropout_param = 12;
  optional DummyDataParameter dummy_data_param = 26;
  optional EltwiseParameter eltwise_param = 24;
  optional ExpParameter exp_param = 41;
  optional HDF5DataParameter hdf5_data_param = 13;
  optional HDF5OutputParameter hdf5_output_param = 14;
  optional HingeLossParameter hinge_loss_param = 29;
  optional ImageDataParameter image_data_param = 15;
  optional InfogainLossParameter infogain_loss_param = 16;
  optional InnerProductParameter inner_product_param = 17;
  optional LRNParameter lrn_param = 18;
  optional MemoryDataParameter memory_data_param = 22;
  optional MVNParameter mvn_param = 34;
  optional PoolingParameter pooling_param = 19;
  optional PowerParameter power_param = 21;
  optional ReLUParameter relu_param = 30;
  optional SigmoidParameter sigmoid_param = 38;
  optional SoftmaxParameter softmax_param = 39;
  optional SliceParameter slice_param = 31;
  optional TanHParameter tanh_param = 37;
  optional ThresholdParameter threshold_param = 25;
  optional WindowDataParameter window_data_param = 20;
  optional TransformationParameter transform_param = 36;
  optional LossParameter loss_param = 42;
  // Embedded V0 layer description.
  // NOTE(review): presumably only populated while upgrading V0 nets --
  // confirm.
  optional V0LayerParameter layer = 1;
}
1377 | 
// DEPRECATED: V0LayerParameter is the old way of specifying layer parameters
// in Caffe.  We keep this message type around for legacy support.
// All settings for every layer kind live in this one flat message; the layer
// kind itself is given as a string in the type field.
message V0LayerParameter {
  optional string name = 1; // the layer name
  optional string type = 2; // the string to specify the layer type

  // Parameters to specify layers with inner products.
  optional uint32 num_output = 3; // The number of outputs for the layer
  optional bool biasterm = 4 [default = true]; // whether to have bias terms
  optional FillerParameter weight_filler = 5; // The filler for the weight
  optional FillerParameter bias_filler = 6; // The filler for the bias

  optional uint32 pad = 7 [default = 0]; // The padding size
  optional uint32 kernelsize = 8; // The kernel size
  optional uint32 group = 9 [default = 1]; // The group size for group conv
  optional uint32 stride = 10 [default = 1]; // The stride
  // Pooling methods selectable through the pool field.
  enum PoolMethod {
    MAX = 0;
    AVE = 1;
    STOCHASTIC = 2;
  }
  optional PoolMethod pool = 11 [default = MAX]; // The pooling method
  optional float dropout_ratio = 12 [default = 0.5]; // dropout ratio

  optional uint32 local_size = 13 [default = 5]; // for local response norm
  optional float alpha = 14 [default = 1.]; // for local response norm
  optional float beta = 15 [default = 0.75]; // for local response norm
  // NOTE(review): presumably also a local response norm setting, given its
  // position next to alpha and beta -- confirm.
  optional float k = 22 [default = 1.];

  // For data layers, specify the data source
  optional string source = 16;
  // For data pre-processing, we can do simple scaling and subtracting the
  // data mean, if provided. Note that the mean subtraction is always carried
  // out before scaling.
  optional float scale = 17 [default = 1];
  optional string meanfile = 18;
  // For data layers, specify the batch size.
  optional uint32 batchsize = 19;
  // For data layers, specify if we would like to randomly crop an image.
  optional uint32 cropsize = 20 [default = 0];
  // For data layers, specify if we want to randomly mirror data.
  optional bool mirror = 21 [default = false];

  // The blobs containing the numeric parameters of the layer
  repeated BlobProto blobs = 50;
  // The ratio that is multiplied on the global learning rate. If you want to
  // set the learning ratio for one blob, you need to set it for all blobs.
  repeated float blobs_lr = 51;
  // The weight decay that is multiplied on the global weight decay.
  repeated float weight_decay = 52;

  // The rand_skip variable is for the data layer to skip a few data points
  // to avoid all asynchronous sgd clients to start at the same point. The skip
  // point would be set as rand_skip * rand(0,1). Note that rand_skip should not
  // be larger than the number of keys in the database.
  optional uint32 rand_skip = 53 [default = 0];

  // Fields related to detection (det_*)
  // foreground (object) overlap threshold
  optional float det_fg_threshold = 54 [default = 0.5];
  // background (non-object) overlap threshold
  optional float det_bg_threshold = 55 [default = 0.5];
  // Fraction of batch that should be foreground objects
  optional float det_fg_fraction = 56 [default = 0.25];

  // Field number 57 belonged to the obsolete field kept below as a comment;
  // do not reuse the number for a new field.
  // optional bool OBSOLETE_can_clobber = 57 [default = true];

  // Amount of contextual padding to add around a window
  // (used only by the window_data_layer)
  optional uint32 det_context_pad = 58 [default = 0];

  // Mode for cropping out a detection window
  // warp: cropped window is warped to a fixed size and aspect ratio
  // square: the tightest square around the window is cropped
  optional string det_crop_mode = 59 [default = "warp"];

  // For ReshapeLayer, one needs to specify the new dimensions.
  optional int32 new_num = 60 [default = 0];
  optional int32 new_channels = 61 [default = 0];
  optional int32 new_height = 62 [default = 0];
  optional int32 new_width = 63 [default = 0];

  // Whether or not ImageLayer should shuffle the list of files at every epoch.
  // It will also resize images if new_height or new_width are not zero.
  optional bool shuffle_images = 64 [default = false];

  // For ConcatLayer, one needs to specify the dimension for concatenation, and
  // the other dimensions must be the same for all the bottom blobs.
  // By default it will concatenate blobs along the channels dimension.
  optional uint32 concat_dim = 65 [default = 1];

  optional HDF5OutputParameter hdf5_output_param = 1001;
}
1471 | 
// Message that stores parameters for the PReLU (parametric ReLU) layer.
message PReLUParameter {
  // Parametric ReLU described in K. He et al, Delving Deep into Rectifiers:
  // Surpassing Human-Level Performance on ImageNet Classification, 2015.

  // Initial value of a_i. Default is a_i=0.25 for all i.
  // NOTE(review): the 0.25 default is not declared on this field, so it is
  // presumably applied by the layer when no filler is given -- confirm.
  optional FillerParameter filler = 1;
  // Whether or not slope parameters are shared across channels.
  optional bool channel_shared = 2 [default = false];
}
1481 | 


--------------------------------------------------------------------------------
/code/run.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Copyright (c) 2017-present, starime.
  3 | All rights reserved.
  4 | 
  5 | This source code is licensed under the BSD-style license found in the
  6 | LICENSE file in the root directory of this source tree. An additional grant
  7 | of patent rights can be found in the PATENTS file in the same directory.
  8 | """
  9 | 
 10 | import os
 11 | import torch
 12 | import torch._utils
# Compatibility shim: make sure torch._utils exposes _rebuild_tensor_v2.
# NOTE(review): presumably needed so that checkpoints pickled by a newer
# PyTorch can be unpickled by an older install lacking this helper -- confirm.
try:
    # Attribute access only; raises AttributeError when the helper is missing.
    torch._utils._rebuild_tensor_v2
except AttributeError:
    def _rebuild_tensor_v2(storage, storage_offset, size, stride, requires_grad, backward_hooks):
        """Rebuild a tensor with _rebuild_tensor, then set its grad attributes."""
        tensor = torch._utils._rebuild_tensor(storage, storage_offset, size, stride)
        tensor.requires_grad = requires_grad
        tensor._backward_hooks = backward_hooks
        return tensor
    # Install the fallback where the rest of torch looks it up.
    torch._utils._rebuild_tensor_v2 = _rebuild_tensor_v2
 22 | 
 23 | import torchvision
 24 | 
 25 | from ConvertModel import ConvertModel_caffe
 26 | from ConvertModel import ConvertModel_ncnn
 27 | 
 28 | from ReplaceDenormals import ReplaceDenormals
 29 | 
 30 | 
 31 | """ Import your net structure here """
 32 | 
 33 | """  ResNet  """
 34 | os.sys.path.append('../ModelFiles/ResNet')
 35 | import resnet
 36 | 
 37 | """  MobileNet  """
 38 | os.sys.path.append('../ModelFiles/MobileNet')
 39 | from MobileNet import MobileNet
 40 | 
 41 | """  UNet  """
 42 | os.sys.path.append('../ModelFiles/UNet')
 43 | import UNet
 44 | 
 45 | """  FaceBoxes  """
 46 | os.sys.path.append('../ModelFiles/FaceBoxes')
 47 | from FaceBoxes import FaceBoxes
 48 | 
 49 | """  Anime Gan  """
 50 | os.sys.path.append('../ModelFiles/_netG_1')
 51 | import models
 52 | 
 53 | 
 54 | def GenModelZoo():
 55 |     """  Specify the input shape and model initializing param  """
 56 |     return {
 57 |         0: (torchvision.models.squeezenet1_1, [1, 3, 224, 224], [True], {}),
 58 |         1: (resnet.resnet50, [1, 3, 224, 224], [True], {}),
 59 |         2: (torchvision.models.densenet121, [1, 3, 224, 224], [False], {}),
 60 |         3: (MobileNet, [1, 3, 224, 224], [], {}),
 61 | 
 62 |         17: (models._netG_1, [1, 100, 1, 1], [1, 100, 3, 64, 1], {}),
 63 |         18: (FaceBoxes, [1, 3, 224, 224], [], {}),
 64 |         20: (UNet.UNet, [1, 3, 64, 64], [2], {}),
 65 |     }
 66 | 
 67 | 
 68 | """  Set empty path to use default weight initialization  """
 69 | # model_path = '../ModelFiles/ResNet/resnet50.pth'
 70 | model_path = ''
 71 | 
 72 | ModelZoo = GenModelZoo()
 73 | ModelDir = '../ModelFiles/'
 74 | 
 75 | """  Set to caffe or ncnn  """
 76 | dst = 'ncnn'
 77 | 
 78 | for i in range(18, 19):
 79 |     if i not in ModelZoo:
 80 |         continue
 81 | 
 82 |     ModuleFunc, InputShape, args, kwargs = ModelZoo[i]
 83 |     """  Init pytorch model  """
 84 |     pytorch_net = ModuleFunc(*args, **kwargs)
 85 | 
 86 |     if model_path != '':
 87 |         try:
 88 |             pytorch_net.load_state_dict(torch.load(model_path, map_location=lambda storage, loc: storage))
 89 |         except AttributeError:
 90 |             pytorch_net = torch.load(model_path, map_location=lambda storage, loc: storage)
 91 |     else:
 92 |         NetName = str(pytorch_net.__class__.__name__)
 93 |         if not os.path.exists(ModelDir + NetName):
 94 |             os.makedirs(ModelDir + NetName)
 95 |         print 'Saving default weight initialization...'
 96 |         torch.save(pytorch_net.state_dict(), ModelDir + NetName + '/' + NetName + '.pth')
 97 | 
 98 |     """ Replace denormal weight values(<1e-30), otherwise may increase forward time cost """
 99 |     ReplaceDenormals(pytorch_net)
100 | 
101 |     """  Connnnnnnnvert!  """
102 |     print('Converting...')
103 |     if dst == 'caffe':
104 |         text_net, binary_weights = ConvertModel_caffe(pytorch_net, InputShape, softmax=False)
105 |     elif dst == 'ncnn':
106 |         text_net, binary_weights = ConvertModel_ncnn(pytorch_net, InputShape, softmax=False)
107 | 
108 |     """  Save files  """
109 |     NetName = str(pytorch_net.__class__.__name__)
110 |     if not os.path.exists(ModelDir + NetName):
111 |         os.makedirs(ModelDir + NetName)
112 |     print('Saving to ' + ModelDir + NetName)
113 | 
114 |     if dst == 'caffe':
115 |         import google.protobuf.text_format
116 |         with open(ModelDir + NetName + '/' + NetName + '.prototxt', 'w') as f:
117 |             f.write(google.protobuf.text_format.MessageToString(text_net))
118 |         with open(ModelDir + NetName + '/' + NetName + '.caffemodel', 'w') as f:
119 |             f.write(binary_weights.SerializeToString())
120 | 
121 |     elif dst == 'ncnn':
122 |         import numpy as np
123 |         with open(ModelDir + NetName + '/' + NetName + '.param', 'w') as f:
124 |             f.write(text_net)
125 |         with open(ModelDir + NetName + '/' + NetName + '.bin', 'w') as f:
126 |             for weights in binary_weights:
127 |                 for blob in weights:
128 |                     blob_32f = blob.flatten().astype(np.float32)
129 |                     blob_32f.tofile(f)
130 | 
131 |     print('Converting Done.')
132 | 
133 |     """  Test & Compare(optional)  """
134 |     # from test import TestAndCompare
135 |     # TestAndCompare(i, pytorch_net, InputShape, 'Addmm_1', UseImage=False)
136 | 


--------------------------------------------------------------------------------
/code/test.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | os.sys.path.append('/home/starimeliu/Documents/caffe/caffe-master/python')
  3 | 
  4 | import caffe
  5 | import numpy as np
  6 | import cv2
  7 | import torch.nn as nn
  8 | from torchvision import transforms
  9 | 
 10 | 
 11 | def PrintLabel(prob):
 12 |     labels_filename = '../TestData/ImageNetLabels.txt'
 13 |     labels = np.loadtxt(labels_filename, str, delimiter='\t')
 14 |     order = prob.argsort()
 15 |     for i in range(3):
 16 |         print(labels[order[-1 - i]], prob[order[-1 - i]])
 17 | 
 18 | 
 19 | def TestCaffe(proto_path, model_path, inputs, LayerCheck, ModelInd):
 20 |     net = caffe.Net(proto_path, model_path, caffe.TEST)
 21 |     net.blobs['data'].data[...] = inputs
 22 |     print('input blob:')
 23 |     print(net.blobs['data'].data[...])
 24 | 
 25 |     net.forward()
 26 | 
 27 |     if LayerCheck == 'Softmax_1':
 28 |         PrintLabel(net.blobs[LayerCheck].data[0].flatten())
 29 |     else:
 30 |         print(net.blobs[LayerCheck].data[0][...].flatten())
 31 |         if (ModelInd == 17):
 32 |             result_img = net.blobs[LayerCheck].data[0] * 255
 33 |             result_img = result_img.astype(int)
 34 |             result_img = np.transpose(result_img, (1, 2, 0))
 35 |             result_img = result_img[..., ::-1]
 36 |             cv2.imwrite("AnimeNet_result.png", result_img)
 37 |         if (ModelInd == 91):
 38 |             result_img = net.blobs[LayerCheck].data[0] * 255
 39 |             result_img = result_img.astype(int)
 40 |             result_img = np.transpose(result_img, (1, 2, 0))
 41 |             result_img = result_img[..., ::-1]
 42 |             cv2.imwrite("Upsample_result.png", result_img)
 43 | 
 44 | 
 45 | def TestPytorch(net, inputs, LayerCheck):
 46 |     from torch.autograd import Variable
 47 | 
 48 |     inputs = Variable(inputs, requires_grad=True)
 49 | 
 50 |     net.eval()
 51 |     outputs = net(inputs)
 52 | 
 53 |     if LayerCheck == 'Softmax_1':
 54 |         m = nn.Softmax()
 55 |         if isinstance(outputs, tuple):
 56 |             outputs = outputs[0]
 57 |         outputs = m(outputs)
 58 |         PrintLabel(outputs.data.numpy().flatten())
 59 |         result = outputs.data.numpy().flatten()
 60 |         print(result.shape)
 61 |     else:
 62 |         print(outputs.data.numpy().flatten())
 63 |         # result = outputs.data.numpy().flatten()
 64 | 
 65 | 
 66 | class ColorSpaceTransform(object):
 67 |     def __call__(self, image):
 68 |         img_hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
 69 |         img_ycc = cv2.cvtColor(image, cv2.COLOR_BGR2YCR_CB)
 70 |         img = np.concatenate((img_hsv, img_ycc), 2)
 71 | 
 72 |         return img
 73 | 
 74 | 
 75 | def TestAndCompare(ModelInd, pytorch_net, InputShape, LayerCheck='Softmax_1', UseImage=False):
 76 | 
 77 |     # trans = ColorSpaceTransform()
 78 |     # inputs = trans(inputs)
 79 | 
 80 |     if UseImage:
 81 |         img = '../TestData/2008_000536.jpg'
 82 |         # inputs = cv2.imread(img, 0)  # 0 for grayscale
 83 |         inputs = cv2.imread(img, 1)  # 1 for color
 84 |     else:
 85 |         n, c, h, w = InputShape
 86 |         if (ModelInd == 17):
 87 |             """ mean and standard deviation """
 88 |             mu, sigma = 0, 1
 89 |             inputs = np.random.normal(mu, sigma, w * h * c).reshape(w, h, c)
 90 |         else:
 91 |             # inputs = np.linspace(1, w * h * c, w * h * c).reshape(w, h, c)
 92 |             inputs = np.random.rand(w, h, c)
 93 | 
 94 |     print(inputs.shape)
 95 | 
 96 |     scale_factor = 1.0 / 255.0
 97 |     if UseImage:
 98 |         transform_inputs = transforms.Compose([
 99 |             transforms.ToPILImage(),
100 |             # transforms.CenterCrop(112),
101 |             transforms.ToTensor(),
102 |             transforms.Normalize((0, 0, 0), (scale_factor, scale_factor, scale_factor)),
103 |             # transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))
104 |         ])
105 |     else:
106 |         transform_inputs = transforms.Compose([
107 |             transforms.ToTensor(),
108 |             transforms.Normalize((0, 0, 0), (scale_factor, scale_factor, scale_factor)),
109 |         ])
110 | 
111 |     print('Caffe Output:')
112 |     NetName = str(pytorch_net.__class__.__name__)
113 |     proto_path = '../ModelFiles/' + NetName + '/' + NetName + '.prototxt'
114 |     model_path = '../ModelFiles/' + NetName + '/' + NetName + '.caffemodel'
115 |     inputs_caffe = transform_inputs(inputs).numpy()
116 |     TestCaffe(proto_path, model_path, inputs_caffe, LayerCheck, ModelInd)
117 | 
118 |     print('')
119 |     print('Pytorch Output:')
120 |     inputs_pytorch = transform_inputs(inputs)
121 |     inputs_pytorch = inputs_pytorch.unsqueeze(0)
122 |     TestPytorch(pytorch_net, inputs_pytorch, LayerCheck)
123 | 


--------------------------------------------------------------------------------