├── .gitignore ├── LICENSE ├── ModelFiles ├── FaceBoxes │ └── FaceBoxes.py ├── MobileNet │ └── MobileNet.py ├── ResNet │ └── resnet.py ├── UNet │ └── UNet.py └── _netG_1 │ ├── build_face_dataset.py │ ├── main.py │ └── models.py ├── README.md ├── TestData ├── 2008_000536.jpg ├── 2008_001171.jpg ├── 2008_001601.jpg ├── 2008_001841.jpg ├── 227-2.jpg ├── 227-3.jpg ├── 227.jpg └── ImageNetLabels.txt └── code ├── ConvertLayer_caffe.py ├── ConvertLayer_ncnn.py ├── ConvertModel.py ├── ReplaceDenormals.py ├── caffe.proto ├── caffe_pb2.py ├── run.py └── test.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 2-Clause License 2 | 3 | Copyright (c) 2017, 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19 | DISCLAIMED. 
class CReLUM(nn.Module):
    """Concatenated ReLU: ``relu(cat(x, -x))`` along dimension 1.

    Dimension 1 of the output is twice as large as that of the input.
    """

    def __init__(self):
        super(CReLUM, self).__init__()

    def forward(self, x):
        return F.relu(torch.cat((x, -x), 1))


# Module-level instance used by FaceBoxes.forward. CReLUM registers no
# parameters or buffers, so one shared instance is sufficient.
CRelu = CReLUM()


class BasicConv2d(nn.Module):
    """Conv2d followed by BatchNorm2d (eps=0.001) and an in-place ReLU.

    Args:
        in_channels: input channels of the convolution.
        out_channels: output channels of the convolution / batch norm.
        **kwargs: forwarded unchanged to ``nn.Conv2d``.
    """

    def __init__(self, in_channels, out_channels, **kwargs):
        super(BasicConv2d, self).__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, **kwargs)
        self.bn = nn.BatchNorm2d(out_channels, eps=0.001)

    def forward(self, x):
        return F.relu(self.bn(self.conv(x)), inplace=True)


class Inception(nn.Module):
    """Four parallel branches concatenated along dimension 1.

    Branches (all keep the spatial size):
      1. 1x1 conv                        -> n1x1down channels
      2. 3x3 max-pool (stride 1) + 1x1   -> n1x1down channels
      3. 1x1 conv + 3x3 conv             -> n3x3 channels
      4. 1x1 conv + 3x3 conv + 3x3 conv  -> n3x3 channels

    The output therefore has ``2 * n1x1down + 2 * n3x3`` channels.
    """

    def __init__(self, in_planes, n1x1down, n1x1up, n3x3):
        super(Inception, self).__init__()

        self.conv1 = BasicConv2d(in_planes, n1x1down, kernel_size=1)

        self.pool2_1 = nn.MaxPool2d(3, stride=1, padding=1, ceil_mode=True)
        self.conv2_2 = BasicConv2d(in_planes, n1x1down, kernel_size=1)

        self.conv3_1 = BasicConv2d(in_planes, n1x1up, kernel_size=1)
        self.conv3_2 = BasicConv2d(n1x1up, n3x3, kernel_size=3, padding=1)

        self.conv4_1 = BasicConv2d(in_planes, n1x1up, kernel_size=1)
        self.conv4_2 = BasicConv2d(n1x1up, n3x3, kernel_size=3, padding=1)
        self.conv4_3 = BasicConv2d(n3x3, n3x3, kernel_size=3, padding=1)

    def forward(self, x):
        y1 = self.conv1(x)
        y2 = self.conv2_2(self.pool2_1(x))
        y3 = self.conv3_2(self.conv3_1(x))
        y4 = self.conv4_3(self.conv4_2(self.conv4_1(x)))
        return torch.cat([y1, y2, y3, y4], 1)


# Default number of anchors per spatial location for the three heads.
anchors = (21, 1, 1)


def _flatten_head(t):
    """Move channels last and flatten everything but the batch dimension."""
    t = t.permute(0, 2, 3, 1).contiguous()
    return t.view(t.size(0), -1)


class FaceBoxes(nn.Module):
    """Detector with three score/bbox heads taken at three feature scales.

    Args:
        num_anchors: three integers, the number of anchors predicted per
            location by head 1, 2 and 3. Defaults to the module-level
            ``anchors`` tuple, which reproduces the previous fixed layout.

    ``forward`` returns ``(pscore, pbbox)``: the three score maps (2 values
    per anchor) and the three bbox maps (4 values per anchor), each flattened
    per sample and concatenated along dimension 1.
    """

    def __init__(self, num_anchors=anchors):
        super(FaceBoxes, self).__init__()
        a1, a2, a3 = num_anchors

        self.conv1 = nn.Conv2d(3, 16, kernel_size=7, stride=4, padding=3)
        self.bn1 = nn.BatchNorm2d(16, eps=0.001)
        # CReLU doubles the channels: 16 -> 32 before conv2, 64 -> 128 after.
        self.conv2 = nn.Conv2d(32, 64, kernel_size=5, stride=2, padding=2)
        self.bn2 = nn.BatchNorm2d(64, eps=0.001)
        self.inception1 = Inception(128, 32, 16, 32)
        self.inception2 = Inception(128, 32, 16, 32)
        self.inception3 = Inception(128, 32, 16, 32)
        self.conv3_1 = nn.Conv2d(128, 128, kernel_size=1, stride=1)
        self.conv3_2 = nn.Conv2d(128, 256, kernel_size=3, stride=2, padding=1)
        self.conv4_1 = nn.Conv2d(256, 128, kernel_size=1, stride=1)
        self.conv4_2 = nn.Conv2d(128, 256, kernel_size=3, stride=2, padding=1)

        self.score_conv1 = nn.Conv2d(
            128, 2 * a1, kernel_size=3, stride=1, padding=1)
        self.bbox_conv1 = nn.Conv2d(
            128, 4 * a1, kernel_size=3, stride=1, padding=1)
        self.score_conv2 = nn.Conv2d(
            256, 2 * a2, kernel_size=3, stride=1, padding=1)
        self.bbox_conv2 = nn.Conv2d(
            256, 4 * a2, kernel_size=3, stride=1, padding=1)
        self.score_conv3 = nn.Conv2d(
            256, 2 * a3, kernel_size=3, stride=1, padding=1)
        self.bbox_conv3 = nn.Conv2d(
            256, 4 * a3, kernel_size=3, stride=1, padding=1)

    def forward(self, x):
        x = self.bn1(self.conv1(x))
        x = F.max_pool2d(CRelu(x), kernel_size=3, stride=2, ceil_mode=True)

        x = self.bn2(self.conv2(x))
        x = F.max_pool2d(CRelu(x), kernel_size=3, stride=2, ceil_mode=True)

        x = self.inception1(x)
        x = self.inception2(x)
        x = self.inception3(x)

        # Head 1 reads the inception output.
        score1 = self.score_conv1(x)
        bbox1 = self.bbox_conv1(x)

        x = F.relu(self.conv3_1(x), inplace=True)
        x = F.relu(self.conv3_2(x), inplace=True)

        # Head 2 reads the first stride-2 reduction.
        score2 = self.score_conv2(x)
        bbox2 = self.bbox_conv2(x)

        x = F.relu(self.conv4_1(x), inplace=True)
        x = F.relu(self.conv4_2(x), inplace=True)

        # Head 3 reads the second stride-2 reduction.
        score3 = self.score_conv3(x)
        bbox3 = self.bbox_conv3(x)

        pscore = torch.cat(
            [_flatten_head(s) for s in (score1, score2, score3)], 1)
        pbbox = torch.cat(
            [_flatten_head(b) for b in (bbox1, bbox2, bbox3)], 1)

        return pscore, pbbox
class MobileNet(nn.Module):
    """Stack of depthwise-separable convolutions with a 1000-way classifier.

    ``self.model`` is one 3x3 stride-2 convolution, thirteen depthwise
    separable blocks and a 7x7 average pool; ``self.fc`` maps the resulting
    1024 features to 1000 outputs. The five stride-2 stages divide the input
    resolution by 32, so a 224x224 input reaches the pool as 7x7.
    """

    def __init__(self):
        super(MobileNet, self).__init__()

        def conv_bn(inp, oup, stride):
            """Standard 3x3 convolution + BatchNorm + ReLU."""
            return nn.Sequential(
                nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
                nn.BatchNorm2d(oup),
                nn.ReLU(inplace=True)
            )

        def conv_dw(inp, oup, stride):
            """Depthwise 3x3 (groups=inp) then pointwise 1x1 convolution."""
            return nn.Sequential(
                nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False),
                nn.BatchNorm2d(inp),
                nn.ReLU(inplace=True),

                nn.Conv2d(inp, oup, 1, 1, 0, bias=False),
                nn.BatchNorm2d(oup),
                nn.ReLU(inplace=True),
            )

        self.model = nn.Sequential(
            conv_bn(3, 32, 2),
            conv_dw(32, 64, 1),
            conv_dw(64, 128, 2),
            conv_dw(128, 128, 1),
            conv_dw(128, 256, 2),
            conv_dw(256, 256, 1),
            conv_dw(256, 512, 2),
            conv_dw(512, 512, 1),
            conv_dw(512, 512, 1),
            conv_dw(512, 512, 1),
            conv_dw(512, 512, 1),
            conv_dw(512, 512, 1),
            conv_dw(512, 1024, 2),
            conv_dw(1024, 1024, 1),
            nn.AvgPool2d(7, ceil_mode=True),
        )
        self.fc = nn.Linear(1024, 1000)

    def forward(self, x):
        x = self.model(x)
        # Flatten per sample. The previous ``view(-1, 1024)`` silently moved
        # surplus spatial positions into the batch dimension when the pooled
        # map was larger than 1x1; now such inputs fail in ``self.fc``.
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x
def conv3x3(in_planes, out_planes, stride=1):
    """Return a bias-free 3x3 convolution with padding 1."""
    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
                     padding=1, bias=False)


class BasicBlock(nn.Module):
    """Two 3x3 convolutions with a residual connection.

    Args:
        inplanes: channels of the block input.
        planes: channels produced by both convolutions.
        stride: stride of the first convolution.
        downsample: optional module applied to the input to build the
            residual when its shape differs from the block output; ``None``
            means the input is added unchanged.
    """
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes)
        # Fix: this previously stored the module-level ``model_urls`` dict,
        # which ignored the argument and made forward() call a dict.
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out


class Bottleneck(nn.Module):
    """1x1 -> 3x3 -> 1x1 residual block producing ``planes * 4`` channels.

    The stride is applied by the first 1x1 convolution; the 3x3 convolution
    always uses stride 1 (both lines are marked ``# change`` in the source).

    Args:
        inplanes: channels of the block input.
        planes: channels of the two inner convolutions.
        stride: stride of the first 1x1 convolution.
        downsample: optional module applied to the input to build the
            residual; ``None`` means the input is added unchanged.
    """
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1,
                               stride=stride, bias=False)  # change
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1,  # change
                               padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(planes * 4)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out


class ResNet(nn.Module):
    """Residual network assembled from ``block`` instances.

    Args:
        block: block class (``BasicBlock`` or ``Bottleneck``); its
            ``expansion`` attribute gives the output-channel multiplier.
        layers: four integers, the number of blocks in each stage.
        num_classes: size of the final linear layer's output.
    """

    def __init__(self, block, layers, num_classes=1000):
        super(ResNet, self).__init__()
        # Running input-channel count, updated by _make_layer.
        self.inplanes = 64
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2,
                                    padding=0, ceil_mode=True)  # change
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
        self.avgpool = nn.AvgPool2d(7)
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                # Normal init with std sqrt(2 / (kh * kw * out_channels)).
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

    def _make_layer(self, block, planes, blocks, stride=1):
        """Build one stage of ``blocks`` blocks and update ``self.inplanes``.

        Only the first block receives ``stride``; it also gets a 1x1
        conv + BatchNorm ``downsample`` branch when the stride or the channel
        count changes the residual's shape.
        """
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes * block.expansion),
            )

        layers = [block(self.inplanes, planes, stride, downsample)]
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes))

        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)

        return x
def _load_zoo_weights(net, arch):
    """Load the checkpoint at ``model_urls[arch]`` into *net* and return it."""
    net.load_state_dict(model_zoo.load_url(model_urls[arch]))
    return net


def resnet18(pretrained=False):
    """Build a ResNet-18 (``BasicBlock``, stages [2, 2, 2, 2]).

    Args:
        pretrained (bool): when True, load the weights published at
            ``model_urls['resnet18']``.
    """
    net = ResNet(BasicBlock, [2, 2, 2, 2])
    if not pretrained:
        return net
    return _load_zoo_weights(net, 'resnet18')


def resnet34(pretrained=False):
    """Build a ResNet-34 (``BasicBlock``, stages [3, 4, 6, 3]).

    Args:
        pretrained (bool): when True, load the weights published at
            ``model_urls['resnet34']``.
    """
    net = ResNet(BasicBlock, [3, 4, 6, 3])
    if not pretrained:
        return net
    return _load_zoo_weights(net, 'resnet34')


def resnet50(pretrained=False):
    """Build a ResNet-50 (``Bottleneck``, stages [3, 4, 6, 3]).

    Args:
        pretrained (bool): when True, load weights from the local file
            ``../ModelFiles/ResNet/resnet50.pth`` (resolved against the
            current working directory). Unlike the sibling factories, the
            ``model_urls`` download is not used here.
    """
    net = ResNet(Bottleneck, [3, 4, 6, 3])
    if not pretrained:
        return net
    net.load_state_dict(torch.load('../ModelFiles/ResNet/resnet50.pth'))
    return net


def resnet101(pretrained=False):
    """Build a ResNet-101 (``Bottleneck``, stages [3, 4, 23, 3]).

    Args:
        pretrained (bool): when True, load the weights published at
            ``model_urls['resnet101']``.
    """
    net = ResNet(Bottleneck, [3, 4, 23, 3])
    if not pretrained:
        return net
    return _load_zoo_weights(net, 'resnet101')


def resnet152(pretrained=False):
    """Build a ResNet-152 (``Bottleneck``, stages [3, 8, 36, 3]).

    Args:
        pretrained (bool): when True, load the weights published at
            ``model_urls['resnet152']``.
    """
    net = ResNet(Bottleneck, [3, 8, 36, 3])
    if not pretrained:
        return net
    return _load_zoo_weights(net, 'resnet152')
class UNetEnc(nn.Module):
    """Two 3x3 conv+ReLU layers followed by a 2x transposed convolution.

    Despite the name, this is the upsampling stage used on the expanding
    path of ``UNet``: the output is twice as large spatially as the input.

    Args:
        in_channels: channels of the (concatenated) input.
        features: channels of the two 3x3 convolutions.
        out_channels: channels produced by the transposed convolution.
    """

    def __init__(self, in_channels, features, out_channels):
        super(UNetEnc, self).__init__()

        self.up = nn.Sequential(
            nn.Conv2d(in_channels, features, 3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(features, features, 3, padding=1),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(features, out_channels, 2, stride=2),
            nn.ReLU(inplace=True),
        )

    def forward(self, x):
        return self.up(x)


class UNetDec(nn.Module):
    """Two 3x3 conv+ReLU layers, optional dropout, then a 2x2 max-pool.

    Despite the name, this is the downsampling stage used on the contracting
    path of ``UNet``. The pool uses ``ceil_mode=True``, so an odd input size
    ``n`` becomes ``(n + 1) / 2``.

    Args:
        in_channels: channels of the input.
        out_channels: channels of both convolutions.
        dropout: when True, insert ``nn.Dropout(.5)`` before the pool.
    """

    def __init__(self, in_channels, out_channels, dropout=False):
        super(UNetDec, self).__init__()

        layers = [
            nn.Conv2d(in_channels, out_channels, 3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_channels, out_channels, 3, padding=1),
            nn.ReLU(inplace=True),
        ]
        if dropout:
            layers += [nn.Dropout(.5)]
        layers += [nn.MaxPool2d(2, stride=2, ceil_mode=True)]

        self.down = nn.Sequential(*layers)

    def forward(self, x):
        return self.down(x)


def _upsample_like(src, ref):
    """Bilinearly resize *src* to the spatial size (dims 2 and 3) of *ref*.

    Passing the target size, instead of a ``scale_factor`` computed with
    ``/``, guarantees the result can be concatenated with *ref*: the ratio is
    not an integer when ceil-mode pooling met an odd size, and ``/`` is
    integer division on Python 2.
    """
    return F.upsample_bilinear(src, size=tuple(ref.size()[2:]))


class UNet(nn.Module):
    """U-shaped network with four down stages, a centre and four up stages.

    Each up stage receives the previous up-stage output concatenated (along
    dimension 1) with the matching down-stage output resized to the same
    spatial size.

    Args:
        num_classes: output channels of the final 1x1 convolution.
    """

    def __init__(self, num_classes):
        super(UNet, self).__init__()

        self.dec1 = UNetDec(3, 64)
        self.dec2 = UNetDec(64, 128)
        self.dec3 = UNetDec(128, 256)
        self.dec4 = UNetDec(256, 512, dropout=True)
        self.center = nn.Sequential(
            nn.Conv2d(512, 1024, 3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(1024, 1024, 3, padding=1),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.ConvTranspose2d(1024, 512, 2, stride=2),
            nn.ReLU(inplace=True),
        )
        self.enc4 = UNetEnc(1024, 512, 256)
        self.enc3 = UNetEnc(512, 256, 128)
        self.enc2 = UNetEnc(256, 128, 64)
        self.enc1 = nn.Sequential(
            nn.Conv2d(128, 64, 3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 64, 3, padding=1),
            nn.ReLU(inplace=True),
        )
        self.final = nn.Conv2d(64, num_classes, 1)

    def forward(self, x):
        dec1 = self.dec1(x)
        dec2 = self.dec2(dec1)
        dec3 = self.dec3(dec2)
        dec4 = self.dec4(dec3)
        center = self.center(dec4)

        enc4 = self.enc4(torch.cat([center, _upsample_like(dec4, center)], 1))
        enc3 = self.enc3(torch.cat([enc4, _upsample_like(dec3, enc4)], 1))
        enc2 = self.enc2(torch.cat([enc3, _upsample_like(dec2, enc3)], 1))
        enc1 = self.enc1(torch.cat([enc2, _upsample_like(dec1, enc2)], 1))

        return self.final(enc1)
def faceCrop(im, face_pos, m):
    """Crop the detected face out of *im*, enlarged by a relative margin.

    Args:
        im: image object exposing ``size`` (width, height) and ``crop(box)``;
            callers in this file pass the result of ``PIL.Image.open``.
        face_pos: object with ``x``, ``y``, ``width`` and ``height``.
        m: margin added on every side, as a fraction of the face size.

    Returns:
        Whatever ``im.crop`` returns for the enlarged box, clipped so that it
        does not leave the image.
    """
    x, y, w, h = face_pos.x, face_pos.y, face_pos.width, face_pos.height
    sizeX, sizeY = im.size
    new_x, new_y = max(0, x - m * w), max(0, y - m * h)
    # Grow by the margin on both sides unless that would cross the border,
    # in which case extend exactly to the border.
    new_w = w + 2 * m * w if sizeX > (new_x + w + 2 * m * w) else sizeX - new_x
    new_h = h + 2 * m * h if sizeY > (new_y + h + 2 * m * h) else sizeY - new_y
    new_x, new_y, new_w, new_h = int(new_x), int(new_y), int(new_w), int(new_h)
    return im.crop((new_x, new_y, new_x + new_w, new_y + new_h))


def min_resize_crop(im, min_side):
    """Resize so the shorter side equals *min_side*, then crop a square.

    The crop is taken from the top-left corner. Sizes are computed with
    ``//`` so ``resize`` receives integers on Python 3 as well (on Python 2
    the previous ``/`` was already integer division).
    """
    sizeX, sizeY = im.size
    if sizeX > sizeY:
        im = im.resize((min_side * sizeX // sizeY, min_side), Image.ANTIALIAS)
    else:
        im = im.resize((min_side, sizeY * min_side // sizeX), Image.ANTIALIAS)
    return im.crop((0, 0, min_side, min_side))


def load_detect(img_path):
    """Read an image file and return its most likely face as a 96x96 crop.

    Input: A string indicating the image path
    Output: The detected face image in the size 96x96, or the integer 0 when
    no face is detected.

    When several faces are detected, the one with the highest ``likelihood``
    is used (the first one on ties).
    """
    im = Image.open(img_path)
    faces = animeface.detect(im)
    if len(faces) == 0:
        return 0
    best = max(faces, key=lambda face: face.likelihood)
    im = faceCrop(im, best.face.pos, 0.5)
    return min_resize_crop(im, 96)


def process_img(img_path):
    """Detect the face in *img_path* and save it under ``faces/<class>/``.

    The class name and file name are the last two ``/``-separated components
    of *img_path*. Returns 0 when the target file already exists or when no
    face is found; otherwise returns None after saving a JPEG.
    """
    parts = img_path.split('/')
    cls_name, img_name = parts[-2], parts[-1]
    new_dir_path = os.path.join('faces', cls_name)
    try:
        os.makedirs(new_dir_path)
    except OSError as err:
        # Another worker may have created the directory first; only report
        # failures that left us without a directory.
        if not os.path.isdir(new_dir_path):
            print("OS error: {0}".format(err))

    new_img_path = os.path.join(new_dir_path, img_name)
    if os.path.exists(new_img_path):
        return 0
    im = load_detect(img_path)
    # no faces in this image
    if im == 0:
        return 0
    im.save(new_img_path, 'JPEG')


def try_process_img(img_path):
    """Run ``process_img`` and print, rather than propagate, any failure.

    Used as the pool worker so that one bad image does not abort the batch.
    ``KeyboardInterrupt``/``SystemExit`` are no longer swallowed.
    """
    try:
        process_img(img_path)
    except Exception:
        e = sys.exc_info()[0]
        print('Err: %s \n' % e)


# multiprocessing version
def multi_construct_face_dataset(base_dir):
    """Process every ``.jpg``/``.png`` in each sub-directory of *base_dir*.

    Every entry of *base_dir* is treated as a class directory. Images are
    handed to 12 worker processes running ``try_process_img``; the pool is
    closed and joined before returning.
    """
    cls_dirs = os.listdir(base_dir)
    imgs = []
    for cls_dir in cls_dirs:
        sub_dir = os.path.join(base_dir, cls_dir)
        imgs.extend(os.path.join(sub_dir, f) for f in os.listdir(sub_dir)
                    if f.endswith(('.jpg', '.png')))
    print('There are %d classes, %d images in total. \n' % (len(cls_dirs), len(imgs)))
    pool = Pool(12)  # 12 workers
    try:
        pool.map(try_process_img, imgs)
    finally:
        pool.close()
        pool.join()


# Guarded so that worker processes started with the "spawn" method, which
# re-import this module, do not start the whole job again.
if __name__ == '__main__':
    base_dir = '/home/jielei/gallery-dl/danbooru'
    multi_construct_face_dataset(base_dir)
# Training script for the GAN models defined in models.py.

parser = argparse.ArgumentParser()
parser.add_argument('--dataRoot', required=True, help='path to dataset')
parser.add_argument('--workers', type=int, default=2, help='number of data loading workers')
parser.add_argument('--batchSize', type=int, default=64, help='input batch size')
parser.add_argument('--imageSize', type=int, default=64, help='the height / width of the input image to network')
parser.add_argument('--nz', type=int, default=100, help='size of the latent z vector')
parser.add_argument('--ngf', type=int, default=64)
parser.add_argument('--ndf', type=int, default=64)
parser.add_argument('--niter', type=int, default=25, help='number of epochs to train for')
parser.add_argument('--lr', type=float, default=0.0002, help='learning rate, default=0.0002')
parser.add_argument('--beta1', type=float, default=0.5, help='beta1 for adam. default=0.5')
parser.add_argument('--cuda' , action='store_true', help='enables cuda')
parser.add_argument('--ngpu' , type=int, default=1, help='number of GPUs to use')
parser.add_argument('--netG', default='', help="path to netG (to continue training)")
parser.add_argument('--netD', default='', help="path to netD (to continue training)")
parser.add_argument('--outDir', default='.', help='folder to output images and model checkpoints')
parser.add_argument('--model', type=int, default=1, help='1 for dcgan, 2 for illustrationGAN-like-GAN')
parser.add_argument('--d_labelSmooth', type=float, default=0, help='for D, use soft label "1-labelSmooth" for real samples')
parser.add_argument('--n_extra_layers_d', type=int, default=0, help='number of extra conv layers in D')
parser.add_argument('--n_extra_layers_g', type=int, default=1, help='number of extra conv layers in G')
parser.add_argument('--binary', action='store_true', help='z from bernoulli distribution, with prob=0.5')

# simply prefer this way
# arg_list = [
#     '--dataRoot', '/home/jielei/data/danbooru-faces',
#     '--workers', '12',
#     '--batchSize', '128',
#     '--imageSize', '64',
#     '--nz', '100',
#     '--ngf', '64',
#     '--ndf', '64',
#     '--niter', '80',
#     '--lr', '0.0002',
#     '--beta1', '0.5',
#     '--cuda',
#     '--ngpu', '1',
#     '--netG', '',
#     '--netD', '',
#     '--outDir', './results',
#     '--model', '1',
#     '--d_labelSmooth', '0.1', # 0.25 from the improved-GAN paper
#     '--n_extra_layers_d', '0',
#     '--n_extra_layers_g', '1', # in the sense that generator should be more powerful
# ]

# Fix: the result was bound to ``args`` while the whole script reads ``opt``,
# so the very next line raised NameError.
opt = parser.parse_args()
# opt = parser.parse_args(arg_list)
print(opt)

try:
    os.makedirs(opt.outDir)
except OSError:
    pass

opt.manualSeed = random.randint(1,10000) # fix seed, a scalar
random.seed(opt.manualSeed)
torch.manual_seed(opt.manualSeed)

cudnn.benchmark = True

if torch.cuda.is_available() and not opt.cuda:
    print("WARNING: You have a CUDA device, so you should probably run with --cuda")

nc = 3
ngpu = opt.ngpu
nz = opt.nz
ngf = opt.ngf
ndf = opt.ndf
n_extra_d = opt.n_extra_layers_d
n_extra_g = opt.n_extra_layers_g

dataset = dset.ImageFolder(
    root=opt.dataRoot,
    transform=transforms.Compose([
            transforms.Scale(opt.imageSize),
            # transforms.CenterCrop(opt.imageSize),
            transforms.ToTensor(),
            transforms.Normalize((0.5,0.5,0.5), (0.5,0.5,0.5)), # bring images to (-1,1)
        ])
)
dataloader = torch.utils.data.DataLoader(dataset, batch_size=opt.batchSize,
                                         shuffle=True, num_workers=opt.workers)

# load models
if opt.model == 1:
    netG = models._netG_1(ngpu, nz, nc, ngf, n_extra_g)
    netD = models._netD_1(ngpu, nz, nc, ndf, n_extra_d)
elif opt.model == 2:
    netG = models._netG_2(ngpu, nz, nc, ngf)
    netD = models._netD_2(ngpu, nz, nc, ndf)
else:
    # Without this, netG/netD stay undefined and the failure surfaces later
    # as an unrelated NameError.
    parser.error('--model must be 1 or 2')

netG.apply(weights_init)
if opt.netG != '':
    netG.load_state_dict(torch.load(opt.netG))
print(netG)

netD.apply(weights_init)
if opt.netD != '':
    netD.load_state_dict(torch.load(opt.netD))
print(netD)

criterion = nn.BCELoss()
criterion_MSE = nn.MSELoss()

# Pre-allocated buffers, resized and refilled in place on every iteration.
input = torch.FloatTensor(opt.batchSize, 3, opt.imageSize, opt.imageSize)
noise = torch.FloatTensor(opt.batchSize, nz, 1, 1)
if opt.binary:
    bernoulli_prob = torch.FloatTensor(opt.batchSize, nz, 1, 1).fill_(0.5)
    fixed_noise = torch.bernoulli(bernoulli_prob)
else:
    fixed_noise = torch.FloatTensor(opt.batchSize, nz, 1, 1).normal_(0, 1)
label = torch.FloatTensor(opt.batchSize)
real_label = 1
fake_label = 0

if opt.cuda:
    netD.cuda()
    netG.cuda()
    criterion.cuda()
    criterion_MSE.cuda()
    input, label = input.cuda(), label.cuda()
    noise, fixed_noise = noise.cuda(), fixed_noise.cuda()

input = Variable(input)
label = Variable(label)
noise = Variable(noise)
fixed_noise = Variable(fixed_noise)

# setup optimizer
optimizerD = optim.Adam(netD.parameters(), lr = opt.lr, betas = (opt.beta1, 0.999))
optimizerG = optim.Adam(netG.parameters(), lr = opt.lr, betas = (opt.beta1, 0.999))


def _run_generator(net, z):
    """Return ``(fake, z_prediction)`` from either generator flavour.

    ``_netG_1.forward`` in models.py returns a single tensor, which cannot be
    unpacked into two names; a bare output is paired with ``None``. Tuple
    outputs (expected from the model-2 generator, whose z prediction is used
    below) are passed through unchanged.
    """
    out = net(z)
    if isinstance(out, tuple):
        return out
    return out, None


for epoch in range(opt.niter):
    for i, data in enumerate(dataloader, 0):
        start_iter = time.time()
        ############################
        # (1) Update D network: maximize log(D(x)) + log(1 - D(G(z)))
        ###########################
        # train with real
        netD.zero_grad()
        real_cpu, _ = data
        batch_size = real_cpu.size(0)
        input.data.resize_(real_cpu.size()).copy_(real_cpu)
        label.data.resize_(batch_size).fill_(real_label - opt.d_labelSmooth) # use smooth label for discriminator

        output = netD(input)
        errD_real = criterion(output, label)
        errD_real.backward()
        D_x = output.data.mean()
        # train with fake
        noise.data.resize_(batch_size, nz, 1, 1)
        if opt.binary:
            bernoulli_prob.resize_(noise.data.size())
            # map the {0, 1} samples to {-1, +1}
            noise.data.copy_(2*(torch.bernoulli(bernoulli_prob)-0.5))
        else:
            noise.data.normal_(0, 1)
        fake, z_prediction = _run_generator(netG, noise)
        label.data.fill_(fake_label)
        output = netD(fake.detach()) # add ".detach()" to avoid backprop through G
        errD_fake = criterion(output, label)
        errD_fake.backward() # gradients for fake/real will be accumulated
        D_G_z1 = output.data.mean()
        errD = errD_real + errD_fake
        optimizerD.step() # .step() can be called once the gradients are computed

        ############################
        # (2) Update G network: maximize log(D(G(z)))
        ###########################
        netG.zero_grad()
        label.data.fill_(real_label) # fake labels are real for generator cost
        output = netD(fake)
        errG = criterion(output, label)
        errG.backward(retain_variables=True) # True if backward through the graph for the second time
        if opt.model == 2: # with z predictor
            errG_z = criterion_MSE(z_prediction, noise)
            errG_z.backward()
        D_G_z2 = output.data.mean()
        optimizerG.step()

        end_iter = time.time()
        print('[%d/%d][%d/%d] Loss_D: %.4f Loss_G: %.4f D(x): %.4f D(G(z)): %.4f / %.4f Elapsed %.2f s'
              % (epoch, opt.niter, i, len(dataloader),
                 errD.data[0], errG.data[0], D_x, D_G_z1, D_G_z2, end_iter-start_iter))
        if i % 100 == 0:
            # the first 64 samples from the mini-batch are saved.
            vutils.save_image(real_cpu[0:64,:,:,:],
                    '%s/real_samples.png' % opt.outDir, nrow=8)
            fake, _ = _run_generator(netG, fixed_noise)
            vutils.save_image(fake.data[0:64,:,:,:],
                    '%s/fake_samples_epoch_%03d.png' % (opt.outDir, epoch), nrow=8)
    if epoch % 1 == 0:
        # do checkpointing
        torch.save(netG.state_dict(), '%s/netG_epoch_%d.pth' % (opt.outDir, epoch))
        torch.save(netD.state_dict(), '%s/netD_epoch_%d.pth' % (opt.outDir, epoch))
(ngf*2) x 16 x 16 38 | nn.ConvTranspose2d(ngf * 2, ngf, 4, 2, 1, bias=False), 39 | nn.BatchNorm2d(ngf), 40 | nn.LeakyReLU(0.2, inplace=True), 41 | # state size. (ngf) x 32 x 32 42 | ) 43 | 44 | # Extra layers 45 | for t in range(n_extra_layers_g): 46 | main.add_module('extra-layers-{0}.{1}.conv'.format(t, ngf), 47 | nn.Conv2d(ngf, ngf, 3, 1, 1, bias=False)) 48 | main.add_module('extra-layers-{0}.{1}.batchnorm'.format(t, ngf), 49 | nn.BatchNorm2d(ngf)) 50 | main.add_module('extra-layers-{0}.{1}.relu'.format(t, ngf), 51 | nn.LeakyReLU(0.2, inplace=True)) 52 | 53 | main.add_module('final_layer.deconv', 54 | nn.ConvTranspose2d(ngf, nc, 4, 2, 1, bias=False)) # 5,3,1 for 96x96 55 | main.add_module('final_layer.tanh', 56 | nn.Tanh()) 57 | # state size. (nc) x 96 x 96 58 | 59 | self.main = main 60 | 61 | 62 | def forward(self, input): 63 | # gpu_ids = None 64 | # if isinstance(input.data, torch.cuda.FloatTensor) and self.ngpu > 1: 65 | # gpu_ids = range(self.ngpu) 66 | # return nn.parallel.data_parallel(self.main, input, gpu_ids), 0 67 | return self.main(input) 68 | 69 | class _netD_1(nn.Module): 70 | def __init__(self, ngpu, nz, nc, ndf, n_extra_layers_d): 71 | super(_netD_1, self).__init__() 72 | self.ngpu = ngpu 73 | main = nn.Sequential( 74 | # input is (nc) x 96 x 96 75 | nn.Conv2d(nc, ndf, 4, 2, 1, bias=False), # 5,3,1 for 96x96 76 | nn.LeakyReLU(0.2, inplace=True), 77 | # state size. (ndf) x 32 x 32 78 | nn.Conv2d(ndf, ndf * 2, 4, 2, 1, bias=False), 79 | nn.BatchNorm2d(ndf * 2), 80 | nn.LeakyReLU(0.2, inplace=True), 81 | # state size. (ndf*2) x 16 x 16 82 | nn.Conv2d(ndf * 2, ndf * 4, 4, 2, 1, bias=False), 83 | nn.BatchNorm2d(ndf * 4), 84 | nn.LeakyReLU(0.2, inplace=True), 85 | # state size. (ndf*4) x 8 x 8 86 | nn.Conv2d(ndf * 4, ndf * 8, 4, 2, 1, bias=False), 87 | nn.BatchNorm2d(ndf * 8), 88 | nn.LeakyReLU(0.2, inplace=True), 89 | # state size. 
(ndf*8) x 4 x 4 90 | ) 91 | 92 | # Extra layers 93 | for t in range(n_extra_layers_d): 94 | main.add_module('extra-layers-{0}.{1}.conv'.format(t, ndf * 8), 95 | nn.Conv2d(ndf * 8, ndf * 8, 3, 1, 1, bias=False)) 96 | main.add_module('extra-layers-{0}.{1}.batchnorm'.format(t, ndf * 8), 97 | nn.BatchNorm2d(ndf * 8)) 98 | main.add_module('extra-layers-{0}.{1}.relu'.format(t, ndf * 8), 99 | nn.LeakyReLU(0.2, inplace=True)) 100 | 101 | 102 | main.add_module('final_layers.conv', nn.Conv2d(ndf * 8, 1, 4, 1, 0, bias=False)) 103 | main.add_module('final_layers.sigmoid', nn.Sigmoid()) 104 | # state size. 1 x 1 x 1 105 | self.main = main 106 | 107 | def forward(self, input): 108 | gpu_ids = None 109 | if isinstance(input.data, torch.cuda.FloatTensor) and self.ngpu > 1: 110 | gpu_ids = range(self.ngpu) 111 | output = nn.parallel.data_parallel(self.main, input, gpu_ids) 112 | return output.view(-1, 1) 113 | 114 | 115 | 116 | 117 | class _netD_2(nn.Module): 118 | def __init__(self, ngpu, nz, nc , ndf): 119 | super(_netD_2, self).__init__() 120 | self.ngpu = ngpu 121 | self.convs = nn.Sequential( 122 | # input is (nc) x 96 x 96 123 | nn.Conv2d(nc, ndf, 4, 2, 1, bias=False), 124 | nn.LeakyReLU(0.2, inplace=True), 125 | # state size. (ndf) x 32 x 32 126 | nn.Conv2d(ndf, ndf * 2, 4, 2, 1, bias=False), 127 | nn.BatchNorm2d(ndf * 2), 128 | nn.LeakyReLU(0.2, inplace=True), 129 | # state size. (ndf*2) x 16 x 16 130 | nn.Conv2d(ndf * 2, ndf * 4, 4, 2, 1, bias=False), 131 | nn.BatchNorm2d(ndf * 4), 132 | nn.LeakyReLU(0.2, inplace=True), 133 | # state size. (ndf*4) x 8 x 8 134 | nn.Conv2d(ndf * 4, ndf * 8, 4, 2, 1, bias=False), 135 | nn.BatchNorm2d(ndf * 8), 136 | nn.LeakyReLU(0.2, inplace=True), 137 | # state size. (ndf*8) x 4 x 4 138 | nn.Conv2d(ndf * 8, 1024, 4, 1, 0, bias=False), 139 | nn.LeakyReLU(inplace=True), 140 | nn.Dropout(0.5), 141 | # state size. 
1024 x 1 x 1 142 | ) 143 | self.fcs = nn.Sequential( 144 | nn.Linear(1024, 1024), 145 | nn.LeakyReLU(inplace=True), 146 | nn.Dropout(0.5), 147 | nn.Linear(1024, 1), 148 | nn.Sigmoid() 149 | ) 150 | def forward(self, input): 151 | gpu_ids = None 152 | if isinstance(input.data, torch.cuda.FloatTensor) and self.ngpu > 1: 153 | gpu_ids = range(self.ngpu) 154 | output = nn.parallel.data_parallel(self.convs, input, gpu_ids) 155 | output = self.fcs(output.view(-1,1024)) 156 | return output.view(-1, 1) 157 | 158 | # with z decoder and fc layers 159 | class _netG_2(nn.Module): 160 | def __init__(self, ngpu, nz, nc , ngf): 161 | super(_netG_2, self).__init__() 162 | self.ngpu = ngpu 163 | self.nz = nz 164 | self.fcs = nn.Sequential( 165 | # input is Z, going into a convolution 166 | # state size. nz x 1 x 1 167 | nn.Linear(nz, 1024), 168 | nn.ReLU(inplace=True), 169 | nn.Dropout(0.5), 170 | nn.Linear(1024, 1024), 171 | nn.ReLU(inplace=True), 172 | nn.Dropout(0.5), 173 | ) 174 | 175 | self.decode_fcs = nn.Sequential( 176 | nn.Linear(1024, 1024), 177 | nn.ReLU(inplace=True), 178 | nn.Dropout(0.5), 179 | nn.Linear(1024, nz), 180 | ) 181 | 182 | self.convs = nn.Sequential( 183 | # 1024x1x1 184 | nn.ConvTranspose2d(1024, ngf * 8, 4, 1, 0, bias=False), 185 | nn.BatchNorm2d(ngf * 8), 186 | nn.ReLU(inplace=True), 187 | # state size. (ngf*8) x 4 x 4 188 | nn.ConvTranspose2d(ngf * 8, ngf * 4, 4, 2, 1, bias=False), 189 | nn.BatchNorm2d(ngf * 4), 190 | nn.ReLU(inplace=True), 191 | # state size. (ngf*4) x 8 x 8 192 | nn.ConvTranspose2d(ngf * 4, ngf * 2, 4, 2, 1, bias=False), 193 | nn.BatchNorm2d(ngf * 2), 194 | nn.ReLU(inplace=True), 195 | # state size. (ngf*2) x 16 x 16 196 | nn.ConvTranspose2d(ngf * 2, ngf, 4, 2, 1, bias=False), 197 | nn.BatchNorm2d(ngf), 198 | nn.ReLU(inplace=True), 199 | # state size. (ngf) x 32 x 32 200 | nn.ConvTranspose2d( ngf, nc, 4, 2, 1, bias=False), 201 | nn.Tanh() 202 | # state size. 
(nc) x 96 x 96 203 | ) 204 | def forward(self, input): 205 | input = self.fcs(input.view(-1,self.nz)) 206 | gpu_ids = None 207 | if isinstance(input.data, torch.cuda.FloatTensor) and self.ngpu > 1: 208 | gpu_ids = range(self.ngpu) 209 | z_prediction = self.decode_fcs(input) 210 | input = input.view(-1,1024,1,1) 211 | output = nn.parallel.data_parallel(self.convs, input, gpu_ids) 212 | return output, z_prediction 213 | 214 | 215 | # DCGAN model with fc layers 216 | class _netG_3(nn.Module): 217 | def __init__(self, ngpu, nz, nc , ngf): 218 | super(_netG_3, self).__init__() 219 | self.ngpu = ngpu 220 | self.fcs = nn.Sequential( 221 | # input is Z, going into a convolution 222 | # state size. nz x 1 x 1 223 | nn.Linear(nz, 1024), 224 | nn.ReLU(inplace=True), 225 | nn.Dropout(0.5), 226 | nn.Linear(1024, 1024), 227 | nn.ReLU(inplace=True), 228 | nn.Dropout(0.5), 229 | ) 230 | self.convs = nn.Sequential( 231 | # 1024x1x1 232 | nn.ConvTranspose2d(1024, ngf * 8, 4, 1, 0, bias=False), 233 | nn.BatchNorm2d(ngf * 8), 234 | nn.ReLU(inplace=True), 235 | # state size. (ngf*8) x 4 x 4 236 | nn.ConvTranspose2d(ngf * 8, ngf * 4, 4, 2, 1, bias=False), 237 | nn.BatchNorm2d(ngf * 4), 238 | nn.ReLU(inplace=True), 239 | # state size. (ngf*4) x 8 x 8 240 | nn.ConvTranspose2d(ngf * 4, ngf * 2, 4, 2, 1, bias=False), 241 | nn.BatchNorm2d(ngf * 2), 242 | nn.ReLU(inplace=True), 243 | # state size. (ngf*2) x 16 x 16 244 | nn.ConvTranspose2d(ngf * 2, ngf, 4, 2, 1, bias=False), 245 | nn.BatchNorm2d(ngf), 246 | nn.ReLU(inplace=True), 247 | # state size. (ngf) x 32 x 32 248 | nn.ConvTranspose2d( ngf, nc, 4, 2, 1, bias=False), 249 | nn.Tanh() 250 | # state size. 
(nc) x 96 x 96 251 | ) 252 | def forward(self, input): 253 | input = self.fcs(input.view(-1,nz)) 254 | gpu_ids = None 255 | if isinstance(input.data, torch.cuda.FloatTensor) and self.ngpu > 1: 256 | gpu_ids = range(self.ngpu) 257 | input = input.view(-1,1024,1,1) 258 | return nn.parallel.data_parallel(self.convs, input, gpu_ids) 259 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Pytorch Converter 2 | Pytorch model to Caffe & [ncnn](https://github.com/Tencent/ncnn) 3 | 4 | ## Model Examples 5 | - SqueezeNet from torchvision 6 | - DenseNet from torchvision 7 | - [ResNet50](https://drive.google.com/file/d/0B5B31rlbCRZfcS1rY3BtVWhDREk/view?usp=sharing) (with ceiling_mode=True) 8 | - MobileNet 9 | - AnimeGAN pretrained model from author (https://github.com/jayleicn/animeGAN) 10 | - SSD-like object detection net(for ncnn) 11 | - UNet (no pretrained model yet, just default initialization) 12 | 13 | ## Attentions 14 | - **Mind the difference on ceil_mode of pooling layer among Pytorch and Caffe, ncnn** 15 | - You can convert Pytorch models with all pooling layer's ceil_mode=True. 16 | - Or compile a custom version of Caffe/ncnn with floor() replaced by ceil() in pooling layer inference. 17 | 18 | - **Python Errors: Use Pytorch 0.2.0 Only to Convert Your Model** 19 | - Higher version of pytorch 0.3.0, 0.3.1, 0.4.0 seemingly have blocked third party model conversion. 20 | - Please note that you can still TRAIN your model on pytorch 0.3.0~0.4.0. The converter running on 0.2.0 could still load higher version models correctly. 21 | 22 | - **Other Python packages requirements:** 23 | - to Caffe: numpy, protobuf (to gen caffe proto) 24 | - to ncnn: numpy 25 | - for testing Caffe result: pycaffe, cv2 26 | 27 | - **Model Loading Error** 28 | - Use compatible model saving & loading method, e.g. 
29 | 30 | ``` 31 | # Saving, notice the difference on DataParallel 32 | net_for_saving = net.module if use_nn_DataParallel else net 33 | torch.save(net_for_saving.state_dict(), path) 34 | 35 | # Loading 36 | net.load_state_dict(torch.load(path, map_location=lambda storage, loc: storage)) 37 | ``` 38 | -------------------------------------------------------------------------------- /TestData/2008_000536.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/starimeL/PytorchConverter/75fbdb3d52da9ee64db509ecdf221dd102402579/TestData/2008_000536.jpg -------------------------------------------------------------------------------- /TestData/2008_001171.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/starimeL/PytorchConverter/75fbdb3d52da9ee64db509ecdf221dd102402579/TestData/2008_001171.jpg -------------------------------------------------------------------------------- /TestData/2008_001601.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/starimeL/PytorchConverter/75fbdb3d52da9ee64db509ecdf221dd102402579/TestData/2008_001601.jpg -------------------------------------------------------------------------------- /TestData/2008_001841.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/starimeL/PytorchConverter/75fbdb3d52da9ee64db509ecdf221dd102402579/TestData/2008_001841.jpg -------------------------------------------------------------------------------- /TestData/227-2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/starimeL/PytorchConverter/75fbdb3d52da9ee64db509ecdf221dd102402579/TestData/227-2.jpg -------------------------------------------------------------------------------- /TestData/227-3.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/starimeL/PytorchConverter/75fbdb3d52da9ee64db509ecdf221dd102402579/TestData/227-3.jpg -------------------------------------------------------------------------------- /TestData/227.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/starimeL/PytorchConverter/75fbdb3d52da9ee64db509ecdf221dd102402579/TestData/227.jpg -------------------------------------------------------------------------------- /TestData/ImageNetLabels.txt: -------------------------------------------------------------------------------- 1 | 0: 'tench, Tinca tinca', 2 | 1: 'goldfish, Carassius auratus', 3 | 2: 'great white shark, white shark, man-eater, man-eating shark, Carcharodon carcharias', 4 | 3: 'tiger shark, Galeocerdo cuvieri', 5 | 4: 'hammerhead, hammerhead shark', 6 | 5: 'electric ray, crampfish, numbfish, torpedo', 7 | 6: 'stingray', 8 | 7: 'cock', 9 | 8: 'hen', 10 | 9: 'ostrich, Struthio camelus', 11 | 10: 'brambling, Fringilla montifringilla', 12 | 11: 'goldfinch, Carduelis carduelis', 13 | 12: 'house finch, linnet, Carpodacus mexicanus', 14 | 13: 'junco, snowbird', 15 | 14: 'indigo bunting, indigo finch, indigo bird, Passerina cyanea', 16 | 15: 'robin, American robin, Turdus migratorius', 17 | 16: 'bulbul', 18 | 17: 'jay', 19 | 18: 'magpie', 20 | 19: 'chickadee', 21 | 20: 'water ouzel, dipper', 22 | 21: 'kite', 23 | 22: 'bald eagle, American eagle, Haliaeetus leucocephalus', 24 | 23: 'vulture', 25 | 24: 'great grey owl, great gray owl, Strix nebulosa', 26 | 25: 'European fire salamander, Salamandra salamandra', 27 | 26: 'common newt, Triturus vulgaris', 28 | 27: 'eft', 29 | 28: 'spotted salamander, Ambystoma maculatum', 30 | 29: 'axolotl, mud puppy, Ambystoma mexicanum', 31 | 30: 'bullfrog, Rana catesbeiana', 32 | 31: 'tree frog, tree-frog', 33 | 32: 'tailed frog, bell toad, ribbed toad, tailed toad, 
Ascaphus trui', 34 | 33: 'loggerhead, loggerhead turtle, Caretta caretta', 35 | 34: 'leatherback turtle, leatherback, leathery turtle, Dermochelys coriacea', 36 | 35: 'mud turtle', 37 | 36: 'terrapin', 38 | 37: 'box turtle, box tortoise', 39 | 38: 'banded gecko', 40 | 39: 'common iguana, iguana, Iguana iguana', 41 | 40: 'American chameleon, anole, Anolis carolinensis', 42 | 41: 'whiptail, whiptail lizard', 43 | 42: 'agama', 44 | 43: 'frilled lizard, Chlamydosaurus kingi', 45 | 44: 'alligator lizard', 46 | 45: 'Gila monster, Heloderma suspectum', 47 | 46: 'green lizard, Lacerta viridis', 48 | 47: 'African chameleon, Chamaeleo chamaeleon', 49 | 48: 'Komodo dragon, Komodo lizard, dragon lizard, giant lizard, Varanus komodoensis', 50 | 49: 'African crocodile, Nile crocodile, Crocodylus niloticus', 51 | 50: 'American alligator, Alligator mississipiensis', 52 | 51: 'triceratops', 53 | 52: 'thunder snake, worm snake, Carphophis amoenus', 54 | 53: 'ringneck snake, ring-necked snake, ring snake', 55 | 54: 'hognose snake, puff adder, sand viper', 56 | 55: 'green snake, grass snake', 57 | 56: 'king snake, kingsnake', 58 | 57: 'garter snake, grass snake', 59 | 58: 'water snake', 60 | 59: 'vine snake', 61 | 60: 'night snake, Hypsiglena torquata', 62 | 61: 'boa constrictor, Constrictor constrictor', 63 | 62: 'rock python, rock snake, Python sebae', 64 | 63: 'Indian cobra, Naja naja', 65 | 64: 'green mamba', 66 | 65: 'sea snake', 67 | 66: 'horned viper, cerastes, sand viper, horned asp, Cerastes cornutus', 68 | 67: 'diamondback, diamondback rattlesnake, Crotalus adamanteus', 69 | 68: 'sidewinder, horned rattlesnake, Crotalus cerastes', 70 | 69: 'trilobite', 71 | 70: 'harvestman, daddy longlegs, Phalangium opilio', 72 | 71: 'scorpion', 73 | 72: 'black and gold garden spider, Argiope aurantia', 74 | 73: 'barn spider, Araneus cavaticus', 75 | 74: 'garden spider, Aranea diademata', 76 | 75: 'black widow, Latrodectus mactans', 77 | 76: 'tarantula', 78 | 77: 'wolf spider, hunting 
spider', 79 | 78: 'tick', 80 | 79: 'centipede', 81 | 80: 'black grouse', 82 | 81: 'ptarmigan', 83 | 82: 'ruffed grouse, partridge, Bonasa umbellus', 84 | 83: 'prairie chicken, prairie grouse, prairie fowl', 85 | 84: 'peacock', 86 | 85: 'quail', 87 | 86: 'partridge', 88 | 87: 'African grey, African gray, Psittacus erithacus', 89 | 88: 'macaw', 90 | 89: 'sulphur-crested cockatoo, Kakatoe galerita, Cacatua galerita', 91 | 90: 'lorikeet', 92 | 91: 'coucal', 93 | 92: 'bee eater', 94 | 93: 'hornbill', 95 | 94: 'hummingbird', 96 | 95: 'jacamar', 97 | 96: 'toucan', 98 | 97: 'drake', 99 | 98: 'red-breasted merganser, Mergus serrator', 100 | 99: 'goose', 101 | 100: 'black swan, Cygnus atratus', 102 | 101: 'tusker', 103 | 102: 'echidna, spiny anteater, anteater', 104 | 103: 'platypus, duckbill, duckbilled platypus, duck-billed platypus, Ornithorhynchus anatinus', 105 | 104: 'wallaby, brush kangaroo', 106 | 105: 'koala, koala bear, kangaroo bear, native bear, Phascolarctos cinereus', 107 | 106: 'wombat', 108 | 107: 'jellyfish', 109 | 108: 'sea anemone, anemone', 110 | 109: 'brain coral', 111 | 110: 'flatworm, platyhelminth', 112 | 111: 'nematode, nematode worm, roundworm', 113 | 112: 'conch', 114 | 113: 'snail', 115 | 114: 'slug', 116 | 115: 'sea slug, nudibranch', 117 | 116: 'chiton, coat-of-mail shell, sea cradle, polyplacophore', 118 | 117: 'chambered nautilus, pearly nautilus, nautilus', 119 | 118: 'Dungeness crab, Cancer magister', 120 | 119: 'rock crab, Cancer irroratus', 121 | 120: 'fiddler crab', 122 | 121: 'king crab, Alaska crab, Alaskan king crab, Alaska king crab, Paralithodes camtschatica', 123 | 122: 'American lobster, Northern lobster, Maine lobster, Homarus americanus', 124 | 123: 'spiny lobster, langouste, rock lobster, crawfish, crayfish, sea crawfish', 125 | 124: 'crayfish, crawfish, crawdad, crawdaddy', 126 | 125: 'hermit crab', 127 | 126: 'isopod', 128 | 127: 'white stork, Ciconia ciconia', 129 | 128: 'black stork, Ciconia nigra', 130 | 129: 'spoonbill', 
131 | 130: 'flamingo', 132 | 131: 'little blue heron, Egretta caerulea', 133 | 132: 'American egret, great white heron, Egretta albus', 134 | 133: 'bittern', 135 | 134: 'crane', 136 | 135: 'limpkin, Aramus pictus', 137 | 136: 'European gallinule, Porphyrio porphyrio', 138 | 137: 'American coot, marsh hen, mud hen, water hen, Fulica americana', 139 | 138: 'bustard', 140 | 139: 'ruddy turnstone, Arenaria interpres', 141 | 140: 'red-backed sandpiper, dunlin, Erolia alpina', 142 | 141: 'redshank, Tringa totanus', 143 | 142: 'dowitcher', 144 | 143: 'oystercatcher, oyster catcher', 145 | 144: 'pelican', 146 | 145: 'king penguin, Aptenodytes patagonica', 147 | 146: 'albatross, mollymawk', 148 | 147: 'grey whale, gray whale, devilfish, Eschrichtius gibbosus, Eschrichtius robustus', 149 | 148: 'killer whale, killer, orca, grampus, sea wolf, Orcinus orca', 150 | 149: 'dugong, Dugong dugon', 151 | 150: 'sea lion', 152 | 151: 'Chihuahua', 153 | 152: 'Japanese spaniel', 154 | 153: 'Maltese dog, Maltese terrier, Maltese', 155 | 154: 'Pekinese, Pekingese, Peke', 156 | 155: 'Shih-Tzu', 157 | 156: 'Blenheim spaniel', 158 | 157: 'papillon', 159 | 158: 'toy terrier', 160 | 159: 'Rhodesian ridgeback', 161 | 160: 'Afghan hound, Afghan', 162 | 161: 'basset, basset hound', 163 | 162: 'beagle', 164 | 163: 'bloodhound, sleuthhound', 165 | 164: 'bluetick', 166 | 165: 'black-and-tan coonhound', 167 | 166: 'Walker hound, Walker foxhound', 168 | 167: 'English foxhound', 169 | 168: 'redbone', 170 | 169: 'borzoi, Russian wolfhound', 171 | 170: 'Irish wolfhound', 172 | 171: 'Italian greyhound', 173 | 172: 'whippet', 174 | 173: 'Ibizan hound, Ibizan Podenco', 175 | 174: 'Norwegian elkhound, elkhound', 176 | 175: 'otterhound, otter hound', 177 | 176: 'Saluki, gazelle hound', 178 | 177: 'Scottish deerhound, deerhound', 179 | 178: 'Weimaraner', 180 | 179: 'Staffordshire bullterrier, Staffordshire bull terrier', 181 | 180: 'American Staffordshire terrier, Staffordshire terrier, American pit bull 
terrier, pit bull terrier', 182 | 181: 'Bedlington terrier', 183 | 182: 'Border terrier', 184 | 183: 'Kerry blue terrier', 185 | 184: 'Irish terrier', 186 | 185: 'Norfolk terrier', 187 | 186: 'Norwich terrier', 188 | 187: 'Yorkshire terrier', 189 | 188: 'wire-haired fox terrier', 190 | 189: 'Lakeland terrier', 191 | 190: 'Sealyham terrier, Sealyham', 192 | 191: 'Airedale, Airedale terrier', 193 | 192: 'cairn, cairn terrier', 194 | 193: 'Australian terrier', 195 | 194: 'Dandie Dinmont, Dandie Dinmont terrier', 196 | 195: 'Boston bull, Boston terrier', 197 | 196: 'miniature schnauzer', 198 | 197: 'giant schnauzer', 199 | 198: 'standard schnauzer', 200 | 199: 'Scotch terrier, Scottish terrier, Scottie', 201 | 200: 'Tibetan terrier, chrysanthemum dog', 202 | 201: 'silky terrier, Sydney silky', 203 | 202: 'soft-coated wheaten terrier', 204 | 203: 'West Highland white terrier', 205 | 204: 'Lhasa, Lhasa apso', 206 | 205: 'flat-coated retriever', 207 | 206: 'curly-coated retriever', 208 | 207: 'golden retriever', 209 | 208: 'Labrador retriever', 210 | 209: 'Chesapeake Bay retriever', 211 | 210: 'German short-haired pointer', 212 | 211: 'vizsla, Hungarian pointer', 213 | 212: 'English setter', 214 | 213: 'Irish setter, red setter', 215 | 214: 'Gordon setter', 216 | 215: 'Brittany spaniel', 217 | 216: 'clumber, clumber spaniel', 218 | 217: 'English springer, English springer spaniel', 219 | 218: 'Welsh springer spaniel', 220 | 219: 'cocker spaniel, English cocker spaniel, cocker', 221 | 220: 'Sussex spaniel', 222 | 221: 'Irish water spaniel', 223 | 222: 'kuvasz', 224 | 223: 'schipperke', 225 | 224: 'groenendael', 226 | 225: 'malinois', 227 | 226: 'briard', 228 | 227: 'kelpie', 229 | 228: 'komondor', 230 | 229: 'Old English sheepdog, bobtail', 231 | 230: 'Shetland sheepdog, Shetland sheep dog, Shetland', 232 | 231: 'collie', 233 | 232: 'Border collie', 234 | 233: 'Bouvier des Flandres, Bouviers des Flandres', 235 | 234: 'Rottweiler', 236 | 235: 'German shepherd, German 
shepherd dog, German police dog, alsatian', 237 | 236: 'Doberman, Doberman pinscher', 238 | 237: 'miniature pinscher', 239 | 238: 'Greater Swiss Mountain dog', 240 | 239: 'Bernese mountain dog', 241 | 240: 'Appenzeller', 242 | 241: 'EntleBucher', 243 | 242: 'boxer', 244 | 243: 'bull mastiff', 245 | 244: 'Tibetan mastiff', 246 | 245: 'French bulldog', 247 | 246: 'Great Dane', 248 | 247: 'Saint Bernard, St Bernard', 249 | 248: 'Eskimo dog, husky', 250 | 249: 'malamute, malemute, Alaskan malamute', 251 | 250: 'Siberian husky', 252 | 251: 'dalmatian, coach dog, carriage dog', 253 | 252: 'affenpinscher, monkey pinscher, monkey dog', 254 | 253: 'basenji', 255 | 254: 'pug, pug-dog', 256 | 255: 'Leonberg', 257 | 256: 'Newfoundland, Newfoundland dog', 258 | 257: 'Great Pyrenees', 259 | 258: 'Samoyed, Samoyede', 260 | 259: 'Pomeranian', 261 | 260: 'chow, chow chow', 262 | 261: 'keeshond', 263 | 262: 'Brabancon griffon', 264 | 263: 'Pembroke, Pembroke Welsh corgi', 265 | 264: 'Cardigan, Cardigan Welsh corgi', 266 | 265: 'toy poodle', 267 | 266: 'miniature poodle', 268 | 267: 'standard poodle', 269 | 268: 'Mexican hairless', 270 | 269: 'timber wolf, grey wolf, gray wolf, Canis lupus', 271 | 270: 'white wolf, Arctic wolf, Canis lupus tundrarum', 272 | 271: 'red wolf, maned wolf, Canis rufus, Canis niger', 273 | 272: 'coyote, prairie wolf, brush wolf, Canis latrans', 274 | 273: 'dingo, warrigal, warragal, Canis dingo', 275 | 274: 'dhole, Cuon alpinus', 276 | 275: 'African hunting dog, hyena dog, Cape hunting dog, Lycaon pictus', 277 | 276: 'hyena, hyaena', 278 | 277: 'red fox, Vulpes vulpes', 279 | 278: 'kit fox, Vulpes macrotis', 280 | 279: 'Arctic fox, white fox, Alopex lagopus', 281 | 280: 'grey fox, gray fox, Urocyon cinereoargenteus', 282 | 281: 'tabby, tabby cat', 283 | 282: 'tiger cat', 284 | 283: 'Persian cat', 285 | 284: 'Siamese cat, Siamese', 286 | 285: 'Egyptian cat', 287 | 286: 'cougar, puma, catamount, mountain lion, painter, panther, Felis concolor', 288 | 287: 
'lynx, catamount', 289 | 288: 'leopard, Panthera pardus', 290 | 289: 'snow leopard, ounce, Panthera uncia', 291 | 290: 'jaguar, panther, Panthera onca, Felis onca', 292 | 291: 'lion, king of beasts, Panthera leo', 293 | 292: 'tiger, Panthera tigris', 294 | 293: 'cheetah, chetah, Acinonyx jubatus', 295 | 294: 'brown bear, bruin, Ursus arctos', 296 | 295: 'American black bear, black bear, Ursus americanus, Euarctos americanus', 297 | 296: 'ice bear, polar bear, Ursus Maritimus, Thalarctos maritimus', 298 | 297: 'sloth bear, Melursus ursinus, Ursus ursinus', 299 | 298: 'mongoose', 300 | 299: 'meerkat, mierkat', 301 | 300: 'tiger beetle', 302 | 301: 'ladybug, ladybeetle, lady beetle, ladybird, ladybird beetle', 303 | 302: 'ground beetle, carabid beetle', 304 | 303: 'long-horned beetle, longicorn, longicorn beetle', 305 | 304: 'leaf beetle, chrysomelid', 306 | 305: 'dung beetle', 307 | 306: 'rhinoceros beetle', 308 | 307: 'weevil', 309 | 308: 'fly', 310 | 309: 'bee', 311 | 310: 'ant, emmet, pismire', 312 | 311: 'grasshopper, hopper', 313 | 312: 'cricket', 314 | 313: 'walking stick, walkingstick, stick insect', 315 | 314: 'cockroach, roach', 316 | 315: 'mantis, mantid', 317 | 316: 'cicada, cicala', 318 | 317: 'leafhopper', 319 | 318: 'lacewing, lacewing fly', 320 | 319: "dragonfly, darning needle, devil's darning needle, sewing needle, snake feeder, snake doctor, mosquito hawk, skeeter hawk", 321 | 320: 'damselfly', 322 | 321: 'admiral', 323 | 322: 'ringlet, ringlet butterfly', 324 | 323: 'monarch, monarch butterfly, milkweed butterfly, Danaus plexippus', 325 | 324: 'cabbage butterfly', 326 | 325: 'sulphur butterfly, sulfur butterfly', 327 | 326: 'lycaenid, lycaenid butterfly', 328 | 327: 'starfish, sea star', 329 | 328: 'sea urchin', 330 | 329: 'sea cucumber, holothurian', 331 | 330: 'wood rabbit, cottontail, cottontail rabbit', 332 | 331: 'hare', 333 | 332: 'Angora, Angora rabbit', 334 | 333: 'hamster', 335 | 334: 'porcupine, hedgehog', 336 | 335: 'fox squirrel, 
eastern fox squirrel, Sciurus niger', 337 | 336: 'marmot', 338 | 337: 'beaver', 339 | 338: 'guinea pig, Cavia cobaya', 340 | 339: 'sorrel', 341 | 340: 'zebra', 342 | 341: 'hog, pig, grunter, squealer, Sus scrofa', 343 | 342: 'wild boar, boar, Sus scrofa', 344 | 343: 'warthog', 345 | 344: 'hippopotamus, hippo, river horse, Hippopotamus amphibius', 346 | 345: 'ox', 347 | 346: 'water buffalo, water ox, Asiatic buffalo, Bubalus bubalis', 348 | 347: 'bison', 349 | 348: 'ram, tup', 350 | 349: 'bighorn, bighorn sheep, cimarron, Rocky Mountain bighorn, Rocky Mountain sheep, Ovis canadensis', 351 | 350: 'ibex, Capra ibex', 352 | 351: 'hartebeest', 353 | 352: 'impala, Aepyceros melampus', 354 | 353: 'gazelle', 355 | 354: 'Arabian camel, dromedary, Camelus dromedarius', 356 | 355: 'llama', 357 | 356: 'weasel', 358 | 357: 'mink', 359 | 358: 'polecat, fitch, foulmart, foumart, Mustela putorius', 360 | 359: 'black-footed ferret, ferret, Mustela nigripes', 361 | 360: 'otter', 362 | 361: 'skunk, polecat, wood pussy', 363 | 362: 'badger', 364 | 363: 'armadillo', 365 | 364: 'three-toed sloth, ai, Bradypus tridactylus', 366 | 365: 'orangutan, orang, orangutang, Pongo pygmaeus', 367 | 366: 'gorilla, Gorilla gorilla', 368 | 367: 'chimpanzee, chimp, Pan troglodytes', 369 | 368: 'gibbon, Hylobates lar', 370 | 369: 'siamang, Hylobates syndactylus, Symphalangus syndactylus', 371 | 370: 'guenon, guenon monkey', 372 | 371: 'patas, hussar monkey, Erythrocebus patas', 373 | 372: 'baboon', 374 | 373: 'macaque', 375 | 374: 'langur', 376 | 375: 'colobus, colobus monkey', 377 | 376: 'proboscis monkey, Nasalis larvatus', 378 | 377: 'marmoset', 379 | 378: 'capuchin, ringtail, Cebus capucinus', 380 | 379: 'howler monkey, howler', 381 | 380: 'titi, titi monkey', 382 | 381: 'spider monkey, Ateles geoffroyi', 383 | 382: 'squirrel monkey, Saimiri sciureus', 384 | 383: 'Madagascar cat, ring-tailed lemur, Lemur catta', 385 | 384: 'indri, indris, Indri indri, Indri brevicaudatus', 386 | 385: 'Indian 
elephant, Elephas maximus', 387 | 386: 'African elephant, Loxodonta africana', 388 | 387: 'lesser panda, red panda, panda, bear cat, cat bear, Ailurus fulgens', 389 | 388: 'giant panda, panda, panda bear, coon bear, Ailuropoda melanoleuca', 390 | 389: 'barracouta, snoek', 391 | 390: 'eel', 392 | 391: 'coho, cohoe, coho salmon, blue jack, silver salmon, Oncorhynchus kisutch', 393 | 392: 'rock beauty, Holocanthus tricolor', 394 | 393: 'anemone fish', 395 | 394: 'sturgeon', 396 | 395: 'gar, garfish, garpike, billfish, Lepisosteus osseus', 397 | 396: 'lionfish', 398 | 397: 'puffer, pufferfish, blowfish, globefish', 399 | 398: 'abacus', 400 | 399: 'abaya', 401 | 400: "academic gown, academic robe, judge's robe", 402 | 401: 'accordion, piano accordion, squeeze box', 403 | 402: 'acoustic guitar', 404 | 403: 'aircraft carrier, carrier, flattop, attack aircraft carrier', 405 | 404: 'airliner', 406 | 405: 'airship, dirigible', 407 | 406: 'altar', 408 | 407: 'ambulance', 409 | 408: 'amphibian, amphibious vehicle', 410 | 409: 'analog clock', 411 | 410: 'apiary, bee house', 412 | 411: 'apron', 413 | 412: 'ashcan, trash can, garbage can, wastebin, ash bin, ash-bin, ashbin, dustbin, trash barrel, trash bin', 414 | 413: 'assault rifle, assault gun', 415 | 414: 'backpack, back pack, knapsack, packsack, rucksack, haversack', 416 | 415: 'bakery, bakeshop, bakehouse', 417 | 416: 'balance beam, beam', 418 | 417: 'balloon', 419 | 418: 'ballpoint, ballpoint pen, ballpen, Biro', 420 | 419: 'Band Aid', 421 | 420: 'banjo', 422 | 421: 'bannister, banister, balustrade, balusters, handrail', 423 | 422: 'barbell', 424 | 423: 'barber chair', 425 | 424: 'barbershop', 426 | 425: 'barn', 427 | 426: 'barometer', 428 | 427: 'barrel, cask', 429 | 428: 'barrow, garden cart, lawn cart, wheelbarrow', 430 | 429: 'baseball', 431 | 430: 'basketball', 432 | 431: 'bassinet', 433 | 432: 'bassoon', 434 | 433: 'bathing cap, swimming cap', 435 | 434: 'bath towel', 436 | 435: 'bathtub, bathing tub, bath, tub', 437 
| 436: 'beach wagon, station wagon, wagon, estate car, beach waggon, station waggon, waggon', 438 | 437: 'beacon, lighthouse, beacon light, pharos', 439 | 438: 'beaker', 440 | 439: 'bearskin, busby, shako', 441 | 440: 'beer bottle', 442 | 441: 'beer glass', 443 | 442: 'bell cote, bell cot', 444 | 443: 'bib', 445 | 444: 'bicycle-built-for-two, tandem bicycle, tandem', 446 | 445: 'bikini, two-piece', 447 | 446: 'binder, ring-binder', 448 | 447: 'binoculars, field glasses, opera glasses', 449 | 448: 'birdhouse', 450 | 449: 'boathouse', 451 | 450: 'bobsled, bobsleigh, bob', 452 | 451: 'bolo tie, bolo, bola tie, bola', 453 | 452: 'bonnet, poke bonnet', 454 | 453: 'bookcase', 455 | 454: 'bookshop, bookstore, bookstall', 456 | 455: 'bottlecap', 457 | 456: 'bow', 458 | 457: 'bow tie, bow-tie, bowtie', 459 | 458: 'brass, memorial tablet, plaque', 460 | 459: 'brassiere, bra, bandeau', 461 | 460: 'breakwater, groin, groyne, mole, bulwark, seawall, jetty', 462 | 461: 'breastplate, aegis, egis', 463 | 462: 'broom', 464 | 463: 'bucket, pail', 465 | 464: 'buckle', 466 | 465: 'bulletproof vest', 467 | 466: 'bullet train, bullet', 468 | 467: 'butcher shop, meat market', 469 | 468: 'cab, hack, taxi, taxicab', 470 | 469: 'caldron, cauldron', 471 | 470: 'candle, taper, wax light', 472 | 471: 'cannon', 473 | 472: 'canoe', 474 | 473: 'can opener, tin opener', 475 | 474: 'cardigan', 476 | 475: 'car mirror', 477 | 476: 'carousel, carrousel, merry-go-round, roundabout, whirligig', 478 | 477: "carpenter's kit, tool kit", 479 | 478: 'carton', 480 | 479: 'car wheel', 481 | 480: 'cash machine, cash dispenser, automated teller machine, automatic teller machine, automated teller, automatic teller, ATM', 482 | 481: 'cassette', 483 | 482: 'cassette player', 484 | 483: 'castle', 485 | 484: 'catamaran', 486 | 485: 'CD player', 487 | 486: 'cello, violoncello', 488 | 487: 'cellular telephone, cellular phone, cellphone, cell, mobile phone', 489 | 488: 'chain', 490 | 489: 'chainlink fence', 491 | 490: 
'chain mail, ring mail, mail, chain armor, chain armour, ring armor, ring armour', 492 | 491: 'chain saw, chainsaw', 493 | 492: 'chest', 494 | 493: 'chiffonier, commode', 495 | 494: 'chime, bell, gong', 496 | 495: 'china cabinet, china closet', 497 | 496: 'Christmas stocking', 498 | 497: 'church, church building', 499 | 498: 'cinema, movie theater, movie theatre, movie house, picture palace', 500 | 499: 'cleaver, meat cleaver, chopper', 501 | 500: 'cliff dwelling', 502 | 501: 'cloak', 503 | 502: 'clog, geta, patten, sabot', 504 | 503: 'cocktail shaker', 505 | 504: 'coffee mug', 506 | 505: 'coffeepot', 507 | 506: 'coil, spiral, volute, whorl, helix', 508 | 507: 'combination lock', 509 | 508: 'computer keyboard, keypad', 510 | 509: 'confectionery, confectionary, candy store', 511 | 510: 'container ship, containership, container vessel', 512 | 511: 'convertible', 513 | 512: 'corkscrew, bottle screw', 514 | 513: 'cornet, horn, trumpet, trump', 515 | 514: 'cowboy boot', 516 | 515: 'cowboy hat, ten-gallon hat', 517 | 516: 'cradle', 518 | 517: 'crane', 519 | 518: 'crash helmet', 520 | 519: 'crate', 521 | 520: 'crib, cot', 522 | 521: 'Crock Pot', 523 | 522: 'croquet ball', 524 | 523: 'crutch', 525 | 524: 'cuirass', 526 | 525: 'dam, dike, dyke', 527 | 526: 'desk', 528 | 527: 'desktop computer', 529 | 528: 'dial telephone, dial phone', 530 | 529: 'diaper, nappy, napkin', 531 | 530: 'digital clock', 532 | 531: 'digital watch', 533 | 532: 'dining table, board', 534 | 533: 'dishrag, dishcloth', 535 | 534: 'dishwasher, dish washer, dishwashing machine', 536 | 535: 'disk brake, disc brake', 537 | 536: 'dock, dockage, docking facility', 538 | 537: 'dogsled, dog sled, dog sleigh', 539 | 538: 'dome', 540 | 539: 'doormat, welcome mat', 541 | 540: 'drilling platform, offshore rig', 542 | 541: 'drum, membranophone, tympan', 543 | 542: 'drumstick', 544 | 543: 'dumbbell', 545 | 544: 'Dutch oven', 546 | 545: 'electric fan, blower', 547 | 546: 'electric guitar', 548 | 547: 'electric 
locomotive', 549 | 548: 'entertainment center', 550 | 549: 'envelope', 551 | 550: 'espresso maker', 552 | 551: 'face powder', 553 | 552: 'feather boa, boa', 554 | 553: 'file, file cabinet, filing cabinet', 555 | 554: 'fireboat', 556 | 555: 'fire engine, fire truck', 557 | 556: 'fire screen, fireguard', 558 | 557: 'flagpole, flagstaff', 559 | 558: 'flute, transverse flute', 560 | 559: 'folding chair', 561 | 560: 'football helmet', 562 | 561: 'forklift', 563 | 562: 'fountain', 564 | 563: 'fountain pen', 565 | 564: 'four-poster', 566 | 565: 'freight car', 567 | 566: 'French horn, horn', 568 | 567: 'frying pan, frypan, skillet', 569 | 568: 'fur coat', 570 | 569: 'garbage truck, dustcart', 571 | 570: 'gasmask, respirator, gas helmet', 572 | 571: 'gas pump, gasoline pump, petrol pump, island dispenser', 573 | 572: 'goblet', 574 | 573: 'go-kart', 575 | 574: 'golf ball', 576 | 575: 'golfcart, golf cart', 577 | 576: 'gondola', 578 | 577: 'gong, tam-tam', 579 | 578: 'gown', 580 | 579: 'grand piano, grand', 581 | 580: 'greenhouse, nursery, glasshouse', 582 | 581: 'grille, radiator grille', 583 | 582: 'grocery store, grocery, food market, market', 584 | 583: 'guillotine', 585 | 584: 'hair slide', 586 | 585: 'hair spray', 587 | 586: 'half track', 588 | 587: 'hammer', 589 | 588: 'hamper', 590 | 589: 'hand blower, blow dryer, blow drier, hair dryer, hair drier', 591 | 590: 'hand-held computer, hand-held microcomputer', 592 | 591: 'handkerchief, hankie, hanky, hankey', 593 | 592: 'hard disc, hard disk, fixed disk', 594 | 593: 'harmonica, mouth organ, harp, mouth harp', 595 | 594: 'harp', 596 | 595: 'harvester, reaper', 597 | 596: 'hatchet', 598 | 597: 'holster', 599 | 598: 'home theater, home theatre', 600 | 599: 'honeycomb', 601 | 600: 'hook, claw', 602 | 601: 'hoopskirt, crinoline', 603 | 602: 'horizontal bar, high bar', 604 | 603: 'horse cart, horse-cart', 605 | 604: 'hourglass', 606 | 605: 'iPod', 607 | 606: 'iron, smoothing iron', 608 | 607: "jack-o'-lantern", 609 | 608: 
'jean, blue jean, denim', 610 | 609: 'jeep, landrover', 611 | 610: 'jersey, T-shirt, tee shirt', 612 | 611: 'jigsaw puzzle', 613 | 612: 'jinrikisha, ricksha, rickshaw', 614 | 613: 'joystick', 615 | 614: 'kimono', 616 | 615: 'knee pad', 617 | 616: 'knot', 618 | 617: 'lab coat, laboratory coat', 619 | 618: 'ladle', 620 | 619: 'lampshade, lamp shade', 621 | 620: 'laptop, laptop computer', 622 | 621: 'lawn mower, mower', 623 | 622: 'lens cap, lens cover', 624 | 623: 'letter opener, paper knife, paperknife', 625 | 624: 'library', 626 | 625: 'lifeboat', 627 | 626: 'lighter, light, igniter, ignitor', 628 | 627: 'limousine, limo', 629 | 628: 'liner, ocean liner', 630 | 629: 'lipstick, lip rouge', 631 | 630: 'Loafer', 632 | 631: 'lotion', 633 | 632: 'loudspeaker, speaker, speaker unit, loudspeaker system, speaker system', 634 | 633: "loupe, jeweler's loupe", 635 | 634: 'lumbermill, sawmill', 636 | 635: 'magnetic compass', 637 | 636: 'mailbag, postbag', 638 | 637: 'mailbox, letter box', 639 | 638: 'maillot', 640 | 639: 'maillot, tank suit', 641 | 640: 'manhole cover', 642 | 641: 'maraca', 643 | 642: 'marimba, xylophone', 644 | 643: 'mask', 645 | 644: 'matchstick', 646 | 645: 'maypole', 647 | 646: 'maze, labyrinth', 648 | 647: 'measuring cup', 649 | 648: 'medicine chest, medicine cabinet', 650 | 649: 'megalith, megalithic structure', 651 | 650: 'microphone, mike', 652 | 651: 'microwave, microwave oven', 653 | 652: 'military uniform', 654 | 653: 'milk can', 655 | 654: 'minibus', 656 | 655: 'miniskirt, mini', 657 | 656: 'minivan', 658 | 657: 'missile', 659 | 658: 'mitten', 660 | 659: 'mixing bowl', 661 | 660: 'mobile home, manufactured home', 662 | 661: 'Model T', 663 | 662: 'modem', 664 | 663: 'monastery', 665 | 664: 'monitor', 666 | 665: 'moped', 667 | 666: 'mortar', 668 | 667: 'mortarboard', 669 | 668: 'mosque', 670 | 669: 'mosquito net', 671 | 670: 'motor scooter, scooter', 672 | 671: 'mountain bike, all-terrain bike, off-roader', 673 | 672: 'mountain tent', 674 | 673: 
'mouse, computer mouse', 675 | 674: 'mousetrap', 676 | 675: 'moving van', 677 | 676: 'muzzle', 678 | 677: 'nail', 679 | 678: 'neck brace', 680 | 679: 'necklace', 681 | 680: 'nipple', 682 | 681: 'notebook, notebook computer', 683 | 682: 'obelisk', 684 | 683: 'oboe, hautboy, hautbois', 685 | 684: 'ocarina, sweet potato', 686 | 685: 'odometer, hodometer, mileometer, milometer', 687 | 686: 'oil filter', 688 | 687: 'organ, pipe organ', 689 | 688: 'oscilloscope, scope, cathode-ray oscilloscope, CRO', 690 | 689: 'overskirt', 691 | 690: 'oxcart', 692 | 691: 'oxygen mask', 693 | 692: 'packet', 694 | 693: 'paddle, boat paddle', 695 | 694: 'paddlewheel, paddle wheel', 696 | 695: 'padlock', 697 | 696: 'paintbrush', 698 | 697: "pajama, pyjama, pj's, jammies", 699 | 698: 'palace', 700 | 699: 'panpipe, pandean pipe, syrinx', 701 | 700: 'paper towel', 702 | 701: 'parachute, chute', 703 | 702: 'parallel bars, bars', 704 | 703: 'park bench', 705 | 704: 'parking meter', 706 | 705: 'passenger car, coach, carriage', 707 | 706: 'patio, terrace', 708 | 707: 'pay-phone, pay-station', 709 | 708: 'pedestal, plinth, footstall', 710 | 709: 'pencil box, pencil case', 711 | 710: 'pencil sharpener', 712 | 711: 'perfume, essence', 713 | 712: 'Petri dish', 714 | 713: 'photocopier', 715 | 714: 'pick, plectrum, plectron', 716 | 715: 'pickelhaube', 717 | 716: 'picket fence, paling', 718 | 717: 'pickup, pickup truck', 719 | 718: 'pier', 720 | 719: 'piggy bank, penny bank', 721 | 720: 'pill bottle', 722 | 721: 'pillow', 723 | 722: 'ping-pong ball', 724 | 723: 'pinwheel', 725 | 724: 'pirate, pirate ship', 726 | 725: 'pitcher, ewer', 727 | 726: "plane, carpenter's plane, woodworking plane", 728 | 727: 'planetarium', 729 | 728: 'plastic bag', 730 | 729: 'plate rack', 731 | 730: 'plow, plough', 732 | 731: "plunger, plumber's helper", 733 | 732: 'Polaroid camera, Polaroid Land camera', 734 | 733: 'pole', 735 | 734: 'police van, police wagon, paddy wagon, patrol wagon, wagon, black Maria', 736 | 735: 
'poncho', 737 | 736: 'pool table, billiard table, snooker table', 738 | 737: 'pop bottle, soda bottle', 739 | 738: 'pot, flowerpot', 740 | 739: "potter's wheel", 741 | 740: 'power drill', 742 | 741: 'prayer rug, prayer mat', 743 | 742: 'printer', 744 | 743: 'prison, prison house', 745 | 744: 'projectile, missile', 746 | 745: 'projector', 747 | 746: 'puck, hockey puck', 748 | 747: 'punching bag, punch bag, punching ball, punchball', 749 | 748: 'purse', 750 | 749: 'quill, quill pen', 751 | 750: 'quilt, comforter, comfort, puff', 752 | 751: 'racer, race car, racing car', 753 | 752: 'racket, racquet', 754 | 753: 'radiator', 755 | 754: 'radio, wireless', 756 | 755: 'radio telescope, radio reflector', 757 | 756: 'rain barrel', 758 | 757: 'recreational vehicle, RV, R.V.', 759 | 758: 'reel', 760 | 759: 'reflex camera', 761 | 760: 'refrigerator, icebox', 762 | 761: 'remote control, remote', 763 | 762: 'restaurant, eating house, eating place, eatery', 764 | 763: 'revolver, six-gun, six-shooter', 765 | 764: 'rifle', 766 | 765: 'rocking chair, rocker', 767 | 766: 'rotisserie', 768 | 767: 'rubber eraser, rubber, pencil eraser', 769 | 768: 'rugby ball', 770 | 769: 'rule, ruler', 771 | 770: 'running shoe', 772 | 771: 'safe', 773 | 772: 'safety pin', 774 | 773: 'saltshaker, salt shaker', 775 | 774: 'sandal', 776 | 775: 'sarong', 777 | 776: 'sax, saxophone', 778 | 777: 'scabbard', 779 | 778: 'scale, weighing machine', 780 | 779: 'school bus', 781 | 780: 'schooner', 782 | 781: 'scoreboard', 783 | 782: 'screen, CRT screen', 784 | 783: 'screw', 785 | 784: 'screwdriver', 786 | 785: 'seat belt, seatbelt', 787 | 786: 'sewing machine', 788 | 787: 'shield, buckler', 789 | 788: 'shoe shop, shoe-shop, shoe store', 790 | 789: 'shoji', 791 | 790: 'shopping basket', 792 | 791: 'shopping cart', 793 | 792: 'shovel', 794 | 793: 'shower cap', 795 | 794: 'shower curtain', 796 | 795: 'ski', 797 | 796: 'ski mask', 798 | 797: 'sleeping bag', 799 | 798: 'slide rule, slipstick', 800 | 799: 'sliding 
door', 801 | 800: 'slot, one-armed bandit', 802 | 801: 'snorkel', 803 | 802: 'snowmobile', 804 | 803: 'snowplow, snowplough', 805 | 804: 'soap dispenser', 806 | 805: 'soccer ball', 807 | 806: 'sock', 808 | 807: 'solar dish, solar collector, solar furnace', 809 | 808: 'sombrero', 810 | 809: 'soup bowl', 811 | 810: 'space bar', 812 | 811: 'space heater', 813 | 812: 'space shuttle', 814 | 813: 'spatula', 815 | 814: 'speedboat', 816 | 815: "spider web, spider's web", 817 | 816: 'spindle', 818 | 817: 'sports car, sport car', 819 | 818: 'spotlight, spot', 820 | 819: 'stage', 821 | 820: 'steam locomotive', 822 | 821: 'steel arch bridge', 823 | 822: 'steel drum', 824 | 823: 'stethoscope', 825 | 824: 'stole', 826 | 825: 'stone wall', 827 | 826: 'stopwatch, stop watch', 828 | 827: 'stove', 829 | 828: 'strainer', 830 | 829: 'streetcar, tram, tramcar, trolley, trolley car', 831 | 830: 'stretcher', 832 | 831: 'studio couch, day bed', 833 | 832: 'stupa, tope', 834 | 833: 'submarine, pigboat, sub, U-boat', 835 | 834: 'suit, suit of clothes', 836 | 835: 'sundial', 837 | 836: 'sunglass', 838 | 837: 'sunglasses, dark glasses, shades', 839 | 838: 'sunscreen, sunblock, sun blocker', 840 | 839: 'suspension bridge', 841 | 840: 'swab, swob, mop', 842 | 841: 'sweatshirt', 843 | 842: 'swimming trunks, bathing trunks', 844 | 843: 'swing', 845 | 844: 'switch, electric switch, electrical switch', 846 | 845: 'syringe', 847 | 846: 'table lamp', 848 | 847: 'tank, army tank, armored combat vehicle, armoured combat vehicle', 849 | 848: 'tape player', 850 | 849: 'teapot', 851 | 850: 'teddy, teddy bear', 852 | 851: 'television, television system', 853 | 852: 'tennis ball', 854 | 853: 'thatch, thatched roof', 855 | 854: 'theater curtain, theatre curtain', 856 | 855: 'thimble', 857 | 856: 'thresher, thrasher, threshing machine', 858 | 857: 'throne', 859 | 858: 'tile roof', 860 | 859: 'toaster', 861 | 860: 'tobacco shop, tobacconist shop, tobacconist', 862 | 861: 'toilet seat', 863 | 862: 'torch', 864 
| 863: 'totem pole', 865 | 864: 'tow truck, tow car, wrecker', 866 | 865: 'toyshop', 867 | 866: 'tractor', 868 | 867: 'trailer truck, tractor trailer, trucking rig, rig, articulated lorry, semi', 869 | 868: 'tray', 870 | 869: 'trench coat', 871 | 870: 'tricycle, trike, velocipede', 872 | 871: 'trimaran', 873 | 872: 'tripod', 874 | 873: 'triumphal arch', 875 | 874: 'trolleybus, trolley coach, trackless trolley', 876 | 875: 'trombone', 877 | 876: 'tub, vat', 878 | 877: 'turnstile', 879 | 878: 'typewriter keyboard', 880 | 879: 'umbrella', 881 | 880: 'unicycle, monocycle', 882 | 881: 'upright, upright piano', 883 | 882: 'vacuum, vacuum cleaner', 884 | 883: 'vase', 885 | 884: 'vault', 886 | 885: 'velvet', 887 | 886: 'vending machine', 888 | 887: 'vestment', 889 | 888: 'viaduct', 890 | 889: 'violin, fiddle', 891 | 890: 'volleyball', 892 | 891: 'waffle iron', 893 | 892: 'wall clock', 894 | 893: 'wallet, billfold, notecase, pocketbook', 895 | 894: 'wardrobe, closet, press', 896 | 895: 'warplane, military plane', 897 | 896: 'washbasin, handbasin, washbowl, lavabo, wash-hand basin', 898 | 897: 'washer, automatic washer, washing machine', 899 | 898: 'water bottle', 900 | 899: 'water jug', 901 | 900: 'water tower', 902 | 901: 'whiskey jug', 903 | 902: 'whistle', 904 | 903: 'wig', 905 | 904: 'window screen', 906 | 905: 'window shade', 907 | 906: 'Windsor tie', 908 | 907: 'wine bottle', 909 | 908: 'wing', 910 | 909: 'wok', 911 | 910: 'wooden spoon', 912 | 911: 'wool, woolen, woollen', 913 | 912: 'worm fence, snake fence, snake-rail fence, Virginia fence', 914 | 913: 'wreck', 915 | 914: 'yawl', 916 | 915: 'yurt', 917 | 916: 'web site, website, internet site, site', 918 | 917: 'comic book', 919 | 918: 'crossword puzzle, crossword', 920 | 919: 'street sign', 921 | 920: 'traffic light, traffic signal, stoplight', 922 | 921: 'book jacket, dust cover, dust jacket, dust wrapper', 923 | 922: 'menu', 924 | 923: 'plate', 925 | 924: 'guacamole', 926 | 925: 'consomme', 927 | 926: 'hot pot, 
hotpot', 928 | 927: 'trifle', 929 | 928: 'ice cream, icecream', 930 | 929: 'ice lolly, lolly, lollipop, popsicle', 931 | 930: 'French loaf', 932 | 931: 'bagel, beigel', 933 | 932: 'pretzel', 934 | 933: 'cheeseburger', 935 | 934: 'hotdog, hot dog, red hot', 936 | 935: 'mashed potato', 937 | 936: 'head cabbage', 938 | 937: 'broccoli', 939 | 938: 'cauliflower', 940 | 939: 'zucchini, courgette', 941 | 940: 'spaghetti squash', 942 | 941: 'acorn squash', 943 | 942: 'butternut squash', 944 | 943: 'cucumber, cuke', 945 | 944: 'artichoke, globe artichoke', 946 | 945: 'bell pepper', 947 | 946: 'cardoon', 948 | 947: 'mushroom', 949 | 948: 'Granny Smith', 950 | 949: 'strawberry', 951 | 950: 'orange', 952 | 951: 'lemon', 953 | 952: 'fig', 954 | 953: 'pineapple, ananas', 955 | 954: 'banana', 956 | 955: 'jackfruit, jak, jack', 957 | 956: 'custard apple', 958 | 957: 'pomegranate', 959 | 958: 'hay', 960 | 959: 'carbonara', 961 | 960: 'chocolate sauce, chocolate syrup', 962 | 961: 'dough', 963 | 962: 'meat loaf, meatloaf', 964 | 963: 'pizza, pizza pie', 965 | 964: 'potpie', 966 | 965: 'burrito', 967 | 966: 'red wine', 968 | 967: 'espresso', 969 | 968: 'cup', 970 | 969: 'eggnog', 971 | 970: 'alp', 972 | 971: 'bubble', 973 | 972: 'cliff, drop, drop-off', 974 | 973: 'coral reef', 975 | 974: 'geyser', 976 | 975: 'lakeside, lakeshore', 977 | 976: 'promontory, headland, head, foreland', 978 | 977: 'sandbar, sand bar', 979 | 978: 'seashore, coast, seacoast, sea-coast', 980 | 979: 'valley, vale', 981 | 980: 'volcano', 982 | 981: 'ballplayer, baseball player', 983 | 982: 'groom, bridegroom', 984 | 983: 'scuba diver', 985 | 984: 'rapeseed', 986 | 985: 'daisy', 987 | 986: "yellow lady's slipper, yellow lady-slipper, Cypripedium calceolus, Cypripedium parviflorum", 988 | 987: 'corn', 989 | 988: 'acorn', 990 | 989: 'hip, rose hip, rosehip', 991 | 990: 'buckeye, horse chestnut, conker', 992 | 991: 'coral fungus', 993 | 992: 'agaric', 994 | 993: 'gyromitra', 995 | 994: 'stinkhorn, carrion fungus', 
def CopyTuple(param):
    """Normalize a size-like argument to a tuple.

    Args:
        param: Either a tuple, which is returned unchanged, or an int, which
            is duplicated into a pair.

    Returns:
        ``param`` itself when it is a tuple, otherwise ``(param, param)``.

    Raises:
        TypeError: If ``param`` is neither a tuple nor an int.
    """
    if isinstance(param, tuple):
        return param
    if isinstance(param, int):
        return param, param
    # ``assert type(param)`` can never fail (a type object is always truthy),
    # so unsupported input used to fall through and return ``None``, which
    # only surfaced later as an unpacking error in the caller.
    raise TypeError(
        "expected an int or a tuple, got {}".format(type(param).__name__))
def flatten(pytorch_layer):
    """Translate a flattening view into a Caffe ``Flatten`` layer.

    The product of every entry of ``pytorch_layer.old_size`` is computed and
    the view is accepted only when ``pytorch_layer.new_sizes[1]`` equals that
    product or is ``-1``; any other target size trips the assertion.

    Returns:
        A ``LayerParameter`` whose type is ``"Flatten"``.
    """
    element_count = 1
    for extent in pytorch_layer.old_size:
        element_count = element_count * extent

    # Only a view that collapses everything into axis 1 is supported.
    assert (pytorch_layer.new_sizes[1] == element_count
            or pytorch_layer.new_sizes[1] == -1)

    flat = pb2.LayerParameter()
    flat.type = "Flatten"
    return flat
def FillBilinear(ch, k):
    """Build per-channel bilinear interpolation weights.

    Args:
        ch: Number of channels; the same kernel is stored for each one.
        k: Side length of the square kernel.

    Returns:
        A float64 array of shape ``(ch, 1, k, k)`` in which every
        ``[i, 0]`` slice holds the same ``k x k`` bilinear kernel.
    """
    scale_factor = (k + 1) // 2
    # Odd kernels are centred on a sample, even kernels between two samples.
    center = scale_factor - 1 if k % 2 == 1 else scale_factor - 0.5

    # Divide by a float explicitly: for odd ``k`` every operand is an int,
    # and this file has no ``from __future__ import division``, so a bare
    # ``/`` would truncate under Python 2 and collapse the weights to 0 or 1.
    profile = 1.0 - np.abs(np.arange(k) - center) / float(scale_factor)

    # The kernel is separable, so it is the outer product of the 1-D profile
    # with itself. It is rounded through float32 so the values match what the
    # element-wise float32 kernel produced.
    bilinear_kernel = np.outer(profile, profile).astype(np.float32)

    blob = np.zeros(shape=(ch, 1, k, k))
    blob[:, 0, :, :] = bilinear_kernel
    return blob
def CopyPoolingParameter(pytorch_layer, layer):
    """Copy kernel, stride and padding of a pooling op into ``layer``.

    Each quantity is expanded with ``CopyTuple``. When its two components
    agree the single field (``kernel_size`` / ``stride`` / ``pad``) is
    written, otherwise the ``*_h`` / ``*_w`` pair is written.

    Nothing further happens unless ``pytorch_layer.ceil_mode`` is exactly
    ``False``. In that case a positive padding component is written again,
    reduced by one, provided the matching stride exceeds 1; for equal
    paddings only the vertical stride is consulted.
    NOTE(review): the reduction presumably compensates for a difference in
    how the two frameworks round the pooled output size — confirm.
    """
    kernel_h, kernel_w = CopyTuple(pytorch_layer.kernel_size)
    stride_h, stride_w = CopyTuple(pytorch_layer.stride)
    pad_h, pad_w = CopyTuple(pytorch_layer.padding)
    same_pad = pad_h == pad_w

    if kernel_h != kernel_w:
        layer.pooling_param.kernel_h = kernel_h
        layer.pooling_param.kernel_w = kernel_w
    else:
        layer.pooling_param.kernel_size = kernel_h

    if stride_h != stride_w:
        layer.pooling_param.stride_h = stride_h
        layer.pooling_param.stride_w = stride_w
    else:
        layer.pooling_param.stride = stride_h

    if same_pad:
        layer.pooling_param.pad = pad_h
    else:
        layer.pooling_param.pad_h = pad_h
        layer.pooling_param.pad_w = pad_w

    # Identity test on purpose: only a literal ``False`` triggers the
    # padding correction below.
    if pytorch_layer.ceil_mode is not False:
        return

    shrink_vertical = stride_h > 1 and pad_h > 0
    if same_pad:
        if shrink_vertical:
            layer.pooling_param.pad = pad_h - 1
        return

    if shrink_vertical:
        layer.pooling_param.pad_h = pad_h - 1
    if stride_w > 1 and pad_w > 0:
        layer.pooling_param.pad_w = pad_w - 1
def dropout(pytorch_layer):
    """Build a Caffe ``Dropout`` layer from a PyTorch dropout op.

    The drop ratio is ``pytorch_layer.p`` converted to float. A
    ``NetStateRule`` whose phase is ``TEST`` is added to the layer's
    ``exclude`` list.

    Returns:
        The populated ``LayerParameter``.
    """
    ratio = float(pytorch_layer.p)

    test_phase_rule = pb2.NetStateRule()
    test_phase_rule.phase = pb2.TEST

    caffe_layer = pb2.LayerParameter()
    caffe_layer.type = "Dropout"
    caffe_layer.dropout_param.dropout_ratio = ratio
    caffe_layer.exclude.extend([test_phase_rule])
    return caffe_layer
def batchnorm(pytorch_layer):
    """Emit a Caffe ``BatchNorm`` + ``Scale`` pair for a PyTorch batch norm.

    Returns:
        ``[layer_bn, layer_scale]``. The ``BatchNorm`` layer has
        ``use_global_stats`` set, takes ``eps`` from ``pytorch_layer`` and
        carries three blobs: the running mean, the running variance and a
        one-element array holding ``1.``. The ``Scale`` layer carries the
        array found under ``next_functions[1]`` and, when
        ``next_functions[2]`` is populated, the array found there as bias.
    """
    stats_layer = pb2.LayerParameter()
    stats_layer.type = "BatchNorm"
    stats_layer.batch_norm_param.use_global_stats = 1
    stats_layer.batch_norm_param.eps = pytorch_layer.eps
    stat_arrays = (
        pytorch_layer.running_mean.numpy(),
        pytorch_layer.running_var.numpy(),
        # NOTE(review): presumably the moving-average factor Caffe expects
        # as the third BatchNorm blob — confirm.
        np.array([1.]),
    )
    stats_layer.blobs.extend([as_blob(values) for values in stat_arrays])

    affine_layer = pb2.LayerParameter()
    affine_layer.type = "Scale"
    affine_arrays = [pytorch_layer.next_functions[1][0].variable.data.numpy()]

    bias_fn = pytorch_layer.next_functions[2][0]
    if bias_fn:
        affine_layer.scale_param.bias_term = True
        affine_arrays.append(bias_fn.variable.data.numpy())
    else:
        affine_layer.scale_param.bias_term = False
    affine_layer.blobs.extend([as_blob(values) for values in affine_arrays])

    return [stats_layer, affine_layer]
class LayerParameter_ncnn(object):
    """Plain record describing one converted ncnn layer."""

    def __init__(self):
        # Layer type name, e.g. 'Input'.
        self.type = ''
        # Parameter values, starts out as an empty list.
        self.param = []
        # Weight arrays, starts out as an empty list.
        self.weights = []


def CopyTuple(param):
    """Normalize a size-like argument to a tuple.

    Args:
        param: Either a tuple, which is returned unchanged, or an int, which
            is duplicated into a pair.

    Returns:
        ``param`` itself when it is a tuple, otherwise ``(param, param)``.

    Raises:
        TypeError: If ``param`` is neither a tuple nor an int.
    """
    if isinstance(param, tuple):
        return param
    if isinstance(param, int):
        return param, param
    # ``assert type(param)`` can never fail (a type object is always truthy),
    # so unsupported input used to fall through and return ``None``.
    raise TypeError(
        "expected an int or a tuple, got {}".format(type(param).__name__))


def ty(ncnn_type):
    """Return a converter that ignores its argument and yields a bare layer.

    Args:
        ncnn_type: Type name stored on every layer the converter creates.

    Returns:
        A one-argument function returning a fresh ``LayerParameter_ncnn``
        whose ``type`` is ``ncnn_type``.
    """
    def f(_):
        layer = LayerParameter_ncnn()
        layer.type = ncnn_type
        return layer
    return f


def data(inputs):
    """Describe the network input as an ``Input`` layer.

    Args:
        inputs: Object whose ``data.numpy()`` yields the input array.

    Returns:
        A ``LayerParameter_ncnn`` of type ``'Input'`` with three parameter
        strings: the sizes of axes 1, 2 and 3 of the input, with ``-233``
        written for every axis the input does not have.
    """
    layer = LayerParameter_ncnn()
    layer.type = 'Input'

    input_shape = inputs.data.numpy().shape
    for dim in range(1, 4):
        # Compare the axis itself against the rank. Testing ``dim - 1``
        # let ``dim == len(input_shape)`` through and raised IndexError for
        # every input with one to three dimensions.
        size = input_shape[dim] if dim < len(input_shape) else -233
        layer.param.append('%ld' % size)
    return layer
def permute(pytorch_layer):
    """Build a ``Permute`` layer from a PyTorch permute op.

    ``pytorch_layer.rev_dim_indices`` must have four entries and its first
    entry must be ``0``; both conditions are asserted. Entries 1, 2 and 3
    are matched against the six orderings of ``(1, 2, 3)`` to pick an
    ``order_type`` between 0 and 5; anything else leaves it at 0. The chosen
    value is appended to the layer parameters as a ``%d`` string.
    """
    layer = LayerParameter_ncnn()
    layer.type = 'Permute'

    indices = pytorch_layer.rev_dim_indices
    assert len(pytorch_layer.rev_dim_indices) == 4, len(pytorch_layer.rev_dim_indices)
    assert pytorch_layer.rev_dim_indices[0] == 0, pytorch_layer.rev_dim_indices[0]

    # order_type details at src/layer/permute.cpp
    h, w, c = indices[1], indices[2], indices[3]

    # Position in this table is the order_type; entries are (c, h, w).
    layouts = (
        (1, 2, 3),
        (1, 3, 2),
        (2, 1, 3),
        (2, 3, 1),
        (3, 1, 2),
        (3, 2, 1),
    )
    order_type = 0
    for code, (c_ref, h_ref, w_ref) in enumerate(layouts):
        if c == c_ref and h == h_ref and w == w_ref:
            order_type = code
            break

    layer.param.append('%d' % order_type)
    return layer
def concat(pytorch_layer):
    """Build a ``Concat`` layer from a PyTorch concatenation.

    ``pytorch_layer.dim`` is read as an int. For axis 1 no parameter is
    written. For any other axis a single ``%d`` parameter is appended:
    ``axis - 1`` when the axis is at least 1, otherwise ``0``.
    """
    layer = LayerParameter_ncnn()
    axis = int(pytorch_layer.dim)
    layer.type = 'Concat'

    if axis != 1:
        # Shift by one and clamp at zero for axes below 1.
        shifted_axis = max(axis - 1, 0)
        layer.param.append('%d' % shifted_axis)
    return layer
def FillBilinear(ch, k):
    """Build per-channel bilinear interpolation weights.

    Args:
        ch: Number of channels; the same kernel is stored for each one.
        k: Side length of the square kernel.

    Returns:
        A float64 array of shape ``(ch, 1, k, k)`` in which every
        ``[i, 0]`` slice holds the same ``k x k`` bilinear kernel.
    """
    scale_factor = (k + 1) // 2
    # Odd kernels are centred on a sample, even kernels between two samples.
    center = scale_factor - 1 if k % 2 == 1 else scale_factor - 0.5

    # Divide by a float explicitly: for odd ``k`` every operand is an int,
    # and this file has no ``from __future__ import division``, so a bare
    # ``/`` would truncate under Python 2 and collapse the weights to 0 or 1.
    profile = 1.0 - np.abs(np.arange(k) - center) / float(scale_factor)

    # The kernel is separable, so it is the outer product of the 1-D profile
    # with itself. It is rounded through float32 so the values match what the
    # element-wise float32 kernel produced.
    bilinear_kernel = np.outer(profile, profile).astype(np.float32)

    blob = np.zeros(shape=(ch, 1, k, k))
    blob[:, 0, :, :] = bilinear_kernel
    return blob
def CopyPoolingParameter(pytorch_layer, layer):
    """Append kernel, stride, padding and a trailing ``0`` to ``layer.param``.

    Padding, kernel size and stride are expanded with ``CopyTuple``. Each
    pair must have equal components; this is asserted, with the offending
    pair as the assertion message. The first component of kernel, stride
    and padding is then appended as a ``%d`` string, followed by ``0``.
    """
    pad_pair = CopyTuple(pytorch_layer.padding)
    kernel_pair = CopyTuple(pytorch_layer.kernel_size)
    stride_pair = CopyTuple(pytorch_layer.stride)

    # Only square kernels, strides and paddings are accepted.
    for first, second in (kernel_pair, stride_pair, pad_pair):
        assert first == second, [first, second]

    # The padding is written regardless of ceil_mode.
    # TODO: the trailing 0 — global_pooling?
    for value in (kernel_pair[0], stride_pair[0], pad_pair[0], 0):
        layer.param.append('%d' % value)
LayerParameter_ncnn() 331 | layer.type = 'ReLU' 332 | negative_slope = float(pytorch_layer.additional_args[0]) 333 | layer.param.append('%f' % negative_slope) 334 | return layer 335 | 336 | 337 | def PReLU(pytorch_layer): 338 | layer = LayerParameter_ncnn() 339 | layer.type = 'PReLU' 340 | 341 | blobs_weight = pytorch_layer.next_functions[1][0].variable.data.numpy() 342 | layer.param.append('%d' % blobs_weight.size) 343 | layer.weights.append(blobs_weight) 344 | return layer 345 | 346 | 347 | def MulConst(pytorch_layer): 348 | layer = LayerParameter_ncnn() 349 | layer.type = 'Power' 350 | layer.param.append('%f' % 1) 351 | layer.param.append('%f' % float(pytorch_layer.constant)) 352 | layer.param.append('%f' % 0) 353 | return layer 354 | 355 | 356 | def AddConst(pytorch_layer): 357 | layer = LayerParameter_ncnn() 358 | layer.type = 'Power' 359 | layer.param.append('%f' % 1) 360 | layer.param.append('%f' % 1) 361 | """ Constant to add should be filled by hand, since not visible in autograd """ 362 | layer.param.append('%f' % float('inf')) 363 | return layer 364 | 365 | 366 | def softmax(pytorch_layer): 367 | layer = LayerParameter_ncnn() 368 | layer.type = 'Softmax' 369 | """ TODO: axis """ 370 | layer.param.append('%d' % 0) 371 | 372 | return layer 373 | 374 | 375 | def eltwise(pytorch_layer): 376 | layer = LayerParameter_ncnn() 377 | layer.type = 'Eltwise' 378 | """ operation: 0=mul 1=add 2=max """ 379 | layer.param.append('%d' % 1) 380 | """ TODO: coefficient """ 381 | return layer 382 | 383 | 384 | def eltwise_max(pytorch_layer): 385 | layer = LayerParameter_ncnn() 386 | layer.type = 'Eltwise' 387 | """ operation: 0=mul 1=add 2=max """ 388 | layer.param.append('%d' % 2) 389 | """ TODO: coefficient """ 390 | return layer 391 | 392 | 393 | def negate(pytorch_layer): 394 | layer = LayerParameter_ncnn() 395 | layer.type = 'UnaryOp' 396 | """ Operation_NEG=1, more op details at src/layer/unaryop.h """ 397 | layer.param.append('%d' % 1) 398 | return layer 399 | 400 | 
def batchnorm(pytorch_layer):
    """Convert one batch-norm node into a [BatchNorm, Scale] pair of ncnn layers.

    The BatchNorm layer gets the channel count as its parameter and four
    weight arrays: ones, running_mean, running_var + eps, zeros. The Scale
    layer carries the array read from ``next_functions[1][0]`` and, when
    ``next_functions[2][0]`` is truthy, the bias read from it.

    Returns:
        A two-element list ``[layer_bn, layer_scale]``.
    """
    bn = LayerParameter_ncnn()
    bn.type = 'BatchNorm'

    mean = pytorch_layer.running_mean.numpy()
    bn.param.append('%d' % mean.size)

    # eps is added to running_var by hand for ncnn.
    variance = pytorch_layer.running_var.numpy() + pytorch_layer.eps
    bn.weights.append(np.ones(mean.shape))
    bn.weights.append(mean)
    bn.weights.append(variance)
    bn.weights.append(np.zeros(mean.shape))

    scale = LayerParameter_ncnn()
    scale.type = 'Scale'

    gamma = pytorch_layer.next_functions[1][0].variable.data.numpy()
    bias_node = pytorch_layer.next_functions[2][0]
    has_bias = bool(bias_node)

    scale.param.append('%d' % gamma.size)
    scale.param.append('%d' % has_bias)
    scale.weights.append(gamma)
    if has_bias:
        scale.weights.append(bias_node.variable.data.numpy())

    return [bn, scale]
def link_caffe(layer, name, bottom, top):
    """Name a Caffe layer, attach its blob names and queue it on the net.

    Args:
        layer: layer object exposing ``name``, ``bottom`` and ``top``.
        name: layer name to assign.
        bottom: iterable of input blob names, added in order.
        top: iterable of output blob names, added in order.

    Appends *layer* to the module-level ``caffe_net`` list, which must exist
    before the call (ConvertModel_caffe creates it).
    """
    layer.name = name
    layer.bottom.extend(bottom)
    layer.top.extend(top)

    caffe_net.append(layer)


def link_ncnn(layer, name, bottom, top):
    """Serialise one ncnn layer into a text line and collect its weights.

    The line is the space-joined sequence: type (padded to 16), name (padded
    to 16) with the bottom/top counts, every bottom name, every top name and
    finally the parameters. List parameters are written as ``index=value``,
    dict parameters as ``key=value``.

    Args:
        layer: object exposing ``type``, ``param`` (list or dict of strings)
            and ``weights`` (iterable).
        name: layer name.
        bottom: sequence of input blob names.
        top: sequence of output blob names.

    Uses the module-level ``ncnn_net``, ``ncnn_weights`` and ``blob_set``,
    which must exist before the call (ConvertModel_ncnn creates them).
    ``layer.param`` is left unmodified.
    """
    raw_params = layer.param
    if isinstance(raw_params, list):
        # Build a fresh list: rewriting layer.param in place would prefix the
        # entries a second time if the same layer were ever linked again.
        params = [str(ind) + '=' + value
                  for ind, value in enumerate(raw_params)]
    elif isinstance(raw_params, dict):
        # items() exists on Python 2 and 3; iteritems() is Python 2 only.
        params = [key + '=' + value for key, value in raw_params.items()]
    else:
        params = raw_params

    fields = ['%-16s' % layer.type,
              '%-16s %d %d' % (name, len(bottom), len(top))]
    fields.extend('%s' % blob for blob in bottom)
    fields.extend('%s' % blob for blob in top)

    # blob_set only counts distinct blob names for the file header.
    blob_set.update(bottom)
    blob_set.update(top)

    ncnn_net.append(' '.join(fields + params))
    ncnn_weights.extend(layer.weights)


def GetLayerParam_Index(func):
    """Locate the first indexed axis of an Index node and describe its slice.

    Walks ``func.index`` and stops at the first entry for which
    ``start or stop or step`` is not None. An int entry ``i`` is treated as
    the range ``[i, i + 1)``. If no entry qualifies, the values of the last
    entry are returned. ``func.index`` must be non-empty, otherwise the
    result names are never bound.

    Returns:
        ``(start, stop, dim_size, axis)`` where ``dim_size`` is
        ``func.input_size[axis]``.
    """
    for axis, slice_param in enumerate(func.index):
        if isinstance(slice_param, int):
            start = slice_param
            stop = slice_param + 1
        else:
            start = slice_param.start
            stop = slice_param.stop
            step = slice_param.step
        # For an int entry `start or stop` is always truthy, so `step` is
        # never evaluated there. A leading 0 counts the same as "no bound".
        if (start or stop or step) is not None:
            break
    dim_size = func.input_size[axis]
    return start, stop, dim_size, axis
= bottoms[0] 116 | else: 117 | tops_dict[func] = name 118 | if layer_type_name == 'Index': 119 | """ Change layer name only for 'Slice' """ 120 | tops_dict[func] = tops_dict[father_func] + '_' + tops_dict[func] 121 | elif dst == 'ncnn': 122 | if layer_type_name in ['Clone', 'SetItem']: 123 | tops_dict[func] = bottoms[0] 124 | elif (layer_type_name == 'Index') and (not isinstance(func.index, tuple)): 125 | tops_dict[func] = bottoms[0] 126 | else: 127 | tops_dict[func] = name 128 | if layer_type_name == 'Index': 129 | """ Chane layer name for 'Slice' """ 130 | tops_dict[func] = tops_dict[father_func] + '_' + tops_dict[func] 131 | elif hasattr(func, 'next_functions'): 132 | """ Change bottom layers name for other multi top layers cases """ 133 | for u in func.next_functions: 134 | if u[0] is not None: 135 | child_type = str(type(u[0]).__name__) 136 | if child_type != 'AccumulateGrad' and (layer_type != 'AddmmBackward' or child_type != 'TransposeBackward'): 137 | father_func = u[0] 138 | if (father_func in multi_tops) and (len(multi_tops[father_func]) > 1): 139 | for i in range(len(bottoms)): 140 | if bottoms[i] == tops_dict[father_func]: 141 | bottoms[i] = tops_dict[father_func] + '_' + tops_dict[func] 142 | 143 | """ Split to BatchNorm and Scale """ 144 | if layer_type_name == 'BatchNorm': 145 | layer_double = convert('', layer_type_name, func) 146 | scale_name = name + '_' + 'scale' 147 | if dst == 'caffe': 148 | link(layer_double[0], name, bottoms, [tops_dict[func]]) 149 | link(layer_double[1], scale_name, [tops_dict[func]], [tops_dict[func]]) 150 | elif dst == 'ncnn': 151 | link(layer_double[0], name, bottoms, [tops_dict[func]]) 152 | link(layer_double[1], scale_name, [tops_dict[func]], [scale_name]) 153 | tops_dict[func] = scale_name 154 | 155 | elif layer_type_name not in ['Index', 'Clone', 'SetItem']: 156 | """ Debug """ 157 | # if layer_type_name != 'Cmax': 158 | # return tops_dict[func] 159 | 160 | layer = convert('', layer_type_name, func) 161 | link(layer, 
name, bottoms, [tops_dict[func]]) 162 | 163 | """ If func layer has multiple top layers """ 164 | if (func in multi_tops) and (len(multi_tops[func]) > 1): 165 | if func in slice_point: 166 | """ Make an extra dummy layer type 'Slice' after func layer, which not exist in pytorch """ 167 | slice_func = torch.autograd.function 168 | slice_func.axis = axis_dict[func] 169 | slice_func.slice_point = slice_point[func] 170 | slice_layer = convert('', 'Slice', slice_func) 171 | link(slice_layer, tops_dict[func] + '_slicer', [tops_dict[func]], multi_tops[func]) 172 | elif dst == 'ncnn': 173 | """ 174 | Make 'Split' copy for each top layer respectively 175 | (only in ncnn, caffe will automatically handle this case) 176 | """ 177 | copy_func = torch.autograd.function 178 | split_layer = convert('', 'MultiCopy', copy_func) 179 | link(split_layer, tops_dict[func] + '_copyer', [tops_dict[func]], multi_tops[func]) 180 | 181 | return tops_dict[func] 182 | 183 | 184 | def FindMultiTops(func): 185 | """ 186 | Precount nodes with number of tops(indegree)>1, 187 | which could be Slice or Split(only in ncnn, for making multiple copies) 188 | """ 189 | if func in visited: 190 | return tops_dict[func] 191 | 192 | visited.add(func) 193 | layer_type = str(type(func).__name__) 194 | bottoms = [] 195 | 196 | if hasattr(func, 'next_functions'): 197 | for u in func.next_functions: 198 | if u[0] is not None: 199 | child_type = str(type(u[0]).__name__) 200 | if child_type != 'AccumulateGrad' and (layer_type != 'AddmmBackward' or child_type != 'TransposeBackward'): 201 | child_name = FindMultiTops(u[0]) 202 | bottoms.append(child_name) 203 | 204 | """ Gen layer name """ 205 | layer_type_name = layer_type.replace('Backward', '') 206 | if layer_type_name in layer_type_count: 207 | layer_type_count[layer_type_name] += 1 208 | else: 209 | layer_type_count[layer_type_name] = 1 210 | 211 | name = layer_type_name + '_' + str(layer_type_count[layer_type_name]) 212 | 213 | """ Skip some pytorch layers """ 
214 | if dst == 'caffe': 215 | if layer_type_name in ['Clone', 'Threshold', 'Dropout', 'SetItem']: 216 | tops_dict[func] = bottoms[0] 217 | elif (layer_type_name == 'Index') and (not isinstance(func.index, tuple)): 218 | tops_dict[func] = bottoms[0] 219 | else: 220 | tops_dict[func] = name 221 | elif dst == 'ncnn': 222 | if layer_type_name in ['Clone', 'SetItem']: 223 | tops_dict[func] = bottoms[0] 224 | elif (layer_type_name == 'Index') and (not isinstance(func.index, tuple)): 225 | tops_dict[func] = bottoms[0] 226 | elif layer_type_name == 'BatchNorm': 227 | tops_dict[func] = name + '_' + 'scale' 228 | else: 229 | tops_dict[func] = name 230 | 231 | if hasattr(func, 'next_functions'): 232 | for u in func.next_functions: 233 | if u[0] is not None: 234 | child_type = str(type(u[0]).__name__) 235 | if child_type != 'AccumulateGrad' and (layer_type != 'AddmmBackward' or child_type != 'TransposeBackward'): 236 | father_func = u[0] 237 | if father_func not in multi_tops: 238 | multi_tops[father_func] = [] 239 | multi_tops[father_func].append(tops_dict[father_func] + '_' + tops_dict[func]) 240 | 241 | if (layer_type == 'IndexBackward') and isinstance(func.index, tuple): 242 | if father_func not in slice_point: 243 | slice_point[father_func] = [] 244 | start, stop, dim_size, axis = GetLayerParam_Index(func) 245 | 246 | """ Persume the visit of Index layers will be ascending """ 247 | if start > 0: 248 | slice_point[father_func].append(start) 249 | axis_dict[father_func] = axis 250 | 251 | """ Last slice """ 252 | # if stop == dim_size 253 | 254 | return tops_dict[func] 255 | 256 | 257 | def ConvertModel_ncnn(pytorch_net, InputShape, softmax=False): 258 | """ Pytorch to ncnn, only support single tensor input """ 259 | from ConvertLayer_ncnn import convert_ncnn 260 | 261 | """ Need forward once """ 262 | pytorch_net.eval() 263 | global inputs 264 | n, c, h, w = InputShape 265 | inputs = Variable(torch.rand(n, c, h, w), requires_grad=True) 266 | outputs = pytorch_net(inputs) 
267 | 268 | if softmax: 269 | import torch.nn as nn 270 | regularize = nn.Softmax() 271 | outputs = regularize(outputs) 272 | 273 | """ Travel computational graph in backward order """ 274 | """ Need to count number of tops(indegree) of all nodes first""" 275 | global visited, tops_dict, layer_type_count, dst 276 | global multi_tops, slice_point, axis_dict 277 | 278 | visited = set() 279 | tops_dict = dict() 280 | layer_type_count = dict() 281 | multi_tops = dict() 282 | slice_point = dict() 283 | axis_dict = dict() 284 | dst = 'ncnn' 285 | 286 | for out in outputs: 287 | FindMultiTops(out.grad_fn) 288 | 289 | """ Travel computational graph in backward order """ 290 | global ncnn_net, ncnn_weights, blob_set 291 | global convert, link 292 | ncnn_net = [] 293 | ncnn_weights = [] 294 | convert = convert_ncnn 295 | link = link_ncnn 296 | 297 | visited = set() 298 | tops_dict = dict() 299 | layer_type_count = dict() 300 | blob_set = set() 301 | 302 | for out in outputs: 303 | DFS(out.grad_fn) 304 | 305 | text_net = '\n'.join(ncnn_net) 306 | """ Add layer number and blob number """ 307 | text_net = ('%d %d\n' % (len(ncnn_net), len(blob_set))) + text_net 308 | """ Add ncnn magic number """ 309 | text_net = '7767517\n' + text_net 310 | 311 | return text_net, ncnn_weights 312 | 313 | 314 | def ConvertModel_caffe(pytorch_net, InputShape, softmax=False): 315 | """ Pytorch to Caffe, only support single tensor input """ 316 | import os 317 | import caffe_pb2 as pb2 318 | from ConvertLayer_caffe import convert_caffe 319 | 320 | """ Need forward once """ 321 | pytorch_net.eval() 322 | global inputs 323 | n, c, h, w = InputShape 324 | inputs = Variable(torch.rand(n, c, h, w), requires_grad=True) 325 | outputs = pytorch_net(inputs) 326 | 327 | if softmax: 328 | import torch.nn as nn 329 | regularize = nn.Softmax() 330 | outputs = regularize(outputs) 331 | 332 | """ Travel computational graph in backward order """ 333 | """ Need to count number of tops(indegree) of all nodes first 
""" 334 | global visited, tops_dict, layer_type_count, dst 335 | global slice_point, multi_tops, axis_dict 336 | visited = set() 337 | tops_dict = dict() 338 | layer_type_count = dict() 339 | slice_point = dict() 340 | multi_tops = dict() 341 | axis_dict = dict() 342 | dst = 'caffe' 343 | 344 | for out in outputs: 345 | FindMultiTops(out.grad_fn) 346 | 347 | """ Travel computational graph in backward order """ 348 | global caffe_net 349 | global convert, link 350 | convert = convert_caffe 351 | link = link_caffe 352 | caffe_net = [] 353 | 354 | visited = set() 355 | tops_dict = dict() 356 | layer_type_count = dict() 357 | 358 | for out in outputs: 359 | DFS(out.grad_fn) 360 | 361 | """ Caffe input """ 362 | text_net = pb2.NetParameter() 363 | if os.environ.get("T2C_DEBUG"): 364 | text_net.debug_info = True 365 | 366 | """ Caffe layer parameters """ 367 | binary_weights = pb2.NetParameter() 368 | binary_weights.CopyFrom(text_net) 369 | for layer in caffe_net: 370 | binary_weights.layer.extend([layer]) 371 | 372 | layer_proto = pb2.LayerParameter() 373 | layer_proto.CopyFrom(layer) 374 | del layer_proto.blobs[:] 375 | text_net.layer.extend([layer_proto]) 376 | 377 | return text_net, binary_weights 378 | -------------------------------------------------------------------------------- /code/ReplaceDenormals.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | 5 | def ReplaceDenormals(net): 6 | for name, param in net.named_parameters(): 7 | np_arr = param.data.numpy() 8 | for x in np.nditer(np_arr, op_flags=['readwrite']): 9 | if abs(x) < 1e-30: 10 | x[...] = 1e-30 11 | param.data = torch.from_numpy(np_arr) 12 | -------------------------------------------------------------------------------- /code/caffe.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto2"; 2 | 3 | package caffe; 4 | 5 | // Specifies the shape (dimensions) of a Blob. 
6 | message BlobShape { 7 | repeated int64 dim = 1 [packed = true]; 8 | } 9 | 10 | message BlobProto { 11 | optional BlobShape shape = 7; 12 | repeated float data = 5 [packed = true]; 13 | repeated float diff = 6 [packed = true]; 14 | repeated double double_data = 8 [packed = true]; 15 | repeated double double_diff = 9 [packed = true]; 16 | 17 | // 4D dimensions -- deprecated. Use "shape" instead. 18 | optional int32 num = 1 [default = 0]; 19 | optional int32 channels = 2 [default = 0]; 20 | optional int32 height = 3 [default = 0]; 21 | optional int32 width = 4 [default = 0]; 22 | } 23 | 24 | // The BlobProtoVector is simply a way to pass multiple blobproto instances 25 | // around. 26 | message BlobProtoVector { 27 | repeated BlobProto blobs = 1; 28 | } 29 | 30 | message Datum { 31 | optional int32 channels = 1; 32 | optional int32 height = 2; 33 | optional int32 width = 3; 34 | // the actual image data, in bytes 35 | optional bytes data = 4; 36 | optional int32 label = 5; 37 | // Optionally, the datum could also hold float data. 38 | repeated float float_data = 6; 39 | // If true data contains an encoded image that need to be decoded 40 | optional bool encoded = 7 [default = false]; 41 | } 42 | 43 | message FillerParameter { 44 | // The filler type. 45 | optional string type = 1 [default = 'constant']; 46 | optional float value = 2 [default = 0]; // the value in constant filler 47 | optional float min = 3 [default = 0]; // the min value in uniform filler 48 | optional float max = 4 [default = 1]; // the max value in uniform filler 49 | optional float mean = 5 [default = 0]; // the mean value in Gaussian filler 50 | optional float std = 6 [default = 1]; // the std value in Gaussian filler 51 | // The expected number of non-zero output weights for a given input in 52 | // Gaussian filler -- the default -1 means don't perform sparsification. 53 | optional int32 sparse = 7 [default = -1]; 54 | // Normalize the filler variance by fan_in, fan_out, or their average. 
55 | // Applies to 'xavier' and 'msra' fillers. 56 | enum VarianceNorm { 57 | FAN_IN = 0; 58 | FAN_OUT = 1; 59 | AVERAGE = 2; 60 | } 61 | optional VarianceNorm variance_norm = 8 [default = FAN_IN]; 62 | } 63 | 64 | message NetParameter { 65 | optional string name = 1; // consider giving the network a name 66 | // DEPRECATED. See InputParameter. The input blobs to the network. 67 | repeated string input = 3; 68 | // DEPRECATED. See InputParameter. The shape of the input blobs. 69 | repeated BlobShape input_shape = 8; 70 | 71 | // 4D input dimensions -- deprecated. Use "input_shape" instead. 72 | // If specified, for each input blob there should be four 73 | // values specifying the num, channels, height and width of the input blob. 74 | // Thus, there should be a total of (4 * #input) numbers. 75 | repeated int32 input_dim = 4; 76 | 77 | // Whether the network will force every layer to carry out backward operation. 78 | // If set False, then whether to carry out backward is determined 79 | // automatically according to the net structure and learning rates. 80 | optional bool force_backward = 5 [default = false]; 81 | // The current "state" of the network, including the phase, level, and stage. 82 | // Some layers may be included/excluded depending on this state and the states 83 | // specified in the layers' include and exclude fields. 84 | optional NetState state = 6; 85 | 86 | // Print debugging information about results while running Net::Forward, 87 | // Net::Backward, and Net::Update. 88 | optional bool debug_info = 7 [default = false]; 89 | 90 | // The layers that make up the net. Each of their configurations, including 91 | // connectivity and behavior, is specified as a LayerParameter. 92 | repeated LayerParameter layer = 100; // ID 100 so layers are printed last. 93 | 94 | // DEPRECATED: use 'layer' instead. 95 | repeated V1LayerParameter layers = 2; 96 | } 97 | 98 | // NOTE 99 | // Update the next available ID when you add a new SolverParameter field. 
100 | // 101 | // SolverParameter next available ID: 41 (last added: type) 102 | message SolverParameter { 103 | ////////////////////////////////////////////////////////////////////////////// 104 | // Specifying the train and test networks 105 | // 106 | // Exactly one train net must be specified using one of the following fields: 107 | // train_net_param, train_net, net_param, net 108 | // One or more test nets may be specified using any of the following fields: 109 | // test_net_param, test_net, net_param, net 110 | // If more than one test net field is specified (e.g., both net and 111 | // test_net are specified), they will be evaluated in the field order given 112 | // above: (1) test_net_param, (2) test_net, (3) net_param/net. 113 | // A test_iter must be specified for each test_net. 114 | // A test_level and/or a test_stage may also be specified for each test_net. 115 | ////////////////////////////////////////////////////////////////////////////// 116 | 117 | // Proto filename for the train net, possibly combined with one or more 118 | // test nets. 119 | optional string net = 24; 120 | // Inline train net param, possibly combined with one or more test nets. 121 | optional NetParameter net_param = 25; 122 | 123 | optional string train_net = 1; // Proto filename for the train net. 124 | repeated string test_net = 2; // Proto filenames for the test nets. 125 | optional NetParameter train_net_param = 21; // Inline train net params. 126 | repeated NetParameter test_net_param = 22; // Inline test net params. 127 | 128 | // The states for the train/test nets. Must be unspecified or 129 | // specified once per net. 130 | // 131 | // By default, all states will have solver = true; 132 | // train_state will have phase = TRAIN, 133 | // and all test_state's will have phase = TEST. 134 | // Other defaults are set according to the NetState defaults. 
135 | optional NetState train_state = 26; 136 | repeated NetState test_state = 27; 137 | 138 | // The number of iterations for each test net. 139 | repeated int32 test_iter = 3; 140 | 141 | // The number of iterations between two testing phases. 142 | optional int32 test_interval = 4 [default = 0]; 143 | optional bool test_compute_loss = 19 [default = false]; 144 | // If true, run an initial test pass before the first iteration, 145 | // ensuring memory availability and printing the starting value of the loss. 146 | optional bool test_initialization = 32 [default = true]; 147 | optional float base_lr = 5; // The base learning rate 148 | // the number of iterations between displaying info. If display = 0, no info 149 | // will be displayed. 150 | optional int32 display = 6; 151 | // Display the loss averaged over the last average_loss iterations 152 | optional int32 average_loss = 33 [default = 1]; 153 | optional int32 max_iter = 7; // the maximum number of iterations 154 | // accumulate gradients over `iter_size` x `batch_size` instances 155 | optional int32 iter_size = 36 [default = 1]; 156 | 157 | // The learning rate decay policy. The currently implemented learning rate 158 | // policies are as follows: 159 | // - fixed: always return base_lr. 160 | // - step: return base_lr * gamma ^ (floor(iter / step)) 161 | // - exp: return base_lr * gamma ^ iter 162 | // - inv: return base_lr * (1 + gamma * iter) ^ (- power) 163 | // - multistep: similar to step but it allows non uniform steps defined by 164 | // stepvalue 165 | // - poly: the effective learning rate follows a polynomial decay, to be 166 | // zero by the max_iter. 
return base_lr (1 - iter/max_iter) ^ (power) 167 | // - sigmoid: the effective learning rate follows a sigmod decay 168 | // return base_lr ( 1/(1 + exp(-gamma * (iter - stepsize)))) 169 | // 170 | // where base_lr, max_iter, gamma, step, stepvalue and power are defined 171 | // in the solver parameter protocol buffer, and iter is the current iteration. 172 | optional string lr_policy = 8; 173 | optional float gamma = 9; // The parameter to compute the learning rate. 174 | optional float power = 10; // The parameter to compute the learning rate. 175 | optional float momentum = 11; // The momentum value. 176 | optional float weight_decay = 12; // The weight decay. 177 | // regularization types supported: L1 and L2 178 | // controlled by weight_decay 179 | optional string regularization_type = 29 [default = "L2"]; 180 | // the stepsize for learning rate policy "step" 181 | optional int32 stepsize = 13; 182 | // the stepsize for learning rate policy "multistep" 183 | repeated int32 stepvalue = 34; 184 | 185 | // Set clip_gradients to >= 0 to clip parameter gradients to that L2 norm, 186 | // whenever their actual L2 norm is larger. 187 | optional float clip_gradients = 35 [default = -1]; 188 | 189 | optional int32 snapshot = 14 [default = 0]; // The snapshot interval 190 | optional string snapshot_prefix = 15; // The prefix for the snapshot. 191 | // whether to snapshot diff in the results or not. Snapshotting diff will help 192 | // debugging but the final protocol buffer size will be much larger. 193 | optional bool snapshot_diff = 16 [default = false]; 194 | enum SnapshotFormat { 195 | HDF5 = 0; 196 | BINARYPROTO = 1; 197 | } 198 | optional SnapshotFormat snapshot_format = 37 [default = BINARYPROTO]; 199 | // the mode solver will use: 0 for CPU and 1 for GPU. Use GPU in default. 200 | enum SolverMode { 201 | CPU = 0; 202 | GPU = 1; 203 | } 204 | optional SolverMode solver_mode = 17 [default = GPU]; 205 | // the device_id will that be used in GPU mode. 
Use device_id = 0 in default. 206 | optional int32 device_id = 18 [default = 0]; 207 | // If non-negative, the seed with which the Solver will initialize the Caffe 208 | // random number generator -- useful for reproducible results. Otherwise, 209 | // (and by default) initialize using a seed derived from the system clock. 210 | optional int64 random_seed = 20 [default = -1]; 211 | 212 | // type of the solver 213 | optional string type = 40 [default = "SGD"]; 214 | 215 | // numerical stability for RMSProp, AdaGrad and AdaDelta and Adam 216 | optional float delta = 31 [default = 1e-8]; 217 | // parameters for the Adam solver 218 | optional float momentum2 = 39 [default = 0.999]; 219 | 220 | // RMSProp decay value 221 | // MeanSquare(t) = rms_decay*MeanSquare(t-1) + (1-rms_decay)*SquareGradient(t) 222 | optional float rms_decay = 38; 223 | 224 | // If true, print information about the state of the net that may help with 225 | // debugging learning problems. 226 | optional bool debug_info = 23 [default = false]; 227 | 228 | // If false, don't save a snapshot after training finishes. 229 | optional bool snapshot_after_train = 28 [default = true]; 230 | 231 | // DEPRECATED: old solver enum types, use string instead 232 | enum SolverType { 233 | SGD = 0; 234 | NESTEROV = 1; 235 | ADAGRAD = 2; 236 | RMSPROP = 3; 237 | ADADELTA = 4; 238 | ADAM = 5; 239 | } 240 | // DEPRECATED: use type instead of solver_type 241 | optional SolverType solver_type = 30 [default = SGD]; 242 | } 243 | 244 | // A message that stores the solver snapshots 245 | message SolverState { 246 | optional int32 iter = 1; // The current iteration 247 | optional string learned_net = 2; // The file that stores the learned net. 
248 | repeated BlobProto history = 3; // The history for sgd solvers 249 | optional int32 current_step = 4 [default = 0]; // The current step for learning rate 250 | } 251 | 252 | enum Phase { 253 | TRAIN = 0; 254 | TEST = 1; 255 | } 256 | 257 | message NetState { 258 | optional Phase phase = 1 [default = TEST]; 259 | optional int32 level = 2 [default = 0]; 260 | repeated string stage = 3; 261 | } 262 | 263 | message NetStateRule { 264 | // Set phase to require the NetState have a particular phase (TRAIN or TEST) 265 | // to meet this rule. 266 | optional Phase phase = 1; 267 | 268 | // Set the minimum and/or maximum levels in which the layer should be used. 269 | // Leave undefined to meet the rule regardless of level. 270 | optional int32 min_level = 2; 271 | optional int32 max_level = 3; 272 | 273 | // Customizable sets of stages to include or exclude. 274 | // The net must have ALL of the specified stages and NONE of the specified 275 | // "not_stage"s to meet the rule. 276 | // (Use multiple NetStateRules to specify conjunctions of stages.) 277 | repeated string stage = 4; 278 | repeated string not_stage = 5; 279 | } 280 | 281 | // Specifies training parameters (multipliers on global learning constants, 282 | // and the name and other settings used for weight sharing). 283 | message ParamSpec { 284 | // The names of the parameter blobs -- useful for sharing parameters among 285 | // layers, but never required otherwise. To share a parameter between two 286 | // layers, give it a (non-empty) name. 287 | optional string name = 1; 288 | 289 | // Whether to require shared weights to have the same shape, or just the same 290 | // count -- defaults to STRICT if unspecified. 291 | optional DimCheckMode share_mode = 2; 292 | enum DimCheckMode { 293 | // STRICT (default) requires that num, channels, height, width each match. 294 | STRICT = 0; 295 | // PERMISSIVE requires only the count (num*channels*height*width) to match. 
296 | PERMISSIVE = 1; 297 | } 298 | 299 | // The multiplier on the global learning rate for this parameter. 300 | optional float lr_mult = 3 [default = 1.0]; 301 | 302 | // The multiplier on the global weight decay for this parameter. 303 | optional float decay_mult = 4 [default = 1.0]; 304 | } 305 | 306 | // NOTE 307 | // Update the next available ID when you add a new LayerParameter field. 308 | // 309 | // LayerParameter next available layer-specific ID: 151 (last added: box_annotator_ohem_param) 310 | message LayerParameter { 311 | optional string name = 1; // the layer name 312 | optional string type = 2; // the layer type 313 | repeated string bottom = 3; // the name of each bottom blob 314 | repeated string top = 4; // the name of each top blob 315 | 316 | // The train / test phase for computation. 317 | optional Phase phase = 10; 318 | 319 | // The amount of weight to assign each top blob in the objective. 320 | // Each layer assigns a default value, usually of either 0 or 1, 321 | // to each top blob. 322 | repeated float loss_weight = 5; 323 | 324 | // Specifies training parameters (multipliers on global learning constants, 325 | // and the name and other settings used for weight sharing). 326 | repeated ParamSpec param = 6; 327 | 328 | // The blobs containing the numeric parameters of the layer. 329 | repeated BlobProto blobs = 7; 330 | 331 | // Specifies whether to backpropagate to each bottom. If unspecified, 332 | // Caffe will automatically infer whether each input needs backpropagation 333 | // to compute parameter gradients. If set to true for some inputs, 334 | // backpropagation to those inputs is forced; if set false for some inputs, 335 | // backpropagation to those inputs is skipped. 336 | // 337 | // The size must be either 0 or equal to the number of bottoms. 338 | repeated bool propagate_down = 11; 339 | 340 | // Rules controlling whether and when a layer is included in the network, 341 | // based on the current NetState. 
You may specify a non-zero number of rules 342 | // to include OR exclude, but not both. If no include or exclude rules are 343 | // specified, the layer is always included. If the current NetState meets 344 | // ANY (i.e., one or more) of the specified rules, the layer is 345 | // included/excluded. 346 | repeated NetStateRule include = 8; 347 | repeated NetStateRule exclude = 9; 348 | 349 | // Parameters for data pre-processing. 350 | optional TransformationParameter transform_param = 100; 351 | 352 | // Parameters shared by loss layers. 353 | optional LossParameter loss_param = 101; 354 | 355 | // Layer type-specific parameters. 356 | // 357 | // Note: certain layers may have more than one computational engine 358 | // for their implementation. These layers include an Engine type and 359 | // engine parameter for selecting the implementation. 360 | // The default for the engine is set by the ENGINE switch at compile-time. 361 | optional AccuracyParameter accuracy_param = 102; 362 | optional ArgMaxParameter argmax_param = 103; 363 | optional BatchNormParameter batch_norm_param = 139; 364 | optional BoxAnnotatorOHEMParameter box_annotator_ohem_param = 150; 365 | optional BiasParameter bias_param = 141; 366 | optional ConcatParameter concat_param = 104; 367 | optional ContrastiveLossParameter contrastive_loss_param = 105; 368 | optional ConvolutionParameter convolution_param = 106; 369 | optional CropParameter crop_param = 144; 370 | optional DataParameter data_param = 107; 371 | optional DropoutParameter dropout_param = 108; 372 | optional DummyDataParameter dummy_data_param = 109; 373 | optional EltwiseParameter eltwise_param = 110; 374 | optional ELUParameter elu_param = 140; 375 | optional EmbedParameter embed_param = 137; 376 | optional ExpParameter exp_param = 111; 377 | optional FlattenParameter flatten_param = 135; 378 | optional HDF5DataParameter hdf5_data_param = 112; 379 | optional HDF5OutputParameter hdf5_output_param = 113; 380 | optional 
HingeLossParameter hinge_loss_param = 114; 381 | optional ImageDataParameter image_data_param = 115; 382 | optional InfogainLossParameter infogain_loss_param = 116; 383 | optional InnerProductParameter inner_product_param = 117; 384 | optional InputParameter input_param = 143; 385 | optional LogParameter log_param = 134; 386 | optional LRNParameter lrn_param = 118; 387 | optional MemoryDataParameter memory_data_param = 119; 388 | optional MVNParameter mvn_param = 120; 389 | optional ParameterParameter parameter_param = 145; 390 | optional PoolingParameter pooling_param = 121; 391 | optional PowerParameter power_param = 122; 392 | optional PReLUParameter prelu_param = 131; 393 | optional PSROIPoolingParameter psroi_pooling_param = 149; 394 | optional PythonParameter python_param = 130; 395 | optional RecurrentParameter recurrent_param = 146; 396 | optional ReductionParameter reduction_param = 136; 397 | optional ReLUParameter relu_param = 123; 398 | optional ReshapeParameter reshape_param = 133; 399 | optional ROIPoolingParameter roi_pooling_param = 147; 400 | optional ScaleParameter scale_param = 142; 401 | optional SigmoidParameter sigmoid_param = 124; 402 | optional SmoothL1LossParameter smooth_l1_loss_param = 148; 403 | optional SoftmaxParameter softmax_param = 125; 404 | optional SPPParameter spp_param = 132; 405 | optional SliceParameter slice_param = 126; 406 | optional TanHParameter tanh_param = 127; 407 | optional ThresholdParameter threshold_param = 128; 408 | optional TileParameter tile_param = 138; 409 | optional WindowDataParameter window_data_param = 129; 410 | optional MILDataParameter mil_data_param = 0x004d4944; //"MID" 411 | optional MILParameter mil_param = 0x004d494c; //"MIL" 412 | } 413 | 414 | // Message that stores parameters used to apply transformation 415 | // to the data layer's data 416 | message TransformationParameter { 417 | // For data pre-processing, we can do simple scaling and subtracting the 418 | // data mean, if provided. 
Note that the mean subtraction is always carried 419 | // out before scaling. 420 | optional float scale = 1 [default = 1]; 421 | // Specify if we want to randomly mirror data. 422 | optional bool mirror = 2 [default = false]; 423 | // Specify if we would like to randomly crop an image. 424 | optional uint32 crop_size = 3 [default = 0]; 425 | // mean_file and mean_value cannot be specified at the same time 426 | optional string mean_file = 4; 427 | // if specified can be repeated once (would subtract it from all the channels) 428 | // or can be repeated the same number of times as channels 429 | // (would subtract them from the corresponding channel) 430 | repeated float mean_value = 5; 431 | // Force the decoded image to have 3 color channels. 432 | optional bool force_color = 6 [default = false]; 433 | // Force the decoded image to have 1 color channel. 434 | optional bool force_gray = 7 [default = false]; 435 | } 436 | 437 | // Message that stores parameters shared by loss layers 438 | message LossParameter { 439 | // If specified, ignore instances with the given label. 440 | optional int32 ignore_label = 1; 441 | // How to normalize the loss for loss layers that aggregate across batches, 442 | // spatial dimensions, or other dimensions. Currently only implemented in 443 | // SoftmaxWithLoss layer. 444 | enum NormalizationMode { 445 | // Divide by the number of examples in the batch times spatial dimensions. 446 | // Outputs that receive the ignore label will NOT be ignored in computing 447 | // the normalization factor. 448 | FULL = 0; 449 | // Divide by the total number of output locations that do not take the 450 | // ignore_label. If ignore_label is not set, this behaves like FULL. 451 | VALID = 1; 452 | // Divide by the batch size. 453 | BATCH_SIZE = 2; 454 | // Divide by pre-fixed normalizer 455 | PRE_FIXED = 3; 456 | // Do not normalize the loss.
457 | NONE = 4; 458 | } 459 | optional NormalizationMode normalization = 3 [default = VALID]; 460 | // Deprecated. Ignored if normalization is specified. If normalization 461 | // is not specified, then setting this to false will be equivalent to 462 | // normalization = BATCH_SIZE to be consistent with previous behavior. 463 | optional bool normalize = 2; 464 | //pre-fixed normalizer 465 | optional float pre_fixed_normalizer = 4 [default = 1]; 466 | } 467 | 468 | // Messages that store parameters used by individual layer types follow, in 469 | // alphabetical order. 470 | 471 | message AccuracyParameter { 472 | // When computing accuracy, count as correct by comparing the true label to 473 | // the top k scoring classes. By default, only compare to the top scoring 474 | // class (i.e. argmax). 475 | optional uint32 top_k = 1 [default = 1]; 476 | 477 | // The "label" axis of the prediction blob, whose argmax corresponds to the 478 | // predicted label -- may be negative to index from the end (e.g., -1 for the 479 | // last axis). For example, if axis == 1 and the predictions are 480 | // (N x C x H x W), the label blob is expected to contain N*H*W ground truth 481 | // labels with integer values in {0, 1, ..., C-1}. 482 | optional int32 axis = 2 [default = 1]; 483 | 484 | // If specified, ignore instances with the given label. 485 | optional int32 ignore_label = 3; 486 | } 487 | 488 | message ArgMaxParameter { 489 | // If true produce pairs (argmax, maxval) 490 | optional bool out_max_val = 1 [default = false]; 491 | optional uint32 top_k = 2 [default = 1]; 492 | // The axis along which to maximise -- may be negative to index from the 493 | // end (e.g., -1 for the last axis). 494 | // By default ArgMaxLayer maximizes over the flattened trailing dimensions 495 | // for each index of the first / num dimension. 
496 | optional int32 axis = 3; 497 | } 498 | 499 | message ConcatParameter { 500 | // The axis along which to concatenate -- may be negative to index from the 501 | // end (e.g., -1 for the last axis). Other axes must have the 502 | // same dimension for all the bottom blobs. 503 | // By default, ConcatLayer concatenates blobs along the "channels" axis (1). 504 | optional int32 axis = 2 [default = 1]; 505 | 506 | // DEPRECATED: alias for "axis" -- does not support negative indexing. 507 | optional uint32 concat_dim = 1 [default = 1]; 508 | } 509 | 510 | message BatchNormParameter { 511 | // If false, accumulate global mean/variance values via a moving average. If 512 | // true, use those accumulated values instead of computing mean/variance 513 | // across the batch. 514 | optional bool use_global_stats = 1; 515 | // How much does the moving average decay each iteration? 516 | optional float moving_average_fraction = 2 [default = .999]; 517 | // Small value to add to the variance estimate so that we don't divide by 518 | // zero. 519 | optional float eps = 3 [default = 1e-5]; 520 | } 521 | 522 | message BoxAnnotatorOHEMParameter { 523 | required uint32 roi_per_img = 1; // number of rois for training 524 | optional int32 ignore_label = 2 [default = -1]; // ignore_label in scoring 525 | } 526 | 527 | message BiasParameter { 528 | // The first axis of bottom[0] (the first input Blob) along which to apply 529 | // bottom[1] (the second input Blob). May be negative to index from the end 530 | // (e.g., -1 for the last axis). 
531 | // 532 | // For example, if bottom[0] is 4D with shape 100x3x40x60, the output 533 | // top[0] will have the same shape, and bottom[1] may have any of the 534 | // following shapes (for the given value of axis): 535 | // (axis == 0 == -4) 100; 100x3; 100x3x40; 100x3x40x60 536 | // (axis == 1 == -3) 3; 3x40; 3x40x60 537 | // (axis == 2 == -2) 40; 40x60 538 | // (axis == 3 == -1) 60 539 | // Furthermore, bottom[1] may have the empty shape (regardless of the value of 540 | // "axis") -- a scalar bias. 541 | optional int32 axis = 1 [default = 1]; 542 | 543 | // (num_axes is ignored unless just one bottom is given and the bias is 544 | // a learned parameter of the layer. Otherwise, num_axes is determined by the 545 | // number of axes by the second bottom.) 546 | // The number of axes of the input (bottom[0]) covered by the bias 547 | // parameter, or -1 to cover all axes of bottom[0] starting from `axis`. 548 | // Set num_axes := 0, to add a zero-axis Blob: a scalar. 549 | optional int32 num_axes = 2 [default = 1]; 550 | 551 | // (filler is ignored unless just one bottom is given and the bias is 552 | // a learned parameter of the layer.) 553 | // The initialization for the learned bias parameter. 554 | // Default is the zero (0) initialization, resulting in the BiasLayer 555 | // initially performing the identity operation. 556 | optional FillerParameter filler = 3; 557 | } 558 | 559 | message ContrastiveLossParameter { 560 | // margin for dissimilar pair 561 | optional float margin = 1 [default = 1.0]; 562 | // The first implementation of this cost did not exactly match the cost of 563 | // Hadsell et al 2006 -- using (margin - d^2) instead of (margin - d)^2. 564 | // legacy_version = false (the default) uses (margin - d)^2 as proposed in the 565 | // Hadsell paper. New models should probably use this version. 566 | // legacy_version = true uses (margin - d^2). 
This is kept to support / 567 | // reproduce existing models and results 568 | optional bool legacy_version = 2 [default = false]; 569 | } 570 | 571 | message ConvolutionParameter { 572 | optional uint32 num_output = 1; // The number of outputs for the layer 573 | optional bool bias_term = 2 [default = true]; // whether to have bias terms 574 | 575 | // Pad, kernel size, and stride are all given as a single value for equal 576 | // dimensions in all spatial dimensions, or once per spatial dimension. 577 | repeated uint32 pad = 3; // The padding size; defaults to 0 578 | repeated uint32 kernel_size = 4; // The kernel size 579 | repeated uint32 stride = 6; // The stride; defaults to 1 580 | // Factor used to dilate the kernel, (implicitly) zero-filling the resulting 581 | // holes. (Kernel dilation is sometimes referred to by its use in the 582 | // algorithme à trous from Holschneider et al. 1987.) 583 | repeated uint32 dilation = 18; // The dilation; defaults to 1 584 | 585 | // For 2D convolution only, the *_h and *_w versions may also be used to 586 | // specify both spatial dimensions. 587 | optional uint32 pad_h = 9 [default = 0]; // The padding height (2D only) 588 | optional uint32 pad_w = 10 [default = 0]; // The padding width (2D only) 589 | optional uint32 kernel_h = 11; // The kernel height (2D only) 590 | optional uint32 kernel_w = 12; // The kernel width (2D only) 591 | optional uint32 stride_h = 13; // The stride height (2D only) 592 | optional uint32 stride_w = 14; // The stride width (2D only) 593 | 594 | optional uint32 group = 5 [default = 1]; // The group size for group conv 595 | 596 | optional FillerParameter weight_filler = 7; // The filler for the weight 597 | optional FillerParameter bias_filler = 8; // The filler for the bias 598 | enum Engine { 599 | DEFAULT = 0; 600 | CAFFE = 1; 601 | CUDNN = 2; 602 | } 603 | optional Engine engine = 15 [default = DEFAULT]; 604 | 605 | // The axis to interpret as "channels" when performing convolution. 
606 | // Preceding dimensions are treated as independent inputs; 607 | // succeeding dimensions are treated as "spatial". 608 | // With (N, C, H, W) inputs, and axis == 1 (the default), we perform 609 | // N independent 2D convolutions, sliding C-channel (or (C/g)-channels, for 610 | // groups g>1) filters across the spatial axes (H, W) of the input. 611 | // With (N, C, D, H, W) inputs, and axis == 1, we perform 612 | // N independent 3D convolutions, sliding (C/g)-channels 613 | // filters across the spatial axes (D, H, W) of the input. 614 | optional int32 axis = 16 [default = 1]; 615 | 616 | // Whether to force use of the general ND convolution, even if a specific 617 | // implementation for blobs of the appropriate number of spatial dimensions 618 | // is available. (Currently, there is only a 2D-specific convolution 619 | // implementation; for input blobs with num_axes != 2, this option is 620 | // ignored and the ND implementation will be used.) 621 | optional bool force_nd_im2col = 17 [default = false]; 622 | } 623 | 624 | message CropParameter { 625 | // To crop, elements of the first bottom are selected to fit the dimensions 626 | // of the second, reference bottom. The crop is configured by 627 | // - the crop `axis` to pick the dimensions for cropping 628 | // - the crop `offset` to set the shift for all/each dimension 629 | // to align the cropped bottom with the reference bottom. 630 | // All dimensions up to but excluding `axis` are preserved, while 631 | // the dimensions including and trailing `axis` are cropped. 632 | // If only one `offset` is set, then all dimensions are offset by this amount. 633 | // Otherwise, the number of offsets must equal the number of cropped axes to 634 | // shift the crop in each dimension accordingly. 635 | // Note: standard dimensions are N,C,H,W so the default is a spatial crop, 636 | // and `axis` may be negative to index from the end (e.g., -1 for the last 637 | // axis). 
638 | optional int32 axis = 1 [default = 2]; 639 | repeated uint32 offset = 2; 640 | } 641 | 642 | message DataParameter { 643 | enum DB { 644 | LEVELDB = 0; 645 | LMDB = 1; 646 | } 647 | // Specify the data source. 648 | optional string source = 1; 649 | // Specify the batch size. 650 | optional uint32 batch_size = 4; 651 | // The rand_skip variable is for the data layer to skip a few data points 652 | // to avoid all asynchronous sgd clients to start at the same point. The skip 653 | // point would be set as rand_skip * rand(0,1). Note that rand_skip should not 654 | // be larger than the number of keys in the database. 655 | // DEPRECATED. Each solver accesses a different subset of the database. 656 | optional uint32 rand_skip = 7 [default = 0]; 657 | optional DB backend = 8 [default = LEVELDB]; 658 | // DEPRECATED. See TransformationParameter. For data pre-processing, we can do 659 | // simple scaling and subtracting the data mean, if provided. Note that the 660 | // mean subtraction is always carried out before scaling. 661 | optional float scale = 2 [default = 1]; 662 | optional string mean_file = 3; 663 | // DEPRECATED. See TransformationParameter. Specify if we would like to randomly 664 | // crop an image. 665 | optional uint32 crop_size = 5 [default = 0]; 666 | // DEPRECATED. See TransformationParameter. Specify if we want to randomly mirror 667 | // data. 668 | optional bool mirror = 6 [default = false]; 669 | // Force the encoded image to have 3 color channels 670 | optional bool force_encoded_color = 9 [default = false]; 671 | // Prefetch queue (Number of batches to prefetch to host memory, increase if 672 | // data access bandwidth varies). 
673 | optional uint32 prefetch = 10 [default = 4]; 674 | } 675 | 676 | message DropoutParameter { 677 | optional float dropout_ratio = 1 [default = 0.5]; // dropout ratio 678 | optional bool scale_train = 2 [default = true]; // scale train or test phase 679 | } 680 | 681 | // DummyDataLayer fills any number of arbitrarily shaped blobs with random 682 | // (or constant) data generated by "Fillers" (see "message FillerParameter"). 683 | message DummyDataParameter { 684 | // This layer produces N >= 1 top blobs. DummyDataParameter must specify 1 or N 685 | // shape fields, and 0, 1 or N data_fillers. 686 | // 687 | // If 0 data_fillers are specified, ConstantFiller with a value of 0 is used. 688 | // If 1 data_filler is specified, it is applied to all top blobs. If N are 689 | // specified, the ith is applied to the ith top blob. 690 | repeated FillerParameter data_filler = 1; 691 | repeated BlobShape shape = 6; 692 | 693 | // 4D dimensions -- deprecated. Use "shape" instead. 694 | repeated uint32 num = 2; 695 | repeated uint32 channels = 3; 696 | repeated uint32 height = 4; 697 | repeated uint32 width = 5; 698 | } 699 | 700 | message EltwiseParameter { 701 | enum EltwiseOp { 702 | PROD = 0; 703 | SUM = 1; 704 | MAX = 2; 705 | } 706 | optional EltwiseOp operation = 1 [default = SUM]; // element-wise operation 707 | repeated float coeff = 2; // blob-wise coefficient for SUM operation 708 | 709 | // Whether to use an asymptotically slower (for >2 inputs) but stabler method 710 | // of computing the gradient for the PROD operation. (No effect for SUM op.) 711 | optional bool stable_prod_grad = 3 [default = true]; 712 | } 713 | 714 | // Message that stores parameters used by ELULayer 715 | message ELUParameter { 716 | // Described in: 717 | // Clevert, D.-A., Unterthiner, T., & Hochreiter, S. (2015). Fast and Accurate 718 | // Deep Network Learning by Exponential Linear Units (ELUs). 
arXiv 719 | optional float alpha = 1 [default = 1]; 720 | } 721 | 722 | // Message that stores parameters used by EmbedLayer 723 | message EmbedParameter { 724 | optional uint32 num_output = 1; // The number of outputs for the layer 725 | // The input is given as integers to be interpreted as one-hot 726 | // vector indices with dimension input_dim. Hence input_dim should be 727 | // 1 greater than the maximum possible input value. 728 | optional uint32 input_dim = 2; 729 | 730 | optional bool bias_term = 3 [default = true]; // Whether to use a bias term 731 | optional FillerParameter weight_filler = 4; // The filler for the weight 732 | optional FillerParameter bias_filler = 5; // The filler for the bias 733 | 734 | } 735 | 736 | // Message that stores parameters used by ExpLayer 737 | message ExpParameter { 738 | // ExpLayer computes outputs y = base ^ (shift + scale * x), for base > 0. 739 | // Or if base is set to the default (-1), base is set to e, 740 | // so y = exp(shift + scale * x). 741 | optional float base = 1 [default = -1.0]; 742 | optional float scale = 2 [default = 1.0]; 743 | optional float shift = 3 [default = 0.0]; 744 | } 745 | 746 | /// Message that stores parameters used by FlattenLayer 747 | message FlattenParameter { 748 | // The first axis to flatten: all preceding axes are retained in the output. 749 | // May be negative to index from the end (e.g., -1 for the last axis). 750 | optional int32 axis = 1 [default = 1]; 751 | 752 | // The last axis to flatten: all following axes are retained in the output. 753 | // May be negative to index from the end (e.g., the default -1 for the last 754 | // axis). 755 | optional int32 end_axis = 2 [default = -1]; 756 | } 757 | 758 | // Message that stores parameters used by HDF5DataLayer 759 | message HDF5DataParameter { 760 | // Specify the data source. 761 | optional string source = 1; 762 | // Specify the batch size.
763 | optional uint32 batch_size = 2; 764 | 765 | // Specify whether to shuffle the data. 766 | // If shuffle == true, the ordering of the HDF5 files is shuffled, 767 | // and the ordering of data within any given HDF5 file is shuffled, 768 | // but data between different files are not interleaved; all of a file's 769 | // data are output (in a random order) before moving onto another file. 770 | optional bool shuffle = 3 [default = false]; 771 | } 772 | 773 | message HDF5OutputParameter { 774 | optional string file_name = 1; 775 | } 776 | 777 | message HingeLossParameter { 778 | enum Norm { 779 | L1 = 1; 780 | L2 = 2; 781 | } 782 | // Specify the Norm to use L1 or L2 783 | optional Norm norm = 1 [default = L1]; 784 | } 785 | 786 | message ImageDataParameter { 787 | // Specify the data source. 788 | optional string source = 1; 789 | // Specify the batch size. 790 | optional uint32 batch_size = 4 [default = 1]; 791 | // The rand_skip variable is for the data layer to skip a few data points 792 | // to avoid all asynchronous sgd clients to start at the same point. The skip 793 | // point would be set as rand_skip * rand(0,1). Note that rand_skip should not 794 | // be larger than the number of keys in the database. 795 | optional uint32 rand_skip = 7 [default = 0]; 796 | // Whether or not ImageLayer should shuffle the list of files at every epoch. 797 | optional bool shuffle = 8 [default = false]; 798 | // It will also resize images if new_height or new_width are not zero. 799 | optional uint32 new_height = 9 [default = 0]; 800 | optional uint32 new_width = 10 [default = 0]; 801 | // Specify if the images are color or gray 802 | optional bool is_color = 11 [default = true]; 803 | // DEPRECATED. See TransformationParameter. For data pre-processing, we can do 804 | // simple scaling and subtracting the data mean, if provided. Note that the 805 | // mean subtraction is always carried out before scaling. 
806 | optional float scale = 2 [default = 1]; 807 | optional string mean_file = 3; 808 | // DEPRECATED. See TransformationParameter. Specify if we would like to randomly 809 | // crop an image. 810 | optional uint32 crop_size = 5 [default = 0]; 811 | // DEPRECATED. See TransformationParameter. Specify if we want to randomly mirror 812 | // data. 813 | optional bool mirror = 6 [default = false]; 814 | optional string root_folder = 12 [default = ""]; 815 | } 816 | 817 | message InfogainLossParameter { 818 | // Specify the infogain matrix source. 819 | optional string source = 1; 820 | } 821 | 822 | message InnerProductParameter { 823 | optional uint32 num_output = 1; // The number of outputs for the layer 824 | optional bool bias_term = 2 [default = true]; // whether to have bias terms 825 | optional FillerParameter weight_filler = 3; // The filler for the weight 826 | optional FillerParameter bias_filler = 4; // The filler for the bias 827 | 828 | // The first axis to be lumped into a single inner product computation; 829 | // all preceding axes are retained in the output. 830 | // May be negative to index from the end (e.g., -1 for the last axis). 831 | optional int32 axis = 5 [default = 1]; 832 | // Specify whether to transpose the weight matrix or not. 833 | // If transpose == true, any operations will be performed on the transpose 834 | // of the weight matrix. The weight matrix itself is not going to be transposed 835 | // but rather the transfer flag of operations will be toggled accordingly. 836 | optional bool transpose = 6 [default = false]; 837 | } 838 | 839 | message InputParameter { 840 | // This layer produces N >= 1 top blob(s) to be assigned manually. 841 | // Define N shapes to set a shape for each top. 842 | // Define 1 shape to set the same shape for every top. 843 | // Define no shape to defer to reshaping manually. 
844 | repeated BlobShape shape = 1; 845 | } 846 | 847 | // Message that stores parameters used by LogLayer 848 | message LogParameter { 849 | // LogLayer computes outputs y = log_base(shift + scale * x), for base > 0. 850 | // Or if base is set to the default (-1), base is set to e, 851 | // so y = ln(shift + scale * x) = log_e(shift + scale * x) 852 | optional float base = 1 [default = -1.0]; 853 | optional float scale = 2 [default = 1.0]; 854 | optional float shift = 3 [default = 0.0]; 855 | } 856 | 857 | // Message that stores parameters used by LRNLayer 858 | message LRNParameter { 859 | optional uint32 local_size = 1 [default = 5]; 860 | optional float alpha = 2 [default = 1.]; 861 | optional float beta = 3 [default = 0.75]; 862 | enum NormRegion { 863 | ACROSS_CHANNELS = 0; 864 | WITHIN_CHANNEL = 1; 865 | } 866 | optional NormRegion norm_region = 4 [default = ACROSS_CHANNELS]; 867 | optional float k = 5 [default = 1.]; 868 | enum Engine { 869 | DEFAULT = 0; 870 | CAFFE = 1; 871 | CUDNN = 2; 872 | } 873 | optional Engine engine = 6 [default = DEFAULT]; 874 | } 875 | 876 | message MemoryDataParameter { 877 | optional uint32 batch_size = 1; 878 | optional uint32 channels = 2; 879 | optional uint32 height = 3; 880 | optional uint32 width = 4; 881 | } 882 | 883 | message MVNParameter { 884 | // This parameter can be set to false to normalize mean only 885 | optional bool normalize_variance = 1 [default = true]; 886 | 887 | // This parameter can be set to true to perform DNN-like MVN 888 | optional bool across_channels = 2 [default = false]; 889 | 890 | // Epsilon for not dividing by zero while normalizing variance 891 | optional float eps = 3 [default = 1e-9]; 892 | } 893 | 894 | message ParameterParameter { 895 | optional BlobShape shape = 1; 896 | } 897 | 898 | message PoolingParameter { 899 | enum PoolMethod { 900 | MAX = 0; 901 | AVE = 1; 902 | STOCHASTIC = 2; 903 | } 904 | optional PoolMethod pool = 1 [default = MAX]; // The pooling method 905 | // Pad, 
kernel size, and stride are all given as a single value for equal 906 | // dimensions in height and width or as Y, X pairs. 907 | optional uint32 pad = 4 [default = 0]; // The padding size (equal in Y, X) 908 | optional uint32 pad_h = 9 [default = 0]; // The padding height 909 | optional uint32 pad_w = 10 [default = 0]; // The padding width 910 | optional uint32 kernel_size = 2; // The kernel size (square) 911 | optional uint32 kernel_h = 5; // The kernel height 912 | optional uint32 kernel_w = 6; // The kernel width 913 | optional uint32 stride = 3 [default = 1]; // The stride (equal in Y, X) 914 | optional uint32 stride_h = 7; // The stride height 915 | optional uint32 stride_w = 8; // The stride width 916 | enum Engine { 917 | DEFAULT = 0; 918 | CAFFE = 1; 919 | CUDNN = 2; 920 | } 921 | optional Engine engine = 11 [default = DEFAULT]; 922 | // If global_pooling then it will pool over the size of the bottom by doing 923 | // kernel_h = bottom->height and kernel_w = bottom->width 924 | optional bool global_pooling = 12 [default = false]; 925 | } 926 | 927 | message PowerParameter { 928 | // PowerLayer computes outputs y = (shift + scale * x) ^ power. 929 | optional float power = 1 [default = 1.0]; 930 | optional float scale = 2 [default = 1.0]; 931 | optional float shift = 3 [default = 0.0]; 932 | } 933 | 934 | message PSROIPoolingParameter { 935 | required float spatial_scale = 1; 936 | required int32 output_dim = 2; // output channel number 937 | required int32 group_size = 3; // number of groups to encode position-sensitive score maps 938 | } 939 | 940 | message PythonParameter { 941 | optional string module = 1; 942 | optional string layer = 2; 943 | // This value is set to the attribute `param_str` of the `PythonLayer` object 944 | // in Python before calling the `setup()` method. This could be a number, 945 | // string, dictionary in Python dict format, JSON, etc. You may parse this 946 | // string in `setup` method and use it in `forward` and `backward`. 
947 | optional string param_str = 3 [default = '']; 948 | // Whether this PythonLayer is shared among worker solvers during data parallelism. 949 | // If true, each worker solver sequentially run forward from this layer. 950 | // This value should be set true if you are using it as a data layer. 951 | optional bool share_in_parallel = 4 [default = false]; 952 | } 953 | 954 | // Message that stores parameters used by RecurrentLayer 955 | message RecurrentParameter { 956 | // The dimension of the output (and usually hidden state) representation -- 957 | // must be explicitly set to non-zero. 958 | optional uint32 num_output = 1 [default = 0]; 959 | 960 | optional FillerParameter weight_filler = 2; // The filler for the weight 961 | optional FillerParameter bias_filler = 3; // The filler for the bias 962 | 963 | // Whether to enable displaying debug_info in the unrolled recurrent net. 964 | optional bool debug_info = 4 [default = false]; 965 | 966 | // Whether to add as additional inputs (bottoms) the initial hidden state 967 | // blobs, and add as additional outputs (tops) the final timestep hidden state 968 | // blobs. The number of additional bottom/top blobs required depends on the 969 | // recurrent architecture -- e.g., 1 for RNNs, 2 for LSTMs. 970 | optional bool expose_hidden = 5 [default = false]; 971 | } 972 | 973 | // Message that stores parameters used by ReductionLayer 974 | message ReductionParameter { 975 | enum ReductionOp { 976 | SUM = 1; 977 | ASUM = 2; 978 | SUMSQ = 3; 979 | MEAN = 4; 980 | } 981 | 982 | optional ReductionOp operation = 1 [default = SUM]; // reduction operation 983 | 984 | // The first axis to reduce to a scalar -- may be negative to index from the 985 | // end (e.g., -1 for the last axis). 986 | // (Currently, only reduction along ALL "tail" axes is supported; reduction 987 | // of axis M through N, where N < num_axes - 1, is unsupported.) 
988 | // Suppose we have an n-axis bottom Blob with shape: 989 | // (d0, d1, d2, ..., d(m-1), dm, d(m+1), ..., d(n-1)). 990 | // If axis == m, the output Blob will have shape 991 | // (d0, d1, d2, ..., d(m-1)), 992 | // and the ReductionOp operation is performed (d0 * d1 * d2 * ... * d(m-1)) 993 | // times, each including (dm * d(m+1) * ... * d(n-1)) individual data. 994 | // If axis == 0 (the default), the output Blob always has the empty shape 995 | // (count 1), performing reduction across the entire input -- 996 | // often useful for creating new loss functions. 997 | optional int32 axis = 2 [default = 0]; 998 | 999 | optional float coeff = 3 [default = 1.0]; // coefficient for output 1000 | } 1001 | 1002 | // Message that stores parameters used by ReLULayer 1003 | message ReLUParameter { 1004 | // Allow non-zero slope for negative inputs to speed up optimization 1005 | // Described in: 1006 | // Maas, A. L., Hannun, A. Y., & Ng, A. Y. (2013). Rectifier nonlinearities 1007 | // improve neural network acoustic models. In ICML Workshop on Deep Learning 1008 | // for Audio, Speech, and Language Processing. 1009 | optional float negative_slope = 1 [default = 0]; 1010 | enum Engine { 1011 | DEFAULT = 0; 1012 | CAFFE = 1; 1013 | CUDNN = 2; 1014 | } 1015 | optional Engine engine = 2 [default = DEFAULT]; 1016 | } 1017 | 1018 | message ReshapeParameter { 1019 | // Specify the output dimensions. If some of the dimensions are set to 0, 1020 | // the corresponding dimension from the bottom layer is used (unchanged). 1021 | // Exactly one dimension may be set to -1, in which case its value is 1022 | // inferred from the count of the bottom blob and the remaining dimensions. 1023 | // For example, suppose we want to reshape a 2D blob "input" with shape 2 x 8: 1024 | // 1025 | // layer { 1026 | // type: "Reshape" bottom: "input" top: "output" 1027 | // reshape_param { ... 
} 1028 | // } 1029 | // 1030 | // If "input" is 2D with shape 2 x 8, then the following reshape_param 1031 | // specifications are all equivalent, producing a 3D blob "output" with shape 1032 | // 2 x 2 x 4: 1033 | // 1034 | // reshape_param { shape { dim: 2 dim: 2 dim: 4 } } 1035 | // reshape_param { shape { dim: 0 dim: 2 dim: 4 } } 1036 | // reshape_param { shape { dim: 0 dim: 2 dim: -1 } } 1037 | // reshape_param { shape { dim: 0 dim:-1 dim: 4 } } 1038 | // 1039 | optional BlobShape shape = 1; 1040 | 1041 | // axis and num_axes control the portion of the bottom blob's shape that are 1042 | // replaced by (included in) the reshape. By default (axis == 0 and 1043 | // num_axes == -1), the entire bottom blob shape is included in the reshape, 1044 | // and hence the shape field must specify the entire output shape. 1045 | // 1046 | // axis may be non-zero to retain some portion of the beginning of the input 1047 | // shape (and may be negative to index from the end; e.g., -1 to begin the 1048 | // reshape after the last axis, including nothing in the reshape, 1049 | // -2 to include only the last axis, etc.). 1050 | // 1051 | // For example, suppose "input" is a 2D blob with shape 2 x 8. 1052 | // Then the following ReshapeLayer specifications are all equivalent, 1053 | // producing a blob "output" with shape 2 x 2 x 4: 1054 | // 1055 | // reshape_param { shape { dim: 2 dim: 2 dim: 4 } } 1056 | // reshape_param { shape { dim: 2 dim: 4 } axis: 1 } 1057 | // reshape_param { shape { dim: 2 dim: 4 } axis: -3 } 1058 | // 1059 | // num_axes specifies the extent of the reshape. 1060 | // If num_axes >= 0 (and axis >= 0), the reshape will be performed only on 1061 | // input axes in the range [axis, axis+num_axes]. 1062 | // num_axes may also be -1, the default, to include all remaining axes 1063 | // (starting from axis). 1064 | // 1065 | // For example, suppose "input" is a 2D blob with shape 2 x 8. 
1066 | // Then the following ReshapeLayer specifications are equivalent, 1067 | // producing a blob "output" with shape 1 x 2 x 8. 1068 | // 1069 | // reshape_param { shape { dim: 1 dim: 2 dim: 8 } } 1070 | // reshape_param { shape { dim: 1 dim: 2 } num_axes: 1 } 1071 | // reshape_param { shape { dim: 1 } num_axes: 0 } 1072 | // 1073 | // On the other hand, these would produce output blob shape 2 x 1 x 8: 1074 | // 1075 | // reshape_param { shape { dim: 2 dim: 1 dim: 8 } } 1076 | // reshape_param { shape { dim: 1 } axis: 1 num_axes: 0 } 1077 | // 1078 | optional int32 axis = 2 [default = 0]; 1079 | optional int32 num_axes = 3 [default = -1]; 1080 | } 1081 | 1082 | // Message that stores parameters used by ROIPoolingLayer 1083 | message ROIPoolingParameter { 1084 | // Pad, kernel size, and stride are all given as a single value for equal 1085 | // dimensions in height and width or as Y, X pairs. 1086 | optional uint32 pooled_h = 1 [default = 0]; // The pooled output height 1087 | optional uint32 pooled_w = 2 [default = 0]; // The pooled output width 1088 | // Multiplicative spatial scale factor to translate ROI coords from their 1089 | // input scale to the scale used when pooling 1090 | optional float spatial_scale = 3 [default = 1]; 1091 | } 1092 | 1093 | message ScaleParameter { 1094 | // The first axis of bottom[0] (the first input Blob) along which to apply 1095 | // bottom[1] (the second input Blob). May be negative to index from the end 1096 | // (e.g., -1 for the last axis). 
1097 | // 1098 | // For example, if bottom[0] is 4D with shape 100x3x40x60, the output 1099 | // top[0] will have the same shape, and bottom[1] may have any of the 1100 | // following shapes (for the given value of axis): 1101 | // (axis == 0 == -4) 100; 100x3; 100x3x40; 100x3x40x60 1102 | // (axis == 1 == -3) 3; 3x40; 3x40x60 1103 | // (axis == 2 == -2) 40; 40x60 1104 | // (axis == 3 == -1) 60 1105 | // Furthermore, bottom[1] may have the empty shape (regardless of the value of 1106 | // "axis") -- a scalar multiplier. 1107 | optional int32 axis = 1 [default = 1]; 1108 | 1109 | // (num_axes is ignored unless just one bottom is given and the scale is 1110 | // a learned parameter of the layer. Otherwise, num_axes is determined by the 1111 | // number of axes by the second bottom.) 1112 | // The number of axes of the input (bottom[0]) covered by the scale 1113 | // parameter, or -1 to cover all axes of bottom[0] starting from `axis`. 1114 | // Set num_axes := 0, to multiply with a zero-axis Blob: a scalar. 1115 | optional int32 num_axes = 2 [default = 1]; 1116 | 1117 | // (filler is ignored unless just one bottom is given and the scale is 1118 | // a learned parameter of the layer.) 1119 | // The initialization for the learned scale parameter. 1120 | // Default is the unit (1) initialization, resulting in the ScaleLayer 1121 | // initially performing the identity operation. 1122 | optional FillerParameter filler = 3; 1123 | 1124 | // Whether to also learn a bias (equivalent to a ScaleLayer+BiasLayer, but 1125 | // may be more efficient). Initialized with bias_filler (defaults to 0). 
1126 | optional bool bias_term = 4 [default = false]; 1127 | optional FillerParameter bias_filler = 5; 1128 | } 1129 | 1130 | message SigmoidParameter { 1131 | enum Engine { 1132 | DEFAULT = 0; 1133 | CAFFE = 1; 1134 | CUDNN = 2; 1135 | } 1136 | optional Engine engine = 1 [default = DEFAULT]; 1137 | } 1138 | 1139 | message SliceParameter { 1140 | // The axis along which to slice -- may be negative to index from the end 1141 | // (e.g., -1 for the last axis). 1142 | // By default, SliceLayer slices blobs along the "channels" axis (1). 1143 | optional int32 axis = 3 [default = 1]; 1144 | repeated uint32 slice_point = 2; 1145 | 1146 | // DEPRECATED: alias for "axis" -- does not support negative indexing. 1147 | optional uint32 slice_dim = 1 [default = 1]; 1148 | } 1149 | 1150 | message SmoothL1LossParameter { 1151 | // SmoothL1Loss(x) = 1152 | // 0.5 * (sigma * x) ** 2 -- if |x| < 1.0 / sigma / sigma 1153 | // |x| - 0.5 / sigma / sigma -- otherwise 1154 | optional float sigma = 1 [default = 1]; 1155 | } 1156 | 1157 | // Message that stores parameters used by SoftmaxLayer, SoftmaxWithLossLayer 1158 | message SoftmaxParameter { 1159 | enum Engine { 1160 | DEFAULT = 0; 1161 | CAFFE = 1; 1162 | CUDNN = 2; 1163 | } 1164 | optional Engine engine = 1 [default = DEFAULT]; 1165 | 1166 | // The axis along which to perform the softmax -- may be negative to index 1167 | // from the end (e.g., -1 for the last axis). 1168 | // Any other axes will be evaluated as independent softmaxes. 1169 | optional int32 axis = 2 [default = 1]; 1170 | } 1171 | 1172 | message TanHParameter { 1173 | enum Engine { 1174 | DEFAULT = 0; 1175 | CAFFE = 1; 1176 | CUDNN = 2; 1177 | } 1178 | optional Engine engine = 1 [default = DEFAULT]; 1179 | } 1180 | 1181 | // Message that stores parameters used by TileLayer 1182 | message TileParameter { 1183 | // The index of the axis to tile. 1184 | optional int32 axis = 1 [default = 1]; 1185 | 1186 | // The number of copies (tiles) of the blob to output. 
1187 | optional int32 tiles = 2; 1188 | } 1189 | 1190 | // Message that stores parameters used by ThresholdLayer 1191 | message ThresholdParameter { 1192 | optional float threshold = 1 [default = 0]; // Strictly positive values 1193 | } 1194 | 1195 | // Message that stores parameters used by MILLayer 1196 | message MILParameter { 1197 | enum MILType { 1198 | MAX = 0; 1199 | NOR = 1; 1200 | } 1201 | optional MILType type = 1 [default = MAX]; // The MIL method 1202 | } 1203 | 1204 | 1205 | message WindowDataParameter { 1206 | // Specify the data source. 1207 | optional string source = 1; 1208 | // For data pre-processing, we can do simple scaling and subtracting the 1209 | // data mean, if provided. Note that the mean subtraction is always carried 1210 | // out before scaling. 1211 | optional float scale = 2 [default = 1]; 1212 | optional string mean_file = 3; 1213 | // Specify the batch size. 1214 | optional uint32 batch_size = 4; 1215 | // Specify if we would like to randomly crop an image. 1216 | optional uint32 crop_size = 5 [default = 0]; 1217 | // Specify if we want to randomly mirror data. 
1218 | optional bool mirror = 6 [default = false]; 1219 | // Foreground (object) overlap threshold 1220 | optional float fg_threshold = 7 [default = 0.5]; 1221 | // Background (non-object) overlap threshold 1222 | optional float bg_threshold = 8 [default = 0.5]; 1223 | // Fraction of batch that should be foreground objects 1224 | optional float fg_fraction = 9 [default = 0.25]; 1225 | // Amount of contextual padding to add around a window 1226 | // (used only by the window_data_layer) 1227 | optional uint32 context_pad = 10 [default = 0]; 1228 | // Mode for cropping out a detection window 1229 | // warp: cropped window is warped to a fixed size and aspect ratio 1230 | // square: the tightest square around the window is cropped 1231 | optional string crop_mode = 11 [default = "warp"]; 1232 | // cache_images: will load all images in memory for faster access 1233 | optional bool cache_images = 12 [default = false]; 1234 | // append root_folder to locate images 1235 | optional string root_folder = 13 [default = ""]; 1236 | } 1237 | 1238 | message MILDataParameter { 1239 | // Specify the data source. 
1240 | optional string source = 1; 1241 | 1242 | // Number of scales for each image 1243 | optional uint32 num_scales = 2 [default = 1]; 1244 | 1245 | // Side length ratio between neighbouring scales 1246 | optional float scale_factor = 6 [default = 1]; 1247 | 1248 | // Number of channels in the image 1249 | optional uint32 channels = 4 [default = 3]; 1250 | 1251 | // Specify the number of images per batch 1252 | optional uint32 images_per_batch = 3; 1253 | // Specify the number of classes 1254 | optional uint32 n_classes = 5; 1255 | // specify the box_dir and label_dir 1256 | optional string label_file = 7; 1257 | 1258 | // Root directory which contains all the images 1259 | optional string root_dir = 11; 1260 | // Extension for the file 1261 | optional string ext = 12; 1262 | 1263 | // To randomize or not 1264 | optional bool randomize = 13 [default = true]; 1265 | } 1266 | 1267 | 1268 | 1269 | message SPPParameter { 1270 | enum PoolMethod { 1271 | MAX = 0; 1272 | AVE = 1; 1273 | STOCHASTIC = 2; 1274 | } 1275 | optional uint32 pyramid_height = 1; 1276 | optional PoolMethod pool = 2 [default = MAX]; // The pooling method 1277 | enum Engine { 1278 | DEFAULT = 0; 1279 | CAFFE = 1; 1280 | CUDNN = 2; 1281 | } 1282 | optional Engine engine = 6 [default = DEFAULT]; 1283 | } 1284 | 1285 | // DEPRECATED: use LayerParameter. 
1286 | message V1LayerParameter { 1287 | repeated string bottom = 2; 1288 | repeated string top = 3; 1289 | optional string name = 4; 1290 | repeated NetStateRule include = 32; 1291 | repeated NetStateRule exclude = 33; 1292 | enum LayerType { 1293 | NONE = 0; 1294 | ABSVAL = 35; 1295 | ACCURACY = 1; 1296 | ARGMAX = 30; 1297 | BNLL = 2; 1298 | CONCAT = 3; 1299 | CONTRASTIVE_LOSS = 37; 1300 | CONVOLUTION = 4; 1301 | DATA = 5; 1302 | DECONVOLUTION = 39; 1303 | DROPOUT = 6; 1304 | DUMMY_DATA = 32; 1305 | EUCLIDEAN_LOSS = 7; 1306 | ELTWISE = 25; 1307 | EXP = 38; 1308 | FLATTEN = 8; 1309 | HDF5_DATA = 9; 1310 | HDF5_OUTPUT = 10; 1311 | HINGE_LOSS = 28; 1312 | IM2COL = 11; 1313 | IMAGE_DATA = 12; 1314 | INFOGAIN_LOSS = 13; 1315 | INNER_PRODUCT = 14; 1316 | LRN = 15; 1317 | MEMORY_DATA = 29; 1318 | MULTINOMIAL_LOGISTIC_LOSS = 16; 1319 | MVN = 34; 1320 | POOLING = 17; 1321 | POWER = 26; 1322 | RELU = 18; 1323 | SIGMOID = 19; 1324 | SIGMOID_CROSS_ENTROPY_LOSS = 27; 1325 | SILENCE = 36; 1326 | SOFTMAX = 20; 1327 | SOFTMAX_LOSS = 21; 1328 | SPLIT = 22; 1329 | SLICE = 33; 1330 | TANH = 23; 1331 | WINDOW_DATA = 24; 1332 | THRESHOLD = 31; 1333 | } 1334 | optional LayerType type = 5; 1335 | repeated BlobProto blobs = 6; 1336 | repeated string param = 1001; 1337 | repeated DimCheckMode blob_share_mode = 1002; 1338 | enum DimCheckMode { 1339 | STRICT = 0; 1340 | PERMISSIVE = 1; 1341 | } 1342 | repeated float blobs_lr = 7; 1343 | repeated float weight_decay = 8; 1344 | repeated float loss_weight = 35; 1345 | optional AccuracyParameter accuracy_param = 27; 1346 | optional ArgMaxParameter argmax_param = 23; 1347 | optional ConcatParameter concat_param = 9; 1348 | optional ContrastiveLossParameter contrastive_loss_param = 40; 1349 | optional ConvolutionParameter convolution_param = 10; 1350 | optional DataParameter data_param = 11; 1351 | optional DropoutParameter dropout_param = 12; 1352 | optional DummyDataParameter dummy_data_param = 26; 1353 | optional EltwiseParameter 
eltwise_param = 24; 1354 | optional ExpParameter exp_param = 41; 1355 | optional HDF5DataParameter hdf5_data_param = 13; 1356 | optional HDF5OutputParameter hdf5_output_param = 14; 1357 | optional HingeLossParameter hinge_loss_param = 29; 1358 | optional ImageDataParameter image_data_param = 15; 1359 | optional InfogainLossParameter infogain_loss_param = 16; 1360 | optional InnerProductParameter inner_product_param = 17; 1361 | optional LRNParameter lrn_param = 18; 1362 | optional MemoryDataParameter memory_data_param = 22; 1363 | optional MVNParameter mvn_param = 34; 1364 | optional PoolingParameter pooling_param = 19; 1365 | optional PowerParameter power_param = 21; 1366 | optional ReLUParameter relu_param = 30; 1367 | optional SigmoidParameter sigmoid_param = 38; 1368 | optional SoftmaxParameter softmax_param = 39; 1369 | optional SliceParameter slice_param = 31; 1370 | optional TanHParameter tanh_param = 37; 1371 | optional ThresholdParameter threshold_param = 25; 1372 | optional WindowDataParameter window_data_param = 20; 1373 | optional TransformationParameter transform_param = 36; 1374 | optional LossParameter loss_param = 42; 1375 | optional V0LayerParameter layer = 1; 1376 | } 1377 | 1378 | // DEPRECATED: V0LayerParameter is the old way of specifying layer parameters 1379 | // in Caffe. We keep this message type around for legacy support. 1380 | message V0LayerParameter { 1381 | optional string name = 1; // the layer name 1382 | optional string type = 2; // the string to specify the layer type 1383 | 1384 | // Parameters to specify layers with inner products. 
1385 | optional uint32 num_output = 3; // The number of outputs for the layer 1386 | optional bool biasterm = 4 [default = true]; // whether to have bias terms 1387 | optional FillerParameter weight_filler = 5; // The filler for the weight 1388 | optional FillerParameter bias_filler = 6; // The filler for the bias 1389 | 1390 | optional uint32 pad = 7 [default = 0]; // The padding size 1391 | optional uint32 kernelsize = 8; // The kernel size 1392 | optional uint32 group = 9 [default = 1]; // The group size for group conv 1393 | optional uint32 stride = 10 [default = 1]; // The stride 1394 | enum PoolMethod { 1395 | MAX = 0; 1396 | AVE = 1; 1397 | STOCHASTIC = 2; 1398 | } 1399 | optional PoolMethod pool = 11 [default = MAX]; // The pooling method 1400 | optional float dropout_ratio = 12 [default = 0.5]; // dropout ratio 1401 | 1402 | optional uint32 local_size = 13 [default = 5]; // for local response norm 1403 | optional float alpha = 14 [default = 1.]; // for local response norm 1404 | optional float beta = 15 [default = 0.75]; // for local response norm 1405 | optional float k = 22 [default = 1.]; 1406 | 1407 | // For data layers, specify the data source 1408 | optional string source = 16; 1409 | // For data pre-processing, we can do simple scaling and subtracting the 1410 | // data mean, if provided. Note that the mean subtraction is always carried 1411 | // out before scaling. 1412 | optional float scale = 17 [default = 1]; 1413 | optional string meanfile = 18; 1414 | // For data layers, specify the batch size. 1415 | optional uint32 batchsize = 19; 1416 | // For data layers, specify if we would like to randomly crop an image. 1417 | optional uint32 cropsize = 20 [default = 0]; 1418 | // For data layers, specify if we want to randomly mirror data. 
1419 | optional bool mirror = 21 [default = false]; 1420 | 1421 | // The blobs containing the numeric parameters of the layer 1422 | repeated BlobProto blobs = 50; 1423 | // The ratio that is multiplied on the global learning rate. If you want to 1424 | // set the learning ratio for one blob, you need to set it for all blobs. 1425 | repeated float blobs_lr = 51; 1426 | // The weight decay that is multiplied on the global weight decay. 1427 | repeated float weight_decay = 52; 1428 | 1429 | // The rand_skip variable is for the data layer to skip a few data points 1430 | // to avoid all asynchronous sgd clients to start at the same point. The skip 1431 | // point would be set as rand_skip * rand(0,1). Note that rand_skip should not 1432 | // be larger than the number of keys in the database. 1433 | optional uint32 rand_skip = 53 [default = 0]; 1434 | 1435 | // Fields related to detection (det_*) 1436 | // foreground (object) overlap threshold 1437 | optional float det_fg_threshold = 54 [default = 0.5]; 1438 | // background (non-object) overlap threshold 1439 | optional float det_bg_threshold = 55 [default = 0.5]; 1440 | // Fraction of batch that should be foreground objects 1441 | optional float det_fg_fraction = 56 [default = 0.25]; 1442 | 1443 | // optional bool OBSOLETE_can_clobber = 57 [default = true]; 1444 | 1445 | // Amount of contextual padding to add around a window 1446 | // (used only by the window_data_layer) 1447 | optional uint32 det_context_pad = 58 [default = 0]; 1448 | 1449 | // Mode for cropping out a detection window 1450 | // warp: cropped window is warped to a fixed size and aspect ratio 1451 | // square: the tightest square around the window is cropped 1452 | optional string det_crop_mode = 59 [default = "warp"]; 1453 | 1454 | // For ReshapeLayer, one needs to specify the new dimensions. 
1455 | optional int32 new_num = 60 [default = 0]; 1456 | optional int32 new_channels = 61 [default = 0]; 1457 | optional int32 new_height = 62 [default = 0]; 1458 | optional int32 new_width = 63 [default = 0]; 1459 | 1460 | // Whether or not ImageLayer should shuffle the list of files at every epoch. 1461 | // It will also resize images if new_height or new_width are not zero. 1462 | optional bool shuffle_images = 64 [default = false]; 1463 | 1464 | // For ConcatLayer, one needs to specify the dimension for concatenation, and 1465 | // the other dimensions must be the same for all the bottom blobs. 1466 | // By default it will concatenate blobs along the channels dimension. 1467 | optional uint32 concat_dim = 65 [default = 1]; 1468 | 1469 | optional HDF5OutputParameter hdf5_output_param = 1001; 1470 | } 1471 | 1472 | message PReLUParameter { 1473 | // Parametric ReLU described in K. He et al, Delving Deep into Rectifiers: 1474 | // Surpassing Human-Level Performance on ImageNet Classification, 2015. 1475 | 1476 | // Initial value of a_i. Default is a_i=0.25 for all i. 1477 | optional FillerParameter filler = 1; 1478 | // Whether or not slope parameters are shared across channels. 1479 | optional bool channel_shared = 2 [default = false]; 1480 | } 1481 | -------------------------------------------------------------------------------- /code/run.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) 2017-present, starime. 3 | All rights reserved. 4 | 5 | This source code is licensed under the BSD-style license found in the 6 | LICENSE file in the root directory of this source tree. An additional grant 7 | of patent rights can be found in the PATENTS file in the same directory. 
8 | """ 9 | 10 | import os 11 | import torch 12 | import torch._utils 13 | try: 14 | torch._utils._rebuild_tensor_v2 15 | except AttributeError: 16 | def _rebuild_tensor_v2(storage, storage_offset, size, stride, requires_grad, backward_hooks): 17 | tensor = torch._utils._rebuild_tensor(storage, storage_offset, size, stride) 18 | tensor.requires_grad = requires_grad 19 | tensor._backward_hooks = backward_hooks 20 | return tensor 21 | torch._utils._rebuild_tensor_v2 = _rebuild_tensor_v2 22 | 23 | import torchvision 24 | 25 | from ConvertModel import ConvertModel_caffe 26 | from ConvertModel import ConvertModel_ncnn 27 | 28 | from ReplaceDenormals import ReplaceDenormals 29 | 30 | 31 | """ Import your net structure here """ 32 | 33 | """ ResNet """ 34 | os.sys.path.append('../ModelFiles/ResNet') 35 | import resnet 36 | 37 | """ MobileNet """ 38 | os.sys.path.append('../ModelFiles/MobileNet') 39 | from MobileNet import MobileNet 40 | 41 | """ UNet """ 42 | os.sys.path.append('../ModelFiles/UNet') 43 | import UNet 44 | 45 | """ FaceBoxes """ 46 | os.sys.path.append('../ModelFiles/FaceBoxes') 47 | from FaceBoxes import FaceBoxes 48 | 49 | """ Anime Gan """ 50 | os.sys.path.append('../ModelFiles/_netG_1') 51 | import models 52 | 53 | 54 | def GenModelZoo(): 55 | """ Specify the input shape and model initializing param """ 56 | return { 57 | 0: (torchvision.models.squeezenet1_1, [1, 3, 224, 224], [True], {}), 58 | 1: (resnet.resnet50, [1, 3, 224, 224], [True], {}), 59 | 2: (torchvision.models.densenet121, [1, 3, 224, 224], [False], {}), 60 | 3: (MobileNet, [1, 3, 224, 224], [], {}), 61 | 62 | 17: (models._netG_1, [1, 100, 1, 1], [1, 100, 3, 64, 1], {}), 63 | 18: (FaceBoxes, [1, 3, 224, 224], [], {}), 64 | 20: (UNet.UNet, [1, 3, 64, 64], [2], {}), 65 | } 66 | 67 | 68 | """ Set empty path to use default weight initialization """ 69 | # model_path = '../ModelFiles/ResNet/resnet50.pth' 70 | model_path = '' 71 | 72 | ModelZoo = GenModelZoo() 73 | ModelDir = '../ModelFiles/' 74 
| 75 | """ Set to caffe or ncnn """ 76 | dst = 'ncnn' 77 | 78 | for i in range(18, 19): 79 | if i not in ModelZoo: 80 | continue 81 | 82 | ModuleFunc, InputShape, args, kwargs = ModelZoo[i] 83 | """ Init pytorch model """ 84 | pytorch_net = ModuleFunc(*args, **kwargs) 85 | 86 | if model_path != '': 87 | try: 88 | pytorch_net.load_state_dict(torch.load(model_path, map_location=lambda storage, loc: storage)) 89 | except AttributeError: 90 | pytorch_net = torch.load(model_path, map_location=lambda storage, loc: storage) 91 | else: 92 | NetName = str(pytorch_net.__class__.__name__) 93 | if not os.path.exists(ModelDir + NetName): 94 | os.makedirs(ModelDir + NetName) 95 | print 'Saving default weight initialization...' 96 | torch.save(pytorch_net.state_dict(), ModelDir + NetName + '/' + NetName + '.pth') 97 | 98 | """ Replace denormal weight values(<1e-30), otherwise may increase forward time cost """ 99 | ReplaceDenormals(pytorch_net) 100 | 101 | """ Connnnnnnnvert! """ 102 | print('Converting...') 103 | if dst == 'caffe': 104 | text_net, binary_weights = ConvertModel_caffe(pytorch_net, InputShape, softmax=False) 105 | elif dst == 'ncnn': 106 | text_net, binary_weights = ConvertModel_ncnn(pytorch_net, InputShape, softmax=False) 107 | 108 | """ Save files """ 109 | NetName = str(pytorch_net.__class__.__name__) 110 | if not os.path.exists(ModelDir + NetName): 111 | os.makedirs(ModelDir + NetName) 112 | print('Saving to ' + ModelDir + NetName) 113 | 114 | if dst == 'caffe': 115 | import google.protobuf.text_format 116 | with open(ModelDir + NetName + '/' + NetName + '.prototxt', 'w') as f: 117 | f.write(google.protobuf.text_format.MessageToString(text_net)) 118 | with open(ModelDir + NetName + '/' + NetName + '.caffemodel', 'w') as f: 119 | f.write(binary_weights.SerializeToString()) 120 | 121 | elif dst == 'ncnn': 122 | import numpy as np 123 | with open(ModelDir + NetName + '/' + NetName + '.param', 'w') as f: 124 | f.write(text_net) 125 | with open(ModelDir + NetName + 
'/' + NetName + '.bin', 'w') as f: 126 | for weights in binary_weights: 127 | for blob in weights: 128 | blob_32f = blob.flatten().astype(np.float32) 129 | blob_32f.tofile(f) 130 | 131 | print('Converting Done.') 132 | 133 | """ Test & Compare(optional) """ 134 | # from test import TestAndCompare 135 | # TestAndCompare(i, pytorch_net, InputShape, 'Addmm_1', UseImage=False) 136 | -------------------------------------------------------------------------------- /code/test.py: -------------------------------------------------------------------------------- 1 | import os 2 | os.sys.path.append('/home/starimeliu/Documents/caffe/caffe-master/python') 3 | 4 | import caffe 5 | import numpy as np 6 | import cv2 7 | import torch.nn as nn 8 | from torchvision import transforms 9 | 10 | 11 | def PrintLabel(prob): 12 | labels_filename = '../TestData/ImageNetLabels.txt' 13 | labels = np.loadtxt(labels_filename, str, delimiter='\t') 14 | order = prob.argsort() 15 | for i in range(3): 16 | print(labels[order[-1 - i]], prob[order[-1 - i]]) 17 | 18 | 19 | def TestCaffe(proto_path, model_path, inputs, LayerCheck, ModelInd): 20 | net = caffe.Net(proto_path, model_path, caffe.TEST) 21 | net.blobs['data'].data[...] 
= inputs 22 | print('input blob:') 23 | print(net.blobs['data'].data[...]) 24 | 25 | net.forward() 26 | 27 | if LayerCheck == 'Softmax_1': 28 | PrintLabel(net.blobs[LayerCheck].data[0].flatten()) 29 | else: 30 | print(net.blobs[LayerCheck].data[0][...].flatten()) 31 | if (ModelInd == 17): 32 | result_img = net.blobs[LayerCheck].data[0] * 255 33 | result_img = result_img.astype(int) 34 | result_img = np.transpose(result_img, (1, 2, 0)) 35 | result_img = result_img[..., ::-1] 36 | cv2.imwrite("AnimeNet_result.png", result_img) 37 | if (ModelInd == 91): 38 | result_img = net.blobs[LayerCheck].data[0] * 255 39 | result_img = result_img.astype(int) 40 | result_img = np.transpose(result_img, (1, 2, 0)) 41 | result_img = result_img[..., ::-1] 42 | cv2.imwrite("Upsample_result.png", result_img) 43 | 44 | 45 | def TestPytorch(net, inputs, LayerCheck): 46 | from torch.autograd import Variable 47 | 48 | inputs = Variable(inputs, requires_grad=True) 49 | 50 | net.eval() 51 | outputs = net(inputs) 52 | 53 | if LayerCheck == 'Softmax_1': 54 | m = nn.Softmax() 55 | if isinstance(outputs, tuple): 56 | outputs = outputs[0] 57 | outputs = m(outputs) 58 | PrintLabel(outputs.data.numpy().flatten()) 59 | result = outputs.data.numpy().flatten() 60 | print(result.shape) 61 | else: 62 | print(outputs.data.numpy().flatten()) 63 | # result = outputs.data.numpy().flatten() 64 | 65 | 66 | class ColorSpaceTransform(object): 67 | def __call__(self, image): 68 | img_hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV) 69 | img_ycc = cv2.cvtColor(image, cv2.COLOR_BGR2YCR_CB) 70 | img = np.concatenate((img_hsv, img_ycc), 2) 71 | 72 | return img 73 | 74 | 75 | def TestAndCompare(ModelInd, pytorch_net, InputShape, LayerCheck='Softmax_1', UseImage=False): 76 | 77 | # trans = ColorSpaceTransform() 78 | # inputs = trans(inputs) 79 | 80 | if UseImage: 81 | img = '../TestData/2008_000536.jpg' 82 | # inputs = cv2.imread(img, 0) # 0 for grayscale 83 | inputs = cv2.imread(img, 1) # 1 for color 84 | else: 85 | n, c, 
h, w = InputShape 86 | if (ModelInd == 17): 87 | """ mean and standard deviation """ 88 | mu, sigma = 0, 1 89 | inputs = np.random.normal(mu, sigma, w * h * c).reshape(w, h, c) 90 | else: 91 | # inputs = np.linspace(1, w * h * c, w * h * c).reshape(w, h, c) 92 | inputs = np.random.rand(w, h, c) 93 | 94 | print(inputs.shape) 95 | 96 | scale_factor = 1.0 / 255.0 97 | if UseImage: 98 | transform_inputs = transforms.Compose([ 99 | transforms.ToPILImage(), 100 | # transforms.CenterCrop(112), 101 | transforms.ToTensor(), 102 | transforms.Normalize((0, 0, 0), (scale_factor, scale_factor, scale_factor)), 103 | # transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)) 104 | ]) 105 | else: 106 | transform_inputs = transforms.Compose([ 107 | transforms.ToTensor(), 108 | transforms.Normalize((0, 0, 0), (scale_factor, scale_factor, scale_factor)), 109 | ]) 110 | 111 | print('Caffe Output:') 112 | NetName = str(pytorch_net.__class__.__name__) 113 | proto_path = '../ModelFiles/' + NetName + '/' + NetName + '.prototxt' 114 | model_path = '../ModelFiles/' + NetName + '/' + NetName + '.caffemodel' 115 | inputs_caffe = transform_inputs(inputs).numpy() 116 | TestCaffe(proto_path, model_path, inputs_caffe, LayerCheck, ModelInd) 117 | 118 | print('') 119 | print('Pytorch Output:') 120 | inputs_pytorch = transform_inputs(inputs) 121 | inputs_pytorch = inputs_pytorch.unsqueeze(0) 122 | TestPytorch(pytorch_net, inputs_pytorch, LayerCheck) 123 | --------------------------------------------------------------------------------