from __future__ import print_function
from collections import OrderedDict

import torch
import torch.nn as nn
import numpy as np


def get_mask(in_channels, channels):
    """Build the binary diagonal mask for a diagonalwise-refactorized
    depthwise convolution.

    Returns a numpy array of shape (in_channels, channels, 3, 3) where
    output channel i has ones only at relative input slot i % channels,
    so each output channel reads exactly one input channel.
    """
    mask = np.zeros((in_channels, channels, 3, 3))
    for i in range(in_channels):
        # Within its group, output channel i reads only relative input
        # channel i % channels — this realizes a depthwise convolution.
        mask[i, i % channels, :, :] = 1.
    return mask


class DiagonalwiseRefactorization(nn.Module):
    """Depthwise 3x3 convolution refactorized into `groups` grouped
    convolutions with diagonal weight matrices, so the highly optimized
    cuDNN grouped-conv kernels are used instead of channel-by-channel
    depthwise kernels.

    `groups` must divide `in_channels` evenly.
    """

    def __init__(self, in_channels, stride=1, groups=1):
        super(DiagonalwiseRefactorization, self).__init__()
        # Integer division: plain `/` yields a float under Python 3 and
        # breaks the tensor-shape arguments below.
        channels = in_channels // groups
        self.in_channels = in_channels
        self.groups = groups
        self.stride = stride
        # The mask is a fixed constant, not a learnable parameter:
        # register it as a buffer so it is saved in the state_dict but
        # excluded from model.parameters().
        self.register_buffer(
            'mask', torch.tensor(get_mask(in_channels, channels), dtype=torch.float32))
        self.weight = nn.Parameter(torch.empty(in_channels, channels, 3, 3),
                                   requires_grad=True)
        # xavier_uniform_ replaces the deprecated (and later removed)
        # torch.nn.init.xavier_uniform.
        torch.nn.init.xavier_uniform_(self.weight.data)
        # Zero out the off-diagonal entries right away.
        self.weight.data.mul_(self.mask)

    def forward(self, x):
        # Re-apply the mask so off-diagonal weights stay exactly zero
        # even after gradient updates.
        weight = torch.mul(self.weight, self.mask)
        return torch.nn.functional.conv2d(x, weight, bias=None, stride=self.stride,
                                          padding=1, groups=self.groups)


def DepthwiseConv2d(in_channels, stride=1):
    """Factory for a 3x3 depthwise convolution over `in_channels`."""
    # The original channel-by-channel depthwise convolution:
    # return nn.Conv2d(in_channels, in_channels, 3, stride=stride, padding=1, groups=in_channels, bias=False)

    # Standard convolution:
    # return nn.Conv2d(in_channels, in_channels, 3, stride=stride, padding=1, bias=False)

    # Diagonalwise refactorization with roughly 32 channels per group.
    # Integer division keeps `groups` an int under Python 3.
    groups = max(in_channels // 32, 1)
    return DiagonalwiseRefactorization(in_channels, stride, groups)


def PointwiseConv2d(in_channels, out_channels):
    """1x1 convolution mixing channels — the pointwise half of a
    depthwise-separable convolution."""
    return nn.Conv2d(in_channels, out_channels, 1, stride=1, padding=0, bias=False)


class MobileNet(nn.Module):
    """MobileNet-v1 for 224x224 input and 1000 classes, with the
    depthwise convolutions implemented via diagonalwise refactorization."""

    def __init__(self):
        super(MobileNet, self).__init__()
        self.features = nn.Sequential(OrderedDict([
            ('conv', nn.Conv2d(3, 32, 3, stride=2, padding=1, bias=False)),
            ('bn_conv', nn.BatchNorm2d(32)),
            ('relu_conv', nn.ReLU(inplace=True)),
        ]))

        # Output channels / strides of the 13 depthwise-separable blocks.
        block_channels = [64, 128, 128, 256, 256, 512, 512, 512, 512, 512, 512, 1024, 1024]
        block_strides = [1, 2, 1, 2, 1, 2, 1, 1, 1, 1, 1, 2, 1]

        in_c = 32
        for idx, (out_c, stride) in enumerate(zip(block_channels, block_strides)):
            self.features.add_module('dw_conv_{}'.format(idx), DepthwiseConv2d(in_c, stride=stride))
            self.features.add_module('dw_norm_{}'.format(idx), nn.BatchNorm2d(in_c))
            self.features.add_module('dw_relu_{}'.format(idx), nn.ReLU(inplace=True))
            self.features.add_module('pw_conv_{}'.format(idx), PointwiseConv2d(in_c, out_c))
            self.features.add_module('pw_norm_{}'.format(idx), nn.BatchNorm2d(out_c))
            self.features.add_module('pw_relu_{}'.format(idx), nn.ReLU(inplace=True))
            in_c = out_c

        # Total stride is 32, so a 224x224 input reaches here at 7x7.
        self.avgpool = nn.AvgPool2d(7)
        self.classifier = nn.Linear(1024, 1000)

    def forward(self, x):
        x = self.features(x)
        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x
from __future__ import print_function

import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from time import perf_counter

from MobileNet import MobileNet

# Benchmark script: measures per-batch training time of MobileNet on a
# single cached ImageNet batch (data-loading time is excluded).

model = MobileNet()
model.cuda()

print('Model created.')

transform = transforms.Compose([
    # RandomResizedCrop is the current name of the removed RandomSizedCrop.
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

traindir = '/home/zheng/Datasets/ILSVRC/ILSVRC2012_images_train'
train = datasets.ImageFolder(traindir, transform)

print('Dataset created.')

train_loader = torch.utils.data.DataLoader(train, batch_size=64, shuffle=True, num_workers=4)

criterion = nn.CrossEntropyLoss().cuda()
learning_rate = 0.01
# Only optimize trainable parameters; the 0.9 is SGD momentum.
optimizer = optim.SGD(filter(lambda p: p.requires_grad, model.parameters()),
                      lr=learning_rate, momentum=0.9)

print('Begin Training...')

num_epoch = 20
max_batches = 50
tot_time = 0

# Preload a single batch so data-loading time is excluded from the timing.
inputs, labels = next(iter(train_loader))
inputs, labels = Variable(inputs.cuda()), Variable(labels.cuda())

# Time max_batches training iterations. The first iteration is a warm-up
# (CUDA context / kernel autotuning) and is excluded from the average.
for i in range(max_batches + 1):
    # perf_counter replaces time.clock, which was removed in Python 3.8.
    t0 = perf_counter()

    optimizer.zero_grad()
    outputs = model(inputs)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()

    t1 = perf_counter()
    if i > 0:
        tot_time += t1 - t0

    print('{}: {} seconds'.format(i, t1 - t0))

print(tot_time / float(max_batches))
| --------------------------------------------------------------------------------