├── .gitignore
├── LICENSE
├── README.md
├── fig
    ├── cifar10.png
    └── cifar100.png
├── main_cifar10.py
├── main_cifar100.py
├── models
    ├── __init__.py
    ├── densenet.py
    ├── dpn.py
    ├── googlenet.py
    ├── lenet.py
    ├── mobilenet.py
    ├── preact_resnet.py
    ├── resnet.py
    ├── resnext.py
    ├── senet.py
    ├── shufflenet.py
    └── vgg.py
├── run.sh
└── utils.py


/.gitignore:
--------------------------------------------------------------------------------
  1 | dat
  2 | dat/*
  3 | 
  4 | # Byte-compiled / optimized / DLL files
  5 | __pycache__/
  6 | *.py[cod]
  7 | *$py.class
  8 | 
  9 | # C extensions
 10 | *.so
 11 | 
 12 | # Distribution / packaging
 13 | .Python
 14 | env/
 15 | build/
 16 | develop-eggs/
 17 | dist/
 18 | downloads/
 19 | eggs/
 20 | .eggs/
 21 | lib/
 22 | lib64/
 23 | parts/
 24 | sdist/
 25 | var/
 26 | wheels/
 27 | *.egg-info/
 28 | .installed.cfg
 29 | *.egg
 30 | 
 31 | # PyInstaller
 32 | #  Usually these files are written by a python script from a template
 33 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 34 | *.manifest
 35 | *.spec
 36 | 
 37 | # Installer logs
 38 | pip-log.txt
 39 | pip-delete-this-directory.txt
 40 | 
 41 | # Unit test / coverage reports
 42 | htmlcov/
 43 | .tox/
 44 | .coverage
 45 | .coverage.*
 46 | .cache
 47 | nosetests.xml
 48 | coverage.xml
 49 | *.cover
 50 | .hypothesis/
 51 | 
 52 | # Translations
 53 | *.mo
 54 | *.pot
 55 | 
 56 | # Django stuff:
 57 | *.log
 58 | local_settings.py
 59 | 
 60 | # Flask stuff:
 61 | instance/
 62 | .webassets-cache
 63 | 
 64 | # Scrapy stuff:
 65 | .scrapy
 66 | 
 67 | # Sphinx documentation
 68 | docs/_build/
 69 | 
 70 | # PyBuilder
 71 | target/
 72 | 
 73 | # Jupyter Notebook
 74 | .ipynb_checkpoints
 75 | 
 76 | # pyenv
 77 | .python-version
 78 | 
 79 | # celery beat schedule file
 80 | celerybeat-schedule
 81 | 
 82 | # SageMath parsed files
 83 | *.sage.py
 84 | 
 85 | # dotenv
 86 | .env
 87 | 
 88 | # virtualenv
 89 | .venv
 90 | venv/
 91 | ENV/
 92 | 
 93 | # Spyder project settings
 94 | .spyderproject
 95 | .spyproject
 96 | 
 97 | # Rope project settings
 98 | .ropeproject
 99 | 
100 | # mkdocs documentation
101 | /site
102 | 
103 | # mypy
104 | .mypy_cache/
105 | 
106 | .DS_Store


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2017 Harry Yang
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Mixup: Beyond Empirical Risk Minimization in PyTorch
 2 | 
 3 | This is an unofficial PyTorch implementation of [mixup: Beyond Empirical Risk Minimization](https://arxiv.org/abs/1710.09412). The code is adapted from [PyTorch CIFAR](https://github.com/kuangliu/pytorch-cifar). 
 4 | 
 5 | ## The results:
 6 | 
  7 | I only tested on CIFAR 10 and CIFAR 100. The network used is PreAct ResNet-18. For mixup, alpha is set to its default value of 1, meaning the mixing weight is sampled uniformly between zero and one. Each setting was trained for 200 epochs. The learning rate is 0.1 (epochs 1-100), 0.01 (epochs 101-150) and 0.001 (epochs 151-200). The batch size is 128. 
 8 | 
 9 | | Dataset and Model             | Acc.        |
10 | | ----------------- | ----------- |
11 | | CIFAR 10 no mixup              | 94.97%      |
12 | | CIFAR 10 mixup          | 95.53%      |
13 | | CIFAR 100 no mixup         | 76.53%      |
14 | | CIFAR 100 mixup         | 77.83%      |
15 | 
16 | ## CIFAR 10 test accuracy evolution
17 | ![cifar10](fig/cifar10.png "cifar10 accuracy evolution.")
18 | ## CIFAR 100 test accuracy evolution
19 | ![cifar100](fig/cifar100.png "cifar100 accuracy evolution.")
20 | 
21 | ## Usage
22 | ```
23 | # Train and test CIFAR 10 with mixup.
 24 | python main_cifar10.py --mixup --exp='cifar10_mixup'
25 | # Train and test CIFAR 10 without mixup.
26 | python main_cifar10.py --exp='cifar10_nomixup'
27 | # Train and test CIFAR 100 with mixup.
28 | python main_cifar100.py --mixup --exp='cifar100_mixup'
29 | # Train and test CIFAR 100 without mixup.
30 | python main_cifar100.py --exp='cifar100_nomixup'
31 | ```
32 | 


--------------------------------------------------------------------------------
/fig/cifar10.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/leehomyc/mixup_pytorch/20604068224795f17697989d95d20d376b244e39/fig/cifar10.png


--------------------------------------------------------------------------------
/fig/cifar100.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/leehomyc/mixup_pytorch/20604068224795f17697989d95d20d376b244e39/fig/cifar100.png


--------------------------------------------------------------------------------
/main_cifar10.py:
--------------------------------------------------------------------------------
  1 | """Train CIFAR10 with PyTorch."""
  2 | from __future__ import print_function
  3 | 
  4 | import argparse
  5 | import os
  6 | 
  7 | import numpy
  8 | import torch
  9 | import torch.nn as nn
 10 | import torch.optim as optim
 11 | from torch.optim import lr_scheduler
 12 | import torch.backends.cudnn as cudnn
 13 | 
 14 | import torchvision
 15 | import torchvision.transforms as transforms
 16 | 
 17 | 
 18 | from models import PreActResNet18
 19 | from utils import progress_bar
 20 | from torch.autograd import Variable
 21 | 
 22 | 
 23 | parser = argparse.ArgumentParser(description='PyTorch CIFAR10 Training')
 24 | parser.add_argument('--lr', default=0.1, type=float, help='learning rate')
 25 | parser.add_argument('--resume', '-r', action='store_true',
 26 |                     help='resume from checkpoint')
 27 | parser.add_argument('--exp', default='cifar10_mixup', type=str,
 28 |                     help='name of the experiment')
 29 | parser.add_argument('--mixup', action='store_true',
 30 |                     help='whether to use mixup or not')
 31 | args = parser.parse_args()
 32 | 
 33 | use_cuda = torch.cuda.is_available()
 34 | best_acc = 0  # best test accuracy
 35 | start_epoch = 0  # start from epoch 0 or last checkpoint epoch
 36 | 
 37 | # Data
 38 | print('==> Preparing data..')
 39 | transform_train = transforms.Compose([
 40 |     transforms.RandomCrop(32, padding=4),
 41 |     transforms.RandomHorizontalFlip(),
 42 |     transforms.ToTensor(),
 43 |     transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
 44 | ])
 45 | 
 46 | transform_test = transforms.Compose([
 47 |     transforms.ToTensor(),
 48 |     transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
 49 | ])
 50 | 
 51 | trainset = torchvision.datasets.CIFAR10(
 52 |     root='/data/public/cifar10', train=True, download=True,
 53 |     transform=transform_train)
 54 | trainloader = torch.utils.data.DataLoader(
 55 |     trainset, batch_size=128, shuffle=True, num_workers=2)
 56 | 
 57 | testset = torchvision.datasets.CIFAR10(
 58 |     root='/data/public/cifar10', train=False, download=True,
 59 |     transform=transform_test)
 60 | testloader = torch.utils.data.DataLoader(
 61 |     testset, batch_size=100, shuffle=False, num_workers=2)
 62 | 
 63 | # Model
 64 | if args.resume:
 65 |     # Load checkpoint.
 66 |     print('==> Resuming from checkpoint..')
 67 |     assert os.path.isdir('checkpoint'), 'Error: no checkpoint directory found!'
 68 |     checkpoint = torch.load('./checkpoint_{}/ckpt.t7'.format(args.exp))
 69 |     net = checkpoint['net']
 70 |     best_acc = checkpoint['acc']
 71 |     start_epoch = checkpoint['epoch']
 72 | else:
 73 |     print('==> Building model..')
 74 |     # net = VGG('VGG19')
 75 |     # net = ResNet18()
 76 |     net = PreActResNet18()
 77 |     # net = GoogLeNet()
 78 |     # net = DenseNet121()
 79 |     # net = ResNeXt29_2x64d()
 80 |     # net = MobileNet()
 81 |     # net = DPN92()
 82 |     # net = ShuffleNetG2()
 83 |     # net = SENet18()
 84 | 
 85 | if use_cuda:
 86 |     net.cuda()
 87 |     net = torch.nn.DataParallel(
 88 |         net, device_ids=range(torch.cuda.device_count()))
 89 |     cudnn.benchmark = True
 90 | 
 91 | criterion = nn.CrossEntropyLoss()
 92 | optimizer = optim.SGD(net.parameters(), lr=args.lr,
 93 |                       momentum=0.9, weight_decay=1e-4)
 94 | 
 95 | # Training
 96 | 
 97 | 
 98 | def shuffle_minibatch(inputs, targets, mixup=True):
 99 |     """Shuffle a minibatch and do linear interpolation between images and labels.
100 | 
101 |     Args:
102 |         inputs: a numpy array of images with size batch_size x H x W x 3.
103 |         targets: a numpy array of labels with size batch_size x 1.
104 |         mixup: a boolen as whether to do mixup or not. If mixup is True, we
105 |             sample the weight from beta distribution using parameter alpha=1,
106 |             beta=1. If mixup is False, we set the weight to be 1 and 0
107 |             respectively for the randomly shuffled mini-batches.
108 |     """
109 |     batch_size = inputs.shape[0]
110 |     rp1 = torch.randperm(batch_size)
111 |     inputs1 = inputs[rp1]
112 |     targets1 = targets[rp1]
113 |     targets1_1 = targets1.unsqueeze(1)
114 | 
115 |     rp2 = torch.randperm(batch_size)
116 |     inputs2 = inputs[rp2]
117 |     targets2 = targets[rp2]
118 |     targets2_1 = targets2.unsqueeze(1)
119 | 
120 |     y_onehot = torch.FloatTensor(batch_size, 10)
121 |     y_onehot.zero_()
122 |     targets1_oh = y_onehot.scatter_(1, targets1_1, 1)
123 | 
124 |     y_onehot2 = torch.FloatTensor(batch_size, 10)
125 |     y_onehot2.zero_()
126 |     targets2_oh = y_onehot2.scatter_(1, targets2_1, 1)
127 | 
128 |     if mixup is True:
129 |         a = numpy.random.beta(1, 1, [batch_size, 1])
130 |     else:
131 |         a = numpy.ones((batch_size, 1))
132 | 
133 |     b = numpy.tile(a[..., None, None], [1, 3, 32, 32])
134 | 
135 |     inputs1 = inputs1 * torch.from_numpy(b).float()
136 |     inputs2 = inputs2 * torch.from_numpy(1 - b).float()
137 | 
138 |     c = numpy.tile(a, [1, 10])
139 |     targets1_oh = targets1_oh.float() * torch.from_numpy(c).float()
140 |     targets2_oh = targets2_oh.float() * torch.from_numpy(1 - c).float()
141 | 
142 |     inputs_shuffle = inputs1 + inputs2
143 |     targets_shuffle = targets1_oh + targets2_oh
144 | 
145 |     return inputs_shuffle, targets_shuffle
146 | 
147 | 
def train(epoch):
    """Train the global ``net`` for one pass over ``trainloader``.

    Every minibatch is re-sampled by ``shuffle_minibatch`` (blended when
    ``args.mixup`` is set) and the soft-label cross-entropy is minimised
    with the global ``optimizer``. Running loss and accuracy are reported
    through ``progress_bar``; accuracy is measured against the arg-max of
    the (possibly mixed) soft labels.

    Args:
        epoch: epoch index, used only for logging.
    """
    print('\nEpoch: %d' % epoch)
    net.train()
    train_loss = 0
    correct = 0
    total = 0
    # The module holds no state, so build it once instead of once per batch.
    log_softmax = nn.LogSoftmax()
    for batch_idx, (inputs, targets) in enumerate(trainloader):
        batch_size = targets.size(0)
        inputs_shuffle, targets_shuffle = shuffle_minibatch(
            inputs, targets, args.mixup)

        if use_cuda:
            inputs_shuffle, targets_shuffle = inputs_shuffle.cuda(), \
                targets_shuffle.cuda()

        optimizer.zero_grad()

        inputs_shuffle, targets_shuffle = Variable(
            inputs_shuffle), Variable(targets_shuffle)

        outputs = net(inputs_shuffle)

        # Cross-entropy against soft labels, averaged over the samples that
        # are actually in this batch. Dividing by a fixed 128 mis-scaled the
        # last batch of an epoch, which can be smaller.
        loss = -log_softmax(outputs) * targets_shuffle
        loss = torch.sum(loss) / batch_size
        loss.backward()
        optimizer.step()

        train_loss += loss.data[0]
        _, predicted = torch.max(outputs.data, 1)
        total += batch_size
        # Hard label = class with the largest weight in the soft label.
        _, hard_targets = torch.max(targets_shuffle.data, 1)
        correct += predicted.eq(hard_targets).cpu().sum()

        progress_bar(batch_idx, len(trainloader), 'Epoch %d, Training Loss: %.3f | Acc: %.3f%% (%d/%d)'  # noqa
                     % (epoch, train_loss / (batch_idx + 1), 100. * correct / total, correct, total))  # noqa
184 | 
185 | 
def test(epoch):
    """Evaluate the global ``net`` on ``testloader``.

    Reports running loss / accuracy through ``progress_bar`` and, when the
    final accuracy beats the global ``best_acc``, saves the model together
    with its accuracy and epoch to ``./checkpoint_<exp>/ckpt.t7``.

    Args:
        epoch: epoch index, used for logging and stored in the checkpoint.
    """
    global best_acc
    net.eval()
    loss_sum, n_correct, n_seen = 0, 0, 0
    for step, (images, labels) in enumerate(testloader):
        if use_cuda:
            images = images.cuda()
            labels = labels.cuda()
        images = Variable(images, volatile=True)
        labels = Variable(labels)

        logits = net(images)
        loss = criterion(logits, labels)
        loss_sum += loss.data[0]

        predicted = torch.max(logits.data, 1)[1]
        n_seen += labels.size(0)
        n_correct += predicted.eq(labels.data).cpu().sum()

        message = 'Epoch %d, Test Loss: %.3f | Acc: %.3f%% (%d/%d)' % (
            epoch, loss_sum / (step + 1), 100. * n_correct / n_seen,
            n_correct, n_seen)
        progress_bar(step, len(testloader), message)

    # Save checkpoint.
    acc = 100. * n_correct / n_seen
    if not acc > best_acc:
        return

    print('Saving..')
    # Unwrap DataParallel so the bare module is what gets stored.
    model_to_save = net.module if use_cuda else net
    state = {
        'net': model_to_save,
        'acc': acc,
        'epoch': epoch,
    }
    ckpt_dir = 'checkpoint_{}'.format(args.exp)
    if not os.path.isdir(ckpt_dir):
        os.mkdir(ckpt_dir)
    torch.save(state, './checkpoint_{}/ckpt.t7'.format(args.exp))
    best_acc = acc
221 | 
# Multiply the learning rate by gamma=0.1 at scheduler steps 100 and 150.
# NOTE(review): the scheduler is created fresh here, so when resuming its
# step count starts again from zero rather than from start_epoch - confirm
# this is intended.
scheduler = lr_scheduler.MultiStepLR(
    optimizer, milestones=[100, 150], gamma=0.1)
# Always run 200 epochs, counted from start_epoch.
for epoch in range(start_epoch, start_epoch + 200):
    # NOTE(review): the scheduler is stepped before the epoch is trained;
    # whether that is the expected order depends on the installed PyTorch
    # version - confirm.
    scheduler.step()
    train(epoch)
    test(epoch)
228 | 


--------------------------------------------------------------------------------
/main_cifar100.py:
--------------------------------------------------------------------------------
  1 | """Train CIFAR100 with PyTorch."""
  2 | from __future__ import print_function
  3 | 
  4 | import argparse
  5 | import os
  6 | 
  7 | import numpy
  8 | import torch
  9 | import torch.nn as nn
 10 | import torch.optim as optim
 11 | from torch.optim import lr_scheduler
 12 | import torch.backends.cudnn as cudnn
 13 | 
 14 | import torchvision
 15 | import torchvision.transforms as transforms
 16 | 
 17 | from models import PreActResNet18
 18 | from utils import progress_bar
 19 | from torch.autograd import Variable
 20 | 
 21 | 
 22 | parser = argparse.ArgumentParser(description='PyTorch CIFAR100 Training')
 23 | parser.add_argument('--lr', default=0.1, type=float, help='learning rate')
 24 | parser.add_argument('--resume', '-r', action='store_true',
 25 |                     help='resume from checkpoint')
 26 | parser.add_argument('--exp', default='cifar100_mixup', type=str,
 27 |                     help='name of the experiment')
 28 | parser.add_argument('--mixup', action='store_true',
 29 |                     help='whether to use mixup or not')
 30 | args = parser.parse_args()
 31 | 
 32 | use_cuda = torch.cuda.is_available()
 33 | best_acc = 0  # best test accuracy
 34 | start_epoch = 0  # start from epoch 0 or last checkpoint epoch
 35 | 
 36 | # Data
 37 | print('==> Preparing data..')
 38 | transform_train = transforms.Compose([
 39 |     transforms.RandomCrop(32, padding=4),
 40 |     transforms.RandomHorizontalFlip(),
 41 |     transforms.ToTensor(),
 42 |     transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
 43 | ])
 44 | 
 45 | transform_test = transforms.Compose([
 46 |     transforms.ToTensor(),
 47 |     transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
 48 | ])
 49 | 
 50 | trainset = torchvision.datasets.CIFAR100(
 51 |     root='/data/public/cifar100', train=True, download=True,
 52 |     transform=transform_train)
 53 | trainloader = torch.utils.data.DataLoader(
 54 |     trainset, batch_size=128, shuffle=True, num_workers=2)
 55 | 
 56 | testset = torchvision.datasets.CIFAR100(
 57 |     root='/data/public/cifar100', train=False, download=True,
 58 |     transform=transform_test)
 59 | testloader = torch.utils.data.DataLoader(
 60 |     testset, batch_size=100, shuffle=False, num_workers=2)
 61 | 
 62 | # Model
 63 | if args.resume:
 64 |     # Load checkpoint.
 65 |     print('==> Resuming from checkpoint..')
 66 |     assert os.path.isdir('checkpoint'), 'Error: no checkpoint directory found!'
 67 |     checkpoint = torch.load('./checkpoint_{}/ckpt.t7'.format(args.exp))
 68 |     net = checkpoint['net']
 69 |     best_acc = checkpoint['acc']
 70 |     start_epoch = checkpoint['epoch']
 71 | else:
 72 |     print('==> Building model..')
 73 |     # net = VGG('VGG19')
 74 |     # net = ResNet18()
 75 |     net = PreActResNet18(num_classes=100)
 76 |     # net = GoogLeNet()
 77 |     # net = DenseNet121()
 78 |     # net = ResNeXt29_2x64d()
 79 |     # net = MobileNet()
 80 |     # net = DPN92()
 81 |     # net = ShuffleNetG2()
 82 |     # net = SENet18()
 83 | 
 84 | if use_cuda:
 85 |     net.cuda()
 86 |     net = torch.nn.DataParallel(
 87 |         net, device_ids=range(torch.cuda.device_count()))
 88 |     cudnn.benchmark = True
 89 | 
 90 | criterion = nn.CrossEntropyLoss()
 91 | optimizer = optim.SGD(net.parameters(), lr=args.lr,
 92 |                       momentum=0.9, weight_decay=1e-4)
 93 | 
 94 | # Training
 95 | 
 96 | 
 97 | def shuffle_minibatch(inputs, targets, mixup=True):
 98 |     """Shuffle a minibatch and do linear interpolation between images and labels.
 99 | 
100 |     Args:
101 |         inputs: a numpy array of images with size batch_size x H x W x 3.
102 |         targets: a numpy array of labels with size batch_size x 1.
103 |         mixup: a boolen as whether to do mixup or not. If mixup is True, we
104 |             sample the weight from beta distribution using parameter alpha=1,
105 |             beta=1. If mixup is False, we set the weight to be 1 and 0
106 |             respectively for the randomly shuffled mini-batches.
107 |     """
108 |     batch_size = inputs.shape[0]
109 | 
110 |     rp1 = torch.randperm(batch_size)
111 |     inputs1 = inputs[rp1]
112 |     targets1 = targets[rp1]
113 |     targets1_1 = targets1.unsqueeze(1)
114 | 
115 |     rp2 = torch.randperm(batch_size)
116 |     inputs2 = inputs[rp2]
117 |     targets2 = targets[rp2]
118 |     targets2_1 = targets2.unsqueeze(1)
119 | 
120 |     y_onehot = torch.FloatTensor(batch_size, 100)
121 |     y_onehot.zero_()
122 |     targets1_oh = y_onehot.scatter_(1, targets1_1, 1)
123 | 
124 |     y_onehot2 = torch.FloatTensor(batch_size, 100)
125 |     y_onehot2.zero_()
126 |     targets2_oh = y_onehot2.scatter_(1, targets2_1, 1)
127 | 
128 |     if mixup is True:
129 |         a = numpy.random.beta(1, 1, [batch_size, 1])
130 |     else:
131 |         a = numpy.ones((batch_size, 1))
132 | 
133 |     b = numpy.tile(a[..., None, None], [1, 3, 32, 32])
134 | 
135 |     inputs1 = inputs1 * torch.from_numpy(b).float()
136 |     inputs2 = inputs2 * torch.from_numpy(1 - b).float()
137 | 
138 |     c = numpy.tile(a, [1, 100])
139 | 
140 |     targets1_oh = targets1_oh.float() * torch.from_numpy(c).float()
141 |     targets2_oh = targets2_oh.float() * torch.from_numpy(1 - c).float()
142 | 
143 |     inputs_shuffle = inputs1 + inputs2
144 |     targets_shuffle = targets1_oh + targets2_oh
145 | 
146 |     return inputs_shuffle, targets_shuffle
147 | 
148 | 
def train(epoch):
    """Train the global ``net`` for one pass over ``trainloader``.

    Every minibatch is re-sampled by ``shuffle_minibatch`` (blended when
    ``args.mixup`` is set) and the soft-label cross-entropy is minimised
    with the global ``optimizer``. Running loss and accuracy are reported
    through ``progress_bar``; accuracy is measured against the arg-max of
    the (possibly mixed) soft labels.

    Args:
        epoch: epoch index, used only for logging.
    """
    print('\nEpoch: %d' % epoch)
    net.train()
    train_loss = 0
    correct = 0
    total = 0
    # The module holds no state, so build it once instead of once per batch.
    log_softmax = nn.LogSoftmax()
    for batch_idx, (inputs, targets) in enumerate(trainloader):
        batch_size = targets.size(0)
        inputs_shuffle, targets_shuffle = shuffle_minibatch(
            inputs, targets, args.mixup)

        if use_cuda:
            inputs_shuffle, targets_shuffle = inputs_shuffle.cuda(), \
                targets_shuffle.cuda()

        optimizer.zero_grad()

        inputs_shuffle, targets_shuffle = Variable(
            inputs_shuffle), Variable(targets_shuffle)

        outputs = net(inputs_shuffle)

        # Cross-entropy against soft labels, averaged over the samples that
        # are actually in this batch. Dividing by a fixed 128 mis-scaled the
        # last batch of an epoch, which can be smaller.
        loss = -log_softmax(outputs) * targets_shuffle
        loss = torch.sum(loss) / batch_size
        loss.backward()
        optimizer.step()

        train_loss += loss.data[0]
        _, predicted = torch.max(outputs.data, 1)
        total += batch_size
        # Hard label = class with the largest weight in the soft label.
        _, hard_targets = torch.max(targets_shuffle.data, 1)
        correct += predicted.eq(hard_targets).cpu().sum()

        progress_bar(batch_idx, len(trainloader), 'Epoch %d, Training Loss: %.3f | Acc: %.3f%% (%d/%d)'  # noqa
                     % (epoch, train_loss / (batch_idx + 1), 100. * correct / total, correct, total))  # noqa
185 | 
186 | 
def test(epoch):
    """Evaluate the global ``net`` on ``testloader``.

    Reports running loss / accuracy through ``progress_bar`` and, when the
    final accuracy beats the global ``best_acc``, saves the model together
    with its accuracy and epoch to ``./checkpoint_<exp>/ckpt.t7``.

    Args:
        epoch: epoch index, used for logging and stored in the checkpoint.
    """
    global best_acc
    net.eval()
    loss_sum, n_correct, n_seen = 0, 0, 0
    for step, (images, labels) in enumerate(testloader):
        if use_cuda:
            images = images.cuda()
            labels = labels.cuda()
        images = Variable(images, volatile=True)
        labels = Variable(labels)

        logits = net(images)
        loss = criterion(logits, labels)
        loss_sum += loss.data[0]

        predicted = torch.max(logits.data, 1)[1]
        n_seen += labels.size(0)
        n_correct += predicted.eq(labels.data).cpu().sum()

        message = 'Epoch %d, Test Loss: %.3f | Acc: %.3f%% (%d/%d)' % (
            epoch, loss_sum / (step + 1), 100. * n_correct / n_seen,
            n_correct, n_seen)
        progress_bar(step, len(testloader), message)

    # Save checkpoint.
    acc = 100. * n_correct / n_seen
    if not acc > best_acc:
        return

    print('Saving..')
    # Unwrap DataParallel so the bare module is what gets stored.
    model_to_save = net.module if use_cuda else net
    state = {
        'net': model_to_save,
        'acc': acc,
        'epoch': epoch,
    }
    ckpt_dir = 'checkpoint_{}'.format(args.exp)
    if not os.path.isdir(ckpt_dir):
        os.mkdir(ckpt_dir)
    torch.save(state, './checkpoint_{}/ckpt.t7'.format(args.exp))
    best_acc = acc
222 | 
# Multiply the learning rate by gamma=0.1 at scheduler steps 100 and 150.
# NOTE(review): the scheduler is created fresh here, so when resuming its
# step count starts again from zero rather than from start_epoch - confirm
# this is intended.
scheduler = lr_scheduler.MultiStepLR(
    optimizer, milestones=[100, 150], gamma=0.1)
# Always run 200 epochs, counted from start_epoch.
for epoch in range(start_epoch, start_epoch + 200):
    # NOTE(review): the scheduler is stepped before the epoch is trained;
    # whether that is the expected order depends on the installed PyTorch
    # version - confirm.
    scheduler.step()
    train(epoch)
    test(epoch)
229 | 


--------------------------------------------------------------------------------
/models/__init__.py:
--------------------------------------------------------------------------------
 1 | from .vgg import *
 2 | from .dpn import *
 3 | from .lenet import *
 4 | from .senet import *
 5 | from .resnet import *
 6 | from .resnext import *
 7 | from .densenet import *
 8 | from .googlenet import *
 9 | from .mobilenet import *
10 | from .shufflenet import *
11 | from .preact_resnet import *
12 | 


--------------------------------------------------------------------------------
/models/densenet.py:
--------------------------------------------------------------------------------
  1 | '''DenseNet in PyTorch.'''
  2 | import math
  3 | 
  4 | import torch
  5 | import torch.nn as nn
  6 | import torch.nn.functional as F
  7 | 
  8 | from torch.autograd import Variable
  9 | 
 10 | 
 11 | class Bottleneck(nn.Module):
 12 |     def __init__(self, in_planes, growth_rate):
 13 |         super(Bottleneck, self).__init__()
 14 |         self.bn1 = nn.BatchNorm2d(in_planes)
 15 |         self.conv1 = nn.Conv2d(in_planes, 4*growth_rate, kernel_size=1, bias=False)
 16 |         self.bn2 = nn.BatchNorm2d(4*growth_rate)
 17 |         self.conv2 = nn.Conv2d(4*growth_rate, growth_rate, kernel_size=3, padding=1, bias=False)
 18 | 
 19 |     def forward(self, x):
 20 |         out = self.conv1(F.relu(self.bn1(x)))
 21 |         out = self.conv2(F.relu(self.bn2(out)))
 22 |         out = torch.cat([out,x], 1)
 23 |         return out
 24 | 
 25 | 
 26 | class Transition(nn.Module):
 27 |     def __init__(self, in_planes, out_planes):
 28 |         super(Transition, self).__init__()
 29 |         self.bn = nn.BatchNorm2d(in_planes)
 30 |         self.conv = nn.Conv2d(in_planes, out_planes, kernel_size=1, bias=False)
 31 | 
 32 |     def forward(self, x):
 33 |         out = self.conv(F.relu(self.bn(x)))
 34 |         out = F.avg_pool2d(out, 2)
 35 |         return out
 36 | 
 37 | 
 38 | class DenseNet(nn.Module):
 39 |     def __init__(self, block, nblocks, growth_rate=12, reduction=0.5, num_classes=10):
 40 |         super(DenseNet, self).__init__()
 41 |         self.growth_rate = growth_rate
 42 | 
 43 |         num_planes = 2*growth_rate
 44 |         self.conv1 = nn.Conv2d(3, num_planes, kernel_size=3, padding=1, bias=False)
 45 | 
 46 |         self.dense1 = self._make_dense_layers(block, num_planes, nblocks[0])
 47 |         num_planes += nblocks[0]*growth_rate
 48 |         out_planes = int(math.floor(num_planes*reduction))
 49 |         self.trans1 = Transition(num_planes, out_planes)
 50 |         num_planes = out_planes
 51 | 
 52 |         self.dense2 = self._make_dense_layers(block, num_planes, nblocks[1])
 53 |         num_planes += nblocks[1]*growth_rate
 54 |         out_planes = int(math.floor(num_planes*reduction))
 55 |         self.trans2 = Transition(num_planes, out_planes)
 56 |         num_planes = out_planes
 57 | 
 58 |         self.dense3 = self._make_dense_layers(block, num_planes, nblocks[2])
 59 |         num_planes += nblocks[2]*growth_rate
 60 |         out_planes = int(math.floor(num_planes*reduction))
 61 |         self.trans3 = Transition(num_planes, out_planes)
 62 |         num_planes = out_planes
 63 | 
 64 |         self.dense4 = self._make_dense_layers(block, num_planes, nblocks[3])
 65 |         num_planes += nblocks[3]*growth_rate
 66 | 
 67 |         self.bn = nn.BatchNorm2d(num_planes)
 68 |         self.linear = nn.Linear(num_planes, num_classes)
 69 | 
 70 |     def _make_dense_layers(self, block, in_planes, nblock):
 71 |         layers = []
 72 |         for i in range(nblock):
 73 |             layers.append(block(in_planes, self.growth_rate))
 74 |             in_planes += self.growth_rate
 75 |         return nn.Sequential(*layers)
 76 | 
 77 |     def forward(self, x):
 78 |         out = self.conv1(x)
 79 |         out = self.trans1(self.dense1(out))
 80 |         out = self.trans2(self.dense2(out))
 81 |         out = self.trans3(self.dense3(out))
 82 |         out = self.dense4(out)
 83 |         out = F.avg_pool2d(F.relu(self.bn(out)), 4)
 84 |         out = out.view(out.size(0), -1)
 85 |         out = self.linear(out)
 86 |         return out
 87 | 
 88 | def DenseNet121():
 89 |     return DenseNet(Bottleneck, [6,12,24,16], growth_rate=32)
 90 | 
 91 | def DenseNet169():
 92 |     return DenseNet(Bottleneck, [6,12,32,32], growth_rate=32)
 93 | 
 94 | def DenseNet201():
 95 |     return DenseNet(Bottleneck, [6,12,48,32], growth_rate=32)
 96 | 
 97 | def DenseNet161():
 98 |     return DenseNet(Bottleneck, [6,12,36,24], growth_rate=48)
 99 | 
100 | def densenet_cifar():
101 |     return DenseNet(Bottleneck, [6,12,24,16], growth_rate=12)
102 | 
def test_densenet():
    """Smoke test: push one random 3x32x32 image through densenet_cifar."""
    model = densenet_cifar()
    batch = Variable(torch.randn(1, 3, 32, 32))
    print(model(batch))
108 | 
109 | # test_densenet()
110 | 


--------------------------------------------------------------------------------
/models/dpn.py:
--------------------------------------------------------------------------------
  1 | '''Dual Path Networks in PyTorch.'''
  2 | import torch
  3 | import torch.nn as nn
  4 | import torch.nn.functional as F
  5 | 
  6 | from torch.autograd import Variable
  7 | 
  8 | 
  9 | class Bottleneck(nn.Module):
 10 |     def __init__(self, last_planes, in_planes, out_planes, dense_depth, stride, first_layer):
 11 |         super(Bottleneck, self).__init__()
 12 |         self.out_planes = out_planes
 13 |         self.dense_depth = dense_depth
 14 | 
 15 |         self.conv1 = nn.Conv2d(last_planes, in_planes, kernel_size=1, bias=False)
 16 |         self.bn1 = nn.BatchNorm2d(in_planes)
 17 |         self.conv2 = nn.Conv2d(in_planes, in_planes, kernel_size=3, stride=stride, padding=1, groups=32, bias=False)
 18 |         self.bn2 = nn.BatchNorm2d(in_planes)
 19 |         self.conv3 = nn.Conv2d(in_planes, out_planes+dense_depth, kernel_size=1, bias=False)
 20 |         self.bn3 = nn.BatchNorm2d(out_planes+dense_depth)
 21 | 
 22 |         self.shortcut = nn.Sequential()
 23 |         if first_layer:
 24 |             self.shortcut = nn.Sequential(
 25 |                 nn.Conv2d(last_planes, out_planes+dense_depth, kernel_size=1, stride=stride, bias=False),
 26 |                 nn.BatchNorm2d(out_planes+dense_depth)
 27 |             )
 28 | 
 29 |     def forward(self, x):
 30 |         out = F.relu(self.bn1(self.conv1(x)))
 31 |         out = F.relu(self.bn2(self.conv2(out)))
 32 |         out = self.bn3(self.conv3(out))
 33 |         x = self.shortcut(x)
 34 |         d = self.out_planes
 35 |         out = torch.cat([x[:,:d,:,:]+out[:,:d,:,:], x[:,d:,:,:], out[:,d:,:,:]], 1)
 36 |         out = F.relu(out)
 37 |         return out
 38 | 
 39 | 
 40 | class DPN(nn.Module):
 41 |     def __init__(self, cfg):
 42 |         super(DPN, self).__init__()
 43 |         in_planes, out_planes = cfg['in_planes'], cfg['out_planes']
 44 |         num_blocks, dense_depth = cfg['num_blocks'], cfg['dense_depth']
 45 | 
 46 |         self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
 47 |         self.bn1 = nn.BatchNorm2d(64)
 48 |         self.last_planes = 64
 49 |         self.layer1 = self._make_layer(in_planes[0], out_planes[0], num_blocks[0], dense_depth[0], stride=1)
 50 |         self.layer2 = self._make_layer(in_planes[1], out_planes[1], num_blocks[1], dense_depth[1], stride=2)
 51 |         self.layer3 = self._make_layer(in_planes[2], out_planes[2], num_blocks[2], dense_depth[2], stride=2)
 52 |         self.layer4 = self._make_layer(in_planes[3], out_planes[3], num_blocks[3], dense_depth[3], stride=2)
 53 |         self.linear = nn.Linear(out_planes[3]+(num_blocks[3]+1)*dense_depth[3], 10)
 54 | 
 55 |     def _make_layer(self, in_planes, out_planes, num_blocks, dense_depth, stride):
 56 |         strides = [stride] + [1]*(num_blocks-1)
 57 |         layers = []
 58 |         for i,stride in enumerate(strides):
 59 |             layers.append(Bottleneck(self.last_planes, in_planes, out_planes, dense_depth, stride, i==0))
 60 |             self.last_planes = out_planes + (i+2) * dense_depth
 61 |         return nn.Sequential(*layers)
 62 | 
 63 |     def forward(self, x):
 64 |         out = F.relu(self.bn1(self.conv1(x)))
 65 |         out = self.layer1(out)
 66 |         out = self.layer2(out)
 67 |         out = self.layer3(out)
 68 |         out = self.layer4(out)
 69 |         out = F.avg_pool2d(out, 4)
 70 |         out = out.view(out.size(0), -1)
 71 |         out = self.linear(out)
 72 |         return out
 73 | 
 74 | 
 75 | def DPN26():
 76 |     cfg = {
 77 |         'in_planes': (96,192,384,768),
 78 |         'out_planes': (256,512,1024,2048),
 79 |         'num_blocks': (2,2,2,2),
 80 |         'dense_depth': (16,32,24,128)
 81 |     }
 82 |     return DPN(cfg)
 83 | 
 84 | def DPN92():
 85 |     cfg = {
 86 |         'in_planes': (96,192,384,768),
 87 |         'out_planes': (256,512,1024,2048),
 88 |         'num_blocks': (3,4,20,3),
 89 |         'dense_depth': (16,32,24,128)
 90 |     }
 91 |     return DPN(cfg)
 92 | 
 93 | 
 94 | def test():
 95 |     net = DPN92()
 96 |     x = Variable(torch.randn(1,3,32,32))
 97 |     y = net(x)
 98 |     print(y)
 99 | 
100 | # test()
101 | 


--------------------------------------------------------------------------------
/models/googlenet.py:
--------------------------------------------------------------------------------
  1 | '''GoogLeNet with PyTorch.'''
  2 | import torch
  3 | import torch.nn as nn
  4 | import torch.nn.functional as F
  5 | 
  6 | from torch.autograd import Variable
  7 | 
  8 | 
  9 | class Inception(nn.Module):
 10 |     def __init__(self, in_planes, n1x1, n3x3red, n3x3, n5x5red, n5x5, pool_planes):
 11 |         super(Inception, self).__init__()
 12 |         # 1x1 conv branch
 13 |         self.b1 = nn.Sequential(
 14 |             nn.Conv2d(in_planes, n1x1, kernel_size=1),
 15 |             nn.BatchNorm2d(n1x1),
 16 |             nn.ReLU(True),
 17 |         )
 18 | 
 19 |         # 1x1 conv -> 3x3 conv branch
 20 |         self.b2 = nn.Sequential(
 21 |             nn.Conv2d(in_planes, n3x3red, kernel_size=1),
 22 |             nn.BatchNorm2d(n3x3red),
 23 |             nn.ReLU(True),
 24 |             nn.Conv2d(n3x3red, n3x3, kernel_size=3, padding=1),
 25 |             nn.BatchNorm2d(n3x3),
 26 |             nn.ReLU(True),
 27 |         )
 28 | 
 29 |         # 1x1 conv -> 5x5 conv branch
 30 |         self.b3 = nn.Sequential(
 31 |             nn.Conv2d(in_planes, n5x5red, kernel_size=1),
 32 |             nn.BatchNorm2d(n5x5red),
 33 |             nn.ReLU(True),
 34 |             nn.Conv2d(n5x5red, n5x5, kernel_size=3, padding=1),
 35 |             nn.BatchNorm2d(n5x5),
 36 |             nn.ReLU(True),
 37 |             nn.Conv2d(n5x5, n5x5, kernel_size=3, padding=1),
 38 |             nn.BatchNorm2d(n5x5),
 39 |             nn.ReLU(True),
 40 |         )
 41 | 
 42 |         # 3x3 pool -> 1x1 conv branch
 43 |         self.b4 = nn.Sequential(
 44 |             nn.MaxPool2d(3, stride=1, padding=1),
 45 |             nn.Conv2d(in_planes, pool_planes, kernel_size=1),
 46 |             nn.BatchNorm2d(pool_planes),
 47 |             nn.ReLU(True),
 48 |         )
 49 | 
 50 |     def forward(self, x):
 51 |         y1 = self.b1(x)
 52 |         y2 = self.b2(x)
 53 |         y3 = self.b3(x)
 54 |         y4 = self.b4(x)
 55 |         return torch.cat([y1,y2,y3,y4], 1)
 56 | 
 57 | 
 58 | class GoogLeNet(nn.Module):
 59 |     def __init__(self):
 60 |         super(GoogLeNet, self).__init__()
 61 |         self.pre_layers = nn.Sequential(
 62 |             nn.Conv2d(3, 192, kernel_size=3, padding=1),
 63 |             nn.BatchNorm2d(192),
 64 |             nn.ReLU(True),
 65 |         )
 66 | 
 67 |         self.a3 = Inception(192,  64,  96, 128, 16, 32, 32)
 68 |         self.b3 = Inception(256, 128, 128, 192, 32, 96, 64)
 69 | 
 70 |         self.maxpool = nn.MaxPool2d(3, stride=2, padding=1)
 71 | 
 72 |         self.a4 = Inception(480, 192,  96, 208, 16,  48,  64)
 73 |         self.b4 = Inception(512, 160, 112, 224, 24,  64,  64)
 74 |         self.c4 = Inception(512, 128, 128, 256, 24,  64,  64)
 75 |         self.d4 = Inception(512, 112, 144, 288, 32,  64,  64)
 76 |         self.e4 = Inception(528, 256, 160, 320, 32, 128, 128)
 77 | 
 78 |         self.a5 = Inception(832, 256, 160, 320, 32, 128, 128)
 79 |         self.b5 = Inception(832, 384, 192, 384, 48, 128, 128)
 80 | 
 81 |         self.avgpool = nn.AvgPool2d(8, stride=1)
 82 |         self.linear = nn.Linear(1024, 10)
 83 | 
 84 |     def forward(self, x):
 85 |         out = self.pre_layers(x)
 86 |         out = self.a3(out)
 87 |         out = self.b3(out)
 88 |         out = self.maxpool(out)
 89 |         out = self.a4(out)
 90 |         out = self.b4(out)
 91 |         out = self.c4(out)
 92 |         out = self.d4(out)
 93 |         out = self.e4(out)
 94 |         out = self.maxpool(out)
 95 |         out = self.a5(out)
 96 |         out = self.b5(out)
 97 |         out = self.avgpool(out)
 98 |         out = out.view(out.size(0), -1)
 99 |         out = self.linear(out)
100 |         return out
101 | 
102 | # net = GoogLeNet()
103 | # x = torch.randn(1,3,32,32)
104 | # y = net(Variable(x))
105 | # print(y.size())
106 | 


--------------------------------------------------------------------------------
/models/lenet.py:
--------------------------------------------------------------------------------
 1 | '''LeNet in PyTorch.'''
 2 | import torch.nn as nn
 3 | import torch.nn.functional as F
 4 | 
 5 | class LeNet(nn.Module):
 6 |     def __init__(self):
 7 |         super(LeNet, self).__init__()
 8 |         self.conv1 = nn.Conv2d(3, 6, 5)
 9 |         self.conv2 = nn.Conv2d(6, 16, 5)
10 |         self.fc1   = nn.Linear(16*5*5, 120)
11 |         self.fc2   = nn.Linear(120, 84)
12 |         self.fc3   = nn.Linear(84, 10)
13 | 
14 |     def forward(self, x):
15 |         out = F.relu(self.conv1(x))
16 |         out = F.max_pool2d(out, 2)
17 |         out = F.relu(self.conv2(out))
18 |         out = F.max_pool2d(out, 2)
19 |         out = out.view(out.size(0), -1)
20 |         out = F.relu(self.fc1(out))
21 |         out = F.relu(self.fc2(out))
22 |         out = self.fc3(out)
23 |         return out
24 | 


--------------------------------------------------------------------------------
/models/mobilenet.py:
--------------------------------------------------------------------------------
 1 | '''MobileNet in PyTorch.
 2 | 
 3 | See the paper "MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications"
 4 | for more details.
 5 | '''
 6 | import torch
 7 | import torch.nn as nn
 8 | import torch.nn.functional as F
 9 | 
10 | from torch.autograd import Variable
11 | 
12 | 
class Block(nn.Module):
    '''Depthwise 3x3 conv followed by a pointwise 1x1 conv, each with BN + ReLU.'''
    def __init__(self, in_planes, out_planes, stride=1):
        super(Block, self).__init__()
        # Depthwise: groups == in_planes gives every channel its own filter.
        self.conv1 = nn.Conv2d(
            in_planes, in_planes,
            kernel_size=3, stride=stride, padding=1,
            groups=in_planes, bias=False,
        )
        self.bn1 = nn.BatchNorm2d(in_planes)
        # Pointwise: 1x1 conv that maps in_planes to out_planes channels.
        self.conv2 = nn.Conv2d(
            in_planes, out_planes,
            kernel_size=1, stride=1, padding=0, bias=False,
        )
        self.bn2 = nn.BatchNorm2d(out_planes)

    def forward(self, x):
        depthwise = F.relu(self.bn1(self.conv1(x)))
        pointwise = self.bn2(self.conv2(depthwise))
        return F.relu(pointwise)
26 | 
27 | 
class MobileNet(nn.Module):
    '''MobileNet: a 3x3 conv stem, a stack of ``Block`` layers and a linear head.'''
    # An int entry is the output width of a stride-1 Block;
    # a (planes, stride) tuple sets both, e.g. (128,2).
    cfg = [64, (128,2), 128, (256,2), 256, (512,2), 512, 512, 512, 512, 512, (1024,2), 1024]

    def __init__(self, num_classes=10):
        super(MobileNet, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(32)
        self.layers = self._make_layers(in_planes=32)
        self.linear = nn.Linear(1024, num_classes)

    def _make_layers(self, in_planes):
        '''Turn ``self.cfg`` into a Sequential of Blocks, chaining channel counts.'''
        blocks = []
        channels = in_planes
        for entry in self.cfg:
            if isinstance(entry, int):
                width, stride = entry, 1
            else:
                width, stride = entry[0], entry[1]
            blocks.append(Block(channels, width, stride))
            channels = width
        return nn.Sequential(*blocks)

    def forward(self, x):
        feats = F.relu(self.bn1(self.conv1(x)))
        feats = self.layers(feats)
        pooled = F.avg_pool2d(feats, 2)
        flat = pooled.view(pooled.size(0), -1)
        return self.linear(flat)
55 | 
56 | 
def test():
    '''Smoke test: print the output size of MobileNet for one random 3x32x32 input.'''
    model = MobileNet()
    sample = Variable(torch.randn(1,3,32,32))
    print(model(sample).size())
62 | 
63 | # test()
64 | 


--------------------------------------------------------------------------------
/models/preact_resnet.py:
--------------------------------------------------------------------------------
  1 | '''Pre-activation ResNet in PyTorch.
  2 | 
  3 | Reference:
  4 | [1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
  5 |     Identity Mappings in Deep Residual Networks. arXiv:1603.05027
  6 | '''
  7 | import torch
  8 | import torch.nn as nn
  9 | import torch.nn.functional as F
 10 | 
 11 | from torch.autograd import Variable
 12 | 
 13 | 
 14 | class PreActBlock(nn.Module):
 15 |     '''Pre-activation version of the BasicBlock.'''
 16 |     expansion = 1
 17 | 
 18 |     def __init__(self, in_planes, planes, stride=1):
 19 |         super(PreActBlock, self).__init__()
 20 |         self.bn1 = nn.BatchNorm2d(in_planes)
 21 |         self.conv1 = nn.Conv2d(
 22 |             in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
 23 |         self.bn2 = nn.BatchNorm2d(planes)
 24 |         self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
 25 |                                stride=1, padding=1, bias=False)
 26 | 
 27 |         if stride != 1 or in_planes != self.expansion*planes:
 28 |             self.shortcut = nn.Sequential(
 29 |                 nn.Conv2d(in_planes, self.expansion*planes,
 30 |                           kernel_size=1, stride=stride, bias=False)
 31 |             )
 32 | 
 33 |     def forward(self, x):
 34 |         out = F.relu(self.bn1(x))
 35 |         shortcut = self.shortcut(out) if hasattr(self, 'shortcut') else x
 36 |         out = self.conv1(out)
 37 |         out = self.conv2(F.relu(self.bn2(out)))
 38 |         out += shortcut
 39 |         return out
 40 | 
 41 | 
 42 | class PreActBottleneck(nn.Module):
 43 |     '''Pre-activation version of the original Bottleneck module.'''
 44 |     expansion = 4
 45 | 
 46 |     def __init__(self, in_planes, planes, stride=1):
 47 |         super(PreActBottleneck, self).__init__()
 48 |         self.bn1 = nn.BatchNorm2d(in_planes)
 49 |         self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
 50 |         self.bn2 = nn.BatchNorm2d(planes)
 51 |         self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
 52 |                                stride=stride, padding=1, bias=False)
 53 |         self.bn3 = nn.BatchNorm2d(planes)
 54 |         self.conv3 = nn.Conv2d(planes, self.expansion *
 55 |                                planes, kernel_size=1, bias=False)
 56 | 
 57 |         if stride != 1 or in_planes != self.expansion*planes:
 58 |             self.shortcut = nn.Sequential(
 59 |                 nn.Conv2d(in_planes, self.expansion*planes,
 60 |                           kernel_size=1, stride=stride, bias=False)
 61 |             )
 62 | 
 63 |     def forward(self, x):
 64 |         out = F.relu(self.bn1(x))
 65 |         shortcut = self.shortcut(out) if hasattr(self, 'shortcut') else x
 66 |         out = self.conv1(out)
 67 |         out = self.conv2(F.relu(self.bn2(out)))
 68 |         out = self.conv3(F.relu(self.bn3(out)))
 69 |         out += shortcut
 70 |         return out
 71 | 
 72 | 
 73 | class PreActResNet(nn.Module):
 74 | 
 75 |     def __init__(self, block, num_blocks, num_classes=10):
 76 |         super(PreActResNet, self).__init__()
 77 |         self.in_planes = 64
 78 | 
 79 |         self.conv1 = nn.Conv2d(3, 64, kernel_size=3,
 80 |                                stride=1, padding=1, bias=False)
 81 |         self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
 82 |         self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
 83 |         self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
 84 |         self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
 85 |         self.linear = nn.Linear(512*block.expansion, num_classes)
 86 | 
 87 |     def _make_layer(self, block, planes, num_blocks, stride):
 88 |         strides = [stride] + [1]*(num_blocks-1)
 89 |         layers = []
 90 |         for stride in strides:
 91 |             layers.append(block(self.in_planes, planes, stride))
 92 |             self.in_planes = planes * block.expansion
 93 |         return nn.Sequential(*layers)
 94 | 
 95 |     def forward(self, x):
 96 |         out = self.conv1(x)
 97 |         out = self.layer1(out)
 98 |         out = self.layer2(out)
 99 |         out = self.layer3(out)
100 |         out = self.layer4(out)
101 |         out = F.avg_pool2d(out, 4)
102 |         out = out.view(out.size(0), -1)
103 |         out = self.linear(out)
104 |         return out
105 | 
106 | 
def PreActResNet18(num_classes=10):
    '''Build a PreActResNet from PreActBlock with 2 blocks in each of the 4 stages.'''
    blocks_per_stage = [2, 2, 2, 2]
    return PreActResNet(PreActBlock, blocks_per_stage, num_classes=num_classes)
109 | 
110 | 
def PreActResNet34(num_classes=10):
    '''Build a PreActResNet from PreActBlock with [3, 4, 6, 3] blocks per stage.

    ``num_classes`` is forwarded to PreActResNet (default 10, as before),
    matching the signature of PreActResNet18.
    '''
    return PreActResNet(PreActBlock, [3, 4, 6, 3], num_classes=num_classes)
113 | 
114 | 
def PreActResNet50(num_classes=10):
    '''Build a PreActResNet from PreActBottleneck with [3, 4, 6, 3] blocks per stage.

    ``num_classes`` is forwarded to PreActResNet (default 10, as before),
    matching the signature of PreActResNet18.
    '''
    return PreActResNet(PreActBottleneck, [3, 4, 6, 3], num_classes=num_classes)
117 | 
118 | 
def PreActResNet101(num_classes=10):
    '''Build a PreActResNet from PreActBottleneck with [3, 4, 23, 3] blocks per stage.

    ``num_classes`` is forwarded to PreActResNet (default 10, as before),
    matching the signature of PreActResNet18.
    '''
    return PreActResNet(PreActBottleneck, [3, 4, 23, 3], num_classes=num_classes)
121 | 
122 | 
def PreActResNet152(num_classes=10):
    '''Build a PreActResNet from PreActBottleneck with [3, 8, 36, 3] blocks per stage.

    ``num_classes`` is forwarded to PreActResNet (default 10, as before),
    matching the signature of PreActResNet18.
    '''
    return PreActResNet(PreActBottleneck, [3, 8, 36, 3], num_classes=num_classes)
125 | 
126 | 
def test():
    '''Smoke test: print the output size of PreActResNet18 for one random 3x32x32 input.'''
    model = PreActResNet18()
    sample = Variable(torch.randn(1, 3, 32, 32))
    print(model(sample).size())
131 | 
132 | # test()
133 | 


--------------------------------------------------------------------------------
/models/resnet.py:
--------------------------------------------------------------------------------
  1 | '''ResNet in PyTorch.
  2 | 
  3 | For Pre-activation ResNet, see 'preact_resnet.py'.
  4 | 
  5 | Reference:
  6 | [1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
  7 |     Deep Residual Learning for Image Recognition. arXiv:1512.03385
  8 | '''
  9 | import torch
 10 | import torch.nn as nn
 11 | import torch.nn.functional as F
 12 | 
 13 | from torch.autograd import Variable
 14 | 
 15 | 
 16 | class BasicBlock(nn.Module):
 17 |     expansion = 1
 18 | 
 19 |     def __init__(self, in_planes, planes, stride=1):
 20 |         super(BasicBlock, self).__init__()
 21 |         self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
 22 |         self.bn1 = nn.BatchNorm2d(planes)
 23 |         self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
 24 |         self.bn2 = nn.BatchNorm2d(planes)
 25 | 
 26 |         self.shortcut = nn.Sequential()
 27 |         if stride != 1 or in_planes != self.expansion*planes:
 28 |             self.shortcut = nn.Sequential(
 29 |                 nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False),
 30 |                 nn.BatchNorm2d(self.expansion*planes)
 31 |             )
 32 | 
 33 |     def forward(self, x):
 34 |         out = F.relu(self.bn1(self.conv1(x)))
 35 |         out = self.bn2(self.conv2(out))
 36 |         out += self.shortcut(x)
 37 |         out = F.relu(out)
 38 |         return out
 39 | 
 40 | 
 41 | class Bottleneck(nn.Module):
 42 |     expansion = 4
 43 | 
 44 |     def __init__(self, in_planes, planes, stride=1):
 45 |         super(Bottleneck, self).__init__()
 46 |         self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
 47 |         self.bn1 = nn.BatchNorm2d(planes)
 48 |         self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
 49 |         self.bn2 = nn.BatchNorm2d(planes)
 50 |         self.conv3 = nn.Conv2d(planes, self.expansion*planes, kernel_size=1, bias=False)
 51 |         self.bn3 = nn.BatchNorm2d(self.expansion*planes)
 52 | 
 53 |         self.shortcut = nn.Sequential()
 54 |         if stride != 1 or in_planes != self.expansion*planes:
 55 |             self.shortcut = nn.Sequential(
 56 |                 nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False),
 57 |                 nn.BatchNorm2d(self.expansion*planes)
 58 |             )
 59 | 
 60 |     def forward(self, x):
 61 |         out = F.relu(self.bn1(self.conv1(x)))
 62 |         out = F.relu(self.bn2(self.conv2(out)))
 63 |         out = self.bn3(self.conv3(out))
 64 |         out += self.shortcut(x)
 65 |         out = F.relu(out)
 66 |         return out
 67 | 
 68 | 
 69 | class ResNet(nn.Module):
 70 |     def __init__(self, block, num_blocks, num_classes=10):
 71 |         super(ResNet, self).__init__()
 72 |         self.in_planes = 64
 73 | 
 74 |         self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
 75 |         self.bn1 = nn.BatchNorm2d(64)
 76 |         self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
 77 |         self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
 78 |         self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
 79 |         self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
 80 |         self.linear = nn.Linear(512*block.expansion, num_classes)
 81 | 
 82 |     def _make_layer(self, block, planes, num_blocks, stride):
 83 |         strides = [stride] + [1]*(num_blocks-1)
 84 |         layers = []
 85 |         for stride in strides:
 86 |             layers.append(block(self.in_planes, planes, stride))
 87 |             self.in_planes = planes * block.expansion
 88 |         return nn.Sequential(*layers)
 89 | 
 90 |     def forward(self, x):
 91 |         out = F.relu(self.bn1(self.conv1(x)))
 92 |         out = self.layer1(out)
 93 |         out = self.layer2(out)
 94 |         out = self.layer3(out)
 95 |         out = self.layer4(out)
 96 |         out = F.avg_pool2d(out, 4)
 97 |         out = out.view(out.size(0), -1)
 98 |         out = self.linear(out)
 99 |         return out
100 | 
101 | 
def ResNet18(num_classes=10):
    '''Build a ResNet from BasicBlock with [2,2,2,2] blocks per stage.

    ``num_classes`` is forwarded to ResNet (default 10, as before).
    '''
    return ResNet(BasicBlock, [2,2,2,2], num_classes=num_classes)
104 | 
def ResNet34(num_classes=10):
    '''Build a ResNet from BasicBlock with [3,4,6,3] blocks per stage.

    ``num_classes`` is forwarded to ResNet (default 10, as before).
    '''
    return ResNet(BasicBlock, [3,4,6,3], num_classes=num_classes)
107 | 
def ResNet50(num_classes=10):
    '''Build a ResNet from Bottleneck with [3,4,6,3] blocks per stage.

    ``num_classes`` is forwarded to ResNet (default 10, as before).
    '''
    return ResNet(Bottleneck, [3,4,6,3], num_classes=num_classes)
110 | 
def ResNet101(num_classes=10):
    '''Build a ResNet from Bottleneck with [3,4,23,3] blocks per stage.

    ``num_classes`` is forwarded to ResNet (default 10, as before).
    '''
    return ResNet(Bottleneck, [3,4,23,3], num_classes=num_classes)
113 | 
def ResNet152(num_classes=10):
    '''Build a ResNet from Bottleneck with [3,8,36,3] blocks per stage.

    ``num_classes`` is forwarded to ResNet (default 10, as before).
    '''
    return ResNet(Bottleneck, [3,8,36,3], num_classes=num_classes)
116 | 
117 | 
def test():
    '''Smoke test: print the output size of ResNet18 for one random 3x32x32 input.'''
    model = ResNet18()
    sample = Variable(torch.randn(1,3,32,32))
    print(model(sample).size())
122 | 
123 | # test()
124 | 


--------------------------------------------------------------------------------
/models/resnext.py:
--------------------------------------------------------------------------------
 1 | '''ResNeXt in PyTorch.
 2 | 
 3 | See the paper "Aggregated Residual Transformations for Deep Neural Networks" for more details.
 4 | '''
 5 | import torch
 6 | import torch.nn as nn
 7 | import torch.nn.functional as F
 8 | 
 9 | from torch.autograd import Variable
10 | 
11 | 
class Block(nn.Module):
    '''Grouped convolution block.

    1x1 conv -> grouped 3x3 conv (``cardinality`` groups) -> 1x1 conv, each
    followed by BN, plus a shortcut and a ReLU after the sum. The block
    outputs ``expansion * cardinality * bottleneck_width`` channels.
    '''
    expansion = 2

    def __init__(self, in_planes, cardinality=32, bottleneck_width=4, stride=1):
        super(Block, self).__init__()
        group_width = cardinality * bottleneck_width
        out_planes = self.expansion * group_width

        self.conv1 = nn.Conv2d(in_planes, group_width, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(group_width)
        self.conv2 = nn.Conv2d(group_width, group_width, kernel_size=3,
                               stride=stride, padding=1, groups=cardinality, bias=False)
        self.bn2 = nn.BatchNorm2d(group_width)
        self.conv3 = nn.Conv2d(group_width, out_planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_planes)

        # Identity unless the stride or the channel count changes.
        if stride != 1 or in_planes != out_planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_planes),
            )
        else:
            self.shortcut = nn.Sequential()

    def forward(self, x):
        hidden = F.relu(self.bn1(self.conv1(x)))
        hidden = F.relu(self.bn2(self.conv2(hidden)))
        hidden = self.bn3(self.conv3(hidden))
        hidden += self.shortcut(x)
        return F.relu(hidden)
40 | 
41 | 
class ResNeXt(nn.Module):
    '''ResNeXt with a 1x1 conv stem, three stages of ``Block`` and a linear head.

    ``num_blocks`` gives the number of blocks in each of the three stages;
    ``bottleneck_width`` is the width used by stage 1 and is doubled for
    every following stage.
    '''
    def __init__(self, num_blocks, cardinality, bottleneck_width, num_classes=10):
        super(ResNeXt, self).__init__()
        self.cardinality = cardinality
        # Mutated by _make_layer: doubled after every stage that is built.
        self.bottleneck_width = bottleneck_width
        self.in_planes = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(num_blocks[0], 1)
        self.layer2 = self._make_layer(num_blocks[1], 2)
        self.layer3 = self._make_layer(num_blocks[2], 2)
        # Stage 3 runs at 4x the initial width and Block.expansion is 2,
        # hence 8 * cardinality * bottleneck_width output channels.
        self.linear = nn.Linear(cardinality*bottleneck_width*8, num_classes)

    def _make_layer(self, num_blocks, stride):
        '''Build one stage; only its first block uses ``stride``, the rest use 1.'''
        stage = []
        for block_stride in [stride] + [1]*(num_blocks-1):
            stage.append(Block(self.in_planes, self.cardinality, self.bottleneck_width, block_stride))
            self.in_planes = Block.expansion * self.cardinality * self.bottleneck_width
        # Increase bottleneck_width by 2 after each stage.
        self.bottleneck_width *= 2
        return nn.Sequential(*stage)

    def forward(self, x):
        feats = F.relu(self.bn1(self.conv1(x)))
        for stage in (self.layer1, self.layer2, self.layer3):
            feats = stage(feats)
        pooled = F.avg_pool2d(feats, 8)
        flat = pooled.view(pooled.size(0), -1)
        return self.linear(flat)
77 | 
78 | 
def ResNeXt29_2x64d(num_classes=10):
    '''Build a ResNeXt with [3,3,3] blocks, cardinality 2 and bottleneck width 64.

    ``num_classes`` is forwarded to ResNeXt (default 10, as before).
    '''
    return ResNeXt(num_blocks=[3,3,3], cardinality=2, bottleneck_width=64, num_classes=num_classes)
81 | 
def ResNeXt29_4x64d(num_classes=10):
    '''Build a ResNeXt with [3,3,3] blocks, cardinality 4 and bottleneck width 64.

    ``num_classes`` is forwarded to ResNeXt (default 10, as before).
    '''
    return ResNeXt(num_blocks=[3,3,3], cardinality=4, bottleneck_width=64, num_classes=num_classes)
84 | 
def ResNeXt29_8x64d(num_classes=10):
    '''Build a ResNeXt with [3,3,3] blocks, cardinality 8 and bottleneck width 64.

    ``num_classes`` is forwarded to ResNeXt (default 10, as before).
    '''
    return ResNeXt(num_blocks=[3,3,3], cardinality=8, bottleneck_width=64, num_classes=num_classes)
87 | 
def ResNeXt29_32x4d(num_classes=10):
    '''Build a ResNeXt with [3,3,3] blocks, cardinality 32 and bottleneck width 4.

    ``num_classes`` is forwarded to ResNeXt (default 10, as before).
    '''
    return ResNeXt(num_blocks=[3,3,3], cardinality=32, bottleneck_width=4, num_classes=num_classes)
90 | 
def test_resnext():
    '''Smoke test: print the output size of ResNeXt29_2x64d for one random 3x32x32 input.'''
    model = ResNeXt29_2x64d()
    sample = Variable(torch.randn(1,3,32,32))
    print(model(sample).size())
96 | 
97 | # test_resnext()
98 | 


--------------------------------------------------------------------------------
/models/senet.py:
--------------------------------------------------------------------------------
  1 | '''SENet in PyTorch.
  2 | 
  3 | SENet is the winner of ImageNet-2017. The paper is not released yet.
  4 | '''
  5 | import torch
  6 | import torch.nn as nn
  7 | import torch.nn.functional as F
  8 | 
  9 | from torch.autograd import Variable
 10 | 
 11 | 
 12 | class BasicBlock(nn.Module):
 13 |     def __init__(self, in_planes, planes, stride=1):
 14 |         super(BasicBlock, self).__init__()
 15 |         self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
 16 |         self.bn1 = nn.BatchNorm2d(planes)
 17 |         self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
 18 |         self.bn2 = nn.BatchNorm2d(planes)
 19 | 
 20 |         self.shortcut = nn.Sequential()
 21 |         if stride != 1 or in_planes != planes:
 22 |             self.shortcut = nn.Sequential(
 23 |                 nn.Conv2d(in_planes, planes, kernel_size=1, stride=stride, bias=False),
 24 |                 nn.BatchNorm2d(planes)
 25 |             )
 26 | 
 27 |         # SE layers
 28 |         self.fc1 = nn.Conv2d(planes, planes//16, kernel_size=1)  # Use nn.Conv2d instead of nn.Linear
 29 |         self.fc2 = nn.Conv2d(planes//16, planes, kernel_size=1)
 30 | 
 31 |     def forward(self, x):
 32 |         out = F.relu(self.bn1(self.conv1(x)))
 33 |         out = self.bn2(self.conv2(out))
 34 | 
 35 |         # Squeeze
 36 |         w = F.avg_pool2d(out, out.size(2))
 37 |         w = F.relu(self.fc1(w))
 38 |         w = F.sigmoid(self.fc2(w))
 39 |         # Excitation
 40 |         out = out * w  # New broadcasting feature from v0.2!
 41 | 
 42 |         out += self.shortcut(x)
 43 |         out = F.relu(out)
 44 |         return out
 45 | 
 46 | 
 47 | class PreActBlock(nn.Module):
 48 |     def __init__(self, in_planes, planes, stride=1):
 49 |         super(PreActBlock, self).__init__()
 50 |         self.bn1 = nn.BatchNorm2d(in_planes)
 51 |         self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
 52 |         self.bn2 = nn.BatchNorm2d(planes)
 53 |         self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
 54 | 
 55 |         if stride != 1 or in_planes != planes:
 56 |             self.shortcut = nn.Sequential(
 57 |                 nn.Conv2d(in_planes, planes, kernel_size=1, stride=stride, bias=False)
 58 |             )
 59 | 
 60 |         # SE layers
 61 |         self.fc1 = nn.Conv2d(planes, planes//16, kernel_size=1)
 62 |         self.fc2 = nn.Conv2d(planes//16, planes, kernel_size=1)
 63 | 
 64 |     def forward(self, x):
 65 |         out = F.relu(self.bn1(x))
 66 |         shortcut = self.shortcut(out) if hasattr(self, 'shortcut') else x
 67 |         out = self.conv1(out)
 68 |         out = self.conv2(F.relu(self.bn2(out)))
 69 | 
 70 |         # Squeeze
 71 |         w = F.avg_pool2d(out, out.size(2))
 72 |         w = F.relu(self.fc1(w))
 73 |         w = F.sigmoid(self.fc2(w))
 74 |         # Excitation
 75 |         out = out * w
 76 | 
 77 |         out += shortcut
 78 |         return out
 79 | 
 80 | 
 81 | class SENet(nn.Module):
 82 |     def __init__(self, block, num_blocks, num_classes=10):
 83 |         super(SENet, self).__init__()
 84 |         self.in_planes = 64
 85 | 
 86 |         self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
 87 |         self.bn1 = nn.BatchNorm2d(64)
 88 |         self.layer1 = self._make_layer(block,  64, num_blocks[0], stride=1)
 89 |         self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
 90 |         self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
 91 |         self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
 92 |         self.linear = nn.Linear(512, num_classes)
 93 | 
 94 |     def _make_layer(self, block, planes, num_blocks, stride):
 95 |         strides = [stride] + [1]*(num_blocks-1)
 96 |         layers = []
 97 |         for stride in strides:
 98 |             layers.append(block(self.in_planes, planes, stride))
 99 |             self.in_planes = planes
100 |         return nn.Sequential(*layers)
101 | 
102 |     def forward(self, x):
103 |         out = F.relu(self.bn1(self.conv1(x)))
104 |         out = self.layer1(out)
105 |         out = self.layer2(out)
106 |         out = self.layer3(out)
107 |         out = self.layer4(out)
108 |         out = F.avg_pool2d(out, 4)
109 |         out = out.view(out.size(0), -1)
110 |         out = self.linear(out)
111 |         return out
112 | 
113 | 
def SENet18(num_classes=10):
    '''Build a SENet from PreActBlock with [2,2,2,2] blocks per stage.

    ``num_classes`` is forwarded to SENet (default 10, as before).
    '''
    return SENet(PreActBlock, [2,2,2,2], num_classes=num_classes)
116 | 
117 | 
def test():
    '''Smoke test: print the output size of SENet18 for one random 3x32x32 input.'''
    model = SENet18()
    sample = Variable(torch.randn(1,3,32,32))
    print(model(sample).size())
122 | 
123 | # test()
124 | 


--------------------------------------------------------------------------------
/models/shufflenet.py:
--------------------------------------------------------------------------------
  1 | '''ShuffleNet in PyTorch.
  2 | 
  3 | See the paper "ShuffleNet: An Extremely Efficient Convolutional Neural Network for Mobile Devices" for more details.
  4 | '''
  5 | import torch
  6 | import torch.nn as nn
  7 | import torch.nn.functional as F
  8 | 
  9 | from torch.autograd import Variable
 10 | 
 11 | 
 12 | class ShuffleBlock(nn.Module):
 13 |     def __init__(self, groups):
 14 |         super(ShuffleBlock, self).__init__()
 15 |         self.groups = groups
 16 | 
 17 |     def forward(self, x):
 18 |         '''Channel shuffle: [N,C,H,W] -> [N,g,C/g,H,W] -> [N,C/g,g,H,w] -> [N,C,H,W]'''
 19 |         N,C,H,W = x.size()
 20 |         g = self.groups
 21 |         return x.view(N,g,C/g,H,W).permute(0,2,1,3,4).contiguous().view(N,C,H,W)
 22 | 
 23 | 
 24 | class Bottleneck(nn.Module):
 25 |     def __init__(self, in_planes, out_planes, stride, groups):
 26 |         super(Bottleneck, self).__init__()
 27 |         self.stride = stride
 28 | 
 29 |         mid_planes = out_planes/4
 30 |         g = 1 if in_planes==24 else groups
 31 |         self.conv1 = nn.Conv2d(in_planes, mid_planes, kernel_size=1, groups=g, bias=False)
 32 |         self.bn1 = nn.BatchNorm2d(mid_planes)
 33 |         self.shuffle1 = ShuffleBlock(groups=g)
 34 |         self.conv2 = nn.Conv2d(mid_planes, mid_planes, kernel_size=3, stride=stride, padding=1, groups=mid_planes, bias=False)
 35 |         self.bn2 = nn.BatchNorm2d(mid_planes)
 36 |         self.conv3 = nn.Conv2d(mid_planes, out_planes, kernel_size=1, groups=groups, bias=False)
 37 |         self.bn3 = nn.BatchNorm2d(out_planes)
 38 | 
 39 |         self.shortcut = nn.Sequential()
 40 |         if stride == 2:
 41 |             self.shortcut = nn.Sequential(nn.AvgPool2d(3, stride=2, padding=1))
 42 | 
 43 |     def forward(self, x):
 44 |         out = F.relu(self.bn1(self.conv1(x)))
 45 |         out = self.shuffle1(out)
 46 |         out = F.relu(self.bn2(self.conv2(out)))
 47 |         out = self.bn3(self.conv3(out))
 48 |         res = self.shortcut(x)
 49 |         out = F.relu(torch.cat([out,res], 1)) if self.stride==2 else F.relu(out+res)
 50 |         return out
 51 | 
 52 | 
 53 | class ShuffleNet(nn.Module):
 54 |     def __init__(self, cfg):
 55 |         super(ShuffleNet, self).__init__()
 56 |         out_planes = cfg['out_planes']
 57 |         num_blocks = cfg['num_blocks']
 58 |         groups = cfg['groups']
 59 | 
 60 |         self.conv1 = nn.Conv2d(3, 24, kernel_size=1, bias=False)
 61 |         self.bn1 = nn.BatchNorm2d(24)
 62 |         self.in_planes = 24
 63 |         self.layer1 = self._make_layer(out_planes[0], num_blocks[0], groups)
 64 |         self.layer2 = self._make_layer(out_planes[1], num_blocks[1], groups)
 65 |         self.layer3 = self._make_layer(out_planes[2], num_blocks[2], groups)
 66 |         self.linear = nn.Linear(out_planes[2], 10)
 67 | 
 68 |     def _make_layer(self, out_planes, num_blocks, groups):
 69 |         layers = []
 70 |         for i in range(num_blocks):
 71 |             stride = 2 if i == 0 else 1
 72 |             cat_planes = self.in_planes if i == 0 else 0
 73 |             layers.append(Bottleneck(self.in_planes, out_planes-cat_planes, stride=stride, groups=groups))
 74 |             self.in_planes = out_planes
 75 |         return nn.Sequential(*layers)
 76 | 
 77 |     def forward(self, x):
 78 |         out = F.relu(self.bn1(self.conv1(x)))
 79 |         out = self.layer1(out)
 80 |         out = self.layer2(out)
 81 |         out = self.layer3(out)
 82 |         out = F.avg_pool2d(out, 4)
 83 |         out = out.view(out.size(0), -1)
 84 |         out = self.linear(out)
 85 |         return out
 86 | 
 87 | 
 88 | def ShuffleNetG2():
 89 |     cfg = {
 90 |         'out_planes': [200,400,800],
 91 |         'num_blocks': [4,8,4],
 92 |         'groups': 2
 93 |     }
 94 |     return ShuffleNet(cfg)
 95 | 
 96 | def ShuffleNetG3():
 97 |     cfg = {
 98 |         'out_planes': [240,480,960],
 99 |         'num_blocks': [4,8,4],
100 |         'groups': 3
101 |     }
102 |     return ShuffleNet(cfg)
103 | 
104 | 
def test():
    '''Smoke test: push one random 3x32x32 image through ShuffleNetG2 and
    print the output.'''
    model = ShuffleNetG2()
    sample = Variable(torch.randn(1,3,32,32))
    print(model(sample))
110 | 
111 | # test()
112 | 


--------------------------------------------------------------------------------
/models/vgg.py:
--------------------------------------------------------------------------------
 1 | '''VGG11/13/16/19 in Pytorch.'''
 2 | import torch
 3 | import torch.nn as nn
 4 | from torch.autograd import Variable
 5 | 
 6 | 
 7 | cfg = {
 8 |     'VGG11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
 9 |     'VGG13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
10 |     'VGG16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
11 |     'VGG19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],
12 | }
13 | 
14 | 
15 | class VGG(nn.Module):
16 |     def __init__(self, vgg_name):
17 |         super(VGG, self).__init__()
18 |         self.features = self._make_layers(cfg[vgg_name])
19 |         self.classifier = nn.Linear(512, 10)
20 | 
21 |     def forward(self, x):
22 |         out = self.features(x)
23 |         out = out.view(out.size(0), -1)
24 |         out = self.classifier(out)
25 |         return out
26 | 
27 |     def _make_layers(self, cfg):
28 |         layers = []
29 |         in_channels = 3
30 |         for x in cfg:
31 |             if x == 'M':
32 |                 layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
33 |             else:
34 |                 layers += [nn.Conv2d(in_channels, x, kernel_size=3, padding=1),
35 |                            nn.BatchNorm2d(x),
36 |                            nn.ReLU(inplace=True)]
37 |                 in_channels = x
38 |         layers += [nn.AvgPool2d(kernel_size=1, stride=1)]
39 |         return nn.Sequential(*layers)
40 | 
41 | # net = VGG('VGG11')
42 | # x = torch.randn(2,3,32,32)
43 | # print(net(Variable(x)).size())
44 | 


--------------------------------------------------------------------------------
/run.sh:
--------------------------------------------------------------------------------
1 | python main_cifar10.py --mixup --exp='cifar10_mixup'
2 | python main_cifar10.py --exp='cifar10_nomixup'
3 | python main_cifar100.py --mixup --exp='cifar100_mixup'
4 | python main_cifar100.py --exp='cifar100_nomixup'


--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
  1 | '''Some helper functions for PyTorch, including:
  2 |     - get_mean_and_std: calculate the mean and std value of dataset.
    - init_params: net parameter initialization.
  4 |     - progress_bar: progress bar mimic xlua.progress.
  5 | '''
  6 | import os
  7 | import sys
  8 | import time
  9 | import math
 10 | 
 11 | import torch.nn as nn
 12 | import torch.nn.init as init
 13 | 
 14 | 
 15 | def get_mean_and_std(dataset):
 16 |     '''Compute the mean and std value of dataset.'''
 17 |     dataloader = torch.utils.data.DataLoader(
 18 |         dataset, batch_size=1, shuffle=True, num_workers=2)
 19 |     mean = torch.zeros(3)
 20 |     std = torch.zeros(3)
 21 |     print('==> Computing mean and std..')
 22 |     for inputs, targets in dataloader:
 23 |         for i in range(3):
 24 |             mean[i] += inputs[:, i, :, :].mean()
 25 |             std[i] += inputs[:, i, :, :].std()
 26 |     mean.div_(len(dataset))
 27 |     std.div_(len(dataset))
 28 |     return mean, std
 29 | 
 30 | 
 31 | def init_params(net):
 32 |     '''Init layer parameters.'''
 33 |     for m in net.modules():
 34 |         if isinstance(m, nn.Conv2d):
 35 |             init.kaiming_normal(m.weight, mode='fan_out')
 36 |             if m.bias:
 37 |                 init.constant(m.bias, 0)
 38 |         elif isinstance(m, nn.BatchNorm2d):
 39 |             init.constant(m.weight, 1)
 40 |             init.constant(m.bias, 0)
 41 |         elif isinstance(m, nn.Linear):
 42 |             init.normal(m.weight, std=1e-3)
 43 |             if m.bias:
 44 |                 init.constant(m.bias, 0)
 45 | 
 46 | 
# Width, in characters, that progress_bar pads each line to. Detection via
# `stty size` is disabled (kept below for reference); a fixed width is used.
#_, term_width = os.popen('stty size', 'r').read().split()
# term_width = int(term_width)
term_width = 192

# Length of the bar itself. Stored as a float; progress_bar wraps it in
# int() wherever a character count is needed.
TOTAL_BAR_LENGTH = 65.
# Time of the most recent progress_bar call (updated on every call).
last_time = time.time()
# Time the current bar started; progress_bar resets it when current == 0.
begin_time = last_time
 54 | 
 55 | 
 56 | def progress_bar(current, total, msg=None):
 57 |     global last_time, begin_time
 58 |     if current == 0:
 59 |         begin_time = time.time()  # Reset for new bar.
 60 | 
 61 |     cur_len = int(TOTAL_BAR_LENGTH*current/total)
 62 |     rest_len = int(TOTAL_BAR_LENGTH - cur_len) - 1
 63 | 
 64 |     sys.stdout.write(' [')
 65 |     for i in range(cur_len):
 66 |         sys.stdout.write('=')
 67 |     sys.stdout.write('>')
 68 |     for i in range(rest_len):
 69 |         sys.stdout.write('.')
 70 |     sys.stdout.write(']')
 71 | 
 72 |     cur_time = time.time()
 73 |     step_time = cur_time - last_time
 74 |     last_time = cur_time
 75 |     tot_time = cur_time - begin_time
 76 | 
 77 |     L = []
 78 |     L.append('  Step: %s' % format_time(step_time))
 79 |     L.append(' | Tot: %s' % format_time(tot_time))
 80 |     if msg:
 81 |         L.append(' | ' + msg)
 82 | 
 83 |     msg = ''.join(L)
 84 |     sys.stdout.write(msg)
 85 |     for i in range(term_width-int(TOTAL_BAR_LENGTH)-len(msg)-3):
 86 |         sys.stdout.write(' ')
 87 | 
 88 |     # Go back to the center of the bar.
 89 |     for i in range(term_width-int(TOTAL_BAR_LENGTH/2)+2):
 90 |         sys.stdout.write('\b')
 91 |     sys.stdout.write(' %d/%d ' % (current+1, total))
 92 | 
 93 |     if current < total-1:
 94 |         sys.stdout.write('\r')
 95 |     else:
 96 |         sys.stdout.write('\n')
 97 |     sys.stdout.flush()
 98 | 
 99 | 
def format_time(seconds):
    '''Format a duration in seconds as a short string.

    The duration is split into days (D), hours (h), minutes (m), seconds (s)
    and milliseconds (ms); only the first two non-zero units, from largest
    to smallest, are kept. Returns '0ms' when every unit is zero.
    '''
    days = int(seconds / 3600/24)
    rest = seconds - days*3600*24
    hours = int(rest / 3600)
    rest = rest - hours*3600
    minutes = int(rest / 60)
    rest = rest - minutes*60
    whole = int(rest)
    millis = int((rest - whole)*1000)

    units = (
        (days, 'D'),
        (hours, 'h'),
        (minutes, 'm'),
        (whole, 's'),
        (millis, 'ms'),
    )
    parts = []
    for amount, suffix in units:
        # Keep at most two units.
        if amount > 0 and len(parts) < 2:
            parts.append(str(amount) + suffix)
    return ''.join(parts) or '0ms'
131 | 


--------------------------------------------------------------------------------