├── LICENSE
├── README.md
├── layers.py
├── main.py
└── utils.py

/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2018 Marco Birck

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# adaptative-dropout-pytorch
PyTorch implementation of Adaptive Dropout, a.k.a. Standout.

* Unfortunately I wasn't able to achieve the results reported in the paper. Regular dropout consistently gets approximately 98.70, while with the standout version I was only able to reach 98.51 after tweaking the knobs a lot. I didn't run a grid search; everything was empirical, starting from the paper's parameters. Furthermore, I did not do any unsupervised pretraining, and maybe this technique is very sensitive to that step! I hope someone can achieve the reported results with this code.
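For reference, the keep probabilities computed in `layers.py` follow the Standout rule from the paper: each unit is kept with probability `p = sigmoid(alpha * (x @ W.T) + beta)`, where `W` are the weights of the layer being masked and `x` is its input. Training samples a binary mask from `Bernoulli(p)`, while the deterministic pass multiplies the activations by `p` directly.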
# References:

- Papers:

  https://papers.nips.cc/paper/5032-adaptive-dropout-for-training-deep-neural-networks.pdf

- Code:

  https://github.com/gngdb/adaptive-standout

  https://github.com/pytorch/examples
--------------------------------------------------------------------------------
/layers.py:
--------------------------------------------------------------------------------
import torch
from torch.autograd import Variable
from torch import nn


class Standout(nn.Module):

    def __init__(self, last_layer, alpha, beta):
        print("<<<<<<<<< THIS IS DEFINITELY A STANDOUT TRAINING >>>>>>>>>>>>>>>")
        super(Standout, self).__init__()
        # Standout shares the weights of the layer it regularizes.
        self.pi = last_layer.weight
        self.alpha = alpha
        self.beta = beta
        self.nonlinearity = nn.Sigmoid()

    def forward(self, previous, current, deterministic=False):
        # Keep probabilities as on page 3 of the Adaptive Dropout (Standout)
        # paper: p = sigmoid(alpha * (x W^T) + beta), computed from the input
        # to the layer being masked.
        self.p = self.nonlinearity(self.alpha * previous.matmul(self.pi.t()) + self.beta)
        self.mask = sample_mask(self.p)

        # Deterministic version as in the paper: scale by the expected mask.
        if deterministic or torch.mean(self.p).data.cpu().numpy() == 0:
            return self.p * current
        else:
            return self.mask * current


def sample_mask(p):
    """Given a matrix of probabilities, sample a binary mask of the same shape."""
    if torch.cuda.is_available():
        uniform = Variable(torch.Tensor(p.size()).uniform_(0, 1).cuda())
    else:
        uniform = Variable(torch.Tensor(p.size()).uniform_(0, 1))
    mask = uniform < p

    if torch.cuda.is_available():
        mask = mask.type(torch.cuda.FloatTensor)
    else:
        mask = mask.type(torch.FloatTensor)

    return mask
--------------------------------------------------------------------------------
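Below is a minimal, hypothetical sketch of driving the `Standout` layer above on its own, mirroring what `Net.forward` in `main.py` does; the layer sizes and the `alpha`/`beta` values are just the ones `main.py` happens to use, not requirements:

```python
# Hypothetical standalone usage of the Standout layer from layers.py.
import torch
import torch.nn.functional as F
from layers import Standout

fc1 = torch.nn.Linear(784, 1000)   # the layer whose weights Standout reuses
fc1_drop = Standout(fc1, 0.5, 1)   # alpha=0.5, beta=1, as in main.py

x = torch.rand(4, 784)             # "previous": the input fed into fc1
h = F.relu(fc1(x))                 # "current": the activations to be masked
h_train = fc1_drop(x, h)                      # training: sampled binary mask
h_eval = fc1_drop(x, h, deterministic=True)   # eval: scale by keep probability p
```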
/main.py:
--------------------------------------------------------------------------------
from __future__ import print_function
import argparse
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.autograd import Variable
from layers import Standout
from utils import saveLog

# Training settings
parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
parser.add_argument('--batch-size', type=int, default=100, metavar='N',
                    help='input batch size for training (default: 100)')
parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N',
                    help='input batch size for testing (default: 1000)')
parser.add_argument('--epochs', type=int, default=1000, metavar='N',
                    help='number of epochs to train (default: 1000)')
parser.add_argument('--lr', type=float, default=0.001, metavar='LR',
                    help='learning rate (default: 0.001)')
parser.add_argument('--momentum', type=float, default=0.9, metavar='M',
                    help='SGD momentum (default: 0.9)')
parser.add_argument('--no-cuda', action='store_true', default=False,
                    help='disables CUDA training')
parser.add_argument('--seed', type=int, default=1, metavar='S',
                    help='random seed (default: 1)')
parser.add_argument('--log-interval', type=int, default=10000, metavar='N',
                    help='how many batches to wait before logging training status')
parser.add_argument('--standout', action='store_true', default=False,
                    help='activates standout training')

args = parser.parse_args()
args.cuda = not args.no_cuda and torch.cuda.is_available()

torch.manual_seed(args.seed)
if args.cuda:
    torch.cuda.manual_seed(args.seed)

kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {}
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=True, download=True,
                   transform=transforms.Compose([
                       transforms.ToTensor(),
                       transforms.Normalize((0.1307,), (0.3081,))
                   ])),
    batch_size=args.batch_size, shuffle=True, **kwargs)
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=False, transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
    ])),
    batch_size=args.test_batch_size, shuffle=True, **kwargs)


class Net(nn.Module):
    def __init__(self, standout):
        super(Net, self).__init__()
        #### SELF ARGS ####
        self.standout = standout

        #### MODEL PARAMS ####
        self.fc1 = nn.Linear(784, 1000)
        self.fc1_drop = Standout(self.fc1, 0.5, 1) if standout else nn.Dropout(0.5)
        self.fc2 = nn.Linear(1000, 1000)
        self.fc2_drop = Standout(self.fc2, 0.5, 1) if standout else nn.Dropout(0.5)
        self.fc_final = nn.Linear(1000, 10)

    def forward(self, x):
        # Flatten input
        x = x.view(-1, 784)

        # FIRST FC; keep the layer's input around for Standout
        previous = x
        x_relu = F.relu(self.fc1(x))
        # Select between dropout styles; Standout uses its expected mask at eval time
        x = (self.fc1_drop(previous, x_relu, deterministic=not self.training)
             if self.standout else self.fc1_drop(x_relu))

        # SECOND FC
        previous = x
        x_relu = F.relu(self.fc2(x))
        # Select between dropout styles
        x = (self.fc2_drop(previous, x_relu, deterministic=not self.training)
             if self.standout else self.fc2_drop(x_relu))

        x = self.fc_final(x)

        return F.log_softmax(x, dim=1)


def train(model, epoch):
    optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum)
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        if args.cuda:
            data, target = data.cuda(), target.cuda()
        data, target = Variable(data), Variable(target)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % args.log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.data[0]))
def test(model, standout, epoch):
    model.eval()
    test_loss = 0
    correct = 0
    for data, target in test_loader:
        if args.cuda:
            data, target = data.cuda(), target.cuda()
        data, target = Variable(data, volatile=True), Variable(target)
        output = model(data)
        test_loss += F.nll_loss(output, target, size_average=False).data[0]  # sum up batch loss
        pred = output.data.max(1, keepdim=True)[1]  # get the index of the max log-probability
        correct += pred.eq(target.data.view_as(pred)).cpu().sum()

    test_loss /= len(test_loader.dataset)
    test_acc = 100. * correct / len(test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.5f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset), test_acc))
    drop_way = "Standout" if standout else "Dropout"
    saveLog(test_loss, test_acc, correct, drop_way, args, epoch)


def run(standout=False):
    model = Net(standout)
    if args.cuda:
        model.cuda()

    test(model, standout, 0)
    for epoch in range(1, args.epochs + 1):
        train(model, epoch)
        test(model, standout, epoch)


def main():
    print("RUNNING STANDOUT ONE")
    run(standout=True)

    print("RUNNING DROPOUT ONE")
    run()


if __name__ == "__main__":
    main()
--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
import os, csv


def saveLog(test_loss, test_acc, correct, drop_way, args, epoch):
    path = './log/'
    if not os.path.isdir(path):  # make sure the log directory exists
        os.makedirs(path)
    path += drop_way + '_MNIST_' + str(args.seed) + '.csv'
    # Start each run with a fresh log file
    if epoch == 0 and os.path.isfile(path):
        os.remove(path)
    assert not (os.path.isfile(path) and epoch == 0), "That can't be right. This file should not be here!"
    fields = ['epoch', epoch, 'test_loss', test_loss, 'test_acc', test_acc, 'correct', correct]
    with open(path, 'a+') as f:
        writer = csv.writer(f)
        writer.writerow(fields)
--------------------------------------------------------------------------------
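Usage note: `python main.py` runs both configurations back to back (the Standout model first, then the plain-dropout baseline), and every call to `test` appends an `epoch`/`test_loss`/`test_acc`/`correct` row to `./log/Standout_MNIST_<seed>.csv` or `./log/Dropout_MNIST_<seed>.csv` through `saveLog`.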