├── LICENSE
├── README.md
├── layers.py
├── main.py
└── utils.py

/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2018 Marco Birck

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# adaptative-dropout-pytorch
PyTorch implementation of Adaptive Dropout, a.k.a. Standout.

* Unfortunately I wasn't able to achieve the results reported in the paper. Regular dropout consistently gets approximately 98.70, while with the standout version I was only able to reach 98.51 after tweaking the knobs a lot. I didn't run a grid search; everything was empirical, starting from the paper's parameters. Furthermore, I did not do any unsupervised pretraining, and maybe this technique is very sensitive to that step! I hope someone can achieve the reported results with this code.
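For reference, the keep probabilities computed in `layers.py` follow the Standout rule from the paper: each unit is kept with probability `p = sigmoid(alpha * (x @ W.T) + beta)`, where `W` are the weights of the layer being masked and `x` is its input. Training samples a binary mask from `Bernoulli(p)`, while the deterministic pass multiplies the activations by `p` directly.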
# References:

- Papers:

  https://papers.nips.cc/paper/5032-adaptive-dropout-for-training-deep-neural-networks.pdf

- Code:

  https://github.com/gngdb/adaptive-standout

  https://github.com/pytorch/examples
--------------------------------------------------------------------------------
/layers.py:
--------------------------------------------------------------------------------
import torch
from torch.autograd import Variable
from torch import nn


class Standout(nn.Module):

    def __init__(self, last_layer, alpha, beta):
        print("<<<<<<<<< THIS IS DEFINITELY A STANDOUT TRAINING >>>>>>>>>>>>>>>")
        super(Standout, self).__init__()
        # Standout shares the weights of the layer it regularizes.
        self.pi = last_layer.weight
        self.alpha = alpha
        self.beta = beta
        self.nonlinearity = nn.Sigmoid()

    def forward(self, previous, current, deterministic=False):
        # Keep probabilities as on page 3 of the Adaptive Dropout (Standout)
        # paper: p = sigmoid(alpha * (x W^T) + beta), computed from the input
        # to the layer being masked.
        self.p = self.nonlinearity(self.alpha * previous.matmul(self.pi.t()) + self.beta)
        self.mask = sample_mask(self.p)

        # Deterministic version as in the paper: scale by the expected mask.
        if deterministic or torch.mean(self.p).data.cpu().numpy() == 0:
            return self.p * current
        else:
            return self.mask * current


def sample_mask(p):
    """Given a matrix of probabilities, sample a binary mask of the same shape."""
    if torch.cuda.is_available():
        uniform = Variable(torch.Tensor(p.size()).uniform_(0, 1).cuda())
    else:
        uniform = Variable(torch.Tensor(p.size()).uniform_(0, 1))
    mask = uniform < p

    if torch.cuda.is_available():
        mask = mask.type(torch.cuda.FloatTensor)
    else:
        mask = mask.type(torch.FloatTensor)

    return mask
--------------------------------------------------------------------------------
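Below is a minimal, hypothetical sketch of driving the `Standout` layer above on its own, mirroring what `Net.forward` in `main.py` does; the layer sizes and the `alpha`/`beta` values are just the ones `main.py` happens to use, not requirements:

```python
# Hypothetical standalone usage of the Standout layer from layers.py.
import torch
import torch.nn.functional as F
from layers import Standout

fc1 = torch.nn.Linear(784, 1000)   # the layer whose weights Standout reuses
fc1_drop = Standout(fc1, 0.5, 1)   # alpha=0.5, beta=1, as in main.py

x = torch.rand(4, 784)             # "previous": the input fed into fc1
h = F.relu(fc1(x))                 # "current": the activations to be masked
h_train = fc1_drop(x, h)                      # training: sampled binary mask
h_eval = fc1_drop(x, h, deterministic=True)   # eval: scale by keep probability p
```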
/main.py:
--------------------------------------------------------------------------------
from __future__ import print_function
import argparse
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.autograd import Variable
from layers import Standout
from utils import saveLog

# Training settings
parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
parser.add_argument('--batch-size', type=int, default=100, metavar='N',
                    help='input batch size for training (default: 100)')
parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N',
                    help='input batch size for testing (default: 1000)')
parser.add_argument('--epochs', type=int, default=1000, metavar='N',
                    help='number of epochs to train (default: 1000)')
parser.add_argument('--lr', type=float, default=0.001, metavar='LR',
                    help='learning rate (default: 0.001)')
parser.add_argument('--momentum', type=float, default=0.9, metavar='M',
                    help='SGD momentum (default: 0.9)')
parser.add_argument('--no-cuda', action='store_true', default=False,
                    help='disables CUDA training')
parser.add_argument('--seed', type=int, default=1, metavar='S',
                    help='random seed (default: 1)')
parser.add_argument('--log-interval', type=int, default=10000, metavar='N',
                    help='how many batches to wait before logging training status')
parser.add_argument('--standout', action='store_true', default=False,
                    help='activates standout training')

args = parser.parse_args()
args.cuda = not args.no_cuda and torch.cuda.is_available()

torch.manual_seed(args.seed)
if args.cuda:
    torch.cuda.manual_seed(args.seed)

kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {}
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=True, download=True,
                   transform=transforms.Compose([
                       transforms.ToTensor(),
                       transforms.Normalize((0.1307,), (0.3081,))
                   ])),
    batch_size=args.batch_size, shuffle=True, **kwargs)
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=False, transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
    ])),
    batch_size=args.test_batch_size, shuffle=True, **kwargs)


class Net(nn.Module):
    def __init__(self, standout):
        super(Net, self).__init__()
        #### SELF ARGS ####
        self.standout = standout

        #### MODEL PARAMS ####
        self.fc1 = nn.Linear(784, 1000)
        self.fc1_drop = Standout(self.fc1, 0.5, 1) if standout else nn.Dropout(0.5)
        self.fc2 = nn.Linear(1000, 1000)
        self.fc2_drop = Standout(self.fc2, 0.5, 1) if standout else nn.Dropout(0.5)
        self.fc_final = nn.Linear(1000, 10)

    def forward(self, x):
        # Flatten input
        x = x.view(-1, 784)

        # FIRST FC; keep the layer's input around for Standout
        previous = x
        x_relu = F.relu(self.fc1(x))
        # Select between dropout styles; Standout uses its expected mask at eval time
        x = (self.fc1_drop(previous, x_relu, deterministic=not self.training)
             if self.standout else self.fc1_drop(x_relu))

        # SECOND FC
        previous = x
        x_relu = F.relu(self.fc2(x))
        # Select between dropout styles
        x = (self.fc2_drop(previous, x_relu, deterministic=not self.training)
             if self.standout else self.fc2_drop(x_relu))

        x = self.fc_final(x)

        return F.log_softmax(x, dim=1)


def train(model, epoch):
    optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum)
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        if args.cuda:
            data, target = data.cuda(), target.cuda()
        data, target = Variable(data), Variable(target)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % args.log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.data[0]))
def test(model, standout, epoch):
    model.eval()
    test_loss = 0
    correct = 0
    for data, target in test_loader:
        if args.cuda:
            data, target = data.cuda(), target.cuda()
        data, target = Variable(data, volatile=True), Variable(target)
        output = model(data)
        test_loss += F.nll_loss(output, target, size_average=False).data[0]  # sum up batch loss
        pred = output.data.max(1, keepdim=True)[1]  # get the index of the max log-probability
        correct += pred.eq(target.data.view_as(pred)).cpu().sum()

    test_loss /= len(test_loader.dataset)
    test_acc = 100. * correct / len(test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.5f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset), test_acc))
    drop_way = "Standout" if standout else "Dropout"
    saveLog(test_loss, test_acc, correct, drop_way, args, epoch)


def run(standout=False):
    model = Net(standout)
    if args.cuda:
        model.cuda()

    test(model, standout, 0)
    for epoch in range(1, args.epochs + 1):
        train(model, epoch)
        test(model, standout, epoch)


def main():
    print("RUNNING STANDOUT ONE")
    run(standout=True)

    print("RUNNING DROPOUT ONE")
    run()


if __name__ == "__main__":
    main()
--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
import os, csv


def saveLog(test_loss, test_acc, correct, drop_way, args, epoch):
    path = './log/'
    if not os.path.isdir(path):  # make sure the log directory exists
        os.makedirs(path)
    path += drop_way + '_MNIST_' + str(args.seed) + '.csv'
    # Start each run with a fresh log file
    if epoch == 0 and os.path.isfile(path):
        os.remove(path)
    assert not (os.path.isfile(path) and epoch == 0), "That can't be right. This file should not be here!"
    fields = ['epoch', epoch, 'test_loss', test_loss, 'test_acc', test_acc, 'correct', correct]
    with open(path, 'a+') as f:
        writer = csv.writer(f)
        writer.writerow(fields)
--------------------------------------------------------------------------------
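Usage note: `python main.py` runs both configurations back to back (the Standout model first, then the plain-dropout baseline), and every call to `test` appends an `epoch`/`test_loss`/`test_acc`/`correct` row to `./log/Standout_MNIST_<seed>.csv` or `./log/Dropout_MNIST_<seed>.csv` through `saveLog`.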