├── advrush
    ├── hessianflow
    │   ├── optimizer
    │   │   ├── __init__.py
    │   │   ├── __pycache__
    │   │   │   ├── absa.cpython-36.pyc
    │   │   │   ├── __init__.cpython-36.pyc
    │   │   │   ├── baseline.cpython-36.pyc
    │   │   │   ├── optm_utils.cpython-36.pyc
    │   │   │   └── progressbar.cpython-36.pyc
    │   │   ├── baseline.py
    │   │   ├── progressbar.py
    │   │   ├── optm_utils.py
    │   │   └── absa.py
    │   ├── __pycache__
    │   │   ├── eigen.cpython-36.pyc
    │   │   ├── utils.cpython-36.pyc
    │   │   └── __init__.cpython-36.pyc
    │   ├── __init__.py
    │   ├── utils.py
    │   └── eigen.py
    ├── visualize.py
    ├── genotypes.py
    ├── operations.py
    ├── architect.py
    ├── trades.py
    ├── utils.py
    ├── model_search.py
    ├── model.py
    ├── adv_train.py
    ├── train_search.py
    └── regularizer.py
├── README.md
├── eval
    ├── genotypes.py
    ├── operations.py
    ├── utils.py
    ├── model.py
    └── pgd_attack.py
└── LICENSE


/advrush/hessianflow/optimizer/__init__.py:
--------------------------------------------------------------------------------
1 | from .baseline import baseline
2 | from .absa import absa
3 | 


--------------------------------------------------------------------------------
/advrush/hessianflow/__pycache__/eigen.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nutellamok/advrush/HEAD/advrush/hessianflow/__pycache__/eigen.cpython-36.pyc


--------------------------------------------------------------------------------
/advrush/hessianflow/__pycache__/utils.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nutellamok/advrush/HEAD/advrush/hessianflow/__pycache__/utils.cpython-36.pyc


--------------------------------------------------------------------------------
/advrush/hessianflow/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Hessian tool for neural networks based on pytorch 0.4.1
3 | """
4 | 
5 | name = 'Hessian Flow'
6 | 
7 | from .eigen import *
8 | 


--------------------------------------------------------------------------------
/advrush/hessianflow/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nutellamok/advrush/HEAD/advrush/hessianflow/__pycache__/__init__.cpython-36.pyc


--------------------------------------------------------------------------------
/advrush/hessianflow/optimizer/__pycache__/absa.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nutellamok/advrush/HEAD/advrush/hessianflow/optimizer/__pycache__/absa.cpython-36.pyc


--------------------------------------------------------------------------------
/advrush/hessianflow/optimizer/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nutellamok/advrush/HEAD/advrush/hessianflow/optimizer/__pycache__/__init__.cpython-36.pyc


--------------------------------------------------------------------------------
/advrush/hessianflow/optimizer/__pycache__/baseline.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nutellamok/advrush/HEAD/advrush/hessianflow/optimizer/__pycache__/baseline.cpython-36.pyc


--------------------------------------------------------------------------------
/advrush/hessianflow/optimizer/__pycache__/optm_utils.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nutellamok/advrush/HEAD/advrush/hessianflow/optimizer/__pycache__/optm_utils.cpython-36.pyc


--------------------------------------------------------------------------------
/advrush/hessianflow/optimizer/__pycache__/progressbar.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nutellamok/advrush/HEAD/advrush/hessianflow/optimizer/__pycache__/progressbar.cpython-36.pyc


--------------------------------------------------------------------------------
/advrush/visualize.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | import genotypes
 3 | from graphviz import Digraph
 4 | 
 5 | 
 6 | def plot(genotype, filename):
 7 |   g = Digraph(
 8 |       format='pdf',
 9 |       edge_attr=dict(fontsize='20', fontname="times"),
10 |       node_attr=dict(style='filled', shape='rect', align='center', fontsize='20', height='0.5', width='0.5', penwidth='2', fontname="times"),
11 |       engine='dot')
12 |   g.body.extend(['rankdir=LR'])
13 | 
14 |   g.node("c_{k-2}", fillcolor='darkseagreen2')
15 |   g.node("c_{k-1}", fillcolor='darkseagreen2')
16 |   assert len(genotype) % 2 == 0
17 |   steps = len(genotype) // 2
18 | 
19 |   for i in range(steps):
20 |     g.node(str(i), fillcolor='lightblue')
21 | 
22 |   for i in range(steps):
23 |     for k in [2*i, 2*i + 1]:
24 |       op, j = genotype[k]
25 |       if j == 0:
26 |         u = "c_{k-2}"
27 |       elif j == 1:
28 |         u = "c_{k-1}"
29 |       else:
30 |         u = str(j-2)
31 |       v = str(i)
32 |       g.edge(u, v, label=op, fillcolor="gray")
33 | 
34 |   g.node("c_{k}", fillcolor='palegoldenrod')
35 |   for i in range(steps):
36 |     g.edge(str(i), "c_{k}", fillcolor="gray")
37 | 
38 |   g.render(filename, view=True)
39 | 
40 | 
if __name__ == '__main__':
  # Command-line entry point: plot the normal and reduction cells of the
  # genotype named by the single positional argument.
  if len(sys.argv) != 2:
    print("usage:\n python {} ARCH_NAME".format(sys.argv[0]))
    sys.exit(1)

  genotype_name = sys.argv[1]
  try:
    # Plain attribute lookup instead of eval(): it cannot execute arbitrary
    # expressions typed on the command line, and a malformed name is reported
    # through the AttributeError branch instead of crashing with SyntaxError.
    genotype = getattr(genotypes, genotype_name)
  except AttributeError:
    print("{} is not specified in genotypes.py".format(genotype_name)) 
    sys.exit(1)

  plot(genotype.normal, "advrush_normal")
  plot(genotype.reduce, "advrush_reduction")
55 | 
56 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # AdvRush
 2 | Official Code for [AdvRush: Searching for Adversarially Robust Neural Architectures](https://openaccess.thecvf.com/content/ICCV2021/html/Mok_AdvRush_Searching_for_Adversarially_Robust_Neural_Architectures_ICCV_2021_paper.html) (ICCV '21)
 3 | 
 4 | ## Environment Setup
 5 | ```
 6 | Python == 3.6.12, PyTorch == 1.2.0, torchvision == 0.4.0
 7 | ```
 8 | 
 9 | ## AdvRush Search Process
10 | ```
11 | cd advrush && python train_search.py --batch_size 32 --gpu 0 --epochs 60 --a_gamma 0.01 --a_warmup_epochs 50 --w_warmup_epochs 60 --loss_hessian loss_cure
12 | ```
13 | 
14 | ## Adversarial Training
15 | ```
16 | cd advrush && python adv_train.py --batch_size 64 --gpu 0 --epochs 200 --adv_loss pgd --arch ADVRUSH
17 | ```
18 | 
19 | ## Evaluation under PGD Attack
20 | Prior to the evaluation process, add all necessary checkpoint files (preferably in the form of .pth.tar) to the /eval/checkpoints folder.
21 | To conduct white-box attacks, run:
22 | ```
23 | cd eval &&
24 | python pgd_attack.py --white-box-attack True --test-batch-size 10 --arch [arch_name] --checkpoint [./checkpoints/file_name.pth.tar] --data_type [cifar10/svhn]
25 | ```
26 | 
27 | To conduct black-box attacks, run:
28 | ```
29 | cd eval &&
30 | python pgd_attack.py --test-batch-size 10 --target_arch [target_arch] --target_checkpoint [./checkpoints/target_file.pth.tar] --source_arch [source_arch] --source_checkpoint [./checkpoints/source_file.pth.tar] --data_type cifar10
31 | ```
32 | 
33 | ## References
34 | 
35 | DARTS: Differentiable Architecture Search [ICLR '19] [code](https://github.com/quark0/darts) [paper](https://arxiv.org/abs/1806.09055)
36 | 
37 | Robustness via Curvature Regularization, and Vice Versa [CVPR '19] [code](https://github.com/F-Salehi/CURE_robustness) [paper](https://openaccess.thecvf.com/content_CVPR_2019/papers/Moosavi-Dezfooli_Robustness_via_Curvature_Regularization_and_Vice_Versa_CVPR_2019_paper.pdf)
38 | 
39 | Tradeoff-inspired Adversarial Defense via Surrogate-loss Minimization [ICML '19] [code](https://github.com/yaodongyu/TRADES) [paper](https://arxiv.org/pdf/1901.08573.pdf)
40 | 


--------------------------------------------------------------------------------
/advrush/hessianflow/utils.py:
--------------------------------------------------------------------------------
 1 | #*
 2 | # @file Different utility functions 
 3 | # Copyright (c) Zhewei Yao, Amir Gholami
 4 | # All rights reserved.
 5 | # This file is part of HessianFlow library.
 6 | #
 7 | # HessianFlow is free software: you can redistribute it and/or modify
 8 | # it under the terms of the GNU General Public License as published by
 9 | # the Free Software Foundation, either version 3 of the License, or
10 | # (at your option) any later version.
11 | #
12 | # HessianFlow is distributed in the hope that it will be useful,
13 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 | # GNU General Public License for more details.
16 | #
17 | # You should have received a copy of the GNU General Public License
18 | # along with HessianFlow.  If not, see <http://www.gnu.org/licenses/>.
19 | #*
20 | 
21 | import torch
22 | import math
23 | from torch.autograd import Variable
24 | import numpy as np
25 | 
26 | 
def group_product(xs, ys):
    """
    Inner product of two lists of tensors, paired element-wise.
    :param xs: first list of tensors
    :param ys: second list of tensors; ys[i] is multiplied with xs[i]
    :return: the sum, over all pairs, of the summed element-wise products
    """
    partial_sums = []
    for left, right in zip(xs, ys):
        partial_sums.append(torch.sum(left * right))
    return sum(partial_sums)
35 | 
def group_add(params, update, alpha=1):
    """
    In-place update: params[i] <- params[i] + update[i] * alpha
    :param params: list of tensors/parameters, modified through `.data`
    :param update: list of increments, indexed like params
    :param alpha: scalar multiplier applied to every increment
    :return: the same `params` list that was passed in
    """
    for position, param in enumerate(params):
        increment = update[position] * alpha
        param.data.add_(increment)
    return params
46 | 
def normalization(v):
    """
    Scale a list of tensors by the inverse of their joint Euclidean norm.
    A small constant (1e-6) is added to the norm before dividing.
    return: new list with every tensor divided by (norm + 1e-6)
    """
    squared_norm = group_product(v, v)
    norm = (squared_norm ** 0.5).cpu().item()
    denominator = norm + 1e-6
    return [component / denominator for component in v]
57 | 
58 | 
def get_params_grad(model):
    """
    Collect the model parameters and copies of their gradients.

    Every parameter is returned, but a gradient copy is only collected for
    parameters whose `.grad` is set, so `grads` can be shorter than `params`
    (it is empty before any backward pass).
    return: (params, grads)
    """
    params = list(model.parameters())
    # `+ 0.` produces a new tensor instead of aliasing the stored gradient.
    grads = [p.grad + 0. for p in params if p.grad is not None]
    return params, grads
71 | 
def hessian_vector_product(gradsH, params, v):
    """
    Hessian-vector product Hv obtained by differentiating the gradients again.

    gradsH: gradients at the current point (they must carry a graph),
    params: the variables the gradients were taken with respect to,
    v: the vector, given as one tensor per entry of gradsH.
    The graph is retained so the product can be evaluated repeatedly.
    """
    return torch.autograd.grad(
        outputs=gradsH,
        inputs=params,
        grad_outputs=v,
        only_inputs=True,
        retain_graph=True,
    )
81 | 
82 | 


--------------------------------------------------------------------------------
/advrush/hessianflow/optimizer/baseline.py:
--------------------------------------------------------------------------------
 1 | from __future__ import print_function
 2 | import numpy as np
 3 | import torch
 4 | import torch.nn as nn
 5 | import torch.nn.functional as F
 6 | import torch.optim as optim
 7 | from torchvision import datasets, transforms
 8 | from torch.autograd import Variable
 9 | 
10 | from .progressbar import progress_bar
11 | from .optm_utils import exp_lr_scheduler, test
12 | 
13 | # import hessianflow
14 | 
def baseline(model, train_loader, test_loader, criterion, optimizer, epochs, lr_decay_epoch,
        lr_decay_ratio, batch_size = 128, max_large_ratio = 1, cuda = True):
    """
    Baseline method training, i.e. the vanilla training schedule, with
    optional gradient accumulation over several batches.

    :param model: network to train; called as model(data)
    :param train_loader: iterable yielding (data, target) batches
    :param test_loader: loader handed to `test` after every epoch
    :param criterion: loss, called as criterion(output, target)
    :param optimizer: optimizer whose step()/zero_grad() are invoked
    :param epochs: number of epochs to run
    :param lr_decay_epoch: container of epoch numbers after which the
        learning rate is decayed
    :param lr_decay_ratio: decay factor passed to exp_lr_scheduler
    :param batch_size: expected batch size; batches with fewer samples are
        skipped (this threshold used to be hard-coded to 128)
    :param max_large_ratio: number of batches whose gradients are accumulated
        before one optimizer step
    :param cuda: move every training batch to the GPU when True
    :return: (model, num_updates), num_updates being the number of
        optimizer steps taken
    """

    inner_loop = 0
    num_updates = 0
    large_ratio = max_large_ratio
    # assert that shuffle is set for train_loader
    # assert and explain large ratio
    # assert that the train_loader is always set with a small batch size if not print error/warning telling
    # the user to instead use large_ratio
    for epoch in range(1, epochs + 1):
        print('\nCurrent Epoch: ', epoch)
        print('\nTraining')
        train_loss = 0.
        total_num = 0.
        correct = 0.

        for batch_idx, (data, target) in enumerate(train_loader):
            # Skip incomplete batches, honouring the caller's batch_size.
            if target.size(0) < batch_size:
                continue
            model.train()
            # gather input and target for large batch training
            inner_loop += 1
            # get small model update
            if cuda:
                data, target = data.cuda(), target.cuda()
            output = model(data)
            # Scale the loss so the accumulated gradient is an average.
            loss = criterion(output, target)/float(large_ratio)
            loss.backward()
            # Undo the scaling for reporting.
            train_loss += loss.item()*target.size(0)*float(large_ratio)
            total_num += target.size(0)
            _, predicted = output.max(1)
            correct += predicted.eq(target).sum().item()

            # Step only once every `large_ratio` accumulated batches.
            if inner_loop % large_ratio  == 0:
                num_updates += 1
                optimizer.step()
                inner_loop = 0
                optimizer.zero_grad()

            progress_bar(batch_idx, len(train_loader), 'Loss: %.3f | Acc: %.3f%% (%d/%d)'
                         % (train_loss / total_num,
                            100. * correct / total_num, correct, total_num))

        if epoch in lr_decay_epoch:
            exp_lr_scheduler(optimizer, decay_ratio=lr_decay_ratio)

        test(model, test_loader)
    return model, num_updates
67 | 


--------------------------------------------------------------------------------
/advrush/hessianflow/optimizer/progressbar.py:
--------------------------------------------------------------------------------
 1 | '''
 2 | The progress_bar is from:
 3 | https://github.com/noahgolmant/skeletor/blob/master/skeletor/utils.py
 4 | '''
 5 | 
 6 | import os
 7 | import sys
 8 | import time
 9 | import math
10 | 
######## fancy progress bar
# Determine the terminal width by asking `stty`; fall back to 100 columns
# when there is no terminal, the command is unavailable, or its output cannot
# be parsed. The pipe is closed explicitly and the integer conversion happens
# inside the guarded region so malformed output cannot crash the import.
try:
    with os.popen('stty size', 'r') as _stty:
        _, term_width = _stty.read().split()
    term_width = int(term_width)
except (ValueError, OSError):
    term_width = 100


# Width of the drawn bar, in characters.
TOTAL_BAR_LENGTH = 65.
# Timestamps used by progress_bar to report per-step and total time.
last_time = time.time()
begin_time = last_time
def progress_bar(current, total, msg=None):
    """Draw a one-line textual progress bar on stdout.

    current: zero-based index of the step just finished.
    total: total number of steps.
    msg: optional text appended after the timing information.

    Updates the module-level `last_time` / `begin_time` timestamps.
    """
    global last_time, begin_time
    if current == 0:
        # First step of a new bar: restart the total-time clock.
        begin_time = time.time()

    out = sys.stdout
    filled = int(TOTAL_BAR_LENGTH*current/total)
    remaining = int(TOTAL_BAR_LENGTH - filled) - 1
    out.write(' [' + '=' * filled + '>' + '.' * remaining + ']')

    now = time.time()
    step_time = now - last_time
    tot_time = now - begin_time
    last_time = now

    pieces = ['  Step: %s' % format_time(step_time),
              ' | Tot: %s' % format_time(tot_time)]
    if msg:
        pieces.append(' | ' + msg)
    status = ''.join(pieces)
    out.write(status)

    # Pad the rest of the terminal line with blanks.
    out.write(' ' * (term_width-int(TOTAL_BAR_LENGTH)-len(status)-3))

    # Go back to the center of the bar and print the step counter there.
    out.write('\b' * (term_width-int(TOTAL_BAR_LENGTH/2)+2))
    out.write(' %d/%d ' % (current+1, total))

    # Stay on the same line until the last step, then move on.
    out.write('\r' if current < total-1 else '\n')
    out.flush()
64 | 
def format_time(seconds):
    """Format a duration in seconds using at most its two largest non-zero units.

    Units are days (D), hours (h), minutes (m), seconds (s) and milliseconds
    (ms); '0ms' is returned when every unit is zero.
    """
    days = int(seconds / 3600/24)
    seconds -= days*3600*24
    hours = int(seconds / 3600)
    seconds -= hours*3600
    minutes = int(seconds / 60)
    seconds -= minutes*60
    secondsf = int(seconds)
    seconds -= secondsf
    millis = int(seconds*1000)

    units = ((days, 'D'), (hours, 'h'), (minutes, 'm'), (secondsf, 's'), (millis, 'ms'))
    shown = []
    for amount, suffix in units:
        # Keep only the first two non-zero units.
        if amount > 0 and len(shown) < 2:
            shown.append(str(amount) + suffix)
    return ''.join(shown) or '0ms'
96 | 


--------------------------------------------------------------------------------
/advrush/hessianflow/optimizer/optm_utils.py:
--------------------------------------------------------------------------------
 1 | 
 2 | #*
 3 | # @file optm_utils.py different utility functions
 4 | # This file is part of HessianFlow library.
 5 | #
 6 | # HessianFlow is free software: you can redistribute it and/or modify
 7 | # it under the terms of the GNU General Public License as published by
 8 | # the Free Software Foundation, either version 3 of the License, or
 9 | # (at your option) any later version.
10 | #
11 | # HessianFlow is distributed in the hope that it will be useful,
12 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 | # GNU General Public License for more details.
15 | #
16 | # You should have received a copy of the GNU General Public License
17 | # along with HessianFlow.  If not, see <http://www.gnu.org/licenses/>.
18 | #*
19 | from __future__ import print_function
20 | import numpy as np
21 | import torch
22 | import torch.nn as nn
23 | import torch.nn.functional as F
24 | import torch.optim as optim
25 | from torchvision import datasets, transforms
26 | from torch.autograd import Variable
27 | from .progressbar import progress_bar
28 | 
29 | 
def fgsm(model, data, target, eps, cuda = True):
    """Generate adversarial inputs with the fast gradient sign method.

    The model is put in eval mode, `data` is flagged as requiring gradients,
    and the result is clamped to the [min, max] range of the original data.

    Args:
        model: network called as model(data)
        data: input batch to perturb
        target: labels used for the cross-entropy loss
        eps: step size multiplying the sign of the input gradient
        cuda: move data and target to the GPU before the forward pass

    Returns:
        The perturbed batch, on the CPU.
    """
    model.eval()
    if cuda:
        data, target = data.cuda(), target.cuda()
    data.requires_grad = True
    model.zero_grad()
    logits = model(data)
    F.cross_entropy(logits, target).backward(create_graph = False)

    clean = data.data
    stepped = clean + eps * torch.sign(data.grad.data)
    lower, upper = torch.min(clean), torch.max(clean)
    return torch.clamp(stepped, lower, upper).cpu()
49 | 
def exp_lr_scheduler(optimizer, decay_ratio = 0.1):
    """
    Multiply the learning rate of every parameter group by decay_ratio
    and return the same optimizer.
    """
    for group in optimizer.param_groups:
        group['lr'] = group['lr'] * decay_ratio
    return optimizer
57 | 
58 |     
def test(model, test_loader, cuda = True):
    """
    Evaluate the performance of model on test_loader.

    :param model: network to evaluate; it is switched to eval mode
    :param test_loader: iterable yielding (inputs, targets) batches
    :param cuda: move every batch to the GPU when True. Defaults to True,
        which is what the function always did before; pass False to
        evaluate on the CPU.
    :return: accuracy in percent over all samples seen
    """
    print('\nTesting')
    model.eval()
    correct = 0
    total = 0
    # No gradients are needed for evaluation.
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(test_loader):
            if cuda:
                inputs, targets = inputs.cuda(), targets.cuda()
            outputs = model(inputs)
            # Predicted class = index of the largest output along dim 1.
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()

            progress_bar(batch_idx, len(test_loader), 'Acc: %.3f%% (%d/%d)'
                         % (100. * correct/total, correct, total))

    return correct * 100 / total
79 | 


--------------------------------------------------------------------------------
/advrush/hessianflow/eigen.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import math
  3 | from torch.autograd import Variable
  4 | import numpy as np
  5 | 
  6 | from .utils import *
  7 | 
  8 | 
  9 | def get_eigen(model, inputs, targets, criterion, cuda = True, maxIter = 50, tol = 1e-3):
 10 |     """
 11 |     compute the top eigenvalues of model parameters and 
 12 |     the corresponding eigenvectors.
 13 |     """
 14 |     if cuda:
 15 |         inputs, targets = inputs.cuda(), targets.cuda()
 16 |         device = 'cuda'
 17 |     else:
 18 |         device = 'cpu'
 19 |     # change the model to evaluation mode, otherwise the batch Normalization Layer will change.
 20 |     # If you call this functino during training, remember to change the mode back to training mode.
 21 |     model.eval()
 22 | 
 23 |     outputs = model(inputs)
 24 |     loss = criterion(outputs, targets)
 25 |     loss.backward(create_graph = True)
 26 | 
 27 |     params, gradsH = get_params_grad(model)
 28 |     v = [torch.randn(p.size()).to(device) for p in params]
 29 |     v = normalization(v)
 30 | 
 31 |     eigenvalue = None
 32 | 
 33 |     for i in range(maxIter):
 34 |         model.zero_grad()
 35 |         Hv = hessian_vector_product(gradsH, params, v)
 36 |         eigenvalue_tmp = group_product(Hv, v).cpu().item()
 37 |         v = normalization(Hv)
 38 |         if eigenvalue == None:
 39 |             eigenvalue = eigenvalue_tmp
 40 |         else:
 41 |             if abs(eigenvalue-eigenvalue_tmp)/abs(eigenvalue) < tol:
 42 |                 return eigenvalue_tmp, v
 43 |             else:
 44 |                 eigenvalue = eigenvalue_tmp
 45 |     return eigenvalue, v
 46 | 
 47 | def get_eigen_full_dataset(model, dataloader, criterion, cuda = True, maxIter = 50, tol = 1e-3):
 48 |     """
 49 |     compute the top eigenvalues of model parameters and 
 50 |     the corresponding eigenvectors with a full dataset. 
 51 |     Notice, this is very expensive.
 52 |     """
 53 |     if cuda:
 54 |         device = 'cuda'
 55 |     else:
 56 |         device = 'cpu'
 57 |     # change the model to evaluation mode, otherwise the batch Normalization Layer will change.
 58 |     # If you call this functino during training, remember to change the mode back to training mode.
 59 |     model.eval()
 60 | 
 61 |     
 62 |     params,_ = get_params_grad(model)
 63 |     v = [torch.randn(p.size()).to(device) for p in params]
 64 |     v = normalization(v)
 65 | 
 66 |     batch_size = None
 67 |     eigenvalue = None
 68 | 
 69 |     for i in range(maxIter):
 70 |         THv = [torch.zeros(p.size()).to(device) for p in params]
 71 |         counter = 0
 72 |         for inputs, targets in dataloader:
 73 |             
 74 |             if batch_size == None:
 75 |                 batch_size = targets.size(0)
 76 |                
 77 |             if targets.size(0) < batch_size:
 78 |                 continue
 79 |             
 80 |             model.zero_grad()
 81 |             outputs = model(inputs.to(device))
 82 |             loss = criterion(outputs, targets.to(device))
 83 |             loss.backward(create_graph=True)
 84 | 
 85 |             params, gradsH = get_params_grad(model)
 86 |             Hv = torch.autograd.grad(gradsH, params, grad_outputs = v, only_inputs = True, retain_graph = False)
 87 | 
 88 |             THv = [THv1 + Hv1 + 0. for THv1, Hv1 in zip(THv, Hv)]
 89 |             counter += 1
 90 | 
 91 |         eigenvalue_tmp =group_product(THv,v).cpu().item() / float(counter)
 92 |         v = normalization(THv)
 93 |         
 94 |         if eigenvalue == None:
 95 |             eigenvalue = eigenvalue_tmp
 96 |         else:
 97 |             if abs(eigenvalue-eigenvalue_tmp)/abs(eigenvalue) < tol:
 98 |                 return eigenvalue_tmp, v
 99 |             else:
100 |                 eigenvalue = eigenvalue_tmp
101 | 
102 |     return eigenvalue, v
103 | 


--------------------------------------------------------------------------------
/advrush/genotypes.py:
--------------------------------------------------------------------------------
 1 | from collections import namedtuple
 2 | 
# Description of a pair of cells. In the constants defined in this file,
# `normal` and `reduce` are lists of (operation_name, input_index) pairs and
# `normal_concat` / `reduce_concat` are sequences of integer indices.
# NOTE(review): the concat fields presumably list the nodes concatenated at
# the cell output — confirm against model.py.
Genotype = namedtuple('Genotype', 'normal normal_concat reduce reduce_concat')

# Operation names.
# NOTE(review): presumably the candidate set used during architecture search —
# confirm against model_search.py. Some hand-written genotypes in this file
# use names that are not listed here (e.g. 'sep_conv_7x7', 'conv_7x1_1x7').
PRIMITIVES = [
    'none',
    'max_pool_3x3',
    'avg_pool_3x3',
    'skip_connect',
    'sep_conv_3x3',
    'sep_conv_5x5',
    'dil_conv_3x3',
    'dil_conv_5x5'
]
15 | 
16 | NASNet = Genotype(
17 |   normal = [
18 |     ('sep_conv_5x5', 1),
19 |     ('sep_conv_3x3', 0),
20 |     ('sep_conv_5x5', 0),
21 |     ('sep_conv_3x3', 0),
22 |     ('avg_pool_3x3', 1),
23 |     ('skip_connect', 0),
24 |     ('avg_pool_3x3', 0),
25 |     ('avg_pool_3x3', 0),
26 |     ('sep_conv_3x3', 1),
27 |     ('skip_connect', 1),
28 |   ],
29 |   normal_concat = [2, 3, 4, 5, 6],
30 |   reduce = [
31 |     ('sep_conv_5x5', 1),
32 |     ('sep_conv_7x7', 0),
33 |     ('max_pool_3x3', 1),
34 |     ('sep_conv_7x7', 0),
35 |     ('avg_pool_3x3', 1),
36 |     ('sep_conv_5x5', 0),
37 |     ('skip_connect', 3),
38 |     ('avg_pool_3x3', 2),
39 |     ('sep_conv_3x3', 2),
40 |     ('max_pool_3x3', 1),
41 |   ],
42 |   reduce_concat = [4, 5, 6],
43 | )
44 |     
45 | AmoebaNet = Genotype(
46 |   normal = [
47 |     ('avg_pool_3x3', 0),
48 |     ('max_pool_3x3', 1),
49 |     ('sep_conv_3x3', 0),
50 |     ('sep_conv_5x5', 2),
51 |     ('sep_conv_3x3', 0),
52 |     ('avg_pool_3x3', 3),
53 |     ('sep_conv_3x3', 1),
54 |     ('skip_connect', 1),
55 |     ('skip_connect', 0),
56 |     ('avg_pool_3x3', 1),
57 |     ],
58 |   normal_concat = [4, 5, 6],
59 |   reduce = [
60 |     ('avg_pool_3x3', 0),
61 |     ('sep_conv_3x3', 1),
62 |     ('max_pool_3x3', 0),
63 |     ('sep_conv_7x7', 2),
64 |     ('sep_conv_7x7', 0),
65 |     ('avg_pool_3x3', 1),
66 |     ('max_pool_3x3', 0),
67 |     ('max_pool_3x3', 1),
68 |     ('conv_7x1_1x7', 0),
69 |     ('sep_conv_3x3', 5),
70 |   ],
71 |   reduce_concat = [3, 4, 6]
72 | )
73 | 
74 | ADVRUSH = Genotype(normal=[('sep_conv_3x3', 1), ('sep_conv_3x3', 0), ('sep_conv_5x5', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 1), ('sep_conv_3x3', 0), ('skip_connect', 0), ('sep_conv_3x3', 1)], normal_concat=range(2, 6), reduce=[('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('skip_connect', 0), ('dil_conv_3x3', 2), ('skip_connect', 0), ('avg_pool_3x3', 1), ('skip_connect', 0), ('skip_connect', 2)], reduce_concat=range(2, 6))
75 | 
76 | DARTS_V1 = Genotype(normal=[('sep_conv_3x3', 1), ('sep_conv_3x3', 0), ('skip_connect', 0), ('sep_conv_3x3', 1), ('skip_connect', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 0), ('skip_connect', 2)], normal_concat=[2, 3, 4, 5], reduce=[('max_pool_3x3', 0), ('max_pool_3x3', 1), ('skip_connect', 2), ('max_pool_3x3', 0), ('max_pool_3x3', 0), ('skip_connect', 2), ('skip_connect', 2), ('avg_pool_3x3', 0)], reduce_concat=[2, 3, 4, 5])
77 | DARTS_V2 = Genotype(normal=[('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 1), ('skip_connect', 0), ('skip_connect', 0), ('dil_conv_3x3', 2)], normal_concat=[2, 3, 4, 5], reduce=[('max_pool_3x3', 0), ('max_pool_3x3', 1), ('skip_connect', 2), ('max_pool_3x3', 1), ('max_pool_3x3', 0), ('skip_connect', 2), ('skip_connect', 2), ('max_pool_3x3', 1)], reduce_concat=[2, 3, 4, 5])
78 | 
79 | DARTS = DARTS_V2
80 | 
81 | PDARTS = Genotype(normal=[('skip_connect', 0), ('dil_conv_3x3', 1), ('skip_connect', 0),('sep_conv_3x3', 1), ('sep_conv_3x3', 1), ('sep_conv_3x3', 3), ('sep_conv_3x3',0), ('dil_conv_5x5', 4)], normal_concat=range(2, 6), reduce=[('avg_pool_3x3', 0), ('sep_conv_5x5', 1), ('sep_conv_3x3', 0), ('dil_conv_5x5', 2), ('max_pool_3x3', 0), ('dil_conv_3x3', 1), ('dil_conv_3x3', 1), ('dil_conv_5x5', 3)], reduce_concat=range(2, 6))
82 | 
83 | RACL = Genotype(normal=[('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_5x5', 0), ('skip_connect', 1), ('sep_conv_3x3', 0), ('skip_connect', 3), ('sep_conv_3x3', 3), ('skip_connect', 4)], normal_concat=[2, 3, 4, 5], reduce=[('sep_conv_3x3',0), ('sep_conv_5x5', 1), ('avg_pool_3x3', 0), ('dil_conv_3x3', 1), ('sep_conv_3x3', 0), ('sep_conv_5x5',1), ('sep_conv_3x3', 2), ('dil_conv_3x3', 3)], reduce_concat=[2, 3, 4, 5])
84 | 


--------------------------------------------------------------------------------
/eval/genotypes.py:
--------------------------------------------------------------------------------
 1 | from collections import namedtuple
 2 | 
# Description of a pair of cells. In the constants defined in this file,
# `normal` and `reduce` are lists of (operation_name, input_index) pairs and
# `normal_concat` / `reduce_concat` are sequences of integer indices.
# NOTE(review): the concat fields presumably list the nodes concatenated at
# the cell output — confirm against model.py.
Genotype = namedtuple('Genotype', 'normal normal_concat reduce reduce_concat')

# Operation names.
# NOTE(review): presumably the candidate operation set — confirm against the
# code that consumes it. Some hand-written genotypes in this file use names
# that are not listed here (e.g. 'sep_conv_7x7', 'conv_7x1_1x7').
PRIMITIVES = [
    'none',
    'max_pool_3x3',
    'avg_pool_3x3',
    'skip_connect',
    'sep_conv_3x3',
    'sep_conv_5x5',
    'dil_conv_3x3',
    'dil_conv_5x5'
]
15 | 
16 | NASNet = Genotype(
17 |   normal = [
18 |     ('sep_conv_5x5', 1),
19 |     ('sep_conv_3x3', 0),
20 |     ('sep_conv_5x5', 0),
21 |     ('sep_conv_3x3', 0),
22 |     ('avg_pool_3x3', 1),
23 |     ('skip_connect', 0),
24 |     ('avg_pool_3x3', 0),
25 |     ('avg_pool_3x3', 0),
26 |     ('sep_conv_3x3', 1),
27 |     ('skip_connect', 1),
28 |   ],
29 |   normal_concat = [2, 3, 4, 5, 6],
30 |   reduce = [
31 |     ('sep_conv_5x5', 1),
32 |     ('sep_conv_7x7', 0),
33 |     ('max_pool_3x3', 1),
34 |     ('sep_conv_7x7', 0),
35 |     ('avg_pool_3x3', 1),
36 |     ('sep_conv_5x5', 0),
37 |     ('skip_connect', 3),
38 |     ('avg_pool_3x3', 2),
39 |     ('sep_conv_3x3', 2),
40 |     ('max_pool_3x3', 1),
41 |   ],
42 |   reduce_concat = [4, 5, 6],
43 | )
44 |     
45 | AmoebaNet = Genotype(
46 |   normal = [
47 |     ('avg_pool_3x3', 0),
48 |     ('max_pool_3x3', 1),
49 |     ('sep_conv_3x3', 0),
50 |     ('sep_conv_5x5', 2),
51 |     ('sep_conv_3x3', 0),
52 |     ('avg_pool_3x3', 3),
53 |     ('sep_conv_3x3', 1),
54 |     ('skip_connect', 1),
55 |     ('skip_connect', 0),
56 |     ('avg_pool_3x3', 1),
57 |     ],
58 |   normal_concat = [4, 5, 6],
59 |   reduce = [
60 |     ('avg_pool_3x3', 0),
61 |     ('sep_conv_3x3', 1),
62 |     ('max_pool_3x3', 0),
63 |     ('sep_conv_7x7', 2),
64 |     ('sep_conv_7x7', 0),
65 |     ('avg_pool_3x3', 1),
66 |     ('max_pool_3x3', 0),
67 |     ('max_pool_3x3', 1),
68 |     ('conv_7x1_1x7', 0),
69 |     ('sep_conv_3x3', 5),
70 |   ],
71 |   reduce_concat = [3, 4, 6]
72 | )
73 | 
# Named cell genotypes. Every entry in `normal` / `reduce` is an
# (operation name, integer index) pair; the operation names used here are keys
# of the OPS table in operations.py.
# NOTE(review): the integer is presumably the index of the input state the
# operation reads -- confirm against the model-building code.
ADVRUSH = Genotype(normal=[('sep_conv_3x3', 1), ('sep_conv_3x3', 0), ('sep_conv_5x5', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 1), ('sep_conv_3x3', 0), ('skip_connect', 0), ('sep_conv_3x3', 1)], normal_concat=range(2, 6), reduce=[('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('skip_connect', 0), ('dil_conv_3x3', 2), ('skip_connect', 0), ('avg_pool_3x3', 1), ('skip_connect', 0), ('skip_connect', 2)], reduce_concat=range(2, 6))

# Two DARTS genotype variants; `DARTS` below aliases the second one.
DARTS_V1 = Genotype(normal=[('sep_conv_3x3', 1), ('sep_conv_3x3', 0), ('skip_connect', 0), ('sep_conv_3x3', 1), ('skip_connect', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 0), ('skip_connect', 2)], normal_concat=[2, 3, 4, 5], reduce=[('max_pool_3x3', 0), ('max_pool_3x3', 1), ('skip_connect', 2), ('max_pool_3x3', 0), ('max_pool_3x3', 0), ('skip_connect', 2), ('skip_connect', 2), ('avg_pool_3x3', 0)], reduce_concat=[2, 3, 4, 5])
DARTS_V2 = Genotype(normal=[('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 1), ('skip_connect', 0), ('skip_connect', 0), ('dil_conv_3x3', 2)], normal_concat=[2, 3, 4, 5], reduce=[('max_pool_3x3', 0), ('max_pool_3x3', 1), ('skip_connect', 2), ('max_pool_3x3', 1), ('max_pool_3x3', 0), ('skip_connect', 2), ('skip_connect', 2), ('max_pool_3x3', 1)], reduce_concat=[2, 3, 4, 5])

# Default DARTS genotype: the V2 cell.
DARTS = DARTS_V2

PDARTS = Genotype(normal=[('skip_connect', 0), ('dil_conv_3x3', 1), ('skip_connect', 0),('sep_conv_3x3', 1), ('sep_conv_3x3', 1), ('sep_conv_3x3', 3), ('sep_conv_3x3',0), ('dil_conv_5x5', 4)], normal_concat=range(2, 6), reduce=[('avg_pool_3x3', 0), ('sep_conv_5x5', 1), ('sep_conv_3x3', 0), ('dil_conv_5x5', 2), ('max_pool_3x3', 0), ('dil_conv_3x3', 1), ('dil_conv_3x3', 1), ('dil_conv_5x5', 3)], reduce_concat=range(2, 6))

RACL = Genotype(normal=[('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_5x5', 0), ('skip_connect', 1), ('sep_conv_3x3', 0), ('skip_connect', 3), ('sep_conv_3x3', 3), ('skip_connect', 4)], normal_concat=[2, 3, 4, 5], reduce=[('sep_conv_3x3',0), ('sep_conv_5x5', 1), ('avg_pool_3x3', 0), ('dil_conv_3x3', 1), ('sep_conv_3x3', 0), ('sep_conv_5x5',1), ('sep_conv_3x3', 2), ('dil_conv_3x3', 3)], reduce_concat=[2, 3, 4, 5])
84 | 
85 | 
86 | 


--------------------------------------------------------------------------------
/eval/operations.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | 
  4 | OPS = {
  5 |   'none' : lambda C, stride, affine: Zero(stride),
  6 |   'avg_pool_3x3' : lambda C, stride, affine: nn.AvgPool2d(3, stride=stride, padding=1, count_include_pad=False),
  7 |   'max_pool_3x3' : lambda C, stride, affine: nn.MaxPool2d(3, stride=stride, padding=1),
  8 |   'skip_connect' : lambda C, stride, affine: Identity() if stride == 1 else FactorizedReduce(C, C, affine=affine),
  9 |   'sep_conv_3x3' : lambda C, stride, affine: SepConv(C, C, 3, stride, 1, affine=affine),
 10 |   'sep_conv_5x5' : lambda C, stride, affine: SepConv(C, C, 5, stride, 2, affine=affine),
 11 |   'sep_conv_7x7' : lambda C, stride, affine: SepConv(C, C, 7, stride, 3, affine=affine),
 12 |   'dil_conv_3x3' : lambda C, stride, affine: DilConv(C, C, 3, stride, 2, 2, affine=affine),
 13 |   'dil_conv_5x5' : lambda C, stride, affine: DilConv(C, C, 5, stride, 4, 2, affine=affine),
 14 |   'conv_7x1_1x7' : lambda C, stride, affine: nn.Sequential(
 15 |     nn.ReLU(inplace=False),
 16 |     nn.Conv2d(C, C, (1,7), stride=(1, stride), padding=(0, 3), bias=False),
 17 |     nn.Conv2d(C, C, (7,1), stride=(stride, 1), padding=(3, 0), bias=False),
 18 |     nn.BatchNorm2d(C, affine=affine)
 19 |     ),
 20 | }
 21 | 
 22 | class ReLUConvBN(nn.Module):
 23 | 
 24 |   def __init__(self, C_in, C_out, kernel_size, stride, padding, affine=True):
 25 |     super(ReLUConvBN, self).__init__()
 26 |     self.op = nn.Sequential(
 27 |       nn.ReLU(inplace=False),
 28 |       nn.Conv2d(C_in, C_out, kernel_size, stride=stride, padding=padding, bias=False),
 29 |       nn.BatchNorm2d(C_out, affine=affine)
 30 |     )
 31 | 
 32 |   def forward(self, x):
 33 |     return self.op(x)
 34 | 
 35 | class DilConv(nn.Module):
 36 |     
 37 |   def __init__(self, C_in, C_out, kernel_size, stride, padding, dilation, affine=True):
 38 |     super(DilConv, self).__init__()
 39 |     self.op = nn.Sequential(
 40 |       nn.ReLU(inplace=False),
 41 |       nn.Conv2d(C_in, C_in, kernel_size=kernel_size, stride=stride, padding=padding, dilation=dilation, groups=C_in, bias=False),
 42 |       nn.Conv2d(C_in, C_out, kernel_size=1, padding=0, bias=False),
 43 |       nn.BatchNorm2d(C_out, affine=affine),
 44 |       )
 45 | 
 46 |   def forward(self, x):
 47 |     return self.op(x)
 48 | 
 49 | 
 50 | class SepConv(nn.Module):
 51 |     
 52 |   def __init__(self, C_in, C_out, kernel_size, stride, padding, affine=True):
 53 |     super(SepConv, self).__init__()
 54 |     self.op = nn.Sequential(
 55 |       nn.ReLU(inplace=False),
 56 |       nn.Conv2d(C_in, C_in, kernel_size=kernel_size, stride=stride, padding=padding, groups=C_in, bias=False),
 57 |       nn.Conv2d(C_in, C_in, kernel_size=1, padding=0, bias=False),
 58 |       nn.BatchNorm2d(C_in, affine=affine),
 59 |       nn.ReLU(inplace=False),
 60 |       nn.Conv2d(C_in, C_in, kernel_size=kernel_size, stride=1, padding=padding, groups=C_in, bias=False),
 61 |       nn.Conv2d(C_in, C_out, kernel_size=1, padding=0, bias=False),
 62 |       nn.BatchNorm2d(C_out, affine=affine),
 63 |       )
 64 | 
 65 |   def forward(self, x):
 66 |     return self.op(x)
 67 | 
 68 | 
 69 | class Identity(nn.Module):
 70 | 
 71 |   def __init__(self):
 72 |     super(Identity, self).__init__()
 73 | 
 74 |   def forward(self, x):
 75 |     return x
 76 | 
 77 | 
 78 | class Zero(nn.Module):
 79 | 
 80 |   def __init__(self, stride):
 81 |     super(Zero, self).__init__()
 82 |     self.stride = stride
 83 | 
 84 |   def forward(self, x):
 85 |     if self.stride == 1:
 86 |       return x.mul(0.)
 87 |     return x[:,:,::self.stride,::self.stride].mul(0.)
 88 | 
 89 | 
 90 | class FactorizedReduce(nn.Module):
 91 | 
 92 |   def __init__(self, C_in, C_out, affine=True):
 93 |     super(FactorizedReduce, self).__init__()
 94 |     assert C_out % 2 == 0
 95 |     self.relu = nn.ReLU(inplace=False)
 96 |     self.conv_1 = nn.Conv2d(C_in, C_out // 2, 1, stride=2, padding=0, bias=False)
 97 |     self.conv_2 = nn.Conv2d(C_in, C_out // 2, 1, stride=2, padding=0, bias=False) 
 98 |     self.bn = nn.BatchNorm2d(C_out, affine=affine)
 99 | 
100 |   def forward(self, x):
101 |     x = self.relu(x)
102 |     out = torch.cat([self.conv_1(x), self.conv_2(x[:,:,1:,1:])], dim=1)
103 |     out = self.bn(out)
104 |     return out
105 | 
106 | 


--------------------------------------------------------------------------------
/advrush/operations.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | 
  4 | OPS = {
  5 |   'none' : lambda C, stride, affine: Zero(stride),
  6 |   'avg_pool_3x3' : lambda C, stride, affine: nn.AvgPool2d(3, stride=stride, padding=1, count_include_pad=False),
  7 |   'max_pool_3x3' : lambda C, stride, affine: nn.MaxPool2d(3, stride=stride, padding=1),
  8 |   'skip_connect' : lambda C, stride, affine: Identity() if stride == 1 else FactorizedReduce(C, C, affine=affine),
  9 |   'sep_conv_3x3' : lambda C, stride, affine: SepConv(C, C, 3, stride, 1, affine=affine),
 10 |   'sep_conv_5x5' : lambda C, stride, affine: SepConv(C, C, 5, stride, 2, affine=affine),
 11 |   'sep_conv_7x7' : lambda C, stride, affine: SepConv(C, C, 7, stride, 3, affine=affine),
 12 |   'dil_conv_3x3' : lambda C, stride, affine: DilConv(C, C, 3, stride, 2, 2, affine=affine),
 13 |   'dil_conv_5x5' : lambda C, stride, affine: DilConv(C, C, 5, stride, 4, 2, affine=affine),
 14 |   'conv_7x1_1x7' : lambda C, stride, affine: nn.Sequential(
 15 |     nn.ReLU(inplace=False),
 16 |     nn.Conv2d(C, C, (1,7), stride=(1, stride), padding=(0, 3), bias=False),
 17 |     nn.Conv2d(C, C, (7,1), stride=(stride, 1), padding=(3, 0), bias=False),
 18 |     nn.BatchNorm2d(C, affine=affine)
 19 |     ),
 20 | }
 21 | 
 22 | class ReLUConvBN(nn.Module):
 23 | 
 24 |   def __init__(self, C_in, C_out, kernel_size, stride, padding, affine=True):
 25 |     super(ReLUConvBN, self).__init__()
 26 |     self.op = nn.Sequential(
 27 |       nn.ReLU(inplace=False),
 28 |       nn.Conv2d(C_in, C_out, kernel_size, stride=stride, padding=padding, bias=False),
 29 |       nn.BatchNorm2d(C_out, affine=affine)
 30 |     )
 31 | 
 32 |   def forward(self, x):
 33 |     return self.op(x)
 34 | 
 35 | class DilConv(nn.Module):
 36 |     
 37 |   def __init__(self, C_in, C_out, kernel_size, stride, padding, dilation, affine=True):
 38 |     super(DilConv, self).__init__()
 39 |     self.op = nn.Sequential(
 40 |       nn.ReLU(inplace=False),
 41 |       nn.Conv2d(C_in, C_in, kernel_size=kernel_size, stride=stride, padding=padding, dilation=dilation, groups=C_in, bias=False),
 42 |       nn.Conv2d(C_in, C_out, kernel_size=1, padding=0, bias=False),
 43 |       nn.BatchNorm2d(C_out, affine=affine),
 44 |       )
 45 | 
 46 |   def forward(self, x):
 47 |     return self.op(x)
 48 | 
 49 | 
 50 | class SepConv(nn.Module):
 51 |     
 52 |   def __init__(self, C_in, C_out, kernel_size, stride, padding, affine=True):
 53 |     super(SepConv, self).__init__()
 54 |     self.op = nn.Sequential(
 55 |       nn.ReLU(inplace=False),
 56 |       nn.Conv2d(C_in, C_in, kernel_size=kernel_size, stride=stride, padding=padding, groups=C_in, bias=False),
 57 |       nn.Conv2d(C_in, C_in, kernel_size=1, padding=0, bias=False),
 58 |       nn.BatchNorm2d(C_in, affine=affine),
 59 |       nn.ReLU(inplace=False),
 60 |       nn.Conv2d(C_in, C_in, kernel_size=kernel_size, stride=1, padding=padding, groups=C_in, bias=False),
 61 |       nn.Conv2d(C_in, C_out, kernel_size=1, padding=0, bias=False),
 62 |       nn.BatchNorm2d(C_out, affine=affine),
 63 |       )
 64 | 
 65 |   def forward(self, x):
 66 |     return self.op(x)
 67 | 
 68 | 
 69 | class Identity(nn.Module):
 70 | 
 71 |   def __init__(self):
 72 |     super(Identity, self).__init__()
 73 | 
 74 |   def forward(self, x):
 75 |     return x
 76 | 
 77 | 
 78 | class Zero(nn.Module):
 79 | 
 80 |   def __init__(self, stride):
 81 |     super(Zero, self).__init__()
 82 |     self.stride = stride
 83 | 
 84 |   def forward(self, x):
 85 |     if self.stride == 1:
 86 |       return x.mul(0.)
 87 |     return x[:,:,::self.stride,::self.stride].mul(0.)
 88 | 
 89 | 
 90 | class FactorizedReduce(nn.Module):
 91 | 
 92 |   def __init__(self, C_in, C_out, affine=True):
 93 |     super(FactorizedReduce, self).__init__()
 94 |     assert C_out % 2 == 0
 95 |     self.relu = nn.ReLU(inplace=False)
 96 |     self.conv_1 = nn.Conv2d(C_in, C_out // 2, 1, stride=2, padding=0, bias=False)
 97 |     self.conv_2 = nn.Conv2d(C_in, C_out // 2, 1, stride=2, padding=0, bias=False) 
 98 |     self.bn = nn.BatchNorm2d(C_out, affine=affine)
 99 | 
100 |   def forward(self, x):
101 |     x = self.relu(x)
102 |     out = torch.cat([self.conv_1(x), self.conv_2(x[:,:,1:,1:])], dim=1)
103 |     out = self.bn(out)
104 |     return out
105 | 
106 | 


--------------------------------------------------------------------------------
/eval/utils.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import numpy as np
  3 | import torch
  4 | import shutil
  5 | import torchvision.transforms as transforms
  6 | import torchvision.datasets as dset
  7 | from torch.autograd import Variable
  8 | 
  9 | 
 10 | class AvgrageMeter(object):
 11 | 
 12 |   def __init__(self):
 13 |     self.reset()
 14 | 
 15 |   def reset(self):
 16 |     self.avg = 0
 17 |     self.sum = 0
 18 |     self.cnt = 0
 19 | 
 20 |   def update(self, val, n=1):
 21 |     self.sum += val * n
 22 |     self.cnt += n
 23 |     self.avg = self.sum / self.cnt
 24 | 
 25 | 
 26 | def accuracy(output, target, topk=(1,)):
 27 |   maxk = max(topk)
 28 |   batch_size = target.size(0)
 29 | 
 30 |   _, pred = output.topk(maxk, 1, True, True)
 31 |   pred = pred.t()
 32 |   correct = pred.eq(target.view(1, -1).expand_as(pred))
 33 | 
 34 |   res = []
 35 |   for k in topk:
 36 |     correct_k = correct[:k].view(-1).float().sum(0)
 37 |     res.append(correct_k.mul_(100.0/batch_size))
 38 |   return res
 39 | 
 40 | 
 41 | class Cutout(object):
 42 |     def __init__(self, length):
 43 |         self.length = length
 44 | 
 45 |     def __call__(self, img):
 46 |         h, w = img.size(1), img.size(2)
 47 |         mask = np.ones((h, w), np.float32)
 48 |         y = np.random.randint(h)
 49 |         x = np.random.randint(w)
 50 | 
 51 |         y1 = np.clip(y - self.length // 2, 0, h)
 52 |         y2 = np.clip(y + self.length // 2, 0, h)
 53 |         x1 = np.clip(x - self.length // 2, 0, w)
 54 |         x2 = np.clip(x + self.length // 2, 0, w)
 55 | 
 56 |         mask[y1: y2, x1: x2] = 0.
 57 |         mask = torch.from_numpy(mask)
 58 |         mask = mask.expand_as(img)
 59 |         img *= mask
 60 |         return img
 61 | 
 62 | 
 63 | def _data_transforms_cifar10(args):
 64 |   CIFAR_MEAN = [0.49139968, 0.48215827, 0.44653124]
 65 |   CIFAR_STD = [0.24703233, 0.24348505, 0.26158768]
 66 | 
 67 |   train_transform = transforms.Compose([
 68 |     transforms.RandomCrop(32, padding=4),
 69 |     transforms.RandomHorizontalFlip(),
 70 |     transforms.ToTensor(),
 71 |     transforms.Normalize(CIFAR_MEAN, CIFAR_STD),
 72 |   ])
 73 |   if args.cutout:
 74 |     train_transform.transforms.append(Cutout(args.cutout_length))
 75 | 
 76 |   valid_transform = transforms.Compose([
 77 |     transforms.ToTensor(),
 78 |     transforms.Normalize(CIFAR_MEAN, CIFAR_STD),
 79 |     ])
 80 |   return train_transform, valid_transform
 81 | 
 82 | def _data_transforms_cifar10_eval(args):
 83 |   train_transform = transforms.Compose([
 84 |     transforms.RandomCrop(32, padding=4),
 85 |     transforms.RandomHorizontalFlip(),
 86 |     transforms.ToTensor(),
 87 |   ])
 88 |   if args.cutout:
 89 |     train_transform.transforms.append(Cutout(args.cutout_length))
 90 | 
 91 |   valid_transform = transforms.Compose([
 92 |     transforms.ToTensor()
 93 |     ])
 94 |   return train_transform, valid_transform
 95 | 
 96 | def _data_imagenet(args):
 97 |   traindir = os.path.join(args.data, 'train')
 98 |   #validdir = os.path.join(args.data, 'val')
 99 |   normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
100 |   train_data = dset.ImageFolder(
101 |     traindir,
102 |     transforms.Compose([
103 |       transforms.RandomResizedCrop(224),
104 |       transforms.RandomHorizontalFlip(),
105 |       transforms.ToTensor(),
106 |       normalize,
107 |     ]))
108 |   return train_data
109 | 
def count_parameters_in_MB(model):
  """Return the model's parameter count in millions.

  Parameters whose name contains "auxiliary" are excluded. Uses the builtin
  `sum` because passing a generator to `np.sum` is deprecated by NumPy.
  """
  total = sum(v.numel() for name, v in model.named_parameters() if "auxiliary" not in name)
  return total/1e6
112 | 
113 | 
def save_checkpoint(state, is_best, save, epoch):
  """Save `state` as a per-epoch checkpoint in directory `save`.

  When `is_best` is truthy the checkpoint is also copied to a per-epoch
  "model_best" file in the same directory.
  """
  checkpoint_path = os.path.join(save, 'checkpoint-epoch{}.pth.tar'.format(epoch))
  torch.save(state, checkpoint_path)
  if not is_best:
    return
  best_path = os.path.join(save, 'model_best_epoch{}.pth.tar'.format(epoch))
  shutil.copyfile(checkpoint_path, best_path)
120 | 
121 | 
def save(model, model_path):
  """Write the model's state_dict to `model_path` with torch.save."""
  weights = model.state_dict()
  torch.save(weights, model_path)
124 | 
125 | 
def load(model, model_path):
  """Load a state_dict from `model_path` into `model`."""
  state = torch.load(model_path)
  model.load_state_dict(state)
128 | 
129 | 
def drop_path(x, drop_prob):
  """Randomly zero whole samples of `x` in place and rescale the survivors.

  Each sample (first dimension) is kept with probability 1 - drop_prob and the
  tensor is divided by that keep probability. `x` is modified in place and
  returned; when drop_prob is not positive it is returned untouched.
  """
  if drop_prob > 0.:
    keep_prob = 1.-drop_prob
    # Allocate the mask next to x (same device and dtype) rather than forcing
    # a CUDA float tensor, so CPU tensors work as well.
    mask = x.new_empty((x.size(0), 1, 1, 1)).bernoulli_(keep_prob)
    x.div_(keep_prob)
    x.mul_(mask)
  return x
137 | 
138 | 
def create_exp_dir(path, scripts_to_save=None):
  """Create the experiment directory and optionally snapshot scripts into it.

  Args:
    path: experiment directory; missing parent directories are created too.
    scripts_to_save: optional iterable of file paths copied to `<path>/scripts`.

  The call is safe to repeat: existing directories are reused and previously
  copied scripts are overwritten.
  """
  os.makedirs(path, exist_ok=True)
  print('Experiment dir : {}'.format(path))

  if scripts_to_save is not None:
    scripts_dir = os.path.join(path, 'scripts')
    os.makedirs(scripts_dir, exist_ok=True)
    for script in scripts_to_save:
      dst_file = os.path.join(scripts_dir, os.path.basename(script))
      shutil.copyfile(script, dst_file)
149 | 
150 | 


--------------------------------------------------------------------------------
/advrush/architect.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import numpy as np
  3 | import torch.nn as nn
  4 | from torch.autograd import Variable
  5 | 
  6 | from regularizer import *
  7 | import hessianflow as hf
  8 | import hessianflow.optimizer.optm_utils as hf_optm_utils
  9 | import hessianflow.optimizer.progressbar as hf_optm_pgb
 10 | 
 11 | def _concat(xs):
 12 |   return torch.cat([x.view(-1) for x in xs])
 13 | 
 14 | 
 15 | class Architect(object):
 16 | 
 17 |   def __init__(self, model, args):
 18 |     self.network_momentum = args.momentum
 19 |     self.network_weight_decay = args.weight_decay
 20 |     self.model = model
 21 |     self.optimizer = torch.optim.Adam(self.model.arch_parameters(),
 22 |         lr=args.arch_learning_rate, betas=(0.5, 0.999), weight_decay=args.arch_weight_decay)
 23 | 
 24 |   def _compute_unrolled_model(self, input, target, eta, network_optimizer):
 25 |     logits, loss = self.model._loss(input, target)
 26 |     theta = _concat(self.model.parameters()).data
 27 |     try:
 28 |       moment = _concat(network_optimizer.state[v]['momentum_buffer'] for v in self.model.parameters()).mul_(self.network_momentum)
 29 |     except:
 30 |       moment = torch.zeros_like(theta)
 31 |     dtheta = _concat(torch.autograd.grad(loss, self.model.parameters())).data + self.network_weight_decay*theta
 32 |     unrolled_model = self._construct_model_from_theta(theta.sub(eta, moment+dtheta))
 33 |     return unrolled_model
 34 | 
 35 |   def step(self, input_train, target_train, epoch, warm_epoch, gamma, criterion, loss_hessian, valid_queue, input_valid, target_valid, eta, network_optimizer, unrolled, h):
 36 |     self.optimizer.zero_grad()
 37 |     if unrolled:
 38 |         self._backward_step_unrolled(input_train, target_train, input_valid, target_valid, eta, network_optimizer)
 39 |     else:
 40 |         regularizer = self._backward_step(epoch, warm_epoch, gamma, criterion, loss_hessian, valid_queue, input_valid, target_valid, h)
 41 |     self.optimizer.step()
 42 |     return regularizer
 43 | 
 44 |   def _backward_step(self, epoch, warm_epoch, gamma, criterion, loss_hessian, valid_queue, input_valid, target_valid, h):
 45 |     logits, loss = self.model._loss(input_valid, target_valid)
 46 |     if epoch < warm_epoch:
 47 |       loss = loss #criterion(logits, target)
 48 |       regularizer = torch.tensor(0, dtype=torch.float)
 49 |     else:
 50 |       if loss_hessian == 'loss_cure':
 51 |         reg = loss_cure(self.model, criterion, lambda_=4, device='cuda')
 52 |         regularizer, grad_norm = reg.regularizer(input_valid, target_valid, h=h)
 53 |       else:
 54 |         reg = loss_eigen(self.model, valid_queue, input_valid, target_valid, criterion, full_eigen=False, maxIter=10, tol=1e-2)
 55 |         regularizer, _ = reg.regularizer()
 56 |       loss += gamma * regularizer
 57 |     loss.backward()
 58 |     return regularizer
 59 | 
 60 |   def _backward_step_unrolled(self, input_train, target_train, input_valid, target_valid, eta, network_optimizer):
 61 |     unrolled_model = self._compute_unrolled_model(input_train, target_train, eta, network_optimizer)
 62 |     unrolled_loss = unrolled_model._loss(input_valid, target_valid)
 63 | 
 64 |     unrolled_loss.backward()
 65 |     dalpha = [v.grad for v in unrolled_model.arch_parameters()]
 66 |     vector = [v.grad.data for v in unrolled_model.parameters()]
 67 |     implicit_grads = self._hessian_vector_product(vector, input_train, target_train)
 68 | 
 69 |     for g, ig in zip(dalpha, implicit_grads):
 70 |       g.data.sub_(eta, ig.data)
 71 | 
 72 |     for v, g in zip(self.model.arch_parameters(), dalpha):
 73 |       if v.grad is None:
 74 |         v.grad = Variable(g.data)
 75 |       else:
 76 |         v.grad.data.copy_(g.data)
 77 | 
 78 |   def _construct_model_from_theta(self, theta):
 79 |     model_new = self.model.new()
 80 |     model_dict = self.model.state_dict()
 81 | 
 82 |     params, offset = {}, 0
 83 |     for k, v in self.model.named_parameters():
 84 |       v_length = np.prod(v.size())
 85 |       params[k] = theta[offset: offset+v_length].view(v.size())
 86 |       offset += v_length
 87 | 
 88 |     assert offset == len(theta)
 89 |     model_dict.update(params)
 90 |     model_new.load_state_dict(model_dict)
 91 |     return model_new.cuda()
 92 | 
 93 |   def _hessian_vector_product(self, vector, input, target, r=1e-2):
 94 |     R = r / _concat(vector).norm()
 95 |     for p, v in zip(self.model.parameters(), vector):
 96 |       p.data.add_(R, v)
 97 |     loss = self.model._loss(input, target)
 98 |     grads_p = torch.autograd.grad(loss, self.model.arch_parameters())
 99 | 
100 |     for p, v in zip(self.model.parameters(), vector):
101 |       p.data.sub_(2*R, v)
102 |     loss = self.model._loss(input, target)
103 |     grads_n = torch.autograd.grad(loss, self.model.arch_parameters())
104 | 
105 |     for p, v in zip(self.model.parameters(), vector):
106 |       p.data.add_(R, v)
107 | 
108 |     return [(x-y).div_(2*R) for x, y in zip(grads_p, grads_n)]
109 | 
110 | 


--------------------------------------------------------------------------------
/advrush/trades.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | import torch.nn.functional as F
  4 | from torch.autograd import Variable
  5 | import torch.optim as optim
  6 | 
  7 | 
  8 | def squared_l2_norm(x):
  9 |     flattened = x.view(x.unsqueeze(0).shape[0], -1)
 10 |     return (flattened ** 2).sum(1)
 11 | 
 12 | 
 13 | def l2_norm(x):
 14 |     return squared_l2_norm(x).sqrt()
 15 | 
 16 | 
 17 | def trades_loss(model,
 18 |                 x_natural,
 19 |                 y,
 20 |                 optimizer,
 21 |                 step_size=0.003,
 22 |                 epsilon=0.031,
 23 |                 perturb_steps=10,
 24 |                 beta=1.0,
 25 |                 distance='l_inf'):
 26 |     # define KL-loss
 27 |     criterion_kl = nn.KLDivLoss(size_average=False)
 28 |     model.eval()
 29 |     batch_size = len(x_natural)
 30 |     # generate adversarial example
 31 |     x_adv = x_natural.detach() + 0.001 * torch.randn(x_natural.shape).cuda().detach()
 32 |     if distance == 'l_inf':
 33 |         for _ in range(perturb_steps):
 34 |             x_adv.requires_grad_()
 35 |             with torch.enable_grad():
 36 |                 adv_logits, _ = model(x_adv)
 37 |                 clean_logits, _ = model(x_natural)
 38 |                 loss_kl = criterion_kl(F.log_softmax(adv_logits, dim=1), #model(x_adv)
 39 |                                        F.softmax(clean_logits, dim=1)) #model(x_natural)
 40 |             grad = torch.autograd.grad(loss_kl, [x_adv])[0]
 41 |             x_adv = x_adv.detach() + step_size * torch.sign(grad.detach())
 42 |             x_adv = torch.min(torch.max(x_adv, x_natural - epsilon), x_natural + epsilon)
 43 |             x_adv = torch.clamp(x_adv, 0.0, 1.0)
 44 |     elif distance == 'l_2':
 45 |         delta = 0.001 * torch.randn(x_natural.shape).cuda().detach()
 46 |         delta = Variable(delta.data, requires_grad=True)
 47 | 
 48 |         # Setup optimizers
 49 |         optimizer_delta = optim.SGD([delta], lr=epsilon / perturb_steps * 2)
 50 | 
 51 |         for _ in range(perturb_steps):
 52 |             adv = x_natural + delta
 53 | 
 54 |             # optimize
 55 |             optimizer_delta.zero_grad()
 56 |             with torch.enable_grad():
 57 |                 loss = (-1) * criterion_kl(F.log_softmax(model(adv), dim=1),
 58 |                                            F.softmax(model(x_natural), dim=1))
 59 |             loss.backward()
 60 |             # renorming gradient
 61 |             grad_norms = delta.grad.view(batch_size, -1).norm(p=2, dim=1)
 62 |             delta.grad.div_(grad_norms.view(-1, 1, 1, 1))
 63 |             # avoid nan or inf if gradient is 0
 64 |             if (grad_norms == 0).any():
 65 |                 delta.grad[grad_norms == 0] = torch.randn_like(delta.grad[grad_norms == 0])
 66 |             optimizer_delta.step()
 67 | 
 68 |             # projection
 69 |             delta.data.add_(x_natural)
 70 |             delta.data.clamp_(0, 1).sub_(x_natural)
 71 |             delta.data.renorm_(p=2, dim=0, maxnorm=epsilon)
 72 |         x_adv = Variable(x_natural + delta, requires_grad=False)
 73 |     else:
 74 |         x_adv = torch.clamp(x_adv, 0.0, 1.0)
 75 |     model.train()
 76 | 
 77 |     x_adv = Variable(torch.clamp(x_adv, 0.0, 1.0), requires_grad=False)
 78 |     # zero gradient
 79 |     optimizer.zero_grad()
 80 |     # calculate robust loss
 81 |     clean_logits_new, _ = model(x_natural)
 82 |     adv_logits_new, _ = model(x_natural)
 83 |     loss_natural = F.cross_entropy(clean_logits_new, y) #model(x_natural)
 84 |     loss_robust = (1.0 / batch_size) * criterion_kl(F.log_softmax(adv_logits_new, dim=1), #model(x_adv)
 85 |                                                     F.softmax(clean_logits_new, dim=1)) #model(x_natural)
 86 |     loss = loss_natural + beta * loss_robust
 87 |     return loss
 88 | 
 89 | 
 90 | def madry_loss(model,
 91 |                x_natural,
 92 |                y,
 93 |                optimizer,
 94 |                step_size=0.003,
 95 |                epsilon=0.031,
 96 |                perturb_steps=10,
 97 |                distance='l_inf',
 98 |                ):
 99 |     # define KL-loss
100 |     criterion_ce = torch.nn.CrossEntropyLoss(reduction='none')
101 |     model.eval()
102 |     batch_size = len(x_natural)
103 | 
104 |     # generate adversarial example
105 |     x_adv = x_natural.detach() + 0.001 * torch.randn(x_natural.shape).cuda().detach()
106 | 
107 |     if distance == 'l_inf':
108 |         for _ in range(perturb_steps):
109 |             x_adv.requires_grad_()
110 |             with torch.enable_grad():
111 |                 logits, _ = model(x_adv)
112 |                 loss_ce = criterion_ce(logits, y).mean()
113 |             grad = torch.autograd.grad(loss_ce, [x_adv])[0]
114 |             x_adv = x_adv.detach() + step_size * torch.sign(grad.detach())
115 |             x_adv = torch.min(torch.max(x_adv, x_natural - epsilon), x_natural + epsilon)
116 |             x_adv = torch.clamp(x_adv, 0.0, 1.0)
117 |     else:
118 |         x_adv = torch.clamp(x_adv, 0.0, 1.0)
119 |     model.train()
120 | 
121 |     x_adv = Variable(torch.clamp(x_adv, 0.0, 1.0), requires_grad=False)
122 |     # zero gradient
123 |     optimizer.zero_grad()
124 | 
125 |     logits, _ = model(x_adv)
126 |     loss = F.cross_entropy(logits, y)
127 | 
128 |     return loss
129 | 


--------------------------------------------------------------------------------
/advrush/utils.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import numpy as np
  3 | import torch
  4 | import shutil
  5 | import torchvision.transforms as transforms
  6 | import torchvision.datasets as dset
  7 | from torch.autograd import Variable
  8 | 
  9 | 
 10 | class AvgrageMeter(object):
 11 | 
 12 |   def __init__(self):
 13 |     self.reset()
 14 | 
 15 |   def reset(self):
 16 |     self.avg = 0
 17 |     self.sum = 0
 18 |     self.cnt = 0
 19 | 
 20 |   def update(self, val, n=1):
 21 |     self.sum += val * n
 22 |     self.cnt += n
 23 |     self.avg = self.sum / self.cnt
 24 | 
 25 | 
 26 | def accuracy(output, target, topk=(1,)):
 27 |   maxk = max(topk)
 28 |   batch_size = target.size(0)
 29 | 
 30 |   _, pred = output.topk(maxk, 1, True, True)
 31 |   pred = pred.t()
 32 |   correct = pred.eq(target.view(1, -1).expand_as(pred))
 33 | 
 34 |   res = []
 35 |   for k in topk:
 36 |     correct_k = correct[:k].view(-1).float().sum(0)
 37 |     res.append(correct_k.mul_(100.0/batch_size))
 38 |   return res
 39 | 
 40 | 
 41 | class Cutout(object):
 42 |     def __init__(self, length):
 43 |         self.length = length
 44 | 
 45 |     def __call__(self, img):
 46 |         h, w = img.size(1), img.size(2)
 47 |         mask = np.ones((h, w), np.float32)
 48 |         y = np.random.randint(h)
 49 |         x = np.random.randint(w)
 50 | 
 51 |         y1 = np.clip(y - self.length // 2, 0, h)
 52 |         y2 = np.clip(y + self.length // 2, 0, h)
 53 |         x1 = np.clip(x - self.length // 2, 0, w)
 54 |         x2 = np.clip(x + self.length // 2, 0, w)
 55 | 
 56 |         mask[y1: y2, x1: x2] = 0.
 57 |         mask = torch.from_numpy(mask)
 58 |         mask = mask.expand_as(img)
 59 |         img *= mask
 60 |         return img
 61 | 
 62 | 
 63 | def _data_transforms_cifar10(args):
 64 |   CIFAR_MEAN = [0.49139968, 0.48215827, 0.44653124]
 65 |   CIFAR_STD = [0.24703233, 0.24348505, 0.26158768]
 66 | 
 67 |   train_transform = transforms.Compose([
 68 |     transforms.RandomCrop(32, padding=4),
 69 |     transforms.RandomHorizontalFlip(),
 70 |     transforms.ToTensor(),
 71 |     transforms.Normalize(CIFAR_MEAN, CIFAR_STD),
 72 |   ])
 73 |   if args.cutout:
 74 |     train_transform.transforms.append(Cutout(args.cutout_length))
 75 | 
 76 |   valid_transform = transforms.Compose([
 77 |     transforms.ToTensor(),
 78 |     transforms.Normalize(CIFAR_MEAN, CIFAR_STD),
 79 |     ])
 80 |   return train_transform, valid_transform
 81 | 
 82 | def _data_transforms_cifar10_eval(args):
 83 |   train_transform = transforms.Compose([
 84 |     transforms.RandomCrop(32, padding=4),
 85 |     transforms.RandomHorizontalFlip(),
 86 |     transforms.ToTensor(),
 87 |   ])
 88 |   if args.cutout:
 89 |     train_transform.transforms.append(Cutout(args.cutout_length))
 90 | 
 91 |   valid_transform = transforms.Compose([
 92 |     transforms.ToTensor()
 93 |     ])
 94 |   return train_transform, valid_transform
 95 | 
 96 | def _data_transforms_cifar100(args):
 97 |   CIFAR_MEAN = [0.5071, 0.4867, 0.4408]
 98 |   CIFAR_STD = [0.2675, 0.2565, 0.2761]
 99 | 
100 |   train_transform = transforms.Compose([
101 |     transforms.RandomCrop(32, padding=4),
102 |     transforms.RandomHorizontalFlip(),
103 |     transforms.ToTensor(),
104 |     transforms.Normalize(CIFAR_MEAN, CIFAR_STD),
105 |   ])
106 |   if args.cutout:
107 |     train_transform.transforms.append(Cutout(args.cutout_length))
108 | 
109 |   valid_transform = transforms.Compose([
110 |     transforms.ToTensor(),
111 |     transforms.Normalize(CIFAR_MEAN, CIFAR_STD),
112 |     ])
113 |   return train_transform, valid_transform
114 | 
def _data_imagenet(args):
  """Build an ImageFolder dataset over `<args.data>/train` with training augmentation."""
  train_root = os.path.join(args.data, 'train')
  normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
  augmentation = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    normalize,
  ])
  train_data = dset.ImageFolder(train_root, augmentation)
  return train_data
128 | 
def count_parameters_in_MB(model):
  """Return the model's parameter count in millions.

  Parameters whose name contains "auxiliary" are excluded. Uses the builtin
  `sum` because passing a generator to `np.sum` is deprecated by NumPy.
  """
  total = sum(v.numel() for name, v in model.named_parameters() if "auxiliary" not in name)
  return total/1e6
131 | 
132 | 
def save_checkpoint(state, is_best, save, epoch):
  """Save `state` as a per-epoch checkpoint in directory `save`.

  When `is_best` is truthy the checkpoint is also copied to
  `model_best.pth.tar` in the same directory, replacing any earlier copy.
  """
  checkpoint_path = os.path.join(save, 'checkpoint-epoch{}.pth.tar'.format(epoch))
  torch.save(state, checkpoint_path)
  if not is_best:
    return
  best_path = os.path.join(save, 'model_best.pth.tar')
  shutil.copyfile(checkpoint_path, best_path)
139 | 
140 | 
def save(model, model_path):
  """Write the model's state_dict to `model_path` with torch.save."""
  weights = model.state_dict()
  torch.save(weights, model_path)
143 | 
144 | 
def load(model, model_path):
  """Load a state_dict from `model_path` into `model`."""
  state = torch.load(model_path)
  model.load_state_dict(state)
147 | 
148 | 
def drop_path(x, drop_prob):
  """Randomly zero whole samples of `x` in place and rescale the survivors.

  Each sample (first dimension) is kept with probability 1 - drop_prob and the
  tensor is divided by that keep probability. `x` is modified in place and
  returned; when drop_prob is not positive it is returned untouched.
  """
  if drop_prob > 0.:
    keep_prob = 1.-drop_prob
    # Allocate the mask next to x (same device and dtype) rather than forcing
    # a CUDA float tensor, so CPU tensors work as well.
    mask = x.new_empty((x.size(0), 1, 1, 1)).bernoulli_(keep_prob)
    x.div_(keep_prob)
    x.mul_(mask)
  return x
156 | 
157 | 
def create_exp_dir(path, scripts_to_save=None):
  """Create the experiment directory and optionally snapshot scripts into it.

  Args:
    path: experiment directory; missing parent directories are created too.
    scripts_to_save: optional iterable of file paths copied to `<path>/scripts`.

  The call is safe to repeat: existing directories are reused and previously
  copied scripts are overwritten.
  """
  os.makedirs(path, exist_ok=True)
  print('Experiment dir : {}'.format(path))

  if scripts_to_save is not None:
    scripts_dir = os.path.join(path, 'scripts')
    os.makedirs(scripts_dir, exist_ok=True)
    for script in scripts_to_save:
      dst_file = os.path.join(scripts_dir, os.path.basename(script))
      shutil.copyfile(script, dst_file)
168 | 
169 | 


--------------------------------------------------------------------------------
/advrush/model_search.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | import torch.nn.functional as F
  4 | from operations import *
  5 | from torch.autograd import Variable
  6 | from genotypes import PRIMITIVES
  7 | from genotypes import Genotype
  8 | 
  9 | 
 10 | class MixedOp(nn.Module):
 11 | 
 12 |   def __init__(self, C, stride):
 13 |     super(MixedOp, self).__init__()
 14 |     self._ops = nn.ModuleList()
 15 |     for primitive in PRIMITIVES:
 16 |       op = OPS[primitive](C, stride, False)
 17 |       if 'pool' in primitive:
 18 |         op = nn.Sequential(op, nn.BatchNorm2d(C, affine=False))
 19 |       self._ops.append(op)
 20 | 
 21 |   def forward(self, x, weights):
 22 |     return sum(w * op(x) for w, op in zip(weights, self._ops))
 23 | 
 24 | 
 25 | class Cell(nn.Module):
 26 | 
 27 |   def __init__(self, steps, multiplier, C_prev_prev, C_prev, C, reduction, reduction_prev):
 28 |     super(Cell, self).__init__()
 29 |     self.reduction = reduction
 30 | 
 31 |     if reduction_prev:
 32 |       self.preprocess0 = FactorizedReduce(C_prev_prev, C, affine=False)
 33 |     else:
 34 |       self.preprocess0 = ReLUConvBN(C_prev_prev, C, 1, 1, 0, affine=False)
 35 |     self.preprocess1 = ReLUConvBN(C_prev, C, 1, 1, 0, affine=False)
 36 |     self._steps = steps
 37 |     self._multiplier = multiplier
 38 | 
 39 |     self._ops = nn.ModuleList()
 40 |     self._bns = nn.ModuleList()
 41 |     for i in range(self._steps):
 42 |       for j in range(2+i):
 43 |         stride = 2 if reduction and j < 2 else 1
 44 |         op = MixedOp(C, stride)
 45 |         self._ops.append(op)
 46 | 
 47 |   def forward(self, s0, s1, weights):
 48 |     s0 = self.preprocess0(s0)
 49 |     s1 = self.preprocess1(s1)
 50 | 
 51 |     states = [s0, s1]
 52 |     offset = 0
 53 |     for i in range(self._steps):
 54 |       s = sum(self._ops[offset+j](h, weights[offset+j]) for j, h in enumerate(states))
 55 |       offset += len(states)
 56 |       states.append(s)
 57 | 
 58 |     return torch.cat(states[-self._multiplier:], dim=1)
 59 | 
 60 | 
 61 | class Network(nn.Module):
 62 | 
 63 |   def __init__(self, C, num_classes, layers, criterion, steps=4, multiplier=4, stem_multiplier=3):
 64 |     super(Network, self).__init__()
 65 |     self._C = C
 66 |     self._num_classes = num_classes
 67 |     self._layers = layers
 68 |     self._criterion = criterion
 69 |     self._steps = steps
 70 |     self._multiplier = multiplier
 71 | 
 72 |     C_curr = stem_multiplier*C
 73 |     self.stem = nn.Sequential(
 74 |       nn.Conv2d(3, C_curr, 3, padding=1, bias=False),
 75 |       nn.BatchNorm2d(C_curr)
 76 |     )
 77 |  
 78 |     C_prev_prev, C_prev, C_curr = C_curr, C_curr, C
 79 |     self.cells = nn.ModuleList()
 80 |     reduction_prev = False
 81 |     for i in range(layers):
 82 |       if i in [layers//3, 2*layers//3]:
 83 |         C_curr *= 2
 84 |         reduction = True
 85 |       else:
 86 |         reduction = False
 87 |       cell = Cell(steps, multiplier, C_prev_prev, C_prev, C_curr, reduction, reduction_prev)
 88 |       reduction_prev = reduction
 89 |       self.cells += [cell]
 90 |       C_prev_prev, C_prev = C_prev, multiplier*C_curr
 91 | 
 92 |     self.global_pooling = nn.AdaptiveAvgPool2d(1)
 93 |     self.classifier = nn.Linear(C_prev, num_classes)
 94 | 
 95 |     self._initialize_alphas()
 96 | 
 97 |   def new(self):
 98 |     model_new = Network(self._C, self._num_classes, self._layers, self._criterion).cuda()
 99 |     for x, y in zip(model_new.arch_parameters(), self.arch_parameters()):
100 |         x.data.copy_(y.data)
101 |     return model_new
102 | 
103 |   def forward(self, input):
104 |     s0 = s1 = self.stem(input)
105 |     for i, cell in enumerate(self.cells):
106 |       if cell.reduction:
107 |         weights = F.softmax(self.alphas_reduce, dim=-1)
108 |       else:
109 |         weights = F.softmax(self.alphas_normal, dim=-1)
110 |       s0, s1 = s1, cell(s0, s1, weights)
111 |     out = self.global_pooling(s1)
112 |     logits = self.classifier(out.view(out.size(0),-1))
113 |     return logits
114 | 
115 |   def _loss(self, input, target):
116 |     logits = self(input)
117 |     return logits, self._criterion(logits, target)
118 | 
119 |   def _initialize_alphas(self):
120 |     k = sum(1 for i in range(self._steps) for n in range(2+i))
121 |     num_ops = len(PRIMITIVES)
122 | 
123 |     self.alphas_normal = Variable(1e-3*torch.randn(k, num_ops).cuda(), requires_grad=True)
124 |     self.alphas_reduce = Variable(1e-3*torch.randn(k, num_ops).cuda(), requires_grad=True)
125 |     self._arch_parameters = [
126 |       self.alphas_normal,
127 |       self.alphas_reduce,
128 |     ]
129 | 
130 |   def arch_parameters(self):
131 |     return self._arch_parameters
132 | 
133 |   def restore(self, alphas_normal, alphas_reduce):
134 |     self.alphas_normal = alphas_normal
135 |     self.alphas_reduce = alphas_reduce
136 |     self.alphas_normal = Variable(self.alphas_normal, requires_grad=True)
137 |     self.alphas_reduce = Variable(self.alphas_reduce, requires_grad=True)
138 | 
139 |   def genotype(self):
140 | 
141 |     def _parse(weights):
142 |       gene = []
143 |       n = 2
144 |       start = 0
145 |       for i in range(self._steps):
146 |         end = start + n
147 |         W = weights[start:end].copy()
148 |         edges = sorted(range(i + 2), key=lambda x: -max(W[x][k] for k in range(len(W[x])) if k != PRIMITIVES.index('none')))[:2]
149 |         for j in edges:
150 |           k_best = None
151 |           for k in range(len(W[j])):
152 |             if k != PRIMITIVES.index('none'):
153 |               if k_best is None or W[j][k] > W[j][k_best]:
154 |                 k_best = k
155 |           gene.append((PRIMITIVES[k_best], j))
156 |         start = end
157 |         n += 1
158 |       return gene
159 | 
160 |     gene_normal = _parse(F.softmax(self.alphas_normal, dim=-1).data.cpu().numpy())
161 |     gene_reduce = _parse(F.softmax(self.alphas_reduce, dim=-1).data.cpu().numpy())
162 | 
163 |     concat = range(2+self._steps-self._multiplier, self._steps+2)
164 |     genotype = Genotype(
165 |       normal=gene_normal, normal_concat=concat,
166 |       reduce=gene_reduce, reduce_concat=concat
167 |     )
168 |     return genotype
169 | 
170 | 


--------------------------------------------------------------------------------
/advrush/model.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | from operations import *
  4 | from torch.autograd import Variable
  5 | from utils import drop_path
  6 | 
  7 | 
  8 | class Cell(nn.Module):
  9 | 
 10 |   def __init__(self, genotype, C_prev_prev, C_prev, C, reduction, reduction_prev):
 11 |     super(Cell, self).__init__()
 12 |     print(C_prev_prev, C_prev, C)
 13 | 
 14 |     if reduction_prev:
 15 |       self.preprocess0 = FactorizedReduce(C_prev_prev, C)
 16 |     else:
 17 |       self.preprocess0 = ReLUConvBN(C_prev_prev, C, 1, 1, 0)
 18 |     self.preprocess1 = ReLUConvBN(C_prev, C, 1, 1, 0)
 19 |     
 20 |     if reduction:
 21 |       op_names, indices = zip(*genotype.reduce)
 22 |       concat = genotype.reduce_concat
 23 |     else:
 24 |       op_names, indices = zip(*genotype.normal)
 25 |       concat = genotype.normal_concat
 26 |     self._compile(C, op_names, indices, concat, reduction)
 27 | 
 28 |   def _compile(self, C, op_names, indices, concat, reduction):
 29 |     assert len(op_names) == len(indices)
 30 |     self._steps = len(op_names) // 2
 31 |     self._concat = concat
 32 |     self.multiplier = len(concat)
 33 | 
 34 |     self._ops = nn.ModuleList()
 35 |     for name, index in zip(op_names, indices):
 36 |       stride = 2 if reduction and index < 2 else 1
 37 |       op = OPS[name](C, stride, True)
 38 |       self._ops += [op]
 39 |     self._indices = indices
 40 | 
 41 |   def forward(self, s0, s1, drop_prob):
 42 |     s0 = self.preprocess0(s0)
 43 |     s1 = self.preprocess1(s1)
 44 | 
 45 |     states = [s0, s1]
 46 |     for i in range(self._steps):
 47 |       h1 = states[self._indices[2*i]]
 48 |       h2 = states[self._indices[2*i+1]]
 49 |       op1 = self._ops[2*i]
 50 |       op2 = self._ops[2*i+1]
 51 |       h1 = op1(h1)
 52 |       h2 = op2(h2)
 53 |       if self.training and drop_prob > 0.:
 54 |         if not isinstance(op1, Identity):
 55 |           h1 = drop_path(h1, drop_prob)
 56 |         if not isinstance(op2, Identity):
 57 |           h2 = drop_path(h2, drop_prob)
 58 |       s = h1 + h2
 59 |       states += [s]
 60 |     return torch.cat([states[i] for i in self._concat], dim=1)
 61 | 
 62 | 
 63 | class AuxiliaryHeadCIFAR(nn.Module):
 64 | 
 65 |   def __init__(self, C, num_classes):
 66 |     """assuming input size 8x8"""
 67 |     super(AuxiliaryHeadCIFAR, self).__init__()
 68 |     self.features = nn.Sequential(
 69 |       nn.ReLU(inplace=True),
 70 |       nn.AvgPool2d(5, stride=3, padding=0, count_include_pad=False), # image size = 2 x 2
 71 |       nn.Conv2d(C, 128, 1, bias=False),
 72 |       nn.BatchNorm2d(128),
 73 |       nn.ReLU(inplace=True),
 74 |       nn.Conv2d(128, 768, 2, bias=False),
 75 |       nn.BatchNorm2d(768),
 76 |       nn.ReLU(inplace=True)
 77 |     )
 78 |     self.classifier = nn.Linear(768, num_classes)
 79 | 
 80 |   def forward(self, x):
 81 |     x = self.features(x)
 82 |     x = self.classifier(x.view(x.size(0),-1))
 83 |     return x
 84 | 
 85 | 
 86 | class AuxiliaryHeadImageNet(nn.Module):
 87 | 
 88 |   def __init__(self, C, num_classes):
 89 |     """assuming input size 14x14"""
 90 |     super(AuxiliaryHeadImageNet, self).__init__()
 91 |     self.features = nn.Sequential(
 92 |       nn.ReLU(inplace=True),
 93 |       nn.AvgPool2d(5, stride=2, padding=0, count_include_pad=False),
 94 |       nn.Conv2d(C, 128, 1, bias=False),
 95 |       nn.BatchNorm2d(128),
 96 |       nn.ReLU(inplace=True),
 97 |       nn.Conv2d(128, 768, 2, bias=False),
 98 |       # NOTE: This batchnorm was omitted in my earlier implementation due to a typo.
 99 |       # Commenting it out for consistency with the experiments in the paper.
100 |       # nn.BatchNorm2d(768),
101 |       nn.ReLU(inplace=True)
102 |     )
103 |     self.classifier = nn.Linear(768, num_classes)
104 | 
105 |   def forward(self, x):
106 |     x = self.features(x)
107 |     x = self.classifier(x.view(x.size(0),-1))
108 |     return x
109 | 
110 | 
class NetworkCIFAR(nn.Module):
  """CIFAR network: a conv stem, `layers` genotype Cells and a linear classifier.

  Cells at indices layers//3 and 2*layers//3 are reduction cells and
  double the channel count. With `auxiliary` set, an AuxiliaryHeadCIFAR
  is attached to the output of cell 2*layers//3.
  """

  def __init__(self, C, num_classes, layers, auxiliary, genotype):
    super(NetworkCIFAR, self).__init__()
    self._layers = layers
    self._auxiliary = auxiliary
    # forward() reads this attribute; default to no drop-path so the
    # model is usable even if the caller never assigns it. Training
    # scripts may overwrite it at any time.
    self.drop_path_prob = 0.

    stem_multiplier = 3
    C_curr = stem_multiplier*C
    self.stem = nn.Sequential(
      nn.Conv2d(3, C_curr, 3, padding=1, bias=False),
      nn.BatchNorm2d(C_curr)
    )

    C_prev_prev, C_prev, C_curr = C_curr, C_curr, C
    self.cells = nn.ModuleList()
    reduction_prev = False
    for i in range(layers):
      if i in [layers//3, 2*layers//3]:
        C_curr *= 2
        reduction = True
      else:
        reduction = False
      cell = Cell(genotype, C_prev_prev, C_prev, C_curr, reduction, reduction_prev)
      reduction_prev = reduction
      self.cells += [cell]
      C_prev_prev, C_prev = C_prev, cell.multiplier*C_curr
      # Channel count seen by the auxiliary head.
      if i == 2*layers//3:
        C_to_auxiliary = C_prev

    if auxiliary:
      self.auxiliary_head = AuxiliaryHeadCIFAR(C_to_auxiliary, num_classes)
    self.global_pooling = nn.AdaptiveAvgPool2d(1)
    self.classifier = nn.Linear(C_prev, num_classes)

  def forward(self, input):
    """Return (logits, logits_aux).

    `logits_aux` is None unless the auxiliary head is enabled and the
    module is in training mode.
    """
    logits_aux = None
    s0 = s1 = self.stem(input)
    for i, cell in enumerate(self.cells):
      s0, s1 = s1, cell(s0, s1, self.drop_path_prob)
      if i == 2*self._layers//3:
        if self._auxiliary and self.training:
          logits_aux = self.auxiliary_head(s1)
    out = self.global_pooling(s1)
    logits = self.classifier(out.view(out.size(0),-1))
    return logits, logits_aux
157 | 
158 | 
class NetworkImageNet(nn.Module):
  """ImageNet network: two strided stems, `layers` genotype Cells and a classifier.

  stem0 applies two stride-2 convolutions and stem1 a third one. Cells at
  indices layers//3 and 2*layers//3 are reduction cells and double the
  channel count. With `auxiliary` set, an AuxiliaryHeadImageNet is
  attached to the output of cell 2*layers//3.
  """

  def __init__(self, C, num_classes, layers, auxiliary, genotype):
    super(NetworkImageNet, self).__init__()
    self._layers = layers
    self._auxiliary = auxiliary
    # forward() reads this attribute; default to no drop-path so the
    # model is usable even if the caller never assigns it. Training
    # scripts may overwrite it at any time.
    self.drop_path_prob = 0.

    self.stem0 = nn.Sequential(
      nn.Conv2d(3, C // 2, kernel_size=3, stride=2, padding=1, bias=False),
      nn.BatchNorm2d(C // 2),
      nn.ReLU(inplace=True),
      nn.Conv2d(C // 2, C, 3, stride=2, padding=1, bias=False),
      nn.BatchNorm2d(C),
    )

    self.stem1 = nn.Sequential(
      nn.ReLU(inplace=True),
      nn.Conv2d(C, C, 3, stride=2, padding=1, bias=False),
      nn.BatchNorm2d(C),
    )

    C_prev_prev, C_prev, C_curr = C, C, C

    self.cells = nn.ModuleList()
    # stem1 halves the resolution of stem0's output, so the first cell
    # treats its older input as coming from a reduction.
    reduction_prev = True
    for i in range(layers):
      if i in [layers // 3, 2 * layers // 3]:
        C_curr *= 2
        reduction = True
      else:
        reduction = False
      cell = Cell(genotype, C_prev_prev, C_prev, C_curr, reduction, reduction_prev)
      reduction_prev = reduction
      self.cells += [cell]
      C_prev_prev, C_prev = C_prev, cell.multiplier * C_curr
      # Channel count seen by the auxiliary head.
      if i == 2 * layers // 3:
        C_to_auxiliary = C_prev

    if auxiliary:
      self.auxiliary_head = AuxiliaryHeadImageNet(C_to_auxiliary, num_classes)
    self.global_pooling = nn.AvgPool2d(7)
    self.classifier = nn.Linear(C_prev, num_classes)

  def forward(self, input):
    """Return (logits, logits_aux).

    `logits_aux` is None unless the auxiliary head is enabled and the
    module is in training mode.
    """
    logits_aux = None
    s0 = self.stem0(input)
    s1 = self.stem1(s0)
    for i, cell in enumerate(self.cells):
      s0, s1 = s1, cell(s0, s1, self.drop_path_prob)
      if i == 2 * self._layers // 3:
        if self._auxiliary and self.training:
          logits_aux = self.auxiliary_head(s1)
    out = self.global_pooling(s1)
    logits = self.classifier(out.view(out.size(0), -1))
    return logits, logits_aux
214 | 
215 | 


--------------------------------------------------------------------------------
/eval/model.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | from operations import *
  4 | from torch.autograd import Variable
  5 | from utils import drop_path
  6 | 
  7 | 
  8 | class Cell(nn.Module):
  9 | 
 10 |   def __init__(self, genotype, C_prev_prev, C_prev, C, reduction, reduction_prev):
 11 |     super(Cell, self).__init__()
 12 |     print(C_prev_prev, C_prev, C)
 13 | 
 14 |     if reduction_prev:
 15 |       self.preprocess0 = FactorizedReduce(C_prev_prev, C)
 16 |     else:
 17 |       self.preprocess0 = ReLUConvBN(C_prev_prev, C, 1, 1, 0)
 18 |     self.preprocess1 = ReLUConvBN(C_prev, C, 1, 1, 0)
 19 |     
 20 |     if reduction:
 21 |       op_names, indices = zip(*genotype.reduce)
 22 |       concat = genotype.reduce_concat
 23 |     else:
 24 |       op_names, indices = zip(*genotype.normal)
 25 |       concat = genotype.normal_concat
 26 |     self._compile(C, op_names, indices, concat, reduction)
 27 | 
 28 |   def _compile(self, C, op_names, indices, concat, reduction):
 29 |     assert len(op_names) == len(indices)
 30 |     self._steps = len(op_names) // 2
 31 |     self._concat = concat
 32 |     self.multiplier = len(concat)
 33 | 
 34 |     self._ops = nn.ModuleList()
 35 |     for name, index in zip(op_names, indices):
 36 |       stride = 2 if reduction and index < 2 else 1
 37 |       op = OPS[name](C, stride, True)
 38 |       self._ops += [op]
 39 |     self._indices = indices
 40 | 
 41 |   def forward(self, s0, s1, drop_prob):
 42 |     s0 = self.preprocess0(s0)
 43 |     s1 = self.preprocess1(s1)
 44 | 
 45 |     states = [s0, s1]
 46 |     for i in range(self._steps):
 47 |       h1 = states[self._indices[2*i]]
 48 |       h2 = states[self._indices[2*i+1]]
 49 |       op1 = self._ops[2*i]
 50 |       op2 = self._ops[2*i+1]
 51 |       h1 = op1(h1)
 52 |       h2 = op2(h2)
 53 |       if self.training and drop_prob > 0.:
 54 |         if not isinstance(op1, Identity):
 55 |           h1 = drop_path(h1, drop_prob)
 56 |         if not isinstance(op2, Identity):
 57 |           h2 = drop_path(h2, drop_prob)
 58 |       s = h1 + h2
 59 |       states += [s]
 60 |     return torch.cat([states[i] for i in self._concat], dim=1)
 61 | 
 62 | 
 63 | class AuxiliaryHeadCIFAR(nn.Module):
 64 | 
 65 |   def __init__(self, C, num_classes):
 66 |     """assuming input size 8x8"""
 67 |     super(AuxiliaryHeadCIFAR, self).__init__()
 68 |     self.features = nn.Sequential(
 69 |       nn.ReLU(inplace=True),
 70 |       nn.AvgPool2d(5, stride=3, padding=0, count_include_pad=False), # image size = 2 x 2
 71 |       nn.Conv2d(C, 128, 1, bias=False),
 72 |       nn.BatchNorm2d(128),
 73 |       nn.ReLU(inplace=True),
 74 |       nn.Conv2d(128, 768, 2, bias=False),
 75 |       nn.BatchNorm2d(768),
 76 |       nn.ReLU(inplace=True)
 77 |     )
 78 |     self.classifier = nn.Linear(768, num_classes)
 79 | 
 80 |   def forward(self, x):
 81 |     x = self.features(x)
 82 |     x = self.classifier(x.view(x.size(0),-1))
 83 |     return x
 84 | 
 85 | 
 86 | class AuxiliaryHeadImageNet(nn.Module):
 87 | 
 88 |   def __init__(self, C, num_classes):
 89 |     """assuming input size 14x14"""
 90 |     super(AuxiliaryHeadImageNet, self).__init__()
 91 |     self.features = nn.Sequential(
 92 |       nn.ReLU(inplace=True),
 93 |       nn.AvgPool2d(5, stride=2, padding=0, count_include_pad=False),
 94 |       nn.Conv2d(C, 128, 1, bias=False),
 95 |       nn.BatchNorm2d(128),
 96 |       nn.ReLU(inplace=True),
 97 |       nn.Conv2d(128, 768, 2, bias=False),
 98 |       # NOTE: This batchnorm was omitted in my earlier implementation due to a typo.
 99 |       # Commenting it out for consistency with the experiments in the paper.
100 |       # nn.BatchNorm2d(768),
101 |       nn.ReLU(inplace=True)
102 |     )
103 |     self.classifier = nn.Linear(768, num_classes)
104 | 
105 |   def forward(self, x):
106 |     x = self.features(x)
107 |     x = self.classifier(x.view(x.size(0),-1))
108 |     return x
109 | 
110 | 
class NetworkCIFAR(nn.Module):
  """CIFAR network for evaluation: a conv stem, `layers` genotype Cells and a classifier.

  Cells at indices layers//3 and 2*layers//3 are reduction cells and
  double the channel count. With `auxiliary` set, an AuxiliaryHeadCIFAR
  is attached to the output of cell 2*layers//3. Unlike the training
  variant, forward() returns the logits only.
  """

  def __init__(self, C, num_classes, layers, auxiliary, genotype):
    super(NetworkCIFAR, self).__init__()
    self._layers = layers
    self._auxiliary = auxiliary
    # forward() reads this attribute; default to no drop-path so the
    # model is usable even if the caller never assigns it. Callers may
    # overwrite it at any time.
    self.drop_path_prob = 0.

    stem_multiplier = 3
    C_curr = stem_multiplier*C
    self.stem = nn.Sequential(
      nn.Conv2d(3, C_curr, 3, padding=1, bias=False),
      nn.BatchNorm2d(C_curr)
    )

    C_prev_prev, C_prev, C_curr = C_curr, C_curr, C
    self.cells = nn.ModuleList()
    reduction_prev = False
    for i in range(layers):
      if i in [layers//3, 2*layers//3]:
        C_curr *= 2
        reduction = True
      else:
        reduction = False
      cell = Cell(genotype, C_prev_prev, C_prev, C_curr, reduction, reduction_prev)
      reduction_prev = reduction
      self.cells += [cell]
      C_prev_prev, C_prev = C_prev, cell.multiplier*C_curr
      # Channel count seen by the auxiliary head.
      if i == 2*layers//3:
        C_to_auxiliary = C_prev

    if auxiliary:
      self.auxiliary_head = AuxiliaryHeadCIFAR(C_to_auxiliary, num_classes)
    self.global_pooling = nn.AdaptiveAvgPool2d(1)
    self.classifier = nn.Linear(C_prev, num_classes)

  def forward(self, input):
    """Return the classification logits only.

    The auxiliary logits are still computed when the auxiliary head is
    enabled and the module is in training mode, but they are not returned.
    """
    logits_aux = None
    s0 = s1 = self.stem(input)
    for i, cell in enumerate(self.cells):
      s0, s1 = s1, cell(s0, s1, self.drop_path_prob)
      if i == 2*self._layers//3:
        if self._auxiliary and self.training:
          logits_aux = self.auxiliary_head(s1)
    out = self.global_pooling(s1)
    logits = self.classifier(out.view(out.size(0),-1))
    return logits#, logits_aux
157 | 
158 | 
class NetworkImageNet(nn.Module):
  """ImageNet network: two strided stems, `layers` genotype Cells and a classifier.

  stem0 applies two stride-2 convolutions and stem1 a third one. Cells at
  indices layers//3 and 2*layers//3 are reduction cells and double the
  channel count. With `auxiliary` set, an AuxiliaryHeadImageNet is
  attached to the output of cell 2*layers//3.
  """

  def __init__(self, C, num_classes, layers, auxiliary, genotype):
    super(NetworkImageNet, self).__init__()
    self._layers = layers
    self._auxiliary = auxiliary
    # forward() reads this attribute; default to no drop-path so the
    # model is usable even if the caller never assigns it. Callers may
    # overwrite it at any time.
    self.drop_path_prob = 0.

    self.stem0 = nn.Sequential(
      nn.Conv2d(3, C // 2, kernel_size=3, stride=2, padding=1, bias=False),
      nn.BatchNorm2d(C // 2),
      nn.ReLU(inplace=True),
      nn.Conv2d(C // 2, C, 3, stride=2, padding=1, bias=False),
      nn.BatchNorm2d(C),
    )

    self.stem1 = nn.Sequential(
      nn.ReLU(inplace=True),
      nn.Conv2d(C, C, 3, stride=2, padding=1, bias=False),
      nn.BatchNorm2d(C),
    )

    C_prev_prev, C_prev, C_curr = C, C, C

    self.cells = nn.ModuleList()
    # stem1 halves the resolution of stem0's output, so the first cell
    # treats its older input as coming from a reduction.
    reduction_prev = True
    for i in range(layers):
      if i in [layers // 3, 2 * layers // 3]:
        C_curr *= 2
        reduction = True
      else:
        reduction = False
      cell = Cell(genotype, C_prev_prev, C_prev, C_curr, reduction, reduction_prev)
      reduction_prev = reduction
      self.cells += [cell]
      C_prev_prev, C_prev = C_prev, cell.multiplier * C_curr
      # Channel count seen by the auxiliary head.
      if i == 2 * layers // 3:
        C_to_auxiliary = C_prev

    if auxiliary:
      self.auxiliary_head = AuxiliaryHeadImageNet(C_to_auxiliary, num_classes)
    self.global_pooling = nn.AvgPool2d(7)
    self.classifier = nn.Linear(C_prev, num_classes)

  def forward(self, input):
    """Return (logits, logits_aux).

    `logits_aux` is None unless the auxiliary head is enabled and the
    module is in training mode.
    """
    logits_aux = None
    s0 = self.stem0(input)
    s1 = self.stem1(s0)
    for i, cell in enumerate(self.cells):
      s0, s1 = s1, cell(s0, s1, self.drop_path_prob)
      if i == 2 * self._layers // 3:
        if self._auxiliary and self.training:
          logits_aux = self.auxiliary_head(s1)
    out = self.global_pooling(s1)
    logits = self.classifier(out.view(out.size(0), -1))
    return logits, logits_aux
214 | 
215 | 


--------------------------------------------------------------------------------
/advrush/hessianflow/optimizer/absa.py:
--------------------------------------------------------------------------------
  1 | #*
  2 | # @file ABSA training driver based on arxiv:1810.01021 
  3 | # Copyright (c) Zhewei Yao, Amir Gholami
  4 | # All rights reserved.
  5 | # This file is part of HessianFlow library.
  6 | #
  7 | # HessianFlow is free software: you can redistribute it and/or modify
  8 | # it under the terms of the GNU General Public License as published by
  9 | # the Free Software Foundation, either version 3 of the License, or
 10 | # (at your option) any later version.
 11 | #
 12 | # HessianFlow is distributed in the hope that it will be useful,
 13 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
 14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 15 | # GNU General Public License for more details.
 16 | #
 17 | # You should have received a copy of the GNU General Public License
 18 | # along with HessianFlow.  If not, see <http://www.gnu.org/licenses/>.
 19 | #*
 20 | 
 21 | from __future__ import print_function
 22 | import numpy as np
 23 | import torch
 24 | import torch.nn as nn
 25 | import torch.nn.functional as F
 26 | import torch.optim as optim
 27 | from torchvision import datasets, transforms
 28 | from torch.autograd import Variable
 29 | 
 30 | from .progressbar import progress_bar
 31 | from .optm_utils import fgsm, exp_lr_scheduler, test
 32 | 
 33 | import hessianflow
 34 | from hessianflow.utils import get_params_grad, group_add
 35 | from hessianflow.eigen import get_eigen
 36 | from copy import deepcopy
 37 | 
 38 | 
 39 | def get_lr(opt):
 40 |     """
 41 |     get the learning rate 
 42 |     """
 43 |     for param_group in opt.param_groups:
 44 |         return param_group['lr']
 45 | 
 46 | def copy_update(opt, grad):
 47 |     """
 48 |     used for optimizer update
 49 |     """
 50 |     for group in opt.param_groups:
 51 |         weight_decay = group['weight_decay']
 52 |         momentum = group['momentum']
 53 |         dampening = group['dampening']
 54 |         nesterov = group['nesterov']
 55 | 
 56 |         for i,p in enumerate(group['params']):
 57 |             d_p = grad[i]
 58 |             if weight_decay != 0:
 59 |                 d_p.add_(weight_decay, p.data)
 60 |             if momentum != 0:
 61 |                 param_state = opt.state[p]
 62 |                 if 'momentum_buffer' not in param_state:
 63 |                     buf = param_state['momentum_buffer'] = torch.zeros_like(p.data)
 64 |                     buf.mul_(momentum).add_(d_p)
 65 |                 else:
 66 |                     buf = param_state['momentum_buffer']
 67 |                     buf.mul_(momentum).add_(1 - dampening, d_p)
 68 |                 if nesterov:
 69 |                     d_p = d_p.add(momentum, buf)
 70 |                 else:
 71 |                     d_p = buf
 72 |             p.data.add_(-group['lr'], d_p)
 73 | 
 74 | def absa(model, train_loader, hessian_loader, test_loader, criterion, optimizer, epochs, lr_decay_epoch, lr_decay_ratio, batch_size = 128,
 75 |         max_large_ratio = 1, adv_ratio = 0., eps = 0., duration = True, cuda = True, print_flag = False):
 76 |     """
 77 |     adaptive batch size with adversarial training
 78 |     """
 79 |     
 80 |     # initilization 
 81 |     large_grad = []
 82 |     inner_loop = 0
 83 |     large_ratio = 1
 84 |     max_eig = None
 85 |     decay_ratio = 2
 86 |     flag = True
 87 |     if max_large_ratio == 1:
 88 |         flag = False
 89 |     
 90 |     data_eigen = None
 91 |     target_eigen = None
 92 |     flag_data = True
 93 |     if duration == True: 
 94 |         duration = 10
 95 |     else:
 96 |         duration = None
 97 | 
 98 |     cur_duration = 0
 99 |     num_updates = 0
100 |     initial_lr = get_lr(optimizer)
101 |     
102 |     
103 |     for epoch in range(1, epochs + 1):
104 |         print('\nCurrent Epoch: %d' % epoch)
105 |         print('\nTraining')
106 |         train_loss = 0.
107 |         total_num = 0
108 |         correct = 0
109 |         
110 |         for batch_idx, (data, target) in enumerate(train_loader):
111 |             if data.size()[0] < batch_size:
112 |                 continue
113 |             # gather input and target for large batch training        
114 |             inner_loop += 1
115 |             
116 |             # save the data for eigen-computation
117 |             if flag_data:
118 |                 data_eigen = data
119 |                 target_eigen = target
120 |                 #flag_data = False
121 |             # get small model update
122 |             # use adversarial training
123 |             if adv_ratio > 1. / batch_size:
124 |                 adv_r = max(int(batch_size * adv_ratio), 1)
125 |                 model.eval() # set flag so that Batch Norm statistics would not be polluted with fgsm
126 |                 adv_data = fgsm(model, data[:adv_r], target[:adv_r], eps, cuda)
127 |                 model.train() # set flag to train for Batch Norm
128 |                 adv_data = torch.cat([adv_data, data[adv_r:]])
129 |             else:
130 |                 model.train()
131 |                 adv_data = data
132 | 
133 |             optimizer.zero_grad()
134 |             if cuda:
135 |                 adv_data, target = adv_data.cuda(), target.cuda()
136 | 
137 |             output = model(adv_data)
138 |             loss = criterion(output, target) / large_ratio
139 |             total_num +=target.size(0)
140 |             _, predicted = output.max(1)
141 |             correct += predicted.eq(target).sum().item()
142 |             
143 |             train_loss += loss.item() * target.size(0) * float(large_ratio)
144 |             loss.backward()
145 |             _, small_grad= get_params_grad(model)
146 |             if not large_grad:
147 |                 large_grad = deepcopy(small_grad) #[small_grad_ + 0. for small_grad_ in small_grad]
148 |             else:
149 |                 large_grad = group_add(large_grad, small_grad)
150 | 
151 | 
152 |             if inner_loop % large_ratio  == 0:
153 |                 num_updates += 1
154 |                 copy_update(optimizer, large_grad) # todo: see if we can use deep copy to set optimizer.grad = large_grad
155 |                 large_grad = []
156 |                 inner_loop = 0
157 |                 optimizer.zero_grad()
158 |                 
159 |             progress_bar(batch_idx, len(train_loader), 'Loss: %.3f | Acc: %.3f%% (%d/%d)'
160 |              % (train_loss / total_num,
161 |                 100. * correct/total_num, correct, total_num))
162 |                 
163 |         ## compute eigenvalues and update large_ratio, adv_ratio etc
164 |         if flag:
165 |             for data, target in hessian_loader:
166 |                 data_eigen = data
167 |                 target_eigen = target
168 |                 break
169 |             eig, _ = get_eigen(model, data_eigen, target_eigen, criterion, cuda = True, maxIter = 10, tol = 1e-2)
170 |             cur_duration += 1
171 | 
172 |             if max_eig == None:
173 |                 max_eig = eig
174 |             else:
175 |                 if eig <= max_eig/decay_ratio:
176 |                     # ensure the learning rate is not too crazy, espeacially for model without batch normalization
177 |                     max_eig = eig
178 |                     prev_ratio = large_ratio
179 |                     large_ratio = int(large_ratio*decay_ratio)
180 |                     adv_ratio /= decay_ratio
181 |                     if large_ratio  >= max_large_ratio:
182 |                         large_ratio = max_large_ratio
183 |                         adv_ratio = 0.
184 |                         flag = False
185 |                     cur_duration = 0
186 |                     optimizer = exp_lr_scheduler(optimizer, decay_ratio = large_ratio/prev_ratio)
187 |         if duration != None: # if it is around a quadratic bowl, increase batch size
188 |             # ensure the learning rate is not too crazy, espeacially for model without batch normalization
189 |             if cur_duration - duration > -0.5:
190 |                 prev_ratio = large_ratio
191 |                 large_ratio = int(large_ratio*decay_ratio)
192 |                 adv_ratio /= decay_ratio
193 |                 if large_ratio  >= max_large_ratio:
194 |                     large_ratio = max_large_ratio
195 |                     adv_ratio = 0.
196 |                     flag = False
197 |                 cur_duration = 0
198 |                 optimizer = exp_lr_scheduler(optimizer, decay_ratio = large_ratio/prev_ratio)
199 | 
200 | 
201 |         if epoch in lr_decay_epoch:
202 |             optimizer = exp_lr_scheduler(optimizer, decay_ratio = lr_decay_ratio)
203 |             
204 |         if epoch >= epochs // 2:
205 |             adv_ratio = 0.
206 |         
207 |         if print_flag:
208 |             #print('\n Batch size %d' % (batch_size*large_ratio))
209 |             print('\n Eig %f Max Eig %f Batch size %d' % (eig, max_eig, batch_size * large_ratio))
210 |             
211 |         test(model, test_loader)
212 |         
213 |     return model, num_updates
214 | 


--------------------------------------------------------------------------------
/advrush/adv_train.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import sys
  3 | import time
  4 | import glob
  5 | import numpy as np
  6 | import torch
  7 | import utils
  8 | import logging
  9 | import argparse
 10 | import torch.nn as nn
 11 | import genotypes
 12 | import torch.utils
 13 | import torchvision.datasets as dset
 14 | import torch.backends.cudnn as cudnn
 15 | 
 16 | from torch.autograd import Variable
 17 | from model import NetworkCIFAR as Network
 18 | from trades import trades_loss, madry_loss
 19 | 
 20 | parser = argparse.ArgumentParser("cifar")
 21 | parser.add_argument('--data', type=str, default='../data', help='location of the data corpus')
 22 | parser.add_argument('--batch_size', type=int, default=64, help='batch size') #128
 23 | parser.add_argument('--learning_rate', type=float, default=0.1, help='init learning rate')
 24 | parser.add_argument('--momentum', type=float, default=0.9, help='momentum')
 25 | parser.add_argument('--weight_decay', type=float, default=1e-4, help='weight decay')
 26 | parser.add_argument('--report_freq', type=float, default=50, help='report frequency')
 27 | parser.add_argument('--gpu', type=int, default=0, help='gpu device id')
 28 | parser.add_argument('--epochs', type=int, default=200, help='num of training epochs')
 29 | parser.add_argument('--epsilon', type=float, default=0.031, help='perturbation')
 30 | parser.add_argument('--num_steps', type=int, default=7, help='perturb number of steps')
 31 | parser.add_argument('--step_size', type=float, default=0.01, help='perturb step size')
 32 | parser.add_argument('--beta', type=float, default=6.0, help='regularization in TRADES')
 33 | parser.add_argument('--adv_loss', type=str, default='pgd', help='experiment name')
 34 | parser.add_argument('--init_channels', type=int, default=36, help='num of init channels')
 35 | parser.add_argument('--layers', type=int, default=20, help='total number of layers')
 36 | parser.add_argument('--model_path', type=str, default='saved_models', help='path to save the model')
 37 | parser.add_argument('--auxiliary', action='store_true', default=False, help='use auxiliary tower')
 38 | parser.add_argument('--auxiliary_weight', type=float, default=0.4, help='weight for auxiliary loss')
 39 | parser.add_argument('--cutout', action='store_true', default=False, help='use cutout')
 40 | parser.add_argument('--cutout_length', type=int, default=16, help='cutout length')
 41 | parser.add_argument('--drop_path_prob', type=float, default=0.0, help='drop path probability')
 42 | parser.add_argument('--save', type=str, default='EXP', help='experiment name')
 43 | parser.add_argument('--seed', type=int, default=0, help='random seed')
 44 | parser.add_argument('--arch', type=str, default='ADVRUSH', help='which architecture to use')
 45 | parser.add_argument('--grad_clip', type=float, default=5, help='gradient clipping')
 46 | 
 47 | args = parser.parse_args()
 48 | 
 49 | args.save = 'eval-{}-{}'.format(args.save, time.strftime("%Y%m%d-%H%M%S"))
 50 | utils.create_exp_dir(args.save, scripts_to_save=glob.glob('*.py'))
 51 | 
 52 | log_format = '%(asctime)s %(message)s'
 53 | logging.basicConfig(stream=sys.stdout, level=logging.INFO,
 54 |     format=log_format, datefmt='%m/%d %I:%M:%S %p')
 55 | fh = logging.FileHandler(os.path.join(args.save, 'log.txt'))
 56 | fh.setFormatter(logging.Formatter(log_format))
 57 | logging.getLogger().addHandler(fh)
 58 | 
 59 | CIFAR_CLASSES = 10
 60 | 
 61 | 
 62 | def main():
 63 |   if not torch.cuda.is_available():
 64 |     logging.info('no gpu device available')
 65 |     sys.exit(1)
 66 | 
 67 |   np.random.seed(args.seed)
 68 |   torch.cuda.set_device(args.gpu)
 69 |   cudnn.benchmark = True
 70 |   torch.manual_seed(args.seed)
 71 |   cudnn.enabled=True
 72 |   torch.cuda.manual_seed(args.seed)
 73 |   logging.info('gpu device = %d' % args.gpu)
 74 |   logging.info("args = %s", args)
 75 | 
 76 |   genotype = eval("genotypes.%s" % args.arch)
 77 |   model = Network(args.init_channels, CIFAR_CLASSES, args.layers, args.auxiliary, genotype)
 78 |   model = model.cuda()
 79 | 
 80 |   logging.info("param size = %fMB", utils.count_parameters_in_MB(model))
 81 | 
 82 |   criterion = nn.CrossEntropyLoss()
 83 |   criterion = criterion.cuda()
 84 |   optimizer = torch.optim.SGD(
 85 |       model.parameters(),
 86 |       args.learning_rate,
 87 |       momentum=args.momentum,
 88 |       weight_decay=args.weight_decay
 89 |       )
 90 | 
 91 |   train_transform, valid_transform = utils._data_transforms_cifar10_eval(args)
 92 |   train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform)
 93 |   valid_data = dset.CIFAR10(root=args.data, train=False, download=True, transform=valid_transform)
 94 | 
 95 |   train_queue = torch.utils.data.DataLoader(
 96 |       train_data, batch_size=args.batch_size, shuffle=True, pin_memory=True, num_workers=2)
 97 | 
 98 |   valid_queue = torch.utils.data.DataLoader(
 99 |       valid_data, batch_size=args.batch_size, shuffle=False, pin_memory=True, num_workers=2)
100 | 
101 |   best_acc = 0.0
102 |   for epoch in range(args.epochs):
103 |     adjust_learning_rate(optimizer, epoch)
104 |     model.drop_path_prob = args.drop_path_prob * epoch / args.epochs
105 | 
106 |     train_acc, train_obj = train(train_queue, model, criterion, optimizer)
107 |     logging.info('epoch %d train_acc %f', epoch, train_acc)
108 | 
109 |     valid_acc, valid_obj = infer(valid_queue, model, criterion)
110 |     if valid_acc > best_acc:
111 |       best_acc = valid_acc
112 |       utils.save_checkpoint({
113 | 	'epoch': epoch +1,
114 | 	'state_dict': model.state_dict(),
115 | 	'optimizer': optimizer.state_dict(),
116 | 	}, is_best=True, save=args.save, epoch=epoch)
117 |     logging.info('epoch %d valid_acc %f, best_acc %f', epoch, valid_acc, best_acc)
118 | 
119 |     utils.save(model, os.path.join(args.save, 'weights.pt'))
120 |     utils.save_checkpoint({
121 |         'epoch': epoch + 1, 
122 |         'state_dict': model.state_dict(),
123 |         'optimizer': optimizer.state_dict(),
124 |         }, is_best=False, save=args.save, epoch=epoch)
125 | 
126 | 
127 | def train(train_queue, model, criterion, optimizer):
128 |   objs = utils.AvgrageMeter()
129 |   top1 = utils.AvgrageMeter()
130 |   top5 = utils.AvgrageMeter()
131 |   model.train()
132 | 
133 |   for step, (input, target) in enumerate(train_queue):
134 |     input = Variable(input).cuda(non_blocking=True)
135 |     target = Variable(target).cuda(non_blocking=True)
136 | 
137 |     optimizer.zero_grad()
138 |     logits, logits_aux = model(input)
139 |     if args.adv_loss == 'pgd':
140 |       loss = madry_loss(
141 |             model,
142 |             input, 
143 |             target, 
144 |             optimizer,
145 |             step_size = args.step_size,
146 |             epsilon = args.epsilon, 
147 |             perturb_steps = args.num_steps)
148 |     elif args.adv_loss == 'trades':
149 |       loss = trades_loss(model,
150 |                 input,
151 |                 target,
152 |                 optimizer,
153 |                 step_size=args.step_size,
154 |                 epsilon=args.epsilon,
155 |                 perturb_steps=args.num_steps,
156 |                 beta=args.beta,
157 |                 distance='l_inf')
158 |     #loss = criterion(logits, target)
159 |     if args.auxiliary:
160 |       loss_aux = criterion(logits_aux, target)
161 |       loss += args.auxiliary_weight*loss_aux
162 |     loss.backward()
163 |     nn.utils.clip_grad_norm(model.parameters(), args.grad_clip)
164 |     optimizer.step()
165 | 
166 |     prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
167 |     n = input.size(0)
168 |     objs.update(loss.data.item(), n)
169 |     top1.update(prec1.data.item(), n)
170 |     top5.update(prec5.data.item(), n)
171 | 
172 |     if step % args.report_freq == 0:
173 |       logging.info('train %03d %e %f %f', step, objs.avg, top1.avg, top5.avg)
174 | 
175 |   return top1.avg, objs.avg
176 | 
177 | 
178 | def infer(valid_queue, model, criterion):
179 |   objs = utils.AvgrageMeter()
180 |   top1 = utils.AvgrageMeter()
181 |   top5 = utils.AvgrageMeter()
182 |   model.eval()
183 | 
184 |   with torch.no_grad():
185 |     for step, (input, target) in enumerate(valid_queue):
186 |       input = Variable(input, requires_grad=False).cuda(non_blocking=True)
187 |       target = Variable(target, requires_grad=False).cuda(non_blocking=True)
188 | 
189 |       logits, _ = model(input)
190 |       loss = criterion(logits, target)
191 | 
192 |       prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
193 |       n = input.size(0)
194 |       objs.update(loss.data.item(), n)
195 |       top1.update(prec1.data.item(), n)
196 |       top5.update(prec5.data.item(), n)
197 | 
198 |       if step % args.report_freq == 0:
199 |         logging.info('valid %03d %e %f %f', step, objs.avg, top1.avg, top5.avg)
200 | 
201 |   return top1.avg, objs.avg
202 | 
203 | def adjust_learning_rate(optimizer, epoch):
204 |   """decrease the learning rate"""
205 |   lr = args.learning_rate
206 |   if epoch >= 99:
207 |     lr = args.learning_rate * 0.1
208 |   if epoch >= 149:
209 |     lr = args.learning_rate * 0.01
210 |   for param_group in optimizer.param_groups:
211 |     param_group['lr'] = lr
212 | 
213 | if __name__ == '__main__':
214 |   main() 
215 | 
216 | 


--------------------------------------------------------------------------------
/eval/pgd_attack.py:
--------------------------------------------------------------------------------
  1 | from __future__ import print_function
  2 | import os
  3 | import argparse
  4 | import torch
  5 | import torch.nn as nn
  6 | import torch.nn.functional as F
  7 | import torchvision
  8 | from torch.autograd import Variable
  9 | import torch.optim as optim
 10 | from torchvision import datasets, transforms
 11 | from model import NetworkCIFAR as Network
 12 | import genotypes
 13 | 
 14 | parser = argparse.ArgumentParser(description='PyTorch CIFAR PGD Attack Evaluation')
 15 | parser.add_argument('--test-batch-size', type=int, default=25, metavar='N',
 16 |                     help='input batch size for testing (default: 200)')
 17 | parser.add_argument('--no-cuda', action='store_true', default=False,
 18 |                     help='disables CUDA training')
 19 | parser.add_argument('--epsilon', default=0.031,
 20 |                     help='perturbation')
 21 | parser.add_argument('--num-steps', type=int, default=20,
 22 |                     help='perturb number of steps')
 23 | parser.add_argument('--step-size', default=0.01,
 24 |                     help='perturb step size')
 25 | parser.add_argument('--random',
 26 |                     default=True,
 27 |                     help='random initialization for PGD')
 28 | parser.add_argument('--white-box-attack', default=False,
 29 |                     help='whether perform white-box attack')
 30 | parser.add_argument('--arch', type=str, default='ADVRUSH', help='which architecture to use')
 31 | parser.add_argument('--init_channels', type=int, default=36, help='num of init channels')
 32 | parser.add_argument('--layers', type=int, default=20, help='total number of layers')
 33 | parser.add_argument('--auxiliary', action='store_true', default=False, help='use auxiliary tower')
 34 | parser.add_argument('--drop_path_prob', type=float, default=0.0, help='drop path probability')
 35 | parser.add_argument('--target_arch', type=str, default='ADVRUSH', help='which architecture to use')
 36 | parser.add_argument('--source_arch', type=str, default='ADVRUSH', help='which architecture to use')
 37 | parser.add_argument('--target_checkpoint', type=str, default='./', help='which architecture to use')
 38 | parser.add_argument('--source_checkpoint', type=str, default='./', help='which architecture to use')
 39 | parser.add_argument('--log_path', type=str, default='./', help='path to store log file')
 40 | parser.add_argument('--checkpoint', type=str, default='./', help='which architecture to use')
 41 | parser.add_argument('--data_type', type=str, default='cifar10', help='which dataset to use')
 42 | 
 43 | args = parser.parse_args()
 44 | 
 45 | # settings
 46 | use_cuda = not args.no_cuda and torch.cuda.is_available()
 47 | device = torch.device("cuda" if use_cuda else "cpu")
 48 | kwargs = {'num_workers': 4, 'pin_memory': True} if use_cuda else {}
 49 | 
 50 | # set up data loader
 51 | if args.data_type == 'cifar10':
 52 |     transform_list = [transforms.ToTensor()]
 53 |     transform_test = transforms.Compose(transform_list)
 54 |     testset = torchvision.datasets.CIFAR10(root='../data', train=False, download=True, transform=transform_test)
 55 | elif args.data_type == 'cifar100':
 56 |     transform_list = [transforms.ToTensor()]
 57 |     transform_test = transforms.Compose(transform_list)
 58 |     testset = torchvision.datasets.CIFAR100(root='../data', train=False, download=True, transform=transform_test)
 59 | elif args.data_type == 'svhn':
 60 |     transform_list = [transforms.ToTensor()]
 61 |     transform_test = transforms.Compose(transform_list)
 62 |     testset = torchvision.datasets.SVHN(root='../data', split='test', download=True, transform=transform_test)
 63 | 
 64 | test_loader = torch.utils.data.DataLoader(testset, batch_size=args.test_batch_size, shuffle=False, **kwargs)
 65 | 
 66 | 
 67 | def _pgd_whitebox(model,
 68 |                   X,
 69 |                   y,
 70 |                   epsilon=args.epsilon,
 71 |                   num_steps=args.num_steps,
 72 |                   step_size=args.step_size):
 73 |     out = model(X)
 74 |     err = (out.data.max(1)[1] != y.data).float().sum()
 75 |     X_pgd = Variable(X.data, requires_grad=True)
 76 |     if args.random:
 77 |         random_noise = torch.FloatTensor(*X_pgd.shape).uniform_(-epsilon, epsilon).to(device)
 78 |         X_pgd = Variable(X_pgd.data + random_noise, requires_grad=True)
 79 | 
 80 |     for _ in range(num_steps):
 81 |         opt = optim.SGD([X_pgd], lr=1e-3)
 82 |         opt.zero_grad()
 83 | 
 84 |         with torch.enable_grad():
 85 |             loss = nn.CrossEntropyLoss()(model(X_pgd), y)
 86 |         loss.backward()
 87 |         eta = step_size * X_pgd.grad.data.sign()
 88 |         X_pgd = Variable(X_pgd.data + eta, requires_grad=True)
 89 |         eta = torch.clamp(X_pgd.data - X.data, -epsilon, epsilon)
 90 |         X_pgd = Variable(X.data + eta, requires_grad=True)
 91 |         X_pgd = Variable(torch.clamp(X_pgd, 0, 1.0), requires_grad=True)
 92 |     err_pgd = (model(X_pgd).data.max(1)[1] != y.data).float().sum()
 93 |     print('err pgd (white-box): ', err_pgd)
 94 |     return err, err_pgd
 95 | 
 96 | 
 97 | def _pgd_blackbox(model_target,
 98 |                   model_source,
 99 |                   X,
100 |                   y,
101 |                   epsilon=args.epsilon,
102 |                   num_steps=args.num_steps,
103 |                   step_size=args.step_size):
104 |     out = model_target(X)
105 |     err = (out.data.max(1)[1] != y.data).float().sum()
106 |     X_pgd = Variable(X.data, requires_grad=True)
107 |     if args.random:
108 |         random_noise = torch.FloatTensor(*X_pgd.shape).uniform_(-epsilon, epsilon).to(device)
109 |         X_pgd = Variable(X_pgd.data + random_noise, requires_grad=True)
110 | 
111 |     for _ in range(num_steps):
112 |         opt = optim.SGD([X_pgd], lr=1e-3)
113 |         opt.zero_grad()
114 |         with torch.enable_grad():
115 |             loss = nn.CrossEntropyLoss()(model_source(X_pgd), y)
116 |         loss.backward()
117 |         eta = step_size * X_pgd.grad.data.sign()
118 |         X_pgd = Variable(X_pgd.data + eta, requires_grad=True)
119 |         eta = torch.clamp(X_pgd.data - X.data, -epsilon, epsilon)
120 |         X_pgd = Variable(X.data + eta, requires_grad=True)
121 |         X_pgd = Variable(torch.clamp(X_pgd, 0, 1.0), requires_grad=True)
122 | 
123 |     err_pgd = (model_target(X_pgd).data.max(1)[1] != y.data).float().sum()
124 |     print('err pgd black-box: ', err_pgd)
125 |     return err, err_pgd
126 | 
127 | 
128 | def eval_adv_test_whitebox(model, device, test_loader):
129 |     """
130 |     evaluate model by white-box attack
131 |     """
132 |     model.eval()
133 |     robust_err_total = 0
134 |     natural_err_total = 0
135 | 
136 |     for data, target in test_loader:
137 |         data, target = data.to(device), target.to(device)
138 |         # pgd attack
139 |         X, y = Variable(data, requires_grad=True), Variable(target)
140 |         err_natural, err_robust = _pgd_whitebox(model, X, y)
141 |         robust_err_total += err_robust
142 |         natural_err_total += err_natural
143 |     print('natural_err_total: ', natural_err_total)
144 |     print('robust_err_total: ', robust_err_total)
145 | 
146 | 
147 | def eval_adv_test_blackbox(model_target, model_source, device, test_loader):
148 |     """
149 |     evaluate model by black-box attack
150 |     """
151 |     model_target.eval()
152 |     model_source.eval()
153 |     robust_err_total = 0
154 |     natural_err_total = 0
155 | 
156 |     for data, target in test_loader:
157 |         data, target = data.to(device), target.to(device)
158 |         # pgd attack
159 |         X, y = Variable(data, requires_grad=True), Variable(target)
160 |         err_natural, err_robust = _pgd_blackbox(model_target, model_source, X, y)
161 |         robust_err_total += err_robust
162 |         natural_err_total += err_natural
163 |     print('natural_err_total: ', natural_err_total)
164 |     print('robust_err_total: ', robust_err_total)
165 | 
166 | def main():
167 |     
168 | 
169 |     if args.white_box_attack:
170 |         # white-box attack
171 |         print('pgd white-box attack')
172 |         if args.data_type == 'cifar100':
173 |             CIFAR_CLASSES = 100
174 |         else:
175 |             CIFAR_CLASSES = 10
176 |         genotype = eval("genotypes.%s" % args.arch)
177 |         model = Network(args.init_channels, CIFAR_CLASSES, args.layers, args.auxiliary, genotype)
178 |         checkpoint = torch.load(args.checkpoint, map_location='cuda:0')
179 |         model.load_state_dict(checkpoint['state_dict'])
180 |         model.drop_path_prob = args.drop_path_prob
181 |         model.cuda()
182 |         eval_adv_test_whitebox(model, device, test_loader)
183 | 
184 |     else:
185 |         # black-box attack
186 |         CIFAR_CLASSES = 10
187 |         print('pgd black-box attack')
188 |         target_genotype = eval("genotypes.%s" % args.target_arch)
189 |         source_genotype = eval("genotypes.%s" % args.source_arch)
190 |         
191 |         model_source = Network(args.init_channels,CIFAR_CLASSES, args.layers, args.auxiliary, source_genotype)
192 |         source_checkpoint = torch.load(args.source_checkpoint)
193 |         model_source.load_state_dict(source_checkpoint['state_dict'])
194 |         model_source.drop_path_prob = args.drop_path_prob
195 |         model_source.cuda()
196 | 
197 |         model_target = Network(args.init_channels, CIFAR_CLASSES, args.layers, args.auxiliary, target_genotype)
198 |         target_checkpoint = torch.load(args.target_checkpoint)
199 |         model_target.load_state_dict(target_checkpoint['state_dict'])
200 |         model_target.drop_path_prob = args.drop_path_prob
201 |         model_target.cuda()
202 | 
203 |         eval_adv_test_blackbox(model_target, model_source, device, test_loader)
204 | 
205 | 
206 | if __name__ == '__main__':
207 |     main()
208 | 


--------------------------------------------------------------------------------
/advrush/train_search.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import sys
  3 | import time
  4 | import glob
  5 | from random import shuffle
  6 | import numpy as np
  7 | import torch
  8 | import utils
  9 | import logging
 10 | import argparse
 11 | import torch.nn as nn
 12 | import torch.utils
 13 | import torch.nn.functional as F
 14 | import torchvision.datasets as dset
 15 | import torch.backends.cudnn as cudnn
 16 | 
 17 | from torch.autograd import Variable
 18 | from model_search import Network
 19 | from architect import Architect
 20 | from regularizer import *
 21 | from tensorboardX import SummaryWriter
 22 | import hessianflow as hf
 23 | import hessianflow.optimizer.optm_utils as hf_optm_utils
 24 | import hessianflow.optimizer.progressbar as hf_optm_pgb
 25 | 
 26 | parser = argparse.ArgumentParser("cifar")
 27 | parser.add_argument('--data', type=str, default='../data', help='location of the data corpus')
 28 | parser.add_argument('--batch_size', type=int, default=64, help='batch size')
 29 | parser.add_argument('--learning_rate', type=float, default=0.025, help='init learning rate')
 30 | parser.add_argument('--learning_rate_min', type=float, default=0.001, help='min learning rate')
 31 | parser.add_argument('--momentum', type=float, default=0.9, help='momentum')
 32 | parser.add_argument('--weight_decay', type=float, default=3e-4, help='weight decay')
 33 | parser.add_argument('--report_freq', type=float, default=50, help='report frequency')
 34 | parser.add_argument('--gpu', type=int, default=0, help='gpu device id')
 35 | parser.add_argument('--epochs', type=int, default=50, help='num of training epochs')
 36 | parser.add_argument('--init_channels', type=int, default=16, help='num of init channels')
 37 | parser.add_argument('--layers', type=int, default=8, help='total number of layers')
 38 | parser.add_argument('--model_path', type=str, default='saved_models', help='path to save the model')
 39 | parser.add_argument('--cutout', action='store_true', default=False, help='use cutout')
 40 | parser.add_argument('--cutout_length', type=int, default=16, help='cutout length')
 41 | parser.add_argument('--drop_path_prob', type=float, default=0.3, help='drop path probability')
 42 | parser.add_argument('--save', type=str, default='EXP', help='experiment name')
 43 | parser.add_argument('--seed', type=int, default=2, help='random seed')
 44 | parser.add_argument('--grad_clip', type=float, default=5, help='gradient clipping')
 45 | parser.add_argument('--train_portion', type=float, default=0.5, help='portion of training data')
 46 | parser.add_argument('--unrolled', action='store_true', default=False, help='use one-step unrolled validation loss')
 47 | parser.add_argument('--arch_learning_rate', type=float, default=3e-4, help='learning rate for arch encoding')
 48 | parser.add_argument('--arch_weight_decay', type=float, default=1e-3, help='weight decay for arch encoding')
 49 | parser.add_argument('--a_gamma', type=float, default=0.01, help='a regularization strength')
 50 | parser.add_argument('--w_gamma', type=float, default=1e-4, help='w regularization strength')
 51 | parser.add_argument('--a_warmup_epochs', type=int, default=50, help='num of warm up epochs before using Hessian - architecture weight')
 52 | parser.add_argument('--w_warmup_epochs', type=int, default=60, help='num of warm up epochs before using Hessian - model parameters')
 53 | parser.add_argument('--loss_hessian', type=str, default='loss_cure', help='type of hessian loss to use, loss_eigen')
 54 | 
 55 | args = parser.parse_args()
 56 | 
 57 | args.save = 'search-{}-{}'.format(args.save, time.strftime("%Y%m%d-%H%M%S"))
 58 | utils.create_exp_dir(args.save, scripts_to_save=glob.glob('*.py'))
 59 | 
 60 | log_format = '%(asctime)s %(message)s'
 61 | logging.basicConfig(stream=sys.stdout, level=logging.INFO,
 62 |     format=log_format, datefmt='%m/%d %I:%M:%S %p')
 63 | fh = logging.FileHandler(os.path.join(args.save, 'log.txt'))
 64 | fh.setFormatter(logging.Formatter(log_format))
 65 | logging.getLogger().addHandler(fh)
 66 | 
 67 | if not os.path.isdir(os.path.join(args.save, './log')):
 68 |   os.makedirs(os.path.join(args.save, './log'))
 69 | tb_logger = SummaryWriter(os.path.join(args.save, './log'))
 70 | 
 71 | CIFAR_CLASSES = 10
 72 | 
 73 | 
 74 | def main():
 75 |   if not torch.cuda.is_available():
 76 |     logging.info('no gpu device available')
 77 |     sys.exit(1)
 78 | 
 79 |   np.random.seed(args.seed)
 80 |   torch.cuda.set_device(args.gpu)
 81 |   cudnn.benchmark = True
 82 |   torch.manual_seed(args.seed)
 83 |   cudnn.enabled=True
 84 |   torch.cuda.manual_seed(args.seed)
 85 |   logging.info('gpu device = %d' % args.gpu)
 86 |   logging.info("args = %s", args)
 87 | 
 88 |   criterion = nn.CrossEntropyLoss()
 89 |   criterion = criterion.cuda()
 90 |   model = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion)
 91 |   model = model.cuda()
 92 |   logging.info("param size = %fMB", utils.count_parameters_in_MB(model))
 93 | 
 94 |   optimizer = torch.optim.SGD(
 95 |       model.parameters(),
 96 |       args.learning_rate,
 97 |       momentum=args.momentum,
 98 |       weight_decay=args.weight_decay)
 99 | 
100 |   train_transform, valid_transform = utils._data_transforms_cifar10(args)
101 |   train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform)
102 | 
103 |   train_queue = torch.utils.data.DataLoader(
104 |         train_data, batch_size=args.batch_size, pin_memory=True, num_workers=2)
105 | 
106 |   valid_queue = torch.utils.data.DataLoader(
107 |         train_data, batch_size=args.batch_size, pin_memory=True, num_workers=2)
108 | 
109 |   scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
110 |         optimizer, float(args.epochs), eta_min=args.learning_rate_min)
111 | 
112 |   architect = Architect(model, args)
113 | 
114 |   for epoch in range(args.epochs):
115 |     scheduler.step()
116 |     lr = scheduler.get_lr()[0]
117 |     logging.info('epoch %d lr %e', epoch, lr)
118 | 
119 |     genotype = model.genotype()
120 |     logging.info('genotype = %s', genotype)
121 | 
122 |     logging.info(F.softmax(model.alphas_normal, dim=-1))
123 |     logging.info(F.softmax(model.alphas_reduce, dim=-1))
124 |     h_all = np.array([0.0, 0.3, 0.6, 0.9, 1.2, 1.5])
125 |     h_all = np.append(h_all, [1.5]*int(args.epochs-6))
126 |     # training
127 |     train_acc, train_obj, a_reg, w_reg = train(train_queue, valid_queue, model, architect, criterion, optimizer, lr, epoch, h=h_all[epoch])
128 |     logging.info('train_acc %f', train_acc)
129 |     tb_logger.add_scalar('train_accuracy', train_acc, epoch)
130 |     tb_logger.add_scalar('train_loss', train_obj, epoch)
131 |     tb_logger.add_scalar('alpha_regularization', a_reg, epoch)
132 |     tb_logger.add_scalar('weight_regularization', w_reg, epoch)
133 | 
134 |     # validation
135 |     valid_acc, valid_obj = infer(valid_queue, model, criterion)
136 |     logging.info('valid_acc %f', valid_acc)
137 | 
138 |     utils.save(model, os.path.join(args.save, 'weights.pt'))
139 |     utils.save_checkpoint({
140 |         'epoch': epoch + 1,
141 |         'model_optimizer': optimizer.state_dict(),
142 |         'arch_optimizer': architect.optimizer.state_dict(),
143 |         'model':  model.state_dict(),
144 |         'scheduler': scheduler.state_dict(),
145 |         'alpha_normal': model.alphas_normal,
146 |         'alpha_reduce': model.alphas_reduce}, is_best=False, save=args.save, epoch=epoch)
147 | 
148 | 
149 | 
150 | def train(train_queue, valid_queue, model, architect, criterion, optimizer, lr, epoch, h):
151 |   objs = utils.AvgrageMeter()
152 |   a_regs = utils.AvgrageMeter()
153 |   w_regs = utils.AvgrageMeter()
154 |   top1 = utils.AvgrageMeter()
155 |   top5 = utils.AvgrageMeter()
156 | 
157 |   for step, (input, target) in enumerate(train_queue):
158 |     model.train()
159 |     n = input.size(0)
160 | 
161 |     input = Variable(input, requires_grad=False).cuda(non_blocking=True)
162 |     target = Variable(target, requires_grad=False).cuda(non_blocking=True)
163 | 
164 |     # get a random minibatch from the search queue with replacement
165 |     input_search, target_search = next(iter(valid_queue))
166 |     input_search = Variable(input_search, requires_grad=False).cuda(non_blocking=True)
167 |     target_search = Variable(target_search, requires_grad=False).cuda(non_blocking=True)
168 | 
169 |     a_regularizer = architect.step(input, target, epoch, args.a_warmup_epochs, args.a_gamma, criterion, args.loss_hessian, valid_queue, input_search, target_search, lr, optimizer, unrolled=args.unrolled, h=h)
170 | 
171 |     optimizer.zero_grad()
172 |     logits = model(input)
173 | 
174 |     if epoch < args.w_warmup_epochs:
175 |       loss = criterion(logits, target)
176 |       w_regularizer = torch.tensor(0, dtype=torch.float)
177 |     else:
178 |       if args.loss_hessian == 'loss_cure':
179 |         reg = loss_cure(model, criterion, lambda_=1, device='cuda')
180 |         w_regularizer, grad_norm = reg.regularizer(input, target, h=h)
181 |       else:
182 |         reg = loss_eigen(model, train_queue, input, target, criterion, full_eigen=False, maxIter=10, tol=1e-2)
183 |         regularizer, _ = reg.regularizer()
184 | 
185 |     loss = criterion(logits, target) + args.w_gamma * w_regularizer
186 |     print(f'epoch={epoch} | step={step} | loss={loss} | w_reg={w_regularizer} | a_reg = {a_regularizer}')
187 | 
188 |     loss.backward()
189 |     nn.utils.clip_grad_norm(model.parameters(), args.grad_clip)
190 |     optimizer.step()
191 | 
192 |     prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
193 |     objs.update(loss.data.item(), n)
194 |     w_regs.update(w_regularizer.data.item(), n)
195 |     a_regs.update(a_regularizer.data.item(), n)
196 |     top1.update(prec1.data.item(), n)
197 |     top5.update(prec5.data.item(), n)
198 | 
199 |     if step % args.report_freq == 0:
200 |       logging.info('train %03d objs %e a_regs %e w_regs %e %f %f', step, objs.avg, a_regs.avg, w_regs.avg, top1.avg, top5.avg)
201 | 
202 |   return top1.avg, objs.avg, a_regs.avg, w_regs.avg
203 | 
204 | 
205 | def infer(valid_queue, model, criterion):
206 |   objs = utils.AvgrageMeter()
207 |   top1 = utils.AvgrageMeter()
208 |   top5 = utils.AvgrageMeter()
209 |   model.eval()
210 | 
211 |   with torch.no_grad():
212 |     for step, (input, target) in enumerate(valid_queue):
213 |       input = Variable(input, requires_grad=False).cuda(non_blocking=True)
214 |       target = Variable(target, requires_grad=False).cuda(non_blocking=True)
215 | 
216 |       logits = model(input)
217 |       loss = criterion(logits, target)
218 | 
219 |       prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
220 |       n = input.size(0)
221 |       objs.update(loss.data.item(), n)
222 |       top1.update(prec1.data.item(), n)
223 |       top5.update(prec5.data.item(), n)
224 | 
225 |       if step % args.report_freq == 0:
226 |         logging.info('valid %03d %e %f %f', step, objs.avg, top1.avg, top5.avg)
227 | 
228 |   return top1.avg, objs.avg
229 | 
230 | 
231 | if __name__ == '__main__':
232 |   main() 
233 | 
234 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "[]"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright [yyyy] [name of copyright owner]
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 


--------------------------------------------------------------------------------
/advrush/regularizer.py:
--------------------------------------------------------------------------------
  1 | # import torch
  2 | # import copy
  3 | # import torch.nn as nn
  4 | from torch.autograd.gradcheck import zero_gradients
  5 | # from utils.utils import progress_bar
  6 | # import numpy as np
  7 | # import matplotlib.pyplot as plt
  8 | # from utils.utils import pgd
  9 | # import torchvision
 10 | # import os
 11 | import torch
 12 | # import torch.nn as nn
 13 | # from torch.autograd import grad
 14 | # import torch.optim as optim
 15 | # import torch.nn.functional as F
 16 | # import torch.backends.cudnn as cudnn
 17 | # from torch.optim.lr_scheduler import StepLR
 18 | # from torch.distributions import uniform
 19 | 
 20 | import hessianflow as hf
 21 | import hessianflow.optimizer.optm_utils as hf_optm_utils
 22 | import hessianflow.optimizer.progressbar as hf_optm_pgb
 23 | 
 24 | class loss_cure():
 25 |     def __init__(self, net, criterion, lambda_, device='cuda'):
 26 |         self.net = net
 27 |         self.criterion = criterion
 28 |         self.lambda_ = lambda_
 29 |         self.device = device
 30 | 
 31 |     def _find_z(self, inputs, targets, h):
 32 | 
 33 |         inputs.requires_grad_()
 34 |         outputs = self.net.eval()(inputs)
 35 |         loss_z = self.criterion(outputs, targets) #self.net.eval()(inputs)
 36 | 
 37 |         loss_z.backward(torch.ones(targets.size(), dtype=torch.float).to(self.device)) #torch.ones(targets.size(), dtype=torch.float).to(self.device)
 38 |         grad = inputs.grad.data + 0.0
 39 |         norm_grad = grad.norm().item()
 40 |         z = torch.sign(grad).detach() + 0.
 41 |         z = 1. * (h) * (z + 1e-7) / (z.reshape(z.size(0), -1).norm(dim=1)[:, None, None, None] + 1e-7)
 42 |         inputs.grad.detach()
 43 |         inputs.grad.zero_()
 44 |         #zero_gradients(inputs)
 45 |         self.net.zero_grad()
 46 | 
 47 |         return z, norm_grad
 48 | 
 49 |     def regularizer(self, inputs, targets, h=3., lambda_=4):
 50 |         '''
 51 |         Regularizer term in CURE
 52 |         '''
 53 |         z, norm_grad = self._find_z(inputs, targets, h)
 54 | 
 55 |         inputs.requires_grad_()
 56 |         outputs_pos = self.net.eval()(inputs + z)
 57 |         outputs_orig = self.net.eval()(inputs)
 58 | 
 59 |         loss_pos = self.criterion(outputs_pos, targets)
 60 |         loss_orig = self.criterion(outputs_orig, targets)
 61 |         grad_diff = \
 62 |         torch.autograd.grad((loss_pos - loss_orig), inputs, grad_outputs=torch.ones(targets.size()).to(self.device),
 63 |                             create_graph=True)[0]
 64 |         reg = grad_diff.reshape(grad_diff.size(0), -1).norm(dim=1)
 65 |         self.net.zero_grad()
 66 | 
 67 |         return torch.sum(self.lambda_ * reg) / float(inputs.size(0)), norm_grad
 68 | 
 69 | class loss_eigen():
 70 |     def __init__(self, net, test_loader, input, target, criterion, full_eigen, maxIter=10, tol=1e-2):
 71 |         self.net = net
 72 |         self.test_loader = test_loader
 73 |         self.criterion = criterion
 74 |         self.full_eigen = full_eigen
 75 |         self.max_iter = maxIter
 76 |         self.tol = tol
 77 |         self.input = input
 78 |         self.target = target
 79 |         self.cuda = True
 80 | 
 81 |     def regularizer(self):
 82 |         if self.full_eigen:
 83 |             eigenvalue, eigenvector = hf.get_eigen_full_dataset(self.net, self.test_loader, self.criterion, self.max_iter, self.tol)
 84 |         else:
 85 |             eigenvalue, eigenvector= hf.get_eigen(self.net, self.input, self.target, self.criterion, self.cuda, self.max_iter, self.tol)
 86 | 
 87 |         return eigenvalue, eigenvector
 88 | 
 89 | # class CURELearner():
 90 | #     def __init__(self, net, trainloader, testloader, device='cuda', lambda_=4,
 91 | #                  path='./checkpoint'):
 92 | #         '''
 93 | #         CURE Class: Implementation of "Robustness via curvature regularization, and vice versa"
 94 | #                     in https://arxiv.org/abs/1811.09716
 95 | #         ================================================
 96 | #         Arguments:
 97 | #
 98 | #         net: PyTorch nn
 99 | #             network structure
100 | #         trainloader: PyTorch Dataloader
101 | #         testloader: PyTorch Dataloader
102 | #         device: 'cpu' or 'cuda' if GPU available
103 | #             type of device to move tensors to
104 | #         lambda_: float
105 | #             power of regularization
106 | #         path: string
107 | #             path to save the best model
108 | #         '''
109 | #         if not torch.cuda.is_available() and device == 'cuda':
110 | #             raise ValueError("cuda is not available")
111 | #
112 | #         self.net = net.to(device)
113 | #         self.criterion = nn.CrossEntropyLoss()
114 | #         self.device = device
115 | #         self.lambda_ = lambda_
116 | #         self.trainloader, self.testloader = trainloader, testloader
117 | #         self.path = path
118 | #         self.test_acc_adv_best = 0
119 | #         self.train_loss, self.train_acc, self.train_curv = [], [], []
120 | #         self.test_loss, self.test_acc_adv, self.test_acc_clean, self.test_curv = [], [], [], []
121 | #
122 | #     def set_optimizer(self, optim_alg='Adam', args={'lr': 1e-4}, scheduler=None, args_scheduler={}):
123 | #         '''
124 | #         Setting the optimizer of the network
125 | #         ================================================
126 | #         Arguments:
127 | #
128 | #         optim_alg : string
129 | #             Name of the optimizer
130 | #         args: dict
131 | #             Parameter of the optimizer
132 | #         scheduler: optim.lr_scheduler
133 | #             Learning rate scheduler
134 | #         args_scheduler : dict
135 | #             Parameters of the scheduler
136 | #         '''
137 | #         self.optimizer = getattr(optim, optim_alg)(self.net.parameters(), **args)
138 | #         if not scheduler:
139 | #             self.scheduler = optim.lr_scheduler.StepLR(self.optimizer, step_size=10 ** 6, gamma=1)
140 | #         else:
141 | #             self.scheduler = getattr(optim.lr_scheduler, scheduler)(self.optimizer, **args_scheduler)
142 | #
143 | #     def train(self, h=[3], epochs=15):
144 | #         '''
145 | #         Training the network
146 | #         ================================================
147 | #         Arguments:
148 | #
149 | #         h : list with length less than the number of epochs
150 | #             Different h for different epochs of training,
151 | #             can have a single number or a list of floats for each epoch
152 | #         epochs : int
153 | #             Number of epochs
154 | #         '''
155 | #         if len(h) > epochs:
156 | #             raise ValueError('Length of h should be less than number of epochs')
157 | #         if len(h) == 1:
158 | #             h_all = epochs * [h[0]]
159 | #         else:
160 | #             h_all = epochs * [1.0]
161 | #             h_all[:len(h)] = list(h[:])
162 | #             h_all[len(h):] = (epochs - len(h)) * [h[-1]]
163 | #
164 | #         for epoch, h_tmp in enumerate(h_all):
165 | #             self._train(epoch, h=h_tmp)
166 | #             self.test(epoch, h=h_tmp)
167 | #             self.scheduler.step()
168 | #
169 | #     def _train(self, epoch, h):
170 | #         '''
171 | #         Training the model
172 | #         '''
173 | #         print('\nEpoch: %d' % epoch)
174 | #         train_loss, total = 0, 0
175 | #         num_correct = 0
176 | #         curv, curvature, norm_grad_sum = 0, 0, 0
177 | #         for batch_idx, (inputs, targets) in enumerate(self.trainloader):
178 | #             inputs, targets = inputs.to(self.device), targets.to(self.device)
179 | #             self.optimizer.zero_grad()
180 | #             total += targets.size(0)
181 | #             outputs = self.net.train()(inputs)
182 | #
183 | #             regularizer, grad_norm = self.regularizer(inputs, targets, h=h)
184 | #
185 | #             curvature += regularizer.item()
186 | #             neg_log_likelihood = self.criterion(outputs, targets)
187 | #             loss = neg_log_likelihood + regularizer
188 | #             loss.backward()
189 | #             self.optimizer.step()
190 | #             self.optimizer.zero_grad()
191 | #
192 | #             train_loss += loss.item()
193 | #             _, predicted = outputs.max(1)
194 | #             outcome = predicted.data == targets
195 | #             num_correct += outcome.sum().item()
196 | #
197 | #             progress_bar(batch_idx, len(self.trainloader), 'Loss: %.3f | Acc: %.3f%% (%d/%d) | curvature: %.3f ' % \
198 | #                          (train_loss / (batch_idx + 1), 100. * num_correct / total, num_correct, total,
199 | #                           curvature / (batch_idx + 1)))
200 | #
201 | #         self.train_loss.append(train_loss / (batch_idx + 1))
202 | #         self.train_acc.append(100. * num_correct / total)
203 | #         self.train_curv.append(curvature / (batch_idx + 1))
204 | #
205 | #     def test(self, epoch, h, num_pgd_steps=20):
206 | #         '''
207 | #         Testing the model
208 | #         '''
209 | #         test_loss, adv_acc, total, curvature, clean_acc, grad_sum = 0, 0, 0, 0, 0, 0
210 | #
211 | #         for batch_idx, (inputs, targets) in enumerate(self.testloader):
212 | #             inputs, targets = inputs.to(self.device), targets.to(self.device)
213 | #             outputs = self.net.eval()(inputs)
214 | #             loss = self.criterion(outputs, targets)
215 | #             test_loss += loss.item()
216 | #             _, predicted = outputs.max(1)
217 | #             clean_acc += predicted.eq(targets).sum().item()
218 | #             total += targets.size(0)
219 | #
220 | #             inputs_pert = inputs + 0.
221 | #             eps = 5. / 255. * 8
222 | #             r = pgd(inputs, self.net.eval(), epsilon=[eps], targets=targets, step_size=0.04,
223 | #                     num_steps=num_pgd_steps, epsil=eps)
224 | #
225 | #             inputs_pert = inputs_pert + eps * torch.Tensor(r).to(self.device)
226 | #             outputs = self.net(inputs_pert)
227 | #             probs, predicted = outputs.max(1)
228 | #             adv_acc += predicted.eq(targets).sum().item()
229 | #             cur, norm_grad = self.regularizer(inputs, targets, h=h)
230 | #             grad_sum += norm_grad
231 | #             curvature += cur.item()
232 | #             test_loss += cur.item()
233 | #
234 | #         print(
235 | #             f'epoch = {epoch}, adv_acc = {100. * adv_acc / total}, clean_acc = {100. * clean_acc / total}, loss = {test_loss / (batch_idx + 1)}', \
236 | #             f'curvature = {curvature / (batch_idx + 1)}')
237 | #
238 | #         self.test_loss.append(test_loss / (batch_idx + 1))
239 | #         self.test_acc_adv.append(100. * adv_acc / total)
240 | #         self.test_acc_clean.append(100. * clean_acc / total)
241 | #         self.test_curv.append(curvature / (batch_idx + 1))
242 | #         if self.test_acc_adv[-1] > self.test_acc_adv_best:
243 | #             self.test_acc_adv_best = self.test_acc_adv[-1]
244 | #             print(f'Saving the best model to {self.path}')
245 | #             self.save_model(self.path)
246 | #
247 | #         return test_loss / (batch_idx + 1), 100. * adv_acc / total, 100. * clean_acc / total, curvature / (
248 | #                     batch_idx + 1)
249 | #
250 | #     def _find_z(self, inputs, targets, h):
251 | #         '''
252 | #         Finding the direction in the regularizer
253 | #         '''
254 | #         inputs.requires_grad_()
255 | #         outputs = self.net.eval()(inputs)
256 | #         loss_z = self.criterion(self.net.eval()(inputs), targets)
257 | #         loss_z.backward(torch.ones(targets.size()).to(self.device))
258 | #         grad = inputs.grad.data + 0.0
259 | #         norm_grad = grad.norm().item()
260 | #         z = torch.sign(grad).detach() + 0.
261 | #         z = 1. * (h) * (z + 1e-7) / (z.reshape(z.size(0), -1).norm(dim=1)[:, None, None, None] + 1e-7)
262 | #         zero_gradients(inputs)
263 | #         self.net.zero_grad()
264 | #
265 | #         return z, norm_grad
266 | #
267 | #     def regularizer(self, inputs, targets, h=3., lambda_=4):
268 | #         '''
269 | #         Regularizer term in CURE
270 | #         '''
271 | #         z, norm_grad = self._find_z(inputs, targets, h)
272 | #
273 | #         inputs.requires_grad_()
274 | #         outputs_pos = self.net.eval()(inputs + z)
275 | #         outputs_orig = self.net.eval()(inputs)
276 | #
277 | #         loss_pos = self.criterion(outputs_pos, targets)
278 | #         loss_orig = self.criterion(outputs_orig, targets)
279 | #         grad_diff = \
280 | #         torch.autograd.grad((loss_pos - loss_orig), inputs, grad_outputs=torch.ones(targets.size()).to(self.device),
281 | #                             create_graph=True)[0]
282 | #         reg = grad_diff.reshape(grad_diff.size(0), -1).norm(dim=1)
283 | #         self.net.zero_grad()
284 | #
285 | #         return torch.sum(self.lambda_ * reg) / float(inputs.size(0)), norm_grad
286 | #
287 | #     def save_model(self, path):
288 | #         '''
289 | #         Saving the model
290 | #         ================================================
291 | #         Arguments:
292 | #
293 | #         path: string
294 | #             path to save the model
295 | #         '''
296 | #
297 | #         print('Saving...')
298 | #
299 | #         state = {
300 | #             'net': self.net.state_dict(),
301 | #             'optimizer': self.optimizer.state_dict()
302 | #         }
303 | #         torch.save(state, path)
304 | #
305 | #     def import_model(self, path):
306 | #         '''
307 | #         Importing the pre-trained model
308 | #         '''
309 | #         checkpoint = torch.load(path)
310 | #         self.net.load_state_dict(checkpoint['net'])
311 | #
312 | #
313 | #
314 | #
315 | 


--------------------------------------------------------------------------------