├── advrush
├── hessianflow
│ ├── optimizer
│ │ ├── __init__.py
│ │ ├── __pycache__
│ │ │ ├── absa.cpython-36.pyc
│ │ │ ├── __init__.cpython-36.pyc
│ │ │ ├── baseline.cpython-36.pyc
│ │ │ ├── optm_utils.cpython-36.pyc
│ │ │ └── progressbar.cpython-36.pyc
│ │ ├── baseline.py
│ │ ├── progressbar.py
│ │ ├── optm_utils.py
│ │ └── absa.py
│ ├── __pycache__
│ │ ├── eigen.cpython-36.pyc
│ │ ├── utils.cpython-36.pyc
│ │ └── __init__.cpython-36.pyc
│ ├── __init__.py
│ ├── utils.py
│ └── eigen.py
├── visualize.py
├── genotypes.py
├── operations.py
├── architect.py
├── trades.py
├── utils.py
├── model_search.py
├── model.py
├── adv_train.py
├── train_search.py
└── regularizer.py
├── README.md
├── eval
├── genotypes.py
├── operations.py
├── utils.py
├── model.py
└── pgd_attack.py
└── LICENSE
/advrush/hessianflow/optimizer/__init__.py:
--------------------------------------------------------------------------------
1 | from .baseline import baseline
2 | from .absa import absa
3 |
--------------------------------------------------------------------------------
/advrush/hessianflow/__pycache__/eigen.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nutellamok/advrush/HEAD/advrush/hessianflow/__pycache__/eigen.cpython-36.pyc
--------------------------------------------------------------------------------
/advrush/hessianflow/__pycache__/utils.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nutellamok/advrush/HEAD/advrush/hessianflow/__pycache__/utils.cpython-36.pyc
--------------------------------------------------------------------------------
/advrush/hessianflow/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Hessian tool for neural networks based on pytorch 0.4.1
3 | """
4 |
5 | name = 'Hessian Flow'
6 |
7 | from .eigen import *
8 |
--------------------------------------------------------------------------------
/advrush/hessianflow/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nutellamok/advrush/HEAD/advrush/hessianflow/__pycache__/__init__.cpython-36.pyc
--------------------------------------------------------------------------------
/advrush/hessianflow/optimizer/__pycache__/absa.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nutellamok/advrush/HEAD/advrush/hessianflow/optimizer/__pycache__/absa.cpython-36.pyc
--------------------------------------------------------------------------------
/advrush/hessianflow/optimizer/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nutellamok/advrush/HEAD/advrush/hessianflow/optimizer/__pycache__/__init__.cpython-36.pyc
--------------------------------------------------------------------------------
/advrush/hessianflow/optimizer/__pycache__/baseline.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nutellamok/advrush/HEAD/advrush/hessianflow/optimizer/__pycache__/baseline.cpython-36.pyc
--------------------------------------------------------------------------------
/advrush/hessianflow/optimizer/__pycache__/optm_utils.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nutellamok/advrush/HEAD/advrush/hessianflow/optimizer/__pycache__/optm_utils.cpython-36.pyc
--------------------------------------------------------------------------------
/advrush/hessianflow/optimizer/__pycache__/progressbar.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nutellamok/advrush/HEAD/advrush/hessianflow/optimizer/__pycache__/progressbar.cpython-36.pyc
--------------------------------------------------------------------------------
/advrush/visualize.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import genotypes
3 | from graphviz import Digraph
4 |
5 |
def plot(genotype, filename):
    """Draw one cell of an architecture with Graphviz and render it to PDF.

    :param genotype: sequence of ``(op, j)`` pairs.  Entries ``2*i`` and
        ``2*i + 1`` are the two incoming edges of intermediate node ``i``.
        ``j == 0`` and ``j == 1`` select the cell inputs ``c_{k-2}`` and
        ``c_{k-1}``; any other ``j`` selects intermediate node ``j - 2``.
    :param filename: output name handed to ``Digraph.render``.
    """
    graph = Digraph(
        format='pdf',
        edge_attr=dict(fontsize='20', fontname="times"),
        node_attr=dict(style='filled', shape='rect', align='center', fontsize='20',
                       height='0.5', width='0.5', penwidth='2', fontname="times"),
        engine='dot')
    graph.body.extend(['rankdir=LR'])

    # The two cell inputs, keyed by the index used inside the genotype.
    input_names = {0: "c_{k-2}", 1: "c_{k-1}"}
    for name in input_names.values():
        graph.node(name, fillcolor='darkseagreen2')

    assert len(genotype) % 2 == 0
    steps = len(genotype) // 2
    node_names = [str(idx) for idx in range(steps)]

    for name in node_names:
        graph.node(name, fillcolor='lightblue')

    # Two incoming edges per intermediate node.
    for idx, target in enumerate(node_names):
        for op, j in (genotype[2 * idx], genotype[2 * idx + 1]):
            source = input_names.get(j)
            if source is None:
                source = str(j - 2)
            graph.edge(source, target, label=op, fillcolor="gray")

    # Every intermediate node feeds the cell output.
    graph.node("c_{k}", fillcolor='palegoldenrod')
    for name in node_names:
        graph.edge(name, "c_{k}", fillcolor="gray")

    graph.render(filename, view=True)
39 |
40 |
if __name__ == '__main__':
    # Command-line entry point: plot the normal and reduction cells of the
    # architecture named by the single positional argument.
    if len(sys.argv) != 2:
        print("usage:\n python {} ARCH_NAME".format(sys.argv[0]))
        sys.exit(1)

    genotype_name = sys.argv[1]
    # NOTE: the name used to be resolved with eval('genotypes.<name>'), which
    # executes arbitrary expressions taken from the command line.  A plain
    # attribute lookup resolves the same module-level names and still raises
    # AttributeError for unknown ones.
    try:
        genotype = getattr(genotypes, genotype_name)
    except AttributeError:
        print("{} is not specified in genotypes.py".format(genotype_name))
        sys.exit(1)

    plot(genotype.normal, "advrush_normal")
    plot(genotype.reduce, "advrush_reduction")
55 |
56 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # AdvRush
2 | Official Code for [AdvRush: Searching for Adversarially Robust Neural Architectures](https://openaccess.thecvf.com/content/ICCV2021/html/Mok_AdvRush_Searching_for_Adversarially_Robust_Neural_Architectures_ICCV_2021_paper.html) (ICCV '21)
3 |
4 | ## Environment Setup
5 | ```
6 | Python == 3.6.12, PyTorch == 1.2.0, torchvision == 0.4.0
7 | ```
8 |
9 | ## AdvRush Search Process
10 | ```
11 | cd advrush && python train_search.py --batch_size 32 --gpu 0 --epochs 60 --a_gamma 0.01 --a_warmup_epochs 50 --w_warmup_epochs 60 --loss_hessian loss_cure
12 | ```
13 |
14 | ## Adversarial Training
15 | ```
16 | cd advrush && python adv_train.py --batch_size 64 --gpu 0 --epochs 200 --adv_loss pgd --arch ADVRUSH
17 | ```
18 |
19 | ## Evaluation under PGD Attack
20 | Prior to the evaluation process, add all necessary checkpoint files (preferably in the form of .pth.tar) to the /eval/checkpoints folder.
21 | To conduct white-box attacks,
22 | ```
23 | cd eval &&
24 | python pgd_attack.py --white-box-attack True --test-batch-size 10 --arch [arch_name] --checkpoint [./checkpoints/file_name.pth.tar] --data_type [cifar10/svhn]
25 | ```
26 |
27 | To conduct black-box attacks,
28 | ```
29 | cd eval &&
30 | python pgd_attack.py --test-batch-size 10 --target_arch [target_arch] --target_checkpoint [./checkpoints/target_file.pth.tar] --source_arch [source_arch] --source_checkpoint [./checkpoints/source_file.pth.tar] --data_type cifar10
31 | ```
32 |
33 | ## References
34 |
35 | DARTS: Differentiable Architecture Search [ICLR '19] [code](https://github.com/quark0/darts) [paper](https://arxiv.org/abs/1806.09055)
36 |
37 | Robustness via Curvature Regularization, and Vice Versa [CVPR '19] [code](https://github.com/F-Salehi/CURE_robustness) [paper](https://openaccess.thecvf.com/content_CVPR_2019/papers/Moosavi-Dezfooli_Robustness_via_Curvature_Regularization_and_Vice_Versa_CVPR_2019_paper.pdf)
38 |
39 | Tradeoff-inspired Adversarial Defense via Surrogate-loss Minimization [ICML '19] [code](https://github.com/yaodongyu/TRADES) [paper](https://arxiv.org/pdf/1901.08573.pdf)
40 |
--------------------------------------------------------------------------------
/advrush/hessianflow/utils.py:
--------------------------------------------------------------------------------
1 | #*
2 | # @file Different utility functions
3 | # Copyright (c) Zhewei Yao, Amir Gholami
4 | # All rights reserved.
5 | # This file is part of HessianFlow library.
6 | #
7 | # HessianFlow is free software: you can redistribute it and/or modify
8 | # it under the terms of the GNU General Public License as published by
9 | # the Free Software Foundation, either version 3 of the License, or
10 | # (at your option) any later version.
11 | #
12 | # HessianFlow is distributed in the hope that it will be useful,
13 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | # GNU General Public License for more details.
16 | #
17 | # You should have received a copy of the GNU General Public License
18 | # along with HessianFlow. If not, see <http://www.gnu.org/licenses/>.
19 | #*
20 |
21 | import torch
22 | import math
23 | from torch.autograd import Variable
24 | import numpy as np
25 |
26 |
def group_product(xs, ys):
    """
    Inner product of two lists of tensors.

    Each pair (x, y) is multiplied element-wise and summed; the per-pair sums
    are then added together.
    :param xs: list of tensors
    :param ys: list of tensors, paired with xs by position
    :return: the accumulated sum (the integer 0 when the lists are empty)
    """
    total = 0
    for x, y in zip(xs, ys):
        total = total + torch.sum(x * y)
    return total
35 |
def group_add(params, update, alpha=1):
    """
    In-place update: params[i] <- params[i] + update[i] * alpha.

    :param params: list of variables; their ``.data`` is modified in place
    :param update: list of data, indexed in step with params
    :param alpha: scalar multiplier applied to every update
    :return: the same ``params`` list that was passed in
    """
    for idx, param in enumerate(params):
        scaled = update[idx] * alpha
        param.data.add_(scaled)
    return params
46 |
def normalization(v):
    """
    Scale a list of tensors so that, taken together, they have (almost) unit
    Euclidean norm.

    The norm is the square root of group_product(v, v); 1e-6 is added to it
    before dividing so that an all-zero input does not divide by zero.
    return: new list of normalized tensors
    """
    norm = (group_product(v, v) ** 0.5).cpu().item()
    denom = norm + 1e-6
    return [vi / denom for vi in v]
57 |
58 |
def get_params_grad(model):
    """
    Collect the model parameters and copies of their gradients.

    Every parameter is returned in ``params``.  ``grads`` only receives an
    entry (``param.grad + 0.``, i.e. a new tensor) for parameters whose
    ``.grad`` is not None, so the two lists can differ in length.
    """
    params = list(model.parameters())
    grads = [p.grad + 0. for p in params if p.grad is not None]
    return params, grads
71 |
def hessian_vector_product(gradsH, params, v):
    """
    Hessian-vector product Hv, obtained by differentiating the gradients
    once more.

    gradsH: gradients at the current point (must still carry their graph),
    params: the variables the gradients were taken with respect to,
    v: the vector(s) to multiply with, passed as grad_outputs.
    The graph is retained so the call can be repeated with a different v.
    """
    return torch.autograd.grad(
        outputs=gradsH,
        inputs=params,
        grad_outputs=v,
        retain_graph=True,
        only_inputs=True,
    )
81 |
82 |
--------------------------------------------------------------------------------
/advrush/hessianflow/optimizer/baseline.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | import numpy as np
3 | import torch
4 | import torch.nn as nn
5 | import torch.nn.functional as F
6 | import torch.optim as optim
7 | from torchvision import datasets, transforms
8 | from torch.autograd import Variable
9 |
10 | from .progressbar import progress_bar
11 | from .optm_utils import exp_lr_scheduler, test
12 |
13 | # import hessianflow
14 |
def baseline(model, train_loader, test_loader, criterion, optimizer, epochs, lr_decay_epoch,
             lr_decay_ratio, batch_size = 128, max_large_ratio = 1, cuda = True):
    """
    baseline method training, i.e. vanilla training schedule

    Gradients are accumulated over ``max_large_ratio`` consecutive batches
    before each optimizer step; each batch loss is divided by that ratio.

    :param model: network to train
    :param train_loader: iterable yielding (data, target) batches
    :param test_loader: loader handed to ``test`` after every epoch
    :param criterion: loss function called as criterion(output, target)
    :param optimizer: optimizer stepping the model parameters
    :param epochs: number of epochs to run (counted from 1)
    :param lr_decay_epoch: container of epochs after which the lr is decayed
    :param lr_decay_ratio: factor passed to ``exp_lr_scheduler``
    :param batch_size: batches with fewer samples than this are skipped
        (previously a hard-coded 128, which matches the default)
    :param max_large_ratio: number of batches accumulated per update
    :param cuda: move each batch to the GPU when True
    :return: (model, number of optimizer steps taken)
    """

    inner_loop = 0
    num_updates = 0
    large_ratio = max_large_ratio
    # TODO: assert that shuffle is set for train_loader
    # TODO: assert and explain large ratio
    # TODO: warn when train_loader uses a large batch size and point the user
    # to large_ratio instead
    for epoch in range(1, epochs + 1):
        print('\nCurrent Epoch: ', epoch)
        print('\nTraining')
        train_loss = 0.
        total_num = 0.
        correct = 0.

        for batch_idx, (data, target) in enumerate(train_loader):
            # Skip incomplete batches (typically the last one of an epoch).
            if target.size(0) < batch_size:
                continue
            model.train()
            # gather input and target for large batch training
            inner_loop += 1
            # get small model update
            if cuda:
                data, target = data.cuda(), target.cuda()
            output = model(data)
            loss = criterion(output, target)/float(large_ratio)
            loss.backward()
            # Undo the 1/large_ratio scaling so the running loss is per sample.
            train_loss += loss.item()*target.size(0)*float(large_ratio)
            total_num += target.size(0)
            _, predicted = output.max(1)
            correct += predicted.eq(target).sum().item()

            # Step only once large_ratio batches have been accumulated.
            if inner_loop % large_ratio == 0:
                num_updates += 1
                optimizer.step()
                inner_loop = 0
                optimizer.zero_grad()

            progress_bar(batch_idx, len(train_loader), 'Loss: %.3f | Acc: %.3f%% (%d/%d)'
                         % (train_loss / total_num,
                            100. * correct / total_num, correct, total_num))

        if epoch in lr_decay_epoch:
            exp_lr_scheduler(optimizer, decay_ratio=lr_decay_ratio)

        test(model, test_loader)
    return model, num_updates
67 |
--------------------------------------------------------------------------------
/advrush/hessianflow/optimizer/progressbar.py:
--------------------------------------------------------------------------------
1 | '''
2 | The progress_bar is from:
3 | https://github.com/noahgolmant/skeletor/blob/master/skeletor/utils.py
4 | '''
5 |
6 | import os
7 | import sys
8 | import time
9 | import math
10 |
11 | ######## fancy progress bar
# Best-effort probe of the terminal width.  `stty size` prints "rows cols";
# without a terminal (or without stty) the output is empty or malformed and
# the unpacking / int() conversion raises ValueError, so fall back to 100.
try:
    with os.popen('stty size', 'r') as _stty_pipe:  # close the pipe when done
        _, term_width = _stty_pipe.read().split()
    term_width = int(term_width)
except (OSError, ValueError):
    term_width = 100


TOTAL_BAR_LENGTH = 65.
# Timestamps shared with progress_bar: time of the previous call and the
# start time of the current bar.
last_time = time.time()
begin_time = last_time
def progress_bar(current, total, msg=None):
    """Write a one-line textual progress bar to stdout.

    ``current`` is the zero-based step index and ``total`` the number of
    steps.  The line shows the bar, the time since the previous call, the
    time since the bar started (reset when ``current == 0``), the optional
    ``msg`` and a ``current+1/total`` counter.  The line ends with a carriage
    return until the last step, which ends with a newline.
    """
    global last_time, begin_time
    if current == 0:
        begin_time = time.time()  # Reset for new bar.

    out = sys.stdout
    bar_width = int(TOTAL_BAR_LENGTH)

    done_len = int(TOTAL_BAR_LENGTH * current / total)
    todo_len = int(TOTAL_BAR_LENGTH - done_len) - 1
    out.write(' [' + '=' * done_len + '>' + '.' * todo_len + ']')

    now = time.time()
    step_time = now - last_time
    last_time = now
    tot_time = now - begin_time

    fields = [
        ' Step: %s' % format_time(step_time),
        ' | Tot: %s' % format_time(tot_time),
    ]
    if msg:
        fields.append(' | ' + msg)
    status = ''.join(fields)

    out.write(status)
    # Pad with spaces up to the terminal width.
    out.write(' ' * (term_width - bar_width - len(status) - 3))

    # Go back to the center of the bar.
    out.write('\b' * (term_width - int(TOTAL_BAR_LENGTH / 2) + 2))
    out.write(' %d/%d ' % (current + 1, total))

    out.write('\r' if current < total - 1 else '\n')
    out.flush()
64 |
def format_time(seconds):
    """Format a duration in seconds as a short string such as '1h1m'.

    The duration is split into days, hours, minutes, whole seconds and
    milliseconds; only the first two non-zero units (largest first) are
    shown.  Returns '0ms' when every unit is zero.
    """
    remaining = seconds

    days = int(remaining / 3600/24)
    remaining = remaining - days*3600*24
    hours = int(remaining / 3600)
    remaining = remaining - hours*3600
    minutes = int(remaining / 60)
    remaining = remaining - minutes*60
    whole_seconds = int(remaining)
    remaining = remaining - whole_seconds
    millis = int(remaining*1000)

    units = (
        (days, 'D'),
        (hours, 'h'),
        (minutes, 'm'),
        (whole_seconds, 's'),
        (millis, 'ms'),
    )
    shown = [str(value) + suffix for value, suffix in units if value > 0][:2]
    return ''.join(shown) or '0ms'
96 |
--------------------------------------------------------------------------------
/advrush/hessianflow/optimizer/optm_utils.py:
--------------------------------------------------------------------------------
1 |
2 | #*
3 | # @file optm_utils.py different utility functions
4 | # This file is part of HessianFlow library.
5 | #
6 | # HessianFlow is free software: you can redistribute it and/or modify
7 | # it under the terms of the GNU General Public License as published by
8 | # the Free Software Foundation, either version 3 of the License, or
9 | # (at your option) any later version.
10 | #
11 | # HessianFlow is distributed in the hope that it will be useful,
12 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | # GNU General Public License for more details.
15 | #
16 | # You should have received a copy of the GNU General Public License
17 | # along with HessianFlow. If not, see <http://www.gnu.org/licenses/>.
18 | #*
19 | from __future__ import print_function
20 | import numpy as np
21 | import torch
22 | import torch.nn as nn
23 | import torch.nn.functional as F
24 | import torch.optim as optim
25 | from torchvision import datasets, transforms
26 | from torch.autograd import Variable
27 | from .progressbar import progress_bar
28 |
29 |
def fgsm(model, data, target, eps, cuda = True):
    """Generate adversarial inputs with the fast gradient sign method.

    The model is switched to eval mode, the cross-entropy loss is
    back-propagated to the input, and the input is moved by ``eps`` in the
    direction of the gradient sign.  The result is clamped to the
    [min, max] range of the original input and returned on the CPU.

    Args:
        model: network to attack
        data: input image(s) to perturb; ``requires_grad`` is set on it
        target: labels for the cross-entropy loss
        eps: step size of the perturbation
        cuda: move data and target to the GPU first when True
    """
    model.eval()
    if cuda:
        data, target = data.cuda(), target.cuda()
    data.requires_grad = True
    model.zero_grad()

    loss = F.cross_entropy(model(data), target)
    loss.backward(create_graph = False)

    clean = data.data
    step = eps * torch.sign(data.grad.data)
    lower, upper = torch.min(clean), torch.max(clean)
    adversarial = torch.clamp(clean + step, lower, upper)

    return adversarial.cpu()
49 |
def exp_lr_scheduler(optimizer, decay_ratio = 0.1):
    """
    Multiply the learning rate of every parameter group by decay_ratio.

    The optimizer is modified in place and also returned.
    """
    for group in optimizer.param_groups:
        group['lr'] = group['lr'] * decay_ratio
    return optimizer
57 |
58 |
def test(model, test_loader, cuda = True):
    """
    Evaluate the accuracy of model on test_loader.

    :param model: network to evaluate; switched to eval mode
    :param test_loader: iterable yielding (inputs, targets) batches
    :param cuda: move each batch to the GPU when True (the default, which is
        what this function previously always did); pass False to evaluate a
        CPU model
    :return: accuracy in percent over all samples seen
    """
    print('\nTesting')
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(test_loader):
            if cuda:
                inputs, targets = inputs.cuda(), targets.cuda()
            outputs = model(inputs)
            # Predicted class = index of the largest output along dim 1.
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()

            progress_bar(batch_idx, len(test_loader), 'Acc: %.3f%% (%d/%d)'
                         % (100. * correct/total, correct, total))

    return correct * 100 / total
79 |
--------------------------------------------------------------------------------
/advrush/hessianflow/eigen.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import math
3 | from torch.autograd import Variable
4 | import numpy as np
5 |
6 | from .utils import *
7 |
8 |
def get_eigen(model, inputs, targets, criterion, cuda = True, maxIter = 50, tol = 1e-3):
    """
    Estimate the top Hessian eigenvalue of the loss with respect to the model
    parameters, and the corresponding eigenvector, by power iteration on a
    single batch.

    :param model: network; it is left in evaluation mode
    :param inputs: input batch
    :param targets: targets for criterion
    :param criterion: loss function called as criterion(outputs, targets)
    :param cuda: run on the GPU when True, otherwise on the CPU
    :param maxIter: maximum number of power iterations
    :param tol: stop once the relative change of the estimate is below tol
    :return: (eigenvalue, eigenvector as a list of tensors, one per parameter)
    """
    if cuda:
        inputs, targets = inputs.cuda(), targets.cuda()
        device = 'cuda'
    else:
        device = 'cpu'
    # change the model to evaluation mode, otherwise the batch Normalization Layer will change.
    # If you call this function during training, remember to change the mode back to training mode.
    model.eval()

    outputs = model(inputs)
    loss = criterion(outputs, targets)
    # Keep the graph of the gradients so they can be differentiated again.
    loss.backward(create_graph = True)

    params, gradsH = get_params_grad(model)
    # Random unit-norm starting vector.
    v = [torch.randn(p.size()).to(device) for p in params]
    v = normalization(v)

    eigenvalue = None

    for i in range(maxIter):
        model.zero_grad()
        Hv = hessian_vector_product(gradsH, params, v)
        eigenvalue_tmp = group_product(Hv, v).cpu().item()
        v = normalization(Hv)
        if eigenvalue is not None:
            # The small constant keeps the relative-change test well defined
            # when the previous estimate is exactly zero.
            if abs(eigenvalue - eigenvalue_tmp) / (abs(eigenvalue) + 1e-6) < tol:
                return eigenvalue_tmp, v
        eigenvalue = eigenvalue_tmp
    return eigenvalue, v
46 |
def get_eigen_full_dataset(model, dataloader, criterion, cuda = True, maxIter = 50, tol = 1e-3):
    """
    Estimate the top Hessian eigenvalue of the loss with respect to the model
    parameters, and the corresponding eigenvector, by power iteration where
    every Hessian-vector product is accumulated over the whole dataloader.
    Notice, this is very expensive: each iteration is a full pass over the data.

    :param model: network; it is left in evaluation mode
    :param dataloader: iterable yielding (inputs, targets) batches; batches
        smaller than the first batch seen are skipped
    :param criterion: loss function called as criterion(outputs, targets)
    :param cuda: run on the GPU when True, otherwise on the CPU
    :param maxIter: maximum number of power iterations
    :param tol: stop once the relative change of the estimate is below tol
    :return: (eigenvalue, eigenvector as a list of tensors, one per parameter)
    """
    if cuda:
        device = 'cuda'
    else:
        device = 'cpu'
    # change the model to evaluation mode, otherwise the batch Normalization Layer will change.
    # If you call this function during training, remember to change the mode back to training mode.
    model.eval()

    params, _ = get_params_grad(model)
    # Random unit-norm starting vector.
    v = [torch.randn(p.size()).to(device) for p in params]
    v = normalization(v)

    batch_size = None
    eigenvalue = None

    for i in range(maxIter):
        # Accumulated Hessian-vector product over all batches of this pass.
        THv = [torch.zeros(p.size()).to(device) for p in params]
        counter = 0
        for inputs, targets in dataloader:

            # The first batch seen defines the reference batch size.
            if batch_size is None:
                batch_size = targets.size(0)

            if targets.size(0) < batch_size:
                continue

            model.zero_grad()
            outputs = model(inputs.to(device))
            loss = criterion(outputs, targets.to(device))
            loss.backward(create_graph=True)

            params, gradsH = get_params_grad(model)
            Hv = torch.autograd.grad(gradsH, params, grad_outputs = v, only_inputs = True, retain_graph = False)

            THv = [THv1 + Hv1 + 0. for THv1, Hv1 in zip(THv, Hv)]
            counter += 1

        # Average over the batches that were actually used.
        eigenvalue_tmp = group_product(THv, v).cpu().item() / float(counter)
        v = normalization(THv)

        if eigenvalue is not None:
            # The small constant keeps the relative-change test well defined
            # when the previous estimate is exactly zero.
            if abs(eigenvalue - eigenvalue_tmp) / (abs(eigenvalue) + 1e-6) < tol:
                return eigenvalue_tmp, v
        eigenvalue = eigenvalue_tmp

    return eigenvalue, v
103 |
--------------------------------------------------------------------------------
/advrush/genotypes.py:
--------------------------------------------------------------------------------
1 | from collections import namedtuple
2 |
# A cell description: edge lists for the normal and reduction cells plus the
# node indices listed for each cell's *_concat field.
Genotype = namedtuple(
    'Genotype',
    ['normal', 'normal_concat', 'reduce', 'reduce_concat'],
)

# Candidate operation names.
PRIMITIVES = [
    'none',
    'max_pool_3x3', 'avg_pool_3x3',
    'skip_connect',
    'sep_conv_3x3', 'sep_conv_5x5',
    'dil_conv_3x3', 'dil_conv_5x5',
]

# In every genotype below the (operation, input index) edges are written two
# per line.

NASNet = Genotype(
    normal=[
        ('sep_conv_5x5', 1), ('sep_conv_3x3', 0),
        ('sep_conv_5x5', 0), ('sep_conv_3x3', 0),
        ('avg_pool_3x3', 1), ('skip_connect', 0),
        ('avg_pool_3x3', 0), ('avg_pool_3x3', 0),
        ('sep_conv_3x3', 1), ('skip_connect', 1),
    ],
    normal_concat=[2, 3, 4, 5, 6],
    reduce=[
        ('sep_conv_5x5', 1), ('sep_conv_7x7', 0),
        ('max_pool_3x3', 1), ('sep_conv_7x7', 0),
        ('avg_pool_3x3', 1), ('sep_conv_5x5', 0),
        ('skip_connect', 3), ('avg_pool_3x3', 2),
        ('sep_conv_3x3', 2), ('max_pool_3x3', 1),
    ],
    reduce_concat=[4, 5, 6],
)

AmoebaNet = Genotype(
    normal=[
        ('avg_pool_3x3', 0), ('max_pool_3x3', 1),
        ('sep_conv_3x3', 0), ('sep_conv_5x5', 2),
        ('sep_conv_3x3', 0), ('avg_pool_3x3', 3),
        ('sep_conv_3x3', 1), ('skip_connect', 1),
        ('skip_connect', 0), ('avg_pool_3x3', 1),
    ],
    normal_concat=[4, 5, 6],
    reduce=[
        ('avg_pool_3x3', 0), ('sep_conv_3x3', 1),
        ('max_pool_3x3', 0), ('sep_conv_7x7', 2),
        ('sep_conv_7x7', 0), ('avg_pool_3x3', 1),
        ('max_pool_3x3', 0), ('max_pool_3x3', 1),
        ('conv_7x1_1x7', 0), ('sep_conv_3x3', 5),
    ],
    reduce_concat=[3, 4, 6],
)

ADVRUSH = Genotype(
    normal=[
        ('sep_conv_3x3', 1), ('sep_conv_3x3', 0),
        ('sep_conv_5x5', 0), ('sep_conv_3x3', 1),
        ('sep_conv_3x3', 1), ('sep_conv_3x3', 0),
        ('skip_connect', 0), ('sep_conv_3x3', 1),
    ],
    normal_concat=range(2, 6),
    reduce=[
        ('sep_conv_3x3', 0), ('sep_conv_3x3', 1),
        ('skip_connect', 0), ('dil_conv_3x3', 2),
        ('skip_connect', 0), ('avg_pool_3x3', 1),
        ('skip_connect', 0), ('skip_connect', 2),
    ],
    reduce_concat=range(2, 6),
)

DARTS_V1 = Genotype(
    normal=[
        ('sep_conv_3x3', 1), ('sep_conv_3x3', 0),
        ('skip_connect', 0), ('sep_conv_3x3', 1),
        ('skip_connect', 0), ('sep_conv_3x3', 1),
        ('sep_conv_3x3', 0), ('skip_connect', 2),
    ],
    normal_concat=[2, 3, 4, 5],
    reduce=[
        ('max_pool_3x3', 0), ('max_pool_3x3', 1),
        ('skip_connect', 2), ('max_pool_3x3', 0),
        ('max_pool_3x3', 0), ('skip_connect', 2),
        ('skip_connect', 2), ('avg_pool_3x3', 0),
    ],
    reduce_concat=[2, 3, 4, 5],
)
DARTS_V2 = Genotype(
    normal=[
        ('sep_conv_3x3', 0), ('sep_conv_3x3', 1),
        ('sep_conv_3x3', 0), ('sep_conv_3x3', 1),
        ('sep_conv_3x3', 1), ('skip_connect', 0),
        ('skip_connect', 0), ('dil_conv_3x3', 2),
    ],
    normal_concat=[2, 3, 4, 5],
    reduce=[
        ('max_pool_3x3', 0), ('max_pool_3x3', 1),
        ('skip_connect', 2), ('max_pool_3x3', 1),
        ('max_pool_3x3', 0), ('skip_connect', 2),
        ('skip_connect', 2), ('max_pool_3x3', 1),
    ],
    reduce_concat=[2, 3, 4, 5],
)

# Plain "DARTS" refers to the second-order variant.
DARTS = DARTS_V2

PDARTS = Genotype(
    normal=[
        ('skip_connect', 0), ('dil_conv_3x3', 1),
        ('skip_connect', 0), ('sep_conv_3x3', 1),
        ('sep_conv_3x3', 1), ('sep_conv_3x3', 3),
        ('sep_conv_3x3', 0), ('dil_conv_5x5', 4),
    ],
    normal_concat=range(2, 6),
    reduce=[
        ('avg_pool_3x3', 0), ('sep_conv_5x5', 1),
        ('sep_conv_3x3', 0), ('dil_conv_5x5', 2),
        ('max_pool_3x3', 0), ('dil_conv_3x3', 1),
        ('dil_conv_3x3', 1), ('dil_conv_5x5', 3),
    ],
    reduce_concat=range(2, 6),
)

RACL = Genotype(
    normal=[
        ('sep_conv_3x3', 0), ('sep_conv_3x3', 1),
        ('sep_conv_5x5', 0), ('skip_connect', 1),
        ('sep_conv_3x3', 0), ('skip_connect', 3),
        ('sep_conv_3x3', 3), ('skip_connect', 4),
    ],
    normal_concat=[2, 3, 4, 5],
    reduce=[
        ('sep_conv_3x3', 0), ('sep_conv_5x5', 1),
        ('avg_pool_3x3', 0), ('dil_conv_3x3', 1),
        ('sep_conv_3x3', 0), ('sep_conv_5x5', 1),
        ('sep_conv_3x3', 2), ('dil_conv_3x3', 3),
    ],
    reduce_concat=[2, 3, 4, 5],
)
84 |
--------------------------------------------------------------------------------
/eval/genotypes.py:
--------------------------------------------------------------------------------
1 | from collections import namedtuple
2 |
# A cell description: edge lists for the normal and reduction cells plus the
# node indices listed for each cell's *_concat field.
Genotype = namedtuple(
    'Genotype',
    ['normal', 'normal_concat', 'reduce', 'reduce_concat'],
)

# Candidate operation names.
PRIMITIVES = [
    'none',
    'max_pool_3x3', 'avg_pool_3x3',
    'skip_connect',
    'sep_conv_3x3', 'sep_conv_5x5',
    'dil_conv_3x3', 'dil_conv_5x5',
]

# In every genotype below the (operation, input index) edges are written two
# per line.

NASNet = Genotype(
    normal=[
        ('sep_conv_5x5', 1), ('sep_conv_3x3', 0),
        ('sep_conv_5x5', 0), ('sep_conv_3x3', 0),
        ('avg_pool_3x3', 1), ('skip_connect', 0),
        ('avg_pool_3x3', 0), ('avg_pool_3x3', 0),
        ('sep_conv_3x3', 1), ('skip_connect', 1),
    ],
    normal_concat=[2, 3, 4, 5, 6],
    reduce=[
        ('sep_conv_5x5', 1), ('sep_conv_7x7', 0),
        ('max_pool_3x3', 1), ('sep_conv_7x7', 0),
        ('avg_pool_3x3', 1), ('sep_conv_5x5', 0),
        ('skip_connect', 3), ('avg_pool_3x3', 2),
        ('sep_conv_3x3', 2), ('max_pool_3x3', 1),
    ],
    reduce_concat=[4, 5, 6],
)

AmoebaNet = Genotype(
    normal=[
        ('avg_pool_3x3', 0), ('max_pool_3x3', 1),
        ('sep_conv_3x3', 0), ('sep_conv_5x5', 2),
        ('sep_conv_3x3', 0), ('avg_pool_3x3', 3),
        ('sep_conv_3x3', 1), ('skip_connect', 1),
        ('skip_connect', 0), ('avg_pool_3x3', 1),
    ],
    normal_concat=[4, 5, 6],
    reduce=[
        ('avg_pool_3x3', 0), ('sep_conv_3x3', 1),
        ('max_pool_3x3', 0), ('sep_conv_7x7', 2),
        ('sep_conv_7x7', 0), ('avg_pool_3x3', 1),
        ('max_pool_3x3', 0), ('max_pool_3x3', 1),
        ('conv_7x1_1x7', 0), ('sep_conv_3x3', 5),
    ],
    reduce_concat=[3, 4, 6],
)

ADVRUSH = Genotype(
    normal=[
        ('sep_conv_3x3', 1), ('sep_conv_3x3', 0),
        ('sep_conv_5x5', 0), ('sep_conv_3x3', 1),
        ('sep_conv_3x3', 1), ('sep_conv_3x3', 0),
        ('skip_connect', 0), ('sep_conv_3x3', 1),
    ],
    normal_concat=range(2, 6),
    reduce=[
        ('sep_conv_3x3', 0), ('sep_conv_3x3', 1),
        ('skip_connect', 0), ('dil_conv_3x3', 2),
        ('skip_connect', 0), ('avg_pool_3x3', 1),
        ('skip_connect', 0), ('skip_connect', 2),
    ],
    reduce_concat=range(2, 6),
)

DARTS_V1 = Genotype(
    normal=[
        ('sep_conv_3x3', 1), ('sep_conv_3x3', 0),
        ('skip_connect', 0), ('sep_conv_3x3', 1),
        ('skip_connect', 0), ('sep_conv_3x3', 1),
        ('sep_conv_3x3', 0), ('skip_connect', 2),
    ],
    normal_concat=[2, 3, 4, 5],
    reduce=[
        ('max_pool_3x3', 0), ('max_pool_3x3', 1),
        ('skip_connect', 2), ('max_pool_3x3', 0),
        ('max_pool_3x3', 0), ('skip_connect', 2),
        ('skip_connect', 2), ('avg_pool_3x3', 0),
    ],
    reduce_concat=[2, 3, 4, 5],
)
DARTS_V2 = Genotype(
    normal=[
        ('sep_conv_3x3', 0), ('sep_conv_3x3', 1),
        ('sep_conv_3x3', 0), ('sep_conv_3x3', 1),
        ('sep_conv_3x3', 1), ('skip_connect', 0),
        ('skip_connect', 0), ('dil_conv_3x3', 2),
    ],
    normal_concat=[2, 3, 4, 5],
    reduce=[
        ('max_pool_3x3', 0), ('max_pool_3x3', 1),
        ('skip_connect', 2), ('max_pool_3x3', 1),
        ('max_pool_3x3', 0), ('skip_connect', 2),
        ('skip_connect', 2), ('max_pool_3x3', 1),
    ],
    reduce_concat=[2, 3, 4, 5],
)

# Plain "DARTS" refers to the second-order variant.
DARTS = DARTS_V2

PDARTS = Genotype(
    normal=[
        ('skip_connect', 0), ('dil_conv_3x3', 1),
        ('skip_connect', 0), ('sep_conv_3x3', 1),
        ('sep_conv_3x3', 1), ('sep_conv_3x3', 3),
        ('sep_conv_3x3', 0), ('dil_conv_5x5', 4),
    ],
    normal_concat=range(2, 6),
    reduce=[
        ('avg_pool_3x3', 0), ('sep_conv_5x5', 1),
        ('sep_conv_3x3', 0), ('dil_conv_5x5', 2),
        ('max_pool_3x3', 0), ('dil_conv_3x3', 1),
        ('dil_conv_3x3', 1), ('dil_conv_5x5', 3),
    ],
    reduce_concat=range(2, 6),
)

RACL = Genotype(
    normal=[
        ('sep_conv_3x3', 0), ('sep_conv_3x3', 1),
        ('sep_conv_5x5', 0), ('skip_connect', 1),
        ('sep_conv_3x3', 0), ('skip_connect', 3),
        ('sep_conv_3x3', 3), ('skip_connect', 4),
    ],
    normal_concat=[2, 3, 4, 5],
    reduce=[
        ('sep_conv_3x3', 0), ('sep_conv_5x5', 1),
        ('avg_pool_3x3', 0), ('dil_conv_3x3', 1),
        ('sep_conv_3x3', 0), ('sep_conv_5x5', 1),
        ('sep_conv_3x3', 2), ('dil_conv_3x3', 3),
    ],
    reduce_concat=[2, 3, 4, 5],
)
84 |
85 |
86 |
--------------------------------------------------------------------------------
/eval/operations.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 |
def _conv_7x1_1x7(C, stride, affine):
    """ReLU, a 1x7 convolution, a 7x1 convolution, then batch norm.

    The stride is applied along the width by the first convolution and along
    the height by the second.
    """
    layers = [
        nn.ReLU(inplace=False),
        nn.Conv2d(C, C, (1,7), stride=(1, stride), padding=(0, 3), bias=False),
        nn.Conv2d(C, C, (7,1), stride=(stride, 1), padding=(3, 0), bias=False),
        nn.BatchNorm2d(C, affine=affine),
    ]
    return nn.Sequential(*layers)


# Operation name -> factory called as factory(C, stride, affine) that builds
# the module for that operation with C channels.
OPS = {
    'none':
        lambda C, stride, affine: Zero(stride),
    'avg_pool_3x3':
        lambda C, stride, affine: nn.AvgPool2d(
            3, stride=stride, padding=1, count_include_pad=False),
    'max_pool_3x3':
        lambda C, stride, affine: nn.MaxPool2d(3, stride=stride, padding=1),
    # A strided skip connection has to downsample, hence FactorizedReduce.
    'skip_connect':
        lambda C, stride, affine: (
            FactorizedReduce(C, C, affine=affine) if stride != 1 else Identity()),
    'sep_conv_3x3':
        lambda C, stride, affine: SepConv(C, C, 3, stride, 1, affine=affine),
    'sep_conv_5x5':
        lambda C, stride, affine: SepConv(C, C, 5, stride, 2, affine=affine),
    'sep_conv_7x7':
        lambda C, stride, affine: SepConv(C, C, 7, stride, 3, affine=affine),
    'dil_conv_3x3':
        lambda C, stride, affine: DilConv(C, C, 3, stride, 2, 2, affine=affine),
    'dil_conv_5x5':
        lambda C, stride, affine: DilConv(C, C, 5, stride, 4, 2, affine=affine),
    'conv_7x1_1x7': _conv_7x1_1x7,
}
21 |
class ReLUConvBN(nn.Module):
    """ReLU -> Conv2d (no bias) -> BatchNorm2d, stored as ``self.op``."""

    def __init__(self, C_in, C_out, kernel_size, stride, padding, affine=True):
        super().__init__()
        activation = nn.ReLU(inplace=False)
        conv = nn.Conv2d(C_in, C_out, kernel_size,
                         stride=stride, padding=padding, bias=False)
        norm = nn.BatchNorm2d(C_out, affine=affine)
        self.op = nn.Sequential(activation, conv, norm)

    def forward(self, x):
        """Apply the three layers in sequence."""
        out = self.op(x)
        return out
34 |
class DilConv(nn.Module):
    """ReLU -> dilated depthwise conv -> 1x1 pointwise conv -> BatchNorm2d.

    The depthwise convolution uses ``groups=C_in`` and carries the stride,
    padding and dilation; the pointwise convolution maps C_in to C_out.
    """

    def __init__(self, C_in, C_out, kernel_size, stride, padding, dilation, affine=True):
        super().__init__()
        layers = [
            nn.ReLU(inplace=False),
            nn.Conv2d(C_in, C_in, kernel_size=kernel_size, stride=stride,
                      padding=padding, dilation=dilation, groups=C_in, bias=False),
            nn.Conv2d(C_in, C_out, kernel_size=1, padding=0, bias=False),
            nn.BatchNorm2d(C_out, affine=affine),
        ]
        self.op = nn.Sequential(*layers)

    def forward(self, x):
        """Apply the layer stack to x."""
        out = self.op(x)
        return out
48 |
49 |
class SepConv(nn.Module):
    """Two stacked ReLU -> depthwise conv -> pointwise conv -> BN units.

    Only the first depthwise conv uses ``stride``; the second runs at stride 1.
    The channel change C_in -> C_out happens in the last pointwise conv.
    """

    def __init__(self, C_in, C_out, kernel_size, stride, padding, affine=True):
        super().__init__()
        first_unit = [
            nn.ReLU(inplace=False),
            nn.Conv2d(C_in, C_in, kernel_size=kernel_size, stride=stride,
                      padding=padding, groups=C_in, bias=False),
            nn.Conv2d(C_in, C_in, kernel_size=1, padding=0, bias=False),
            nn.BatchNorm2d(C_in, affine=affine),
        ]
        second_unit = [
            nn.ReLU(inplace=False),
            nn.Conv2d(C_in, C_in, kernel_size=kernel_size, stride=1,
                      padding=padding, groups=C_in, bias=False),
            nn.Conv2d(C_in, C_out, kernel_size=1, padding=0, bias=False),
            nn.BatchNorm2d(C_out, affine=affine),
        ]
        self.op = nn.Sequential(*(first_unit + second_unit))

    def forward(self, x):
        """Apply both separable-convolution units to ``x``."""
        out = self.op(x)
        return out
67 |
68 |
class Identity(nn.Module):
    """Pass-through op: forward returns its input object unchanged."""

    def __init__(self):
        super().__init__()

    def forward(self, x):
        """Return ``x`` itself (no copy)."""
        return x
76 |
77 |
class Zero(nn.Module):
    """Op that outputs zeros, spatially subsampled by ``stride``."""

    def __init__(self, stride):
        super().__init__()
        self.stride = stride

    def forward(self, x):
        """Return ``x`` (strided over the last two dims if stride != 1) times 0."""
        step = self.stride
        source = x if step == 1 else x[:, :, ::step, ::step]
        return source.mul(0.)
88 |
89 |
class FactorizedReduce(nn.Module):
    """Halve spatial size with two stride-2 1x1 convs whose outputs are concatenated.

    Each conv produces ``C_out // 2`` channels (``C_out`` must be even); the
    second conv reads the input shifted by one pixel in both spatial dims.
    """

    def __init__(self, C_in, C_out, affine=True):
        super().__init__()
        assert C_out % 2 == 0
        half = C_out // 2
        self.relu = nn.ReLU(inplace=False)
        self.conv_1 = nn.Conv2d(C_in, half, 1, stride=2, padding=0, bias=False)
        self.conv_2 = nn.Conv2d(C_in, half, 1, stride=2, padding=0, bias=False)
        self.bn = nn.BatchNorm2d(C_out, affine=affine)

    def forward(self, x):
        """ReLU, then the two offset stride-2 paths, channel concat, BatchNorm."""
        activated = self.relu(x)
        aligned = self.conv_1(activated)
        shifted = self.conv_2(activated[:, :, 1:, 1:])
        return self.bn(torch.cat([aligned, shifted], dim=1))
105 |
106 |
--------------------------------------------------------------------------------
/advrush/operations.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 |
def _skip_connect(C, stride, affine):
    """Return Identity at stride 1, else a FactorizedReduce keeping C channels."""
    if stride == 1:
        return Identity()
    return FactorizedReduce(C, C, affine=affine)


def _conv_7x1_1x7(C, stride, affine):
    """Return ReLU -> 1x7 conv -> 7x1 conv -> BatchNorm, striding once per axis."""
    return nn.Sequential(
        nn.ReLU(inplace=False),
        nn.Conv2d(C, C, (1, 7), stride=(1, stride), padding=(0, 3), bias=False),
        nn.Conv2d(C, C, (7, 1), stride=(stride, 1), padding=(3, 0), bias=False),
        nn.BatchNorm2d(C, affine=affine),
    )


# Factory table: primitive name -> callable(C, stride, affine) building the op.
OPS = {
    'none': lambda C, stride, affine: Zero(stride),
    'avg_pool_3x3': lambda C, stride, affine: nn.AvgPool2d(
        3, stride=stride, padding=1, count_include_pad=False),
    'max_pool_3x3': lambda C, stride, affine: nn.MaxPool2d(3, stride=stride, padding=1),
    'skip_connect': _skip_connect,
    'sep_conv_3x3': lambda C, stride, affine: SepConv(C, C, 3, stride, 1, affine=affine),
    'sep_conv_5x5': lambda C, stride, affine: SepConv(C, C, 5, stride, 2, affine=affine),
    'sep_conv_7x7': lambda C, stride, affine: SepConv(C, C, 7, stride, 3, affine=affine),
    'dil_conv_3x3': lambda C, stride, affine: DilConv(C, C, 3, stride, 2, 2, affine=affine),
    'dil_conv_5x5': lambda C, stride, affine: DilConv(C, C, 5, stride, 4, 2, affine=affine),
    'conv_7x1_1x7': _conv_7x1_1x7,
}
21 |
class ReLUConvBN(nn.Module):
    """ReLU -> Conv2d (no bias) -> BatchNorm2d, applied as one sequential op."""

    def __init__(self, C_in, C_out, kernel_size, stride, padding, affine=True):
        super().__init__()
        activation = nn.ReLU(inplace=False)
        conv = nn.Conv2d(C_in, C_out, kernel_size, stride=stride, padding=padding, bias=False)
        norm = nn.BatchNorm2d(C_out, affine=affine)
        # Order matters: it fixes the state_dict keys (op.1.*, op.2.*).
        self.op = nn.Sequential(activation, conv, norm)

    def forward(self, x):
        """Run the input through the ReLU/conv/BN stack."""
        out = self.op(x)
        return out
34 |
class DilConv(nn.Module):
    """ReLU -> dilated depthwise conv -> 1x1 pointwise conv -> BatchNorm2d."""

    def __init__(self, C_in, C_out, kernel_size, stride, padding, dilation, affine=True):
        super().__init__()
        layers = [
            nn.ReLU(inplace=False),
            # depthwise: one filter group per input channel
            nn.Conv2d(C_in, C_in, kernel_size=kernel_size, stride=stride,
                      padding=padding, dilation=dilation, groups=C_in, bias=False),
            # pointwise: mixes channels and maps C_in -> C_out
            nn.Conv2d(C_in, C_out, kernel_size=1, padding=0, bias=False),
            nn.BatchNorm2d(C_out, affine=affine),
        ]
        self.op = nn.Sequential(*layers)

    def forward(self, x):
        """Apply the dilated separable convolution stack to ``x``."""
        out = self.op(x)
        return out
48 |
49 |
class SepConv(nn.Module):
    """Two stacked ReLU -> depthwise conv -> pointwise conv -> BN units.

    Only the first depthwise conv uses ``stride``; the second runs at stride 1.
    The channel change C_in -> C_out happens in the last pointwise conv.
    """

    def __init__(self, C_in, C_out, kernel_size, stride, padding, affine=True):
        super().__init__()
        first_unit = [
            nn.ReLU(inplace=False),
            nn.Conv2d(C_in, C_in, kernel_size=kernel_size, stride=stride,
                      padding=padding, groups=C_in, bias=False),
            nn.Conv2d(C_in, C_in, kernel_size=1, padding=0, bias=False),
            nn.BatchNorm2d(C_in, affine=affine),
        ]
        second_unit = [
            nn.ReLU(inplace=False),
            nn.Conv2d(C_in, C_in, kernel_size=kernel_size, stride=1,
                      padding=padding, groups=C_in, bias=False),
            nn.Conv2d(C_in, C_out, kernel_size=1, padding=0, bias=False),
            nn.BatchNorm2d(C_out, affine=affine),
        ]
        self.op = nn.Sequential(*(first_unit + second_unit))

    def forward(self, x):
        """Apply both separable-convolution units to ``x``."""
        out = self.op(x)
        return out
67 |
68 |
class Identity(nn.Module):
    """Pass-through op: forward returns its input object unchanged."""

    def __init__(self):
        super().__init__()

    def forward(self, x):
        """Return ``x`` itself (no copy)."""
        return x
76 |
77 |
class Zero(nn.Module):
    """Op that outputs zeros, spatially subsampled by ``stride``."""

    def __init__(self, stride):
        super().__init__()
        self.stride = stride

    def forward(self, x):
        """Return ``x`` (strided over the last two dims if stride != 1) times 0."""
        step = self.stride
        source = x if step == 1 else x[:, :, ::step, ::step]
        return source.mul(0.)
88 |
89 |
class FactorizedReduce(nn.Module):
    """Halve spatial size with two stride-2 1x1 convs whose outputs are concatenated.

    Each conv produces ``C_out // 2`` channels (``C_out`` must be even); the
    second conv reads the input shifted by one pixel in both spatial dims.
    """

    def __init__(self, C_in, C_out, affine=True):
        super().__init__()
        assert C_out % 2 == 0
        half = C_out // 2
        self.relu = nn.ReLU(inplace=False)
        self.conv_1 = nn.Conv2d(C_in, half, 1, stride=2, padding=0, bias=False)
        self.conv_2 = nn.Conv2d(C_in, half, 1, stride=2, padding=0, bias=False)
        self.bn = nn.BatchNorm2d(C_out, affine=affine)

    def forward(self, x):
        """ReLU, then the two offset stride-2 paths, channel concat, BatchNorm."""
        activated = self.relu(x)
        aligned = self.conv_1(activated)
        shifted = self.conv_2(activated[:, :, 1:, 1:])
        return self.bn(torch.cat([aligned, shifted], dim=1))
105 |
106 |
--------------------------------------------------------------------------------
/eval/utils.py:
--------------------------------------------------------------------------------
1 | import os
2 | import numpy as np
3 | import torch
4 | import shutil
5 | import torchvision.transforms as transforms
6 | import torchvision.datasets as dset
7 | from torch.autograd import Variable
8 |
9 |
class AvgrageMeter(object):
    """Running weighted average of a stream of values."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Zero the accumulated sum, sample count and average."""
        self.avg = 0
        self.sum = 0
        self.cnt = 0

    def update(self, val, n=1):
        """Fold in ``val`` weighted by ``n`` samples and refresh ``avg``."""
        total = self.sum + val * n
        count = self.cnt + n
        self.sum, self.cnt = total, count
        self.avg = total / count
24 |
25 |
def accuracy(output, target, topk=(1,)):
    """Compute top-k accuracy, in percent, for every k in ``topk``.

    Args:
        output: score tensor; the top-k search runs along dim 1.
        target: tensor of true class indices, one per row of ``output``.
        topk: iterable of k values to evaluate.

    Returns:
        A list of 0-dim tensors, one per k, each holding the percentage of
        samples whose target is among the k highest-scoring classes.
    """
    maxk = max(topk)
    batch_size = target.size(0)

    _, pred = output.topk(maxk, 1, True, True)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))

    res = []
    for k in topk:
        # reshape instead of view: the slice of the transposed comparison can
        # be non-contiguous, in which case view(-1) raises for k > 1.
        correct_k = correct[:k].reshape(-1).float().sum(0)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res
39 |
40 |
class Cutout(object):
    """Transform that zeroes a square patch (side ~``length``) at a random centre."""

    def __init__(self, length):
        self.length = length

    def __call__(self, img):
        """Zero the patch in place on ``img`` and return the same tensor.

        The height and width are read from dims 1 and 2 of ``img``; the patch
        is clipped to the image borders.
        """
        height, width = img.size(1), img.size(2)
        half = self.length // 2

        # Row centre is drawn before column centre (keeps the RNG sequence).
        centre_y = np.random.randint(height)
        centre_x = np.random.randint(width)

        top = np.clip(centre_y - half, 0, height)
        bottom = np.clip(centre_y + half, 0, height)
        left = np.clip(centre_x - half, 0, width)
        right = np.clip(centre_x + half, 0, width)

        keep = np.ones((height, width), np.float32)
        keep[top: bottom, left: right] = 0.
        keep = torch.from_numpy(keep).expand_as(img)
        img *= keep
        return img
61 |
62 |
def _data_transforms_cifar10(args):
    """Build the normalized (train, valid) transform pipelines for CIFAR-10.

    The training pipeline adds random crop and horizontal flip, and appends a
    Cutout step of size ``args.cutout_length`` when ``args.cutout`` is truthy.
    """
    CIFAR_MEAN = [0.49139968, 0.48215827, 0.44653124]
    CIFAR_STD = [0.24703233, 0.24348505, 0.26158768]

    train_steps = [
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(CIFAR_MEAN, CIFAR_STD),
    ]
    if args.cutout:
        train_steps.append(Cutout(args.cutout_length))

    valid_steps = [
        transforms.ToTensor(),
        transforms.Normalize(CIFAR_MEAN, CIFAR_STD),
    ]
    return transforms.Compose(train_steps), transforms.Compose(valid_steps)
81 |
def _data_transforms_cifar10_eval(args):
    """Build (train, valid) CIFAR-10 pipelines without a Normalize step.

    The training pipeline adds random crop and horizontal flip, plus Cutout of
    size ``args.cutout_length`` when ``args.cutout`` is truthy; the validation
    pipeline only converts to a tensor.
    """
    train_steps = [
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
    ]
    if args.cutout:
        train_steps.append(Cutout(args.cutout_length))

    valid_steps = [transforms.ToTensor()]
    return transforms.Compose(train_steps), transforms.Compose(valid_steps)
95 |
def _data_imagenet(args):
    """Return an ImageFolder over ``<args.data>/train`` with training augmentation.

    Only the training split is built here. Images get a random resized crop to
    224, a random horizontal flip, tensor conversion and channel normalization.
    """
    train_root = os.path.join(args.data, 'train')
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    augment = transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize,
    ])
    return dset.ImageFolder(train_root, augment)
109 |
def count_parameters_in_MB(model):
    """Return the model's parameter count in millions.

    Parameters whose name contains "auxiliary" are excluded from the count.
    The builtin ``sum`` is used because passing a generator to ``np.sum`` is
    deprecated.
    """
    total = sum(
        v.numel()
        for name, v in model.named_parameters()
        if "auxiliary" not in name
    )
    return total / 1e6
112 |
113 |
def save_checkpoint(state, is_best, save, epoch):
    """Write ``state`` to a per-epoch checkpoint file inside directory ``save``.

    When ``is_best`` is truthy the checkpoint is also copied to a per-epoch
    "model_best" file in the same directory.
    """
    checkpoint_path = os.path.join(save, 'checkpoint-epoch{}.pth.tar'.format(epoch))
    torch.save(state, checkpoint_path)
    if not is_best:
        return
    best_path = os.path.join(save, 'model_best_epoch{}.pth.tar'.format(epoch))
    shutil.copyfile(checkpoint_path, best_path)
120 |
121 |
def save(model, model_path):
    """Serialize the model's ``state_dict`` to ``model_path``."""
    weights = model.state_dict()
    torch.save(weights, model_path)
124 |
125 |
def load(model, model_path):
    """Load a ``state_dict`` stored at ``model_path`` into ``model``."""
    weights = torch.load(model_path)
    model.load_state_dict(weights)
128 |
129 |
def drop_path(x, drop_prob):
    """Randomly zero whole samples of ``x`` in place and rescale the survivors.

    Args:
        x: tensor whose first dim indexes samples; it is modified in place.
            The mask has shape (x.size(0), 1, 1, 1), so a 4-D ``x`` is assumed.
        drop_prob: probability of dropping each sample; values <= 0 are a no-op.

    Returns:
        ``x`` itself, with kept samples divided by ``1 - drop_prob``.
    """
    if drop_prob > 0.:
        keep_prob = 1. - drop_prob
        # Allocate the mask alongside x (same device and dtype) instead of
        # hard-coding a CUDA float tensor, so CPU and multi-GPU runs work.
        mask = x.new_empty(x.size(0), 1, 1, 1).bernoulli_(keep_prob)
        x.div_(keep_prob)
        x.mul_(mask)
    return x
137 |
138 |
def create_exp_dir(path, scripts_to_save=None):
    """Create the experiment directory and optionally snapshot scripts into it.

    Args:
        path: directory to create; missing parent directories are created too.
        scripts_to_save: optional iterable of file paths copied into
            ``<path>/scripts`` under their base names.

    Calling this again for an existing experiment directory is safe: existing
    directories are reused and script copies are overwritten.
    """
    # makedirs handles nested paths and an already-existing directory.
    os.makedirs(path, exist_ok=True)
    print('Experiment dir : {}'.format(path))

    if scripts_to_save is not None:
        scripts_dir = os.path.join(path, 'scripts')
        os.makedirs(scripts_dir, exist_ok=True)
        for script in scripts_to_save:
            dst_file = os.path.join(scripts_dir, os.path.basename(script))
            shutil.copyfile(script, dst_file)
149 |
150 |
--------------------------------------------------------------------------------
/advrush/architect.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import numpy as np
3 | import torch.nn as nn
4 | from torch.autograd import Variable
5 |
6 | from regularizer import *
7 | import hessianflow as hf
8 | import hessianflow.optimizer.optm_utils as hf_optm_utils
9 | import hessianflow.optimizer.progressbar as hf_optm_pgb
10 |
11 | def _concat(xs):
12 | return torch.cat([x.view(-1) for x in xs])
13 |
14 |
class Architect(object):
    """Optimizes the architecture parameters of a search network.

    The wrapped ``model`` must expose ``arch_parameters()``, ``_loss(input,
    target)`` returning ``(logits, loss)``, and (for the unrolled path)
    ``new()``. Architecture parameters are updated with Adam.
    """

    def __init__(self, model, args):
        """Store the weight-optimizer hyper-parameters and build the Adam optimizer.

        Reads ``momentum``, ``weight_decay``, ``arch_learning_rate`` and
        ``arch_weight_decay`` from ``args``.
        """
        self.network_momentum = args.momentum
        self.network_weight_decay = args.weight_decay
        self.model = model
        self.optimizer = torch.optim.Adam(self.model.arch_parameters(),
                                          lr=args.arch_learning_rate, betas=(0.5, 0.999),
                                          weight_decay=args.arch_weight_decay)

    def _compute_unrolled_model(self, input, target, eta, network_optimizer):
        """Return a copy of the model after one virtual SGD step of size ``eta``."""
        # _loss returns (logits, loss); only the loss is needed here.
        _, loss = self.model._loss(input, target)
        theta = _concat(self.model.parameters()).data
        try:
            moment = _concat(network_optimizer.state[v]['momentum_buffer']
                             for v in self.model.parameters()).mul_(self.network_momentum)
        except Exception:
            # Best effort: no usable momentum buffers (e.g. before the first
            # optimizer step), so fall back to zero momentum.
            moment = torch.zeros_like(theta)
        dtheta = _concat(torch.autograd.grad(loss, self.model.parameters())).data \
            + self.network_weight_decay * theta
        unrolled_model = self._construct_model_from_theta(theta - eta * (moment + dtheta))
        return unrolled_model

    def step(self, input_train, target_train, epoch, warm_epoch, gamma, criterion, loss_hessian,
             valid_queue, input_valid, target_valid, eta, network_optimizer, unrolled, h):
        """Run one architecture update and return the regularizer value used.

        With ``unrolled`` the second-order update is used and no regularizer is
        applied, so a zero tensor is returned; otherwise the value computed by
        ``_backward_step`` is returned.
        """
        self.optimizer.zero_grad()
        if unrolled:
            self._backward_step_unrolled(input_train, target_train, input_valid, target_valid,
                                         eta, network_optimizer)
            # The unrolled path has no regularizer; previously this name was
            # left unbound and the return statement raised.
            regularizer = torch.tensor(0, dtype=torch.float)
        else:
            regularizer = self._backward_step(epoch, warm_epoch, gamma, criterion, loss_hessian,
                                              valid_queue, input_valid, target_valid, h)
        self.optimizer.step()
        return regularizer

    def _backward_step(self, epoch, warm_epoch, gamma, criterion, loss_hessian, valid_queue,
                       input_valid, target_valid, h):
        """Backpropagate the validation loss, adding ``gamma * regularizer`` after warm-up.

        During the first ``warm_epoch`` epochs only the plain validation loss is
        used. Afterwards the regularizer comes from ``loss_cure`` when
        ``loss_hessian == 'loss_cure'`` and from ``loss_eigen`` otherwise.
        """
        logits, loss = self.model._loss(input_valid, target_valid)
        if epoch < warm_epoch:
            regularizer = torch.tensor(0, dtype=torch.float)
        else:
            if loss_hessian == 'loss_cure':
                reg = loss_cure(self.model, criterion, lambda_=4, device='cuda')
                regularizer, grad_norm = reg.regularizer(input_valid, target_valid, h=h)
            else:
                reg = loss_eigen(self.model, valid_queue, input_valid, target_valid, criterion,
                                 full_eigen=False, maxIter=10, tol=1e-2)
                regularizer, _ = reg.regularizer()
            loss = loss + gamma * regularizer
        loss.backward()
        return regularizer

    def _backward_step_unrolled(self, input_train, target_train, input_valid, target_valid,
                                eta, network_optimizer):
        """Write the second-order architecture gradient into ``arch_parameters().grad``."""
        unrolled_model = self._compute_unrolled_model(input_train, target_train, eta,
                                                      network_optimizer)
        # _loss returns (logits, loss); backward must run on the loss only.
        _, unrolled_loss = unrolled_model._loss(input_valid, target_valid)

        unrolled_loss.backward()
        dalpha = [v.grad for v in unrolled_model.arch_parameters()]
        vector = [v.grad.data for v in unrolled_model.parameters()]
        implicit_grads = self._hessian_vector_product(vector, input_train, target_train)

        for g, ig in zip(dalpha, implicit_grads):
            g.data.sub_(eta * ig.data)

        for v, g in zip(self.model.arch_parameters(), dalpha):
            if v.grad is None:
                v.grad = Variable(g.data)
            else:
                v.grad.data.copy_(g.data)

    def _construct_model_from_theta(self, theta):
        """Build a new model (via ``model.new()``) whose weights are read from flat ``theta``."""
        model_new = self.model.new()
        model_dict = self.model.state_dict()

        params, offset = {}, 0
        for k, v in self.model.named_parameters():
            v_length = np.prod(v.size())
            params[k] = theta[offset: offset + v_length].view(v.size())
            offset += v_length

        # Every element of theta must have been consumed by some parameter.
        assert offset == len(theta)
        model_dict.update(params)
        model_new.load_state_dict(model_dict)
        return model_new.cuda()

    def _hessian_vector_product(self, vector, input, target, r=1e-2):
        """Finite-difference estimate of the architecture-gradient change along ``vector``.

        Weights are perturbed by +R*v and -R*v (R = r / ||vector||), the
        architecture gradients at both points are differenced, and the weights
        are restored before returning.
        """
        R = r / _concat(vector).norm()
        for p, v in zip(self.model.parameters(), vector):
            p.data.add_(R * v)
        _, loss = self.model._loss(input, target)
        grads_p = torch.autograd.grad(loss, self.model.arch_parameters())

        for p, v in zip(self.model.parameters(), vector):
            p.data.sub_(2 * R * v)
        _, loss = self.model._loss(input, target)
        grads_n = torch.autograd.grad(loss, self.model.arch_parameters())

        # Undo the perturbation so the model weights are unchanged on exit.
        for p, v in zip(self.model.parameters(), vector):
            p.data.add_(R * v)

        return [(x - y).div_(2 * R) for x, y in zip(grads_p, grads_n)]
109 |
110 |
--------------------------------------------------------------------------------
/advrush/trades.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | from torch.autograd import Variable
5 | import torch.optim as optim
6 |
7 |
def squared_l2_norm(x):
    """Return the sum of squares over all elements of ``x`` as a 1-element tensor.

    The input is flattened to a single row, so the result has shape (1,)
    regardless of the shape of ``x``.
    """
    # x.unsqueeze(0).shape[0] is always 1, i.e. the whole tensor becomes one row.
    single_row = x.view(1, -1)
    squares = single_row ** 2
    return squares.sum(1)
11 |
12 |
def l2_norm(x):
    """Return the square root of ``squared_l2_norm(x)``."""
    squared = squared_l2_norm(x)
    return squared.sqrt()
15 |
16 |
def trades_loss(model,
                x_natural,
                y,
                optimizer,
                step_size=0.003,
                epsilon=0.031,
                perturb_steps=10,
                beta=1.0,
                distance='l_inf'):
    """Compute the TRADES loss: clean cross-entropy plus a KL robustness term.

    Args:
        model: network whose forward returns a 2-tuple; the first item is the
            logits. It is switched to eval mode while the adversarial example
            is generated and back to train mode afterwards.
        x_natural: clean input batch.
        y: target class indices.
        optimizer: optimizer of ``model``; its gradients are zeroed before the
            final forward passes.
        step_size: attack step size for the ``'l_inf'`` attack.
        epsilon: perturbation budget.
        perturb_steps: number of attack iterations.
        beta: weight of the robustness term.
        distance: ``'l_inf'`` or ``'l_2'``; any other value only adds small
            random noise clamped to [0, 1].

    Returns:
        Scalar tensor ``CE(model(x), y) + beta * KL(model(x_adv) || model(x)) / batch``.
    """
    criterion_kl = nn.KLDivLoss(reduction='sum')
    model.eval()
    batch_size = len(x_natural)

    # The clean prediction is constant during the attack (eval mode, fixed
    # input) and the attack never differentiates through it, so compute it once.
    with torch.no_grad():
        clean_logits, _ = model(x_natural)
        clean_probs = F.softmax(clean_logits, dim=1)

    # Random start; the noise is created on the same device as the input.
    x_adv = x_natural.detach() + 0.001 * torch.randn_like(x_natural)
    if distance == 'l_inf':
        for _ in range(perturb_steps):
            x_adv.requires_grad_()
            with torch.enable_grad():
                adv_logits, _ = model(x_adv)
                loss_kl = criterion_kl(F.log_softmax(adv_logits, dim=1), clean_probs)
            grad = torch.autograd.grad(loss_kl, [x_adv])[0]
            x_adv = x_adv.detach() + step_size * torch.sign(grad.detach())
            # Project back into the epsilon ball and the valid pixel range.
            x_adv = torch.min(torch.max(x_adv, x_natural - epsilon), x_natural + epsilon)
            x_adv = torch.clamp(x_adv, 0.0, 1.0)
    elif distance == 'l_2':
        delta = 0.001 * torch.randn_like(x_natural)
        delta = delta.detach().requires_grad_(True)

        # Setup optimizers
        optimizer_delta = optim.SGD([delta], lr=epsilon / perturb_steps * 2)

        for _ in range(perturb_steps):
            adv = x_natural + delta

            # optimize
            optimizer_delta.zero_grad()
            with torch.enable_grad():
                # The model returns a tuple; only the logits enter the loss.
                adv_logits, _ = model(adv)
                loss = (-1) * criterion_kl(F.log_softmax(adv_logits, dim=1), clean_probs)
            loss.backward()
            # renorming gradient
            grad_norms = delta.grad.view(batch_size, -1).norm(p=2, dim=1)
            delta.grad.div_(grad_norms.view(-1, 1, 1, 1))
            # avoid nan or inf if gradient is 0
            if (grad_norms == 0).any():
                delta.grad[grad_norms == 0] = torch.randn_like(delta.grad[grad_norms == 0])
            optimizer_delta.step()

            # projection
            delta.data.add_(x_natural)
            delta.data.clamp_(0, 1).sub_(x_natural)
            delta.data.renorm_(p=2, dim=0, maxnorm=epsilon)
        x_adv = (x_natural + delta).detach()
    else:
        x_adv = torch.clamp(x_adv, 0.0, 1.0)
    model.train()

    x_adv = torch.clamp(x_adv, 0.0, 1.0).detach()
    # zero gradient
    optimizer.zero_grad()
    # calculate robust loss
    clean_logits_new, _ = model(x_natural)
    # The robust term must be evaluated on the adversarial input; feeding
    # x_natural here made the KL term identically zero.
    adv_logits_new, _ = model(x_adv)
    loss_natural = F.cross_entropy(clean_logits_new, y)
    loss_robust = (1.0 / batch_size) * criterion_kl(F.log_softmax(adv_logits_new, dim=1),
                                                    F.softmax(clean_logits_new, dim=1))
    loss = loss_natural + beta * loss_robust
    return loss
88 |
89 |
def madry_loss(model,
               x_natural,
               y,
               optimizer,
               step_size=0.003,
               epsilon=0.031,
               perturb_steps=10,
               distance='l_inf',
               ):
    """Compute the cross-entropy loss on an adversarial version of the batch.

    Args:
        model: network whose forward returns a 2-tuple; the first item is the
            logits. It is switched to eval mode during the attack and back to
            train mode afterwards.
        x_natural: clean input batch.
        y: target class indices.
        optimizer: optimizer of ``model``; its gradients are zeroed before the
            final forward pass.
        step_size: attack step size.
        epsilon: L-infinity perturbation budget.
        perturb_steps: number of attack iterations.
        distance: ``'l_inf'`` runs the iterative attack; any other value only
            adds small random noise clamped to [0, 1].

    Returns:
        Scalar tensor: mean cross-entropy of ``model(x_adv)`` against ``y``.
    """
    # Per-sample cross-entropy; averaged below to drive the attack.
    criterion_ce = torch.nn.CrossEntropyLoss(reduction='none')
    model.eval()

    # Random start; the noise is created on the same device as the input
    # rather than being forced onto CUDA.
    x_adv = x_natural.detach() + 0.001 * torch.randn_like(x_natural)

    if distance == 'l_inf':
        for _ in range(perturb_steps):
            x_adv.requires_grad_()
            with torch.enable_grad():
                logits, _ = model(x_adv)
                loss_ce = criterion_ce(logits, y).mean()
            grad = torch.autograd.grad(loss_ce, [x_adv])[0]
            x_adv = x_adv.detach() + step_size * torch.sign(grad.detach())
            # Project back into the epsilon ball and the valid pixel range.
            x_adv = torch.min(torch.max(x_adv, x_natural - epsilon), x_natural + epsilon)
            x_adv = torch.clamp(x_adv, 0.0, 1.0)
    else:
        x_adv = torch.clamp(x_adv, 0.0, 1.0)
    model.train()

    x_adv = torch.clamp(x_adv, 0.0, 1.0).detach()
    # zero gradient
    optimizer.zero_grad()

    logits, _ = model(x_adv)
    loss = F.cross_entropy(logits, y)

    return loss
129 |
--------------------------------------------------------------------------------
/advrush/utils.py:
--------------------------------------------------------------------------------
1 | import os
2 | import numpy as np
3 | import torch
4 | import shutil
5 | import torchvision.transforms as transforms
6 | import torchvision.datasets as dset
7 | from torch.autograd import Variable
8 |
9 |
class AvgrageMeter(object):
    """Running weighted average of a stream of values."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Zero the accumulated sum, sample count and average."""
        self.avg = 0
        self.sum = 0
        self.cnt = 0

    def update(self, val, n=1):
        """Fold in ``val`` weighted by ``n`` samples and refresh ``avg``."""
        total = self.sum + val * n
        count = self.cnt + n
        self.sum, self.cnt = total, count
        self.avg = total / count
24 |
25 |
def accuracy(output, target, topk=(1,)):
    """Compute top-k accuracy, in percent, for every k in ``topk``.

    Args:
        output: score tensor; the top-k search runs along dim 1.
        target: tensor of true class indices, one per row of ``output``.
        topk: iterable of k values to evaluate.

    Returns:
        A list of 0-dim tensors, one per k, each holding the percentage of
        samples whose target is among the k highest-scoring classes.
    """
    maxk = max(topk)
    batch_size = target.size(0)

    _, pred = output.topk(maxk, 1, True, True)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))

    res = []
    for k in topk:
        # reshape instead of view: the slice of the transposed comparison can
        # be non-contiguous, in which case view(-1) raises for k > 1.
        correct_k = correct[:k].reshape(-1).float().sum(0)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res
39 |
40 |
class Cutout(object):
    """Transform that zeroes a square patch (side ~``length``) at a random centre."""

    def __init__(self, length):
        self.length = length

    def __call__(self, img):
        """Zero the patch in place on ``img`` and return the same tensor.

        The height and width are read from dims 1 and 2 of ``img``; the patch
        is clipped to the image borders.
        """
        height, width = img.size(1), img.size(2)
        half = self.length // 2

        # Row centre is drawn before column centre (keeps the RNG sequence).
        centre_y = np.random.randint(height)
        centre_x = np.random.randint(width)

        top = np.clip(centre_y - half, 0, height)
        bottom = np.clip(centre_y + half, 0, height)
        left = np.clip(centre_x - half, 0, width)
        right = np.clip(centre_x + half, 0, width)

        keep = np.ones((height, width), np.float32)
        keep[top: bottom, left: right] = 0.
        keep = torch.from_numpy(keep).expand_as(img)
        img *= keep
        return img
61 |
62 |
def _data_transforms_cifar10(args):
    """Build the normalized (train, valid) transform pipelines for CIFAR-10.

    The training pipeline adds random crop and horizontal flip, and appends a
    Cutout step of size ``args.cutout_length`` when ``args.cutout`` is truthy.
    """
    CIFAR_MEAN = [0.49139968, 0.48215827, 0.44653124]
    CIFAR_STD = [0.24703233, 0.24348505, 0.26158768]

    train_steps = [
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(CIFAR_MEAN, CIFAR_STD),
    ]
    if args.cutout:
        train_steps.append(Cutout(args.cutout_length))

    valid_steps = [
        transforms.ToTensor(),
        transforms.Normalize(CIFAR_MEAN, CIFAR_STD),
    ]
    return transforms.Compose(train_steps), transforms.Compose(valid_steps)
81 |
def _data_transforms_cifar10_eval(args):
    """Build (train, valid) CIFAR-10 pipelines without a Normalize step.

    The training pipeline adds random crop and horizontal flip, plus Cutout of
    size ``args.cutout_length`` when ``args.cutout`` is truthy; the validation
    pipeline only converts to a tensor.
    """
    train_steps = [
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
    ]
    if args.cutout:
        train_steps.append(Cutout(args.cutout_length))

    valid_steps = [transforms.ToTensor()]
    return transforms.Compose(train_steps), transforms.Compose(valid_steps)
95 |
def _data_transforms_cifar100(args):
    """Build the normalized (train, valid) transform pipelines for CIFAR-100.

    The training pipeline adds random crop and horizontal flip, and appends a
    Cutout step of size ``args.cutout_length`` when ``args.cutout`` is truthy.
    """
    CIFAR_MEAN = [0.5071, 0.4867, 0.4408]
    CIFAR_STD = [0.2675, 0.2565, 0.2761]

    train_steps = [
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(CIFAR_MEAN, CIFAR_STD),
    ]
    if args.cutout:
        train_steps.append(Cutout(args.cutout_length))

    valid_steps = [
        transforms.ToTensor(),
        transforms.Normalize(CIFAR_MEAN, CIFAR_STD),
    ]
    return transforms.Compose(train_steps), transforms.Compose(valid_steps)
114 |
def _data_imagenet(args):
    """Return an ImageFolder over ``<args.data>/train`` with training augmentation.

    Only the training split is built here. Images get a random resized crop to
    224, a random horizontal flip, tensor conversion and channel normalization.
    """
    train_root = os.path.join(args.data, 'train')
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    augment = transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize,
    ])
    return dset.ImageFolder(train_root, augment)
128 |
def count_parameters_in_MB(model):
    """Return the model's parameter count in millions.

    Parameters whose name contains "auxiliary" are excluded from the count.
    The builtin ``sum`` is used because passing a generator to ``np.sum`` is
    deprecated.
    """
    total = sum(
        v.numel()
        for name, v in model.named_parameters()
        if "auxiliary" not in name
    )
    return total / 1e6
131 |
132 |
def save_checkpoint(state, is_best, save, epoch):
    """Write ``state`` to a per-epoch checkpoint file inside directory ``save``.

    When ``is_best`` is truthy the checkpoint is also copied over the single
    "model_best" file in the same directory.
    """
    checkpoint_path = os.path.join(save, 'checkpoint-epoch{}.pth.tar'.format(epoch))
    torch.save(state, checkpoint_path)
    if not is_best:
        return
    best_path = os.path.join(save, 'model_best.pth.tar')
    shutil.copyfile(checkpoint_path, best_path)
139 |
140 |
def save(model, model_path):
    """Serialize the model's ``state_dict`` to ``model_path``."""
    weights = model.state_dict()
    torch.save(weights, model_path)
143 |
144 |
def load(model, model_path):
    """Load a ``state_dict`` stored at ``model_path`` into ``model``."""
    weights = torch.load(model_path)
    model.load_state_dict(weights)
147 |
148 |
def drop_path(x, drop_prob):
    """Randomly zero whole samples of ``x`` in place and rescale the survivors.

    Args:
        x: tensor whose first dim indexes samples; it is modified in place.
            The mask has shape (x.size(0), 1, 1, 1), so a 4-D ``x`` is assumed.
        drop_prob: probability of dropping each sample; values <= 0 are a no-op.

    Returns:
        ``x`` itself, with kept samples divided by ``1 - drop_prob``.
    """
    if drop_prob > 0.:
        keep_prob = 1. - drop_prob
        # Allocate the mask alongside x (same device and dtype) instead of
        # hard-coding a CUDA float tensor, so CPU and multi-GPU runs work.
        mask = x.new_empty(x.size(0), 1, 1, 1).bernoulli_(keep_prob)
        x.div_(keep_prob)
        x.mul_(mask)
    return x
156 |
157 |
def create_exp_dir(path, scripts_to_save=None):
    """Create the experiment directory and optionally snapshot scripts into it.

    Args:
        path: directory to create; missing parent directories are created too.
        scripts_to_save: optional iterable of file paths copied into
            ``<path>/scripts`` under their base names.

    Calling this again for an existing experiment directory is safe: existing
    directories are reused and script copies are overwritten.
    """
    # makedirs handles nested paths and an already-existing directory.
    os.makedirs(path, exist_ok=True)
    print('Experiment dir : {}'.format(path))

    if scripts_to_save is not None:
        scripts_dir = os.path.join(path, 'scripts')
        os.makedirs(scripts_dir, exist_ok=True)
        for script in scripts_to_save:
            dst_file = os.path.join(scripts_dir, os.path.basename(script))
            shutil.copyfile(script, dst_file)
168 |
169 |
--------------------------------------------------------------------------------
/advrush/model_search.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | from operations import *
5 | from torch.autograd import Variable
6 | from genotypes import PRIMITIVES
7 | from genotypes import Genotype
8 |
9 |
class MixedOp(nn.Module):
    """Weighted sum over one instance of every primitive in ``PRIMITIVES``."""

    def __init__(self, C, stride):
        super().__init__()
        self._ops = nn.ModuleList()
        for name in PRIMITIVES:
            candidate = OPS[name](C, stride, False)
            # Pooling primitives get a non-affine BatchNorm appended.
            if 'pool' in name:
                candidate = nn.Sequential(candidate, nn.BatchNorm2d(C, affine=False))
            self._ops.append(candidate)

    def forward(self, x, weights):
        """Return sum_i weights[i] * op_i(x), pairing weights with ops in order."""
        mixed = 0
        for weight, op in zip(weights, self._ops):
            mixed = mixed + weight * op(x)
        return mixed
23 |
24 |
class Cell(nn.Module):
    """Search cell: ``steps`` intermediate nodes, each fed by every earlier state.

    Every edge is a ``MixedOp``. In a reduction cell the edges that read the
    two input states use stride 2; all other edges use stride 1.
    """

    def __init__(self, steps, multiplier, C_prev_prev, C_prev, C, reduction, reduction_prev):
        super().__init__()
        self.reduction = reduction

        # The older input needs spatial reduction if the previous cell reduced.
        if reduction_prev:
            self.preprocess0 = FactorizedReduce(C_prev_prev, C, affine=False)
        else:
            self.preprocess0 = ReLUConvBN(C_prev_prev, C, 1, 1, 0, affine=False)
        self.preprocess1 = ReLUConvBN(C_prev, C, 1, 1, 0, affine=False)
        self._steps = steps
        self._multiplier = multiplier

        self._ops = nn.ModuleList()
        self._bns = nn.ModuleList()
        for node in range(self._steps):
            for source in range(2 + node):
                edge_stride = 2 if reduction and source < 2 else 1
                self._ops.append(MixedOp(C, edge_stride))

    def forward(self, s0, s1, weights):
        """Compute all nodes and concatenate the last ``multiplier`` along channels.

        ``weights[k]`` holds the mixing weights for the k-th edge, in the order
        the edges were created.
        """
        states = [self.preprocess0(s0), self.preprocess1(s1)]
        offset = 0
        for _ in range(self._steps):
            node = sum(self._ops[offset + j](h, weights[offset + j])
                       for j, h in enumerate(states))
            offset += len(states)
            states.append(node)

        return torch.cat(states[-self._multiplier:], dim=1)
59 |
60 |
class Network(nn.Module):
    """Search network: a conv stem, a stack of search cells and a linear classifier.

    The architecture parameters ``alphas_normal`` and ``alphas_reduce`` are
    plain tensors (not registered ``nn.Parameter``s) exposed through
    ``arch_parameters()``. Cells at depth ``layers // 3`` and
    ``2 * layers // 3`` are reduction cells and double the channel count.
    """

    def __init__(self, C, num_classes, layers, criterion, steps=4, multiplier=4, stem_multiplier=3):
        super(Network, self).__init__()
        self._C = C
        self._num_classes = num_classes
        self._layers = layers
        self._criterion = criterion
        self._steps = steps
        self._multiplier = multiplier
        # Kept so that new() can rebuild an identically shaped network.
        self._stem_multiplier = stem_multiplier

        C_curr = stem_multiplier * C
        self.stem = nn.Sequential(
            nn.Conv2d(3, C_curr, 3, padding=1, bias=False),
            nn.BatchNorm2d(C_curr)
        )

        C_prev_prev, C_prev, C_curr = C_curr, C_curr, C
        self.cells = nn.ModuleList()
        reduction_prev = False
        for i in range(layers):
            if i in [layers // 3, 2 * layers // 3]:
                C_curr *= 2
                reduction = True
            else:
                reduction = False
            cell = Cell(steps, multiplier, C_prev_prev, C_prev, C_curr, reduction, reduction_prev)
            reduction_prev = reduction
            self.cells += [cell]
            C_prev_prev, C_prev = C_prev, multiplier * C_curr

        self.global_pooling = nn.AdaptiveAvgPool2d(1)
        self.classifier = nn.Linear(C_prev, num_classes)

        self._initialize_alphas()

    def new(self):
        """Return a fresh CUDA network of the same shape with copied alphas.

        Only the architecture parameters are copied; the weights of the new
        network are freshly initialised.
        """
        # Forward the cell configuration too; otherwise a network built with
        # non-default steps/multiplier/stem_multiplier would be cloned with
        # the defaults and its alphas would no longer fit.
        model_new = Network(self._C, self._num_classes, self._layers, self._criterion,
                            steps=self._steps, multiplier=self._multiplier,
                            stem_multiplier=self._stem_multiplier).cuda()
        for x, y in zip(model_new.arch_parameters(), self.arch_parameters()):
            x.data.copy_(y.data)
        return model_new

    def forward(self, input):
        """Return the classification logits for ``input``."""
        s0 = s1 = self.stem(input)
        for i, cell in enumerate(self.cells):
            # Each cell type mixes its ops with the softmax of its own alphas.
            if cell.reduction:
                weights = F.softmax(self.alphas_reduce, dim=-1)
            else:
                weights = F.softmax(self.alphas_normal, dim=-1)
            s0, s1 = s1, cell(s0, s1, weights)
        out = self.global_pooling(s1)
        logits = self.classifier(out.view(out.size(0), -1))
        return logits

    def _loss(self, input, target):
        """Return ``(logits, criterion(logits, target))``."""
        logits = self(input)
        return logits, self._criterion(logits, target)

    def _initialize_alphas(self):
        """Create small random alphas on CUDA, one row per edge, one column per primitive."""
        k = sum(1 for i in range(self._steps) for n in range(2 + i))
        num_ops = len(PRIMITIVES)

        self.alphas_normal = Variable(1e-3 * torch.randn(k, num_ops).cuda(), requires_grad=True)
        self.alphas_reduce = Variable(1e-3 * torch.randn(k, num_ops).cuda(), requires_grad=True)
        self._arch_parameters = [
            self.alphas_normal,
            self.alphas_reduce,
        ]

    def arch_parameters(self):
        """Return the list ``[alphas_normal, alphas_reduce]``."""
        return self._arch_parameters

    def restore(self, alphas_normal, alphas_reduce):
        """Replace both alpha tensors with the given values.

        The list returned by ``arch_parameters()`` is rebuilt as well, so it
        refers to the same tensors that ``forward`` uses. An optimizer created
        before this call still holds the old tensors and must be re-created.
        """
        self.alphas_normal = Variable(alphas_normal, requires_grad=True)
        self.alphas_reduce = Variable(alphas_reduce, requires_grad=True)
        # Previously this list kept pointing at the pre-restore tensors.
        self._arch_parameters = [
            self.alphas_normal,
            self.alphas_reduce,
        ]

    def genotype(self):
        """Derive the discrete ``Genotype`` from the current alphas.

        For every intermediate node the two incoming edges with the strongest
        non-'none' op are kept, each labelled with that strongest op.
        """
        none_index = PRIMITIVES.index('none')

        def _parse(weights):
            gene = []
            n = 2
            start = 0
            for i in range(self._steps):
                end = start + n
                W = weights[start:end].copy()
                # Rank candidate input states by their best non-'none' weight.
                edges = sorted(
                    range(i + 2),
                    key=lambda x: -max(W[x][k] for k in range(len(W[x])) if k != none_index)
                )[:2]
                for j in edges:
                    k_best = None
                    for k in range(len(W[j])):
                        if k != none_index:
                            if k_best is None or W[j][k] > W[j][k_best]:
                                k_best = k
                    gene.append((PRIMITIVES[k_best], j))
                start = end
                n += 1
            return gene

        gene_normal = _parse(F.softmax(self.alphas_normal, dim=-1).data.cpu().numpy())
        gene_reduce = _parse(F.softmax(self.alphas_reduce, dim=-1).data.cpu().numpy())

        concat = range(2 + self._steps - self._multiplier, self._steps + 2)
        genotype = Genotype(
            normal=gene_normal, normal_concat=concat,
            reduce=gene_reduce, reduce_concat=concat
        )
        return genotype
169 |
170 |
--------------------------------------------------------------------------------
/advrush/model.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | from operations import *
4 | from torch.autograd import Variable
5 | from utils import drop_path
6 |
7 |
class Cell(nn.Module):
    """Fixed cell built from a genotype: each node sums two ops over earlier states."""

    def __init__(self, genotype, C_prev_prev, C_prev, C, reduction, reduction_prev):
        super().__init__()
        print(C_prev_prev, C_prev, C)

        # The older input needs spatial reduction if the previous cell reduced.
        if reduction_prev:
            self.preprocess0 = FactorizedReduce(C_prev_prev, C)
        else:
            self.preprocess0 = ReLUConvBN(C_prev_prev, C, 1, 1, 0)
        self.preprocess1 = ReLUConvBN(C_prev, C, 1, 1, 0)

        spec = genotype.reduce if reduction else genotype.normal
        concat = genotype.reduce_concat if reduction else genotype.normal_concat
        op_names, indices = zip(*spec)
        self._compile(C, op_names, indices, concat, reduction)

    def _compile(self, C, op_names, indices, concat, reduction):
        """Instantiate one op per (name, input index) pair from the genotype."""
        assert len(op_names) == len(indices)
        self._steps = len(op_names) // 2
        self._concat = concat
        self.multiplier = len(concat)

        self._ops = nn.ModuleList()
        for op_name, source in zip(op_names, indices):
            # In a reduction cell, ops reading the two cell inputs use stride 2.
            op_stride = 2 if reduction and source < 2 else 1
            self._ops.append(OPS[op_name](C, op_stride, True))
        self._indices = indices

    def forward(self, s0, s1, drop_prob):
        """Compute every node and concatenate the states listed in ``_concat``.

        In training mode with ``drop_prob > 0``, drop-path is applied to each
        op output except those produced by ``Identity``.
        """
        states = [self.preprocess0(s0), self.preprocess1(s1)]
        for step in range(self._steps):
            first, second = 2 * step, 2 * step + 1
            op1, op2 = self._ops[first], self._ops[second]
            h1 = op1(states[self._indices[first]])
            h2 = op2(states[self._indices[second]])
            if self.training and drop_prob > 0.:
                if not isinstance(op1, Identity):
                    h1 = drop_path(h1, drop_prob)
                if not isinstance(op2, Identity):
                    h2 = drop_path(h2, drop_prob)
            states.append(h1 + h2)
        return torch.cat([states[i] for i in self._concat], dim=1)
61 |
62 |
class AuxiliaryHeadCIFAR(nn.Module):
    """Auxiliary classifier head used by the CIFAR network.

    A pooling/convolution stack reduces the feature map to a 768-dimensional
    vector, which a linear layer maps to ``num_classes`` logits.
    """

    def __init__(self, C, num_classes):
        """assuming input size 8x8"""
        super(AuxiliaryHeadCIFAR, self).__init__()
        stack = [
            nn.ReLU(inplace=True),
            # 8x8 -> 2x2 with kernel 5, stride 3
            nn.AvgPool2d(5, stride=3, padding=0, count_include_pad=False),
            nn.Conv2d(C, 128, 1, bias=False),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            # 2x2 -> 1x1
            nn.Conv2d(128, 768, 2, bias=False),
            nn.BatchNorm2d(768),
            nn.ReLU(inplace=True),
        ]
        self.features = nn.Sequential(*stack)
        self.classifier = nn.Linear(768, num_classes)

    def forward(self, x):
        """Return the auxiliary logits for feature map ``x``."""
        feats = self.features(x)
        flat = feats.view(feats.size(0), -1)
        return self.classifier(flat)
84 |
85 |
class AuxiliaryHeadImageNet(nn.Module):
    """Auxiliary classifier head used by the ImageNet network.

    Same layout as the CIFAR head, except that the pooling stride is 2 and
    the second batch norm is deliberately left out (see the note below).
    """

    def __init__(self, C, num_classes):
        """assuming input size 14x14"""
        # NOTE(review): the 768-input classifier needs a 1x1 map after the
        # 2x2 convolution, which kernel-5/stride-2 pooling yields for a 7x7
        # or 8x8 input, not 14x14 -- confirm the intended input size.
        super(AuxiliaryHeadImageNet, self).__init__()
        stack = [
            nn.ReLU(inplace=True),
            nn.AvgPool2d(5, stride=2, padding=0, count_include_pad=False),
            nn.Conv2d(C, 128, 1, bias=False),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.Conv2d(128, 768, 2, bias=False),
            # NOTE: This batchnorm was omitted in my earlier implementation due to a typo.
            # Commenting it out for consistency with the experiments in the paper.
            # nn.BatchNorm2d(768),
            nn.ReLU(inplace=True),
        ]
        self.features = nn.Sequential(*stack)
        self.classifier = nn.Linear(768, num_classes)

    def forward(self, x):
        """Return the auxiliary logits for feature map ``x``."""
        feats = self.features(x)
        flat = feats.view(feats.size(0), -1)
        return self.classifier(flat)
109 |
110 |
class NetworkCIFAR(nn.Module):
    """CIFAR network: a convolutional stem, a stack of cells, and a classifier.

    Cells at positions ``layers//3`` and ``2*layers//3`` are reduction cells
    and double the channel count. When ``auxiliary`` is true, an auxiliary
    head is attached to the output of the cell at ``2*layers//3`` and
    evaluated in training mode only.

    ``drop_path_prob`` is a plain attribute read on every forward pass. It
    defaults to 0.0 and may be overwritten by the training script.
    """

    def __init__(self, C, num_classes, layers, auxiliary, genotype):
        """Build the network.

        Args:
            C: initial channel count of the cells (the stem uses ``3 * C``).
            num_classes: number of output classes.
            layers: number of cells.
            auxiliary: whether to create the auxiliary head.
            genotype: cell description passed to every ``Cell``.
        """
        super(NetworkCIFAR, self).__init__()
        self._layers = layers
        self._auxiliary = auxiliary
        # forward() reads this attribute; give it a default so the model works
        # even when the caller never assigns it.
        self.drop_path_prob = 0.0

        stem_multiplier = 3
        C_curr = stem_multiplier * C
        self.stem = nn.Sequential(
            nn.Conv2d(3, C_curr, 3, padding=1, bias=False),
            nn.BatchNorm2d(C_curr)
        )

        C_prev_prev, C_prev, C_curr = C_curr, C_curr, C
        # Fallback so the name is bound even when there are no cells.
        C_to_auxiliary = C_prev
        self.cells = nn.ModuleList()
        reduction_prev = False
        for i in range(layers):
            reduction = i in [layers // 3, 2 * layers // 3]
            if reduction:
                C_curr *= 2
            cell = Cell(genotype, C_prev_prev, C_prev, C_curr, reduction, reduction_prev)
            reduction_prev = reduction
            self.cells += [cell]
            C_prev_prev, C_prev = C_prev, cell.multiplier * C_curr
            if i == 2 * layers // 3:
                C_to_auxiliary = C_prev

        if auxiliary:
            self.auxiliary_head = AuxiliaryHeadCIFAR(C_to_auxiliary, num_classes)
        self.global_pooling = nn.AdaptiveAvgPool2d(1)
        self.classifier = nn.Linear(C_prev, num_classes)

    def forward(self, input):
        """Return ``(logits, logits_aux)``.

        ``logits_aux`` is ``None`` unless the auxiliary head is enabled and
        the module is in training mode.
        """
        logits_aux = None
        s0 = s1 = self.stem(input)
        for i, cell in enumerate(self.cells):
            s0, s1 = s1, cell(s0, s1, self.drop_path_prob)
            if i == 2 * self._layers // 3:
                if self._auxiliary and self.training:
                    logits_aux = self.auxiliary_head(s1)
        out = self.global_pooling(s1)
        logits = self.classifier(out.view(out.size(0), -1))
        return logits, logits_aux
157 |
158 |
class NetworkImageNet(nn.Module):
    """ImageNet network: two strided stems, a stack of cells, and a classifier.

    ``stem0`` downsamples by 4 and ``stem1`` by a further 2. Cells at
    positions ``layers//3`` and ``2*layers//3`` are reduction cells and double
    the channel count. The final feature map is averaged with a fixed 7x7
    pooling window.

    ``drop_path_prob`` is a plain attribute read on every forward pass. It
    defaults to 0.0 and may be overwritten by the training script.
    """

    def __init__(self, C, num_classes, layers, auxiliary, genotype):
        """Build the network.

        Args:
            C: initial channel count of the cells.
            num_classes: number of output classes.
            layers: number of cells.
            auxiliary: whether to create the auxiliary head.
            genotype: cell description passed to every ``Cell``.
        """
        super(NetworkImageNet, self).__init__()
        self._layers = layers
        self._auxiliary = auxiliary
        # forward() reads this attribute; give it a default so the model works
        # even when the caller never assigns it.
        self.drop_path_prob = 0.0

        self.stem0 = nn.Sequential(
            nn.Conv2d(3, C // 2, kernel_size=3, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(C // 2),
            nn.ReLU(inplace=True),
            nn.Conv2d(C // 2, C, 3, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(C),
        )

        self.stem1 = nn.Sequential(
            nn.ReLU(inplace=True),
            nn.Conv2d(C, C, 3, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(C),
        )

        C_prev_prev, C_prev, C_curr = C, C, C
        # Fallback so the name is bound even when there are no cells.
        C_to_auxiliary = C_prev

        self.cells = nn.ModuleList()
        # stem1 halves the resolution of stem0's output, so the first cell
        # treats its older input as coming from a reduction.
        reduction_prev = True
        for i in range(layers):
            reduction = i in [layers // 3, 2 * layers // 3]
            if reduction:
                C_curr *= 2
            cell = Cell(genotype, C_prev_prev, C_prev, C_curr, reduction, reduction_prev)
            reduction_prev = reduction
            self.cells += [cell]
            C_prev_prev, C_prev = C_prev, cell.multiplier * C_curr
            if i == 2 * layers // 3:
                C_to_auxiliary = C_prev

        if auxiliary:
            self.auxiliary_head = AuxiliaryHeadImageNet(C_to_auxiliary, num_classes)
        self.global_pooling = nn.AvgPool2d(7)
        self.classifier = nn.Linear(C_prev, num_classes)

    def forward(self, input):
        """Return ``(logits, logits_aux)``.

        ``logits_aux`` is ``None`` unless the auxiliary head is enabled and
        the module is in training mode.
        """
        logits_aux = None
        s0 = self.stem0(input)
        s1 = self.stem1(s0)
        for i, cell in enumerate(self.cells):
            s0, s1 = s1, cell(s0, s1, self.drop_path_prob)
            if i == 2 * self._layers // 3:
                if self._auxiliary and self.training:
                    logits_aux = self.auxiliary_head(s1)
        out = self.global_pooling(s1)
        logits = self.classifier(out.view(out.size(0), -1))
        return logits, logits_aux
214 |
215 |
--------------------------------------------------------------------------------
/eval/model.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | from operations import *
4 | from torch.autograd import Variable
5 | from utils import drop_path
6 |
7 |
class Cell(nn.Module):
    """A single cell built from the normal or reduction half of a genotype.

    The cell takes the outputs of the two preceding cells, projects both to
    ``C`` channels, then creates ``_steps`` intermediate states. Each
    intermediate state is the sum of two operations applied to earlier states.
    The states listed in the genotype's concat field are concatenated along the
    channel dimension to form the output.
    """

    def __init__(self, genotype, C_prev_prev, C_prev, C, reduction, reduction_prev):
        """Create the two input projections and compile the genotype's edges.

        Args:
            genotype: object exposing ``normal``/``normal_concat`` and
                ``reduce``/``reduce_concat``; each edge list holds
                ``(op_name, input_index)`` pairs.
            C_prev_prev: channel count of the cell two positions back.
            C_prev: channel count of the previous cell.
            C: channel count used inside this cell.
            reduction: use the reduction half of the genotype when true.
            reduction_prev: whether the previous cell was a reduction cell.
        """
        super(Cell, self).__init__()
        print(C_prev_prev, C_prev, C)

        # The older input uses FactorizedReduce when the previous cell was a
        # reduction cell, otherwise a 1x1 ReLU-Conv-BN projection.
        self.preprocess0 = (
            FactorizedReduce(C_prev_prev, C)
            if reduction_prev
            else ReLUConvBN(C_prev_prev, C, 1, 1, 0)
        )
        self.preprocess1 = ReLUConvBN(C_prev, C, 1, 1, 0)

        if reduction:
            edges, concat = genotype.reduce, genotype.reduce_concat
        else:
            edges, concat = genotype.normal, genotype.normal_concat
        op_names, indices = zip(*edges)
        self._compile(C, op_names, indices, concat, reduction)

    def _compile(self, C, op_names, indices, concat, reduction):
        """Instantiate one operation per edge and record the cell wiring."""
        assert len(op_names) == len(indices)
        self._steps = len(op_names) // 2
        self._concat = concat
        self.multiplier = len(concat)

        self._ops = nn.ModuleList()
        for op_name, source in zip(op_names, indices):
            # Only edges reading one of the two cell inputs (index 0 or 1)
            # are strided in a reduction cell.
            stride = 1
            if reduction and source < 2:
                stride = 2
            self._ops.append(OPS[op_name](C, stride, True))
        self._indices = indices

    def forward(self, s0, s1, drop_prob):
        """Compute the cell output from the two previous cell outputs.

        ``drop_prob`` is passed to ``drop_path`` for non-Identity operations,
        and only while the module is in training mode.
        """
        states = [self.preprocess0(s0), self.preprocess1(s1)]
        for step in range(self._steps):
            first, second = 2 * step, 2 * step + 1
            in1 = states[self._indices[first]]
            in2 = states[self._indices[second]]
            op1, op2 = self._ops[first], self._ops[second]
            h1 = op1(in1)
            h2 = op2(in2)
            if self.training and drop_prob > 0.:
                if not isinstance(op1, Identity):
                    h1 = drop_path(h1, drop_prob)
                if not isinstance(op2, Identity):
                    h2 = drop_path(h2, drop_prob)
            states.append(h1 + h2)
        return torch.cat([states[i] for i in self._concat], dim=1)
61 |
62 |
class AuxiliaryHeadCIFAR(nn.Module):
    """Auxiliary classifier head used by the CIFAR network.

    A pooling/convolution stack reduces the feature map to a 768-dimensional
    vector, which a linear layer maps to ``num_classes`` logits.
    """

    def __init__(self, C, num_classes):
        """assuming input size 8x8"""
        super(AuxiliaryHeadCIFAR, self).__init__()
        stack = [
            nn.ReLU(inplace=True),
            # 8x8 -> 2x2 with kernel 5, stride 3
            nn.AvgPool2d(5, stride=3, padding=0, count_include_pad=False),
            nn.Conv2d(C, 128, 1, bias=False),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            # 2x2 -> 1x1
            nn.Conv2d(128, 768, 2, bias=False),
            nn.BatchNorm2d(768),
            nn.ReLU(inplace=True),
        ]
        self.features = nn.Sequential(*stack)
        self.classifier = nn.Linear(768, num_classes)

    def forward(self, x):
        """Return the auxiliary logits for feature map ``x``."""
        feats = self.features(x)
        flat = feats.view(feats.size(0), -1)
        return self.classifier(flat)
84 |
85 |
class AuxiliaryHeadImageNet(nn.Module):
    """Auxiliary classifier head used by the ImageNet network.

    Same layout as the CIFAR head, except that the pooling stride is 2 and
    the second batch norm is deliberately left out (see the note below).
    """

    def __init__(self, C, num_classes):
        """assuming input size 14x14"""
        # NOTE(review): the 768-input classifier needs a 1x1 map after the
        # 2x2 convolution, which kernel-5/stride-2 pooling yields for a 7x7
        # or 8x8 input, not 14x14 -- confirm the intended input size.
        super(AuxiliaryHeadImageNet, self).__init__()
        stack = [
            nn.ReLU(inplace=True),
            nn.AvgPool2d(5, stride=2, padding=0, count_include_pad=False),
            nn.Conv2d(C, 128, 1, bias=False),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.Conv2d(128, 768, 2, bias=False),
            # NOTE: This batchnorm was omitted in my earlier implementation due to a typo.
            # Commenting it out for consistency with the experiments in the paper.
            # nn.BatchNorm2d(768),
            nn.ReLU(inplace=True),
        ]
        self.features = nn.Sequential(*stack)
        self.classifier = nn.Linear(768, num_classes)

    def forward(self, x):
        """Return the auxiliary logits for feature map ``x``."""
        feats = self.features(x)
        flat = feats.view(feats.size(0), -1)
        return self.classifier(flat)
109 |
110 |
class NetworkCIFAR(nn.Module):
    """CIFAR network used for evaluation: stem, stack of cells, classifier.

    Cells at positions ``layers//3`` and ``2*layers//3`` are reduction cells
    and double the channel count. Unlike the training variant, ``forward``
    returns the logits only.

    ``drop_path_prob`` is a plain attribute read on every forward pass. It
    defaults to 0.0 and may be overwritten by the caller.
    """

    def __init__(self, C, num_classes, layers, auxiliary, genotype):
        """Build the network.

        Args:
            C: initial channel count of the cells (the stem uses ``3 * C``).
            num_classes: number of output classes.
            layers: number of cells.
            auxiliary: whether to create the auxiliary head.
            genotype: cell description passed to every ``Cell``.
        """
        super(NetworkCIFAR, self).__init__()
        self._layers = layers
        self._auxiliary = auxiliary
        # forward() reads this attribute; give it a default so the model works
        # even when the caller never assigns it.
        self.drop_path_prob = 0.0

        stem_multiplier = 3
        C_curr = stem_multiplier * C
        self.stem = nn.Sequential(
            nn.Conv2d(3, C_curr, 3, padding=1, bias=False),
            nn.BatchNorm2d(C_curr)
        )

        C_prev_prev, C_prev, C_curr = C_curr, C_curr, C
        # Fallback so the name is bound even when there are no cells.
        C_to_auxiliary = C_prev
        self.cells = nn.ModuleList()
        reduction_prev = False
        for i in range(layers):
            reduction = i in [layers // 3, 2 * layers // 3]
            if reduction:
                C_curr *= 2
            cell = Cell(genotype, C_prev_prev, C_prev, C_curr, reduction, reduction_prev)
            reduction_prev = reduction
            self.cells += [cell]
            C_prev_prev, C_prev = C_prev, cell.multiplier * C_curr
            if i == 2 * layers // 3:
                C_to_auxiliary = C_prev

        if auxiliary:
            self.auxiliary_head = AuxiliaryHeadCIFAR(C_to_auxiliary, num_classes)
        self.global_pooling = nn.AdaptiveAvgPool2d(1)
        self.classifier = nn.Linear(C_prev, num_classes)

    def forward(self, input):
        """Return the classification logits.

        The auxiliary logits are still computed in training mode when the
        auxiliary head is enabled, but they are not returned.
        """
        logits_aux = None
        s0 = s1 = self.stem(input)
        for i, cell in enumerate(self.cells):
            s0, s1 = s1, cell(s0, s1, self.drop_path_prob)
            if i == 2 * self._layers // 3:
                if self._auxiliary and self.training:
                    logits_aux = self.auxiliary_head(s1)
        out = self.global_pooling(s1)
        logits = self.classifier(out.view(out.size(0), -1))
        return logits  # , logits_aux
157 |
158 |
class NetworkImageNet(nn.Module):
    """ImageNet network: two strided stems, a stack of cells, and a classifier.

    ``stem0`` downsamples by 4 and ``stem1`` by a further 2. Cells at
    positions ``layers//3`` and ``2*layers//3`` are reduction cells and double
    the channel count. The final feature map is averaged with a fixed 7x7
    pooling window.

    ``drop_path_prob`` is a plain attribute read on every forward pass. It
    defaults to 0.0 and may be overwritten by the caller.
    """

    def __init__(self, C, num_classes, layers, auxiliary, genotype):
        """Build the network.

        Args:
            C: initial channel count of the cells.
            num_classes: number of output classes.
            layers: number of cells.
            auxiliary: whether to create the auxiliary head.
            genotype: cell description passed to every ``Cell``.
        """
        super(NetworkImageNet, self).__init__()
        self._layers = layers
        self._auxiliary = auxiliary
        # forward() reads this attribute; give it a default so the model works
        # even when the caller never assigns it.
        self.drop_path_prob = 0.0

        self.stem0 = nn.Sequential(
            nn.Conv2d(3, C // 2, kernel_size=3, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(C // 2),
            nn.ReLU(inplace=True),
            nn.Conv2d(C // 2, C, 3, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(C),
        )

        self.stem1 = nn.Sequential(
            nn.ReLU(inplace=True),
            nn.Conv2d(C, C, 3, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(C),
        )

        C_prev_prev, C_prev, C_curr = C, C, C
        # Fallback so the name is bound even when there are no cells.
        C_to_auxiliary = C_prev

        self.cells = nn.ModuleList()
        # stem1 halves the resolution of stem0's output, so the first cell
        # treats its older input as coming from a reduction.
        reduction_prev = True
        for i in range(layers):
            reduction = i in [layers // 3, 2 * layers // 3]
            if reduction:
                C_curr *= 2
            cell = Cell(genotype, C_prev_prev, C_prev, C_curr, reduction, reduction_prev)
            reduction_prev = reduction
            self.cells += [cell]
            C_prev_prev, C_prev = C_prev, cell.multiplier * C_curr
            if i == 2 * layers // 3:
                C_to_auxiliary = C_prev

        if auxiliary:
            self.auxiliary_head = AuxiliaryHeadImageNet(C_to_auxiliary, num_classes)
        self.global_pooling = nn.AvgPool2d(7)
        self.classifier = nn.Linear(C_prev, num_classes)

    def forward(self, input):
        """Return ``(logits, logits_aux)``.

        ``logits_aux`` is ``None`` unless the auxiliary head is enabled and
        the module is in training mode.
        """
        logits_aux = None
        s0 = self.stem0(input)
        s1 = self.stem1(s0)
        for i, cell in enumerate(self.cells):
            s0, s1 = s1, cell(s0, s1, self.drop_path_prob)
            if i == 2 * self._layers // 3:
                if self._auxiliary and self.training:
                    logits_aux = self.auxiliary_head(s1)
        out = self.global_pooling(s1)
        logits = self.classifier(out.view(out.size(0), -1))
        return logits, logits_aux
214 |
215 |
--------------------------------------------------------------------------------
/advrush/hessianflow/optimizer/absa.py:
--------------------------------------------------------------------------------
1 | #*
2 | # @file ABSA training driver based on arxiv:1810.01021
3 | # Copyright (c) Zhewei Yao, Amir Gholami
4 | # All rights reserved.
5 | # This file is part of HessianFlow library.
6 | #
7 | # HessianFlow is free software: you can redistribute it and/or modify
8 | # it under the terms of the GNU General Public License as published by
9 | # the Free Software Foundation, either version 3 of the License, or
10 | # (at your option) any later version.
11 | #
12 | # HessianFlow is distributed in the hope that it will be useful,
13 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | # GNU General Public License for more details.
16 | #
17 | # You should have received a copy of the GNU General Public License
18 | # along with HessianFlow. If not, see https://www.gnu.org/licenses/.
19 | #*
20 |
21 | from __future__ import print_function
22 | import numpy as np
23 | import torch
24 | import torch.nn as nn
25 | import torch.nn.functional as F
26 | import torch.optim as optim
27 | from torchvision import datasets, transforms
28 | from torch.autograd import Variable
29 |
30 | from .progressbar import progress_bar
31 | from .optm_utils import fgsm, exp_lr_scheduler, test
32 |
33 | import hessianflow
34 | from hessianflow.utils import get_params_grad, group_add
35 | from hessianflow.eigen import get_eigen
36 | from copy import deepcopy
37 |
38 |
def get_lr(opt):
    """
    Return the learning rate of the optimizer's first parameter group,
    or None when the optimizer has no parameter groups.
    """
    return next((group['lr'] for group in opt.param_groups), None)
45 |
def copy_update(opt, grad):
    """
    Apply one SGD step using externally supplied gradients.

    The hyper-parameters (lr, weight decay, momentum, dampening, nesterov)
    are read from each of ``opt``'s parameter groups and the momentum buffers
    are kept in ``opt.state``.

    Args:
        opt: optimizer whose param groups carry the SGD hyper-parameters.
        grad: flat list of gradient tensors, one per parameter, ordered as
            the parameters appear across ``opt.param_groups``. When weight
            decay is non-zero the tensors are modified in place.
    """
    # Running index into `grad`. It must continue across parameter groups;
    # restarting at 0 for each group would hand later groups the gradients
    # of the first group's parameters.
    grad_idx = 0
    for group in opt.param_groups:
        weight_decay = group['weight_decay']
        momentum = group['momentum']
        dampening = group['dampening']
        nesterov = group['nesterov']
        lr = group['lr']

        for p in group['params']:
            d_p = grad[grad_idx]
            grad_idx += 1
            if weight_decay != 0:
                d_p.add_(p.data, alpha=weight_decay)
            if momentum != 0:
                param_state = opt.state[p]
                if 'momentum_buffer' not in param_state:
                    # First step: the buffer starts as a copy of the gradient.
                    buf = param_state['momentum_buffer'] = torch.zeros_like(p.data)
                    buf.mul_(momentum).add_(d_p)
                else:
                    buf = param_state['momentum_buffer']
                    buf.mul_(momentum).add_(d_p, alpha=1 - dampening)
                if nesterov:
                    d_p = d_p.add(buf, alpha=momentum)
                else:
                    d_p = buf
            p.data.add_(d_p, alpha=-lr)
73 |
def absa(model, train_loader, hessian_loader, test_loader, criterion, optimizer, epochs, lr_decay_epoch, lr_decay_ratio, batch_size = 128,
         max_large_ratio = 1, adv_ratio = 0., eps = 0., duration = True, cuda = True, print_flag = False):
    """
    adaptive batch size with adversarial training

    Gradients of ``large_ratio`` consecutive mini-batches are accumulated and
    applied in one step through ``copy_update``, which emulates a batch of
    size ``batch_size * large_ratio``. After each epoch, while adaptation is
    enabled, ``get_eigen`` is evaluated on one batch of ``hessian_loader``.
    ``large_ratio`` is doubled (and ``adv_ratio`` halved) when the returned
    value has dropped to at most half of the recorded reference, or after 10
    epochs without a change when ``duration`` is true. The learning rate is
    rescaled by ``large_ratio / prev_ratio`` through ``exp_lr_scheduler``.

    Args:
        model: network to train.
        train_loader: yields (data, target) training batches; batches smaller
            than ``batch_size`` are skipped.
        hessian_loader: its first batch is used for the eigenvalue estimate.
        test_loader: passed to ``test`` after every epoch.
        criterion: loss function.
        optimizer: SGD-style optimizer (see ``copy_update``).
        epochs: number of epochs.
        lr_decay_epoch: container of epochs at which the lr is decayed.
        lr_decay_ratio: decay factor applied at those epochs.
        batch_size: mini-batch size of ``train_loader``.
        max_large_ratio: upper bound on ``large_ratio``; 1 disables adaptation.
        adv_ratio: fraction of each batch replaced by FGSM examples.
        eps: FGSM perturbation magnitude.
        duration: when true, also grow the batch after 10 epochs without growth.
        cuda: move data to the GPU and run the eigenvalue computation there.
        print_flag: print eigenvalue / batch size information after each epoch.

    Returns:
        (model, num_updates): the trained model and the number of parameter
        updates performed.
    """

    # initialization
    large_grad = []
    inner_loop = 0
    large_ratio = 1
    eig = None
    max_eig = None
    decay_ratio = 2
    # adaptation is pointless when the batch is not allowed to grow
    flag = max_large_ratio != 1

    data_eigen = None
    target_eigen = None
    if duration == True:
        duration = 10
    else:
        duration = None

    cur_duration = 0
    num_updates = 0

    for epoch in range(1, epochs + 1):
        print('\nCurrent Epoch: %d' % epoch)
        print('\nTraining')
        train_loss = 0.
        total_num = 0
        correct = 0

        for batch_idx, (data, target) in enumerate(train_loader):
            if data.size()[0] < batch_size:
                continue
            # gather input and target for large batch training
            inner_loop += 1

            # keep the latest batch as a fallback for the eigen-computation
            # in case hessian_loader yields nothing
            data_eigen = data
            target_eigen = target

            # get small model update
            # use adversarial training
            if adv_ratio > 1. / batch_size:
                adv_r = max(int(batch_size * adv_ratio), 1)
                model.eval()  # set flag so that Batch Norm statistics would not be polluted with fgsm
                adv_data = fgsm(model, data[:adv_r], target[:adv_r], eps, cuda)
                model.train()  # set flag to train for Batch Norm
                adv_data = torch.cat([adv_data, data[adv_r:]])
            else:
                model.train()
                adv_data = data

            optimizer.zero_grad()
            if cuda:
                adv_data, target = adv_data.cuda(), target.cuda()

            output = model(adv_data)
            # scale so that the accumulated gradient is an average over the large batch
            loss = criterion(output, target) / large_ratio
            total_num += target.size(0)
            _, predicted = output.max(1)
            correct += predicted.eq(target).sum().item()

            train_loss += loss.item() * target.size(0) * float(large_ratio)
            loss.backward()
            _, small_grad = get_params_grad(model)
            if not large_grad:
                large_grad = deepcopy(small_grad)
            else:
                large_grad = group_add(large_grad, small_grad)

            if inner_loop % large_ratio == 0:
                num_updates += 1
                copy_update(optimizer, large_grad)  # todo: see if we can use deep copy to set optimizer.grad = large_grad
                large_grad = []
                inner_loop = 0
                optimizer.zero_grad()

            progress_bar(batch_idx, len(train_loader), 'Loss: %.3f | Acc: %.3f%% (%d/%d)'
                         % (train_loss / total_num,
                            100. * correct / total_num, correct, total_num))

        ## compute eigenvalues and update large_ratio, adv_ratio etc
        if flag:
            # use the first hessian batch; fall back to the last training batch
            for data_eigen, target_eigen in hessian_loader:
                break
            eig, _ = get_eigen(model, data_eigen, target_eigen, criterion, cuda = cuda, maxIter = 10, tol = 1e-2)
            cur_duration += 1

            if max_eig is None:
                max_eig = eig
                eig_dropped = False
            else:
                eig_dropped = eig <= max_eig / decay_ratio
                if eig_dropped:
                    max_eig = eig
            # if it is around a quadratic bowl, increase batch size
            stalled = duration is not None and cur_duration - duration > -0.5

            if eig_dropped or stalled:
                # ensure the learning rate is not too crazy, especially for model without batch normalization
                prev_ratio = large_ratio
                large_ratio = int(large_ratio * decay_ratio)
                adv_ratio /= decay_ratio
                if large_ratio >= max_large_ratio:
                    large_ratio = max_large_ratio
                    adv_ratio = 0.
                    flag = False
                cur_duration = 0
                optimizer = exp_lr_scheduler(optimizer, decay_ratio = large_ratio / prev_ratio)

        if epoch in lr_decay_epoch:
            optimizer = exp_lr_scheduler(optimizer, decay_ratio = lr_decay_ratio)

        if epoch >= epochs // 2:
            adv_ratio = 0.

        if print_flag:
            if eig is None:
                # no eigenvalue has been computed (adaptation disabled)
                print('\n Batch size %d' % (batch_size * large_ratio))
            else:
                print('\n Eig %f Max Eig %f Batch size %d' % (eig, max_eig, batch_size * large_ratio))

        test(model, test_loader)

    return model, num_updates
214 |
--------------------------------------------------------------------------------
/advrush/adv_train.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import time
4 | import glob
5 | import numpy as np
6 | import torch
7 | import utils
8 | import logging
9 | import argparse
10 | import torch.nn as nn
11 | import genotypes
12 | import torch.utils
13 | import torchvision.datasets as dset
14 | import torch.backends.cudnn as cudnn
15 |
16 | from torch.autograd import Variable
17 | from model import NetworkCIFAR as Network
18 | from trades import trades_loss, madry_loss
19 |
# Command-line configuration for adversarial training on CIFAR-10.
parser = argparse.ArgumentParser("cifar")
parser.add_argument('--data', type=str, default='../data', help='location of the data corpus')
parser.add_argument('--batch_size', type=int, default=64, help='batch size') #128
parser.add_argument('--learning_rate', type=float, default=0.1, help='init learning rate')
parser.add_argument('--momentum', type=float, default=0.9, help='momentum')
parser.add_argument('--weight_decay', type=float, default=1e-4, help='weight decay')
parser.add_argument('--report_freq', type=float, default=50, help='report frequency')
parser.add_argument('--gpu', type=int, default=0, help='gpu device id')
parser.add_argument('--epochs', type=int, default=200, help='num of training epochs')
parser.add_argument('--epsilon', type=float, default=0.031, help='perturbation')
parser.add_argument('--num_steps', type=int, default=7, help='perturb number of steps')
parser.add_argument('--step_size', type=float, default=0.01, help='perturb step size')
parser.add_argument('--beta', type=float, default=6.0, help='regularization in TRADES')
# Only these two losses are implemented by train(); reject anything else up
# front instead of failing mid-training on an unbound `loss`.
parser.add_argument('--adv_loss', type=str, default='pgd', choices=['pgd', 'trades'],
                    help='adversarial training loss: pgd or trades')
parser.add_argument('--init_channels', type=int, default=36, help='num of init channels')
parser.add_argument('--layers', type=int, default=20, help='total number of layers')
parser.add_argument('--model_path', type=str, default='saved_models', help='path to save the model')
parser.add_argument('--auxiliary', action='store_true', default=False, help='use auxiliary tower')
parser.add_argument('--auxiliary_weight', type=float, default=0.4, help='weight for auxiliary loss')
parser.add_argument('--cutout', action='store_true', default=False, help='use cutout')
parser.add_argument('--cutout_length', type=int, default=16, help='cutout length')
parser.add_argument('--drop_path_prob', type=float, default=0.0, help='drop path probability')
parser.add_argument('--save', type=str, default='EXP', help='experiment name')
parser.add_argument('--seed', type=int, default=0, help='random seed')
parser.add_argument('--arch', type=str, default='ADVRUSH', help='which architecture to use')
parser.add_argument('--grad_clip', type=float, default=5, help='gradient clipping')

args = parser.parse_args()

# Each run gets its own timestamped directory holding a copy of the scripts.
args.save = 'eval-{}-{}'.format(args.save, time.strftime("%Y%m%d-%H%M%S"))
utils.create_exp_dir(args.save, scripts_to_save=glob.glob('*.py'))

# Log to stdout and to <save dir>/log.txt.
log_format = '%(asctime)s %(message)s'
logging.basicConfig(stream=sys.stdout, level=logging.INFO,
                    format=log_format, datefmt='%m/%d %I:%M:%S %p')
fh = logging.FileHandler(os.path.join(args.save, 'log.txt'))
fh.setFormatter(logging.Formatter(log_format))
logging.getLogger().addHandler(fh)

CIFAR_CLASSES = 10
60 |
61 |
def main():
    """Adversarially train the architecture named by ``--arch`` on CIFAR-10.

    Builds the network, optimizer and data loaders, then alternates training
    and clean validation for ``args.epochs`` epochs. The checkpoint with the
    best validation accuracy is saved with ``is_best=True``; the latest
    weights and checkpoint are saved after every epoch.
    """
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    # Look the genotype up by name rather than eval()-ing a command-line
    # string, which would execute arbitrary code.
    genotype = getattr(genotypes, args.arch)
    model = Network(args.init_channels, CIFAR_CLASSES, args.layers, args.auxiliary, genotype)
    model = model.cuda()

    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    optimizer = torch.optim.SGD(
        model.parameters(),
        args.learning_rate,
        momentum=args.momentum,
        weight_decay=args.weight_decay
    )

    train_transform, valid_transform = utils._data_transforms_cifar10_eval(args)
    train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform)
    valid_data = dset.CIFAR10(root=args.data, train=False, download=True, transform=valid_transform)

    train_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size, shuffle=True, pin_memory=True, num_workers=2)

    valid_queue = torch.utils.data.DataLoader(
        valid_data, batch_size=args.batch_size, shuffle=False, pin_memory=True, num_workers=2)

    best_acc = 0.0
    for epoch in range(args.epochs):
        adjust_learning_rate(optimizer, epoch)
        # drop-path probability ramps up linearly over training
        model.drop_path_prob = args.drop_path_prob * epoch / args.epochs

        train_acc, train_obj = train(train_queue, model, criterion, optimizer)
        logging.info('epoch %d train_acc %f', epoch, train_acc)

        valid_acc, valid_obj = infer(valid_queue, model, criterion)
        if valid_acc > best_acc:
            best_acc = valid_acc
            utils.save_checkpoint({
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict(),
            }, is_best=True, save=args.save, epoch=epoch)
        logging.info('epoch %d valid_acc %f, best_acc %f', epoch, valid_acc, best_acc)

        utils.save(model, os.path.join(args.save, 'weights.pt'))
        utils.save_checkpoint({
            'epoch': epoch + 1,
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict(),
        }, is_best=False, save=args.save, epoch=epoch)
125 |
126 |
def train(train_queue, model, criterion, optimizer):
    """Run one epoch of adversarial training.

    For each batch the adversarial loss selected by ``args.adv_loss``
    (``madry_loss`` for 'pgd', ``trades_loss`` for 'trades') is minimised.
    The reported accuracy is computed from the logits of the unperturbed
    inputs.

    Returns:
        (top-1 accuracy average, loss average) over the epoch.

    Raises:
        ValueError: if ``args.adv_loss`` is not 'pgd' or 'trades'.
    """
    objs = utils.AvgrageMeter()
    top1 = utils.AvgrageMeter()
    top5 = utils.AvgrageMeter()
    model.train()

    for step, (input, target) in enumerate(train_queue):
        input = Variable(input).cuda(non_blocking=True)
        target = Variable(target).cuda(non_blocking=True)

        optimizer.zero_grad()
        # clean forward pass: used for the accuracy meters and the auxiliary loss
        logits, logits_aux = model(input)
        if args.adv_loss == 'pgd':
            loss = madry_loss(
                model,
                input,
                target,
                optimizer,
                step_size=args.step_size,
                epsilon=args.epsilon,
                perturb_steps=args.num_steps)
        elif args.adv_loss == 'trades':
            loss = trades_loss(
                model,
                input,
                target,
                optimizer,
                step_size=args.step_size,
                epsilon=args.epsilon,
                perturb_steps=args.num_steps,
                beta=args.beta,
                distance='l_inf')
        else:
            # Without this branch `loss` would be unbound below.
            raise ValueError("unsupported adv_loss %r (expected 'pgd' or 'trades')" % args.adv_loss)
        if args.auxiliary:
            loss_aux = criterion(logits_aux, target)
            loss += args.auxiliary_weight * loss_aux
        loss.backward()
        # clip_grad_norm (no trailing underscore) is deprecated
        nn.utils.clip_grad_norm_(model.parameters(), args.grad_clip)
        optimizer.step()

        prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
        n = input.size(0)
        objs.update(loss.data.item(), n)
        top1.update(prec1.data.item(), n)
        top5.update(prec5.data.item(), n)

        if step % args.report_freq == 0:
            logging.info('train %03d %e %f %f', step, objs.avg, top1.avg, top5.avg)

    return top1.avg, objs.avg
176 |
177 |
def infer(valid_queue, model, criterion):
    """Evaluate the model on clean validation data.

    Returns:
        (top-1 accuracy average, loss average) over ``valid_queue``.
    """
    loss_meter = utils.AvgrageMeter()
    top1_meter = utils.AvgrageMeter()
    top5_meter = utils.AvgrageMeter()
    model.eval()

    with torch.no_grad():
        for step, batch in enumerate(valid_queue):
            images, labels = batch
            images = Variable(images, requires_grad=False).cuda(non_blocking=True)
            labels = Variable(labels, requires_grad=False).cuda(non_blocking=True)

            # the auxiliary output is ignored during evaluation
            logits, _ = model(images)
            loss = criterion(logits, labels)

            prec1, prec5 = utils.accuracy(logits, labels, topk=(1, 5))
            batch_size = images.size(0)
            loss_meter.update(loss.data.item(), batch_size)
            top1_meter.update(prec1.data.item(), batch_size)
            top5_meter.update(prec5.data.item(), batch_size)

            if step % args.report_freq == 0:
                logging.info('valid %03d %e %f %f', step,
                             loss_meter.avg, top1_meter.avg, top5_meter.avg)

    return top1_meter.avg, loss_meter.avg
202 |
def adjust_learning_rate(optimizer, epoch):
    """Set the step-decayed learning rate on every parameter group.

    The base rate ``args.learning_rate`` is used up to epoch 98, multiplied
    by 0.1 from epoch 99 and by 0.01 from epoch 149.
    """
    if epoch >= 149:
        new_lr = args.learning_rate * 0.01
    elif epoch >= 99:
        new_lr = args.learning_rate * 0.1
    else:
        new_lr = args.learning_rate
    for group in optimizer.param_groups:
        group['lr'] = new_lr
212 |
# Start training only when this file is executed as a script.
if __name__ == '__main__':
    main()
215 |
216 |
--------------------------------------------------------------------------------
/eval/pgd_attack.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | import os
3 | import argparse
4 | import torch
5 | import torch.nn as nn
6 | import torch.nn.functional as F
7 | import torchvision
8 | from torch.autograd import Variable
9 | import torch.optim as optim
10 | from torchvision import datasets, transforms
11 | from model import NetworkCIFAR as Network
12 | import genotypes
13 |
def _str2bool(value):
    """Parse a command-line boolean such as 'true'/'false' or '1'/'0'."""
    return str(value).strip().lower() in ('1', 'true', 't', 'yes', 'y')


# Command-line configuration for the PGD attack evaluation.
parser = argparse.ArgumentParser(description='PyTorch CIFAR PGD Attack Evaluation')
parser.add_argument('--test-batch-size', type=int, default=25, metavar='N',
                    help='input batch size for testing (default: 25)')
parser.add_argument('--no-cuda', action='store_true', default=False,
                    help='disables CUDA training')
# Without an explicit type a value given on the command line would arrive as
# a string and break the arithmetic in the attack.
parser.add_argument('--epsilon', type=float, default=0.031,
                    help='perturbation')
parser.add_argument('--num-steps', type=int, default=20,
                    help='perturb number of steps')
parser.add_argument('--step-size', type=float, default=0.01,
                    help='perturb step size')
# Parsed explicitly: any non-empty string (including "False") is truthy.
parser.add_argument('--random', type=_str2bool,
                    default=True,
                    help='random initialization for PGD')
parser.add_argument('--white-box-attack', type=_str2bool, default=False,
                    help='whether perform white-box attack')
parser.add_argument('--arch', type=str, default='ADVRUSH', help='which architecture to use')
parser.add_argument('--init_channels', type=int, default=36, help='num of init channels')
parser.add_argument('--layers', type=int, default=20, help='total number of layers')
parser.add_argument('--auxiliary', action='store_true', default=False, help='use auxiliary tower')
parser.add_argument('--drop_path_prob', type=float, default=0.0, help='drop path probability')
parser.add_argument('--target_arch', type=str, default='ADVRUSH', help='architecture of the target model')
parser.add_argument('--source_arch', type=str, default='ADVRUSH', help='architecture of the source model')
parser.add_argument('--target_checkpoint', type=str, default='./', help='path to the target model checkpoint')
parser.add_argument('--source_checkpoint', type=str, default='./', help='path to the source model checkpoint')
parser.add_argument('--log_path', type=str, default='./', help='path to store log file')
parser.add_argument('--checkpoint', type=str, default='./', help='path to the model checkpoint')
parser.add_argument('--data_type', type=str, default='cifar10', help='which dataset to use')

args = parser.parse_args()

# settings
use_cuda = not args.no_cuda and torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
kwargs = {'num_workers': 4, 'pin_memory': True} if use_cuda else {}

# set up data loader
# Inputs are only converted to tensors in [0, 1]; no normalisation is applied.
transform_list = [transforms.ToTensor()]
transform_test = transforms.Compose(transform_list)
if args.data_type == 'cifar10':
    testset = torchvision.datasets.CIFAR10(root='../data', train=False, download=True, transform=transform_test)
elif args.data_type == 'cifar100':
    testset = torchvision.datasets.CIFAR100(root='../data', train=False, download=True, transform=transform_test)
elif args.data_type == 'svhn':
    testset = torchvision.datasets.SVHN(root='../data', split='test', download=True, transform=transform_test)
else:
    # Previously an unknown value left `testset` undefined.
    raise ValueError("unsupported data_type %r (expected 'cifar10', 'cifar100' or 'svhn')" % args.data_type)

test_loader = torch.utils.data.DataLoader(testset, batch_size=args.test_batch_size, shuffle=False, **kwargs)
65 |
66 |
def _count_errors(model, X, y):
    """Return the number of samples in ``X`` that ``model`` misclassifies (0-dim tensor)."""
    # Evaluation only: no autograd graph is needed for the forward pass.
    with torch.no_grad():
        predictions = model(X).max(1)[1]
    return (predictions != y).float().sum()


def _pgd_perturb(model, X, y, epsilon, num_steps, step_size):
    """Craft an L-infinity PGD adversarial example for ``X`` against ``model``.

    Starting from ``X`` (plus uniform noise in ``[-epsilon, epsilon]`` when
    ``args.random`` is set), takes ``num_steps`` signed-gradient ascent steps
    of size ``step_size`` on the cross-entropy loss, projecting back into the
    epsilon-ball around ``X`` and into ``[0, 1]`` after every step.
    """
    # Values may arrive as strings when supplied on the command line.
    epsilon = float(epsilon)
    step_size = float(step_size)

    x_nat = X.detach()
    x_adv = x_nat
    if args.random:
        # Noise is drawn on the CPU and then moved to X's device, as before.
        noise = torch.empty(X.shape, dtype=torch.float32).uniform_(-epsilon, epsilon)
        x_adv = x_nat + noise.to(X.device)

    criterion = nn.CrossEntropyLoss()
    for _ in range(num_steps):
        x_adv = x_adv.detach().requires_grad_(True)
        with torch.enable_grad():
            loss = criterion(model(x_adv), y)
        # autograd.grad returns only the input gradient, so nothing is
        # accumulated into the model parameters' .grad buffers.
        grad = torch.autograd.grad(loss, x_adv)[0]
        x_adv = x_adv.detach() + step_size * grad.sign()
        delta = torch.clamp(x_adv - x_nat, -epsilon, epsilon)
        x_adv = torch.clamp(x_nat + delta, 0, 1.0)
    return x_adv.detach()


def _pgd_whitebox(model,
                  X,
                  y,
                  epsilon=args.epsilon,
                  num_steps=args.num_steps,
                  step_size=args.step_size):
    """Attack ``model`` with PGD examples crafted on ``model`` itself.

    Returns:
        ``(err, err_pgd)``: the number of misclassified clean samples and the
        number of misclassified adversarial samples in the batch.
    """
    err = _count_errors(model, X, y)
    X_pgd = _pgd_perturb(model, X, y, epsilon, num_steps, step_size)
    err_pgd = _count_errors(model, X_pgd, y)
    print('err pgd (white-box): ', err_pgd)
    return err, err_pgd


def _pgd_blackbox(model_target,
                  model_source,
                  X,
                  y,
                  epsilon=args.epsilon,
                  num_steps=args.num_steps,
                  step_size=args.step_size):
    """Attack ``model_target`` with PGD examples crafted on ``model_source``.

    Returns:
        ``(err, err_pgd)``: the number of clean samples and the number of
        transferred adversarial samples that ``model_target`` misclassifies.
    """
    err = _count_errors(model_target, X, y)
    X_pgd = _pgd_perturb(model_source, X, y, epsilon, num_steps, step_size)
    err_pgd = _count_errors(model_target, X_pgd, y)
    print('err pgd black-box: ', err_pgd)
    return err, err_pgd
126 |
127 |
def eval_adv_test_whitebox(model, device, test_loader):
    """
    Run the white-box PGD attack on every batch of ``test_loader`` and print
    the summed natural and robust error counts.
    """
    model.eval()
    natural_err_total = 0
    robust_err_total = 0

    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        # the attack differentiates with respect to the input batch
        X = Variable(images, requires_grad=True)
        y = Variable(labels)
        batch_natural, batch_robust = _pgd_whitebox(model, X, y)
        robust_err_total += batch_robust
        natural_err_total += batch_natural

    print('natural_err_total: ', natural_err_total)
    print('robust_err_total: ', robust_err_total)
145 |
146 |
def eval_adv_test_blackbox(model_target, model_source, device, test_loader):
    """
    Run the black-box (transfer) PGD attack on every batch of ``test_loader``
    and print the summed natural and robust error counts of the target model.
    """
    for network in (model_target, model_source):
        network.eval()
    natural_err_total = 0
    robust_err_total = 0

    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        # the attack differentiates with respect to the input batch
        X = Variable(images, requires_grad=True)
        y = Variable(labels)
        batch_natural, batch_robust = _pgd_blackbox(model_target, model_source, X, y)
        robust_err_total += batch_robust
        natural_err_total += batch_natural

    print('natural_err_total: ', natural_err_total)
    print('robust_err_total: ', robust_err_total)
165 |
def _load_network(arch, checkpoint_path, num_classes):
    """Build the network for genotype ``arch`` and load its weights from ``checkpoint_path``."""
    # getattr instead of eval(): the name comes straight from the command
    # line and must only ever select an attribute of the genotypes module.
    genotype = getattr(genotypes, arch)
    model = Network(args.init_channels, num_classes, args.layers, args.auxiliary, genotype)
    # Map onto the selected device so that --no-cuda (or a CPU-only machine)
    # works with checkpoints that were saved from a GPU.
    checkpoint = torch.load(checkpoint_path, map_location=device)
    model.load_state_dict(checkpoint['state_dict'])
    model.drop_path_prob = args.drop_path_prob
    return model.to(device)


def main():
    """Run the white-box or black-box PGD evaluation selected on the command line."""
    # The class count follows the dataset in both modes; the black-box branch
    # used to hard-code 10 classes even for cifar100.
    CIFAR_CLASSES = 100 if args.data_type == 'cifar100' else 10

    if args.white_box_attack:
        # white-box attack
        print('pgd white-box attack')
        model = _load_network(args.arch, args.checkpoint, CIFAR_CLASSES)
        eval_adv_test_whitebox(model, device, test_loader)
    else:
        # black-box attack
        print('pgd black-box attack')
        model_source = _load_network(args.source_arch, args.source_checkpoint, CIFAR_CLASSES)
        model_target = _load_network(args.target_arch, args.target_checkpoint, CIFAR_CLASSES)
        eval_adv_test_blackbox(model_target, model_source, device, test_loader)
205 |
206 | if __name__ == '__main__':
207 | main()
208 |
--------------------------------------------------------------------------------
/advrush/train_search.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import time
4 | import glob
5 | from random import shuffle
6 | import numpy as np
7 | import torch
8 | import utils
9 | import logging
10 | import argparse
11 | import torch.nn as nn
12 | import torch.utils
13 | import torch.nn.functional as F
14 | import torchvision.datasets as dset
15 | import torch.backends.cudnn as cudnn
16 |
17 | from torch.autograd import Variable
18 | from model_search import Network
19 | from architect import Architect
20 | from regularizer import *
21 | from tensorboardX import SummaryWriter
22 | import hessianflow as hf
23 | import hessianflow.optimizer.optm_utils as hf_optm_utils
24 | import hessianflow.optimizer.progressbar as hf_optm_pgb
25 |
parser = argparse.ArgumentParser("cifar")

# --- data and weight optimisation ---
parser.add_argument(
    '--data', type=str, default='../data',
    help='location of the data corpus')
parser.add_argument(
    '--batch_size', type=int, default=64,
    help='batch size')
parser.add_argument(
    '--learning_rate', type=float, default=0.025,
    help='init learning rate')
parser.add_argument(
    '--learning_rate_min', type=float, default=0.001,
    help='min learning rate')
parser.add_argument(
    '--momentum', type=float, default=0.9,
    help='momentum')
parser.add_argument(
    '--weight_decay', type=float, default=3e-4,
    help='weight decay')

# --- run control ---
parser.add_argument(
    '--report_freq', type=float, default=50,
    help='report frequency')
parser.add_argument(
    '--gpu', type=int, default=0,
    help='gpu device id')
parser.add_argument(
    '--epochs', type=int, default=50,
    help='num of training epochs')

# --- supernet shape ---
parser.add_argument(
    '--init_channels', type=int, default=16,
    help='num of init channels')
parser.add_argument(
    '--layers', type=int, default=8,
    help='total number of layers')

# --- bookkeeping and augmentation ---
parser.add_argument(
    '--model_path', type=str, default='saved_models',
    help='path to save the model')
parser.add_argument(
    '--cutout', action='store_true', default=False,
    help='use cutout')
parser.add_argument(
    '--cutout_length', type=int, default=16,
    help='cutout length')
parser.add_argument(
    '--drop_path_prob', type=float, default=0.3,
    help='drop path probability')
parser.add_argument(
    '--save', type=str, default='EXP',
    help='experiment name')
parser.add_argument(
    '--seed', type=int, default=2,
    help='random seed')
parser.add_argument(
    '--grad_clip', type=float, default=5,
    help='gradient clipping')
parser.add_argument(
    '--train_portion', type=float, default=0.5,
    help='portion of training data')

# --- architecture optimisation ---
parser.add_argument(
    '--unrolled', action='store_true', default=False,
    help='use one-step unrolled validation loss')
parser.add_argument(
    '--arch_learning_rate', type=float, default=3e-4,
    help='learning rate for arch encoding')
parser.add_argument(
    '--arch_weight_decay', type=float, default=1e-3,
    help='weight decay for arch encoding')

# --- Hessian-based regularisation ---
parser.add_argument(
    '--a_gamma', type=float, default=0.01,
    help='a regularization strength')
parser.add_argument(
    '--w_gamma', type=float, default=1e-4,
    help='w regularization strength')
parser.add_argument(
    '--a_warmup_epochs', type=int, default=50,
    help='num of warm up epochs before using Hessian - architecture weight')
parser.add_argument(
    '--w_warmup_epochs', type=int, default=60,
    help='num of warm up epochs before using Hessian - model parameters')
parser.add_argument(
    '--loss_hessian', type=str, default='loss_cure',
    help='type of hessian loss to use, loss_eigen')

args = parser.parse_args()
56 |
# Each run gets its own timestamped directory holding a snapshot of the scripts.
args.save = f'search-{args.save}-{time.strftime("%Y%m%d-%H%M%S")}'
utils.create_exp_dir(args.save, scripts_to_save=glob.glob('*.py'))

# Log to stdout and, with the same message layout, to <save>/log.txt.
log_format = '%(asctime)s %(message)s'
logging.basicConfig(
    stream=sys.stdout,
    level=logging.INFO,
    format=log_format,
    datefmt='%m/%d %I:%M:%S %p',
)
fh = logging.FileHandler(os.path.join(args.save, 'log.txt'))
fh.setFormatter(logging.Formatter(log_format))
logging.getLogger().addHandler(fh)

# TensorBoard event files live in <save>/log.
_tb_log_dir = os.path.join(args.save, './log')
if not os.path.isdir(_tb_log_dir):
    os.makedirs(_tb_log_dir)
tb_logger = SummaryWriter(_tb_log_dir)

CIFAR_CLASSES = 10
72 |
73 |
def main():
    """Run the architecture search.

    Seeds the RNGs, builds the search network, its SGD optimizer, the data
    queues, the cosine LR scheduler and the Architect, then for every epoch
    trains, validates and writes a checkpoint into ``args.save``.
    Exits with status 1 when no CUDA device is available.
    """
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    model = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion)
    model = model.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    optimizer = torch.optim.SGD(
        model.parameters(),
        args.learning_rate,
        momentum=args.momentum,
        weight_decay=args.weight_decay)

    train_transform, _ = utils._data_transforms_cifar10(args)
    train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform)

    # NOTE(review): both queues iterate the same full training set, in order
    # (no sampler, no shuffle), and --train_portion is not used here. Confirm
    # this is intended rather than a dropped train/validation split.
    train_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size, pin_memory=True, num_workers=2)

    valid_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size, pin_memory=True, num_workers=2)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs), eta_min=args.learning_rate_min)

    architect = Architect(model, args)

    # Per-epoch step size h handed to train(): ramps 0.0 -> 1.5 over the first
    # six epochs and then stays at 1.5. It does not change between epochs, so
    # it is built once instead of on every iteration.
    h_all = np.array([0.0, 0.3, 0.6, 0.9, 1.2, 1.5])
    h_all = np.append(h_all, [1.5] * int(args.epochs - 6))

    for epoch in range(args.epochs):
        # NOTE(review): stepping the scheduler before the epoch's optimizer
        # steps is the pre-1.1 PyTorch convention; kept so the schedule does
        # not shift, but newer releases warn about this order.
        scheduler.step()
        # Read the rate actually installed in the optimizer; scheduler.get_lr()
        # is deprecated for this purpose and, on newer PyTorch, may not return
        # the value that was applied.
        lr = optimizer.param_groups[0]['lr']
        logging.info('epoch %d lr %e', epoch, lr)

        genotype = model.genotype()
        logging.info('genotype = %s', genotype)

        logging.info(F.softmax(model.alphas_normal, dim=-1))
        logging.info(F.softmax(model.alphas_reduce, dim=-1))

        # training
        train_acc, train_obj, a_reg, w_reg = train(train_queue, valid_queue, model, architect, criterion, optimizer, lr, epoch, h=h_all[epoch])
        logging.info('train_acc %f', train_acc)
        tb_logger.add_scalar('train_accuracy', train_acc, epoch)
        tb_logger.add_scalar('train_loss', train_obj, epoch)
        tb_logger.add_scalar('alpha_regularization', a_reg, epoch)
        tb_logger.add_scalar('weight_regularization', w_reg, epoch)

        # validation
        valid_acc, valid_obj = infer(valid_queue, model, criterion)
        logging.info('valid_acc %f', valid_acc)

        utils.save(model, os.path.join(args.save, 'weights.pt'))
        utils.save_checkpoint({
            'epoch': epoch + 1,
            'model_optimizer': optimizer.state_dict(),
            'arch_optimizer': architect.optimizer.state_dict(),
            'model': model.state_dict(),
            'scheduler': scheduler.state_dict(),
            'alpha_normal': model.alphas_normal,
            'alpha_reduce': model.alphas_reduce}, is_best=False, save=args.save, epoch=epoch)
147 |
148 |
149 |
def train(train_queue, valid_queue, model, architect, criterion, optimizer, lr, epoch, h):
    """Train for one epoch, alternating an architecture step and a weight step per batch.

    Args:
        train_queue: loader yielding the ``(input, target)`` batches for the weights.
        valid_queue: loader the architecture-step batch is drawn from.
        model: search network being trained.
        architect: object whose ``step`` updates the architecture parameters
            and returns the architecture regularizer value.
        criterion: classification loss.
        optimizer: optimizer for the model weights.
        lr: current learning rate, forwarded to ``architect.step``.
        epoch: current epoch index; selects warm-up vs. regularized loss.
        h: step size forwarded to the regularizers.

    Returns:
        ``(top1.avg, objs.avg, a_regs.avg, w_regs.avg)``: running averages of
        top-1 accuracy, loss, architecture regularizer and weight regularizer.
    """
    objs = utils.AvgrageMeter()
    a_regs = utils.AvgrageMeter()
    w_regs = utils.AvgrageMeter()
    top1 = utils.AvgrageMeter()
    top5 = utils.AvgrageMeter()

    for step, (inputs, targets) in enumerate(train_queue):
        model.train()
        n = inputs.size(0)

        inputs = Variable(inputs, requires_grad=False).cuda(non_blocking=True)
        targets = Variable(targets, requires_grad=False).cuda(non_blocking=True)

        # get a minibatch from the search queue
        # NOTE(review): a fresh iterator is created on every step, so this
        # always yields the loader's first batch; it is only random if
        # valid_queue shuffles or uses a random sampler. Confirm with caller.
        input_search, target_search = next(iter(valid_queue))
        input_search = Variable(input_search, requires_grad=False).cuda(non_blocking=True)
        target_search = Variable(target_search, requires_grad=False).cuda(non_blocking=True)

        a_regularizer = architect.step(inputs, targets, epoch, args.a_warmup_epochs, args.a_gamma, criterion, args.loss_hessian, valid_queue, input_search, target_search, lr, optimizer, unrolled=args.unrolled, h=h)

        optimizer.zero_grad()
        logits = model(inputs)

        if epoch < args.w_warmup_epochs:
            # Warm-up: plain classification loss, regularizer reported as 0.
            loss = criterion(logits, targets)
            w_regularizer = torch.tensor(0, dtype=torch.float)
        else:
            if args.loss_hessian == 'loss_cure':
                reg = loss_cure(model, criterion, lambda_=1, device='cuda')
                w_regularizer, _ = reg.regularizer(inputs, targets, h=h)
            else:
                reg = loss_eigen(model, train_queue, inputs, targets, criterion, full_eigen=False, maxIter=10, tol=1e-2)
                # Bind the result to w_regularizer: it used to be stored under
                # a different name, leaving w_regularizer undefined (or stale
                # from an earlier step) on the next line.
                w_regularizer, _ = reg.regularizer()

            loss = criterion(logits, targets) + args.w_gamma * w_regularizer
        print(f'epoch={epoch} | step={step} | loss={loss} | w_reg={w_regularizer} | a_reg = {a_regularizer}')

        loss.backward()
        # clip_grad_norm_ is the in-place successor of the deprecated clip_grad_norm.
        nn.utils.clip_grad_norm_(model.parameters(), args.grad_clip)
        optimizer.step()

        prec1, prec5 = utils.accuracy(logits, targets, topk=(1, 5))
        objs.update(loss.data.item(), n)
        # float() accepts both a one-element tensor and a plain number; the
        # eigen regularizer's return type cannot be verified from this file.
        w_regs.update(float(w_regularizer), n)
        a_regs.update(a_regularizer.data.item(), n)
        top1.update(prec1.data.item(), n)
        top5.update(prec5.data.item(), n)

        if step % args.report_freq == 0:
            logging.info('train %03d objs %e a_regs %e w_regs %e %f %f', step, objs.avg, a_regs.avg, w_regs.avg, top1.avg, top5.avg)

    return top1.avg, objs.avg, a_regs.avg, w_regs.avg
203 |
204 |
def infer(valid_queue, model, criterion):
    """Evaluate ``model`` on ``valid_queue`` without gradients.

    Returns:
        ``(top1.avg, objs.avg)``: average top-1 accuracy and average loss.
    """
    loss_meter = utils.AvgrageMeter()
    top1_meter = utils.AvgrageMeter()
    top5_meter = utils.AvgrageMeter()
    model.eval()

    with torch.no_grad():
        for step, (images, labels) in enumerate(valid_queue):
            images = Variable(images, requires_grad=False).cuda(non_blocking=True)
            labels = Variable(labels, requires_grad=False).cuda(non_blocking=True)

            logits = model(images)
            loss = criterion(logits, labels)
            prec1, prec5 = utils.accuracy(logits, labels, topk=(1, 5))

            batch_size = images.size(0)
            loss_meter.update(loss.data.item(), batch_size)
            top1_meter.update(prec1.data.item(), batch_size)
            top5_meter.update(prec5.data.item(), batch_size)

            # periodic progress line
            if step % args.report_freq == 0:
                logging.info('valid %03d %e %f %f', step, loss_meter.avg, top1_meter.avg, top5_meter.avg)

    return top1_meter.avg, loss_meter.avg
229 |
230 |
231 | if __name__ == '__main__':
232 | main()
233 |
234 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [yyyy] [name of copyright owner]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/advrush/regularizer.py:
--------------------------------------------------------------------------------
1 | # import torch
2 | # import copy
3 | # import torch.nn as nn
4 | from torch.autograd.gradcheck import zero_gradients
5 | # from utils.utils import progress_bar
6 | # import numpy as np
7 | # import matplotlib.pyplot as plt
8 | # from utils.utils import pgd
9 | # import torchvision
10 | # import os
11 | import torch
12 | # import torch.nn as nn
13 | # from torch.autograd import grad
14 | # import torch.optim as optim
15 | # import torch.nn.functional as F
16 | # import torch.backends.cudnn as cudnn
17 | # from torch.optim.lr_scheduler import StepLR
18 | # from torch.distributions import uniform
19 |
20 | import hessianflow as hf
21 | import hessianflow.optimizer.optm_utils as hf_optm_utils
22 | import hessianflow.optimizer.progressbar as hf_optm_pgb
23 |
class loss_cure():
    '''
    Curvature regularizer in the style of CURE: measures how much the input
    gradient of the loss changes between ``inputs`` and ``inputs + z``, where
    ``z`` is a scaled sign-of-gradient direction.
    '''

    def __init__(self, net, criterion, lambda_, device='cuda'):
        '''
        net: network whose loss curvature is measured
        criterion: loss function called as ``criterion(outputs, targets)``
        lambda_: multiplier applied to the per-sample regularizer values
        device: device on which the ``grad_outputs`` tensors are created
        '''
        self.net = net
        self.criterion = criterion
        self.lambda_ = lambda_
        self.device = device

    def _find_z(self, inputs, targets, h):
        '''
        Return ``(z, norm_grad)``: the perturbation direction built from the
        sign of the input gradient, scaled by ``h`` and divided by its
        per-sample L2 norm, and the norm of the raw input gradient.

        Side effects: sets ``requires_grad`` on ``inputs``, puts ``net`` into
        eval mode, and zeroes ``inputs.grad`` and the network gradients.
        '''

        inputs.requires_grad_()
        # .eval() is called on the network before the forward pass; nothing in
        # this class switches it back to train mode afterwards.
        outputs = self.net.eval()(inputs)
        loss_z = self.criterion(outputs, targets)

        # NOTE(review): the gradient argument has one entry per target. If the
        # criterion reduces to a scalar (train_search.py passes
        # nn.CrossEntropyLoss()), the shapes differ and newer PyTorch versions
        # may reject this call -- confirm against the supported version.
        loss_z.backward(torch.ones(targets.size(), dtype=torch.float).to(self.device))
        # "+ 0.0" yields a new tensor, so zeroing inputs.grad below does not
        # also wipe `grad`.
        grad = inputs.grad.data + 0.0
        norm_grad = grad.norm().item()
        z = torch.sign(grad).detach() + 0.
        # Per-sample normalisation; the [:, None, None, None] indexing
        # broadcasts against a 4-D z (presumably batch x C x H x W -- TODO confirm).
        z = 1. * (h) * (z + 1e-7) / (z.reshape(z.size(0), -1).norm(dim=1)[:, None, None, None] + 1e-7)
        # The result of detach() is discarded, so this line has no effect.
        inputs.grad.detach()
        inputs.grad.zero_()
        #zero_gradients(inputs)
        self.net.zero_grad()

        return z, norm_grad

    def regularizer(self, inputs, targets, h=3., lambda_=4):
        '''
        Regularizer term in CURE.

        Returns ``(reg, norm_grad)``: ``reg`` is ``self.lambda_`` times the
        per-sample norm of the input-gradient of
        ``loss(inputs + z) - loss(inputs)``, summed and divided by the batch
        size; ``norm_grad`` is the gradient norm reported by ``_find_z``.
        The graph is kept (``create_graph=True``) so ``reg`` can be
        back-propagated.

        Note: the ``lambda_`` argument is not used; ``self.lambda_`` is.
        '''
        z, norm_grad = self._find_z(inputs, targets, h)

        inputs.requires_grad_()
        outputs_pos = self.net.eval()(inputs + z)
        outputs_orig = self.net.eval()(inputs)

        loss_pos = self.criterion(outputs_pos, targets)
        loss_orig = self.criterion(outputs_orig, targets)
        # NOTE(review): grad_outputs has one entry per target, as in _find_z;
        # the same shape question applies when the criterion returns a scalar.
        grad_diff = \
            torch.autograd.grad((loss_pos - loss_orig), inputs, grad_outputs=torch.ones(targets.size()).to(self.device),
                                create_graph=True)[0]
        reg = grad_diff.reshape(grad_diff.size(0), -1).norm(dim=1)
        self.net.zero_grad()

        return torch.sum(self.lambda_ * reg) / float(inputs.size(0)), norm_grad
68 |
class loss_eigen():
    '''
    Hessian-eigenvalue regularizer: a thin wrapper around the hessianflow
    eigen solvers, evaluated either on one stored batch or on a whole loader.
    '''

    def __init__(self, net, test_loader, input, target, criterion, full_eigen, maxIter=10, tol=1e-2):
        '''Store the network, the data and the solver settings.'''
        # what to analyse
        self.net, self.criterion = net, criterion
        # data: a full loader and a single (input, target) batch
        self.test_loader = test_loader
        self.input, self.target = input, target
        # solver configuration
        self.full_eigen = full_eigen
        self.max_iter, self.tol = maxIter, tol
        self.cuda = True

    def regularizer(self):
        '''
        Return ``(eigenvalue, eigenvector)`` as computed by hessianflow, using
        the whole loader when ``full_eigen`` is set and the stored batch
        otherwise.
        '''
        if not self.full_eigen:
            top_value, top_vector = hf.get_eigen(
                self.net, self.input, self.target, self.criterion,
                self.cuda, self.max_iter, self.tol)
            return top_value, top_vector

        top_value, top_vector = hf.get_eigen_full_dataset(
            self.net, self.test_loader, self.criterion, self.max_iter, self.tol)
        return top_value, top_vector
88 |
89 | # class CURELearner():
90 | # def __init__(self, net, trainloader, testloader, device='cuda', lambda_=4,
91 | # path='./checkpoint'):
92 | # '''
93 | # CURE Class: Implementation of "Robustness via curvature regularization, and vice versa"
94 | # in https://arxiv.org/abs/1811.09716
95 | # ================================================
96 | # Arguments:
97 | #
98 | # net: PyTorch nn
99 | # network structure
100 | # trainloader: PyTorch Dataloader
101 | # testloader: PyTorch Dataloader
102 | # device: 'cpu' or 'cuda' if GPU available
103 | # type of device to move tensors to
104 | # lambda_: float
105 | # power of regularization
106 | # path: string
107 | # path to save the best model
108 | # '''
109 | # if not torch.cuda.is_available() and device == 'cuda':
110 | # raise ValueError("cuda is not available")
111 | #
112 | # self.net = net.to(device)
113 | # self.criterion = nn.CrossEntropyLoss()
114 | # self.device = device
115 | # self.lambda_ = lambda_
116 | # self.trainloader, self.testloader = trainloader, testloader
117 | # self.path = path
118 | # self.test_acc_adv_best = 0
119 | # self.train_loss, self.train_acc, self.train_curv = [], [], []
120 | # self.test_loss, self.test_acc_adv, self.test_acc_clean, self.test_curv = [], [], [], []
121 | #
122 | # def set_optimizer(self, optim_alg='Adam', args={'lr': 1e-4}, scheduler=None, args_scheduler={}):
123 | # '''
124 | # Setting the optimizer of the network
125 | # ================================================
126 | # Arguments:
127 | #
128 | # optim_alg : string
129 | # Name of the optimizer
130 | # args: dict
131 | # Parameter of the optimizer
132 | # scheduler: optim.lr_scheduler
133 | # Learning rate scheduler
134 | # args_scheduler : dict
135 | # Parameters of the scheduler
136 | # '''
137 | # self.optimizer = getattr(optim, optim_alg)(self.net.parameters(), **args)
138 | # if not scheduler:
139 | # self.scheduler = optim.lr_scheduler.StepLR(self.optimizer, step_size=10 ** 6, gamma=1)
140 | # else:
141 | # self.scheduler = getattr(optim.lr_scheduler, scheduler)(self.optimizer, **args_scheduler)
142 | #
143 | # def train(self, h=[3], epochs=15):
144 | # '''
145 | # Training the network
146 | # ================================================
147 | # Arguments:
148 | #
149 | # h : list with length less than the number of epochs
150 | # Different h for different epochs of training,
151 | # can have a single number or a list of floats for each epoch
152 | # epochs : int
153 | # Number of epochs
154 | # '''
155 | # if len(h) > epochs:
156 | # raise ValueError('Length of h should be less than number of epochs')
157 | # if len(h) == 1:
158 | # h_all = epochs * [h[0]]
159 | # else:
160 | # h_all = epochs * [1.0]
161 | # h_all[:len(h)] = list(h[:])
162 | # h_all[len(h):] = (epochs - len(h)) * [h[-1]]
163 | #
164 | # for epoch, h_tmp in enumerate(h_all):
165 | # self._train(epoch, h=h_tmp)
166 | # self.test(epoch, h=h_tmp)
167 | # self.scheduler.step()
168 | #
169 | # def _train(self, epoch, h):
170 | # '''
171 | # Training the model
172 | # '''
173 | # print('\nEpoch: %d' % epoch)
174 | # train_loss, total = 0, 0
175 | # num_correct = 0
176 | # curv, curvature, norm_grad_sum = 0, 0, 0
177 | # for batch_idx, (inputs, targets) in enumerate(self.trainloader):
178 | # inputs, targets = inputs.to(self.device), targets.to(self.device)
179 | # self.optimizer.zero_grad()
180 | # total += targets.size(0)
181 | # outputs = self.net.train()(inputs)
182 | #
183 | # regularizer, grad_norm = self.regularizer(inputs, targets, h=h)
184 | #
185 | # curvature += regularizer.item()
186 | # neg_log_likelihood = self.criterion(outputs, targets)
187 | # loss = neg_log_likelihood + regularizer
188 | # loss.backward()
189 | # self.optimizer.step()
190 | # self.optimizer.zero_grad()
191 | #
192 | # train_loss += loss.item()
193 | # _, predicted = outputs.max(1)
194 | # outcome = predicted.data == targets
195 | # num_correct += outcome.sum().item()
196 | #
197 | # progress_bar(batch_idx, len(self.trainloader), 'Loss: %.3f | Acc: %.3f%% (%d/%d) | curvature: %.3f ' % \
198 | # (train_loss / (batch_idx + 1), 100. * num_correct / total, num_correct, total,
199 | # curvature / (batch_idx + 1)))
200 | #
201 | # self.train_loss.append(train_loss / (batch_idx + 1))
202 | # self.train_acc.append(100. * num_correct / total)
203 | # self.train_curv.append(curvature / (batch_idx + 1))
204 | #
205 | # def test(self, epoch, h, num_pgd_steps=20):
206 | # '''
207 | # Testing the model
208 | # '''
209 | # test_loss, adv_acc, total, curvature, clean_acc, grad_sum = 0, 0, 0, 0, 0, 0
210 | #
211 | # for batch_idx, (inputs, targets) in enumerate(self.testloader):
212 | # inputs, targets = inputs.to(self.device), targets.to(self.device)
213 | # outputs = self.net.eval()(inputs)
214 | # loss = self.criterion(outputs, targets)
215 | # test_loss += loss.item()
216 | # _, predicted = outputs.max(1)
217 | # clean_acc += predicted.eq(targets).sum().item()
218 | # total += targets.size(0)
219 | #
220 | # inputs_pert = inputs + 0.
221 | # eps = 5. / 255. * 8
222 | # r = pgd(inputs, self.net.eval(), epsilon=[eps], targets=targets, step_size=0.04,
223 | # num_steps=num_pgd_steps, epsil=eps)
224 | #
225 | # inputs_pert = inputs_pert + eps * torch.Tensor(r).to(self.device)
226 | # outputs = self.net(inputs_pert)
227 | # probs, predicted = outputs.max(1)
228 | # adv_acc += predicted.eq(targets).sum().item()
229 | # cur, norm_grad = self.regularizer(inputs, targets, h=h)
230 | # grad_sum += norm_grad
231 | # curvature += cur.item()
232 | # test_loss += cur.item()
233 | #
234 | # print(
235 | # f'epoch = {epoch}, adv_acc = {100. * adv_acc / total}, clean_acc = {100. * clean_acc / total}, loss = {test_loss / (batch_idx + 1)}', \
236 | # f'curvature = {curvature / (batch_idx + 1)}')
237 | #
238 | # self.test_loss.append(test_loss / (batch_idx + 1))
239 | # self.test_acc_adv.append(100. * adv_acc / total)
240 | # self.test_acc_clean.append(100. * clean_acc / total)
241 | # self.test_curv.append(curvature / (batch_idx + 1))
242 | # if self.test_acc_adv[-1] > self.test_acc_adv_best:
243 | # self.test_acc_adv_best = self.test_acc_adv[-1]
244 | # print(f'Saving the best model to {self.path}')
245 | # self.save_model(self.path)
246 | #
247 | # return test_loss / (batch_idx + 1), 100. * adv_acc / total, 100. * clean_acc / total, curvature / (
248 | # batch_idx + 1)
249 | #
250 | # def _find_z(self, inputs, targets, h):
251 | # '''
252 | # Finding the direction in the regularizer
253 | # '''
254 | # inputs.requires_grad_()
255 | # outputs = self.net.eval()(inputs)
256 | # loss_z = self.criterion(self.net.eval()(inputs), targets)
257 | # loss_z.backward(torch.ones(targets.size()).to(self.device))
258 | # grad = inputs.grad.data + 0.0
259 | # norm_grad = grad.norm().item()
260 | # z = torch.sign(grad).detach() + 0.
261 | # z = 1. * (h) * (z + 1e-7) / (z.reshape(z.size(0), -1).norm(dim=1)[:, None, None, None] + 1e-7)
262 | # zero_gradients(inputs)
263 | # self.net.zero_grad()
264 | #
265 | # return z, norm_grad
266 | #
267 | # def regularizer(self, inputs, targets, h=3., lambda_=4):
268 | # '''
269 | # Regularizer term in CURE
270 | # '''
271 | # z, norm_grad = self._find_z(inputs, targets, h)
272 | #
273 | # inputs.requires_grad_()
274 | # outputs_pos = self.net.eval()(inputs + z)
275 | # outputs_orig = self.net.eval()(inputs)
276 | #
277 | # loss_pos = self.criterion(outputs_pos, targets)
278 | # loss_orig = self.criterion(outputs_orig, targets)
279 | # grad_diff = \
280 | # torch.autograd.grad((loss_pos - loss_orig), inputs, grad_outputs=torch.ones(targets.size()).to(self.device),
281 | # create_graph=True)[0]
282 | # reg = grad_diff.reshape(grad_diff.size(0), -1).norm(dim=1)
283 | # self.net.zero_grad()
284 | #
285 | # return torch.sum(self.lambda_ * reg) / float(inputs.size(0)), norm_grad
286 | #
287 | # def save_model(self, path):
288 | # '''
289 | # Saving the model
290 | # ================================================
291 | # Arguments:
292 | #
293 | # path: string
294 | # path to save the model
295 | # '''
296 | #
297 | # print('Saving...')
298 | #
299 | # state = {
300 | # 'net': self.net.state_dict(),
301 | # 'optimizer': self.optimizer.state_dict()
302 | # }
303 | # torch.save(state, path)
304 | #
305 | # def import_model(self, path):
306 | # '''
307 | # Importing the pre-trained model
308 | # '''
309 | # checkpoint = torch.load(path)
310 | # self.net.load_state_dict(checkpoint['net'])
311 | #
312 | #
313 | #
314 | #
315 |
--------------------------------------------------------------------------------