├── .gitignore
├── Attacks
    └── Gradient_based
    │   ├── fast_gradient_method.py
    │   ├── least_likely_class_method.py
    │   └── utils_pytorch.py
├── README.md
├── ens_adv_train.py
├── images
    └── tensorboard.png
├── main_ens_adv_train.py
├── models
    └── cifar10
    │   ├── inception.py
    │   ├── mobilenetv2_2.py
    │   └── resnet.py
└── tensorboard
    └── cifar10
        └── adv_train
            └── adv_models:-resnet18-resnet50-mobilenet_125-googlenet
                ├── resnet101
                    └── events.out.tfevents.1558584038.destc0strapp81.eu.sony.com
                └── resnet34
                    └── events.out.tfevents.1558545149.destc0strapp81.eu.sony.com


/.gitignore:
--------------------------------------------------------------------------------
 1 | .ipynb_checkpoints
 2 | __pycache__
 3 | history_main
 4 | checkpoints/*
 5 | *.pyc
 6 | *.tar
 7 | *.pth
 8 | *.log
 9 | *.txt
10 | *.so
11 | 
12 | ** temp/
13 | temp
14 | *.log
15 | *.sh
16 | 
17 | 


--------------------------------------------------------------------------------
/Attacks/Gradient_based/fast_gradient_method.py:
--------------------------------------------------------------------------------
 1 | '''
 2 | https://github.com/tensorflow/cleverhans/blob/master/cleverhans/utils_pytorch.py
 3 | 
 4 | ## modification:
 5 | model_fn.zero_grad()
 6 | '''
 7 | 
 8 | import numpy as np
 9 | import torch
10 | 
11 | from Attacks.Gradient_based.utils_pytorch import optimize_linear
12 | 
13 | 
def fast_gradient_method(model_fn, x, eps, ord=np.inf,
                         clip_min=None, clip_max=None, y=None, targeted=False, sanity_checks=False):
  """
  PyTorch implementation of the Fast Gradient Method.
  :param model_fn: a callable that takes an input tensor and returns the model logits.
            If it exposes ``zero_grad`` (e.g. a ``torch.nn.Module``), its gradients are
            cleared before the backward pass.
  :param x: input tensor.
  :param eps: epsilon (input variation parameter); see https://arxiv.org/abs/1412.6572.
  :param ord: Order of the norm (mimics NumPy). Possible values: np.inf, 1 or 2.
            Defaults to np.inf.
  :param clip_min: (optional) float. Minimum float value for adversarial example components.
  :param clip_max: (optional) float. Maximum float value for adversarial example components.
            clip_min and clip_max must be given together (one-sided clipping is unsupported).
  :param y: (optional) Tensor with true labels. If targeted is true, then provide the
            target label. Otherwise, only provide this parameter if you'd like to use true
            labels when crafting adversarial samples. Otherwise, model predictions are used
            as labels to avoid the "label leaking" effect (explained in this paper:
            https://arxiv.org/abs/1611.01236). Default is None.
  :param targeted: (optional) bool. Is the attack targeted or untargeted?
            Untargeted, the default, will try to make the label incorrect.
            Targeted will instead try to move in the direction of being more like y.
  :param sanity_checks: bool, if True, assert that x lies inside [clip_min, clip_max]
            (Turn them off to use less runtime / memory or for unit tests that
            intentionally pass strange input)
  :return: a tensor for the adversarial example
  :raises ValueError: if ord is not np.inf, 1 or 2.
  """
  if ord not in [np.inf, 1, 2]:
    raise ValueError("Norm order must be either np.inf, 1, or 2.")

  clipping = (clip_min is not None) or (clip_max is not None)
  if clipping:
    # We don't currently support one-sided clipping; fail before doing any work.
    assert clip_min is not None and clip_max is not None

  if sanity_checks:
    # If a data range was specified, check that the input was in that range.
    # Each check is reduced with bool() on the tensor itself: np.all() over a list of
    # tensors needs a NumPy conversion, which is not possible for CUDA tensors.
    if clip_min is not None:
      assert bool(torch.all(torch.ge(x, torch.tensor(clip_min, device=x.device, dtype=x.dtype))))
    if clip_max is not None:
      assert bool(torch.all(torch.le(x, torch.tensor(clip_max, device=x.device, dtype=x.dtype))))

  # x needs to be a leaf variable, of floating point type and have requires_grad being True for
  # its grad to be computed and stored properly in a backward call
  x = x.clone().detach().to(torch.float).requires_grad_(True)

  # A single forward pass serves both label guessing and the loss.
  logits = model_fn(x)
  if y is None:
    # Using model predictions as ground truth to avoid label leaking
    _, y = torch.max(logits, 1)

  # Compute loss
  loss_fn = torch.nn.CrossEntropyLoss()
  loss = loss_fn(logits, y)
  # If attack is targeted, minimize loss of target label rather than maximize loss of correct label
  if targeted:
    loss = -loss

  # Define gradient of loss wrt input. Plain callables have no zero_grad, so only
  # clear parameter gradients when the model offers it.
  zero_grad = getattr(model_fn, 'zero_grad', None)
  if callable(zero_grad):
    zero_grad()
  loss.backward()
  optimal_perturbation = optimize_linear(x.grad, eps, ord)

  # Add perturbation to original example to obtain adversarial example
  adv_x = x + optimal_perturbation

  # If clipping is needed, reset all values outside of [clip_min, clip_max]
  if clipping:
    adv_x = torch.clamp(adv_x, clip_min, clip_max)

  return adv_x


--------------------------------------------------------------------------------
/Attacks/Gradient_based/least_likely_class_method.py:
--------------------------------------------------------------------------------
 1 | """
 2 | The least_likely_class_method attack
 3 | modified from fast_gradient_method
 4 | 
 5 | """
 6 | import numpy as np
 7 | import torch
 8 | import sys
 9 | 
10 | from Attacks.Gradient_based.utils_pytorch import optimize_linear
11 | 
12 | 
def least_likely_class_method(model_fn, x, eps, ord = np.inf,
                         clip_min=None, clip_max=None, sanity_checks=False):
    """
    PyTorch implementation of the single-step least-likely-class attack.

    The class with the smallest logit under ``model_fn`` is used as the target, and
    one gradient step is taken that lowers the cross-entropy loss towards that class.

    :param model_fn: a callable that takes an input tensor and returns the model logits.
              If it exposes ``zero_grad`` (e.g. a ``torch.nn.Module``), its gradients are
              cleared before the backward pass.
    :param x: input tensor.
    :param eps: epsilon (input variation parameter); see https://arxiv.org/abs/1412.6572.
    :param ord: Order of the norm (mimics NumPy). Possible values: np.inf, 1 or 2.
    :param clip_min: (optional) float. Minimum float value for adversarial example components.
    :param clip_max: (optional) float. Maximum float value for adversarial example components.
              clip_min and clip_max must be given together (one-sided clipping is unsupported).
    :param sanity_checks: bool, if True, assert that x lies inside [clip_min, clip_max]
              (Turn them off to use less runtime / memory or for unit tests that
              intentionally pass strange input)
    :return: a tensor for the adversarial example
    :raises ValueError: if ord is not np.inf, 1 or 2.
    """
    if ord not in [np.inf, 1, 2]:
        raise ValueError("Norm order must be either np.inf, 1, or 2.")

    clipping = (clip_min is not None) or (clip_max is not None)
    if clipping:
        # We don't currently support one-sided clipping; fail before doing any work.
        assert clip_min is not None and clip_max is not None

    if sanity_checks:
        # If a data range was specified, check that the input was in that range.
        # Each check is reduced with bool() on the tensor itself: np.all() over a list of
        # tensors needs a NumPy conversion, which is not possible for CUDA tensors.
        if clip_min is not None:
            assert bool(torch.all(torch.ge(x, torch.tensor(clip_min, device=x.device, dtype=x.dtype))))
        if clip_max is not None:
            assert bool(torch.all(torch.le(x, torch.tensor(clip_max, device=x.device, dtype=x.dtype))))

    # x needs to be a leaf variable, of floating point type and have requires_grad being True for
    # its grad to be computed and stored properly in a backward call
    x = x.clone().detach().to(torch.float).requires_grad_(True)

    # A single forward pass serves both target selection and the loss.
    logits = model_fn(x)

    # compute the least likely label (index of the smallest logit per sample)
    _, y_ll = torch.min(logits, 1)

    # Compute loss
    loss_fn = torch.nn.CrossEntropyLoss()
    loss = loss_fn(logits, y_ll)
    # The attack is always targeted at y_ll: negate so that the ascent step
    # performed below minimizes the loss of the target label.
    loss = -loss

    # Define gradient of loss wrt input. Plain callables have no zero_grad, so only
    # clear parameter gradients when the model offers it.
    zero_grad = getattr(model_fn, 'zero_grad', None)
    if callable(zero_grad):
        zero_grad()
    loss.backward()
    optimal_perturbation = optimize_linear(x.grad, eps, ord)

    # Add perturbation to original example to obtain adversarial example
    adv_x = x + optimal_perturbation

    # If clipping is needed, reset all values outside of [clip_min, clip_max]
    if clipping:
        adv_x = torch.clamp(adv_x, clip_min, clip_max)

    return adv_x


--------------------------------------------------------------------------------
/Attacks/Gradient_based/utils_pytorch.py:
--------------------------------------------------------------------------------
  1 | """
  2 | https://github.com/tensorflow/cleverhans/blob/master/cleverhans/utils_pytorch.py
  3 | """
  4 | 
  5 | import warnings
  6 | from random import getrandbits
  7 | 
  8 | import numpy as np
  9 | # import tensorflow as tf
 10 | import torch
 11 | from torch.autograd import Variable
 12 | 
 13 | 
 14 | # https://gist.github.com/kingspp/3ec7d9958c13b94310c1a365759aa3f4
 15 | # Pyfunc Gradient Function
 16 | def _py_func_with_gradient(func, inp, Tout, stateful=True, name=None,
 17 |                            grad_func=None):
 18 |   """
 19 |   PyFunc defined as given by Tensorflow
 20 |   :param func: Custom Function
 21 |   :param inp: Function Inputs
 22 |   :param Tout: Ouput Type of out Custom Function
 23 |   :param stateful: Calculate Gradients when stateful is True
 24 |   :param name: Name of the PyFunction
 25 |   :param grad: Custom Gradient Function
 26 |   :return:
 27 |   """
 28 |   # Generate random name in order to avoid conflicts with inbuilt names
 29 |   rnd_name = 'PyFuncGrad-' + '%0x' % getrandbits(30 * 4)
 30 | 
 31 |   # Register Tensorflow Gradient
 32 |   tf.RegisterGradient(rnd_name)(grad_func)
 33 | 
 34 |   # Get current graph
 35 |   g = tf.get_default_graph()
 36 | 
 37 |   # Add gradient override map
 38 |   with g.gradient_override_map({"PyFunc": rnd_name,
 39 |                                 "PyFuncStateless": rnd_name}):
 40 |     return tf.py_func(func, inp, Tout, stateful=stateful, name=name)
 41 | 
 42 | 
 43 | def convert_pytorch_model_to_tf(model, out_dims=None):
 44 |   """
 45 |   Convert a pytorch model into a tensorflow op that allows backprop
 46 |   :param model: A pytorch nn.Module object
 47 |   :param out_dims: The number of output dimensions (classes) for the model
 48 |   :return: A model function that maps an input (tf.Tensor) to the
 49 |   output of the model (tf.Tensor)
 50 |   """
 51 |   warnings.warn("convert_pytorch_model_to_tf is deprecated, switch to"
 52 |                 + " dedicated PyTorch support provided by CleverHans v4.")
 53 | 
 54 |   torch_state = {
 55 |       'logits': None,
 56 |       'x': None,
 57 |   }
 58 |   if not out_dims:
 59 |     out_dims = list(model.modules())[-1].out_features
 60 | 
 61 |   def _fprop_fn(x_np):
 62 |     """TODO: write this"""
 63 |     x_tensor = torch.Tensor(x_np)
 64 |     if torch.cuda.is_available():
 65 |       x_tensor = x_tensor.cuda()
 66 |     torch_state['x'] = Variable(x_tensor, requires_grad=True)
 67 |     torch_state['logits'] = model(torch_state['x'])
 68 |     return torch_state['logits'].data.cpu().numpy()
 69 | 
 70 |   def _bprop_fn(x_np, grads_in_np):
 71 |     """TODO: write this"""
 72 |     _fprop_fn(x_np)
 73 | 
 74 |     grads_in_tensor = torch.Tensor(grads_in_np)
 75 |     if torch.cuda.is_available():
 76 |       grads_in_tensor = grads_in_tensor.cuda()
 77 | 
 78 |     # Run our backprop through our logits to our xs
 79 |     loss = torch.sum(torch_state['logits'] * grads_in_tensor)
 80 |     loss.backward()
 81 |     return torch_state['x'].grad.cpu().data.numpy()
 82 | 
 83 |   def _tf_gradient_fn(op, grads_in):
 84 |     """TODO: write this"""
 85 |     return tf.py_func(_bprop_fn, [op.inputs[0], grads_in],
 86 |                       Tout=[tf.float32])
 87 | 
 88 |   def tf_model_fn(x_op):
 89 |     """TODO: write this"""
 90 |     out = _py_func_with_gradient(_fprop_fn, [x_op], Tout=[tf.float32],
 91 |                                  stateful=True,
 92 |                                  grad_func=_tf_gradient_fn)[0]
 93 |     out.set_shape([None, out_dims])
 94 |     return out
 95 | 
 96 |   return tf_model_fn
 97 | 
 98 | 
 99 | def clip_eta(eta, ord, eps):
100 |   """
101 |   PyTorch implementation of the clip_eta in utils_tf.
102 |   :param eta: Tensor
103 |   :param ord: np.inf, 1, or 2
104 |   :param eps: float
105 |   """
106 |   if ord not in [np.inf, 1, 2]:
107 |     raise ValueError('ord must be np.inf, 1, or 2.')
108 | 
109 |   avoid_zero_div = torch.tensor(1e-12, dtype=eta.dtype, device=eta.device)
110 |   reduc_ind = list(range(1, len(eta.size())))
111 |   if ord == np.inf:
112 |     eta = torch.clamp(eta, -eps, eps)
113 |   else:
114 |     if ord == 1:
115 |       # TODO
116 |       # raise NotImplementedError("L1 clip is not implemented.")
117 |       norm = torch.max(
118 |           avoid_zero_div,
119 |           torch.sum(torch.abs(eta), dim=reduc_ind, keepdim=True)
120 |       )
121 |     elif ord == 2:
122 |       norm = torch.sqrt(torch.max(
123 |           avoid_zero_div,
124 |           torch.sum(eta ** 2, dim=reduc_ind, keepdim=True)
125 |       ))
126 |     factor = torch.min(
127 |         torch.tensor(1., dtype=eta.dtype, device=eta.device),
128 |         eps / norm
129 |         )
130 |     eta *= factor
131 |   return eta
132 | 
def get_or_guess_labels(model, x, **kwargs):
  """
  Pick the labels used to craft an adversarial example for x.

  The kwargs are the ones handed to the attack:
  * 'y' present: returned as-is (untargeted attack).
  * 'y_target' present and not None: returned as-is (targeted attack).
  * otherwise: the model's own predictions (argmax over dimension 1 of the
    model output) are returned, for an untargeted attack.

  :param model: PyTorch model. Do not add a softmax gate to the output.
  :param x: Tensor, shape (N, d_1, ...).
  :param y: (optional) Tensor, shape (N).
  :param y_target: (optional) Tensor, shape (N).
  :raises ValueError: if both 'y' and 'y_target' are passed.
  """
  has_y = 'y' in kwargs
  has_target = 'y_target' in kwargs
  if has_y and has_target:
    raise ValueError("Can not set both 'y' and 'y_target'.")

  if has_y:
    return kwargs['y']
  if has_target and kwargs['y_target'] is not None:
    return kwargs['y_target']

  # No usable labels supplied: fall back to the predicted class indices.
  return torch.max(model(x), 1)[1]
157 | 
158 | 
def optimize_linear(grad, eps, ord=np.inf):
  """
  Solves for the optimal input to a linear function under a norm constraint.
  Optimal_perturbation = argmax_{eta, ||eta||_{ord} < eps} dot(eta, grad)

  Samples whose gradient is entirely zero receive a zero perturbation for
  every supported norm.

  :param grad: Tensor, shape (N, d_1, ...). Batch of gradients
  :param eps: float. Scalar specifying size of constraint region
  :param ord: np.inf, 1, or 2. Order of norm constraint.
  :returns: Tensor, shape (N, d_1, ...). Optimal perturbation
  :raises NotImplementedError: if ord is not np.inf, 1 or 2.
  """

  red_ind = list(range(1, len(grad.size())))
  avoid_zero_div = torch.tensor(1e-12, dtype=grad.dtype, device=grad.device)
  if ord == np.inf:
    # Take sign of gradient
    optimal_perturbation = torch.sign(grad)
  elif ord == 1:
    abs_grad = torch.abs(grad)
    sign = torch.sign(grad)
    # Shape (N, 1, ..., 1) so the per-sample maximum broadcasts against grad.
    ori_shape = [1] * len(grad.size())
    ori_shape[0] = grad.size(0)

    max_abs_grad, _ = torch.max(abs_grad.reshape(grad.size(0), -1), 1)
    # Put all the mass on the largest-magnitude component(s), split evenly
    # between ties. Masks use grad's dtype so no dtype mixing occurs.
    max_mask = abs_grad.eq(max_abs_grad.view(ori_shape)).to(grad.dtype)
    num_ties = max_mask
    for red_scalar in red_ind:
      num_ties = torch.sum(num_ties, red_scalar, keepdim=True)
    optimal_perturbation = sign * max_mask / num_ties
    # TODO integrate below to a test file
    # check that the optimal perturbations have been correctly computed:
    # unit L1 norm per sample, except all-zero gradients (sign is 0 there, so
    # the perturbation is legitimately 0). Compared with a tolerance because
    # summing 1/num_ties terms is not exact in floating point.
    opt_pert_norm = optimal_perturbation.abs().sum(dim=red_ind)
    expected_norm = (max_abs_grad > 0).to(grad.dtype)
    assert torch.allclose(opt_pert_norm, expected_norm, rtol=1e-05, atol=1e-08)
  elif ord == 2:
    square = torch.max(
        avoid_zero_div,
        torch.sum(grad ** 2, red_ind, keepdim=True)
        )
    optimal_perturbation = grad / torch.sqrt(square)
    # TODO integrate below to a test file
    # check that the optimal perturbations have been correctly computed:
    # unit L2 norm wherever the squared norm was not clamped to avoid_zero_div.
    opt_pert_norm = optimal_perturbation.pow(2).sum(dim=red_ind, keepdim=True).sqrt()
    one_mask = (square <= avoid_zero_div).to(grad.dtype) * opt_pert_norm + \
            (square > avoid_zero_div).to(grad.dtype)
    assert torch.allclose(opt_pert_norm, one_mask, rtol=1e-05, atol=1e-08)
  else:
    raise NotImplementedError("Only L-inf, L1 and L2 norms are "
                              "currently implemented.")

  # Scale perturbation to be the solution for the norm=eps rather than
  # norm=1 problem
  scaled_perturbation = eps * optimal_perturbation
  return scaled_perturbation


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # Ensemble Adversarial Training
  2 | 
  3 | 
  4 | This repository contains code for performing ensemble adversarial training in Pytorch.
  5 | 
  6 | 
  7 | ## Reference
  8 | 
  9 | The code is based on the following references, with minor modifications:
 10 | 
 11 | * Florian, Tramèr, et al. "[Ensemble Adversarial Training: Attacks and Defenses](https://arxiv.org/abs/1705.07204)" arXiv preprint arXiv:1705.07204 (2018).
 12 | * Alexey, Kurakin, et al. "[Adversarial Machine Learning at Scale](https://arxiv.org/abs/1611.01236)" arXiv preprint arXiv:1611.01236 (2017).
 13 | * [ftramer/ensemble-adv-training](https://github.com/ftramer/ensemble-adv-training)
 14 | 
 15 | 
 16 | 
 17 | <br>
 18 | 
 19 | ## REQUIREMENTS
 20 | 
 21 | The code was tested with Python 3.6 and PyTorch 1.0.1. It also requires:
 22 | * tensorboardX
 23 | * scipy
 24 | 
 25 | 
 26 | <br>
 27 | 
 28 | 
 29 | ## Getting started
 30 | 1. Prepare your pre-trained static models (in `Defense/models/` and `Defense/checkpoints/`) and update the corresponding variables and directories in the main file.
 31 | 
 32 | ```python
 33 | # adv models: the static model used to generate adv input images
 34 | adv_resnet18 = ResNet18()
 35 | adv_resnet50 = ResNet50()
 36 | adv_mobilenet_125 = MobileNetV2(width_mult=1.25)
 37 | adv_googlenet = GoogLeNet()
 38 | 
 39 | adv_models = [adv_resnet18, adv_resnet50, adv_mobilenet_125, adv_googlenet]
 40 | adv_model_names = ['resnet18', 'resnet50', 'mobilenet_125', 'googlenet']
 41 | 
 42 | ```
 43 | 
 44 | 
 45 | 2. Set the models to be trained, and the output and log directories.
 46 | 
 47 | ```python
 48 | # models: the models to be adversarially trained
 49 | model_classes = [ ResNet34, ResNet101, MobileNetV2, MobileNetV2]
 50 | model_names = [ 'resnet34', 'resnet101', 'mobilenet_1', 'mobilenet_075']
 51 | params = {
 52 |     'mobilenet_1': 1.0,
 53 |     'mobilenet_075': 0.75,
 54 | }
 55 | 
 56 | adv_checkpoint_root_path = 'checkpoints/cifar10/'
 57 | output_path = 'checkpoints/adv_train/cifar10/' + trial_name +'/'
 58 | tensorboard_path = 'tensorboard/cifar10/adv_train/' + trial_name +'/'
 59 | ```
 60 | 
 61 | 3. Set the options for the training scheme, taken from [arXiv:1611.01236](https://arxiv.org/abs/1611.01236) and [arXiv:1705.07204](https://arxiv.org/abs/1705.07204):
 62 | * option1 : distribution for random epsilon
 63 | ```
 64 | args.eps in range (0,1) : fixed epsilon
 65 | args.eps = 1 		: [arXiv:1611.01236](https://arxiv.org/abs/1611.01236), favors small epsilon
 66 | args.eps = 2 		: uniform distribution, large and small epsilon are equally likely
 67 | ```
 68 | * option2: attacking method
 69 | ```
 70 | args.attacker = 'stepll'	: Step.L.L adv input, [arXiv:1705.07204](https://arxiv.org/abs/1705.07204)
 71 | args.attacker = 'fgsm'	: FGSM adv input
 72 |  ``` 
 73 | * option3: loss 
 74 | ```
 75 | args.loss_schema = 'averaged': unbiased loss (clean and adv losses are weighted equally)
 76 | args.loss_schema = 'weighted': [arXiv:1611.01236](https://arxiv.org/abs/1611.01236), loss favors clean input
 77 | ```
 78 | 
 79 | <br>
 80 | 
 81 | 
 82 | ## Simple Example
 83 | <table class="tg" align="center">
 84 |   <tr>
 85 |     <th class="tg-0pky">Acc.</th>
 86 |     <th class="tg-de2y" colspan="2">sources</th>
 87 |     <th class="tg-de2y" colspan="2">holdout_sources</th>
 88 |   </tr>
 89 |   <tr>
 90 |     <td class="tg-0pky">Target</td>
 91 |     <td class="tg-de2y">resnet18</td>
 92 |     <td class="tg-de2y">googlenet</td>
 93 |     <td class="tg-de2y">resnet101</td>
 94 |     <td class="tg-de2y">vgg_11</td>
 95 |   </tr>
 96 |   <tr>
 97 |     <td class="tg-0pky">ResNet34</td>
 98 |     <td class="tg-0pky">17.44</td>
 99 |     <td class="tg-0pky">18.36</td>
100 |     <td class="tg-0pky">18.09</td>
101 |     <td class="tg-0pky">18.13</td>
102 |   </tr>
103 |   <tr>
104 |     <td class="tg-0pky">ResNet34_adv</td>
105 |     <td class="tg-0pky">63.50</td>
106 |     <td class="tg-0pky">66.25</td>
107 |     <td class="tg-0pky">64.89</td>
108 |     <td class="tg-0pky">63.77</td>
109 |   </tr>
110 | </table>
111 | <center>(adversarial training for 200 epochs; more epochs would lead to higher accuracy)</center>
112 | 
113 | ```bash
114 | CUDA_VISIBLE_DEVICES=2,3 python3 main_ens_adv_train.py --eps 2 --attacker 'stepll' --loss_schema 'averaged' --dataset 'cifar10'
115 | ```
116 | * a resnet34 model adv training on CIFAR10, 
117 | 	adv_generators: [resnet18, resnet50, googlenet, mobilenet]
118 | 	holdout models: [resnet101]
119 | 
120 | * We can see from the following figure that, as training progresses, the **loss decreases** (for both clean and adv images).
121 | 
122 | * In particular, the adv_loss oscillates violently during the first half of training, because the magnitude of epsilon is randomized for each batch. When epsilon changes from a small to a large value, the loss jumps drastically, which matches our expectation that **it is hard to learn from adv input**.
123 | 
124 | * The oscillation of adv_loss is damped after **30k iterations**: as adversarial training goes on, the adv_loss decreases and converges, and the top-1 accuracy on adv images rises to around the same level as on clean images, regardless of the magnitude of epsilon and of which adv_model was randomly selected. This shows that the model starts to learn from perturbed input.
125 | 
126 | <p align="center">
127 | <img src="images/tensorboard.png" >
128 | </p>
129 | 
130 | <br>
131 | 
132 | 
133 | ## note
134 | Make sure you have enough GPU memory to load all the pre-trained-static models. 
135 | <br>
136 | <br>
137 | The referenced [tensorflow 1.0 version](https://github.com/ftramer/ensemble-adv-training) needs to feed the input images to all candidate adv generators when generating adv inputs, because of the **static computational graph in TF 1.0**. <br>
138 | This code only needs to run one adv_model (the currently selected one) to generate adv inputs, taking advantage of the **dynamic computational graph** in PyTorch, which (MAYBE) reduces the training time and allows more candidate adv generators.
139 | 


--------------------------------------------------------------------------------
/ens_adv_train.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import numpy as np
  3 | import os
  4 | import sys
  5 | from scipy.stats import truncnorm
  6 | 
  7 | from Attacks.Gradient_based.least_likely_class_method import least_likely_class_method
  8 | from Attacks.Gradient_based.fast_gradient_method import fast_gradient_method
  9 | 
 10 | def ens_adv_train(trainloader, criterion, optimizer, model, adv_models, writer, epoch, args):
 11 | 
 12 |     losses_combine = AverageMeter()
 13 |     top1_combine = AverageMeter()
 14 |     losses_clean = AverageMeter()
 15 |     top1_clean = AverageMeter()
 16 |     losses_adv = AverageMeter()
 17 |     top1_adv = AverageMeter()
 18 |    
 19 |     # training
 20 |     device = 'cuda' if torch.cuda.is_available() else 'cpu'
 21 |     for i, (inputs_clean, targets_clean) in enumerate(trainloader):
 22 | 
 23 |         # inputs
 24 |         inputs_clean, targets_clean = inputs_clean.to(device), targets_clean.to(device)
 25 |         # generate adv images
 26 |         # in paper, clean and adv images are half to half in each batch, 
 27 |         # but in author's github, clean and adv image are using entire batch and then the loss is averaged from loss of these two batch 
 28 |         # when selected == len(adv_models), select the currunt state of the model
 29 |         # otherwise choose the corresponding static model 
 30 |         selected = np.random.randint(len(adv_models) + 1)
 31 |         if selected == len(adv_models):
 32 |             adv_generating_model = model
 33 |         else:
 34 |             adv_generating_model = adv_models[selected]
 35 |         # the model generate adv should be in eval() model
 36 |         adv_generating_model.eval()
 37 | 
 38 | 
 39 |         # setting epsilon, normal it to range: [0, 1]
 40 |         if 0 < args.eps and  args.eps < 1:
 41 |             # fixed epsilon
 42 |             eps = args.eps 
 43 |         elif args.eps == 1 :
 44 |             # paper: <adversarial machine learning at scale>, arXiv:1611.01236
 45 |             # favor small epsilon
 46 |             # a, b = (myclip_a - my_mean) / my_std, (myclip_b - my_mean) / my_std
 47 |             eps = truncnorm.rvs(a = 0, b = 2, loc = 0, scale = 8) / 255.0
 48 |         elif args.eps == 2 :
 49 |             # uniform distribution, even the possibility for large and small eps, range [2/255, 16/255]
 50 |             eps = np.random.randint(low = 2, high =17) / 255.0
 51 |  
 52 | 
 53 |         # generate adv images
 54 |         if args.attacker == 'stepll':
 55 |             # Step.L.L adv 
 56 |             inputs_adv = least_likely_class_method(adv_generating_model, inputs_clean, eps, clip_min= 0, clip_max= 1)
 57 |         elif args.attacker =='fgsm':
 58 |             # Step.L.L adv 
 59 |             inputs_adv = fast_gradient_method(adv_generating_model, inputs_clean, eps, clip_min= 0, clip_max= 1)            
 60 | 
 61 | 
 62 |         # training
 63 |         ## in case that the adv_generating_model is the training model itself, clean the gradient and swith the model
 64 |         model.zero_grad()
 65 |         model.train()
 66 | 
 67 |         # clean image
 68 |         logits_clean = model(inputs_clean)
 69 |         loss1 = criterion(logits_clean, targets_clean)
 70 | 
 71 | 
 72 |         # adv image 
 73 |         logits_adv = model(inputs_adv)
 74 |         loss2 = criterion(logits_adv, targets_clean)
 75 | 
 76 | 
 77 |         # combine the loss1 and loss2
 78 |         if args.loss_schema == 'averaged':
 79 |             # loss on multiple outputs
 80 |             # https://discuss.pytorch.org/t/a-model-with-multiple-outputs/10440
 81 |             loss = 0.5*(loss1 + loss2)
 82 |         elif args.loss_schema == 'weighted':
 83 |             # paper: <adversarial machine learning at scale>, arXiv:1611.01236
 84 |             # favor for clean input
 85 |             loss = (1 / 1.3) (loss1 + 0.3* loss2)
 86 | 
 87 |         optimizer.zero_grad()
 88 |         loss.backward()
 89 |         optimizer.step()
 90 | 
 91 | 
 92 |         # print log and tensorboard
 93 |         # clean
 94 |         acc1, _  = accuracy(logits_clean, targets_clean, topk=(1,5))
 95 |         losses_clean.update(loss1.item(), inputs_clean.size(0))
 96 |         top1_clean.update(acc1[0], inputs_clean.size(0))
 97 | 
 98 |         # adv
 99 |         acc2, _ = accuracy(logits_adv, targets_clean, topk=(1, 5))
100 |         losses_adv.update(loss2.item(), inputs_clean.size(0))
101 |         top1_adv.update(acc2[0], inputs_clean.size(0))
102 | 
103 |         # combine
104 |         acc = 0.5*(acc1[0] + acc2[0])
105 |         losses_combine.update(loss.item(), inputs_clean.size(0))
106 |         top1_combine.update(acc, inputs_clean.size(0))
107 | 
108 |         # return losses_clean, top1_clean, losses_adv, top1_adv, losses_combine, top1_combine
109 | 
110 |         # progress_bar(i, len(trainloader), 'Epoch: %d | clean: %.3f | Top1: %.3f | Top5: %.3f '
111 |         # % (epoch, losses.avg, top1.avg, top5.avg))
112 |         
113 |         if i % 20 == 0:
114 |             n_iter = epoch * len(trainloader) + i
115 |             writer.add_scalar('Train/Loss_clean', losses_clean.val, n_iter)
116 |             writer.add_scalar('Train/Loss_adv', losses_adv.val, n_iter)
117 |             writer.add_scalar('Train/Losses_combine', losses_combine.val, n_iter)
118 |             writer.add_scalar('Train/Prec@1_clean', top1_clean.val, n_iter)
119 |             writer.add_scalar('Train/Prec@1_adv', top1_adv.val, n_iter)
120 |             writer.add_scalar('Train/Prec@1_combine', top1_combine.val, n_iter)
121 |             writer.add_scalar('Train/epsilon', eps, n_iter)
122 |             writer.add_scalar('Train/selected', selected, n_iter)
123 | 
124 | 
125 | 
def validate(testloader, model, criterion, writer, epoch):
    """Evaluate ``model`` on ``testloader`` and return the running top-1 accuracy.

    The model is switched to eval mode and run without gradient tracking. The
    loss and top-1 accuracy of every batch are written to ``writer`` under the
    'Test/...' tags.

    :param testloader: iterable of (inputs, targets) batches; must support len().
    :param model: model to evaluate.
    :param criterion: loss callable applied to (outputs, targets).
    :param writer: object with ``add_scalar(tag, value, step)``.
    :param epoch: current epoch index, used to compute the logging step.
    :return: average top-1 accuracy over all batches seen.
    """
    loss_meter = AverageMeter()
    acc_meter = AverageMeter()

    # switch to evaluate mode
    model.eval()
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(testloader):
            inputs = inputs.to(device)
            targets = targets.to(device)
            n_samples = inputs.size(0)

            # forward pass and loss
            outputs = model(inputs)
            batch_loss = criterion(outputs, targets)

            # measure accuracy and record loss
            top1_acc = accuracy(outputs, targets, topk=(1,5))[0]
            loss_meter.update(batch_loss.item(), n_samples)
            acc_meter.update(top1_acc[0], n_samples)

            step = epoch * len(testloader) + batch_idx
            writer.add_scalar('Test/Loss_clean', loss_meter.val, step)
            writer.add_scalar('Test/Prec@1_clean', acc_meter.val, step)

    return acc_meter.avg
153 | 
class AverageMeter(object):
    """Keeps the latest value together with a running weighted average."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Set the latest value, average, weighted sum and count back to 0."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` as observed ``n`` times and refresh the average."""
        self.val = val
        self.count += n
        self.sum += val * n
        self.avg = self.sum / self.count
170 | 
171 | 
172 | 
def accuracy(output, target, topk=(1,)):
    """Compute the top-k precision, in percent, for each requested ``k``.

    :param output: score tensor indexed as ``(sample, class)``.
    :param target: tensor holding one class index per sample.
    :param topk: iterable of ``k`` values; the largest one must not exceed
                 the number of classes in ``output``.
    :return: list with one single-element tensor per entry of ``topk``, each
             holding the percentage of samples whose target is among the
             ``k`` highest-scoring classes.
    """
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)

        # indices of the maxk best classes per sample, transposed to (maxk, batch)
        _, pred = output.topk(maxk, 1, True, True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))

        res = []
        for k in topk:
            # `correct` comes from a transposed tensor and may be
            # non-contiguous: `view(-1)` raises on such a slice when k > 1,
            # whereas `reshape` copies only when it has to.
            correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
            res.append(correct_k.mul_(100.0 / batch_size))
        return res
188 | 
def eps_truncnorm(low=0, high=16, mean=0, std=8):
    """Draw one epsilon from a truncated normal distribution.

    The defaults reproduce the schema of the paper
    <adversarial machine learning at scale>: a normal distribution with
    mean 0 and std 8, truncated to the interval [0, 16].

    :param low: lower bound of the interval the sample is restricted to.
    :param high: upper bound of the interval the sample is restricted to.
    :param mean: mean of the underlying (untruncated) normal distribution.
    :param std: standard deviation of the underlying normal distribution.
    :return: a single random sample lying in ``[low, high]``.
    """
    # scipy expects the clip points expressed in standard deviations from `loc`:
    # a, b = (myclip_a - my_mean) / my_std, (myclip_b - my_mean) / my_std
    a = (low - mean) / std
    b = (high - mean) / std

    return truncnorm.rvs(a=a, b=b, loc=mean, scale=std)


--------------------------------------------------------------------------------
/images/tensorboard.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JZ-LIANG/Ensemble-Adversarial-Training/255902d5ada181a727da666f75b08d121b3fd044/images/tensorboard.png


--------------------------------------------------------------------------------
/main_ens_adv_train.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | import torch.optim as optim
  4 | import torch.nn.functional as F
  5 | import torch.backends.cudnn as cudnn
  6 | from torchvision import datasets, transforms
  7 | import torchvision
  8 | import numpy as np
  9 | import os
 10 | import argparse
 11 | import pathlib
 12 | from tensorboardX import SummaryWriter
 13 | 
 14 | import sys
 15 | from ens_adv_train import ens_adv_train, validate
 16 | 
 17 | # import models
 18 | from models.cifar10.resnet import ResNet34, ResNet101, ResNet18, ResNet50
 19 | from models.cifar10.mobilenetv2_2 import MobileNetV2
 20 | from models.cifar10.inception import GoogLeNet
 21 | 
 22 | 
 23 | parser = argparse.ArgumentParser(description='Adv Training')
 24 | 
 25 | parser.add_argument('--dataset', default='cifar10', type=str,
 26 |                     help='select the training dataset')
 27 | 
 28 | parser.add_argument('--epochs', default=200, type=int, metavar='N',
 29 |                     help='number of total epochs to run')
 30 | 
 31 | parser.add_argument('--eps', default = 2, type=float, metavar='M',
 32 |                     help='option1: random epsilon distribution')
 33 | 
 34 | parser.add_argument('--attacker', default='stepll', type=str,
 35 |                     help='option2: attacker for generating adv input')
 36 | 
 37 | parser.add_argument('--loss_schema', default='averaged', type=str,
 38 |                     help='option3: loss schema')
 39 | 
 40 | 
 41 | # reproducible 
 42 | torch.manual_seed(66)
 43 | np.random.seed(66)
 44 | 
 45 | 
 46 | ######################################### modify accordingly ##################################################
 47 | # adv models: the static model used to generate adv input images
 48 | # fixed to memory for all the trainings to speed up.
 49 | adv_resnet18 = ResNet18()
 50 | adv_resnet50 = ResNet50()
 51 | adv_mobilenet_125 = MobileNetV2(width_mult=1.25)
 52 | adv_googlenet = GoogLeNet()
 53 | 
 54 | 
 55 | adv_models = [adv_resnet18, adv_resnet50, adv_mobilenet_125, adv_googlenet]
 56 | adv_model_names = ['resnet18', 'resnet50', 'mobilenet_125', 'googlenet']
 57 | 
 58 | # models: models for be adv training
 59 | # loaded only on its training to save memory.
 60 | model_classes = [ ResNet34, ResNet101, MobileNetV2, MobileNetV2]
 61 | model_names = [ 'resnet34', 'resnet101', 'mobilenet_1', 'mobilenet_075']
 62 | params = {
 63 |     'mobilenet_1': 1.0,
 64 |     'mobilenet_075': 0.75,
 65 | }
 66 | 
 67 | 
 68 | # path
 69 | trial_name = 'adv_models:'
 70 | for adv_model_name in adv_model_names:
 71 |     trial_name = trial_name + '-' + adv_model_name
 72 | # path to pre-trained models checkpoints
 73 | adv_checkpoint_path = 'checkpoints/cifar10/'
 74 | output_path = 'checkpoints/adv_train/cifar10/' + trial_name +'/'
 75 | tensorboard_path = 'tensorboard/cifar10/adv_train/' + trial_name +'/'
 76 | ######################################### modify accordingly ##################################################
 77 | 
 78 | 
 79 | 
 80 | if not os.path.isdir(output_path):
 81 |     pathlib.Path(output_path).mkdir(parents=True, exist_ok=True)
 82 | if not os.path.isdir(tensorboard_path):
 83 |     pathlib.Path(tensorboard_path).mkdir(parents=True, exist_ok=True)
 84 | 
 85 | def main(model_class, model_name, model_path, adv_models, writer, args):
 86 |     dataset = args.dataset
 87 |     epochs = args.epochs
 88 | 
 89 |     best_acc = 0
 90 | 
 91 |     # prepare data loader 
 92 |     trainloader, testloader = get_data_loader(dataset)
 93 | 
 94 |     # create model
 95 |     if model_name in params.keys():
 96 |         model = model_class(params[model_name])
 97 |     else:
 98 |         model = model_class()
 99 | 
100 |     device = 'cuda' if torch.cuda.is_available() else 'cpu'
101 | 
102 |     if device == 'cuda':
103 |         model = torch.nn.DataParallel(model)
104 |         model = model.cuda()
105 | 
106 |     # optimizer 
107 |     criterion = nn.CrossEntropyLoss(reduction = 'mean')
108 |     # paper use RMSProp but author's github use adam, here we follow the author's github
109 |     optimizer = optim.Adam(model.parameters(), lr= 0.001, weight_decay=5e-4)
110 | 
111 |     # training
112 |     for epoch in range(epochs): 
113 |         ens_adv_train(trainloader, criterion, optimizer, model, adv_models, writer, epoch, args)
114 |         acc = validate(testloader, model, criterion, writer, epoch)
115 | 
116 |         if acc > best_acc :
117 |             best_acc = acc
118 |             save_checkpoint(model, model_path, optimizer, best_acc, epoch)
119 | 
120 | 
121 | 
122 | # save model
def save_checkpoint(model, model_path, optimizer, best_acc, epoch):
    """Write a training checkpoint to ``model_path`` with ``torch.save``.

    The stored dictionary has the keys ``state_dict`` (model weights),
    ``acc``, ``epoch`` and ``optimizer`` (optimizer state).

    :param model: model whose ``state_dict()`` is saved.
    :param model_path: destination handed to ``torch.save``.
    :param optimizer: optimizer whose ``state_dict()`` is saved.
    :param best_acc: accuracy stored under ``acc``.
    :param epoch: epoch index stored under ``epoch``.
    """
    checkpoint = dict(
        state_dict=model.state_dict(),
        acc=best_acc,
        epoch=epoch,
        optimizer=optimizer.state_dict(),
    )
    torch.save(checkpoint, model_path)
132 | 
133 | 
134 | 
def get_data_loader(dataset, data_root='/home/deliangj/data/'):
    """Build the train and test data loaders for ``dataset``.

    Training images are randomly cropped (32x32, padding 4) and horizontally
    flipped; both splits are converted to tensors and left unnormalized.

    :param dataset: ``'cifar10'`` or ``'cinic10'``.
    :param data_root: directory holding the data. CIFAR-10 is read from it
                      directly (no download is attempted); CINIC-10 is read
                      from the ``train`` and ``test`` image folders of its
                      ``cinic10`` sub-directory.
    :return: ``(trainloader, testloader)``; the train loader is shuffled with
             batch size 128, the test loader is unshuffled with batch size 256.
    :raises ValueError: if ``dataset`` is not one of the supported names.
    """
    supported = ('cifar10', 'cinic10')
    if dataset not in supported:
        # Fail here instead of returning None, which callers would only hit
        # later as an unrelated unpacking error.
        raise ValueError('no such dataset: %r (expected one of %s)'
                         % (dataset, ', '.join(supported)))

    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        # mean subtract
        # transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])
    transform_test = transforms.Compose([
        transforms.ToTensor(),
        # transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])

    if dataset == 'cifar10':
        trainset = torchvision.datasets.CIFAR10(root=data_root, train=True, download=False,
                                                transform=transform_train)
        testset = torchvision.datasets.CIFAR10(root=data_root, train=False, download=False,
                                               transform=transform_test)
    else:
        # The cinic10 branch used to normalize with mean [0, 0, 0] and
        # std [0, 0, 0], i.e. a division by zero. The inputs are now left
        # unnormalized, exactly as for cifar10.
        # NOTE(review): add a Normalize with real CINIC-10 statistics here if
        # normalized inputs are wanted — confirm against the attack code first.
        cinic_directory = os.path.join(data_root, 'cinic10')
        trainset = torchvision.datasets.ImageFolder(cinic_directory + '/train',
                                                    transform=transform_train)
        testset = torchvision.datasets.ImageFolder(cinic_directory + '/test',
                                                   transform=transform_test)

    trainloader = torch.utils.data.DataLoader(trainset, batch_size=128, shuffle=True, num_workers=4)
    testloader = torch.utils.data.DataLoader(testset, batch_size=256, shuffle=False, num_workers=4)

    return trainloader, testloader
186 | 
187 | 
if __name__ == '__main__':
    # Script entry point: load the static pre-trained adv models, then
    # adversarially train every model listed in `model_classes`, one after
    # the other.

    # training parameters
    args = parser.parse_args()

    # checkpoint paths
    model_save_paths = [output_path + model_name + '.pth.tar' for model_name in model_names]
    adv_model_paths = [adv_checkpoint_path + adv_model_name + '.pth.tar' for adv_model_name in adv_model_names]

    # the run needs a GPU; leave with a non-zero status so that calling
    # scripts can detect the failure
    if not torch.cuda.is_available():
        print('gpu not avaible please check !')
        sys.exit(1)

    # load adv models
    for i, adv_model in enumerate(adv_models):
        # pre-trained static models: wrapped, moved to the GPU and kept in eval mode
        adv_models[i] = torch.nn.DataParallel(adv_model).cuda().eval()

    # adv pre-trained static models
    for adv_model, adv_model_path in zip(adv_models, adv_model_paths):
        checkpoint = torch.load(adv_model_path)
        # the weights are stored under either 'state_dict' or 'net'
        for state_key in ('state_dict', 'net'):
            if state_key in checkpoint:
                break
        else:
            raise KeyError("checkpoint %r contains neither 'state_dict' nor 'net'" % adv_model_path)
        adv_model.load_state_dict(checkpoint[state_key])

    # starting training each model
    for model_class, model_name, model_save_path in zip(model_classes, model_names, model_save_paths):
        print('adv training model: ' + model_name)
        writer = SummaryWriter(tensorboard_path + model_name)
        try:
            main(model_class, model_name, model_save_path, adv_models, writer, args)
        finally:
            # flush and release the event file even when the training fails
            writer.close()
223 | 
224 | 


--------------------------------------------------------------------------------
/models/cifar10/inception.py:
--------------------------------------------------------------------------------
  1 | '''
  2 | https://github.com/kuangliu/pytorch-cifar/blob/master/models/googlenet.py
  3 | '''
  4 | import torch
  5 | import torch.nn as nn
  6 | import torch.nn.functional as F
  7 | 
  8 | 
  9 | class Inception(nn.Module):
 10 |     def __init__(self, in_planes, n1x1, n3x3red, n3x3, n5x5red, n5x5, pool_planes):
 11 |         super(Inception, self).__init__()
 12 |         # 1x1 conv branch
 13 |         self.b1 = nn.Sequential(
 14 |             nn.Conv2d(in_planes, n1x1, kernel_size=1),
 15 |             nn.BatchNorm2d(n1x1),
 16 |             nn.ReLU(True),
 17 |         )
 18 | 
 19 |         # 1x1 conv -> 3x3 conv branch
 20 |         self.b2 = nn.Sequential(
 21 |             nn.Conv2d(in_planes, n3x3red, kernel_size=1),
 22 |             nn.BatchNorm2d(n3x3red),
 23 |             nn.ReLU(True),
 24 |             nn.Conv2d(n3x3red, n3x3, kernel_size=3, padding=1),
 25 |             nn.BatchNorm2d(n3x3),
 26 |             nn.ReLU(True),
 27 |         )
 28 | 
 29 |         # 1x1 conv -> 5x5 conv branch
 30 |         self.b3 = nn.Sequential(
 31 |             nn.Conv2d(in_planes, n5x5red, kernel_size=1),
 32 |             nn.BatchNorm2d(n5x5red),
 33 |             nn.ReLU(True),
 34 |             nn.Conv2d(n5x5red, n5x5, kernel_size=3, padding=1),
 35 |             nn.BatchNorm2d(n5x5),
 36 |             nn.ReLU(True),
 37 |             nn.Conv2d(n5x5, n5x5, kernel_size=3, padding=1),
 38 |             nn.BatchNorm2d(n5x5),
 39 |             nn.ReLU(True),
 40 |         )
 41 | 
 42 |         # 3x3 pool -> 1x1 conv branch
 43 |         self.b4 = nn.Sequential(
 44 |             nn.MaxPool2d(3, stride=1, padding=1),
 45 |             nn.Conv2d(in_planes, pool_planes, kernel_size=1),
 46 |             nn.BatchNorm2d(pool_planes),
 47 |             nn.ReLU(True),
 48 |         )
 49 | 
 50 |     def forward(self, x):
 51 |         y1 = self.b1(x)
 52 |         y2 = self.b2(x)
 53 |         y3 = self.b3(x)
 54 |         y4 = self.b4(x)
 55 |         return torch.cat([y1,y2,y3,y4], 1)
 56 | 
 57 | 
 58 | class GoogLeNet(nn.Module):
 59 |     def __init__(self):
 60 |         super(GoogLeNet, self).__init__()
 61 |         self.pre_layers = nn.Sequential(
 62 |             nn.Conv2d(3, 192, kernel_size=3, padding=1),
 63 |             nn.BatchNorm2d(192),
 64 |             nn.ReLU(True),
 65 |         )
 66 | 
 67 |         self.a3 = Inception(192,  64,  96, 128, 16, 32, 32)
 68 |         self.b3 = Inception(256, 128, 128, 192, 32, 96, 64)
 69 | 
 70 |         self.maxpool = nn.MaxPool2d(3, stride=2, padding=1)
 71 | 
 72 |         self.a4 = Inception(480, 192,  96, 208, 16,  48,  64)
 73 |         self.b4 = Inception(512, 160, 112, 224, 24,  64,  64)
 74 |         self.c4 = Inception(512, 128, 128, 256, 24,  64,  64)
 75 |         self.d4 = Inception(512, 112, 144, 288, 32,  64,  64)
 76 |         self.e4 = Inception(528, 256, 160, 320, 32, 128, 128)
 77 | 
 78 |         self.a5 = Inception(832, 256, 160, 320, 32, 128, 128)
 79 |         self.b5 = Inception(832, 384, 192, 384, 48, 128, 128)
 80 | 
 81 |         self.avgpool = nn.AvgPool2d(8, stride=1)
 82 |         self.linear = nn.Linear(1024, 10)
 83 | 
 84 |     def forward(self, x):
 85 |         out = self.pre_layers(x)
 86 |         out = self.a3(out)
 87 |         out = self.b3(out)
 88 |         out = self.maxpool(out)
 89 |         out = self.a4(out)
 90 |         out = self.b4(out)
 91 |         out = self.c4(out)
 92 |         out = self.d4(out)
 93 |         out = self.e4(out)
 94 |         out = self.maxpool(out)
 95 |         out = self.a5(out)
 96 |         out = self.b5(out)
 97 |         out = self.avgpool(out)
 98 |         out = out.view(out.size(0), -1)
 99 |         out = self.linear(out)
100 |         return out
101 | 
102 | 
def test():
    """Smoke test: print the output size of GoogLeNet for one random 3x32x32 input."""
    net = GoogLeNet()
    print(net(torch.randn(1, 3, 32, 32)).size())


--------------------------------------------------------------------------------
/models/cifar10/mobilenetv2_2.py:
--------------------------------------------------------------------------------
  1 | ''' 
  2 | https://github.com/tonylins/pytorch-mobilenet-v2/blob/master/MobileNetV2.py
  3 | make some small modification to adapt input from imagenet to cifar10.
  4 | '''
  5 | import torch.nn as nn
  6 | import math
  7 | 
  8 | 
  9 | def conv_bn(inp, oup, stride):
 10 |     return nn.Sequential(
 11 |         nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
 12 |         nn.BatchNorm2d(oup),
 13 |         nn.ReLU6(inplace=True)
 14 |     )
 15 | 
 16 | 
 17 | def conv_1x1_bn(inp, oup):
 18 |     return nn.Sequential(
 19 |         nn.Conv2d(inp, oup, 1, 1, 0, bias=False),
 20 |         nn.BatchNorm2d(oup),
 21 |         nn.ReLU6(inplace=True)
 22 |     )
 23 | 
 24 | 
 25 | class InvertedResidual(nn.Module):
 26 |     def __init__(self, inp, oup, stride, expand_ratio):
 27 |         super(InvertedResidual, self).__init__()
 28 |         self.stride = stride
 29 |         assert stride in [1, 2]
 30 | 
 31 |         hidden_dim = round(inp * expand_ratio)
 32 | 
 33 |         self.use_res_connect = self.stride == 1 and inp == oup
 34 | 
 35 |         if expand_ratio == 1:
 36 |             self.conv = nn.Sequential(
 37 |                 # dw
 38 |                 nn.Conv2d(hidden_dim, hidden_dim, 3, stride, 1, groups=hidden_dim, bias=False),
 39 |                 nn.BatchNorm2d(hidden_dim),
 40 |                 nn.ReLU6(inplace=True),
 41 |                 # pw-linear
 42 |                 nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
 43 |                 nn.BatchNorm2d(oup),
 44 |             )
 45 |         else:
 46 |             self.conv = nn.Sequential(
 47 |                 # pw
 48 |                 nn.Conv2d(inp, hidden_dim, 1, 1, 0, bias=False),
 49 |                 nn.BatchNorm2d(hidden_dim),
 50 |                 nn.ReLU6(inplace=True),
 51 |                 # dw
 52 |                 nn.Conv2d(hidden_dim, hidden_dim, 3, stride, 1, groups=hidden_dim, bias=False),
 53 |                 nn.BatchNorm2d(hidden_dim),
 54 |                 nn.ReLU6(inplace=True),
 55 |                 # pw-linear
 56 |                 nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
 57 |                 nn.BatchNorm2d(oup),
 58 |             )
 59 | 
 60 |     def forward(self, x):
 61 |         if self.use_res_connect:
 62 |             return x + self.conv(x)
 63 |         else:
 64 |             return self.conv(x)
 65 | 
 66 | 
 67 | class MobileNetV2(nn.Module):
 68 |     def __init__(self, width_mult=1., n_class=10, input_size=32):
 69 |         super(MobileNetV2, self).__init__()
 70 |         block = InvertedResidual
 71 |         input_channel = 32
 72 |         last_channel = 1280
 73 |         print ('width_mult =', width_mult)
 74 | 
 75 |         interverted_residual_setting = [
 76 |            (1,  16, 1, 1),
 77 |            (6,  24, 2, 1),  # NOTE: change stride 2 -> 1 for CIFAR10
 78 |            (6,  32, 3, 2),
 79 |            (6,  64, 4, 2),
 80 |            (6,  96, 3, 1),
 81 |            (6, 160, 3, 2),
 82 |            (6, 320, 1, 1)]
 83 | 
 84 |         # building first layer
 85 |         assert input_size % 32 == 0
 86 |         ## (0.37 * 5) = 1.85; int(0.37 * 5) = 1
 87 |         input_channel = int(input_channel * width_mult)
 88 |         # we apply width multiplier to all layers except the very last convolutional layer. This improves performance for smaller models
 89 |         self.last_channel = int(last_channel * width_mult) if width_mult > 1.0 else last_channel
 90 |         self.features = [conv_bn(3, input_channel,1)]# NOTE: change conv1 stride 2 -> 1 for CIFAR10
 91 |         # building inverted residual blocks
 92 |         for t, c, n, s in interverted_residual_setting:
 93 |             output_channel = int(c * width_mult)
 94 |             for i in range(n):
 95 |                 if i == 0:
 96 |                     self.features.append(block(input_channel, output_channel, s, expand_ratio=t))
 97 |                 else:
 98 |                     self.features.append(block(input_channel, output_channel, 1, expand_ratio=t))
 99 |                 input_channel = output_channel
100 |         # building last several layers
101 |         self.features.append(conv_1x1_bn(input_channel, self.last_channel))
102 |         # make it nn.Sequential
103 |         self.features = nn.Sequential(*self.features)
104 | 
105 |         # building classifier
106 |         self.classifier = nn.Sequential(
107 |             nn.Dropout(0.2),
108 |             nn.Linear(self.last_channel, n_class),
109 |         )
110 | 
111 |         self._initialize_weights()
112 | 
113 |     def forward(self, x):
114 |         x = self.features(x)
115 |         # torch tensor (C, H, W)
116 |         x = x.mean(3).mean(2)
117 |         x = self.classifier(x)
118 |         return x
119 | 
120 |     def _initialize_weights(self):
121 |         for m in self.modules():
122 |             if isinstance(m, nn.Conv2d):
123 |                 n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
124 |                 m.weight.data.normal_(0, math.sqrt(2. / n))
125 |                 if m.bias is not None:
126 |                     m.bias.data.zero_()
127 |             elif isinstance(m, nn.BatchNorm2d):
128 |                 m.weight.data.fill_(1)
129 |                 m.bias.data.zero_()
130 |             elif isinstance(m, nn.Linear):
131 |                 n = m.weight.size(1)
132 |                 m.weight.data.normal_(0, 0.01)
133 |                 m.bias.data.zero_()
134 | 


--------------------------------------------------------------------------------
/models/cifar10/resnet.py:
--------------------------------------------------------------------------------
  1 | '''
  2 | https://github.com/kuangliu/pytorch-cifar/blob/master/models/resnet.py
  3 | '''
  4 | import torch
  5 | import torch.nn as nn
  6 | import torch.nn.functional as F
  7 | 
  8 | 
  9 | class BasicBlock(nn.Module):
 10 |     expansion = 1
 11 | 
 12 |     def __init__(self, in_planes, planes, stride=1):
 13 |         super(BasicBlock, self).__init__()
 14 |         self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
 15 |         self.bn1 = nn.BatchNorm2d(planes)
 16 |         self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
 17 |         self.bn2 = nn.BatchNorm2d(planes)
 18 | 
 19 |         self.shortcut = nn.Sequential()
 20 |         if stride != 1 or in_planes != self.expansion*planes:
 21 |             self.shortcut = nn.Sequential(
 22 |                 nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False),
 23 |                 nn.BatchNorm2d(self.expansion*planes)
 24 |             )
 25 | 
 26 |     def forward(self, x):
 27 |         out = F.relu(self.bn1(self.conv1(x)))
 28 |         out = self.bn2(self.conv2(out))
 29 |         out += self.shortcut(x)
 30 |         out = F.relu(out)
 31 |         return out
 32 | 
 33 | 
 34 | class Bottleneck(nn.Module):
 35 |     expansion = 4
 36 | 
 37 |     def __init__(self, in_planes, planes, stride=1):
 38 |         super(Bottleneck, self).__init__()
 39 |         self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
 40 |         self.bn1 = nn.BatchNorm2d(planes)
 41 |         self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
 42 |         self.bn2 = nn.BatchNorm2d(planes)
 43 |         self.conv3 = nn.Conv2d(planes, self.expansion*planes, kernel_size=1, bias=False)
 44 |         self.bn3 = nn.BatchNorm2d(self.expansion*planes)
 45 | 
 46 |         self.shortcut = nn.Sequential()
 47 |         if stride != 1 or in_planes != self.expansion*planes:
 48 |             self.shortcut = nn.Sequential(
 49 |                 nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False),
 50 |                 nn.BatchNorm2d(self.expansion*planes)
 51 |             )
 52 | 
 53 |     def forward(self, x):
 54 |         out = F.relu(self.bn1(self.conv1(x)))
 55 |         out = F.relu(self.bn2(self.conv2(out)))
 56 |         out = self.bn3(self.conv3(out))
 57 |         out += self.shortcut(x)
 58 |         out = F.relu(out)
 59 |         return out
 60 | 
 61 | 
 62 | class ResNet(nn.Module):
 63 |     def __init__(self, block, num_blocks, num_classes=10):
 64 |         super(ResNet, self).__init__()
 65 |         self.in_planes = 64
 66 | 
 67 |         self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
 68 |         self.bn1 = nn.BatchNorm2d(64)
 69 |         self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
 70 |         self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
 71 |         self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
 72 |         self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
 73 |         self.linear = nn.Linear(512*block.expansion, num_classes)
 74 | 
 75 |     def _make_layer(self, block, planes, num_blocks, stride):
 76 |         strides = [stride] + [1]*(num_blocks-1)
 77 |         layers = []
 78 |         for stride in strides:
 79 |             layers.append(block(self.in_planes, planes, stride))
 80 |             self.in_planes = planes * block.expansion
 81 |         return nn.Sequential(*layers)
 82 | 
 83 |     def forward(self, x):
 84 |         out = F.relu(self.bn1(self.conv1(x)))
 85 |         out = self.layer1(out)
 86 |         out = self.layer2(out)
 87 |         out = self.layer3(out)
 88 |         out = self.layer4(out)
 89 |         out = F.avg_pool2d(out, 4)
 90 |         out = out.view(out.size(0), -1)
 91 |         out = self.linear(out)
 92 |         return out
 93 | 
 94 | 
 95 | def ResNet18():
 96 |     return ResNet(BasicBlock, [2,2,2,2])
 97 | 
 98 | def ResNet34():
 99 |     return ResNet(BasicBlock, [3,4,6,3])
100 | 
101 | def ResNet50():
102 |     return ResNet(Bottleneck, [3,4,6,3])
103 | 
104 | def ResNet101():
105 |     return ResNet(Bottleneck, [3,4,23,3])
106 | 
107 | def ResNet152():
108 |     return ResNet(Bottleneck, [3,8,36,3])
109 | 
110 | 
def test():
    """Smoke test: print the output size of ResNet18 for one random 3x32x32 input."""
    model = ResNet18()
    scores = model(torch.randn(1, 3, 32, 32))
    print(scores.size())
115 | 
116 | # test()
117 | 


--------------------------------------------------------------------------------
/tensorboard/cifar10/adv_train/adv_models:-resnet18-resnet50-mobilenet_125-googlenet/resnet101/events.out.tfevents.1558584038.destc0strapp81.eu.sony.com:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JZ-LIANG/Ensemble-Adversarial-Training/255902d5ada181a727da666f75b08d121b3fd044/tensorboard/cifar10/adv_train/adv_models:-resnet18-resnet50-mobilenet_125-googlenet/resnet101/events.out.tfevents.1558584038.destc0strapp81.eu.sony.com


--------------------------------------------------------------------------------
/tensorboard/cifar10/adv_train/adv_models:-resnet18-resnet50-mobilenet_125-googlenet/resnet34/events.out.tfevents.1558545149.destc0strapp81.eu.sony.com:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JZ-LIANG/Ensemble-Adversarial-Training/255902d5ada181a727da666f75b08d121b3fd044/tensorboard/cifar10/adv_train/adv_models:-resnet18-resnet50-mobilenet_125-googlenet/resnet34/events.out.tfevents.1558545149.destc0strapp81.eu.sony.com


--------------------------------------------------------------------------------