├── .gitignore
├── Attacks
└── Gradient_based
│ ├── fast_gradient_method.py
│ ├── least_likely_class_method.py
│ └── utils_pytorch.py
├── README.md
├── ens_adv_train.py
├── images
└── tensorboard.png
├── main_ens_adv_train.py
├── models
└── cifar10
│ ├── inception.py
│ ├── mobilenetv2_2.py
│ └── resnet.py
└── tensorboard
└── cifar10
└── adv_train
└── adv_models:-resnet18-resnet50-mobilenet_125-googlenet
├── resnet101
└── events.out.tfevents.1558584038.destc0strapp81.eu.sony.com
└── resnet34
└── events.out.tfevents.1558545149.destc0strapp81.eu.sony.com
/.gitignore:
--------------------------------------------------------------------------------
1 | .ipynb_checkpoints
2 | __pycache__
3 | history_main
4 | checkpoints/*
5 | *.pyc
6 | *.tar
7 | *.pth
8 | *.log
9 | *.txt
10 | *.so
11 |
12 | ** temp/
13 | temp
14 | *.log
15 | *.sh
16 |
17 |
--------------------------------------------------------------------------------
/Attacks/Gradient_based/fast_gradient_method.py:
--------------------------------------------------------------------------------
1 | '''
2 | https://github.com/tensorflow/cleverhans/blob/master/cleverhans/utils_pytorch.py
3 |
4 | ## modification:
5 | model_fn.zero_grad()
6 | '''
7 |
8 | import numpy as np
9 | import torch
10 |
11 | from Attacks.Gradient_based.utils_pytorch import optimize_linear
12 |
13 |
14 | def fast_gradient_method(model_fn, x, eps, ord,
15 | clip_min=None, clip_max=None, y=None, targeted=False, sanity_checks=False):
16 | """
17 | PyTorch implementation of the Fast Gradient Method.
18 | :param model_fn: a callable that takes an input tensor and returns the model logits.
19 | :param x: input tensor.
20 | :param eps: epsilon (input variation parameter); see https://arxiv.org/abs/1412.6572.
21 | :param ord: Order of the norm (mimics NumPy). Possible values: np.inf, 1 or 2.
22 | :param clip_min: (optional) float. Minimum float value for adversarial example components.
23 | :param clip_max: (optional) float. Maximum float value for adversarial example components.
24 | :param y: (optional) Tensor with true labels. If targeted is true, then provide the
25 | target label. Otherwise, only provide this parameter if you'd like to use true
26 | labels when crafting adversarial samples. Otherwise, model predictions are used
27 | as labels to avoid the "label leaking" effect (explained in this paper:
28 | https://arxiv.org/abs/1611.01236). Default is None.
29 | :param targeted: (optional) bool. Is the attack targeted or untargeted?
30 | Untargeted, the default, will try to make the label incorrect.
31 | Targeted will instead try to move in the direction of being more like y.
32 | :param sanity_checks: bool, if True, include asserts (Turn them off to use less runtime /
33 | memory or for unit tests that intentionally pass strange input)
34 | :return: a tensor for the adversarial example
35 | """
36 | if ord not in [np.inf, 1, 2]:
37 | raise ValueError("Norm order must be either np.inf, 1, or 2.")
38 |
39 | asserts = []
40 |
41 | # If a data range was specified, check that the input was in that range
42 | if clip_min is not None:
43 | assert_ge = torch.all(torch.ge(x, torch.tensor(clip_min, device=x.device, dtype=x.dtype)))
44 | asserts.append(assert_ge)
45 |
46 | if clip_max is not None:
47 | assert_le = torch.all(torch.le(x, torch.tensor(clip_max, device=x.device, dtype=x.dtype)))
48 | asserts.append(assert_le)
49 |
50 | # x needs to be a leaf variable, of floating point type and have requires_grad being True for
51 | # its grad to be computed and stored properly in a backward call
52 | x = x.clone().detach().to(torch.float).requires_grad_(True)
53 | if y is None:
54 | # Using model predictions as ground truth to avoid label leaking
55 | _, y = torch.max(model_fn(x), 1)
56 |
57 | # Compute loss
58 | loss_fn = torch.nn.CrossEntropyLoss()
59 | loss = loss_fn(model_fn(x), y)
60 | # If attack is targeted, minimize loss of target label rather than maximize loss of correct label
61 | if targeted:
62 | loss = -loss
63 |
64 | # Define gradient of loss wrt input
65 | model_fn.zero_grad()
66 | loss.backward()
67 | optimal_perturbation = optimize_linear(x.grad, eps, ord)
68 |
69 | # Add perturbation to original example to obtain adversarial example
70 | adv_x = x + optimal_perturbation
71 |
72 | # If clipping is needed, reset all values outside of [clip_min, clip_max]
73 | if (clip_min is not None) or (clip_max is not None):
74 | # We don't currently support one-sided clipping
75 | assert clip_min is not None and clip_max is not None
76 | adv_x = torch.clamp(adv_x, clip_min, clip_max)
77 |
78 | if sanity_checks:
79 | assert np.all(asserts)
80 | return adv_x
--------------------------------------------------------------------------------
/Attacks/Gradient_based/least_likely_class_method.py:
--------------------------------------------------------------------------------
1 | """
2 | The least_likely_class_method attack
3 | modified from fast_gradient_method
4 |
5 | """
6 | import numpy as np
7 | import torch
8 | import sys
9 |
10 | from Attacks.Gradient_based.utils_pytorch import optimize_linear
11 |
12 |
13 | def least_likely_class_method(model_fn, x, eps, ord = np.inf,
14 | clip_min=None, clip_max=None, sanity_checks=False):
15 | """
16 | PyTorch implementation of the Fast Gradient Method.
17 | :param model_fn: a callable that takes an input tensor and returns the model logits.
18 | :param x: input tensor.
19 | :param eps: epsilon (input variation parameter); see https://arxiv.org/abs/1412.6572.
20 | :param ord: Order of the norm (mimics NumPy). Possible values: np.inf, 1 or 2.
21 | :param clip_min: (optional) float. Minimum float value for adversarial example components.
22 | :param clip_max: (optional) float. Maximum float value for adversarial example components.
23 | :param y: (optional) Tensor with true labels. If targeted is true, then provide the
24 | target label. Otherwise, only provide this parameter if you'd like to use true
25 | labels when crafting adversarial samples. Otherwise, model predictions are used
26 | as labels to avoid the "label leaking" effect (explained in this paper:
27 | https://arxiv.org/abs/1611.01236). Default is None.
28 | :param targeted: (optional) bool. Is the attack targeted or untargeted?
29 | Untargeted, the default, will try to make the label incorrect.
30 | Targeted will instead try to move in the direction of being more like y.
31 | :param sanity_checks: bool, if True, include asserts (Turn them off to use less runtime /
32 | memory or for unit tests that intentionally pass strange input)
33 | :return: a tensor for the adversarial example
34 | """
35 | if ord not in [np.inf, 1, 2]:
36 | raise ValueError("Norm order must be either np.inf, 1, or 2.")
37 |
38 | asserts = []
39 |
40 | # If a data range was specified, check that the input was in that range
41 | if clip_min is not None:
42 | assert_ge = torch.all(torch.ge(x, torch.tensor(clip_min, device=x.device, dtype=x.dtype)))
43 | asserts.append(assert_ge)
44 |
45 | if clip_max is not None:
46 | assert_le = torch.all(torch.le(x, torch.tensor(clip_max, device=x.device, dtype=x.dtype)))
47 | asserts.append(assert_le)
48 |
49 | # x needs to be a leaf variable, of floating point type and have requires_grad being True for
50 | # its grad to be computed and stored properly in a backward call
51 | x = x.clone().detach().to(torch.float).requires_grad_(True)
52 |
53 |
54 | # compute the least likely label
55 | _, y_ll = torch.min(model_fn(x), 1)
56 |
57 | # Compute loss
58 | loss_fn = torch.nn.CrossEntropyLoss()
59 | loss = loss_fn(model_fn(x), y_ll)
60 | # If attack is targeted, minimize loss of target label rather than maximize loss of correct label
61 | loss = -loss
62 |
63 | # Define gradient of loss wrt input
64 | model_fn.zero_grad()
65 | loss.backward()
66 | optimal_perturbation = optimize_linear(x.grad, eps, ord)
67 |
68 | # Add perturbation to original example to obtain adversarial example
69 | adv_x = x + optimal_perturbation
70 |
71 | # If clipping is needed, reset all values outside of [clip_min, clip_max]
72 | if (clip_min is not None) or (clip_max is not None):
73 | # We don't currently support one-sided clipping
74 | assert clip_min is not None and clip_max is not None
75 | adv_x = torch.clamp(adv_x, clip_min, clip_max)
76 |
77 | if sanity_checks:
78 | assert np.all(asserts)
79 | return adv_x
--------------------------------------------------------------------------------
/Attacks/Gradient_based/utils_pytorch.py:
--------------------------------------------------------------------------------
1 | """
2 | https://github.com/tensorflow/cleverhans/blob/master/cleverhans/utils_pytorch.py
3 | """
4 |
5 | import warnings
6 | from random import getrandbits
7 |
8 | import numpy as np
9 | # import tensorflow as tf
10 | import torch
11 | from torch.autograd import Variable
12 |
13 |
14 | # https://gist.github.com/kingspp/3ec7d9958c13b94310c1a365759aa3f4
15 | # Pyfunc Gradient Function
16 | def _py_func_with_gradient(func, inp, Tout, stateful=True, name=None,
17 | grad_func=None):
18 | """
19 | PyFunc defined as given by Tensorflow
20 | :param func: Custom Function
21 | :param inp: Function Inputs
22 | :param Tout: Ouput Type of out Custom Function
23 | :param stateful: Calculate Gradients when stateful is True
24 | :param name: Name of the PyFunction
25 | :param grad: Custom Gradient Function
26 | :return:
27 | """
28 | # Generate random name in order to avoid conflicts with inbuilt names
29 | rnd_name = 'PyFuncGrad-' + '%0x' % getrandbits(30 * 4)
30 |
31 | # Register Tensorflow Gradient
32 | tf.RegisterGradient(rnd_name)(grad_func)
33 |
34 | # Get current graph
35 | g = tf.get_default_graph()
36 |
37 | # Add gradient override map
38 | with g.gradient_override_map({"PyFunc": rnd_name,
39 | "PyFuncStateless": rnd_name}):
40 | return tf.py_func(func, inp, Tout, stateful=stateful, name=name)
41 |
42 |
43 | def convert_pytorch_model_to_tf(model, out_dims=None):
44 | """
45 | Convert a pytorch model into a tensorflow op that allows backprop
46 | :param model: A pytorch nn.Module object
47 | :param out_dims: The number of output dimensions (classes) for the model
48 | :return: A model function that maps an input (tf.Tensor) to the
49 | output of the model (tf.Tensor)
50 | """
51 | warnings.warn("convert_pytorch_model_to_tf is deprecated, switch to"
52 | + " dedicated PyTorch support provided by CleverHans v4.")
53 |
54 | torch_state = {
55 | 'logits': None,
56 | 'x': None,
57 | }
58 | if not out_dims:
59 | out_dims = list(model.modules())[-1].out_features
60 |
61 | def _fprop_fn(x_np):
62 | """TODO: write this"""
63 | x_tensor = torch.Tensor(x_np)
64 | if torch.cuda.is_available():
65 | x_tensor = x_tensor.cuda()
66 | torch_state['x'] = Variable(x_tensor, requires_grad=True)
67 | torch_state['logits'] = model(torch_state['x'])
68 | return torch_state['logits'].data.cpu().numpy()
69 |
70 | def _bprop_fn(x_np, grads_in_np):
71 | """TODO: write this"""
72 | _fprop_fn(x_np)
73 |
74 | grads_in_tensor = torch.Tensor(grads_in_np)
75 | if torch.cuda.is_available():
76 | grads_in_tensor = grads_in_tensor.cuda()
77 |
78 | # Run our backprop through our logits to our xs
79 | loss = torch.sum(torch_state['logits'] * grads_in_tensor)
80 | loss.backward()
81 | return torch_state['x'].grad.cpu().data.numpy()
82 |
83 | def _tf_gradient_fn(op, grads_in):
84 | """TODO: write this"""
85 | return tf.py_func(_bprop_fn, [op.inputs[0], grads_in],
86 | Tout=[tf.float32])
87 |
88 | def tf_model_fn(x_op):
89 | """TODO: write this"""
90 | out = _py_func_with_gradient(_fprop_fn, [x_op], Tout=[tf.float32],
91 | stateful=True,
92 | grad_func=_tf_gradient_fn)[0]
93 | out.set_shape([None, out_dims])
94 | return out
95 |
96 | return tf_model_fn
97 |
98 |
99 | def clip_eta(eta, ord, eps):
100 | """
101 | PyTorch implementation of the clip_eta in utils_tf.
102 | :param eta: Tensor
103 | :param ord: np.inf, 1, or 2
104 | :param eps: float
105 | """
106 | if ord not in [np.inf, 1, 2]:
107 | raise ValueError('ord must be np.inf, 1, or 2.')
108 |
109 | avoid_zero_div = torch.tensor(1e-12, dtype=eta.dtype, device=eta.device)
110 | reduc_ind = list(range(1, len(eta.size())))
111 | if ord == np.inf:
112 | eta = torch.clamp(eta, -eps, eps)
113 | else:
114 | if ord == 1:
115 | # TODO
116 | # raise NotImplementedError("L1 clip is not implemented.")
117 | norm = torch.max(
118 | avoid_zero_div,
119 | torch.sum(torch.abs(eta), dim=reduc_ind, keepdim=True)
120 | )
121 | elif ord == 2:
122 | norm = torch.sqrt(torch.max(
123 | avoid_zero_div,
124 | torch.sum(eta ** 2, dim=reduc_ind, keepdim=True)
125 | ))
126 | factor = torch.min(
127 | torch.tensor(1., dtype=eta.dtype, device=eta.device),
128 | eps / norm
129 | )
130 | eta *= factor
131 | return eta
132 |
133 | def get_or_guess_labels(model, x, **kwargs):
134 | """
135 | Get the label to use in generating an adversarial example for x.
136 | The kwargs are fed directly from the kwargs of the attack.
137 | If 'y' is in kwargs, then assume it's an untargeted attack and
138 | use that as the label.
139 | If 'y_target' is in kwargs and is not none, then assume it's a
140 | targeted attack and use that as the label.
141 | Otherwise, use the model's prediction as the label and perform an
142 | untargeted attack.
143 | :param model: PyTorch model. Do not add a softmax gate to the output.
144 | :param x: Tensor, shape (N, d_1, ...).
145 | :param y: (optional) Tensor, shape (N).
146 | :param y_target: (optional) Tensor, shape (N).
147 | """
148 | if 'y' in kwargs and 'y_target' in kwargs:
149 | raise ValueError("Can not set both 'y' and 'y_target'.")
150 | if 'y' in kwargs:
151 | labels = kwargs['y']
152 | elif 'y_target' in kwargs and kwargs['y_target'] is not None:
153 | labels = kwargs['y_target']
154 | else:
155 | _, labels = torch.max(model(x), 1)
156 | return labels
157 |
158 |
159 | def optimize_linear(grad, eps, ord=np.inf):
160 | """
161 | Solves for the optimal input to a linear function under a norm constraint.
162 | Optimal_perturbation = argmax_{eta, ||eta||_{ord} < eps} dot(eta, grad)
163 | :param grad: Tensor, shape (N, d_1, ...). Batch of gradients
164 | :param eps: float. Scalar specifying size of constraint region
165 | :param ord: np.inf, 1, or 2. Order of norm constraint.
166 | :returns: Tensor, shape (N, d_1, ...). Optimal perturbation
167 | """
168 |
169 | red_ind = list(range(1, len(grad.size())))
170 | avoid_zero_div = torch.tensor(1e-12, dtype=grad.dtype, device=grad.device)
171 | if ord == np.inf:
172 | # Take sign of gradient
173 | optimal_perturbation = torch.sign(grad)
174 | elif ord == 1:
175 | abs_grad = torch.abs(grad)
176 | sign = torch.sign(grad)
177 | red_ind = list(range(1, len(grad.size())))
178 | abs_grad = torch.abs(grad)
179 | ori_shape = [1]*len(grad.size())
180 | ori_shape[0] = grad.size(0)
181 |
182 | max_abs_grad, _ = torch.max(abs_grad.view(grad.size(0), -1), 1)
183 | max_mask = abs_grad.eq(max_abs_grad.view(ori_shape)).to(torch.float)
184 | num_ties = max_mask
185 | for red_scalar in red_ind:
186 | num_ties = torch.sum(num_ties, red_scalar, keepdim=True)
187 | optimal_perturbation = sign * max_mask / num_ties
188 | # TODO integrate below to a test file
189 | # check that the optimal perturbations have been correctly computed
190 | opt_pert_norm = optimal_perturbation.abs().sum(dim=red_ind)
191 | assert torch.all(opt_pert_norm == torch.ones_like(opt_pert_norm))
192 | elif ord == 2:
193 | square = torch.max(
194 | avoid_zero_div,
195 | torch.sum(grad ** 2, red_ind, keepdim=True)
196 | )
197 | optimal_perturbation = grad / torch.sqrt(square)
198 | # TODO integrate below to a test file
199 | # check that the optimal perturbations have been correctly computed
200 | opt_pert_norm = optimal_perturbation.pow(2).sum(dim=red_ind, keepdim=True).sqrt()
201 | one_mask = (square <= avoid_zero_div).to(torch.float) * opt_pert_norm + \
202 | (square > avoid_zero_div).to(torch.float)
203 | assert torch.allclose(opt_pert_norm, one_mask, rtol=1e-05, atol=1e-08)
204 | else:
205 | raise NotImplementedError("Only L-inf, L1 and L2 norms are "
206 | "currently implemented.")
207 |
208 | # Scale perturbation to be the solution for the norm=eps rather than
209 | # norm=1 problem
210 | scaled_perturbation = eps * optimal_perturbation
211 | return scaled_perturbation
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Ensemble Adversarial Training
2 |
3 |
4 | This repository contains code for performing ensemble adversarial training in Pytorch.
5 |
6 |
7 | ## Reference
8 |
9 | The code takes followings as reference and has some little modifications:
10 |
11 | * Florian, Tramèr, et al. "[Ensemble Adversarial Training: Attacks and Defenses](https://arxiv.org/abs/1705.07204)" arXiv preprint arXiv:1705.07204 (2018).
12 | * Alexey, Kurakin, et al. "[Adversarial Machine Learning at Scale](https://arxiv.org/abs/1611.01236)" arXiv preprint arXiv:1611.01236 (2017).
13 | * [ftramer/ensemble-adv-training](https://github.com/ftramer/ensemble-adv-training)
14 |
15 |
16 |
17 |
18 |
19 | ## REQUIREMENTS
20 |
21 | The code was tested with Python 3.6, Pytorch 1.0.1.
22 | tensorboardX
23 | scipy
24 |
25 |
26 |
27 |
28 |
29 | ## Getting started
30 | 1. Prepare your pre-trained-static models (`Defense/models/` and `Defense/checkpoints/` )and modify the variable and directory to them accordingly in main file.
31 |
32 | ```python
33 | # adv models: the static model used to generate adv input images
34 | adv_resnet18 = ResNet18()
35 | adv_resnet50 = ResNet50()
36 | adv_mobilenet_125 = MobileNetV2(width_mult=1.25)
37 | adv_googlenet = GoogLeNet()
38 |
39 | adv_models = [adv_resnet18, adv_resnet50, adv_mobilenet_125, adv_googlenet]
40 | adv_model_names = ['resnet18', 'resnet50', 'mobilenet_125', 'googlenet']
41 |
42 | ```
43 |
44 |
45 | 2. Setting the models to be trained and output and log directories.
46 |
47 | ```python
48 | # models: models for be adv training
49 | model_classes = [ ResNet34, ResNet101, MobileNetV2, MobileNetV2]
50 | model_names = [ 'resnet34', 'resnet101', 'mobilenet_1', 'mobilenet_075']
51 | params = {
52 | 'mobilenet_1': 1.0,
53 | 'mobilenet_075': 0.75,
54 | }
55 |
56 | adv_checkpoint_root_path = 'checkpoints/cifar10/'
57 | output_path = 'checkpoints/adv_train/cifar10/' + trial_name +'/'
58 | tensorboard_path = 'tensorboard/cifar10/adv_train/' + trial_name +'/'
59 | ```
60 |
61 | 3. Setting options for training schema, refered from [arXiv:1611.01236](https://arxiv.org/abs/1611.01236) and [arXiv:1705.07204](https://arxiv.org/abs/1705.07204) :
62 | * option1 : distribution for random epsilon
63 | ```
64 | arg.eps range (0,1) : fixed epsilon
65 | arg.eps = 1 : [arXiv:1611.01236](https://arxiv.org/abs/1611.01236), favor small epsilon
66 | arg.eps = 2 : uniform distribution, even the possibility for large and small eps
67 | ```
68 | * option2: attacking method
69 | ```
70 | arg.attacker = 'stepll' : Step.L.L adv input, [arXiv:1705.07204](https://arxiv.org/abs/1705.07204)
71 | arg.attacker = 'fgsm' : fgsm adv input
72 | ```
73 | * option3: loss
74 | ```
75 | arg.loss_schema = 'averaged': no bias loss
76 | arg.loss_schema = 'weighted': [arXiv:1611.01236](https://arxiv.org/abs/1611.01236), loss favor for clean input
77 | ```
78 |
79 |
80 |
81 |
82 | ## Simple Example
83 |
84 |
85 | | Acc. |
86 | sources |
87 | holdout_sources |
88 |
89 |
90 | | Target |
91 | resnet18 |
92 | googlenet |
93 | resnet101 |
94 | vgg_11 |
95 |
96 |
97 | | ResNet34 |
98 | 17.44 |
99 | 18.36 |
100 | 18.09 |
101 | 18.13 |
102 |
103 |
104 | | ResNet34_adv |
105 | 63.50 |
106 | 66.25 |
107 | 64.89 |
108 | 63.77 |
109 |
110 |
111 | (adv training on 200 epochs, more epochs would lead to higher Acc)
112 |
113 | ```bash
114 | CUDA_VISIBLE_DEVICES=2,3 python3 main_ens_adv_train_cifar10.py --eps 2 --attacker 'stepll' --loss_schema 'averaged' --dataset 'cifar10'
115 | ```
116 | * a resnet34 model adv training on CIFAR10,
117 | adv_generators: [resnet18, resnet50, googlenet, mobilenet]
118 | holdout models: [resnet101]
119 |
120 | * we can see from the following figure that as training the **loss decrease** (for both clean and adv images).
121 |
122 | * Specially there is violent oscillation in the adv_loss in the first half of the training procedure, which is due to we randomize the magnitude of epsilon at each batch. when epsilon is change from small to a large value, loss jump drastically, which match our expectation that **it is hard to learn from adv input**.
123 |
124 | * The oscillation of adv_loss is damped after **30k iter**, showing that as the adv training going on, the adv_loss decrease and converge and the top1 accuracy on adv image increase to around the same level as clean images, regardless the magnitude of epsilon and the randomness of the adv_models, which showing model start to learning from perturbated input.
125 |
126 |
127 |
128 |
129 |
130 |
131 |
132 |
133 | ## note
134 | Make sure you have enough GPU memory to load all the pre-trained-static models.
135 |
136 |
137 | Compared with the refered [tensorflow 1.0 version](https://github.com/ftramer/ensemble-adv-training), which need to feed input images to all candidate adv generators when generating adv inputs, since the **static computational graph in TF 1.0**.
138 | This code just need to feed one adv_model (the currently selected one) for adv input generating, enjoying the advantage of **dynamic computational graph** in Pytorch, which (MAYBE) reduce the training time and allow to have more candidate-adv-generators.
139 |
--------------------------------------------------------------------------------
/ens_adv_train.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import numpy as np
3 | import os
4 | import sys
5 | from scipy.stats import truncnorm
6 |
7 | from Attacks.Gradient_based.least_likely_class_method import least_likely_class_method
8 | from Attacks.Gradient_based.fast_gradient_method import fast_gradient_method
9 |
10 | def ens_adv_train(trainloader, criterion, optimizer, model, adv_models, writer, epoch, args):
11 |
12 | losses_combine = AverageMeter()
13 | top1_combine = AverageMeter()
14 | losses_clean = AverageMeter()
15 | top1_clean = AverageMeter()
16 | losses_adv = AverageMeter()
17 | top1_adv = AverageMeter()
18 |
19 | # training
20 | device = 'cuda' if torch.cuda.is_available() else 'cpu'
21 | for i, (inputs_clean, targets_clean) in enumerate(trainloader):
22 |
23 | # inputs
24 | inputs_clean, targets_clean = inputs_clean.to(device), targets_clean.to(device)
25 | # generate adv images
26 | # in paper, clean and adv images are half to half in each batch,
27 | # but in author's github, clean and adv image are using entire batch and then the loss is averaged from loss of these two batch
28 | # when selected == len(adv_models), select the currunt state of the model
29 | # otherwise choose the corresponding static model
30 | selected = np.random.randint(len(adv_models) + 1)
31 | if selected == len(adv_models):
32 | adv_generating_model = model
33 | else:
34 | adv_generating_model = adv_models[selected]
35 | # the model generate adv should be in eval() model
36 | adv_generating_model.eval()
37 |
38 |
39 | # setting epsilon, normal it to range: [0, 1]
40 | if 0 < args.eps and args.eps < 1:
41 | # fixed epsilon
42 | eps = args.eps
43 | elif args.eps == 1 :
44 | # paper: , arXiv:1611.01236
45 | # favor small epsilon
46 | # a, b = (myclip_a - my_mean) / my_std, (myclip_b - my_mean) / my_std
47 | eps = truncnorm.rvs(a = 0, b = 2, loc = 0, scale = 8) / 255.0
48 | elif args.eps == 2 :
49 | # uniform distribution, even the possibility for large and small eps, range [2/255, 16/255]
50 | eps = np.random.randint(low = 2, high =17) / 255.0
51 |
52 |
53 | # generate adv images
54 | if args.attacker == 'stepll':
55 | # Step.L.L adv
56 | inputs_adv = least_likely_class_method(adv_generating_model, inputs_clean, eps, clip_min= 0, clip_max= 1)
57 | elif args.attacker =='fgsm':
58 | # Step.L.L adv
59 | inputs_adv = fast_gradient_method(adv_generating_model, inputs_clean, eps, clip_min= 0, clip_max= 1)
60 |
61 |
62 | # training
63 | ## in case that the adv_generating_model is the training model itself, clean the gradient and swith the model
64 | model.zero_grad()
65 | model.train()
66 |
67 | # clean image
68 | logits_clean = model(inputs_clean)
69 | loss1 = criterion(logits_clean, targets_clean)
70 |
71 |
72 | # adv image
73 | logits_adv = model(inputs_adv)
74 | loss2 = criterion(logits_adv, targets_clean)
75 |
76 |
77 | # combine the loss1 and loss2
78 | if args.loss_schema == 'averaged':
79 | # loss on multiple outputs
80 | # https://discuss.pytorch.org/t/a-model-with-multiple-outputs/10440
81 | loss = 0.5*(loss1 + loss2)
82 | elif args.loss_schema == 'weighted':
83 | # paper: , arXiv:1611.01236
84 | # favor for clean input
85 | loss = (1 / 1.3) (loss1 + 0.3* loss2)
86 |
87 | optimizer.zero_grad()
88 | loss.backward()
89 | optimizer.step()
90 |
91 |
92 | # print log and tensorboard
93 | # clean
94 | acc1, _ = accuracy(logits_clean, targets_clean, topk=(1,5))
95 | losses_clean.update(loss1.item(), inputs_clean.size(0))
96 | top1_clean.update(acc1[0], inputs_clean.size(0))
97 |
98 | # adv
99 | acc2, _ = accuracy(logits_adv, targets_clean, topk=(1, 5))
100 | losses_adv.update(loss2.item(), inputs_clean.size(0))
101 | top1_adv.update(acc2[0], inputs_clean.size(0))
102 |
103 | # combine
104 | acc = 0.5*(acc1[0] + acc2[0])
105 | losses_combine.update(loss.item(), inputs_clean.size(0))
106 | top1_combine.update(acc, inputs_clean.size(0))
107 |
108 | # return losses_clean, top1_clean, losses_adv, top1_adv, losses_combine, top1_combine
109 |
110 | # progress_bar(i, len(trainloader), 'Epoch: %d | clean: %.3f | Top1: %.3f | Top5: %.3f '
111 | # % (epoch, losses.avg, top1.avg, top5.avg))
112 |
113 | if i % 20 == 0:
114 | n_iter = epoch * len(trainloader) + i
115 | writer.add_scalar('Train/Loss_clean', losses_clean.val, n_iter)
116 | writer.add_scalar('Train/Loss_adv', losses_adv.val, n_iter)
117 | writer.add_scalar('Train/Losses_combine', losses_combine.val, n_iter)
118 | writer.add_scalar('Train/Prec@1_clean', top1_clean.val, n_iter)
119 | writer.add_scalar('Train/Prec@1_adv', top1_adv.val, n_iter)
120 | writer.add_scalar('Train/Prec@1_combine', top1_combine.val, n_iter)
121 | writer.add_scalar('Train/epsilon', eps, n_iter)
122 | writer.add_scalar('Train/selected', selected, n_iter)
123 |
124 |
125 |
126 | def validate(testloader, model, criterion, writer, epoch):
127 |
128 | losses = AverageMeter()
129 | top1 = AverageMeter()
130 |
131 | # switch to evaluate mode
132 | model.eval()
133 | device = 'cuda' if torch.cuda.is_available() else 'cpu'
134 | with torch.no_grad():
135 | for i, (inputs, targets) in enumerate(testloader):
136 | # inputs
137 | inputs, targets = inputs.to(device), targets.to(device)
138 |
139 | # compute output
140 | outputs = model(inputs)
141 | loss = criterion(outputs, targets)
142 |
143 | # measure accuracy and record loss
144 | acc1, _ = accuracy(outputs, targets, topk=(1,5))
145 | losses.update(loss.item(), inputs.size(0))
146 | top1.update(acc1[0], inputs.size(0))
147 |
148 | n_iter_val = epoch * len(testloader) + i
149 | writer.add_scalar('Test/Loss_clean', losses.val, n_iter_val)
150 | writer.add_scalar('Test/Prec@1_clean', top1.val, n_iter_val)
151 |
152 | return top1.avg
153 |
154 | class AverageMeter(object):
155 | """Computes and stores the average and current value"""
156 | def __init__(self):
157 | self.reset()
158 |
159 | def reset(self):
160 | self.val = 0
161 | self.avg = 0
162 | self.sum = 0
163 | self.count = 0
164 |
165 | def update(self, val, n=1):
166 | self.val = val
167 | self.sum += val * n
168 | self.count += n
169 | self.avg = self.sum / self.count
170 |
171 |
172 |
173 | def accuracy(output, target, topk=(1,)):
174 | """Computes the accuracy over the k top predictions for the specified values of k"""
175 | with torch.no_grad():
176 | maxk = max(topk)
177 | batch_size = target.size(0)
178 |
179 | _, pred = output.topk(maxk, 1, True, True)
180 | pred = pred.t()
181 | correct = pred.eq(target.view(1, -1).expand_as(pred))
182 |
183 | res = []
184 | for k in topk:
185 | correct_k = correct[:k].view(-1).float().sum(0, keepdim=True)
186 | res.append(correct_k.mul_(100.0 / batch_size))
187 | return res
188 |
189 | def eps_truncnorm():
190 | # paper:
191 | # eps drawn from a truncated normal schema in interval [0, 16] with [mean=0, std=8]:
192 | # a, b = (myclip_a - my_mean) / my_std, (myclip_b - my_mean) / my_std
193 | a = 0
194 | b = 2
195 | loc = 0
196 | scale = 8
197 |
198 | return truncnorm.rvs(a = 0, b = 2, loc = 0, scale = 8)
--------------------------------------------------------------------------------
/images/tensorboard.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JZ-LIANG/Ensemble-Adversarial-Training/255902d5ada181a727da666f75b08d121b3fd044/images/tensorboard.png
--------------------------------------------------------------------------------
/main_ens_adv_train.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.optim as optim
4 | import torch.nn.functional as F
5 | import torch.backends.cudnn as cudnn
6 | from torchvision import datasets, transforms
7 | import torchvision
8 | import numpy as np
9 | import os
10 | import argparse
11 | import pathlib
12 | from tensorboardX import SummaryWriter
13 |
14 | import sys
15 | from ens_adv_train import ens_adv_train, validate
16 |
17 | # import models
18 | from models.cifar10.resnet import ResNet34, ResNet101, ResNet18, ResNet50
19 | from models.cifar10.mobilenetv2_2 import MobileNetV2
20 | from models.cifar10.inception import GoogLeNet
21 |
22 |
23 | parser = argparse.ArgumentParser(description='Adv Training')
24 |
25 | parser.add_argument('--dataset', default='cifar10', type=str,
26 | help='select the training dataset')
27 |
28 | parser.add_argument('--epochs', default=200, type=int, metavar='N',
29 | help='number of total epochs to run')
30 |
31 | parser.add_argument('--eps', default = 2, type=float, metavar='M',
32 | help='option1: random epsilon distribution')
33 |
34 | parser.add_argument('--attacker', default='stepll', type=str,
35 | help='option2: attacker for generating adv input')
36 |
37 | parser.add_argument('--loss_schema', default='averaged', type=str,
38 | help='option3: loss schema')
39 |
40 |
41 | # reproducible
42 | torch.manual_seed(66)
43 | np.random.seed(66)
44 |
45 |
46 | ######################################### modify accordingly ##################################################
47 | # adv models: the static model used to generate adv input images
48 | # fixed to memory for all the trainings to speed up.
49 | adv_resnet18 = ResNet18()
50 | adv_resnet50 = ResNet50()
51 | adv_mobilenet_125 = MobileNetV2(width_mult=1.25)
52 | adv_googlenet = GoogLeNet()
53 |
54 |
55 | adv_models = [adv_resnet18, adv_resnet50, adv_mobilenet_125, adv_googlenet]
56 | adv_model_names = ['resnet18', 'resnet50', 'mobilenet_125', 'googlenet']
57 |
58 | # models: models for be adv training
59 | # loaded only on its training to save memory.
60 | model_classes = [ ResNet34, ResNet101, MobileNetV2, MobileNetV2]
61 | model_names = [ 'resnet34', 'resnet101', 'mobilenet_1', 'mobilenet_075']
62 | params = {
63 | 'mobilenet_1': 1.0,
64 | 'mobilenet_075': 0.75,
65 | }
66 |
67 |
68 | # path
69 | trial_name = 'adv_models:'
70 | for adv_model_name in adv_model_names:
71 | trial_name = trial_name + '-' + adv_model_name
72 | # path to pre-trained models checkpoints
73 | adv_checkpoint_path = 'checkpoints/cifar10/'
74 | output_path = 'checkpoints/adv_train/cifar10/' + trial_name +'/'
75 | tensorboard_path = 'tensorboard/cifar10/adv_train/' + trial_name +'/'
76 | ######################################### modify accordingly ##################################################
77 |
78 |
79 |
80 | if not os.path.isdir(output_path):
81 | pathlib.Path(output_path).mkdir(parents=True, exist_ok=True)
82 | if not os.path.isdir(tensorboard_path):
83 | pathlib.Path(tensorboard_path).mkdir(parents=True, exist_ok=True)
84 |
85 | def main(model_class, model_name, model_path, adv_models, writer, args):
86 | dataset = args.dataset
87 | epochs = args.epochs
88 |
89 | best_acc = 0
90 |
91 | # prepare data loader
92 | trainloader, testloader = get_data_loader(dataset)
93 |
94 | # create model
95 | if model_name in params.keys():
96 | model = model_class(params[model_name])
97 | else:
98 | model = model_class()
99 |
100 | device = 'cuda' if torch.cuda.is_available() else 'cpu'
101 |
102 | if device == 'cuda':
103 | model = torch.nn.DataParallel(model)
104 | model = model.cuda()
105 |
106 | # optimizer
107 | criterion = nn.CrossEntropyLoss(reduction = 'mean')
108 | # paper use RMSProp but author's github use adam, here we follow the author's github
109 | optimizer = optim.Adam(model.parameters(), lr= 0.001, weight_decay=5e-4)
110 |
111 | # training
112 | for epoch in range(epochs):
113 | ens_adv_train(trainloader, criterion, optimizer, model, adv_models, writer, epoch, args)
114 | acc = validate(testloader, model, criterion, writer, epoch)
115 |
116 | if acc > best_acc :
117 | best_acc = acc
118 | save_checkpoint(model, model_path, optimizer, best_acc, epoch)
119 |
120 |
121 |
122 | # save model
123 | def save_checkpoint(model, model_path, optimizer, best_acc, epoch):
124 | state = {
125 | 'state_dict': model.state_dict(),
126 | 'acc': best_acc,
127 | 'epoch': epoch,
128 | 'optimizer' : optimizer.state_dict(),
129 |
130 | }
131 | torch.save(state, model_path)
132 |
133 |
134 |
135 | def get_data_loader(dataset):
136 | if dataset == 'cifar10':
137 | transform_train = transforms.Compose([
138 | transforms.RandomCrop(32, padding=4),
139 | transforms.RandomHorizontalFlip(),
140 | transforms.ToTensor(),
141 | # mean subtract
142 | # transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
143 | ])
144 |
145 | transform_test = transforms.Compose([
146 | transforms.ToTensor(),
147 | # transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
148 | ])
149 |
150 | trainset = torchvision.datasets.CIFAR10(root='/home/deliangj/data/', train=True, download=False, transform=transform_train)
151 | trainloader = torch.utils.data.DataLoader(trainset, batch_size=128, shuffle=True, num_workers=4)
152 |
153 | testset = torchvision.datasets.CIFAR10(root='/home/deliangj/data/', train=False, download=False, transform=transform_test)
154 | testloader = torch.utils.data.DataLoader(testset, batch_size=256, shuffle=False, num_workers=4)
155 |
156 | classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')
157 |
158 | elif dataset == "cinic10":
159 | cinic_directory = '/home/deliangj/data/cinic10'
160 | cinic_mean = [0, 0, 0]
161 | cinic_std = [0, 0, 0]
162 |
163 | transform_train = transforms.Compose([
164 | transforms.RandomCrop(32, padding=4),
165 | transforms.RandomHorizontalFlip(),
166 | transforms.ToTensor(),
167 | # mean subtract
168 | transforms.Normalize(mean=cinic_mean,std=cinic_std)
169 | ,])
170 |
171 | transform_test = transforms.Compose([
172 | transforms.ToTensor(),
173 | transforms.Normalize(mean=cinic_mean,std=cinic_std),
174 | ])
175 |
176 | trainset = torchvision.datasets.ImageFolder(cinic_directory + '/train', transform=transform_train)
177 | trainloader = torch.utils.data.DataLoader(trainset, batch_size=128, shuffle=True, num_workers=4)
178 |
179 | testset = torchvision.datasets.ImageFolder(cinic_directory + '/test', transform=transform_test)
180 | testloader = torch.utils.data.DataLoader(testset, batch_size=256, shuffle=False, num_workers=4)
181 | else:
182 | print('not such dataset !')
183 | return
184 |
185 | return trainloader, testloader
186 |
187 |
188 | if __name__ == '__main__':
189 |
190 | # training parameters
191 | args = parser.parse_args()
192 |
193 | # checkpoint paths
194 | model_save_paths = [output_path + model_name + '.pth.tar' for model_name in model_names]
195 | adv_model_paths = [adv_checkpoint_path + adv_model_name + '.pth.tar' for adv_model_name in adv_model_names]
196 |
197 | # load adv models
198 | device = 'cuda' if torch.cuda.is_available() else 'cpu'
199 | if device == 'cuda':
200 | for i in range(len(adv_models)):
201 | adv_models[i] = torch.nn.DataParallel(adv_models[i])
202 | adv_models[i] = adv_models[i].cuda()
203 | # pre-trained static models !
204 | adv_models[i] = adv_models[i].eval()
205 | else:
206 | print('gpu not avaible please check !')
207 | sys.exit()
208 |
209 | # adv pre-trained static models
210 | for i in range(len(adv_model_paths)):
211 | checkpoint = torch.load(adv_model_paths[i])
212 | if 'state_dict' in checkpoint.keys():
213 | state = 'state_dict'
214 | elif 'net' in checkpoint.keys():
215 | state = 'net'
216 | adv_models[i].load_state_dict(checkpoint[state])
217 |
218 | # starting training each model
219 | for i in range(len(model_classes)):
220 | print('adv training model: ' + model_names[i])
221 | writer = SummaryWriter(tensorboard_path + model_names[i])
222 | main(model_classes[i], model_names[i], model_save_paths[i], adv_models, writer, args)
223 |
224 |
--------------------------------------------------------------------------------
/models/cifar10/inception.py:
--------------------------------------------------------------------------------
1 | '''
2 | https://github.com/kuangliu/pytorch-cifar/blob/master/models/googlenet.py
3 | '''
4 | import torch
5 | import torch.nn as nn
6 | import torch.nn.functional as F
7 |
8 |
9 | class Inception(nn.Module):
10 | def __init__(self, in_planes, n1x1, n3x3red, n3x3, n5x5red, n5x5, pool_planes):
11 | super(Inception, self).__init__()
12 | # 1x1 conv branch
13 | self.b1 = nn.Sequential(
14 | nn.Conv2d(in_planes, n1x1, kernel_size=1),
15 | nn.BatchNorm2d(n1x1),
16 | nn.ReLU(True),
17 | )
18 |
19 | # 1x1 conv -> 3x3 conv branch
20 | self.b2 = nn.Sequential(
21 | nn.Conv2d(in_planes, n3x3red, kernel_size=1),
22 | nn.BatchNorm2d(n3x3red),
23 | nn.ReLU(True),
24 | nn.Conv2d(n3x3red, n3x3, kernel_size=3, padding=1),
25 | nn.BatchNorm2d(n3x3),
26 | nn.ReLU(True),
27 | )
28 |
29 | # 1x1 conv -> 5x5 conv branch
30 | self.b3 = nn.Sequential(
31 | nn.Conv2d(in_planes, n5x5red, kernel_size=1),
32 | nn.BatchNorm2d(n5x5red),
33 | nn.ReLU(True),
34 | nn.Conv2d(n5x5red, n5x5, kernel_size=3, padding=1),
35 | nn.BatchNorm2d(n5x5),
36 | nn.ReLU(True),
37 | nn.Conv2d(n5x5, n5x5, kernel_size=3, padding=1),
38 | nn.BatchNorm2d(n5x5),
39 | nn.ReLU(True),
40 | )
41 |
42 | # 3x3 pool -> 1x1 conv branch
43 | self.b4 = nn.Sequential(
44 | nn.MaxPool2d(3, stride=1, padding=1),
45 | nn.Conv2d(in_planes, pool_planes, kernel_size=1),
46 | nn.BatchNorm2d(pool_planes),
47 | nn.ReLU(True),
48 | )
49 |
50 | def forward(self, x):
51 | y1 = self.b1(x)
52 | y2 = self.b2(x)
53 | y3 = self.b3(x)
54 | y4 = self.b4(x)
55 | return torch.cat([y1,y2,y3,y4], 1)
56 |
57 |
58 | class GoogLeNet(nn.Module):
59 | def __init__(self):
60 | super(GoogLeNet, self).__init__()
61 | self.pre_layers = nn.Sequential(
62 | nn.Conv2d(3, 192, kernel_size=3, padding=1),
63 | nn.BatchNorm2d(192),
64 | nn.ReLU(True),
65 | )
66 |
67 | self.a3 = Inception(192, 64, 96, 128, 16, 32, 32)
68 | self.b3 = Inception(256, 128, 128, 192, 32, 96, 64)
69 |
70 | self.maxpool = nn.MaxPool2d(3, stride=2, padding=1)
71 |
72 | self.a4 = Inception(480, 192, 96, 208, 16, 48, 64)
73 | self.b4 = Inception(512, 160, 112, 224, 24, 64, 64)
74 | self.c4 = Inception(512, 128, 128, 256, 24, 64, 64)
75 | self.d4 = Inception(512, 112, 144, 288, 32, 64, 64)
76 | self.e4 = Inception(528, 256, 160, 320, 32, 128, 128)
77 |
78 | self.a5 = Inception(832, 256, 160, 320, 32, 128, 128)
79 | self.b5 = Inception(832, 384, 192, 384, 48, 128, 128)
80 |
81 | self.avgpool = nn.AvgPool2d(8, stride=1)
82 | self.linear = nn.Linear(1024, 10)
83 |
84 | def forward(self, x):
85 | out = self.pre_layers(x)
86 | out = self.a3(out)
87 | out = self.b3(out)
88 | out = self.maxpool(out)
89 | out = self.a4(out)
90 | out = self.b4(out)
91 | out = self.c4(out)
92 | out = self.d4(out)
93 | out = self.e4(out)
94 | out = self.maxpool(out)
95 | out = self.a5(out)
96 | out = self.b5(out)
97 | out = self.avgpool(out)
98 | out = out.view(out.size(0), -1)
99 | out = self.linear(out)
100 | return out
101 |
102 |
103 | def test():
104 | net = GoogLeNet()
105 | x = torch.randn(1,3,32,32)
106 | y = net(x)
107 | print(y.size())
--------------------------------------------------------------------------------
/models/cifar10/mobilenetv2_2.py:
--------------------------------------------------------------------------------
1 | '''
2 | https://github.com/tonylins/pytorch-mobilenet-v2/blob/master/MobileNetV2.py
3 | make some small modification to adapt input from imagenet to cifar10.
4 | '''
5 | import torch.nn as nn
6 | import math
7 |
8 |
9 | def conv_bn(inp, oup, stride):
10 | return nn.Sequential(
11 | nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
12 | nn.BatchNorm2d(oup),
13 | nn.ReLU6(inplace=True)
14 | )
15 |
16 |
17 | def conv_1x1_bn(inp, oup):
18 | return nn.Sequential(
19 | nn.Conv2d(inp, oup, 1, 1, 0, bias=False),
20 | nn.BatchNorm2d(oup),
21 | nn.ReLU6(inplace=True)
22 | )
23 |
24 |
25 | class InvertedResidual(nn.Module):
26 | def __init__(self, inp, oup, stride, expand_ratio):
27 | super(InvertedResidual, self).__init__()
28 | self.stride = stride
29 | assert stride in [1, 2]
30 |
31 | hidden_dim = round(inp * expand_ratio)
32 |
33 | self.use_res_connect = self.stride == 1 and inp == oup
34 |
35 | if expand_ratio == 1:
36 | self.conv = nn.Sequential(
37 | # dw
38 | nn.Conv2d(hidden_dim, hidden_dim, 3, stride, 1, groups=hidden_dim, bias=False),
39 | nn.BatchNorm2d(hidden_dim),
40 | nn.ReLU6(inplace=True),
41 | # pw-linear
42 | nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
43 | nn.BatchNorm2d(oup),
44 | )
45 | else:
46 | self.conv = nn.Sequential(
47 | # pw
48 | nn.Conv2d(inp, hidden_dim, 1, 1, 0, bias=False),
49 | nn.BatchNorm2d(hidden_dim),
50 | nn.ReLU6(inplace=True),
51 | # dw
52 | nn.Conv2d(hidden_dim, hidden_dim, 3, stride, 1, groups=hidden_dim, bias=False),
53 | nn.BatchNorm2d(hidden_dim),
54 | nn.ReLU6(inplace=True),
55 | # pw-linear
56 | nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
57 | nn.BatchNorm2d(oup),
58 | )
59 |
60 | def forward(self, x):
61 | if self.use_res_connect:
62 | return x + self.conv(x)
63 | else:
64 | return self.conv(x)
65 |
66 |
67 | class MobileNetV2(nn.Module):
68 | def __init__(self, width_mult=1., n_class=10, input_size=32):
69 | super(MobileNetV2, self).__init__()
70 | block = InvertedResidual
71 | input_channel = 32
72 | last_channel = 1280
73 | print ('width_mult =', width_mult)
74 |
75 | interverted_residual_setting = [
76 | (1, 16, 1, 1),
77 | (6, 24, 2, 1), # NOTE: change stride 2 -> 1 for CIFAR10
78 | (6, 32, 3, 2),
79 | (6, 64, 4, 2),
80 | (6, 96, 3, 1),
81 | (6, 160, 3, 2),
82 | (6, 320, 1, 1)]
83 |
84 | # building first layer
85 | assert input_size % 32 == 0
86 | ## (0.37 * 5) = 1.85; int(0.37 * 5) = 1
87 | input_channel = int(input_channel * width_mult)
88 | # we apply width multiplier to all layers except the very last convolutional layer. This improves performance for smaller models
89 | self.last_channel = int(last_channel * width_mult) if width_mult > 1.0 else last_channel
90 | self.features = [conv_bn(3, input_channel,1)]# NOTE: change conv1 stride 2 -> 1 for CIFAR10
91 | # building inverted residual blocks
92 | for t, c, n, s in interverted_residual_setting:
93 | output_channel = int(c * width_mult)
94 | for i in range(n):
95 | if i == 0:
96 | self.features.append(block(input_channel, output_channel, s, expand_ratio=t))
97 | else:
98 | self.features.append(block(input_channel, output_channel, 1, expand_ratio=t))
99 | input_channel = output_channel
100 | # building last several layers
101 | self.features.append(conv_1x1_bn(input_channel, self.last_channel))
102 | # make it nn.Sequential
103 | self.features = nn.Sequential(*self.features)
104 |
105 | # building classifier
106 | self.classifier = nn.Sequential(
107 | nn.Dropout(0.2),
108 | nn.Linear(self.last_channel, n_class),
109 | )
110 |
111 | self._initialize_weights()
112 |
113 | def forward(self, x):
114 | x = self.features(x)
115 | # torch tensor (C, H, W)
116 | x = x.mean(3).mean(2)
117 | x = self.classifier(x)
118 | return x
119 |
120 | def _initialize_weights(self):
121 | for m in self.modules():
122 | if isinstance(m, nn.Conv2d):
123 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
124 | m.weight.data.normal_(0, math.sqrt(2. / n))
125 | if m.bias is not None:
126 | m.bias.data.zero_()
127 | elif isinstance(m, nn.BatchNorm2d):
128 | m.weight.data.fill_(1)
129 | m.bias.data.zero_()
130 | elif isinstance(m, nn.Linear):
131 | n = m.weight.size(1)
132 | m.weight.data.normal_(0, 0.01)
133 | m.bias.data.zero_()
134 |
--------------------------------------------------------------------------------
/models/cifar10/resnet.py:
--------------------------------------------------------------------------------
1 | '''
2 | https://github.com/kuangliu/pytorch-cifar/blob/master/models/resnet.py
3 | '''
4 | import torch
5 | import torch.nn as nn
6 | import torch.nn.functional as F
7 |
8 |
9 | class BasicBlock(nn.Module):
10 | expansion = 1
11 |
12 | def __init__(self, in_planes, planes, stride=1):
13 | super(BasicBlock, self).__init__()
14 | self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
15 | self.bn1 = nn.BatchNorm2d(planes)
16 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
17 | self.bn2 = nn.BatchNorm2d(planes)
18 |
19 | self.shortcut = nn.Sequential()
20 | if stride != 1 or in_planes != self.expansion*planes:
21 | self.shortcut = nn.Sequential(
22 | nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False),
23 | nn.BatchNorm2d(self.expansion*planes)
24 | )
25 |
26 | def forward(self, x):
27 | out = F.relu(self.bn1(self.conv1(x)))
28 | out = self.bn2(self.conv2(out))
29 | out += self.shortcut(x)
30 | out = F.relu(out)
31 | return out
32 |
33 |
34 | class Bottleneck(nn.Module):
35 | expansion = 4
36 |
37 | def __init__(self, in_planes, planes, stride=1):
38 | super(Bottleneck, self).__init__()
39 | self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
40 | self.bn1 = nn.BatchNorm2d(planes)
41 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
42 | self.bn2 = nn.BatchNorm2d(planes)
43 | self.conv3 = nn.Conv2d(planes, self.expansion*planes, kernel_size=1, bias=False)
44 | self.bn3 = nn.BatchNorm2d(self.expansion*planes)
45 |
46 | self.shortcut = nn.Sequential()
47 | if stride != 1 or in_planes != self.expansion*planes:
48 | self.shortcut = nn.Sequential(
49 | nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False),
50 | nn.BatchNorm2d(self.expansion*planes)
51 | )
52 |
53 | def forward(self, x):
54 | out = F.relu(self.bn1(self.conv1(x)))
55 | out = F.relu(self.bn2(self.conv2(out)))
56 | out = self.bn3(self.conv3(out))
57 | out += self.shortcut(x)
58 | out = F.relu(out)
59 | return out
60 |
61 |
62 | class ResNet(nn.Module):
63 | def __init__(self, block, num_blocks, num_classes=10):
64 | super(ResNet, self).__init__()
65 | self.in_planes = 64
66 |
67 | self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
68 | self.bn1 = nn.BatchNorm2d(64)
69 | self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
70 | self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
71 | self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
72 | self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
73 | self.linear = nn.Linear(512*block.expansion, num_classes)
74 |
75 | def _make_layer(self, block, planes, num_blocks, stride):
76 | strides = [stride] + [1]*(num_blocks-1)
77 | layers = []
78 | for stride in strides:
79 | layers.append(block(self.in_planes, planes, stride))
80 | self.in_planes = planes * block.expansion
81 | return nn.Sequential(*layers)
82 |
83 | def forward(self, x):
84 | out = F.relu(self.bn1(self.conv1(x)))
85 | out = self.layer1(out)
86 | out = self.layer2(out)
87 | out = self.layer3(out)
88 | out = self.layer4(out)
89 | out = F.avg_pool2d(out, 4)
90 | out = out.view(out.size(0), -1)
91 | out = self.linear(out)
92 | return out
93 |
94 |
95 | def ResNet18():
96 | return ResNet(BasicBlock, [2,2,2,2])
97 |
98 | def ResNet34():
99 | return ResNet(BasicBlock, [3,4,6,3])
100 |
101 | def ResNet50():
102 | return ResNet(Bottleneck, [3,4,6,3])
103 |
104 | def ResNet101():
105 | return ResNet(Bottleneck, [3,4,23,3])
106 |
107 | def ResNet152():
108 | return ResNet(Bottleneck, [3,8,36,3])
109 |
110 |
111 | def test():
112 | net = ResNet18()
113 | y = net(torch.randn(1,3,32,32))
114 | print(y.size())
115 |
116 | # test()
117 |
--------------------------------------------------------------------------------
/tensorboard/cifar10/adv_train/adv_models:-resnet18-resnet50-mobilenet_125-googlenet/resnet101/events.out.tfevents.1558584038.destc0strapp81.eu.sony.com:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JZ-LIANG/Ensemble-Adversarial-Training/255902d5ada181a727da666f75b08d121b3fd044/tensorboard/cifar10/adv_train/adv_models:-resnet18-resnet50-mobilenet_125-googlenet/resnet101/events.out.tfevents.1558584038.destc0strapp81.eu.sony.com
--------------------------------------------------------------------------------
/tensorboard/cifar10/adv_train/adv_models:-resnet18-resnet50-mobilenet_125-googlenet/resnet34/events.out.tfevents.1558545149.destc0strapp81.eu.sony.com:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JZ-LIANG/Ensemble-Adversarial-Training/255902d5ada181a727da666f75b08d121b3fd044/tensorboard/cifar10/adv_train/adv_models:-resnet18-resnet50-mobilenet_125-googlenet/resnet34/events.out.tfevents.1558545149.destc0strapp81.eu.sony.com
--------------------------------------------------------------------------------