├── .gitignore ├── Attacks └── Gradient_based │ ├── fast_gradient_method.py │ ├── least_likely_class_method.py │ └── utils_pytorch.py ├── README.md ├── ens_adv_train.py ├── images └── tensorboard.png ├── main_ens_adv_train.py ├── models └── cifar10 │ ├── inception.py │ ├── mobilenetv2_2.py │ └── resnet.py └── tensorboard └── cifar10 └── adv_train └── adv_models:-resnet18-resnet50-mobilenet_125-googlenet ├── resnet101 └── events.out.tfevents.1558584038.destc0strapp81.eu.sony.com └── resnet34 └── events.out.tfevents.1558545149.destc0strapp81.eu.sony.com /.gitignore: -------------------------------------------------------------------------------- 1 | .ipynb_checkpoints 2 | __pycache__ 3 | history_main 4 | checkpoints/* 5 | *.pyc 6 | *.tar 7 | *.pth 8 | *.log 9 | *.txt 10 | *.so 11 | 12 | ** temp/ 13 | temp 14 | *.log 15 | *.sh 16 | 17 | -------------------------------------------------------------------------------- /Attacks/Gradient_based/fast_gradient_method.py: -------------------------------------------------------------------------------- 1 | ''' 2 | https://github.com/tensorflow/cleverhans/blob/master/cleverhans/utils_pytorch.py 3 | 4 | ## modification: 5 | model_fn.zero_grad() 6 | ''' 7 | 8 | import numpy as np 9 | import torch 10 | 11 | from Attacks.Gradient_based.utils_pytorch import optimize_linear 12 | 13 | 14 | def fast_gradient_method(model_fn, x, eps, ord, 15 | clip_min=None, clip_max=None, y=None, targeted=False, sanity_checks=False): 16 | """ 17 | PyTorch implementation of the Fast Gradient Method. 18 | :param model_fn: a callable that takes an input tensor and returns the model logits. 19 | :param x: input tensor. 20 | :param eps: epsilon (input variation parameter); see https://arxiv.org/abs/1412.6572. 21 | :param ord: Order of the norm (mimics NumPy). Possible values: np.inf, 1 or 2. 22 | :param clip_min: (optional) float. Minimum float value for adversarial example components. 23 | :param clip_max: (optional) float. 
Maximum float value for adversarial example components. 24 | :param y: (optional) Tensor with true labels. If targeted is true, then provide the 25 | target label. Otherwise, only provide this parameter if you'd like to use true 26 | labels when crafting adversarial samples. Otherwise, model predictions are used 27 | as labels to avoid the "label leaking" effect (explained in this paper: 28 | https://arxiv.org/abs/1611.01236). Default is None. 29 | :param targeted: (optional) bool. Is the attack targeted or untargeted? 30 | Untargeted, the default, will try to make the label incorrect. 31 | Targeted will instead try to move in the direction of being more like y. 32 | :param sanity_checks: bool, if True, include asserts (Turn them off to use less runtime / 33 | memory or for unit tests that intentionally pass strange input) 34 | :return: a tensor for the adversarial example 35 | """ 36 | if ord not in [np.inf, 1, 2]: 37 | raise ValueError("Norm order must be either np.inf, 1, or 2.") 38 | 39 | asserts = [] 40 | 41 | # If a data range was specified, check that the input was in that range 42 | if clip_min is not None: 43 | assert_ge = torch.all(torch.ge(x, torch.tensor(clip_min, device=x.device, dtype=x.dtype))) 44 | asserts.append(assert_ge) 45 | 46 | if clip_max is not None: 47 | assert_le = torch.all(torch.le(x, torch.tensor(clip_max, device=x.device, dtype=x.dtype))) 48 | asserts.append(assert_le) 49 | 50 | # x needs to be a leaf variable, of floating point type and have requires_grad being True for 51 | # its grad to be computed and stored properly in a backward call 52 | x = x.clone().detach().to(torch.float).requires_grad_(True) 53 | if y is None: 54 | # Using model predictions as ground truth to avoid label leaking 55 | _, y = torch.max(model_fn(x), 1) 56 | 57 | # Compute loss 58 | loss_fn = torch.nn.CrossEntropyLoss() 59 | loss = loss_fn(model_fn(x), y) 60 | # If attack is targeted, minimize loss of target label rather than maximize loss of correct label 61 | 
if targeted: 62 | loss = -loss 63 | 64 | # Define gradient of loss wrt input 65 | model_fn.zero_grad() 66 | loss.backward() 67 | optimal_perturbation = optimize_linear(x.grad, eps, ord) 68 | 69 | # Add perturbation to original example to obtain adversarial example 70 | adv_x = x + optimal_perturbation 71 | 72 | # If clipping is needed, reset all values outside of [clip_min, clip_max] 73 | if (clip_min is not None) or (clip_max is not None): 74 | # We don't currently support one-sided clipping 75 | assert clip_min is not None and clip_max is not None 76 | adv_x = torch.clamp(adv_x, clip_min, clip_max) 77 | 78 | if sanity_checks: 79 | assert np.all(asserts) 80 | return adv_x -------------------------------------------------------------------------------- /Attacks/Gradient_based/least_likely_class_method.py: -------------------------------------------------------------------------------- 1 | """ 2 | The least_likely_class_method attack 3 | modified from fast_gradient_method 4 | 5 | """ 6 | import numpy as np 7 | import torch 8 | import sys 9 | 10 | from Attacks.Gradient_based.utils_pytorch import optimize_linear 11 | 12 | 13 | def least_likely_class_method(model_fn, x, eps, ord = np.inf, 14 | clip_min=None, clip_max=None, sanity_checks=False): 15 | """ 16 | PyTorch implementation of the Fast Gradient Method. 17 | :param model_fn: a callable that takes an input tensor and returns the model logits. 18 | :param x: input tensor. 19 | :param eps: epsilon (input variation parameter); see https://arxiv.org/abs/1412.6572. 20 | :param ord: Order of the norm (mimics NumPy). Possible values: np.inf, 1 or 2. 21 | :param clip_min: (optional) float. Minimum float value for adversarial example components. 22 | :param clip_max: (optional) float. Maximum float value for adversarial example components. 23 | :param y: (optional) Tensor with true labels. If targeted is true, then provide the 24 | target label. 
Otherwise, only provide this parameter if you'd like to use true 25 | labels when crafting adversarial samples. Otherwise, model predictions are used 26 | as labels to avoid the "label leaking" effect (explained in this paper: 27 | https://arxiv.org/abs/1611.01236). Default is None. 28 | :param targeted: (optional) bool. Is the attack targeted or untargeted? 29 | Untargeted, the default, will try to make the label incorrect. 30 | Targeted will instead try to move in the direction of being more like y. 31 | :param sanity_checks: bool, if True, include asserts (Turn them off to use less runtime / 32 | memory or for unit tests that intentionally pass strange input) 33 | :return: a tensor for the adversarial example 34 | """ 35 | if ord not in [np.inf, 1, 2]: 36 | raise ValueError("Norm order must be either np.inf, 1, or 2.") 37 | 38 | asserts = [] 39 | 40 | # If a data range was specified, check that the input was in that range 41 | if clip_min is not None: 42 | assert_ge = torch.all(torch.ge(x, torch.tensor(clip_min, device=x.device, dtype=x.dtype))) 43 | asserts.append(assert_ge) 44 | 45 | if clip_max is not None: 46 | assert_le = torch.all(torch.le(x, torch.tensor(clip_max, device=x.device, dtype=x.dtype))) 47 | asserts.append(assert_le) 48 | 49 | # x needs to be a leaf variable, of floating point type and have requires_grad being True for 50 | # its grad to be computed and stored properly in a backward call 51 | x = x.clone().detach().to(torch.float).requires_grad_(True) 52 | 53 | 54 | # compute the least likely label 55 | _, y_ll = torch.min(model_fn(x), 1) 56 | 57 | # Compute loss 58 | loss_fn = torch.nn.CrossEntropyLoss() 59 | loss = loss_fn(model_fn(x), y_ll) 60 | # If attack is targeted, minimize loss of target label rather than maximize loss of correct label 61 | loss = -loss 62 | 63 | # Define gradient of loss wrt input 64 | model_fn.zero_grad() 65 | loss.backward() 66 | optimal_perturbation = optimize_linear(x.grad, eps, ord) 67 | 68 | # Add perturbation 
to original example to obtain adversarial example 69 | adv_x = x + optimal_perturbation 70 | 71 | # If clipping is needed, reset all values outside of [clip_min, clip_max] 72 | if (clip_min is not None) or (clip_max is not None): 73 | # We don't currently support one-sided clipping 74 | assert clip_min is not None and clip_max is not None 75 | adv_x = torch.clamp(adv_x, clip_min, clip_max) 76 | 77 | if sanity_checks: 78 | assert np.all(asserts) 79 | return adv_x -------------------------------------------------------------------------------- /Attacks/Gradient_based/utils_pytorch.py: -------------------------------------------------------------------------------- 1 | """ 2 | https://github.com/tensorflow/cleverhans/blob/master/cleverhans/utils_pytorch.py 3 | """ 4 | 5 | import warnings 6 | from random import getrandbits 7 | 8 | import numpy as np 9 | # import tensorflow as tf 10 | import torch 11 | from torch.autograd import Variable 12 | 13 | 14 | # https://gist.github.com/kingspp/3ec7d9958c13b94310c1a365759aa3f4 15 | # Pyfunc Gradient Function 16 | def _py_func_with_gradient(func, inp, Tout, stateful=True, name=None, 17 | grad_func=None): 18 | """ 19 | PyFunc defined as given by Tensorflow 20 | :param func: Custom Function 21 | :param inp: Function Inputs 22 | :param Tout: Ouput Type of out Custom Function 23 | :param stateful: Calculate Gradients when stateful is True 24 | :param name: Name of the PyFunction 25 | :param grad: Custom Gradient Function 26 | :return: 27 | """ 28 | # Generate random name in order to avoid conflicts with inbuilt names 29 | rnd_name = 'PyFuncGrad-' + '%0x' % getrandbits(30 * 4) 30 | 31 | # Register Tensorflow Gradient 32 | tf.RegisterGradient(rnd_name)(grad_func) 33 | 34 | # Get current graph 35 | g = tf.get_default_graph() 36 | 37 | # Add gradient override map 38 | with g.gradient_override_map({"PyFunc": rnd_name, 39 | "PyFuncStateless": rnd_name}): 40 | return tf.py_func(func, inp, Tout, stateful=stateful, name=name) 41 | 42 | 43 | 
def convert_pytorch_model_to_tf(model, out_dims=None): 44 | """ 45 | Convert a pytorch model into a tensorflow op that allows backprop 46 | :param model: A pytorch nn.Module object 47 | :param out_dims: The number of output dimensions (classes) for the model 48 | :return: A model function that maps an input (tf.Tensor) to the 49 | output of the model (tf.Tensor) 50 | """ 51 | warnings.warn("convert_pytorch_model_to_tf is deprecated, switch to" 52 | + " dedicated PyTorch support provided by CleverHans v4.") 53 | 54 | torch_state = { 55 | 'logits': None, 56 | 'x': None, 57 | } 58 | if not out_dims: 59 | out_dims = list(model.modules())[-1].out_features 60 | 61 | def _fprop_fn(x_np): 62 | """TODO: write this""" 63 | x_tensor = torch.Tensor(x_np) 64 | if torch.cuda.is_available(): 65 | x_tensor = x_tensor.cuda() 66 | torch_state['x'] = Variable(x_tensor, requires_grad=True) 67 | torch_state['logits'] = model(torch_state['x']) 68 | return torch_state['logits'].data.cpu().numpy() 69 | 70 | def _bprop_fn(x_np, grads_in_np): 71 | """TODO: write this""" 72 | _fprop_fn(x_np) 73 | 74 | grads_in_tensor = torch.Tensor(grads_in_np) 75 | if torch.cuda.is_available(): 76 | grads_in_tensor = grads_in_tensor.cuda() 77 | 78 | # Run our backprop through our logits to our xs 79 | loss = torch.sum(torch_state['logits'] * grads_in_tensor) 80 | loss.backward() 81 | return torch_state['x'].grad.cpu().data.numpy() 82 | 83 | def _tf_gradient_fn(op, grads_in): 84 | """TODO: write this""" 85 | return tf.py_func(_bprop_fn, [op.inputs[0], grads_in], 86 | Tout=[tf.float32]) 87 | 88 | def tf_model_fn(x_op): 89 | """TODO: write this""" 90 | out = _py_func_with_gradient(_fprop_fn, [x_op], Tout=[tf.float32], 91 | stateful=True, 92 | grad_func=_tf_gradient_fn)[0] 93 | out.set_shape([None, out_dims]) 94 | return out 95 | 96 | return tf_model_fn 97 | 98 | 99 | def clip_eta(eta, ord, eps): 100 | """ 101 | PyTorch implementation of the clip_eta in utils_tf. 
102 | :param eta: Tensor 103 | :param ord: np.inf, 1, or 2 104 | :param eps: float 105 | """ 106 | if ord not in [np.inf, 1, 2]: 107 | raise ValueError('ord must be np.inf, 1, or 2.') 108 | 109 | avoid_zero_div = torch.tensor(1e-12, dtype=eta.dtype, device=eta.device) 110 | reduc_ind = list(range(1, len(eta.size()))) 111 | if ord == np.inf: 112 | eta = torch.clamp(eta, -eps, eps) 113 | else: 114 | if ord == 1: 115 | # TODO 116 | # raise NotImplementedError("L1 clip is not implemented.") 117 | norm = torch.max( 118 | avoid_zero_div, 119 | torch.sum(torch.abs(eta), dim=reduc_ind, keepdim=True) 120 | ) 121 | elif ord == 2: 122 | norm = torch.sqrt(torch.max( 123 | avoid_zero_div, 124 | torch.sum(eta ** 2, dim=reduc_ind, keepdim=True) 125 | )) 126 | factor = torch.min( 127 | torch.tensor(1., dtype=eta.dtype, device=eta.device), 128 | eps / norm 129 | ) 130 | eta *= factor 131 | return eta 132 | 133 | def get_or_guess_labels(model, x, **kwargs): 134 | """ 135 | Get the label to use in generating an adversarial example for x. 136 | The kwargs are fed directly from the kwargs of the attack. 137 | If 'y' is in kwargs, then assume it's an untargeted attack and 138 | use that as the label. 139 | If 'y_target' is in kwargs and is not none, then assume it's a 140 | targeted attack and use that as the label. 141 | Otherwise, use the model's prediction as the label and perform an 142 | untargeted attack. 143 | :param model: PyTorch model. Do not add a softmax gate to the output. 144 | :param x: Tensor, shape (N, d_1, ...). 145 | :param y: (optional) Tensor, shape (N). 146 | :param y_target: (optional) Tensor, shape (N). 
147 | """ 148 | if 'y' in kwargs and 'y_target' in kwargs: 149 | raise ValueError("Can not set both 'y' and 'y_target'.") 150 | if 'y' in kwargs: 151 | labels = kwargs['y'] 152 | elif 'y_target' in kwargs and kwargs['y_target'] is not None: 153 | labels = kwargs['y_target'] 154 | else: 155 | _, labels = torch.max(model(x), 1) 156 | return labels 157 | 158 | 159 | def optimize_linear(grad, eps, ord=np.inf): 160 | """ 161 | Solves for the optimal input to a linear function under a norm constraint. 162 | Optimal_perturbation = argmax_{eta, ||eta||_{ord} < eps} dot(eta, grad) 163 | :param grad: Tensor, shape (N, d_1, ...). Batch of gradients 164 | :param eps: float. Scalar specifying size of constraint region 165 | :param ord: np.inf, 1, or 2. Order of norm constraint. 166 | :returns: Tensor, shape (N, d_1, ...). Optimal perturbation 167 | """ 168 | 169 | red_ind = list(range(1, len(grad.size()))) 170 | avoid_zero_div = torch.tensor(1e-12, dtype=grad.dtype, device=grad.device) 171 | if ord == np.inf: 172 | # Take sign of gradient 173 | optimal_perturbation = torch.sign(grad) 174 | elif ord == 1: 175 | abs_grad = torch.abs(grad) 176 | sign = torch.sign(grad) 177 | red_ind = list(range(1, len(grad.size()))) 178 | abs_grad = torch.abs(grad) 179 | ori_shape = [1]*len(grad.size()) 180 | ori_shape[0] = grad.size(0) 181 | 182 | max_abs_grad, _ = torch.max(abs_grad.view(grad.size(0), -1), 1) 183 | max_mask = abs_grad.eq(max_abs_grad.view(ori_shape)).to(torch.float) 184 | num_ties = max_mask 185 | for red_scalar in red_ind: 186 | num_ties = torch.sum(num_ties, red_scalar, keepdim=True) 187 | optimal_perturbation = sign * max_mask / num_ties 188 | # TODO integrate below to a test file 189 | # check that the optimal perturbations have been correctly computed 190 | opt_pert_norm = optimal_perturbation.abs().sum(dim=red_ind) 191 | assert torch.all(opt_pert_norm == torch.ones_like(opt_pert_norm)) 192 | elif ord == 2: 193 | square = torch.max( 194 | avoid_zero_div, 195 | 
torch.sum(grad ** 2, red_ind, keepdim=True) 196 | ) 197 | optimal_perturbation = grad / torch.sqrt(square) 198 | # TODO integrate below to a test file 199 | # check that the optimal perturbations have been correctly computed 200 | opt_pert_norm = optimal_perturbation.pow(2).sum(dim=red_ind, keepdim=True).sqrt() 201 | one_mask = (square <= avoid_zero_div).to(torch.float) * opt_pert_norm + \ 202 | (square > avoid_zero_div).to(torch.float) 203 | assert torch.allclose(opt_pert_norm, one_mask, rtol=1e-05, atol=1e-08) 204 | else: 205 | raise NotImplementedError("Only L-inf, L1 and L2 norms are " 206 | "currently implemented.") 207 | 208 | # Scale perturbation to be the solution for the norm=eps rather than 209 | # norm=1 problem 210 | scaled_perturbation = eps * optimal_perturbation 211 | return scaled_perturbation -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Ensemble Adversarial Training 2 | 3 | 4 | This repository contains code for performing ensemble adversarial training in Pytorch. 5 | 6 | 7 | ## Reference 8 | 9 | The code takes followings as reference and has some little modifications: 10 | 11 | * Florian, Tramèr, et al. "[Ensemble Adversarial Training: Attacks and Defenses](https://arxiv.org/abs/1705.07204)" arXiv preprint arXiv:1705.07204 (2018). 12 | * Alexey, Kurakin, et al. "[Adversarial Machine Learning at Scale](https://arxiv.org/abs/1611.01236)" arXiv preprint arXiv:1611.01236 (2017). 13 | * [ftramer/ensemble-adv-training](https://github.com/ftramer/ensemble-adv-training) 14 | 15 | 16 | 17 |
18 | 19 | ## REQUIREMENTS 20 | 21 | The code was tested with Python 3.6, Pytorch 1.0.1. 22 | tensorboardX 23 | scipy 24 | 25 | 26 |
27 | 28 | 29 | ## Getting started 30 | 1. Prepare your pre-trained-static models (`Defense/models/` and `Defense/checkpoints/` )and modify the variable and directory to them accordingly in main file. 31 | 32 | ```python 33 | # adv models: the static model used to generate adv input images 34 | adv_resnet18 = ResNet18() 35 | adv_resnet50 = ResNet50() 36 | adv_mobilenet_125 = MobileNetV2(width_mult=1.25) 37 | adv_googlenet = GoogLeNet() 38 | 39 | adv_models = [adv_resnet18, adv_resnet50, adv_mobilenet_125, adv_googlenet] 40 | adv_model_names = ['resnet18', 'resnet50', 'mobilenet_125', 'googlenet'] 41 | 42 | ``` 43 | 44 | 45 | 2. Setting the models to be trained and output and log directories. 46 | 47 | ```python 48 | # models: models for be adv training 49 | model_classes = [ ResNet34, ResNet101, MobileNetV2, MobileNetV2] 50 | model_names = [ 'resnet34', 'resnet101', 'mobilenet_1', 'mobilenet_075'] 51 | params = { 52 | 'mobilenet_1': 1.0, 53 | 'mobilenet_075': 0.75, 54 | } 55 | 56 | adv_checkpoint_root_path = 'checkpoints/cifar10/' 57 | output_path = 'checkpoints/adv_train/cifar10/' + trial_name +'/' 58 | tensorboard_path = 'tensorboard/cifar10/adv_train/' + trial_name +'/' 59 | ``` 60 | 61 | 3. 
Set the options for the training scheme, taken from [arXiv:1611.01236](https://arxiv.org/abs/1611.01236) and [arXiv:1705.07204](https://arxiv.org/abs/1705.07204): 62 | * option1: distribution of the random epsilon 63 | ``` 64 | args.eps in range (0,1) : fixed epsilon 65 | args.eps = 1 : [arXiv:1611.01236](https://arxiv.org/abs/1611.01236), favors small epsilon 66 | args.eps = 2 : uniform distribution, equal probability for large and small eps 67 | ``` 68 | * option2: attack method 69 | ``` 70 | args.attacker = 'stepll' : Step.L.L adv input, [arXiv:1705.07204](https://arxiv.org/abs/1705.07204) 71 | args.attacker = 'fgsm' : FGSM adv input 72 | ``` 73 | * option3: loss 74 | ``` 75 | args.loss_schema = 'averaged': unbiased loss (clean and adv losses weighted equally) 76 | args.loss_schema = 'weighted': [arXiv:1611.01236](https://arxiv.org/abs/1611.01236), loss favors clean input 77 | ``` 78 | 79 |
80 | 81 | 82 | ## Simple Example 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 |
Acc.sourcesholdout_sources
Targetresnet18googlenetresnet101vgg_11
ResNet3417.4418.3618.0918.13
ResNet34_adv63.5066.2564.8963.77
111 |
(adversarial training for 200 epochs; more epochs would lead to higher accuracy)
112 | 113 | ```bash 114 | CUDA_VISIBLE_DEVICES=2,3 python3 main_ens_adv_train.py --eps 2 --attacker 'stepll' --loss_schema 'averaged' --dataset 'cifar10' 115 | ``` 116 | * A resnet34 model adversarially trained on CIFAR10, 117 | adv_generators: [resnet18, resnet50, googlenet, mobilenet] 118 | holdout models: [resnet101] 119 | 120 | * We can see from the following figure that the **loss decreases** as training proceeds (for both clean and adv images). 121 | 122 | * In particular, there is violent oscillation in the adv_loss in the first half of the training procedure, because we randomize the magnitude of epsilon at each batch. When epsilon changes from a small to a large value, the loss jumps drastically, which matches our expectation that **it is hard to learn from adv input**. 123 | 124 | * The oscillation of adv_loss is damped after **30k iterations**, showing that as adversarial training goes on, the adv_loss decreases and converges, and the top-1 accuracy on adv images increases to around the same level as on clean images, regardless of the magnitude of epsilon and the randomness of the adv_models, which shows that the model starts to learn from perturbed input. 125 | 126 |

127 | 128 |

129 | 130 |
131 | 132 | 133 | ## note 134 | Make sure you have enough GPU memory to load all the pre-trained-static models. 135 |
136 |
137 | The referenced [TensorFlow 1.0 version](https://github.com/ftramer/ensemble-adv-training) needs to feed input images to all candidate adv generators when generating adv inputs, because of the **static computational graph in TF 1.0**.
138 | This code just need to feed one adv_model (the currently selected one) for adv input generating, enjoying the advantage of **dynamic computational graph** in Pytorch, which (MAYBE) reduce the training time and allow to have more candidate-adv-generators. 139 | -------------------------------------------------------------------------------- /ens_adv_train.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | import os 4 | import sys 5 | from scipy.stats import truncnorm 6 | 7 | from Attacks.Gradient_based.least_likely_class_method import least_likely_class_method 8 | from Attacks.Gradient_based.fast_gradient_method import fast_gradient_method 9 | 10 | def ens_adv_train(trainloader, criterion, optimizer, model, adv_models, writer, epoch, args): 11 | 12 | losses_combine = AverageMeter() 13 | top1_combine = AverageMeter() 14 | losses_clean = AverageMeter() 15 | top1_clean = AverageMeter() 16 | losses_adv = AverageMeter() 17 | top1_adv = AverageMeter() 18 | 19 | # training 20 | device = 'cuda' if torch.cuda.is_available() else 'cpu' 21 | for i, (inputs_clean, targets_clean) in enumerate(trainloader): 22 | 23 | # inputs 24 | inputs_clean, targets_clean = inputs_clean.to(device), targets_clean.to(device) 25 | # generate adv images 26 | # in paper, clean and adv images are half to half in each batch, 27 | # but in author's github, clean and adv image are using entire batch and then the loss is averaged from loss of these two batch 28 | # when selected == len(adv_models), select the currunt state of the model 29 | # otherwise choose the corresponding static model 30 | selected = np.random.randint(len(adv_models) + 1) 31 | if selected == len(adv_models): 32 | adv_generating_model = model 33 | else: 34 | adv_generating_model = adv_models[selected] 35 | # the model generate adv should be in eval() model 36 | adv_generating_model.eval() 37 | 38 | 39 | # setting epsilon, normal it to range: [0, 1] 
40 | if 0 < args.eps and args.eps < 1: 41 | # fixed epsilon 42 | eps = args.eps 43 | elif args.eps == 1 : 44 | # paper: , arXiv:1611.01236 45 | # favor small epsilon 46 | # a, b = (myclip_a - my_mean) / my_std, (myclip_b - my_mean) / my_std 47 | eps = truncnorm.rvs(a = 0, b = 2, loc = 0, scale = 8) / 255.0 48 | elif args.eps == 2 : 49 | # uniform distribution, even the possibility for large and small eps, range [2/255, 16/255] 50 | eps = np.random.randint(low = 2, high =17) / 255.0 51 | 52 | 53 | # generate adv images 54 | if args.attacker == 'stepll': 55 | # Step.L.L adv 56 | inputs_adv = least_likely_class_method(adv_generating_model, inputs_clean, eps, clip_min= 0, clip_max= 1) 57 | elif args.attacker =='fgsm': 58 | # Step.L.L adv 59 | inputs_adv = fast_gradient_method(adv_generating_model, inputs_clean, eps, clip_min= 0, clip_max= 1) 60 | 61 | 62 | # training 63 | ## in case that the adv_generating_model is the training model itself, clean the gradient and swith the model 64 | model.zero_grad() 65 | model.train() 66 | 67 | # clean image 68 | logits_clean = model(inputs_clean) 69 | loss1 = criterion(logits_clean, targets_clean) 70 | 71 | 72 | # adv image 73 | logits_adv = model(inputs_adv) 74 | loss2 = criterion(logits_adv, targets_clean) 75 | 76 | 77 | # combine the loss1 and loss2 78 | if args.loss_schema == 'averaged': 79 | # loss on multiple outputs 80 | # https://discuss.pytorch.org/t/a-model-with-multiple-outputs/10440 81 | loss = 0.5*(loss1 + loss2) 82 | elif args.loss_schema == 'weighted': 83 | # paper: , arXiv:1611.01236 84 | # favor for clean input 85 | loss = (1 / 1.3) (loss1 + 0.3* loss2) 86 | 87 | optimizer.zero_grad() 88 | loss.backward() 89 | optimizer.step() 90 | 91 | 92 | # print log and tensorboard 93 | # clean 94 | acc1, _ = accuracy(logits_clean, targets_clean, topk=(1,5)) 95 | losses_clean.update(loss1.item(), inputs_clean.size(0)) 96 | top1_clean.update(acc1[0], inputs_clean.size(0)) 97 | 98 | # adv 99 | acc2, _ = accuracy(logits_adv, 
targets_clean, topk=(1, 5)) 100 | losses_adv.update(loss2.item(), inputs_clean.size(0)) 101 | top1_adv.update(acc2[0], inputs_clean.size(0)) 102 | 103 | # combine 104 | acc = 0.5*(acc1[0] + acc2[0]) 105 | losses_combine.update(loss.item(), inputs_clean.size(0)) 106 | top1_combine.update(acc, inputs_clean.size(0)) 107 | 108 | # return losses_clean, top1_clean, losses_adv, top1_adv, losses_combine, top1_combine 109 | 110 | # progress_bar(i, len(trainloader), 'Epoch: %d | clean: %.3f | Top1: %.3f | Top5: %.3f ' 111 | # % (epoch, losses.avg, top1.avg, top5.avg)) 112 | 113 | if i % 20 == 0: 114 | n_iter = epoch * len(trainloader) + i 115 | writer.add_scalar('Train/Loss_clean', losses_clean.val, n_iter) 116 | writer.add_scalar('Train/Loss_adv', losses_adv.val, n_iter) 117 | writer.add_scalar('Train/Losses_combine', losses_combine.val, n_iter) 118 | writer.add_scalar('Train/Prec@1_clean', top1_clean.val, n_iter) 119 | writer.add_scalar('Train/Prec@1_adv', top1_adv.val, n_iter) 120 | writer.add_scalar('Train/Prec@1_combine', top1_combine.val, n_iter) 121 | writer.add_scalar('Train/epsilon', eps, n_iter) 122 | writer.add_scalar('Train/selected', selected, n_iter) 123 | 124 | 125 | 126 | def validate(testloader, model, criterion, writer, epoch): 127 | 128 | losses = AverageMeter() 129 | top1 = AverageMeter() 130 | 131 | # switch to evaluate mode 132 | model.eval() 133 | device = 'cuda' if torch.cuda.is_available() else 'cpu' 134 | with torch.no_grad(): 135 | for i, (inputs, targets) in enumerate(testloader): 136 | # inputs 137 | inputs, targets = inputs.to(device), targets.to(device) 138 | 139 | # compute output 140 | outputs = model(inputs) 141 | loss = criterion(outputs, targets) 142 | 143 | # measure accuracy and record loss 144 | acc1, _ = accuracy(outputs, targets, topk=(1,5)) 145 | losses.update(loss.item(), inputs.size(0)) 146 | top1.update(acc1[0], inputs.size(0)) 147 | 148 | n_iter_val = epoch * len(testloader) + i 149 | writer.add_scalar('Test/Loss_clean', 
losses.val, n_iter_val) 150 | writer.add_scalar('Test/Prec@1_clean', top1.val, n_iter_val) 151 | 152 | return top1.avg 153 | 154 | class AverageMeter(object): 155 | """Computes and stores the average and current value""" 156 | def __init__(self): 157 | self.reset() 158 | 159 | def reset(self): 160 | self.val = 0 161 | self.avg = 0 162 | self.sum = 0 163 | self.count = 0 164 | 165 | def update(self, val, n=1): 166 | self.val = val 167 | self.sum += val * n 168 | self.count += n 169 | self.avg = self.sum / self.count 170 | 171 | 172 | 173 | def accuracy(output, target, topk=(1,)): 174 | """Computes the accuracy over the k top predictions for the specified values of k""" 175 | with torch.no_grad(): 176 | maxk = max(topk) 177 | batch_size = target.size(0) 178 | 179 | _, pred = output.topk(maxk, 1, True, True) 180 | pred = pred.t() 181 | correct = pred.eq(target.view(1, -1).expand_as(pred)) 182 | 183 | res = [] 184 | for k in topk: 185 | correct_k = correct[:k].view(-1).float().sum(0, keepdim=True) 186 | res.append(correct_k.mul_(100.0 / batch_size)) 187 | return res 188 | 189 | def eps_truncnorm(): 190 | # paper: 191 | # eps drawn from a truncated normal schema in interval [0, 16] with [mean=0, std=8]: 192 | # a, b = (myclip_a - my_mean) / my_std, (myclip_b - my_mean) / my_std 193 | a = 0 194 | b = 2 195 | loc = 0 196 | scale = 8 197 | 198 | return truncnorm.rvs(a = 0, b = 2, loc = 0, scale = 8) -------------------------------------------------------------------------------- /images/tensorboard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JZ-LIANG/Ensemble-Adversarial-Training/255902d5ada181a727da666f75b08d121b3fd044/images/tensorboard.png -------------------------------------------------------------------------------- /main_ens_adv_train.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.optim as optim 
4 | import torch.nn.functional as F 5 | import torch.backends.cudnn as cudnn 6 | from torchvision import datasets, transforms 7 | import torchvision 8 | import numpy as np 9 | import os 10 | import argparse 11 | import pathlib 12 | from tensorboardX import SummaryWriter 13 | 14 | import sys 15 | from ens_adv_train import ens_adv_train, validate 16 | 17 | # import models 18 | from models.cifar10.resnet import ResNet34, ResNet101, ResNet18, ResNet50 19 | from models.cifar10.mobilenetv2_2 import MobileNetV2 20 | from models.cifar10.inception import GoogLeNet 21 | 22 | 23 | parser = argparse.ArgumentParser(description='Adv Training') 24 | 25 | parser.add_argument('--dataset', default='cifar10', type=str, 26 | help='select the training dataset') 27 | 28 | parser.add_argument('--epochs', default=200, type=int, metavar='N', 29 | help='number of total epochs to run') 30 | 31 | parser.add_argument('--eps', default = 2, type=float, metavar='M', 32 | help='option1: random epsilon distribution') 33 | 34 | parser.add_argument('--attacker', default='stepll', type=str, 35 | help='option2: attacker for generating adv input') 36 | 37 | parser.add_argument('--loss_schema', default='averaged', type=str, 38 | help='option3: loss schema') 39 | 40 | 41 | # reproducible 42 | torch.manual_seed(66) 43 | np.random.seed(66) 44 | 45 | 46 | ######################################### modify accordingly ################################################## 47 | # adv models: the static model used to generate adv input images 48 | # fixed to memory for all the trainings to speed up. 49 | adv_resnet18 = ResNet18() 50 | adv_resnet50 = ResNet50() 51 | adv_mobilenet_125 = MobileNetV2(width_mult=1.25) 52 | adv_googlenet = GoogLeNet() 53 | 54 | 55 | adv_models = [adv_resnet18, adv_resnet50, adv_mobilenet_125, adv_googlenet] 56 | adv_model_names = ['resnet18', 'resnet50', 'mobilenet_125', 'googlenet'] 57 | 58 | # models: models for be adv training 59 | # loaded only on its training to save memory. 
model_classes = [ResNet34, ResNet101, MobileNetV2, MobileNetV2]
model_names = ['resnet34', 'resnet101', 'mobilenet_1', 'mobilenet_075']
# Positional constructor argument (the MobileNetV2 width multiplier) for the
# models that need one; names missing here are built without arguments.
params = {
    'mobilenet_1': 1.0,
    'mobilenet_075': 0.75,
}


# path
# The trial name is 'adv_models:' followed by '-<name>' for every adversary.
trial_name = 'adv_models:' + ''.join('-' + name for name in adv_model_names)
# path to pre-trained models checkpoints
adv_checkpoint_path = 'checkpoints/cifar10/'
output_path = 'checkpoints/adv_train/cifar10/' + trial_name + '/'
tensorboard_path = 'tensorboard/cifar10/adv_train/' + trial_name + '/'
######################################### modify accordingly ##################################################


# exist_ok=True already makes these calls no-ops when the directories exist.
pathlib.Path(output_path).mkdir(parents=True, exist_ok=True)
pathlib.Path(tensorboard_path).mkdir(parents=True, exist_ok=True)


def main(model_class, model_name, model_path, adv_models, writer, args):
    """Adversarially train one model and checkpoint its best validation result.

    :param model_class: callable that builds the model to train.
    :param model_name: name of the model; when it is a key of ``params`` the
        matching value is passed to ``model_class`` as its only argument.
    :param model_path: file the best checkpoint is written to.
    :param adv_models: models handed to ``ens_adv_train`` each epoch.
    :param writer: summary writer handed to ``ens_adv_train`` and ``validate``.
    :param args: parsed command-line arguments; ``dataset`` and ``epochs`` are
        read here and the whole object is forwarded to ``ens_adv_train``.
    """
    dataset = args.dataset
    epochs = args.epochs

    best_acc = 0

    # prepare data loader
    trainloader, testloader = get_data_loader(dataset)

    # create model
    if model_name in params:
        model = model_class(params[model_name])
    else:
        model = model_class()

    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    if device == 'cuda':
        model = torch.nn.DataParallel(model)
        model = model.cuda()

    # optimizer
    criterion = nn.CrossEntropyLoss(reduction='mean')
    # paper use RMSProp but author's github use adam, here we follow the author's github
    optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=5e-4)

    # training
    for epoch in range(epochs):
        ens_adv_train(trainloader, criterion, optimizer, model, adv_models, writer, epoch, args)
        acc = validate(testloader, model, criterion, writer, epoch)

        # Only keep the checkpoint with the best validation value so far.
        if acc > best_acc:
            best_acc = acc
            save_checkpoint(model, model_path, optimizer, best_acc, epoch)


# save model
def save_checkpoint(model, model_path, optimizer, best_acc, epoch):
    """Write model and optimizer state plus bookkeeping values to ``model_path``.

    The saved dict has the keys ``'state_dict'``, ``'acc'``, ``'epoch'`` and
    ``'optimizer'``.
    """
    state = {
        'state_dict': model.state_dict(),
        'acc': best_acc,
        'epoch': epoch,
        'optimizer': optimizer.state_dict(),
    }
    torch.save(state, model_path)


def get_data_loader(dataset, data_root='/home/deliangj/data/'):
    """Build the train and test data loaders for ``dataset``.

    :param dataset: ``'cifar10'`` or ``'cinic10'``.
    :param data_root: directory holding the data. CIFAR-10 is read directly
        from it (no download is attempted); CINIC-10 is read from its
        ``cinic10/train`` and ``cinic10/test`` sub-directories.
    :return: tuple ``(trainloader, testloader)``.
    :raises ValueError: if ``dataset`` is not one of the supported names.
        (Previously a message was printed and ``None`` returned, which made
        the caller fail with an unrelated unpacking error.)
    """
    if dataset == 'cifar10':
        transform_train = transforms.Compose([
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            # mean subtract
            # transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
        ])

        transform_test = transforms.Compose([
            transforms.ToTensor(),
            # transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
        ])

        trainset = torchvision.datasets.CIFAR10(
            root=data_root, train=True, download=False, transform=transform_train)
        testset = torchvision.datasets.CIFAR10(
            root=data_root, train=False, download=False, transform=transform_test)

    elif dataset == "cinic10":
        cinic_directory = os.path.join(data_root, 'cinic10')
        # A std of 0 makes Normalize divide by zero, so use 1 to get an
        # identity normalization, matching the CIFAR-10 branch where
        # normalization is disabled.
        # NOTE(review): if dataset statistics are wanted instead, confirm that
        # the attack code can handle normalized inputs first.
        cinic_mean = [0, 0, 0]
        cinic_std = [1, 1, 1]

        transform_train = transforms.Compose([
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            # mean subtract
            transforms.Normalize(mean=cinic_mean, std=cinic_std),
        ])

        transform_test = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(mean=cinic_mean, std=cinic_std),
        ])

        trainset = torchvision.datasets.ImageFolder(
            os.path.join(cinic_directory, 'train'), transform=transform_train)
        testset = torchvision.datasets.ImageFolder(
            os.path.join(cinic_directory, 'test'), transform=transform_test)

    else:
        raise ValueError('no such dataset: {!r}'.format(dataset))

    trainloader = torch.utils.data.DataLoader(trainset, batch_size=128, shuffle=True, num_workers=4)
    testloader = torch.utils.data.DataLoader(testset, batch_size=256, shuffle=False, num_workers=4)

    return trainloader, testloader


if __name__ == '__main__':

    # training parameters
    args = parser.parse_args()

    # checkpoint paths
    model_save_paths = [output_path + model_name + '.pth.tar' for model_name in model_names]
    adv_model_paths = [adv_checkpoint_path + adv_model_name + '.pth.tar' for adv_model_name in adv_model_names]

    # The script only runs on GPU.
    if not torch.cuda.is_available():
        print('gpu not avaible please check !')
        sys.exit()

    # load adv models: wrap, move to GPU and switch to eval mode because they
    # are pre-trained static models.
    for i, adv_model in enumerate(adv_models):
        adv_models[i] = torch.nn.DataParallel(adv_model).cuda().eval()

    # adv pre-trained static models
    for adv_model, adv_model_path in zip(adv_models, adv_model_paths):
        # NOTE: torch.load unpickles the file; only load trusted checkpoints.
        checkpoint = torch.load(adv_model_path)
        # The weights may be stored under either of these two keys.
        for state in ('state_dict', 'net'):
            if state in checkpoint:
                break
        else:
            # Previously `state` was left unbound (or stale from the previous
            # checkpoint) in this case.
            raise KeyError(
                "checkpoint {!r} has neither a 'state_dict' nor a 'net' entry".format(adv_model_path))
        adv_model.load_state_dict(checkpoint[state])

    # starting training each model
    for model_class, model_name, model_save_path in zip(model_classes, model_names, model_save_paths):
        print('adv training model: ' + model_name)
        writer = SummaryWriter(tensorboard_path + model_name)
        try:
            main(model_class, model_name, model_save_path, adv_models, writer, args)
        finally:
            # Flush and release the event file even if training fails.
            writer.close()


# --------------------------------------------------------------------------------
# /models/cifar10/inception.py:
# --------------------------------------------------------------------------------
'''
https://github.com/kuangliu/pytorch-cifar/blob/master/models/googlenet.py
'''
import torch
import torch.nn as nn
import torch.nn.functional as F


class Inception(nn.Module):
    """Four parallel conv/pool branches whose outputs are concatenated on dim 1.

    The output therefore has ``n1x1 + n3x3 + n5x5 + pool_planes`` channels.
    """

    def __init__(self, in_planes, n1x1, n3x3red, n3x3, n5x5red, n5x5, pool_planes):
        super(Inception, self).__init__()
        # 1x1 conv branch
        self.b1 = nn.Sequential(
            nn.Conv2d(in_planes, n1x1, kernel_size=1),
            nn.BatchNorm2d(n1x1),
            nn.ReLU(True),
        )

        # 1x1 conv -> 3x3 conv branch
        self.b2 = nn.Sequential(
            nn.Conv2d(in_planes, n3x3red, kernel_size=1),
            nn.BatchNorm2d(n3x3red),
            nn.ReLU(True),
            nn.Conv2d(n3x3red, n3x3, kernel_size=3, padding=1),
            nn.BatchNorm2d(n3x3),
            nn.ReLU(True),
        )

        # 1x1 conv -> 5x5 conv branch (implemented as two stacked 3x3 convs)
        self.b3 = nn.Sequential(
            nn.Conv2d(in_planes, n5x5red, kernel_size=1),
            nn.BatchNorm2d(n5x5red),
            nn.ReLU(True),
            nn.Conv2d(n5x5red, n5x5, kernel_size=3, padding=1),
            nn.BatchNorm2d(n5x5),
            nn.ReLU(True),
            nn.Conv2d(n5x5, n5x5, kernel_size=3, padding=1),
            nn.BatchNorm2d(n5x5),
            nn.ReLU(True),
        )

        # 3x3 pool -> 1x1 conv branch
        self.b4 = nn.Sequential(
            nn.MaxPool2d(3, stride=1, padding=1),
            nn.Conv2d(in_planes, pool_planes, kernel_size=1),
            nn.BatchNorm2d(pool_planes),
            nn.ReLU(True),
        )

    def forward(self, x):
        """Run all four branches on ``x`` and concatenate them channel-wise."""
        y1 = self.b1(x)
        y2 = self.b2(x)
        y3 = self.b3(x)
        y4 = self.b4(x)
        return torch.cat([y1, y2, y3, y4], 1)


class GoogLeNet(nn.Module):
    """GoogLeNet variant with a 3-channel input and a 10-way linear output."""

    def __init__(self):
        super(GoogLeNet, self).__init__()
        self.pre_layers = nn.Sequential(
            nn.Conv2d(3, 192, kernel_size=3, padding=1),
            nn.BatchNorm2d(192),
            nn.ReLU(True),
        )

        self.a3 = Inception(192, 64, 96, 128, 16, 32, 32)
        self.b3 = Inception(256, 128, 128, 192, 32, 96, 64)

        # The same pooling module is applied twice in forward().
        self.maxpool = nn.MaxPool2d(3, stride=2, padding=1)

        self.a4 = Inception(480, 192, 96, 208, 16, 48, 64)
        self.b4 = Inception(512, 160, 112, 224, 24, 64, 64)
        self.c4 = Inception(512, 128, 128, 256, 24, 64, 64)
        self.d4 = Inception(512, 112, 144, 288, 32, 64, 64)
        self.e4 = Inception(528, 256, 160, 320, 32, 128, 128)

        self.a5 = Inception(832, 256, 160, 320, 32, 128, 128)
        self.b5 = Inception(832, 384, 192, 384, 48, 128, 128)

        self.avgpool = nn.AvgPool2d(8, stride=1)
        self.linear = nn.Linear(1024, 10)

    def forward(self, x):
        """Return the output of the final linear layer for the batch ``x``."""
        out = self.pre_layers(x)
        out = self.a3(out)
        out = self.b3(out)
        out = self.maxpool(out)
        out = self.a4(out)
        out = self.b4(out)
        out = self.c4(out)
        out = self.d4(out)
        out = self.e4(out)
        out = self.maxpool(out)
        out = self.a5(out)
        out = self.b5(out)
        out = self.avgpool(out)
        # Flatten everything but the batch dimension for the linear layer.
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out


def test():
    """Smoke test: print the output size for one random 3x32x32 input."""
    net = GoogLeNet()
    x = torch.randn(1, 3, 32, 32)
    y = net(x)
    print(y.size())
# --------------------------------------------------------------------------------
# /models/cifar10/mobilenetv2_2.py:
# --------------------------------------------------------------------------------
'''
https://github.com/tonylins/pytorch-mobilenet-v2/blob/master/MobileNetV2.py
make some small modification to adapt input from imagenet to cifar10.
'''
import torch.nn as nn
import math


def conv_bn(inp, oup, stride):
    """Return a 3x3 conv (padding 1, no bias) -> BatchNorm -> ReLU6 stack."""
    return nn.Sequential(
        nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
        nn.BatchNorm2d(oup),
        nn.ReLU6(inplace=True)
    )


def conv_1x1_bn(inp, oup):
    """Return a 1x1 conv (stride 1, no bias) -> BatchNorm -> ReLU6 stack."""
    return nn.Sequential(
        nn.Conv2d(inp, oup, 1, 1, 0, bias=False),
        nn.BatchNorm2d(oup),
        nn.ReLU6(inplace=True)
    )


class InvertedResidual(nn.Module):
    """Pointwise-expand -> depthwise -> pointwise-linear block.

    The input is added to the output only when ``stride == 1`` and
    ``inp == oup``. With ``expand_ratio == 1`` the expansion stage is omitted.
    """

    def __init__(self, inp, oup, stride, expand_ratio):
        super(InvertedResidual, self).__init__()
        self.stride = stride
        assert stride in [1, 2]

        hidden_dim = round(inp * expand_ratio)

        self.use_res_connect = self.stride == 1 and inp == oup

        layers = []
        if expand_ratio != 1:
            # pw: expand the channels before the depthwise conv
            layers += [
                nn.Conv2d(inp, hidden_dim, 1, 1, 0, bias=False),
                nn.BatchNorm2d(hidden_dim),
                nn.ReLU6(inplace=True),
            ]
        layers += [
            # dw
            nn.Conv2d(hidden_dim, hidden_dim, 3, stride, 1, groups=hidden_dim, bias=False),
            nn.BatchNorm2d(hidden_dim),
            nn.ReLU6(inplace=True),
            # pw-linear (no activation afterwards)
            nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
            nn.BatchNorm2d(oup),
        ]
        # Module indices are the same as when both variants were spelled out
        # separately (0-4 without expansion, 0-7 with it).
        self.conv = nn.Sequential(*layers)

    def forward(self, x):
        """Apply the block, adding the input back when the shapes allow it."""
        if self.use_res_connect:
            return x + self.conv(x)
        else:
            return self.conv(x)


class MobileNetV2(nn.Module):
    """MobileNetV2 adapted to CIFAR-10 sized inputs.

    :param width_mult: multiplier applied to the channel count of every layer;
        the last conv layer is only widened when ``width_mult > 1.0``.
    :param n_class: number of outputs of the classifier.
    :param input_size: must be a multiple of 32 (only checked, not otherwise used).
    """

    def __init__(self, width_mult=1., n_class=10, input_size=32):
        super(MobileNetV2, self).__init__()
        block = InvertedResidual
        input_channel = 32
        last_channel = 1280
        print('width_mult =', width_mult)

        # (expand ratio t, output channels c, number of blocks n, first stride s)
        interverted_residual_setting = [
            (1, 16, 1, 1),
            (6, 24, 2, 1),  # NOTE: change stride 2 -> 1 for CIFAR10
            (6, 32, 3, 2),
            (6, 64, 4, 2),
            (6, 96, 3, 1),
            (6, 160, 3, 2),
            (6, 320, 1, 1)]

        # building first layer
        assert input_size % 32 == 0
        # int() truncates, e.g. (0.37 * 5) = 1.85; int(0.37 * 5) = 1
        input_channel = int(input_channel * width_mult)
        # we apply width multiplier to all layers except the very last convolutional layer.
        # This improves performance for smaller models
        self.last_channel = int(last_channel * width_mult) if width_mult > 1.0 else last_channel
        features = [conv_bn(3, input_channel, 1)]  # NOTE: change conv1 stride 2 -> 1 for CIFAR10
        # building inverted residual blocks
        for t, c, n, s in interverted_residual_setting:
            output_channel = int(c * width_mult)
            for i in range(n):
                # Only the first block of each group uses the configured stride.
                block_stride = s if i == 0 else 1
                features.append(block(input_channel, output_channel, block_stride, expand_ratio=t))
                input_channel = output_channel
        # building last several layers
        features.append(conv_1x1_bn(input_channel, self.last_channel))
        # make it nn.Sequential
        self.features = nn.Sequential(*features)

        # building classifier
        self.classifier = nn.Sequential(
            nn.Dropout(0.2),
            nn.Linear(self.last_channel, n_class),
        )

        self._initialize_weights()

    def forward(self, x):
        """Return the classifier output for the batch ``x``."""
        x = self.features(x)
        # Average over the last two dimensions (dims 3 and 2 of the 4-D
        # feature map) before the classifier.
        x = x.mean(3).mean(2)
        x = self.classifier(x)
        return x

    def _initialize_weights(self):
        """Initialize conv, batch-norm and linear parameters in place."""
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                # Normal init with std sqrt(2 / (kernel area * out_channels)).
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
                if m.bias is not None:
                    m.bias.data.zero_()
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                m.weight.data.normal_(0, 0.01)
                m.bias.data.zero_()


# --------------------------------------------------------------------------------
# /models/cifar10/resnet.py:
# --------------------------------------------------------------------------------
'''
https://github.com/kuangliu/pytorch-cifar/blob/master/models/resnet.py
'''
import torch
import torch.nn as nn
import torch.nn.functional as F


class BasicBlock(nn.Module):
    """Two 3x3 conv layers with a shortcut connection."""

    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        # Identity shortcut unless the spatial size or channel count changes,
        # in which case a strided 1x1 conv + BatchNorm is used instead.
        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion*planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion*planes)
            )

    def forward(self, x):
        """Return relu(block(x) + shortcut(x))."""
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out


class Bottleneck(nn.Module):
    """1x1 -> 3x3 -> 1x1 conv block with a shortcut; outputs 4 * planes channels."""

    expansion = 4

    def __init__(self, in_planes, planes, stride=1):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, self.expansion*planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(self.expansion*planes)

        # Identity shortcut unless the spatial size or channel count changes.
        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion*planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion*planes)
            )

    def forward(self, x):
        """Return relu(block(x) + shortcut(x))."""
        out = F.relu(self.bn1(self.conv1(x)))
        out = F.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out


class ResNet(nn.Module):
    """ResNet with a 3x3 stride-1 stem and four stages of ``block``.

    :param block: block class; must expose an ``expansion`` attribute.
    :param num_blocks: number of blocks in each of the four stages.
    :param num_classes: number of outputs of the final linear layer.
    """

    def __init__(self, block, num_blocks, num_classes=10):
        super(ResNet, self).__init__()
        self.in_planes = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        self.linear = nn.Linear(512*block.expansion, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        """Build one stage; only its first block uses ``stride``."""
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        for block_stride in strides:
            layers.append(block(self.in_planes, planes, block_stride))
            # The next block consumes this block's output channels.
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        """Return the output of the final linear layer for the batch ``x``."""
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        # Global average pooling. For 32x32 inputs the feature map is 4x4, so
        # this equals the former fixed avg_pool2d(out, 4), but it also keeps the
        # flattened size at 512 * expansion for other input resolutions.
        out = F.adaptive_avg_pool2d(out, 1)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out


def ResNet18():
    return ResNet(BasicBlock, [2,2,2,2])

def ResNet34():
    return ResNet(BasicBlock, [3,4,6,3])

def ResNet50():
    return ResNet(Bottleneck, [3,4,6,3])

def ResNet101():
    return ResNet(Bottleneck, [3,4,23,3])

def ResNet152():
    return ResNet(Bottleneck, [3,8,36,3])


def test():
    """Smoke test: print the output size for one random 3x32x32 input."""
    net = ResNet18()
    y = net(torch.randn(1,3,32,32))
    print(y.size())

# test()

# --------------------------------------------------------------------------------
# /tensorboard/cifar10/adv_train/adv_models:-resnet18-resnet50-mobilenet_125-googlenet/resnet101/events.out.tfevents.1558584038.destc0strapp81.eu.sony.com:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/JZ-LIANG/Ensemble-Adversarial-Training/255902d5ada181a727da666f75b08d121b3fd044/tensorboard/cifar10/adv_train/adv_models:-resnet18-resnet50-mobilenet_125-googlenet/resnet101/events.out.tfevents.1558584038.destc0strapp81.eu.sony.com
# --------------------------------------------------------------------------------
# /tensorboard/cifar10/adv_train/adv_models:-resnet18-resnet50-mobilenet_125-googlenet/resnet34/events.out.tfevents.1558545149.destc0strapp81.eu.sony.com:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/JZ-LIANG/Ensemble-Adversarial-Training/255902d5ada181a727da666f75b08d121b3fd044/tensorboard/cifar10/adv_train/adv_models:-resnet18-resnet50-mobilenet_125-googlenet/resnet34/events.out.tfevents.1558545149.destc0strapp81.eu.sony.com
# --------------------------------------------------------------------------------