├── abs_models
│   ├── __init__.py
│   ├── loss_functions.py
│   ├── sampling.py
│   ├── attack_utils.py
│   ├── utils.py
│   ├── inference.py
│   ├── visualization.py
│   ├── models.py
│   └── nets.py
├── exp
│   ├── imgs
│   │   ├── qualitative.png
│   │   └── distal_adversarials.png
│   ├── VAE_swarm_MSE
│   │   └── nets
│   │       ├── ABS_0.net
│   │       ├── ABS_1.net
│   │       ├── ABS_2.net
│   │       ├── ABS_3.net
│   │       ├── ABS_4.net
│   │       ├── ABS_5.net
│   │       ├── ABS_6.net
│   │       ├── ABS_7.net
│   │       ├── ABS_8.net
│   │       └── ABS_9.net
│   └── mnist_cnn
│       └── nets
│           ├── binary_cnn.net
│           ├── vanilla_cnn.net
│           └── transfer_cnn.net
├── foolbox_model.py
├── madry
│   └── mnist_challenge
│       ├── config.json
│       ├── model_robustml.py
│       ├── LICENSE
│       ├── fetch_model.py
│       ├── model.py
│       ├── run_attack.py
│       ├── pgd_attack.py
│       ├── train.py
│       ├── eval.py
│       └── README.md
├── robustml_model.py
├── README.md
├── scripts
│   ├── attacks.py
│   └── attacks.ipynb
└── LICENSE
/abs_models/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/exp/imgs/qualitative.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bethgelab/AnalysisBySynthesis/HEAD/exp/imgs/qualitative.png
--------------------------------------------------------------------------------
/exp/VAE_swarm_MSE/nets/ABS_0.net:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bethgelab/AnalysisBySynthesis/HEAD/exp/VAE_swarm_MSE/nets/ABS_0.net
--------------------------------------------------------------------------------
/exp/VAE_swarm_MSE/nets/ABS_1.net:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bethgelab/AnalysisBySynthesis/HEAD/exp/VAE_swarm_MSE/nets/ABS_1.net
--------------------------------------------------------------------------------
/exp/VAE_swarm_MSE/nets/ABS_2.net:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bethgelab/AnalysisBySynthesis/HEAD/exp/VAE_swarm_MSE/nets/ABS_2.net
--------------------------------------------------------------------------------
/exp/VAE_swarm_MSE/nets/ABS_3.net:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bethgelab/AnalysisBySynthesis/HEAD/exp/VAE_swarm_MSE/nets/ABS_3.net
--------------------------------------------------------------------------------
/exp/VAE_swarm_MSE/nets/ABS_4.net:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bethgelab/AnalysisBySynthesis/HEAD/exp/VAE_swarm_MSE/nets/ABS_4.net
--------------------------------------------------------------------------------
/exp/VAE_swarm_MSE/nets/ABS_5.net:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bethgelab/AnalysisBySynthesis/HEAD/exp/VAE_swarm_MSE/nets/ABS_5.net
--------------------------------------------------------------------------------
/exp/VAE_swarm_MSE/nets/ABS_6.net:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bethgelab/AnalysisBySynthesis/HEAD/exp/VAE_swarm_MSE/nets/ABS_6.net
--------------------------------------------------------------------------------
/exp/VAE_swarm_MSE/nets/ABS_7.net:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bethgelab/AnalysisBySynthesis/HEAD/exp/VAE_swarm_MSE/nets/ABS_7.net -------------------------------------------------------------------------------- /exp/VAE_swarm_MSE/nets/ABS_8.net: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bethgelab/AnalysisBySynthesis/HEAD/exp/VAE_swarm_MSE/nets/ABS_8.net -------------------------------------------------------------------------------- /exp/VAE_swarm_MSE/nets/ABS_9.net: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bethgelab/AnalysisBySynthesis/HEAD/exp/VAE_swarm_MSE/nets/ABS_9.net -------------------------------------------------------------------------------- /exp/imgs/distal_adversarials.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bethgelab/AnalysisBySynthesis/HEAD/exp/imgs/distal_adversarials.png -------------------------------------------------------------------------------- /exp/mnist_cnn/nets/binary_cnn.net: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bethgelab/AnalysisBySynthesis/HEAD/exp/mnist_cnn/nets/binary_cnn.net -------------------------------------------------------------------------------- /exp/mnist_cnn/nets/vanilla_cnn.net: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bethgelab/AnalysisBySynthesis/HEAD/exp/mnist_cnn/nets/vanilla_cnn.net -------------------------------------------------------------------------------- /exp/mnist_cnn/nets/transfer_cnn.net: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bethgelab/AnalysisBySynthesis/HEAD/exp/mnist_cnn/nets/transfer_cnn.net -------------------------------------------------------------------------------- /foolbox_model.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import foolbox 3 | import numpy as np 4 | from abs_models.models import get_ABS 5 | import os 6 | 7 | def create(): 8 | model = get_ABS() 9 | model.eval() 10 | fmodel = foolbox.models.PyTorchModel( 11 | model, (0, 1)) 12 | return fmodel 13 | 14 | 15 | if __name__ == '__main__': 16 | fmodel = create() 17 | 18 | # design an input that looks like a 1 19 | x = np.zeros((1, 28, 28), dtype=np.float32) 20 | x[0, 5:-5, 12:-12] = 1 21 | 22 | logits = fmodel.predictions(x) 23 | 24 | print('logits', logits) 25 | print('probabilities', foolbox.utils.softmax(logits)) 26 | print('class', np.argmax(logits)) 27 | -------------------------------------------------------------------------------- /madry/mnist_challenge/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "_comment": "===== MODEL CONFIGURATION =====", 3 | "model_dir": "models/a_very_robust_model", 4 | 5 | "_comment": "===== TRAINING CONFIGURATION =====", 6 | "random_seed": 4557077, 7 | "max_num_training_steps": 100000, 8 | "num_output_steps": 100, 9 | "num_summary_steps": 100, 10 | "num_checkpoint_steps": 300, 11 | "training_batch_size": 50, 12 | 13 | "_comment": "===== EVAL CONFIGURATION =====", 14 | "num_eval_examples": 10000, 15 | "eval_batch_size": 200, 16 | "eval_on_cpu": true, 17 | 18 | "_comment": "=====ADVERSARIAL EXAMPLES CONFIGURATION=====", 19 | "epsilon": 0.3, 20 | "k": 40, 21 | "a": 0.01, 22 | 
"random_start": true, 23 | "loss_func": "xent", 24 | "store_adv_path": "attack.npy" 25 | } 26 | -------------------------------------------------------------------------------- /robustml_model.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import robustml 3 | import numpy as np 4 | import foolbox_model 5 | 6 | 7 | class ABSModel(robustml.model.Model): 8 | """RobustML interface for the Analysis by Synthesis (ABS) model.""" 9 | 10 | def __init__(self): 11 | self._dataset = robustml.dataset.MNIST() 12 | self._threat_model = robustml.threat_model.L2(epsilon=1.5) 13 | self._fmodel = foolbox_model.create() 14 | assert self._fmodel.bounds() == (0, 1) 15 | 16 | @property 17 | def dataset(self): 18 | return self._dataset 19 | 20 | @property 21 | def threat_model(self): 22 | return self._threat_model 23 | 24 | def classify(self, x): 25 | assert x.shape == (28, 28) 26 | x = x[np.newaxis] # add chanell axis 27 | assert x.shape == (1, 28, 28) 28 | return np.argmax(self._fmodel.predictions(x)) 29 | 30 | 31 | if __name__ == '__main__': 32 | model = ABSModel() 33 | 34 | # design an input that looks like a 1 35 | x = np.zeros((28, 28), dtype=np.float32) 36 | x[5:-5, 12:-12] = 1 37 | 38 | print('class', model.classify(x)) 39 | -------------------------------------------------------------------------------- /madry/mnist_challenge/model_robustml.py: -------------------------------------------------------------------------------- 1 | import robustml 2 | import tensorflow as tf 3 | 4 | import model 5 | 6 | class Model(robustml.model.Model): 7 | def __init__(self, sess): 8 | self._model = model.Model() 9 | 10 | saver = tf.train.Saver() 11 | checkpoint = tf.train.latest_checkpoint('models/secret') 12 | saver.restore(sess, checkpoint) 13 | 14 | self._sess = sess 15 | self._input = self._model.x_input 16 | self._logits = self._model.pre_softmax 17 | self._predictions = self._model.y_pred 18 | self._dataset = robustml.dataset.MNIST() 19 | self._threat_model = robustml.threat_model.Linf(epsilon=0.3) 20 | 21 | @property 22 | def dataset(self): 23 | return self._dataset 24 | 25 | @property 26 | def threat_model(self): 27 | return self._threat_model 28 | 29 | def classify(self, x): 30 | return self._sess.run(self._predictions, 31 | {self._input: x})[0] 32 | 33 | # expose attack interface 34 | 35 | @property 36 | def input(self): 37 | return self._input 38 | 39 | @property 40 | def logits(self): 41 | return self._logits 42 | 43 | @property 44 | def predictions(self): 45 | return self._predictions 46 | -------------------------------------------------------------------------------- /madry/mnist_challenge/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Aleksander Madry, Aleksandar Makelov, Ludwig Schmidt, Dimitris Tsipras, and Adrian Vladu 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /madry/mnist_challenge/fetch_model.py: -------------------------------------------------------------------------------- 1 | """Downloads a model, computes its SHA256 hash and unzips it 2 | at the proper location.""" 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | import sys 8 | import zipfile 9 | import hashlib 10 | 11 | if len(sys.argv) != 2 or sys.argv[1] not in ['natural', 12 | 'adv_trained', 13 | 'secret']: 14 | print('Usage: python fetch_model.py [natural, adv_trained, secret]') 15 | sys.exit(1) 16 | 17 | if sys.argv[1] == 'natural': 18 | url = 'https://github.com/MadryLab/mnist_challenge_models/raw/master/natural.zip' 19 | elif sys.argv[1] == 'secret': 20 | url = 'https://github.com/MadryLab/mnist_challenge_models/raw/master/secret.zip' 21 | else: # fetch adv_trained model 22 | url = 'https://github.com/MadryLab/mnist_challenge_models/raw/master/adv_trained.zip' 23 | 24 | fname = url.split('/')[-1] # get the name of the file 25 | 26 | # model download 27 | print('Downloading models') 28 | if sys.version_info >= (3,): 29 | import urllib.request 30 | urllib.request.urlretrieve(url, fname) 31 | else: 32 | import urllib 33 | urllib.urlretrieve(url, fname) 34 | 35 | # computing model hash 36 | sha256 = hashlib.sha256() 37 | with open(fname, 'rb') as f: 38 | data = f.read() 39 | sha256.update(data) 40 | print('SHA256 hash: {}'.format(sha256.hexdigest())) 41 | 42 | # extracting model 43 | print('Extracting model') 44 | with zipfile.ZipFile(fname, 'r') as model_zip: 45 | model_zip.extractall() 46 | print('Extracted model in {}'.format(model_zip.namelist()[0])) 47 | -------------------------------------------------------------------------------- /abs_models/loss_functions.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from abs_models import utils as u 3 | import numpy as np 4 | 5 | def squared_L2_loss(a, b, axes, keepdim=True): 6 | return u.tsum((a - b)**2, axes=axes, keepdim=keepdim) 7 | 8 | 9 | def KLD(mu_latent_q, sig_q=1., dim=-3): 10 | """ 11 | 12 | :param mu_latent_q: z must be shape (..., n_latent ...) at i-th pos 13 | :param sig_q: scalar 14 | :param dim: determines pos i 15 | :return: 16 | """ 17 | return -0.5 * torch.sum(1 - mu_latent_q ** 2 + u.tlog(sig_q) - sig_q**2, 18 | dim=dim, keepdim=True) 19 | 20 | 21 | def ELBOs(x_rec: torch.Tensor, samples_latent: torch.Tensor, x_orig: torch.Tensor, 22 | beta=1, dist_fct=squared_L2_loss): 23 | """ 24 | :param x_rec: shape (..., n_channels, nx, ny) 25 | :param samples_latent: (..., n_latent, 1, 1) 26 | :param x_orig: (..., n_channels, nx, ny) 27 | :param beta: 28 | :param dist_fct: 29 | :return: 30 | """ 31 | n_ch, nx, ny = x_rec.shape[-3:] 32 | kld = KLD(samples_latent, sig_q=1.) 
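    # note: despite the variable name, `elbo` below is the *negative* ELBO,
    # i.e. a loss to be minimized: the reconstruction distance plus the
    # beta-weighted KL divergence of the latent samples to a unit Gaussian prior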
33 |     rec_loss = dist_fct(x_orig, x_rec, axes=[-1, -2, -3])
34 |     elbo = rec_loss + beta * kld
35 |     # del x_rec, x_orig, kld
36 |     # del x_rec, samples_latent, x_orig
37 |     return elbo / (n_ch * nx * ny)
38 |
39 |
40 | def ELBOs2(x, rec_x, samples_latent, beta):
41 |     """This is the loss function used during inference to calculate the logits.
42 |
43 |     This function must only operate on the last three dimensions of x and rec_x.
44 |     There can be a varying number of additional dimensions before them!
45 |     """
46 |
47 |     input_size = int(np.prod(x.shape[-3:]))
48 |     assert len(x.shape) == 4 and len(rec_x.shape) == 4
49 |     # alternative implementation that is much faster and more memory efficient
50 |     # when each sample in x needs to be compared to each sample in rec_x
51 |     assert x.shape[-3:] == rec_x.shape[-3:]
52 |     x = x.reshape(x.shape[0], input_size)
53 |     y = rec_x.reshape(rec_x.shape[0], input_size)
54 |
55 |     x2 = torch.norm(x, p=2, dim=-1, keepdim=True).pow(2)  # x2 shape (bs, 1)
56 |     y2 = torch.norm(y, p=2, dim=-1, keepdim=True).pow(2)  # y2 shape (n_samples, 1), transposed below
57 |     # note that we could cache the calculation of y2, but
58 |     # it's so fast that it doesn't matter
59 |
60 |     L2squared = x2 + y2.t() - 2 * torch.mm(x, y.t())
61 |     L2squared = L2squared / input_size
62 |
63 |     kld = KLD(samples_latent, sig_q=1.)[None, :, 0, 0, 0] / input_size
64 |     # note that the KLD sum is over the latents, not over the input size
65 |     return L2squared + beta * kld
66 |
67 |
68 |
--------------------------------------------------------------------------------
/madry/mnist_challenge/model.py:
--------------------------------------------------------------------------------
1 | """
2 | The model is adapted from the tensorflow tutorial:
3 | https://www.tensorflow.org/get_started/mnist/pros
4 | """
5 | from __future__ import absolute_import
6 | from __future__ import division
7 | from __future__ import print_function
8 |
9 | import tensorflow as tf
10 |
11 |
12 | class Model(object):
13 |     def __init__(self):
14 |         self.x_input = tf.placeholder(tf.float32, shape=[None, 1, 28, 28])
15 |         self.y_input = tf.placeholder(tf.int64, shape=[None])
16 |
17 |         self.x_image = tf.reshape(self.x_input, [-1, 28, 28, 1])
18 |
19 |         # first convolutional layer
20 |         self.W_conv1 = self._weight_variable([5, 5, 1, 32])
21 |         b_conv1 = self._bias_variable([32])
22 |
23 |         self.h_conv1 = tf.nn.relu(self._conv2d(self.x_image, self.W_conv1) + b_conv1)
24 |         h_pool1 = self._max_pool_2x2(self.h_conv1)
25 |
26 |         # second convolutional layer
27 |         W_conv2 = self._weight_variable([5, 5, 32, 64])
28 |         b_conv2 = self._bias_variable([64])
29 |
30 |         h_conv2 = tf.nn.relu(self._conv2d(h_pool1, W_conv2) + b_conv2)
31 |         self.h_pool2 = self._max_pool_2x2(h_conv2)
32 |
33 |         # first fully connected layer
34 |         W_fc1 = self._weight_variable([7 * 7 * 64, 1024])
35 |         b_fc1 = self._bias_variable([1024])
36 |
37 |         h_pool2_flat = tf.reshape(self.h_pool2, [-1, 7 * 7 * 64])
38 |         h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
39 |
40 |         # output layer
41 |         W_fc2 = self._weight_variable([1024, 10])
42 |         b_fc2 = self._bias_variable([10])
43 |
44 |         self.pre_softmax = tf.matmul(h_fc1, W_fc2) + b_fc2
45 |
46 |         y_xent = tf.nn.sparse_softmax_cross_entropy_with_logits(
47 |             labels=self.y_input, logits=self.pre_softmax)
48 |
49 |         self.xent = tf.reduce_sum(y_xent)
50 |
51 |         self.y_pred = tf.argmax(self.pre_softmax, 1)
52 |
53 |         correct_prediction = tf.equal(self.y_pred, self.y_input)
54 |
55 |         self.num_correct = tf.reduce_sum(tf.cast(correct_prediction, tf.int64))
56 |         self.accuracy =
tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) 57 | 58 | @staticmethod 59 | def _weight_variable(shape): 60 | initial = tf.truncated_normal(shape, stddev=0.1) 61 | return tf.Variable(initial) 62 | 63 | @staticmethod 64 | def _bias_variable(shape): 65 | initial = tf.constant(0.1, shape = shape) 66 | return tf.Variable(initial) 67 | 68 | @staticmethod 69 | def _conv2d(x, W): 70 | return tf.nn.conv2d(x, W, strides=[1,1,1,1], padding='SAME') 71 | 72 | @staticmethod 73 | def _max_pool_2x2( x): 74 | return tf.nn.max_pool(x, 75 | ksize = [1,2,2,1], 76 | strides=[1,2,2,1], 77 | padding='SAME') 78 | -------------------------------------------------------------------------------- /madry/mnist_challenge/run_attack.py: -------------------------------------------------------------------------------- 1 | """Evaluates a model against examples from a .npy file as specified 2 | in config.json""" 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | from datetime import datetime 8 | import json 9 | import math 10 | import os 11 | import sys 12 | import time 13 | 14 | import tensorflow as tf 15 | from tensorflow.examples.tutorials.mnist import input_data 16 | 17 | import numpy as np 18 | 19 | from model import Model 20 | 21 | def run_attack(checkpoint, x_adv, epsilon): 22 | mnist = input_data.read_data_sets('MNIST_data', one_hot=False) 23 | 24 | model = Model() 25 | 26 | saver = tf.train.Saver() 27 | 28 | num_eval_examples = 10000 29 | eval_batch_size = 64 30 | 31 | num_batches = int(math.ceil(num_eval_examples / eval_batch_size)) 32 | total_corr = 0 33 | 34 | x_nat = mnist.test.images 35 | l_inf = np.amax(np.abs(x_nat - x_adv)) 36 | 37 | if l_inf > epsilon + 0.0001: 38 | print('maximum perturbation found: {}'.format(l_inf)) 39 | print('maximum perturbation allowed: {}'.format(epsilon)) 40 | return 41 | 42 | y_pred = [] # label accumulator 43 | 44 | with tf.Session() as sess: 45 | # Restore the checkpoint 46 | saver.restore(sess, checkpoint) 47 | 48 | # Iterate over the samples batch-by-batch 49 | for ibatch in range(num_batches): 50 | bstart = ibatch * eval_batch_size 51 | bend = min(bstart + eval_batch_size, num_eval_examples) 52 | 53 | x_batch = x_adv[bstart:bend, :] 54 | y_batch = mnist.test.labels[bstart:bend] 55 | 56 | dict_adv = {model.x_input: x_batch, 57 | model.y_input: y_batch} 58 | cur_corr, y_pred_batch = sess.run([model.num_correct, model.y_pred], 59 | feed_dict=dict_adv) 60 | 61 | total_corr += cur_corr 62 | y_pred.append(y_pred_batch) 63 | 64 | accuracy = total_corr / num_eval_examples 65 | 66 | print('Accuracy: {:.2f}%'.format(100.0 * accuracy)) 67 | y_pred = np.concatenate(y_pred, axis=0) 68 | np.save('pred.npy', y_pred) 69 | print('Output saved at pred.npy') 70 | 71 | if __name__ == '__main__': 72 | import json 73 | 74 | with open('config.json') as config_file: 75 | config = json.load(config_file) 76 | 77 | model_dir = config['model_dir'] 78 | 79 | checkpoint = tf.train.latest_checkpoint(model_dir) 80 | x_adv = np.load(config['store_adv_path']) 81 | 82 | if checkpoint is None: 83 | print('No checkpoint found') 84 | elif x_adv.shape != (10000, 784): 85 | print('Invalid shape: expected (10000,784), found {}'.format(x_adv.shape)) 86 | elif np.amax(x_adv) > 1.0001 or \ 87 | np.amin(x_adv) < -0.0001 or \ 88 | np.isnan(np.amax(x_adv)): 89 | print('Invalid pixel range. 
Expected [0, 1], found [{}, {}]'.format(
90 |         np.amin(x_adv),
91 |         np.amax(x_adv)))
92 |     else:
93 |         run_attack(checkpoint, x_adv, config['epsilon'])
94 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Towards the first adversarially robust neural network model on MNIST
2 |
3 | The ABS model is a robust (w.r.t. adversarial examples) classifier on MNIST. For more details check out our paper "Towards the first adversarially robust neural network model on MNIST",
4 | https://arxiv.org/abs/1805.09190 [1].
5 |
6 | This code provides pre-trained models:
7 | a vanilla CNN, a binary CNN, a Nearest Neighbour classifier, the model of Madry et al. [2], and our Analysis by Synthesis (**ABS**) model.
8 |
9 | A random selection of adversarial examples for the different models can be seen below.
10 |
11 |
12 | ![qualitative examples](exp/imgs/qualitative.png)
13 | Smallest adversarial examples for different architectures.
14 |
15 |
16 |
17 |
18 | To generate adversarial examples and run the code agnostic of the deep-learning framework (e.g. TensorFlow, PyTorch), we use foolbox [3].
19 | Foolbox supports decision-, score- and gradient-based attacks. For gradient-based attacks, the gradients can either be calculated directly or estimated from the model scores with finite-difference methods (see the sketch below the next figure).
20 | Additionally, some model-specific attacks (LatentDescent) are provided.
21 |
22 | Lastly, we also compute distal (also called trash) adversarial examples: unrecognizable images that are nonetheless classified with high confidence.
23 |
24 |
25 | ![distal adversarials](exp/imgs/distal_adversarials.png)
26 | Distal adversarials which are classified as "1" with >90% certainty.
27 |
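For models without usable gradients, foolbox can estimate gradients from the class scores. The following is a minimal sketch following `scripts/attacks.py`; `fmodel`, `image` and `label` are assumed to come from `foolbox_model.create()` and `abs_models.utils.get_batch`:

```
import foolbox
from foolbox import attacks as fa

# estimate gradients coordinate-wise from the model's class scores
GE = foolbox.gradient_estimators.CoordinateWiseGradientEstimator(0.1)
fmodel_est = foolbox.models.ModelWithEstimatedGradients(fmodel, GE)

# any gradient-based attack can then run on the wrapped model
attack = fa.DeepFoolL2Attack(fmodel_est)
adversarial = attack(image, label)  # image in [0, 1], shape (1, 28, 28)
```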
28 |
29 |
30 |
31 |
32 | ## Loading the ABS Model
33 | The model can be loaded as follows and supports the standard PyTorch API:
34 |
35 | ```
36 | from abs_models import models as mz  # model zoo
37 | from abs_models import utils as u
38 |
39 | model = mz.get_VAE(n_iter=50)  # ABS; use n_iter=1 for a speedup (but less accurate)
40 | batch, label = u.get_batch()  # returns np.array, shape (batch_size, n_channels, nx, ny)
41 | logits = model(u.n2t(batch))
42 | ```
43 | For a complete example using foolbox see "_scripts/attacks.ipynb_" or "_scripts/attacks.py_".
44 |
45 | Via foolbox, the model can also be run agnostic of the deep-learning framework.
46 |
47 | ## Installation
48 | Our code uses PyTorch and Python 3.6 and can be found here (this repo):
49 | ```
50 | git clone https://github.com/lukas-schott/AnalysisBySynthesis.git
51 | ```
52 |
53 | The dependencies are:
54 | ```
55 | pip3 --no-cache-dir install \
56 |     numpy \
57 |     http://download.pytorch.org/whl/cu90/torch-0.4.0-cp36-cp36m-linux_x86_64.whl \
58 |     torchvision \
59 |     foolbox
60 | ```
61 |
62 | Have fun :).
63 |
64 |
65 | [1] Lukas Schott, Jonas Rauber, Matthias Bethge, and Wieland Brendel. Towards the first adversarially robust neural network model on MNIST. In _International Conference on Learning Representations_, 2019. URL https://arxiv.org/abs/1805.09190
66 |
67 |
68 | [2] Aleksander Madry, Aleksandar Makelov, Ludwig Schmidt, Dimitris Tsipras, and Adrian Vladu. Towards deep
69 | learning models resistant to adversarial attacks. In _International Conference on Learning Representations_, 2018. URL https://openreview.net/forum?id=rJzIBfZ
70 |
71 | [3] Jonas Rauber and Wieland Brendel. Foolbox Documentation. Read the Docs, 2018. URL https://media.readthedocs.org/pdf/foolbox/latest/foolbox.pdf
72 |
--------------------------------------------------------------------------------
/abs_models/sampling.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from abs_models import utils as u
3 | from torch import tensor
4 | import numpy as np
5 | from torch.nn import functional as F
6 | from scipy.stats import multivariate_normal
7 |
8 |
9 | class GridMan(object):
10 |     def __init__(self, AEs, nd, n_classes, nc=1, nx=28, ny=28, limit=0.99):
11 |         self.samples = {}
12 |         self.images = {}
13 |         self.th_images = {}
14 |         self.classes = {}
15 |         self.l_v = {}
16 |         self.AEs = AEs
17 |         self.nd = nd
18 |         self.n_classes = n_classes
19 |         self.nx = nx
20 |         self.ny = ny
21 |         self.nc = nc
22 |         self.limit = None
23 |
24 |     def init_grid(self, n_samples, fraction_to_dismiss=None,
25 |                   sample_sigma=None):
26 |         n_grid = self.n_samples_to_n_grid(n_samples)
27 |         print('init new grid', n_samples, n_grid)
28 |         limit = 0.99
29 |         if self.limit is not None:
30 |             limit = self.limit
31 |         grids = [(np.linspace(-limit, limit, n_grid)) for i in range(self.nd)]
32 |         xys = np.array(np.meshgrid(*grids))
33 |         xys = np.moveaxis(xys, 0, -1).reshape(n_grid ** self.nd, self.nd)
34 |         self.samples[n_samples] = xys
35 |         self.l_v[n_samples] = \
36 |             torch.from_numpy(xys[:, :, None, None].astype(np.float32)).to(u.dev())
37 |
38 |     def get_images(self, n_samples=10, fraction_to_dismiss=0.1,
39 |                    weighted=False, sample_sigma=1):
40 |         if n_samples not in self.images.keys():
41 |             self.init_grid(n_samples, fraction_to_dismiss=fraction_to_dismiss,
42 |                            sample_sigma=sample_sigma)
43 |             self.images[n_samples] = np.empty((self.n_classes, n_samples,
44 |                                                self.nc, self.nx, self.ny))
45 |             for c, AE in enumerate(self.AEs[:self.n_classes]):
46 |                 AE.eval()
47 |
images = torch.sigmoid(AE.Decoder.forward(self.l_v[n_samples])).cpu().data.numpy() 48 | if weighted: 49 | images = images[:, 0, None] 50 | self.images[n_samples][c, ...] = images 51 | 52 | self.l_v[n_samples] 53 | assert n_samples not in self.th_images 54 | self.th_images[n_samples] = tensor(self.images[n_samples]).type( 55 | torch.FloatTensor).to(u.dev()) 56 | print('done creating samples') 57 | 58 | return self.images[n_samples] 59 | 60 | def n_samples_to_n_grid(self, n_samples): 61 | return int(np.round(n_samples ** (1. / self.nd))) 62 | 63 | 64 | class GaussianSamples(GridMan): 65 | def init_grid(self, n_samples, fraction_to_dismiss=0.1, 66 | mus=None, sample_sigma=1): 67 | if mus is None: 68 | mus = np.zeros(self.nd) 69 | samples = get_gaussian_samples(n_samples, self.nd, mus, 70 | fraction_to_dismiss=fraction_to_dismiss, 71 | sample_sigma=sample_sigma) 72 | self.samples[n_samples] = samples 73 | self.l_v[n_samples] = \ 74 | torch.from_numpy(samples[:, :, None, None].astype( 75 | np.float32)).to(u.dev()) 76 | 77 | def n_samples_to_n_grid(self, n_samples): 78 | return n_samples 79 | 80 | 81 | def get_gaussian_samples(n_samples, nd, mus, 82 | fraction_to_dismiss=0.1, sample_sigma=1): 83 | # returns nd coords sampled from gaussian in shape (n_samples, nd) 84 | sigmas = np.diag(np.ones(nd)) * sample_sigma 85 | g = multivariate_normal(mus, sigmas) 86 | samples = g.rvs(size=int(n_samples / (1. - fraction_to_dismiss))) 87 | probs = g.pdf(samples) 88 | thresh = np.sort(probs)[-n_samples] 89 | samples = samples[probs >= thresh] 90 | return samples 91 | -------------------------------------------------------------------------------- /madry/mnist_challenge/pgd_attack.py: -------------------------------------------------------------------------------- 1 | """ 2 | Implementation of attack methods. Running this file as a program will 3 | apply the attack to the model specified by the config file and store 4 | the examples in an .npy file. 5 | """ 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import tensorflow as tf 11 | import numpy as np 12 | 13 | 14 | class LinfPGDAttack: 15 | def __init__(self, model, epsilon, k, a, random_start, loss_func): 16 | """Attack parameter initialization. The attack performs k steps of 17 | size a, while always staying within epsilon from the initial 18 | point.""" 19 | self.model = model 20 | self.epsilon = epsilon 21 | self.k = k 22 | self.a = a 23 | self.rand = random_start 24 | 25 | if loss_func == 'xent': 26 | loss = model.xent 27 | elif loss_func == 'cw': 28 | label_mask = tf.one_hot(model.y_input, 29 | 10, 30 | on_value=1.0, 31 | off_value=0.0, 32 | dtype=tf.float32) 33 | correct_logit = tf.reduce_sum(label_mask * model.pre_softmax, axis=1) 34 | wrong_logit = tf.reduce_max((1-label_mask) * model.pre_softmax, axis=1) 35 | loss = -tf.nn.relu(correct_logit - wrong_logit + 50) 36 | else: 37 | print('Unknown loss function. 
Defaulting to cross-entropy') 38 | loss = model.xent 39 | 40 | self.grad = tf.gradients(loss, model.x_input)[0] 41 | 42 | def perturb(self, x_nat, y, sess): 43 | """Given a set of examples (x_nat, y), returns a set of adversarial 44 | examples within epsilon of x_nat in l_infinity norm.""" 45 | if self.rand: 46 | x = x_nat + np.random.uniform(-self.epsilon, self.epsilon, x_nat.shape) 47 | else: 48 | x = np.copy(x_nat) 49 | 50 | for i in range(self.k): 51 | grad = sess.run(self.grad, feed_dict={self.model.x_input: x, 52 | self.model.y_input: y}) 53 | 54 | x += self.a * np.sign(grad) 55 | 56 | x = np.clip(x, x_nat - self.epsilon, x_nat + self.epsilon) 57 | x = np.clip(x, 0, 1) # ensure valid pixel range 58 | 59 | return x 60 | 61 | 62 | if __name__ == '__main__': 63 | import json 64 | import sys 65 | import math 66 | 67 | from tensorflow.examples.tutorials.mnist import input_data 68 | 69 | from model import Model 70 | 71 | with open('config.json') as config_file: 72 | config = json.load(config_file) 73 | 74 | model_file = tf.train.latest_checkpoint(config['model_dir']) 75 | if model_file is None: 76 | print('No model found') 77 | sys.exit() 78 | 79 | model = Model() 80 | attack = LinfPGDAttack(model, 81 | config['epsilon'], 82 | config['k'], 83 | config['a'], 84 | config['random_start'], 85 | config['loss_func']) 86 | saver = tf.train.Saver() 87 | 88 | mnist = input_data.read_data_sets('MNIST_data', one_hot=False) 89 | 90 | with tf.Session() as sess: 91 | # Restore the checkpoint 92 | saver.restore(sess, model_file) 93 | 94 | # Iterate over the samples batch-by-batch 95 | num_eval_examples = config['num_eval_examples'] 96 | eval_batch_size = config['eval_batch_size'] 97 | num_batches = int(math.ceil(num_eval_examples / eval_batch_size)) 98 | 99 | x_adv = [] # adv accumulator 100 | 101 | print('Iterating over {} batches'.format(num_batches)) 102 | 103 | for ibatch in range(num_batches): 104 | bstart = ibatch * eval_batch_size 105 | bend = min(bstart + eval_batch_size, num_eval_examples) 106 | print('batch size: {}'.format(bend - bstart)) 107 | 108 | x_batch = mnist.test.images[bstart:bend, :] 109 | y_batch = mnist.test.labels[bstart:bend] 110 | 111 | x_batch_adv = attack.perturb(x_batch, y_batch, sess) 112 | 113 | x_adv.append(x_batch_adv) 114 | 115 | print('Storing examples') 116 | path = config['store_adv_path'] 117 | x_adv = np.concatenate(x_adv, axis=0) 118 | np.save(path, x_adv) 119 | print('Examples stored in {}'.format(path)) 120 | -------------------------------------------------------------------------------- /madry/mnist_challenge/train.py: -------------------------------------------------------------------------------- 1 | """Trains a model, saving checkpoints and tensorboard summaries along 2 | the way.""" 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | from datetime import datetime 8 | import json 9 | import os 10 | import shutil 11 | from timeit import default_timer as timer 12 | 13 | import tensorflow as tf 14 | import numpy as np 15 | from tensorflow.examples.tutorials.mnist import input_data 16 | 17 | from model import Model 18 | from pgd_attack import LinfPGDAttack 19 | 20 | with open('config.json') as config_file: 21 | config = json.load(config_file) 22 | 23 | # Setting up training parameters 24 | tf.set_random_seed(config['random_seed']) 25 | 26 | max_num_training_steps = config['max_num_training_steps'] 27 | num_output_steps = config['num_output_steps'] 28 | num_summary_steps = 
config['num_summary_steps'] 29 | num_checkpoint_steps = config['num_checkpoint_steps'] 30 | 31 | batch_size = config['training_batch_size'] 32 | 33 | # Setting up the data and the model 34 | mnist = input_data.read_data_sets('MNIST_data', one_hot=False) 35 | global_step = tf.contrib.framework.get_or_create_global_step() 36 | model = Model() 37 | 38 | # Setting up the optimizer 39 | train_step = tf.train.AdamOptimizer(1e-4).minimize(model.xent, 40 | global_step=global_step) 41 | 42 | # Set up adversary 43 | attack = LinfPGDAttack(model, 44 | config['epsilon'], 45 | config['k'], 46 | config['a'], 47 | config['random_start'], 48 | config['loss_func']) 49 | 50 | # Setting up the Tensorboard and checkpoint outputs 51 | model_dir = config['model_dir'] 52 | if not os.path.exists(model_dir): 53 | os.makedirs(model_dir) 54 | 55 | # We add accuracy and xent twice so we can easily make three types of 56 | # comparisons in Tensorboard: 57 | # - train vs eval (for a single run) 58 | # - train of different runs 59 | # - eval of different runs 60 | 61 | saver = tf.train.Saver(max_to_keep=3) 62 | tf.summary.scalar('accuracy adv train', model.accuracy) 63 | tf.summary.scalar('accuracy adv', model.accuracy) 64 | tf.summary.scalar('xent adv train', model.xent / batch_size) 65 | tf.summary.scalar('xent adv', model.xent / batch_size) 66 | tf.summary.image('images adv train', model.x_image) 67 | merged_summaries = tf.summary.merge_all() 68 | 69 | shutil.copy('config.json', model_dir) 70 | 71 | with tf.Session() as sess: 72 | # Initialize the summary writer, global variables, and our time counter. 73 | summary_writer = tf.summary.FileWriter(model_dir, sess.graph) 74 | sess.run(tf.global_variables_initializer()) 75 | training_time = 0.0 76 | 77 | # Main training loop 78 | for ii in range(max_num_training_steps): 79 | x_batch, y_batch = mnist.train.next_batch(batch_size) 80 | 81 | # Compute Adversarial Perturbations 82 | start = timer() 83 | x_batch_adv = attack.perturb(x_batch, y_batch, sess) 84 | end = timer() 85 | training_time += end - start 86 | 87 | nat_dict = {model.x_input: x_batch, 88 | model.y_input: y_batch} 89 | 90 | adv_dict = {model.x_input: x_batch_adv, 91 | model.y_input: y_batch} 92 | 93 | # Output to stdout 94 | if ii % num_output_steps == 0: 95 | nat_acc = sess.run(model.accuracy, feed_dict=nat_dict) 96 | adv_acc = sess.run(model.accuracy, feed_dict=adv_dict) 97 | print('Step {}: ({})'.format(ii, datetime.now())) 98 | print(' training nat accuracy {:.4}%'.format(nat_acc * 100)) 99 | print(' training adv accuracy {:.4}%'.format(adv_acc * 100)) 100 | if ii != 0: 101 | print(' {} examples per second'.format( 102 | num_output_steps * batch_size / training_time)) 103 | training_time = 0.0 104 | # Tensorboard summaries 105 | if ii % num_summary_steps == 0: 106 | summary = sess.run(merged_summaries, feed_dict=adv_dict) 107 | summary_writer.add_summary(summary, global_step.eval(sess)) 108 | 109 | # Write a checkpoint 110 | if ii % num_checkpoint_steps == 0: 111 | saver.save(sess, 112 | os.path.join(model_dir, 'checkpoint'), 113 | global_step=global_step) 114 | 115 | # Actual training step 116 | start = timer() 117 | sess.run(train_step, feed_dict=adv_dict) 118 | end = timer() 119 | training_time += end - start 120 | -------------------------------------------------------------------------------- /abs_models/attack_utils.py: -------------------------------------------------------------------------------- 1 | import foolbox 2 | import foolbox.attacks as fa 3 | import numpy as np 4 | import torch 5 | 6 | 
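# project-local modules: numpy/torch helpers (u.n2t, u.dev) and the ABS model classes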
from abs_models import utils as u 7 | from abs_models import models 8 | 9 | 10 | def get_attack(attack, fmodel): 11 | args = [] 12 | kwargs = {} 13 | # L0 14 | if attack == 'SAPA': 15 | metric = foolbox.distances.L0 16 | A = fa.SaltAndPepperNoiseAttack(fmodel) 17 | elif attack == 'PA': 18 | metric = foolbox.distances.L0 19 | A = fa.PointwiseAttack(fmodel) 20 | 21 | # L2 22 | elif 'IGD' in attack: 23 | metric = foolbox.distances.MSE 24 | A = fa.L2BasicIterativeAttack(fmodel) 25 | elif attack == 'AGNA': 26 | metric = foolbox.distances.MSE 27 | kwargs['epsilons'] = np.linspace(0.5, 1, 50) 28 | A = fa.AdditiveGaussianNoiseAttack(fmodel) 29 | elif attack == 'BA': 30 | metric = foolbox.distances.MSE 31 | A = fa.BoundaryAttack(fmodel) 32 | elif 'DeepFool' in attack: 33 | metric = foolbox.distances.MSE 34 | A = fa.DeepFoolL2Attack(fmodel) 35 | elif attack == 'PAL2': 36 | metric = foolbox.distances.MSE 37 | A = fa.PointwiseAttack(fmodel) 38 | 39 | # L inf 40 | elif 'FGSM' in attack and not 'IFGSM' in attack: 41 | metric = foolbox.distances.Linf 42 | A = fa.FGSM(fmodel) 43 | kwargs['epsilons'] = 20 44 | 45 | elif 'IFGSM' in attack: 46 | metric = foolbox.distances.Linf 47 | A = fa.IterativeGradientSignAttack(fmodel) 48 | elif 'PGD' in attack: 49 | metric = foolbox.distances.Linf 50 | A = fa.LinfinityBasicIterativeAttack(fmodel) 51 | elif 'IGM' in attack: 52 | metric = foolbox.distances.Linf 53 | A = fa.MomentumIterativeAttack(fmodel) 54 | else: 55 | raise Exception('Not implemented') 56 | return A, metric, args, kwargs 57 | 58 | 59 | class LineSearchAttack: 60 | def __init__(self, abs_model : models.ELBOVAE): 61 | self.abs = abs_model 62 | 63 | def __call__(self, x, l, n_coarse_steps=3, n_ft_steps=10): 64 | x, l = u.n2t(x), u.n2t(l) 65 | x, l = x.to(u.dev()), l.to(u.dev()) 66 | bs = x.shape[0] 67 | best_other = 0 68 | best_advs = [{'original_label': -1, 'adversarial_label': None, 69 | 'distance': np.inf, 'img': torch.zeros(x.shape[1:]).to(u.dev())} 70 | for _ in range(bs)] 71 | coarse_steps = torch.zeros(bs).to(u.dev()) 72 | 73 | n_adv_found = 0 74 | for i, coarse_step in enumerate(torch.linspace(0, 1., n_coarse_steps).to(u.dev())): 75 | current_adv = (1 - coarse_step) * x + coarse_step * best_other 76 | best_other, current_label = self.get_best_prototypes(current_adv, l) 77 | for j, (current_adv_i, pred_l_i, l_i) in enumerate(zip(current_adv, current_label, l)): 78 | if best_advs[j]['original_label'] == -1 and pred_l_i != l_i: 79 | self.update_adv(best_advs[j], current_adv_i, pred_l_i, l_i, x[j]) 80 | coarse_steps[i] = coarse_step 81 | n_adv_found += 1 82 | if n_adv_found == bs: 83 | break 84 | best_advs_imgs = torch.cat([a['img'][None] for a in best_advs]) 85 | coarse_steps_old = coarse_steps[:, None, None, None] 86 | 87 | # binary search 88 | best_advs_imgs_old = best_advs_imgs.clone() 89 | sign, step = - torch.ones(bs, 1, 1, 1).to(u.dev()), 0.5 90 | for i in range(n_ft_steps): 91 | coarse_steps = coarse_steps_old + step * sign 92 | current_adv = (1 - coarse_steps) * x + coarse_steps * best_advs_imgs_old 93 | _, current_label = self.get_best_prototypes(current_adv, l) 94 | 95 | for j, (pred_l_i, l_i) in enumerate(zip(current_label, l)): 96 | if pred_l_i == l_i: 97 | sign[j] = 1 98 | else: 99 | self.update_adv(best_advs[j], current_adv[j], pred_l_i, l_i, x[j]) 100 | 101 | sign[j] = -1 102 | step /= 2 103 | 104 | return best_advs 105 | 106 | def get_best_prototypes(self, x: torch.Tensor, l: torch.Tensor): 107 | bs = l.shape[0] 108 | p_c, elbos, l_v_classes, reconsts = self.abs.forward(x, 
return_more=True) 109 | _, pred_classes = torch.max(p_c, dim=1) 110 | p_c[range(bs), l] = - np.inf 111 | _, pred_classes_other = torch.max(p_c, dim=1) 112 | best_other_reconst = reconsts[range(bs), pred_classes_other.squeeze()] 113 | best_other_reconst = self.post_process_reconst(best_other_reconst, x) 114 | 115 | return best_other_reconst, pred_classes.squeeze() 116 | 117 | def update_adv(self, best_adv, current_adv, pred_l, orig_l, orig_x): 118 | best_adv['img'] = current_adv.data.clone() 119 | best_adv['original_label'] = orig_l.cpu().numpy() 120 | best_adv['adversarial_label'] = pred_l.cpu().numpy() 121 | best_adv['distance'] = np.mean((current_adv - orig_x).cpu().numpy()**2) 122 | 123 | def post_process_reconst(self, reconst, x): 124 | return reconst 125 | 126 | 127 | class BinaryLineSearchAttack(LineSearchAttack): 128 | def post_process_reconst(self, reconst, x): 129 | return u.binary_projection(reconst, x) 130 | 131 | 132 | def update_distal_adv(a, a_up, grads, opti): 133 | a_up.data = torch.from_numpy(a) 134 | opti.zero_grad() 135 | a_up.grad = torch.from_numpy(grads) 136 | opti.step() 137 | a_up.data.clamp_(0, 1) 138 | a = a_up.data.numpy() 139 | return a 140 | -------------------------------------------------------------------------------- /scripts/attacks.py: -------------------------------------------------------------------------------- 1 | # --- 2 | # jupyter: 3 | # jupytext_format_version: '1.2' 4 | # kernelspec: 5 | # display_name: Python 3 6 | # language: python 7 | # name: python3 8 | # language_info: 9 | # codemirror_mode: 10 | # name: ipython 11 | # version: 3 12 | # file_extension: .py 13 | # mimetype: text/x-python 14 | # name: python 15 | # nbconvert_exporter: python 16 | # pygments_lexer: ipython3 17 | # version: 3.6.4 18 | # --- 19 | 20 | # + 21 | import sys 22 | sys.path.insert(0, './../') 23 | # %load_ext autoreload 24 | # %autoreload 2 25 | # %matplotlib inline 26 | 27 | import torch 28 | from torchvision import datasets, transforms 29 | 30 | import numpy as np 31 | from matplotlib import pyplot as plt 32 | import foolbox 33 | from foolbox import attacks as fa 34 | 35 | # own modules 36 | from abs_models import utils as u 37 | from abs_models import models as mz 38 | from abs_models import attack_utils as au 39 | # - 40 | 41 | model = mz.get_VAE(n_iter=10) # ABS, do n_iter=50 for original model 42 | # model = mz.get_VAE(binary=True) # ABS with scaling and binaryzation 43 | # model = mz.get_binary_CNN() # Binary CNN 44 | # model = mz.get_CNN() # Vanilla CNN 45 | # model = mz.get_NearestNeighbor() # Nearest Neighbor, "nearest L2 dist to each class"=logits 46 | # model = mz.get_madry() # Robust network from Madry et al. in tf 47 | 48 | # code is agnostic of pytorch/ tensorflow model --> foolbox model 49 | if model.code_base == 'tensorflow': 50 | fmodel = foolbox.models.TensorFlowModel(model.x_input, model.pre_softmax, (0., 1.), 51 | channel_axis=3) 52 | elif model.code_base == 'pytorch': 53 | model.eval() 54 | fmodel = foolbox.models.PyTorchModel(model, # return logits in shape (bs, n_classes) 55 | bounds=(0., 1.), num_classes=10, 56 | device=u.dev()) 57 | else: 58 | print('not implemented') 59 | 60 | # test model 61 | b, l = u.get_batch(bs=10000) # returns random batch as np.array 62 | pred_label = np.argmax(fmodel.batch_predictions(b), axis=1) 63 | print('score', float(np.sum(pred_label == l)) / b.shape[0]) 64 | 65 | # # Decision based attacks 66 | # Note that this is only demo code. All experiments were optimized to our compute architecture. 
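# Decision-based attacks (e.g. fa.BoundaryAttack) only query the final class
# decision, so they also work without gradients; the demo below runs DeepFool
# and, for models without usable gradients, estimates them from the scores.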
67 | 68 | b, l = u.get_batch(bs=1) # returns random batch 69 | 70 | # + 71 | import time 72 | start = time.time() 73 | att = fa.DeepFoolL2Attack(fmodel) 74 | metric = foolbox.distances.MSE 75 | criterion = foolbox.criteria.Misclassification() 76 | 77 | plt.imshow(b[0, 0], cmap='gray') 78 | plt.title('orig') 79 | plt.axis('off') 80 | plt.show() 81 | 82 | # Estimate gradients from scores 83 | if not model.has_grad: 84 | GE = foolbox.gradient_estimators.CoordinateWiseGradientEstimator(0.1) 85 | fmodel = foolbox.models.ModelWithEstimatedGradients(fmodel, GE) 86 | 87 | # gernate Adversarial 88 | a = foolbox.adversarial.Adversarial(fmodel, criterion, b[0], l[0], distance=metric) 89 | att(a) 90 | 91 | print('runtime', time.time() - start, 'seconds') 92 | print('pred', np.argmax(fmodel.predictions(a.image))) 93 | if a.image is not None: # attack was successful 94 | plt.imshow(a.image[0], cmap='gray') 95 | plt.title('adv') 96 | plt.axis('off') 97 | plt.show() 98 | # - 99 | 100 | # # get Trash Adversarials 101 | 102 | from foolbox.gradient_estimators import CoordinateWiseGradientEstimator as CWGE 103 | 104 | a = np.random.random((1, 28, 28)).astype(np.float32) 105 | a_helper = torch.tensor(torch.from_numpy(a.copy()), requires_grad=True) 106 | fixed_class = 1 107 | GE = CWGE(1.) 108 | 109 | opti = torch.optim.SGD([a_helper], lr=1, momentum=0.95) 110 | 111 | # + 112 | confidence_level = model.confidence_level # abs 0.0000031, CNN 1439000, madry 60, 1-NN 0.000000000004 113 | logits_scale = model.logit_scale # ABS 430, madry 1, CNN 1, 1-NN 5 114 | 115 | a_orig = a 116 | plt.imshow(u.t2n(a[0]), cmap='gray') 117 | plt.show() 118 | 119 | for i in range(10000): 120 | logits = fmodel.predictions(a) 121 | probs = u.t2n(u.confidence_softmax(logits_scale*torch.from_numpy(logits[None, :]), dim=1, 122 | const=confidence_level))[0] 123 | pred_class = np.argmax(u.t2n(logits).squeeze()) 124 | 125 | if probs[fixed_class]>= 0.9: 126 | break 127 | grads = GE(fmodel.batch_predictions, a, fixed_class, (0,1)) 128 | 129 | a = au.update_distal_adv(a, a_helper, grads, opti) 130 | if i % 1000 == 0: 131 | print(f'probs {probs[pred_class]:.3f} class', pred_class) 132 | fig, ax = plt.subplots(1,3, squeeze=False, figsize=(10, 4)) 133 | ax[0, 0].imshow(u.t2n(a[0]), cmap='gray') 134 | ax[0, 1].imshow(u.t2n(grads[0]), cmap='gray') 135 | ax[0, 2].imshow(np.sign(grads[0]), cmap='gray') 136 | plt.show() 137 | plt.imshow(u.t2n(a[0]), cmap='gray') 138 | plt.show() 139 | # - 140 | 141 | # # Latent Descent Attack 142 | 143 | # + 144 | # only for abs 145 | att = au.LineSearchAttack(model) # BinaryLineSearchAttack 146 | b, l = u.get_batch(bs=200) 147 | 148 | advs = att(b, l, n_coarse_steps=50+1, n_ft_steps=2) 149 | 150 | for adv in advs: 151 | adv['img'] = adv['img'].cpu().numpy() 152 | 153 | for i, (a_i, b_i) in enumerate(zip(advs, b)): 154 | l2 = np.sqrt(a_i['distance'] * 784) # convert from MSE 155 | 156 | fig, ax = plt.subplots(1, 2, squeeze=False) 157 | ax[0, 0].set_title(str(a_i['original_label'])) 158 | ax[0, 0].imshow(u.t2n(b_i[0]), cmap='gray') 159 | ax[0, 1].set_title(str(a_i['adversarial_label'])) 160 | ax[0, 1].imshow(u.t2n(a_i['img'][0]), cmap='gray') 161 | plt.show() 162 | if i ==10: 163 | break 164 | print('mean L2', np.mean([np.sqrt(a_i['distance'] * 784) for a_i in advs])) 165 | -------------------------------------------------------------------------------- /madry/mnist_challenge/eval.py: -------------------------------------------------------------------------------- 1 | """ 2 | Infinite evaluation loop going through the 
checkpoints in the model directory 3 | as they appear and evaluating them. Accuracy and average loss are printed and 4 | added as tensorboard summaries. 5 | """ 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | from datetime import datetime 11 | import json 12 | import math 13 | import os 14 | import sys 15 | import time 16 | 17 | import tensorflow as tf 18 | from tensorflow.examples.tutorials.mnist import input_data 19 | 20 | from model import Model 21 | from pgd_attack import LinfPGDAttack 22 | 23 | # Global constants 24 | with open('config.json') as config_file: 25 | config = json.load(config_file) 26 | num_eval_examples = config['num_eval_examples'] 27 | eval_batch_size = config['eval_batch_size'] 28 | eval_on_cpu = config['eval_on_cpu'] 29 | 30 | model_dir = config['model_dir'] 31 | 32 | # Set upd the data, hyperparameters, and the model 33 | mnist = input_data.read_data_sets('MNIST_data', one_hot=False) 34 | 35 | if eval_on_cpu: 36 | with tf.device("/cpu:0"): 37 | model = Model() 38 | attack = LinfPGDAttack(model, 39 | config['epsilon'], 40 | config['k'], 41 | config['a'], 42 | config['random_start'], 43 | config['loss_func']) 44 | else: 45 | model = Model() 46 | attack = LinfPGDAttack(model, 47 | config['epsilon'], 48 | config['k'], 49 | config['a'], 50 | config['random_start'], 51 | config['loss_func']) 52 | 53 | global_step = tf.contrib.framework.get_or_create_global_step() 54 | 55 | # Setting up the Tensorboard and checkpoint outputs 56 | if not os.path.exists(model_dir): 57 | os.makedirs(model_dir) 58 | eval_dir = os.path.join(model_dir, 'eval') 59 | if not os.path.exists(eval_dir): 60 | os.makedirs(eval_dir) 61 | 62 | last_checkpoint_filename = '' 63 | already_seen_state = False 64 | 65 | saver = tf.train.Saver() 66 | summary_writer = tf.summary.FileWriter(eval_dir) 67 | 68 | # A function for evaluating a single checkpoint 69 | def evaluate_checkpoint(filename): 70 | with tf.Session() as sess: 71 | # Restore the checkpoint 72 | saver.restore(sess, filename) 73 | 74 | # Iterate over the samples batch-by-batch 75 | num_batches = int(math.ceil(num_eval_examples / eval_batch_size)) 76 | total_xent_nat = 0. 77 | total_xent_adv = 0. 
78 | total_corr_nat = 0 79 | total_corr_adv = 0 80 | 81 | for ibatch in range(num_batches): 82 | bstart = ibatch * eval_batch_size 83 | bend = min(bstart + eval_batch_size, num_eval_examples) 84 | 85 | x_batch = mnist.test.images[bstart:bend, :] 86 | y_batch = mnist.test.labels[bstart:bend] 87 | 88 | dict_nat = {model.x_input: x_batch, 89 | model.y_input: y_batch} 90 | 91 | x_batch_adv = attack.perturb(x_batch, y_batch, sess) 92 | 93 | dict_adv = {model.x_input: x_batch_adv, 94 | model.y_input: y_batch} 95 | 96 | cur_corr_nat, cur_xent_nat = sess.run( 97 | [model.num_correct,model.xent], 98 | feed_dict = dict_nat) 99 | cur_corr_adv, cur_xent_adv = sess.run( 100 | [model.num_correct,model.xent], 101 | feed_dict = dict_adv) 102 | 103 | total_xent_nat += cur_xent_nat 104 | total_xent_adv += cur_xent_adv 105 | total_corr_nat += cur_corr_nat 106 | total_corr_adv += cur_corr_adv 107 | 108 | avg_xent_nat = total_xent_nat / num_eval_examples 109 | avg_xent_adv = total_xent_adv / num_eval_examples 110 | acc_nat = total_corr_nat / num_eval_examples 111 | acc_adv = total_corr_adv / num_eval_examples 112 | 113 | summary = tf.Summary(value=[ 114 | tf.Summary.Value(tag='xent adv eval', simple_value= avg_xent_adv), 115 | tf.Summary.Value(tag='xent adv', simple_value= avg_xent_adv), 116 | tf.Summary.Value(tag='xent nat', simple_value= avg_xent_nat), 117 | tf.Summary.Value(tag='accuracy adv eval', simple_value= acc_adv), 118 | tf.Summary.Value(tag='accuracy adv', simple_value= acc_adv), 119 | tf.Summary.Value(tag='accuracy nat', simple_value= acc_nat)]) 120 | summary_writer.add_summary(summary, global_step.eval(sess)) 121 | 122 | print('natural: {:.2f}%'.format(100 * acc_nat)) 123 | print('adversarial: {:.2f}%'.format(100 * acc_adv)) 124 | print('avg nat loss: {:.4f}'.format(avg_xent_nat)) 125 | print('avg adv loss: {:.4f}'.format(avg_xent_adv)) 126 | 127 | # Infinite eval loop 128 | while True: 129 | cur_checkpoint = tf.train.latest_checkpoint(model_dir) 130 | 131 | # Case 1: No checkpoint yet 132 | if cur_checkpoint is None: 133 | if not already_seen_state: 134 | print('No checkpoint yet, waiting ...', end='') 135 | already_seen_state = True 136 | else: 137 | print('.', end='') 138 | sys.stdout.flush() 139 | time.sleep(10) 140 | # Case 2: Previously unseen checkpoint 141 | elif cur_checkpoint != last_checkpoint_filename: 142 | print('\nCheckpoint {}, evaluating ... ({})'.format(cur_checkpoint, 143 | datetime.now())) 144 | sys.stdout.flush() 145 | last_checkpoint_filename = cur_checkpoint 146 | already_seen_state = False 147 | evaluate_checkpoint(cur_checkpoint) 148 | # Case 3: Previously evaluated checkpoint 149 | else: 150 | if not already_seen_state: 151 | print('Waiting for the next checkpoint ... 
({}) '.format( 152 | datetime.now()), 153 | end='') 154 | already_seen_state = True 155 | else: 156 | print('.', end='') 157 | sys.stdout.flush() 158 | time.sleep(10) 159 | -------------------------------------------------------------------------------- /abs_models/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torchvision 3 | import numpy as np 4 | import time 5 | 6 | 7 | def get_batch(bs=1): 8 | loader = torch.utils.data.DataLoader( 9 | torchvision.datasets.MNIST('./../data/MNIST/', train=False, download=True, 10 | transform=torchvision.transforms.ToTensor()), 11 | batch_size=bs, shuffle=True) 12 | b, l = next(iter(loader)) 13 | return t2n(b), t2n(l) 14 | 15 | 16 | def clip_to_sphere(tens, radius, channel_dim=1): 17 | radi2 = torch.sum(tens**2, dim=channel_dim, keepdim=True) 18 | mask = torch.gt(radi2, radius**2).expand_as(tens) 19 | tens[mask] = torch.sqrt( 20 | tens[mask]**2 / radi2.expand_as(tens)[mask] * radius**2) 21 | return tens 22 | 23 | 24 | def binarize(tens, thresh=0.5): 25 | if isinstance(tens, torch.Tensor): 26 | tens = tens.clone() 27 | else: 28 | tens = np.copy(tens) 29 | tens[tens < thresh] = 0. 30 | tens[tens >= thresh] = 1. 31 | return tens 32 | 33 | 34 | def tens2numpy(tens): 35 | if tens.is_cuda: 36 | tens = tens.cpu() 37 | if tens.requires_grad: 38 | tens = tens.detach() 39 | return tens.numpy() 40 | 41 | 42 | def t2n(tens): 43 | if isinstance(tens, np.ndarray): 44 | return tens 45 | elif isinstance(tens, list): 46 | return np.array(tens) 47 | elif isinstance(tens, float) or isinstance(tens, int): 48 | return np.array([tens]) 49 | else: 50 | return tens2numpy(tens) 51 | 52 | 53 | def n2t(tens): 54 | return torch.from_numpy(tens).to(dev()) 55 | 56 | 57 | class LinearActFct(torch.nn.Module): 58 | def forward(self, input): 59 | return input 60 | 61 | def __repr__(self): 62 | return self.__class__.__name__ 63 | 64 | 65 | def tsum(input, axes=None, keepdim=False): 66 | if axes is None: 67 | axes = range(len(input.size())) 68 | 69 | # probably some check for uniqueness of axes 70 | if keepdim: 71 | for ax in axes: 72 | input = input.sum(ax, keepdim=True) 73 | else: 74 | for ax in sorted(axes, reverse=True): 75 | input = input.sum(ax, keepdim=False) 76 | 77 | return input 78 | 79 | 80 | def tlog(x): 81 | if isinstance(x, float): 82 | return np.log(x) 83 | elif isinstance(x, int): 84 | return np.log(float(x)) 85 | else: 86 | return torch.log(x) 87 | 88 | 89 | def best_other(logits, gt_label): 90 | best_other = np.argsort(logits) 91 | best_other = best_other[best_other != gt_label][-1] 92 | return best_other 93 | 94 | 95 | def L2(a, b, axes=None): 96 | if len(a.shape) != len(b.shape): 97 | print(a.shape, b.shape) 98 | raise(Exception('broadcasting not possible')) 99 | L2_dist = torch.sqrt(tsum((a - b)**2, axes=axes)) 100 | return L2_dist 101 | 102 | 103 | def auto_batch(max_batch_size, f, xs, *args, verbose=False, **kwargs): 104 | """Will automatically pass list subxbatches of xs to f. 
105 | f must return torch tensors""" 106 | if not isinstance(xs, list): 107 | xs = [xs] 108 | n = xs[0].shape[0] 109 | y = [] 110 | for start in range(0, n, max_batch_size): 111 | xb = [x[start:start + max_batch_size] for x in xs] 112 | yb = f(*xb, *args, **kwargs) 113 | y.append(yb) 114 | if not isinstance(yb, tuple): 115 | y = torch.cat(y) 116 | assert y.shape[0] == n 117 | return y 118 | else: 119 | return (torch.cat(y_i) for y_i in list(zip(*y))) 120 | 121 | 122 | def timeit(method): 123 | def timed(*args, **kw): 124 | ts = time.time() 125 | result = method(*args, **kw) 126 | te = time.time() 127 | print('%r %2.2f ms' % (method.__name__, (te - ts) * 1000)) 128 | return result 129 | return timed 130 | 131 | 132 | def t_loop_collect(fct, iter_obj, *args, concat_dim=1, **kwargs): 133 | all_outs = [] 134 | for obj in iter_obj: 135 | outs = fct(obj, *args, **kwargs) 136 | all_outs.append(outs) 137 | all_outs = list(map(list, zip(*all_outs))) 138 | all_outs = [torch.cat(out, dim=concat_dim) for out in all_outs] 139 | return all_outs 140 | 141 | def dev(): 142 | if torch.cuda.is_available(): 143 | return 'cuda:0' 144 | else: 145 | return 'cpu' 146 | 147 | 148 | def y_2_one_hot(y, n_classes=10): 149 | assert len(y.shape) == 1 150 | y_one_hot = torch.FloatTensor(y.shape[0], n_classes).to(dev()) 151 | y_one_hot.zero_() 152 | return y_one_hot.scatter_(1, y[:, None], 1) 153 | 154 | 155 | def confidence_softmax(x, const=0, dim=1): 156 | x = torch.exp(x) 157 | n_classes = x.shape[1] 158 | # return x 159 | norms = torch.sum(x, dim=dim, keepdim=True) 160 | return (x + const) / (norms + const * n_classes) 161 | 162 | 163 | def cross_entropy(label, logits): 164 | """Calculates the cross-entropy. 165 | logits: np.array with shape (bs, n_classes) 166 | label: np.array with shape (bs) 167 | 168 | """ 169 | assert label.shape[0] == logits.shape[0] 170 | assert len(logits.shape) == 2 171 | 172 | # for numerical reasons we subtract the max logit 173 | # (mathematically it doesn't matter!) 
174 | # otherwise exp(logits) might become too large or too small 175 | logits = logits - np.max(logits, axis=1)[:, None] 176 | e = np.exp(logits) 177 | s = np.sum(e, axis=1) 178 | ce = np.log(s) - logits[np.arange(label.shape[0]), label] 179 | return ce 180 | 181 | 182 | def show_gpu_usages(thresh=100000): 183 | tmp = 0 184 | import gc 185 | for obj in gc.get_objects(): 186 | try: 187 | if torch.is_tensor(obj) or (hasattr(obj, 'data') and torch.is_tensor(obj.data)): 188 | if obj.is_cuda and np.prod(obj.shape) > thresh \ 189 | and not isinstance(obj, torch.nn.parameter.Parameter): 190 | tmp += 1 191 | print(type(obj), list(obj.size()), obj.dtype, obj.is_cuda, 192 | np.prod(obj.shape), tmp) 193 | except: 194 | pass 195 | print() 196 | 197 | 198 | def binary_projection(rec, orig): 199 | # rec > 0.5 and orig > 0.5 --> rec[mask] = orig[mask] 200 | mask = [(rec >= 0.5) & (orig >= 0.5)] 201 | rec[mask] = orig[mask] 202 | # both smaller 0.5 203 | mask = [(rec < 0.5) & (orig < 0.5)] 204 | rec[mask] = orig[mask] 205 | 206 | # rec > 0.5 and orig < 0.5 --> rec[mask] 0.5 207 | rec[(rec >= 0.5) & (orig < 0.5)] = 0.5 208 | rec[(rec < 0.5) & (orig >= 0.5)] = 0.49999 209 | return rec 210 | 211 | 212 | 213 | pass 214 | -------------------------------------------------------------------------------- /abs_models/inference.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import optim 3 | from torch.nn import functional as F 4 | from torch import tensor 5 | import numpy as np 6 | 7 | from abs_models import utils as u 8 | from abs_models import loss_functions 9 | 10 | 11 | def inference(AEs, x_inp, n_samples, n_iter, beta, GM, fraction_to_dismiss=0.1, lr=0.01, 12 | n_classes=10, nd=8, clip=2, GD_inference_b=True, 13 | dist_fct=loss_functions.squared_L2_loss): 14 | 15 | if n_iter == 0: 16 | GD_inference_b = False 17 | 18 | tmp_bs = x_inp.size()[0] 19 | 20 | # get_images has built-in caching 21 | if n_samples not in GM.th_images: 22 | print('setting random seed') 23 | # fix random numbers for attacks 24 | torch.cuda.manual_seed_all(999) 25 | torch.manual_seed(1234) 26 | np.random.seed(1234) 27 | # generate a bunch of samples for each VAE 28 | GM.get_images(n_samples, fraction_to_dismiss) 29 | 30 | # use caching for conversion to torch 31 | x_test_samples = GM.th_images[n_samples] 32 | 33 | # calculate the likelihood for all samples 34 | with torch.no_grad(): 35 | bs, n_ch, nx, ny = x_inp.shape 36 | n_samples, n_latent = GM.l_v[n_samples].shape[-4:-2] 37 | 38 | all_ELBOs = \ 39 | [loss_functions.ELBOs2(x_inp, recs.detach(), GM.l_v[n_samples], beta) 40 | for recs in x_test_samples] 41 | all_ELBOs = torch.stack(all_ELBOs, dim=1) 42 | 43 | x_inp = x_inp.view(bs, n_ch, nx, ny) 44 | 45 | # tmp save memory 46 | # GM.th_images[n_samples] = GM.th_images[n_samples].cpu() 47 | # GM.l_v[n_samples] = GM.l_v[n_samples].cpu() 48 | 49 | # select the best prototype for each VAE 50 | min_val_c, min_val_c_args = torch.min(all_ELBOs, dim=2) 51 | indices = min_val_c_args.view(tmp_bs * n_classes) 52 | # l_v_best shape: (bs, n_classes, 8, 1, 1) 53 | l_v_best = GM.l_v[n_samples][indices].view(tmp_bs, n_classes, nd, 1, 1) 54 | 55 | if GD_inference_b: # gradient descent in latent space 56 | return GD_inference(AEs, l_v_best.data, x_inp.data, 57 | clip=clip, lr=lr, n_iter=n_iter, beta=beta, dist_fct=dist_fct) 58 | else: 59 | if tmp_bs == 1: 60 | all_recs = GM.images[n_samples][list(range(n_classes)), u.t2n(indices), :, :, :] 61 | else: 62 | all_recs = None 63 | return 
min_val_c, l_v_best, all_recs 64 | 65 | 66 | def GD_inference(AEs, l_v_best, x_inp, clip=5, lr=0.01, n_iter=20, 67 | beta=1, dist_fct=loss_functions.squared_L2_loss): 68 | n_classes = len(AEs) 69 | 70 | # l_v_best are the latents 71 | # has shape (batch_size, n_classes == 10, n_latents == 8) + singleton dims 72 | 73 | # do gradient descent w.r.t. ELBO in latent space starting from l_v_best 74 | def gd_inference_b(l_v_best, x_inp, AEs, n_classes=10, clip=5, lr=0.01, n_iter=20, 75 | beta=1, dist_fct=loss_functions.squared_L2_loss): 76 | 77 | bs, n_ch, nx, ny = x_inp.shape 78 | with torch.enable_grad(): 79 | l_v_best = l_v_best.data.clone().detach().requires_grad_(True).to(u.dev()) 80 | opti = optim.Adam([l_v_best], lr=lr) 81 | for i in range(n_iter): 82 | ELBOs = [] 83 | all_recs = [] 84 | for j in range(n_classes): 85 | if i == n_iter - 1: 86 | l_v_best = l_v_best.detach() # no gradients in last run 87 | AEs[j].eval() 88 | 89 | rec = torch.sigmoid(AEs[j].Decoder.forward(l_v_best[:, j])) 90 | 91 | ELBOs.append(loss_functions.ELBOs(rec, # (bs, n_ch, nx, ny) 92 | l_v_best[:, j], # (bs, n_latent, 1, 1) 93 | x_inp, # (bs, n_ch, nx, ny) 94 | beta=beta, 95 | dist_fct=dist_fct)) 96 | if i == n_iter - 1: 97 | all_recs.append(rec.view(bs, 1, n_ch, nx, ny).detach()) 98 | 99 | ELBOs = torch.cat(ELBOs, dim=1) 100 | if i < n_iter - 1: 101 | loss = (torch.sum(ELBOs)) - 8./784./2 # historic reasons 102 | # backward 103 | opti.zero_grad() 104 | loss.backward() 105 | opti.step() 106 | l_v_best.data = u.clip_to_sphere(l_v_best.data, clip, channel_dim=2) 107 | else: 108 | opti.zero_grad() 109 | all_recs = torch.cat(all_recs, dim=1) 110 | 111 | return ELBOs.detach(), l_v_best.detach(), all_recs 112 | 113 | ELBOs, l_v_best, all_recs = u.auto_batch(1000, gd_inference_b, [l_v_best, x_inp], AEs, 114 | n_classes=n_classes, clip=clip, lr=lr, 115 | n_iter=n_iter, beta=beta, dist_fct=dist_fct) 116 | 117 | return ELBOs, l_v_best, all_recs 118 | 119 | 120 | # pytorch 1.0: 121 | # def GD_inference_new(AEs, l_v_best, x_inp, clip=5, lr=0.01, n_iter=20, 122 | # beta=1, dist_fct=loss_functions.squared_L2_loss): 123 | # n_classes = len(AEs) 124 | # 125 | # # l_v_best are the latents 126 | # # have shape (batch_size, n_classes == 10, n_latents == 8) + singleton dims 127 | # 128 | # # do gradient descent w.r.t. 
ELBO in latent space starting from l_v_best 129 | # def gd_inference_b(l_v_best, x_inp, AEs, clip=5, lr=0.01, n_iter=20, 130 | # beta=1, dist_fct=loss_functions.squared_L2_loss): 131 | # 132 | # with torch.enable_grad(): 133 | # l_v_best = l_v_best.data.clone().detach().requires_grad_(True).to(u.dev()) 134 | # opti = optim.Adam([l_v_best], lr=lr) 135 | # for i in range(n_iter): 136 | # recs = torch.nn.parallel.parallel_apply( 137 | # [AE.Decoder.forward for AE in AEs], 138 | # [best_latent for best_latent in l_v_best.transpose(0, 1)]) 139 | # recs = torch.nn.functional.sigmoid(torch.stack(recs, dim=1)) 140 | # ELBOs = loss_functions.ELBOs(recs, l_v_best, x_inp[:, None], beta=beta, 141 | # dist_fct=dist_fct)[..., 0] 142 | # 143 | # if i < n_iter - 1: 144 | # loss = (torch.sum(ELBOs)) - 8./784./2 # historic reasons 145 | # # backward 146 | # opti.zero_grad() 147 | # loss.backward() 148 | # opti.step() 149 | # l_v_best.data = u.clip_to_sphere(l_v_best.data, clip, channel_dim=2) 150 | # else: 151 | # opti.zero_grad() 152 | # 153 | # return ELBOs.detach(), l_v_best.detach(), recs.detach() 154 | # 155 | # ELBOs, l_v_best, all_recs = u.auto_batch(2000, gd_inference_b, [l_v_best, x_inp], AEs, 156 | # clip=clip, lr=lr, n_iter=n_iter, beta=beta, dist_fct=dist_fct) 157 | # 158 | # return ELBOs, l_v_best, all_recs 159 | -------------------------------------------------------------------------------- /abs_models/visualization.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from matplotlib import pyplot as plt 4 | from PIL import Image 5 | 6 | from abs_models import utils as u 7 | 8 | 9 | def visualize_image(ax, im, title=None, clear=False, **kwargs): 10 | if clear: 11 | ax.cla() 12 | ax.imshow(im, **kwargs) 13 | if title is not None: 14 | ax.set_title(title) 15 | ax.axis('off') 16 | return(ax) 17 | 18 | 19 | def plot(ax, y_datas, x_data=None, title=None, clear=True, 20 | scale=None, legend=None): 21 | if not any(isinstance(i, list) for i in y_datas): 22 | y_datas = [y_datas] 23 | if clear: 24 | ax.clear() 25 | if x_data is None: 26 | x_data = range(len(y_datas[0])) 27 | 28 | # acutal plotting 29 | plots = [] 30 | for y_data in y_datas: 31 | pl, = ax.plot(x_data, y_data) 32 | plots.append(pl) 33 | 34 | if legend: 35 | ax.legend(plots, legend) 36 | if scale is not None: 37 | ax.set_yscale(scale) 38 | if title is not None: 39 | ax.set_title(title) 40 | return ax 41 | 42 | 43 | def scatter(ax, x_data, y_data, title=None, clear=True): 44 | if clear: 45 | ax.clear() 46 | ax.scatter(x_data, y_data) 47 | if title is not None: 48 | ax.set_title(title) 49 | 50 | 51 | def subplots(*args, height=6, width=15, **kwargs): 52 | fig, ax = plt.subplots(*args, squeeze=False, **kwargs) 53 | if height is not None: 54 | fig.set_figheight(height) 55 | if width is not None: 56 | fig.set_figwidth(width) 57 | return fig, ax 58 | 59 | 60 | class Visualizer: 61 | def __init__(self): 62 | self.plots = {} 63 | self.i = -1 64 | self.reset() 65 | 66 | def reset(self): 67 | self.ny = 4 68 | self.nx = 4 69 | fig = plt.figure() 70 | plt.ion() 71 | fig.show() 72 | fig.canvas.draw() 73 | 74 | self.fig = fig 75 | self.i = 0 76 | # for key in self.plots.keys(): 77 | # self.plots[key].ax = self.get_next_plot() 78 | 79 | def add_scalar(self, name, y, x): 80 | y = u.t2n(y) 81 | if name in self.plots.keys(): 82 | self.plots[name].x.append(x) 83 | self.plots[name].y.append(y) 84 | else: 85 | self.plots[name] = PlotObj(x, y, self.get_next_plot()) 86 | 
self.plots[name].ax.clear() 87 | plot(self.plots[name].ax, self.plots[name].y, 88 | self.plots[name].x, title=name) 89 | self.fig.canvas.draw() 90 | 91 | def add_image(self, name, img, x): 92 | if not isinstance(img, np.ndarray): 93 | img = u.t2n(img) 94 | img = img.squeeze() 95 | if name not in self.plots.keys(): 96 | self.plots[name] = self.plots[name] \ 97 | = PlotObj(0, 0, self.get_next_plot()) 98 | visualize_image(self.plots[name].ax, img, title=name, cmap='gray') 99 | 100 | def get_next_plot(self): 101 | self.i += 1 102 | ax = self.fig.add_subplot(self.nx, self.ny, self.i) 103 | return ax 104 | 105 | 106 | class PlotObj: 107 | def __init__(self, x, y, ax): 108 | self.x = [x] 109 | self.y = [y] 110 | self.ax = ax 111 | 112 | 113 | # visualize hidden space 114 | class RobNNVisualisor(object): 115 | def __init__(self): 116 | self.xl = [] 117 | self.yl = [] 118 | self.cl = [] 119 | 120 | def generate_data(self, model, loader, cuda=False): 121 | for i, (test_data, test_label) in enumerate(loader): 122 | if i == int(np.ceil(400 / loader.batch_size)): 123 | break 124 | x = test_data 125 | yt = test_label 126 | x = x.to(u.dev()) 127 | model.forward(x) 128 | latent = model.latent.cpu().data.numpy().swapaxes(0, 1).squeeze() 129 | self.xl += latent[0].tolist() 130 | self.yl += latent[1].tolist() 131 | self.cl += yt.data.numpy().tolist() 132 | 133 | def visualize_hidden_space(self, fig, ax, model=None, 134 | loader=None, cuda=False, 135 | reload=False, colorbar=False): 136 | if self.xl == [] or reload: 137 | self.generate_data(model, loader, cuda=cuda) 138 | cmap = plt.cm.get_cmap("viridis", 10) 139 | 140 | pl = ax.scatter(self.xl, self.yl, c=self.cl, label=self.cl, 141 | vmin=-0.5, vmax=9.5, cmap=cmap) 142 | 143 | if colorbar: 144 | fig.colorbar(pl, ax=ax, ticks=range(10)) 145 | return ax 146 | 147 | 148 | def fig2img(fig): 149 | """ 150 | @brief Convert a Matplotlib figure to a PIL Image in RGBA format 151 | and return it 152 | @param fig a matplotlib figure 153 | @return a Python Imaging Library ( PIL ) image 154 | """ 155 | # put the figure pixmap into a numpy array 156 | buf = fig2data(fig) 157 | w, h, d = buf.shape 158 | return Image.frombytes("RGBA", (w, h), buf.tostring()) 159 | 160 | 161 | def fig2data(fig): 162 | """ 163 | @brief Convert a Matplotlib figure to a 4D numpy array with 164 | RGBA channels and return it 165 | @param fig a matplotlib figure 166 | @return a numpy 3D array of RGBA values 167 | """ 168 | # draw the renderer 169 | fig.canvas.draw() 170 | 171 | # Get the RGBA buffer from the figure 172 | w, h = fig.canvas.get_width_height() 173 | buf = np.frombuffer(fig.canvas.tostring_argb(), dtype=np.uint8) 174 | buf.shape = (w, h, 4) 175 | 176 | # canvas.tostring_argb give pixmap in ARGB mode. 177 | # Roll the ALPHA channel to have it in RGBA mode 178 | buf = np.roll(buf, 3, axis=2) 179 | return buf 180 | 181 | 182 | # adapted from https://github.com/lanpa/tensorboard-pytorch 183 | def tens2scattters(tens, lims=None, labels=None): 184 | tens_np = u.tens2numpy(tens) 185 | labels = u.tens2numpy(labels) 186 | 187 | # draw 188 | fig = plt.figure() 189 | ax = plt.gca() 190 | ax.scatter(tens_np[0], tens_np[1], c=labels) 191 | plt.axis('scaled') 192 | if lims is not None: 193 | ax.set_xlim(lims[0], lims[1]) 194 | ax.set_ylim(lims[0], lims[1]) 195 | return fig2data(fig) 196 | 197 | 198 | def fig2data(fig): 199 | fig.canvas.draw() 200 | # Now we can save it to a numpy array. 
201 | data = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8) 202 | data = data.reshape(fig.canvas.get_width_height()[::-1] + (3,)) 203 | plt.close() 204 | return data 205 | 206 | 207 | def visualize_latent_distr(CNN, nd, limit=2, n_grid=100): 208 | limit = 2 209 | n_grid = 100 210 | fig, ax = subplots(1, 1, width=7, height=6) 211 | fig.subplots_adjust(right=0.8) 212 | grids = [(np.linspace(-limit, limit, n_grid)) for i in range(nd)] 213 | xys = np.array(np.meshgrid(*grids)) 214 | xys = np.moveaxis(xys, 0, -1).reshape(n_grid ** nd, nd) 215 | outs = CNN.forward(torch.from_numpy(xys[:, :, None, None]).type(torch.cuda.FloatTensor)) # noqa: E501 216 | outs = u.t2n(outs.squeeze()) 217 | sc = ax[0, 0].scatter(xys[:, 0], xys[:, 1], c=(outs - np.min(outs)) / (np.max(outs) - np.min(outs))) # noqa: E501 218 | cbar_ax = fig.add_axes([0.85, 0.15, 0.05, 0.7]) 219 | fig.colorbar(sc, cax=cbar_ax) 220 | return fig2data(fig) 221 | 222 | 223 | if __name__ == '__main__': 224 | fig, ax = subplots(2) 225 | print(ax) 226 | -------------------------------------------------------------------------------- /abs_models/models.py: -------------------------------------------------------------------------------- 1 | from os.path import join, dirname 2 | import torch 3 | from torch import nn 4 | from torchvision import datasets, transforms 5 | 6 | from abs_models import utils as u 7 | from abs_models import nets 8 | from abs_models.inference import inference 9 | from abs_models import sampling 10 | 11 | DEFAULT_PATH = dirname(__file__) 12 | 13 | 14 | class ELBOVAE(nn.Module): 15 | def __init__(self, AEs, n_samples, n_iter, beta, GM, 16 | fraction_to_dismiss=0.1, clip=5, lr=0.05): 17 | 18 | super().__init__() 19 | self.AEs = AEs 20 | for i, AE in enumerate(self.AEs): 21 | self.add_module(f'VAE_{i}', AE) 22 | self.n_samples = n_samples 23 | self.n_iter = n_iter 24 | self.beta = beta 25 | self.GM = GM 26 | self.fraction_to_dismiss = fraction_to_dismiss 27 | self.clip = clip 28 | self.lr = lr 29 | self.logit_scale = 440 30 | self.confidence_level = 0.000039 31 | self.name_check = 'MNIST_MSE' 32 | 33 | def forward(self, x, return_more=False): 34 | # assert (torch.ge(x, 0).all()) 35 | # assert (torch.le(x, 1).all()) 36 | 37 | ELBOs, l_v_classes, reconsts = inference(self.AEs, x, self.n_samples, self.n_iter, 38 | self.beta, self.GM, self.fraction_to_dismiss, 39 | clip=self.clip, lr=self.lr) 40 | ELBOs = self.rescale(ELBOs) # class specific fine-scaling 41 | 42 | if return_more: 43 | p_c = u.confidence_softmax(-ELBOs * self.logit_scale, const=self.confidence_level, 44 | dim=1) 45 | return p_c, ELBOs, l_v_classes, reconsts 46 | else: 47 | return -ELBOs[:, :, 0, 0] # like logits 48 | 49 | def rescale(self, logits): 50 | return logits 51 | 52 | 53 | class ELBOVAE_binary(ELBOVAE): 54 | def __init__(self, AEs, n_samples, n_iter, beta, GM, 55 | fraction_to_dismiss=0.1, clip=5, lr=0.05): 56 | 57 | super().__init__(AEs, n_samples, n_iter, beta, GM, 58 | fraction_to_dismiss=fraction_to_dismiss, 59 | clip=clip, lr=lr) 60 | 61 | self.name_check = 'ABS' 62 | self.rescale_b = True 63 | self.discriminative_scalings = torch.tensor( 64 | [1., 0.96, 1.001, 1.06, 0.98, 0.96, 1.03, 1., 1., 1.]).to(u.dev()) 65 | 66 | def forward(self, x, return_more=False): 67 | # assert (torch.ge(x, 0).all()) 68 | # assert (torch.le(x, 1).all()) 69 | x = u.binarize(x) 70 | return super().forward(x, return_more=return_more) 71 | 72 | def rescale(self, logits): 73 | if self.rescale_b: 74 | return logits * self.discriminative_scalings[None, :, None, None] 75 
| else: 76 | return logits 77 | 78 | 79 | def get_ABS(n_samples=8000, n_iter=50, beta=1, clip=5, 80 | fraction_to_dismiss=0.1, lr=0.05, load=True, 81 | binary=True, load_path=DEFAULT_PATH): 82 | return get_VAE(n_samples=n_samples, n_iter=n_iter, beta=beta, clip=clip, 83 | fraction_to_dismiss=fraction_to_dismiss, lr=lr, load=load, 84 | binary=binary, load_path=load_path) 85 | 86 | 87 | def get_VAE(n_samples=8000, n_iter=50, beta=1, clip=5, fraction_to_dismiss=0.1, lr=0.05, 88 | load=True, binary=False, load_path=DEFAULT_PATH): 89 | """Creates the ABS model. If binary is True, returns the full 90 | ABS model including binarization and scalar, otherwise returns 91 | the base ABS model without binarization and without scalar.""" 92 | 93 | load_path = join(DEFAULT_PATH, '../exp/VAE_swarm_MSE/nets/') 94 | 95 | print('ABS model') 96 | 97 | n_classes = 10 98 | nd = 8 99 | nx, ny = 28, 28 100 | 101 | def init_models(): 102 | strides = [1, 2, 2, 1] 103 | latent_act_fct = u.LinearActFct 104 | 105 | kernelE = [5, 4, 3, 5] 106 | feat_mapsE = [32, 32, 64, nd] 107 | encoder = { 'feat_maps': feat_mapsE, 'kernels': kernelE, 'strides': strides} 108 | kernelD = [4, 5, 5, 4] 109 | feat_mapsD = [32, 16, 16, 1] 110 | decoder = {'feat_maps': feat_mapsD, 'kernels': kernelD, 'strides': strides} 111 | 112 | AEs = [] 113 | for i in range(n_classes): 114 | AE = nets.VariationalAutoEncoder(encoder, decoder, latent_act_fct=latent_act_fct) 115 | AE.eval() 116 | AE.to(u.dev()) 117 | AEs.append(AE) 118 | return AEs 119 | 120 | AEs = init_models() 121 | 122 | if load: 123 | for i in range(n_classes): 124 | path = load_path + f'/ABS_{i}.net' 125 | AEs[i].iters = 29000 126 | AEs[i].load_state_dict(torch.load(path, map_location=str(u.dev()))) 127 | print('model loaded') 128 | 129 | GM = sampling.GaussianSamples(AEs, nd, n_classes, nx=nx, ny=ny) 130 | if binary: 131 | model = ELBOVAE_binary 132 | else: 133 | model = ELBOVAE 134 | model = model(AEs, n_samples, n_iter, beta, GM, fraction_to_dismiss, clip, lr=lr) 135 | model.eval() 136 | model.code_base = 'pytorch' 137 | model.has_grad = False 138 | return model 139 | 140 | 141 | class CNN(nets.Architectures): 142 | def __init__(self, model): 143 | super().__init__() 144 | self.add_module('net', model) 145 | self.model = model 146 | self.has_grad = True 147 | self.confidence_level = 1439000 148 | self.logit_scale = 1 149 | self.name_check = 'MNIST_baseline' 150 | 151 | def forward(self, input): 152 | # assert (torch.ge(input, 0).all()) 153 | # assert (torch.le(input, 1).all()) 154 | return self.model.forward(input)[:, :, 0, 0] 155 | 156 | 157 | def get_CNN(load_path=DEFAULT_PATH): 158 | 159 | load_path = join(DEFAULT_PATH, '../exp/mnist_cnn/nets/') 160 | 161 | 162 | # network 163 | shape = (1, 1, 28, 28) 164 | kernelE = [5, 4, 3, 5] 165 | strides = [1, 2, 2, 1] 166 | feat_mapsE = [20, 70, 256, 10] # (32, 32, 16, 2) 167 | 168 | model = nets.NN(feat_mapsE, shape[1:], kernels=kernelE, strides=strides) 169 | # load net 170 | print('path', load_path + '/vanilla_cnn.net') 171 | model.load_state_dict(torch.load(load_path + '/vanilla_cnn.net', map_location=str(u.dev()))) 172 | print('model loaded') 173 | NN = CNN(model) 174 | NN.eval() 175 | NN.to(u.dev()) 176 | NN.code_base = 'pytorch' 177 | return NN 178 | 179 | 180 | class BinaryCNN(CNN): 181 | def __init__(self, model): 182 | super().__init__(model) 183 | self.name_check = 'MNIST_baseline_binary' 184 | 185 | def forward(self, input): 186 | input = u.binarize(input) 187 | return super().forward(input) 188 | 189 | 190 | def 
get_binary_CNN(load_path=DEFAULT_PATH, binarize=True): 191 | load_path = join(DEFAULT_PATH, '../exp/mnist_cnn/nets/') 192 | 193 | # network 194 | shape = (1, 1, 28, 28) 195 | kernelE = [5, 4, 3, 5] 196 | strides = [1, 2, 2, 1] 197 | feat_mapsE = [20, 70, 256, 10] # (32, 32, 16, 2) 198 | 199 | model = nets.NN(feat_mapsE, shape[1:], kernels=kernelE, strides=strides) 200 | 201 | # load net 202 | model.load_state_dict(torch.load(load_path + '/vanilla_cnn.net', map_location=str(u.dev()))) 203 | print('model loaded') 204 | if binarize: 205 | model = BinaryCNN(model) 206 | else: 207 | model = CNN(model) 208 | model.eval() 209 | model.to(u.dev()) 210 | model.code_base = 'pytorch' 211 | return model 212 | 213 | 214 | def get_transfer_model(load_path=DEFAULT_PATH): 215 | 216 | # new arch 217 | shape = (1, 1, 28, 28) 218 | strides = [1, 2, 2, 1] 219 | kernelE = [5, 4, 3, 5] 220 | feat_mapsE = [32, 32, 64, 10] # (32, 32, 16, 2) 221 | 222 | model = nets.NN(feat_mapsE, shape[1:], kernels=kernelE, strides=strides) 223 | model.load_state_dict(torch.load(load_path + 'transfer_cnn.net', map_location=str(u.dev()))) 224 | 225 | model.to(u.dev()) 226 | if load_path is not None: 227 | model.load_state_dict(torch.load(load_path, map_location=str(u.dev()))) 228 | model.eval() 229 | model.code_base = 'pytorch' 230 | return model 231 | 232 | 233 | class NearestNeighbor(nets.NearestNeighborLogits): 234 | def __init__(self, samples, classes, n_classes): 235 | """ 236 | :param samples: 4D: (n_samples, nchannels, nx, ny) 237 | :param classes: 1D: (2, 3, 4, 1, ...) (n_samples) 238 | """ 239 | super().__init__(samples, classes, n_classes) 240 | self.name_check = 'MNIST_NN' 241 | 242 | def forward(self, input_batch, return_more=False): 243 | # assert (torch.ge(input_batch, 0).all()) 244 | # assert (torch.le(input_batch, 1).all()) 245 | return super().forward(input_batch, return_more=return_more) 246 | 247 | 248 | def get_NearestNeighbor(): 249 | n_classes = 10 250 | mnist_train = datasets.MNIST('./../data', train=True, download=True, 251 | transform=transforms.Compose([transforms.ToTensor()])) 252 | 253 | NN = NearestNeighbor(mnist_train.train_data[:, None, ...].type(torch.float32).to(u.dev()) / 255, 254 | mnist_train.train_labels.to(u.dev()), n_classes=n_classes) 255 | 256 | print('model initialized') 257 | NN.eval() # does nothing but avoids warnings 258 | NN.code_base = 'pytorch' 259 | NN.has_grad = False 260 | return NN 261 | 262 | 263 | def get_madry(load_path='./../madry/mnist_challenge/models/secret/'): 264 | import tensorflow as tf 265 | from madry.mnist_challenge.model import Model 266 | sess = tf.InteractiveSession() 267 | model = Model() 268 | model_file = tf.train.latest_checkpoint(load_path) 269 | restorer = tf.train.Saver() 270 | restorer.restore(sess, model_file) 271 | model.code_base = 'tensorflow' 272 | model.logit_scale = 1. 273 | model.confidence_level = 60. 
274 | model.has_grad = True 275 | return model -------------------------------------------------------------------------------- /abs_models/nets.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | from torch import nn 4 | 5 | from abs_models import utils as u 6 | 7 | 8 | class Architectures(nn.Module): 9 | def __init__(self, input_size=None): 10 | super(Architectures, self).__init__() 11 | self.c = input_size 12 | self.iters = 0 13 | 14 | def forward(self, input): 15 | for module in self._modules.values(): 16 | input = module(input) 17 | return input 18 | 19 | 20 | class ConvAE(Architectures): 21 | def __init__(self, EncArgs, DecArgs): 22 | super().__init__(input_size=None) 23 | self.latent = None 24 | self.Encoder = ConvEncoder(**EncArgs) 25 | self.Decoder = ConvDecoder(**DecArgs) 26 | 27 | def forward(self, x): 28 | self.latent = self.Encoder.forward(x) 29 | return self.Decoder.forward(self.latent) 30 | 31 | 32 | class VariationalAutoEncoder(ConvAE): 33 | def __init__(self, EncArgs, DecArgs, latent_act_fct=nn.Tanh): 34 | 35 | self.fac = 2 36 | 37 | # Decoder must match encoder 38 | EncArgs['feat_maps'][-1] = int(EncArgs['feat_maps'][-1] * self.fac) 39 | self.n_latent = int(EncArgs['feat_maps'][-1]) 40 | self.depth = len(EncArgs['feat_maps']) 41 | 42 | if 'act_fcts' not in EncArgs.keys(): 43 | EncArgs['act_fcts'] = self.depth * [torch.nn.ELU] 44 | EncArgs['act_fcts'][-1] = None 45 | 46 | # half amount of layers (half mu, half sigma) 47 | DecArgs['input_sizes'] = [int(EncArgs['feat_maps'][-1] / self.fac)] 48 | super().__init__(EncArgs, DecArgs) 49 | EncArgs['feat_maps'][-1] = int(EncArgs['feat_maps'][-1] / self.fac) 50 | 51 | self.std = None 52 | self.mu = None 53 | self.logvar = None 54 | 55 | self.latent_act_fct = latent_act_fct() 56 | 57 | def reparameterize(self, inp): 58 | self.mu = self.latent_act_fct( 59 | inp[:, :int(self.n_latent / self.fac), :, :]) 60 | 61 | if self.training: 62 | # std 63 | self.logvar = inp[:, int(self.n_latent / 2):, :, :] 64 | self.std = self.logvar.mul(0.5).exp_() 65 | 66 | # reparam of mu 67 | eps = torch.empty_like(self.mu.data).normal_() 68 | self.latent = eps.mul(self.std).add_(self.mu) 69 | 70 | else: # test 71 | self.latent = self.mu 72 | self.logvar = inp[:, int(self.n_latent / 2):, :, :] 73 | self.std = self.logvar.mul(0.5).exp_() 74 | 75 | def forward(self, x): 76 | prelatent = self.Encoder.forward(x) 77 | self.reparameterize(prelatent) 78 | out = self.Decoder(self.latent) 79 | return out 80 | 81 | 82 | class ConvEncoder(nn.Sequential): 83 | def __init__(self, feat_maps=(256, 128, 128), input_sizes=(1, 28, 28), 84 | kernels=(5, 3, 3), 85 | BNs=None, act_fcts=None, dilations=None, strides=None): 86 | 87 | super().__init__() 88 | 89 | self.latent = None 90 | 91 | self.depth = len(feat_maps) 92 | if BNs is None: 93 | BNs = self.depth * [True] 94 | BNs[-1] = False 95 | if act_fcts is None: 96 | act_fcts = self.depth * [nn.ELU] 97 | act_fcts[-1] = nn.Tanh 98 | if dilations is None: 99 | dilations = self.depth * [1] 100 | if strides is None: 101 | strides = self.depth * [1] 102 | 103 | # check 104 | args = [feat_maps, kernels, dilations, strides] 105 | for i, it in enumerate(args): 106 | if len(it) != self.depth: 107 | raise Exception('wrong length' + str(it) + str(i)) 108 | feat_maps = [input_sizes[0]] + list(feat_maps) 109 | 110 | # build net 111 | for i, (BN, act_fct, kx, dil, stride) in enumerate( 112 | zip(BNs, act_fcts, kernels, dilations, strides)): 113 | 114 | 
self.add_module('conv_%i' % i, nn.Conv2d( 115 | feat_maps[i], feat_maps[i + 1], kx, 116 | stride=stride, dilation=dil)) 117 | 118 | if BN: 119 | self.add_module('bn_%i' % i, nn.BatchNorm2d(feat_maps[i + 1])) 120 | if act_fct is not None: 121 | self.add_module('nl_%i' % i, act_fct()) 122 | 123 | def forward(self, input): 124 | for module in self._modules.values(): 125 | input = module(input) 126 | self.latent = input 127 | return input 128 | 129 | 130 | class ConvDecoder(nn.Sequential): 131 | def __init__(self, feat_maps=(32, 32, 1), input_sizes=(2, 1, 1), 132 | kernels=(3, 3, 3), 133 | BNs=None, act_fcts=None, dilations=None, strides=(1, 1, 1), 134 | conv_fct=None): 135 | 136 | super().__init__() 137 | 138 | self.depth = len(feat_maps) 139 | if BNs is None: 140 | BNs = self.depth * [True] 141 | BNs[-1] = False 142 | if act_fcts is None: 143 | act_fcts = self.depth * [nn.ELU] 144 | act_fcts[-1] = u.LinearActFct 145 | if dilations is None: 146 | dilations = self.depth * [1] 147 | 148 | # check 149 | args = [feat_maps, kernels, dilations, strides] 150 | for i, it in enumerate(args): 151 | if len(it) != self.depth: 152 | raise Exception('wrong length' + str(it) + str(i)) 153 | 154 | feat_maps = [input_sizes[0]] + list(feat_maps) 155 | 156 | if conv_fct is None: 157 | conv_fct = nn.ConvTranspose2d 158 | 159 | # build net 160 | for i, (BN, act_fct, kx, dil, stride) in enumerate( 161 | zip(BNs, act_fcts, kernels, dilations, strides)): 162 | 163 | self.add_module('conv_%i' % i, conv_fct( 164 | feat_maps[i], feat_maps[i + 1], kx, stride=stride)) 165 | if BN: 166 | self.add_module('bn_%i' % i, nn.BatchNorm2d(feat_maps[i + 1])) 167 | self.add_module('nl_%i' % i, act_fct()) 168 | 169 | 170 | # Other models 171 | # ------------ 172 | 173 | class NN(Architectures): 174 | def __init__(self, feat_maps=(16, 16, 8), input_sizes=(1, 28, 28), 175 | kernels=(5, 3, 3), strides=None, 176 | BNs=None, act_fcts=None): 177 | super().__init__(input_size=input_sizes) 178 | self.depth = len(feat_maps) 179 | ad_feat_maps = [input_sizes[0]] + list(feat_maps) 180 | 181 | if strides is None: 182 | strides = self.depth * [1] 183 | 184 | if BNs is None: 185 | BNs = self.depth * [True] 186 | BNs[-1] = False 187 | 188 | if act_fcts is None: 189 | act_fcts = self.depth * [nn.ELU] 190 | act_fcts[-1] = None 191 | 192 | net_builder(self, BNs, act_fcts=act_fcts, feat_maps=ad_feat_maps, 193 | kernel_sizes=kernels, strides=strides) 194 | 195 | 196 | class View(nn.Module): 197 | def __init__(self, *shape): 198 | super(View, self).__init__() 199 | self.shape = shape 200 | 201 | def forward(self, input): 202 | bs = input.size()[0] 203 | return input.view((bs,) + self.shape) 204 | 205 | 206 | class NearestNeighbor(nn.Module): 207 | def __init__(self, samples, classes, n_classes): 208 | """ 209 | :param samples: 4D: (n_samples, nchannels, nx, ny) 210 | :param classes: 1D: (2, 3, 4, 1, ...) (n_samples) 211 | """ 212 | super().__init__() 213 | self.samples = samples[None, ...] 
# (1, n_samples, nch, x, y) 214 | self.classes = classes 215 | self.n_classes = n_classes 216 | self.max_bs = 20 217 | 218 | def forward(self, input_batch, return_more=True): 219 | assert len(input_batch.size()) == 4 220 | assert input_batch.size()[-1] == self.samples.size()[-1] 221 | assert input_batch.size()[-2] == self.samples.size()[-2] 222 | assert input_batch.size()[-3] == self.samples.size()[-3] 223 | 224 | bs = input_batch.shape[0] 225 | input_batch = input_batch[:, None, ...].to(u.dev()) # (bs, 1, nch, x, y) 226 | 227 | def calc_dist(input_batch): 228 | dists = u.L2(self.samples, input_batch, axes=[2, 3, 4]) 229 | l2, best_ind_classes = torch.min(dists, 1) 230 | return l2, best_ind_classes 231 | 232 | l2s, best_ind_classes = u.auto_batch(self.max_bs, calc_dist, input_batch) 233 | 234 | # boring bookkeeping 235 | pred = self.get_classes(bs, input_batch, best_ind_classes) 236 | imgs = self.samples[0, best_ind_classes] 237 | # print(pred, imgs, l2s)\ 238 | if return_more: 239 | return pred, imgs, l2s 240 | else: 241 | return pred 242 | 243 | def get_classes(self, bs, input_batch, best_ind_classes): 244 | pred = torch.zeros(bs, self.n_classes).to(u.dev()) 245 | pred[range(bs), self.classes[best_ind_classes]] = 1. 246 | return pred 247 | 248 | 249 | class NearestNeighborLogits(NearestNeighbor): 250 | def __init__(self, samples, classes, n_classes): 251 | """ 252 | :param samples: 4D: (n_samples, nchannels, nx, ny) 253 | :param classes: 1D: (2, 3, 4, 1, ...) (n_samples) 254 | """ 255 | super().__init__(samples, classes, n_classes=10) 256 | self.samples = None 257 | self.all_samples = samples 258 | self.class_samples = [self.all_samples[self.classes == i] for i in range(n_classes)] 259 | self.max_bs = 40 260 | 261 | def forward(self, input_batch, return_more=True): 262 | bs, nch, nx, ny = input_batch.shape 263 | all_imgs, all_l2s = [], [] 264 | for i, samples in enumerate(self.class_samples): 265 | self.samples = samples[None, ...] 266 | _, imgs, l2s = super().forward(input_batch, return_more=True) 267 | all_imgs.append(imgs) 268 | all_l2s.append(l2s) 269 | 270 | all_l2s = torch.cat(all_l2s).view(self.n_classes, -1).transpose(0, 1) 271 | if return_more: 272 | all_imgs = torch.cat(all_imgs).view(self.n_classes, -1, nch, nx, ny).transpose(0, 1) 273 | return -all_l2s, all_imgs, all_l2s 274 | else: 275 | return -all_l2s 276 | 277 | def get_classes(self, *args, **kwargs): 278 | return None 279 | 280 | 281 | def net_builder(net, BNs, act_fcts, feat_maps, kernel_sizes, strides): 282 | # build net 283 | for i, (BN, act_fct, kx, stride) in enumerate( 284 | zip(BNs, act_fcts, kernel_sizes, strides)): 285 | net.add_module('conv_%i' % i, nn.Conv2d( 286 | feat_maps[i], feat_maps[i + 1], kx, stride=stride)) 287 | if BN: 288 | net.add_module('bn_%i' % i, nn.BatchNorm2d(feat_maps[i + 1])) 289 | if act_fct is not None: 290 | net.add_module('nl_%i' % i, act_fct()) 291 | 292 | 293 | def calc_fov(x, kernels, paddings=None, dilations=None, strides=None): 294 | l_x = x 295 | n_layer = len(kernels) 296 | if paddings is None: 297 | paddings = [0.] * n_layer 298 | if dilations is None: 299 | dilations = [1.] * n_layer 300 | if strides is None: 301 | strides = [1.] * n_layer 302 | for p, d, k, s in zip(paddings, dilations, kernels, strides): 303 | l_x = calc_fov_layer(l_x, k, p, d, s) 304 | return l_x 305 | 306 | 307 | def calc_fov_layer(x, kernel, padding=0, dilation=1, stride=1): 308 | p, d, k, s = padding, dilation, kernel, float(stride) 309 | print('s', s, 'p', p, 'd', d, 'k', k, ) 310 | if np.floor((x + 2. 
* p - d * (k - 1.) - 1.) / s + 1.) != (x + 2. * p - d * (k - 1.) - 1.) / s + 1.: # noqa: E501 311 | print('boundary problems') 312 | return np.floor((x + 2. * p - d * (k - 1.) - 1.) / s + 1.) 313 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /madry/mnist_challenge/README.md: -------------------------------------------------------------------------------- 1 | # MNIST Adversarial Examples Challenge 2 | 3 | Recently, there has been much progress on adversarial *attacks* against neural networks, such as the [cleverhans](https://github.com/tensorflow/cleverhans) library and the code by [Carlini and Wagner](https://github.com/carlini/nn_robust_attacks). 
4 | We now complement these advances by proposing an *attack challenge* for the
5 | [MNIST](http://yann.lecun.com/exdb/mnist/) dataset (we recently released [a
6 | CIFAR10 variant of this
7 | challenge](https://github.com/MadryLab/cifar10_challenge)).
8 | We have trained a robust network, and the objective is to find a set of adversarial examples on which this network achieves only a low accuracy.
9 | To train an adversarially robust network, we followed the approach from our recent paper:
10 |
11 | **Towards Deep Learning Models Resistant to Adversarial Attacks** <br>
12 | *Aleksander Madry, Aleksandar Makelov, Ludwig Schmidt, Dimitris Tsipras, Adrian Vladu* <br>
13 | https://arxiv.org/abs/1706.06083.
14 |
15 | As part of the challenge, we release both the training code and the network architecture, but keep the network weights secret.
16 | We invite any researcher to submit attacks against our model (see the detailed instructions below).
17 | We will maintain a leaderboard of the best attacks for the next two months and then publish our secret network weights.
18 |
19 | The goal of our challenge is to clarify the state-of-the-art for adversarial robustness on MNIST. Moreover, we hope that future work on defense mechanisms will adopt a similar challenge format in order to improve reproducibility and empirical comparisons.
20 |
21 | **Update 2017-09-14:** Due to recently increased interest in our challenge, we are extending its duration until October 15th.
22 |
23 | **Update 2017-10-19:** We released our secret model; you can download it by
24 | running `python fetch_model.py secret`. As of Oct 15 we are no longer
25 | accepting black-box challenge submissions. We will soon set up a leaderboard to keep track
26 | of white-box attacks. Many thanks to everyone who participated!
27 |
28 | **Update 2017-11-06:** We have set up a leaderboard for white-box attacks on the (now released) secret model. The submission format is the same as before. We plan to continue evaluating submissions and maintaining the leaderboard for the foreseeable future.
29 |
30 | ## Black-Box Leaderboard (Original Challenge)
31 |
32 | | Attack | Submitted by | Accuracy | Submission Date |
33 | | -------------------------------------- | ------------- | -------- | ---- |
34 | | AdvGAN from ["Generating Adversarial Examples<br>with Adversarial Networks"](https://arxiv.org/abs/1801.02610) | AdvGAN | **92.76%** | Sep 25, 2017 |
35 | | PGD against three independently and<br>adversarially trained copies of the network | [Florian Tramèr](http://floriantramer.com/) | 93.54% | Jul 5, 2017 |
36 | | FGSM on the [CW](https://github.com/carlini/nn_robust_attacks) loss for model B from<br>["Ensemble Adversarial Training [...]"](https://arxiv.org/abs/1705.07204) | [Florian Tramèr](http://floriantramer.com/) | 94.36% | Jun 29, 2017 |
37 | | FGSM on the [CW](https://github.com/carlini/nn_robust_attacks) loss for the<br>naturally trained public network | (initial entry) | 96.08% | Jun 28, 2017 |
38 | | PGD on the cross-entropy loss for the<br>naturally trained public network | (initial entry) | 96.81% | Jun 28, 2017 |
39 | | Attack using Gaussian Filter for selected pixels<br>on the adversarially trained public network | Anonymous | 97.33% | Aug 27, 2017 |
40 | | FGSM on the cross-entropy loss for the<br>adversarially trained public network | (initial entry) | 97.66% | Jun 28, 2017 |
41 | | PGD on the cross-entropy loss for the<br>adversarially trained public network | (initial entry) | 97.79% | Jun 28, 2017 |
42 |
43 | ## White-Box Leaderboard
44 |
45 | | Attack | Submitted by | Accuracy | Submission Date |
46 | | -------------------------------------- | ------------- | -------- | ---- |
47 | | First-order attack on logit difference<br>for optimally chosen target label | Samarth Gupta | 88.85% | May 23, 2018 |
48 | | 100-step PGD on the cross-entropy loss<br>with 50 random restarts | (initial entry) | 89.62% | Nov 6, 2017 |
49 | | 100-step PGD on the [CW](https://github.com/carlini/nn_robust_attacks) loss<br>with 50 random restarts | (initial entry) | 89.71% | Nov 6, 2017 |
50 | | 100-step PGD on the cross-entropy loss | (initial entry) | 92.52% | Nov 6, 2017 |
51 | | 100-step PGD on the [CW](https://github.com/carlini/nn_robust_attacks) loss | (initial entry) | 93.04% | Nov 6, 2017 |
52 | | FGSM on the cross-entropy loss | (initial entry) | 96.36% | Nov 6, 2017 |
53 | | FGSM on the [CW](https://github.com/carlini/nn_robust_attacks) loss | (initial entry) | 96.40% | Nov 6, 2017 |
54 |
55 | ## Format and Rules
56 |
57 | The objective of the challenge is to find black-box (transfer) attacks that are effective against our MNIST model.
58 | Attacks are allowed to perturb each pixel of the input image by at most `epsilon=0.3`.
59 | To ensure that the attacks are indeed black-box, we release our training code and model architecture, but keep the actual network weights secret.
60 |
61 | We invite any interested researchers to submit attacks against our model.
62 | The most successful attacks will be listed in the leaderboard above.
63 | As a reference point, we have seeded the leaderboard with the results of some standard attacks.
64 |
65 | ### The MNIST Model
66 |
67 | We used the code published in this repository to produce an adversarially robust model for MNIST classification. The model is a convolutional neural network consisting of two convolutional layers (each followed by max-pooling) and a fully connected layer. This architecture is derived from the [MNIST tensorflow tutorial](https://www.tensorflow.org/get_started/mnist/pros).
68 | The network was trained against an iterative adversary that is allowed to perturb each pixel by at most `epsilon=0.3`.
69 |
70 | The random seed used for training and the trained network weights will be kept secret.
71 |
72 | The `sha256()` digest of our model file is:
73 | ```
74 | 14eea09c72092db5c2eb5e34cd105974f42569281d2f34826316e356d057f96d
75 | ```
76 | We will release the corresponding model file on October 15th 2017, which is roughly two months after the start of this competition.
77 |
78 | ### The Attack Model
79 |
80 | We are interested in adversarial inputs that are derived from the MNIST test set.
81 | Each pixel can be perturbed by at most `epsilon=0.3` from its initial value.
82 | All pixels can be perturbed independently, so this is an l_infinity attack.
83 |
84 | ### Submitting an Attack
85 |
86 | Each attack should consist of a perturbed version of the MNIST test set.
87 | Each perturbed image in this test set should follow the above attack model.
88 |
89 | The adversarial test set should be formatted as a numpy array with one row per example and each row containing a flattened
90 | array of 28x28 pixels.
91 | Hence the overall dimensions are 10,000 rows and 784 columns.
92 | Each pixel must be in the [0,1] range.
93 | See the script `pgd_attack.py` for an attack that generates an adversarial test set in this format.
94 |
95 | In order to submit your attack, save the matrix containing your adversarial examples with `numpy.save` and email the resulting file to mnist.challenge@gmail.com.
96 | We will then run the `run_attack.py` script on your file to verify that the attack is valid and to evaluate the accuracy of our secret model on your examples.
97 | After that, we will reply with the predictions of our model on each of your examples and the overall accuracy of our model on your evaluation set.
98 |
99 | If the attack is valid and outperforms all current attacks in the leaderboard, it will appear at the top of the leaderboard.
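For concreteness, here is a minimal sketch of producing a submission file in this format. It is illustrative only: the uniform-noise "attack" is a placeholder for your own attack, and the TF1-style MNIST loader is an assumption, not part of this repository.

```
import numpy as np
from tensorflow.examples.tutorials.mnist import input_data

epsilon = 0.3  # perturbation budget, see config.json

mnist = input_data.read_data_sets('MNIST_data')
x_nat = mnist.test.images  # shape (10000, 784), values in [0, 1]

# placeholder attack: random perturbation inside the budget;
# substitute your actual attack here
noise = np.random.uniform(-epsilon, epsilon, x_nat.shape).astype(np.float32)
x_adv = np.clip(x_nat + noise, 0.0, 1.0)

# mirror the rules above: shape, pixel range, l_infinity budget
assert x_adv.shape == (10000, 784)
assert x_adv.min() >= 0.0 and x_adv.max() <= 1.0
assert np.abs(x_adv - x_nat).max() <= epsilon + 1e-6

np.save('attack.npy', x_adv)
```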
100 | Novel types of attacks might be included in the leaderboard even if they do not perform best. 101 | 102 | We strongly encourage you to disclose your attack method. 103 | We would be happy to add a link to your code in our leaderboard. 104 | 105 | ## Overview of the Code 106 | The code consists of six Python scripts and the file `config.json` that contains various parameter settings. 107 | 108 | ### Running the code 109 | - `python train.py`: trains the network, storing checkpoints along 110 | the way. 111 | - `python eval.py`: an infinite evaluation loop, processing each new 112 | checkpoint as it is created while logging summaries. It is intended 113 | to be run in parallel with the `train.py` script. 114 | - `python pgd_attack.py`: applies the attack to the MNIST eval set and 115 | stores the resulting adversarial eval set in a `.npy` file. This file is 116 | in a valid attack format for our challenge. 117 | - `python run_attack.py`: evaluates the model on the examples in 118 | the `.npy` file specified in config, while ensuring that the adversarial examples 119 | are indeed a valid attack. The script also saves the network predictions in `pred.npy`. 120 | - `python fetch_model.py name`: downloads the pre-trained model with the 121 | specified name (at the moment `adv_trained` or `natural`), prints the sha256 122 | hash, and places it in the models directory. 123 | 124 | ### Parameters in `config.json` 125 | 126 | Model configuration: 127 | - `model_dir`: contains the path to the directory of the currently 128 | trained/evaluated model. 129 | 130 | Training configuration: 131 | - `random_seed`: the seed for the RNG used to initialize the network 132 | weights. 133 | - `max_num_training_steps`: the number of training steps. 134 | - `num_output_steps`: the number of training steps between printing 135 | progress in standard output. 136 | - `num_summary_steps`: the number of training steps between storing 137 | tensorboard summaries. 138 | - `num_checkpoint_steps`: the number of training steps between storing 139 | model checkpoints. 140 | - `training_batch_size`: the size of the training batch. 141 | 142 | Evaluation configuration: 143 | - `num_eval_examples`: the number of MNIST examples to evaluate the 144 | model on. 145 | - `eval_batch_size`: the size of the evaluation batches. 146 | - `eval_on_cpu`: forces the `eval.py` script to run on the CPU so it does not compete with `train.py` for GPU resources. 147 | 148 | Adversarial examples configuration: 149 | - `epsilon`: the maximum allowed perturbation per pixel. 150 | - `k`: the number of PGD iterations used by the adversary. 151 | - `a`: the size of the PGD adversary steps. 152 | - `random_start`: specifies whether the adversary will start iterating 153 | from the natural example or a random perturbation of it. 154 | - `loss_func`: the loss function used to run pgd on. `xent` corresponds to the 155 | standard cross-entropy loss, `cw` corresponds to the loss function 156 | of [Carlini and Wagner](https://arxiv.org/abs/1608.04644). 157 | - `store_adv_path`: the file in which adversarial examples are stored. 158 | Relevant for the `pgd_attack.py` and `run_attack.py` scripts. 159 | 160 | ## Example usage 161 | After cloning the repository you can either train a new network or evaluate/attack one of our pre-trained networks. 
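Before running the scripts, it may help to see how the adversarial-examples parameters in `config.json` fit together. Below is a rough numpy sketch of the PGD loop they control; the actual implementation lives in `pgd_attack.py` and uses TensorFlow gradients, for which `grad_fn` is a stand-in here.

```
import numpy as np

def pgd_sketch(x_nat, y, grad_fn, epsilon=0.3, k=40, a=0.01, random_start=True):
    # grad_fn(x, y): gradient of the chosen loss (xent or cw) w.r.t. the input x
    if random_start:
        x = x_nat + np.random.uniform(-epsilon, epsilon, x_nat.shape)
    else:
        x = np.copy(x_nat)
    for _ in range(k):                                    # k: number of PGD iterations
        x = x + a * np.sign(grad_fn(x, y))                # a: step size
        x = np.clip(x, x_nat - epsilon, x_nat + epsilon)  # project back into the l_inf ball
        x = np.clip(x, 0.0, 1.0)                          # keep pixels valid
    return x
```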
162 | #### Training a new network 163 | * Start training by running: 164 | ``` 165 | python train.py 166 | ``` 167 | * (Optional) Evaluation summaries can be logged by simultaneously 168 | running: 169 | ``` 170 | python eval.py 171 | ``` 172 | #### Download a pre-trained network 173 | * For an adversarially trained network, run 174 | ``` 175 | python fetch_model.py adv_trained 176 | ``` 177 | and use the `config.json` file to set `"model_dir": "models/adv_trained"`. 178 | * For a naturally trained network, run 179 | ``` 180 | python fetch_model.py natural 181 | ``` 182 | and use the `config.json` file to set `"model_dir": "models/natural"`. 183 | #### Test the network 184 | * Create an attack file by running 185 | ``` 186 | python pgd_attack.py 187 | ``` 188 | * Evaluate the network with 189 | ``` 190 | python run_attack.py 191 | ``` 192 | -------------------------------------------------------------------------------- /scripts/attacks.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stdout", 10 | "output_type": "stream", 11 | "text": [ 12 | "The history saving thread hit an unexpected error (DatabaseError('database disk image is malformed',)).History will not be written to the database.\n" 13 | ] 14 | } 15 | ], 16 | "source": [ 17 | "import sys\n", 18 | "sys.path.insert(0, './../') \n", 19 | "%load_ext autoreload\n", 20 | "%autoreload 2\n", 21 | "%matplotlib inline\n", 22 | "\n", 23 | "import torch\n", 24 | "from torchvision import datasets, transforms\n", 25 | "\n", 26 | "import numpy as np\n", 27 | "from matplotlib import pyplot as plt\n", 28 | "import foolbox \n", 29 | "from foolbox import attacks as fa\n", 30 | "\n", 31 | "# own modules\n", 32 | "from abs_models import utils as u\n", 33 | "from abs_models import models as mz\n", 34 | "from abs_models import attack_utils as au" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 2, 40 | "metadata": {}, 41 | "outputs": [ 42 | { 43 | "name": "stdout", 44 | "output_type": "stream", 45 | "text": [ 46 | "ABS model\n", 47 | "model loaded\n" 48 | ] 49 | } 50 | ], 51 | "source": [ 52 | "model = mz.get_VAE(n_iter=10) # ABS, do n_iter=50 for original model \n", 53 | "# model = mz.get_VAE(binary=True) # ABS with scaling and binaryzation\n", 54 | "# model = mz.get_binary_CNN() # Binary CNN\n", 55 | "# model = mz.get_CNN() # Vanilla CNN\n", 56 | "# model = mz.get_NearestNeighbor() # Nearest Neighbor, \"nearest L2 dist to each class\"=logits\n", 57 | "# model = mz.get_madry() # Robust network from Madry et al. 
in tf" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": 3, 63 | "metadata": {}, 64 | "outputs": [], 65 | "source": [ 66 | "# code is agnostic of pytorch/ tensorflow model --> foolbox model\n", 67 | "if model.code_base == 'tensorflow':\n", 68 | " fmodel = foolbox.models.TensorFlowModel(model.x_input, model.pre_softmax, (0., 1.),\n", 69 | " channel_axis=3)\n", 70 | "elif model.code_base == 'pytorch':\n", 71 | " model.eval()\n", 72 | " fmodel = foolbox.models.PyTorchModel(model, # return logits in shape (bs, n_classes)\n", 73 | " bounds=(0., 1.), num_classes=10,\n", 74 | " device=u.dev())\n", 75 | "else:\n", 76 | " print('not implemented')" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": 5, 82 | "metadata": {}, 83 | "outputs": [ 84 | { 85 | "name": "stdout", 86 | "output_type": "stream", 87 | "text": [ 88 | "score 0.988\n" 89 | ] 90 | } 91 | ], 92 | "source": [ 93 | "# test model \n", 94 | "b, l = u.get_batch(bs=10000) # returns random batch as np.array\n", 95 | "pred_label = np.argmax(fmodel.batch_predictions(b), axis=1)\n", 96 | "print('score', float(np.sum(pred_label == l)) / b.shape[0])" 97 | ] 98 | }, 99 | { 100 | "cell_type": "markdown", 101 | "metadata": {}, 102 | "source": [ 103 | "# Decision based attacks\n", 104 | "Note that this is only demo code. All experiments were optimized to our compute architecture. " 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": 12, 110 | "metadata": {}, 111 | "outputs": [], 112 | "source": [ 113 | "b, l = u.get_batch(bs=1) # returns random batch" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": 13, 119 | "metadata": {}, 120 | "outputs": [ 121 | { 122 | "data": { 123 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAP8AAAEICAYAAACQ6CLfAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAB+NJREFUeJzt3b+LnVkdx/HvWV0WRAsbC52ZakH2H5jMBgIrWGihIOSnhSAIYiEkkmwnWChosQQs1E4bl1nJBGRFwUItzPzY2gULLdyMWRALGxddNMfCKcLCPU82z9w7M/m8XlWS75zcJ7N584Q999yn9d4LyPPMSV8AcDLED6HED6HED6HED6HED6HEz6TW2o9aa9886evgeDX7/JDJnZ+h1toHTvoaWA7xh2qtvdBa+11r7R+ttTdba58/+vWftNZ+2Fr7ZWvtn1X1qaNf+/Yja19urb3dWnvQWvtKa6231p4/sT8MT0T8gVprz1bV61X166r6WFV9vap+2lr75NGXfLGqvlNVH6mq379n7Weq6htV9emqer6qXlrNVXPcxJ9pq6o+XFXf7b2/23v/TVX9oqquHc1/3nu/13t/2Hv/13vWXq6qH/fe3+y9v1NV31rZVXOsxJ/p41V1v/f+8JFf+0tVfeLox/en1j7y89HXcoqJP9ODqlpvrT3633+jqv569OPRFtDbVbX2yM/Xj/naWBHxZzqoqneq6uXW2rOttZeq6nNVtf0Ya39WVV8++h+GH6oq+/9nlPgD9d7frf/H/tmq+ntV/aCqvtR7/+NjrP1VVX2/qn5bVX+qqv2j0b+Xc7Usizf5MEtr7YWq+kNVPdd7/89JXw+Pz52f96219oXW2nOttY9W1feq6nXhnz3i50l8tar+VlV/rqr/VtXXTvZyeBL+2Q+h3Pkh1AdX+WKtNf/MgCXrvbfH+Tp3fgglfgglfgglfgglfgglfgglfgglfgglfgglfgglfgglfgglfgglfgglfgglfgglfgglfgglfgglfgglfgglfgglfgglfgglfgglfgglfgglfgglfgglfgi10kd0s3pbW1vD+d7e3nD+8OHD4fzg4GA4v3z58sLZ4eHhcC3L5c4PocQPocQPocQPocQPocQPocQPoVrvfXUv1trqXizIaC9/e3t7uHZ9fX04n9rnf+aZ8f1jd3d34ezChQvDtTyZ3nt7nK9z54dQ4odQ4odQ4odQ4odQ4odQ4odQzvM/BTY2NhbOpvbxWxtvCU/t40+tP3/+/MLZ6D0AVePPAqjyeQBzufNDKPFDKPFDKPFDKPFDKPFDKEd6nwL37t1bONvc3ByundrKm3ukd7R+znHgKkeCF3GkFxgSP4QSP4QSP4QSP4QSP4QSP4Syz38GzHnM9tQ+/dSR3Km/H3PWz33tO3fuDOdXrlwZzp9W9vmBIfFDKPFDKPFDKPFDKPFDKPFDKB/dfQpM7eNPPWZ7tJe/zPP4c9fPfe2p79tovr+/P1ybwJ0fQokfQokfQokfQokfQokfQokfQtnnPwUuXbo0nE89Zntqv3xk7pn4qWsfvUdh7uPBp74va2trw3k6d34IJX4IJX4IJX4IJX4IJX4IJX4IZZ9/BW7cuDGcX79+fTifOtc+Z+3t27ef+PeuGj8zoKpqd3d34ez8+fPDtXM/S2CVz6Q4i9z5IZT4IZT4IZT4IZT4IZT4IZStvmMwdbT04sWLw/nco62jY7nLfkz14eHhcH7hwoWFs7lbeVPft9FHd+/s7AzXJnDnh1Dih1Dih1Dih1Dih1Dih1Dih1
116 |   {
117 |    "cell_type": "code",
118 |    "execution_count": 13,
119 |    "metadata": {},
120 |    "outputs": [
121 |     {
122 |      "data": {
123 |       "image/png": "<base64 PNG omitted: matplotlib figure of the original digit>",
124 |       "text/plain": [
125 |        ""
126 |       ]
127 |      },
128 |      "metadata": {},
129 |      "output_type": "display_data"
130 |     },
131 |     {
132 |      "name": "stdout",
133 |      "output_type": "stream",
134 |      "text": [
135 |       "runtime 31.859752893447876 seconds\n",
136 |       "pred 3\n"
137 |      ]
138 |     },
139 |     {
140 |      "data": {
141 |       "image/png": "<base64 PNG omitted: matplotlib figure of the adversarial digit>",
142 |       "text/plain": [
143 |        ""
144 |       ]
145 |      },
146 |      "metadata": {},
147 |      "output_type": "display_data"
148 |     }
149 |    ],
150 |    "source": [
151 |     "import time\n",
152 |     "start = time.time()\n",
153 |     "att = fa.DeepFoolL2Attack(fmodel)\n",
154 |     "metric = foolbox.distances.MSE\n",
155 |     "criterion = foolbox.criteria.Misclassification()\n",
156 |     "\n",
157 |     "plt.imshow(b[0, 0], cmap='gray')\n",
158 |     "plt.title('orig')\n",
159 |     "plt.axis('off')\n",
160 |     "plt.show()\n",
161 |     "\n",
162 |     "# estimate gradients from scores if the model does not expose its own\n",
163 |     "if not model.has_grad:\n",
164 |     "    GE = foolbox.gradient_estimators.CoordinateWiseGradientEstimator(0.1)\n",
165 |     "    fmodel = foolbox.models.ModelWithEstimatedGradients(fmodel, GE)\n",
166 |     "\n",
167 |     "# generate the adversarial example\n",
168 |     "a = foolbox.adversarial.Adversarial(fmodel, criterion, b[0], l[0], distance=metric)\n",
169 |     "att(a)\n",
170 |     "\n",
171 |     "print('runtime', time.time() - start, 'seconds')\n",
172 |     "if a.image is not None:  # attack was successful\n",
173 |     "    print('pred', np.argmax(fmodel.predictions(a.image)))\n",
174 |     "    plt.imshow(a.image[0], cmap='gray')\n",
175 |     "    plt.title('adv')\n",
176 |     "    plt.axis('off')\n",
177 |     "    plt.show()"
178 |    ]
179 |   },
180 |   {
181 |    "cell_type": "markdown",
182 |    "metadata": {},
183 |    "source": [
184 |     "# Get trash (distal) adversarials"
185 |    ]
186 |   },
187 |   {
188 |    "cell_type": "code",
189 |    "execution_count": 8,
190 |    "metadata": {},
191 |    "outputs": [],
192 |    "source": [
193 |     "from foolbox.gradient_estimators import CoordinateWiseGradientEstimator as CWGE"
194 |    ]
195 |   },
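  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The next cells start from random noise and perform gradient ascent on the confidence of a fixed class until it exceeds 0.9, yielding distal (\"trash\") adversarials: inputs far from the data distribution that are nonetheless classified with high confidence (compare `exp/imgs/distal_adversarials.png`)."
   ]
  },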
196 |   {
197 |    "cell_type": "code",
198 |    "execution_count": 9,
199 |    "metadata": {},
200 |    "outputs": [],
210 |    "source": [
211 |     "a = np.random.random((1, 28, 28)).astype(np.float32)\n",
212 |     "a_helper = torch.from_numpy(a.copy()).clone().detach().requires_grad_(True)  # leaf tensor; avoids the torch.tensor(tensor) copy-construct warning\n",
213 |     "fixed_class = 1\n",
214 |     "GE = CWGE(1.)"
215 |    ]
216 |   },
217 |   {
218 |    "cell_type": "code",
219 |    "execution_count": 10,
220 |    "metadata": {},
221 |    "outputs": [],
222 |    "source": [
223 |     "opti = torch.optim.SGD([a_helper], lr=1, momentum=0.95)"
224 |    ]
225 |   },
226 |   {
227 |    "cell_type": "code",
228 |    "execution_count": null,
229 |    "metadata": {},
230 |    "outputs": [],
231 |    "source": [
232 |     "confidence_level = model.confidence_level  # ABS 0.0000031, CNN 1439000, madry 60, 1-NN 0.000000000004\n",
233 |     "logits_scale = model.logit_scale  # ABS 430, madry 1, CNN 1, 1-NN 5\n",
234 |     "\n",
235 |     "a_orig = a\n",
236 |     "plt.imshow(u.t2n(a[0]), cmap='gray')\n",
237 |     "plt.show()\n",
238 |     "\n",
239 |     "for i in range(10000):\n",
240 |     "    logits = fmodel.predictions(a)\n",
241 |     "    probs = u.t2n(u.confidence_softmax(logits_scale * torch.from_numpy(logits[None, :]), dim=1,\n",
242 |     "                                       const=confidence_level))[0]\n",
243 |     "    pred_class = np.argmax(u.t2n(logits).squeeze())\n",
244 |     "\n",
245 |     "    if probs[fixed_class] >= 0.9:  # stop once the fixed class reaches high confidence\n",
246 |     "        break\n",
247 |     "    grads = GE(fmodel.batch_predictions, a, fixed_class, (0, 1))\n",
248 |     "\n",
249 |     "    a = au.update_distal_adv(a, a_helper, grads, opti)\n",
250 |     "    if i % 1000 == 0:\n",
251 |     "        print(f'probs {probs[pred_class]:.3f} class', pred_class)\n",
252 |     "        fig, ax = plt.subplots(1, 3, squeeze=False, figsize=(10, 4))\n",
253 |     "        ax[0, 0].imshow(u.t2n(a[0]), cmap='gray')\n",
254 |     "        ax[0, 1].imshow(u.t2n(grads[0]), cmap='gray')\n",
255 |     "        ax[0, 2].imshow(np.sign(grads[0]), cmap='gray')\n",
256 |     "        plt.show()\n",
257 |     "plt.imshow(u.t2n(a[0]), cmap='gray')\n",
258 |     "plt.show()"
259 |    ]
260 |   },
261 |   {
262 |    "cell_type": "markdown",
263 |    "metadata": {},
264 |    "source": [
265 |     "# Latent Descent Attack"
266 |    ]
267 |   },
268 |   {
269 |    "cell_type": "code",
270 |    "execution_count": null,
271 |    "metadata": {},
272 |    "outputs": [],
273 |    "source": [
274 |     "# only works for ABS models\n",
275 |     "att = au.LineSearchAttack(model)  # alternatively: au.BinaryLineSearchAttack\n",
276 |     "b, l = u.get_batch(bs=200)\n",
277 |     "\n",
278 |     "advs = att(b, l, n_coarse_steps=50 + 1, n_ft_steps=2)\n",
279 |     "\n",
280 |     "for adv in advs:\n",
281 |     "    adv['img'] = adv['img'].cpu().numpy()\n",
282 |     "\n",
283 |     "for i, (a_i, b_i) in enumerate(zip(advs, b)):\n",
284 |     "    l2 = np.sqrt(a_i['distance'] * 784)  # convert the MSE over 784 pixels to an L2 distance\n",
285 |     "\n",
286 |     "    fig, ax = plt.subplots(1, 2, squeeze=False)\n",
287 |     "    ax[0, 0].set_title(str(a_i['original_label']))\n",
288 |     "    ax[0, 0].imshow(u.t2n(b_i[0]), cmap='gray')\n",
289 |     "    ax[0, 1].set_title(f\"{a_i['adversarial_label']} (L2 = {l2:.2f})\")\n",
290 |     "    ax[0, 1].imshow(u.t2n(a_i['img'][0]), cmap='gray')\n",
291 |     "    plt.show()\n",
292 |     "    if i == 10:\n",
293 |     "        break\n",
294 |     "print('mean L2', np.mean([np.sqrt(a_i['distance'] * 784) for a_i in advs]))"
295 |    ]
296 |   }
297 |  ],
298 |  "metadata": {
299 |   "kernelspec": {
300 |    "display_name": "Python 3",
301 |    "language": "python",
302 |    "name": "python3"
303 |   },
304 |   "language_info": {
305 |    "codemirror_mode": {
306 |     "name": "ipython",
307 |     "version": 3
308 |    },
309 |    "file_extension": ".py",
310 |    "mimetype": "text/x-python",
311 |    "name": "python",
312 |    "nbconvert_exporter": "python",
313 |    "pygments_lexer": 
"ipython3", 314 | "version": "3.6.4" 315 | } 316 | }, 317 | "nbformat": 4, 318 | "nbformat_minor": 2 319 | } 320 | --------------------------------------------------------------------------------