├── abs_models
│   ├── __init__.py
│   ├── loss_functions.py
│   ├── sampling.py
│   ├── attack_utils.py
│   ├── utils.py
│   ├── inference.py
│   ├── visualization.py
│   ├── models.py
│   └── nets.py
├── exp
│   ├── imgs
│   │   ├── qualitative.png
│   │   └── distal_adversarials.png
│   ├── VAE_swarm_MSE
│   │   └── nets
│   │       ├── ABS_0.net
│   │       ├── ABS_1.net
│   │       ├── ABS_2.net
│   │       ├── ABS_3.net
│   │       ├── ABS_4.net
│   │       ├── ABS_5.net
│   │       ├── ABS_6.net
│   │       ├── ABS_7.net
│   │       ├── ABS_8.net
│   │       └── ABS_9.net
│   └── mnist_cnn
│       └── nets
│           ├── binary_cnn.net
│           ├── vanilla_cnn.net
│           └── transfer_cnn.net
├── foolbox_model.py
├── madry
│   └── mnist_challenge
│       ├── config.json
│       ├── model_robustml.py
│       ├── LICENSE
│       ├── fetch_model.py
│       ├── model.py
│       ├── run_attack.py
│       ├── pgd_attack.py
│       ├── train.py
│       ├── eval.py
│       └── README.md
├── robustml_model.py
├── README.md
├── scripts
│   ├── attacks.py
│   └── attacks.ipynb
└── LICENSE
/abs_models/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/exp/imgs/qualitative.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bethgelab/AnalysisBySynthesis/HEAD/exp/imgs/qualitative.png
--------------------------------------------------------------------------------
/exp/VAE_swarm_MSE/nets/ABS_0.net:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bethgelab/AnalysisBySynthesis/HEAD/exp/VAE_swarm_MSE/nets/ABS_0.net
--------------------------------------------------------------------------------
/exp/VAE_swarm_MSE/nets/ABS_1.net:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bethgelab/AnalysisBySynthesis/HEAD/exp/VAE_swarm_MSE/nets/ABS_1.net
--------------------------------------------------------------------------------
/exp/VAE_swarm_MSE/nets/ABS_2.net:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bethgelab/AnalysisBySynthesis/HEAD/exp/VAE_swarm_MSE/nets/ABS_2.net
--------------------------------------------------------------------------------
/exp/VAE_swarm_MSE/nets/ABS_3.net:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bethgelab/AnalysisBySynthesis/HEAD/exp/VAE_swarm_MSE/nets/ABS_3.net
--------------------------------------------------------------------------------
/exp/VAE_swarm_MSE/nets/ABS_4.net:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bethgelab/AnalysisBySynthesis/HEAD/exp/VAE_swarm_MSE/nets/ABS_4.net
--------------------------------------------------------------------------------
/exp/VAE_swarm_MSE/nets/ABS_5.net:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bethgelab/AnalysisBySynthesis/HEAD/exp/VAE_swarm_MSE/nets/ABS_5.net
--------------------------------------------------------------------------------
/exp/VAE_swarm_MSE/nets/ABS_6.net:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bethgelab/AnalysisBySynthesis/HEAD/exp/VAE_swarm_MSE/nets/ABS_6.net
--------------------------------------------------------------------------------
/exp/VAE_swarm_MSE/nets/ABS_7.net:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bethgelab/AnalysisBySynthesis/HEAD/exp/VAE_swarm_MSE/nets/ABS_7.net
--------------------------------------------------------------------------------
/exp/VAE_swarm_MSE/nets/ABS_8.net:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bethgelab/AnalysisBySynthesis/HEAD/exp/VAE_swarm_MSE/nets/ABS_8.net
--------------------------------------------------------------------------------
/exp/VAE_swarm_MSE/nets/ABS_9.net:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bethgelab/AnalysisBySynthesis/HEAD/exp/VAE_swarm_MSE/nets/ABS_9.net
--------------------------------------------------------------------------------
/exp/imgs/distal_adversarials.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bethgelab/AnalysisBySynthesis/HEAD/exp/imgs/distal_adversarials.png
--------------------------------------------------------------------------------
/exp/mnist_cnn/nets/binary_cnn.net:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bethgelab/AnalysisBySynthesis/HEAD/exp/mnist_cnn/nets/binary_cnn.net
--------------------------------------------------------------------------------
/exp/mnist_cnn/nets/vanilla_cnn.net:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bethgelab/AnalysisBySynthesis/HEAD/exp/mnist_cnn/nets/vanilla_cnn.net
--------------------------------------------------------------------------------
/exp/mnist_cnn/nets/transfer_cnn.net:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bethgelab/AnalysisBySynthesis/HEAD/exp/mnist_cnn/nets/transfer_cnn.net
--------------------------------------------------------------------------------
/foolbox_model.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | import foolbox
3 | import numpy as np
4 | from abs_models.models import get_ABS
5 |
6 |
7 | def create():
8 | model = get_ABS()
9 | model.eval()
10 |     fmodel = foolbox.models.PyTorchModel(
11 |         model, bounds=(0, 1), num_classes=10)
12 | return fmodel
13 |
14 |
15 | if __name__ == '__main__':
16 | fmodel = create()
17 |
18 | # design an input that looks like a 1
19 | x = np.zeros((1, 28, 28), dtype=np.float32)
20 | x[0, 5:-5, 12:-12] = 1
21 |
22 | logits = fmodel.predictions(x)
23 |
24 | print('logits', logits)
25 | print('probabilities', foolbox.utils.softmax(logits))
26 | print('class', np.argmax(logits))
27 |
--------------------------------------------------------------------------------
/madry/mnist_challenge/config.json:
--------------------------------------------------------------------------------
1 | {
2 | "_comment": "===== MODEL CONFIGURATION =====",
3 | "model_dir": "models/a_very_robust_model",
4 |
5 | "_comment": "===== TRAINING CONFIGURATION =====",
6 | "random_seed": 4557077,
7 | "max_num_training_steps": 100000,
8 | "num_output_steps": 100,
9 | "num_summary_steps": 100,
10 | "num_checkpoint_steps": 300,
11 | "training_batch_size": 50,
12 |
13 | "_comment": "===== EVAL CONFIGURATION =====",
14 | "num_eval_examples": 10000,
15 | "eval_batch_size": 200,
16 | "eval_on_cpu": true,
17 |
18 | "_comment": "=====ADVERSARIAL EXAMPLES CONFIGURATION=====",
19 | "epsilon": 0.3,
20 | "k": 40,
21 | "a": 0.01,
22 | "random_start": true,
23 | "loss_func": "xent",
24 | "store_adv_path": "attack.npy"
25 | }
26 |
--------------------------------------------------------------------------------
/robustml_model.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | import robustml
3 | import numpy as np
4 | import foolbox_model
5 |
6 |
7 | class ABSModel(robustml.model.Model):
8 | """RobustML interface for the Analysis by Synthesis (ABS) model."""
9 |
10 | def __init__(self):
11 | self._dataset = robustml.dataset.MNIST()
12 | self._threat_model = robustml.threat_model.L2(epsilon=1.5)
13 | self._fmodel = foolbox_model.create()
14 | assert self._fmodel.bounds() == (0, 1)
15 |
16 | @property
17 | def dataset(self):
18 | return self._dataset
19 |
20 | @property
21 | def threat_model(self):
22 | return self._threat_model
23 |
24 | def classify(self, x):
25 | assert x.shape == (28, 28)
26 |         x = x[np.newaxis]  # add channel axis
27 | assert x.shape == (1, 28, 28)
28 | return np.argmax(self._fmodel.predictions(x))
29 |
30 |
31 | if __name__ == '__main__':
32 | model = ABSModel()
33 |
34 | # design an input that looks like a 1
35 | x = np.zeros((28, 28), dtype=np.float32)
36 | x[5:-5, 12:-12] = 1
37 |
38 | print('class', model.classify(x))
39 |
--------------------------------------------------------------------------------
/madry/mnist_challenge/model_robustml.py:
--------------------------------------------------------------------------------
1 | import robustml
2 | import tensorflow as tf
3 |
4 | import model
5 |
6 | class Model(robustml.model.Model):
7 | def __init__(self, sess):
8 | self._model = model.Model()
9 |
10 | saver = tf.train.Saver()
11 | checkpoint = tf.train.latest_checkpoint('models/secret')
12 | saver.restore(sess, checkpoint)
13 |
14 | self._sess = sess
15 | self._input = self._model.x_input
16 | self._logits = self._model.pre_softmax
17 | self._predictions = self._model.y_pred
18 | self._dataset = robustml.dataset.MNIST()
19 | self._threat_model = robustml.threat_model.Linf(epsilon=0.3)
20 |
21 | @property
22 | def dataset(self):
23 | return self._dataset
24 |
25 | @property
26 | def threat_model(self):
27 | return self._threat_model
28 |
29 | def classify(self, x):
30 | return self._sess.run(self._predictions,
31 | {self._input: x})[0]
32 |
33 | # expose attack interface
34 |
35 | @property
36 | def input(self):
37 | return self._input
38 |
39 | @property
40 | def logits(self):
41 | return self._logits
42 |
43 | @property
44 | def predictions(self):
45 | return self._predictions
46 |
--------------------------------------------------------------------------------
/madry/mnist_challenge/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2017 Aleksander Madry, Aleksandar Makelov, Ludwig Schmidt, Dimitris Tsipras, and Adrian Vladu
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/madry/mnist_challenge/fetch_model.py:
--------------------------------------------------------------------------------
1 | """Downloads a model, computes its SHA256 hash and unzips it
2 | at the proper location."""
3 | from __future__ import absolute_import
4 | from __future__ import division
5 | from __future__ import print_function
6 |
7 | import sys
8 | import zipfile
9 | import hashlib
10 |
11 | if len(sys.argv) != 2 or sys.argv[1] not in ['natural',
12 | 'adv_trained',
13 | 'secret']:
14 | print('Usage: python fetch_model.py [natural, adv_trained, secret]')
15 | sys.exit(1)
16 |
17 | if sys.argv[1] == 'natural':
18 | url = 'https://github.com/MadryLab/mnist_challenge_models/raw/master/natural.zip'
19 | elif sys.argv[1] == 'secret':
20 | url = 'https://github.com/MadryLab/mnist_challenge_models/raw/master/secret.zip'
21 | else: # fetch adv_trained model
22 | url = 'https://github.com/MadryLab/mnist_challenge_models/raw/master/adv_trained.zip'
23 |
24 | fname = url.split('/')[-1] # get the name of the file
25 |
26 | # model download
27 | print('Downloading models')
28 | if sys.version_info >= (3,):
29 | import urllib.request
30 | urllib.request.urlretrieve(url, fname)
31 | else:
32 | import urllib
33 | urllib.urlretrieve(url, fname)
34 |
35 | # computing model hash
36 | sha256 = hashlib.sha256()
37 | with open(fname, 'rb') as f:
38 | data = f.read()
39 | sha256.update(data)
40 | print('SHA256 hash: {}'.format(sha256.hexdigest()))
41 |
42 | # extracting model
43 | print('Extracting model')
44 | with zipfile.ZipFile(fname, 'r') as model_zip:
45 | model_zip.extractall()
46 | print('Extracted model in {}'.format(model_zip.namelist()[0]))
47 |
--------------------------------------------------------------------------------
/abs_models/loss_functions.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from abs_models import utils as u
3 | import numpy as np
4 |
5 | def squared_L2_loss(a, b, axes, keepdim=True):
6 | return u.tsum((a - b)**2, axes=axes, keepdim=keepdim)
7 |
8 |
9 | def KLD(mu_latent_q, sig_q=1., dim=-3):
10 |     """KL divergence between q = N(mu_latent_q, sig_q) and the standard-normal prior.
11 |
12 |     :param mu_latent_q: latent means; the latent dimension must sit at position `dim`
13 |     :param sig_q: scalar standard deviation of q
14 |     :param dim: position of the latent dimension
15 |     :return: KLD summed over the latent dimension (dim kept with size 1)
16 |     """
17 | return -0.5 * torch.sum(1 - mu_latent_q ** 2 + u.tlog(sig_q) - sig_q**2,
18 | dim=dim, keepdim=True)
19 |
20 |
21 | def ELBOs(x_rec: torch.Tensor, samples_latent: torch.Tensor, x_orig: torch.Tensor,
22 | beta=1, dist_fct=squared_L2_loss):
23 | """
24 | :param x_rec: shape (..., n_channels, nx, ny)
25 | :param samples_latent: (..., n_latent, 1, 1)
26 | :param x_orig: (..., n_channels, nx, ny)
27 |     :param beta: weight of the KLD term
28 |     :param dist_fct: reconstruction distance (defaults to squared_L2_loss)
29 |     :return: ELBO per input, normalized by the number of pixels
30 | """
31 | n_ch, nx, ny = x_rec.shape[-3:]
32 | kld = KLD(samples_latent, sig_q=1.)
33 | rec_loss = dist_fct(x_orig, x_rec, axes=[-1, -2, -3])
34 | elbo = rec_loss + beta * kld
35 | # del x_rec, x_orig, kld
36 | # del x_rec, samples_latent, x_orig
37 | return elbo / (n_ch * nx * ny)
38 |
39 |
40 | def ELBOs2(x, rec_x, samples_latent, beta):
41 | """This is the loss function used during inference to calculate the logits.
42 |
43 |     This function must only operate on the last three dimensions of x and rec_x.
44 |     There can be a varying number of additional dimensions before them!
45 | """
46 |
47 | input_size = int(np.prod(x.shape[-3:]))
48 | assert len(x.shape) == 4 and len(rec_x.shape) == 4
49 | # alternative implementation that is much faster and more memory efficient
50 | # when each sample in x needs to be compared to each sample in rec_x
51 | assert x.shape[-3:] == rec_x.shape[-3:]
52 | x = x.reshape(x.shape[0], input_size)
53 | y = rec_x.reshape(rec_x.shape[0], input_size)
54 |
55 |     x2 = torch.norm(x, p=2, dim=-1, keepdim=True).pow(2)  # shape (bs, 1)
56 |     y2 = torch.norm(y, p=2, dim=-1, keepdim=True).pow(2)  # shape (n_samples, 1)
57 | # note that we could cache the calculation of y2, but
58 | # it's so fast that it doesn't matter
59 |
60 |     # ||x - y||^2 = ||x||^2 + ||y||^2 - 2 <x, y>, evaluated for all pairs at once
61 |     L2squared = x2 + y2.t() - 2 * torch.mm(x, y.t())
62 |     L2squared = L2squared / input_size
63 |
64 |     kld = KLD(samples_latent, sig_q=1.)[None, :, 0, 0, 0] / input_size
65 |     # note that the KLD sum is over the latents, not over the input size
66 |     return L2squared + beta * kld
67 |
--------------------------------------------------------------------------------
/madry/mnist_challenge/model.py:
--------------------------------------------------------------------------------
1 | """
2 | The model is adapted from the tensorflow tutorial:
3 | https://www.tensorflow.org/get_started/mnist/pros
4 | """
5 | from __future__ import absolute_import
6 | from __future__ import division
7 | from __future__ import print_function
8 |
9 | import tensorflow as tf
10 |
11 |
12 | class Model(object):
13 | def __init__(self):
14 | self.x_input = tf.placeholder(tf.float32, shape = [None, 1, 28, 28])
15 | self.y_input = tf.placeholder(tf.int64, shape = [None])
16 |
17 | self.x_image = tf.reshape(self.x_input, [-1, 28, 28, 1])
18 |
19 | # first convolutional layer
20 | self.W_conv1 = self._weight_variable([5,5,1,32])
21 | b_conv1 = self._bias_variable([32])
22 |
23 | self.h_conv1 = tf.nn.relu(self._conv2d(self.x_image, self.W_conv1) + b_conv1)
24 | h_pool1 = self._max_pool_2x2(self.h_conv1)
25 |
26 | # second convolutional layer
27 | W_conv2 = self._weight_variable([5,5,32,64])
28 | b_conv2 = self._bias_variable([64])
29 |
30 | h_conv2 = tf.nn.relu(self._conv2d(h_pool1, W_conv2) + b_conv2)
31 | self.h_pool2 = self._max_pool_2x2(h_conv2)
32 |
33 | # first fully connected layer
34 | W_fc1 = self._weight_variable([7 * 7 * 64, 1024])
35 | b_fc1 = self._bias_variable([1024])
36 |
37 | h_pool2_flat = tf.reshape(self.h_pool2, [-1, 7 * 7 * 64])
38 | h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
39 |
40 | # output layer
41 | W_fc2 = self._weight_variable([1024,10])
42 | b_fc2 = self._bias_variable([10])
43 |
44 | self.pre_softmax = tf.matmul(h_fc1, W_fc2) + b_fc2
45 |
46 | y_xent = tf.nn.sparse_softmax_cross_entropy_with_logits(
47 | labels=self.y_input, logits=self.pre_softmax)
48 |
49 | self.xent = tf.reduce_sum(y_xent)
50 |
51 | self.y_pred = tf.argmax(self.pre_softmax, 1)
52 |
53 | correct_prediction = tf.equal(self.y_pred, self.y_input)
54 |
55 | self.num_correct = tf.reduce_sum(tf.cast(correct_prediction, tf.int64))
56 | self.accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
57 |
58 | @staticmethod
59 | def _weight_variable(shape):
60 | initial = tf.truncated_normal(shape, stddev=0.1)
61 | return tf.Variable(initial)
62 |
63 | @staticmethod
64 | def _bias_variable(shape):
65 | initial = tf.constant(0.1, shape = shape)
66 | return tf.Variable(initial)
67 |
68 | @staticmethod
69 | def _conv2d(x, W):
70 | return tf.nn.conv2d(x, W, strides=[1,1,1,1], padding='SAME')
71 |
72 | @staticmethod
73 |     def _max_pool_2x2(x):
74 | return tf.nn.max_pool(x,
75 | ksize = [1,2,2,1],
76 | strides=[1,2,2,1],
77 | padding='SAME')
78 |
--------------------------------------------------------------------------------
/madry/mnist_challenge/run_attack.py:
--------------------------------------------------------------------------------
1 | """Evaluates a model against examples from a .npy file as specified
2 | in config.json"""
3 | from __future__ import absolute_import
4 | from __future__ import division
5 | from __future__ import print_function
6 |
7 | from datetime import datetime
8 | import json
9 | import math
10 | import os
11 | import sys
12 | import time
13 |
14 | import tensorflow as tf
15 | from tensorflow.examples.tutorials.mnist import input_data
16 |
17 | import numpy as np
18 |
19 | from model import Model
20 |
21 | def run_attack(checkpoint, x_adv, epsilon):
22 | mnist = input_data.read_data_sets('MNIST_data', one_hot=False)
23 |
24 | model = Model()
25 |
26 | saver = tf.train.Saver()
27 |
28 | num_eval_examples = 10000
29 | eval_batch_size = 64
30 |
31 | num_batches = int(math.ceil(num_eval_examples / eval_batch_size))
32 | total_corr = 0
33 |
34 | x_nat = mnist.test.images
35 | l_inf = np.amax(np.abs(x_nat - x_adv))
36 |
37 | if l_inf > epsilon + 0.0001:
38 | print('maximum perturbation found: {}'.format(l_inf))
39 | print('maximum perturbation allowed: {}'.format(epsilon))
40 | return
41 |
42 | y_pred = [] # label accumulator
43 |
44 | with tf.Session() as sess:
45 | # Restore the checkpoint
46 | saver.restore(sess, checkpoint)
47 |
48 | # Iterate over the samples batch-by-batch
49 | for ibatch in range(num_batches):
50 | bstart = ibatch * eval_batch_size
51 | bend = min(bstart + eval_batch_size, num_eval_examples)
52 |
53 | x_batch = x_adv[bstart:bend, :]
54 | y_batch = mnist.test.labels[bstart:bend]
55 |
56 | dict_adv = {model.x_input: x_batch,
57 | model.y_input: y_batch}
58 | cur_corr, y_pred_batch = sess.run([model.num_correct, model.y_pred],
59 | feed_dict=dict_adv)
60 |
61 | total_corr += cur_corr
62 | y_pred.append(y_pred_batch)
63 |
64 | accuracy = total_corr / num_eval_examples
65 |
66 | print('Accuracy: {:.2f}%'.format(100.0 * accuracy))
67 | y_pred = np.concatenate(y_pred, axis=0)
68 | np.save('pred.npy', y_pred)
69 | print('Output saved at pred.npy')
70 |
71 | if __name__ == '__main__':
72 | import json
73 |
74 | with open('config.json') as config_file:
75 | config = json.load(config_file)
76 |
77 | model_dir = config['model_dir']
78 |
79 | checkpoint = tf.train.latest_checkpoint(model_dir)
80 | x_adv = np.load(config['store_adv_path'])
81 |
82 | if checkpoint is None:
83 | print('No checkpoint found')
84 | elif x_adv.shape != (10000, 784):
85 | print('Invalid shape: expected (10000,784), found {}'.format(x_adv.shape))
86 | elif np.amax(x_adv) > 1.0001 or \
87 | np.amin(x_adv) < -0.0001 or \
88 | np.isnan(np.amax(x_adv)):
89 | print('Invalid pixel range. Expected [0, 1], found [{}, {}]'.format(
90 | np.amin(x_adv),
91 | np.amax(x_adv)))
92 | else:
93 | run_attack(checkpoint, x_adv, config['epsilon'])
94 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Towards the first adversarially robust neural network model on MNIST
2 |
3 | The ABS model is an MNIST classifier that is robust with respect to adversarial examples. For more details, check out our paper "Towards the first adversarially robust neural network model on MNIST" [1]:
4 | https://arxiv.org/abs/1805.09190
5 |
6 | This repository provides our pre-trained Analysis by Synthesis (**ABS**) model as well as pre-trained baselines:
7 | a vanilla CNN, a binary CNN, a Nearest Neighbour classifier, and the model of Madry et al. [2].
8 |
9 | A random selection of adversarial examples for the different models can be seen below.
10 |
11 |
12 | 
13 | Smallest adversarial examples for different architectures.
14 |
15 |
16 |
17 |
18 | To generate adversarial examples and to run the code independently of the deep learning framework (e.g. TensorFlow, PyTorch), we use foolbox [3].
19 | Foolbox supports decision-, score- and gradient-based attacks. For gradient-based attacks, the gradients can either be calculated directly or estimated from the model scores with finite-difference methods, as sketched below.
20 | Additionally, some model-specific attacks (LatentDescent) are provided.
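
A minimal sketch of the gradient-estimation setup, adapted from _scripts/attacks.py_ (foolbox 1.x API); it assumes `fmodel` is a foolbox-wrapped model as constructed in the snippet further below:

```
import foolbox

# wrap a model whose gradients are unavailable so that attacks can
# estimate gradients from the model scores via finite differences
GE = foolbox.gradient_estimators.CoordinateWiseGradientEstimator(0.1)
fmodel = foolbox.models.ModelWithEstimatedGradients(fmodel, GE)
attack = foolbox.attacks.DeepFoolL2Attack(fmodel)
```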
21 |
22 | Lastly, we also compute distal (also called trash) adversarial examples: unrecognizable images that are nonetheless classified as a digit with high confidence. A condensed version of the generation loop is sketched after the figure below.
23 |
24 |
25 | 
26 | Distal adversarials that are classified as "1" with more than 90% certainty.
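
The following condensed sketch (see _scripts/attacks.py_ for the full loop, including the confidence computation and the stopping criterion) starts from random noise and ascends the estimated gradient of a fixed class; it again assumes the foolbox-wrapped `fmodel` from above:

```
import numpy as np
import torch
from foolbox.gradient_estimators import CoordinateWiseGradientEstimator as CWGE
from abs_models import attack_utils as au

a = np.random.random((1, 28, 28)).astype(np.float32)   # start from noise
a_helper = torch.tensor(a.copy(), requires_grad=True)  # buffer used by the optimizer
opti = torch.optim.SGD([a_helper], lr=1, momentum=0.95)
GE = CWGE(1.)
fixed_class = 1  # target class for the distal adversarial

for i in range(10000):
    grads = GE(fmodel.batch_predictions, a, fixed_class, (0, 1))  # estimated gradient
    a = au.update_distal_adv(a, a_helper, grads, opti)  # SGD step, result clipped to [0, 1]
```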
27 |
28 |
29 |
30 |
31 |
32 | ## Loading the ABS Model
33 | The model can be loaded as follows and supports the standard PyTorch API:
34 |
35 | ```
36 | from abs_models import models as mz # model zoo
37 | from abs_models import utils as u
38 |
39 | model = mz.get_VAE(n_iter=50)  # ABS; use n_iter=1 for a speedup (but less accurate)
40 | batch, label = u.get_batch()  # returns np.arrays, shape (batch_size, n_channels, nx, ny)
41 | logits = model(u.n2t(batch))
42 | ```
43 | For a complete example using foolbox see "_scripts/attacks.ipynb_" or "_scripts/attacks.py_".
44 |
45 | Since foolbox abstracts away the underlying framework, the same attack code runs against both our PyTorch models and the TensorFlow model of Madry et al. The snippet below (adapted from _scripts/attacks.py_) wraps either kind of model into a foolbox model:
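
```
import foolbox
from abs_models import utils as u

# `model.code_base` indicates which framework the loaded model uses
if model.code_base == 'tensorflow':
    fmodel = foolbox.models.TensorFlowModel(model.x_input, model.pre_softmax,
                                            (0., 1.), channel_axis=3)
elif model.code_base == 'pytorch':
    model.eval()
    fmodel = foolbox.models.PyTorchModel(model, bounds=(0., 1.), num_classes=10,
                                         device=u.dev())
```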
46 |
47 | ## Installation
48 | Our code uses Python 3.6 and PyTorch and can be found here (this repo):
49 | ```
50 | git clone https://github.com/lukas-schott/AnalysisBySynthesis.git
51 | ```
52 |
53 | The dependencies are:
54 | ```
55 | pip3 --no-cache-dir install \
56 | numpy \
57 | http://download.pytorch.org/whl/cu90/torch-0.4.0-cp36-cp36m-linux_x86_64.whl \
58 | torchvision \
59 | foolbox
60 | ```
61 |
62 | Have fun :).
63 |
64 |
65 | [1] Lukas Schott, Jonas Rauber, Matthias Bethge, and Wieland Brendel. Towards the first adversarially robust neural network model on MNIST. In _International Conference on Learning Representations_, 2019. URL https://arxiv.org/abs/1805.09190
66 |
67 |
68 | [2] Aleksander Madry, Aleksandar Makelov, Ludwig Schmidt, Dimitris Tsipras, and Adrian Vladu. Towards deep learning models resistant to adversarial attacks. In _International Conference on Learning Representations_, 2018. URL https://openreview.net/forum?id=rJzIBfZ
69 |
70 |
71 | [3] Jonas Rauber and Wieland Brendel. Foolbox Documentation. Read the Docs, 2018. URL https://media.readthedocs.org/pdf/foolbox/latest/foolbox.pdf
72 |
--------------------------------------------------------------------------------
/abs_models/sampling.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from abs_models import utils as u
3 | from torch import tensor
4 | import numpy as np
5 | from torch.nn import functional as F
6 | from scipy.stats import multivariate_normal
7 |
8 |
9 | class GridMan(object):
10 | def __init__(self, AEs, nd, n_classes, nc=1, nx=28, ny=28, limit=0.99):
11 | self.samples = {}
12 | self.images = {}
13 | self.th_images = {}
14 | self.classes = {}
15 | self.l_v = {}
16 | self.AEs = AEs
17 | self.nd = nd
18 | self.n_classes = n_classes
19 | self.nx = nx
20 | self.ny = ny
21 | self.nc = nc
22 |         self.limit = limit
23 |
24 | def init_grid(self, n_samples, fraction_to_dismiss=None,
25 | sample_sigma=None):
26 | n_grid = self.n_samples_to_n_grid(n_samples)
27 | print('init new grid', n_samples, n_grid)
28 | limit = 0.99
29 | if self.limit is not None:
30 | limit = self.limit
31 | grids = [(np.linspace(-limit, limit, n_grid)) for i in range(self.nd)]
32 | xys = np.array(np.meshgrid(*grids))
33 | xys = np.moveaxis(xys, 0, -1).reshape(n_grid ** self.nd, self.nd)
34 | self.samples[n_samples] = xys
35 | self.l_v[n_samples] = \
36 | torch.from_numpy(xys[:, :, None, None].astype(np.float32)).to(u.dev())
37 |
38 | def get_images(self, n_samples=10, fraction_to_dismiss=0.1,
39 | weighted=False, sample_sigma=1):
40 | if n_samples not in self.images.keys():
41 | self.init_grid(n_samples, fraction_to_dismiss=fraction_to_dismiss,
42 | sample_sigma=sample_sigma)
43 | self.images[n_samples] = np.empty((self.n_classes, n_samples,
44 | self.nc, self.nx, self.ny))
45 | for c, AE in enumerate(self.AEs[:self.n_classes]):
46 | AE.eval()
47 | images = torch.sigmoid(AE.Decoder.forward(self.l_v[n_samples])).cpu().data.numpy()
48 | if weighted:
49 | images = images[:, 0, None]
50 | self.images[n_samples][c, ...] = images
51 |
52 |
53 | assert n_samples not in self.th_images
54 | self.th_images[n_samples] = tensor(self.images[n_samples]).type(
55 | torch.FloatTensor).to(u.dev())
56 | print('done creating samples')
57 |
58 | return self.images[n_samples]
59 |
60 | def n_samples_to_n_grid(self, n_samples):
61 | return int(np.round(n_samples ** (1. / self.nd)))
62 |
63 |
64 | class GaussianSamples(GridMan):
65 | def init_grid(self, n_samples, fraction_to_dismiss=0.1,
66 | mus=None, sample_sigma=1):
67 | if mus is None:
68 | mus = np.zeros(self.nd)
69 | samples = get_gaussian_samples(n_samples, self.nd, mus,
70 | fraction_to_dismiss=fraction_to_dismiss,
71 | sample_sigma=sample_sigma)
72 | self.samples[n_samples] = samples
73 | self.l_v[n_samples] = \
74 | torch.from_numpy(samples[:, :, None, None].astype(
75 | np.float32)).to(u.dev())
76 |
77 | def n_samples_to_n_grid(self, n_samples):
78 | return n_samples
79 |
80 |
81 | def get_gaussian_samples(n_samples, nd, mus,
82 | fraction_to_dismiss=0.1, sample_sigma=1):
83 | # returns nd coords sampled from gaussian in shape (n_samples, nd)
84 | sigmas = np.diag(np.ones(nd)) * sample_sigma
85 | g = multivariate_normal(mus, sigmas)
86 | samples = g.rvs(size=int(n_samples / (1. - fraction_to_dismiss)))
87 | probs = g.pdf(samples)
88 | thresh = np.sort(probs)[-n_samples]
89 | samples = samples[probs >= thresh]
90 | return samples
91 |
--------------------------------------------------------------------------------
/madry/mnist_challenge/pgd_attack.py:
--------------------------------------------------------------------------------
1 | """
2 | Implementation of attack methods. Running this file as a program will
3 | apply the attack to the model specified by the config file and store
4 | the examples in an .npy file.
5 | """
6 | from __future__ import absolute_import
7 | from __future__ import division
8 | from __future__ import print_function
9 |
10 | import tensorflow as tf
11 | import numpy as np
12 |
13 |
14 | class LinfPGDAttack:
15 | def __init__(self, model, epsilon, k, a, random_start, loss_func):
16 | """Attack parameter initialization. The attack performs k steps of
17 | size a, while always staying within epsilon from the initial
18 | point."""
19 | self.model = model
20 | self.epsilon = epsilon
21 | self.k = k
22 | self.a = a
23 | self.rand = random_start
24 |
25 | if loss_func == 'xent':
26 | loss = model.xent
27 | elif loss_func == 'cw':
28 | label_mask = tf.one_hot(model.y_input,
29 | 10,
30 | on_value=1.0,
31 | off_value=0.0,
32 | dtype=tf.float32)
33 | correct_logit = tf.reduce_sum(label_mask * model.pre_softmax, axis=1)
34 | wrong_logit = tf.reduce_max((1-label_mask) * model.pre_softmax, axis=1)
35 | loss = -tf.nn.relu(correct_logit - wrong_logit + 50)
36 | else:
37 | print('Unknown loss function. Defaulting to cross-entropy')
38 | loss = model.xent
39 |
40 | self.grad = tf.gradients(loss, model.x_input)[0]
41 |
42 | def perturb(self, x_nat, y, sess):
43 | """Given a set of examples (x_nat, y), returns a set of adversarial
44 | examples within epsilon of x_nat in l_infinity norm."""
45 | if self.rand:
46 | x = x_nat + np.random.uniform(-self.epsilon, self.epsilon, x_nat.shape)
47 | else:
48 | x = np.copy(x_nat)
49 |
50 | for i in range(self.k):
51 | grad = sess.run(self.grad, feed_dict={self.model.x_input: x,
52 | self.model.y_input: y})
53 |
54 | x += self.a * np.sign(grad)
55 |
56 | x = np.clip(x, x_nat - self.epsilon, x_nat + self.epsilon)
57 | x = np.clip(x, 0, 1) # ensure valid pixel range
58 |
59 | return x
60 |
61 |
62 | if __name__ == '__main__':
63 | import json
64 | import sys
65 | import math
66 |
67 | from tensorflow.examples.tutorials.mnist import input_data
68 |
69 | from model import Model
70 |
71 | with open('config.json') as config_file:
72 | config = json.load(config_file)
73 |
74 | model_file = tf.train.latest_checkpoint(config['model_dir'])
75 | if model_file is None:
76 | print('No model found')
77 | sys.exit()
78 |
79 | model = Model()
80 | attack = LinfPGDAttack(model,
81 | config['epsilon'],
82 | config['k'],
83 | config['a'],
84 | config['random_start'],
85 | config['loss_func'])
86 | saver = tf.train.Saver()
87 |
88 | mnist = input_data.read_data_sets('MNIST_data', one_hot=False)
89 |
90 | with tf.Session() as sess:
91 | # Restore the checkpoint
92 | saver.restore(sess, model_file)
93 |
94 | # Iterate over the samples batch-by-batch
95 | num_eval_examples = config['num_eval_examples']
96 | eval_batch_size = config['eval_batch_size']
97 | num_batches = int(math.ceil(num_eval_examples / eval_batch_size))
98 |
99 | x_adv = [] # adv accumulator
100 |
101 | print('Iterating over {} batches'.format(num_batches))
102 |
103 | for ibatch in range(num_batches):
104 | bstart = ibatch * eval_batch_size
105 | bend = min(bstart + eval_batch_size, num_eval_examples)
106 | print('batch size: {}'.format(bend - bstart))
107 |
108 | x_batch = mnist.test.images[bstart:bend, :]
109 | y_batch = mnist.test.labels[bstart:bend]
110 |
111 | x_batch_adv = attack.perturb(x_batch, y_batch, sess)
112 |
113 | x_adv.append(x_batch_adv)
114 |
115 | print('Storing examples')
116 | path = config['store_adv_path']
117 | x_adv = np.concatenate(x_adv, axis=0)
118 | np.save(path, x_adv)
119 | print('Examples stored in {}'.format(path))
120 |
--------------------------------------------------------------------------------
/madry/mnist_challenge/train.py:
--------------------------------------------------------------------------------
1 | """Trains a model, saving checkpoints and tensorboard summaries along
2 | the way."""
3 | from __future__ import absolute_import
4 | from __future__ import division
5 | from __future__ import print_function
6 |
7 | from datetime import datetime
8 | import json
9 | import os
10 | import shutil
11 | from timeit import default_timer as timer
12 |
13 | import tensorflow as tf
14 | import numpy as np
15 | from tensorflow.examples.tutorials.mnist import input_data
16 |
17 | from model import Model
18 | from pgd_attack import LinfPGDAttack
19 |
20 | with open('config.json') as config_file:
21 | config = json.load(config_file)
22 |
23 | # Setting up training parameters
24 | tf.set_random_seed(config['random_seed'])
25 |
26 | max_num_training_steps = config['max_num_training_steps']
27 | num_output_steps = config['num_output_steps']
28 | num_summary_steps = config['num_summary_steps']
29 | num_checkpoint_steps = config['num_checkpoint_steps']
30 |
31 | batch_size = config['training_batch_size']
32 |
33 | # Setting up the data and the model
34 | mnist = input_data.read_data_sets('MNIST_data', one_hot=False)
35 | global_step = tf.contrib.framework.get_or_create_global_step()
36 | model = Model()
37 |
38 | # Setting up the optimizer
39 | train_step = tf.train.AdamOptimizer(1e-4).minimize(model.xent,
40 | global_step=global_step)
41 |
42 | # Set up adversary
43 | attack = LinfPGDAttack(model,
44 | config['epsilon'],
45 | config['k'],
46 | config['a'],
47 | config['random_start'],
48 | config['loss_func'])
49 |
50 | # Setting up the Tensorboard and checkpoint outputs
51 | model_dir = config['model_dir']
52 | if not os.path.exists(model_dir):
53 | os.makedirs(model_dir)
54 |
55 | # We add accuracy and xent twice so we can easily make three types of
56 | # comparisons in Tensorboard:
57 | # - train vs eval (for a single run)
58 | # - train of different runs
59 | # - eval of different runs
60 |
61 | saver = tf.train.Saver(max_to_keep=3)
62 | tf.summary.scalar('accuracy adv train', model.accuracy)
63 | tf.summary.scalar('accuracy adv', model.accuracy)
64 | tf.summary.scalar('xent adv train', model.xent / batch_size)
65 | tf.summary.scalar('xent adv', model.xent / batch_size)
66 | tf.summary.image('images adv train', model.x_image)
67 | merged_summaries = tf.summary.merge_all()
68 |
69 | shutil.copy('config.json', model_dir)
70 |
71 | with tf.Session() as sess:
72 | # Initialize the summary writer, global variables, and our time counter.
73 | summary_writer = tf.summary.FileWriter(model_dir, sess.graph)
74 | sess.run(tf.global_variables_initializer())
75 | training_time = 0.0
76 |
77 | # Main training loop
78 | for ii in range(max_num_training_steps):
79 | x_batch, y_batch = mnist.train.next_batch(batch_size)
80 |
81 | # Compute Adversarial Perturbations
82 | start = timer()
83 | x_batch_adv = attack.perturb(x_batch, y_batch, sess)
84 | end = timer()
85 | training_time += end - start
86 |
87 | nat_dict = {model.x_input: x_batch,
88 | model.y_input: y_batch}
89 |
90 | adv_dict = {model.x_input: x_batch_adv,
91 | model.y_input: y_batch}
92 |
93 | # Output to stdout
94 | if ii % num_output_steps == 0:
95 | nat_acc = sess.run(model.accuracy, feed_dict=nat_dict)
96 | adv_acc = sess.run(model.accuracy, feed_dict=adv_dict)
97 | print('Step {}: ({})'.format(ii, datetime.now()))
98 | print(' training nat accuracy {:.4}%'.format(nat_acc * 100))
99 | print(' training adv accuracy {:.4}%'.format(adv_acc * 100))
100 | if ii != 0:
101 | print(' {} examples per second'.format(
102 | num_output_steps * batch_size / training_time))
103 | training_time = 0.0
104 | # Tensorboard summaries
105 | if ii % num_summary_steps == 0:
106 | summary = sess.run(merged_summaries, feed_dict=adv_dict)
107 | summary_writer.add_summary(summary, global_step.eval(sess))
108 |
109 | # Write a checkpoint
110 | if ii % num_checkpoint_steps == 0:
111 | saver.save(sess,
112 | os.path.join(model_dir, 'checkpoint'),
113 | global_step=global_step)
114 |
115 | # Actual training step
116 | start = timer()
117 | sess.run(train_step, feed_dict=adv_dict)
118 | end = timer()
119 | training_time += end - start
120 |
--------------------------------------------------------------------------------
/abs_models/attack_utils.py:
--------------------------------------------------------------------------------
1 | import foolbox
2 | import foolbox.attacks as fa
3 | import numpy as np
4 | import torch
5 |
6 | from abs_models import utils as u
7 | from abs_models import models
8 |
9 |
10 | def get_attack(attack, fmodel):
11 | args = []
12 | kwargs = {}
13 | # L0
14 | if attack == 'SAPA':
15 | metric = foolbox.distances.L0
16 | A = fa.SaltAndPepperNoiseAttack(fmodel)
17 | elif attack == 'PA':
18 | metric = foolbox.distances.L0
19 | A = fa.PointwiseAttack(fmodel)
20 |
21 | # L2
22 | elif 'IGD' in attack:
23 | metric = foolbox.distances.MSE
24 | A = fa.L2BasicIterativeAttack(fmodel)
25 | elif attack == 'AGNA':
26 | metric = foolbox.distances.MSE
27 | kwargs['epsilons'] = np.linspace(0.5, 1, 50)
28 | A = fa.AdditiveGaussianNoiseAttack(fmodel)
29 | elif attack == 'BA':
30 | metric = foolbox.distances.MSE
31 | A = fa.BoundaryAttack(fmodel)
32 | elif 'DeepFool' in attack:
33 | metric = foolbox.distances.MSE
34 | A = fa.DeepFoolL2Attack(fmodel)
35 | elif attack == 'PAL2':
36 | metric = foolbox.distances.MSE
37 | A = fa.PointwiseAttack(fmodel)
38 |
39 | # L inf
40 |     elif 'FGSM' in attack and 'IFGSM' not in attack:
41 | metric = foolbox.distances.Linf
42 | A = fa.FGSM(fmodel)
43 | kwargs['epsilons'] = 20
44 |
45 | elif 'IFGSM' in attack:
46 | metric = foolbox.distances.Linf
47 | A = fa.IterativeGradientSignAttack(fmodel)
48 | elif 'PGD' in attack:
49 | metric = foolbox.distances.Linf
50 | A = fa.LinfinityBasicIterativeAttack(fmodel)
51 | elif 'IGM' in attack:
52 | metric = foolbox.distances.Linf
53 | A = fa.MomentumIterativeAttack(fmodel)
54 | else:
55 | raise Exception('Not implemented')
56 | return A, metric, args, kwargs
57 |
58 |
59 | class LineSearchAttack:
60 |     def __init__(self, abs_model: models.ELBOVAE):
61 | self.abs = abs_model
62 |
63 | def __call__(self, x, l, n_coarse_steps=3, n_ft_steps=10):
64 | x, l = u.n2t(x), u.n2t(l)
65 | x, l = x.to(u.dev()), l.to(u.dev())
66 | bs = x.shape[0]
67 | best_other = 0
68 | best_advs = [{'original_label': -1, 'adversarial_label': None,
69 | 'distance': np.inf, 'img': torch.zeros(x.shape[1:]).to(u.dev())}
70 | for _ in range(bs)]
71 | coarse_steps = torch.zeros(bs).to(u.dev())
72 |
73 | n_adv_found = 0
74 | for i, coarse_step in enumerate(torch.linspace(0, 1., n_coarse_steps).to(u.dev())):
75 | current_adv = (1 - coarse_step) * x + coarse_step * best_other
76 | best_other, current_label = self.get_best_prototypes(current_adv, l)
77 | for j, (current_adv_i, pred_l_i, l_i) in enumerate(zip(current_adv, current_label, l)):
78 | if best_advs[j]['original_label'] == -1 and pred_l_i != l_i:
79 | self.update_adv(best_advs[j], current_adv_i, pred_l_i, l_i, x[j])
80 |                     coarse_steps[j] = coarse_step  # record the step at which sample j turned adversarial
81 | n_adv_found += 1
82 | if n_adv_found == bs:
83 | break
84 | best_advs_imgs = torch.cat([a['img'][None] for a in best_advs])
85 | coarse_steps_old = coarse_steps[:, None, None, None]
86 |
87 | # binary search
88 | best_advs_imgs_old = best_advs_imgs.clone()
89 | sign, step = - torch.ones(bs, 1, 1, 1).to(u.dev()), 0.5
90 | for i in range(n_ft_steps):
91 | coarse_steps = coarse_steps_old + step * sign
92 | current_adv = (1 - coarse_steps) * x + coarse_steps * best_advs_imgs_old
93 | _, current_label = self.get_best_prototypes(current_adv, l)
94 |
95 | for j, (pred_l_i, l_i) in enumerate(zip(current_label, l)):
96 | if pred_l_i == l_i:
97 | sign[j] = 1
98 | else:
99 | self.update_adv(best_advs[j], current_adv[j], pred_l_i, l_i, x[j])
100 |
101 | sign[j] = -1
102 | step /= 2
103 |
104 | return best_advs
105 |
106 | def get_best_prototypes(self, x: torch.Tensor, l: torch.Tensor):
107 | bs = l.shape[0]
108 | p_c, elbos, l_v_classes, reconsts = self.abs.forward(x, return_more=True)
109 | _, pred_classes = torch.max(p_c, dim=1)
110 | p_c[range(bs), l] = - np.inf
111 | _, pred_classes_other = torch.max(p_c, dim=1)
112 | best_other_reconst = reconsts[range(bs), pred_classes_other.squeeze()]
113 | best_other_reconst = self.post_process_reconst(best_other_reconst, x)
114 |
115 | return best_other_reconst, pred_classes.squeeze()
116 |
117 | def update_adv(self, best_adv, current_adv, pred_l, orig_l, orig_x):
118 | best_adv['img'] = current_adv.data.clone()
119 | best_adv['original_label'] = orig_l.cpu().numpy()
120 | best_adv['adversarial_label'] = pred_l.cpu().numpy()
121 | best_adv['distance'] = np.mean((current_adv - orig_x).cpu().numpy()**2)
122 |
123 | def post_process_reconst(self, reconst, x):
124 | return reconst
125 |
126 |
127 | class BinaryLineSearchAttack(LineSearchAttack):
128 | def post_process_reconst(self, reconst, x):
129 | return u.binary_projection(reconst, x)
130 |
131 |
132 | def update_distal_adv(a, a_up, grads, opti):
133 | a_up.data = torch.from_numpy(a)
134 | opti.zero_grad()
135 | a_up.grad = torch.from_numpy(grads)
136 | opti.step()
137 | a_up.data.clamp_(0, 1)
138 | a = a_up.data.numpy()
139 | return a
140 |
--------------------------------------------------------------------------------
/scripts/attacks.py:
--------------------------------------------------------------------------------
1 | # ---
2 | # jupyter:
3 | # jupytext_format_version: '1.2'
4 | # kernelspec:
5 | # display_name: Python 3
6 | # language: python
7 | # name: python3
8 | # language_info:
9 | # codemirror_mode:
10 | # name: ipython
11 | # version: 3
12 | # file_extension: .py
13 | # mimetype: text/x-python
14 | # name: python
15 | # nbconvert_exporter: python
16 | # pygments_lexer: ipython3
17 | # version: 3.6.4
18 | # ---
19 |
20 | # +
21 | import sys
22 | sys.path.insert(0, './../')
23 | # %load_ext autoreload
24 | # %autoreload 2
25 | # %matplotlib inline
26 |
27 | import torch
28 | from torchvision import datasets, transforms
29 |
30 | import numpy as np
31 | from matplotlib import pyplot as plt
32 | import foolbox
33 | from foolbox import attacks as fa
34 |
35 | # own modules
36 | from abs_models import utils as u
37 | from abs_models import models as mz
38 | from abs_models import attack_utils as au
39 | # -
40 |
41 | model = mz.get_VAE(n_iter=10) # ABS, do n_iter=50 for original model
42 | # model = mz.get_VAE(binary=True)  # ABS with scaling and binarization
43 | # model = mz.get_binary_CNN() # Binary CNN
44 | # model = mz.get_CNN() # Vanilla CNN
45 | # model = mz.get_NearestNeighbor() # Nearest Neighbor, "nearest L2 dist to each class"=logits
46 | # model = mz.get_madry() # Robust network from Madry et al. in tf
47 |
48 | # the code is agnostic of the pytorch/tensorflow backend --> wrap into a foolbox model
49 | if model.code_base == 'tensorflow':
50 | fmodel = foolbox.models.TensorFlowModel(model.x_input, model.pre_softmax, (0., 1.),
51 | channel_axis=3)
52 | elif model.code_base == 'pytorch':
53 | model.eval()
54 | fmodel = foolbox.models.PyTorchModel(model, # return logits in shape (bs, n_classes)
55 | bounds=(0., 1.), num_classes=10,
56 | device=u.dev())
57 | else:
58 | print('not implemented')
59 |
60 | # test model
61 | b, l = u.get_batch(bs=10000) # returns random batch as np.array
62 | pred_label = np.argmax(fmodel.batch_predictions(b), axis=1)
63 | print('score', float(np.sum(pred_label == l)) / b.shape[0])
64 |
65 | # # Decision based attacks
66 | # Note that this is only demo code. All experiments were optimized to our compute architecture.
67 |
68 | b, l = u.get_batch(bs=1) # returns random batch
69 |
70 | # +
71 | import time
72 | start = time.time()
73 | att = fa.DeepFoolL2Attack(fmodel)
74 | metric = foolbox.distances.MSE
75 | criterion = foolbox.criteria.Misclassification()
76 |
77 | plt.imshow(b[0, 0], cmap='gray')
78 | plt.title('orig')
79 | plt.axis('off')
80 | plt.show()
81 |
82 | # Estimate gradients from scores
83 | if not model.has_grad:
84 | GE = foolbox.gradient_estimators.CoordinateWiseGradientEstimator(0.1)
85 | fmodel = foolbox.models.ModelWithEstimatedGradients(fmodel, GE)
86 |
87 | # generate adversarial
88 | a = foolbox.adversarial.Adversarial(fmodel, criterion, b[0], l[0], distance=metric)
89 | att(a)
90 |
91 | print('runtime', time.time() - start, 'seconds')
92 | print('pred', np.argmax(fmodel.predictions(a.image)))
93 | if a.image is not None: # attack was successful
94 | plt.imshow(a.image[0], cmap='gray')
95 | plt.title('adv')
96 | plt.axis('off')
97 | plt.show()
98 | # -
99 |
100 | # # get Trash Adversarials
101 |
102 | from foolbox.gradient_estimators import CoordinateWiseGradientEstimator as CWGE
103 |
104 | a = np.random.random((1, 28, 28)).astype(np.float32)
105 | a_helper = torch.tensor(a.copy(), requires_grad=True)
106 | fixed_class = 1
107 | GE = CWGE(1.)
108 |
109 | opti = torch.optim.SGD([a_helper], lr=1, momentum=0.95)
110 |
111 | # +
112 | confidence_level = model.confidence_level # abs 0.0000031, CNN 1439000, madry 60, 1-NN 0.000000000004
113 | logits_scale = model.logit_scale # ABS 430, madry 1, CNN 1, 1-NN 5
114 |
115 | a_orig = a
116 | plt.imshow(u.t2n(a[0]), cmap='gray')
117 | plt.show()
118 |
119 | for i in range(10000):
120 | logits = fmodel.predictions(a)
121 | probs = u.t2n(u.confidence_softmax(logits_scale*torch.from_numpy(logits[None, :]), dim=1,
122 | const=confidence_level))[0]
123 | pred_class = np.argmax(u.t2n(logits).squeeze())
124 |
125 |     if probs[fixed_class] >= 0.9:
126 | break
127 | grads = GE(fmodel.batch_predictions, a, fixed_class, (0,1))
128 |
129 | a = au.update_distal_adv(a, a_helper, grads, opti)
130 | if i % 1000 == 0:
131 | print(f'probs {probs[pred_class]:.3f} class', pred_class)
132 | fig, ax = plt.subplots(1,3, squeeze=False, figsize=(10, 4))
133 | ax[0, 0].imshow(u.t2n(a[0]), cmap='gray')
134 | ax[0, 1].imshow(u.t2n(grads[0]), cmap='gray')
135 | ax[0, 2].imshow(np.sign(grads[0]), cmap='gray')
136 | plt.show()
137 | plt.imshow(u.t2n(a[0]), cmap='gray')
138 | plt.show()
139 | # -
140 |
141 | # # Latent Descent Attack
142 |
143 | # +
144 | # only for abs
145 | att = au.LineSearchAttack(model) # BinaryLineSearchAttack
146 | b, l = u.get_batch(bs=200)
147 |
148 | advs = att(b, l, n_coarse_steps=50+1, n_ft_steps=2)
149 |
150 | for adv in advs:
151 | adv['img'] = adv['img'].cpu().numpy()
152 |
153 | for i, (a_i, b_i) in enumerate(zip(advs, b)):
154 | l2 = np.sqrt(a_i['distance'] * 784) # convert from MSE
155 |
156 | fig, ax = plt.subplots(1, 2, squeeze=False)
157 | ax[0, 0].set_title(str(a_i['original_label']))
158 | ax[0, 0].imshow(u.t2n(b_i[0]), cmap='gray')
159 | ax[0, 1].set_title(str(a_i['adversarial_label']))
160 | ax[0, 1].imshow(u.t2n(a_i['img'][0]), cmap='gray')
161 | plt.show()
162 |     if i == 10:
163 | break
164 | print('mean L2', np.mean([np.sqrt(a_i['distance'] * 784) for a_i in advs]))
165 |
--------------------------------------------------------------------------------
/madry/mnist_challenge/eval.py:
--------------------------------------------------------------------------------
1 | """
2 | Infinite evaluation loop going through the checkpoints in the model directory
3 | as they appear and evaluating them. Accuracy and average loss are printed and
4 | added as tensorboard summaries.
5 | """
6 | from __future__ import absolute_import
7 | from __future__ import division
8 | from __future__ import print_function
9 |
10 | from datetime import datetime
11 | import json
12 | import math
13 | import os
14 | import sys
15 | import time
16 |
17 | import tensorflow as tf
18 | from tensorflow.examples.tutorials.mnist import input_data
19 |
20 | from model import Model
21 | from pgd_attack import LinfPGDAttack
22 |
23 | # Global constants
24 | with open('config.json') as config_file:
25 | config = json.load(config_file)
26 | num_eval_examples = config['num_eval_examples']
27 | eval_batch_size = config['eval_batch_size']
28 | eval_on_cpu = config['eval_on_cpu']
29 |
30 | model_dir = config['model_dir']
31 |
32 | # Set up the data, hyperparameters, and the model
33 | mnist = input_data.read_data_sets('MNIST_data', one_hot=False)
34 |
35 | if eval_on_cpu:
36 | with tf.device("/cpu:0"):
37 | model = Model()
38 | attack = LinfPGDAttack(model,
39 | config['epsilon'],
40 | config['k'],
41 | config['a'],
42 | config['random_start'],
43 | config['loss_func'])
44 | else:
45 | model = Model()
46 | attack = LinfPGDAttack(model,
47 | config['epsilon'],
48 | config['k'],
49 | config['a'],
50 | config['random_start'],
51 | config['loss_func'])
52 |
53 | global_step = tf.contrib.framework.get_or_create_global_step()
54 |
55 | # Setting up the Tensorboard and checkpoint outputs
56 | if not os.path.exists(model_dir):
57 | os.makedirs(model_dir)
58 | eval_dir = os.path.join(model_dir, 'eval')
59 | if not os.path.exists(eval_dir):
60 | os.makedirs(eval_dir)
61 |
62 | last_checkpoint_filename = ''
63 | already_seen_state = False
64 |
65 | saver = tf.train.Saver()
66 | summary_writer = tf.summary.FileWriter(eval_dir)
67 |
68 | # A function for evaluating a single checkpoint
69 | def evaluate_checkpoint(filename):
70 | with tf.Session() as sess:
71 | # Restore the checkpoint
72 | saver.restore(sess, filename)
73 |
74 | # Iterate over the samples batch-by-batch
75 | num_batches = int(math.ceil(num_eval_examples / eval_batch_size))
76 | total_xent_nat = 0.
77 | total_xent_adv = 0.
78 | total_corr_nat = 0
79 | total_corr_adv = 0
80 |
81 | for ibatch in range(num_batches):
82 | bstart = ibatch * eval_batch_size
83 | bend = min(bstart + eval_batch_size, num_eval_examples)
84 |
85 | x_batch = mnist.test.images[bstart:bend, :]
86 | y_batch = mnist.test.labels[bstart:bend]
87 |
88 | dict_nat = {model.x_input: x_batch,
89 | model.y_input: y_batch}
90 |
91 | x_batch_adv = attack.perturb(x_batch, y_batch, sess)
92 |
93 | dict_adv = {model.x_input: x_batch_adv,
94 | model.y_input: y_batch}
95 |
96 | cur_corr_nat, cur_xent_nat = sess.run(
97 | [model.num_correct,model.xent],
98 | feed_dict = dict_nat)
99 | cur_corr_adv, cur_xent_adv = sess.run(
100 | [model.num_correct,model.xent],
101 | feed_dict = dict_adv)
102 |
103 | total_xent_nat += cur_xent_nat
104 | total_xent_adv += cur_xent_adv
105 | total_corr_nat += cur_corr_nat
106 | total_corr_adv += cur_corr_adv
107 |
108 | avg_xent_nat = total_xent_nat / num_eval_examples
109 | avg_xent_adv = total_xent_adv / num_eval_examples
110 | acc_nat = total_corr_nat / num_eval_examples
111 | acc_adv = total_corr_adv / num_eval_examples
112 |
113 | summary = tf.Summary(value=[
114 | tf.Summary.Value(tag='xent adv eval', simple_value= avg_xent_adv),
115 | tf.Summary.Value(tag='xent adv', simple_value= avg_xent_adv),
116 | tf.Summary.Value(tag='xent nat', simple_value= avg_xent_nat),
117 | tf.Summary.Value(tag='accuracy adv eval', simple_value= acc_adv),
118 | tf.Summary.Value(tag='accuracy adv', simple_value= acc_adv),
119 | tf.Summary.Value(tag='accuracy nat', simple_value= acc_nat)])
120 | summary_writer.add_summary(summary, global_step.eval(sess))
121 |
122 | print('natural: {:.2f}%'.format(100 * acc_nat))
123 | print('adversarial: {:.2f}%'.format(100 * acc_adv))
124 | print('avg nat loss: {:.4f}'.format(avg_xent_nat))
125 | print('avg adv loss: {:.4f}'.format(avg_xent_adv))
126 |
127 | # Infinite eval loop
128 | while True:
129 | cur_checkpoint = tf.train.latest_checkpoint(model_dir)
130 |
131 | # Case 1: No checkpoint yet
132 | if cur_checkpoint is None:
133 | if not already_seen_state:
134 | print('No checkpoint yet, waiting ...', end='')
135 | already_seen_state = True
136 | else:
137 | print('.', end='')
138 | sys.stdout.flush()
139 | time.sleep(10)
140 | # Case 2: Previously unseen checkpoint
141 | elif cur_checkpoint != last_checkpoint_filename:
142 | print('\nCheckpoint {}, evaluating ... ({})'.format(cur_checkpoint,
143 | datetime.now()))
144 | sys.stdout.flush()
145 | last_checkpoint_filename = cur_checkpoint
146 | already_seen_state = False
147 | evaluate_checkpoint(cur_checkpoint)
148 | # Case 3: Previously evaluated checkpoint
149 | else:
150 | if not already_seen_state:
151 | print('Waiting for the next checkpoint ... ({}) '.format(
152 | datetime.now()),
153 | end='')
154 | already_seen_state = True
155 | else:
156 | print('.', end='')
157 | sys.stdout.flush()
158 | time.sleep(10)
159 |
--------------------------------------------------------------------------------
/abs_models/utils.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torchvision
3 | import numpy as np
4 | import time
5 |
6 |
7 | def get_batch(bs=1):
8 | loader = torch.utils.data.DataLoader(
9 | torchvision.datasets.MNIST('./../data/MNIST/', train=False, download=True,
10 | transform=torchvision.transforms.ToTensor()),
11 | batch_size=bs, shuffle=True)
12 | b, l = next(iter(loader))
13 | return t2n(b), t2n(l)
14 |
15 |
16 | def clip_to_sphere(tens, radius, channel_dim=1):
17 | radi2 = torch.sum(tens**2, dim=channel_dim, keepdim=True)
18 | mask = torch.gt(radi2, radius**2).expand_as(tens)
19 | tens[mask] = torch.sqrt(
20 | tens[mask]**2 / radi2.expand_as(tens)[mask] * radius**2)
21 | return tens
22 |
23 |
24 | def binarize(tens, thresh=0.5):
25 | if isinstance(tens, torch.Tensor):
26 | tens = tens.clone()
27 | else:
28 | tens = np.copy(tens)
29 | tens[tens < thresh] = 0.
30 | tens[tens >= thresh] = 1.
31 | return tens
32 |
33 |
34 | def tens2numpy(tens):
35 | if tens.is_cuda:
36 | tens = tens.cpu()
37 | if tens.requires_grad:
38 | tens = tens.detach()
39 | return tens.numpy()
40 |
41 |
42 | def t2n(tens):
43 | if isinstance(tens, np.ndarray):
44 | return tens
45 | elif isinstance(tens, list):
46 | return np.array(tens)
47 | elif isinstance(tens, float) or isinstance(tens, int):
48 | return np.array([tens])
49 | else:
50 | return tens2numpy(tens)
51 |
52 |
53 | def n2t(tens):
54 | return torch.from_numpy(tens).to(dev())
55 |
56 |
57 | class LinearActFct(torch.nn.Module):
58 | def forward(self, input):
59 | return input
60 |
61 | def __repr__(self):
62 | return self.__class__.__name__
63 |
64 |
65 | def tsum(input, axes=None, keepdim=False):
66 | if axes is None:
67 | axes = range(len(input.size()))
68 |
69 | # probably some check for uniqueness of axes
70 | if keepdim:
71 | for ax in axes:
72 | input = input.sum(ax, keepdim=True)
73 | else:
74 | for ax in sorted(axes, reverse=True):
75 | input = input.sum(ax, keepdim=False)
76 |
77 | return input
78 |
79 |
80 | def tlog(x):
81 | if isinstance(x, float):
82 | return np.log(x)
83 | elif isinstance(x, int):
84 | return np.log(float(x))
85 | else:
86 | return torch.log(x)
87 |
88 |
89 | def best_other(logits, gt_label):
90 | best_other = np.argsort(logits)
91 | best_other = best_other[best_other != gt_label][-1]
92 | return best_other
93 |
94 |
95 | def L2(a, b, axes=None):
96 | if len(a.shape) != len(b.shape):
97 | print(a.shape, b.shape)
98 | raise(Exception('broadcasting not possible'))
99 | L2_dist = torch.sqrt(tsum((a - b)**2, axes=axes))
100 | return L2_dist
101 |
102 |
103 | def auto_batch(max_batch_size, f, xs, *args, verbose=False, **kwargs):
104 |     """Automatically passes sub-batches of xs to f.
105 |     f must return torch tensors."""
106 | if not isinstance(xs, list):
107 | xs = [xs]
108 | n = xs[0].shape[0]
109 | y = []
110 | for start in range(0, n, max_batch_size):
111 | xb = [x[start:start + max_batch_size] for x in xs]
112 | yb = f(*xb, *args, **kwargs)
113 | y.append(yb)
114 | if not isinstance(yb, tuple):
115 | y = torch.cat(y)
116 | assert y.shape[0] == n
117 | return y
118 | else:
119 | return (torch.cat(y_i) for y_i in list(zip(*y)))
120 |
121 |
122 | def timeit(method):
123 | def timed(*args, **kw):
124 | ts = time.time()
125 | result = method(*args, **kw)
126 | te = time.time()
127 | print('%r %2.2f ms' % (method.__name__, (te - ts) * 1000))
128 | return result
129 | return timed
130 |
131 |
132 | def t_loop_collect(fct, iter_obj, *args, concat_dim=1, **kwargs):
133 | all_outs = []
134 | for obj in iter_obj:
135 | outs = fct(obj, *args, **kwargs)
136 | all_outs.append(outs)
137 | all_outs = list(map(list, zip(*all_outs)))
138 | all_outs = [torch.cat(out, dim=concat_dim) for out in all_outs]
139 | return all_outs
140 |
141 | def dev():
142 | if torch.cuda.is_available():
143 | return 'cuda:0'
144 | else:
145 | return 'cpu'
146 |
147 |
148 | def y_2_one_hot(y, n_classes=10):
149 | assert len(y.shape) == 1
150 | y_one_hot = torch.FloatTensor(y.shape[0], n_classes).to(dev())
151 | y_one_hot.zero_()
152 | return y_one_hot.scatter_(1, y[:, None], 1)
153 |
154 |
155 | def confidence_softmax(x, const=0, dim=1):
156 | x = torch.exp(x)
157 | n_classes = x.shape[1]
158 | # return x
159 | norms = torch.sum(x, dim=dim, keepdim=True)
160 | return (x + const) / (norms + const * n_classes)
161 |
162 |
163 | def cross_entropy(label, logits):
164 | """Calculates the cross-entropy.
165 | logits: np.array with shape (bs, n_classes)
166 | label: np.array with shape (bs)
167 |
168 | """
169 | assert label.shape[0] == logits.shape[0]
170 | assert len(logits.shape) == 2
171 |
172 | # for numerical reasons we subtract the max logit
173 | # (mathematically it doesn't matter!)
174 | # otherwise exp(logits) might become too large or too small
175 | logits = logits - np.max(logits, axis=1)[:, None]
176 | e = np.exp(logits)
177 | s = np.sum(e, axis=1)
178 | ce = np.log(s) - logits[np.arange(label.shape[0]), label]
179 | return ce
180 |
181 |
182 | def show_gpu_usages(thresh=100000):
183 | tmp = 0
184 | import gc
185 | for obj in gc.get_objects():
186 | try:
187 | if torch.is_tensor(obj) or (hasattr(obj, 'data') and torch.is_tensor(obj.data)):
188 | if obj.is_cuda and np.prod(obj.shape) > thresh \
189 | and not isinstance(obj, torch.nn.parameter.Parameter):
190 | tmp += 1
191 | print(type(obj), list(obj.size()), obj.dtype, obj.is_cuda,
192 | np.prod(obj.shape), tmp)
193 |         except Exception:
194 | pass
195 | print()
196 |
197 |
198 | def binary_projection(rec, orig):
199 |     # both at least 0.5 --> keep the original values
200 |     mask = (rec >= 0.5) & (orig >= 0.5)
201 |     rec[mask] = orig[mask]
202 |     # both below 0.5 --> keep the original values
203 |     mask = (rec < 0.5) & (orig < 0.5)
204 |     rec[mask] = orig[mask]
205 | 
206 |     # disagreeing sides --> clamp to the 0.5 decision boundary
207 |     rec[(rec >= 0.5) & (orig < 0.5)] = 0.5
208 |     rec[(rec < 0.5) & (orig >= 0.5)] = 0.49999
209 |     return rec
210 |
211 |
214 |
--------------------------------------------------------------------------------
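
A minimal usage sketch for `auto_batch` above; the linear model and the shapes are illustrative stand-ins, not part of the repository:

```
import torch

from abs_models import utils as u

model = torch.nn.Linear(784, 10)   # dummy stand-in for a real network
x = torch.randn(5000, 784)         # larger than we want to process at once

def f(batch):
    with torch.no_grad():
        return model(batch)        # returns a torch tensor, as auto_batch requires

y = u.auto_batch(1000, f, x)       # processed internally as 5 chunks of 1000 rows
assert y.shape == (5000, 10)
```
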
/abs_models/inference.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch import optim
3 | from torch.nn import functional as F
4 | from torch import tensor
5 | import numpy as np
6 |
7 | from abs_models import utils as u
8 | from abs_models import loss_functions
9 |
10 |
11 | def inference(AEs, x_inp, n_samples, n_iter, beta, GM, fraction_to_dismiss=0.1, lr=0.01,
12 | n_classes=10, nd=8, clip=2, GD_inference_b=True,
13 | dist_fct=loss_functions.squared_L2_loss):
14 |
15 | if n_iter == 0:
16 | GD_inference_b = False
17 |
18 | tmp_bs = x_inp.size()[0]
19 |
20 | # get_images has built-in caching
21 | if n_samples not in GM.th_images:
22 | print('setting random seed')
23 | # fix random numbers for attacks
24 | torch.cuda.manual_seed_all(999)
25 | torch.manual_seed(1234)
26 | np.random.seed(1234)
27 | # generate a bunch of samples for each VAE
28 | GM.get_images(n_samples, fraction_to_dismiss)
29 |
30 | # use caching for conversion to torch
31 | x_test_samples = GM.th_images[n_samples]
32 |
33 | # calculate the likelihood for all samples
34 | with torch.no_grad():
35 | bs, n_ch, nx, ny = x_inp.shape
36 | n_samples, n_latent = GM.l_v[n_samples].shape[-4:-2]
37 |
38 | all_ELBOs = \
39 | [loss_functions.ELBOs2(x_inp, recs.detach(), GM.l_v[n_samples], beta)
40 | for recs in x_test_samples]
41 | all_ELBOs = torch.stack(all_ELBOs, dim=1)
42 |
43 | x_inp = x_inp.view(bs, n_ch, nx, ny)
44 |
45 | # tmp save memory
46 | # GM.th_images[n_samples] = GM.th_images[n_samples].cpu()
47 | # GM.l_v[n_samples] = GM.l_v[n_samples].cpu()
48 |
49 | # select the best prototype for each VAE
50 | min_val_c, min_val_c_args = torch.min(all_ELBOs, dim=2)
51 | indices = min_val_c_args.view(tmp_bs * n_classes)
52 | # l_v_best shape: (bs, n_classes, 8, 1, 1)
53 | l_v_best = GM.l_v[n_samples][indices].view(tmp_bs, n_classes, nd, 1, 1)
54 |
55 | if GD_inference_b: # gradient descent in latent space
56 | return GD_inference(AEs, l_v_best.data, x_inp.data,
57 | clip=clip, lr=lr, n_iter=n_iter, beta=beta, dist_fct=dist_fct)
58 | else:
59 | if tmp_bs == 1:
60 | all_recs = GM.images[n_samples][list(range(n_classes)), u.t2n(indices), :, :, :]
61 | else:
62 | all_recs = None
63 | return min_val_c, l_v_best, all_recs
64 |
65 |
66 | def GD_inference(AEs, l_v_best, x_inp, clip=5, lr=0.01, n_iter=20,
67 | beta=1, dist_fct=loss_functions.squared_L2_loss):
68 | n_classes = len(AEs)
69 |
70 | # l_v_best are the latents
71 | # has shape (batch_size, n_classes == 10, n_latents == 8) + singleton dims
72 |
73 | # do gradient descent w.r.t. ELBO in latent space starting from l_v_best
74 | def gd_inference_b(l_v_best, x_inp, AEs, n_classes=10, clip=5, lr=0.01, n_iter=20,
75 | beta=1, dist_fct=loss_functions.squared_L2_loss):
76 |
77 | bs, n_ch, nx, ny = x_inp.shape
78 | with torch.enable_grad():
79 | l_v_best = l_v_best.data.clone().detach().requires_grad_(True).to(u.dev())
80 | opti = optim.Adam([l_v_best], lr=lr)
81 | for i in range(n_iter):
82 | ELBOs = []
83 | all_recs = []
84 | for j in range(n_classes):
85 | if i == n_iter - 1:
86 | l_v_best = l_v_best.detach() # no gradients in last run
87 | AEs[j].eval()
88 |
89 | rec = torch.sigmoid(AEs[j].Decoder.forward(l_v_best[:, j]))
90 |
91 | ELBOs.append(loss_functions.ELBOs(rec, # (bs, n_ch, nx, ny)
92 | l_v_best[:, j], # (bs, n_latent, 1, 1)
93 | x_inp, # (bs, n_ch, nx, ny)
94 | beta=beta,
95 | dist_fct=dist_fct))
96 | if i == n_iter - 1:
97 | all_recs.append(rec.view(bs, 1, n_ch, nx, ny).detach())
98 |
99 | ELBOs = torch.cat(ELBOs, dim=1)
100 | if i < n_iter - 1:
101 | loss = (torch.sum(ELBOs)) - 8./784./2 # historic reasons
102 | # backward
103 | opti.zero_grad()
104 | loss.backward()
105 | opti.step()
106 | l_v_best.data = u.clip_to_sphere(l_v_best.data, clip, channel_dim=2)
107 | else:
108 | opti.zero_grad()
109 | all_recs = torch.cat(all_recs, dim=1)
110 |
111 | return ELBOs.detach(), l_v_best.detach(), all_recs
112 |
113 | ELBOs, l_v_best, all_recs = u.auto_batch(1000, gd_inference_b, [l_v_best, x_inp], AEs,
114 | n_classes=n_classes, clip=clip, lr=lr,
115 | n_iter=n_iter, beta=beta, dist_fct=dist_fct)
116 |
117 | return ELBOs, l_v_best, all_recs
118 |
119 |
120 | # pytorch 1.0:
121 | # def GD_inference_new(AEs, l_v_best, x_inp, clip=5, lr=0.01, n_iter=20,
122 | # beta=1, dist_fct=loss_functions.squared_L2_loss):
123 | # n_classes = len(AEs)
124 | #
125 | # # l_v_best are the latents
126 | # # have shape (batch_size, n_classes == 10, n_latents == 8) + singleton dims
127 | #
128 | # # do gradient descent w.r.t. ELBO in latent space starting from l_v_best
129 | # def gd_inference_b(l_v_best, x_inp, AEs, clip=5, lr=0.01, n_iter=20,
130 | # beta=1, dist_fct=loss_functions.squared_L2_loss):
131 | #
132 | # with torch.enable_grad():
133 | # l_v_best = l_v_best.data.clone().detach().requires_grad_(True).to(u.dev())
134 | # opti = optim.Adam([l_v_best], lr=lr)
135 | # for i in range(n_iter):
136 | # recs = torch.nn.parallel.parallel_apply(
137 | # [AE.Decoder.forward for AE in AEs],
138 | # [best_latent for best_latent in l_v_best.transpose(0, 1)])
139 | # recs = torch.nn.functional.sigmoid(torch.stack(recs, dim=1))
140 | # ELBOs = loss_functions.ELBOs(recs, l_v_best, x_inp[:, None], beta=beta,
141 | # dist_fct=dist_fct)[..., 0]
142 | #
143 | # if i < n_iter - 1:
144 | # loss = (torch.sum(ELBOs)) - 8./784./2 # historic reasons
145 | # # backward
146 | # opti.zero_grad()
147 | # loss.backward()
148 | # opti.step()
149 | # l_v_best.data = u.clip_to_sphere(l_v_best.data, clip, channel_dim=2)
150 | # else:
151 | # opti.zero_grad()
152 | #
153 | # return ELBOs.detach(), l_v_best.detach(), recs.detach()
154 | #
155 | # ELBOs, l_v_best, all_recs = u.auto_batch(2000, gd_inference_b, [l_v_best, x_inp], AEs,
156 | # clip=clip, lr=lr, n_iter=n_iter, beta=beta, dist_fct=dist_fct)
157 | #
158 | # return ELBOs, l_v_best, all_recs
159 |
--------------------------------------------------------------------------------
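
The latent-space descent in `GD_inference` follows a generic pattern: optimize the latents with Adam against an ELBO-style loss, then project them back into a ball. A self-contained toy sketch of that pattern (the linear decoder, the shapes, and the in-place renormalization standing in for `u.clip_to_sphere` are illustrative):

```
import torch
from torch import optim

# toy stand-in for one decoder AEs[j].Decoder: 8 latents -> 784 "pixels"
decoder = torch.nn.Linear(8, 784)

x = torch.rand(16, 784)                          # target images, flattened
z = (0.1 * torch.randn(16, 8)).requires_grad_()  # latents near the prior mean

opti = optim.Adam([z], lr=0.05)
for _ in range(50):
    rec = torch.sigmoid(decoder(z))
    # squared-L2 reconstruction term plus a Gaussian-prior term,
    # mirroring the structure of the ELBO optimized above
    loss = ((rec - x) ** 2).sum() + (z ** 2).sum()
    opti.zero_grad()
    loss.backward()
    opti.step()
    with torch.no_grad():                    # keep latents in a radius-5 ball,
        norm = z.norm(dim=1, keepdim=True)   # analogous to u.clip_to_sphere
        z.mul_(norm.clamp(max=5.0) / norm)
```
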
/abs_models/visualization.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 | from matplotlib import pyplot as plt
4 | from PIL import Image
5 |
6 | from abs_models import utils as u
7 |
8 |
9 | def visualize_image(ax, im, title=None, clear=False, **kwargs):
10 | if clear:
11 | ax.cla()
12 | ax.imshow(im, **kwargs)
13 | if title is not None:
14 | ax.set_title(title)
15 | ax.axis('off')
16 |     return ax
17 |
18 |
19 | def plot(ax, y_datas, x_data=None, title=None, clear=True,
20 | scale=None, legend=None):
21 | if not any(isinstance(i, list) for i in y_datas):
22 | y_datas = [y_datas]
23 | if clear:
24 | ax.clear()
25 | if x_data is None:
26 | x_data = range(len(y_datas[0]))
27 |
28 |     # actual plotting
29 | plots = []
30 | for y_data in y_datas:
31 | pl, = ax.plot(x_data, y_data)
32 | plots.append(pl)
33 |
34 | if legend:
35 | ax.legend(plots, legend)
36 | if scale is not None:
37 | ax.set_yscale(scale)
38 | if title is not None:
39 | ax.set_title(title)
40 | return ax
41 |
42 |
43 | def scatter(ax, x_data, y_data, title=None, clear=True):
44 | if clear:
45 | ax.clear()
46 | ax.scatter(x_data, y_data)
47 | if title is not None:
48 | ax.set_title(title)
49 |
50 |
51 | def subplots(*args, height=6, width=15, **kwargs):
52 | fig, ax = plt.subplots(*args, squeeze=False, **kwargs)
53 | if height is not None:
54 | fig.set_figheight(height)
55 | if width is not None:
56 | fig.set_figwidth(width)
57 | return fig, ax
58 |
59 |
60 | class Visualizer:
61 | def __init__(self):
62 | self.plots = {}
63 | self.i = -1
64 | self.reset()
65 |
66 | def reset(self):
67 | self.ny = 4
68 | self.nx = 4
69 | fig = plt.figure()
70 | plt.ion()
71 | fig.show()
72 | fig.canvas.draw()
73 |
74 | self.fig = fig
75 | self.i = 0
76 | # for key in self.plots.keys():
77 | # self.plots[key].ax = self.get_next_plot()
78 |
79 | def add_scalar(self, name, y, x):
80 | y = u.t2n(y)
81 | if name in self.plots.keys():
82 | self.plots[name].x.append(x)
83 | self.plots[name].y.append(y)
84 | else:
85 | self.plots[name] = PlotObj(x, y, self.get_next_plot())
86 | self.plots[name].ax.clear()
87 | plot(self.plots[name].ax, self.plots[name].y,
88 | self.plots[name].x, title=name)
89 | self.fig.canvas.draw()
90 |
91 | def add_image(self, name, img, x):
92 | if not isinstance(img, np.ndarray):
93 | img = u.t2n(img)
94 | img = img.squeeze()
95 |         if name not in self.plots.keys():
96 |             self.plots[name] = \
97 |                 PlotObj(0, 0, self.get_next_plot())
98 | visualize_image(self.plots[name].ax, img, title=name, cmap='gray')
99 |
100 | def get_next_plot(self):
101 | self.i += 1
102 | ax = self.fig.add_subplot(self.nx, self.ny, self.i)
103 | return ax
104 |
105 |
106 | class PlotObj:
107 | def __init__(self, x, y, ax):
108 | self.x = [x]
109 | self.y = [y]
110 | self.ax = ax
111 |
112 |
113 | # visualize hidden space
114 | class RobNNVisualisor(object):
115 | def __init__(self):
116 | self.xl = []
117 | self.yl = []
118 | self.cl = []
119 |
120 | def generate_data(self, model, loader, cuda=False):
121 | for i, (test_data, test_label) in enumerate(loader):
122 | if i == int(np.ceil(400 / loader.batch_size)):
123 | break
124 | x = test_data
125 | yt = test_label
126 | x = x.to(u.dev())
127 | model.forward(x)
128 | latent = model.latent.cpu().data.numpy().swapaxes(0, 1).squeeze()
129 | self.xl += latent[0].tolist()
130 | self.yl += latent[1].tolist()
131 | self.cl += yt.data.numpy().tolist()
132 |
133 | def visualize_hidden_space(self, fig, ax, model=None,
134 | loader=None, cuda=False,
135 | reload=False, colorbar=False):
136 | if self.xl == [] or reload:
137 | self.generate_data(model, loader, cuda=cuda)
138 | cmap = plt.cm.get_cmap("viridis", 10)
139 |
140 | pl = ax.scatter(self.xl, self.yl, c=self.cl, label=self.cl,
141 | vmin=-0.5, vmax=9.5, cmap=cmap)
142 |
143 | if colorbar:
144 | fig.colorbar(pl, ax=ax, ticks=range(10))
145 | return ax
146 |
147 |
148 | def fig2img(fig):
149 |     """
150 |     @brief Convert a Matplotlib figure to a PIL Image in RGBA format
151 |     and return it
152 |     @param fig a matplotlib figure
153 |     @return a Python Imaging Library ( PIL ) image
154 |     """
155 |     # put the figure pixmap into a numpy array
156 |     buf = fig2data_argb(fig)
157 |     h, w, d = buf.shape
158 |     return Image.frombytes("RGBA", (w, h), buf.tobytes())
159 | 
160 | 
161 | def fig2data_argb(fig):
162 |     """
163 |     @brief Convert a Matplotlib figure to a 3D numpy array with
164 |     RGBA channels and return it
165 |     @param fig a matplotlib figure
166 |     @return a numpy 3D array of RGBA values
167 |     """
168 |     # draw the renderer
169 |     fig.canvas.draw()
170 | 
171 |     # Get the ARGB buffer from the figure (rows are image rows, so height first)
172 |     w, h = fig.canvas.get_width_height()
173 |     buf = np.frombuffer(fig.canvas.tostring_argb(), dtype=np.uint8)
174 |     buf = buf.reshape(h, w, 4)
175 | 
176 |     # canvas.tostring_argb gives pixmap in ARGB mode.
177 |     # Roll the ALPHA channel to have it in RGBA mode
178 |     buf = np.roll(buf, 3, axis=2)
179 |     return buf
180 |
181 |
182 | # adapted from https://github.com/lanpa/tensorboard-pytorch
183 | def tens2scattters(tens, lims=None, labels=None):
184 | tens_np = u.tens2numpy(tens)
185 | labels = u.tens2numpy(labels)
186 |
187 | # draw
188 | fig = plt.figure()
189 | ax = plt.gca()
190 | ax.scatter(tens_np[0], tens_np[1], c=labels)
191 | plt.axis('scaled')
192 | if lims is not None:
193 | ax.set_xlim(lims[0], lims[1])
194 | ax.set_ylim(lims[0], lims[1])
195 | return fig2data(fig)
196 |
197 |
198 | def fig2data(fig):
199 | fig.canvas.draw()
200 | # Now we can save it to a numpy array.
201 | data = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)
202 | data = data.reshape(fig.canvas.get_width_height()[::-1] + (3,))
203 | plt.close()
204 | return data
205 |
206 |
207 | def visualize_latent_distr(CNN, nd, limit=2, n_grid=100):
208 |     # evaluate CNN on an n_grid^nd lattice of latent coordinates
209 |     # in [-limit, limit]^nd
210 | fig, ax = subplots(1, 1, width=7, height=6)
211 | fig.subplots_adjust(right=0.8)
212 | grids = [(np.linspace(-limit, limit, n_grid)) for i in range(nd)]
213 | xys = np.array(np.meshgrid(*grids))
214 | xys = np.moveaxis(xys, 0, -1).reshape(n_grid ** nd, nd)
215 |     outs = CNN.forward(torch.from_numpy(xys[:, :, None, None]).float().to(u.dev()))  # noqa: E501
216 | outs = u.t2n(outs.squeeze())
217 | sc = ax[0, 0].scatter(xys[:, 0], xys[:, 1], c=(outs - np.min(outs)) / (np.max(outs) - np.min(outs))) # noqa: E501
218 | cbar_ax = fig.add_axes([0.85, 0.15, 0.05, 0.7])
219 | fig.colorbar(sc, cax=cbar_ax)
220 | return fig2data(fig)
221 |
222 |
223 | if __name__ == '__main__':
224 | fig, ax = subplots(2)
225 | print(ax)
226 |
--------------------------------------------------------------------------------
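
A minimal sketch of using `fig2data` above to rasterize a figure into a numpy array (the plotted values are arbitrary; note that `fig2data` closes the figure):

```
import numpy as np
from matplotlib import pyplot as plt

from abs_models.visualization import fig2data

fig = plt.figure()
plt.plot([0, 1, 2], [0, 1, 4])
arr = fig2data(fig)        # uint8 array of shape (height, width, 3)
assert arr.dtype == np.uint8 and arr.shape[-1] == 3
```
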
/abs_models/models.py:
--------------------------------------------------------------------------------
1 | from os.path import join, dirname
2 | import torch
3 | from torch import nn
4 | from torchvision import datasets, transforms
5 |
6 | from abs_models import utils as u
7 | from abs_models import nets
8 | from abs_models.inference import inference
9 | from abs_models import sampling
10 |
11 | DEFAULT_PATH = dirname(__file__)
12 |
13 |
14 | class ELBOVAE(nn.Module):
15 | def __init__(self, AEs, n_samples, n_iter, beta, GM,
16 | fraction_to_dismiss=0.1, clip=5, lr=0.05):
17 |
18 | super().__init__()
19 | self.AEs = AEs
20 | for i, AE in enumerate(self.AEs):
21 | self.add_module(f'VAE_{i}', AE)
22 | self.n_samples = n_samples
23 | self.n_iter = n_iter
24 | self.beta = beta
25 | self.GM = GM
26 | self.fraction_to_dismiss = fraction_to_dismiss
27 | self.clip = clip
28 | self.lr = lr
29 | self.logit_scale = 440
30 | self.confidence_level = 0.000039
31 | self.name_check = 'MNIST_MSE'
32 |
33 | def forward(self, x, return_more=False):
34 | # assert (torch.ge(x, 0).all())
35 | # assert (torch.le(x, 1).all())
36 |
37 | ELBOs, l_v_classes, reconsts = inference(self.AEs, x, self.n_samples, self.n_iter,
38 | self.beta, self.GM, self.fraction_to_dismiss,
39 | clip=self.clip, lr=self.lr)
40 | ELBOs = self.rescale(ELBOs) # class specific fine-scaling
41 |
42 | if return_more:
43 | p_c = u.confidence_softmax(-ELBOs * self.logit_scale, const=self.confidence_level,
44 | dim=1)
45 | return p_c, ELBOs, l_v_classes, reconsts
46 | else:
47 | return -ELBOs[:, :, 0, 0] # like logits
48 |
49 | def rescale(self, logits):
50 | return logits
51 |
52 |
53 | class ELBOVAE_binary(ELBOVAE):
54 | def __init__(self, AEs, n_samples, n_iter, beta, GM,
55 | fraction_to_dismiss=0.1, clip=5, lr=0.05):
56 |
57 | super().__init__(AEs, n_samples, n_iter, beta, GM,
58 | fraction_to_dismiss=fraction_to_dismiss,
59 | clip=clip, lr=lr)
60 |
61 | self.name_check = 'ABS'
62 | self.rescale_b = True
63 | self.discriminative_scalings = torch.tensor(
64 | [1., 0.96, 1.001, 1.06, 0.98, 0.96, 1.03, 1., 1., 1.]).to(u.dev())
65 |
66 | def forward(self, x, return_more=False):
67 | # assert (torch.ge(x, 0).all())
68 | # assert (torch.le(x, 1).all())
69 | x = u.binarize(x)
70 | return super().forward(x, return_more=return_more)
71 |
72 | def rescale(self, logits):
73 | if self.rescale_b:
74 | return logits * self.discriminative_scalings[None, :, None, None]
75 | else:
76 | return logits
77 |
78 |
79 | def get_ABS(n_samples=8000, n_iter=50, beta=1, clip=5,
80 | fraction_to_dismiss=0.1, lr=0.05, load=True,
81 | binary=True, load_path=DEFAULT_PATH):
82 | return get_VAE(n_samples=n_samples, n_iter=n_iter, beta=beta, clip=clip,
83 | fraction_to_dismiss=fraction_to_dismiss, lr=lr, load=load,
84 | binary=binary, load_path=load_path)
85 |
86 |
87 | def get_VAE(n_samples=8000, n_iter=50, beta=1, clip=5, fraction_to_dismiss=0.1, lr=0.05,
88 | load=True, binary=False, load_path=DEFAULT_PATH):
89 | """Creates the ABS model. If binary is True, returns the full
90 | ABS model including binarization and scalar, otherwise returns
91 | the base ABS model without binarization and without scalar."""
92 |
93 |     if load_path == DEFAULT_PATH:  # only override the unmodified default
94 |         load_path = join(DEFAULT_PATH, '../exp/VAE_swarm_MSE/nets/')
95 | print('ABS model')
96 |
97 | n_classes = 10
98 | nd = 8
99 | nx, ny = 28, 28
100 |
101 | def init_models():
102 | strides = [1, 2, 2, 1]
103 | latent_act_fct = u.LinearActFct
104 |
105 | kernelE = [5, 4, 3, 5]
106 | feat_mapsE = [32, 32, 64, nd]
107 |         encoder = {'feat_maps': feat_mapsE, 'kernels': kernelE, 'strides': strides}
108 | kernelD = [4, 5, 5, 4]
109 | feat_mapsD = [32, 16, 16, 1]
110 | decoder = {'feat_maps': feat_mapsD, 'kernels': kernelD, 'strides': strides}
111 |
112 | AEs = []
113 | for i in range(n_classes):
114 | AE = nets.VariationalAutoEncoder(encoder, decoder, latent_act_fct=latent_act_fct)
115 | AE.eval()
116 | AE.to(u.dev())
117 | AEs.append(AE)
118 | return AEs
119 |
120 | AEs = init_models()
121 |
122 | if load:
123 | for i in range(n_classes):
124 | path = load_path + f'/ABS_{i}.net'
125 | AEs[i].iters = 29000
126 | AEs[i].load_state_dict(torch.load(path, map_location=str(u.dev())))
127 | print('model loaded')
128 |
129 | GM = sampling.GaussianSamples(AEs, nd, n_classes, nx=nx, ny=ny)
130 | if binary:
131 | model = ELBOVAE_binary
132 | else:
133 | model = ELBOVAE
134 | model = model(AEs, n_samples, n_iter, beta, GM, fraction_to_dismiss, clip, lr=lr)
135 | model.eval()
136 | model.code_base = 'pytorch'
137 | model.has_grad = False
138 | return model
139 |
140 |
141 | class CNN(nets.Architectures):
142 | def __init__(self, model):
143 | super().__init__()
144 | self.add_module('net', model)
145 | self.model = model
146 | self.has_grad = True
147 | self.confidence_level = 1439000
148 | self.logit_scale = 1
149 | self.name_check = 'MNIST_baseline'
150 |
151 | def forward(self, input):
152 | # assert (torch.ge(input, 0).all())
153 | # assert (torch.le(input, 1).all())
154 | return self.model.forward(input)[:, :, 0, 0]
155 |
156 |
157 | def get_CNN(load_path=DEFAULT_PATH):
158 |
159 |     if load_path == DEFAULT_PATH:  # only override the unmodified default
160 |         load_path = join(DEFAULT_PATH, '../exp/mnist_cnn/nets/')
161 |
162 | # network
163 | shape = (1, 1, 28, 28)
164 | kernelE = [5, 4, 3, 5]
165 | strides = [1, 2, 2, 1]
166 | feat_mapsE = [20, 70, 256, 10] # (32, 32, 16, 2)
167 |
168 | model = nets.NN(feat_mapsE, shape[1:], kernels=kernelE, strides=strides)
169 | # load net
170 | print('path', load_path + '/vanilla_cnn.net')
171 | model.load_state_dict(torch.load(load_path + '/vanilla_cnn.net', map_location=str(u.dev())))
172 | print('model loaded')
173 | NN = CNN(model)
174 | NN.eval()
175 | NN.to(u.dev())
176 | NN.code_base = 'pytorch'
177 | return NN
178 |
179 |
180 | class BinaryCNN(CNN):
181 | def __init__(self, model):
182 | super().__init__(model)
183 | self.name_check = 'MNIST_baseline_binary'
184 |
185 | def forward(self, input):
186 | input = u.binarize(input)
187 | return super().forward(input)
188 |
189 |
190 | def get_binary_CNN(load_path=DEFAULT_PATH, binarize=True):
191 |     if load_path == DEFAULT_PATH:  # only override the unmodified default
192 |         load_path = join(DEFAULT_PATH, '../exp/mnist_cnn/nets/')
193 | # network
194 | shape = (1, 1, 28, 28)
195 | kernelE = [5, 4, 3, 5]
196 | strides = [1, 2, 2, 1]
197 | feat_mapsE = [20, 70, 256, 10] # (32, 32, 16, 2)
198 |
199 | model = nets.NN(feat_mapsE, shape[1:], kernels=kernelE, strides=strides)
200 |
201 | # load net
202 | model.load_state_dict(torch.load(load_path + '/vanilla_cnn.net', map_location=str(u.dev())))
203 | print('model loaded')
204 | if binarize:
205 | model = BinaryCNN(model)
206 | else:
207 | model = CNN(model)
208 | model.eval()
209 | model.to(u.dev())
210 | model.code_base = 'pytorch'
211 | return model
212 |
213 |
214 | def get_transfer_model(load_path=DEFAULT_PATH):
215 |     if load_path == DEFAULT_PATH:  # only override the unmodified default
216 |         load_path = join(DEFAULT_PATH, '../exp/mnist_cnn/nets/')
217 | 
218 |     # new arch
219 |     shape = (1, 1, 28, 28)
220 |     strides = [1, 2, 2, 1]
221 |     kernelE = [5, 4, 3, 5]
222 |     feat_mapsE = [32, 32, 64, 10]  # (32, 32, 16, 2)
223 | 
224 |     model = nets.NN(feat_mapsE, shape[1:], kernels=kernelE, strides=strides)
225 |     model.load_state_dict(torch.load(join(load_path, 'transfer_cnn.net'),
226 |                                      map_location=str(u.dev())))
227 |     model.to(u.dev())
228 |     model.eval()
229 |     model.code_base = 'pytorch'
230 |     return model
231 |
232 |
233 | class NearestNeighbor(nets.NearestNeighborLogits):
234 | def __init__(self, samples, classes, n_classes):
235 | """
236 | :param samples: 4D: (n_samples, nchannels, nx, ny)
237 | :param classes: 1D: (2, 3, 4, 1, ...) (n_samples)
238 | """
239 | super().__init__(samples, classes, n_classes)
240 | self.name_check = 'MNIST_NN'
241 |
242 | def forward(self, input_batch, return_more=False):
243 | # assert (torch.ge(input_batch, 0).all())
244 | # assert (torch.le(input_batch, 1).all())
245 | return super().forward(input_batch, return_more=return_more)
246 |
247 |
248 | def get_NearestNeighbor():
249 | n_classes = 10
250 | mnist_train = datasets.MNIST('./../data', train=True, download=True,
251 | transform=transforms.Compose([transforms.ToTensor()]))
252 |
253 | NN = NearestNeighbor(mnist_train.train_data[:, None, ...].type(torch.float32).to(u.dev()) / 255,
254 | mnist_train.train_labels.to(u.dev()), n_classes=n_classes)
255 |
256 | print('model initialized')
257 | NN.eval() # does nothing but avoids warnings
258 | NN.code_base = 'pytorch'
259 | NN.has_grad = False
260 | return NN
261 |
262 |
263 | def get_madry(load_path='./../madry/mnist_challenge/models/secret/'):
264 | import tensorflow as tf
265 | from madry.mnist_challenge.model import Model
266 | sess = tf.InteractiveSession()
267 | model = Model()
268 | model_file = tf.train.latest_checkpoint(load_path)
269 | restorer = tf.train.Saver()
270 | restorer.restore(sess, model_file)
271 | model.code_base = 'tensorflow'
272 | model.logit_scale = 1.
273 | model.confidence_level = 60.
274 | model.has_grad = True
275 | return model
--------------------------------------------------------------------------------
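
As a quick orientation, a minimal sketch of loading the ABS model defined above and classifying a batch (the random input is an illustrative stand-in for MNIST digits in [0, 1]):

```
import torch

from abs_models import models as mz
from abs_models import utils as u

model = mz.get_ABS(n_iter=10)              # fewer latent-GD steps than the default 50, for speed
x = torch.rand(4, 1, 28, 28).to(u.dev())   # stand-in for a batch of MNIST digits in [0, 1]

with torch.no_grad():
    logits = model(x)                      # (4, 10): negative class-conditional ELBOs
    pred = logits.argmax(dim=1)
print(pred)
```
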
/abs_models/nets.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import numpy as np
3 | from torch import nn
4 |
5 | from abs_models import utils as u
6 |
7 |
8 | class Architectures(nn.Module):
9 | def __init__(self, input_size=None):
10 | super(Architectures, self).__init__()
11 | self.c = input_size
12 | self.iters = 0
13 |
14 | def forward(self, input):
15 | for module in self._modules.values():
16 | input = module(input)
17 | return input
18 |
19 |
20 | class ConvAE(Architectures):
21 | def __init__(self, EncArgs, DecArgs):
22 | super().__init__(input_size=None)
23 | self.latent = None
24 | self.Encoder = ConvEncoder(**EncArgs)
25 | self.Decoder = ConvDecoder(**DecArgs)
26 |
27 | def forward(self, x):
28 | self.latent = self.Encoder.forward(x)
29 | return self.Decoder.forward(self.latent)
30 |
31 |
32 | class VariationalAutoEncoder(ConvAE):
33 | def __init__(self, EncArgs, DecArgs, latent_act_fct=nn.Tanh):
34 |
35 | self.fac = 2
36 |
37 | # Decoder must match encoder
38 | EncArgs['feat_maps'][-1] = int(EncArgs['feat_maps'][-1] * self.fac)
39 | self.n_latent = int(EncArgs['feat_maps'][-1])
40 | self.depth = len(EncArgs['feat_maps'])
41 |
42 | if 'act_fcts' not in EncArgs.keys():
43 | EncArgs['act_fcts'] = self.depth * [torch.nn.ELU]
44 | EncArgs['act_fcts'][-1] = None
45 |
46 | # half amount of layers (half mu, half sigma)
47 | DecArgs['input_sizes'] = [int(EncArgs['feat_maps'][-1] / self.fac)]
48 | super().__init__(EncArgs, DecArgs)
49 | EncArgs['feat_maps'][-1] = int(EncArgs['feat_maps'][-1] / self.fac)
50 |
51 | self.std = None
52 | self.mu = None
53 | self.logvar = None
54 |
55 | self.latent_act_fct = latent_act_fct()
56 |
57 | def reparameterize(self, inp):
58 | self.mu = self.latent_act_fct(
59 | inp[:, :int(self.n_latent / self.fac), :, :])
60 |
61 | if self.training:
62 | # std
63 | self.logvar = inp[:, int(self.n_latent / 2):, :, :]
64 | self.std = self.logvar.mul(0.5).exp_()
65 |
66 | # reparam of mu
67 | eps = torch.empty_like(self.mu.data).normal_()
68 | self.latent = eps.mul(self.std).add_(self.mu)
69 |
70 | else: # test
71 | self.latent = self.mu
72 | self.logvar = inp[:, int(self.n_latent / 2):, :, :]
73 | self.std = self.logvar.mul(0.5).exp_()
74 |
75 | def forward(self, x):
76 | prelatent = self.Encoder.forward(x)
77 | self.reparameterize(prelatent)
78 | out = self.Decoder(self.latent)
79 | return out
80 |
81 |
82 | class ConvEncoder(nn.Sequential):
83 | def __init__(self, feat_maps=(256, 128, 128), input_sizes=(1, 28, 28),
84 | kernels=(5, 3, 3),
85 | BNs=None, act_fcts=None, dilations=None, strides=None):
86 |
87 | super().__init__()
88 |
89 | self.latent = None
90 |
91 | self.depth = len(feat_maps)
92 | if BNs is None:
93 | BNs = self.depth * [True]
94 | BNs[-1] = False
95 | if act_fcts is None:
96 | act_fcts = self.depth * [nn.ELU]
97 | act_fcts[-1] = nn.Tanh
98 | if dilations is None:
99 | dilations = self.depth * [1]
100 | if strides is None:
101 | strides = self.depth * [1]
102 |
103 | # check
104 | args = [feat_maps, kernels, dilations, strides]
105 | for i, it in enumerate(args):
106 | if len(it) != self.depth:
107 | raise Exception('wrong length' + str(it) + str(i))
108 | feat_maps = [input_sizes[0]] + list(feat_maps)
109 |
110 | # build net
111 | for i, (BN, act_fct, kx, dil, stride) in enumerate(
112 | zip(BNs, act_fcts, kernels, dilations, strides)):
113 |
114 | self.add_module('conv_%i' % i, nn.Conv2d(
115 | feat_maps[i], feat_maps[i + 1], kx,
116 | stride=stride, dilation=dil))
117 |
118 | if BN:
119 | self.add_module('bn_%i' % i, nn.BatchNorm2d(feat_maps[i + 1]))
120 | if act_fct is not None:
121 | self.add_module('nl_%i' % i, act_fct())
122 |
123 | def forward(self, input):
124 | for module in self._modules.values():
125 | input = module(input)
126 | self.latent = input
127 | return input
128 |
129 |
130 | class ConvDecoder(nn.Sequential):
131 | def __init__(self, feat_maps=(32, 32, 1), input_sizes=(2, 1, 1),
132 | kernels=(3, 3, 3),
133 | BNs=None, act_fcts=None, dilations=None, strides=(1, 1, 1),
134 | conv_fct=None):
135 |
136 | super().__init__()
137 |
138 | self.depth = len(feat_maps)
139 | if BNs is None:
140 | BNs = self.depth * [True]
141 | BNs[-1] = False
142 | if act_fcts is None:
143 | act_fcts = self.depth * [nn.ELU]
144 | act_fcts[-1] = u.LinearActFct
145 | if dilations is None:
146 | dilations = self.depth * [1]
147 |
148 | # check
149 | args = [feat_maps, kernels, dilations, strides]
150 | for i, it in enumerate(args):
151 | if len(it) != self.depth:
152 | raise Exception('wrong length' + str(it) + str(i))
153 |
154 | feat_maps = [input_sizes[0]] + list(feat_maps)
155 |
156 | if conv_fct is None:
157 | conv_fct = nn.ConvTranspose2d
158 |
159 | # build net
160 | for i, (BN, act_fct, kx, dil, stride) in enumerate(
161 | zip(BNs, act_fcts, kernels, dilations, strides)):
162 |
163 | self.add_module('conv_%i' % i, conv_fct(
164 | feat_maps[i], feat_maps[i + 1], kx, stride=stride))
165 | if BN:
166 | self.add_module('bn_%i' % i, nn.BatchNorm2d(feat_maps[i + 1]))
167 | self.add_module('nl_%i' % i, act_fct())
168 |
169 |
170 | # Other models
171 | # ------------
172 |
173 | class NN(Architectures):
174 | def __init__(self, feat_maps=(16, 16, 8), input_sizes=(1, 28, 28),
175 | kernels=(5, 3, 3), strides=None,
176 | BNs=None, act_fcts=None):
177 | super().__init__(input_size=input_sizes)
178 | self.depth = len(feat_maps)
179 | ad_feat_maps = [input_sizes[0]] + list(feat_maps)
180 |
181 | if strides is None:
182 | strides = self.depth * [1]
183 |
184 | if BNs is None:
185 | BNs = self.depth * [True]
186 | BNs[-1] = False
187 |
188 | if act_fcts is None:
189 | act_fcts = self.depth * [nn.ELU]
190 | act_fcts[-1] = None
191 |
192 | net_builder(self, BNs, act_fcts=act_fcts, feat_maps=ad_feat_maps,
193 | kernel_sizes=kernels, strides=strides)
194 |
195 |
196 | class View(nn.Module):
197 | def __init__(self, *shape):
198 | super(View, self).__init__()
199 | self.shape = shape
200 |
201 | def forward(self, input):
202 | bs = input.size()[0]
203 | return input.view((bs,) + self.shape)
204 |
205 |
206 | class NearestNeighbor(nn.Module):
207 | def __init__(self, samples, classes, n_classes):
208 | """
209 | :param samples: 4D: (n_samples, nchannels, nx, ny)
210 | :param classes: 1D: (2, 3, 4, 1, ...) (n_samples)
211 | """
212 | super().__init__()
213 | self.samples = samples[None, ...] # (1, n_samples, nch, x, y)
214 | self.classes = classes
215 | self.n_classes = n_classes
216 | self.max_bs = 20
217 |
218 | def forward(self, input_batch, return_more=True):
219 | assert len(input_batch.size()) == 4
220 | assert input_batch.size()[-1] == self.samples.size()[-1]
221 | assert input_batch.size()[-2] == self.samples.size()[-2]
222 | assert input_batch.size()[-3] == self.samples.size()[-3]
223 |
224 | bs = input_batch.shape[0]
225 | input_batch = input_batch[:, None, ...].to(u.dev()) # (bs, 1, nch, x, y)
226 |
227 | def calc_dist(input_batch):
228 | dists = u.L2(self.samples, input_batch, axes=[2, 3, 4])
229 | l2, best_ind_classes = torch.min(dists, 1)
230 | return l2, best_ind_classes
231 |
232 | l2s, best_ind_classes = u.auto_batch(self.max_bs, calc_dist, input_batch)
233 |
234 | # boring bookkeeping
235 | pred = self.get_classes(bs, input_batch, best_ind_classes)
236 | imgs = self.samples[0, best_ind_classes]
237 |         # print(pred, imgs, l2s)
238 | if return_more:
239 | return pred, imgs, l2s
240 | else:
241 | return pred
242 |
243 | def get_classes(self, bs, input_batch, best_ind_classes):
244 | pred = torch.zeros(bs, self.n_classes).to(u.dev())
245 | pred[range(bs), self.classes[best_ind_classes]] = 1.
246 | return pred
247 |
248 |
249 | class NearestNeighborLogits(NearestNeighbor):
250 | def __init__(self, samples, classes, n_classes):
251 | """
252 | :param samples: 4D: (n_samples, nchannels, nx, ny)
253 | :param classes: 1D: (2, 3, 4, 1, ...) (n_samples)
254 | """
255 |         super().__init__(samples, classes, n_classes)
256 | self.samples = None
257 | self.all_samples = samples
258 | self.class_samples = [self.all_samples[self.classes == i] for i in range(n_classes)]
259 | self.max_bs = 40
260 |
261 | def forward(self, input_batch, return_more=True):
262 | bs, nch, nx, ny = input_batch.shape
263 | all_imgs, all_l2s = [], []
264 | for i, samples in enumerate(self.class_samples):
265 | self.samples = samples[None, ...]
266 | _, imgs, l2s = super().forward(input_batch, return_more=True)
267 | all_imgs.append(imgs)
268 | all_l2s.append(l2s)
269 |
270 | all_l2s = torch.cat(all_l2s).view(self.n_classes, -1).transpose(0, 1)
271 | if return_more:
272 | all_imgs = torch.cat(all_imgs).view(self.n_classes, -1, nch, nx, ny).transpose(0, 1)
273 | return -all_l2s, all_imgs, all_l2s
274 | else:
275 | return -all_l2s
276 |
277 | def get_classes(self, *args, **kwargs):
278 | return None
279 |
280 |
281 | def net_builder(net, BNs, act_fcts, feat_maps, kernel_sizes, strides):
282 | # build net
283 | for i, (BN, act_fct, kx, stride) in enumerate(
284 | zip(BNs, act_fcts, kernel_sizes, strides)):
285 | net.add_module('conv_%i' % i, nn.Conv2d(
286 | feat_maps[i], feat_maps[i + 1], kx, stride=stride))
287 | if BN:
288 | net.add_module('bn_%i' % i, nn.BatchNorm2d(feat_maps[i + 1]))
289 | if act_fct is not None:
290 | net.add_module('nl_%i' % i, act_fct())
291 |
292 |
293 | def calc_fov(x, kernels, paddings=None, dilations=None, strides=None):
294 | l_x = x
295 | n_layer = len(kernels)
296 | if paddings is None:
297 | paddings = [0.] * n_layer
298 | if dilations is None:
299 | dilations = [1.] * n_layer
300 | if strides is None:
301 | strides = [1.] * n_layer
302 | for p, d, k, s in zip(paddings, dilations, kernels, strides):
303 | l_x = calc_fov_layer(l_x, k, p, d, s)
304 | return l_x
305 |
306 |
307 | def calc_fov_layer(x, kernel, padding=0, dilation=1, stride=1):
308 | p, d, k, s = padding, dilation, kernel, float(stride)
309 |     print('s', s, 'p', p, 'd', d, 'k', k)
310 | if np.floor((x + 2. * p - d * (k - 1.) - 1.) / s + 1.) != (x + 2. * p - d * (k - 1.) - 1.) / s + 1.: # noqa: E501
311 | print('boundary problems')
312 | return np.floor((x + 2. * p - d * (k - 1.) - 1.) / s + 1.)
313 |
--------------------------------------------------------------------------------
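
A small sanity check, assuming the package is importable, that the encoder configuration used in `models.py` maps a 28x28 input to a spatially 1x1 latent code (note that `calc_fov_layer` prints its per-layer parameters):

```
from abs_models.nets import calc_fov

# encoder used in models.py: kernels [5, 4, 3, 5], strides [1, 2, 2, 1], no padding
out = calc_fov(28, kernels=[5, 4, 3, 5], strides=[1, 2, 2, 1])
print(out)  # 1.0 -> the spatial size shrinks 28 -> 24 -> 11 -> 5 -> 1
```
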
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [yyyy] [name of copyright owner]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/madry/mnist_challenge/README.md:
--------------------------------------------------------------------------------
1 | # MNIST Adversarial Examples Challenge
2 |
3 | Recently, there has been much progress on adversarial *attacks* against neural networks, such as the [cleverhans](https://github.com/tensorflow/cleverhans) library and the code by [Carlini and Wagner](https://github.com/carlini/nn_robust_attacks).
4 | We now complement these advances by proposing an *attack challenge* for the
5 | [MNIST](http://yann.lecun.com/exdb/mnist/) dataset (we recently released [a
6 | CIFAR10 variant of this
7 | challenge](https://github.com/MadryLab/cifar10_challenge)).
8 | We have trained a robust network, and the objective is to find a set of adversarial examples on which this network achieves only a low accuracy.
9 | To train an adversarially-robust network, we followed the approach from our recent paper:
10 |
11 | **Towards Deep Learning Models Resistant to Adversarial Attacks**
12 | *Aleksander Madry, Aleksandar Makelov, Ludwig Schmidt, Dimitris Tsipras, Adrian Vladu*
13 | https://arxiv.org/abs/1706.06083.
14 |
15 | As part of the challenge, we release both the training code and the network architecture, but keep the network weights secret.
16 | We invite any researcher to submit attacks against our model (see the detailed instructions below).
17 | We will maintain a leaderboard of the best attacks for the next two months and then publish our secret network weights.
18 |
19 | The goal of our challenge is to clarify the state-of-the-art for adversarial robustness on MNIST. Moreover, we hope that future work on defense mechanisms will adopt a similar challenge format in order to improve reproducibility and empirical comparisons.
20 |
21 | **Update 2017-09-14:** Due to recently increased interest in our challenge, we are extending its duration until October 15th.
22 |
23 | **Update 2017-10-19:** We released our secret model, you can download it by
24 | running `python fetch_model.py secret`. As of Oct 15 we are no longer
25 | accepting black-box challenge submissions. We will soon set up a leaderboard to keep track
26 | of white-box attacks. Many thanks to everyone who participated!
27 |
28 | **Update 2017-11-06:** We have set up a leaderboard for white-box attacks on the (now released) secret model. The submission format is the same as before. We plan to continue evaluating submissions and maintaining the leaderboard for the foreseeable future.
29 |
30 | ## Black-Box Leaderboard (Original Challenge)
31 |
32 | | Attack | Submitted by | Accuracy | Submission Date |
33 | | -------------------------------------- | ------------- | -------- | ---- |
34 | | AdvGAN from ["Generating Adversarial Examples with Adversarial Networks"](https://arxiv.org/abs/1801.02610) | AdvGAN | **92.76%** | Sep 25, 2017 |
35 | | PGD against three independently and adversarially trained copies of the network | [Florian Tramèr](http://floriantramer.com/) | 93.54% | Jul 5, 2017 |
36 | | FGSM on the [CW](https://github.com/carlini/nn_robust_attacks) loss for model B from ["Ensemble Adversarial Training [...]"](https://arxiv.org/abs/1705.07204) | [Florian Tramèr](http://floriantramer.com/) | 94.36% | Jun 29, 2017 |
37 | | FGSM on the [CW](https://github.com/carlini/nn_robust_attacks) loss for the naturally trained public network | (initial entry) | 96.08% | Jun 28, 2017 |
38 | | PGD on the cross-entropy loss for the naturally trained public network | (initial entry) | 96.81% | Jun 28, 2017 |
39 | | Attack using Gaussian Filter for selected pixels on the adversarially trained public network | Anonymous | 97.33% | Aug 27, 2017 |
40 | | FGSM on the cross-entropy loss for the adversarially trained public network | (initial entry) | 97.66% | Jun 28, 2017 |
41 | | PGD on the cross-entropy loss for the adversarially trained public network | (initial entry) | 97.79% | Jun 28, 2017 |
42 |
43 | ## White-Box Leaderboard
44 |
45 | | Attack | Submitted by | Accuracy | Submission Date |
46 | | -------------------------------------- | ------------- | -------- | ---- |
47 | | First-order attack on logit difference for optimally chosen target label | Samarth Gupta | 88.85% | May 23, 2018 |
48 | | 100-step PGD on the cross-entropy loss with 50 random restarts | (initial entry) | 89.62% | Nov 6, 2017 |
49 | | 100-step PGD on the [CW](https://github.com/carlini/nn_robust_attacks) loss with 50 random restarts | (initial entry) | 89.71% | Nov 6, 2017 |
50 | | 100-step PGD on the cross-entropy loss | (initial entry) | 92.52% | Nov 6, 2017 |
51 | | 100-step PGD on the [CW](https://github.com/carlini/nn_robust_attacks) loss | (initial entry) | 93.04% | Nov 6, 2017 |
52 | | FGSM on the cross-entropy loss | (initial entry) | 96.36% | Nov 6, 2017 |
53 | | FGSM on the [CW](https://github.com/carlini/nn_robust_attacks) loss | (initial entry) | 96.40% | Nov 6, 2017 |
54 |
55 | ## Format and Rules
56 |
57 | The objective of the challenge is to find black-box (transfer) attacks that are effective against our MNIST model.
58 | Attacks are allowed to perturb each pixel of the input image by at most `epsilon=0.3`.
59 | To ensure that the attacks are indeed black-box, we release our training code and model architecture, but keep the actual network weights secret.
60 |
61 | We invite any interested researchers to submit attacks against our model.
62 | The most successful attacks will be listed in the leaderboard above.
63 | As a reference point, we have seeded the leaderboard with the results of some standard attacks.
64 |
65 | ### The MNIST Model
66 |
67 | We used the code published in this repository to produce an adversarially robust model for MNIST classification. The model is a convolutional neural network consisting of two convolutional layers (each followed by max-pooling) and a fully connected layer. This architecture is derived from the [MNIST tensorflow tutorial](https://www.tensorflow.org/get_started/mnist/pros).
68 | The network was trained against an iterative adversary that is allowed to perturb each pixel by at most `epsilon=0.3`.
69 |
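For concreteness, the following is a hedged PyTorch restatement of the architecture just described; the layer widths follow the TensorFlow MNIST tutorial the model is derived from and are assumptions here, and this is not the challenge's actual TensorFlow implementation (see `model.py` for that).

```
import torch.nn as nn

# illustrative restatement; widths follow the TF MNIST tutorial and are
# not guaranteed to match the secret challenge model exactly
net = nn.Sequential(
    nn.Conv2d(1, 32, 5, padding=2), nn.ReLU(), nn.MaxPool2d(2),   # conv + max-pooling
    nn.Conv2d(32, 64, 5, padding=2), nn.ReLU(), nn.MaxPool2d(2),  # conv + max-pooling
    nn.Flatten(),
    nn.Linear(64 * 7 * 7, 1024), nn.ReLU(),                       # fully connected layer
    nn.Linear(1024, 10),
)
```
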
70 | The random seed used for training and the trained network weights will be kept secret.
71 |
72 | The `sha256()` digest of our model file is:
73 | ```
74 | 14eea09c72092db5c2eb5e34cd105974f42569281d2f34826316e356d057f96d
75 | ```
76 | We will release the corresponding model file on October 15th 2017, which is roughly two months after the start of this competition.
77 |
78 | ### The Attack Model
79 |
80 | We are interested in adversarial inputs that are derived from the MNIST test set.
81 | Each pixel can be perturbed by at most `epsilon=0.3` from its initial value.
82 | All pixels can be perturbed independently, so this is an l_infinity attack.
83 |
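Concretely, a candidate can be projected back into this threat model with two clips; a minimal numpy sketch (the function name is ours, not part of the challenge code):

```
import numpy as np

def project_linf(x_adv, x_nat, epsilon=0.3):
    # stay within the l_infinity ball around the natural image ...
    x_adv = np.clip(x_adv, x_nat - epsilon, x_nat + epsilon)
    # ... and within the valid pixel range
    return np.clip(x_adv, 0.0, 1.0)
```
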
84 | ### Submitting an Attack
85 |
86 | Each attack should consist of a perturbed version of the MNIST test set.
87 | Each perturbed image in this test set should follow the above attack model.
88 |
89 | The adversarial test set should be formatted as a numpy array with one row per example and each row containing a flattened
90 | array of 28x28 pixels.
91 | Hence the overall dimensions are 10,000 rows and 784 columns.
92 | Each pixel must be in the [0,1] range.
93 | See the script `pgd_attack.py` for an attack that generates an adversarial test set in this format.
94 |
95 | In order to submit your attack, save the matrix containing your adversarial examples with `numpy.save` and email the resulting file to mnist.challenge@gmail.com.
96 | We will then run the `run_attack.py` script on your file to verify that the attack is valid and to evaluate the accuracy of our secret model on your examples.
97 | After that, we will reply with the predictions of our model on each of your examples and the overall accuracy of our model on your evaluation set.
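
For example, a submission file can be created and sanity-checked along these lines (a sketch; the helper and file name are illustrative):

```
import numpy as np

def check_and_save(x_adv, x_nat, path='attack.npy', epsilon=0.3):
    assert x_adv.shape == (10000, 784)        # one flattened 28x28 image per row
    assert x_adv.min() >= 0.0 and x_adv.max() <= 1.0
    assert np.abs(x_adv - x_nat).max() <= epsilon + 1e-6
    np.save(path, x_adv)                      # attach this file to the email
```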
98 |
99 | If the attack is valid and outperforms all current attacks in the leaderboard, it will appear at the top of the leaderboard.
100 | Novel types of attacks might be included in the leaderboard even if they do not perform best.
101 |
102 | We strongly encourage you to disclose your attack method.
103 | We would be happy to add a link to your code in our leaderboard.
104 |
105 | ## Overview of the Code
106 | The code consists of six Python scripts and the file `config.json` that contains various parameter settings.
107 |
108 | ### Running the code
109 | - `python train.py`: trains the network, storing checkpoints along
110 | the way.
111 | - `python eval.py`: an infinite evaluation loop, processing each new
112 | checkpoint as it is created while logging summaries. It is intended
113 | to be run in parallel with the `train.py` script.
114 | - `python pgd_attack.py`: applies the attack to the MNIST eval set and
115 | stores the resulting adversarial eval set in a `.npy` file. This file is
116 | in a valid attack format for our challenge.
117 | - `python run_attack.py`: evaluates the model on the examples in
118 | the `.npy` file specified in config, while ensuring that the adversarial examples
119 | are indeed a valid attack. The script also saves the network predictions in `pred.npy`.
120 | - `python fetch_model.py name`: downloads the pre-trained model with the
121 | specified name (at the moment `adv_trained` or `natural`), prints the sha256
122 | hash, and places it in the models directory.
123 |
124 | ### Parameters in `config.json`
125 |
126 | Model configuration:
127 | - `model_dir`: contains the path to the directory of the currently
128 | trained/evaluated model.
129 |
130 | Training configuration:
131 | - `random_seed`: the seed for the RNG used to initialize the network
132 | weights.
133 | - `max_num_training_steps`: the number of training steps.
134 | - `num_output_steps`: the number of training steps between printing
135 | progress in standard output.
136 | - `num_summary_steps`: the number of training steps between storing
137 | tensorboard summaries.
138 | - `num_checkpoint_steps`: the number of training steps between storing
139 | model checkpoints.
140 | - `training_batch_size`: the size of the training batch.
141 |
142 | Evaluation configuration:
143 | - `num_eval_examples`: the number of MNIST examples to evaluate the
144 | model on.
145 | - `eval_batch_size`: the size of the evaluation batches.
146 | - `eval_on_cpu`: forces the `eval.py` script to run on the CPU so it does not compete with `train.py` for GPU resources.
147 |
148 | Adversarial examples configuration:
149 | - `epsilon`: the maximum allowed perturbation per pixel.
150 | - `k`: the number of PGD iterations used by the adversary.
151 | - `a`: the size of the PGD adversary steps.
152 | - `random_start`: specifies whether the adversary will start iterating
153 | from the natural example or a random perturbation of it.
154 | - `loss_func`: the loss function used to run pgd on. `xent` corresponds to the
155 | standard cross-entropy loss, `cw` corresponds to the loss function
156 | of [Carlini and Wagner](https://arxiv.org/abs/1608.04644).
157 | - `store_adv_path`: the file in which adversarial examples are stored.
158 | Relevant for the `pgd_attack.py` and `run_attack.py` scripts.
159 |
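Putting these together, a `config.json` could look as follows; the values are illustrative defaults, not necessarily the ones shipped with the repository:

```
{
  "model_dir": "models/adv_trained",
  "random_seed": 4557077,
  "max_num_training_steps": 100000,
  "num_output_steps": 100,
  "num_summary_steps": 100,
  "num_checkpoint_steps": 300,
  "training_batch_size": 50,
  "num_eval_examples": 10000,
  "eval_batch_size": 200,
  "eval_on_cpu": true,
  "epsilon": 0.3,
  "k": 40,
  "a": 0.01,
  "random_start": true,
  "loss_func": "xent",
  "store_adv_path": "attack.npy"
}
```
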
160 | ## Example usage
161 | After cloning the repository you can either train a new network or evaluate/attack one of our pre-trained networks.
162 | #### Training a new network
163 | * Start training by running:
164 | ```
165 | python train.py
166 | ```
167 | * (Optional) Evaluation summaries can be logged by simultaneously
168 | running:
169 | ```
170 | python eval.py
171 | ```
172 | #### Download a pre-trained network
173 | * For an adversarially trained network, run
174 | ```
175 | python fetch_model.py adv_trained
176 | ```
177 | and use the `config.json` file to set `"model_dir": "models/adv_trained"`.
178 | * For a naturally trained network, run
179 | ```
180 | python fetch_model.py natural
181 | ```
182 | and use the `config.json` file to set `"model_dir": "models/natural"`.
183 | #### Test the network
184 | * Create an attack file by running
185 | ```
186 | python pgd_attack.py
187 | ```
188 | * Evaluate the network with
189 | ```
190 | python run_attack.py
191 | ```
192 |
--------------------------------------------------------------------------------
/scripts/attacks.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [
8 | {
9 | "name": "stdout",
10 | "output_type": "stream",
11 | "text": [
12 | "The history saving thread hit an unexpected error (DatabaseError('database disk image is malformed',)).History will not be written to the database.\n"
13 | ]
14 | }
15 | ],
16 | "source": [
17 | "import sys\n",
18 | "sys.path.insert(0, './../') \n",
19 | "%load_ext autoreload\n",
20 | "%autoreload 2\n",
21 | "%matplotlib inline\n",
22 | "\n",
23 | "import torch\n",
24 | "from torchvision import datasets, transforms\n",
25 | "\n",
26 | "import numpy as np\n",
27 | "from matplotlib import pyplot as plt\n",
28 | "import foolbox \n",
29 | "from foolbox import attacks as fa\n",
30 | "\n",
31 | "# own modules\n",
32 | "from abs_models import utils as u\n",
33 | "from abs_models import models as mz\n",
34 | "from abs_models import attack_utils as au"
35 | ]
36 | },
37 | {
38 | "cell_type": "code",
39 | "execution_count": 2,
40 | "metadata": {},
41 | "outputs": [
42 | {
43 | "name": "stdout",
44 | "output_type": "stream",
45 | "text": [
46 | "ABS model\n",
47 | "model loaded\n"
48 | ]
49 | }
50 | ],
51 | "source": [
52 | "model = mz.get_VAE(n_iter=10)  # ABS; use n_iter=50 for the original model\n",
53 | "# model = mz.get_VAE(binary=True)  # ABS with scaling and binarization\n",
54 | "# model = mz.get_binary_CNN() # Binary CNN\n",
55 | "# model = mz.get_CNN() # Vanilla CNN\n",
56 | "# model = mz.get_NearestNeighbor() # Nearest Neighbor, \"nearest L2 dist to each class\"=logits\n",
57 | "# model = mz.get_madry() # Robust network from Madry et al. in tf"
58 | ]
59 | },
60 | {
61 | "cell_type": "code",
62 | "execution_count": 3,
63 | "metadata": {},
64 | "outputs": [],
65 | "source": [
66 | "# the code is agnostic of the PyTorch/TensorFlow backend --> wrap as a foolbox model\n",
67 | "if model.code_base == 'tensorflow':\n",
68 | "    fmodel = foolbox.models.TensorFlowModel(model.x_input, model.pre_softmax, (0., 1.),\n",
69 | "                                            channel_axis=3)\n",
70 | "elif model.code_base == 'pytorch':\n",
71 | "    model.eval()\n",
72 | "    fmodel = foolbox.models.PyTorchModel(model,  # returns logits of shape (bs, n_classes)\n",
73 | "                                         bounds=(0., 1.), num_classes=10,\n",
74 | "                                         device=u.dev())\n",
75 | "else:\n",
76 | "    raise NotImplementedError(model.code_base)"
77 | ]
78 | },
79 | {
80 | "cell_type": "code",
81 | "execution_count": 5,
82 | "metadata": {},
83 | "outputs": [
84 | {
85 | "name": "stdout",
86 | "output_type": "stream",
87 | "text": [
88 | "score 0.988\n"
89 | ]
90 | }
91 | ],
92 | "source": [
93 | "# test model \n",
94 | "b, l = u.get_batch(bs=10000) # returns random batch as np.array\n",
95 | "pred_label = np.argmax(fmodel.batch_predictions(b), axis=1)\n",
96 | "print('score', float(np.sum(pred_label == l)) / b.shape[0])"
97 | ]
98 | },
99 | {
100 | "cell_type": "markdown",
101 | "metadata": {},
102 | "source": [
103 | "# Decision based attacks\n",
104 | "Note that this is only demo code. All experiments were optimized for our compute architecture."
105 | ]
106 | },
107 | {
108 | "cell_type": "code",
109 | "execution_count": 12,
110 | "metadata": {},
111 | "outputs": [],
112 | "source": [
113 | "b, l = u.get_batch(bs=1) # returns random batch"
114 | ]
115 | },
116 | {
117 | "cell_type": "code",
118 | "execution_count": 13,
119 | "metadata": {},
120 | "outputs": [
121 | {
122 | "data": {
123 | "image/png": "<base64 PNG omitted: plot of the original digit, title 'orig'>",
124 | "text/plain": [
125 | ""
126 | ]
127 | },
128 | "metadata": {},
129 | "output_type": "display_data"
130 | },
131 | {
132 | "name": "stdout",
133 | "output_type": "stream",
134 | "text": [
135 | "runtime 31.859752893447876 seconds\n",
136 | "pred 3\n"
137 | ]
138 | },
139 | {
140 | "data": {
141 | "image/png": "<base64 PNG omitted: plot of the adversarial example, title 'adv'>",
142 | "text/plain": [
143 | ""
144 | ]
145 | },
146 | "metadata": {},
147 | "output_type": "display_data"
148 | }
149 | ],
150 | "source": [
151 | "import time\n",
152 | "start = time.time()\n",
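"# DeepFool (L2): repeatedly linearizes the decision boundary and takes the\n",
"# smallest step across it, which tends to yield small L2 perturbations\n",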
153 | "att = fa.DeepFoolL2Attack(fmodel)\n",
154 | "metric = foolbox.distances.MSE\n",
155 | "criterion = foolbox.criteria.Misclassification()\n",
156 | "\n",
157 | "plt.imshow(b[0, 0], cmap='gray')\n",
158 | "plt.title('orig')\n",
159 | "plt.axis('off')\n",
160 | "plt.show()\n",
161 | "\n",
162 | "# Estimate gradients from scores\n",
163 | "if not model.has_grad:\n",
164 | "    GE = foolbox.gradient_estimators.CoordinateWiseGradientEstimator(0.1)\n",
165 | "    fmodel = foolbox.models.ModelWithEstimatedGradients(fmodel, GE)\n",
166 | "\n",
167 | "# generate the adversarial example\n",
168 | "a = foolbox.adversarial.Adversarial(fmodel, criterion, b[0], l[0], distance=metric)\n",
169 | "att(a)\n",
170 | "\n",
171 | "print('runtime', time.time() - start, 'seconds')\n",
172 | "if a.image is not None:  # attack was successful\n",
173 | "    print('pred', np.argmax(fmodel.predictions(a.image)))\n",
174 | "    plt.imshow(a.image[0], cmap='gray')\n",
175 | "    plt.title('adv')\n",
176 | "    plt.axis('off')\n",
177 | "    plt.show()"
178 | ]
179 | },
180 | {
181 | "cell_type": "markdown",
182 | "metadata": {},
183 | "source": [
184 | "# get Trash Adversarials"
185 | ]
186 | },
187 | {
188 | "cell_type": "code",
189 | "execution_count": 8,
190 | "metadata": {},
191 | "outputs": [],
192 | "source": [
193 | "from foolbox.gradient_estimators import CoordinateWiseGradientEstimator as CWGE"
194 | ]
195 | },
196 | {
197 | "cell_type": "code",
198 | "execution_count": 9,
199 | "metadata": {},
200 | "outputs": [
201 | {
202 | "name": "stderr",
203 | "output_type": "stream",
204 | "text": [
205 | "/usr/local/lib/python3.6/dist-packages/ipykernel_launcher.py:2: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
206 | " \n"
207 | ]
208 | }
209 | ],
210 | "source": [
211 | "a = np.random.random((1, 28, 28)).astype(np.float32)\n",
212 | "a_helper = torch.tensor(torch.from_numpy(a.copy()), requires_grad=True)\n",
213 | "fixed_class = 1\n",
214 | "GE = CWGE(1.)"
215 | ]
216 | },
217 | {
218 | "cell_type": "code",
219 | "execution_count": 10,
220 | "metadata": {},
221 | "outputs": [],
222 | "source": [
223 | "opti = torch.optim.SGD([a_helper], lr=1, momentum=0.95)"
224 | ]
225 | },
226 | {
227 | "cell_type": "code",
228 | "execution_count": null,
229 | "metadata": {},
230 | "outputs": [],
231 | "source": [
232 | "confidence_level = model.confidence_level # abs 0.0000031, CNN 1439000, madry 60, 1-NN 0.000000000004\n",
233 | "logits_scale = model.logit_scale # ABS 430, madry 1, CNN 1, 1-NN 5\n",
234 | "\n",
235 | "a_orig = a\n",
236 | "plt.imshow(u.t2n(a[0]), cmap='gray')\n",
237 | "plt.show()\n",
238 | "\n",
239 | "for i in range(10000):\n",
240 | "    logits = fmodel.predictions(a)\n",
241 | "    probs = u.t2n(u.confidence_softmax(logits_scale * torch.from_numpy(logits[None, :]), dim=1,\n",
242 | "                                       const=confidence_level))[0]\n",
243 | "    pred_class = np.argmax(u.t2n(logits).squeeze())\n",
244 | "\n",
245 | "    if probs[fixed_class] >= 0.9:\n",
246 | "        break\n",
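"    # coordinate-wise finite differences: estimate the gradient of the class\n",
"    # scores w.r.t. the input pixels without backpropagation\n",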
247 | "    grads = GE(fmodel.batch_predictions, a, fixed_class, (0, 1))\n",
248 | "\n",
249 | "    a = au.update_distal_adv(a, a_helper, grads, opti)\n",
250 | "    if i % 1000 == 0:\n",
251 | "        print(f'probs {probs[pred_class]:.3f} class', pred_class)\n",
252 | "        fig, ax = plt.subplots(1, 3, squeeze=False, figsize=(10, 4))\n",
253 | "        ax[0, 0].imshow(u.t2n(a[0]), cmap='gray')\n",
254 | "        ax[0, 1].imshow(u.t2n(grads[0]), cmap='gray')\n",
255 | "        ax[0, 2].imshow(np.sign(grads[0]), cmap='gray')\n",
256 | "        plt.show()\n",
257 | "plt.imshow(u.t2n(a[0]), cmap='gray')\n",
258 | "plt.show()"
259 | ]
260 | },
261 | {
262 | "cell_type": "markdown",
263 | "metadata": {},
264 | "source": [
265 | "# Latent Descent Attack"
266 | ]
267 | },
268 | {
269 | "cell_type": "code",
270 | "execution_count": null,
271 | "metadata": {},
272 | "outputs": [],
273 | "source": [
274 | "# only for ABS models\n",
275 | "att = au.LineSearchAttack(model)  # or BinaryLineSearchAttack for the binary ABS\n",
276 | "b, l = u.get_batch(bs=200)\n",
277 | "\n",
278 | "advs = att(b, l, n_coarse_steps=50+1, n_ft_steps=2)\n",
279 | "\n",
280 | "for adv in advs:\n",
281 | "    adv['img'] = adv['img'].cpu().numpy()\n",
282 | "\n",
283 | "for i, (a_i, b_i) in enumerate(zip(advs, b)):\n",
284 | "    l2 = np.sqrt(a_i['distance'] * 784)  # convert from MSE to L2 distance\n",
285 | "\n",
286 | "    fig, ax = plt.subplots(1, 2, squeeze=False)\n",
287 | "    ax[0, 0].set_title(str(a_i['original_label']))\n",
288 | "    ax[0, 0].imshow(u.t2n(b_i[0]), cmap='gray')\n",
289 | "    ax[0, 1].set_title(str(a_i['adversarial_label']))\n",
290 | "    ax[0, 1].imshow(u.t2n(a_i['img'][0]), cmap='gray')\n",
291 | "    plt.show()\n",
292 | "    if i == 10:\n",
293 | "        break\n",
294 | "print('mean L2', np.mean([np.sqrt(a_i['distance'] * 784) for a_i in advs]))"
295 | ]
296 | }
297 | ],
298 | "metadata": {
299 | "kernelspec": {
300 | "display_name": "Python 3",
301 | "language": "python",
302 | "name": "python3"
303 | },
304 | "language_info": {
305 | "codemirror_mode": {
306 | "name": "ipython",
307 | "version": 3
308 | },
309 | "file_extension": ".py",
310 | "mimetype": "text/x-python",
311 | "name": "python",
312 | "nbconvert_exporter": "python",
313 | "pygments_lexer": "ipython3",
314 | "version": "3.6.4"
315 | }
316 | },
317 | "nbformat": 4,
318 | "nbformat_minor": 2
319 | }
320 |
--------------------------------------------------------------------------------