├── LICENSE
├── README.md
├── attacks.py
├── craft_adv_examples.py
├── cw_attacks.py
├── detect_adv_examples.py
├── extract_characteristics.py
├── train_model.py
└── util.py


/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2018 Xingjun Ma
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | ## Code for paper "Characterizing Adversarial Subspaces Using Local Intrinsic Dimensionality". ICLR 2018, https://arxiv.org/abs/1801.02613
 2 | 
 3 | ## Update: added BatchNormalization after the Conv and ReLU layers (17 Sept. 2018).
 4 | 
 5 | ### 1. Pre-train DNN models:
 6 | python train_model.py -d mnist -e 50 -b 128
 7 | 
 8 | ### 2. Craft adversarial examples:
 9 | python craft_adv_examples.py -d cifar -a cw-l2 -b 100
10 | ### 3. Extract detection characteristics:
11 | python extract_characteristics.py -d cifar -a cw-l2 -r lid -k 20 -b 100
12 | 
13 | ### 4. Train simple detectors:
14 | python detect_adv_examples.py -d cifar -a fgsm -t cw-l2 -r lid
15 | 
16 | #### Dependencies:
17 | python 3.5, tqdm, tensorflow = 1.8, Keras >= 2.0, cleverhans >= 1.0.0 (may need extra change to pass in keras learning rate)
18 | 
19 | #### Kernel Density and Bayesian Uncertainty are from https://github.com/rfeinman/detecting-adversarial-samples ("Detecting Adversarial Samples from Artifacts" (Feinman et al. 2017))
20 | 
21 | ---------------------------
22 | If you came across the error:
23 | 
24 | tensorflow.python.framework.errors_impl.InvalidArgumentError: input_1:0 is both fed and fetched.
25 | 
26 | 
27 | Solution: in the function get_layer_wise_activations() (util.py), make the following change:
28 | acts = [layer.output for layer in model.layers[1:]] # let the layer index start from 1.
29 | 
30 | Reason: this is possibly caused by the input layer being defined as a separate layer whose input and output are both X.
31 | 


--------------------------------------------------------------------------------
/attacks.py:
--------------------------------------------------------------------------------
  1 | from __future__ import absolute_import
  2 | from __future__ import print_function
  3 | 
  4 | import copy
  5 | from collections import defaultdict
  6 | import numpy as np
  7 | import tensorflow as tf
  8 | from tqdm import tqdm
  9 | from six.moves import xrange
 10 | 
 11 | from cleverhans.utils import other_classes
 12 | from cleverhans.utils_tf import batch_eval, model_argmax
 13 | from cleverhans.attacks_tf import (jacobian_graph, jacobian,
 14 |                                    apply_perturbations, saliency_map)
 15 | import keras.backend as K
 16 | 
 17 | 
 18 | def fgsm(x, predictions, eps, clip_min=None, clip_max=None, y=None):
 19 |     """
 20 |     Computes symbolic TF tensor for the adversarial samples. This must
 21 |     be evaluated with a session.run call.
 22 |     :param x: the input placeholder
 23 |     :param predictions: the model's output tensor
 24 |     :param eps: the epsilon (input variation parameter)
 25 |     :param clip_min: optional parameter that can be used to set a minimum
 26 |                     value for components of the example returned
 27 |     :param clip_max: optional parameter that can be used to set a maximum
 28 |                     value for components of the example returned
 29 |     :param y: the output placeholder. Use None (the default) to avoid the
 30 |             label leaking effect.
 31 |     :return: a tensor for the adversarial example
 32 |     """
 33 | 
 34 |     # Compute loss
 35 |     if y is None:
 36 |         # In this case, use model predictions as ground truth
 37 |         y = tf.to_float(
 38 |             tf.equal(predictions,
 39 |                      tf.reduce_max(predictions, 1, keep_dims=True)))
 40 |     y = y / tf.reduce_sum(y, 1, keep_dims=True)
 41 |     logits, = predictions.op.inputs
 42 |     loss = tf.reduce_mean(
 43 |         tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y)
 44 |     )
 45 | 
 46 |     # Define gradient of loss wrt input
 47 |     grad, = tf.gradients(loss, x)
 48 | 
 49 |     # Take sign of gradient
 50 |     signed_grad = tf.sign(grad)
 51 | 
 52 |     # Multiply by constant epsilon
 53 |     scaled_signed_grad = eps * signed_grad
 54 | 
 55 |     # Add perturbation to original example to obtain adversarial example
 56 |     adv_x = tf.stop_gradient(x + scaled_signed_grad)
 57 | 
 58 |     # If clipping is needed, reset all values outside of [clip_min, clip_max]
 59 |     if (clip_min is not None) and (clip_max is not None):
 60 |         adv_x = tf.clip_by_value(adv_x, clip_min, clip_max)
 61 | 
 62 |     return adv_x
 63 | 
 64 | 
 65 | def jsma(sess, x, predictions, grads, sample, target, theta, gamma,
 66 |          increase, nb_classes, clip_min, clip_max, verbose=False):
 67 |     """
 68 |     TensorFlow implementation of the jacobian-based saliency map method (JSMA).
 69 |     :param sess: TF session
 70 |     :param x: the input placeholder
 71 |     :param predictions: the model's symbolic output (linear output,
 72 |         pre-softmax)
 73 |     :param sample: numpy array with sample input
 74 |     :param target: target class for sample input
 75 |     :param theta: delta for each feature adjustment
 76 |     :param gamma: a float between 0 - 1 indicating the maximum distortion
 77 |         percentage
 78 |     :param increase: boolean; true if we are increasing pixels, false otherwise
 79 |     :param nb_classes: integer indicating the number of classes in the model
 80 |     :param clip_min: optional parameter that can be used to set a minimum
 81 |                     value for components of the example returned
 82 |     :param clip_max: optional parameter that can be used to set a maximum
 83 |                     value for components of the example returned
 84 |     :param verbose: boolean; whether to print status updates or not
 85 |     :return: an adversarial sample
 86 |     """
 87 | 
 88 |     # Copy the source sample and define the maximum number of features
 89 |     # (i.e. the maximum number of iterations) that we may perturb
 90 |     adv_x = copy.copy(sample)
 91 |     # count the number of features. For MNIST, 1x28x28 = 784; for
 92 |     # CIFAR, 3x32x32 = 3072; etc.
 93 |     nb_features = np.product(adv_x.shape[1:])
 94 |     # reshape sample for sake of standardization
 95 |     original_shape = adv_x.shape
 96 |     adv_x = np.reshape(adv_x, (1, nb_features))
 97 |     # compute maximum number of iterations
 98 |     max_iters = np.floor(nb_features * gamma / 2)
 99 |     if verbose:
100 |         print('Maximum number of iterations: {0}'.format(max_iters))
101 | 
102 |     # Compute our initial search domain. We optimize the initial search domain
103 |     # by removing all features that are already at their maximum values (if
104 |     # increasing input features---otherwise, at their minimum value).
105 |     if increase:
106 |         search_domain = set([i for i in xrange(nb_features)
107 |                              if adv_x[0, i] < clip_max])
108 |     else:
109 |         search_domain = set([i for i in xrange(nb_features)
110 |                              if adv_x[0, i] > clip_min])
111 | 
112 |     # Initialize the loop variables
113 |     iteration = 0
114 |     adv_x_original_shape = np.reshape(adv_x, original_shape)
115 |     current = model_argmax(sess, x, predictions, adv_x_original_shape, feed={K.learning_phase(): 0})
116 | 
117 |     # Repeat this main loop until we have achieved misclassification
118 |     while (current != target and iteration < max_iters and
119 |            len(search_domain) > 1):
120 |         # Reshape the adversarial example
121 |         adv_x_original_shape = np.reshape(adv_x, original_shape)
122 | 
123 |         # Compute the Jacobian components
124 |         grads_target, grads_others = jacobian(sess, x, grads, target,
125 |                                               adv_x_original_shape,
126 |                                               nb_features, nb_classes,
127 |                                               feed={K.learning_phase(): 0})
128 | 
129 |         # Compute the saliency map for each of our target classes
130 |         # and return the two best candidate features for perturbation
131 |         i, j, search_domain = saliency_map(
132 |             grads_target, grads_others, search_domain, increase)
133 | 
134 |         # Apply the perturbation to the two input features selected previously
135 |         adv_x = apply_perturbations(
136 |             i, j, adv_x, increase, theta, clip_min, clip_max)
137 | 
138 |         # Update our current prediction by querying the model
139 |         current = model_argmax(sess, x, predictions, adv_x_original_shape, feed={K.learning_phase(): 0})
140 | 
141 |         # Update loop variables
142 |         iteration += 1
143 | 
144 |         # This process may take a while, so outputting progress regularly
145 |         if iteration % 5 == 0 and verbose:
146 |             msg = 'Current iteration: {0} - Current Prediction: {1}'
147 |             print(msg.format(iteration, current))
148 | 
149 |     # Compute the ratio of pixels perturbed by the algorithm
150 |     percent_perturbed = float(iteration * 2) / nb_features
151 | 
152 |     # Report success when the adversarial example is misclassified in the
153 |     # target class
154 |     if current == target:
155 |         if verbose:
156 |             print('Successful')
157 |         return np.reshape(adv_x, original_shape), 1, percent_perturbed
158 |     else:
159 |         if verbose:
160 |             print('Unsuccesful')
161 |         return np.reshape(adv_x, original_shape), 0, percent_perturbed
162 | 
163 | 
def fast_gradient_sign_method(sess, model, X, Y, eps, clip_min=None,
                              clip_max=None, batch_size=256):
    """
    Craft FGSM adversarial samples for a whole array of inputs.

    :param sess: TF session used to evaluate the attack graph
    :param model: callable applied to the input placeholder; its output is
                  passed to fgsm() as the predictions tensor
    :param X: numpy array of inputs, first axis is the sample axis
    :param Y: numpy array of labels, first axis is the sample axis
    :param eps: the epsilon (input variation parameter) handed to fgsm()
    :param clip_min: optional lower clipping bound handed to fgsm()
    :param clip_max: optional upper clipping bound handed to fgsm()
    :param batch_size: batch size used by cleverhans' batch_eval
    :return: the adversarial samples returned by batch_eval
    """
    # Placeholders mirror the per-sample shapes of X and Y.
    input_shape = (None,) + X.shape[1:]
    label_shape = (None,) + Y.shape[1:]
    x = tf.placeholder(tf.float32, shape=input_shape)
    y = tf.placeholder(tf.float32, shape=label_shape)

    # Symbolic adversarial example for the given labels.
    adv_tensor = fgsm(x, model(x), eps=eps, clip_min=clip_min,
                      clip_max=clip_max, y=y)

    # Evaluate in batches with the Keras learning phase fixed to 0.
    evaluated = batch_eval(
        sess, [x, y], [adv_tensor], [X, Y],
        feed={K.learning_phase(): 0},
        args={'batch_size': batch_size}
    )
    X_adv, = evaluated
    return X_adv
193 | 
194 | 
def basic_iterative_method(sess, model, X, Y, eps, eps_iter, nb_iter=50,
                           clip_min=None, clip_max=None, batch_size=256):
    """
    Run the Basic Iterative Method (repeated FGSM steps) on a set of inputs.

    :param sess: TF session used to evaluate the attack graph
    :param model: Keras model; it is called on the input placeholder and its
                  predict_classes() method is used to detect misclassification
    :param X: numpy array of inputs, first axis is the sample axis
    :param Y: numpy array of one-hot labels (argmax over axis 1 is taken as
              the true class)
    :param eps: maximum total perturbation per component; every iterate is
                projected back into [X - eps, X + eps]
    :param eps_iter: step size of each individual FGSM step
    :param nb_iter: number of iterations
    :param clip_min: optional lower clipping bound handed to fgsm()
    :param clip_max: optional upper clipping bound handed to fgsm()
    :param batch_size: batch size used by cleverhans' batch_eval
    :return: a tuple (its, results). `its` maps a sample index to the first
             iteration at which that sample was misclassified (defaulting to
             nb_iter - 1 for samples that never were); `results` holds the
             adversarial inputs after every iteration, shape
             (nb_iter,) + X.shape.
    """
    # Define TF placeholders for the input and output
    x = tf.placeholder(tf.float32, shape=(None,) + X.shape[1:])
    y = tf.placeholder(tf.float32, shape=(None,) + Y.shape[1:])
    # Build the single-step attack graph ONCE. It is identical for every
    # iteration; re-creating it inside the loop would add another copy of the
    # model and its gradient ops to the TF graph on each pass.
    adv_x = fgsm(
        x, model(x), eps=eps_iter,
        clip_min=clip_min, clip_max=clip_max, y=y
    )
    feed = {K.learning_phase(): 0}

    # results will hold the adversarial inputs at each iteration of BIM;
    # thus it will have shape (nb_iter, n_samples, n_rows, n_cols, n_channels)
    results = np.zeros((nb_iter, X.shape[0],) + X.shape[1:])
    # Initialize adversarial samples as the original samples, set upper and
    # lower bounds
    X_adv = X
    X_min = X - eps
    X_max = X + eps
    # The true classes never change, so compute them once.
    true_classes = Y.argmax(axis=1)
    print('Running BIM iterations...')
    # "its" is a dictionary that keeps track of the iteration at which each
    # sample becomes misclassified. The default value will be (nb_iter-1), the
    # very last iteration.
    its = defaultdict(lambda: nb_iter - 1)
    # Out keeps track of which samples have already been misclassified
    out = set()
    for i in tqdm(range(nb_iter)):
        X_adv, = batch_eval(
            sess, [x, y], [adv_x],
            [X_adv, Y], feed=feed,
            args={'batch_size': batch_size}
        )
        # Project back into the eps-ball around the original samples
        X_adv = np.maximum(np.minimum(X_adv, X_max), X_min)
        results[i] = X_adv
        # check misclassifieds
        predictions = model.predict_classes(X_adv, batch_size=512, verbose=0)
        misclassifieds = np.where(predictions != true_classes)[0]
        for elt in misclassifieds:
            if elt not in out:
                its[elt] = i
                out.add(elt)

    return its, results
252 | 
253 | 
def saliency_map_method(sess, model, X, Y, theta, gamma, clip_min=None,
                        clip_max=None):
    """
    Craft JSMA adversarial samples, one input at a time, each aimed at a
    randomly chosen wrong class.

    :param sess: TF session used to evaluate the attack graph
    :param model: callable applied to the input placeholder; its output is
                  used both for the Jacobian graph and for the predictions
    :param X: numpy array of inputs, first axis is the sample axis
    :param Y: numpy array of one-hot labels; Y.shape[1] is the class count
    :param theta: delta for each feature adjustment (passed to jsma)
    :param gamma: maximum distortion fraction (passed to jsma)
    :param clip_min: lower bound for feature values. jsma() compares feature
                     values against it, so pass a number despite the None
                     default.
    :param clip_max: upper bound for feature values. jsma() compares feature
                     values against it, so pass a number despite the None
                     default.
    :return: numpy array of adversarial samples with the same shape as X
    """
    nb_classes = Y.shape[1]
    # Define TF placeholder for the input
    x = tf.placeholder(tf.float32, shape=(None,) + X.shape[1:])
    # Apply the model to the placeholder ONCE and share the resulting tensor.
    # Calling model(x) per sample would add a fresh copy of the model's ops to
    # the TF graph for every input.
    predictions = model(x)
    # Define model gradients
    grads = jacobian_graph(predictions, x, nb_classes)
    X_adv = np.zeros_like(X)
    for i in tqdm(range(len(X))):
        current_class = int(np.argmax(Y[i]))
        # Pick the attack target uniformly among the other classes
        target_class = np.random.choice(other_classes(nb_classes, current_class))
        X_adv[i], _, _ = jsma(
            sess, x, predictions, grads, X[i:(i+1)], target_class, theta=theta,
            gamma=gamma, increase=True, nb_classes=nb_classes,
            clip_min=clip_min, clip_max=clip_max
        )

    return X_adv
284 | 
285 | 


--------------------------------------------------------------------------------
/craft_adv_examples.py:
--------------------------------------------------------------------------------
  1 | from __future__ import absolute_import
  2 | from __future__ import print_function
  3 | 
  4 | import os
  5 | import argparse
  6 | import warnings
  7 | import numpy as np
  8 | import tensorflow as tf
  9 | import keras.backend as K
 10 | from keras.models import load_model
 11 | 
 12 | from util import get_data, get_model, cross_entropy
 13 | from attacks import fast_gradient_sign_method, basic_iterative_method, saliency_map_method
 14 | from cw_attacks import CarliniL2, CarliniLID
 15 | 
# Per-dataset attack settings.
#   eps / eps_iter: total perturbation budget and per-step size used by the
#                   FGSM and BIM attacks.
#   image_size / num_channels / num_labels: input geometry and class count
#                   handed to the C&W attack constructors.
ATTACK_PARAMS = {
    'mnist': {'eps': 0.40, 'eps_iter': 0.010, 'image_size': 28, 'num_channels': 1, 'num_labels': 10},
    'cifar': {'eps': 0.050, 'eps_iter': 0.005, 'image_size': 32, 'num_channels': 3, 'num_labels': 10},
    'svhn': {'eps': 0.130, 'eps_iter': 0.010, 'image_size': 32, 'num_channels': 3, 'num_labels': 10}
}

# Clipping range applied to crafted samples by the FGSM, BIM and JSMA attacks.
# Alternative [0, 1] range, kept for reference:
# CLIP_MIN = 0.0
# CLIP_MAX = 1.0
CLIP_MIN = -0.5
CLIP_MAX = 0.5
# Directory holding the trained model files and the saved adversarial samples.
PATH_DATA = "data/"
 28 | 
 29 | def craft_one_type(sess, model, X, Y, dataset, attack, batch_size):
 30 |     """
 31 |     TODO
 32 |     :param sess:
 33 |     :param model:
 34 |     :param X:
 35 |     :param Y:
 36 |     :param dataset:
 37 |     :param attack:
 38 |     :param batch_size:
 39 |     :return:
 40 |     """
 41 |     if attack == 'fgsm':
 42 |         # FGSM attack
 43 |         print('Crafting fgsm adversarial samples...')
 44 |         X_adv = fast_gradient_sign_method(
 45 |             sess, model, X, Y, eps=ATTACK_PARAMS[dataset]['eps'], clip_min=CLIP_MIN,
 46 |             clip_max=CLIP_MAX, batch_size=batch_size
 47 |         )
 48 |     elif attack in ['bim-a', 'bim-b']:
 49 |         # BIM attack
 50 |         print('Crafting %s adversarial samples...' % attack)
 51 |         its, results = basic_iterative_method(
 52 |             sess, model, X, Y, eps=ATTACK_PARAMS[dataset]['eps'],
 53 |             eps_iter=ATTACK_PARAMS[dataset]['eps_iter'], clip_min=CLIP_MIN,
 54 |             clip_max=CLIP_MAX, batch_size=batch_size
 55 |         )
 56 |         if attack == 'bim-a':
 57 |             # BIM-A
 58 |             # For each sample, select the time step where that sample first
 59 |             # became misclassified
 60 |             X_adv = np.asarray([results[its[i], i] for i in range(len(Y))])
 61 |         else:
 62 |             # BIM-B
 63 |             # For each sample, select the very last time step
 64 |             X_adv = results[-1]
 65 |     elif attack == 'jsma':
 66 |         # JSMA attack
 67 |         print('Crafting jsma adversarial samples. This may take > 5 hours')
 68 |         X_adv = saliency_map_method(
 69 |             sess, model, X, Y, theta=1, gamma=0.1, clip_min=CLIP_MIN, clip_max=CLIP_MAX
 70 |         )
 71 |     elif attack == 'cw-l2':
 72 |         # C&W attack
 73 |         print('Crafting %s examples. This takes > 5 hours due to internal grid search' % attack)
 74 |         image_size = ATTACK_PARAMS[dataset]['image_size']
 75 |         num_channels = ATTACK_PARAMS[dataset]['num_channels']
 76 |         num_labels = ATTACK_PARAMS[dataset]['num_labels']
 77 |         cw_attack = CarliniL2(sess, model, image_size, num_channels, num_labels, batch_size=batch_size)
 78 |         X_adv = cw_attack.attack(X, Y)
 79 |     elif attack == 'cw-lid':
 80 |         # C&W attack to break LID detector
 81 |         print('Crafting %s examples. This takes > 5 hours due to internal grid search' % attack)
 82 |         image_size = ATTACK_PARAMS[dataset]['image_size']
 83 |         num_channels = ATTACK_PARAMS[dataset]['num_channels']
 84 |         num_labels = ATTACK_PARAMS[dataset]['num_labels']
 85 |         cw_attack = CarliniLID(sess, model, image_size, num_channels, num_labels, batch_size=batch_size)
 86 |         X_adv = cw_attack.attack(X, Y)
 87 | 
 88 |     _, acc = model.evaluate(X_adv, Y, batch_size=batch_size, verbose=0)
 89 |     print("Model accuracy on the adversarial test set: %0.2f%%" % (100 * acc))
 90 |     np.save(os.path.join(PATH_DATA, 'Adv_%s_%s.npy' % (dataset, attack)), X_adv)
 91 |     l2_diff = np.linalg.norm(
 92 |         X_adv.reshape((len(X), -1)) -
 93 |         X.reshape((len(X), -1)),
 94 |         axis=1
 95 |     ).mean()
 96 |     print("Average L-2 perturbation size of the %s attack: %0.2f" %
 97 |           (attack, l2_diff))
 98 | 
 99 | def main(args):
100 |     assert args.dataset in ['mnist', 'cifar', 'svhn'], \
101 |         "Dataset parameter must be either 'mnist', 'cifar' or 'svhn'"
102 |     assert args.attack in ['fgsm', 'bim-a', 'bim-b', 'jsma', 'cw-l2', 'all', 'cw-lid'], \
103 |         "Attack parameter must be either 'fgsm', 'bim-a', 'bim-b', " \
104 |         "'jsma', 'cw-l2', 'all' or 'cw-lid' for attacking LID detector"
105 |     model_file = os.path.join(PATH_DATA, "model_%s.h5" % args.dataset)
106 |     assert os.path.isfile(model_file), \
107 |         'model file not found... must first train model using train_model.py.'
108 |     if args.dataset == 'svhn' and args.attack == 'cw-l2':
109 |         assert args.batch_size == 16, \
110 |         "svhn has 26032 test images, the batch_size for cw-l2 attack should be 16, " \
111 |         "otherwise, there will be error at the last batch-- needs to be fixed."
112 | 
113 | 
114 |     print('Dataset: %s. Attack: %s' % (args.dataset, args.attack))
115 |     # Create TF session, set it as Keras backend
116 |     sess = tf.Session()
117 |     K.set_session(sess)
118 |     if args.attack == 'cw-l2' or args.attack == 'cw-lid':
119 |         warnings.warn("Important: remove the softmax layer for cw attacks!")
120 |         # use softmax=False to load without softmax layer
121 |         model = get_model(args.dataset, softmax=False)
122 |         model.compile(
123 |             loss=cross_entropy,
124 |             optimizer='adadelta',
125 |             metrics=['accuracy']
126 |         )
127 |         model.load_weights(model_file)
128 |     else:
129 |         model = load_model(model_file)
130 | 
131 |     _, _, X_test, Y_test = get_data(args.dataset)
132 |     _, acc = model.evaluate(X_test, Y_test, batch_size=args.batch_size,
133 |                             verbose=0)
134 |     print("Accuracy on the test set: %0.2f%%" % (100*acc))
135 | 
136 |     if args.attack == 'cw-lid': # white box attacking LID detector - an example
137 |         X_test = X_test[:1000]
138 |         Y_test = Y_test[:1000]
139 | 
140 |     if args.attack == 'all':
141 |         # Cycle through all attacks
142 |         for attack in ['fgsm', 'bim-a', 'bim-b', 'jsma', 'cw-l2']:
143 |             craft_one_type(sess, model, X_test, Y_test, args.dataset, attack,
144 |                            args.batch_size)
145 |     else:
146 |         # Craft one specific attack type
147 |         craft_one_type(sess, model, X_test, Y_test, args.dataset, args.attack,
148 |                        args.batch_size)
149 |     print('Adversarial samples crafted and saved to %s ' % PATH_DATA)
150 |     sess.close()
151 | 
152 | 
if __name__ == "__main__":
    # Command-line entry point: parse the options and hand them to main().
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-d', '--dataset',
        help="Dataset to use; either 'mnist', 'cifar' or 'svhn'",
        required=True, type=str
    )
    # The help text lists every value main() accepts, including 'cw-lid'.
    parser.add_argument(
        '-a', '--attack',
        help="Attack to use; either 'fgsm', 'bim-a', 'bim-b', 'jsma', 'cw-l2', "
             "'all' or 'cw-lid' (for attacking the LID detector)",
        required=True, type=str
    )
    parser.add_argument(
        '-b', '--batch_size',
        help="The batch size to use for crafting and evaluating adversarial "
             "samples.",
        required=False, type=int, default=100
    )
    args = parser.parse_args()
    main(args)


--------------------------------------------------------------------------------
/cw_attacks.py:
--------------------------------------------------------------------------------
  1 | ## l0_attack.py + l2_attack.py + li_attack.py-- attack a network optimizing for l_0, l_2 or l_infinity distance
  2 | ## This is just a copy and paste from https://github.com/carlini/nn_robust_attacks.
  3 | ## TODO: merge the code?
  4 | ##
  5 | ## Copyright (C) 2016, Nicholas Carlini <nicholas@carlini.com>.
  6 | ##
  7 | ## This program is licenced under the BSD 2-Clause licence,
  8 | ## contained in the LICENCE file in this directory.
  9 | 
 10 | import sys
 11 | import tensorflow as tf
 12 | import numpy as np
 13 | from tqdm import tqdm
 14 | from cleverhans.utils import other_classes
 15 | import keras.backend as K
 16 | 
 17 | from util import lid_adv_term
 18 | 
# Default settings for the C&W L2 attack; each one is the default value of
# the correspondingly named CarliniL2 constructor argument.
L2_BINARY_SEARCH_STEPS = 9  # number of times to adjust the constant with binary search
L2_MAX_ITERATIONS = 1000    # number of iterations to perform gradient descent
L2_ABORT_EARLY = True       # if we stop improving, abort gradient descent early
L2_LEARNING_RATE = 1e-2     # larger values converge faster to less accurate results
L2_TARGETED = True          # should we target one specific class? or just be wrong?
L2_CONFIDENCE = 0           # how strong the adversarial example should be
L2_INITIAL_CONST = 1e-3     # the initial constant c to pick as a first guess
 27 | 
 28 | class CarliniL2:
    def __init__(self, sess, model, image_size, num_channels, num_labels, batch_size=100,
                 confidence=L2_CONFIDENCE, targeted=L2_TARGETED, learning_rate=L2_LEARNING_RATE,
                 binary_search_steps=L2_BINARY_SEARCH_STEPS, max_iterations=L2_MAX_ITERATIONS,
                 abort_early=L2_ABORT_EARLY,
                 initial_const=L2_INITIAL_CONST):
        """
        Build the TensorFlow graph for the L_2 optimized attack.

        This attack is the most efficient and should be used as the primary
        attack to evaluate potential defenses.

        The constructor only builds the graph; adversarial examples are
        produced by calling attack().

        sess: TF session used to run the attack.
        model: callable applied to the candidate image tensor; its output is
          treated as the BEFORE-SOFTMAX prediction.
        image_size: height and width of the (square) input images.
        num_channels: number of image channels.
        num_labels: number of classes.
        confidence: Confidence of adversarial examples: higher produces examples
          that are farther away, but more strongly classified as adversarial.
        batch_size: Number of attacks to run simultaneously. All variables and
          placeholders built here have this fixed leading dimension.
        targeted: True if we should perform a targetted attack, False otherwise.
        learning_rate: The learning rate for the attack algorithm. Smaller values
          produce better results but are slower to converge.
        binary_search_steps: The number of times we perform binary search to
          find the optimal tradeoff-constant between distance and confidence.
        max_iterations: The maximum number of iterations. Larger values are more
          accurate; setting too small will require a large learning rate and will
          produce poor results.
        abort_early: If true, allows early aborts if gradient descent gets stuck.
        initial_const: The initial tradeoff-constant to use to tune the relative
          importance of distance and confidence. If binary_search_steps is large,
          the initial constant is not important.
        """
        self.model = model
        self.sess = sess
        self.image_size = image_size
        self.num_channels = num_channels
        self.num_labels = num_labels

        self.TARGETED = targeted
        self.LEARNING_RATE = learning_rate
        self.MAX_ITERATIONS = max_iterations
        self.BINARY_SEARCH_STEPS = binary_search_steps
        self.ABORT_EARLY = abort_early
        self.CONFIDENCE = confidence
        self.initial_const = initial_const
        self.batch_size = batch_size

        # With 10 or more binary search steps, attack_batch repeats the search
        # once on its last step using the upper bound as the constant.
        self.repeat = binary_search_steps >= 10

        shape = (self.batch_size, self.image_size, self.image_size, self.num_channels)

        # the variable we're going to optimize over
        modifier = tf.Variable(np.zeros(shape, dtype=np.float32))
        # largest entry of the modifier, exposed as a tensor
        self.max_mod = tf.reduce_max(modifier)

        # these are variables to be more efficient in sending data to tf
        self.timg = tf.Variable(np.zeros(shape), dtype=tf.float32)
        self.tlab = tf.Variable(np.zeros((self.batch_size, self.num_labels)), dtype=tf.float32)
        self.const = tf.Variable(np.zeros(self.batch_size), dtype=tf.float32)

        # and here's what we use to assign them
        self.assign_timg = tf.placeholder(tf.float32, shape)
        self.assign_tlab = tf.placeholder(tf.float32, (self.batch_size, self.num_labels))
        self.assign_const = tf.placeholder(tf.float32, [self.batch_size])

        # the resulting image, tanh'd to keep bounded from -0.5 to 0.5
        self.newimg = tf.tanh(modifier + self.timg) / 2

        # prediction BEFORE-SOFTMAX of the model
        self.output = self.model(self.newimg)

        # distance to the input data (per-sample sum of squared differences)
        self.l2dist = tf.reduce_sum(tf.square(self.newimg - tf.tanh(self.timg) / 2), [1, 2, 3])

        # compute the probability of the label class versus the maximum other
        real = tf.reduce_sum((self.tlab) * self.output, 1)
        # subtracting 10000 at the label position keeps it out of the maximum
        other = tf.reduce_max((1 - self.tlab) * self.output - (self.tlab * 10000), 1)

        if self.TARGETED:
            # if targetted, optimize for making the other class most likely
            loss1 = tf.maximum(0.0, other - real + self.CONFIDENCE)
        else:
            # if untargeted, optimize for making this class least likely.
            loss1 = tf.maximum(0.0, real - other + self.CONFIDENCE)

        # sum up the losses
        self.loss2 = tf.reduce_sum(self.l2dist)
        self.loss1 = tf.reduce_sum(self.const * loss1)
        self.loss = self.loss1 + self.loss2
        # largest entry of the loss gradient w.r.t. the modifier
        self.grads = tf.reduce_max(tf.gradients(self.loss, [modifier]))

        # Setup the adam optimizer and keep track of variables we're creating
        # (the global-variable sets before and after are diffed by name)
        start_vars = set(x.name for x in tf.global_variables())
        optimizer = tf.train.AdamOptimizer(self.LEARNING_RATE)
        self.train = optimizer.minimize(self.loss, var_list=[modifier])
        end_vars = tf.global_variables()
        new_vars = [x for x in end_vars if x.name not in start_vars]

        # these are the variables to initialize when we run
        self.setup = []
        self.setup.append(self.timg.assign(self.assign_timg))
        self.setup.append(self.tlab.assign(self.assign_tlab))
        self.setup.append(self.const.assign(self.assign_const))

        # re-initializes the modifier and the optimizer's own variables only;
        # the model's variables are not part of this initializer
        self.init = tf.variables_initializer(var_list=[modifier] + new_vars)
131 | 
132 |     def attack(self, X, Y):
133 |         """
134 |         Perform the L_2 attack on the given images for the given targets.
135 | 
136 |         :param X: samples to generate advs
137 |         :param Y: the original class labels
138 |         If self.targeted is true, then the targets represents the target labels.
139 |         If self.targeted is false, then targets are the original class labels.
140 |         """
141 |         nb_classes = Y.shape[1]
142 | 
143 |         # random select target class for targeted attack
144 |         y_target = np.copy(Y)
145 |         if self.TARGETED:
146 |             for i in range(Y.shape[0]):
147 |                 current = int(np.argmax(Y[i]))
148 |                 target = np.random.choice(other_classes(nb_classes, current))
149 |                 y_target[i] = np.eye(nb_classes)[target]
150 | 
151 |         X_adv = np.zeros_like(X)
152 |         for i in tqdm(range(0, X.shape[0], self.batch_size)):
153 |             start = i
154 |             end = i + self.batch_size
155 |             end = np.minimum(end, X.shape[0])
156 |             X_adv[start:end] = self.attack_batch(X[start:end], y_target[start:end])
157 | 
158 |         return X_adv
159 | 
160 |     def attack_batch(self, imgs, labs):
161 |         """
162 |         Run the attack on a batch of images and labels.
163 |         """
164 | 
165 |         def compare(x, y):
166 |             if not isinstance(x, (float, int, np.int64)):
167 |                 x = np.copy(x)
168 |                 x[y] -= self.CONFIDENCE
169 |                 x = np.argmax(x)
170 |             if self.TARGETED:
171 |                 return x == y
172 |             else:
173 |                 return x != y
174 | 
175 |         # batch_size = self.batch_size
176 |         batch_size = imgs.shape[0]
177 | 
178 |         # convert to tanh-space
179 |         imgs = np.arctanh(imgs * 1.999999)
180 | 
181 |         # set the lower and upper bounds accordingly
182 |         lower_bound = np.zeros(batch_size)
183 |         CONST = np.ones(batch_size) * self.initial_const
184 |         upper_bound = np.ones(batch_size) * 1e10
185 | 
186 |         # the best l2, score, and image attack
187 |         o_bestl2 = [1e10] * batch_size
188 |         o_bestscore = [-1] * batch_size
189 |         o_bestattack = [np.zeros(imgs[0].shape)] * batch_size
190 |         # o_bestattack = np.copy(imgs)
191 | 
192 |         for outer_step in range(self.BINARY_SEARCH_STEPS):
193 |             # print(o_bestl2)
194 |             # completely reset adam's internal state.
195 |             self.sess.run(self.init)
196 |             batch = imgs[:batch_size]
197 |             batchlab = labs[:batch_size]
198 | 
199 |             bestl2 = [1e10] * batch_size
200 |             bestscore = [-1] * batch_size
201 | 
202 |             # The last iteration (if we run many steps) repeat the search once.
203 |             if self.repeat == True and outer_step == self.BINARY_SEARCH_STEPS - 1:
204 |                 CONST = upper_bound
205 | 
206 |             # set the variables so that we don't have to send them over again
207 |             self.sess.run(self.setup, {self.assign_timg: batch,
208 |                                        self.assign_tlab: batchlab,
209 |                                        self.assign_const: CONST})
210 | 
211 |             prev = 1e6
212 |             for iteration in range(self.MAX_ITERATIONS):
213 |                 # perform the attack
214 |                 _, l, l2s, scores, nimg = self.sess.run([self.train, self.loss,
215 |                                                          self.l2dist, self.output,
216 |                                                          self.newimg], feed_dict={K.learning_phase(): 0})
217 | 
218 |                 # print out the losses every 10%
219 |                 # if iteration % (self.MAX_ITERATIONS // 10) == 0:
220 |                 #     print(iteration, self.sess.run((self.loss, self.loss1, self.loss2, self.grads, self.max_mod), feed_dict={K.learning_phase(): 0}))
221 | 
222 |                 # check if we should abort search if we're getting nowhere.
223 |                 if self.ABORT_EARLY and iteration % (self.MAX_ITERATIONS // 10) == 0:
224 |                     if l > prev * .9999:
225 |                         break
226 |                     prev = l
227 | 
228 |                 # adjust the best result found so far
229 |                 for e, (l2, sc, ii) in enumerate(zip(l2s, scores, nimg)):
230 |                     if l2 < bestl2[e] and compare(sc, np.argmax(batchlab[e])):
231 |                         bestl2[e] = l2
232 |                         bestscore[e] = np.argmax(sc)
233 |                     if l2 < o_bestl2[e] and compare(sc, np.argmax(batchlab[e])):
234 |                         # print('l2:', l2, 'bestl2[e]: ', bestl2[e])
235 |                         # print('score:', np.argmax(sc), 'bestscore[e]:', bestscore[e])
236 |                         # print('np.argmax(batchlab[e]):', np.argmax(batchlab[e]))
237 |                         o_bestl2[e] = l2
238 |                         o_bestscore[e] = np.argmax(sc)
239 |                         o_bestattack[e] = ii
240 | 
241 |             # adjust the constant as needed
242 |             for e in range(batch_size):
243 |                 if compare(bestscore[e], np.argmax(batchlab[e])) and bestscore[e] != -1:
244 |                     # success, divide const by two
245 |                     upper_bound[e] = min(upper_bound[e], CONST[e])
246 |                     if upper_bound[e] < 1e9:
247 |                         CONST[e] = (lower_bound[e] + upper_bound[e]) / 2
248 |                 else:
249 |                     # failure, either multiply by 10 if no solution found yet
250 |                     #          or do binary search with the known upper bound
251 |                     lower_bound[e] = max(lower_bound[e], CONST[e])
252 |                     if upper_bound[e] < 1e9:
253 |                         CONST[e] = (lower_bound[e] + upper_bound[e]) / 2
254 |                     else:
255 |                         CONST[e] *= 10
256 | 
257 |         # return the best solution found
258 |         o_bestl2 = np.array(o_bestl2)
259 |         print('sucess rate: %.4f' % (1-np.sum(o_bestl2==1e10)/self.batch_size))
260 |         return o_bestattack
261 | 
class CarliniLID:
    """
    L_2 optimisation attack whose loss carries an additional LID term
    (lid_adv_term) next to the usual classification margin term.
    """

    def __init__(self, sess, model, image_size, num_channels, num_labels, batch_size=100,
                 confidence=L2_CONFIDENCE, targeted=L2_TARGETED, learning_rate=L2_LEARNING_RATE,
                 binary_search_steps=L2_BINARY_SEARCH_STEPS, max_iterations=L2_MAX_ITERATIONS,
                 abort_early=L2_ABORT_EARLY,
                 initial_const=L2_INITIAL_CONST):
        """
        The modified L_2 optimized attack to break LID detector.

        Builds the whole TensorFlow graph of the attack for exactly
        batch_size square images of image_size x image_size x num_channels.

        sess: TensorFlow session used to run the attack.
        model: callable applied to the image tensor; its output is used as
          the per-class scores.
        image_size: height and width of the (square) input images.
        num_channels: number of image channels.
        num_labels: number of classes.
        confidence: Confidence of adversarial examples: higher produces examples
          that are farther away, but more strongly classified as adversarial.
        batch_size: Number of attacks to run simultaneously.
        targeted: True if we should perform a targetted attack, False otherwise.
        learning_rate: The learning rate for the attack algorithm. Smaller values
          produce better results but are slower to converge.
        binary_search_steps: The number of times we perform binary search to
          find the optimal tradeoff-constant between distance and confidence.
        max_iterations: The maximum number of iterations. Larger values are more
          accurate; setting too small will require a large learning rate and will
          produce poor results.
        abort_early: If true, allows early aborts if gradient descent gets stuck.
        initial_const: The initial tradeoff-constant to use to tune the relative
          importance of distance and confidence. If binary_search_steps is large,
          the initial constant is not important.
        """
        self.model = model
        self.sess = sess
        self.image_size = image_size
        self.num_channels = num_channels
        self.num_labels = num_labels

        self.TARGETED = targeted
        self.LEARNING_RATE = learning_rate
        self.MAX_ITERATIONS = max_iterations
        self.BINARY_SEARCH_STEPS = binary_search_steps
        self.ABORT_EARLY = abort_early
        self.CONFIDENCE = confidence
        self.initial_const = initial_const
        self.batch_size = batch_size

        # with many binary search steps the last one re-runs the upper bound
        self.repeat = binary_search_steps >= 10

        shape = (self.batch_size, self.image_size, self.image_size, self.num_channels)

        # the variable we're going to optimize over
        modifier = tf.Variable(np.zeros(shape, dtype=np.float32))
        self.max_mod = tf.reduce_max(modifier)

        # these are variables to be more efficient in sending data to tf
        self.timg = tf.Variable(np.zeros(shape), dtype=tf.float32)
        self.tlab = tf.Variable(np.zeros((self.batch_size, self.num_labels)), dtype=tf.float32)
        self.const = tf.Variable(np.zeros(self.batch_size), dtype=tf.float32)

        # and here's what we use to assign them
        self.assign_timg = tf.placeholder(tf.float32, shape)
        self.assign_tlab = tf.placeholder(tf.float32, (self.batch_size, self.num_labels))
        self.assign_const = tf.placeholder(tf.float32, [self.batch_size])

        # the resulting image, tanh'd to keep bounded from -0.5 to 0.5
        self.newimg = tf.tanh(modifier + self.timg) / 2

        # model output for the perturbed image; the margin loss below treats
        # it as pre-softmax scores -- TODO confirm the supplied model has no
        # softmax on top
        self.output = self.model(self.newimg)

        # distance to the input data (sum of squared differences per image)
        self.l2dist = tf.reduce_sum(tf.square(self.newimg - tf.tanh(self.timg) / 2), [1, 2, 3])

        # score of the label class versus the maximum score of any other class
        real = tf.reduce_sum((self.tlab) * self.output, 1)
        other = tf.reduce_max((1 - self.tlab) * self.output - (self.tlab * 10000), 1)

        if self.TARGETED:
            # if targetted, optimize for making the other class most likely
            loss1 = tf.maximum(0.0, other - real + self.CONFIDENCE)
        else:
            # if untargeted, optimize for making this class least likely.
            loss1 = tf.maximum(0.0, real - other + self.CONFIDENCE)

        # add lid loss to the attack; clean_logits is fed with the model
        # output for the unperturbed batch (see attack_batch)
        self.clean_logits = tf.placeholder(tf.float32, (1, self.batch_size, None))
        loss_lid = lid_adv_term(self.clean_logits, self.output, self.batch_size)

        # sum up the losses
        self.loss2 = tf.reduce_sum(self.l2dist)
        self.loss1 = tf.reduce_sum(self.const * (loss1 + loss_lid))
        self.loss = self.loss1 + self.loss2
        self.grads = tf.reduce_max(tf.gradients(self.loss, [modifier]))

        # Setup the adam optimizer and keep track of variables we're creating
        start_vars = set(x.name for x in tf.global_variables())
        optimizer = tf.train.AdamOptimizer(self.LEARNING_RATE)
        self.train = optimizer.minimize(self.loss, var_list=[modifier])
        end_vars = tf.global_variables()
        new_vars = [x for x in end_vars if x.name not in start_vars]

        # these are the variables to initialize when we run
        self.setup = []
        self.setup.append(self.timg.assign(self.assign_timg))
        self.setup.append(self.tlab.assign(self.assign_tlab))
        self.setup.append(self.const.assign(self.assign_const))

        # re-initialises the perturbation and the optimizer's own variables
        self.init = tf.variables_initializer(var_list=[modifier] + new_vars)

    def attack(self, X, Y):
        """
        Craft adversarial examples for every sample in X, one batch at a time.

        :param X: samples to generate advs for; the first axis indexes samples.
        :param Y: the original class labels, one row per sample with one
            column per class (the class is taken as the argmax of each row).
        :return: array with the same shape and dtype as X that holds, for
            every sample, the image returned by attack_batch.

        If self.TARGETED is true, a random target class different from the
        original class is drawn for every sample and attack_batch is run
        towards it. Otherwise the original labels in Y are passed on as-is.
        """
        nb_classes = Y.shape[1]

        # random select target class for targeted attack
        y_target = np.copy(Y)
        if self.TARGETED:
            for i in range(Y.shape[0]):
                current = int(np.argmax(Y[i]))
                target = np.random.choice(other_classes(nb_classes, current))
                y_target[i] = np.eye(nb_classes)[target]

        n_samples = X.shape[0]
        X_adv = np.zeros_like(X)
        for start in tqdm(range(0, n_samples, self.batch_size)):
            end = min(start + self.batch_size, n_samples)
            n_valid = end - start
            x_batch = X[start:end]
            y_batch = y_target[start:end]

            # The variables and placeholders created in __init__ have exactly
            # self.batch_size rows, so a shorter final batch cannot be fed.
            # Pad it by repeating its last sample and drop the padded results.
            # NOTE: the duplicated rows are also part of the batch that
            # lid_adv_term sees.
            n_pad = self.batch_size - n_valid
            if n_pad > 0:
                x_batch = np.concatenate(
                    (x_batch, np.repeat(x_batch[-1:], n_pad, axis=0)))
                y_batch = np.concatenate(
                    (y_batch, np.repeat(y_batch[-1:], n_pad, axis=0)))

            adv_batch = self.attack_batch(x_batch, y_batch)
            X_adv[start:end] = adv_batch[:n_valid]

        return X_adv

    def attack_batch(self, imgs, labs):
        """
        Run the attack on a batch of images and labels.

        For self.BINARY_SEARCH_STEPS rounds the per-sample trade-off constant
        is adjusted (x10 until a first success, bisection afterwards); inside
        each round the perturbation is optimised for at most
        self.MAX_ITERATIONS steps of self.train.

        :param imgs: batch of images. They are scaled by 1.999999 and passed
            through arctanh, so values are presumably expected in
            [-0.5, 0.5] -- TODO confirm against the data pipeline.
        :param labs: one label row per image (target labels if self.TARGETED,
            otherwise the labels to move away from).
        :return: list with one image per input sample: the successful
            adversarial image with the smallest self.l2dist value seen, or an
            all-zero array when no successful image was found.
        """

        def compare(x, y):
            # x is either a score vector or an already decided class index
            # (-1 meaning "nothing found yet").
            if not isinstance(x, (float, int, np.integer)):
                x = np.copy(x)
                # Mirror the margin used in loss1: handicap the target class
                # when targeted, favour the true class when untargeted.
                # Subtracting in both cases would count still-correctly-
                # classified images as successes for CONFIDENCE > 0.
                if self.TARGETED:
                    x[y] -= self.CONFIDENCE
                else:
                    x[y] += self.CONFIDENCE
                x = np.argmax(x)
            if self.TARGETED:
                return x == y
            else:
                return x != y

        # batch_size = self.batch_size
        batch_size = imgs.shape[0]

        # convert to tanh-space
        imgs = np.arctanh(imgs * 1.999999)

        # set the lower and upper bounds accordingly
        lower_bound = np.zeros(batch_size)
        CONST = np.ones(batch_size) * self.initial_const
        upper_bound = np.ones(batch_size) * 1e10

        # the best l2 and image attack over all binary search rounds
        o_bestl2 = [1e10] * batch_size
        o_bestattack = [np.zeros(imgs[0].shape)] * batch_size

        # class index of every label row, computed once instead of per step
        label_idx = [np.argmax(lab) for lab in labs[:batch_size]]

        # Interval of the early-abort check (every 10% of the iterations);
        # at least 1 so that MAX_ITERATIONS < 10 does not trigger a modulo by
        # zero.
        check_every = max(1, self.MAX_ITERATIONS // 10)

        for outer_step in range(self.BINARY_SEARCH_STEPS):
            # completely reset adam's internal state.
            self.sess.run(self.init)
            batch = imgs[:batch_size]
            batchlab = labs[:batch_size]

            # best result of this round only; drives the constant update
            bestl2 = [1e10] * batch_size
            bestscore = [-1] * batch_size

            # The last iteration (if we run many steps) repeat the search once.
            if self.repeat and outer_step == self.BINARY_SEARCH_STEPS - 1:
                CONST = upper_bound

            # set the variables so that we don't have to send them over again
            self.sess.run(self.setup, {self.assign_timg: batch,
                                       self.assign_tlab: batchlab,
                                       self.assign_const: CONST})

            # get clean logits of clean samples: the perturbation was just
            # reset by self.init, so self.output is the model output for the
            # unperturbed batch. The one-element fetch list yields the
            # leading axis of size 1 that self.clean_logits expects.
            c_logits = self.sess.run([self.output], feed_dict={K.learning_phase(): 0})

            prev = 1e6
            for iteration in range(self.MAX_ITERATIONS):
                # perform the attack
                _, l, l2s, scores, nimg = self.sess.run([self.train, self.loss,
                                                         self.l2dist, self.output,
                                                         self.newimg], feed_dict={K.learning_phase(): 0,
                                                                                  self.clean_logits: c_logits})

                # check if we should abort search if we're getting nowhere.
                if self.ABORT_EARLY and iteration % check_every == 0:
                    if l > prev * .9999:
                        break
                    prev = l

                # adjust the best result found so far
                for e, (l2, sc, ii) in enumerate(zip(l2s, scores, nimg)):
                    if not compare(sc, label_idx[e]):
                        continue
                    if l2 < bestl2[e]:
                        bestl2[e] = l2
                        bestscore[e] = np.argmax(sc)
                    if l2 < o_bestl2[e]:
                        o_bestl2[e] = l2
                        o_bestattack[e] = ii

            # adjust the constant as needed
            for e in range(batch_size):
                if compare(bestscore[e], label_idx[e]) and bestscore[e] != -1:
                    # success, divide const by two
                    upper_bound[e] = min(upper_bound[e], CONST[e])
                    if upper_bound[e] < 1e9:
                        CONST[e] = (lower_bound[e] + upper_bound[e]) / 2
                else:
                    # failure, either multiply by 10 if no solution found yet
                    #          or do binary search with the known upper bound
                    lower_bound[e] = max(lower_bound[e], CONST[e])
                    if upper_bound[e] < 1e9:
                        CONST[e] = (lower_bound[e] + upper_bound[e]) / 2
                    else:
                        CONST[e] *= 10

        # return the best solution found
        o_bestl2 = np.array(o_bestl2)
        # samples still at the 1e10 sentinel never produced a successful
        # image; the rate is relative to the samples actually attacked here
        print('sucess rate: %.4f' % (1 - np.sum(o_bestl2 == 1e10) / float(batch_size)))
        return o_bestattack
503 | 


--------------------------------------------------------------------------------
/detect_adv_examples.py:
--------------------------------------------------------------------------------
  1 | from __future__ import absolute_import
  2 | from __future__ import print_function
  3 | 
  4 | import os
  5 | import argparse
  6 | import numpy as np
  7 | from sklearn.preprocessing import scale, MinMaxScaler, StandardScaler
  8 | from sklearn.metrics import accuracy_score, precision_score, recall_score
  9 | from util import (random_split, block_split, train_lr, compute_roc)
 10 | 
# Dataset names accepted by detect() for args.dataset.
DATASETS = ['mnist', 'cifar', 'svhn']
# Attack names accepted by detect() for args.attack and args.test_attack.
ATTACKS = ['fgsm', 'bim-a', 'bim-b', 'jsma', 'cw-l2']
# Characteristic names accepted (comma-separated) in args.characteristics.
CHARACTERISTICS = ['kd', 'bu', 'lid']
# Directory the "<characteristic>_<dataset>_<attack>.npy" files are read from.
PATH_DATA = "data/"
# NOTE(review): not referenced in the visible part of this file.
PATH_IMAGES = "plots/"
 16 | 
 17 | def load_characteristics(dataset, attack, characteristics):
 18 |     """
 19 |     Load multiple characteristics for one dataset and one attack.
 20 |     :param dataset: 
 21 |     :param attack: 
 22 |     :param characteristics: 
 23 |     :return: 
 24 |     """
 25 |     X, Y = None, None
 26 |     for characteristic in characteristics:
 27 |         # print("  -- %s" % characteristics)
 28 |         file_name = os.path.join(PATH_DATA, "%s_%s_%s.npy" % (characteristic, dataset, attack))
 29 |         data = np.load(file_name)
 30 |         if X is None:
 31 |             X = data[:, :-1]
 32 |         else:
 33 |             X = np.concatenate((X, data[:, :-1]), axis=1)
 34 |         if Y is None:
 35 |             Y = data[:, -1] # labels only need to load once
 36 | 
 37 |     return X, Y
 38 | 
 39 | def detect(args):
 40 |     assert args.dataset in DATASETS, \
 41 |         "Dataset parameter must be either 'mnist', 'cifar' or 'svhn'"
 42 |     assert args.attack in ATTACKS, \
 43 |         "Train attack must be either 'fgsm', 'bim-a', 'bim-b', " \
 44 |         "'jsma', 'cw-l2'"
 45 |     assert args.test_attack in ATTACKS, \
 46 |         "Test attack must be either 'fgsm', 'bim-a', 'bim-b', " \
 47 |         "'jsma', 'cw-l2'"
 48 |     characteristics = args.characteristics.split(',')
 49 |     for char in characteristics:
 50 |         assert char in CHARACTERISTICS, \
 51 |             "Characteristic(s) to use 'kd', 'bu', 'lid'"
 52 | 
 53 |     print("Loading train attack: %s" % args.attack)
 54 |     X, Y = load_characteristics(args.dataset, args.attack, characteristics)
 55 | 
 56 |     # standarization
 57 |     scaler = MinMaxScaler().fit(X)
 58 |     X = scaler.transform(X)
 59 |     # X = scale(X) # Z-norm
 60 | 
 61 |     # test attack is the same as training attack
 62 |     X_train, Y_train, X_test, Y_test = block_split(X, Y)
 63 |     if args.test_attack != args.attack:
 64 |         # test attack is a different attack
 65 |         print("Loading test attack: %s" % args.test_attack)
 66 |         X_test, Y_test = load_characteristics(args.dataset, args.test_attack, characteristics)
 67 |         _, _, X_test, Y_test = block_split(X_test, Y_test)
 68 | 
 69 |         # apply training normalizer
 70 |         X_test = scaler.transform(X_test)
 71 |         # X_test = scale(X_test) # Z-norm
 72 | 
 73 |     print("Train data size: ", X_train.shape)
 74 |     print("Test data size: ", X_test.shape)
 75 | 
 76 | 
 77 |     ## Build detector
 78 |     print("LR Detector on [dataset: %s, train_attack: %s, test_attack: %s] with:" %
 79 |                                         (args.dataset, args.attack, args.test_attack))
 80 |     lr = train_lr(X_train, Y_train)
 81 | 
 82 |     ## Evaluate detector
 83 |     y_pred = lr.predict_proba(X_test)[:, 1]
 84 |     y_label_pred = lr.predict(X_test)
 85 |     
 86 |     # AUC
 87 |     _, _, auc_score = compute_roc(Y_test, y_pred, plot=False)
 88 |     precision = precision_score(Y_test, y_label_pred)
 89 |     recall = recall_score(Y_test, y_label_pred)
 90 | 
 91 |     y_label_pred = lr.predict(X_test)
 92 |     acc = accuracy_score(Y_test, y_label_pred)
 93 |     print('Detector ROC-AUC score: %0.4f, accuracy: %.4f, precision: %.4f, recall: %.4f' % (auc_score, acc, precision, recall))
 94 | 
 95 |     return lr, auc_score, scaler
 96 | 
 97 | if __name__ == "__main__":
 98 |     parser = argparse.ArgumentParser()
 99 |     parser.add_argument(
100 |         '-d', '--dataset',
101 |         help="Dataset to use; either 'mnist', 'cifar' or 'svhn'",
102 |         required=True, type=str
103 |     )
104 |     parser.add_argument(
105 |         '-a', '--attack',
106 |         help="Attack to use train the discriminator; either 'fgsm', 'bim-a', 'bim-b', 'jsma' 'cw-l2'",
107 |         required=True, type=str
108 |     )
109 |     parser.add_argument(
110 |         '-r', '--characteristics',
111 |         help="Characteristic(s) to use any combination in ['kd', 'bu', 'lid'] "
112 |              "separated by comma, for example: kd,bu",
113 |         required=True, type=str
114 |     )
115 |     parser.add_argument(
116 |         '-t', '--test_attack',
117 |         help="Characteristic(s) to cross-test the discriminator.",
118 |         required=False, type=str
119 |     )
120 |     parser.add_argument(
121 |         '-b', '--batch_size',
122 |         help="The batch size to use for training.",
123 |         required=False, type=int
124 |     )
125 |     parser.set_defaults(batch_size=100)
126 |     parser.set_defaults(test_attack=None)
127 |     args = parser.parse_args()
128 |     detect(args)
129 | 


--------------------------------------------------------------------------------
/extract_characteristics.py:
--------------------------------------------------------------------------------
  1 | from __future__ import absolute_import
  2 | from __future__ import print_function
  3 | 
  4 | import os
  5 | import argparse
  6 | import warnings
  7 | import numpy as np
  8 | from sklearn.neighbors import KernelDensity
  9 | from keras.models import load_model
 10 | 
 11 | from util import (get_data, get_noisy_samples, get_mc_predictions,
 12 |                       get_deep_representations, score_samples, normalize,
 13 |                       get_lids_random_batch, get_kmeans_random_batch)
 14 | 
# Gaussian KDE bandwidth per dataset, looked up by dataset name in get_kd().
# In the original paper, the author used optimal KDE bandwidths dataset-wise
#  that were determined from CV tuning
BANDWIDTHS = {'mnist': 3.7926, 'cifar': 0.26, 'svhn': 1.00}

# Disabled alternative: one bandwidth per class instead of one per dataset.
# Here we further tune bandwidth for each of the 10 classes in mnist, cifar and svhn
# Run tune_kernal_density.py to get the following settings.
# BANDWIDTHS = {'mnist': [0.2637, 0.1274, 0.2637, 0.2637, 0.2637, 0.2637, 0.2637, 0.2069, 0.3360, 0.2637],
#               'cifar': [0.1000, 0.1000, 0.1000, 0.1000, 0.1000, 0.1000, 0.1000, 0.1000, 0.1000, 0.1000],
#               'svhn': [0.1000, 0.1000, 0.1000, 0.1000, 0.1000, 0.1000, 0.1000, 0.1274, 0.1000, 0.1000]}

# Relative directory names; NOTE(review): neither is referenced in the
# visible part of this file.
PATH_DATA = "data/"
PATH_IMAGES = "plots/"
 27 | 
 28 | def merge_and_generate_labels(X_pos, X_neg):
 29 |     """
 30 |     merge positve and nagative artifact and generate labels
 31 |     :param X_pos: positive samples
 32 |     :param X_neg: negative samples
 33 |     :return: X: merged samples, 2D ndarray
 34 |              y: generated labels (0/1): 2D ndarray same size as X
 35 |     """
 36 |     X_pos = np.asarray(X_pos, dtype=np.float32)
 37 |     print("X_pos: ", X_pos.shape)
 38 |     X_pos = X_pos.reshape((X_pos.shape[0], -1))
 39 | 
 40 |     X_neg = np.asarray(X_neg, dtype=np.float32)
 41 |     print("X_neg: ", X_neg.shape)
 42 |     X_neg = X_neg.reshape((X_neg.shape[0], -1))
 43 | 
 44 |     X = np.concatenate((X_pos, X_neg))
 45 |     y = np.concatenate((np.ones(X_pos.shape[0]), np.zeros(X_neg.shape[0])))
 46 |     y = y.reshape((X.shape[0], 1))
 47 | 
 48 |     return X, y
 49 | 
 50 | 
 51 | def get_kd(model, X_train, Y_train, X_test, X_test_noisy, X_test_adv):
 52 |     """
 53 |     Get kernel density scores
 54 |     :param model: 
 55 |     :param X_train: 
 56 |     :param Y_train: 
 57 |     :param X_test: 
 58 |     :param X_test_noisy: 
 59 |     :param X_test_adv: 
 60 |     :return: artifacts: positive and negative examples with kd values, 
 61 |             labels: adversarial (label: 1) and normal/noisy (label: 0) examples
 62 |     """
 63 |     # Get deep feature representations
 64 |     print('Getting deep feature representations...')
 65 |     X_train_features = get_deep_representations(model, X_train,
 66 |                                                 batch_size=args.batch_size)
 67 |     X_test_normal_features = get_deep_representations(model, X_test,
 68 |                                                       batch_size=args.batch_size)
 69 |     X_test_noisy_features = get_deep_representations(model, X_test_noisy,
 70 |                                                      batch_size=args.batch_size)
 71 |     X_test_adv_features = get_deep_representations(model, X_test_adv,
 72 |                                                    batch_size=args.batch_size)
 73 |     # Train one KDE per class
 74 |     print('Training KDEs...')
 75 |     class_inds = {}
 76 |     for i in range(Y_train.shape[1]):
 77 |         class_inds[i] = np.where(Y_train.argmax(axis=1) == i)[0]
 78 |     kdes = {}
 79 |     warnings.warn("Using pre-set kernel bandwidths that were determined "
 80 |                   "optimal for the specific CNN models of the paper. If you've "
 81 |                   "changed your model, you'll need to re-optimize the "
 82 |                   "bandwidth.")
 83 |     print('bandwidth %.4f for %s' % (BANDWIDTHS[args.dataset], args.dataset))
 84 |     for i in range(Y_train.shape[1]):
 85 |         kdes[i] = KernelDensity(kernel='gaussian',
 86 |                                 bandwidth=BANDWIDTHS[args.dataset]) \
 87 |             .fit(X_train_features[class_inds[i]])
 88 |     # Get model predictions
 89 |     print('Computing model predictions...')
 90 |     preds_test_normal = model.predict_classes(X_test, verbose=0,
 91 |                                               batch_size=args.batch_size)
 92 |     preds_test_noisy = model.predict_classes(X_test_noisy, verbose=0,
 93 |                                              batch_size=args.batch_size)
 94 |     preds_test_adv = model.predict_classes(X_test_adv, verbose=0,
 95 |                                            batch_size=args.batch_size)
 96 |     # Get density estimates
 97 |     print('computing densities...')
 98 |     densities_normal = score_samples(
 99 |         kdes,
100 |         X_test_normal_features,
101 |         preds_test_normal
102 |     )
103 |     densities_noisy = score_samples(
104 |         kdes,
105 |         X_test_noisy_features,
106 |         preds_test_noisy
107 |     )
108 |     densities_adv = score_samples(
109 |         kdes,
110 |         X_test_adv_features,
111 |         preds_test_adv
112 |     )
113 | 
114 |     print("densities_normal:", densities_normal.shape)
115 |     print("densities_adv:", densities_adv.shape)
116 |     print("densities_noisy:", densities_noisy.shape)
117 | 
118 |     ## skip the normalization, you may want to try different normalizations later
119 |     ## so at this step, just save the raw values
120 |     # densities_normal_z, densities_adv_z, densities_noisy_z = normalize(
121 |     #     densities_normal,
122 |     #     densities_adv,
123 |     #     densities_noisy
124 |     # )
125 | 
126 |     densities_pos = densities_adv
127 |     densities_neg = np.concatenate((densities_normal, densities_noisy))
128 |     artifacts, labels = merge_and_generate_labels(densities_pos, densities_neg)
129 | 
130 |     return artifacts, labels
131 | 
def get_bu(model, X_test, X_test_noisy, X_test_adv, batch_size=None):
    """
    Get Bayesian uncertainty scores

    For each sample set the Monte Carlo predictions returned by
    get_mc_predictions are reduced to one value per sample: the variance over
    axis 0 followed by the mean over axis 1 of what remains (presumably the
    variance across stochastic passes averaged over classes -- TODO confirm
    against get_mc_predictions).

    :param model: model passed to get_mc_predictions
    :param X_test: normal test samples
    :param X_test_noisy: noisy test samples
    :param X_test_adv: adversarial test samples
    :param batch_size: batch size for the model calls; defaults to the
                       module-level args.batch_size set by the script
    :return: artifacts: positive and negative examples with bu values,
            labels: adversarial (label: 1) and normal/noisy (label: 0) examples
    """
    # Fall back to the script's global ``args`` only when the caller did not
    # pass a value, so the function is also usable when imported.
    if batch_size is None:
        batch_size = args.batch_size

    def _uncertainty(samples):
        # one uncertainty value per sample
        mc_preds = get_mc_predictions(model, samples, batch_size=batch_size)
        return mc_preds.var(axis=0).mean(axis=1)

    print('Getting Monte Carlo dropout variance predictions...')
    uncerts_normal = _uncertainty(X_test)
    uncerts_noisy = _uncertainty(X_test_noisy)
    uncerts_adv = _uncertainty(X_test_adv)

    print("uncerts_normal:", uncerts_normal.shape)
    print("uncerts_noisy:", uncerts_noisy.shape)
    print("uncerts_adv:", uncerts_adv.shape)

    ## skip the normalization, you may want to try different normalizations later
    ## so at this step, just save the raw values

    # adversarial samples are the positives; normal and noisy the negatives
    uncerts_pos = uncerts_adv
    uncerts_neg = np.concatenate((uncerts_normal, uncerts_noisy))
    artifacts, labels = merge_and_generate_labels(uncerts_pos, uncerts_neg)

    return artifacts, labels
172 | 
def get_lid(model, X_test, X_test_noisy, X_test_adv, k=10, batch_size=100, dataset='mnist'):
    """
    Get local intrinsic dimensionality

    The LID values themselves come from get_lids_random_batch; this function
    only reports their shapes and turns them into a labelled data set.

    :param model: model passed on to get_lids_random_batch
    :param X_test: normal test samples
    :param X_test_noisy: noisy test samples
    :param X_test_adv: adversarial test samples
    :param k: passed on to get_lids_random_batch (presumably the number of
              nearest neighbours -- TODO confirm)
    :param batch_size: passed on to get_lids_random_batch
    :param dataset: dataset name passed on to get_lids_random_batch
    :return: artifacts: positive and negative examples with lid values,
            labels: adversarial (label: 1) and normal/noisy (label: 0) examples
    """
    print('Extract local intrinsic dimensionality: k = %s' % k)
    normal_lids, noisy_lids, adv_lids = get_lids_random_batch(
        model, X_test, X_test_noisy, X_test_adv, dataset, k, batch_size)

    for tag, values in (("lids_normal:", normal_lids),
                        ("lids_noisy:", noisy_lids),
                        ("lids_adv:", adv_lids)):
        print(tag, values.shape)

    ## the raw values are kept; any normalization is left to later steps

    # adversarial samples are the positives; normal and noisy the negatives
    negatives = np.concatenate((normal_lids, noisy_lids))
    artifacts, labels = merge_and_generate_labels(adv_lids, negatives)

    return artifacts, labels
205 | 
def get_kmeans(model, X_test, X_test_noisy, X_test_adv, k=10, batch_size=100, dataset='mnist'):
    """
    Build the "mean distance to the k nearest neighbours" features.

    Used to compare density against LID. The values come from
    get_kmeans_random_batch (called with pca=True) and are returned raw
    (no normalization), so that different normalizations can be tried later.
    :param model: model forwarded to get_kmeans_random_batch
    :param X_test: normal samples
    :param X_test_noisy: noisy samples
    :param X_test_adv: adversarial samples
    :param k: neighbourhood size forwarded to get_kmeans_random_batch
    :param batch_size: batch size forwarded to get_kmeans_random_batch
    :param dataset: dataset name forwarded to get_kmeans_random_batch
    :return: artifacts: positive and negative examples with k-mean values,
            labels: adversarial (label: 1) and normal/noisy (label: 0) examples
    """
    print('Extract k means feature: k = %s' % k)
    normal_vals, noisy_vals, adv_vals = get_kmeans_random_batch(
        model, X_test, X_test_noisy, X_test_adv, dataset, k, batch_size,
        pca=True
    )
    for tag, values in (("kms_normal:", normal_vals),
                        ("kms_noisy:", noisy_vals),
                        ("kms_adv:", adv_vals)):
        print(tag, values.shape)

    # adversarial samples are the positives; normal and noisy samples
    # together form the negatives
    negatives = np.concatenate((normal_vals, noisy_vals))
    artifacts, labels = merge_and_generate_labels(adv_vals, negatives)
    return artifacts, labels
240 | 
def _save_characteristics(file_name, characteristics, labels):
    """Save characteristics and labels as one array (label columns last)."""
    out_dir = os.path.dirname(file_name)
    # Create the target folder first so that a missing directory does not
    # throw away the (potentially very long) extraction that just finished.
    if out_dir and not os.path.isdir(out_dir):
        os.makedirs(out_dir)
    np.save(file_name, np.concatenate((characteristics, labels), axis=1))


def main(args):
    """
    Extract detection characteristics for one dataset/attack pair and save them.

    Steps: load the trained model, the dataset and the adversarial samples;
    load (or craft and cache) the matching noisy samples; report accuracy and
    average L-2 perturbation size; keep only the samples whose normal version
    is classified correctly; finally extract and save the requested
    characteristic(s).
    :param args: parsed command line arguments; reads dataset, attack,
                 characteristic, k_nearest and batch_size
    :return: None
    :raises AssertionError: on an invalid argument or a missing model /
                            adversarial sample file
    :raises NotImplementedError: when args.attack is 'all'
    """
    assert args.dataset in ['mnist', 'cifar', 'svhn'], \
        "Dataset parameter must be either 'mnist', 'cifar' or 'svhn'"
    assert args.attack in ['fgsm', 'bim-a', 'bim-b', 'jsma', 'cw-l2', 'all'], \
        "Attack parameter must be either 'fgsm', 'bim-a', 'bim-b', " \
        "'jsma', 'cw-l2' or 'all'"
    assert args.characteristic in ['kd', 'bu', 'lid', 'km', 'all'], \
        "Characteristic(s) to use 'kd', 'bu', 'lid', 'km', 'all'"
    model_file = os.path.join(PATH_DATA, "model_%s.h5" % args.dataset)
    assert os.path.isfile(model_file), \
        'model file not found... must first train model using train_model.py.'
    adv_file = os.path.join(PATH_DATA, "Adv_%s_%s.npy" % (args.dataset, args.attack))
    assert os.path.isfile(adv_file), \
        'adversarial sample file not found... must first craft adversarial ' \
        'samples using craft_adv_examples.py'

    print('Loading the data and model...')
    # Load the model
    model = load_model(model_file)
    # Load the dataset
    X_train, Y_train, X_test, Y_test = get_data(args.dataset)
    # Check attack type, select adversarial and noisy samples accordingly
    print('Loading noisy and adversarial samples...')
    if args.attack == 'all':
        # TODO: implement 'all' option
        raise NotImplementedError("'All' types detector not yet implemented.")

    # Load adversarial samples
    X_test_adv = np.load(adv_file)
    print("X_test_adv: ", X_test_adv.shape)

    # There are parameters to tune for the noisy samples, so they are crafted
    # here rather than in the adversarial step, which can take many hours.
    noisy_file = os.path.join(PATH_DATA, 'Noisy_%s_%s.npy' % (args.dataset, args.attack))
    if os.path.isfile(noisy_file):
        X_test_noisy = np.load(noisy_file)
    else:
        # Craft an equal number of noisy samples
        print('Crafting %s noisy samples. ' % args.dataset)
        X_test_noisy = get_noisy_samples(X_test, X_test_adv, args.dataset, args.attack)
        np.save(noisy_file, X_test_noisy)

    # Check model accuracies on each sample type
    flat_normal = X_test.reshape((len(X_test), -1))
    for s_type, samples in zip(['normal', 'noisy', 'adversarial'],
                               [X_test, X_test_noisy, X_test_adv]):
        _, acc = model.evaluate(samples, Y_test, batch_size=args.batch_size,
                                verbose=0)
        print("Model accuracy on the %s test set: %0.2f%%" %
              (s_type, 100 * acc))
        # Compute and display average perturbation sizes
        if s_type != 'normal':
            l2_diff = np.linalg.norm(
                samples.reshape((len(X_test), -1)) - flat_normal,
                axis=1
            ).mean()
            print("Average L-2 perturbation size of the %s test set: %0.2f" %
                  (s_type, l2_diff))

    # Refine the normal, noisy and adversarial sets to only include samples for
    # which the original version was correctly classified by the model
    preds_test = model.predict_classes(X_test, verbose=0,
                                       batch_size=args.batch_size)
    inds_correct = np.where(preds_test == Y_test.argmax(axis=1))[0]
    print("Number of correctly predict images: %s" % (len(inds_correct)))

    X_test = X_test[inds_correct]
    X_test_noisy = X_test_noisy[inds_correct]
    X_test_adv = X_test_adv[inds_correct]
    print("X_test: ", X_test.shape)
    print("X_test_noisy: ", X_test_noisy.shape)
    print("X_test_adv: ", X_test_adv.shape)

    if args.characteristic == 'kd':
        # extract kernel density
        characteristics, labels = get_kd(model, X_train, Y_train, X_test, X_test_noisy, X_test_adv)
        print("KD: [characteristic shape: ", characteristics.shape, ", label shape: ", labels.shape)

        # save to file
        bandwidth = BANDWIDTHS[args.dataset]
        file_name = os.path.join(PATH_DATA, 'kd_%s_%s_%.4f.npy' % (args.dataset, args.attack, bandwidth))
        _save_characteristics(file_name, characteristics, labels)
    elif args.characteristic == 'bu':
        # extract Bayesian uncertainty
        characteristics, labels = get_bu(model, X_test, X_test_noisy, X_test_adv)
        print("BU: [characteristic shape: ", characteristics.shape, ", label shape: ", labels.shape)

        # save to file
        file_name = os.path.join(PATH_DATA, 'bu_%s_%s.npy' % (args.dataset, args.attack))
        _save_characteristics(file_name, characteristics, labels)
    elif args.characteristic == 'lid':
        # extract local intrinsic dimensionality
        characteristics, labels = get_lid(model, X_test, X_test_noisy, X_test_adv,
                                          args.k_nearest, args.batch_size, args.dataset)
        print("LID: [characteristic shape: ", characteristics.shape, ", label shape: ", labels.shape)

        # save to file
        # NOTE(review): this branch writes outside PATH_DATA (grid-search
        # folder, one file per k); the alternative kept for reference is
        # os.path.join(PATH_DATA, 'lid_%s_%s.npy' % (args.dataset, args.attack))
        file_name = os.path.join('../data_grid_search/lid_large_batch/', 'lid_%s_%s_%s.npy' %
                                 (args.dataset, args.attack, args.k_nearest))
        _save_characteristics(file_name, characteristics, labels)
    elif args.characteristic == 'km':
        # extract k means distance
        characteristics, labels = get_kmeans(model, X_test, X_test_noisy, X_test_adv,
                                             args.k_nearest, args.batch_size, args.dataset)
        print("K-Mean: [characteristic shape: ", characteristics.shape, ", label shape: ", labels.shape)

        # save to file
        file_name = os.path.join(PATH_DATA, 'km_pca_%s_%s.npy' % (args.dataset, args.attack))
        _save_characteristics(file_name, characteristics, labels)
    elif args.characteristic == 'all':
        # extract kernel density
        characteristics, labels = get_kd(model, X_train, Y_train, X_test, X_test_noisy, X_test_adv)
        file_name = os.path.join(PATH_DATA, 'kd_%s_%s.npy' % (args.dataset, args.attack))
        _save_characteristics(file_name, characteristics, labels)

        # extract Bayesian uncertainty
        characteristics, labels = get_bu(model, X_test, X_test_noisy, X_test_adv)
        file_name = os.path.join(PATH_DATA, 'bu_%s_%s.npy' % (args.dataset, args.attack))
        _save_characteristics(file_name, characteristics, labels)

        # extract local intrinsic dimensionality
        characteristics, labels = get_lid(model, X_test, X_test_noisy, X_test_adv,
                                          args.k_nearest, args.batch_size, args.dataset)
        file_name = os.path.join(PATH_DATA, 'lid_%s_%s.npy' % (args.dataset, args.attack))
        _save_characteristics(file_name, characteristics, labels)

        # k means distance extraction is currently disabled for 'all':
        # characteristics, labels = get_kmeans(model, X_test, X_test_noisy, X_test_adv,
        #                                      args.k_nearest, args.batch_size, args.dataset)
        # file_name = os.path.join(PATH_DATA, 'km_%s_%s.npy' % (args.dataset, args.attack))
        # _save_characteristics(file_name, characteristics, labels)
384 | 
385 | 
if __name__ == "__main__":
    # Command line entry point: parse the options and hand them to main().
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-d', '--dataset',
        help="Dataset to use; either 'mnist', 'cifar' or 'svhn'",
        required=True, type=str
    )
    parser.add_argument(
        '-a', '--attack',
        help="Attack to use; either 'fgsm', 'bim-a', 'bim-b', 'jsma', 'cw-l2' "
             "or 'all'",
        required=True, type=str
    )
    parser.add_argument(
        '-r', '--characteristic',
        help="Characteristic(s) to use; either 'kd', 'bu', 'lid', 'km' or 'all'",
        required=True, type=str
    )
    parser.add_argument(
        '-k', '--k_nearest',
        help="The number of nearest neighbours to use, e.g. 10, 20 or 100 "
             "(default: 20)",
        required=False, type=int, default=20
    )
    parser.add_argument(
        '-b', '--batch_size',
        help="The batch size to use for model evaluation and characteristic "
             "extraction (default: 100)",
        required=False, type=int, default=100
    )
    args = parser.parse_args()
    main(args)
418 | 


--------------------------------------------------------------------------------
/train_model.py:
--------------------------------------------------------------------------------
 1 | from __future__ import absolute_import
 2 | from __future__ import print_function
 3 | 
 4 | import argparse
 5 | from util import get_data, get_model, cross_entropy
 6 | from keras.preprocessing.image import ImageDataGenerator
 7 | import tensorflow as tf
 8 | 
 9 | 
def train(dataset='mnist', batch_size=128, epochs=50):
    """
    Train one model with data augmentation (random rotation, width/height
    shifts and horizontal flips) and save it to data/model_<dataset>.h5.
    :param dataset: name of the dataset, passed to get_data and get_model
    :param batch_size: mini-batch size produced by the augmentation generator
    :param epochs: number of training epochs
    :return: None
    """
    import os  # local import: this script has no file-level `import os`

    print('Data set: %s' % dataset)
    X_train, Y_train, X_test, Y_test = get_data(dataset)
    model = get_model(dataset)
    model.compile(
        loss='categorical_crossentropy',
        optimizer='adadelta',
        metrics=['accuracy']
    )

    # Make sure the output folder exists before the long training run, so
    # that the final model.save() cannot fail on a missing directory.
    model_file = 'data/model_%s.h5' % dataset
    out_dir = os.path.dirname(model_file)
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    # To train without data augmentation, replace the fit_generator call with:
    #     model.fit(X_train, Y_train, epochs=epochs, batch_size=batch_size,
    #               shuffle=True, verbose=1, validation_data=(X_test, Y_test))

    # training with data augmentation
    datagen = ImageDataGenerator(
        rotation_range=20,
        width_shift_range=0.2,
        height_shift_range=0.2,
        horizontal_flip=True)

    # steps_per_epoch must be an integer; round up so that the final partial
    # batch is still counted (a plain `/` is a float on Python 3 and floors
    # on Python 2).
    steps_per_epoch = (len(X_train) + batch_size - 1) // batch_size

    model.fit_generator(
        datagen.flow(X_train, Y_train, batch_size=batch_size),
        steps_per_epoch=steps_per_epoch,
        epochs=epochs,
        verbose=1,
        validation_data=(X_test, Y_test))

    model.save(model_file)
51 | 
def main(args):
    """
    Train the model for the requested dataset, or for every dataset in turn
    when args.dataset is 'all'.
    :param args: parsed command line arguments; reads dataset, batch_size
                 and epochs
    :return: None
    """
    accepted = ['mnist', 'cifar', 'svhn', 'all']
    assert args.dataset in accepted, \
        "dataset parameter must be either 'mnist', 'cifar', 'svhn' or all"

    if args.dataset != 'all':
        selected = [args.dataset]
    else:
        selected = ['mnist', 'cifar', 'svhn']

    for name in selected:
        train(name, args.batch_size, args.epochs)
65 | 
66 | 
if __name__ == "__main__":
    # Command line entry point: parse the options and hand them to main().
    cli = argparse.ArgumentParser()
    cli.add_argument(
        '-d', '--dataset',
        required=True, type=str,
        help="Dataset to use; either 'mnist', 'cifar', 'svhn' or 'all'"
    )
    cli.add_argument(
        '-e', '--epochs',
        required=False, type=int, default=120,
        help="The number of epochs to train for."
    )
    cli.add_argument(
        '-b', '--batch_size',
        required=False, type=int, default=100,
        help="The batch size to use for training."
    )
    main(cli.parse_args())
88 | 


--------------------------------------------------------------------------------
/util.py:
--------------------------------------------------------------------------------
  1 | from __future__ import absolute_import
  2 | from __future__ import print_function
  3 | 
  4 | import os
  5 | import multiprocessing as mp
  6 | from subprocess import call
  7 | import warnings
  8 | import numpy as np
  9 | import scipy.io as sio
 10 | from tqdm import tqdm
 11 | import matplotlib.pyplot as plt
 12 | from sklearn.metrics import roc_curve, auc, roc_auc_score
 13 | from sklearn.linear_model import LogisticRegressionCV
 14 | from sklearn.preprocessing import scale
 15 | import keras.backend as K
 16 | from keras.datasets import mnist, cifar10
 17 | from keras.utils import np_utils
 18 | from keras.models import Sequential
 19 | from keras.layers import Dense, Dropout, Activation, Flatten, BatchNormalization
 20 | from keras.layers import Conv2D, MaxPooling2D
 21 | from keras.regularizers import l2
 22 | import tensorflow as tf
 23 | from scipy.spatial.distance import pdist, cdist, squareform
 24 | from keras import regularizers
 25 | from sklearn.decomposition import PCA
 26 | 
 27 | # Gaussian noise scale sizes that were determined so that the average
 28 | # L-2 perturbation size is equal to that of the adversarial samples
 29 | # mnist roughly L2_difference/20
 30 | # cifar roughly L2_difference/54
 31 | # svhn roughly L2_difference/60
 32 | # be very carefully with these settings, tune to have noisy/adv have the same L2-norm
 33 | # otherwise artifact will lose its accuracy
 34 | # STDEVS = {
 35 | #     'mnist': {'fgsm': 0.264, 'bim-a': 0.111, 'bim-b': 0.184, 'cw-l2': 0.588},
 36 | #     'cifar': {'fgsm': 0.0504, 'bim-a': 0.0087, 'bim-b': 0.0439, 'cw-l2': 0.015},
 37 | #     'svhn': {'fgsm': 0.1332, 'bim-a': 0.015, 'bim-b': 0.1024, 'cw-l2': 0.0379}
 38 | # }
 39 | 
 40 | # fined tuned again when retrained all models with X in [-0.5, 0.5]
 41 | STDEVS = {
 42 |     'mnist': {'fgsm': 0.271, 'bim-a': 0.111, 'bim-b': 0.167, 'cw-l2': 0.207},
 43 |     'cifar': {'fgsm': 0.0504, 'bim-a': 0.0084, 'bim-b': 0.0428, 'cw-l2': 0.007},
 44 |     'svhn': {'fgsm': 0.133, 'bim-a': 0.0155, 'bim-b': 0.095, 'cw-l2': 0.008}
 45 | }
 46 | 
 47 | # CLIP_MIN = 0.0
 48 | # CLIP_MAX = 1.0
 49 | CLIP_MIN = -0.5
 50 | CLIP_MAX = 0.5
 51 | PATH_DATA = "data/"
 52 | 
 53 | # Set random seed
 54 | np.random.seed(0)
 55 | 
 56 | 
 57 | def get_data(dataset='mnist'):
 58 |     """
 59 |     images in [-0.5, 0.5] (instead of [0, 1]) which suits C&W attack and generally gives better performance
 60 |     
 61 |     :param dataset:
 62 |     :return: 
 63 |     """
 64 |     assert dataset in ['mnist', 'cifar', 'svhn'], \
 65 |         "dataset parameter must be either 'mnist' 'cifar' or 'svhn'"
 66 |     if dataset == 'mnist':
 67 |         # the data, shuffled and split between train and test sets
 68 |         (X_train, y_train), (X_test, y_test) = mnist.load_data()
 69 |         # reshape to (n_samples, 28, 28, 1)
 70 |         X_train = X_train.reshape(-1, 28, 28, 1)
 71 |         X_test = X_test.reshape(-1, 28, 28, 1)
 72 |     elif dataset == 'cifar':
 73 |         # the data, shuffled and split between train and test sets
 74 |         (X_train, y_train), (X_test, y_test) = cifar10.load_data()
 75 |     else:
 76 |         if not os.path.isfile(os.path.join(PATH_DATA, "svhn_train.mat")):
 77 |             print('Downloading SVHN train set...')
 78 |             call(
 79 |                 "curl -o ../data/svhn_train.mat "
 80 |                 "http://ufldl.stanford.edu/housenumbers/train_32x32.mat",
 81 |                 shell=True
 82 |             )
 83 |         if not os.path.isfile(os.path.join(PATH_DATA, "svhn_test.mat")):
 84 |             print('Downloading SVHN test set...')
 85 |             call(
 86 |                 "curl -o ../data/svhn_test.mat "
 87 |                 "http://ufldl.stanford.edu/housenumbers/test_32x32.mat",
 88 |                 shell=True
 89 |             )
 90 |         train = sio.loadmat(os.path.join(PATH_DATA,'svhn_train.mat'))
 91 |         test = sio.loadmat(os.path.join(PATH_DATA, 'svhn_test.mat'))
 92 |         X_train = np.transpose(train['X'], axes=[3, 0, 1, 2])
 93 |         X_test = np.transpose(test['X'], axes=[3, 0, 1, 2])
 94 |         # reshape (n_samples, 1) to (n_samples,) and change 1-index
 95 |         # to 0-index
 96 |         y_train = np.reshape(train['y'], (-1,)) - 1
 97 |         y_test = np.reshape(test['y'], (-1,)) - 1
 98 | 
 99 |     # cast pixels to floats, normalize to [0, 1] range
100 |     X_train = X_train.astype('float32')
101 |     X_test = X_test.astype('float32')
102 |     X_train = (X_train/255.0) - (1.0 - CLIP_MAX)
103 |     X_test = (X_test/255.0) - (1.0 - CLIP_MAX)
104 | 
105 |     # one-hot-encode the labels
106 |     Y_train = np_utils.to_categorical(y_train, 10)
107 |     Y_test = np_utils.to_categorical(y_test, 10)
108 | 
109 |     print("X_train:", X_train.shape)
110 |     print("Y_train:", Y_train.shape)
111 |     print("X_test:", X_test.shape)
112 |     print("Y_test", Y_test.shape)
113 | 
114 |     return X_train, Y_train, X_test, Y_test
115 | 
def get_model(dataset='mnist', softmax=True):
    """
    Build the (uncompiled) Keras model for the given dataset.
    :param dataset: A string indicating which dataset we are building
                    a model for: 'mnist', 'cifar' or 'svhn'.
    :param softmax: if True, a softmax activation is appended after the
                    final Dense(10) layer.
    :return: The model; a Keras 'Sequential' instance.
    """
    assert dataset in ['mnist', 'cifar', 'svhn'], \
        "dataset parameter must be either 'mnist' 'cifar' or 'svhn'"

    def conv_relu_bn(filters, **conv_kwargs):
        # 3x3 convolution -> ReLU -> batch normalization
        return [
            Conv2D(filters, (3, 3), **conv_kwargs),
            Activation('relu'),
            BatchNormalization(),
        ]

    def dense_relu_bn_drop(units, **dense_kwargs):
        # fully connected -> ReLU -> batch normalization -> dropout
        return [
            Dense(units, **dense_kwargs),
            Activation('relu'),
            BatchNormalization(),
            Dropout(0.5),
        ]

    # The layers are created strictly in stacking order and their positions
    # are kept unchanged, since other code addresses layers by index.
    if dataset == 'mnist':
        # MNIST model
        stack = (
            conv_relu_bn(64, padding='valid', input_shape=(28, 28, 1))  # 0-2
            + conv_relu_bn(64)                                          # 3-5
            + [
                MaxPooling2D(pool_size=(2, 2)),                         # 6
                Dropout(0.5),                                           # 7
                Flatten(),                                              # 8
            ]
            + dense_relu_bn_drop(128)                                   # 9-12
            + [Dense(10)]                                               # 13
        )
    elif dataset == 'cifar':
        # CIFAR-10 model
        stack = (
            conv_relu_bn(32, padding='same', input_shape=(32, 32, 3))   # 0-2
            + conv_relu_bn(32, padding='same')                          # 3-5
            + [MaxPooling2D(pool_size=(2, 2))]                          # 6
            + conv_relu_bn(64, padding='same')                          # 7-9
            + conv_relu_bn(64, padding='same')                          # 10-12
            + [MaxPooling2D(pool_size=(2, 2))]                          # 13
            + conv_relu_bn(128, padding='same')                         # 14-16
            + conv_relu_bn(128, padding='same')                         # 17-19
            + [
                MaxPooling2D(pool_size=(2, 2)),                         # 20
                Flatten(),                                              # 21
                Dropout(0.5),                                           # 22
            ]
            + dense_relu_bn_drop(1024, kernel_regularizer=l2(0.01),
                                 bias_regularizer=l2(0.01))             # 23-26
            + dense_relu_bn_drop(512, kernel_regularizer=l2(0.01),
                                 bias_regularizer=l2(0.01))             # 27-30
            + [Dense(10)]                                               # 31
        )
    else:
        # SVHN model
        stack = (
            conv_relu_bn(64, padding='valid', input_shape=(32, 32, 3))  # 0-2
            + conv_relu_bn(64)                                          # 3-5
            + [
                MaxPooling2D(pool_size=(2, 2)),                         # 6
                Dropout(0.5),                                           # 7
                Flatten(),                                              # 8
            ]
            + dense_relu_bn_drop(512)                                   # 9-12
            + dense_relu_bn_drop(128)                                   # 13-16
            + [Dense(10)]                                               # 17
        )

    if softmax:
        stack.append(Activation('softmax'))

    model = Sequential()
    for layer in stack:
        model.add(layer)

    return model
218 | 
def cross_entropy(y_true, y_pred):
    """
    Softmax cross entropy computed from logits.
    :param y_true: target labels, passed as `labels`
    :param y_pred: un-normalized predictions, passed as `logits`
    :return: the result of tf.nn.softmax_cross_entropy_with_logits
    """
    loss = tf.nn.softmax_cross_entropy_with_logits(logits=y_pred, labels=y_true)
    return loss
221 | 
def lid_term(logits, batch_size=100, k=20):
    """Calculate LID loss term for a minibatch of logits

    Every row is compared against all rows of the same minibatch; its k
    nearest neighbours (the closest entry, the row itself, is skipped) feed
    the maximum likelihood estimate  -k / sum(log(d_i / d_k)).
    :param logits: 2-D tensor, one row per sample; the minibatch needs at
                   least k + 1 rows
    :param batch_size: unused, kept for backward compatibility
    :param k: number of nearest neighbours used by the estimate (default 20,
              the value that used to be hard-coded)
    :return: 1-D tensor with one LID estimate per row of logits
    """
    # y_pred = tf.nn.softmax(logits)
    y_pred = logits

    # calculate pairwise squared distances: |a|^2 - 2 a.b + |b|^2
    r = tf.reduce_sum(tf.square(y_pred), axis=1)
    # turn r into column vector
    r = tf.reshape(r, [-1, 1])
    D = r - 2 * tf.matmul(y_pred, tf.transpose(y_pred)) + tf.transpose(r)
    # rounding can leave tiny negative values, which would turn into NaN
    # under the square root
    D = tf.maximum(D, 0.0)

    # find the k nearest neighbours: top_k of the negated distances yields
    # the k + 1 smallest distances in ascending order; drop the first one
    D1 = tf.sqrt(D + 1e-9)
    D2, _ = tf.nn.top_k(-D1, k=k + 1, sorted=True)
    D3 = -D2[:, 1:]

    # divide every distance by the distance to the k-th neighbour
    m = tf.transpose(tf.multiply(tf.transpose(D3), 1.0 / D3[:, -1]))
    v_log = tf.reduce_sum(tf.log(m + 1e-9), axis=1)  # to avoid nan
    lids = -float(k) / v_log

    ## batch normalize lids
    # lids = tf.nn.l2_normalize(lids, dim=0, epsilon=1e-12)

    return lids
250 | 
def lid_adv_term(clean_logits, adv_logits, batch_size=100, k=20):
    """Calculate LID loss term for a minibatch of advs logits

    Every adversarial row is compared against all clean rows; the closest
    clean entry is skipped and the following k neighbours feed the maximum
    likelihood estimate  -k / sum(log(d_i / d_k)). The resulting vector is
    L2-normalized over the batch.
    :param clean_logits: clean logits, reshaped to (batch_size, -1)
    :param adv_logits: adversarial logits, reshaped to (batch_size, -1)
    :param batch_size: number of rows in each of the two inputs; needs to be
                       at least k + 1
    :param k: number of nearest neighbours used by the estimate (default 20,
              the value that used to be hard-coded)
    :return: 1-D tensor with one normalized LID estimate per adversarial row
    """
    # y_pred = tf.nn.softmax(logits)
    c_pred = tf.reshape(clean_logits, (batch_size, -1))
    a_pred = tf.reshape(adv_logits, (batch_size, -1))

    # calculate pairwise squared distances: |a|^2 - 2 a.c + |c|^2
    r_a = tf.reduce_sum(tf.square(a_pred), axis=1)
    # turn r_a into column vector
    r_a = tf.reshape(r_a, [-1, 1])

    r_c = tf.reduce_sum(tf.square(c_pred), axis=1)
    # turn r_c into row vector
    r_c = tf.reshape(r_c, [1, -1])

    D = r_a - 2 * tf.matmul(a_pred, tf.transpose(c_pred)) + r_c
    # rounding can leave tiny negative values, which would turn into NaN
    # under the square root
    D = tf.maximum(D, 0.0)

    # find the k nearest neighbours: top_k of the negated distances yields
    # the k + 1 smallest distances in ascending order; drop the first one
    D1 = tf.sqrt(D + 1e-9)
    D2, _ = tf.nn.top_k(-D1, k=k + 1, sorted=True)
    D3 = -D2[:, 1:]

    # divide every distance by the distance to the k-th neighbour
    m = tf.transpose(tf.multiply(tf.transpose(D3), 1.0 / D3[:, -1]))
    v_log = tf.reduce_sum(tf.log(m + 1e-9), axis=1)  # to avoid nan
    lids = -float(k) / v_log

    ## batch normalize lids
    lids = tf.nn.l2_normalize(lids, dim=0, epsilon=1e-12)

    return lids
286 | 
def flip(x, nb_diff):
    """
    Helper function for get_noisy_samples: set nb_diff randomly chosen
    entries of x that are below CLIP_MAX to CLIP_MAX.

    The entries are drawn without replacement, so exactly nb_diff distinct
    entries change. The input array is left untouched.
    :param x: numpy array of any shape
    :param nb_diff: number of entries to flip; must not exceed the number of
                    entries that are below CLIP_MAX
    :return: a modified copy of x with the original shape
    """
    original_shape = x.shape
    x = np.copy(np.reshape(x, (-1,)))
    candidate_inds = np.where(x < CLIP_MAX)[0]
    assert candidate_inds.shape[0] >= nb_diff
    if nb_diff == 0:
        # nothing to flip (also avoids sampling from an empty candidate set)
        return np.reshape(x, original_shape)
    # replace=False: sampling with replacement could pick the same entry
    # several times and flip fewer than nb_diff entries
    inds = np.random.choice(candidate_inds, nb_diff, replace=False)
    x[inds] = CLIP_MAX

    return np.reshape(x, original_shape)
302 | 
303 | 
def get_noisy_samples(X_test, X_test_adv, dataset, attack):
    """
    Craft one noisy counterpart for every sample in X_test.

    For the 'jsma' and 'cw-l0' attacks, as many entries as differ between a
    sample and its adversarial version are flipped at random (see flip).
    For every other attack, Gaussian noise with the pre-set scale
    STDEVS[dataset][attack] is added and the result is clipped to
    [CLIP_MIN, CLIP_MAX].
    :param X_test: normal samples
    :param X_test_adv: adversarial samples, aligned with X_test
    :param dataset: dataset name, first key into STDEVS
    :param attack: attack name, second key into STDEVS
    :return: the noisy samples
    """
    if attack not in ['jsma', 'cw-l0']:
        warnings.warn("Important: using pre-set Gaussian scale sizes to craft noisy "
                      "samples. You will definitely need to manually tune the scale "
                      "according to the L2 print below, otherwise the result "
                      "will inaccurate. In future scale sizes will be inferred "
                      "automatically. For now, manually tune the scales around "
                      "mnist: L2/20.0, cifar: L2/54.0, svhn: L2/60.0")
        # Add Gaussian noise to the samples, then clip to the valid range
        gaussian = np.random.normal(loc=0, scale=STDEVS[dataset][attack],
                                    size=X_test.shape)
        return np.clip(X_test + gaussian, CLIP_MIN, CLIP_MAX)

    X_test_noisy = np.zeros_like(X_test)
    for idx, clean in enumerate(X_test):
        # Count the number of entries that differ from the adversarial version
        changed = np.count_nonzero(clean != X_test_adv[idx])
        # Randomly flip an equal number of entries (flip means move to the
        # maximum value)
        X_test_noisy[idx] = flip(clean, changed)

    return X_test_noisy
340 | 
341 | 
def get_mc_predictions(model, X, nb_iter=50, batch_size=256):
    """
    Run nb_iter forward passes over X with the learning phase set to 1 and
    collect the outputs of the model's last layer.
    :param model: Keras model
    :param X: input samples
    :param nb_iter: number of passes over X
    :param batch_size: number of samples fed per call
    :return: numpy array with one (len(X), output_dim) prediction matrix
             per pass
    """
    last_layer = model.layers[-1]
    n_outputs = last_layer.output.shape[-1].value
    forward = K.function(
        [model.layers[0].input, K.learning_phase()],
        [last_layer.output]
    )
    n_samples = len(X)

    def single_pass():
        preds = np.zeros(shape=(n_samples, n_outputs))
        for lo in range(0, n_samples, batch_size):
            hi = lo + batch_size
            # learning phase 1 = training mode
            preds[lo:hi] = forward([X[lo:hi], 1])[0]
        return preds

    return np.asarray([single_pass() for _ in tqdm(range(nb_iter))])
370 | 
371 | 
def get_deep_representations(model, X, batch_size=256):
    """
    Compute the outputs of the layer at index -4 (taken as the last hidden
    layer) for every sample in X, with the learning phase set to 0.
    :param model: Keras model
    :param X: input samples
    :param batch_size: number of samples fed per call
    :return: numpy array of shape (len(X), output_dim)
    """
    # last hidden layer is always at index -4
    hidden = model.layers[-4]
    width = hidden.output.shape[-1].value
    encode = K.function(
        [model.layers[0].input, K.learning_phase()],
        [hidden.output]
    )

    total = len(X)
    features = np.zeros(shape=(total, width))
    for lo in range(0, total, batch_size):
        hi = lo + batch_size
        # learning phase 0 = test mode
        features[lo:hi] = encode([X[lo:hi], 0])[0]

    return features
394 | 
def get_layer_wise_activations(model, dataset):
    """
    Get the deep activation outputs.
    :param model: model exposing a `layers` sequence
    :param dataset: 'mnist', 'cifar' or 'svhn'; only validated, the same
                    list is built for all three
    :return: list holding the input of the first layer followed by the
             output of every layer
    """
    assert dataset in ['mnist', 'cifar', 'svhn'], \
        "dataset parameter must be either 'mnist' 'cifar' or 'svhn'"
    acts = [model.layers[0].input]
    for layer in model.layers:
        acts.append(layer.output)
    return acts
417 | 
418 | # lid of a single query point x
def mle_single(data, x, k=20):
    """
    Maximum likelihood estimate of the LID of a single query point.

    The closest entry of data is skipped (the query is expected to be part
    of data, at distance 0) and the next k distances d_1 <= ... <= d_k give
    -k / sum(log(d_i / d_k)).
    :param data: reference points, one per row
    :param x: query point (1-D) or a 2-D array whose first row is the query
    :param k: number of neighbours; capped at len(data) - 1
    :return: the LID estimate of the query point
    :raises ValueError: if no neighbour is available (k < 1 after capping)
    """
    data = np.asarray(data, dtype=np.float32)
    x = np.asarray(x, dtype=np.float32)
    if x.ndim == 1:
        x = x.reshape((-1, x.shape[0]))

    k = min(k, len(data)-1)
    if k < 1:
        raise ValueError("LID estimation needs k >= 1 and at least two reference points")
    # sorted distances to the reference points, closest entry dropped
    dists = np.sort(cdist(x, data), axis=1)[:, 1:k+1]
    lids = -k / np.sum(np.log(dists / dists[:, -1:]), axis=1)
    return lids[0]
433 | 
434 | # lid of a batch of query points X
def mle_batch(data, batch, k):
    """
    Maximum-likelihood LID estimate for every row of a batch.
    :param data: reference points, one per row
    :param batch: query points, one per row (2-D)
    :param k: number of neighbours used, capped at len(data) - 1
    :return: 1-D array with one estimate per row of batch
    """
    reference = np.asarray(data, dtype=np.float32)
    queries = np.asarray(batch, dtype=np.float32)

    k = min(k, len(reference) - 1)

    def lid_of_row(dists):
        # dists: the kept distances of one query, ascending, so
        # dists[-1] is the largest of them
        return - k / np.sum(np.log(dists / dists[-1]))

    # Sort every row ascending and drop column 0, the smallest distance
    # (the self-distance when the query is itself part of data).
    nearest = np.sort(cdist(queries, reference), axis=1)[:, 1:k + 1]
    return np.apply_along_axis(lid_of_row, axis=1, arr=nearest)
445 | 
446 | # mean distance of x to its k nearest neighbours
def kmean_batch(data, batch, k):
    """
    Mean distance from each query point to its nearest neighbours in data.
    :param data: reference points, one per row
    :param batch: query points, one per row; a single 1-D point is also
                  accepted and treated as a batch of one
    :param k: number of neighbours averaged, capped at len(data) - 1
    :return: 1-D array with one mean distance per query point
    """
    data = np.asarray(data, dtype=np.float32)
    batch = np.asarray(batch, dtype=np.float32)
    if batch.ndim == 1:
        # cdist only accepts 2-D input; promote a lone point to one row,
        # mirroring what mle_single does for its query.
        batch = batch.reshape((1, -1))

    k = min(k, len(data) - 1)
    # Sort every row ascending and drop column 0, the smallest distance
    # (the self-distance when the query is itself part of data).
    nearest = np.sort(cdist(batch, data), axis=1)[:, 1:k + 1]
    return nearest.mean(axis=1)
457 | 
458 | # mean distance of x to its k nearest neighbours
def kmean_pca_batch(data, batch, k=10):
    """
    Mean k-nearest-neighbour distance of each query point, measured
    after a 2-component PCA projection.
    :param data: reference points, one per row
    :param batch: query points, one per row
    :param k: number of neighbours averaged (forwarded to kmean_batch)
    :return: 1-D array with one mean distance per row of batch
    """
    data = np.asarray(data, dtype=np.float32)
    batch = np.asarray(batch, dtype=np.float32)
    a = np.zeros(batch.shape[0])
    for i, query in enumerate(batch):
        # PCA is refitted on data plus this one query, so the query and
        # the reference points share the same projection.
        projected = PCA(n_components=2).fit_transform(
            np.concatenate((data, [query])))
        # Pass the query as a (1, 2) matrix: kmean_batch hands it to
        # cdist, which rejects 1-D input. Then unwrap the single result.
        a[i] = kmean_batch(projected[:-1], projected[-1:], k=k)[0]
    return a
468 | 
def get_lids_random_batch(model, X, X_noisy, X_adv, dataset, k=10, batch_size=100):
    """
    Estimate, layer by layer, the local intrinsic dimensionality (LID) of
    the normal, noisy and adversarial samples. Every estimate uses the
    normal samples of the same mini-batch as the reference set.
    :param model: model whose activations are inspected
    :param X: normal images
    :param X_noisy: noisy images
    :param X_adv: adversarial images
    :param dataset: 'mnist', 'cifar' or 'svhn', forwarded to
                    get_layer_wise_activations
    :param k: the number of nearest neighbours for LID estimation
    :param batch_size: default 100
    :return: lids, lids_noisy, lids_adv: float32 arrays holding one row
             per example and one column per inspected activation
    """
    # one backend function per inspected activation tensor
    funcs = [K.function([model.layers[0].input, K.learning_phase()], [out])
             for out in get_layer_wise_activations(model, dataset)]
    lid_dim = len(funcs)
    print("Number of layers to estimate: ", lid_dim)

    def flat_activations(func, samples, n_rows):
        # the trailing 0 feeds K.learning_phase() (presumably test mode);
        # each sample's activation is flattened into one row
        acts = func([samples, 0])[0]
        return np.asarray(acts, dtype=np.float32).reshape((n_rows, -1))

    def estimate(i_batch):
        start = i_batch * batch_size
        end = np.minimum(len(X), (i_batch + 1) * batch_size)
        n_feed = end - start
        lid_batch = np.zeros(shape=(n_feed, lid_dim))
        lid_batch_adv = np.zeros(shape=(n_feed, lid_dim))
        lid_batch_noisy = np.zeros(shape=(n_feed, lid_dim))
        for col, func in enumerate(funcs):
            X_act = flat_activations(func, X[start:end], n_feed)
            X_adv_act = flat_activations(func, X_adv[start:end], n_feed)
            X_noisy_act = flat_activations(func, X_noisy[start:end], n_feed)

            # Maximum likelihood estimation of LID; the clean activations
            # of this batch are the reference set in all three cases.
            lid_batch[:, col] = mle_batch(X_act, X_act, k=k)
            lid_batch_adv[:, col] = mle_batch(X_act, X_adv_act, k=k)
            lid_batch_noisy[:, col] = mle_batch(X_act, X_noisy_act, k=k)
        return lid_batch, lid_batch_noisy, lid_batch_adv

    # row accumulators, in the order estimate() returns: normal, noisy, adv
    collected = ([], [], [])
    n_batches = int(np.ceil(X.shape[0] / float(batch_size)))
    for i_batch in tqdm(range(n_batches)):
        for bucket, rows in zip(collected, estimate(i_batch)):
            bucket.extend(rows)

    lids = np.asarray(collected[0], dtype=np.float32)
    lids_noisy = np.asarray(collected[1], dtype=np.float32)
    lids_adv = np.asarray(collected[2], dtype=np.float32)

    return lids, lids_noisy, lids_adv
537 | 
def get_kmeans_random_batch(model, X, X_noisy, X_adv, dataset, k=10, batch_size=100, pca=False):
    """
    Get the mean distance of each sample to its k nearest neighbours
    among the normal samples of the same mini-batch, measured on the
    output of the layer at index -2.

    :param model: model whose layer -2 output is used
    :param X: normal images
    :param X_noisy: noisy images
    :param X_adv: adversarial images
    :param dataset: 'mnist', 'cifar' or 'svhn' (not used by this function)
    :param k: the number of nearest neighbours to average over
    :param batch_size: default 100
    :param pca: if True use kmean_pca_batch (distances taken after a PCA
            projection of the reference samples plus the query),
            otherwise kmean_batch
    :return: kms: kmean of normal images (num_examples, 1)
            kms_noisy: kmean of noisy images (num_examples, 1)
            kms_adv: kmean of adv images (num_examples, 1)
    """
    # a single backend function: model input -> output of layer -2
    funcs = [K.function([model.layers[0].input, K.learning_phase()], [model.layers[-2].output])]
    km_dim = len(funcs)
    print("Number of layers to use: ", km_dim)

    # choose the distance estimator once rather than per assignment
    mean_dist = kmean_pca_batch if pca else kmean_batch

    def flat_activations(func, samples, n_rows):
        # the trailing 0 feeds K.learning_phase() (presumably test mode);
        # each sample's activation is flattened into one row
        acts = func([samples, 0])[0]
        return np.asarray(acts, dtype=np.float32).reshape((n_rows, -1))

    def estimate(i_batch):
        start = i_batch * batch_size
        end = np.minimum(len(X), (i_batch + 1) * batch_size)
        n_feed = end - start
        km_batch = np.zeros(shape=(n_feed, km_dim))
        km_batch_adv = np.zeros(shape=(n_feed, km_dim))
        km_batch_noisy = np.zeros(shape=(n_feed, km_dim))
        for col, func in enumerate(funcs):
            X_act = flat_activations(func, X[start:end], n_feed)
            X_adv_act = flat_activations(func, X_adv[start:end], n_feed)
            X_noisy_act = flat_activations(func, X_noisy[start:end], n_feed)

            # the clean activations of this batch are the reference set
            km_batch[:, col] = mean_dist(X_act, X_act, k=k)
            km_batch_adv[:, col] = mean_dist(X_act, X_adv_act, k=k)
            km_batch_noisy[:, col] = mean_dist(X_act, X_noisy_act, k=k)
        return km_batch, km_batch_noisy, km_batch_adv

    # row accumulators, in the order estimate() returns: normal, noisy, adv
    collected = ([], [], [])
    n_batches = int(np.ceil(X.shape[0] / float(batch_size)))
    for i_batch in tqdm(range(n_batches)):
        for bucket, rows in zip(collected, estimate(i_batch)):
            bucket.extend(rows)

    kms = np.asarray(collected[0], dtype=np.float32)
    kms_noisy = np.asarray(collected[1], dtype=np.float32)
    kms_adv = np.asarray(collected[2], dtype=np.float32)

    return kms, kms_noisy, kms_adv
616 | 
def score_point(tup):
    """
    Score one sample with the estimator paired with it.
    :param tup: pair (x, kde); kde must provide score_samples()
    :return: first element of kde.score_samples applied to x reshaped
             into a single row
    """
    sample, estimator = tup
    row = np.reshape(sample, (1, -1))
    scores = estimator.score_samples(row)
    return scores[0]
626 | 
627 | 
def score_samples(kdes, samples, preds, n_jobs=None):
    """
    Score every sample with the estimator selected by its prediction,
    spreading the work over a pool of worker processes.
    :param kdes: container of estimators, indexed by the entries of preds
    :param samples: samples to score
    :param preds: for each sample, the index/key of the estimator in kdes
    :param n_jobs: number of worker processes; None lets the pool pick
                   its default size
    :return: array with one score per (sample, pred) pair
    """
    # Pool(None) already means "default size", so no branch is needed.
    pool = mp.Pool(n_jobs)
    try:
        results = np.asarray(
            pool.map(
                score_point,
                [(x, kdes[i]) for x, i in zip(samples, preds)]
            )
        )
    finally:
        # Release the workers even when scoring raises; previously an
        # exception in map() left the pool open.
        pool.close()
        pool.join()

    return results
651 | 
652 | 
def normalize(normal, adv, noisy):
    """Z-score normalisation
    The three sets are concatenated, passed through `scale` together
    (presumably sklearn.preprocessing.scale, so they share one
    mean/std), and split back apart.
    :param normal: values of the normal samples
    :param adv: values of the adversarial samples
    :param noisy: values of the noisy samples
    :return: scaled (normal, adv, noisy), each as long as its input
    """
    total = scale(np.concatenate((normal, adv, noisy)))

    # Split on the real lengths; the old slicing assumed adv was exactly
    # as long as normal and mis-assigned rows otherwise.
    normal_end = len(normal)
    adv_end = normal_end + len(adv)

    return total[:normal_end], total[normal_end:adv_end], total[adv_end:]
665 | 
666 | 
def train_lr(X, y):
    """
    Fit a cross-validated logistic regression classifier.
    :param X: the data samples
    :param y: the labels
    :return: the fitted LogisticRegressionCV instance
    """
    classifier = LogisticRegressionCV(n_jobs=-1)
    # fit() returns the estimator itself, so the same object is returned
    return classifier.fit(X, y)
676 | 
677 | 
def train_lr_rfeinman(densities_pos, densities_neg, uncerts_pos, uncerts_neg):
    """
    Fit a cross-validated logistic regression on two features per sample,
    (density, uncertainty), with negatives labelled 0 and positives 1.
    :param densities_pos: density values of the positive samples
    :param densities_neg: density values of the negative samples
    :param uncerts_pos: uncertainty values of the positive samples
    :param uncerts_neg: uncertainty values of the negative samples
    :return: (values, labels, lr): the feature matrix with negatives
             first, the matching labels, and the fitted classifier
    """
    def as_feature_rows(densities, uncerts):
        # stack the two flattened features as rows, then flip so that
        # every sample becomes one (density, uncertainty) row
        stacked = np.concatenate(
            (densities.reshape((1, -1)), uncerts.reshape((1, -1))),
            axis=0)
        return stacked.transpose([1, 0])

    values_neg = as_feature_rows(densities_neg, uncerts_neg)
    values_pos = as_feature_rows(densities_pos, uncerts_pos)
    values = np.concatenate((values_neg, values_pos))

    negative_labels = np.zeros_like(densities_neg)
    positive_labels = np.ones_like(densities_pos)
    labels = np.concatenate((negative_labels, positive_labels))

    lr = LogisticRegressionCV(n_jobs=-1).fit(values, labels)

    return values, labels, lr
703 | 
704 | 
def compute_roc(y_true, y_pred, plot=False):
    """
    Compute the ROC curve and its AUC, optionally plotting the curve.
    :param y_true: ground truth
    :param y_pred: predictions
    :param plot: if True, draw the curve with matplotlib and show it
    :return: (fpr, tpr, auc_score)
    """
    fpr, tpr, _ = roc_curve(y_true, y_pred)
    auc_score = roc_auc_score(y_true, y_pred)
    if not plot:
        return fpr, tpr, auc_score

    curve_label = 'ROC (AUC = %0.4f)' % auc_score
    plt.figure(figsize=(7, 6))
    plt.plot(fpr, tpr, color='blue', label=curve_label)
    plt.legend(loc='lower right')
    plt.title("ROC Curve")
    plt.xlabel("FPR")
    plt.ylabel("TPR")
    plt.show()

    return fpr, tpr, auc_score
726 | 
727 | 
def compute_roc_rfeinman(probs_neg, probs_pos, plot=False):
    """
    Compute the ROC curve and its AUC from separately supplied scores of
    negative and positive samples, optionally plotting the curve.
    :param probs_neg: scores of the negative samples (labelled 0)
    :param probs_pos: scores of the positive samples (labelled 1)
    :param plot: if True, draw the curve with matplotlib and show it
    :return: (fpr, tpr, auc_score)
    """
    negative_labels = np.zeros_like(probs_neg)
    positive_labels = np.ones_like(probs_pos)
    labels = np.concatenate((negative_labels, positive_labels))
    probs = np.concatenate((probs_neg, probs_pos))

    fpr, tpr, _ = roc_curve(labels, probs)
    auc_score = auc(fpr, tpr)
    if not plot:
        return fpr, tpr, auc_score

    curve_label = 'ROC (AUC = %0.4f)' % auc_score
    plt.figure(figsize=(7, 6))
    plt.plot(fpr, tpr, color='blue', label=curve_label)
    plt.legend(loc='lower right')
    plt.title("ROC Curve")
    plt.xlabel("FPR")
    plt.ylabel("TPR")
    plt.show()

    return fpr, tpr, auc_score
751 | 
def random_split(X, Y):
    """
    Shuffle the samples and split them 80% / 20% into train and test.
    :param X: samples, indexed along the first axis
    :param Y: labels aligned with X
    :return: X_train, Y_train, X_test, Y_test
    """
    print("random split 80%, 20% for training and testing")
    num_samples = X.shape[0]
    num_train = int(num_samples * 0.8)
    # one shared permutation keeps X and Y aligned
    order = np.random.permutation(num_samples)
    train_idx = order[:num_train]
    test_idx = order[num_train:]

    return X[train_idx], Y[train_idx], X[test_idx], Y[test_idx]
769 | 
def block_split(X, Y):
    """
    Split the data into train and test without shuffling. The input is
    cut into thirds (named adv, normal and noisy below, in that order;
    any remainder goes to the last one). From each third, the first
    num_train rows go to training and the rest to testing, where
    num_train is 80% of a third rounded down to a multiple of 100.
    :param X: samples, indexed along the first axis
    :param Y: labels aligned with X
    :return: X_train, Y_train, X_test, Y_test, each ordered
             normal, noisy, adv
    """
    print("Isolated split 80%, 20% for training and testing")
    num_samples = X.shape[0]
    partition = int(num_samples / 3)
    num_train = int(partition*0.008) * 100

    adv = slice(None, partition)
    norm = slice(partition, 2*partition)
    noisy = slice(2*partition, None)
    # output order is normal, noisy, adv
    groups = [(X[part], Y[part]) for part in (norm, noisy, adv)]

    X_train = np.concatenate([gx[:num_train] for gx, _ in groups])
    Y_train = np.concatenate([gy[:num_train] for _, gy in groups])

    X_test = np.concatenate([gx[num_train:] for gx, _ in groups])
    Y_test = np.concatenate([gy[num_train:] for _, gy in groups])

    return X_train, Y_train, X_test, Y_test
793 | 
if __name__ == "__main__":
    # unit test: scale three small arrays together and print each part
    a = np.array([1, 2, 3, 4, 5])
    b = np.array([6, 7, 8, 9, 10])
    c = np.array([11, 12, 13, 14, 15])

    for scaled in normalize(a, b, c):
        print(scaled)
804 | 


--------------------------------------------------------------------------------