├── LICENSE ├── README.md ├── attacks.py ├── craft_adv_examples.py ├── cw_attacks.py ├── detect_adv_examples.py ├── extract_characteristics.py ├── train_model.py └── util.py /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Xingjun Ma 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Code for paper "Characterizing Adversarial Subspaces Using Local Intrinsic Dimensionality". ICLR 2018, https://arxiv.org/abs/1801.02613 2 | 3 | ## Update: added BatchNormalization to after Conv and ReLU. 17 Sept. 2018. 4 | 5 | ### 1. Pre-train DNN models: 6 | python train_model.py -d mnist -e 50 -b 128 7 | 8 | ### 2. 
Craft adversarial examples: 9 | python craft_adv_examples.py -d cifar -a cw-l2 -b 100 10 | ### 3. Extract detection characteristics: 11 | python extract_characteristics.py -d cifar -a cw-l2 -r lid -k 20 -b 100 12 | 13 | ### 4. Train simple detectors: 14 | python detect_adv_examples.py -d cifar -a fgsm -t cw-l2 -r lid 15 | 16 | #### Dependencies: 17 | python 3.5, tqdm, tensorflow = 1.8, Keras >= 2.0, cleverhans >= 1.0.0 (may need an extra change to pass in the keras learning rate) 18 | 19 | #### Kernel Density and Bayesian Uncertainty are from https://github.com/rfeinman/detecting-adversarial-samples ("Detecting Adversarial Samples from Artifacts" (Feinman et al. 2017)) 20 | 21 | --------------------------- 22 | If you come across the error: 23 | 24 | tensorflow.python.framework.errors_impl.InvalidArgumentError: input_1:0 is both fed and fetched. 25 | 26 | 27 | Solution: in function get_layer_wise_activations() (util.py), make the following change: 28 | acts = [layer.output for layer in model.layers[1:]] # let the layer index start from 1. 29 | 30 | Reason: this is possibly caused by the input layer being defined as a separate layer whose input and output are both X. 
def fgsm(x, predictions, eps, clip_min=None, clip_max=None, y=None):
    """
    Computes symbolic TF tensor for the adversarial samples. This must
    be evaluated with a session.run call.
    :param x: the input placeholder
    :param predictions: the model's output tensor. The op producing it must
                        have exactly one input, which is used as the logits
                        (presumably a softmax op -- confirm for new models)
    :param eps: the epsilon (input variation parameter)
    :param clip_min: optional parameter that can be used to set a minimum
                    value for components of the example returned
    :param clip_max: optional parameter that can be used to set a maximum
                    value for components of the example returned
    :param y: the output placeholder. Use None (the default) to avoid the
              label leaking effect.
    :return: a tensor for the adversarial example
    """

    # Compute loss
    if y is None:
        # In this case, use model predictions as ground truth
        y = tf.to_float(
            tf.equal(predictions,
                     tf.reduce_max(predictions, 1, keep_dims=True)))
    # Normalise every label row so that it sums to one
    y = y / tf.reduce_sum(y, 1, keep_dims=True)
    # Unpacking fails loudly if the producing op has more than one input
    logits, = predictions.op.inputs
    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y)
    )

    # Define gradient of loss wrt input
    grad, = tf.gradients(loss, x)

    # Take sign of gradient
    signed_grad = tf.sign(grad)

    # Multiply by constant epsilon
    scaled_signed_grad = eps * signed_grad

    # Add perturbation to original example to obtain adversarial example
    adv_x = tf.stop_gradient(x + scaled_signed_grad)

    # Clipping is applied only when BOTH bounds are given
    if (clip_min is not None) and (clip_max is not None):
        adv_x = tf.clip_by_value(adv_x, clip_min, clip_max)

    return adv_x


def jsma(sess, x, predictions, grads, sample, target, theta, gamma,
         increase, nb_classes, clip_min, clip_max, verbose=False):
    """
    TensorFlow implementation of the jacobian-based saliency map method (JSMA).
    :param sess: TF session
    :param x: the input placeholder
    :param predictions: the model's symbolic output (linear output,
                        pre-softmax)
    :param grads: symbolic gradients passed through to cleverhans'
                  `jacobian` (built with `jacobian_graph` by the caller in
                  this file)
    :param sample: numpy array with sample input (leading axis of size 1)
    :param target: target class for sample input
    :param theta: delta for each feature adjustment
    :param gamma: a float between 0 - 1 indicating the maximum distortion
                  percentage
    :param increase: boolean; true if we are increasing pixels, false otherwise
    :param nb_classes: integer indicating the number of classes in the model
    :param clip_min: minimum value for components of the example returned;
                     compared against the features when `increase` is False
    :param clip_max: maximum value for components of the example returned;
                     compared against the features when `increase` is True
    :param verbose: boolean; whether to print status updates or not
    :return: a tuple (adversarial sample in the shape of `sample`,
             1 if the target class was reached else 0,
             fraction of features perturbed)
    """

    # Copy the source sample and define the maximum number of features
    # (i.e. the maximum number of iterations) that we may perturb
    adv_x = copy.copy(sample)
    # count the number of features. For MNIST, 1x28x28 = 784; for
    # CIFAR, 3x32x32 = 3072; etc.
    # np.prod is the supported spelling; np.product is a deprecated alias
    nb_features = np.prod(adv_x.shape[1:])
    # reshape sample for sake of standardization
    original_shape = adv_x.shape
    adv_x = np.reshape(adv_x, (1, nb_features))
    # compute maximum number of iterations (two features change per step)
    max_iters = np.floor(nb_features * gamma / 2)
    if verbose:
        print('Maximum number of iterations: {0}'.format(max_iters))

    # Compute our initial search domain. We optimize the initial search domain
    # by removing all features that are already at their maximum values (if
    # increasing input features---otherwise, at their minimum value).
    if increase:
        search_domain = {i for i in xrange(nb_features)
                         if adv_x[0, i] < clip_max}
    else:
        search_domain = {i for i in xrange(nb_features)
                         if adv_x[0, i] > clip_min}

    # Every model query runs in test mode
    feed = {K.learning_phase(): 0}

    # Initialize the loop variables
    iteration = 0
    adv_x_original_shape = np.reshape(adv_x, original_shape)
    current = model_argmax(sess, x, predictions, adv_x_original_shape,
                           feed=feed)

    # Repeat this main loop until we have achieved misclassification
    while (current != target and iteration < max_iters and
           len(search_domain) > 1):
        # Reshape the adversarial example
        adv_x_original_shape = np.reshape(adv_x, original_shape)

        # Compute the Jacobian components
        grads_target, grads_others = jacobian(sess, x, grads, target,
                                              adv_x_original_shape,
                                              nb_features, nb_classes,
                                              feed=feed)

        # Compute the saliency map for each of our target classes
        # and return the two best candidate features for perturbation
        i, j, search_domain = saliency_map(
            grads_target, grads_others, search_domain, increase)

        # Apply the perturbation to the two input features selected previously
        adv_x = apply_perturbations(
            i, j, adv_x, increase, theta, clip_min, clip_max)

        # Re-derive the image-shaped array from the *perturbed* features so
        # the prediction below never depends on np.reshape having returned a
        # view or on apply_perturbations mutating its argument in place
        adv_x_original_shape = np.reshape(adv_x, original_shape)

        # Update our current prediction by querying the model
        current = model_argmax(sess, x, predictions, adv_x_original_shape,
                               feed=feed)

        # Update loop variables
        iteration += 1

        # This process may take a while, so outputting progress regularly
        if iteration % 5 == 0 and verbose:
            msg = 'Current iteration: {0} - Current Prediction: {1}'
            print(msg.format(iteration, current))

    # Compute the ratio of pixels perturbed by the algorithm
    percent_perturbed = float(iteration * 2) / nb_features

    # Report success when the adversarial example is misclassified in the
    # target class
    if current == target:
        if verbose:
            print('Successful')
        return np.reshape(adv_x, original_shape), 1, percent_perturbed
    else:
        if verbose:
            print('Unsuccesful')
        return np.reshape(adv_x, original_shape), 0, percent_perturbed


def fast_gradient_sign_method(sess, model, X, Y, eps, clip_min=None,
                              clip_max=None, batch_size=256):
    """
    Craft FGSM adversarial samples for a whole numpy dataset.
    :param sess: TF session used to evaluate the attack graph
    :param model: callable mapping an input tensor to predictions
                  (after-softmax, as required by fgsm)
    :param X: numpy array of input samples, one sample per row
    :param Y: numpy array of labels aligned with X
    :param eps: the epsilon (input variation parameter)
    :param clip_min: optional minimum value for components of the result
    :param clip_max: optional maximum value for components of the result
    :param batch_size: number of samples evaluated per session run
    :return: numpy array of adversarial samples
    """
    # Define TF placeholders for the input and output
    x = tf.placeholder(tf.float32, shape=(None,) + X.shape[1:])
    y = tf.placeholder(tf.float32, shape=(None,) + Y.shape[1:])
    adv_x = fgsm(
        x, model(x), eps=eps,
        clip_min=clip_min,
        clip_max=clip_max, y=y
    )
    # Evaluate the symbolic attack in test mode, batch by batch
    X_adv, = batch_eval(
        sess, [x, y], [adv_x],
        [X, Y], feed={K.learning_phase(): 0},
        args={'batch_size': batch_size}
    )

    return X_adv


def basic_iterative_method(sess, model, X, Y, eps, eps_iter, nb_iter=50,
                           clip_min=None, clip_max=None, batch_size=256):
    """
    Craft BIM adversarial samples by applying FGSM steps repeatedly.
    :param sess: TF session used to evaluate the attack graph
    :param model: Keras model; called on a tensor to get predictions
                  (after-softmax) and queried through predict_classes
    :param X: numpy array of input samples, one sample per row
    :param Y: numpy array of labels aligned with X, one column per class
              (the true class is taken as Y.argmax(axis=1))
    :param eps: maximum total perturbation; every iterate is kept inside
                [X - eps, X + eps]
    :param eps_iter: epsilon of each individual FGSM step
    :param nb_iter: number of iterations
    :param clip_min: optional minimum value for components of the result
    :param clip_max: optional maximum value for components of the result
    :param batch_size: number of samples evaluated per session run
    :return: a tuple (its, results). `its` maps a sample index to the first
             iteration at which that sample was misclassified (defaulting to
             nb_iter - 1); `results` holds the adversarial inputs of every
             iteration, with shape (nb_iter,) + X.shape
    """
    # Define TF placeholders for the input and output
    x = tf.placeholder(tf.float32, shape=(None,) + X.shape[1:])
    y = tf.placeholder(tf.float32, shape=(None,) + Y.shape[1:])
    # The single-step graph does not depend on the iteration, so build it
    # once; rebuilding it inside the loop would add a fresh copy of the
    # model and gradient ops to the TF graph on every iteration
    adv_x = fgsm(
        x, model(x), eps=eps_iter,
        clip_min=clip_min, clip_max=clip_max, y=y
    )
    # results will hold the adversarial inputs at each iteration of BIM;
    # thus it will have shape (nb_iter, n_samples, n_rows, n_cols, n_channels)
    results = np.zeros((nb_iter, X.shape[0],) + X.shape[1:])
    # Initialize adversarial samples as the original samples, set upper and
    # lower bounds
    X_adv = X
    X_min = X_adv - eps
    X_max = X_adv + eps
    print('Running BIM iterations...')
    # "its" is a dictionary that keeps track of the iteration at which each
    # sample becomes misclassified. The default value will be (nb_iter-1), the
    # very last iteration.
    its = defaultdict(lambda: nb_iter - 1)
    # Out keeps track of which samples have already been misclassified
    out = set()
    true_classes = Y.argmax(axis=1)
    for i in tqdm(range(nb_iter)):
        X_adv, = batch_eval(
            sess, [x, y], [adv_x],
            [X_adv, Y], feed={K.learning_phase(): 0},
            args={'batch_size': batch_size}
        )
        # Project back into the eps-ball around the original samples
        X_adv = np.maximum(np.minimum(X_adv, X_max), X_min)
        results[i] = X_adv
        # check misclassifieds
        predictions = model.predict_classes(X_adv, batch_size=512, verbose=0)
        misclassifieds = np.where(predictions != true_classes)[0]
        for elt in misclassifieds:
            if elt not in out:
                its[elt] = i
                out.add(elt)

    return its, results


def saliency_map_method(sess, model, X, Y, theta, gamma, clip_min=None,
                        clip_max=None):
    """
    Craft JSMA adversarial samples, one randomly targeted sample at a time.
    :param sess: TF session used to evaluate the attack graph
    :param model: callable mapping an input tensor to predictions
    :param X: numpy array of input samples, one sample per row
    :param Y: numpy array of labels aligned with X, one column per class
    :param theta: delta for each feature adjustment
    :param gamma: a float between 0 - 1 indicating the maximum distortion
                  percentage
    :param clip_min: minimum value for components of the result
    :param clip_max: maximum value for components of the result; jsma
                     compares features against it, so a real bound should be
                     supplied despite the None default
    :return: numpy array of adversarial samples with the shape of X
    """
    nb_classes = Y.shape[1]
    # Define TF placeholder for the input
    x = tf.placeholder(tf.float32, shape=(None,) + X.shape[1:])
    # Build the prediction tensor once and share it between the Jacobian
    # graph and every jsma call; calling model(x) inside the loop would add
    # a new copy of the model ops to the TF graph for every sample
    predictions = model(x)
    # Define model gradients
    grads = jacobian_graph(predictions, x, nb_classes)
    X_adv = np.zeros_like(X)
    for i in tqdm(range(len(X))):
        current_class = int(np.argmax(Y[i]))
        # Pick a random target among the classes other than the true one
        target_class = np.random.choice(other_classes(nb_classes, current_class))
        X_adv[i], _, _ = jsma(
            sess, x, predictions, grads, X[i:(i+1)], target_class, theta=theta,
            gamma=gamma, increase=True, nb_classes=nb_classes,
            clip_min=clip_min, clip_max=clip_max
        )

    return X_adv
samples...' % attack) 51 | its, results = basic_iterative_method( 52 | sess, model, X, Y, eps=ATTACK_PARAMS[dataset]['eps'], 53 | eps_iter=ATTACK_PARAMS[dataset]['eps_iter'], clip_min=CLIP_MIN, 54 | clip_max=CLIP_MAX, batch_size=batch_size 55 | ) 56 | if attack == 'bim-a': 57 | # BIM-A 58 | # For each sample, select the time step where that sample first 59 | # became misclassified 60 | X_adv = np.asarray([results[its[i], i] for i in range(len(Y))]) 61 | else: 62 | # BIM-B 63 | # For each sample, select the very last time step 64 | X_adv = results[-1] 65 | elif attack == 'jsma': 66 | # JSMA attack 67 | print('Crafting jsma adversarial samples. This may take > 5 hours') 68 | X_adv = saliency_map_method( 69 | sess, model, X, Y, theta=1, gamma=0.1, clip_min=CLIP_MIN, clip_max=CLIP_MAX 70 | ) 71 | elif attack == 'cw-l2': 72 | # C&W attack 73 | print('Crafting %s examples. This takes > 5 hours due to internal grid search' % attack) 74 | image_size = ATTACK_PARAMS[dataset]['image_size'] 75 | num_channels = ATTACK_PARAMS[dataset]['num_channels'] 76 | num_labels = ATTACK_PARAMS[dataset]['num_labels'] 77 | cw_attack = CarliniL2(sess, model, image_size, num_channels, num_labels, batch_size=batch_size) 78 | X_adv = cw_attack.attack(X, Y) 79 | elif attack == 'cw-lid': 80 | # C&W attack to break LID detector 81 | print('Crafting %s examples. 
def _load_attack_model(dataset, model_file, attack):
    """Load the trained classifier in the form that `attack` expects."""
    if attack == 'cw-l2' or attack == 'cw-lid':
        warnings.warn("Important: remove the softmax layer for cw attacks!")
        # use softmax=False to load without softmax layer
        model = get_model(dataset, softmax=False)
        model.compile(
            loss=cross_entropy,
            optimizer='adadelta',
            metrics=['accuracy']
        )
        model.load_weights(model_file)
        return model
    return load_model(model_file)


def main(args):
    """
    Craft and save adversarial samples for the requested dataset and attack.
    :param args: parsed command line arguments providing `dataset`,
                 `attack` and `batch_size`
    :return: None; results are written below PATH_DATA by craft_one_type
    """
    assert args.dataset in ['mnist', 'cifar', 'svhn'], \
        "Dataset parameter must be either 'mnist', 'cifar' or 'svhn'"
    assert args.attack in ['fgsm', 'bim-a', 'bim-b', 'jsma', 'cw-l2', 'all', 'cw-lid'], \
        "Attack parameter must be either 'fgsm', 'bim-a', 'bim-b', " \
        "'jsma', 'cw-l2', 'all' or 'cw-lid' for attacking LID detector"
    model_file = os.path.join(PATH_DATA, "model_%s.h5" % args.dataset)
    assert os.path.isfile(model_file), \
        'model file not found... must first train model using train_model.py.'
    if args.dataset == 'svhn' and args.attack == 'cw-l2':
        assert args.batch_size == 16, \
            "svhn has 26032 test images, the batch_size for cw-l2 attack should be 16, " \
            "otherwise, there will be error at the last batch-- needs to be fixed."

    print('Dataset: %s. Attack: %s' % (args.dataset, args.attack))
    # Create TF session, set it as Keras backend
    sess = tf.Session()
    K.set_session(sess)
    # Release the session even when loading, evaluation or crafting fails
    try:
        model = _load_attack_model(args.dataset, model_file, args.attack)

        _, _, X_test, Y_test = get_data(args.dataset)
        _, acc = model.evaluate(X_test, Y_test, batch_size=args.batch_size,
                                verbose=0)
        print("Accuracy on the test set: %0.2f%%" % (100*acc))

        if args.attack == 'cw-lid':  # white box attacking LID detector - an example
            X_test = X_test[:1000]
            Y_test = Y_test[:1000]

        if args.attack == 'all':
            # Cycle through all attacks
            for attack in ['fgsm', 'bim-a', 'bim-b', 'jsma', 'cw-l2']:
                if attack == 'cw-l2':
                    # The C&W attack works on pre-softmax outputs, while the
                    # model loaded for 'all' still ends in a softmax layer
                    attack_model = _load_attack_model(
                        args.dataset, model_file, attack)
                else:
                    attack_model = model
                craft_one_type(sess, attack_model, X_test, Y_test,
                               args.dataset, attack, args.batch_size)
        else:
            # Craft one specific attack type
            craft_one_type(sess, model, X_test, Y_test, args.dataset, args.attack,
                           args.batch_size)
        print('Adversarial samples crafted and saved to %s ' % PATH_DATA)
    finally:
        sess.close()
class CarliniL2:
    def __init__(self, sess, model, image_size, num_channels, num_labels, batch_size=100,
                 confidence=L2_CONFIDENCE, targeted=L2_TARGETED, learning_rate=L2_LEARNING_RATE,
                 binary_search_steps=L2_BINARY_SEARCH_STEPS, max_iterations=L2_MAX_ITERATIONS,
                 abort_early=L2_ABORT_EARLY,
                 initial_const=L2_INITIAL_CONST):
        """
        The L_2 optimized attack.

        This attack is the most efficient and should be used as the primary
        attack to evaluate potential defenses.

        Builds the attack graph for a fixed batch of `batch_size` square
        images of side `image_size` with `num_channels` channels.

        sess: TF session in which the attack graph is run.
        model: callable mapping an image tensor to BEFORE-SOFTMAX outputs.
        image_size: height and width of the input images.
        num_channels: number of channels of the input images.
        num_labels: number of classes of the model output.
        confidence: Confidence of adversarial examples: higher produces examples
          that are farther away, but more strongly classified as adversarial.
        batch_size: Number of attacks to run simultaneously.
        targeted: True if we should perform a targetted attack, False otherwise.
        learning_rate: The learning rate for the attack algorithm. Smaller values
          produce better results but are slower to converge.
        binary_search_steps: The number of times we perform binary search to
          find the optimal tradeoff-constant between distance and confidence.
        max_iterations: The maximum number of iterations. Larger values are more
          accurate; setting too small will require a large learning rate and will
          produce poor results.
        abort_early: If true, allows early aborts if gradient descent gets stuck.
        initial_const: The initial tradeoff-constant to use to tune the relative
          importance of distance and confidence. If binary_search_steps is large,
          the initial constant is not important.
        """
        self.model = model
        self.sess = sess
        self.image_size = image_size
        self.num_channels = num_channels
        self.num_labels = num_labels

        self.TARGETED = targeted
        self.LEARNING_RATE = learning_rate
        self.MAX_ITERATIONS = max_iterations
        self.BINARY_SEARCH_STEPS = binary_search_steps
        self.ABORT_EARLY = abort_early
        self.CONFIDENCE = confidence
        self.initial_const = initial_const
        self.batch_size = batch_size

        # with many search steps the last one re-runs at the upper bound
        self.repeat = binary_search_steps >= 10

        shape = (self.batch_size, self.image_size, self.image_size, self.num_channels)

        # the variable we're going to optimize over
        modifier = tf.Variable(np.zeros(shape, dtype=np.float32))
        self.max_mod = tf.reduce_max(modifier)

        # these are variables to be more efficient in sending data to tf
        self.timg = tf.Variable(np.zeros(shape), dtype=tf.float32)
        self.tlab = tf.Variable(np.zeros((self.batch_size, self.num_labels)), dtype=tf.float32)
        self.const = tf.Variable(np.zeros(self.batch_size), dtype=tf.float32)

        # and here's what we use to assign them
        self.assign_timg = tf.placeholder(tf.float32, shape)
        self.assign_tlab = tf.placeholder(tf.float32, (self.batch_size, self.num_labels))
        self.assign_const = tf.placeholder(tf.float32, [self.batch_size])

        # the resulting image, tanh'd to keep bounded from -0.5 to 0.5
        self.newimg = tf.tanh(modifier + self.timg) / 2

        # prediction BEFORE-SOFTMAX of the model
        self.output = self.model(self.newimg)

        # distance to the input data
        self.l2dist = tf.reduce_sum(tf.square(self.newimg - tf.tanh(self.timg) / 2), [1, 2, 3])

        # compute the probability of the label class versus the maximum other
        real = tf.reduce_sum((self.tlab) * self.output, 1)
        other = tf.reduce_max((1 - self.tlab) * self.output - (self.tlab * 10000), 1)

        if self.TARGETED:
            # if targetted, optimize for making the other class most likely
            loss1 = tf.maximum(0.0, other - real + self.CONFIDENCE)
        else:
            # if untargeted, optimize for making this class least likely.
            loss1 = tf.maximum(0.0, real - other + self.CONFIDENCE)

        # sum up the losses
        self.loss2 = tf.reduce_sum(self.l2dist)
        self.loss1 = tf.reduce_sum(self.const * loss1)
        self.loss = self.loss1 + self.loss2
        self.grads = tf.reduce_max(tf.gradients(self.loss, [modifier]))

        # Setup the adam optimizer and keep track of variables we're creating
        start_vars = set(x.name for x in tf.global_variables())
        optimizer = tf.train.AdamOptimizer(self.LEARNING_RATE)
        self.train = optimizer.minimize(self.loss, var_list=[modifier])
        end_vars = tf.global_variables()
        new_vars = [x for x in end_vars if x.name not in start_vars]

        # these are the variables to initialize when we run
        self.setup = []
        self.setup.append(self.timg.assign(self.assign_timg))
        self.setup.append(self.tlab.assign(self.assign_tlab))
        self.setup.append(self.const.assign(self.assign_const))

        # re-initialises the modifier and the optimizer's own variables only
        self.init = tf.variables_initializer(var_list=[modifier] + new_vars)

    def attack(self, X, Y):
        """
        Perform the L_2 attack on the given images for the given targets.

        :param X: samples to generate advs
        :param Y: the original class labels, one column per class
        If self.targeted is true, a random class other than the original one
        is drawn for every sample and used as the target label.
        If self.targeted is false, then targets are the original class labels.
        :return: numpy array of adversarial samples with the shape of X
        """
        nb_classes = Y.shape[1]

        # random select target class for targeted attack
        y_target = np.copy(Y)
        if self.TARGETED:
            for i in range(Y.shape[0]):
                current = int(np.argmax(Y[i]))
                target = np.random.choice(other_classes(nb_classes, current))
                y_target[i] = np.eye(nb_classes)[target]

        X_adv = np.zeros_like(X)
        for start in tqdm(range(0, X.shape[0], self.batch_size)):
            # the last slice may hold fewer than batch_size samples;
            # attack_batch pads it internally
            end = min(start + self.batch_size, X.shape[0])
            X_adv[start:end] = self.attack_batch(X[start:end], y_target[start:end])

        return X_adv

    def attack_batch(self, imgs, labs):
        """
        Run the attack on a batch of images and labels.

        :param imgs: at most self.batch_size images; values are mapped
                     through arctanh(2 * img), so they must lie in [-0.5, 0.5]
        :param labs: label rows aligned with imgs (target labels when
                     targeted, original labels otherwise)
        :return: list with one adversarial image per input image; an
                 all-zero image marks a sample for which no adversarial
                 example was found
        :raises ValueError: if more than self.batch_size images are given
        """

        def compare(x, y):
            # x is either a score vector or an already computed class index
            if not isinstance(x, (float, int, np.integer)):
                x = np.copy(x)
                # require the confidence margin in the direction of the
                # attack, mirroring the sign used in the loss
                if self.TARGETED:
                    x[y] -= self.CONFIDENCE
                else:
                    x[y] += self.CONFIDENCE
                x = np.argmax(x)
            if self.TARGETED:
                return x == y
            return x != y

        n_real = imgs.shape[0]
        if n_real == 0:
            return []
        if n_real > self.batch_size:
            raise ValueError(
                "attack_batch got %d samples but the graph was built for "
                "batches of %d" % (n_real, self.batch_size))

        # The graph is built for exactly self.batch_size samples. A short
        # (last) batch is padded by repeating its final sample and the padded
        # rows are dropped again before returning. The padding only takes
        # part in the summed loss used by the early-abort check.
        batch_size = self.batch_size
        n_pad = batch_size - n_real
        if n_pad > 0:
            imgs = np.concatenate([imgs, np.repeat(imgs[-1:], n_pad, axis=0)])
            labs = np.concatenate([labs, np.repeat(labs[-1:], n_pad, axis=0)])

        # convert to tanh-space
        imgs = np.arctanh(imgs * 1.999999)

        # set the lower and upper bounds accordingly
        lower_bound = np.zeros(batch_size)
        CONST = np.ones(batch_size) * self.initial_const
        upper_bound = np.ones(batch_size) * 1e10

        # the best l2, score, and image attack
        o_bestl2 = [1e10] * batch_size
        o_bestscore = [-1] * batch_size
        o_bestattack = [np.zeros(imgs[0].shape)] * batch_size

        # never 0, so the modulo below is safe for max_iterations < 10
        check_every = max(1, self.MAX_ITERATIONS // 10)

        for outer_step in range(self.BINARY_SEARCH_STEPS):
            # completely reset adam's internal state.
            self.sess.run(self.init)
            batch = imgs[:batch_size]
            batchlab = labs[:batch_size]

            bestl2 = [1e10] * batch_size
            bestscore = [-1] * batch_size

            # The last iteration (if we run many steps) repeat the search once.
            if self.repeat and outer_step == self.BINARY_SEARCH_STEPS - 1:
                CONST = upper_bound

            # set the variables so that we don't have to send them over again
            self.sess.run(self.setup, {self.assign_timg: batch,
                                       self.assign_tlab: batchlab,
                                       self.assign_const: CONST})

            prev = 1e6
            for iteration in range(self.MAX_ITERATIONS):
                # perform the attack
                _, l, l2s, scores, nimg = self.sess.run([self.train, self.loss,
                                                         self.l2dist, self.output,
                                                         self.newimg], feed_dict={K.learning_phase(): 0})

                # check if we should abort search if we're getting nowhere.
                if self.ABORT_EARLY and iteration % check_every == 0:
                    if l > prev * .9999:
                        break
                    prev = l

                # adjust the best result found so far
                for e, (l2, sc, ii) in enumerate(zip(l2s, scores, nimg)):
                    label = np.argmax(batchlab[e])
                    if l2 < bestl2[e] and compare(sc, label):
                        bestl2[e] = l2
                        bestscore[e] = np.argmax(sc)
                    if l2 < o_bestl2[e] and compare(sc, label):
                        o_bestl2[e] = l2
                        o_bestscore[e] = np.argmax(sc)
                        o_bestattack[e] = ii

            # adjust the constant as needed
            for e in range(batch_size):
                if compare(bestscore[e], np.argmax(batchlab[e])) and bestscore[e] != -1:
                    # success, divide const by two
                    upper_bound[e] = min(upper_bound[e], CONST[e])
                    if upper_bound[e] < 1e9:
                        CONST[e] = (lower_bound[e] + upper_bound[e]) / 2
                else:
                    # failure, either multiply by 10 if no solution found yet
                    #          or do binary search with the known upper bound
                    lower_bound[e] = max(lower_bound[e], CONST[e])
                    if upper_bound[e] < 1e9:
                        CONST[e] = (lower_bound[e] + upper_bound[e]) / 2
                    else:
                        CONST[e] *= 10

        # return the best solution found, ignoring the padded rows; the rate
        # is relative to the samples actually supplied
        real_bestl2 = np.array(o_bestl2[:n_real])
        print('sucess rate: %.4f' % (1 - np.sum(real_bestl2 == 1e10) / float(n_real)))
        return o_bestattack[:n_real]
288 | initial_const: The initial tradeoff-constant to use to tune the relative 289 | importance of distance and confidence. If binary_search_steps is large, 290 | the initial constant is not important. 291 | """ 292 | self.model = model 293 | self.sess = sess 294 | self.image_size = image_size 295 | self.num_channels = num_channels 296 | self.num_labels = num_labels 297 | 298 | self.TARGETED = targeted 299 | self.LEARNING_RATE = learning_rate 300 | self.MAX_ITERATIONS = max_iterations 301 | self.BINARY_SEARCH_STEPS = binary_search_steps 302 | self.ABORT_EARLY = abort_early 303 | self.CONFIDENCE = confidence 304 | self.initial_const = initial_const 305 | self.batch_size = batch_size 306 | 307 | self.repeat = binary_search_steps >= 10 308 | 309 | shape = (self.batch_size, self.image_size, self.image_size, self.num_channels) 310 | 311 | # the variable we're going to optimize over 312 | modifier = tf.Variable(np.zeros(shape, dtype=np.float32)) 313 | self.max_mod = tf.reduce_max(modifier) 314 | 315 | # these are variables to be more efficient in sending data to tf 316 | self.timg = tf.Variable(np.zeros(shape), dtype=tf.float32) 317 | self.tlab = tf.Variable(np.zeros((self.batch_size, self.num_labels)), dtype=tf.float32) 318 | self.const = tf.Variable(np.zeros(self.batch_size), dtype=tf.float32) 319 | 320 | # and here's what we use to assign them 321 | self.assign_timg = tf.placeholder(tf.float32, shape) 322 | self.assign_tlab = tf.placeholder(tf.float32, (self.batch_size, self.num_labels)) 323 | self.assign_const = tf.placeholder(tf.float32, [self.batch_size]) 324 | 325 | # the resulting image, tanh'd to keep bounded from -0.5 to 0.5 326 | self.newimg = tf.tanh(modifier + self.timg) / 2 327 | 328 | # prediction BEFORE-SOFTMAX of the model 329 | self.output = self.model(self.newimg) 330 | 331 | # distance to the input data 332 | self.l2dist = tf.reduce_sum(tf.square(self.newimg - tf.tanh(self.timg) / 2), [1, 2, 3]) 333 | 334 | # compute the probability of the label class 
versus the maximum other 335 | real = tf.reduce_sum((self.tlab) * self.output, 1) 336 | other = tf.reduce_max((1 - self.tlab) * self.output - (self.tlab * 10000), 1) 337 | 338 | if self.TARGETED: 339 | # if targetted, optimize for making the other class most likely 340 | loss1 = tf.maximum(0.0, other - real + self.CONFIDENCE) 341 | else: 342 | # if untargeted, optimize for making this class least likely. 343 | loss1 = tf.maximum(0.0, real - other + self.CONFIDENCE) 344 | 345 | # add lis loss to the attack 346 | self.clean_logits = tf.placeholder(tf.float32, (1, self.batch_size, None)) 347 | loss_lid = lid_adv_term(self.clean_logits, self.output, self.batch_size) 348 | 349 | # sum up the losses 350 | self.loss2 = tf.reduce_sum(self.l2dist) 351 | self.loss1 = tf.reduce_sum(self.const * (loss1 + loss_lid)) 352 | self.loss = self.loss1 + self.loss2 353 | self.grads = tf.reduce_max(tf.gradients(self.loss, [modifier])) 354 | 355 | # Setup the adam optimizer and keep track of variables we're creating 356 | start_vars = set(x.name for x in tf.global_variables()) 357 | optimizer = tf.train.AdamOptimizer(self.LEARNING_RATE) 358 | self.train = optimizer.minimize(self.loss, var_list=[modifier]) 359 | end_vars = tf.global_variables() 360 | new_vars = [x for x in end_vars if x.name not in start_vars] 361 | 362 | # these are the variables to initialize when we run 363 | self.setup = [] 364 | self.setup.append(self.timg.assign(self.assign_timg)) 365 | self.setup.append(self.tlab.assign(self.assign_tlab)) 366 | self.setup.append(self.const.assign(self.assign_const)) 367 | 368 | self.init = tf.variables_initializer(var_list=[modifier] + new_vars) 369 | 370 | def attack(self, X, Y): 371 | """ 372 | Perform the L_2 attack on the given images for the given targets. 373 | 374 | :param X: samples to generate advs 375 | :param Y: the original class labels 376 | If self.targeted is true, then the targets represents the target labels. 
377 | If self.targeted is false, then targets are the original class labels. 378 | """ 379 | nb_classes = Y.shape[1] 380 | 381 | # random select target class for targeted attack 382 | y_target = np.copy(Y) 383 | if self.TARGETED: 384 | for i in range(Y.shape[0]): 385 | current = int(np.argmax(Y[i])) 386 | target = np.random.choice(other_classes(nb_classes, current)) 387 | y_target[i] = np.eye(nb_classes)[target] 388 | 389 | X_adv = np.zeros_like(X) 390 | for i in tqdm(range(0, X.shape[0], self.batch_size)): 391 | start = i 392 | end = i + self.batch_size 393 | end = np.minimum(end, X.shape[0]) 394 | X_adv[start:end] = self.attack_batch(X[start:end], y_target[start:end]) 395 | 396 | return X_adv 397 | 398 | def attack_batch(self, imgs, labs): 399 | """ 400 | Run the attack on a batch of images and labels. 401 | """ 402 | 403 | def compare(x, y): 404 | if not isinstance(x, (float, int, np.int64)): 405 | x = np.copy(x) 406 | x[y] -= self.CONFIDENCE 407 | x = np.argmax(x) 408 | if self.TARGETED: 409 | return x == y 410 | else: 411 | return x != y 412 | 413 | # batch_size = self.batch_size 414 | batch_size = imgs.shape[0] 415 | 416 | # convert to tanh-space 417 | imgs = np.arctanh(imgs * 1.999999) 418 | 419 | # set the lower and upper bounds accordingly 420 | lower_bound = np.zeros(batch_size) 421 | CONST = np.ones(batch_size) * self.initial_const 422 | upper_bound = np.ones(batch_size) * 1e10 423 | 424 | # the best l2, score, and image attack 425 | o_bestl2 = [1e10] * batch_size 426 | o_bestscore = [-1] * batch_size 427 | o_bestattack = [np.zeros(imgs[0].shape)] * batch_size 428 | # o_bestattack = np.copy(imgs) 429 | 430 | for outer_step in range(self.BINARY_SEARCH_STEPS): 431 | # print(o_bestl2) 432 | # completely reset adam's internal state. 
433 | self.sess.run(self.init) 434 | batch = imgs[:batch_size] 435 | batchlab = labs[:batch_size] 436 | 437 | bestl2 = [1e10] * batch_size 438 | bestscore = [-1] * batch_size 439 | 440 | # The last iteration (if we run many steps) repeat the search once. 441 | if self.repeat == True and outer_step == self.BINARY_SEARCH_STEPS - 1: 442 | CONST = upper_bound 443 | 444 | # set the variables so that we don't have to send them over again 445 | self.sess.run(self.setup, {self.assign_timg: batch, 446 | self.assign_tlab: batchlab, 447 | self.assign_const: CONST}) 448 | 449 | # get clean logits of clean samples: 450 | c_logits = self.sess.run([self.output], feed_dict={K.learning_phase(): 0}) 451 | 452 | prev = 1e6 453 | for iteration in range(self.MAX_ITERATIONS): 454 | # perform the attack 455 | _, l, l2s, scores, nimg = self.sess.run([self.train, self.loss, 456 | self.l2dist, self.output, 457 | self.newimg], feed_dict={K.learning_phase(): 0, 458 | self.clean_logits: c_logits}) 459 | 460 | # print out the losses every 10% 461 | # if iteration % (self.MAX_ITERATIONS // 10) == 0: 462 | # print(iteration, self.sess.run((self.loss, self.loss1, self.loss2, self.grads, self.max_mod), feed_dict={K.learning_phase(): 0})) 463 | 464 | # check if we should abort search if we're getting nowhere. 
465 | if self.ABORT_EARLY and iteration % (self.MAX_ITERATIONS // 10) == 0: 466 | if l > prev * .9999: 467 | break 468 | prev = l 469 | 470 | # adjust the best result found so far 471 | for e, (l2, sc, ii) in enumerate(zip(l2s, scores, nimg)): 472 | if l2 < bestl2[e] and compare(sc, np.argmax(batchlab[e])): 473 | bestl2[e] = l2 474 | bestscore[e] = np.argmax(sc) 475 | if l2 < o_bestl2[e] and compare(sc, np.argmax(batchlab[e])): 476 | # print('l2:', l2, 'bestl2[e]: ', bestl2[e]) 477 | # print('score:', np.argmax(sc), 'bestscore[e]:', bestscore[e]) 478 | # print('np.argmax(batchlab[e]):', np.argmax(batchlab[e])) 479 | o_bestl2[e] = l2 480 | o_bestscore[e] = np.argmax(sc) 481 | o_bestattack[e] = ii 482 | 483 | # adjust the constant as needed 484 | for e in range(batch_size): 485 | if compare(bestscore[e], np.argmax(batchlab[e])) and bestscore[e] != -1: 486 | # success, divide const by two 487 | upper_bound[e] = min(upper_bound[e], CONST[e]) 488 | if upper_bound[e] < 1e9: 489 | CONST[e] = (lower_bound[e] + upper_bound[e]) / 2 490 | else: 491 | # failure, either multiply by 10 if no solution found yet 492 | # or do binary search with the known upper bound 493 | lower_bound[e] = max(lower_bound[e], CONST[e]) 494 | if upper_bound[e] < 1e9: 495 | CONST[e] = (lower_bound[e] + upper_bound[e]) / 2 496 | else: 497 | CONST[e] *= 10 498 | 499 | # return the best solution found 500 | o_bestl2 = np.array(o_bestl2) 501 | print('sucess rate: %.4f' % (1-np.sum(o_bestl2==1e10)/self.batch_size)) 502 | return o_bestattack 503 | -------------------------------------------------------------------------------- /detect_adv_examples.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import print_function 3 | 4 | import os 5 | import argparse 6 | import numpy as np 7 | from sklearn.preprocessing import scale, MinMaxScaler, StandardScaler 8 | from sklearn.metrics import accuracy_score, 
def load_characteristics(dataset, attack, characteristics):
    """
    Load multiple characteristics for one dataset and one attack.

    Each characteristic is read from ``PATH_DATA/<characteristic>_<dataset>_<attack>.npy``.
    In every file the last column holds the labels and the remaining columns
    hold the feature values.

    :param dataset: dataset name used in the file name
    :param attack: attack name used in the file name
    :param characteristics: iterable of characteristic names to load
    :return: (X, Y) where X is the column-wise concatenation of the feature
        columns of all files and Y is the label column of the first file;
        (None, None) if no characteristic was given
    """
    feature_blocks = []
    Y = None
    for characteristic in characteristics:
        file_name = os.path.join(PATH_DATA, "%s_%s_%s.npy" % (characteristic, dataset, attack))
        data = np.load(file_name)
        feature_blocks.append(data[:, :-1])
        if Y is None:
            Y = data[:, -1]  # labels only need to load once

    if not feature_blocks:
        return None, None
    # join once at the end instead of growing X on every iteration
    return np.concatenate(feature_blocks, axis=1), Y


def detect(args):
    """
    Train a logistic-regression detector on one attack and evaluate it.

    :param args: namespace with ``dataset``, ``attack``, ``characteristics``
        (comma separated names) and optionally ``test_attack``. A missing or
        None ``test_attack`` means "evaluate on the training attack".
    :return: (lr, auc_score, scaler): the fitted detector, its ROC-AUC on the
        test split and the MinMaxScaler fitted on the training attack data
    """
    assert args.dataset in DATASETS, \
        "Dataset parameter must be either 'mnist', 'cifar' or 'svhn'"
    assert args.attack in ATTACKS, \
        "Train attack must be either 'fgsm', 'bim-a', 'bim-b', " \
        "'jsma', 'cw-l2'"
    # the command line leaves test_attack at None when -t is omitted; fall
    # back to the training attack instead of failing the check below
    test_attack = getattr(args, 'test_attack', None)
    if test_attack is None:
        test_attack = args.attack
    assert test_attack in ATTACKS, \
        "Test attack must be either 'fgsm', 'bim-a', 'bim-b', " \
        "'jsma', 'cw-l2'"
    characteristics = args.characteristics.split(',')
    for char in characteristics:
        assert char in CHARACTERISTICS, \
            "Characteristic(s) to use 'kd', 'bu', 'lid'"

    print("Loading train attack: %s" % args.attack)
    X, Y = load_characteristics(args.dataset, args.attack, characteristics)

    # normalisation: scale every feature to [0, 1] based on the training attack
    scaler = MinMaxScaler().fit(X)
    X = scaler.transform(X)

    # test attack is the same as training attack
    X_train, Y_train, X_test, Y_test = block_split(X, Y)
    if test_attack != args.attack:
        # test attack is a different attack
        print("Loading test attack: %s" % test_attack)
        X_test, Y_test = load_characteristics(args.dataset, test_attack, characteristics)
        _, _, X_test, Y_test = block_split(X_test, Y_test)

        # apply training normalizer (only here: the same-attack test split
        # above was already transformed together with X)
        X_test = scaler.transform(X_test)

    print("Train data size: ", X_train.shape)
    print("Test data size: ", X_test.shape)

    ## Build detector
    print("LR Detector on [dataset: %s, train_attack: %s, test_attack: %s] with:" %
          (args.dataset, args.attack, test_attack))
    lr = train_lr(X_train, Y_train)

    ## Evaluate detector
    y_pred = lr.predict_proba(X_test)[:, 1]
    y_label_pred = lr.predict(X_test)

    # AUC
    _, _, auc_score = compute_roc(Y_test, y_pred, plot=False)
    precision = precision_score(Y_test, y_label_pred)
    recall = recall_score(Y_test, y_label_pred)
    acc = accuracy_score(Y_test, y_label_pred)
    print('Detector ROC-AUC score: %0.4f, accuracy: %.4f, precision: %.4f, recall: %.4f' % (auc_score, acc, precision, recall))

    return lr, auc_score, scaler
parser.add_argument( 121 | '-b', '--batch_size', 122 | help="The batch size to use for training.", 123 | required=False, type=int 124 | ) 125 | parser.set_defaults(batch_size=100) 126 | parser.set_defaults(test_attack=None) 127 | args = parser.parse_args() 128 | detect(args) 129 | -------------------------------------------------------------------------------- /extract_characteristics.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import print_function 3 | 4 | import os 5 | import argparse 6 | import warnings 7 | import numpy as np 8 | from sklearn.neighbors import KernelDensity 9 | from keras.models import load_model 10 | 11 | from util import (get_data, get_noisy_samples, get_mc_predictions, 12 | get_deep_representations, score_samples, normalize, 13 | get_lids_random_batch, get_kmeans_random_batch) 14 | 15 | # In the original paper, the author used optimal KDE bandwidths dataset-wise 16 | # that were determined from CV tuning 17 | BANDWIDTHS = {'mnist': 3.7926, 'cifar': 0.26, 'svhn': 1.00} 18 | 19 | # Here we further tune bandwidth for each of the 10 classes in mnist, cifar and svhn 20 | # Run tune_kernal_density.py to get the following settings. 
def merge_and_generate_labels(X_pos, X_neg):
    """
    Merge positive and negative artifacts and generate labels.

    :param X_pos: positive samples
    :param X_neg: negative samples
    :return: X: merged samples (positives first), float32 2D ndarray with one
                row per sample
             y: generated labels (1 for positive, 0 for negative), 2D ndarray
                of shape (number of rows of X, 1)
    """
    flattened = []
    for tag, samples in (("X_pos: ", X_pos), ("X_neg: ", X_neg)):
        arr = np.asarray(samples, dtype=np.float32)
        print(tag, arr.shape)
        # one row per sample, everything else flattened into columns
        flattened.append(arr.reshape((arr.shape[0], -1)))
    pos, neg = flattened

    X = np.concatenate((pos, neg))
    y = np.concatenate((np.ones(pos.shape[0]), np.zeros(neg.shape[0])))
    y = y.reshape((X.shape[0], 1))

    return X, y


def get_kd(model, X_train, Y_train, X_test, X_test_noisy, X_test_adv,
           batch_size=None, dataset=None):
    """
    Get kernel density scores.

    :param model: trained model
    :param X_train: training samples used to fit one KDE per class
    :param Y_train: one-hot training labels
    :param X_test: normal test samples
    :param X_test_noisy: noisy test samples
    :param X_test_adv: adversarial test samples
    :param batch_size: batch size for the model calls; defaults to the
        script-level ``args.batch_size`` when omitted
    :param dataset: key into BANDWIDTHS; defaults to the script-level
        ``args.dataset`` when omitted
    :return: artifacts: positive and negative examples with kd values,
            labels: adversarial (label: 1) and normal/noisy (label: 0) examples
    """
    # fall back to the command-line namespace only when not given explicitly,
    # so the function is also usable when the module is imported
    if batch_size is None:
        batch_size = args.batch_size
    if dataset is None:
        dataset = args.dataset

    # Get deep feature representations
    print('Getting deep feature representations...')
    X_train_features = get_deep_representations(model, X_train,
                                                batch_size=batch_size)
    X_test_normal_features = get_deep_representations(model, X_test,
                                                      batch_size=batch_size)
    X_test_noisy_features = get_deep_representations(model, X_test_noisy,
                                                     batch_size=batch_size)
    X_test_adv_features = get_deep_representations(model, X_test_adv,
                                                   batch_size=batch_size)
    # Train one KDE per class
    print('Training KDEs...')
    n_classes = Y_train.shape[1]
    train_classes = Y_train.argmax(axis=1)  # computed once for all classes
    class_inds = {i: np.where(train_classes == i)[0] for i in range(n_classes)}
    warnings.warn("Using pre-set kernel bandwidths that were determined "
                  "optimal for the specific CNN models of the paper. If you've "
                  "changed your model, you'll need to re-optimize the "
                  "bandwidth.")
    bandwidth = BANDWIDTHS[dataset]
    print('bandwidth %.4f for %s' % (bandwidth, dataset))
    kdes = {}
    for i in range(n_classes):
        kdes[i] = KernelDensity(kernel='gaussian', bandwidth=bandwidth) \
            .fit(X_train_features[class_inds[i]])
    # Get model predictions
    print('Computing model predictions...')
    preds_test_normal = model.predict_classes(X_test, verbose=0,
                                              batch_size=batch_size)
    preds_test_noisy = model.predict_classes(X_test_noisy, verbose=0,
                                             batch_size=batch_size)
    preds_test_adv = model.predict_classes(X_test_adv, verbose=0,
                                           batch_size=batch_size)
    # Get density estimates
    print('computing densities...')
    densities_normal = score_samples(kdes, X_test_normal_features, preds_test_normal)
    densities_noisy = score_samples(kdes, X_test_noisy_features, preds_test_noisy)
    densities_adv = score_samples(kdes, X_test_adv_features, preds_test_adv)

    print("densities_normal:", densities_normal.shape)
    print("densities_adv:", densities_adv.shape)
    print("densities_noisy:", densities_noisy.shape)

    # normalization is skipped on purpose: the raw values are saved so that
    # different normalizations can be tried later
    densities_pos = densities_adv
    densities_neg = np.concatenate((densities_normal, densities_noisy))
    artifacts, labels = merge_and_generate_labels(densities_pos, densities_neg)

    return artifacts, labels


def get_bu(model, X_test, X_test_noisy, X_test_adv, batch_size=None):
    """
    Get Bayesian uncertainty scores.

    :param model: trained model
    :param X_test: normal test samples
    :param X_test_noisy: noisy test samples
    :param X_test_adv: adversarial test samples
    :param batch_size: batch size for the Monte Carlo predictions; defaults
        to the script-level ``args.batch_size`` when omitted
    :return: artifacts: positive and negative examples with bu values,
            labels: adversarial (label: 1) and normal/noisy (label: 0) examples
    """
    if batch_size is None:
        batch_size = args.batch_size

    print('Getting Monte Carlo dropout variance predictions...')
    # variance over axis 0 of the Monte Carlo predictions, then mean over axis 1
    uncerts_normal = get_mc_predictions(model, X_test, batch_size=batch_size) \
        .var(axis=0).mean(axis=1)
    uncerts_noisy = get_mc_predictions(model, X_test_noisy, batch_size=batch_size) \
        .var(axis=0).mean(axis=1)
    uncerts_adv = get_mc_predictions(model, X_test_adv, batch_size=batch_size) \
        .var(axis=0).mean(axis=1)

    print("uncerts_normal:", uncerts_normal.shape)
    print("uncerts_noisy:", uncerts_noisy.shape)
    print("uncerts_adv:", uncerts_adv.shape)

    # normalization is skipped on purpose: the raw values are saved so that
    # different normalizations can be tried later
    uncerts_pos = uncerts_adv
    uncerts_neg = np.concatenate((uncerts_normal, uncerts_noisy))
    artifacts, labels = merge_and_generate_labels(uncerts_pos, uncerts_neg)

    return artifacts, labels


def get_lid(model, X_test, X_test_noisy, X_test_adv, k=10, batch_size=100, dataset='mnist'):
    """
    Get local intrinsic dimensionality.

    :param model: trained model
    :param X_test: normal test samples
    :param X_test_noisy: noisy test samples
    :param X_test_adv: adversarial test samples
    :param k: number of nearest neighbours passed to the LID estimation
    :param batch_size: batch size passed to the LID estimation
    :param dataset: dataset name passed to the LID estimation
    :return: artifacts: positive and negative examples with lid values,
            labels: adversarial (label: 1) and normal/noisy (label: 0) examples
    """
    print('Extract local intrinsic dimensionality: k = %s' % k)
    lids_normal, lids_noisy, lids_adv = get_lids_random_batch(model, X_test, X_test_noisy,
                                                              X_test_adv, dataset, k, batch_size)
    print("lids_normal:", lids_normal.shape)
    print("lids_noisy:", lids_noisy.shape)
    print("lids_adv:", lids_adv.shape)

    # normalization is skipped on purpose: the raw values are saved so that
    # different normalizations can be tried later
    lids_pos = lids_adv
    lids_neg = np.concatenate((lids_normal, lids_noisy))
    artifacts, labels = merge_and_generate_labels(lids_pos, lids_neg)

    return artifacts, labels
def _save_characteristics(prefix, dataset, attack, characteristics, labels):
    """Save features and labels side by side as PATH_DATA/<prefix>_<dataset>_<attack>.npy."""
    file_name = os.path.join(PATH_DATA, '%s_%s_%s.npy' % (prefix, dataset, attack))
    data = np.concatenate((characteristics, labels), axis=1)
    np.save(file_name, data)


def main(args):
    """
    Extract detection characteristics for one dataset/attack pair and save them.

    Loads the trained model and the crafted adversarial samples, loads or
    crafts the matching noisy samples, reports accuracy and average L2
    perturbation, keeps only samples whose clean version is classified
    correctly, and writes the requested characteristic(s) to PATH_DATA.

    :param args: namespace with ``dataset``, ``attack``, ``characteristic``,
        ``k_nearest`` and ``batch_size``
    :raises NotImplementedError: if ``args.attack`` is 'all'
    """
    assert args.dataset in ['mnist', 'cifar', 'svhn'], \
        "Dataset parameter must be either 'mnist', 'cifar' or 'svhn'"
    assert args.attack in ['fgsm', 'bim-a', 'bim-b', 'jsma', 'cw-l2', 'all'], \
        "Attack parameter must be either 'fgsm', 'bim-a', 'bim-b', " \
        "'jsma' or 'cw-l2'"
    assert args.characteristic in ['kd', 'bu', 'lid', 'km', 'all'], \
        "Characteristic(s) to use 'kd', 'bu', 'lid', 'km', 'all'"
    model_file = os.path.join(PATH_DATA, "model_%s.h5" % args.dataset)
    assert os.path.isfile(model_file), \
        'model file not found... must first train model using train_model.py.'
    adv_file = os.path.join(PATH_DATA, "Adv_%s_%s.npy" % (args.dataset, args.attack))
    assert os.path.isfile(adv_file), \
        'adversarial sample file not found... must first craft adversarial ' \
        'samples using craft_adv_samples.py'

    print('Loading the data and model...')
    # Load the model
    model = load_model(model_file)
    # Load the dataset
    X_train, Y_train, X_test, Y_test = get_data(args.dataset)
    # Check attack type, select adversarial and noisy samples accordingly
    print('Loading noisy and adversarial samples...')
    if args.attack == 'all':
        # TODO: implement 'all' option
        raise NotImplementedError("'All' types detector not yet implemented.")

    # Load adversarial samples
    X_test_adv = np.load(adv_file)
    print("X_test_adv: ", X_test_adv.shape)

    # as there are some parameters to tune for noisy example, so put the generation
    # step here instead of the adversarial step which can take many hours
    noisy_file = os.path.join(PATH_DATA, 'Noisy_%s_%s.npy' % (args.dataset, args.attack))
    if os.path.isfile(noisy_file):
        X_test_noisy = np.load(noisy_file)
    else:
        # Craft an equal number of noisy samples
        print('Crafting %s noisy samples. ' % args.dataset)
        X_test_noisy = get_noisy_samples(X_test, X_test_adv, args.dataset, args.attack)
        np.save(noisy_file, X_test_noisy)

    # Check model accuracies on each sample type
    n_test = len(X_test)
    for s_type, samples in zip(['normal', 'noisy', 'adversarial'],
                               [X_test, X_test_noisy, X_test_adv]):
        _, acc = model.evaluate(samples, Y_test, batch_size=args.batch_size,
                                verbose=0)
        print("Model accuracy on the %s test set: %0.2f%%" %
              (s_type, 100 * acc))
        # Compute and display average perturbation sizes
        if not s_type == 'normal':
            l2_diff = np.linalg.norm(
                samples.reshape((n_test, -1)) -
                X_test.reshape((n_test, -1)),
                axis=1
            ).mean()
            print("Average L-2 perturbation size of the %s test set: %0.2f" %
                  (s_type, l2_diff))

    # Refine the normal, noisy and adversarial sets to only include samples for
    # which the original version was correctly classified by the model
    preds_test = model.predict_classes(X_test, verbose=0,
                                       batch_size=args.batch_size)
    inds_correct = np.where(preds_test == Y_test.argmax(axis=1))[0]
    print("Number of correctly predict images: %s" % (len(inds_correct)))

    X_test = X_test[inds_correct]
    X_test_noisy = X_test_noisy[inds_correct]
    X_test_adv = X_test_adv[inds_correct]
    print("X_test: ", X_test.shape)
    print("X_test_noisy: ", X_test_noisy.shape)
    print("X_test_adv: ", X_test_adv.shape)

    # characteristic name -> (label used in the log line, file prefix, extractor).
    # kd/bu/lid are written as <name>_<dataset>_<attack>.npy, the file name
    # the detector script loads.
    extractors = {
        'kd': ('KD', 'kd',
               lambda: get_kd(model, X_train, Y_train, X_test, X_test_noisy, X_test_adv)),
        'bu': ('BU', 'bu',
               lambda: get_bu(model, X_test, X_test_noisy, X_test_adv)),
        'lid': ('LID', 'lid',
                lambda: get_lid(model, X_test, X_test_noisy, X_test_adv,
                                args.k_nearest, args.batch_size, args.dataset)),
        'km': ('K-Mean', 'km_pca',
               lambda: get_kmeans(model, X_test, X_test_noisy, X_test_adv,
                                  args.k_nearest, args.batch_size, args.dataset)),
    }

    extract_all = args.characteristic == 'all'
    # 'all' covers kernel density, Bayesian uncertainty and LID (not k-means)
    selected = ['kd', 'bu', 'lid'] if extract_all else [args.characteristic]
    for name in selected:
        display, prefix, extract = extractors[name]
        characteristics, labels = extract()
        if not extract_all:
            print("%s: [characteristic shape: " % display, characteristics.shape,
                  ", label shape: ", labels.shape)
        # save to file
        _save_characteristics(prefix, args.dataset, args.attack, characteristics, labels)
def train(dataset='mnist', batch_size=128, epochs=50):
    """
    Train one model with data augmentation and save it.

    The augmentation consists of random rotations (up to 20 degrees), random
    width/height shifts (up to 20%) and random horizontal flips. The trained
    model is written to ``data/model_<dataset>.h5``.

    :param dataset: name of the dataset passed to get_data / get_model
    :param batch_size: number of samples per generated training batch
    :param epochs: number of training epochs
    :return: None
    """
    print('Data set: %s' % dataset)
    X_train, Y_train, X_test, Y_test = get_data(dataset)
    model = get_model(dataset)
    model.compile(
        loss='categorical_crossentropy',
        optimizer='adadelta',
        metrics=['accuracy']
    )

    # training with data augmentation
    datagen = ImageDataGenerator(
        rotation_range=20,
        width_shift_range=0.2,
        height_shift_range=0.2,
        horizontal_flip=True)

    # whole number of batches per epoch, rounded up so the last partial batch
    # is included (plain division would hand Keras a float)
    steps_per_epoch = -(-len(X_train) // batch_size)

    model.fit_generator(
        datagen.flow(X_train, Y_train, batch_size=batch_size),
        steps_per_epoch=steps_per_epoch,
        epochs=epochs,
        verbose=1,
        validation_data=(X_test, Y_test))

    model.save('data/model_%s.h5' % dataset)
"dataset parameter must be either 'mnist', 'cifar', 'svhn' or all" 60 | if args.dataset == 'all': 61 | for dataset in ['mnist', 'cifar', 'svhn']: 62 | train(dataset, args.batch_size, args.epochs) 63 | else: 64 | train(args.dataset, args.batch_size, args.epochs) 65 | 66 | 67 | if __name__ == "__main__": 68 | parser = argparse.ArgumentParser() 69 | parser.add_argument( 70 | '-d', '--dataset', 71 | help="Dataset to use; either 'mnist', 'cifar', 'svhn' or 'all'", 72 | required=True, type=str 73 | ) 74 | parser.add_argument( 75 | '-e', '--epochs', 76 | help="The number of epochs to train for.", 77 | required=False, type=int 78 | ) 79 | parser.add_argument( 80 | '-b', '--batch_size', 81 | help="The batch size to use for training.", 82 | required=False, type=int 83 | ) 84 | parser.set_defaults(epochs=120) 85 | parser.set_defaults(batch_size=100) 86 | args = parser.parse_args() 87 | main(args) 88 | -------------------------------------------------------------------------------- /util.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import print_function 3 | 4 | import os 5 | import multiprocessing as mp 6 | from subprocess import call 7 | import warnings 8 | import numpy as np 9 | import scipy.io as sio 10 | from tqdm import tqdm 11 | import matplotlib.pyplot as plt 12 | from sklearn.metrics import roc_curve, auc, roc_auc_score 13 | from sklearn.linear_model import LogisticRegressionCV 14 | from sklearn.preprocessing import scale 15 | import keras.backend as K 16 | from keras.datasets import mnist, cifar10 17 | from keras.utils import np_utils 18 | from keras.models import Sequential 19 | from keras.layers import Dense, Dropout, Activation, Flatten, BatchNormalization 20 | from keras.layers import Conv2D, MaxPooling2D 21 | from keras.regularizers import l2 22 | import tensorflow as tf 23 | from scipy.spatial.distance import pdist, cdist, squareform 24 | from keras import 
def get_data(dataset='mnist'):
    """
    Load a data set and return scaled images together with one-hot labels.

    Pixels are divided by 255 and shifted by (1.0 - CLIP_MAX); with the
    module setting CLIP_MAX = 0.5 the images end up in [-0.5, 0.5]
    (instead of [0, 1]), which suits the C&W attack and generally gives
    better performance.

    The SVHN .mat files are downloaded with curl into PATH_DATA when they
    are not present there yet.

    :param dataset: 'mnist', 'cifar' or 'svhn'
    :return: X_train, Y_train, X_test, Y_test (labels one-hot encoded
             over 10 classes)
    :raises IOError: if an SVHN download fails
    """
    assert dataset in ['mnist', 'cifar', 'svhn'], \
        "dataset parameter must be either 'mnist' 'cifar' or 'svhn'"
    if dataset == 'mnist':
        # the data, shuffled and split between train and test sets
        (X_train, y_train), (X_test, y_test) = mnist.load_data()
        # reshape to (n_samples, 28, 28, 1)
        X_train = X_train.reshape(-1, 28, 28, 1)
        X_test = X_test.reshape(-1, 28, 28, 1)
    elif dataset == 'cifar':
        # the data, shuffled and split between train and test sets
        (X_train, y_train), (X_test, y_test) = cifar10.load_data()
    else:
        svhn_files = (
            ('train', 'svhn_train.mat',
             'http://ufldl.stanford.edu/housenumbers/train_32x32.mat'),
            ('test', 'svhn_test.mat',
             'http://ufldl.stanford.edu/housenumbers/test_32x32.mat'),
        )
        if not os.path.isdir(PATH_DATA):
            os.makedirs(PATH_DATA)
        for split, file_name, url in svhn_files:
            # Download to the very same path that is checked here and loaded
            # below; the files used to be written to '../data/', where the
            # loader never looked.
            file_path = os.path.join(PATH_DATA, file_name)
            if not os.path.isfile(file_path):
                print('Downloading SVHN %s set...' % split)
                # argument list instead of a shell string: no shell quoting
                if call(['curl', '-o', file_path, url]) != 0:
                    raise IOError('Downloading %s to %s failed'
                                  % (url, file_path))
        train = sio.loadmat(os.path.join(PATH_DATA, 'svhn_train.mat'))
        test = sio.loadmat(os.path.join(PATH_DATA, 'svhn_test.mat'))
        # move the last axis of the stored array to the front so that the
        # sample index comes first
        X_train = np.transpose(train['X'], axes=[3, 0, 1, 2])
        X_test = np.transpose(test['X'], axes=[3, 0, 1, 2])
        # reshape (n_samples, 1) to (n_samples,) and change 1-index
        # to 0-index
        y_train = np.reshape(train['y'], (-1,)) - 1
        y_test = np.reshape(test['y'], (-1,)) - 1

    # cast pixels to floats, scale to [0, 1] and shift by (1.0 - CLIP_MAX),
    # i.e. to [-0.5, 0.5] when CLIP_MAX is 0.5
    X_train = X_train.astype('float32')
    X_test = X_test.astype('float32')
    X_train = (X_train/255.0) - (1.0 - CLIP_MAX)
    X_test = (X_test/255.0) - (1.0 - CLIP_MAX)

    # one-hot-encode the labels
    Y_train = np_utils.to_categorical(y_train, 10)
    Y_test = np_utils.to_categorical(y_test, 10)

    print("X_train:", X_train.shape)
    print("Y_train:", Y_train.shape)
    print("X_test:", X_test.shape)
    print("Y_test", Y_test.shape)

    return X_train, Y_train, X_test, Y_test
124 | """ 125 | assert dataset in ['mnist', 'cifar', 'svhn'], \ 126 | "dataset parameter must be either 'mnist' 'cifar' or 'svhn'" 127 | if dataset == 'mnist': 128 | # MNIST model: 0, 2, 7, 10 129 | layers = [ 130 | Conv2D(64, (3, 3), padding='valid', input_shape=(28, 28, 1)), # 0 131 | Activation('relu'), # 1 132 | BatchNormalization(), # 2 133 | Conv2D(64, (3, 3)), # 3 134 | Activation('relu'), # 4 135 | BatchNormalization(), # 5 136 | MaxPooling2D(pool_size=(2, 2)), # 6 137 | Dropout(0.5), # 7 138 | Flatten(), # 8 139 | Dense(128), # 9 140 | Activation('relu'), # 10 141 | BatchNormalization(), # 11 142 | Dropout(0.5), # 12 143 | Dense(10), # 13 144 | ] 145 | elif dataset == 'cifar': 146 | # CIFAR-10 model 147 | layers = [ 148 | Conv2D(32, (3, 3), padding='same', input_shape=(32, 32, 3)), # 0 149 | Activation('relu'), # 1 150 | BatchNormalization(), # 2 151 | Conv2D(32, (3, 3), padding='same'), # 3 152 | Activation('relu'), # 4 153 | BatchNormalization(), # 5 154 | MaxPooling2D(pool_size=(2, 2)), # 6 155 | 156 | Conv2D(64, (3, 3), padding='same'), # 7 157 | Activation('relu'), # 8 158 | BatchNormalization(), # 9 159 | Conv2D(64, (3, 3), padding='same'), # 10 160 | Activation('relu'), # 11 161 | BatchNormalization(), # 12 162 | MaxPooling2D(pool_size=(2, 2)), # 13 163 | 164 | Conv2D(128, (3, 3), padding='same'), # 14 165 | Activation('relu'), # 15 166 | BatchNormalization(), # 16 167 | Conv2D(128, (3, 3), padding='same'), # 17 168 | Activation('relu'), # 18 169 | BatchNormalization(), # 19 170 | MaxPooling2D(pool_size=(2, 2)), # 20 171 | 172 | Flatten(), # 21 173 | Dropout(0.5), # 22 174 | 175 | Dense(1024, kernel_regularizer=l2(0.01), bias_regularizer=l2(0.01)), # 23 176 | Activation('relu'), # 24 177 | BatchNormalization(), # 25 178 | Dropout(0.5), # 26 179 | Dense(512, kernel_regularizer=l2(0.01), bias_regularizer=l2(0.01)), # 27 180 | Activation('relu'), # 28 181 | BatchNormalization(), # 29 182 | Dropout(0.5), # 30 183 | Dense(10), # 31 184 | ] 185 | else: 
def lid_term(logits, batch_size=100, k=20):
    """Calculate the LID loss term for a minibatch of logits.

    Every row of ``logits`` is compared against all rows of the same
    minibatch; the LID estimate of a row is -k / sum(log(d_i / d_k)) over
    the distances d_1 <= ... <= d_k to its k nearest neighbours.

    :param logits: 2-D tensor with one row per sample; the minibatch must
                   contain at least k + 1 rows
    :param batch_size: unused, kept for backward compatibility
    :param k: number of nearest neighbours used for the estimate
              (the value 20 was previously hard-coded)
    :return: 1-D tensor with one LID estimate per row of ``logits``
    """
    # y_pred = tf.nn.softmax(logits)
    y_pred = logits

    # calculate pairwise squared distance: |a|^2 - 2ab + |b|^2
    r = tf.reduce_sum(tf.square(y_pred), axis=1)
    # turn r into column vector
    r = tf.reshape(r, [-1, 1])
    D = r - 2 * tf.matmul(y_pred, tf.transpose(y_pred)) + tf.transpose(r)
    # rounding can leave tiny negative values (e.g. on the diagonal), which
    # would turn into NaN under the square root
    D = tf.maximum(D, 0.0)

    # find the k nearest neighbours: top_k on the negated distances returns
    # the k + 1 smallest distances in ascending order; the first one is the
    # distance of each row to itself and is dropped
    D1 = tf.sqrt(D + 1e-9)
    D2, _ = tf.nn.top_k(-D1, k=k + 1, sorted=True)
    D3 = -D2[:, 1:]

    # divide every row by its largest (k-th) neighbour distance
    m = tf.transpose(tf.multiply(tf.transpose(D3), 1.0 / D3[:, -1]))
    v_log = tf.reduce_sum(tf.log(m + 1e-9), axis=1)  # to avoid nan
    lids = -k / v_log

    return lids
def flip(x, nb_diff):
    """
    Helper function for get_noisy_samples.

    Returns a copy of ``x`` in which exactly ``nb_diff`` randomly chosen
    entries that are below CLIP_MAX have been set to CLIP_MAX. ``x`` itself
    is not modified.

    :param x: numpy array of any shape
    :param nb_diff: number of entries to flip
    :return: array with the shape of ``x``
    :raises AssertionError: if fewer than ``nb_diff`` entries are below
                            CLIP_MAX
    """
    original_shape = x.shape
    flat = np.copy(np.reshape(x, (-1,)))
    candidate_inds = np.where(flat < CLIP_MAX)[0]
    assert candidate_inds.shape[0] >= nb_diff
    if nb_diff > 0:
        # Sample without replacement: with the default replace=True the same
        # index can be drawn several times and fewer than nb_diff entries
        # would be flipped.
        inds = np.random.choice(candidate_inds, nb_diff, replace=False)
        flat[inds] = CLIP_MAX

    return np.reshape(flat, original_shape)
def get_mc_predictions(model, X, nb_iter=50, batch_size=256):
    """
    Run ``nb_iter`` forward passes of ``model`` over ``X`` and stack them.

    Each pass feeds X through the network in mini-batches with the Keras
    learning-phase flag set to 1 and records the output of the last layer.

    :param model: Keras model; the input of its first layer and the output
                  of its last layer are used
    :param X: input samples
    :param nb_iter: number of forward passes
    :param batch_size: number of samples fed per call
    :return: numpy array holding the nb_iter passes, each of shape
             (len(X), output_dim)
    """
    last_output = model.layers[-1].output
    n_outputs = last_output.shape[-1].value
    forward = K.function(
        [model.layers[0].input, K.learning_phase()],
        [last_output]
    )

    def single_pass():
        # one complete sweep over X, mini-batch by mini-batch
        n_batches = int(np.ceil(X.shape[0] / float(batch_size)))
        preds = np.zeros(shape=(len(X), n_outputs))
        for b in range(n_batches):
            lo, hi = b * batch_size, (b + 1) * batch_size
            preds[lo:hi] = forward([X[lo:hi], 1])[0]
        return preds

    return np.asarray([single_pass() for _ in tqdm(range(nb_iter))])
# lid of a single query point x
def mle_single(data, x, k=20):
    """
    Maximum likelihood estimate of the LID of one query point.

    The distances from ``x`` to all rows of ``data`` are sorted, the
    smallest one is discarded (it is the distance of the query to itself
    when x is one of the rows of data) and the next k distances
    d_1 <= ... <= d_k give the estimate -k / sum(log(d_i / d_k)).

    :param data: reference points, one per row
    :param x: query point, either 1-D or a 2-D array whose first row is used
    :param k: number of neighbours; capped at len(data) - 1
    :return: the LID estimate as a scalar
    :raises ValueError: if ``data`` holds fewer than two points
    """
    data = np.asarray(data, dtype=np.float32)
    x = np.asarray(x, dtype=np.float32)
    if x.ndim == 1:
        x = x.reshape((-1, x.shape[0]))

    k = min(k, len(data) - 1)
    if k < 1:
        raise ValueError("mle_single needs at least two reference points")

    # sort all rows at once instead of calling np.sort / a lambda per row
    dists = np.sort(cdist(x, data), axis=1)[:, 1:k + 1]
    lids = -k / np.sum(np.log(dists / dists[:, -1:]), axis=1)
    return lids[0]
# mean distance of x to its k nearest neighbours
def kmean_batch(data, batch, k):
    """
    Mean distance of every query point to its k nearest reference points.

    For each row of ``batch`` the distances to all rows of ``data`` are
    sorted, the smallest one is discarded (it is the distance of the query
    to itself when the query is one of the rows of data) and the next k
    distances are averaged.

    :param data: reference points, 2-D, one per row
    :param batch: query points, 2-D, one per row
    :param k: number of neighbours; capped at len(data) - 1
    :return: 1-D array with one mean distance per row of ``batch``
    """
    data = np.asarray(data, dtype=np.float32)
    batch = np.asarray(batch, dtype=np.float32)

    k = min(k, len(data) - 1)
    # sort and average all rows at once instead of one Python call per row
    dists = np.sort(cdist(batch, data), axis=1)[:, 1:k + 1]
    return np.mean(dists, axis=1)


# mean distance of x to its k nearest neighbours, measured after PCA
def kmean_pca_batch(data, batch, k=10):
    """
    Like kmean_batch, but the distances are measured in a 2-D PCA projection.

    For every query point a separate PCA with two components is fitted on
    the reference points plus that single query point.

    :param data: reference points, 2-D, one per row
    :param batch: query points, 2-D, one per row
    :param k: number of neighbours
    :return: 1-D array with one mean distance per row of ``batch``
    """
    data = np.asarray(data, dtype=np.float32)
    batch = np.asarray(batch, dtype=np.float32)
    a = np.zeros(batch.shape[0])
    for i in range(batch.shape[0]):
        tmp = np.concatenate((data, batch[i:i + 1]))
        tmp_pca = PCA(n_components=2).fit_transform(tmp)
        # Keep the projected query 2-D (shape (1, 2)): cdist rejects 1-D
        # input, so passing tmp_pca[-1] made this function always fail.
        a[i] = kmean_batch(tmp_pca[:-1], tmp_pca[-1:], k=k)[0]
    return a
473 | :param model: 474 | :param X: normal images 475 | :param X_noisy: noisy images 476 | :param X_adv: advserial images 477 | :param dataset: 'mnist', 'cifar', 'svhn', has different DNN architectures 478 | :param k: the number of nearest neighbours for LID estimation 479 | :param batch_size: default 100 480 | :return: lids: LID of normal images of shape (num_examples, lid_dim) 481 | lids_adv: LID of advs images of shape (num_examples, lid_dim) 482 | """ 483 | # get deep representations 484 | funcs = [K.function([model.layers[0].input, K.learning_phase()], [out]) 485 | for out in get_layer_wise_activations(model, dataset)] 486 | lid_dim = len(funcs) 487 | print("Number of layers to estimate: ", lid_dim) 488 | 489 | def estimate(i_batch): 490 | start = i_batch * batch_size 491 | end = np.minimum(len(X), (i_batch + 1) * batch_size) 492 | n_feed = end - start 493 | lid_batch = np.zeros(shape=(n_feed, lid_dim)) 494 | lid_batch_adv = np.zeros(shape=(n_feed, lid_dim)) 495 | lid_batch_noisy = np.zeros(shape=(n_feed, lid_dim)) 496 | for i, func in enumerate(funcs): 497 | X_act = func([X[start:end], 0])[0] 498 | X_act = np.asarray(X_act, dtype=np.float32).reshape((n_feed, -1)) 499 | # print("X_act: ", X_act.shape) 500 | 501 | X_adv_act = func([X_adv[start:end], 0])[0] 502 | X_adv_act = np.asarray(X_adv_act, dtype=np.float32).reshape((n_feed, -1)) 503 | # print("X_adv_act: ", X_adv_act.shape) 504 | 505 | X_noisy_act = func([X_noisy[start:end], 0])[0] 506 | X_noisy_act = np.asarray(X_noisy_act, dtype=np.float32).reshape((n_feed, -1)) 507 | # print("X_noisy_act: ", X_noisy_act.shape) 508 | 509 | # random clean samples 510 | # Maximum likelihood estimation of local intrinsic dimensionality (LID) 511 | lid_batch[:, i] = mle_batch(X_act, X_act, k=k) 512 | # print("lid_batch: ", lid_batch.shape) 513 | lid_batch_adv[:, i] = mle_batch(X_act, X_adv_act, k=k) 514 | # print("lid_batch_adv: ", lid_batch_adv.shape) 515 | lid_batch_noisy[:, i] = mle_batch(X_act, X_noisy_act, k=k) 516 | # 
def get_kmeans_random_batch(model, X, X_noisy, X_adv, dataset, k=10, batch_size=100, pca=False):
    """
    Get the mean distance of each sample to its k nearest neighbours among
    the normal samples of the minibatch it lies in.

    The distances are measured on the output of the second-to-last layer
    of the model (``model.layers[-2]``).

    :param model: Keras model
    :param X: normal images
    :param X_noisy: noisy images
    :param X_adv: adversarial images
    :param dataset: 'mnist', 'cifar', 'svhn'; not used by this function
    :param k: the number of nearest neighbours to average over
    :param batch_size: default 100
    :param pca: if True use kmean_pca_batch (distances measured after a PCA
                projection), otherwise kmean_batch
    :return: kms_normal: values for the normal images (num_examples, 1)
            kms_noisy: values for the noisy images (num_examples, 1)
            kms_adv: values for the adversarial images (num_examples, 1)
    """
    # a single backend function: model input -> output of layer -2
    funcs = [K.function([model.layers[0].input, K.learning_phase()], [model.layers[-2].output])]
    km_dim = len(funcs)
    print("Number of layers to use: ", km_dim)

    def flat_activations(func, samples, n_rows):
        # learning-phase flag 0; every sample is flattened to one row
        acts = func([samples, 0])[0]
        return np.asarray(acts, dtype=np.float32).reshape((n_rows, -1))

    def estimate(i_batch):
        start = i_batch * batch_size
        end = np.minimum(len(X), (i_batch + 1) * batch_size)
        n_feed = end - start
        # choose the distance routine once for the whole minibatch
        knn_mean = kmean_pca_batch if pca else kmean_batch

        km_batch = np.zeros(shape=(n_feed, km_dim))
        km_batch_adv = np.zeros(shape=(n_feed, km_dim))
        km_batch_noisy = np.zeros(shape=(n_feed, km_dim))
        for col, func in enumerate(funcs):
            X_act = flat_activations(func, X[start:end], n_feed)
            X_adv_act = flat_activations(func, X_adv[start:end], n_feed)
            X_noisy_act = flat_activations(func, X_noisy[start:end], n_feed)

            # the normal activations are the reference set in all three cases
            km_batch[:, col] = knn_mean(X_act, X_act, k=k)
            km_batch_adv[:, col] = knn_mean(X_act, X_adv_act, k=k)
            km_batch_noisy[:, col] = knn_mean(X_act, X_noisy_act, k=k)
        return km_batch, km_batch_noisy, km_batch_adv

    collected = {'normal': [], 'noisy': [], 'adv': []}
    n_batches = int(np.ceil(X.shape[0] / float(batch_size)))
    for i_batch in tqdm(range(n_batches)):
        part, part_noisy, part_adv = estimate(i_batch)
        collected['normal'].extend(part)
        collected['adv'].extend(part_adv)
        collected['noisy'].extend(part_noisy)

    kms = np.asarray(collected['normal'], dtype=np.float32)
    kms_noisy = np.asarray(collected['noisy'], dtype=np.float32)
    kms_adv = np.asarray(collected['adv'], dtype=np.float32)

    return kms, kms_noisy, kms_adv
def compute_roc(y_true, y_pred, plot=False):
    """
    Compute the ROC curve and the area under it, optionally plotting the curve.

    :param y_true: ground truth
    :param y_pred: predictions
    :param plot: if True, show the curve in a matplotlib figure
    :return: fpr, tpr, auc_score
    """
    curve = roc_curve(y_true, y_pred)
    false_pos, true_pos = curve[0], curve[1]
    auc_score = roc_auc_score(y_true, y_pred)

    if not plot:
        return false_pos, true_pos, auc_score

    legend_text = 'ROC (AUC = %0.4f)' % auc_score
    plt.figure(figsize=(7, 6))
    plt.plot(false_pos, true_pos, color='blue', label=legend_text)
    plt.legend(loc='lower right')
    plt.title("ROC Curve")
    plt.xlabel("FPR")
    plt.ylabel("TPR")
    plt.show()

    return false_pos, true_pos, auc_score
def random_split(X, Y):
    """
    Shuffle the samples and split them into 80% for training and 20% for testing.

    :param X: samples, indexed along the first axis
    :param Y: labels, aligned with X
    :return: X_train, Y_train, X_test, Y_test
    """
    print("random split 80%, 20% for training and testing")
    n_total = X.shape[0]
    cut = int(n_total * 0.8)
    # one permutation, applied to samples and labels alike
    order = np.random.permutation(n_total)
    train_idx, test_idx = order[:cut], order[cut:]

    return X[train_idx], Y[train_idx], X[test_idx], Y[test_idx]


def block_split(X, Y):
    """
    Split the data into a training and a testing part without shuffling.

    X and Y are cut into three consecutive parts of int(n / 3) samples
    (the last part also receives the remainder). The number of training
    samples taken from the front of every part is rounded down to a
    multiple of 100.

    :param X: samples; the three parts are treated as adversarial, normal
              and noisy samples, in that order
    :param Y: labels, aligned with X
    :return: X_train, Y_train, X_test, Y_test, each concatenated in the
             order normal, noisy, adversarial
    """
    print("Isolated split 80%, 20% for training and testing")
    third = int(X.shape[0] / 3)
    num_train = int(third*0.008) * 100

    # output order: normal, noisy, adversarial
    parts = (slice(third, 2*third), slice(2*third, None), slice(None, third))
    X_parts = [X[part] for part in parts]
    Y_parts = [Y[part] for part in parts]

    X_train = np.concatenate([chunk[:num_train] for chunk in X_parts])
    Y_train = np.concatenate([chunk[:num_train] for chunk in Y_parts])
    X_test = np.concatenate([chunk[num_train:] for chunk in X_parts])
    Y_test = np.concatenate([chunk[num_train:] for chunk in Y_parts])

    return X_train, Y_train, X_test, Y_test