├── CIFAR-10
│   ├── attack.py
│   ├── evaluate.py
│   ├── fol_guided_fuzzing.py
│   ├── gen_adv.py
│   ├── metrics.py
│   ├── models.py
│   ├── select_retrain.py
│   └── train_model.py
├── FASHION
│   ├── models.py
│   └── train_model.py
├── LICENSE
├── MNIST
│   ├── attack.py
│   ├── evaluate.py
│   ├── fol_guided_fuzzing.py
│   ├── gen_adv.py
│   ├── metrics.py
│   ├── mnist.npz
│   ├── models.py
│   ├── select_retrain.py
│   └── train_model.py
├── README.md
├── SVHN
│   ├── models.py
│   └── train_model.py
└── metrics.py

/CIFAR-10/attack.py:
--------------------------------------------------------------------------------
1 | from tensorflow import keras
2 | import tensorflow as tf
3 | import numpy as np
4 | 
5 | 
6 | 
7 | class FGSM:
8 |     """
9 |     We use FGSM to generate a batch of adversarial examples.
10 |     """
11 |     def __init__(self, model, ep=0.01, isRand=True):
12 |         """
13 |         isRand is set True to improve the attack success rate.
14 |         """
15 |         self.isRand = isRand
16 |         self.model = model
17 |         self.ep = ep
18 | 
19 |     def generate(self, x, y, randRate=1):
20 |         """
21 |         x: clean inputs, shape of x: [batch_size, width, height, channel]
22 |         y: ground truth, one hot vectors, shape of y: [batch_size, N_classes]
23 |         """
24 |         fols = []
25 |         target = tf.constant(y)
26 | 
27 |         xi = x.copy()
28 |         if self.isRand:
29 |             x = x + np.random.uniform(-self.ep * randRate, self.ep * randRate, x.shape)
30 |             x = np.clip(x, 0, 1)
31 | 
32 |         x = tf.Variable(x)
33 |         with tf.GradientTape() as tape:
34 |             loss = keras.losses.categorical_crossentropy(target, self.model(x))
35 |         grads = tape.gradient(loss, x)
36 |         delta = tf.sign(grads)
37 |         x_adv = x + self.ep * delta
38 | 
39 |         x_adv = tf.clip_by_value(x_adv, clip_value_min=xi-self.ep, clip_value_max=xi+self.ep)
40 |         x_adv = tf.clip_by_value(x_adv, clip_value_min=0, clip_value_max=1)
41 | 
42 |         idxs = np.where(np.argmax(self.model(x_adv), axis=1) != np.argmax(y, axis=1))[0]
43 |         print("SUCCESS:", len(idxs))
44 | 
45 |         x_adv, xi, target = x_adv.numpy()[idxs], xi[idxs], target.numpy()[idxs]
46 |         x_adv, target = tf.Variable(x_adv), tf.constant(target)
47 | 
48 |         preds = self.model(x_adv).numpy()
49 |         ginis = np.sum(np.square(preds), axis=1)
50 | 
51 |         with tf.GradientTape() as tape:
52 |             loss = keras.losses.categorical_crossentropy(target, self.model(x_adv))
53 |         grads = tape.gradient(loss, x_adv)
54 |         grad_norm = np.linalg.norm(grads.numpy().reshape(x_adv.shape[0], -1), ord=1, axis=1)
55 |         grads_flat = grads.numpy().reshape(x_adv.shape[0], -1)
56 |         diff = (x_adv.numpy() - xi).reshape(x_adv.shape[0], -1)
57 |         for i in range(x_adv.shape[0]):
58 |             i_fol = -np.dot(grads_flat[i], diff[i]) + self.ep * grad_norm[i]
59 |             fols.append(i_fol)
60 | 
61 |         return x_adv.numpy(), target.numpy(), np.array(fols), ginis
62 | 
63 | 
64 | 
65 | class PGD:
66 |     """
67 |     We use PGD to generate a batch of adversarial examples. PGD can be seen as an iterative version of FGSM.
68 |     """
69 |     def __init__(self, model, ep=0.01, step=None, epochs=10, isRand=True):
70 |         """
71 |         isRand is set True to improve the attack success rate.
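        ep is the L_inf budget; each of the `epochs` iterations moves the input by
        `step` (ep/6 by default), so the attack can reach the edge of the budget
        while every intermediate result stays inside the [xi-ep, xi+ep] clip range.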
72 |         """
73 |         self.isRand = isRand
74 |         self.model = model
75 |         self.ep = ep
76 |         if step is None:
77 |             step = ep/6
78 |         self.step, self.epochs = step, epochs
79 | 
80 |     def generate(self, x, y, randRate=1):
81 |         """
82 |         x: clean inputs, shape of x: [batch_size, width, height, channel]
83 |         y: ground truth, one hot vectors, shape of y: [batch_size, N_classes]
84 |         """
85 |         fols = []
86 |         target = tf.constant(y)
87 | 
88 |         xi = x.copy()
89 |         if self.isRand:
90 |             x = x + np.random.uniform(-self.ep * randRate, self.ep * randRate, x.shape)
91 |             x = np.clip(x, 0, 1)
92 | 
93 |         x_adv = tf.Variable(x)
94 |         for i in range(self.epochs):
95 |             with tf.GradientTape() as tape:
96 |                 loss = keras.losses.categorical_crossentropy(target, self.model(x_adv))
97 |             grads = tape.gradient(loss, x_adv)
98 |             delta = tf.sign(grads)
99 |             x_adv.assign_add(self.step * delta)
100 |             x_adv = tf.clip_by_value(x_adv, clip_value_min=xi-self.ep, clip_value_max=xi+self.ep)
101 |             x_adv = tf.clip_by_value(x_adv, clip_value_min=0, clip_value_max=1)
102 |             x_adv = tf.Variable(x_adv)
103 | 
104 |         idxs = np.where(np.argmax(self.model(x_adv), axis=1) != np.argmax(y, axis=1))[0]
105 |         print("SUCCESS:", len(idxs))
106 | 
107 |         x_adv, xi, target = x_adv.numpy()[idxs], xi[idxs], target.numpy()[idxs]
108 |         x_adv, target = tf.Variable(x_adv), tf.constant(target)
109 | 
110 |         preds = self.model(x_adv).numpy()
111 |         ginis = np.sum(np.square(preds), axis=1)
112 | 
113 |         with tf.GradientTape() as tape:
114 |             loss = keras.losses.categorical_crossentropy(target, self.model(x_adv))
115 |         grads = tape.gradient(loss, x_adv)
116 |         grad_norm = np.linalg.norm(grads.numpy().reshape(x_adv.shape[0], -1), ord=1, axis=1)
117 |         grads_flat = grads.numpy().reshape(x_adv.shape[0], -1)
118 |         diff = (x_adv.numpy() - xi).reshape(x_adv.shape[0], -1)
119 |         for i in range(x_adv.shape[0]):
120 |             i_fol = -np.dot(grads_flat[i], diff[i]) + self.ep * grad_norm[i]
121 |             fols.append(i_fol)
122 | 
123 |         return x_adv.numpy(), target.numpy(), np.array(fols), ginis
124 | 
125 | 
126 | 
127 | 
--------------------------------------------------------------------------------
/CIFAR-10/evaluate.py:
--------------------------------------------------------------------------------
1 | from tensorflow import keras
2 | import tensorflow as tf
3 | import numpy as np
4 | import matplotlib.pyplot as plt
5 | # %matplotlib inline
6 | 
7 | import os
8 | os.environ["CUDA_VISIBLE_DEVICES"]="0"
9 | 
10 | gpus = tf.config.experimental.list_physical_devices('GPU')
11 | if gpus:
12 |     try:
13 |         for gpu in gpus:
14 |             tf.config.experimental.set_memory_growth(gpu, True)
15 |     except RuntimeError as e:
16 |         print(e)
17 | 
18 | 
19 | # Load the generated adversarial inputs for Robustness evaluation.
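# (These .npz files are written by gen_adv.py; run it first so the loads below succeed.)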
20 | with np.load("./FGSM_Test.npz") as f: 21 | fgsm_test, fgsm_test_labels = f['advs'], f['labels'] 22 | 23 | with np.load("./PGD_Test.npz") as f: 24 | pgd_test, pgd_test_labels = f['advs'], f['labels'] 25 | 26 | fp_test = np.concatenate((fgsm_test, pgd_test)) 27 | fp_test_labels = np.concatenate((fgsm_test_labels, pgd_test_labels)) 28 | 29 | 30 | sNums = [500*i for i in [2,4,8,12,20]] 31 | strategies = ['best', 'kmst', 'gini'] 32 | acc_pure = [[] for i in range(len(strategies))] 33 | acc_fp = [[] for i in range(len(strategies))] 34 | 35 | 36 | for num in sNums: 37 | for i in range(len(strategies)): 38 | s = strategies[i] 39 | model_path = "./checkpoint/best_Resnet_MIX_%d_%s.h5" % (num, s) 40 | best_model = keras.models.load_model(model_path) 41 | lfp, afp = best_model.evaluate(fp_test, fp_test_labels, verbose=0) 42 | acc_fp[i].append(afp) 43 | 44 | 45 | 46 | colormap = ['r','limegreen', 'dodgerblue'] 47 | plt.figure(figsize=(8,6)) 48 | x = [i/max(sNums) for i in sNums] 49 | for i in range(len(strategies)): 50 | plt.plot(x, acc_fp[i],'o-', label=strategies[i], color=colormap[i], linewidth=3, markersize=8) 51 | 52 | plt.title("CIFAR-ATTACK", fontsize=20) 53 | plt.xlabel("# Percentage of test cases", fontsize=20) 54 | plt.ylabel("Robustness", fontsize=20) 55 | plt.xticks(x, [1,2,4,6,10],fontsize=15) 56 | plt.yticks(fontsize=15) 57 | plt.legend(fontsize=15) 58 | 59 | fig = plt.gcf() 60 | fig.savefig('./cifar_attack_robustness.pdf') 61 | 62 | -------------------------------------------------------------------------------- /CIFAR-10/fol_guided_fuzzing.py: -------------------------------------------------------------------------------- 1 | from tensorflow import keras 2 | import random 3 | import tensorflow as tf 4 | import numpy as np 5 | import time 6 | from tensorflow.keras.datasets import cifar10 7 | 8 | import os 9 | os.environ["CUDA_VISIBLE_DEVICES"]="-1" 10 | 11 | gpus = tf.config.experimental.list_physical_devices('GPU') 12 | if gpus: 13 | try: 14 | for gpu in gpus: 15 | tf.config.experimental.set_memory_growth(gpu, True) 16 | except RuntimeError as e: 17 | print(e) 18 | 19 | 20 | 21 | # cifar 22 | (x_train, y_train), (x_test, y_test) = cifar10.load_data() 23 | 24 | x_train = x_train.astype('float32') 25 | x_test = x_test.astype('float32') 26 | x_train = x_train/255 27 | x_test = x_test/255 28 | 29 | y_train = keras.utils.to_categorical(y_train, 10) 30 | y_test = keras.utils.to_categorical(y_test, 10) 31 | 32 | model = keras.models.load_model("./saved_models/cifar10_resnet20_model.h5") 33 | 34 | 35 | seeds = random.sample(list(range(x_train.shape[0])), 1000) 36 | images = x_train[seeds] 37 | labels = y_train[seeds] 38 | 39 | 40 | # some training samples is static, i.e., grad=<0>, hard to generate. 
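# We therefore keep only seeds whose loss gradient has an L2 norm above 1e-3;
# zero-gradient seeds cannot be moved by the gradient-based mutations below.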
41 | seeds_filter = [] 42 | gen_img = tf.Variable(images) 43 | with tf.GradientTape() as g: 44 | loss = keras.losses.categorical_crossentropy(labels, model(gen_img)) 45 | grads = g.gradient(loss, gen_img) 46 | 47 | fols = np.linalg.norm((grads.numpy()+1e-20).reshape(images.shape[0], -1), ord=2, axis=1) 48 | seeds_filter = np.where(fols > 1e-3)[0] 49 | 50 | 51 | start_t = time.time() 52 | lr = 0.1 53 | total_sets = [] 54 | for idx in seeds_filter: 55 | # delta_t = time.time() - start_t 56 | # if delta_t > 300: 57 | # break 58 | img_list = [] 59 | tmp_img = images[[idx]] 60 | orig_img = tmp_img.copy() 61 | orig_norm = np.linalg.norm(orig_img) 62 | img_list.append(tf.identity(tmp_img)) 63 | logits = model(tmp_img) 64 | orig_index = np.argmax(logits[0]) 65 | target = keras.utils.to_categorical([orig_index], 10) 66 | label_top5 = np.argsort(logits[0])[-5:] 67 | 68 | folMAX = 0 69 | epoch = 0 70 | while len(img_list) > 0: 71 | gen_img = img_list.pop(0) 72 | for _ in range(2): 73 | gen_img = tf.Variable(gen_img) 74 | with tf.GradientTape(persistent=True) as g: 75 | loss = keras.losses.categorical_crossentropy(target, model(gen_img)) 76 | grads = g.gradient(loss, gen_img) 77 | fol = tf.norm(grads+1e-20) 78 | g.watch(fol) 79 | logits = model(gen_img) 80 | obj = fol - logits[0][orig_index] 81 | dl_di = g.gradient(obj, gen_img) 82 | del g 83 | 84 | gen_img = gen_img + dl_di * lr * (random.random() + 0.5) 85 | gen_img = tf.clip_by_value(gen_img, clip_value_min=0, clip_value_max=1) 86 | 87 | with tf.GradientTape() as t: 88 | t.watch(gen_img) 89 | loss = keras.losses.categorical_crossentropy(target, model(gen_img)) 90 | grad = t.gradient(loss, gen_img) 91 | fol = np.linalg.norm(grad.numpy()) # L2 adaption 92 | 93 | distance = np.linalg.norm(gen_img.numpy() - orig_img) / orig_norm 94 | if fol > folMAX and distance < 0.5: 95 | folMAX = fol 96 | img_list.append(tf.identity(gen_img)) 97 | 98 | gen_index = np.argmax(model(gen_img)[0]) 99 | if gen_index != orig_index: 100 | total_sets.append((fol, gen_img.numpy(), labels[idx])) 101 | 102 | 103 | fols = np.array([item[0] for item in total_sets]) 104 | advs = np.array([item[1].reshape(32,32,3) for item in total_sets]) 105 | labels = np.array([item[2] for item in total_sets]) 106 | 107 | np.savez('./FOL_Fuzz.npz', advs=advs, labels=labels, fols=fols) 108 | 109 | 110 | -------------------------------------------------------------------------------- /CIFAR-10/gen_adv.py: -------------------------------------------------------------------------------- 1 | from tensorflow import keras 2 | import tensorflow as tf 3 | from tensorflow.keras.datasets import cifar10 4 | import numpy as np 5 | from attack import FGSM, PGD 6 | import os 7 | os.environ["CUDA_VISIBLE_DEVICES"]="-1" 8 | 9 | 10 | (x_train, y_train), (x_test, y_test) = cifar10.load_data() 11 | 12 | # preprocess cifar dataset 13 | x_train = x_train.astype('float32') 14 | x_test = x_test.astype('float32') 15 | x_train = x_train/255 16 | x_test = x_test/255 17 | 18 | y_train = keras.utils.to_categorical(y_train, 10) 19 | y_test = keras.utils.to_categorical(y_test, 10) 20 | 21 | 22 | # load your model 23 | model = keras.models.load_model("./cifar10_resnet20_model.h5") 24 | 25 | fgsm = FGSM(model, ep=0.01, isRand=True) 26 | pgd = PGD(model, ep=0.01, epochs=10, isRand=True) 27 | 28 | # generate adversarial examples at once. 
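# Note that generate() processes the whole training set in a single pass, which
# can exhaust GPU memory for larger models. A batched variant (a sketch, not part
# of the original scripts) would accumulate per-chunk results instead:
#   advs, labels = [], []
#   for i in range(0, len(x_train), 1024):
#       a, l, _, _ = fgsm.generate(x_train[i:i+1024], y_train[i:i+1024])
#       advs.append(a); labels.append(l)
#   advs, labels = np.concatenate(advs), np.concatenate(labels)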
29 | advs, labels, fols, ginis = fgsm.generate(x_train, y_train)
30 | np.savez('./FGSM_TrainFull.npz', advs=advs, labels=labels, fols=fols, ginis=ginis)
31 | 
32 | advs, labels, fols, ginis = pgd.generate(x_train, y_train)
33 | np.savez('./PGD_TrainFull.npz', advs=advs, labels=labels, fols=fols, ginis=ginis)
34 | 
35 | advs, labels, _, _ = fgsm.generate(x_test, y_test)
36 | np.savez('./FGSM_Test.npz', advs=advs, labels=labels)
37 | 
38 | advs, labels, _, _ = pgd.generate(x_test, y_test)
39 | np.savez('./PGD_Test.npz', advs=advs, labels=labels)
--------------------------------------------------------------------------------
/CIFAR-10/metrics.py:
--------------------------------------------------------------------------------
1 | from tensorflow import keras
2 | import tensorflow as tf
3 | import numpy as np
4 | 
5 | 
6 | ## Metrics for quality evaluation of massive test cases.
7 | 
8 | 
9 | def gini(model, x):
10 |     """
11 |     Different from the definition in the DeepGini paper (deepgini = 1 - ginis), the smaller the ginis here, the larger the uncertainty.
12 | 
13 |     shape of x: [batch_size, width, height, channel]
14 |     """
15 |     x = tf.Variable(x)
16 |     preds = model(x).numpy()
17 |     ginis = np.sum(np.square(preds), axis=1)
18 |     return ginis
19 | 
20 | 
21 | def fol_Linf(model, x, xi, ep, y):
22 |     """
23 |     x: perturbed inputs, shape of x: [batch_size, width, height, channel]
24 |     xi: initial inputs, shape of xi: [batch_size, width, height, channel]
25 |     ep: L_inf bound
26 |     y: ground truth, one hot vectors, shape of y: [batch_size, N_classes]
27 |     """
28 |     x, target = tf.Variable(x), tf.constant(y)
29 |     fols = []
30 |     with tf.GradientTape() as tape:
31 |         loss = keras.losses.categorical_crossentropy(target, model(x))
32 |     grads = tape.gradient(loss, x)
33 |     grad_norm = np.linalg.norm(grads.numpy().reshape(x.shape[0], -1), ord=1, axis=1)
34 |     grads_flat = grads.numpy().reshape(x.shape[0], -1)
35 |     diff = (x.numpy() - xi).reshape(x.shape[0], -1)
36 |     for i in range(x.shape[0]):
37 |         i_fol = -np.dot(grads_flat[i], diff[i]) + ep * grad_norm[i]
38 |         fols.append(i_fol)
39 | 
40 |     return np.array(fols)
41 | 
42 | 
43 | def fol_L2(model, x, y):
44 |     """
45 |     x: perturbed inputs, shape of x: [batch_size, width, height, channel]
46 |     y: ground truth, one hot vectors, shape of y: [batch_size, N_classes]
47 |     """
48 |     x, target = tf.Variable(x), tf.constant(y)
49 |     with tf.GradientTape() as tape:
50 |         loss = keras.losses.categorical_crossentropy(target, model(x))
51 |     grads = tape.gradient(loss, x)
52 |     grads_norm_L2 = np.linalg.norm(grads.numpy().reshape(x.shape[0], -1), ord=2, axis=1)
53 | 
54 |     return grads_norm_L2
55 | 
56 | 
57 | def zol(model, x, y):
58 |     """
59 |     x: perturbed inputs, shape of x: [batch_size, width, height, channel]
60 |     y: ground truth, one hot vectors, shape of y: [batch_size, N_classes]
61 |     """
62 |     x, target = tf.Variable(x), tf.constant(y)
63 |     loss = keras.losses.categorical_crossentropy(target, model(x))
64 |     loss = loss.numpy().reshape(-1)
65 | 
66 |     return loss
67 | 
68 | 
69 | def robustness(model, x, y):
70 |     """
71 |     x: perturbed inputs, shape of x: [batch_size, width, height, channel]
72 |     y: ground truth labels, shape of y: [batch_size]
73 |     """
74 |     return np.sum(np.argmax(model(x), axis=1) == y) / y.shape[0]
75 | 
76 | 
--------------------------------------------------------------------------------
/CIFAR-10/models.py:
--------------------------------------------------------------------------------
1 | from tensorflow import keras
2 | from tensorflow.keras.layers import Dense, Conv2D, 
BatchNormalization, Activation 3 | from tensorflow.keras.layers import AveragePooling2D, Input, Flatten 4 | from tensorflow.keras.regularizers import l2 5 | from tensorflow.keras.models import Model 6 | 7 | 8 | 9 | def resnet_layer(inputs, 10 | num_filters=16, 11 | kernel_size=3, 12 | strides=1, 13 | activation='relu', 14 | batch_normalization=True, 15 | conv_first=True): 16 | 17 | conv = Conv2D(num_filters, 18 | kernel_size=kernel_size, 19 | strides=strides, 20 | padding='same', 21 | kernel_initializer='he_normal', 22 | kernel_regularizer=l2(1e-4)) 23 | 24 | x = inputs 25 | if conv_first: 26 | x = conv(x) 27 | if batch_normalization: 28 | x = BatchNormalization()(x) 29 | if activation is not None: 30 | x = Activation(activation)(x) 31 | else: 32 | if batch_normalization: 33 | x = BatchNormalization()(x) 34 | if activation is not None: 35 | x = Activation(activation)(x) 36 | x = conv(x) 37 | return x 38 | 39 | 40 | def resnet_v1(input_shape, depth, num_classes=10): 41 | if (depth - 2) % 6 != 0: 42 | raise ValueError('depth should be 6n+2 (eg 20, 32, 44)') 43 | 44 | num_filters = 16 45 | num_res_blocks = int((depth - 2) / 6) 46 | 47 | inputs = Input(shape=input_shape) 48 | x = resnet_layer(inputs=inputs) 49 | # Instantiate the stack of residual units 50 | for stack in range(3): 51 | for res_block in range(num_res_blocks): 52 | strides = 1 53 | if stack > 0 and res_block == 0: 54 | strides = 2 55 | y = resnet_layer(inputs=x, 56 | num_filters=num_filters, 57 | strides=strides) 58 | y = resnet_layer(inputs=y, 59 | num_filters=num_filters, 60 | activation=None) 61 | if stack > 0 and res_block == 0: 62 | x = resnet_layer(inputs=x, 63 | num_filters=num_filters, 64 | kernel_size=1, 65 | strides=strides, 66 | activation=None, 67 | batch_normalization=False) 68 | x = keras.layers.add([x, y]) 69 | x = Activation('relu')(x) 70 | num_filters *= 2 71 | 72 | # Add classifier on top. 73 | # v1 does not use BN after last shortcut connection-ReLU 74 | x = AveragePooling2D(pool_size=8)(x) 75 | y = Flatten()(x) 76 | outputs = Dense(num_classes, 77 | activation='softmax', 78 | kernel_initializer='he_normal')(y) 79 | 80 | # Instantiate model. 
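# (For the default depth=20, (20-2)/6 = 3, i.e. three residual blocks in each of the three stacks built above.)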
81 |     model = Model(inputs=inputs, outputs=outputs)
82 |     return model
83 | 
84 | 
85 | def ConvNet_2(input_shape=(32,32,3)):
86 |     model = keras.models.Sequential()
87 |     model.add(keras.layers.Conv2D(32, 3, padding="same", input_shape=input_shape, activation='relu'))
88 |     model.add(keras.layers.Conv2D(32, 3, padding="same", activation='relu'))
89 |     model.add(keras.layers.MaxPooling2D(pool_size=(2,2)))
90 |     model.add(keras.layers.Conv2D(64, 3, padding="same", activation='relu'))
91 |     model.add(keras.layers.Conv2D(64, 3, padding="same", activation='relu'))
92 |     model.add(keras.layers.MaxPooling2D(pool_size=(2,2)))
93 |     model.add(keras.layers.Conv2D(128, 3, padding="same", activation='relu'))
94 |     model.add(keras.layers.Conv2D(128, 3, padding="same", activation='relu'))
95 |     model.add(keras.layers.MaxPooling2D(pool_size=(2,2)))
96 |     model.add(keras.layers.Flatten())
97 |     model.add(keras.layers.Dropout(0.5))
98 |     model.add(keras.layers.Dense(1024, kernel_regularizer=l2(0.01), bias_regularizer=l2(0.01), activation='relu'))
99 |     model.add(keras.layers.Dropout(0.5))
100 |     model.add(keras.layers.Dense(512, kernel_regularizer=l2(0.01), bias_regularizer=l2(0.01), activation='relu'))
101 |     model.add(keras.layers.Dropout(0.5))
102 |     model.add(keras.layers.Dense(10, activation='softmax'))
103 |     return model
104 | 
105 | 
106 | 
--------------------------------------------------------------------------------
/CIFAR-10/select_retrain.py:
--------------------------------------------------------------------------------
1 | from tensorflow import keras
2 | import tensorflow as tf
3 | import numpy as np
4 | from tensorflow.keras.datasets import cifar10
5 | from tensorflow.keras.callbacks import ModelCheckpoint, LearningRateScheduler
6 | from tensorflow.keras.callbacks import ReduceLROnPlateau
7 | from tensorflow.keras.preprocessing.image import ImageDataGenerator
8 | from tensorflow.keras.regularizers import l2
9 | import random
10 | 
11 | import os
12 | os.environ["CUDA_VISIBLE_DEVICES"]="0"
13 | 
14 | # Suppress the GPU memory usage (grow on demand)
15 | gpus = tf.config.experimental.list_physical_devices('GPU')
16 | if gpus:
17 |     try:
18 |         for gpu in gpus:
19 |             tf.config.experimental.set_memory_growth(gpu, True)
20 |     except RuntimeError as e:
21 |         print(e)
22 | 
23 | 
24 | 
25 | def lr_schedule_retrain(epoch):
26 |     lr = 1e-4
27 |     if epoch > 25:
28 |         lr *= 1e-1
29 |     print('Learning rate: ', lr)
30 |     return lr
31 | 
32 | 
33 | 
34 | def select(values, n, s='best', k=4):
35 |     """
36 |     n: the number of selected test cases.
37 |     s: strategy, ['best', 'random', 'kmst', 'gini']
38 |     k: for KM-ST, the number of ranges.
39 |     """
40 |     ranks = np.argsort(values)
41 | 
42 |     if s == 'best':
43 |         h = n//2
44 |         return np.concatenate((ranks[:h],ranks[-h:]))
45 | 
46 |     elif s == 'r':
47 |         return np.array(random.sample(list(ranks),n))
48 | 
49 |     elif s == 'kmst':
50 |         fol_max = values.max()
51 |         th = fol_max / k
52 |         section_nums = n // k
53 |         indexes = []
54 |         for i in range(k):
55 |             section_indexes = np.intersect1d(np.where(values >= th*i), np.where(values < th*(i+1)))
56 |             if section_nums < len(section_indexes):
57 |                 index = random.sample(list(section_indexes), section_nums)
58 |                 indexes.append(index)
59 |             else:
60 |                 indexes.append(section_indexes)
61 |                 index = random.sample(list(ranks), section_nums-len(section_indexes))
62 |                 indexes.append(index)
63 |         return np.concatenate(indexes)
64 | 
65 |     # This is for gini strategy. There is little difference from DeepGini paper.
See function ginis() in metrics.py 66 | else: 67 | return ranks[:n] 68 | 69 | 70 | 71 | (x_train, y_train), (x_test, y_test) = cifar10.load_data() 72 | x_train = x_train.astype('float32') 73 | x_test = x_test.astype('float32') 74 | 75 | # convert class vectors to binary class matrics 76 | y_train = keras.utils.to_categorical(y_train, 10) 77 | y_test = keras.utils.to_categorical(y_test, 10) 78 | x_train = x_train/255 79 | x_test = x_test/255 80 | 81 | 82 | # Load the generated adversarial inputs for training. FGSM and PGD. 83 | with np.load("./FGSM_TrainFull.npz") as f: 84 | fgsm_train, fgsm_train_labels, fgsm_train_fols, fgsm_train_ginis = f['advs'], f['labels'], f['fols'], f['ginis'] 85 | 86 | with np.load("./PGD_TrainFull.npz") as f: 87 | pgd_train, pgd_train_labels, pgd_train_fols, pgd_train_ginis= f['advs'], f['labels'], f['fols'], f['ginis'] 88 | 89 | # Load the generated adversarial inputs for testing. FGSM and PGD. 90 | with np.load("./FGSM_Test.npz") as f: 91 | fgsm_test, fgsm_test_labels = f['advs'], f['labels'] 92 | 93 | with np.load("./PGD_Test.npz") as f: 94 | pgd_test, pgd_test_labels = f['advs'], f['labels'] 95 | 96 | 97 | # Mix the adversarial inputs 98 | fp_train = np.concatenate((fgsm_train, pgd_train)) 99 | fp_train_labels = np.concatenate((fgsm_train_labels, pgd_train_labels)) 100 | fp_train_fols = np.concatenate((fgsm_train_fols, pgd_train_fols)) 101 | fp_train_ginis = np.concatenate((fgsm_train_ginis, pgd_train_ginis)) 102 | 103 | fp_test = np.concatenate((fgsm_test, pgd_test)) 104 | fp_test_labels = np.concatenate((fgsm_test_labels, pgd_test_labels)) 105 | 106 | 107 | sNums = [500*i for i in [2,4,8,12,20]] 108 | strategies = ['best', 'kmst', 'gini'] 109 | 110 | for num in sNums: 111 | print(num) 112 | for i in range(len(strategies)): 113 | s = strategies[i] 114 | model_path = "./checkpoint/best_Resnet_MIX_%d_%s.h5" % (num, s) 115 | checkpoint = ModelCheckpoint(filepath=model_path, monitor='val_accuracy', verbose=1, save_best_only=True) 116 | lr_scheduler = LearningRateScheduler(lr_schedule_retrain) 117 | lr_reducer = ReduceLROnPlateau(factor=np.sqrt(0.1), cooldown=0, patience=5, min_lr=0.5e-6) 118 | callbacks = [checkpoint, lr_reducer, lr_scheduler] 119 | 120 | if s == 'gini': 121 | indexes = select(fp_train_ginis, num, s=s) 122 | else: 123 | indexes = select(fp_train_fols, num, s=s) 124 | 125 | selectAdvs = fp_train[indexes] 126 | selectAdvsLabels = fp_train_labels[indexes] 127 | 128 | x_train_mix = np.concatenate((x_train, selectAdvs),axis=0) 129 | y_train_mix = np.concatenate((y_train, selectAdvsLabels),axis=0) 130 | 131 | 132 | # load old model 133 | model = keras.models.load_model("./saved_models/cifar10_resnet20_model.h5") 134 | # model.fit(x_train_mix, y_train_mix, epochs=40, batch_size=64, verbose=1, callbacks=callbacks, 135 | # validation_data=(fp_test, fp_test_labels)) 136 | 137 | datagen = ImageDataGenerator(rotation_range=10, width_shift_range=0.1, height_shift_range=0.1, horizontal_flip=True) 138 | datagen.fit(x_train_mix) 139 | batch_size = 64 140 | history = model.fit_generator(datagen.flow(x_train_mix, y_train_mix, batch_size=batch_size), 141 | validation_data=(fp_test, fp_test_labels), 142 | epochs=40, verbose=1, 143 | callbacks=callbacks, 144 | steps_per_epoch= x_train_mix.shape[0] // batch_size) 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | -------------------------------------------------------------------------------- /CIFAR-10/train_model.py: 
-------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | from tensorflow import keras 4 | from tensorflow.keras.layers import Dense, Conv2D, BatchNormalization, Activation 5 | from tensorflow.keras.layers import AveragePooling2D, Input, Flatten 6 | from tensorflow.keras.optimizers import Adam 7 | from tensorflow.keras.callbacks import ModelCheckpoint, LearningRateScheduler 8 | from tensorflow.keras.callbacks import ReduceLROnPlateau 9 | from tensorflow.keras.preprocessing.image import ImageDataGenerator 10 | from tensorflow.keras.regularizers import l2 11 | from tensorflow.keras.models import Model 12 | from tensorflow.keras.datasets import cifar10 13 | from models import resnet_layer, resnet_v1, ConvNet_2 14 | 15 | import os 16 | os.environ["CUDA_VISIBLE_DEVICES"]="0" 17 | 18 | gpus = tf.config.experimental.list_physical_devices('GPU') 19 | if gpus: 20 | try: 21 | for gpu in gpus: 22 | tf.config.experimental.set_memory_growth(gpu, True) 23 | except RuntimeError as e: 24 | print(e) 25 | 26 | 27 | 28 | def lr_schedule(epoch): 29 | lr = 1e-3 30 | if epoch > 90: 31 | lr *= 1e-3 32 | elif epoch > 75: 33 | lr *= 1e-2 34 | elif epoch > 50: 35 | lr *= 1e-1 36 | print('Learning rate: ', lr) 37 | return lr 38 | 39 | 40 | 41 | # hyper-parameters for training resnet-20 42 | batch_size = 64 43 | epochs = 100 44 | data_augmentation = True 45 | num_classes = 10 46 | depth = 20 47 | 48 | (x_train, y_train), (x_test, y_test) = cifar10.load_data() 49 | 50 | x_train = x_train.astype('float32') / 255 51 | x_test = x_test.astype('float32') / 255 52 | 53 | y_train = keras.utils.to_categorical(y_train, num_classes) 54 | y_test = keras.utils.to_categorical(y_test, num_classes) 55 | 56 | input_shape = x_train.shape[1:] 57 | 58 | model = resnet_v1(input_shape=input_shape, depth=depth) 59 | model.compile(loss='categorical_crossentropy', 60 | optimizer=Adam(lr=lr_schedule(0)), 61 | metrics=['accuracy']) 62 | 63 | print(model.summary()) 64 | 65 | # Prepare model model saving directory. 66 | save_dir = os.path.join(os.getcwd(), 'saved_models') 67 | model_name = 'cifar10_resnet20_model.{epoch:03d}.h5' 68 | if not os.path.isdir(save_dir): 69 | os.makedirs(save_dir) 70 | filepath = os.path.join(save_dir, model_name) 71 | 72 | # Prepare callbacks for model saving and for learning rate adjustment. 73 | checkpoint = ModelCheckpoint(filepath=filepath, 74 | monitor='val_accuracy', 75 | verbose=1, 76 | save_best_only=True, 77 | mode='auto') 78 | 79 | lr_scheduler = LearningRateScheduler(lr_schedule) 80 | 81 | lr_reducer = ReduceLROnPlateau(factor=np.sqrt(0.1), 82 | cooldown=0, 83 | patience=5, 84 | min_lr=0.5e-6) 85 | 86 | callbacks = [checkpoint, lr_reducer, lr_scheduler] 87 | 88 | 89 | 90 | # Run training, with or without data augmentation. 
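# (data_augmentation is set to True above, so the ImageDataGenerator branch below is the one that actually runs.)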
91 | if not data_augmentation: 92 | print('Not using data augmentation.') 93 | model.fit(x_train, y_train, 94 | batch_size=batch_size, 95 | epochs=epochs, 96 | validation_data=(x_test, y_test), 97 | shuffle=True, 98 | callbacks=callbacks) 99 | else: 100 | print('Using real-time data augmentation.') 101 | # This will do preprocessing and realtime data augmentation: 102 | datagen = ImageDataGenerator( 103 | rotation_range=10, 104 | # randomly shift images horizontally 105 | width_shift_range=0.1, 106 | # randomly shift images vertically 107 | height_shift_range=0.1, 108 | # set range for random shear 109 | horizontal_flip=True) 110 | 111 | datagen.fit(x_train) 112 | 113 | # Fit the model on the batches generated by datagen.flow(). 114 | history = model.fit_generator(datagen.flow(x_train, y_train, batch_size=batch_size), 115 | validation_data=(x_test, y_test), 116 | epochs=epochs, verbose=1, 117 | callbacks=callbacks, 118 | steps_per_epoch= x_train.shape[0] // batch_size) 119 | 120 | # Score trained model. 121 | scores = model.evaluate(x_test, y_test, verbose=1) 122 | print('Test loss:', scores[0]) 123 | print('Test accuracy:', scores[1]) 124 | 125 | -------------------------------------------------------------------------------- /FASHION/models.py: -------------------------------------------------------------------------------- 1 | from tensorflow import keras 2 | import tensorflow as tf 3 | 4 | 5 | 6 | def Lenet5(input_shape=(28, 28, 1)): 7 | input_tensor = keras.layers.Input(shape=input_shape) 8 | 9 | x = keras.layers.Convolution2D(6, (5, 5), activation='relu', padding='same', name='block1_conv1')(input_tensor) 10 | x = keras.layers.MaxPooling2D(pool_size=(2, 2), name='block1_pool1')(x) 11 | 12 | x = keras.layers.Convolution2D(16, (5, 5), activation='relu', padding='same', name='block2_conv1')(x) 13 | x = keras.layers.MaxPooling2D(pool_size=(2, 2), name='block2_pool1')(x) 14 | 15 | x = keras.layers.Flatten(name='flatten')(x) 16 | x = keras.layers.Dense(120, activation='relu', name='fc1')(x) 17 | x = keras.layers.Dense(84, activation='relu', name='fc2')(x) 18 | x = keras.layers.Dense(10, name='before_softmax')(x) 19 | x = keras.layers.Activation('softmax', name='redictions')(x) 20 | 21 | return keras.models.Model(input_tensor, x) 22 | 23 | 24 | 25 | def ConvNet_1(input_shape=(28,28,1)): 26 | model = keras.models.Sequential() 27 | model.add(keras.layers.Conv2D(32, 3, input_shape=input_shape, activation='relu')) 28 | 29 | model.add(keras.layers.Conv2D(32, 3, activation='relu')) 30 | model.add(keras.layers.MaxPooling2D(pool_size=(2,2))) 31 | model.add(keras.layers.Conv2D(64,3, activation='relu')) 32 | model.add(keras.layers.Conv2D(64,3, activation='relu')) 33 | model.add(keras.layers.MaxPooling2D(pool_size=(2,2))) 34 | 35 | model.add(keras.layers.Flatten()) 36 | model.add(keras.layers.Dense(512, activation='relu')) 37 | model.add(keras.layers.Dropout(0.5)) 38 | model.add(keras.layers.Dense(10, activation='softmax')) 39 | 40 | return model 41 | 42 | -------------------------------------------------------------------------------- /FASHION/train_model.py: -------------------------------------------------------------------------------- 1 | from tensorflow import keras 2 | import tensorflow as tf 3 | import numpy as np 4 | from models import Lenet5, ConvNet_1 5 | 6 | gpus = tf.config.experimental.list_physical_devices('GPU') 7 | if gpus: 8 | try: 9 | for gpu in gpus: 10 | tf.config.experimental.set_memory_growth(gpu, True) 11 | except RuntimeError as e: 12 | print(e) 13 | 14 | 15 | def 
load_fashion(path="./fashion.npz"): 16 | f = np.load(path) 17 | x_train, y_train = f['x_train'], f['y_train'] 18 | x_test, y_test = f['x_test'], f['y_test'] 19 | f.close() 20 | 21 | x_train = x_train.reshape(x_train.shape[0], 28, 28, 1) 22 | x_test = x_test.reshape(x_test.shape[0], 28, 28, 1) 23 | 24 | x_train = x_train.astype('float32') / 255. 25 | x_test = x_test.astype('float32') / 255. 26 | 27 | y_train = keras.utils.to_categorical(y_train, 10) 28 | y_test = keras.utils.to_categorical(y_test, 10) 29 | 30 | return x_train, x_test, y_train, y_test 31 | 32 | 33 | path = "./fashion.npz" 34 | x_train, x_test, y_train, y_test = load_fashion(path) 35 | 36 | 37 | lenet5 = Lenet5() 38 | lenet5.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy']) 39 | lenet5.fit(x_train, y_train, epochs=10, batch_size=64) 40 | 41 | lenet5.evaluate(x_test, y_test) 42 | 43 | lenet5.save("./Lenet5_fashion.h5") 44 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 SmallkeyChen 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /MNIST/attack.py: -------------------------------------------------------------------------------- 1 | from tensorflow import keras 2 | import tensorflow as tf 3 | import numpy as np 4 | 5 | 6 | 7 | class FGSM: 8 | """ 9 | We use FGSM to generate a batch of adversarial examples. 10 | """ 11 | def __init__(self, model, ep=0.3, isRand=True): 12 | """ 13 | isRand is set True to improve the attack success rate. 
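        ep defaults to 0.3 here: MNIST is conventionally attacked with a much larger
        L_inf budget than CIFAR-10 (0.01 in the CIFAR-10 version of this class).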
14 | """ 15 | self.isRand = isRand 16 | self.model = model 17 | self.ep = ep 18 | 19 | def generate(self, x, y, randRate=1): 20 | """ 21 | x: clean inputs, shape of x: [batch_size, width, height, channel] 22 | y: ground truth, one hot vectors, shape of y: [batch_size, N_classes] 23 | """ 24 | fols = [] 25 | target = tf.constant(y) 26 | 27 | xi = x.copy() 28 | if self.isRand: 29 | x = x + np.random.uniform(-self.ep * randRate, self.ep * randRate, x.shape) 30 | x = np.clip(x, 0, 1) 31 | 32 | x = tf.Variable(x) 33 | with tf.GradientTape() as tape: 34 | loss = keras.losses.categorical_crossentropy(target, self.model(x)) 35 | grads = tape.gradient(loss, x) 36 | delta = tf.sign(grads) 37 | x_adv = x + self.ep * delta 38 | 39 | x_adv = tf.clip_by_value(x_adv, clip_value_min=xi-self.ep, clip_value_max=xi+self.ep) 40 | x_adv = tf.clip_by_value(x_adv, clip_value_min=0, clip_value_max=1) 41 | 42 | idxs = np.where(np.argmax(self.model(x_adv), axis=1) != np.argmax(y, axis=1))[0] 43 | print("SUCCESS:", len(idxs)) 44 | 45 | x_adv, xi, target = x_adv.numpy()[idxs], xi[idxs], target.numpy()[idxs] 46 | x_adv, target = tf.Variable(x_adv), tf.constant(target) 47 | 48 | preds = self.model(x_adv).numpy() 49 | ginis = np.sum(np.square(preds), axis=1) 50 | 51 | with tf.GradientTape() as tape: 52 | loss = keras.losses.categorical_crossentropy(target, self.model(x_adv)) 53 | grads = tape.gradient(loss, x_adv) 54 | grad_norm = np.linalg.norm(grads.numpy().reshape(x_adv.shape[0], -1), ord=1, axis=1) 55 | grads_flat = grads.numpy().reshape(x_adv.shape[0], -1) 56 | diff = (x_adv.numpy() - xi).reshape(x_adv.shape[0], -1) 57 | for i in range(x_adv.shape[0]): 58 | i_fol = -np.dot(grads_flat[i], diff[i]) + self.ep * grad_norm[i] 59 | fols.append(i_fol) 60 | 61 | return x_adv.numpy(), target.numpy(), np.array(fols), ginis 62 | 63 | 64 | 65 | class PGD: 66 | """ 67 | We use PGD to generate a batch of adversarial examples. PGD could be seen as iterative version of FGSM. 68 | """ 69 | def __init__(self, model, ep=0.3, step=None, epochs=10, isRand=True): 70 | """ 71 | isRand is set True to improve the attack success rate. 
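        As in the CIFAR-10 version, step defaults to ep/6 and the attack runs for
        `epochs` gradient steps.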
72 |         """
73 |         self.isRand = isRand
74 |         self.model = model
75 |         self.ep = ep
76 |         if step is None:
77 |             step = ep/6
78 |         self.step, self.epochs = step, epochs
79 | 
80 |     def generate(self, x, y, randRate=1):
81 |         """
82 |         x: clean inputs, shape of x: [batch_size, width, height, channel]
83 |         y: ground truth, one hot vectors, shape of y: [batch_size, N_classes]
84 |         """
85 |         fols = []
86 |         target = tf.constant(y)
87 | 
88 |         xi = x.copy()
89 |         if self.isRand:
90 |             x = x + np.random.uniform(-self.ep * randRate, self.ep * randRate, x.shape)
91 |             x = np.clip(x, 0, 1)
92 | 
93 |         x_adv = tf.Variable(x)
94 |         for i in range(self.epochs):
95 |             with tf.GradientTape() as tape:
96 |                 loss = keras.losses.categorical_crossentropy(target, self.model(x_adv))
97 |             grads = tape.gradient(loss, x_adv)
98 |             delta = tf.sign(grads)
99 |             x_adv.assign_add(self.step * delta)
100 |             x_adv = tf.clip_by_value(x_adv, clip_value_min=xi-self.ep, clip_value_max=xi+self.ep)
101 |             x_adv = tf.clip_by_value(x_adv, clip_value_min=0, clip_value_max=1)
102 |             x_adv = tf.Variable(x_adv)
103 | 
104 |         idxs = np.where(np.argmax(self.model(x_adv), axis=1) != np.argmax(y, axis=1))[0]
105 |         print("SUCCESS:", len(idxs))
106 | 
107 |         x_adv, xi, target = x_adv.numpy()[idxs], xi[idxs], target.numpy()[idxs]
108 |         x_adv, target = tf.Variable(x_adv), tf.constant(target)
109 | 
110 |         preds = self.model(x_adv).numpy()
111 |         ginis = np.sum(np.square(preds), axis=1)
112 | 
113 |         with tf.GradientTape() as tape:
114 |             loss = keras.losses.categorical_crossentropy(target, self.model(x_adv))
115 |         grads = tape.gradient(loss, x_adv)
116 |         grad_norm = np.linalg.norm(grads.numpy().reshape(x_adv.shape[0], -1), ord=1, axis=1)
117 |         grads_flat = grads.numpy().reshape(x_adv.shape[0], -1)
118 |         diff = (x_adv.numpy() - xi).reshape(x_adv.shape[0], -1)
119 |         for i in range(x_adv.shape[0]):
120 |             i_fol = -np.dot(grads_flat[i], diff[i]) + self.ep * grad_norm[i]
121 |             fols.append(i_fol)
122 | 
123 |         return x_adv.numpy(), target.numpy(), np.array(fols), ginis
124 | 
125 | 
126 | 
127 | 
--------------------------------------------------------------------------------
/MNIST/evaluate.py:
--------------------------------------------------------------------------------
1 | from tensorflow import keras
2 | import tensorflow as tf
3 | import numpy as np
4 | import matplotlib.pyplot as plt
5 | # %matplotlib inline
6 | 
7 | import os
8 | os.environ["CUDA_VISIBLE_DEVICES"]="0"
9 | 
10 | gpus = tf.config.experimental.list_physical_devices('GPU')
11 | if gpus:
12 |     try:
13 |         for gpu in gpus:
14 |             tf.config.experimental.set_memory_growth(gpu, True)
15 |     except RuntimeError as e:
16 |         print(e)
17 | 
18 | 
19 | # Load the generated adversarial inputs for Robustness evaluation.
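# (Written by gen_adv.py; generate the attacks before running this script.)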
20 | with np.load("./FGSM_Test.npz") as f: 21 | fgsm_test, fgsm_test_labels = f['advs'], f['labels'] 22 | 23 | with np.load("./PGD_Test.npz") as f: 24 | pgd_test, pgd_test_labels = f['advs'], f['labels'] 25 | 26 | fp_test = np.concatenate((fgsm_test, pgd_test)) 27 | fp_test_labels = np.concatenate((fgsm_test_labels, pgd_test_labels)) 28 | 29 | 30 | sNums = [600*i for i in [1,2,3,4,6,8,10,12,16,20]] 31 | strategies = ['best', 'kmst', 'gini'] 32 | acc_fp = [[] for i in range(len(strategies))] 33 | 34 | 35 | for num in sNums: 36 | for i in range(len(strategies)): 37 | s = strategies[i] 38 | model_path = "./checkpoint/best_Lenet5_MIX_%d_%s.h5" % (num, s) 39 | best_model = keras.models.load_model(model_path) 40 | lfp, afp = best_model.evaluate(fp_test, fp_test_labels, verbose=0) 41 | acc_fp[i].append(afp) 42 | 43 | 44 | colormap = ['r','limegreen', 'dodgerblue'] 45 | plt.figure(figsize=(8,6)) 46 | x = [i/max(sNums) for i in sNums] 47 | for i in range(len(strategies)): 48 | plt.plot(x, acc_fp[i],'o-', label=strategies[i], color=colormap[i], linewidth=3, markersize=8) 49 | 50 | plt.title("MNIST-ATTACK", fontsize=20) 51 | plt.xlabel("# Percentage of test cases", fontsize=20) 52 | plt.ylabel("Robustness", fontsize=20) 53 | plt.xticks(x, [0.5,1,1.5,2,3,4,5,6,8,10],fontsize=15) 54 | plt.yticks(fontsize=15) 55 | plt.legend(fontsize=15) 56 | 57 | fig = plt.gcf() 58 | fig.savefig('./mnist_attack_robustness.pdf') -------------------------------------------------------------------------------- /MNIST/fol_guided_fuzzing.py: -------------------------------------------------------------------------------- 1 | from tensorflow import keras 2 | import random 3 | import tensorflow as tf 4 | import numpy as np 5 | import time 6 | 7 | import os 8 | os.environ["CUDA_VISIBLE_DEVICES"]="-1" 9 | 10 | gpus = tf.config.experimental.list_physical_devices('GPU') 11 | if gpus: 12 | try: 13 | for gpu in gpus: 14 | tf.config.experimental.set_memory_growth(gpu, True) 15 | except RuntimeError as e: 16 | print(e) 17 | 18 | 19 | def load_mnist(path="./mnist.npz"): 20 | f = np.load(path) 21 | x_train, y_train = f['x_train'], f['y_train'] 22 | x_test, y_test = f['x_test'], f['y_test'] 23 | f.close() 24 | 25 | x_train = x_train.reshape(x_train.shape[0], 28, 28, 1) 26 | x_test = x_test.reshape(x_test.shape[0], 28, 28, 1) 27 | 28 | x_train = x_train.astype('float32') / 255. 29 | x_test = x_test.astype('float32') / 255. 30 | 31 | y_train = keras.utils.to_categorical(y_train, 10) 32 | y_test = keras.utils.to_categorical(y_test, 10) 33 | 34 | return x_train, x_test, y_train, y_test 35 | 36 | 37 | path = "./mnist.npz" 38 | x_train, x_test, y_train, y_test = load_mnist(path) 39 | 40 | model = keras.models.load_model("./Lenet5_mnist.h5") 41 | 42 | 43 | seeds = random.sample(list(range(x_train.shape[0])), 1000) 44 | images = x_train[seeds] 45 | labels = y_train[seeds] 46 | 47 | 48 | # some training samples is static, i.e., grad=<0>, hard to generate. 
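# We therefore keep only seeds whose loss gradient has an L2 norm above 1e-3,
# since zero-gradient seeds cannot be moved by the gradient-based mutations below.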
49 | seeds_filter = [] 50 | gen_img = tf.Variable(images) 51 | with tf.GradientTape() as g: 52 | loss = keras.losses.categorical_crossentropy(labels, model(gen_img)) 53 | grads = g.gradient(loss, gen_img) 54 | 55 | fols = np.linalg.norm((grads.numpy()+1e-20).reshape(images.shape[0], -1), ord=2, axis=1) 56 | seeds_filter = np.where(fols > 1e-3)[0] 57 | 58 | 59 | start_t = time.time() 60 | lr = 0.1 61 | total_sets = [] 62 | for idx in seeds_filter: 63 | # delta_t = time.time() - start_t 64 | # if delta_t > 300: 65 | # break 66 | img_list = [] 67 | tmp_img = images[[idx]] 68 | orig_img = tmp_img.copy() 69 | orig_norm = np.linalg.norm(orig_img) 70 | img_list.append(tf.identity(tmp_img)) 71 | logits = model(tmp_img) 72 | orig_index = np.argmax(logits[0]) 73 | target = keras.utils.to_categorical([orig_index], 10) 74 | label_top5 = np.argsort(logits[0])[-5:] 75 | 76 | folMAX = 0 77 | epoch = 0 78 | while len(img_list) > 0: 79 | gen_img = img_list.pop(0) 80 | for _ in range(2): 81 | gen_img = tf.Variable(gen_img) 82 | with tf.GradientTape(persistent=True) as g: 83 | loss = keras.losses.categorical_crossentropy(target, model(gen_img)) 84 | grads = g.gradient(loss, gen_img) 85 | fol = tf.norm(grads+1e-20) 86 | g.watch(fol) 87 | logits = model(gen_img) 88 | obj = fol - logits[0][orig_index] 89 | dl_di = g.gradient(obj, gen_img) 90 | del g 91 | 92 | gen_img = gen_img + dl_di * lr * (random.random() + 0.5) 93 | gen_img = tf.clip_by_value(gen_img, clip_value_min=0, clip_value_max=1) 94 | 95 | with tf.GradientTape() as t: 96 | t.watch(gen_img) 97 | loss = keras.losses.categorical_crossentropy(target, model(gen_img)) 98 | grad = t.gradient(loss, gen_img) 99 | fol = np.linalg.norm(grad.numpy()) # L2 adaption 100 | 101 | distance = np.linalg.norm(gen_img.numpy() - orig_img) / orig_norm 102 | if fol > folMAX and distance < 0.5: 103 | folMAX = fol 104 | img_list.append(tf.identity(gen_img)) 105 | 106 | gen_index = np.argmax(model(gen_img)[0]) 107 | if gen_index != orig_index: 108 | total_sets.append((fol, gen_img.numpy(), labels[idx])) 109 | 110 | 111 | fols = np.array([item[0] for item in total_sets]) 112 | advs = np.array([item[1].reshape(28,28,1) for item in total_sets]) 113 | labels = np.array([item[2] for item in total_sets]) 114 | 115 | np.savez('./FOL_Fuzz.npz', advs=advs, labels=labels, fols=fols) 116 | 117 | 118 | -------------------------------------------------------------------------------- /MNIST/gen_adv.py: -------------------------------------------------------------------------------- 1 | from tensorflow import keras 2 | import tensorflow as tf 3 | import numpy as np 4 | from attack import FGSM, PGD 5 | import os 6 | os.environ["CUDA_VISIBLE_DEVICES"]="-1" 7 | 8 | 9 | 10 | def load_mnist(path="./mnist.npz"): 11 | f = np.load(path) 12 | x_train, y_train = f['x_train'], f['y_train'] 13 | x_test, y_test = f['x_test'], f['y_test'] 14 | f.close() 15 | 16 | x_train = x_train.reshape(x_train.shape[0], 28, 28, 1) 17 | x_test = x_test.reshape(x_test.shape[0], 28, 28, 1) 18 | 19 | x_train = x_train.astype('float32') / 255. 20 | x_test = x_test.astype('float32') / 255. 
21 | 
22 |     y_train = keras.utils.to_categorical(y_train, 10)
23 |     y_test = keras.utils.to_categorical(y_test, 10)
24 | 
25 |     return x_train, x_test, y_train, y_test
26 | 
27 | 
28 | path = "./mnist.npz"
29 | x_train, x_test, y_train, y_test = load_mnist(path)
30 | 
31 | 
32 | # load your model
33 | model = keras.models.load_model("./Lenet5_mnist.h5")
34 | 
35 | fgsm = FGSM(model, ep=0.3, isRand=True)
36 | pgd = PGD(model, ep=0.3, epochs=10, isRand=True)
37 | 
38 | # generate adversarial examples at once.
39 | advs, labels, fols, ginis = fgsm.generate(x_train, y_train)
40 | np.savez('./FGSM_TrainFull.npz', advs=advs, labels=labels, fols=fols, ginis=ginis)
41 | 
42 | advs, labels, fols, ginis = pgd.generate(x_train, y_train)
43 | np.savez('./PGD_TrainFull.npz', advs=advs, labels=labels, fols=fols, ginis=ginis)
44 | 
45 | advs, labels, _, _ = fgsm.generate(x_test, y_test)
46 | np.savez('./FGSM_Test.npz', advs=advs, labels=labels)
47 | 
48 | advs, labels, _, _ = pgd.generate(x_test, y_test)
49 | np.savez('./PGD_Test.npz', advs=advs, labels=labels)
--------------------------------------------------------------------------------
/MNIST/metrics.py:
--------------------------------------------------------------------------------
1 | from tensorflow import keras
2 | import tensorflow as tf
3 | import numpy as np
4 | 
5 | 
6 | ## Metrics for quality evaluation of massive test cases.
7 | 
8 | 
9 | def gini(model, x):
10 |     """
11 |     Different from the definition in the DeepGini paper (deepgini = 1 - ginis), the smaller the ginis here, the larger the uncertainty.
12 | 
13 |     shape of x: [batch_size, width, height, channel]
14 |     """
15 |     x = tf.Variable(x)
16 |     preds = model(x).numpy()
17 |     ginis = np.sum(np.square(preds), axis=1)
18 |     return ginis
19 | 
20 | 
21 | def fol_Linf(model, x, xi, ep, y):
22 |     """
23 |     x: perturbed inputs, shape of x: [batch_size, width, height, channel]
24 |     xi: initial inputs, shape of xi: [batch_size, width, height, channel]
25 |     ep: L_inf bound
26 |     y: ground truth, one hot vectors, shape of y: [batch_size, N_classes]
27 |     """
28 |     x, target = tf.Variable(x), tf.constant(y)
29 |     fols = []
30 |     with tf.GradientTape() as tape:
31 |         loss = keras.losses.categorical_crossentropy(target, model(x))
32 |     grads = tape.gradient(loss, x)
33 |     grad_norm = np.linalg.norm(grads.numpy().reshape(x.shape[0], -1), ord=1, axis=1)
34 |     grads_flat = grads.numpy().reshape(x.shape[0], -1)
35 |     diff = (x.numpy() - xi).reshape(x.shape[0], -1)
36 |     for i in range(x.shape[0]):
37 |         i_fol = -np.dot(grads_flat[i], diff[i]) + ep * grad_norm[i]
38 |         fols.append(i_fol)
39 | 
40 |     return np.array(fols)
41 | 
42 | 
43 | def fol_L2(model, x, y):
44 |     """
45 |     x: perturbed inputs, shape of x: [batch_size, width, height, channel]
46 |     y: ground truth, one hot vectors, shape of y: [batch_size, N_classes]
47 |     """
48 |     x, target = tf.Variable(x), tf.constant(y)
49 |     with tf.GradientTape() as tape:
50 |         loss = keras.losses.categorical_crossentropy(target, model(x))
51 |     grads = tape.gradient(loss, x)
52 |     grads_norm_L2 = np.linalg.norm(grads.numpy().reshape(x.shape[0], -1), ord=2, axis=1)
53 | 
54 |     return grads_norm_L2
55 | 
56 | 
57 | def zol(model, x, y):
58 |     """
59 |     x: perturbed inputs, shape of x: [batch_size, width, height, channel]
60 |     y: ground truth, one hot vectors, shape of y: [batch_size, N_classes]
61 |     """
62 |     x, target = tf.Variable(x), tf.constant(y)
63 |     loss = keras.losses.categorical_crossentropy(target, model(x))
64 |     loss = loss.numpy().reshape(-1)
65 | 
66 |     return loss
67 | 
68 | 
69 | def robustness(model, x, y):
70 |     """
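    Empirical robustness: the accuracy of the model on the given (perturbed) inputs.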
71 | x: perturbed inputs, shape of x: [batch_size, width, height, channel] 72 | y: ground truth labels, shape of y: [batch_size] 73 | """ 74 | return np.sum(np.argmax(model(x), axis=1) == y) / y.shape[0] 75 | 76 | -------------------------------------------------------------------------------- /MNIST/mnist.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Testing4AI/RobOT/e727ee91b84171beb2d2ed832755f1714ed38e92/MNIST/mnist.npz -------------------------------------------------------------------------------- /MNIST/models.py: -------------------------------------------------------------------------------- 1 | from tensorflow import keras 2 | import tensorflow as tf 3 | 4 | 5 | def Lenet1(input_shape=(28, 28, 1)): 6 | input_tensor = keras.layers.Input(shape=input_shape) 7 | x = keras.layers.Conv2D(4, (5, 5), activation='relu', padding='same')(input_tensor) 8 | x = keras.layers.MaxPooling2D(pool_size=(2, 2))(x) 9 | 10 | x = keras.layers.Conv2D(12, (5, 5), activation='relu', padding='same')(x) 11 | x = keras.layers.MaxPooling2D(pool_size=(2, 2))(x) 12 | 13 | x = keras.layers.Flatten()(x) 14 | x = keras.layers.Dense(10, activation='softmax')(x) 15 | 16 | return keras.models.Model(input_tensor, x) 17 | 18 | 19 | def Lenet5(input_shape=(28, 28, 1)): 20 | input_tensor = keras.layers.Input(shape=input_shape) 21 | 22 | x = keras.layers.Convolution2D(6, (5, 5), activation='relu', padding='same', name='block1_conv1')(input_tensor) 23 | x = keras.layers.MaxPooling2D(pool_size=(2, 2), name='block1_pool1')(x) 24 | 25 | x = keras.layers.Convolution2D(16, (5, 5), activation='relu', padding='same', name='block2_conv1')(x) 26 | x = keras.layers.MaxPooling2D(pool_size=(2, 2), name='block2_pool1')(x) 27 | 28 | x = keras.layers.Flatten(name='flatten')(x) 29 | x = keras.layers.Dense(120, activation='relu', name='fc1')(x) 30 | x = keras.layers.Dense(84, activation='relu', name='fc2')(x) 31 | x = keras.layers.Dense(10, name='before_softmax')(x) 32 | x = keras.layers.Activation('softmax', name='redictions')(x) 33 | 34 | return keras.models.Model(input_tensor, x) 35 | 36 | 37 | -------------------------------------------------------------------------------- /MNIST/select_retrain.py: -------------------------------------------------------------------------------- 1 | from tensorflow import keras 2 | import tensorflow as tf 3 | import numpy as np 4 | import random 5 | from tensorflow.keras.callbacks import ModelCheckpoint 6 | import os 7 | os.environ["CUDA_VISIBLE_DEVICES"]="0" 8 | 9 | # Suppress the GPU memory 10 | gpus = tf.config.experimental.list_physical_devices('GPU') 11 | if gpus: 12 | try: 13 | for gpu in gpus: 14 | tf.config.experimental.set_memory_growth(gpu, True) 15 | except RuntimeError as e: 16 | print(e) 17 | 18 | 19 | 20 | def select(foscs, n, s='best', k=1000): 21 | 22 | ranks = np.argsort(foscs) 23 | # we choose test cases with small and large fols. 24 | if s == 'best': 25 | h = n//2 26 | return np.concatenate((ranks[:h],ranks[-h:])) 27 | 28 | # we choose test cases with small and large fols. 29 | elif s == 'kmst': 30 | index = [] 31 | section_w = len(ranks) // k 32 | section_nums = n // section_w 33 | indexes = random.sample(list(range(k)), section_nums) 34 | for i in indexes: 35 | block = ranks[i*section_w: (i+1)*section_w] 36 | index.append(block) 37 | return np.concatenate(np.array(index)) 38 | 39 | # This is for gini strategy. There is little different from DeepGini paper. 
See function ginis() in metrics.py
40 |     else:
41 |         return ranks[:n]
42 | 
43 | 
44 | def select(values, n, s='best', k=4):
45 |     """
46 |     n: the number of selected test cases.
47 |     s: strategy, ['best', 'random', 'kmst', 'gini']
48 |     k: for KM-ST, the number of ranges.
49 |     """
50 |     ranks = np.argsort(values)
51 | 
52 |     if s == 'best':
53 |         h = n//2
54 |         return np.concatenate((ranks[:h],ranks[-h:]))
55 | 
56 |     elif s == 'r':
57 |         return np.array(random.sample(list(ranks),n))
58 | 
59 |     elif s == 'kmst':
60 |         fol_max = values.max()
61 |         th = fol_max / k
62 |         section_nums = n // k
63 |         indexes = []
64 |         for i in range(k):
65 |             section_indexes = np.intersect1d(np.where(values >= th*i), np.where(values < th*(i+1)))
66 |             if section_nums < len(section_indexes):
67 |                 index = random.sample(list(section_indexes), section_nums)
68 |                 indexes.append(index)
69 |             else:
70 |                 indexes.append(section_indexes)
71 |                 index = random.sample(list(ranks), section_nums-len(section_indexes))
72 |                 indexes.append(index)
73 |         return np.concatenate(indexes)
74 | 
75 |     # This is for gini strategy. There is little difference from DeepGini paper. See function ginis() in metrics.py
76 |     else:
77 |         return ranks[:n]
78 | 
79 | 
80 | def load_mnist(path="./mnist.npz"):
81 |     """
82 |     preprocessing for MNIST dataset, values are normalized to [0,1].
83 |     y_train and y_test are one-hot vectors.
84 |     """
85 |     f = np.load(path)
86 |     x_train, y_train = f['x_train'], f['y_train']
87 |     x_test, y_test = f['x_test'], f['y_test']
88 |     f.close()
89 | 
90 |     x_train = x_train.reshape(x_train.shape[0], 28, 28, 1)
91 |     x_test = x_test.reshape(x_test.shape[0], 28, 28, 1)
92 | 
93 |     x_train = x_train.astype('float32') / 255.
94 |     x_test = x_test.astype('float32') / 255.
95 | 
96 |     y_train = keras.utils.to_categorical(y_train, 10)
97 |     y_test = keras.utils.to_categorical(y_test, 10)
98 | 
99 |     return x_train, x_test, y_train, y_test
100 | 
101 | 
102 | x_train, x_test, y_train, y_test = load_mnist(path="./mnist.npz")
103 | 
104 | # Load the generated adversarial inputs for training. FGSM and PGD.
105 | with np.load("./FGSM_TrainFull.npz") as f:
106 |     fgsm_train, fgsm_train_labels, fgsm_train_fols, fgsm_train_ginis = f['advs'], f['labels'], f['fols'], f['ginis']
107 | 
108 | with np.load("./PGD_TrainFull.npz") as f:
109 |     pgd_train, pgd_train_labels, pgd_train_fols, pgd_train_ginis = f['advs'], f['labels'], f['fols'], f['ginis']
110 | 
111 | # Load the generated adversarial inputs for testing. FGSM and PGD.
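# (The same files gen_adv.py wrote; the test-set versions serve as the robustness benchmark.)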
112 | with np.load("./FGSM_Test.npz") as f: 113 | fgsm_test, fgsm_test_labels = f['advs'], f['labels'] 114 | 115 | with np.load("./PGD_Test.npz") as f: 116 | pgd_test, pgd_test_labels = f['advs'], f['labels'] 117 | 118 | 119 | # Mix the adversarial inputs 120 | fp_train = np.concatenate((fgsm_train, pgd_train)) 121 | fp_train_labels = np.concatenate((fgsm_train_labels, pgd_train_labels)) 122 | fp_train_fols = np.concatenate((fgsm_train_fols, pgd_train_fols)) 123 | fp_train_ginis = np.concatenate((fgsm_train_ginis, pgd_train_ginis)) 124 | 125 | fp_test = np.concatenate((fgsm_test, pgd_test)) 126 | fp_test_labels = np.concatenate((fgsm_test_labels, pgd_test_labels)) 127 | 128 | 129 | sNums = [600*i for i in [1,2,3,4,6,8,10,12,16,20]] 130 | strategies = ['best', 'kmst', 'gini'] 131 | acc_clean = [[] for i in range(len(strategies))] 132 | acc_fp = [[] for i in range(len(strategies))] 133 | 134 | 135 | for num in sNums: 136 | for i in range(len(strategies)): 137 | s = strategies[i] 138 | # model save path 139 | model_path = "./checkpoint/best_Lenet5_MIX_%d_%s.h5" % (num, s) 140 | model = keras.models.load_model("./Lenet5_mnist.h5") 141 | 142 | checkpoint = ModelCheckpoint(filepath=model_path, monitor='val_accuracy', verbose=0, save_best_only=True) 143 | callbacks = [checkpoint] 144 | 145 | if s == 'gini': 146 | indexes = select(fp_train_ginis, num, s=s) 147 | else: 148 | indexes = select(fp_train_fols, num, s=s) 149 | 150 | selectAdvs = fp_train[indexes] 151 | selectAdvsLabels = fp_train_labels[indexes] 152 | 153 | x_train_mix = np.concatenate((x_train, selectAdvs),axis=0) 154 | y_train_mix = np.concatenate((y_train, selectAdvsLabels),axis=0) 155 | 156 | # model retraining 157 | model.fit(x_train_mix, y_train_mix, epochs=10, batch_size=64, verbose=0, callbacks=callbacks, 158 | validation_data=(fp_test, fp_test_labels)) 159 | 160 | best_model = keras.models.load_model(model_path) 161 | _, aclean = best_model.evaluate(x_test, y_test, verbose=0) 162 | _, afp = best_model.evaluate(fp_test, fp_test_labels, verbose=0) 163 | 164 | acc_clean[i].append(aclean) 165 | acc_fp[i].append(afp) 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | -------------------------------------------------------------------------------- /MNIST/train_model.py: -------------------------------------------------------------------------------- 1 | from tensorflow import keras 2 | import tensorflow as tf 3 | import numpy as np 4 | from models import Lenet1, Lenet5 5 | 6 | gpus = tf.config.experimental.list_physical_devices('GPU') 7 | if gpus: 8 | try: 9 | for gpu in gpus: 10 | tf.config.experimental.set_memory_growth(gpu, True) 11 | except RuntimeError as e: 12 | print(e) 13 | 14 | 15 | def load_mnist(path="./mnist.npz"): 16 | f = np.load(path) 17 | x_train, y_train = f['x_train'], f['y_train'] 18 | x_test, y_test = f['x_test'], f['y_test'] 19 | f.close() 20 | 21 | x_train = x_train.reshape(x_train.shape[0], 28, 28, 1) 22 | x_test = x_test.reshape(x_test.shape[0], 28, 28, 1) 23 | 24 | x_train = x_train.astype('float32') / 255. 25 | x_test = x_test.astype('float32') / 255. 
167 | 
168 | 
169 | 
--------------------------------------------------------------------------------
/MNIST/train_model.py:
--------------------------------------------------------------------------------
1 | from tensorflow import keras
2 | import tensorflow as tf
3 | import numpy as np
4 | from models import Lenet1, Lenet5
5 | 
6 | gpus = tf.config.experimental.list_physical_devices('GPU')
7 | if gpus:
8 |     try:
9 |         for gpu in gpus:
10 |             tf.config.experimental.set_memory_growth(gpu, True)
11 |     except RuntimeError as e:
12 |         print(e)
13 | 
14 | 
15 | def load_mnist(path="./mnist.npz"):
16 |     f = np.load(path)
17 |     x_train, y_train = f['x_train'], f['y_train']
18 |     x_test, y_test = f['x_test'], f['y_test']
19 |     f.close()
20 | 
21 |     x_train = x_train.reshape(x_train.shape[0], 28, 28, 1)
22 |     x_test = x_test.reshape(x_test.shape[0], 28, 28, 1)
23 | 
24 |     x_train = x_train.astype('float32') / 255.
25 |     x_test = x_test.astype('float32') / 255.
26 | 
27 |     y_train = keras.utils.to_categorical(y_train, 10)
28 |     y_test = keras.utils.to_categorical(y_test, 10)
29 | 
30 |     return x_train, x_test, y_train, y_test
31 | 
32 | 
33 | path = "./mnist.npz"
34 | x_train, x_test, y_train, y_test = load_mnist(path)
35 | 
36 | 
37 | lenet5 = Lenet5()
38 | lenet5.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
39 | lenet5.fit(x_train, y_train, epochs=10, batch_size=64)
40 | 
41 | lenet5.evaluate(x_test, y_test)
42 | 
43 | lenet5.save("./Lenet5_mnist.h5")
44 | 
45 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # RobOT: Robustness-Oriented Testing for Deep Learning Systems (ICSE 2021)
2 | See the ICSE 2021 paper for more details.
3 | 
4 | ## Prerequisites (Python 3.6 & TF2)
5 | The code runs successfully with Python 3.6 and TensorFlow 2.2.0.
6 | 
7 | We recommend using conda to install the tensorflow-gpu environment:
8 | ```shell
9 | conda create -n tf2-gpu tensorflow-gpu==2.2.0
10 | conda activate tf2-gpu
11 | ```
12 | 
13 | Check the installed environments:
14 | ```shell
15 | conda env list
16 | ```
17 | 
18 | To run the code in Jupyter, add the kernel to Jupyter Notebook:
19 | ```
20 | pip install ipykernel
21 | python -m ipykernel install --name tf2-gpu
22 | ```
23 | 
24 | Then start Jupyter Notebook for the experiments:
25 | ```
26 | jupyter notebook
27 | ```
28 | 
29 | ## Files
30 | - MNIST - robustness experiments on the MNIST dataset.
31 | - FASHION - robustness experiments on the FASHION dataset.
32 | - SVHN - robustness experiments on the SVHN dataset.
33 | - CIFAR-10 - robustness experiments on the CIFAR-10 dataset.
34 | 
35 | 
36 | 
37 | ## Functions
38 | metrics.py contains the proposed FOL metric.
39 | 
40 | train_model.py trains the DNN model.
41 | 
42 | attack.py contains the FGSM and PGD attacks.
43 | 
44 | gen_adv.py generates adversarial inputs for test selection and robustness evaluation. You could also use a toolbox like CleverHans for test case generation.
45 | 
46 | select_retrain.py selects valuable test cases for model retraining.
47 | 
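A minimal end-to-end sketch of how these pieces fit together (illustrative only; run it from inside the MNIST folder after train_model.py has produced the model file):

```python
from tensorflow import keras
import numpy as np
from attack import FGSM  # MNIST/attack.py

model = keras.models.load_model("./Lenet5_mnist.h5")

with np.load("./mnist.npz") as f:
    x_test = f['x_test'].reshape(-1, 28, 28, 1).astype('float32') / 255.
    y_test = keras.utils.to_categorical(f['y_test'], 10)

# generate() returns the successful adversarial inputs together with the
# FOL and gini score of each one.
advs, labels, fols, ginis = FGSM(model, ep=0.01).generate(x_test[:256], y_test[:256])
```

The returned `fols` (or `ginis`) are exactly what `select()` in select_retrain.py ranks when picking the test cases used for retraining.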
48 | 
49 | For the testing methods DeepXplore, DLFuzz, and ADAPT, we use the code repository ADAPT.
50 | 
51 | For the testing methods AEQUITAS and ADF, we use the code repository ADF.
52 | 
53 | ## Coming soon
54 | More details will be included soon.
55 | 
56 | 
57 | 
58 | 
59 | 
60 | 
--------------------------------------------------------------------------------
/SVHN/models.py:
--------------------------------------------------------------------------------
1 | from tensorflow import keras
2 | import tensorflow as tf
3 | from tensorflow.keras.layers import Dense, Conv2D, BatchNormalization, Activation
4 | from tensorflow.keras.layers import AveragePooling2D, Input, Flatten
5 | from tensorflow.keras.regularizers import l2
6 | from tensorflow.keras.models import Model
7 | 
8 | 
9 | def Lenet5(input_shape=(32, 32, 1)):
10 |     input_tensor = keras.layers.Input(shape=input_shape)
11 | 
12 |     x = keras.layers.Convolution2D(6, (5, 5), activation='relu', padding='same', name='block1_conv1')(input_tensor)
13 |     x = keras.layers.MaxPooling2D(pool_size=(2, 2), name='block1_pool1')(x)
14 | 
15 |     x = keras.layers.Convolution2D(16, (5, 5), activation='relu', padding='same', name='block2_conv1')(x)
16 |     x = keras.layers.MaxPooling2D(pool_size=(2, 2), name='block2_pool1')(x)
17 | 
18 |     x = keras.layers.Flatten(name='flatten')(x)
19 |     x = keras.layers.Dense(120, activation='relu', name='fc1')(x)
20 |     x = keras.layers.Dense(84, activation='relu', name='fc2')(x)
21 |     x = keras.layers.Dense(10, name='before_softmax')(x)
22 |     x = keras.layers.Activation('softmax', name='predictions')(x)
23 | 
24 |     return keras.models.Model(input_tensor, x)
25 | 
26 | 
27 | 
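# Illustrative sanity check (an addition, not part of the original file):
# build the model and confirm the output shape on a dummy grey-scale batch.
if __name__ == "__main__":
    import numpy as np
    m = Lenet5()
    print(m(np.zeros((2, 32, 32, 1), dtype='float32')).shape)  # expected: (2, 10)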
28 | def resnet_layer(inputs,
29 |                  num_filters=16,
30 |                  kernel_size=3,
31 |                  strides=1,
32 |                  activation='relu',
33 |                  batch_normalization=True,
34 |                  conv_first=True):
35 | 
36 |     conv = Conv2D(num_filters,
37 |                   kernel_size=kernel_size,
38 |                   strides=strides,
39 |                   padding='same',
40 |                   kernel_initializer='he_normal',
41 |                   kernel_regularizer=l2(1e-4))
42 | 
43 |     x = inputs
44 |     if conv_first:
45 |         x = conv(x)
46 |         if batch_normalization:
47 |             x = BatchNormalization()(x)
48 |         if activation is not None:
49 |             x = Activation(activation)(x)
50 |     else:
51 |         if batch_normalization:
52 |             x = BatchNormalization()(x)
53 |         if activation is not None:
54 |             x = Activation(activation)(x)
55 |         x = conv(x)
56 |     return x
57 | 
58 | 
59 | def resnet_v1(input_shape, depth, num_classes=10):
60 |     if (depth - 2) % 6 != 0:
61 |         raise ValueError('depth should be 6n+2 (e.g. 20, 32, 44)')
62 | 
63 |     num_filters = 16
64 |     num_res_blocks = int((depth - 2) / 6)
65 | 
66 |     inputs = Input(shape=input_shape)
67 |     x = resnet_layer(inputs=inputs)
68 |     # Instantiate the stack of residual units.
69 |     for stack in range(3):
70 |         for res_block in range(num_res_blocks):
71 |             strides = 1
72 |             if stack > 0 and res_block == 0:  # first block of a later stack: downsample
73 |                 strides = 2
74 |             y = resnet_layer(inputs=x,
75 |                              num_filters=num_filters,
76 |                              strides=strides)
77 |             y = resnet_layer(inputs=y,
78 |                              num_filters=num_filters,
79 |                              activation=None)
80 |             if stack > 0 and res_block == 0:  # match dims with a linear projection shortcut
81 |                 x = resnet_layer(inputs=x,
82 |                                  num_filters=num_filters,
83 |                                  kernel_size=1,
84 |                                  strides=strides,
85 |                                  activation=None,
86 |                                  batch_normalization=False)
87 |             x = keras.layers.add([x, y])
88 |             x = Activation('relu')(x)
89 |         num_filters *= 2
90 | 
91 |     # Add classifier on top.
92 |     # v1 does not use BN after the last shortcut connection-ReLU.
93 |     x = AveragePooling2D(pool_size=8)(x)
94 |     y = Flatten()(x)
95 |     outputs = Dense(num_classes,
96 |                     activation='softmax',
97 |                     kernel_initializer='he_normal')(y)
98 | 
99 |     # Instantiate model.
100 |     model = Model(inputs=inputs, outputs=outputs)
101 |     return model
102 | 
103 | 
--------------------------------------------------------------------------------
/SVHN/train_model.py:
--------------------------------------------------------------------------------
1 | from tensorflow import keras
2 | import tensorflow as tf
3 | import numpy as np
4 | from models import Lenet5
5 | 
6 | gpus = tf.config.experimental.list_physical_devices('GPU')
7 | if gpus:
8 |     try:
9 |         for gpu in gpus:
10 |             tf.config.experimental.set_memory_growth(gpu, True)
11 |     except RuntimeError as e:
12 |         print(e)
13 | 
14 | 
15 | def load_svhn(path="./svhn_grey.npz"):
16 |     f = np.load(path)
17 |     x_train, y_train = f['x_train'], f['y_train']
18 |     x_test, y_test = f['x_test'], f['y_test']
19 |     f.close()
20 | 
21 |     x_train = x_train.astype('float32') / 255.
22 |     x_test = x_test.astype('float32') / 255.
23 | 
24 |     y_train = keras.utils.to_categorical(y_train, 10)
25 |     y_test = keras.utils.to_categorical(y_test, 10)
26 | 
27 |     return x_train, x_test, y_train, y_test
28 | 
29 | 
30 | path = "./svhn_grey.npz"
31 | x_train, x_test, y_train, y_test = load_svhn(path)
32 | 
33 | 
34 | lenet5 = Lenet5()
35 | lenet5.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
36 | lenet5.fit(x_train, y_train, epochs=20, batch_size=64)
37 | 
38 | lenet5.evaluate(x_test, y_test)
39 | 
40 | lenet5.save("./Lenet5_svhn.h5")
41 | 
42 | 
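# For reference, a hedged sketch of how svhn_grey.npz could be produced from
# the official SVHN .mat files (an assumption, not part of the original script;
# requires scipy and train_32x32.mat / test_32x32.mat from
# http://ufldl.stanford.edu/housenumbers/):
def build_svhn_grey(train_mat="train_32x32.mat", test_mat="test_32x32.mat",
                    out_path="./svhn_grey.npz"):
    from scipy.io import loadmat

    def to_grey(mat_path):
        d = loadmat(mat_path)
        x = np.transpose(d['X'], (3, 0, 1, 2)).astype('float32')  # (N, 32, 32, 3)
        x = np.dot(x, [0.299, 0.587, 0.114])[..., np.newaxis]     # luminosity grey-scale
        return x.astype('uint8'), d['y'].reshape(-1) % 10         # label 10 denotes digit 0

    x_tr, y_tr = to_grey(train_mat)
    x_te, y_te = to_grey(test_mat)
    np.savez(out_path, x_train=x_tr, y_train=y_tr, x_test=x_te, y_test=y_te)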
--------------------------------------------------------------------------------
/metrics.py:
--------------------------------------------------------------------------------
1 | from tensorflow import keras
2 | import tensorflow as tf
3 | import numpy as np
4 | 
5 | 
6 | 
7 | def gini(model, x):
8 |     """
9 |     Different from the definition in the DeepGini paper (deepgini = 1 - ginis): the smaller the ginis here, the larger the uncertainty.
10 | 
11 |     shape of x: [batch_size, width, height, channel]
12 |     """
13 |     x = tf.Variable(x)
14 |     preds = model(x).numpy()
15 |     ginis = np.sum(np.square(preds), axis=1)
16 |     return ginis
17 | 
18 | 
19 | def fol_Linf(model, x, xi, ep, y):
20 |     """
21 |     x: perturbed inputs, shape of x: [batch_size, width, height, channel]
22 |     xi: initial inputs, shape of xi: [batch_size, width, height, channel]
23 |     ep: L_inf bound
24 |     y: ground truth, one-hot vectors, shape of y: [batch_size, N_classes]
25 |     """
26 |     x, target = tf.Variable(x), tf.constant(y)
27 |     fols = []
28 |     with tf.GradientTape() as tape:
29 |         loss = keras.losses.categorical_crossentropy(target, model(x))
30 |     grads = tape.gradient(loss, x)
31 |     grad_norm = np.linalg.norm(grads.numpy().reshape(x.shape[0], -1), ord=1, axis=1)
32 |     grads_flat = grads.numpy().reshape(x.shape[0], -1)
33 |     diff = (x.numpy() - xi).reshape(x.shape[0], -1)
34 |     for i in range(x.shape[0]):
35 |         i_fol = -np.dot(grads_flat[i], diff[i]) + ep * grad_norm[i]
36 |         fols.append(i_fol)
37 | 
38 |     return np.array(fols)
39 | 
40 | 
41 | def fol_L2(model, x, y):
42 |     """
43 |     x: perturbed inputs, shape of x: [batch_size, width, height, channel]
44 |     y: ground truth, one-hot vectors, shape of y: [batch_size, N_classes]
45 |     """
46 |     x, target = tf.Variable(x), tf.constant(y)
47 |     with tf.GradientTape() as tape:
48 |         loss = keras.losses.categorical_crossentropy(target, model(x))
49 |     grads = tape.gradient(loss, x)
50 |     grads_norm_L2 = np.linalg.norm(grads.numpy().reshape(x.shape[0], -1), ord=2, axis=1)
51 | 
52 |     return grads_norm_L2
53 | 
54 | 
55 | def zol(model, x, y):
56 |     """
57 |     x: perturbed inputs, shape of x: [batch_size, width, height, channel]
58 |     y: ground truth, one-hot vectors, shape of y: [batch_size, N_classes]
59 |     """
60 |     x, target = tf.Variable(x), tf.constant(y)
61 |     loss = keras.losses.categorical_crossentropy(target, model(x))
62 |     loss = loss.numpy().reshape(-1)
63 | 
64 |     return loss
65 | 
66 | 
67 | def robustness(model, x, y):
68 |     """
69 |     x: perturbed inputs, shape of x: [batch_size, width, height, channel]
70 |     y: ground truth labels, shape of y: [batch_size]
71 |     """
72 |     return np.sum(np.argmax(model(x), axis=1) == y) / y.shape[0]
73 | 
74 | 
75 | 
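if __name__ == "__main__":
    # Illustrative smoke test (an addition, not part of the original module):
    # exercises the metrics on random data, assuming a trained MNIST model
    # saved as ./Lenet5_mnist.h5 in the working directory.
    model = keras.models.load_model("./Lenet5_mnist.h5")
    x_clean = np.random.rand(8, 28, 28, 1).astype('float32')
    x_adv = np.clip(x_clean + 0.01 * np.sign(np.random.randn(8, 28, 28, 1)), 0, 1).astype('float32')
    y = keras.utils.to_categorical(np.random.randint(0, 10, 8), 10)
    print("FOL:", fol_Linf(model, x_adv, x_clean, 0.01, y))
    print("gini:", gini(model, x_adv))
    print("robust acc:", robustness(model, x_adv, np.argmax(y, axis=1)))
--------------------------------------------------------------------------------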