├── CIFAR_classification_model
├── README.md
├── helper_functions.py
├── mnist_classification_model
├── test_DQN_adversarial_attack.py
└── train_DQN_for_adversarial_attack.py

/CIFAR_classification_model:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mandareln/Deep-Q-learning-adversarial/6b785abdb4e75ea217a87e5df3d5454ccbfc32fa/CIFAR_classification_model
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Deep Q-learning for adversarial attacks

To train the DQN model on the MNIST or CIFAR-10 dataset, run train_DQN_for_adversarial_attack.py.

The dataset is selected by editing the `dataset` variable near the top of train_DQN_for_adversarial_attack.py (lines 13-14).

At the end of training, the script saves the trained Q model in the same directory.

Run test_DQN_adversarial_attack.py to generate and save the adversarial images.

The Keras classification model files for MNIST and CIFAR were generated with Keras version 2.1.4.

Run 'pip install keras==2.1.4' if loading these models fails.
--------------------------------------------------------------------------------
/helper_functions.py:
--------------------------------------------------------------------------------

import numpy as np
from keras.layers import *
from keras.models import *
from matplotlib import pyplot as plt
from sklearn.utils import shuffle
from itertools import combinations, product
from tensorflow.examples.tutorials.mnist import input_data
from keras.datasets import cifar10
import keras


def get_cnn_model(input_shape = None):
    # convolutional feature extractor for the image part of the state
    cnn_model = Sequential()
    cnn_model.add(Conv2D(64,5, padding = 'same', activation = 'relu', input_shape = input_shape))
    cnn_model.add(Dropout(0.5))

    cnn_model.add(Conv2D(64,5, padding = 'same', activation = 'relu'))
    cnn_model.add(Dropout(0.5))

    cnn_model.add(Flatten())
    cnn_model.add(Dense(128, activation = 'relu'))
    #cnn_model.add(Dropout(0.5))

    return cnn_model


def get_dnn_model(input_shape = 10):
    # dense feature extractor for the classifier probability vector
    dnn_model = Sequential()
    dnn_model.add(Dense(128, activation = 'relu', input_shape = [input_shape]))
    #dnn_model.add(Dropout(0.5))
    return dnn_model

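# update_q_model expects exp_replay to be a list of transitions of the form
#   [img_state, prob_state, action, reward, next_img_state, next_prob_state],
# which is what train_DQN_for_adversarial_attack.py stores. For each transition it
# builds a standard Q-learning target on top of the model's current prediction:
#   target[action] = reward                                        (terminal rewards 10 and -1)
#   target[action] = reward + dis_factor * max_a Q(next_state, a)  (otherwise)
# and then fits the model on a single shuffled batch of batch_size transitions.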
def update_q_model(exp_replay = None, q_model = None, batch_size = None, dis_factor = None):

    train_input_1 = []
    train_input_2 = []

    train_label = []
    for ind in range(len(exp_replay)):

        init_img_state = exp_replay[ind][0]
        init_prob_state = exp_replay[ind][1]

        next_img_state = exp_replay[ind][4]
        next_prob_state = exp_replay[ind][5]

        action_taken = exp_replay[ind][2]
        reward_obtained = exp_replay[ind][3]

        target = q_model.predict([init_img_state,init_prob_state])[0]

        Q_sa = np.max(q_model.predict([next_img_state,next_prob_state])[0])

        if reward_obtained == 10 or reward_obtained == -1:
            target[action_taken] = reward_obtained
        else:
            target[action_taken] = reward_obtained + dis_factor * Q_sa

        train_input_1.append(init_img_state)
        train_input_2.append(init_prob_state)

        train_label.append(target)

    train_input_1 = np.squeeze(np.array(train_input_1))
    train_input_2 = np.squeeze(np.array(train_input_2))

    train_label = np.array(train_label)

    # grayscale images lose their channel axis in np.squeeze, so restore it
    if len(train_input_1.shape) == 3:
        train_input_1 = np.expand_dims(train_input_1, axis = -1)

    train_input_1,train_input_2,train_label = shuffle(train_input_1,train_input_2,train_label, random_state = 0)

    #print('model update')
    q_model.train_on_batch([train_input_1[0:batch_size],train_input_2[0:batch_size]], train_label[0:batch_size])

    return q_model
--------------------------------------------------------------------------------
/mnist_classification_model:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mandareln/Deep-Q-learning-adversarial/6b785abdb4e75ea217a87e5df3d5454ccbfc32fa/mnist_classification_model
--------------------------------------------------------------------------------
/test_DQN_adversarial_attack.py:
--------------------------------------------------------------------------------

import numpy as np
from keras.layers import *
from keras.models import *
from matplotlib import pyplot as plt
from itertools import combinations, product
from tensorflow.examples.tutorials.mnist import input_data
from keras.datasets import cifar10
import keras
from PIL import Image
import os

#dataset = 'CIFAR'
dataset = 'MNIST'


if dataset == 'MNIST':
    input_model = load_model('./mnist_classification_model')
    loaded_Q_model = load_model(dataset + '_Q_adversarial_model')
    save_dir = './MNIST_adversarial_examples/'

    if not os.path.isdir(save_dir):
        os.mkdir(save_dir)

    mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
    mnist_images = mnist.train.images
    mnist_train_images = np.reshape(mnist_images,[mnist_images.shape[0],28,28,1])
    X_train = mnist_train_images
    y_train = mnist.train.labels
    mnist_images = mnist.test.images
    X_test = np.reshape(mnist_images,[mnist_images.shape[0],28,28,1])
    y_test = mnist.test.labels
    input_shape = [28,28,1]

    num_classes = 10
    lamda = .4


if dataset == 'CIFAR':
    input_model = load_model('./CIFAR_classification_model')
    loaded_Q_model = load_model(dataset + '_Q_adversarial_model')
    save_dir = './CIFAR_adversarial_examples/'

    if not os.path.isdir(save_dir):
        os.mkdir(save_dir)

    cifar_class_names = ['airplane','automobile','bird','cat','deer','dog','frog','horse','ship','truck']
    (X_train, y_train), (X_test, y_test) = cifar10.load_data()

    num_classes = 10
    # Convert class vectors to binary class matrices.
    y_train = keras.utils.to_categorical(y_train, num_classes)
    y_test = keras.utils.to_categorical(y_test, num_classes)

    X_train = X_train / 255.0
    X_test = X_test / 255.0
    input_shape = [32,32,3]
    lamda = .1


### defining the action space of the agent
max_blocks_attack = 15
block_size = 2
x_span = list(range(0,X_train.shape[1],block_size))
blocks = list(product(x_span,x_span))

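# Attack loop: for each held-out test image, repeatedly query the trained Q model
# for the most promising block, add a perturbation of strength lamda to that
# block_size x block_size patch, and stop as soon as the classifier's label flips
# (saving the adversarial image) or after max_blocks_attack perturbations.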
for games in range(5000,len(X_test)):

    sample_img = X_test[games]
    sample_img = np.expand_dims(sample_img,axis = 0)

    orig_label = np.argmax(input_model.predict(sample_img),axis = 1)[0]
    orig_img = np.array(sample_img)


    for ite in range(max_blocks_attack):

        sample_img_prob = input_model.predict(sample_img)[0]
        sample_img_prob = np.expand_dims(sample_img_prob,axis = 0)

        action = np.argmax(loaded_Q_model.predict([sample_img,sample_img_prob]))

        attack_region = np.zeros((sample_img.shape))
        attack_cord = blocks[action]

        attack_region[0,attack_cord[0]:attack_cord[0]+block_size, attack_cord[1]:attack_cord[1]+block_size,:] = 1

        sample_img_noise = sample_img + lamda * attack_region

        mod_label = np.argmax(input_model.predict(sample_img_noise),axis = 1)[0]

        if mod_label != orig_label:
            print('saving adversarial example image')
            sample_img_noise = np.squeeze(sample_img_noise * 255.)
            sample_img_noise = Image.fromarray(sample_img_noise.astype('uint8'))
            sample_img_noise = sample_img_noise.resize((128, 128))

            if dataset == 'CIFAR':
                sample_img_noise.save(save_dir + str(games) + '_actual_' + cifar_class_names[orig_label] + '_mod_' + cifar_class_names[mod_label] + '.png')

            if dataset == 'MNIST':
                sample_img_noise.save(save_dir + str(games) + '_actual_' + str(orig_label) + '_mod_' + str(mod_label) + '.png')

            break

        sample_img = np.array(sample_img_noise)

--------------------------------------------------------------------------------
/train_DQN_for_adversarial_attack.py:
--------------------------------------------------------------------------------

import numpy as np
from keras.layers import *
from keras.models import *
from matplotlib import pyplot as plt
from sklearn.utils import shuffle
from itertools import combinations, product
from tensorflow.examples.tutorials.mnist import input_data
from keras.datasets import cifar10
import keras
from helper_functions import *

dataset = 'MNIST'
#dataset = 'CIFAR'

if dataset == 'MNIST':
    input_model = load_model('./mnist_classification_model')
    mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
    mnist_images = mnist.train.images
    mnist_train_images = np.reshape(mnist_images,[mnist_images.shape[0],28,28,1])
    X_train = mnist_train_images
    y_train = mnist.train.labels
    mnist_images = mnist.test.images
    X_test = np.reshape(mnist_images,[mnist_images.shape[0],28,28,1])
    y_test = mnist.test.labels
    input_shape = [28,28,1]

    num_classes = 10
    lamda = .4


if dataset == 'CIFAR':
    input_model = load_model('./CIFAR_classification_model')
    cifar_class_names = ['airplane','automobile','bird','cat','deer','dog','frog','horse','ship','truck']
    (X_train, y_train), (X_test, y_test) = cifar10.load_data()

    num_classes = 10
    # Convert class vectors to binary class matrices.
    y_train = keras.utils.to_categorical(y_train, num_classes)
    y_test = keras.utils.to_categorical(y_test, num_classes)

    X_train = X_train / 255.0
    X_test = X_test / 255.0
    input_shape = [32,32,3]
    lamda = .1


### defining the action space of the agent
block_size = 2
x_span = list(range(0,X_train.shape[1],block_size))
blocks = list(product(x_span,x_span))

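# The agent's state is the pair (current image, classifier probability vector), so
# the Q network below has two input branches: the small CNN from helper_functions.py
# for the image and a dense branch for the 10-dim probability vector. Their features
# are concatenated and mapped to one linear Q value per candidate block, i.e.
# len(blocks) actions (14*14 = 196 for 28x28 MNIST, 16*16 = 256 for 32x32 CIFAR-10
# with block_size = 2).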
### defining the DQN model
cnn_model = get_cnn_model(input_shape = input_shape)
dnn_model = get_dnn_model(input_shape = num_classes)

prob_input = Input(shape = [num_classes])
img_input = Input(shape = input_shape)

prob_rep = dnn_model(prob_input)
img_rep = cnn_model(img_input)

x = Concatenate(axis = -1)([prob_rep,img_rep])
x = Dense(len(blocks), activation = 'linear')(x)

q_model = Model(inputs = [img_input,prob_input], outputs = [x])

q_model.summary()
q_model.compile(loss = 'mse', optimizer = 'adam', metrics = ['accuracy'])


#### Q-learning hyperparameters
eps = .9
dis_factor = 0.9
max_memory = 1000
max_blocks_attack = 15


success = []
success_rate = []
exp_replay = []

### generating episodes from the first 5k test images
for games in range(5000):

    sample_img = X_test[games]
    sample_img = np.expand_dims(sample_img,axis = 0)

    # decay the exploration rate every 300 episodes, down to a floor of 0.1
    if games > 0 and games % 300 == 0:
        eps = eps - 0.1

    if eps <= 0.1:
        eps = 0.1

    orig_label = np.argmax(input_model.predict(sample_img),axis = 1)
    orig_img = np.array(sample_img)


    for ite in range(0,max_blocks_attack):
        #print(ite)
        sample_img_prob = input_model.predict(sample_img)[0]
        sample_img_prob = np.expand_dims(sample_img_prob,axis = 0)

        # epsilon-greedy action selection over the candidate blocks
        if np.random.rand() < eps:
            action = np.random.randint(0,len(blocks))
        else:
            action = np.argmax(q_model.predict([sample_img,sample_img_prob]))

        attack_region = np.zeros((sample_img.shape))
        attack_cord = blocks[action]
        attack_region[0,attack_cord[0]:attack_cord[0]+block_size, attack_cord[1]:attack_cord[1]+block_size,:] = 1

        sample_img_noise = sample_img + lamda * attack_region
        sample_img_noise_prob = input_model.predict(sample_img_noise)

        mod_label = np.argmax(input_model.predict(sample_img_noise),axis = 1)

        if mod_label != orig_label:

            reward = 10.
            success.append(1)
            exp_replay.append([sample_img,sample_img_prob,action,reward,sample_img_noise,sample_img_noise_prob])
            break

        else:
            reward = -.1
            exp_replay.append([sample_img,sample_img_prob,action,reward,sample_img_noise,sample_img_noise_prob])

        sample_img = np.array(sample_img_noise)


        if ite == (max_blocks_attack-1):
            reward = -1.
            exp_replay.append([sample_img,sample_img_prob,action,reward,sample_img_noise,sample_img_noise_prob])
            success.append(0)

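    # Replay update: once max_memory transitions have been collected, fit the Q model
    # on one batch drawn from them, then empty the buffer and log the attack success
    # rate over the episodes seen since the previous update.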
    if len(exp_replay) >= max_memory:

        q_model = update_q_model(exp_replay = exp_replay, q_model = q_model, batch_size = 32, dis_factor = dis_factor)
        exp_replay = []

        print('Q model updated,', 'success rate', np.mean(np.array(success)))
        success_rate.append(np.mean(np.array(success)))
        success = []


### saving the trained Q model
q_model.save(dataset + '_Q_adversarial_model')
--------------------------------------------------------------------------------
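The training script records a per-update `success_rate` list but never uses it. If a quick sanity check of training progress is wanted, a few lines like the following could be appended to the end of train_DQN_for_adversarial_attack.py. This is only a sketch, not part of the original code: it reuses the script's existing `plt`, `success_rate`, `max_memory`, and `dataset` variables, and the output filename is just a suggestion.

# plot the attack success rate recorded after each Q model update
plt.plot(success_rate)
plt.xlabel('Q model update (one per ' + str(max_memory) + ' stored transitions)')
plt.ylabel('attack success rate')
plt.savefig(dataset + '_success_rate.png')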