├── CIFAR_classification_model
├── README.md
├── helper_functions.py
├── mnist_classification_model
├── test_DQN_adversarial_attack.py
└── train_DQN_for_adversarial_attack.py

/CIFAR_classification_model:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mandareln/Deep-Q-learning-adversarial/6b785abdb4e75ea217a87e5df3d5454ccbfc32fa/CIFAR_classification_model
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Deep Q-learning for adversarial attacks

To train the DQN model on the MNIST or CIFAR-10 dataset, run train_DQN_for_adversarial_attack.py.

The dataset is selected by editing the `dataset` variable near the top of train_DQN_for_adversarial_attack.py (lines 13-14).

At the end of training, the script saves the trained Q model in the same directory.

Run test_DQN_adversarial_attack.py to generate and save the adversarial images.

The Keras classification model files for MNIST and CIFAR were generated with Keras version 2.1.4.

Run 'pip install keras==2.1.4' if loading these models fails.
--------------------------------------------------------------------------------
/helper_functions.py:
--------------------------------------------------------------------------------

import numpy as np
from keras.layers import *
from keras.models import *
from matplotlib import pyplot as plt
from sklearn.utils import shuffle
from itertools import combinations, product
from tensorflow.examples.tutorials.mnist import input_data
from keras.datasets import cifar10
import keras


def get_cnn_model(input_shape = None):
    # convolutional feature extractor for the image part of the state
    cnn_model = Sequential()
    cnn_model.add(Conv2D(64,5, padding = 'same', activation = 'relu', input_shape = input_shape))
    cnn_model.add(Dropout(0.5))

    cnn_model.add(Conv2D(64,5, padding = 'same', activation = 'relu'))
    cnn_model.add(Dropout(0.5))

    cnn_model.add(Flatten())
    cnn_model.add(Dense(128, activation = 'relu'))
    #cnn_model.add(Dropout(0.5))

    return cnn_model


def get_dnn_model(input_shape = 10):
    # dense feature extractor for the classifier probability vector
    dnn_model = Sequential()
    dnn_model.add(Dense(128, activation = 'relu', input_shape = [input_shape]))
    #dnn_model.add(Dropout(0.5))
    return dnn_model

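# update_q_model expects exp_replay to be a list of transitions of the form
#   [img_state, prob_state, action, reward, next_img_state, next_prob_state],
# which is what train_DQN_for_adversarial_attack.py stores. For each transition it
# builds a standard Q-learning target on top of the model's current prediction:
#   target[action] = reward                                        (terminal rewards 10 and -1)
#   target[action] = reward + dis_factor * max_a Q(next_state, a)  (otherwise)
# and then fits the model on a single shuffled batch of batch_size transitions.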
def update_q_model(exp_replay = None, q_model = None, batch_size = None, dis_factor = None):

    train_input_1 = []
    train_input_2 = []

    train_label = []
    for ind in range(len(exp_replay)):

        init_img_state = exp_replay[ind][0]
        init_prob_state = exp_replay[ind][1]

        next_img_state = exp_replay[ind][4]
        next_prob_state = exp_replay[ind][5]

        action_taken = exp_replay[ind][2]
        reward_obtained = exp_replay[ind][3]

        target = q_model.predict([init_img_state,init_prob_state])[0]

        Q_sa = np.max(q_model.predict([next_img_state,next_prob_state])[0])

        if reward_obtained == 10 or reward_obtained == -1:
            target[action_taken] = reward_obtained
        else:
            target[action_taken] = reward_obtained + dis_factor * Q_sa

        train_input_1.append(init_img_state)
        train_input_2.append(init_prob_state)

        train_label.append(target)

    train_input_1 = np.squeeze(np.array(train_input_1))
    train_input_2 = np.squeeze(np.array(train_input_2))

    train_label = np.array(train_label)

    # grayscale images lose their channel axis in np.squeeze, so restore it
    if len(train_input_1.shape) == 3:
        train_input_1 = np.expand_dims(train_input_1, axis = -1)

    train_input_1,train_input_2,train_label = shuffle(train_input_1,train_input_2,train_label, random_state = 0)

    #print('model update')
    q_model.train_on_batch([train_input_1[0:batch_size],train_input_2[0:batch_size]], train_label[0:batch_size])

    return q_model
--------------------------------------------------------------------------------
/mnist_classification_model:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mandareln/Deep-Q-learning-adversarial/6b785abdb4e75ea217a87e5df3d5454ccbfc32fa/mnist_classification_model
--------------------------------------------------------------------------------
/test_DQN_adversarial_attack.py:
--------------------------------------------------------------------------------

import numpy as np
from keras.layers import *
from keras.models import *
from matplotlib import pyplot as plt
from itertools import combinations, product
from tensorflow.examples.tutorials.mnist import input_data
from keras.datasets import cifar10
import keras
from PIL import Image
import os

#dataset = 'CIFAR'
dataset = 'MNIST'


if dataset == 'MNIST':
    input_model = load_model('./mnist_classification_model')
    loaded_Q_model = load_model(dataset + '_Q_adversarial_model')
    save_dir = './MNIST_adversarial_examples/'

    if not os.path.isdir(save_dir):
        os.mkdir(save_dir)

    mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
    mnist_images = mnist.train.images
    mnist_train_images = np.reshape(mnist_images,[mnist_images.shape[0],28,28,1])
    X_train = mnist_train_images
    y_train = mnist.train.labels
    mnist_images = mnist.test.images
    X_test = np.reshape(mnist_images,[mnist_images.shape[0],28,28,1])
    y_test = mnist.test.labels
    input_shape = [28,28,1]

    num_classes = 10
    lamda = .4


if dataset == 'CIFAR':
    input_model = load_model('./CIFAR_classification_model')
    loaded_Q_model = load_model(dataset + '_Q_adversarial_model')
    save_dir = './CIFAR_adversarial_examples/'

    if not os.path.isdir(save_dir):
        os.mkdir(save_dir)

    cifar_class_names = ['airplane','automobile','bird','cat','deer','dog','frog','horse','ship','truck']
    (X_train, y_train), (X_test, y_test) = cifar10.load_data()

    num_classes = 10
    # Convert class vectors to binary class matrices.
    y_train = keras.utils.to_categorical(y_train, num_classes)
    y_test = keras.utils.to_categorical(y_test, num_classes)

    X_train = X_train / 255.0
    X_test = X_test / 255.0
    input_shape = [32,32,3]
    lamda = .1


### defining the action space of the agent
max_blocks_attack = 15
block_size = 2
x_span = list(range(0,X_train.shape[1],block_size))
blocks = list(product(x_span,x_span))

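# Attack loop: for each held-out test image, repeatedly query the trained Q model
# for the most promising block, add a perturbation of strength lamda to that
# block_size x block_size patch, and stop as soon as the classifier's label flips
# (saving the adversarial image) or after max_blocks_attack perturbations.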
for games in range(5000,len(X_test)):

    sample_img = X_test[games]
    sample_img = np.expand_dims(sample_img,axis = 0)

    orig_label = np.argmax(input_model.predict(sample_img),axis = 1)[0]
    orig_img = np.array(sample_img)


    for ite in range(max_blocks_attack):

        sample_img_prob = input_model.predict(sample_img)[0]
        sample_img_prob = np.expand_dims(sample_img_prob,axis = 0)

        action = np.argmax(loaded_Q_model.predict([sample_img,sample_img_prob]))

        attack_region = np.zeros((sample_img.shape))
        attack_cord = blocks[action]

        attack_region[0,attack_cord[0]:attack_cord[0]+block_size, attack_cord[1]:attack_cord[1]+block_size,:] = 1

        sample_img_noise = sample_img + lamda * attack_region

        mod_label = np.argmax(input_model.predict(sample_img_noise),axis = 1)[0]

        if mod_label != orig_label:
            print('saving adversarial example image')
            sample_img_noise = np.squeeze(sample_img_noise * 255.)
            sample_img_noise = Image.fromarray(sample_img_noise.astype('uint8'))
            sample_img_noise = sample_img_noise.resize((128, 128))

            if dataset == 'CIFAR':
                sample_img_noise.save(save_dir + str(games) + '_actual_' + cifar_class_names[orig_label] + '_mod_' + cifar_class_names[mod_label] + '.png')

            if dataset == 'MNIST':
                sample_img_noise.save(save_dir + str(games) + '_actual_' + str(orig_label) + '_mod_' + str(mod_label) + '.png')

            break

        sample_img = np.array(sample_img_noise)

--------------------------------------------------------------------------------
/train_DQN_for_adversarial_attack.py:
--------------------------------------------------------------------------------

import numpy as np
from keras.layers import *
from keras.models import *
from matplotlib import pyplot as plt
from sklearn.utils import shuffle
from itertools import combinations, product
from tensorflow.examples.tutorials.mnist import input_data
from keras.datasets import cifar10
import keras
from helper_functions import *

dataset = 'MNIST'
#dataset = 'CIFAR'

if dataset == 'MNIST':
    input_model = load_model('./mnist_classification_model')
    mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
    mnist_images = mnist.train.images
    mnist_train_images = np.reshape(mnist_images,[mnist_images.shape[0],28,28,1])
    X_train = mnist_train_images
    y_train = mnist.train.labels
    mnist_images = mnist.test.images
    X_test = np.reshape(mnist_images,[mnist_images.shape[0],28,28,1])
    y_test = mnist.test.labels
    input_shape = [28,28,1]

    num_classes = 10
    lamda = .4


if dataset == 'CIFAR':
    input_model = load_model('./CIFAR_classification_model')
    cifar_class_names = ['airplane','automobile','bird','cat','deer','dog','frog','horse','ship','truck']
    (X_train, y_train), (X_test, y_test) = cifar10.load_data()

    num_classes = 10
    # Convert class vectors to binary class matrices.
    y_train = keras.utils.to_categorical(y_train, num_classes)
    y_test = keras.utils.to_categorical(y_test, num_classes)

    X_train = X_train / 255.0
    X_test = X_test / 255.0
    input_shape = [32,32,3]
    lamda = .1


### defining the action space of the agent
block_size = 2
x_span = list(range(0,X_train.shape[1],block_size))
blocks = list(product(x_span,x_span))

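# The agent's state is the pair (current image, classifier probability vector), so
# the Q network below has two input branches: the small CNN from helper_functions.py
# for the image and a dense branch for the 10-dim probability vector. Their features
# are concatenated and mapped to one linear Q value per candidate block, i.e.
# len(blocks) actions (14*14 = 196 for 28x28 MNIST, 16*16 = 256 for 32x32 CIFAR-10
# with block_size = 2).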
### defining the DQN model
cnn_model = get_cnn_model(input_shape = input_shape)
dnn_model = get_dnn_model(input_shape = num_classes)

prob_input = Input(shape = [num_classes])
img_input = Input(shape = input_shape)

prob_rep = dnn_model(prob_input)
img_rep = cnn_model(img_input)

x = Concatenate(axis = -1)([prob_rep,img_rep])
x = Dense(len(blocks), activation = 'linear')(x)

q_model = Model(inputs = [img_input,prob_input], outputs = [x])

q_model.summary()
q_model.compile(loss = 'mse', optimizer = 'adam', metrics = ['accuracy'])


#### Q-learning hyperparameters
eps = .9
dis_factor = 0.9
max_memory = 1000
max_blocks_attack = 15


success = []
success_rate = []
exp_replay = []

### generating episodes from the first 5k test images
for games in range(5000):

    sample_img = X_test[games]
    sample_img = np.expand_dims(sample_img,axis = 0)

    # decay the exploration rate every 300 episodes, down to a floor of 0.1
    if games > 0 and games % 300 == 0:
        eps = eps - 0.1

    if eps <= 0.1:
        eps = 0.1

    orig_label = np.argmax(input_model.predict(sample_img),axis = 1)
    orig_img = np.array(sample_img)


    for ite in range(0,max_blocks_attack):
        #print(ite)
        sample_img_prob = input_model.predict(sample_img)[0]
        sample_img_prob = np.expand_dims(sample_img_prob,axis = 0)

        # epsilon-greedy action selection over the candidate blocks
        if np.random.rand() < eps:
            action = np.random.randint(0,len(blocks))
        else:
            action = np.argmax(q_model.predict([sample_img,sample_img_prob]))

        attack_region = np.zeros((sample_img.shape))
        attack_cord = blocks[action]
        attack_region[0,attack_cord[0]:attack_cord[0]+block_size, attack_cord[1]:attack_cord[1]+block_size,:] = 1

        sample_img_noise = sample_img + lamda * attack_region
        sample_img_noise_prob = input_model.predict(sample_img_noise)

        mod_label = np.argmax(input_model.predict(sample_img_noise),axis = 1)

        if mod_label != orig_label:

            reward = 10.
            success.append(1)
            exp_replay.append([sample_img,sample_img_prob,action,reward,sample_img_noise,sample_img_noise_prob])
            break

        else:
            reward = -.1
            exp_replay.append([sample_img,sample_img_prob,action,reward,sample_img_noise,sample_img_noise_prob])

        sample_img = np.array(sample_img_noise)


        if ite == (max_blocks_attack-1):
            reward = -1.
            exp_replay.append([sample_img,sample_img_prob,action,reward,sample_img_noise,sample_img_noise_prob])
            success.append(0)

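    # Replay update: once max_memory transitions have been collected, fit the Q model
    # on one batch drawn from them, then empty the buffer and log the attack success
    # rate over the episodes seen since the previous update.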
    if len(exp_replay) >= max_memory:

        q_model = update_q_model(exp_replay = exp_replay, q_model = q_model, batch_size = 32, dis_factor = dis_factor)
        exp_replay = []

        print('Q model updated,', 'success rate', np.mean(np.array(success)))
        success_rate.append(np.mean(np.array(success)))
        success = []


### saving the trained Q model
q_model.save(dataset + '_Q_adversarial_model')
--------------------------------------------------------------------------------
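The training script records a per-update `success_rate` list but never uses it. If a quick sanity check of training progress is wanted, a few lines like the following could be appended to the end of train_DQN_for_adversarial_attack.py. This is only a sketch, not part of the original code: it reuses the script's existing `plt`, `success_rate`, `max_memory`, and `dataset` variables, and the output filename is just a suggestion.

# plot the attack success rate recorded after each Q model update
plt.plot(success_rate)
plt.xlabel('Q model update (one per ' + str(max_memory) + ' stored transitions)')
plt.ylabel('attack success rate')
plt.savefig(dataset + '_success_rate.png')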