├── README.md ├── fgm.py ├── l2_LADMMST_attack_v3.py ├── l2_attack.py ├── setup_cifar.py ├── setup_inception.py ├── setup_mnist.py ├── test_attack_iclr.py └── train_models.py /README.md: -------------------------------------------------------------------------------- 1 | # StrAttack 2 | 3 | The code is for paper: 'Structured Adversarial Attack: Towards General Implementation and Better Interpretability' which accepted in ICLR 2019 4 | (https://openreview.net/forum?id=BkgzniCqY7) by Kaidi Xu*, Sijia Liu*, Pu Zhao, Pin-Yu Chen, Huan Zhang, Quanfu Fan, Deniz Erdogmus, Yanzhi Wang, Xue Lin (* means equal contribution) 5 | 6 | ``` 7 | @inproceedings{ 8 | xu2018structured, 9 | title={Structured Adversarial Attack: Towards General Implementation and Better Interpretability}, 10 | author={Kaidi Xu and Sijia Liu and Pu Zhao and Pin-Yu Chen and Huan Zhang and Quanfu Fan and Deniz Erdogmus and Yanzhi Wang and Xue Lin}, 11 | booktitle={International Conference on Learning Representations}, 12 | year={2019}, 13 | url={https://openreview.net/forum?id=BkgzniCqY7}, 14 | } 15 | ``` 16 | 17 | 18 | Experiment Setup 19 | ------------------------------------- 20 | To prepare the ImageNet dataset, download and unzip the following archive: 21 | 22 | [ImageNet Test Set](http://jaina.cs.ucdavis.edu/datasets/adv/imagenet/img.tar.gz) 23 | 24 | 25 | and put the `imgs` folder in `../imagesnetdata`. This path can be changed 26 | in `setup_inception.py`. 27 | 28 | To download the inception model: 29 | 30 | ``` 31 | python3 setup_inception.py 32 | ``` 33 | 34 | 35 | To train CIFAR10 and MNIST model: 36 | run 37 | ``` 38 | python3 trainmodel.py -d all 39 | ``` 40 | 41 | StrAttack: 42 | 43 | run 44 | ``` 45 | python3 test_attack_iclr.py 46 | ``` 47 | You can change methods, dataset or any hyperparameter in args or add in command line. 
48 | 49 | 50 | -------------------------------------------------------------------------------- /fgm.py: -------------------------------------------------------------------------------- 1 | ## Based on the CleverHans FGM implementation 2 | ## Modified by Yash Sharma for epsilon search and to match attack structure for attack code 3 | 4 | ## fgm.py -- attack a network with the Fast Gradient Method 5 | ## 6 | ## Copyright (C) 2017, Yash Sharma . 7 | ## Copyright (C) 2016, Nicholas Carlini . 8 | ## 9 | ## This program is licenced under the BSD 2-Clause licence, 10 | ## contained in the LICENCE file in this directory. 11 | 12 | import sys 13 | import tensorflow as tf 14 | import numpy as np 15 | from six.moves import xrange 16 | 17 | class FGM: 18 | def __init__(self, sess, model, batch_size=9, ord=np.inf, clip_min=-0.5, clip_max=0.5, targeted=True, inception=False): 19 | 20 | image_size, num_channels, num_labels = model.image_size, model.num_channels, model.num_labels 21 | self.sess = sess 22 | self.model = model 23 | self.targeted = targeted 24 | self.batch_size = batch_size 25 | self.ord = ord 26 | self.clip_min = clip_min 27 | self.clip_max = clip_max 28 | self.inception = inception 29 | 30 | shape = (batch_size,image_size,image_size,num_channels) 31 | 32 | # these are variables to be more efficient in sending data to tf 33 | self.timg = tf.Variable(np.zeros(shape), dtype=tf.float32) 34 | self.tlab = tf.Variable(np.zeros((batch_size,num_labels)), dtype=tf.float32) 35 | self.eps = tf.Variable(0., dtype=tf.float32) 36 | 37 | # and here's what we use to assign them 38 | self.assign_timg = tf.placeholder(tf.float32, shape) 39 | self.assign_tlab = tf.placeholder(tf.float32, (batch_size,num_labels)) 40 | self.assign_eps = tf.placeholder(tf.float32) 41 | 42 | self.tlab_new = self.tlab / tf.reduce_sum(self.tlab, 1, keep_dims=True) 43 | 44 | # prediction BEFORE-SOFTMAX of the model 45 | self.output = self.model.predict(self.timg) 46 | self.loss = 
tf.nn.softmax_cross_entropy_with_logits(logits=self.output, labels=self.tlab_new) 47 | 48 | if self.targeted: 49 | self.loss = -self.loss 50 | 51 | self.gradients, = tf.gradients(self.loss, self.timg) 52 | 53 | if self.ord == np.inf: 54 | self.signed_grad = tf.sign(self.gradients) 55 | elif self.ord == 1: 56 | reduc_ind = list(xrange(1, len(shape))) 57 | self.signed_grad = self.gradients / tf.reduce_sum(tf.abs(self.gradients), 58 | reduction_indices=reduc_ind, 59 | keep_dims=True) 60 | elif self.ord == 2: 61 | reduc_ind = list(xrange(1, len(shape))) 62 | self.signed_grad = self.gradients / tf.sqrt(tf.reduce_sum(tf.square(self.gradients), 63 | reduction_indices=reduc_ind, 64 | keep_dims=True)) 65 | 66 | self.adv_x = tf.clip_by_value(tf.stop_gradient(self.timg + self.eps*self.signed_grad), self.clip_min, self.clip_max) 67 | 68 | # these are the variables to initialize when we run 69 | self.setup = [] 70 | self.setup.append(self.timg.assign(self.assign_timg)) 71 | self.setup.append(self.tlab.assign(self.assign_tlab)) 72 | self.setup.append(self.eps.assign(self.assign_eps)) 73 | 74 | 75 | def attack(self, inputs, targets): 76 | adv_ = [] 77 | 78 | grad_ = [] 79 | print('go up to',len(inputs)) 80 | for i in range(0,len(inputs),self.batch_size): 81 | print('tick',i) 82 | batch = inputs[i:i+self.batch_size] 83 | batchlab = targets[i:i+self.batch_size] 84 | 85 | self.sess.run(self.setup, {self.assign_timg: batch, self.assign_tlab: batchlab, self.assign_eps: 0.}) 86 | adv,grad = self.sess.run([self.adv_x, self.signed_grad]) 87 | adv_.extend(adv) 88 | grad_.extend(grad) 89 | adv_ = np.array(adv_) 90 | print(adv_.shape) 91 | grad_ = np.array(grad_) 92 | print(grad_.shape) 93 | 94 | if self.ord == np.inf: 95 | step_size = 1e-3 96 | eps = np.arange(1e-3,1e+0,step_size) 97 | elif self.ord == 2: 98 | step_size = 1e-2 99 | eps = np.arange(1e-2,1e+1,step_size) 100 | elif self.ord == 1: 101 | step_size = 1e+0 102 | eps = np.arange(1e+0,1e+3,step_size) 103 | loop_iter = 
np.arange(0,len(inputs)) 104 | for i,c in enumerate(eps): 105 | adv = np.clip(np.add(inputs,np.multiply(c,grad_)), self.clip_min, self.clip_max) 106 | for j in loop_iter: 107 | pred = self.model.model.predict(adv[j:j+1]) 108 | if self.inception: 109 | pred = np.reshape(pred, (targets[0:1].shape)) 110 | if(np.argmax(pred,1) == np.argmax(targets[j:j+1],1)): 111 | loop_iter = np.setdiff1d(loop_iter, j) 112 | print(len(loop_iter)) 113 | adv_[j] = adv[j] 114 | adv = adv_ 115 | return adv 116 | -------------------------------------------------------------------------------- /l2_LADMMST_attack_v3.py: -------------------------------------------------------------------------------- 1 | ## l2_attack.py -- attack a network optimizing for l_2 distance 2 | ## 3 | ## Copyright (C) 2016, Nicholas Carlini . 4 | ## modified by Kaidi Xu for ICLR 2019 paper: 5 | ## 'Structured Adversarial Attack: Towards General Implementation and Better Interpretability' 6 | ## This program is licenced under the BSD 2-Clause licence, 7 | ## contained in the LICENCE file in this directory. 8 | 9 | import time 10 | import sys 11 | import tensorflow as tf 12 | import numpy as np 13 | 14 | # from multiprocessing.pool import ThreadPool 15 | 16 | 17 | BINARY_SEARCH_STEPS = 8 # number of times to adjust the constant with binary search 18 | MAX_ITERATIONS = 1000 # number of iterations to perform gradient descent 19 | ABORT_EARLY = True # if we stop improving, abort gradient descent early 20 | LEARNING_RATE = 1e-3 # larger values converge faster to less accurate results 1e-2 for MNIST, 1e-3 for cifar and imagenet 21 | TARGETED = True # should we target one specific class? or just be wrong? 
22 | CONFIDENCE = 0 # how strong the adversarial example should be 23 | INITIAL_CONST = 1 # the initial constant c to pick as a first guess 24 | RO = 15 25 | RETRAIN = True 26 | 27 | # pool = ThreadPool() 28 | 29 | 30 | class LADMMSTL2: 31 | def __init__(self, sess, model, batch_size=1, confidence=CONFIDENCE, 32 | targeted=TARGETED, learning_rate=LEARNING_RATE, 33 | binary_search_steps=BINARY_SEARCH_STEPS, max_iterations=MAX_ITERATIONS, 34 | abort_early=ABORT_EARLY, initial_const = INITIAL_CONST,print_every = 100, 35 | ro=RO, retrain=RETRAIN): 36 | """ 37 | The L_2 optimized attack. 38 | 39 | This attack is the most efficient and should be used as the primary 40 | attack to evaluate potential defenses. 41 | 42 | Returns adversarial examples for the supplied model. 43 | 44 | confidence: Confidence of adversarial examples: higher produces examples 45 | that are farther away, but more strongly classified as adversarial. 46 | batch_size: Number of attacks to run simultaneously. 47 | targeted: True if we should perform a targetted attack, False otherwise. 48 | learning_rate: The learning rate for the attack algorithm. Smaller values 49 | produce better results but are slower to converge. 50 | binary_search_steps: The number of times we perform binary search to 51 | find the optimal tradeoff-constant between distance and confidence. 52 | max_iterations: The maximum number of iterations. Larger values are more 53 | accurate; setting too small will require a large learning rate and will 54 | produce poor results. 55 | abort_early: If true, allows early aborts if gradient descent gets stuck. 56 | initial_const: The initial tradeoff-constant to use to tune the relative 57 | importance of distance and confidence. If binary_search_steps is large, 58 | the initial constant is not important. 59 | boxmin: Minimum pixel value (default -0.5). 60 | boxmax: Maximum pixel value (default 0.5). 
61 | """ 62 | 63 | self.model = model 64 | self.sess = sess 65 | self.TARGETED = targeted 66 | self.LEARNING_RATE = learning_rate 67 | self.MAX_ITERATIONS = max_iterations 68 | self.BINARY_SEARCH_STEPS = binary_search_steps 69 | self.ABORT_EARLY = abort_early 70 | self.CONFIDENCE = confidence 71 | self.batch_size = batch_size 72 | self.ro = ro 73 | self.retrain = retrain 74 | self.grad = self.gradient_descent(sess, model) 75 | 76 | def compare(self, x, y): 77 | if not isinstance(x, (float, int, np.int64)): 78 | x = np.copy(x) 79 | if self.TARGETED: 80 | x[y] -= self.CONFIDENCE 81 | else: 82 | x[y] += self.CONFIDENCE 83 | x = np.argmax(x) 84 | if self.TARGETED: 85 | return x == y 86 | else: 87 | return x != y 88 | 89 | def gradient_descent(self, sess, model): 90 | 91 | batch_size = self.batch_size 92 | shape = (batch_size, model.image_size, model.image_size, model.num_channels) 93 | 94 | tz = tf.Variable(np.zeros(shape, dtype=np.float32)) 95 | timg = tf.Variable(np.zeros(shape), dtype=tf.float32) 96 | tlab = tf.Variable(np.zeros((batch_size, model.num_labels)), dtype=tf.float32) 97 | const = tf.Variable(np.zeros(batch_size), dtype=tf.float32) 98 | 99 | # and here's what we use to assign them 100 | assign_timg = tf.placeholder(tf.float32, shape) 101 | assign_tlab = tf.placeholder(tf.float32, (batch_size, model.num_labels)) 102 | assign_tz = tf.placeholder(tf.float32, shape) 103 | assign_const = tf.placeholder(tf.float32, [batch_size]) 104 | 105 | # the resulting image, tanh'd to keep bounded from boxmin to boxmax 106 | newimg = tz + timg 107 | l2dist_real = tf.reduce_sum(tf.square(tz), [1, 2, 3]) 108 | output = model.predict(newimg) 109 | 110 | real = tf.reduce_sum(tlab * output, 1) 111 | other = tf.reduce_max((1 - tlab) * output - (tlab * 10000), 1) 112 | 113 | if self.TARGETED: 114 | # if targetted, optimize for making the other class most likely 115 | loss1 = tf.maximum(0.0, other - real + self.CONFIDENCE) 116 | else: 117 | # if untargeted, optimize for making 
this class least likely. 118 | loss1 = tf.maximum(0.0, real - other + self.CONFIDENCE) 119 | 120 | loss1 = const * tf.reduce_sum(loss1) 121 | 122 | gradtz = tf.gradients(loss1, [tz]) 123 | 124 | # these are the variables to initialize when we run 125 | setup = [] 126 | setup.append(timg.assign(assign_timg)) 127 | setup.append(tlab.assign(assign_tlab)) 128 | setup.append(tz.assign(assign_tz)) 129 | setup.append(const.assign(assign_const)) 130 | 131 | def doit(imgs, labs, z, CONST): 132 | 133 | batch = imgs[:batch_size] 134 | batchlab = labs[:batch_size] 135 | 136 | sess.run(setup, {assign_timg: batch, assign_tlab: batchlab, assign_tz: z, assign_const: CONST, }) 137 | 138 | l2s, scores, nimg, z_grads = sess.run([l2dist_real, output, newimg, gradtz]) 139 | 140 | return l2s, scores, nimg, np.array(z_grads) 141 | 142 | return doit 143 | 144 | def attack(self, imgs, targets): 145 | """ 146 | Perform the L_2 attack on the given images for the given targets. 147 | 148 | If self.targeted is true, then the targets represents the target labels. 149 | If self.targeted is false, then targets are the original class labels. 150 | """ 151 | r = [] 152 | rv = [] 153 | print('go up to', len(imgs)) 154 | for i in range(0, len(imgs), self.batch_size): 155 | print('tick', i) 156 | r1, r2 = self.attack_batch(imgs[i:i + self.batch_size], targets[i:i + self.batch_size]) 157 | r.extend(r1) 158 | rv = np.append(rv, r2) 159 | rv = rv.reshape([-1,3]) 160 | rv = np.mean(rv, axis = 0) 161 | 162 | print("\nnone zeros group:", rv[0], "\nl2 mean:", rv[1], "\nli mean", rv[2], "\n") 163 | return np.array(r) 164 | 165 | def attack_batch(self, imgs, labs): 166 | """ 167 | Run the attack on a batch of images and labels. 
168 | """ 169 | batch_size = self.batch_size 170 | o_bestl2 = [1e10] * batch_size 171 | o_bestscore = [-1] * batch_size 172 | o_bestattack = [np.zeros(imgs[0].shape)] * batch_size 173 | o_besty = np.ones(imgs.shape) 174 | 175 | lower_bound = np.zeros(batch_size) 176 | CONST = np.ones(batch_size) * INITIAL_CONST # 1 for imgnet 177 | upper_bound = np.ones(batch_size)*1e10 178 | 179 | alpha = 5 180 | tau = 3 181 | gamma = 2 182 | 183 | if self.model.image_size>32: #imagenet 184 | filterSize = 13 185 | stride = 13 186 | else: # cifar mnist 187 | filterSize = 2 188 | stride = 2 189 | print('grid size:', filterSize) 190 | n = self.model.image_size * self.model.image_size * self.model.num_channels 191 | 192 | P = np.floor((self.model.image_size - filterSize) / stride) + 1 193 | P = P.astype(np.int32) 194 | Q = P 195 | 196 | z = 0.0 * np.ones(imgs.shape) 197 | v = 0.0 * np.ones(imgs.shape) 198 | u = 0.0 * np.ones(imgs.shape) 199 | s = 0.0 * np.ones(imgs.shape) 200 | ep = 0.5 201 | 202 | index = np.ones([P*Q,filterSize * filterSize * self.model.num_channels],dtype=int) 203 | 204 | tmpidx = 0 205 | for q in range(Q): 206 | # plus = 0 207 | plus1 = q * stride * self.model.image_size * self.model.num_channels 208 | for p in range(P): 209 | index_ = np.array([], dtype=int) 210 | #index2_ = np.array([], dtype=int) 211 | for i in range(filterSize): 212 | index_ = np.append(index_, 213 | np.arange(p * stride * self.model.num_channels + i * self.model.image_size * self.model.num_channels + plus1, 214 | p * stride * self.model.num_channels + i * self.model.image_size * self.model.num_channels + plus1 + filterSize * self.model.num_channels, 215 | dtype=int)) 216 | index[tmpidx] = index_ 217 | tmpidx += 1 218 | index = np.tile(index, (batch_size,1,1)) 219 | 220 | for outer_step in range(self.BINARY_SEARCH_STEPS): 221 | print(outer_step, o_bestl2, CONST) 222 | 223 | #prev = 1e6 224 | bestl2 = [1e10]*batch_size 225 | bestscore = [-1]*batch_size 226 | 227 | z = 0.0 * np.ones(imgs.shape) 
228 | # z1 = 0.0*np.ones(imgs.shape) 229 | v = 0.0 * np.ones(imgs.shape) 230 | u = 0.0 * np.ones(imgs.shape) 231 | s = 0.0 * np.ones(imgs.shape) 232 | 233 | for iteration in range(self.MAX_ITERATIONS + outer_step * 1000 ): 234 | if iteration % 200 == 0: 235 | print(iteration, 'best l2square:', o_bestl2, 'when l0:', np.count_nonzero((np.array(o_bestattack) - imgs).reshape([batch_size,-1]), axis = 1)) 236 | 237 | # delta step 238 | # l2 239 | delt = self.ro / (self.ro + 2 * gamma) * (z - u) 240 | # l1 241 | #tmp = z - u - gamma / self.ro 242 | #tmp = np.where(tmp > 0, tmp, 0) 243 | #tmp1 = u - z - gamma / self.ro 244 | #tmp1 = np.where(tmp1 > 0, tmp1, 0) 245 | #delt = tmp - tmp1 246 | 247 | # w step 248 | temp = z - s 249 | temp1 = np.where(temp > np.minimum(0.5 - imgs, ep), np.minimum(0.5 - imgs, ep), temp) 250 | w = np.where(temp1 < np.maximum(-0.5 - imgs, -ep), np.maximum(-0.5 - imgs, -ep), temp1) 251 | 252 | # y step 253 | 254 | y0 = (z - v).reshape(batch_size,-1) 255 | #y0 = (z - v) 256 | 257 | y = y0[:] 258 | 259 | #timestart = time.time() 260 | #@jit 261 | def findIndx(b): 262 | #for b in range(batch_size): 263 | tmpc = tau / self.ro 264 | 265 | y0Ds = np.take(y0[b], index[b]) 266 | y0Dns = np.linalg.norm(y0Ds, axis=1) 267 | #print(np.mean(y0Dns[y0Dns != 0])) 268 | tmpy = np.zeros_like(y0Dns) 269 | tmpy[y0Dns != 0] = 1 - tmpc / y0Dns[y0Dns != 0] 270 | tmpy_ = np.zeros_like(y0Ds) 271 | tmpy = np.transpose(np.tile(tmpy, [y0Ds.shape[1],1])) 272 | tmpy_[tmpy > 0] = tmpy[tmpy > 0] * y0Ds[tmpy > 0] 273 | #tmpy_[tmpy > 0] = np.transpose(np.tile(tmpy[tmpy > 0], [y0Ds.shape[1],1])) * y0Ds[tmpy > 0] 274 | np.put(y[b], index[b], tmpy_) 275 | 276 | 277 | list(map(findIndx, range(batch_size))) 278 | 279 | y = y.reshape(imgs.shape) 280 | 281 | # z step 282 | l2s, scores, nimg, z_grads = self.grad(imgs, labs, z, CONST) 283 | 284 | 285 | Sc = y + v 286 | 287 | #eta = 1 288 | eta = 1/np.sqrt(iteration+1) 289 | z = 1 / (alpha / eta + 2 * self.ro + 2*self.ro) * \ 290 | (alpha / 
eta * z + 2*self.ro * (delt + u) + self.ro * (w + s) + self.ro * Sc - z_grads[0]) 291 | # print(Sc.mean(),w.mean(),y.mean(),delt.mean(),z.mean()) 292 | 293 | u = u + delt - z 294 | 295 | v = v + y - z 296 | 297 | s = s + w - z 298 | 299 | #yt = yt.reshape(imgs.shape) 300 | #np.count_nonzero(o_besty)/batch_size 301 | l2s, scores, nimg, y_grads = self.grad(imgs, labs, y, CONST) 302 | 303 | for e, (l2, sc, ii,) in enumerate(zip(l2s, scores, nimg)): 304 | if l2 < bestl2[e] and self.compare(sc, np.argmax(labs[e])): 305 | bestl2[e] = l2 306 | bestscore[e] = np.argmax(sc) 307 | if l2 < o_bestl2[e] and self.compare(sc, np.argmax(labs[e])): 308 | #print("change", e, o_bestl2[e] - l2) 309 | o_bestl2[e] = l2 310 | o_bestscore[e] = np.argmax(sc) 311 | o_bestattack[e] = ii 312 | o_besty[e] = y[e] 313 | 314 | for e in range(batch_size): 315 | if self.compare(bestscore[e], np.argmax(labs[e])) and bestscore[e] != -1 and bestl2[e] == o_bestl2[e]: 316 | # success, divide const by two 317 | upper_bound[e] = min(upper_bound[e],CONST[e]) 318 | if upper_bound[e] < 1e9: 319 | CONST[e] = (lower_bound[e] + upper_bound[e])/2 320 | else: 321 | # failure, either multiply by 10 if no solution found yet 322 | # or do binary search with the known upper bound 323 | lower_bound[e] = max(lower_bound[e],CONST[e]) 324 | if upper_bound[e] < 1e9: 325 | CONST[e] = (lower_bound[e] + upper_bound[e])/2 326 | else: 327 | CONST[e] *= 5 328 | 329 | print('Finally', o_bestl2) 330 | # np.save("img",o_besty[8].squeeze()) 331 | if self.retrain: 332 | lower_bound = np.zeros(batch_size) 333 | CONST = np.ones(batch_size) * 5 # 5 for imgnet 334 | upper_bound = np.ones(batch_size)*1e10 335 | for tmpi in range(8): 336 | print("retrain C:", CONST) 337 | bestl2 = [1e10]*batch_size 338 | bestscore = [-1]*batch_size 339 | 340 | Nz = o_besty[np.nonzero(o_besty)] 341 | Nz = np.abs(Nz) 342 | e0 = np.percentile(Nz, 3) 343 | #e0 = 0 344 | # e0 = 0.00001 345 | #randm = -1 + 2*np.random.random((o_besty.shape)) 346 | #z1 = 
np.where(np.abs(o_besty) <= e0, 0, randm) 347 | A2 = np.where(np.abs(o_besty) <= e0, 0, 1) 348 | #randm = -1 + 2*np.random.random((o_besty.shape)) 349 | z1 = o_besty 350 | u1 = 0.0 * np.ones(imgs.shape) 351 | tmpC = self.ro / (self.ro + gamma/100) 352 | for outer_step in range(400): 353 | if outer_step % 200 == 0: 354 | print("retrain", tmpi, outer_step, o_bestl2) 355 | 356 | tempA = (z1 - u1) * tmpC 357 | tempA1 = np.where(np.abs(o_besty) <= e0, 0, tempA) 358 | tempA2 = np.where(np.logical_and(tempA > np.minimum(0.5 - imgs, ep), (np.abs(o_besty) > e0)), 359 | np.minimum(0.5 - imgs, ep), tempA1) 360 | deltA = np.where(np.logical_and(tempA < np.maximum(-0.5 - imgs, -ep), (np.abs(o_besty) > e0)), 361 | np.maximum(-0.5 - imgs, -ep), tempA2) 362 | 363 | l2s, scores, nimg, z_grads = self.grad(imgs, labs, deltA, CONST) 364 | z1 = 1 / (alpha + 2 * self.ro) * (alpha * z1 + self.ro * (deltA + u1) - np.multiply(z_grads[0],A2)) 365 | 366 | u1 = u1 + deltA - z1 367 | 368 | #l2s, scores, nimg, z_grads = self.grad(imgs, labs, deltA) 369 | for e, (l2, sc, ii,) in enumerate(zip(l2s, scores, nimg)): 370 | if l2 < bestl2[e] and self.compare(sc, np.argmax(labs[e])): 371 | bestl2[e] = l2 372 | bestscore[e] = np.argmax(sc) 373 | if l2 < o_bestl2[e] and self.compare(sc, np.argmax(labs[e])): 374 | o_bestl2[e] = l2 375 | o_bestscore[e] = np.argmax(sc) 376 | o_bestattack[e] = ii 377 | o_besty[e] = deltA[e] 378 | 379 | for e in range(batch_size): 380 | if self.compare(bestscore[e], np.argmax(labs[e])) and bestscore[e] != -1: 381 | # success, divide const by two 382 | upper_bound[e] = min(upper_bound[e],CONST[e]) 383 | if upper_bound[e] < 1e9: 384 | CONST[e] = (lower_bound[e] + upper_bound[e])/2 385 | else: 386 | # failure, either multiply by 10 if no solution found yet 387 | # or do binary search with the known upper bound 388 | lower_bound[e] = max(lower_bound[e],CONST[e]) 389 | if upper_bound[e] < 1e9: 390 | CONST[e] = (lower_bound[e] + upper_bound[e])/2 391 | else: 392 | CONST[e] *= 5 
393 | 394 | rVector = [0, 0, 0] 395 | resultl2 = np.array([]) 396 | resultli = np.array([]) 397 | o_besty = o_besty.reshape(batch_size, -1) 398 | for b in (range(batch_size)): 399 | for k in range(index.shape[1]): 400 | ry0D = np.take(o_besty[b], index[b,k]) 401 | ry0D2 = np.linalg.norm(ry0D) 402 | if ry0D2 != 0: 403 | resultl2 = np.append(resultl2, ry0D2) 404 | resultli = np.append(resultli, np.max(np.abs(ry0D))) 405 | 406 | rVector[0] = len(resultl2)/batch_size 407 | rVector[1] = np.mean(resultl2) 408 | rVector[2] = np.mean(resultli) 409 | 410 | print("ro", self.ro, "gamma", gamma, "tau", tau, "alpha", alpha) 411 | print("\ntotal groups:", P*Q) 412 | return o_bestattack, rVector 413 | 414 | -------------------------------------------------------------------------------- /l2_attack.py: -------------------------------------------------------------------------------- 1 | ## l2_attack.py -- attack a network optimizing for l_2 distance 2 | ## 3 | ## Copyright (C) 2016, Nicholas Carlini . 4 | ## 5 | ## This program is licenced under the BSD 2-Clause licence, 6 | ## contained in the LICENCE file in this directory. 7 | 8 | import sys 9 | import tensorflow as tf 10 | import numpy as np 11 | 12 | BINARY_SEARCH_STEPS = 9 # number of times to adjust the constant with binary search 13 | MAX_ITERATIONS = 1000 # number of iterations to perform gradient descent 14 | ABORT_EARLY = True # if we stop improving, abort gradient descent early 15 | LEARNING_RATE = 1e-3 # larger values converge faster to less accurate results e-2 for MNIST -3 for others 16 | TARGETED = True # should we target one specific class? or just be wrong? 
17 | CONFIDENCE = 1 # how strong the adversarial example should be 18 | INITIAL_CONST = 1 # the initial constant c to pick as a first guess 19 | 20 | class CarliniL2: 21 | def __init__(self, sess, model, batch_size=1, confidence = CONFIDENCE, 22 | targeted = TARGETED, learning_rate = LEARNING_RATE, 23 | binary_search_steps = BINARY_SEARCH_STEPS, max_iterations = MAX_ITERATIONS, 24 | abort_early = ABORT_EARLY, 25 | initial_const = INITIAL_CONST, 26 | boxmin = -0.5, boxmax = 0.5): 27 | """ 28 | The L_2 optimized attack. 29 | 30 | This attack is the most efficient and should be used as the primary 31 | attack to evaluate potential defenses. 32 | 33 | Returns adversarial examples for the supplied model. 34 | 35 | confidence: Confidence of adversarial examples: higher produces examples 36 | that are farther away, but more strongly classified as adversarial. 37 | batch_size: Number of attacks to run simultaneously. 38 | targeted: True if we should perform a targetted attack, False otherwise. 39 | learning_rate: The learning rate for the attack algorithm. Smaller values 40 | produce better results but are slower to converge. 41 | binary_search_steps: The number of times we perform binary search to 42 | find the optimal tradeoff-constant between distance and confidence. 43 | max_iterations: The maximum number of iterations. Larger values are more 44 | accurate; setting too small will require a large learning rate and will 45 | produce poor results. 46 | abort_early: If true, allows early aborts if gradient descent gets stuck. 47 | initial_const: The initial tradeoff-constant to use to tune the relative 48 | importance of distance and confidence. If binary_search_steps is large, 49 | the initial constant is not important. 50 | boxmin: Minimum pixel value (default -0.5). 51 | boxmax: Maximum pixel value (default 0.5). 
52 | """ 53 | 54 | image_size, num_channels, num_labels = model.image_size, model.num_channels, model.num_labels 55 | self.image_size = model.image_size 56 | self.num_channels = model.num_channels 57 | self.sess = sess 58 | self.TARGETED = targeted 59 | self.LEARNING_RATE = learning_rate 60 | self.MAX_ITERATIONS = max_iterations 61 | self.BINARY_SEARCH_STEPS = binary_search_steps 62 | self.ABORT_EARLY = abort_early 63 | self.CONFIDENCE = confidence 64 | self.initial_const = initial_const 65 | self.batch_size = batch_size 66 | self.model = model 67 | 68 | self.repeat = binary_search_steps >= 10 69 | 70 | shape = (batch_size,image_size,image_size,num_channels) 71 | 72 | # the variable we're going to optimize over 73 | self.modifier = tf.Variable(np.zeros(shape,dtype=np.float32)) 74 | # self.modifier2 = tf.Variable(np.zeros(shape, dtype=np.float32)) 75 | # self.modifier3 = tf.Variable(np.zeros(shape, dtype=np.float32)) 76 | 77 | # these are variables to be more efficient in sending data to tf 78 | self.timg = tf.Variable(np.zeros(shape), dtype=tf.float32) 79 | self.tlab = tf.Variable(np.zeros((batch_size,num_labels)), dtype=tf.float32) 80 | self.const = tf.Variable(np.zeros(batch_size), dtype=tf.float32) 81 | 82 | # and here's what we use to assign them 83 | self.assign_timg = tf.placeholder(tf.float32, shape) 84 | self.assign_tlab = tf.placeholder(tf.float32, (batch_size,num_labels)) 85 | self.assign_const = tf.placeholder(tf.float32, [batch_size]) 86 | 87 | # the resulting image, tanh'd to keep bounded from boxmin to boxmax 88 | self.boxmul = (boxmax - boxmin) / 2. 89 | self.boxplus = (boxmin + boxmax) / 2. 
90 | self.newimg = tf.tanh(self.modifier + self.timg) * self.boxmul + self.boxplus 91 | 92 | # prediction BEFORE-SOFTMAX of the model 93 | #self.output = model.predict(self.newimg) 94 | #model.x_input = self.newimg 95 | self.output = model.predict(self.newimg) 96 | 97 | # distance to the input data 98 | self.l2dist = tf.reduce_sum(tf.square(self.newimg-(tf.tanh(self.timg) * self.boxmul + self.boxplus)),[1,2,3]) 99 | 100 | # compute the probability of the label class versus the maximum other 101 | real = tf.reduce_sum((self.tlab)*self.output,1) 102 | other = tf.reduce_max((1-self.tlab)*self.output - (self.tlab*10000),1) 103 | 104 | if self.TARGETED: 105 | # if targetted, optimize for making the other class most likely 106 | loss1 = tf.maximum(0.0, other-real+self.CONFIDENCE) 107 | else: 108 | # if untargeted, optimize for making this class least likely. 109 | loss1 = tf.maximum(0.0, real-other+self.CONFIDENCE) 110 | 111 | # sum up the losses 112 | self.loss2 = tf.reduce_sum(self.l2dist) 113 | self.loss1 = tf.reduce_sum(self.const*loss1) 114 | self.loss = self.loss1+self.loss2 115 | 116 | # Setup the adam optimizer and keep track of variables we're creating 117 | start_vars = set(x.name for x in tf.global_variables()) 118 | optimizer = tf.train.AdamOptimizer(self.LEARNING_RATE) 119 | self.train = optimizer.minimize(self.loss, var_list=[self.modifier]) 120 | 121 | end_vars = tf.global_variables() 122 | # self.modifier1 = end_vars[14] 123 | new_vars = [x for x in end_vars if x.name not in start_vars] 124 | 125 | # these are the variables to initialize when we run 126 | self.setup = [] 127 | self.setup.append(self.timg.assign(self.assign_timg)) 128 | self.setup.append(self.tlab.assign(self.assign_tlab)) 129 | self.setup.append(self.const.assign(self.assign_const)) 130 | 131 | # self.modifier = self.modifier2 + self.modifier3 - self.modifier 132 | 133 | self.init = tf.variables_initializer(var_list=[self.modifier]+new_vars) 134 | 135 | #+[self.modifier2] + 
[self.modifier3] 136 | def attack(self, imgs, targets): 137 | """ 138 | Perform the L_2 attack on the given images for the given targets. 139 | 140 | If self.targeted is true, then the targets represents the target labels. 141 | If self.targeted is false, then targets are the original class labels. 142 | """ 143 | r = [] 144 | rv = [] 145 | print('go up to', len(imgs)) 146 | for i in range(0, len(imgs), self.batch_size): 147 | print('tick', i) 148 | r1, r2 = self.attack_batch(imgs[i:i + self.batch_size], targets[i:i + self.batch_size]) 149 | r.extend(r1) 150 | rv = np.append(rv, r2) 151 | rv = rv.reshape([-1,3]) 152 | rv = np.mean(rv, axis = 0) 153 | 154 | print("none zeros groups:", rv[0], "\nl2 mean:", rv[1], "\nli mean:", rv[2], "\n") 155 | return np.array(r) 156 | 157 | def attack_batch(self, imgs, labs): 158 | """ 159 | Run the attack on a batch of images and labels. 160 | """ 161 | def compare(x,y): 162 | if not isinstance(x, (float, int, np.int64)): 163 | x = np.copy(x) 164 | if self.TARGETED: 165 | x[y] -= self.CONFIDENCE 166 | else: 167 | x[y] += self.CONFIDENCE 168 | x = np.argmax(x) 169 | if self.TARGETED: 170 | return x == y 171 | else: 172 | return x != y 173 | 174 | batch_size = self.batch_size 175 | 176 | # convert to tanh-space 177 | imgs2 = imgs 178 | imgs = np.arctanh((imgs - self.boxplus) / self.boxmul * 0.999999) 179 | 180 | # set the lower and upper bounds accordingly 181 | lower_bound = np.zeros(batch_size) 182 | CONST = np.ones(batch_size)*self.initial_const 183 | upper_bound = np.ones(batch_size)*1e10 184 | 185 | # the best l2, score, and image attack 186 | o_bestl2 = [1e10]*batch_size 187 | o_bestscore = [-1]*batch_size 188 | o_bestattack = [np.zeros(imgs[0].shape)]*batch_size 189 | 190 | if self.model.image_size>32: #imagenet 191 | filterSize = 13 192 | stride = 13 193 | else: # cifar mnist 194 | filterSize = 2 195 | stride = 2 196 | n = self.image_size * self.image_size * self.num_channels 197 | 198 | P = np.floor((self.image_size - 
filterSize) / stride) + 1 199 | P = P.astype(np.int32) 200 | Q = P 201 | 202 | index = np.ones([P*Q,filterSize * filterSize * self.num_channels],dtype=int) 203 | #index2 = np.ones([P*Q,filterSize * filterSize * self.model.num_channels],dtype=int) 204 | 205 | tmpidx = 0 206 | for q in range(Q): 207 | # plus = 0 208 | plus1 = q * stride * self.image_size * self.num_channels 209 | for p in range(P): 210 | index_ = np.array([], dtype=int) 211 | #index2_ = np.array([], dtype=int) 212 | for i in range(filterSize): 213 | index_ = np.append(index_, 214 | np.arange(p * stride * self.num_channels + i * self.image_size * self.num_channels + plus1, 215 | p * stride * self.num_channels + i * self.image_size * self.num_channels + plus1 + filterSize * self.num_channels, 216 | dtype=int)) 217 | index[tmpidx] = index_ 218 | tmpidx += 1 219 | index = np.tile(index, (self.batch_size,1,1)) 220 | 221 | 222 | for outer_step in range(self.BINARY_SEARCH_STEPS): 223 | print(outer_step, o_bestl2, CONST) 224 | # print(self.modifier) 225 | # completely reset adam's internal state. 226 | self.sess.run(self.init) 227 | batch = imgs[:batch_size] 228 | batchlab = labs[:batch_size] 229 | 230 | bestl2 = [1e10]*batch_size 231 | bestscore = [-1]*batch_size 232 | 233 | # The last iteration (if we run many steps) repeat the search once. 
234 | if self.repeat == True and outer_step == self.BINARY_SEARCH_STEPS-1: 235 | CONST = upper_bound 236 | 237 | # set the variables so that we don't have to send them over again 238 | self.sess.run(self.setup, {self.assign_timg: batch, 239 | self.assign_tlab: batchlab, 240 | self.assign_const: CONST}) 241 | 242 | prev = 1e6 243 | for iteration in range(self.MAX_ITERATIONS): 244 | if iteration % 200 == 0: 245 | print(iteration, o_bestl2) 246 | # perform the attack 247 | ttt, l, l2s, scores, nimg = self.sess.run([self.train, self.loss, 248 | self.l2dist, self.output, 249 | self.newimg]) 250 | modi = self.sess.run(self.modifier) 251 | # print out the losses every 10% 252 | #if iteration%(self.MAX_ITERATIONS//10) == 0: 253 | # print(iteration,self.sess.run((self.loss,self.loss1,self.loss2))) 254 | 255 | # check if we should abort search if we're getting nowhere. 256 | if self.ABORT_EARLY and iteration%(self.MAX_ITERATIONS//10) == 0: 257 | if l > prev*.9999: 258 | break 259 | prev = l 260 | 261 | # adjust the best result found so far 262 | for e,(l2,sc,ii) in enumerate(zip(l2s,scores,nimg)): 263 | if l2 < bestl2[e] and compare(sc, np.argmax(batchlab[e])): 264 | bestl2[e] = l2 265 | bestscore[e] = np.argmax(sc) 266 | if l2 < o_bestl2[e] and compare(sc, np.argmax(batchlab[e])): 267 | #print("change", e, o_bestl2[e] - l2) 268 | o_bestl2[e] = l2 269 | o_bestscore[e] = np.argmax(sc) 270 | o_bestattack[e] = ii 271 | 272 | # adjust the constant as needed 273 | for e in range(batch_size): 274 | if compare(bestscore[e], np.argmax(batchlab[e])) and bestscore[e] != -1: 275 | # success, divide const by two 276 | upper_bound[e] = min(upper_bound[e],CONST[e]) 277 | if upper_bound[e] < 1e9: 278 | CONST[e] = (lower_bound[e] + upper_bound[e])/2 279 | else: 280 | # failure, either multiply by 10 if no solution found yet 281 | # or do binary search with the known upper bound 282 | lower_bound[e] = max(lower_bound[e],CONST[e]) 283 | if upper_bound[e] < 1e9: 284 | CONST[e] = 
(lower_bound[e] + upper_bound[e])/2 285 | else: 286 | CONST[e] *= 10 287 | 288 | # return the best solution found 289 | o_bestl2 = np.array(o_bestl2) 290 | 291 | o_besty = np.array(o_bestattack) - imgs2 292 | rVector = [0, 0, 0] 293 | resultl2 = np.array([]) 294 | resultli = np.array([]) 295 | for b in (range(batch_size)): 296 | for k in range(index.shape[1]): 297 | ry0D = np.take(o_besty[b], index[b,k]) 298 | ry0D2 = np.linalg.norm(ry0D) 299 | if ry0D2 != 0: 300 | resultl2 = np.append(resultl2, ry0D2) 301 | resultli = np.append(resultli, np.max(np.abs(ry0D))) 302 | 303 | rVector[0] = len(resultl2)/batch_size 304 | rVector[1] = np.mean(resultl2) 305 | rVector[2] = np.mean(resultli) 306 | 307 | print("\ntotal groups:", P*Q) 308 | 309 | return o_bestattack, rVector 310 | 311 | 312 | 313 | 314 | 315 | 316 | -------------------------------------------------------------------------------- /setup_cifar.py: -------------------------------------------------------------------------------- 1 | ## setup_cifar.py -- cifar data and model loading code 2 | ## 3 | ## Copyright (C) 2016, Nicholas Carlini . 4 | ## 5 | ## This program is licenced under the BSD 2-Clause licence, 6 | ## contained in the LICENCE file in this directory. 
def load_batch(fpath):
    """Load one CIFAR-10 batch stored in the *binary* format.

    Every record in the file is one label byte followed by 3*32*32 pixel
    bytes laid out channel-first (all of channel 0, then 1, then 2).

    Args:
        fpath: path of a ``*.bin`` batch file.

    Returns:
        ``(images, labels)`` where ``images`` is a float32 array of shape
        ``(N, 32, 32, 3)`` scaled to ``[-0.5, 0.5]`` and ``labels`` is a
        one-hot float64 array of shape ``(N, 10)``.

    Raises:
        ValueError: if the file size is not a whole number of records.
    """
    record_size = 32 * 32 * 3 + 1
    with open(fpath, "rb") as f:
        raw = f.read()
    if len(raw) % record_size:
        raise ValueError("%s is not a whole number of %d-byte CIFAR records"
                         % (fpath, record_size))

    # The record count comes from the file size instead of a hard-coded 10000.
    num_records = len(raw) // record_size
    records = np.frombuffer(raw, dtype=np.uint8).reshape(num_records, record_size)

    labels = np.identity(10)[records[:, 0]]
    # channel-first bytes -> (N, 32, 32, 3) channel-last images
    pixels = records[:, 1:].reshape(num_records, 3, 32, 32).transpose(0, 2, 3, 1)
    # Scale in float64 first, then cast, matching the per-image computation
    # this replaces.
    images = (pixels / 255 - .5).astype(np.float32)
    return images, labels


class CIFAR:
    """CIFAR-10 data, downloaded on first use and split into train/validation/test.

    Attributes set by the constructor: ``train_data``/``train_labels``,
    ``validation_data``/``validation_labels`` (the first 5000 training
    records) and ``test_data``/``test_labels``.
    """

    def __init__(self):
        if not os.path.exists("cifar-10-batches-bin"):
            import tarfile

            urllib.request.urlretrieve("https://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz",
                                       "cifar-data.tar.gz")
            # tarfile instead of shelling out to `tar`: portable, and a bad
            # archive raises instead of being silently ignored.
            with tarfile.open("cifar-data.tar.gz", "r:gz") as archive:
                archive.extractall()

        batches = [load_batch("cifar-10-batches-bin/data_batch_" + str(i + 1) + ".bin")
                   for i in range(5)]
        train_data = np.concatenate([images for images, _ in batches])
        train_labels = np.concatenate([labels for _, labels in batches])

        self.test_data, self.test_labels = load_batch("cifar-10-batches-bin/test_batch.bin")

        VALIDATION_SIZE = 5000

        self.validation_data = train_data[:VALIDATION_SIZE, :, :, :]
        self.validation_labels = train_labels[:VALIDATION_SIZE]
        self.train_data = train_data[VALIDATION_SIZE:, :, :, :]
        self.train_labels = train_labels[VALIDATION_SIZE:]


class CIFARModel:
    """Keras CNN for 32x32x3 inputs with weights restored from a file.

    The last layer is ``Dense(10)`` with no activation after it.
    """

    def __init__(self, restore, session=None):
        # `session` is accepted but not used by this constructor.
        self.num_channels = 3
        self.image_size = 32
        self.num_labels = 10

        model = Sequential()

        model.add(Conv2D(64, (3, 3),
                         input_shape=(32, 32, 3)))
        model.add(Activation('relu'))
        model.add(Conv2D(64, (3, 3)))
        model.add(Activation('relu'))
        model.add(MaxPooling2D(pool_size=(2, 2)))

        model.add(Conv2D(128, (3, 3)))
        model.add(Activation('relu'))
        model.add(Conv2D(128, (3, 3)))
        model.add(Activation('relu'))
        model.add(MaxPooling2D(pool_size=(2, 2)))

        model.add(Flatten())
        model.add(Dense(256))
        model.add(Activation('relu'))
        model.add(Dense(256))
        model.add(Activation('relu'))
        model.add(Dense(10))

        model.load_weights(restore)

        self.model = model

    def predict(self, data):
        """Apply the network to `data` and return its output."""
        return self.model(data)
7 | # 8 | # Licensed under the Apache License, Version 2.0 (the "License"); 9 | # you may not use this file except in compliance with the License. 10 | # You may obtain a copy of the License at 11 | # 12 | # http://www.apache.org/licenses/LICENSE-2.0 13 | # 14 | # Unless required by applicable law or agreed to in writing, software 15 | # distributed under the License is distributed on an "AS IS" BASIS, 16 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | # See the License for the specific language governing permissions and 18 | # limitations under the License. 19 | # ============================================================================== 20 | 21 | """Simple image classification with Inception. 22 | 23 | Run image classification with Inception trained on ImageNet 2012 Challenge data 24 | set. 25 | 26 | This program creates a graph from a saved GraphDef protocol buffer, 27 | and runs inference on an input JPEG image. It outputs human readable 28 | strings of the top 5 predictions along with their probabilities. 29 | 30 | Change the --image_file argument to any jpg image to compute a 31 | classification of that image. 32 | 33 | Please see the tutorial and website for a detailed description of how 34 | to use this script to perform image recognition. 35 | 36 | https://tensorflow.org/tutorials/image_recognition/ 37 | """ 38 | 39 | from __future__ import absolute_import 40 | from __future__ import division 41 | from __future__ import print_function 42 | 43 | import os.path 44 | import re 45 | import sys 46 | import random 47 | import tarfile 48 | import scipy.misc 49 | 50 | import numpy as np 51 | from six.moves import urllib 52 | import tensorflow as tf 53 | 54 | FLAGS = tf.app.flags.FLAGS 55 | 56 | # classify_image_graph_def.pb: 57 | # Binary representation of the GraphDef protocol buffer. 58 | # imagenet_synset_to_human_label_map.txt: 59 | # Map from synset ID to a human readable string. 
class NodeLookup(object):
    """Converts integer node ID's to human readable labels."""

    def __init__(self,
                 label_lookup_path=None):
        if not label_lookup_path:
            label_lookup_path = os.path.join(
                FLAGS.model_dir, 'labels.txt')
        self.node_lookup = self.load(label_lookup_path)

    @staticmethod
    def _parse_labels(lines):
        """Parse ``"<id>:<name>"`` lines into a ``{int id: name}`` dict.

        Blank lines are skipped, surrounding whitespace (including the
        newline kept by ``readlines``) is stripped, and only the first ':'
        separates the id from the name, so names may contain colons.
        """
        node_id_to_name = {}
        for line in lines:
            line = line.strip()
            if not line:
                continue
            node_id, name = line.split(':', 1)
            node_id_to_name[int(node_id)] = name
        return node_id_to_name

    def load(self, label_lookup_path):
        """Loads a human readable English name for each softmax node.

        Args:
          label_lookup_path: path of a text file with one ``id:name`` entry
            per line.

        Returns:
          dict from integer node ID to human-readable string.
        """
        if not tf.gfile.Exists(label_lookup_path):
            tf.logging.fatal('File does not exist %s', label_lookup_path)

        with tf.gfile.GFile(label_lookup_path) as label_file:
            return self._parse_labels(label_file.readlines())

    def id_to_string(self, node_id):
        """Return the label for `node_id`, or '' when the id is unknown."""
        if node_id not in self.node_lookup:
            return ''
        return self.node_lookup[node_id]


def create_graph():
    """Imports the frozen Inception-v3 GraphDef into the default graph.

    Reads ``frozen_inception_v3.pb`` from ``FLAGS.model_dir``. Returns nothing.
    """
    with tf.gfile.FastGFile(os.path.join(
            FLAGS.model_dir, 'frozen_inception_v3.pb'), 'rb') as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())
        _ = tf.import_graph_def(graph_def, name='')


def run_inference_on_image(image):
    """Runs inference on an image. (Not updated, not working for inception v3 20160828)

    Args:
      image: Image file name.

    Returns:
      Nothing
    """
    if not tf.gfile.Exists(image):
        tf.logging.fatal('File does not exist %s', image)

    # Creates graph from saved GraphDef.
    create_graph()

    with tf.Session() as sess:
        # The image is fed through a uint8 placeholder mapped onto the
        # graph's 'DecodeJpeg:0' tensor.
        img = tf.placeholder(tf.uint8, (299, 299, 3))
        softmax_tensor = tf.import_graph_def(
            sess.graph.as_graph_def(),
            input_map={'DecodeJpeg:0': tf.reshape(img, ((299, 299, 3)))},
            return_elements=['softmax/logits:0'])

        dat = scipy.misc.imresize(scipy.misc.imread(image), (299, 299))
        predictions = sess.run(softmax_tensor,
                               {img: dat})

        predictions = np.squeeze(predictions)

        # Creates node ID --> English string lookup.
        node_lookup = NodeLookup()

        # argsort is ascending, so the highest score is printed last.
        top_k = predictions.argsort()
        for node_id in top_k:
            print('id', node_id)
            human_string = node_lookup.id_to_string(node_id)
            score = predictions[node_id]
            print('%s (score = %.5f)' % (human_string, score))
160 | # Runs the softmax tensor by feeding the image_data as input to the graph. 161 | #softmax_tensor = sess.graph.get_tensor_by_name('softmax:0') 162 | img = tf.placeholder(tf.uint8, (299,299,3)) 163 | softmax_tensor = tf.import_graph_def( 164 | sess.graph.as_graph_def(), 165 | input_map={'DecodeJpeg:0': tf.reshape(img,((299,299,3)))}, 166 | return_elements=['softmax/logits:0']) 167 | 168 | dat = scipy.misc.imresize(scipy.misc.imread(image),(299,299)) 169 | predictions = sess.run(softmax_tensor, 170 | {img: dat}) 171 | 172 | predictions = np.squeeze(predictions) 173 | 174 | # Creates node ID --> English string lookup. 175 | node_lookup = NodeLookup() 176 | 177 | top_k = predictions.argsort()#[-FLAGS.num_top_predictions:][::-1] 178 | for node_id in top_k: 179 | print('id',node_id) 180 | human_string = node_lookup.id_to_string(node_id) 181 | score = predictions[node_id] 182 | print('%s (score = %.5f)' % (human_string, score)) 183 | 184 | class InceptionModelPrediction: 185 | def __init__(self, sess, use_log = True): 186 | self.sess = sess 187 | self.use_log = use_log 188 | if self.use_log: 189 | output_name = 'InceptionV3/Predictions/Softmax:0' 190 | else: 191 | output_name = 'InceptionV3/Predictions/Reshape:0' 192 | self.img = tf.placeholder(tf.float32, (None, 299,299,3)) 193 | self.softmax_tensor = tf.import_graph_def( 194 | sess.graph.as_graph_def(), 195 | input_map={'input:0': self.img}, 196 | return_elements=[output_name]) 197 | def predict(self, dat): 198 | dat = np.squeeze(dat) 199 | # scaled = (0.5 + dat) * 255 200 | if dat.ndim == 3: 201 | scaled = dat.reshape((1,) + dat.shape) 202 | else: 203 | scaled = dat 204 | # print(scaled.shape) 205 | predictions = self.sess.run(self.softmax_tensor, 206 | {self.img: scaled}) 207 | predictions = np.squeeze(predictions) 208 | return predictions 209 | # Creates node ID --> English string lookup. 
210 | node_lookup = NodeLookup() 211 | top_k = predictions.argsort()#[-FLAGS.num_top_predictions:][::-1] 212 | for node_id in top_k: 213 | print('id',node_id) 214 | human_string = node_lookup.id_to_string(node_id) 215 | score = predictions[node_id] 216 | print('%s (score = %.5f)' % (human_string, score)) 217 | return top_k[-1] 218 | 219 | 220 | CREATED_GRAPH = False 221 | class InceptionModel: 222 | image_size = 299 223 | num_labels = 1001 224 | num_channels = 3 225 | def __init__(self, sess, use_log = False): 226 | global CREATED_GRAPH 227 | self.sess = sess 228 | self.use_log = use_log 229 | if not CREATED_GRAPH: 230 | create_graph() 231 | CREATED_GRAPH = True 232 | self.model = InceptionModelPrediction(sess, use_log) 233 | 234 | def predict(self, img): 235 | if self.use_log: 236 | output_name = 'InceptionV3/Predictions/Softmax:0' 237 | else: 238 | output_name = 'InceptionV3/Predictions/Reshape:0' 239 | #scaled = (0.5+tf.reshape(img,((299,299,3))))*255 240 | #scaled = (0.5+img)*255 241 | if img.shape.as_list()[0]: 242 | # check if a shape has been specified explicitly 243 | shape = (int(img.shape[0]), 1001) 244 | softmax_tensor = tf.import_graph_def( 245 | self.sess.graph.as_graph_def(), 246 | input_map={'input:0': img, 'InceptionV3/Predictions/Shape:0': shape}, 247 | return_elements=[output_name]) 248 | else: 249 | # placeholder shape 250 | softmax_tensor = tf.import_graph_def( 251 | self.sess.graph.as_graph_def(), 252 | input_map={'input:0': img}, 253 | return_elements=[output_name]) 254 | return softmax_tensor[0] 255 | 256 | 257 | def maybe_download_and_extract(): 258 | """Download and extract model tar file.""" 259 | dest_directory = FLAGS.model_dir 260 | if not os.path.exists(dest_directory): 261 | os.makedirs(dest_directory) 262 | filename = DATA_URL.split('/')[-1] 263 | filepath = os.path.join(dest_directory, filename) 264 | if not os.path.exists(filepath): 265 | def _progress(count, block_size, total_size): 266 | sys.stdout.write('\r>> Downloading %s 
def main(_):
    """Download the model if needed and print the scores for one image.

    The image is ``FLAGS.image_file`` or, when that is empty,
    ``cropped_panda.jpg`` inside ``FLAGS.model_dir``.
    """
    maybe_download_and_extract()
    image = (FLAGS.image_file if FLAGS.image_file else
             os.path.join(FLAGS.model_dir, 'cropped_panda.jpg'))
    create_graph()
    with tf.Session() as sess:
        dat = np.array(scipy.misc.imresize(scipy.misc.imread(image), (299, 299)), dtype = np.float32)
        # Rescale pixel values from [0, 255] to [-0.5, 0.5].
        dat /= 255.0
        dat -= 0.5
        model = InceptionModelPrediction(sess, True)
        predictions = model.predict(dat)
        # Creates node ID --> English string lookup.
        node_lookup = NodeLookup()
        # argsort is ascending, so the highest score is printed last.
        top_k = predictions.argsort()
        for node_id in top_k:
            print('id', node_id)
            human_string = node_lookup.id_to_string(node_id)
            score = predictions[node_id]
            print('%s (score = %.5f)' % (human_string, score))


def readimg(ff):
    """Load ``../imagenetdata/imgs/<ff>`` as a ``[image, label]`` pair.

    Returns None for images smaller than 299x299 and for images whose
    resized shape is not (299, 299, 3). The image is float32 in
    [-0.5, 0.5]; the label is the integer before the first '.' in `ff`.
    """
    f = "../imagenetdata/imgs/" + ff
    img = scipy.misc.imread(f)
    # skip small images (image should be at least 299x299)
    if img.shape[0] < 299 or img.shape[1] < 299:
        return None
    img = np.array(scipy.misc.imresize(img, (299, 299)), dtype=np.float32) / 255 - .5
    if img.shape != (299, 299, 3):
        return None
    return [img, int(ff.split(".")[0])]


class ImageNet:
    """ImageNet test images read from ``../imagenetdata/imgs/``.

    Sets ``test_data`` (stacked images) and ``test_labels`` (one-hot, 1001
    columns).
    """

    def __init__(self, seed):
        from multiprocessing import Pool
        file_list = sorted(os.listdir("../imagenetdata/imgs/"))
        random.seed(seed)
        # The pool is now closed once the images are read, so the 8 worker
        # processes no longer outlive the constructor.
        with Pool(8) as pool:
            r = pool.map(readimg, file_list[:8000])
        # NOTE(review): this shuffle runs after the files were selected, so
        # `seed` does not affect which images are loaded or their order.
        # Left unchanged because moving it would change which images every
        # experiment uses; confirm the intended behaviour.
        random.shuffle(file_list)
        r = [x for x in r if x is not None]
        test_data, test_labels = zip(*r)
        self.test_data = np.array(test_data)
        self.test_labels = np.zeros((len(test_labels), 1001))
        self.test_labels[np.arange(len(test_labels)), test_labels] = 1
def extract_data(filename, num_images):
    """Read `num_images` 28x28 images from a gzipped MNIST image file.

    The first 16 bytes are skipped as the header.

    Returns:
        float32 array of shape ``(num_images, 28, 28, 1)`` scaled to
        ``[-0.5, 0.5]``.
    """
    with gzip.open(filename) as bytestream:
        bytestream.read(16)
        buf = bytestream.read(num_images*28*28)
        data = np.frombuffer(buf, dtype=np.uint8).astype(np.float32)
        data = (data / 255) - 0.5
        data = data.reshape(num_images, 28, 28, 1)
        return data


def extract_labels(filename, num_images):
    """Read `num_images` labels from a gzipped MNIST label file.

    The first 8 bytes are skipped as the header.

    Returns:
        one-hot float32 array of shape ``(num_images, 10)``.
    """
    with gzip.open(filename) as bytestream:
        bytestream.read(8)
        buf = bytestream.read(1 * num_images)
        labels = np.frombuffer(buf, dtype=np.uint8)
    return (np.arange(10) == labels[:, None]).astype(np.float32)


class MNIST:
    """MNIST data, downloaded into ``data/`` and split into train/validation/test.

    Attributes set by the constructor: ``train_data``/``train_labels``,
    ``validation_data``/``validation_labels`` (the first 5000 training
    records) and ``test_data``/``test_labels``.
    """

    def __init__(self):
        os.makedirs("data", exist_ok=True)
        files = ["train-images-idx3-ubyte.gz",
                 "t10k-images-idx3-ubyte.gz",
                 "train-labels-idx1-ubyte.gz",
                 "t10k-labels-idx1-ubyte.gz"]
        for name in files:
            # Checked per file, so an existing but incomplete data/ directory
            # is repaired and complete ones are not downloaded again.
            if not os.path.exists("data/" + name):
                urllib.request.urlretrieve('http://yann.lecun.com/exdb/mnist/' + name, "data/"+name)

        train_data = extract_data("data/train-images-idx3-ubyte.gz", 60000)
        train_labels = extract_labels("data/train-labels-idx1-ubyte.gz", 60000)
        self.test_data = extract_data("data/t10k-images-idx3-ubyte.gz", 10000)
        self.test_labels = extract_labels("data/t10k-labels-idx1-ubyte.gz", 10000)

        VALIDATION_SIZE = 5000

        self.validation_data = train_data[:VALIDATION_SIZE, :, :, :]
        self.validation_labels = train_labels[:VALIDATION_SIZE]
        self.train_data = train_data[VALIDATION_SIZE:, :, :, :]
        self.train_labels = train_labels[VALIDATION_SIZE:]
class MNISTModel:
    """Keras CNN for 28x28x1 inputs with weights restored from a file.

    Two stages of (conv, relu, conv, relu, max-pool) with 32 and then 64
    filters, followed by two 200-unit relu layers and a final ``Dense(10)``
    with no activation after it.
    """

    def __init__(self, restore, session=None):
        self.num_channels = 1
        self.image_size = 28
        self.num_labels = 10

        net = Sequential()

        # Only the very first convolution declares the input shape.
        is_first_conv = True
        for filters in (32, 64):
            for _ in range(2):
                if is_first_conv:
                    net.add(Conv2D(filters, (3, 3),
                                   input_shape=(28, 28, 1)))
                    is_first_conv = False
                else:
                    net.add(Conv2D(filters, (3, 3)))
                net.add(Activation('relu'))
            net.add(MaxPooling2D(pool_size=(2, 2)))

        net.add(Flatten())
        for units in (200, 200):
            net.add(Dense(units))
            net.add(Activation('relu'))
        net.add(Dense(10))
        net.load_weights(restore)

        self.model = net

    def predict(self, data):
        """Apply the network to `data` and return its output."""
        network = self.model
        return network(data)
def show(img, name = "output.png"):
    """Save `img` both as a raw numpy array and as an image file.

    The squeezed array is written with ``np.save`` (numpy appends ``.npy``
    to `name`), then the pixels are mapped from [-0.5, 0.5] to [0, 255] and
    saved to `name` with PIL.
    """
    fig = img.squeeze()
    np.save(name, fig)
    # Clip before the uint8 cast: difference images (adv - original) fall
    # outside [-0.5, 0.5] and would otherwise wrap around.
    fig = np.clip((img + 0.5)*255, 0, 255)
    fig = fig.astype(np.uint8).squeeze()
    pic = Image.fromarray(fig)
    #pic.resize((512,512), resample=PIL.Image.BICUBIC)
    pic.save(name)


def generate_data(data, model, samples, targeted=True, target_num=1, start=0, inception=False, seed=3, handpick=False ):
    """
    Generate the input data to the attack algorithm.

    data: the images to attack (object with test_data and test_labels)
    model: wrapper whose model.predict classifies a batch; only used when
        handpick is true
    samples: number of samples to use
    targeted: if true, construct targeted attacks, otherwise untargeted attacks
    target_num: number of random targets per image (inception only)
    start: offset into data to use
    inception: if targeted and inception, randomly sample target_num targets
        from classes 1..1000 instead of using every other class
    seed: seed for python's random module
    handpick: if true, only keep images that the model classifies correctly

    Returns (inputs, targets, labels, true_ids) as numpy arrays with one row
    per (image, target) pair; true_ids are indices into data.test_data.
    """
    random.seed(seed)
    inputs = []
    targets = []
    labels = []
    true_ids = []

    data_d = data.test_data
    labels_d = data.test_labels
    num_classes = labels_d.shape[1]
    one_hot = np.eye(num_classes)
    # Never draw indices beyond the data that is actually available; with
    # the full datasets this leaves the original pool sizes unchanged.
    available = len(data_d) - start

    if handpick:
        deck = list(range(0, min(1500 if inception else 10000, available)))
        random.shuffle(deck)
        print('Handpicking')

        sample_set = []
        while (len(sample_set) < samples):
            if not deck:
                raise ValueError("only %d correctly classified images found, %d requested"
                                 % (len(sample_set), samples))
            rand_int = deck.pop()
            # Check the image that will really be attacked (offset by start).
            idx = start + rand_int
            pred = model.model.predict(data_d[idx:idx + 1])

            if inception:
                pred = np.reshape(pred, (labels_d[0:1].shape))

            if (np.argmax(pred, 1) == np.argmax(labels_d[idx:idx + 1], 1)):
                sample_set.append(rand_int)
        print('Handpicked')
    else:
        sample_set = random.sample(range(0, min(10000, available)), samples)

    for i in sample_set:
        idx = start + i
        if targeted:
            if inception:
                seq = random.sample(range(1, 1001), target_num)
            else:
                seq = range(num_classes)

            true_class = np.argmax(labels_d[idx])
            for j in seq:
                # The true class is skipped as a target, except for
                # inception where every sampled target is kept.
                if (j == true_class) and not inception:
                    continue
                inputs.append(data_d[idx])
                targets.append(one_hot[j])
                labels.append(labels_d[idx])
                true_ids.append(idx)
        else:
            inputs.append(data_d[idx])
            targets.append(labels_d[idx])
            labels.append(labels_d[idx])
            true_ids.append(idx)

    inputs = np.array(inputs)
    targets = np.array(targets)
    labels = np.array(labels)
    true_ids = np.array(true_ids)
    return inputs, targets, labels, true_ids
# Perturbation entries smaller than this are treated as zero when counting L0.
_L0_TOLERANCE = 1e-7
_METRICS = ('l0', 'l1', 'l2', 'linf')


def _distortions(adv_img, clean_img):
    """Return the (L0, L1, L2, Linf) size of the perturbation adv_img - clean_img."""
    diff = adv_img - clean_img
    magnitude = np.abs(diff)
    return (int(np.count_nonzero(magnitude >= _L0_TOLERANCE)),
            np.sum(magnitude),
            np.sum(diff ** 2) ** .5,
            np.amax(magnitude))


def l1_l2_li_computation(args, data, model, adv, inception, inputs, targets, labels, true_ids):
    """Print best/average/worst-case distortion and success rate of an attack.

    The examples are processed in consecutive groups of
    ``args['target_number']`` (the targets generated for one input):

    * best case: succeeds if any target in the group is reached; each
      metric records its smallest value among the successful targets.
    * average case: one target of the group is drawn with
      ``np.random.randint``; its metrics are recorded if it succeeded.
    * worst case: succeeds only if every target in the group is reached;
      each metric records its largest value in the group.

    A target counts as reached when the arg-max of ``model.model.predict``
    on the adversarial example equals the arg-max of its target row.

    Args:
        args: dict with at least 'target_number' and 'show'; with 'show'
            set, 'save', 'dataset' and 'attack' name the output directory.
        data: object whose ``test_labels[0:1].shape`` gives the shape that
            predictions are reshaped to.
        model: wrapper exposing ``model.predict`` on numpy batches.
        adv, inputs, targets, labels, true_ids: aligned per-example arrays.
        inception: if true, predictions are reshaped before the arg-max.

    Returns:
        None; the 15 summary values are printed. The mean of a case with no
        successes prints as nan.
    """
    group_size = args['target_number']
    label_shape = data.test_labels[0:1].shape
    stats = {case: {key: [] for key in _METRICS + ('prob',)}
             for case in ('best', 'average', 'worst')}

    def record(case, values):
        """Store one group's outcome; `values` is None for a failed group."""
        if values is None:
            stats[case]['prob'].append(0)
            return
        for key, value in zip(_METRICS, values):
            stats[case][key].append(value)
        stats[case]['prob'].append(1)

    out_dir = str(args['save']) + "/" + str(args['dataset']) + "/" + str(args['attack'])
    if (args['show']):
        os.makedirs(out_dir, exist_ok=True)

    for i in range(0, len(inputs), group_size):
        # A trailing partial group is clamped instead of indexing past the end.
        members = range(i, min(i + group_size, len(inputs)))

        preds = []
        for j in members:
            out = model.model.predict(adv[j:j + 1])
            preds.append(np.reshape(out, label_shape) if inception else out)
        hits = [bool(np.argmax(p, 1) == np.argmax(targets[j:j + 1], 1))
                for p, j in zip(preds, members)]
        dists = [_distortions(adv[j], inputs[j]) for j in members]
        won = [d for d, hit in zip(dists, hits) if hit]

        # best case: per-metric minimum over the successful targets
        record('best', tuple(map(min, zip(*won))) if won else None)

        # average case: one randomly drawn target. Its prediction is taken
        # from `preds` instead of running the model again.
        pick = np.random.randint(members.start, members.stop) - i
        record('average', dists[pick] if hits[pick] else None)

        # worst case: every target must succeed; per-metric maximum. A group
        # that succeeds with zero distortion is counted as a success as well.
        record('worst', tuple(map(max, zip(*dists))) if all(hits) else None)

        if (args['show']):
            for k, j in enumerate(members):
                target_id = np.argmax(targets[j:j + 1], 1)
                label_id = np.argmax(labels[j:j + 1], 1)
                prev_id = np.argmax(np.reshape(model.model.predict(inputs[j:j + 1]), label_shape), 1)
                adv_id = np.argmax(np.reshape(preds[k], label_shape), 1)
                _, l1, l2, linf = dists[k]
                suffix = "id{}_seq{}_lbl{}_prev{}_adv{}_{}_l1_{:.3f}_l2_{:.3f}_linf_{:.3f}".format(
                    true_ids[j],
                    target_id,
                    label_id,
                    prev_id,
                    adv_id,
                    adv_id == target_id,
                    l1,
                    l2,
                    linf)

                show(inputs[j:j + 1], out_dir + "/original_{}.png".format(suffix))
                show(adv[j:j + 1], out_dir + "/adversarial_{}.png".format(suffix))
                show(adv[j:j + 1] - inputs[j:j + 1], out_dir + "/diff_{}.png".format(suffix))

    for case in ('best', 'average', 'worst'):
        for key, label in zip(_METRICS, ('L0', 'L1', 'L2', 'Linf')):
            print(case + '_case_' + label + '_mean', np.mean(stats[case][key]))
        print(case + '_case_prob', np.mean(stats[case]['prob']))
args['attack']) + "/diff_{}.png".format(suffix)) 246 | 247 | print('best_case_L0_mean', np.mean(d_best_l0)) 248 | print('best_case_L1_mean', np.mean(d_best_l1)) 249 | print('best_case_L2_mean', np.mean(d_best_l2)) 250 | print('best_case_Linf_mean', np.mean(d_best_linf)) 251 | print('best_case_prob', np.mean(r_best)) 252 | print('average_case_L0_mean', np.mean(d_average_l0)) 253 | print('average_case_L1_mean', np.mean(d_average_l1)) 254 | print('average_case_L2_mean', np.mean(d_average_l2)) 255 | print('average_case_Linf_mean', np.mean(d_average_linf)) 256 | print('average_case_prob', np.mean(r_average)) 257 | print('worst_case_L0_mean', np.mean(d_worst_l0)) 258 | print('worst_case_L1_mean', np.mean(d_worst_l1)) 259 | print('worst_case_L2_mean', np.mean(d_worst_l2)) 260 | print('worst_case_Linf_mean', np.mean(d_worst_linf)) 261 | print('worst_case_prob', np.mean(r_worst)) 262 | 263 | 264 | 265 | 266 | def main(args): 267 | # gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.7) 268 | with tf.Session() as sess: 269 | if args['dataset'] == 'mnist': 270 | data, model = MNIST(), MadryMNISTModel("models/secret/", sess) 271 | handpick = False 272 | inception = False 273 | if args['dataset'] == "cifar": 274 | data, model = CIFAR(), CIFARModel("models/cifar", sess) 275 | #data, model = CIFAR(), MadryCIFARModel("models/model_0/", sess) 276 | handpick = True 277 | inception = False 278 | if args['dataset'] == "imagenet": 279 | data, model = ImageNet(args['seed_imagenet']), InceptionModel(sess, False) 280 | handpick = True 281 | inception = True 282 | 283 | if args['adversarial'] != "none": 284 | model = MNISTModel("models/mnist_cwl2_admm" + str(args['adversarial']), sess) 285 | 286 | if args['temp'] and args['dataset'] == 'mnist': 287 | model = MNISTModel("models/mnist-distilled-" + str(args['temp']), sess) 288 | if args['temp'] and args['dataset'] == 'cifar': 289 | model = MadryCIFARModel("models/cifar-distilled-" + str(args['temp']), sess) 290 | 291 | inputs, 
targets, labels, true_ids = generate_data(data, model, samples=args['numimg'], targeted=True, target_num=args['target_number'], 292 | start=0, inception=inception, handpick=handpick, seed=args['seed']) 293 | 294 | #print(true_ids) 295 | if args['attack'] == 'L2C': 296 | attack = CarliniL2(sess, model, batch_size=args['batch_size'], max_iterations=args['maxiter'], 297 | confidence=args['conf'], 298 | binary_search_steps=args['binary_steps'], 299 | abort_early=args['abort_early']) 300 | if args['attack'] == 'LiCW': 301 | attack = CarliniLi(sess, model, max_iterations=args['maxiter'], 302 | abort_early=args['abort_early']) 303 | 304 | if args['attack'] == 'L2A': 305 | attack = ADMML2(sess, model, batch_size=args['batch_size'], max_iterations=args['maxiter'], 306 | confidence=args['conf'], binary_search_steps=args['iteration_steps'], ro=args['ro'], 307 | abort_early=args['abort_early']) 308 | 309 | if args['attack'] == 'L2AE': 310 | attack = ADMML2en(sess, model, batch_size=args['batch_size'], max_iterations=args['maxiter'], 311 | confidence=args['conf'], binary_search_steps=args['binary_steps'], ro=args['ro'], 312 | iteration_steps=args['iteration_steps'], abort_early=args['abort_early']) 313 | 314 | if args['attack'] == 'L2LA': 315 | attack = LADMML2(sess, model, batch_size=args['batch_size'], max_iterations=args['maxiter'], 316 | confidence=args['conf'], binary_search_steps=args['iteration_steps'], ro=args['ro'], 317 | abort_early=args['abort_early']) 318 | if args['attack'] == 'L2LAST': 319 | attack = LADMMSTL2(sess, model, batch_size=args['batch_size'], max_iterations=args['maxiter'], 320 | confidence=args['conf'], binary_search_steps=args['iteration_steps'], ro=args['ro'], 321 | abort_early=args['abort_early'],retrain=args['retrain']) 322 | 323 | if args['attack'] == 'LiIF': 324 | attack = IFGM(sess, model, batch_size=args['batch_size'], ord=np.inf, inception=inception) 325 | if args['attack'] == 'LiF': 326 | attack = FGM(sess, model, 
def parse_bool_flag(value):
    """Convert a command-line token such as ``"False"`` or ``"1"`` to a bool.

    Without this converter argparse stores the raw string, and any non-empty
    string (including ``"False"``) is truthy.

    Raises
    ------
    argparse.ArgumentTypeError
        If ``value`` is not a recognised boolean spelling.
    """
    import argparse

    if isinstance(value, bool):
        return value
    lowered = str(value).strip().lower()
    if lowered in ("true", "t", "yes", "y", "1"):
        return True
    if lowered in ("false", "f", "no", "n", "0"):
        return False
    raise argparse.ArgumentTypeError("expected a boolean value, got {!r}".format(value))


def build_arg_parser():
    """Return the argument parser for the attack test script.

    All original options, short names and defaults are preserved.  Two fixes:
    ``--retrain`` is now parsed as a real boolean, and ``--no_abort_early`` is
    added because ``--abort_early`` (store_true with default True) could never
    be turned off.
    """
    import argparse

    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("-d", "--dataset", choices=["mnist", "cifar", "imagenet"], default="imagenet",
                        help="dataset to use")
    parser.add_argument("-n", "--numimg", type=int, default=3, help="number of images to attack")
    parser.add_argument("-b", "--batch_size", type=int, default=9, help="batch size")
    parser.add_argument("-m", "--maxiter", type=int, default=2000, help="max iterations per bss")
    parser.add_argument("-is", "--iteration_steps", type=int, default=6,
                        help="number of iteration L2ADMM not for CW")
    parser.add_argument("-ro", "--ro", type=int, default=15, help="value of ro")
    parser.add_argument("-bs", "--binary_steps", type=int, default=6,
                        help="number of bss for CW outer loop")
    parser.add_argument("-ae", "--abort_early", action='store_true', default=True,
                        help="abort binary search step early when losses stop decreasing")
    # Counterpart of --abort_early; writes to the same destination.
    parser.add_argument("-nae", "--no_abort_early", dest="abort_early", action='store_false',
                        default=True, help="do not abort binary search steps early")
    parser.add_argument("-cf", "--conf", type=int, default=0,
                        help='Set attack confidence for transferability tests')
    parser.add_argument("-imgsd", "--seed_imagenet", type=int, default=825,
                        help='random seed for pulling images from ImageNet test set')
    parser.add_argument("-sd", "--seed", type=int, default=1001,
                        help='random seed for pulling images from data set')
    parser.add_argument("-sh", "--show", action='store_true', default=False,
                        help='save original and adversarial images to save directory')
    parser.add_argument("-s", "--save", default="./saves", help="save directory")
    parser.add_argument("-a", "--attack",
                        choices=["L2C", "L2A", "L2AE", "L2LA", "L2LAST", "L2U", "LiIF"],
                        default="L2LAST",
                        help="attack algorithm")
    parser.add_argument("-re", "--retrain", type=parse_bool_flag, default=True,
                        help="retrain or not")
    # Per the original note, target_number is useless for mnist and cifar.
    parser.add_argument("-tn", "--target_number", type=int, default=3,
                        help="number of targets for one input")
    parser.add_argument("-tr", "--train", action='store_true', default=False,
                        help="save adversarial images generated from train set")
    parser.add_argument("-tp", "--temp", type=int, default=0,
                        help="attack defensively distilled network trained with this temperature")
    parser.add_argument("-adv", "--adversarial",
                        choices=["none", "l2", "l1", "en", "l2l1", "l2en"], default="none",
                        help="attack network adversarially trained under these examples")
    parser.add_argument("-be", "--beta", type=float, default=1e-4, help='beta hyperparameter')
    return parser


if __name__ == "__main__":
    args = vars(build_arg_parser().parse_args())
    print(args)
    main(args)
def train(data, file_name, params, num_epochs=50, batch_size=128, train_temp=1, init=None,
          adversarial=False, examples=None, labels=None):
    """Train the standard conv-net classifier and optionally save it.

    Parameters
    ----------
    data : object
        Dataset exposing ``train_data``, ``train_labels``, ``validation_data``
        and ``validation_labels``.
    file_name : str or None
        Where to save the trained model; ``None`` skips saving.
    params : sequence of int
        Six layer widths: filters of the four conv layers followed by the
        sizes of the two hidden dense layers.
    num_epochs : int
        Number of training epochs.
    batch_size : int
        Mini-batch size.
    train_temp : number
        Softmax temperature; the logits are divided by it inside the loss.
    init : str or None
        Optional weights file loaded before training.
    adversarial : bool
        When true, ``examples`` / ``labels`` are appended to the training set.
    examples, labels : array-like or None
        Extra training samples and their labels (required if ``adversarial``).

    Returns
    -------
    The trained Keras ``Sequential`` model (its last layer outputs logits).

    Raises
    ------
    ValueError
        If ``adversarial`` is true but ``examples`` or ``labels`` is missing.
    """
    # Work on local references so the caller's dataset object is not mutated
    # (repeated calls used to keep growing data.train_data).
    train_x, train_y = data.train_data, data.train_labels
    if adversarial:
        if examples is None or labels is None:
            raise ValueError("adversarial=True requires both `examples` and `labels`")
        train_x = np.concatenate((train_x, examples), axis=0)
        train_y = np.concatenate((train_y, labels), axis=0)

    model = Sequential()

    print(train_x.shape)

    model.add(Conv2D(params[0], (3, 3),
                     input_shape=train_x.shape[1:]))
    model.add(Activation('relu'))
    model.add(Conv2D(params[1], (3, 3)))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    model.add(Conv2D(params[2], (3, 3)))
    model.add(Activation('relu'))
    model.add(Conv2D(params[3], (3, 3)))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    model.add(Flatten())
    model.add(Dense(params[4]))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Dense(params[5]))
    model.add(Activation('relu'))
    model.add(Dense(10))

    if init is not None:
        model.load_weights(init)

    def fn(correct, predicted):
        # Cross-entropy on temperature-scaled logits.
        return tf.nn.softmax_cross_entropy_with_logits(labels=correct,
                                                       logits=predicted/train_temp)

    sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)

    model.compile(loss=fn,
                  optimizer=sgd,
                  metrics=['accuracy'])

    # `epochs` is the Keras 2 name of the former `nb_epoch` argument; the
    # file already relies on the Keras 2 API (Conv2D).
    model.fit(train_x, train_y,
              batch_size=batch_size,
              validation_data=(data.validation_data, data.validation_labels),
              epochs=num_epochs,
              shuffle=True)

    if file_name is not None:
        model.save(file_name)

    return model
def train_distillation(data, file_name, params, num_epochs=50, batch_size=128, train_temp=1):
    """
    Train a defensively distilled network (teacher, then student).

    Reference:
    Distillation as a Defense to Adversarial Perturbations against Deep Neural Networks
    Nicolas Papernot, Patrick McDaniel, Xi Wu, Somesh Jha, Ananthram Swami
    IEEE S&P, 2016.

    Note: ``data.train_labels`` is overwritten with the teacher's soft labels.
    """
    init_path = file_name + "_init"
    teacher_path = file_name + "_teacher"

    # A single warm-up epoch provides a shared starting point; it is reused
    # if the file is already on disk.
    if not os.path.exists(init_path):
        train(data, init_path, params, 1, batch_size)

    # Teacher network, trained at the requested temperature.
    teacher_model = train(data, teacher_path, params, num_epochs, batch_size, train_temp,
                          init=init_path)

    # Soft labels: teacher logits passed through a softmax at temperature t.
    teacher_logits = teacher_model.predict(data.train_data)
    with tf.Session() as sess:
        soft_labels = sess.run(tf.nn.softmax(teacher_logits / train_temp))
        print(soft_labels)
        data.train_labels = soft_labels

    # Student network, trained on the soft labels at the same temperature.
    student_model = train(data, file_name, params, num_epochs, batch_size, train_temp,
                          init=init_path)

    # Final prediction of the student (temperature 1), printed for inspection.
    student_logits = student_model.predict(data.train_data)

    print(student_logits)
def main(args):
    """Train the models selected by the command-line options.

    Parameters
    ----------
    args : dict
        Keys read: ``dataset`` ("mnist", "cifar" or "all"), ``adversarial``,
        ``defensive`` and ``temp`` (0, or the temperatures to distil at).

    Behaviour
    ---------
    * neither flag set: train the plain MNIST and/or CIFAR model;
    * ``adversarial``: train MNIST models augmented with saved adversarial
      examples loaded from the ``train/`` directory;
    * ``defensive``: train distilled models for each temperature.
    """
    os.makedirs('models', exist_ok=True)

    dataset = args['dataset']
    want_mnist = dataset in ("mnist", "all")
    want_cifar = dataset in ("cifar", "all")
    mnist_params = [32, 32, 64, 64, 200, 200]
    cifar_params = [64, 64, 128, 128, 256, 256]

    if not (args['adversarial'] or args['defensive']):
        if want_mnist:
            train(MNIST(), "models/mnist", mnist_params, num_epochs=250)
        if want_cifar:
            train(CIFAR(), "models/cifar", cifar_params, num_epochs=250)

    if args['adversarial']:
        CWL2 = np.load('train/L2C_train.npy')
        ADMML2 = np.load('train/L2A_train.npy')
        labels = np.load('train/labels_train.npy')

        CWL2_ADMML2 = np.concatenate((CWL2, ADMML2), axis=0)
        # The combined example set is two stacked sets, so the labels are
        # stacked twice as well.
        labels_2 = np.concatenate((labels, labels), axis=0)

        train(MNIST(), "models/mnist_cwl2", mnist_params, num_epochs=50,
              adversarial=True, examples=CWL2, labels=labels)
        train(MNIST(), "models/mnist_admml2", mnist_params, num_epochs=50,
              adversarial=True, examples=ADMML2, labels=labels)
        train(MNIST(), "models/mnist_cwl2_admml2", mnist_params, num_epochs=50,
              adversarial=True, examples=CWL2_ADMML2, labels=labels_2)

    if args['defensive']:
        temp = args['temp']
        if temp == 0:
            temps = [1, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100]
        elif isinstance(temp, int):
            # Tolerate a bare integer from programmatic callers.
            temps = [temp]
        else:
            temps = list(temp)

        # Fixed: the MNIST condition used to test "mnist" twice, so
        # `--dataset all` never produced distilled MNIST models.
        if want_mnist:
            for t in temps:
                print('Mnist_'+str(t))
                train_distillation(MNIST(), "models/mnist-distilled-"+str(t), mnist_params,
                                   num_epochs=50, train_temp=t)
        if want_cifar:
            for t in temps:
                print('Cifar_'+str(t))
                train_distillation(CIFAR(), "models/cifar-distilled-"+str(t), cifar_params,
                                   num_epochs=50, train_temp=t)
if __name__ == "__main__":
    # Script entry point: parse the options, echo them, then train.
    import argparse

    parser = argparse.ArgumentParser()
    option_specs = (
        (("-d", "--dataset"), dict(choices=["mnist", "cifar", "all"], default="cifar")),
        (("-a", "--adversarial"), dict(action='store_true', default=False)),
        (("-dd", "--defensive"), dict(action='store_true', default=False)),
        (("-t", "--temp"), dict(nargs='+', type=int, default=0)),
    )
    for flags, options in option_specs:
        parser.add_argument(*flags, **options)

    args = vars(parser.parse_args())
    print(args)
    main(args)