├── LICENSE.txt ├── README.md ├── attack.py ├── attacks ├── __init__.py ├── attack_carlini_wagner_l2.py ├── attack_iterative.py └── helpers.py ├── dataset.py ├── download_checkpoint.sh ├── metadata.json ├── run_attack.sh ├── run_attack_cwl2.py └── run_attack_iter.py /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # pytorch-nips2017-attack-example 2 | 3 | This is a baseline targeted (or untargeted) attack that works within the Cleverhans (https://github.com/tensorflow/cleverhans) framework for the NIPS-2017 adversarial competition. 4 | 5 | There are two types of attacks included, an iterative fast-gradient method, and a Carlini and Wagner L2 attack. 6 | 7 | ## Iterative Fast-Gradient 8 | 9 | These attacks are modeled after the 'basic iterative' / 'itarative FGSM' attack mentioned in https://arxiv.org/abs/1611.01236 and https://arxiv.org/abs/1705.07204 (among others). 10 | 11 | The default setup is to run a targeted L-inifity norm variant of the targeted attack with 10 steps. L1 or L2 based attacks seem to require around 40-50 steps with the current code to perform a reasonable attack. 12 | 13 | ## Carlini and Wagner L2 14 | 15 | An implementation of the L2 variant of the attack described in this paper https://arxiv.org/abs/1608.04644 by Carlini and Wagner. Based on a reference implementation by Carlini at https://github.com/carlini/nn_robust_attacks and https://github.com/tensorflow/cleverhans/blob/master/cleverhans/attacks_tf.py 16 | 17 | NOTE: I'm still verifying and experimenting with this attack. It takes MUCH longer (half a day) to run and produces much more subtle results that I'm having difficulty successfully transfering as a targeted attack to other models... 18 | 19 | ## Usage 20 | 21 | To run: 22 | 1. Setup and verify cleverhans nips17 adversarial competition example environment 23 | 2. Clone this repo 24 | 3. Run ./download_checkpoint.sh to download the inceptionv3 checkpoint from torchvision model zoo 25 | 4. Symbolic link the folder this repo was clone into into the cleverhans 'examples/nips17_adversarial_competition/sample_targeted_attacks/' folder 26 | 5. Run run_attacks_and_defenses.sh and ensure '--gpu' flag is added 27 | 28 | 29 | To switch between attacks and alter parameters of the attack, command line args in the run_attack.sh script need modification. 30 | 31 | Iterative non-targeted L1: 32 | ``` 33 | python run_attack_iter.py \ 34 | --input_dir="${INPUT_DIR}" \ 35 | --output_dir="${OUTPUT_DIR}" \ 36 | --max_epsilon="${MAX_EPSILON}" \ 37 | --steps 50 \ 38 | --norm 1 \ 39 | --checkpoint_path=inception_v3_google-1a9a5a14.pth 40 | ``` 41 | 42 | Iterative targeted L2: 43 | ``` 44 | python run_attack_iter.py \ 45 | --input_dir="${INPUT_DIR}" \ 46 | --output_dir="${OUTPUT_DIR}" \ 47 | --max_epsilon="${MAX_EPSILON}" \ 48 | --steps 42 \ 49 | --targeted \ 50 | --norm 2 \ 51 | --checkpoint_path=inception_v3_google-1a9a5a14.pth 52 | ``` 53 | 54 | Carlini and Wagner L2: 55 | ``` 56 | python run_attack_cwl2.py \ 57 | --input_dir="${INPUT_DIR}" \ 58 | --output_dir="${OUTPUT_DIR}" \ 59 | --max_epsilon="${MAX_EPSILON}" \ 60 | --targeted \ 61 | --checkpoint_path=inception_v3_google-1a9a5a14.pth 62 | ``` 63 | 64 | 65 | 66 | -------------------------------------------------------------------------------- /attack.py: -------------------------------------------------------------------------------- 1 | """Attack loop 2 | """ 3 | 4 | from __future__ import absolute_import 5 | from __future__ import division 6 | from __future__ import print_function 7 | 8 | import os 9 | import numpy as np 10 | import torch 11 | import torchvision 12 | import torch.utils.data as data 13 | 14 | from scipy.misc import imsave 15 | from dataset import Dataset, default_inception_transform 16 | 17 | 18 | def run_attack(args, attack): 19 | assert args.input_dir 20 | 21 | if args.targeted: 22 | dataset = Dataset( 23 | args.input_dir, 24 | transform=default_inception_transform(args.img_size)) 25 | else: 26 | dataset = Dataset( 27 | args.input_dir, 28 | target_file='', 29 | transform=default_inception_transform(args.img_size)) 30 | 31 | loader = data.DataLoader( 32 | dataset, 33 | batch_size=args.batch_size, 34 | shuffle=False) 35 | 36 | model = torchvision.models.inception_v3(pretrained=False, transform_input=False) 37 | if not args.no_gpu: 38 | model = model.cuda() 39 | 40 | if args.checkpoint_path is not None and os.path.isfile(args.checkpoint_path): 41 | checkpoint = torch.load(args.checkpoint_path) 42 | if isinstance(checkpoint, dict) and 'state_dict' in checkpoint: 43 | model.load_state_dict(checkpoint['state_dict']) 44 | else: 45 | model.load_state_dict(checkpoint) 46 | else: 47 | print("Error: No checkpoint found at %s." % args.checkpoint_path) 48 | 49 | model.eval() 50 | 51 | for batch_idx, (input, target) in enumerate(loader): 52 | if not args.no_gpu: 53 | input = input.cuda() 54 | target = target.cuda() 55 | 56 | input_adv = attack.run(model, input, target, batch_idx) 57 | 58 | start_index = args.batch_size * batch_idx 59 | indices = list(range(start_index, start_index + input.size(0))) 60 | for filename, o in zip(dataset.filenames(indices, basename=True), input_adv): 61 | output_file = os.path.join(args.output_dir, filename) 62 | imsave(output_file, (o + 1.0) * 0.5, format='png') 63 | -------------------------------------------------------------------------------- /attacks/__init__.py: -------------------------------------------------------------------------------- 1 | from .attack_carlini_wagner_l2 import AttackCarliniWagnerL2 2 | from .attack_iterative import AttackIterative -------------------------------------------------------------------------------- /attacks/attack_carlini_wagner_l2.py: -------------------------------------------------------------------------------- 1 | """PyTorch Carlini and Wagner L2 attack algorithm. 2 | 3 | Based on paper by Carlini & Wagner, https://arxiv.org/abs/1608.04644 and a reference implementation at 4 | https://github.com/tensorflow/cleverhans/blob/master/cleverhans/attacks_tf.py 5 | """ 6 | import os 7 | import sys 8 | import torch 9 | import numpy as np 10 | from torch import optim 11 | from torch import autograd 12 | from .helpers import * 13 | 14 | 15 | class AttackCarliniWagnerL2: 16 | 17 | def __init__(self, targeted=True, search_steps=None, max_steps=None, cuda=True, debug=False): 18 | self.debug = debug 19 | self.targeted = targeted 20 | self.num_classes = 1000 21 | self.confidence = 20 # FIXME need to find a good value for this, 0 value used in paper not doing much... 22 | self.initial_const = 0.1 # bumped up from default of .01 in reference code 23 | self.binary_search_steps = search_steps or 5 24 | self.repeat = self.binary_search_steps >= 10 25 | self.max_steps = max_steps or 1000 26 | self.abort_early = True 27 | self.clip_min = -1. 28 | self.clip_max = 1. 29 | self.cuda = cuda 30 | self.clamp_fn = 'tanh' # set to something else perform a simple clamp instead of tanh 31 | self.init_rand = False # an experiment, does a random starting point help? 32 | 33 | def _compare(self, output, target): 34 | if not isinstance(output, (float, int, np.int64)): 35 | output = np.copy(output) 36 | if self.targeted: 37 | output[target] -= self.confidence 38 | else: 39 | output[target] += self.confidence 40 | output = np.argmax(output) 41 | if self.targeted: 42 | return output == target 43 | else: 44 | return output != target 45 | 46 | def _loss(self, output, target, dist, scale_const): 47 | # compute the probability of the label class versus the maximum other 48 | real = (target * output).sum(1) 49 | other = ((1. - target) * output - target * 10000.).max(1)[0] 50 | if self.targeted: 51 | # if targeted, optimize for making the other class most likely 52 | loss1 = torch.clamp(other - real + self.confidence, min=0.) # equiv to max(..., 0.) 53 | else: 54 | # if non-targeted, optimize for making this class least likely. 55 | loss1 = torch.clamp(real - other + self.confidence, min=0.) # equiv to max(..., 0.) 56 | loss1 = torch.sum(scale_const * loss1) 57 | 58 | loss2 = dist.sum() 59 | 60 | loss = loss1 + loss2 61 | return loss 62 | 63 | def _optimize(self, optimizer, model, input_var, modifier_var, target_var, scale_const_var, input_orig=None): 64 | # apply modifier and clamp resulting image to keep bounded from clip_min to clip_max 65 | if self.clamp_fn == 'tanh': 66 | input_adv = tanh_rescale(modifier_var + input_var, self.clip_min, self.clip_max) 67 | else: 68 | input_adv = torch.clamp(modifier_var + input_var, self.clip_min, self.clip_max) 69 | 70 | output = model(input_adv) 71 | 72 | # distance to the original input data 73 | if input_orig is None: 74 | dist = l2_dist(input_adv, input_var, keepdim=False) 75 | else: 76 | dist = l2_dist(input_adv, input_orig, keepdim=False) 77 | 78 | loss = self._loss(output, target_var, dist, scale_const_var) 79 | 80 | optimizer.zero_grad() 81 | loss.backward() 82 | optimizer.step() 83 | 84 | loss_np = loss.data[0] 85 | dist_np = dist.data.cpu().numpy() 86 | output_np = output.data.cpu().numpy() 87 | input_adv_np = input_adv.data.permute(0, 2, 3, 1).cpu().numpy() # back to BHWC for numpy consumption 88 | return loss_np, dist_np, output_np, input_adv_np 89 | 90 | def run(self, model, input, target, batch_idx=0): 91 | batch_size = input.size(0) 92 | 93 | # set the lower and upper bounds accordingly 94 | lower_bound = np.zeros(batch_size) 95 | scale_const = np.ones(batch_size) * self.initial_const 96 | upper_bound = np.ones(batch_size) * 1e10 97 | 98 | # python/numpy placeholders for the overall best l2, label score, and adversarial image 99 | o_best_l2 = [1e10] * batch_size 100 | o_best_score = [-1] * batch_size 101 | o_best_attack = input.permute(0, 2, 3, 1).cpu().numpy() 102 | 103 | # setup input (image) variable, clamp/scale as necessary 104 | if self.clamp_fn == 'tanh': 105 | # convert to tanh-space, input already int -1 to 1 range, does it make sense to do 106 | # this as per the reference implementation or can we skip the arctanh? 107 | input_var = autograd.Variable(torch_arctanh(input), requires_grad=False) 108 | input_orig = tanh_rescale(input_var, self.clip_min, self.clip_max) 109 | else: 110 | input_var = autograd.Variable(input, requires_grad=False) 111 | input_orig = None 112 | 113 | # setup the target variable, we need it to be in one-hot form for the loss function 114 | target_onehot = torch.zeros(target.size() + (self.num_classes,)) 115 | if self.cuda: 116 | target_onehot = target_onehot.cuda() 117 | target_onehot.scatter_(1, target.unsqueeze(1), 1.) 118 | target_var = autograd.Variable(target_onehot, requires_grad=False) 119 | 120 | # setup the modifier variable, this is the variable we are optimizing over 121 | modifier = torch.zeros(input_var.size()).float() 122 | if self.init_rand: 123 | # Experiment with a non-zero starting point... 124 | modifier = torch.normal(means=modifier, std=0.001) 125 | if self.cuda: 126 | modifier = modifier.cuda() 127 | modifier_var = autograd.Variable(modifier, requires_grad=True) 128 | 129 | optimizer = optim.Adam([modifier_var], lr=0.0005) 130 | 131 | for search_step in range(self.binary_search_steps): 132 | print('Batch: {0:>3}, search step: {1}'.format(batch_idx, search_step)) 133 | if self.debug: 134 | print('Const:') 135 | for i, x in enumerate(scale_const): 136 | print(i, x) 137 | best_l2 = [1e10] * batch_size 138 | best_score = [-1] * batch_size 139 | 140 | # The last iteration (if we run many steps) repeat the search once. 141 | if self.repeat and search_step == self.binary_search_steps - 1: 142 | scale_const = upper_bound 143 | 144 | scale_const_tensor = torch.from_numpy(scale_const).float() 145 | if self.cuda: 146 | scale_const_tensor = scale_const_tensor.cuda() 147 | scale_const_var = autograd.Variable(scale_const_tensor, requires_grad=False) 148 | 149 | prev_loss = 1e6 150 | for step in range(self.max_steps): 151 | # perform the attack 152 | loss, dist, output, adv_img = self._optimize( 153 | optimizer, 154 | model, 155 | input_var, 156 | modifier_var, 157 | target_var, 158 | scale_const_var, 159 | input_orig) 160 | 161 | if step % 100 == 0 or step == self.max_steps - 1: 162 | print('Step: {0:>4}, loss: {1:6.4f}, dist: {2:8.5f}, modifier mean: {3:.5e}'.format( 163 | step, loss, dist.mean(), modifier_var.data.mean())) 164 | 165 | if self.abort_early and step % (self.max_steps // 10) == 0: 166 | if loss > prev_loss * .9999: 167 | print('Aborting early...') 168 | break 169 | prev_loss = loss 170 | 171 | # update best result found 172 | for i in range(batch_size): 173 | target_label = target[i] 174 | output_logits = output[i] 175 | output_label = np.argmax(output_logits) 176 | di = dist[i] 177 | if self.debug: 178 | if step % 100 == 0: 179 | print('{0:>2} dist: {1:.5f}, output: {2:>3}, {3:5.3}, target {4:>3}'.format( 180 | i, di, output_label, output_logits[output_label], target_label)) 181 | if di < best_l2[i] and self._compare(output_logits, target_label): 182 | if self.debug: 183 | print('{0:>2} best step, prev dist: {1:.5f}, new dist: {2:.5f}'.format( 184 | i, best_l2[i], di)) 185 | best_l2[i] = di 186 | best_score[i] = output_label 187 | if di < o_best_l2[i] and self._compare(output_logits, target_label): 188 | if self.debug: 189 | print('{0:>2} best total, prev dist: {1:.5f}, new dist: {2:.5f}'.format( 190 | i, o_best_l2[i], di)) 191 | o_best_l2[i] = di 192 | o_best_score[i] = output_label 193 | o_best_attack[i] = adv_img[i] 194 | 195 | sys.stdout.flush() 196 | # end inner step loop 197 | 198 | # adjust the constants 199 | batch_failure = 0 200 | batch_success = 0 201 | for i in range(batch_size): 202 | if self._compare(best_score[i], target[i]) and best_score[i] != -1: 203 | # successful, do binary search and divide const by two 204 | upper_bound[i] = min(upper_bound[i], scale_const[i]) 205 | if upper_bound[i] < 1e9: 206 | scale_const[i] = (lower_bound[i] + upper_bound[i]) / 2 207 | if self.debug: 208 | print('{0:>2} successful attack, lowering const to {1:.3f}'.format( 209 | i, scale_const[i])) 210 | else: 211 | # failure, multiply by 10 if no solution found 212 | # or do binary search with the known upper bound 213 | lower_bound[i] = max(lower_bound[i], scale_const[i]) 214 | if upper_bound[i] < 1e9: 215 | scale_const[i] = (lower_bound[i] + upper_bound[i]) / 2 216 | else: 217 | scale_const[i] *= 10 218 | if self.debug: 219 | print('{0:>2} failed attack, raising const to {1:.3f}'.format( 220 | i, scale_const[i])) 221 | if self._compare(o_best_score[i], target[i]) and o_best_score[i] != -1: 222 | batch_success += 1 223 | else: 224 | batch_failure += 1 225 | 226 | print('Num failures: {0:2d}, num successes: {1:2d}\n'.format(batch_failure, batch_success)) 227 | sys.stdout.flush() 228 | # end outer search loop 229 | 230 | return o_best_attack 231 | -------------------------------------------------------------------------------- /attacks/attack_iterative.py: -------------------------------------------------------------------------------- 1 | """Pytorch Iterative Fast-Gradient attack algorithm 2 | """ 3 | import sys 4 | import torch 5 | from torch import autograd 6 | from torch.autograd.gradcheck import zero_gradients 7 | from .helpers import * 8 | 9 | 10 | class AttackIterative: 11 | 12 | def __init__( 13 | self, 14 | targeted=True, max_epsilon=16, norm=float('inf'), 15 | step_alpha=None, num_steps=None, cuda=True, debug=False): 16 | 17 | self.targeted = targeted 18 | self.eps = 2.0 * max_epsilon / 255.0 19 | self.num_steps = num_steps or 10 20 | self.norm = norm 21 | if not step_alpha: 22 | if norm == float('inf'): 23 | self.step_alpha = self.eps / self.num_steps 24 | else: 25 | # Different scaling required for L2 and L1 norms to get anywhere 26 | if norm == 1: 27 | self.step_alpha = 500.0 # L1 needs a lot of (arbitrary) love 28 | else: 29 | self.step_alpha = 1.0 30 | else: 31 | self.step_alpha = step_alpha 32 | self.loss_fn = torch.nn.CrossEntropyLoss() 33 | if cuda: 34 | self.loss_fn = self.loss_fn.cuda() 35 | self.debug = debug 36 | 37 | def run(self, model, input, target, batch_idx=0): 38 | input_var = autograd.Variable(input, requires_grad=True) 39 | target_var = autograd.Variable(target) 40 | eps = self.eps 41 | step_alpha = self.step_alpha 42 | 43 | step = 0 44 | while step < self.num_steps: 45 | zero_gradients(input_var) 46 | output = model(input_var) 47 | if not self.targeted and not step: 48 | # for non-targeted, we'll move away from most likely 49 | target_var.data = output.data.max(1)[1] 50 | loss = self.loss_fn(output, target_var) 51 | loss.backward() 52 | 53 | # normalize and scale gradient 54 | if self.norm == 2: 55 | normed_grad = step_alpha * input_var.grad.data / l2_norm(input_var.grad.data) 56 | elif self.norm == 1: 57 | normed_grad = step_alpha * input_var.grad.data / l1_norm(input_var.grad.data) 58 | else: 59 | # infinity-norm 60 | normed_grad = step_alpha * torch.sign(input_var.grad.data) 61 | 62 | # perturb current input image by normalized and scaled gradient 63 | if self.targeted: 64 | step_adv = input_var.data - normed_grad 65 | else: 66 | step_adv = input_var.data + normed_grad 67 | 68 | # calculate total adversarial perturbation from original image and clip to epsilon constraints 69 | total_adv = step_adv - input 70 | if self.norm == 2: 71 | # total_adv = eps * total_adv / l2norm(total_adv) 72 | total_adv = torch.clamp(total_adv, -eps, eps) 73 | elif self.norm == 1: 74 | # total_adv = eps * total_adv / l1norm(total_adv) 75 | total_adv = torch.clamp(total_adv, -eps, eps) 76 | else: 77 | # infinity-norm 78 | total_adv = torch.clamp(total_adv, -eps, eps) 79 | 80 | if self.debug: 81 | print('batch:', batch_idx, 'step:', step, total_adv.mean(), total_adv.min(), total_adv.max()) 82 | sys.stdout.flush() 83 | 84 | # apply total adversarial perturbation to original image and clip to valid pixel range 85 | input_adv = input + total_adv 86 | input_adv = torch.clamp(input_adv, -1.0, 1.0) 87 | input_var.data = input_adv 88 | step += 1 89 | 90 | return input_adv.permute(0, 2, 3, 1).cpu().numpy() 91 | -------------------------------------------------------------------------------- /attacks/helpers.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import operator as op 3 | import functools as ft 4 | 5 | 6 | '''reduce_* helper functions reduce tensors on all dimensions but the first. 7 | They are intended to be used on batched tensors where dim 0 is the batch dim. 8 | ''' 9 | 10 | 11 | def reduce_sum(x, keepdim=True): 12 | # silly PyTorch, when will you get proper reducing sums/means? 13 | for a in reversed(range(1, x.dim())): 14 | x = x.sum(a, keepdim=keepdim) 15 | return x 16 | 17 | 18 | def reduce_mean(x, keepdim=True): 19 | numel = ft.reduce(op.mul, x.size()[1:]) 20 | x = reduce_sum(x, keepdim=keepdim) 21 | return x / numel 22 | 23 | 24 | def reduce_min(x, keepdim=True): 25 | for a in reversed(range(1, x.dim())): 26 | x = x.min(a, keepdim=keepdim)[0] 27 | return x 28 | 29 | 30 | def reduce_max(x, keepdim=True): 31 | for a in reversed(range(1, x.dim())): 32 | x = x.max(a, keepdim=keepdim)[0] 33 | return x 34 | 35 | 36 | def torch_arctanh(x, eps=1e-6): 37 | x *= (1. - eps) 38 | return (torch.log((1 + x) / (1 - x))) * 0.5 39 | 40 | 41 | def l2r_dist(x, y, keepdim=True, eps=1e-8): 42 | d = (x - y)**2 43 | d = reduce_sum(d, keepdim=keepdim) 44 | d += eps # to prevent infinite gradient at 0 45 | return d.sqrt() 46 | 47 | 48 | def l2_dist(x, y, keepdim=True): 49 | d = (x - y)**2 50 | return reduce_sum(d, keepdim=keepdim) 51 | 52 | 53 | def l1_dist(x, y, keepdim=True): 54 | d = torch.abs(x - y) 55 | return reduce_sum(d, keepdim=keepdim) 56 | 57 | 58 | def l2_norm(x, keepdim=True): 59 | norm = reduce_sum(x*x, keepdim=keepdim) 60 | return norm.sqrt() 61 | 62 | 63 | def l1_norm(x, keepdim=True): 64 | return reduce_sum(x.abs(), keepdim=keepdim) 65 | 66 | 67 | def rescale(x, x_min=-1., x_max=1.): 68 | return x * (x_max - x_min) + x_min 69 | 70 | 71 | def tanh_rescale(x, x_min=-1., x_max=1.): 72 | return (torch.tanh(x) + 1) * 0.5 * (x_max - x_min) + x_min 73 | -------------------------------------------------------------------------------- /dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import os.path 3 | import torch 4 | import pandas as pd 5 | 6 | import torch.utils.data as data 7 | import torchvision.transforms as transforms 8 | from PIL import Image 9 | 10 | IMG_EXTENSIONS = ['.png', '.jpg'] 11 | 12 | 13 | class LeNormalize(object): 14 | """Normalize to -1..1 in Google Inception style 15 | """ 16 | def __call__(self, tensor): 17 | for t in tensor: 18 | t.sub_(0.5).mul_(2.0) 19 | return tensor 20 | 21 | 22 | def default_inception_transform(img_size): 23 | tf = transforms.Compose([ 24 | transforms.Scale(img_size), 25 | transforms.CenterCrop(img_size), 26 | transforms.ToTensor(), 27 | LeNormalize(), 28 | ]) 29 | return tf 30 | 31 | 32 | def find_inputs(folder, filename_to_target=None, types=IMG_EXTENSIONS): 33 | inputs = [] 34 | for root, _, files in os.walk(folder, topdown=False): 35 | for rel_filename in files: 36 | base, ext = os.path.splitext(rel_filename) 37 | if ext.lower() in types: 38 | abs_filename = os.path.join(root, rel_filename) 39 | target = filename_to_target[rel_filename] if filename_to_target else 0 40 | inputs.append((abs_filename, target)) 41 | return inputs 42 | 43 | 44 | class Dataset(data.Dataset): 45 | 46 | def __init__( 47 | self, 48 | root, 49 | target_file='target_class.csv', 50 | transform=None): 51 | 52 | if target_file: 53 | target_df = pd.read_csv(os.path.join(root, target_file), header=None) 54 | f_to_t = dict(zip(target_df[0], target_df[1] - 1)) # -1 for 0-999 class ids 55 | else: 56 | f_to_t = dict() 57 | imgs = find_inputs(root, filename_to_target=f_to_t) 58 | if len(imgs) == 0: 59 | raise(RuntimeError("Found 0 images in subfolders of: " + root + "\n" 60 | "Supported image extensions are: " + ",".join(IMG_EXTENSIONS))) 61 | 62 | self.root = root 63 | self.imgs = imgs 64 | self.transform = transform 65 | 66 | def __getitem__(self, index): 67 | path, target = self.imgs[index] 68 | img = Image.open(path).convert('RGB') 69 | if self.transform is not None: 70 | img = self.transform(img) 71 | if target is None: 72 | target = torch.zeros(1).long() 73 | return img, target 74 | 75 | def __len__(self): 76 | return len(self.imgs) 77 | 78 | def set_transform(self, transform): 79 | self.transform = transform 80 | 81 | def filenames(self, indices=[], basename=False): 82 | if indices: 83 | if basename: 84 | return [os.path.basename(self.imgs[i][0]) for i in indices] 85 | else: 86 | return [self.imgs[i][0] for i in indices] 87 | else: 88 | if basename: 89 | return [os.path.basename(x[0]) for x in self.imgs] 90 | else: 91 | return [x[0] for x in self.imgs] 92 | -------------------------------------------------------------------------------- /download_checkpoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | wget https://download.pytorch.org/models/inception_v3_google-1a9a5a14.pth 4 | -------------------------------------------------------------------------------- /metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "targeted_attack", 3 | "container": "rwightman/pytorch-extra", 4 | "container_gpu": "rwightman/pytorch-extra", 5 | "entry_point": "run_attack.sh" 6 | } 7 | -------------------------------------------------------------------------------- /run_attack.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # run_attack.sh is a script which executes the attack 4 | # 5 | # Envoronment which runs attacks and defences calls it in a following way: 6 | # run_attack.sh INPUT_DIR OUTPUT_DIR MAX_EPSILON 7 | # where: 8 | # INPUT_DIR - directory with input PNG images 9 | # OUTPUT_DIR - directory where adversarial images should be written 10 | # MAX_EPSILON - maximum allowed L_{\infty} norm of adversarial perturbation 11 | # 12 | 13 | INPUT_DIR=$1 14 | OUTPUT_DIR=$2 15 | MAX_EPSILON=$3 16 | 17 | python run_attack_iter.py \ 18 | --input_dir="${INPUT_DIR}" \ 19 | --output_dir="${OUTPUT_DIR}" \ 20 | --max_epsilon="${MAX_EPSILON}" \ 21 | --targeted \ 22 | --checkpoint_path=inception_v3_google-1a9a5a14.pth 23 | 24 | -------------------------------------------------------------------------------- /run_attack_cwl2.py: -------------------------------------------------------------------------------- 1 | """Pytorch Carlini and Wagner L2 attack runner. 2 | 3 | """ 4 | 5 | from __future__ import absolute_import 6 | from __future__ import division 7 | from __future__ import print_function 8 | 9 | import argparse 10 | 11 | from attacks import AttackCarliniWagnerL2 12 | from attack import run_attack 13 | 14 | parser = argparse.ArgumentParser(description='Defence') 15 | parser.add_argument('--input_dir', metavar='DIR', 16 | help='Input directory with images.') 17 | parser.add_argument('--output_dir', metavar='FILE', 18 | help='Output directory to save images.') 19 | parser.add_argument('--checkpoint_path', default=None, 20 | help='Path to network checkpoint.') 21 | parser.add_argument('--img_size', type=int, default=299, metavar='N', 22 | help='Image patch size (default: 299)') 23 | parser.add_argument('--batch_size', type=int, default=32, metavar='N', 24 | help='Batch size (default: 32)') 25 | parser.add_argument('--max_epsilon', type=int, default=16, metavar='N', 26 | help='Maximum size of adversarial perturbation. (default: 16.0)') 27 | parser.add_argument('--steps', type=int, default=None, metavar='N', 28 | help='Number of optimization steps to run attack for (default: 1000)') 29 | parser.add_argument('--search_steps', type=int, default=None, metavar='N', 30 | help='Number of binary search steps to run attack for (default: 6)') 31 | parser.add_argument('--targeted', action='store_true', default=False, 32 | help='Targeted attack') 33 | parser.add_argument('--no_gpu', action='store_true', default=False, 34 | help='Disable GPU training') 35 | parser.add_argument('--debug', action='store_true', default=False, 36 | help='Enable verbose debug output') 37 | 38 | 39 | def main(): 40 | args = parser.parse_args() 41 | attack = AttackCarliniWagnerL2( 42 | targeted=args.targeted, 43 | max_steps=args.steps, 44 | search_steps=args.search_steps, 45 | cuda=not args.no_gpu, 46 | debug=args.debug) 47 | 48 | run_attack(args, attack) 49 | 50 | if __name__ == '__main__': 51 | main() 52 | -------------------------------------------------------------------------------- /run_attack_iter.py: -------------------------------------------------------------------------------- 1 | """Pytorch Iterate Fast-Gradient attack runner. 2 | """ 3 | 4 | from __future__ import absolute_import 5 | from __future__ import division 6 | from __future__ import print_function 7 | 8 | import argparse 9 | from attacks import AttackIterative 10 | from attack import run_attack 11 | 12 | parser = argparse.ArgumentParser(description='Defence') 13 | parser.add_argument('--input_dir', metavar='DIR', 14 | help='Input directory with images.') 15 | parser.add_argument('--output_dir', metavar='FILE', 16 | help='Output directory to save images.') 17 | parser.add_argument('--checkpoint_path', default=None, 18 | help='Path to network checkpoint.') 19 | parser.add_argument('--img_size', type=int, default=299, metavar='N', 20 | help='Image patch size (default: 299)') 21 | parser.add_argument('--batch_size', type=int, default=32, metavar='N', 22 | help='Batch size (default: 32)') 23 | parser.add_argument('--max_epsilon', type=int, default=16, metavar='N', 24 | help='Maximum size of adversarial perturbation. (default: 16.0)') 25 | parser.add_argument('--steps', type=int, default=10, metavar='N', 26 | help='Number of steps to run attack for') 27 | parser.add_argument('--step_alpha', type=float, default=0.0, 28 | help='Per step scaling constant, defaults to epsilon/steps') 29 | parser.add_argument('--norm', default='inf', type=float, 30 | help='Gradient norm.') 31 | parser.add_argument('--targeted', action='store_true', default=False, 32 | help='Targeted attack') 33 | parser.add_argument('--no_gpu', action='store_true', default=False, 34 | help='Disable GPU training') 35 | parser.add_argument('--debug', action='store_true', default=False, 36 | help='Enable verbose debug output') 37 | 38 | 39 | def main(): 40 | args = parser.parse_args() 41 | attack = AttackIterative( 42 | targeted=args.targeted, 43 | max_epsilon=args.max_epsilon, 44 | norm=args.norm, 45 | step_alpha=args.step_alpha, 46 | num_steps=args.steps, 47 | cuda=not args.no_gpu, 48 | debug=args.debug) 49 | 50 | run_attack(args, attack) 51 | 52 | if __name__ == '__main__': 53 | main() 54 | --------------------------------------------------------------------------------