├── LICENSE.txt
├── README.md
├── attack.py
├── attacks
    ├── __init__.py
    ├── attack_carlini_wagner_l2.py
    ├── attack_iterative.py
    └── helpers.py
├── dataset.py
├── download_checkpoint.sh
├── metadata.json
├── run_attack.sh
├── run_attack_cwl2.py
└── run_attack_iter.py


/LICENSE.txt:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "{}"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright {yyyy} {name of copyright owner}
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # pytorch-nips2017-attack-example
 2 | 
 3 | This is a baseline targeted (or untargeted) attack that works within the Cleverhans (https://github.com/tensorflow/cleverhans) framework for the NIPS-2017 adversarial competition. 
 4 | 
 5 | There are two types of attacks included, an iterative fast-gradient method, and a Carlini and Wagner L2 attack.
 6 | 
 7 | ## Iterative Fast-Gradient
 8 | 
 9 | These attacks are modeled after the 'basic iterative' / 'iterative FGSM' attack mentioned in https://arxiv.org/abs/1611.01236 and https://arxiv.org/abs/1705.07204 (among others).
10 | 
11 | The default setup is to run a targeted L-infinity norm variant of the attack with 10 steps. L1 or L2 based attacks seem to require around 40-50 steps with the current code to perform a reasonable attack.
12 | 
13 | ## Carlini and Wagner L2
14 | 
15 | An implementation of the L2 variant of the attack described in this paper https://arxiv.org/abs/1608.04644 by Carlini and Wagner. Based on a reference implementation by Carlini at https://github.com/carlini/nn_robust_attacks and  https://github.com/tensorflow/cleverhans/blob/master/cleverhans/attacks_tf.py
16 | 
17 | NOTE: I'm still verifying and experimenting with this attack. It takes MUCH longer (half a day) to run and produces much more subtle results that I'm having difficulty successfully transferring as a targeted attack to other models...
18 | 
19 | ## Usage
20 | 
21 | To run:
22 | 1. Setup and verify cleverhans nips17 adversarial competition example environment
23 | 2. Clone this repo
24 | 3. Run ./download_checkpoint.sh to download the inceptionv3 checkpoint from torchvision model zoo
25 | 4. Create a symbolic link to the folder this repo was cloned into inside the cleverhans 'examples/nips17_adversarial_competition/sample_targeted_attacks/' folder
26 | 5. Run run_attacks_and_defenses.sh and ensure '--gpu' flag is added
27 | 
28 | 
29 | To switch between attacks and alter parameters of the attack, command line args in the run_attack.sh script need modification.
30 | 
31 | Iterative non-targeted L1: 
32 | ```
33 | python run_attack_iter.py \
34 |   --input_dir="${INPUT_DIR}" \
35 |   --output_dir="${OUTPUT_DIR}" \
36 |   --max_epsilon="${MAX_EPSILON}" \
37 |   --steps 50 \
38 |   --norm 1 \
39 |   --checkpoint_path=inception_v3_google-1a9a5a14.pth
40 | ```
41 | 
42 | Iterative targeted L2:
43 | ```
44 | python run_attack_iter.py \
45 |   --input_dir="${INPUT_DIR}" \
46 |   --output_dir="${OUTPUT_DIR}" \
47 |   --max_epsilon="${MAX_EPSILON}" \
48 |   --steps 42 \
49 |   --targeted \
50 |   --norm 2 \
51 |   --checkpoint_path=inception_v3_google-1a9a5a14.pth
52 | ```
53 | 
54 | Carlini and Wagner L2:
55 | ```
56 | python run_attack_cwl2.py \
57 |   --input_dir="${INPUT_DIR}" \
58 |   --output_dir="${OUTPUT_DIR}" \
59 |   --max_epsilon="${MAX_EPSILON}" \
60 |   --targeted \
61 |   --checkpoint_path=inception_v3_google-1a9a5a14.pth
62 | ```
63 | 
64 | 
65 | 
66 | 


--------------------------------------------------------------------------------
/attack.py:
--------------------------------------------------------------------------------
 1 | """Attack loop
 2 | """
 3 | 
 4 | from __future__ import absolute_import
 5 | from __future__ import division
 6 | from __future__ import print_function
 7 | 
 8 | import os
 9 | import numpy as np
10 | import torch
11 | import torchvision
12 | import torch.utils.data as data
13 | 
14 | from scipy.misc import imsave
15 | from dataset import Dataset, default_inception_transform
16 | 
17 | 
def run_attack(args, attack):
    """Run ``attack`` on every image found in ``args.input_dir`` and save the results.

    An Inception-v3 network is built, its weights are loaded from
    ``args.checkpoint_path`` (when that file exists), and each batch produced by
    the data loader is handed to ``attack.run``.  Every adversarial image that
    comes back is written to ``args.output_dir`` as a PNG named after its source
    file.

    Args:
        args: Parsed command line options.  The attributes read here are
            ``input_dir``, ``output_dir``, ``targeted``, ``img_size``,
            ``batch_size``, ``no_gpu`` and ``checkpoint_path``.
        attack: Object exposing ``run(model, input, target, batch_idx)`` that
            returns one adversarial image per input image.  The images are
            rescaled with ``(x + 1) / 2`` before saving, so they are presumably
            expected in the [-1, 1] range -- confirm against each attack.
    """
    assert args.input_dir

    transform = default_inception_transform(args.img_size)
    if args.targeted:
        dataset = Dataset(args.input_dir, transform=transform)
    else:
        # An empty target file name is passed for the non-targeted case.
        dataset = Dataset(args.input_dir, target_file='', transform=transform)

    loader = data.DataLoader(
        dataset,
        batch_size=args.batch_size,
        shuffle=False)

    use_gpu = not args.no_gpu

    model = torchvision.models.inception_v3(pretrained=False, transform_input=False)
    if use_gpu:
        model = model.cuda()

    if args.checkpoint_path is not None and os.path.isfile(args.checkpoint_path):
        # Always deserialize onto the CPU: load_state_dict() copies the values
        # into the model's own parameters afterwards, and this keeps checkpoints
        # that were saved from a GPU loadable on CPU-only machines (--no_gpu).
        checkpoint = torch.load(
            args.checkpoint_path,
            map_location=lambda storage, loc: storage)
        if isinstance(checkpoint, dict) and 'state_dict' in checkpoint:
            model.load_state_dict(checkpoint['state_dict'])
        else:
            model.load_state_dict(checkpoint)
    else:
        # Best effort: the attack still runs, but against untrained weights.
        print("Error: No checkpoint found at %s." % args.checkpoint_path)

    model.eval()

    # Make sure the destination exists before the first image is written.
    if args.output_dir and not os.path.isdir(args.output_dir):
        os.makedirs(args.output_dir)

    for batch_idx, (batch_input, target) in enumerate(loader):
        if use_gpu:
            batch_input = batch_input.cuda()
            target = target.cuda()

        input_adv = attack.run(model, batch_input, target, batch_idx)

        # The loader is not shuffled, so the dataset indices of this batch are
        # consecutive and start at batch_size * batch_idx.
        start_index = args.batch_size * batch_idx
        indices = list(range(start_index, start_index + batch_input.size(0)))
        for filename, adv_image in zip(dataset.filenames(indices, basename=True), input_adv):
            output_file = os.path.join(args.output_dir, filename)
            imsave(output_file, (adv_image + 1.0) * 0.5, format='png')
63 | 


--------------------------------------------------------------------------------
/attacks/__init__.py:
--------------------------------------------------------------------------------
1 | from .attack_carlini_wagner_l2 import AttackCarliniWagnerL2
2 | from .attack_iterative import AttackIterative


--------------------------------------------------------------------------------
/attacks/attack_carlini_wagner_l2.py:
--------------------------------------------------------------------------------
  1 | """PyTorch Carlini and Wagner L2 attack algorithm.
  2 | 
  3 | Based on paper by Carlini & Wagner, https://arxiv.org/abs/1608.04644 and a reference implementation at
  4 | https://github.com/tensorflow/cleverhans/blob/master/cleverhans/attacks_tf.py
  5 | """
  6 | import os
  7 | import sys
  8 | import torch
  9 | import numpy as np
 10 | from torch import optim
 11 | from torch import autograd
 12 | from .helpers import *
 13 | 
 14 | 
 15 | class AttackCarliniWagnerL2:
 16 | 
 17 |     def __init__(self, targeted=True, search_steps=None, max_steps=None, cuda=True, debug=False):
 18 |         self.debug = debug
 19 |         self.targeted = targeted
 20 |         self.num_classes = 1000
 21 |         self.confidence = 20  # FIXME need to find a good value for this, 0 value used in paper not doing much...
 22 |         self.initial_const = 0.1  # bumped up from default of .01 in reference code
 23 |         self.binary_search_steps = search_steps or 5
 24 |         self.repeat = self.binary_search_steps >= 10
 25 |         self.max_steps = max_steps or 1000
 26 |         self.abort_early = True
 27 |         self.clip_min = -1.
 28 |         self.clip_max = 1.
 29 |         self.cuda = cuda
 30 |         self.clamp_fn = 'tanh'  # set to something else perform a simple clamp instead of tanh
 31 |         self.init_rand = False  # an experiment, does a random starting point help?
 32 | 
 33 |     def _compare(self, output, target):
 34 |         if not isinstance(output, (float, int, np.int64)):
 35 |             output = np.copy(output)
 36 |             if self.targeted:
 37 |                 output[target] -= self.confidence
 38 |             else:
 39 |                 output[target] += self.confidence
 40 |             output = np.argmax(output)
 41 |         if self.targeted:
 42 |             return output == target
 43 |         else:
 44 |             return output != target
 45 | 
 46 |     def _loss(self, output, target, dist, scale_const):
 47 |         # compute the probability of the label class versus the maximum other
 48 |         real = (target * output).sum(1)
 49 |         other = ((1. - target) * output - target * 10000.).max(1)[0]
 50 |         if self.targeted:
 51 |             # if targeted, optimize for making the other class most likely
 52 |             loss1 = torch.clamp(other - real + self.confidence, min=0.)  # equiv to max(..., 0.)
 53 |         else:
 54 |             # if non-targeted, optimize for making this class least likely.
 55 |             loss1 = torch.clamp(real - other + self.confidence, min=0.)  # equiv to max(..., 0.)
 56 |         loss1 = torch.sum(scale_const * loss1)
 57 | 
 58 |         loss2 = dist.sum()
 59 | 
 60 |         loss = loss1 + loss2
 61 |         return loss
 62 | 
 63 |     def _optimize(self, optimizer, model, input_var, modifier_var, target_var, scale_const_var, input_orig=None):
 64 |         # apply modifier and clamp resulting image to keep bounded from clip_min to clip_max
 65 |         if self.clamp_fn == 'tanh':
 66 |             input_adv = tanh_rescale(modifier_var + input_var, self.clip_min, self.clip_max)
 67 |         else:
 68 |             input_adv = torch.clamp(modifier_var + input_var, self.clip_min, self.clip_max)
 69 | 
 70 |         output = model(input_adv)
 71 | 
 72 |         # distance to the original input data
 73 |         if input_orig is None:
 74 |             dist = l2_dist(input_adv, input_var, keepdim=False)
 75 |         else:
 76 |             dist = l2_dist(input_adv, input_orig, keepdim=False)
 77 | 
 78 |         loss = self._loss(output, target_var, dist, scale_const_var)
 79 | 
 80 |         optimizer.zero_grad()
 81 |         loss.backward()
 82 |         optimizer.step()
 83 | 
 84 |         loss_np = loss.data[0]
 85 |         dist_np = dist.data.cpu().numpy()
 86 |         output_np = output.data.cpu().numpy()
 87 |         input_adv_np = input_adv.data.permute(0, 2, 3, 1).cpu().numpy()  # back to BHWC for numpy consumption
 88 |         return loss_np, dist_np, output_np, input_adv_np
 89 | 
 90 |     def run(self, model, input, target, batch_idx=0):
 91 |         batch_size = input.size(0)
 92 | 
 93 |         # set the lower and upper bounds accordingly
 94 |         lower_bound = np.zeros(batch_size)
 95 |         scale_const = np.ones(batch_size) * self.initial_const
 96 |         upper_bound = np.ones(batch_size) * 1e10
 97 | 
 98 |         # python/numpy placeholders for the overall best l2, label score, and adversarial image
 99 |         o_best_l2 = [1e10] * batch_size
100 |         o_best_score = [-1] * batch_size
101 |         o_best_attack = input.permute(0, 2, 3, 1).cpu().numpy()
102 | 
103 |         # setup input (image) variable, clamp/scale as necessary
104 |         if self.clamp_fn == 'tanh':
105 |             # convert to tanh-space, input already int -1 to 1 range, does it make sense to do
106 |             # this as per the reference implementation or can we skip the arctanh?
107 |             input_var = autograd.Variable(torch_arctanh(input), requires_grad=False)
108 |             input_orig = tanh_rescale(input_var, self.clip_min, self.clip_max)
109 |         else:
110 |             input_var = autograd.Variable(input, requires_grad=False)
111 |             input_orig = None
112 | 
113 |         # setup the target variable, we need it to be in one-hot form for the loss function
114 |         target_onehot = torch.zeros(target.size() + (self.num_classes,))
115 |         if self.cuda:
116 |             target_onehot = target_onehot.cuda()
117 |         target_onehot.scatter_(1, target.unsqueeze(1), 1.)
118 |         target_var = autograd.Variable(target_onehot, requires_grad=False)
119 | 
120 |         # setup the modifier variable, this is the variable we are optimizing over
121 |         modifier = torch.zeros(input_var.size()).float()
122 |         if self.init_rand:
123 |             # Experiment with a non-zero starting point...
124 |             modifier = torch.normal(means=modifier, std=0.001)
125 |         if self.cuda:
126 |             modifier = modifier.cuda()
127 |         modifier_var = autograd.Variable(modifier, requires_grad=True)
128 | 
129 |         optimizer = optim.Adam([modifier_var], lr=0.0005)
130 | 
131 |         for search_step in range(self.binary_search_steps):
132 |             print('Batch: {0:>3}, search step: {1}'.format(batch_idx, search_step))
133 |             if self.debug:
134 |                 print('Const:')
135 |                 for i, x in enumerate(scale_const):
136 |                     print(i, x)
137 |             best_l2 = [1e10] * batch_size
138 |             best_score = [-1] * batch_size
139 | 
140 |             # The last iteration (if we run many steps) repeat the search once.
141 |             if self.repeat and search_step == self.binary_search_steps - 1:
142 |                 scale_const = upper_bound
143 | 
144 |             scale_const_tensor = torch.from_numpy(scale_const).float()
145 |             if self.cuda:
146 |                 scale_const_tensor = scale_const_tensor.cuda()
147 |             scale_const_var = autograd.Variable(scale_const_tensor, requires_grad=False)
148 | 
149 |             prev_loss = 1e6
150 |             for step in range(self.max_steps):
151 |                 # perform the attack
152 |                 loss, dist, output, adv_img = self._optimize(
153 |                     optimizer,
154 |                     model,
155 |                     input_var,
156 |                     modifier_var,
157 |                     target_var,
158 |                     scale_const_var,
159 |                     input_orig)
160 | 
161 |                 if step % 100 == 0 or step == self.max_steps - 1:
162 |                     print('Step: {0:>4}, loss: {1:6.4f}, dist: {2:8.5f}, modifier mean: {3:.5e}'.format(
163 |                         step, loss, dist.mean(), modifier_var.data.mean()))
164 | 
165 |                 if self.abort_early and step % (self.max_steps // 10) == 0:
166 |                     if loss > prev_loss * .9999:
167 |                         print('Aborting early...')
168 |                         break
169 |                     prev_loss = loss
170 | 
171 |                 # update best result found
172 |                 for i in range(batch_size):
173 |                     target_label = target[i]
174 |                     output_logits = output[i]
175 |                     output_label = np.argmax(output_logits)
176 |                     di = dist[i]
177 |                     if self.debug:
178 |                         if step % 100 == 0:
179 |                             print('{0:>2} dist: {1:.5f}, output: {2:>3}, {3:5.3}, target {4:>3}'.format(
180 |                                 i, di, output_label, output_logits[output_label], target_label))
181 |                     if di < best_l2[i] and self._compare(output_logits, target_label):
182 |                         if self.debug:
183 |                             print('{0:>2} best step,  prev dist: {1:.5f}, new dist: {2:.5f}'.format(
184 |                                   i, best_l2[i], di))
185 |                         best_l2[i] = di
186 |                         best_score[i] = output_label
187 |                     if di < o_best_l2[i] and self._compare(output_logits, target_label):
188 |                         if self.debug:
189 |                             print('{0:>2} best total, prev dist: {1:.5f}, new dist: {2:.5f}'.format(
190 |                                   i, o_best_l2[i], di))
191 |                         o_best_l2[i] = di
192 |                         o_best_score[i] = output_label
193 |                         o_best_attack[i] = adv_img[i]
194 | 
195 |                 sys.stdout.flush()
196 |                 # end inner step loop
197 | 
198 |             # adjust the constants
199 |             batch_failure = 0
200 |             batch_success = 0
201 |             for i in range(batch_size):
202 |                 if self._compare(best_score[i], target[i]) and best_score[i] != -1:
203 |                     # successful, do binary search and divide const by two
204 |                     upper_bound[i] = min(upper_bound[i], scale_const[i])
205 |                     if upper_bound[i] < 1e9:
206 |                         scale_const[i] = (lower_bound[i] + upper_bound[i]) / 2
207 |                     if self.debug:
208 |                         print('{0:>2} successful attack, lowering const to {1:.3f}'.format(
209 |                             i, scale_const[i]))
210 |                 else:
211 |                     # failure, multiply by 10 if no solution found
212 |                     # or do binary search with the known upper bound
213 |                     lower_bound[i] = max(lower_bound[i], scale_const[i])
214 |                     if upper_bound[i] < 1e9:
215 |                         scale_const[i] = (lower_bound[i] + upper_bound[i]) / 2
216 |                     else:
217 |                         scale_const[i] *= 10
218 |                     if self.debug:
219 |                         print('{0:>2} failed attack, raising const to {1:.3f}'.format(
220 |                             i, scale_const[i]))
221 |                 if self._compare(o_best_score[i], target[i]) and o_best_score[i] != -1:
222 |                     batch_success += 1
223 |                 else:
224 |                     batch_failure += 1
225 | 
226 |             print('Num failures: {0:2d}, num successes: {1:2d}\n'.format(batch_failure, batch_success))
227 |             sys.stdout.flush()
228 |             # end outer search loop
229 | 
230 |         return o_best_attack
231 | 


--------------------------------------------------------------------------------
/attacks/attack_iterative.py:
--------------------------------------------------------------------------------
 1 | """Pytorch Iterative Fast-Gradient attack algorithm
 2 | """
 3 | import sys
 4 | import torch
 5 | from torch import autograd
 6 | from torch.autograd.gradcheck import zero_gradients
 7 | from .helpers import *
 8 | 
 9 | 
class AttackIterative:
    """Iterative fast-gradient attack with L-inf, L2 or L1 gradient scaling.

    Images handled by `run` live in the -1..1 range (the final clamp in `run`
    enforces it), so `max_epsilon`, given on a 0..255 scale, is converted to
    that range on construction.
    """

    # Lower bound applied to a gradient norm before dividing by it, so that an
    # all-zero gradient yields a zero step instead of NaN pixels.
    _MIN_GRAD_NORM = 1e-12

    def __init__(
            self,
            targeted=True, max_epsilon=16, norm=float('inf'),
            step_alpha=None, num_steps=None, cuda=True, debug=False):
        """Configure the attack.

        Args:
            targeted: if true, step towards the supplied target class;
                otherwise step away from the model's initial prediction.
            max_epsilon: perturbation bound on a 0..255 pixel scale.
            norm: gradient normalisation; 1, 2, or anything else for L-inf.
            step_alpha: per-step scale; a falsy value selects a default that
                depends on `norm`.
            num_steps: number of iterations; a falsy value selects 10.
            cuda: if true, move the loss module to the GPU.
            debug: if true, print perturbation statistics on every step.
        """
        self.targeted = targeted
        # The pixel range -1..1 spans 2.0, hence the factor of two.
        self.eps = 2.0 * max_epsilon / 255.0
        self.num_steps = num_steps or 10
        self.norm = norm
        if step_alpha:
            self.step_alpha = step_alpha
        elif norm == float('inf'):
            # Spread the whole epsilon budget evenly over the steps.
            self.step_alpha = self.eps / self.num_steps
        elif norm == 1:
            # Different scaling required for L2 and L1 norms to get anywhere
            self.step_alpha = 500.0  # L1 needs a lot of (arbitrary) love
        else:
            self.step_alpha = 1.0
        self.loss_fn = torch.nn.CrossEntropyLoss()
        if cuda:
            self.loss_fn = self.loss_fn.cuda()
        self.debug = debug

    def run(self, model, input, target, batch_idx=0):
        """Generate adversarial examples for one batch.

        Args:
            model: callable mapping the input batch to per-class scores
                (its output is fed to CrossEntropyLoss).
            input: image batch tensor; must be 4-D because the result is
                permuted with (0, 2, 3, 1).
            target: class-index tensor; ignored for non-targeted attacks,
                where the model's prediction on the first step is used.
            batch_idx: batch number, used only in debug output.

        Returns:
            numpy array holding the adversarial batch with its dimensions
            permuted (0, 2, 3, 1), i.e. channels-last if `input` is NCHW.
        """
        input_var = autograd.Variable(input, requires_grad=True)
        target_var = autograd.Variable(target)
        eps = self.eps
        step_alpha = self.step_alpha

        step = 0
        while step < self.num_steps:
            # Clear the gradient left over from the previous step. This is done
            # inline rather than through torch.autograd.gradcheck.zero_gradients,
            # which is no longer available in recent PyTorch releases.
            if input_var.grad is not None:
                input_var.grad.detach_()
                input_var.grad.zero_()
            output = model(input_var)
            if not self.targeted and not step:
                # for non-targeted, we'll move away from most likely
                target_var.data = output.data.max(1)[1]
            loss = self.loss_fn(output, target_var)
            loss.backward()

            # normalize and scale gradient
            grad = input_var.grad.data
            if self.norm == 2:
                grad_norm = l2_norm(grad).clamp(min=self._MIN_GRAD_NORM)
                normed_grad = step_alpha * grad / grad_norm
            elif self.norm == 1:
                grad_norm = l1_norm(grad).clamp(min=self._MIN_GRAD_NORM)
                normed_grad = step_alpha * grad / grad_norm
            else:
                # infinity-norm
                normed_grad = step_alpha * torch.sign(grad)

            # perturb current input image by normalized and scaled gradient:
            # descend the loss towards the target, or ascend it away from the
            # original prediction.
            if self.targeted:
                step_adv = input_var.data - normed_grad
            else:
                step_adv = input_var.data + normed_grad

            # calculate total adversarial perturbation from original image and
            # clip to epsilon constraints; the same element-wise clamp is used
            # for every norm setting.
            total_adv = torch.clamp(step_adv - input, -eps, eps)

            if self.debug:
                print('batch:', batch_idx, 'step:', step, total_adv.mean(), total_adv.min(), total_adv.max())
                sys.stdout.flush()

            # apply total adversarial perturbation to original image and clip to valid pixel range
            input_adv = input + total_adv
            input_adv = torch.clamp(input_adv, -1.0, 1.0)
            input_var.data = input_adv
            step += 1

        return input_adv.permute(0, 2, 3, 1).cpu().numpy()
91 | 


--------------------------------------------------------------------------------
/attacks/helpers.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import operator as op
 3 | import functools as ft
 4 | 
 5 | 
 6 | '''reduce_* helper functions reduce tensors on all dimensions but the first.
 7 | They are intended to be used on batched tensors where dim 0 is the batch dim.
 8 | '''
 9 | 
10 | 
def reduce_sum(x, keepdim=True):
    """Sum `x` over every dimension except dim 0 (the batch dimension)."""
    # Reduce from the last axis down to axis 1 so that, with keepdim=False,
    # removing an axis never shifts the index of one still to be reduced.
    axis = x.dim() - 1
    while axis >= 1:
        x = x.sum(axis, keepdim=keepdim)
        axis -= 1
    return x
16 | 
17 | 
def reduce_mean(x, keepdim=True):
    """Average `x` over every dimension except dim 0 (the batch dimension).

    A 1-D input has no non-batch dimensions; it is returned divided by 1
    rather than raising, because the element count is seeded with 1.
    """
    # Number of elements per batch item; the initial value 1 keeps reduce()
    # from failing on the empty size tuple of a 1-D tensor.
    numel = ft.reduce(op.mul, x.size()[1:], 1)
    return reduce_sum(x, keepdim=keepdim) / numel
22 | 
23 | 
def reduce_min(x, keepdim=True):
    """Take the minimum of `x` over every dimension except dim 0."""
    # Highest axis first, so axis indices stay valid when keepdim=False.
    for axis in range(x.dim() - 1, 0, -1):
        values, _ = x.min(axis, keepdim=keepdim)
        x = values
    return x
28 | 
29 | 
def reduce_max(x, keepdim=True):
    """Take the maximum of `x` over every dimension except dim 0."""
    # Highest axis first, so axis indices stay valid when keepdim=False.
    for axis in range(x.dim() - 1, 0, -1):
        values, _ = x.max(axis, keepdim=keepdim)
        x = values
    return x
34 | 
35 | 
def torch_arctanh(x, eps=1e-6):
    """Return the inverse hyperbolic tangent of `x`, element-wise.

    `x` is first shrunk by a factor of (1 - eps) so that values of exactly
    +/-1 do not produce an infinite result. The caller's tensor is left
    untouched: the scaling is done out of place.
    """
    scaled = x * (1. - eps)
    return (torch.log((1 + scaled) / (1 - scaled))) * 0.5
39 | 
40 | 
def l2r_dist(x, y, keepdim=True, eps=1e-8):
    """Per-batch-item Euclidean distance between `x` and `y`.

    `eps` is added to the summed squares before the square root.
    """
    diff = x - y
    sq_sum = reduce_sum(diff ** 2, keepdim=keepdim)
    sq_sum += eps  # to prevent infinite gradient at 0
    return sq_sum.sqrt()
46 | 
47 | 
def l2_dist(x, y, keepdim=True):
    """Per-batch-item squared Euclidean distance (no square root is taken)."""
    diff = x - y
    squared = diff ** 2
    return reduce_sum(squared, keepdim=keepdim)
51 | 
52 | 
def l1_dist(x, y, keepdim=True):
    """Per-batch-item sum of absolute differences between `x` and `y`."""
    return reduce_sum(torch.abs(x - y), keepdim=keepdim)
56 | 
57 | 
def l2_norm(x, keepdim=True):
    """Per-batch-item Euclidean norm of `x`."""
    sum_of_squares = reduce_sum(x * x, keepdim=keepdim)
    return sum_of_squares.sqrt()
61 | 
62 | 
def l1_norm(x, keepdim=True):
    """Per-batch-item sum of absolute values of `x`."""
    magnitudes = x.abs()
    return reduce_sum(magnitudes, keepdim=keepdim)
65 | 
66 | 
def rescale(x, x_min=-1., x_max=1.):
    """Linearly map `x` so that 0 lands on `x_min` and 1 lands on `x_max`."""
    span = x_max - x_min
    return x * span + x_min
69 | 
70 | 
def tanh_rescale(x, x_min=-1., x_max=1.):
    """Squash `x` with tanh, then map the result into `x_min`..`x_max`."""
    # tanh yields -1..1; shift and halve it into 0..1 before stretching.
    unit = (torch.tanh(x) + 1) * 0.5
    return unit * (x_max - x_min) + x_min
73 | 


--------------------------------------------------------------------------------
/dataset.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import os.path
 3 | import torch
 4 | import pandas as pd
 5 | 
 6 | import torch.utils.data as data
 7 | import torchvision.transforms as transforms
 8 | from PIL import Image
 9 | 
# File extensions treated as images; find_inputs lower-cases each file's
# extension before checking it against this list.
IMG_EXTENSIONS = ['.png', '.jpg']
11 | 
12 | 
class LeNormalize(object):
    """In-place normalisation mapping 0..1 onto -1..1 (Google Inception style)."""

    def __call__(self, tensor):
        # Every slice along the first dimension is shifted and scaled in
        # place, so the tensor that was passed in is the one returned.
        for sub_tensor in tensor:
            sub_tensor.sub_(0.5)
            sub_tensor.mul_(2.0)
        return tensor
20 | 
21 | 
def default_inception_transform(img_size):
    """Build the default preprocessing pipeline for `img_size` inputs.

    The pipeline resizes, centre-crops to `img_size`, converts to a tensor
    and applies LeNormalize.
    """
    # `Scale` is the legacy name of `Resize` and is absent from newer
    # torchvision releases; fall back to it only when `Resize` is missing.
    resize_cls = getattr(transforms, 'Resize', None) or transforms.Scale
    return transforms.Compose([
        resize_cls(img_size),
        transforms.CenterCrop(img_size),
        transforms.ToTensor(),
        LeNormalize(),
    ])
30 | 
31 | 
def find_inputs(folder, filename_to_target=None, types=IMG_EXTENSIONS):
    """Collect (path, target) pairs for the image files found under `folder`.

    A file qualifies when its lower-cased extension is in `types`. Targets
    are looked up by bare file name in `filename_to_target`; when that
    mapping is empty or None every target is 0.
    """
    found = []
    for dirpath, _, filenames in os.walk(folder, topdown=False):
        for name in filenames:
            extension = os.path.splitext(name)[1]
            if extension.lower() not in types:
                continue
            full_path = os.path.join(dirpath, name)
            label = filename_to_target[name] if filename_to_target else 0
            found.append((full_path, label))
    return found
42 | 
43 | 
class Dataset(data.Dataset):
    """Image dataset built from the files under a root directory.

    Each item is an (image, target) pair; targets come from an optional CSV
    file that maps file names to class ids.
    """

    def __init__(
            self,
            root,
            target_file='target_class.csv',
            transform=None):
        """Scan `root` for images and load targets from `target_file`, if given.

        Raises:
            RuntimeError: if no image is found under `root`.
        """
        f_to_t = dict()
        if target_file:
            target_df = pd.read_csv(os.path.join(root, target_file), header=None)
            # Column 0 holds file names, column 1 the class ids.
            f_to_t = dict(zip(target_df[0], target_df[1] - 1))  # -1 for 0-999 class ids

        imgs = find_inputs(root, filename_to_target=f_to_t)
        if not imgs:
            raise RuntimeError(
                "Found 0 images in subfolders of: " + root + "\n"
                "Supported image extensions are: " + ",".join(IMG_EXTENSIONS))

        self.root = root
        self.imgs = imgs
        self.transform = transform

    def __getitem__(self, index):
        """Load image `index` as RGB, apply the transform, return (image, target)."""
        image_path, label = self.imgs[index]
        image = Image.open(image_path).convert('RGB')
        if self.transform is not None:
            image = self.transform(image)
        if label is None:
            label = torch.zeros(1).long()
        return image, label

    def __len__(self):
        """Number of images in the dataset."""
        return len(self.imgs)

    def set_transform(self, transform):
        """Replace the transform applied to each loaded image."""
        self.transform = transform

    def filenames(self, indices=[], basename=False):
        """Return image paths, for `indices` if given, otherwise for all images.

        With `basename` set, only the final path component is returned.
        """
        entries = [self.imgs[i] for i in indices] if indices else self.imgs
        paths = [entry[0] for entry in entries]
        if basename:
            return [os.path.basename(path) for path in paths]
        return paths
92 | 


--------------------------------------------------------------------------------
/download_checkpoint.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | wget https://download.pytorch.org/models/inception_v3_google-1a9a5a14.pth
4 | 


--------------------------------------------------------------------------------
/metadata.json:
--------------------------------------------------------------------------------
1 | {
2 |   "type": "targeted_attack",
3 |   "container": "rwightman/pytorch-extra",
4 |   "container_gpu": "rwightman/pytorch-extra",
5 |   "entry_point": "run_attack.sh"
6 | }
7 | 


--------------------------------------------------------------------------------
/run_attack.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #
 3 | # run_attack.sh is a script which executes the attack
 4 | #
 5 | # Envoronment which runs attacks and defences calls it in a following way:
 6 | #   run_attack.sh INPUT_DIR OUTPUT_DIR MAX_EPSILON
 7 | # where:
 8 | #   INPUT_DIR - directory with input PNG images
 9 | #   OUTPUT_DIR - directory where adversarial images should be written
10 | #   MAX_EPSILON - maximum allowed L_{\infty} norm of adversarial perturbation
11 | #
12 | 
13 | INPUT_DIR=$1
14 | OUTPUT_DIR=$2
15 | MAX_EPSILON=$3
16 | 
17 | python run_attack_iter.py \
18 |   --input_dir="${INPUT_DIR}" \
19 |   --output_dir="${OUTPUT_DIR}" \
20 |   --max_epsilon="${MAX_EPSILON}" \
21 |   --targeted \
22 |   --checkpoint_path=inception_v3_google-1a9a5a14.pth
23 | 
24 | 


--------------------------------------------------------------------------------
/run_attack_cwl2.py:
--------------------------------------------------------------------------------
 1 | """Pytorch Carlini and Wagner L2 attack runner.
 2 | 
 3 | """
 4 | 
 5 | from __future__ import absolute_import
 6 | from __future__ import division
 7 | from __future__ import print_function
 8 | 
 9 | import argparse
10 | 
11 | from attacks import AttackCarliniWagnerL2
12 | from attack import run_attack
13 | 
# Command-line interface of the Carlini-Wagner L2 attack runner. The
# description names the attack (it previously read 'Defence'), and
# --output_dir is labelled as the directory it is.
parser = argparse.ArgumentParser(description='Carlini-Wagner L2 attack')
parser.add_argument('--input_dir', metavar='DIR',
                    help='Input directory with images.')
parser.add_argument('--output_dir', metavar='DIR',
                    help='Output directory to save images.')
parser.add_argument('--checkpoint_path', default=None,
                    help='Path to network checkpoint.')
parser.add_argument('--img_size', type=int, default=299, metavar='N',
                    help='Image patch size (default: 299)')
parser.add_argument('--batch_size', type=int, default=32, metavar='N',
                    help='Batch size (default: 32)')
parser.add_argument('--max_epsilon', type=int, default=16, metavar='N',
                    help='Maximum size of adversarial perturbation. (default: 16)')
# --steps and --search_steps default to None, leaving the effective default
# to the attack class they are passed to.
parser.add_argument('--steps', type=int, default=None, metavar='N',
                    help='Number of optimization steps to run attack for (default: 1000)')
parser.add_argument('--search_steps', type=int, default=None, metavar='N',
                    help='Number of binary search steps to run attack for (default: 6)')
parser.add_argument('--targeted', action='store_true', default=False,
                    help='Targeted attack')
parser.add_argument('--no_gpu', action='store_true', default=False,
                    help='Disable GPU training')
parser.add_argument('--debug', action='store_true', default=False,
                    help='Enable verbose debug output')
37 | 
38 | 
def main():
    """Parse the command line, build the Carlini-Wagner L2 attack and run it."""
    options = parser.parse_args()
    attack_kwargs = dict(
        targeted=options.targeted,
        max_steps=options.steps,
        search_steps=options.search_steps,
        cuda=not options.no_gpu,
        debug=options.debug,
    )
    run_attack(options, AttackCarliniWagnerL2(**attack_kwargs))


if __name__ == '__main__':
    main()
52 | 


--------------------------------------------------------------------------------
/run_attack_iter.py:
--------------------------------------------------------------------------------
 1 | """Pytorch Iterate Fast-Gradient attack runner.
 2 | """
 3 | 
 4 | from __future__ import absolute_import
 5 | from __future__ import division
 6 | from __future__ import print_function
 7 | 
 8 | import argparse
 9 | from attacks import AttackIterative
10 | from attack import run_attack
11 | 
# Command-line interface of the iterative fast-gradient attack runner. The
# description names the attack (it previously read 'Defence'), and
# --output_dir is labelled as the directory it is.
parser = argparse.ArgumentParser(description='Iterative fast-gradient attack')
parser.add_argument('--input_dir', metavar='DIR',
                    help='Input directory with images.')
parser.add_argument('--output_dir', metavar='DIR',
                    help='Output directory to save images.')
parser.add_argument('--checkpoint_path', default=None,
                    help='Path to network checkpoint.')
parser.add_argument('--img_size', type=int, default=299, metavar='N',
                    help='Image patch size (default: 299)')
parser.add_argument('--batch_size', type=int, default=32, metavar='N',
                    help='Batch size (default: 32)')
parser.add_argument('--max_epsilon', type=int, default=16, metavar='N',
                    help='Maximum size of adversarial perturbation. (default: 16)')
parser.add_argument('--steps', type=int, default=10, metavar='N',
                    help='Number of steps to run attack for')
# 0.0 is falsy, which tells AttackIterative to derive the step size itself.
parser.add_argument('--step_alpha', type=float, default=0.0,
                    help='Per step scaling constant, defaults to epsilon/steps')
# The default is an explicit float instead of the string 'inf' that argparse
# had to convert through `type`.
parser.add_argument('--norm', default=float('inf'), type=float,
                    help='Gradient norm: 1, 2 or inf (default: inf)')
parser.add_argument('--targeted', action='store_true', default=False,
                    help='Targeted attack')
parser.add_argument('--no_gpu', action='store_true', default=False,
                    help='Disable GPU training')
parser.add_argument('--debug', action='store_true', default=False,
                    help='Enable verbose debug output')
37 | 
38 | 
def main():
    """Parse the command line, build the iterative attack and run it."""
    options = parser.parse_args()
    attack_kwargs = dict(
        targeted=options.targeted,
        max_epsilon=options.max_epsilon,
        norm=options.norm,
        step_alpha=options.step_alpha,
        num_steps=options.steps,
        cuda=not options.no_gpu,
        debug=options.debug,
    )
    run_attack(options, AttackIterative(**attack_kwargs))


if __name__ == '__main__':
    main()
54 | 


--------------------------------------------------------------------------------