├── .gitignore
├── README.md
├── adversarial_attack_DRO.py
├── analysis_main.py
├── generate_sample_adversarial.py
├── loss_functions.py
├── sanity_check_attack.py
├── sanity_check_foolbox.py
├── sanity_check_keras.py
├── util_MNIST.py
├── util_adversarial_attack.py
├── util_adversarial_training.py
├── util_analysis.py
└── util_model.py

/.gitignore:
--------------------------------------------------------------------------------
1 | .vscode
2 | __pycache__/
3 | data/
4 | DRO_models/
5 | ERM_models/
6 | Loss_models/
7 | images/
8 | sample_images/
9 | records/
10 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Optimal loss functions for distributionally robust optimization (DRO) of neural networks
2 |
3 | ## Project overview
4 |
5 | The code in this repository is used to conduct empirical studies for the Part C Computer Science project at the University of Oxford.
6 |
7 | As neural networks are increasingly widely applied in safety-critical systems (e.g. autonomous vehicles), it is essential to ensure the safety of systems involving neural networks.
8 | It has been discovered that despite their stellar generalization performance, neural networks are surprisingly vulnerable to so-called adversarial perturbations in computer vision; i.e. small and oftentimes imperceptible perturbations to an input image that can trick the neural network into misclassifying the image.
9 | One promising approach to improving the robustness of neural networks to adversarial perturbations is adversarial training, whereby neural networks are trained using not only the original training data but also adversarial examples that can be generated from the training data.
10 |
11 | In this project, I investigate the relationship between (i) loss functions used in training feedforward neural networks and (ii) the robustness of neural networks that are trained by distributionally robust optimization (DRO), which is a variant of adversarial training.
12 |
13 | I specifically consider the following DRO algorithms:
14 | 1. WRM developed by Sinha et al. ([paper](https://arxiv.org/abs/1710.10571))
15 | 2. FWDRO developed by Staib and Jegelka ([paper](https://machine-learning-and-security.github.io/papers/mlsec17_paper_30.pdf))
16 | 3. Distributional projected gradient descent (PGD).
17 |
18 | The loss functions examined in this project come from the paper by Carlini and Wagner ([paper](https://arxiv.org/abs/1608.04644)).
19 |
--------------------------------------------------------------------------------
/adversarial_attack_DRO.py:
--------------------------------------------------------------------------------
1 | import math
2 | import torch
3 | import torch.nn as nn
4 | import torch.nn.functional as F
5 | import numpy as np
6 | from util_MNIST import retrieveMNISTTrainingData
7 | from util_model import SimpleNeuralNet, MNISTClassifier
8 | from util_adversarial_training import AdversarialTraining
9 |
10 | """
11 | This module contains the implementation of the three DRO algorithms
12 | studied in this project: FWDRO by Staib et al., WRM by Sinha et al.,
13 | and distributional PGD.
14 | """
15 |
16 | class ProjetcedDRO(AdversarialTraining):
17 |     """
18 |     Execute distributionally robust optimization (DRO) using the Euclidean
19 |     projection in the adversarial attack.
This class is applicable only when 20 | the underlying distance is the L2-norm and the distributional distance is 21 | the 2-Wasserstein distance (i.e. W2). 22 | """ 23 | 24 | def __init__(self, model, loss_criterion): 25 | super().__init__(model, loss_criterion) 26 | 27 | def attack(self, budget, data, steps=15): 28 | 29 | def randomStart(center, epsilon): 30 | """ 31 | Select a random point that is on the perimeter of a L2-ball. 32 | This point is where the L2-norm-ball constraint is tight. 33 | 34 | Arguments: 35 | center: origin of the L2-ball 36 | epsilon: radius of the L2-ball 37 | Returns: 38 | None 39 | 40 | The input 'center' is modified in place. 41 | """ 42 | 43 | direction = torch.rand(center.size()) * 2 - 1 44 | direction = direction.to(self.device) 45 | length = torch.norm(direction, p=2).item() 46 | center.data.add_(epsilon / length * direction) 47 | center.data.clamp_(0, 1) 48 | 49 | lr = budget / 5 50 | images, labels = data 51 | # Load an initialized batch of adversarial examples on a device 52 | images_adv = images.clone().detach().to(self.device) 53 | images_adv.requires_grad_(True) 54 | 55 | # images.size()[0] corresponds to the batch size. 56 | desirable_distance = budget * math.sqrt(images.size()[0]) 57 | 58 | # Choose a random strating point where the constraint for perturbations 59 | # is tight. Without randomly choosing a starting point, the adversarial 60 | # attack fails most of the time because the loss function is flat near 61 | # the training input, which was used in training the neural network. 62 | randomStart(images_adv, budget) 63 | for i in range(steps): 64 | if images_adv.grad is not None: 65 | images_adv.grad.data.zero_() 66 | outputs = self.model(images_adv) 67 | loss = self.loss_criterion(outputs, labels) 68 | loss.backward() 69 | images_adv.data.add_(lr * images_adv.grad) 70 | diff_tensor = images.detach() - images_adv.detach() 71 | diff_tensor = diff_tensor.to(self.device) 72 | distance = torch.norm(diff_tensor, p=2).item() 73 | 74 | # Inside this conditional statement, we can be certain that 75 | # distance > 0, provided that budget > 0. 76 | # Hence, there is no risk of division by 0. 77 | if distance > desirable_distance: 78 | images_adv.data.add_( 79 | (1 - (desirable_distance / distance)) * diff_tensor) 80 | images_adv.data.clamp_(0, 1) 81 | return images_adv, labels 82 | 83 | 84 | class LagrangianDRO(AdversarialTraining): 85 | """ 86 | Execute DRO using the Lagrangian relaxation of the original theoretical 87 | formulation of DRO. This approach is developed by Sinha, Namkoong, and 88 | Duchi (2018). 89 | """ 90 | 91 | def __init__(self, model, loss_criterion, cost_function): 92 | """ 93 | Initialize instance variables 94 | 95 | Arguments: 96 | cost_function: underlying distance metric for the instance space 97 | """ 98 | 99 | super().__init__(model, loss_criterion) 100 | self.cost_function = cost_function 101 | 102 | def attack(self, budget, data, steps=15): 103 | """ 104 | Launch an adversarial attack using the Lagrangian relaxation. 105 | 106 | Arguments: 107 | budget: gamma in the original paper. Note that this parameter is 108 | different from the budget parameter in other DRO classes. 
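        In each of the 'steps' iterations, the adversarial examples ascend
        the penalized objective
        loss(model(images_adv), labels) - budget * cost_function(images, images_adv)
        with a step size that decays as 1 / sqrt(t). A larger budget
        (i.e. gamma) therefore penalizes large perturbations more heavily
        and keeps the adversarial examples closer to the clean inputs.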
109 | """ 110 | 111 | images, labels = data 112 | images_adv = images.clone().detach().to(self.device) 113 | images_adv.requires_grad_(True) 114 | 115 | for i in range(steps): 116 | if images_adv.grad is not None: 117 | images_adv.grad.data.zero_() 118 | outputs = self.model(images_adv) 119 | loss = self.loss_criterion( 120 | outputs, labels) - budget * self.cost_function(images, images_adv) 121 | loss.backward() 122 | images_adv.data.add_(1 / math.sqrt(i+1) * images_adv.grad) 123 | images_adv.data.clamp_(0, 1) 124 | return images_adv, labels 125 | 126 | 127 | class FrankWolfeDRO(AdversarialTraining): 128 | """ 129 | Execute DRO using the Frank-Wolfe method together with the stochastic 130 | block coordinate descent (BCD). This approach is developed by Staib and 131 | Jegelka (2017). 132 | """ 133 | 134 | def __init__(self, model, loss_criterion, p, q): 135 | """ 136 | Initialize instance variables. 137 | 138 | Arguments: 139 | p: distributional distance will be Wp 140 | q: underlying distance for the instance space will be Lq 141 | """ 142 | 143 | super().__init__(model, loss_criterion) 144 | assert p > 1 and q > 1 145 | self.p = p 146 | self.q = q 147 | 148 | def attack(self, budget, data, steps=15): 149 | """ 150 | Launch an adversarial attack using the Frank-Wolfe method. 151 | The algorithm is taken from 'Convex Optimization: Algorithms and 152 | Complexity' by Bubeck. 153 | """ 154 | 155 | images, labels = data 156 | images_adv = images.clone().detach().to(self.device) 157 | images_adv.requires_grad_(True) 158 | 159 | for i in range(steps): 160 | if images_adv.grad is not None: 161 | images_adv.grad.zero_() 162 | outputs = self.model(images_adv) 163 | loss = self.loss_criterion(outputs, labels) 164 | loss.backward() 165 | 166 | # desitnation corresponds to y_t in the paper by Bubeck. 167 | destination = images_adv.data + \ 168 | self.getOptimalDirection(budget=budget, data=images_adv.grad) 169 | destination = destination.to(self.device) 170 | gamma = 2 / (i + 2) 171 | images_adv.data = (1 - gamma) * \ 172 | images_adv.data + gamma * destination 173 | images_adv.data.clamp_(0, 1) 174 | return images_adv, labels 175 | 176 | def getOptimalDirection(self, budget, data): 177 | """ 178 | Calculate the minimizer of a linear subproblem in the Frank-Wolfe 179 | method. The objective function is linear, and the constraint is 180 | a mixed-norm ball. 181 | 182 | Instead of calculating a local constraint, I use the same budget 183 | parameter in every iteration. 184 | 185 | Arguments: 186 | budget: epsilon in the paper by Staib et al. 187 | data: gradient of the total loss with respect to the current 188 | batch of adversarial examples. This corresponds to C in 189 | Appendix B of the paper by Staib et al. 190 | 191 | Returns: 192 | X in Appendix B of Staib et al.'s paper 193 | """ 194 | 195 | # The number of samples 196 | batch_size = data.size()[0] 197 | 198 | # 'directions' corresponds to v's in Staib et al.'s paper. 199 | directions = data.clone().detach().view((batch_size, -1)) 200 | directions = directions.to(self.device) 201 | 202 | if self.q == np.inf: 203 | directions = directions.sign() 204 | elif self.q > 1: 205 | normalize_dim = 1 / (self.q - 1) 206 | directions.pow_(normalize_dim) 207 | directions = F.normalize(directions, p=self.q, dim=1) 208 | else: 209 | raise ValueError("The value of q must be larger than 1.") 210 | 211 | # This corresponds to a's in the original paper. 
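        # For each sample, the inner product between its direction v_i and its
        # gradient C_i measures how much the loss can increase per unit step in
        # that direction; these products determine how the overall budget is
        # split across the batch below.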
212 | products = [] 213 | for i, direction in enumerate(directions): 214 | sample = data[i].view(-1) 215 | products.append(torch.dot(direction, sample)) 216 | products = torch.stack(products) 217 | products = products.to(self.device) 218 | 219 | # This corresponds to epsilons in the original paper. 220 | size_factors = products.clone().detach() 221 | size_factors = size_factors.to(self.device) 222 | if self.p == np.inf: 223 | size_factors = size_factors.sign() 224 | elif self.p > 1: 225 | normalize_dim = 1 / (self.p - 1) 226 | size_factors.pow_(normalize_dim) 227 | distance = torch.norm(size_factors, p=self.p).item() 228 | size_factors = size_factors / distance # This is now normalized. 229 | else: 230 | raise ValueError("The value of p must be larger than 1.") 231 | 232 | outputs = [] 233 | for i, size_factor in enumerate(size_factors): 234 | outputs.append(directions[i] * size_factor * budget) 235 | result = torch.stack(outputs).view(data.size()) 236 | return result.to(self.device) 237 | 238 | 239 | def trainDROModel(dro_type, epochs, steps_adv, budget, activation, batch_size, loss_criterion, cost_function=None): 240 | """ 241 | Train a neural network using one of the following DRO methods: 242 | - PGD 243 | - Lagrangian relaxation based method developed by Sinha et al. 244 | This is also called WRM. 245 | - the Frank-Wolfe method based approach developed by Staib et al. 246 | """ 247 | 248 | model = MNISTClassifier(activation=activation) 249 | if dro_type == 'PGD': 250 | train_module = ProjetcedDRO(model, loss_criterion) 251 | elif dro_type == 'Lag': 252 | assert cost_function is not None 253 | train_module = LagrangianDRO(model, loss_criterion, cost_function) 254 | elif dro_type == 'FW': 255 | train_module = FrankWolfeDRO(model, loss_criterion, p=2, q=2) 256 | else: 257 | raise ValueError("The type of DRO is not valid.") 258 | 259 | train_module.train(budget=budget, batch_size=batch_size, 260 | epochs=epochs, steps_adv=steps_adv) 261 | folderpath = "./DRO_models/" 262 | filepath = folderpath + \ 263 | "{}_DRO_activation={}_epsilon={}.pt".format( 264 | dro_type, activation, budget) 265 | torch.save(model.state_dict(), filepath) 266 | print("A neural network adversarially trained using {} is now saved at {}.".format( 267 | dro_type, filepath)) 268 | 269 | 270 | if __name__ == "__main__": 271 | epochs = 25 272 | steps_adv = 15 273 | epsilon = 2.8 274 | gammas = [0.0001, 0.0003, 0.001, 0.003, 0.01, 0.03, 0.1, 0.3, 1.0, 3.0] 275 | batch_size = 128 276 | loss_criterion = nn.CrossEntropyLoss() 277 | 278 | def cost_function(x, y): return torch.dist(x, y, p=2) ** 2 279 | 280 | trainDROModel('PGD', epochs, steps_adv, epsilon, 281 | 'relu', batch_size, loss_criterion) 282 | trainDROModel('FW', epochs, steps_adv, epsilon, 283 | 'relu', batch_size, loss_criterion) 284 | 285 | trainDROModel('PGD', epochs, steps_adv, epsilon, 286 | 'elu', batch_size, loss_criterion) 287 | trainDROModel('FW', epochs, steps_adv, epsilon, 288 | 'elu', batch_size, loss_criterion) 289 | 290 | for gamma in gammas: 291 | trainDROModel('Lag', epochs, steps_adv, gamma, 'relu', 292 | batch_size, loss_criterion, cost_function=cost_function) 293 | trainDROModel('Lag', epochs, steps_adv, gamma, 'elu', 294 | batch_size, loss_criterion, cost_function=cost_function) 295 | -------------------------------------------------------------------------------- /analysis_main.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | from util_model 
import MNISTClassifier 4 | from util_analysis import Analysis, AnalysisMulitpleModels 5 | 6 | """ 7 | This module contains classes for robustness analysis of neural networks. 8 | """ 9 | 10 | 11 | class ERMModelsAnalysis(AnalysisMulitpleModels): 12 | 13 | """ 14 | Class for the robustness analysis on neural networks trained by ERM. 15 | """ 16 | 17 | def __init__(self): 18 | model_relu = MNISTClassifier(activation='relu') 19 | model_elu = MNISTClassifier(activation='elu') 20 | model_sgd_relu = MNISTClassifier(activation='relu') 21 | model_sgd_elu = MNISTClassifier(activation='elu') 22 | 23 | # These file paths only work on UNIX. 24 | folderpath = "./ERM_models/" 25 | filename_relu = "MNISTClassifier_adam_relu.pt" 26 | filename_elu = "MNISTClassifier_adam_elu.pt" 27 | filename_sgd_relu = "MNISTClassifier_sgd_relu.pt" 28 | filename_sgd_elu = "MNISTClassifier_sgd_elu.pt" 29 | 30 | self.analyzer_relu = Analysis(model_relu, folderpath + filename_relu) 31 | self.analyzer_elu = Analysis(model_elu, folderpath + filename_elu) 32 | self.analyzer_sgd_relu = Analysis(model_sgd_relu, folderpath + filename_sgd_relu) 33 | self.analyzer_sgd_elu = Analysis(model_sgd_elu, folderpath + filename_sgd_elu) 34 | 35 | def plotERMModels(self, budget, norm, bins): 36 | """ 37 | Produce a line graph of adversarial attack success rates for various 38 | budgets. 39 | """ 40 | 41 | analyzers = [self.analyzer_relu, self.analyzer_elu, 42 | self.analyzer_sgd_relu, self.analyzer_sgd_elu] 43 | labels = ['ReLU Adam', 'ELU Adam', 'ReLU SGD', 'ELU SGD'] 44 | 45 | fig, (ax1, ax2) = plt.subplots(1, 2) 46 | 47 | record_filepath = "./records/ERM_analysis_norm={}.txt".format( 48 | "L2" if norm == 2 else "Linf") 49 | with open(record_filepath, mode='w') as f: 50 | self.plotPerturbationLineGraph( 51 | ax1, analyzers, labels, "FGSM", budget, norm, bins, f) 52 | self.plotPerturbationLineGraph( 53 | ax2, analyzers, labels, "PGD", budget, norm, bins, f) 54 | 55 | ax1.set_title("FGSM") 56 | ax2.set_title("PGD") 57 | plt.tight_layout() 58 | 59 | width, height = fig.get_size_inches() 60 | fig.set_size_inches(width * 1.8, height) 61 | 62 | # plt.show() 63 | filepath = "./images/ERM_norm={}.png".format( 64 | "L2" if norm == 2 else "Linf") 65 | plt.savefig(filepath, dpi=300) 66 | print("Graph now saved at {}".format(filepath)) 67 | plt.close() 68 | 69 | 70 | class DROModelsAnalysis(AnalysisMulitpleModels): 71 | 72 | """ 73 | Class for the robustness analysis on the neural networks trained by DRO. 74 | """ 75 | 76 | def __init__(self): 77 | self.gammas = [0.0001, 0.0003, 0.001, 78 | 0.003, 0.01, 0.03, 0.1, 0.3, 1.0, 3.0] 79 | 80 | def initializeLagAnalyzers(): 81 | """ 82 | Initialize Analysis objects for neural networks trained by the DRO 83 | algorithm proposed by Sinha et al. 
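            One Analysis object is created for each value of gamma in
            self.gammas, separately for the ReLU and ELU models, and the two
            lists of analyzers are returned.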
84 | """ 85 | 86 | folderpath = "./DRO_models/" 87 | Lag_relu_analyzers = [] 88 | Lag_elu_analyzers = [] 89 | length = len(self.gammas) 90 | for i in range(length): 91 | gamma = self.gammas[i] 92 | filepath_relu = folderpath + \ 93 | "{}_DRO_activation={}_epsilon={}.pt".format( 94 | "Lag", "relu", gamma) 95 | filepath_elu = folderpath + \ 96 | "{}_DRO_activation={}_epsilon={}.pt".format( 97 | "Lag", "elu", gamma) 98 | model_relu = MNISTClassifier(activation='relu') 99 | model_elu = MNISTClassifier(activation='elu') 100 | Lag_relu_analyzers.append(Analysis(model_relu, filepath_relu)) 101 | Lag_elu_analyzers.append(Analysis(model_elu, filepath_elu)) 102 | return Lag_relu_analyzers, Lag_elu_analyzers 103 | 104 | def initializeAnalyzers(dro_type, epsilon): 105 | """ 106 | Initialize Analysis objects for neural networks trained by the 107 | Frank-Wolfe method and PGD 108 | """ 109 | 110 | folderpath = "./DRO_models/" 111 | filepath_relu = folderpath + \ 112 | "{}_DRO_activation={}_epsilon={}.pt".format( 113 | dro_type, "relu", epsilon) 114 | filepath_elu = folderpath + \ 115 | "{}_DRO_activation={}_epsilon={}.pt".format( 116 | dro_type, "elu", epsilon) 117 | model_relu = MNISTClassifier(activation='relu') 118 | model_elu = MNISTClassifier(activation='elu') 119 | analyzer_relu = Analysis(model_relu, filepath_relu) 120 | analyzer_elu = Analysis(model_elu, filepath_elu) 121 | return analyzer_relu, analyzer_elu 122 | 123 | self.Lag_relu_analyzers, self.Lag_elu_analyzers = initializeLagAnalyzers() 124 | self.FW_relu_analyzer, self.FW_elu_analyzer = initializeAnalyzers( 125 | dro_type='FW', epsilon=2.8) 126 | self.PGD_relu_analyzer, self.PGD_elu_analyzer = initializeAnalyzers( 127 | dro_type='PGD', epsilon=2.8) 128 | 129 | def plotLagDROModels(self, adversarial_type, budget, norm, bins): 130 | """ 131 | Produce line graphs of adversarial attack success rates on neural 132 | networks trained by WRM with various values of gamma. 133 | """ 134 | 135 | # Pyplot supports LaTex syntax. 136 | labels = [r"$\gamma = {}$".format(gamma) for gamma in self.gammas] 137 | 138 | fig, (ax1, ax2) = plt.subplots(1, 2) 139 | 140 | record_filepath = "./records/DRO_analysis_{}_norm={}.txt".format( 141 | adversarial_type, "L2" if norm == 2 else "Linf") 142 | with open(record_filepath, mode='w') as f: 143 | self.plotPerturbationLineGraph( 144 | ax1, self.Lag_relu_analyzers, labels, adversarial_type, budget, norm, bins, f) 145 | self.plotPerturbationLineGraph( 146 | ax2, self.Lag_elu_analyzers, labels, adversarial_type, budget, norm, bins, f) 147 | print("Record stored at {}".format(record_filepath)) 148 | 149 | ax1.set_title("ReLU") 150 | ax2.set_title("ELU") 151 | plt.tight_layout() 152 | 153 | width, height = fig.get_size_inches() 154 | fig.set_size_inches(width * 1.8, height) 155 | 156 | # plt.show() 157 | filepath = "./images/Lag_{}_norm={}.png".format( 158 | adversarial_type, "L2" if norm == 2 else "Linf") 159 | plt.savefig(filepath, dpi=300) 160 | print("Graph now saved at {}".format(filepath)) 161 | plt.close() 162 | 163 | def compareLagDROModels(self, budget_two, budget_inf, bins): 164 | """ 165 | Compare the robustness of those neural networks trained by WRM with 166 | different values of gamma by using five types of adversarial attacks: 167 | - FGSM with the L-inf norm 168 | - FGSM with the L-2 norm 169 | - pointwise PGD with the L-inf norm 170 | - pointwise PGD with the L-2 norm 171 | - distributional PGD. 
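        The distributional PGD attack is run only with the L2 budget
        (budget_two), since it measures perturbations in the 2-Wasserstein
        distance induced by the L2 norm.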
172 | """ 173 | 174 | self.plotLagDROModels("FGSM", budget_inf, np.inf, bins) 175 | self.plotLagDROModels("FGSM", budget_two, 2, bins) 176 | 177 | self.plotLagDROModels("PGD", budget_inf, np.inf, bins) 178 | self.plotLagDROModels("PGD", budget_two, 2, bins) 179 | 180 | self.plotLagDROModels("distributional_PGD", budget_two, 2, bins) 181 | 182 | def plotDROModels(self, budget, norm, bins): 183 | """ 184 | Compare the robustness of neural networks trained by all three DRO 185 | algorithms: WRM, the Frank-Wolfe method, and PGD. 186 | """ 187 | 188 | # The optimal gamma for both ReLu and ELU has been determined to be 1.0. 189 | optimal_gamma = 1.0 190 | index_optimal_gamma = self.gammas.index(optimal_gamma) 191 | LagAnalyzers = [self.Lag_relu_analyzers[index_optimal_gamma], 192 | self.Lag_elu_analyzers[index_optimal_gamma]] 193 | FWandPGDanalyzers = [self.FW_relu_analyzer, self.FW_elu_analyzer, 194 | self.PGD_relu_analyzer, self.PGD_elu_analyzer] 195 | analyzers = LagAnalyzers + FWandPGDanalyzers 196 | labels = ["Lag ReLU", "Lag ELU", "FW ReLU", 197 | "FW ELU", "PGD ReLU", "PGD ELU"] 198 | 199 | fig, (ax1, ax2) = plt.subplots(1, 2) 200 | self.plotPerturbationLineGraph( 201 | ax1, analyzers, labels, "FGSM", budget, norm, bins, record_file=None) 202 | self.plotPerturbationLineGraph( 203 | ax2, analyzers, labels, "PGD", budget, norm, bins, record_file=None) 204 | 205 | ax1.set_title("FGSM") 206 | ax2.set_title("PGD") 207 | plt.tight_layout() 208 | 209 | width, height = fig.get_size_inches() 210 | fig.set_size_inches(width * 1.8, height) 211 | 212 | # plt.show() 213 | filepath = "./images/DRO_norm={}.png".format( 214 | "L2" if norm == 2 else "Linf") 215 | plt.savefig(filepath, dpi=300) 216 | print("Graph now saved at {}".format(filepath)) 217 | plt.close() 218 | 219 | 220 | class LossFunctionsAnalysis(AnalysisMulitpleModels): 221 | 222 | """ 223 | Class for the robustness analysis various loss functions 224 | """ 225 | 226 | def __init__(self): 227 | 228 | def initializeAnalyzers(dro_type, activation, budget): 229 | analyzers = [] 230 | filepath = folderpath = "./Loss_models/" 231 | for i in range(1, 8): 232 | filepath = folderpath + "{}_DRO_activation={}_epsilon={}_loss={}.pt".format( 233 | dro_type, activation, budget, "f_{}".format(i)) 234 | model = MNISTClassifier(activation=activation) 235 | analyzers.append(Analysis(model, filepath)) 236 | return analyzers 237 | 238 | epsilon = 2.8 239 | optimal_gamma = 1.0 240 | self.FWAnalyzers = initializeAnalyzers( 241 | "FW", activation='relu', budget=epsilon) 242 | self.PGDAnalyzers = initializeAnalyzers( 243 | "PGD", activation='relu', budget=epsilon) 244 | self.LagAnalyzers = initializeAnalyzers( 245 | "Lag", activation='relu', budget=optimal_gamma) 246 | 247 | def plotLosses(self, training_type, budget, norm, bins, record): 248 | labels = [r"$f_{}$".format(i) for i in range(1, 8)] 249 | 250 | fig, (ax1, ax2) = plt.subplots(1, 2) 251 | 252 | if training_type == "PGD": 253 | analyzers = self.PGDAnalyzers 254 | elif training_type == "FW": 255 | analyzers = self.FWAnalyzers 256 | elif training_type == "Lag": 257 | analyzers = self.LagAnalyzers 258 | else: 259 | raise ValueError("The type of DRO is invalid.") 260 | 261 | if record: 262 | record_filepath = "./records/Loss_analysis_DRO_type={}_norm={}budget={}.txt".format( 263 | training_type, "L2" if norm == 2 else "Linf", budget) 264 | with open(record_filepath, "w") as f: 265 | self.plotPerturbationLineGraph( 266 | ax1, analyzers, labels, "FGSM", budget, norm, bins, f) 267 | 
self.plotPerturbationLineGraph( 268 | ax2, analyzers, labels, "PGD", budget, norm, bins, f) 269 | print("Record stored at {}".format(record_filepath)) 270 | else: 271 | self.plotPerturbationLineGraph( 272 | ax1, analyzers, labels, "FGSM", budget, norm, bins, None) 273 | self.plotPerturbationLineGraph( 274 | ax2, analyzers, labels, "PGD", budget, norm, bins, None) 275 | 276 | ax1.set_title("FGSM") 277 | ax2.set_title("PGD") 278 | plt.tight_layout() 279 | 280 | width, height = fig.get_size_inches() 281 | fig.set_size_inches(width * 1.8, height) 282 | 283 | # plt.show() 284 | filepath = "./images/Loss_{}_norm={}.png".format( 285 | training_type, "L2" if norm == 2 else "Linf") 286 | plt.savefig(filepath, dpi=300) 287 | print("Graph now saved at {}".format(filepath)) 288 | plt.close() 289 | 290 | def compareLosses(self, budget_two, budget_inf, bins, record=True): 291 | """ 292 | Compare the seven loss functions in terms of robustness of the 293 | resulting neural networks. 294 | """ 295 | 296 | self.plotLosses("PGD", budget_inf, np.inf, bins, record) 297 | self.plotLosses("PGD", budget_two, 2, bins, record) 298 | 299 | self.plotLosses("FW", budget_inf, np.inf, bins, record) 300 | self.plotLosses("FW", budget_two, 2, bins, record) 301 | 302 | self.plotLosses("Lag", budget_inf, np.inf, bins, record) 303 | self.plotLosses("Lag", budget_two, 2, bins, record) 304 | 305 | if __name__ == '__main__': 306 | budget_two = 4.0 307 | budget_inf = 0.4 308 | bins = 20 309 | 310 | erm_analysis = ERMModelsAnalysis() 311 | erm_analysis.plotERMModels(budget=budget_two, norm=2, bins=bins) 312 | erm_analysis.plotERMModels(budget=budget_inf, norm=np.inf, bins=bins) 313 | 314 | dro_analysis = DROModelsAnalysis() 315 | dro_analysis.compareLagDROModels(budget_two=budget_two, budget_inf=budget_inf, bins=bins) 316 | # dro_analysis.compareLagDROModels(budget_two=10.0, budget_inf=None, bins=40) 317 | dro_analysis.plotDROModels(budget=budget_two, norm=2, bins=bins) 318 | dro_analysis.plotDROModels(budget=budget_inf, norm=np.inf, bins=bins) 319 | 320 | loss_analysis = LossFunctionsAnalysis() 321 | loss_analysis.compareLosses(budget_two=budget_two, budget_inf=budget_inf, bins=bins) 322 | -------------------------------------------------------------------------------- /generate_sample_adversarial.py: -------------------------------------------------------------------------------- 1 | """ 2 | The code is attributed to the GitHub page of foolbox: 3 | https://github.com/bethgelab/foolbox 4 | """ 5 | 6 | import foolbox 7 | import keras 8 | import numpy as np 9 | from keras.applications.resnet50 import ResNet50 10 | from keras.preprocessing import image 11 | from keras.applications.resnet50 import preprocess_input, decode_predictions 12 | import matplotlib.pyplot as plt 13 | 14 | """ 15 | This script generates a adversarial example from an ImageNet image. 
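It loads a ResNet50 model pretrained on ImageNet, attacks a sample image with
Foolbox's FGSM, and saves a figure showing the original image, the
perturbation, and the adversarial example under ./images/.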
16 | """ 17 | 18 | img_rows, img_cols = 224, 224 19 | nb_channels = 3 20 | img_shape = (img_rows, img_cols, nb_channels) 21 | 22 | # instantiate model 23 | keras.backend.set_learning_phase(0) 24 | kmodel = ResNet50(weights='imagenet') 25 | preprocessing = (np.array([104, 116, 123]), 1) 26 | fmodel = foolbox.models.KerasModel(kmodel, bounds=(0, 255), preprocessing=preprocessing) 27 | 28 | img_folderpath = "./sample_images/" 29 | img_path = img_folderpath + 'sample_image_2.jpg' # An image of a yellow cab/taxi 30 | x = image.load_img(img_path, color_mode='rgb', target_size=(img_rows, img_cols)) 31 | img = image.img_to_array(x) 32 | img = np.expand_dims(img, axis=0) 33 | img = img.reshape(img_shape) 34 | label = 468 # For the class of taxi and cab 35 | 36 | # Note that proprocess_input is an in-place operation. 37 | prediction = kmodel.predict(preprocess_input(np.copy(img)).reshape((1, img_rows, img_cols, nb_channels))) 38 | # decode the results into a list of tuples (class, description, probability) 39 | # (one such list for each sample in the batch) 40 | print('Prediction on the original example:', decode_predictions(prediction, top=3)[0]) 41 | # The original image is correctly classified as a cab with the confidence of 0.999. 42 | 43 | # apply attack on source image 44 | # ::-1 reverses the color channels, because Keras ResNet50 expects BGR instead of RGB 45 | attack = foolbox.attacks.FGSM(fmodel) 46 | adversarial = attack(img[:, :, ::-1], label)[:,:,::-1] 47 | # if the attack fails, adversarial will be None and a warning will be printed 48 | 49 | adversarial_prediction = kmodel.predict(adversarial.reshape((1, img_rows, img_cols, nb_channels))) 50 | print('Prediction on the adversarial example:', decode_predictions(adversarial_prediction, top=3)[0]) 51 | # The adversarial example is incorrectly classified as a jigsaw puzzle with the confidence of 0.629. 52 | 53 | plt.figure() 54 | 55 | plt.subplot(1, 3, 1) 56 | plt.title('Original') 57 | plt.imshow(img / 255) # division by 255 to convert [0, 255] to [0, 1] 58 | plt.axis('off') 59 | 60 | plt.subplot(1, 3, 2) 61 | plt.title('Difference') 62 | difference = adversarial - img 63 | plt.imshow(difference / abs(difference).max() * 0.2 + 0.5) 64 | plt.axis('off') 65 | 66 | plt.subplot(1, 3, 3) 67 | plt.title('Adversarial') 68 | plt.imshow(adversarial / 255) 69 | plt.axis('off') 70 | 71 | plt.tight_layout() 72 | #plt.show() 73 | 74 | folderpath = "./images/" 75 | # Filepath for the output adversarially perturbed image 76 | filepath = folderpath + "sample_adversarial_example.png" 77 | plt.savefig(filepath, dpi=300) 78 | plt.close() 79 | -------------------------------------------------------------------------------- /loss_functions.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | from util_MNIST import retrieveMNISTTrainingData 6 | from util_model import MNISTClassifier, trainModel 7 | from adversarial_attack_DRO import ProjetcedDRO, LagrangianDRO, FrankWolfeDRO 8 | 9 | """ 10 | This module contains the seven loss functions listed in Carlini and Wagneer 11 | (2017) with a minor modification in the constant in f_5. 
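As implemented below, f_1 is the standard cross-entropy loss; f_6 averages
(largest output - true-class output), and f_7 averages
softplus(largest non-true-class output - true-class output); f_2 and f_3
apply f_6 and f_7, respectively, to softmax probabilities rather than logits;
f_4 averages max(0, 0.5 - p_true); and f_5 averages log2(2.125 - 2 * p_true),
where p_true denotes the softmax probability of the correct class.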
12 | """
13 |
14 |
15 | def f_1(outputs, labels):
16 |     return F.cross_entropy(outputs, labels)
17 |
18 |
19 | def f_2(outputs, labels):
20 |     outputs = F.softmax(outputs, dim=1)
21 |     return f_6(outputs, labels)
22 |
23 |
24 | def f_3(outputs, labels):
25 |     outputs = F.softmax(outputs, dim=1)
26 |     return f_7(outputs, labels)
27 |
28 |
29 | def f_4(outputs, labels):
30 |     outputs = F.softmax(outputs, dim=1)
31 |     reference_outputs = torch.gather(
32 |         outputs, 1, labels.view(-1, 1).long()).view(-1)
33 |     return torch.mean(torch.clamp(0.5 - reference_outputs, min=0))
34 |
35 |
36 | def f_5(outputs, labels):
37 |     # Note that in the original version, the base of e is used instead of 2.
38 |
39 |     outputs = F.softmax(outputs, dim=1)
40 |     reference_outputs = torch.gather(
41 |         outputs, 1, labels.view(-1, 1).long()).view(-1)
42 |     return torch.mean(torch.log2(2.125 - 2 * reference_outputs))
43 |
44 |
45 | def f_6(outputs, labels):
46 |     max_outputs, _ = torch.max(outputs, dim=1)
47 |     reference_outputs = torch.gather(
48 |         outputs, 1, labels.view(-1, 1).long()).view(-1)
49 |     return torch.mean(max_outputs - reference_outputs)
50 |
51 |
52 | def f_7(outputs, labels, nb_classes=10):
53 |     # Use GPU for computation if it is available.
54 |     device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
55 |
56 |     batch_size = labels.size()[0]
57 |     indexes_row = torch.arange(0, nb_classes).to(device)
58 |     indexes = indexes_row.repeat(batch_size, 1)
59 |     labels_cloned = labels.view(-1, 1).repeat(1, nb_classes)
60 |
61 |     new_outputs = outputs[indexes != labels_cloned].view(
62 |         batch_size, nb_classes-1)
63 |     reference_outputs = torch.gather(
64 |         outputs, 1, labels.view(-1, 1).long()).view(-1)
65 |     difference = torch.max(new_outputs, dim=1)[0] - reference_outputs
66 |     return torch.mean(F.softplus(difference))
67 |
68 |
69 | def trainModelLoss(dro_type, epochs, steps_adv, budget, activation, batch_size, loss_criterion, cost_function=None):
70 |     """
71 |     Train a neural network with a specified loss function.
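    The trained model is saved under ./Loss_models/ with the DRO type,
    activation, budget, and the name of the loss function encoded in the
    file name.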
72 | """ 73 | 74 | model = MNISTClassifier(activation=activation) 75 | if dro_type == 'PGD': 76 | train_module = ProjetcedDRO(model, loss_criterion) 77 | elif dro_type == 'Lag': 78 | assert cost_function is not None 79 | train_module = LagrangianDRO(model, loss_criterion, cost_function) 80 | elif dro_type == 'FW': 81 | train_module = FrankWolfeDRO(model, loss_criterion, p=2, q=2) 82 | else: 83 | raise ValueError("The type of DRO is not valid.") 84 | 85 | train_module.train(budget=budget, batch_size=batch_size, 86 | epochs=epochs, steps_adv=steps_adv) 87 | folderpath = "./Loss_models/" 88 | filepath = folderpath + "{}_DRO_activation={}_epsilon={}_loss={}.pt".format( 89 | dro_type, activation, budget, loss_criterion.__name__) 90 | torch.save(model.state_dict(), filepath) 91 | print("A neural network adversarially trained using {} now saved at: {}".format( 92 | dro_type, filepath)) 93 | 94 | 95 | if __name__ == "__main__": 96 | epochs = 25 97 | steps_adv = 15 98 | epsilon = 2.8 99 | optimal_gamma = 1.0 100 | batch_size = 128 101 | loss_criterions = [f_1, f_2, f_3, f_4, f_5, f_6, f_7] 102 | 103 | def cost_function(x, y): return torch.dist(x, y, p=2) ** 2 104 | 105 | for loss_criterion in loss_criterions: 106 | trainModelLoss("FW", epochs, steps_adv, epsilon, 107 | "relu", batch_size, loss_criterion) 108 | trainModelLoss("PGD", epochs, steps_adv, epsilon, 109 | "relu", batch_size, loss_criterion) 110 | trainModelLoss("Lag", epochs, steps_adv, optimal_gamma, "relu", 111 | batch_size, loss_criterion, cost_function=cost_function) 112 | -------------------------------------------------------------------------------- /sanity_check_attack.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | import torch.optim as optim 6 | 7 | from util_MNIST import retrieveMNISTTestData 8 | from util_model import MNISTClassifier, loadModel 9 | 10 | """ 11 | This module is for sanity checking. Most of the code in this module is 12 | attributed to a tutorial in the documentation of PyTorch: 13 | https://pytorch.org/tutorials/beginner/fgsm_tutorial.html. 14 | """ 15 | 16 | epsilons = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9] 17 | use_cuda = True # If GPU is available, choose GPU over CPU. 18 | 19 | # FGSM attack code 20 | 21 | 22 | def fgsm_attack(image, epsilon, data_grad): 23 | 24 | # Collect the element-wise sign of the data gradient 25 | sign_data_grad = data_grad.sign() 26 | # Create the perturbed image by adjusting each pixel of the input image 27 | perturbed_image = image + epsilon*sign_data_grad 28 | # Adding clipping to maintain [0,1] range 29 | perturbed_image = torch.clamp(perturbed_image, 0, 1) 30 | # Return the perturbed image 31 | return perturbed_image 32 | 33 | 34 | def test(model, device, test_loader, epsilon): 35 | 36 | # Accuracy counter 37 | correct = 0 38 | 39 | # Loop over all examples in test set 40 | for i, (data, target) in enumerate(test_loader): 41 | # Send the data and label to the device 42 | data, target = data.to(device), target.to(device) 43 | 44 | # Set requires_grad attribute of tensor. 
Important for Attack 45 | data.requires_grad = True 46 | 47 | # Forward pass the data through the model 48 | output = model(data) 49 | # get the index of the max log-probability 50 | init_pred = output.max(1, keepdim=True)[1] 51 | 52 | # If the initial prediction is wrong, dont bother attacking, just move on 53 | if init_pred.item() != target.item(): 54 | continue 55 | 56 | # Calculate the loss 57 | loss = F.nll_loss(output, target) 58 | 59 | # Zero all existing gradients 60 | model.zero_grad() 61 | 62 | # Calculate gradients of model in backward pass 63 | loss.backward() 64 | 65 | # Collect datagrad 66 | data_grad = data.grad.detach() 67 | 68 | # Call FGSM Attack 69 | perturbed_data = fgsm_attack(data, epsilon, data_grad) 70 | 71 | # Re-classify the perturbed image 72 | output = model(perturbed_data) 73 | 74 | # Check for success 75 | # get the index of the max log-probability 76 | final_pred = output.max(1, keepdim=True)[1] 77 | if final_pred.item() == target.item(): 78 | correct += 1 79 | 80 | # Calculate final accuracy for this epsilon 81 | final_acc = correct/float(len(test_loader)) 82 | print("Epsilon: {}\tTest Accuracy = {} / {} = {}".format(epsilon, 83 | correct, len(test_loader), final_acc)) 84 | 85 | 86 | if __name__ == "__main__": 87 | # MNIST Test dataset and dataloader declaration 88 | test_loader = retrieveMNISTTestData(batch_size=1, shuffle=True) 89 | 90 | # Define what device we are using 91 | print("CUDA Available: ", torch.cuda.is_available()) 92 | device = torch.device("cuda" if ( 93 | use_cuda and torch.cuda.is_available()) else "cpu") 94 | 95 | # Initialize the network 96 | filepath_relu = "./experiment_models/MNISTClassifier_relu.pt" 97 | model_relu = MNISTClassifier(activation='relu') 98 | model_relu = loadModel(model_relu, filepath_relu) 99 | model_relu.to(device) 100 | 101 | # Set the model in evaluation mode. In this case this is for the Dropout layers 102 | model_relu.eval() 103 | 104 | # Run test for each epsilon 105 | for eps in epsilons: 106 | test(model_relu, device, test_loader, eps) 107 | -------------------------------------------------------------------------------- /sanity_check_foolbox.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import foolbox 3 | from foolbox.models import PyTorchModel 4 | from foolbox.criteria import Misclassification 5 | from util_MNIST import retrieveMNISTTestData 6 | from util_model import loadModel, MNISTClassifier 7 | 8 | from torchsummary import summary 9 | 10 | """ 11 | This module is for sanity check of Foolbox, a Python library for crafting 12 | adversarial exampples. We apply Foolbox's implementation of FGSM on a neural 13 | network trained by empirical risk minimization (ERM). 
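In its current form, the script runs Foolbox's L2BasicIterativeAttack; the
FGSM call is kept in the code but commented out.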
14 | """
15 |
16 |
17 | def wrapFoolboxModel(model):
18 |     return PyTorchModel(model, bounds=(0, 1), num_classes=10, channel_axis=1, preprocessing=(0, 1))
19 |
20 |
21 | def adversarialAccuracy(model):
22 |     # Use GPU for computation if it is available
23 |     device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
24 |     model.to(device)
25 |     print("The model is now loaded on {}.".format(device))
26 |
27 |     pytorch_model = wrapFoolboxModel(model)
28 |
29 |     # get source image and label
30 |     batch_size = 1
31 |     test_loader = retrieveMNISTTestData(batch_size=batch_size)
32 |     criterion = Misclassification()
33 |
34 |     wrong, total = 0, 0
35 |     period = 500
36 |     max_epsilon = 1.0
37 |     epsilons = 5
38 |     for i, (images, labels) in enumerate(test_loader):
39 |         if i == 10000:
40 |             break
41 |         image, label = images[0].numpy(), labels[0].numpy()
42 |
43 |         #fgsm = foolbox.attacks.FGSM(pytorch_model, criterion)
44 |         #image_adv = fgsm(image, label, epsilons=epsilons, max_epsilon=max_epsilon)
45 |         pgd2 = foolbox.attacks.L2BasicIterativeAttack(pytorch_model, criterion)
46 |         image_adv = pgd2(image, label, epsilon=max_epsilon,
47 |                          stepsize=max_epsilon / 5, iterations=15)
48 |
49 |         total += 1
50 |         if image_adv is not None:
51 |             wrong += 1
52 |         if i % period == period - 1:
53 |             print(
54 |                 "Cumulative adversarial attack success rate: {} / {} = {}".format(wrong, total, wrong / total))
55 |     print("Adversarial error rate: {} / {} = {}".format(wrong, total, wrong / total))
56 |
57 |
58 | if __name__ == "__main__":
59 |     model_relu = MNISTClassifier(activation='relu')
60 |     model_elu = MNISTClassifier(activation='elu')
61 |
62 |     # These file paths only work on UNIX.
63 |     filepath_relu = "./ERM_models/MNISTClassifier_relu.pt"
64 |     filepath_elu = "./ERM_models/MNISTClassifier_elu.pt"
65 |     model_relu = loadModel(model_relu, filepath_relu)
66 |     model_elu = loadModel(model_elu, filepath_elu)
67 |
68 |     # Display the architecture of the neural network
69 |     #summary(model_relu.cuda(), (1, 28, 28))
70 |
71 |     print("The result of relu is as follows.")
72 |     adversarialAccuracy(model_relu)
73 |     print("The result of elu is as follows.")
74 |     adversarialAccuracy(model_elu)
75 |
--------------------------------------------------------------------------------
/sanity_check_keras.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from numpy import linalg as LA
3 | import matplotlib.pyplot as plt
4 |
5 | from keras.utils import to_categorical
6 | from keras.datasets import mnist
7 | from keras.models import Sequential, load_model
8 | from keras.layers import Dense, Conv2D, Flatten, Activation
9 | from keras.optimizers import Adam
10 | from keras import backend as K
11 |
12 | import foolbox
13 | from foolbox.models import KerasModel
14 | from foolbox.criteria import Misclassification
15 |
16 | """
17 | This module is for sanity checking.
18 | It creates the neural network used by Staib et al. and Sinha et al.
19 | in Keras and evaluates its robustness (or vulnerability) against an
20 | FGSM adversary.
21 | """ 22 | 23 | nb_filters = 64 24 | epochs = 25 25 | batch_size = 128 26 | num_classes = 10 27 | 28 | # input image dimensions 29 | img_rows, img_cols = 28, 28 30 | 31 | # the data, split between train and test sets 32 | (x_train, y_train), (x_test, y_test) = mnist.load_data() 33 | 34 | if K.image_data_format() == 'channels_first': 35 | x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols) 36 | x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols) 37 | input_shape = (1, img_rows, img_cols) 38 | channel_axis = 1 39 | else: 40 | x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1) 41 | x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1) 42 | input_shape = (img_rows, img_cols, 1) 43 | channel_axis = 3 44 | 45 | x_train = x_train.astype('float32') 46 | x_test = x_test.astype('float32') 47 | x_train /= 255 48 | x_test /= 255 49 | print('x_train shape:', x_train.shape) 50 | print(x_train.shape[0], 'train samples') 51 | print(x_test.shape[0], 'test samples') 52 | 53 | # convert class vectors to binary class matrices 54 | y_train = to_categorical(y_train, num_classes) 55 | y_test_original = y_test 56 | y_test = to_categorical(y_test, num_classes) 57 | 58 | 59 | def trainModel(activation='relu'): 60 | model = Sequential() 61 | model.add(Conv2D(filters=nb_filters, kernel_size=(8, 8), 62 | strides=(2, 2), padding='same', input_shape=input_shape)) 63 | model.add(Activation(activation)) 64 | model.add(Conv2D(filters=nb_filters * 2, kernel_size=(6, 6), 65 | strides=(2, 2), padding='valid')) 66 | model.add(Activation(activation)) 67 | model.add(Conv2D(filters=nb_filters * 2, kernel_size=(5, 5), 68 | strides=(1, 1), padding='valid')) 69 | model.add(Activation(activation)) 70 | model.add(Flatten()) 71 | model.add(Dense(num_classes)) 72 | model.add(Activation('softmax')) 73 | 74 | optimizer = Adam(lr=0.001) 75 | model.compile(loss='categorical_crossentropy', 76 | optimizer=optimizer, metrics=['accuracy']) 77 | model.fit(x_train, y_train, 78 | batch_size=batch_size, 79 | epochs=epochs, 80 | verbose=1, 81 | validation_data=(x_test, y_test)) 82 | score = model.evaluate(x_test, y_test, verbose=0) 83 | print('Test loss:', score[0]) 84 | print('Test accuracy:', score[1]) 85 | 86 | filepath = './experiment_models/KerasMNISTClassifier_{}.h5'.format( 87 | activation) 88 | model.save(filepath) 89 | 90 | 91 | def adversarialAccuracy(model): 92 | keras_model = KerasModel(model, bounds=(0, 1), channel_axis=channel_axis) 93 | criterion = Misclassification() 94 | 95 | length = x_test.shape[0] 96 | wrong = 0 97 | period = 50 98 | for i in range(length): 99 | image, label = x_test[i], y_test_original[i] 100 | 101 | #attack = foolbox.attacks.FGSM(keras_model, criterion) 102 | #image_adv = attack(image, label, epsilons=5, max_epsilon=1.0) 103 | pgd2 = foolbox.attacks.L2BasicIterativeAttack(keras_model, criterion) 104 | image_adv = pgd2(image, label, epsilon=1.0, stepsize=1.0, 105 | iterations=1, binary_search=False) 106 | 107 | if image_adv is not None: 108 | prediction = np.argmax( 109 | keras_model.predictions_and_gradient(image_adv, label)[0]) 110 | assert prediction != label 111 | wrong += 1 112 | if i % period == period - 1: 113 | print( 114 | "Adversarial attack success rate: {} / {} = {}".format(wrong, i+1, wrong / (i+1))) 115 | if image_adv is not None: 116 | displayImage(image_adv, label) 117 | print("Size of perturbation: {}".format( 118 | LA.norm(image_adv - image, None))) 119 | 120 | print("Adversarial error rate: {} / {} = {}".format(wrong, length, wrong / 
length)) 121 | 122 | 123 | def displayImage(image, label): 124 | plt.imshow(image.reshape((img_rows, img_cols)), 125 | vmin=0.0, vmax=1.0, cmap='gray') 126 | plt.title("Predicted label is {}".format(label)) 127 | plt.show() 128 | 129 | 130 | if __name__ == "__main__": 131 | # Train Keras neural networks 132 | """ 133 | trainModel(activation='relu') 134 | trainModel(activation='elu') 135 | """ 136 | 137 | filepath_relu = './experiment_models/KerasMNISTClassifier_relu.h5' 138 | filepath_elu = './experiment_models/KerasMNISTClassifier_elu.h5' 139 | model_relu = load_model(filepath_relu) 140 | model_elu = load_model(filepath_elu) 141 | 142 | # Display the architecture of the neural network 143 | # model_relu.summary() 144 | 145 | loss_and_metrics = model_relu.evaluate(x_test, y_test, batch_size=128) 146 | print("Test accuracy of relu: {}".format(loss_and_metrics)) 147 | loss_and_metrics = model_elu.evaluate(x_test, y_test, batch_size=128) 148 | print("Test accuracy of elu: {}".format(loss_and_metrics)) 149 | 150 | # For some unknown reason, this raises an assertion error at the 400-th image. 151 | adversarialAccuracy(model_relu) 152 | adversarialAccuracy(model_elu) 153 | -------------------------------------------------------------------------------- /util_MNIST.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torchvision 3 | import torchvision.transforms as transforms 4 | 5 | import numpy as np 6 | import matplotlib.pyplot as plt 7 | 8 | img_rows, img_cols = 28, 28 9 | 10 | 11 | def retrieveMNISTTrainingData(batch_size=128, shuffle=True): 12 | """ 13 | Retrieve a training dataset of MNIST. 14 | 15 | Arguments: 16 | batch_size: batch size 17 | shuffle: whether the training data should be shuffled 18 | Returns: 19 | data loader for the MNIST training data 20 | """ 21 | 22 | transform = transforms.Compose([transforms.ToTensor()]) 23 | MNIST_train_data = torchvision.datasets.MNIST( 24 | root='./data', train=True, download=True, transform=transform) 25 | train_loader = torch.utils.data.DataLoader( 26 | MNIST_train_data, batch_size=batch_size, shuffle=shuffle, num_workers=0) 27 | return train_loader 28 | 29 | 30 | def retrieveMNISTTestData(batch_size=128, shuffle=False): 31 | """ 32 | Retrieve a test dataset of MNIST. 33 | 34 | Arguments: 35 | batch_size: batch size 36 | shuffle: whether the test data should be shuffled 37 | Returns: 38 | data loader for the MNIST test data 39 | """ 40 | 41 | transform = transforms.Compose([transforms.ToTensor()]) 42 | MNIST_test_data = torchvision.datasets.MNIST( 43 | root='./data', train=False, download=True, transform=transform) 44 | test_loader = torch.utils.data.DataLoader( 45 | MNIST_test_data, batch_size=batch_size, shuffle=shuffle, num_workers=0) 46 | return test_loader 47 | 48 | 49 | def displayImage(image, label): 50 | """ 51 | Display an image of a digit from MNIST. 52 | 53 | Arguments: 54 | image: input image. The shape of this input must be compatible 55 | with (img_rows, img_cols). 
56 | label: prediction on this input image 57 | """ 58 | 59 | image = image.view((img_rows, img_cols)) 60 | plt.imshow(image, vmin=0.0, vmax=1.0, cmap='gray') 61 | plt.title("Predicted label: {}".format(label)) 62 | plt.show() 63 | 64 | 65 | if __name__ == "__main__": 66 | train_loader = retrieveMNISTTrainingData(batch_size=1, shuffle=False) 67 | print("MNIST training data are loaded.") 68 | train_iterator = iter(train_loader) 69 | images, labels = train_iterator.next() 70 | print("The type of the image is {}.".format(type(images))) 71 | print("The size of the image is {}.".format(images.size())) 72 | -------------------------------------------------------------------------------- /util_adversarial_attack.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from torch import optim, nn 4 | import torch.nn.functional as F 5 | from util_MNIST import retrieveMNISTTestData 6 | from util_model import SimpleNeuralNet, loadModel 7 | from art.attacks import FastGradientMethod, ProjectedGradientDescent 8 | from art.classifiers import PyTorchClassifier 9 | from adversarial_attack_DRO import ProjetcedDRO 10 | 11 | img_rows, img_cols = 28, 28 12 | 13 | """ 14 | This module contains classes for adversarial attacks. 15 | """ 16 | 17 | def wrapModel(model, loss_criterion): 18 | """ 19 | Wrap a PyTorch model using a wrapper provided by ART (Adversarial 20 | Robustness Toolbox) by IBM. 21 | """ 22 | 23 | optimizer = optim.Adam(model.parameters()) 24 | input_shape = (1, img_rows, img_cols) 25 | return PyTorchClassifier((0, 1), model, loss_criterion, optimizer, input_shape, nb_classes=10) 26 | 27 | class FGSM: 28 | """ 29 | Class for the fast gradient sign method (FGSM). 30 | This class delegates the implementation of the attack to the ART library 31 | developed by IBM. 32 | """ 33 | 34 | def __init__(self, model, loss_criterion, norm, batch_size=128): 35 | self.wrapped_pytorch_model = wrapModel(model, loss_criterion) 36 | self.norm = norm 37 | self.batch_size = batch_size 38 | self.attack = FastGradientMethod( 39 | self.wrapped_pytorch_model, batch_size=batch_size) 40 | 41 | # Use GPU for computation if it is available 42 | self.device = torch.device( 43 | "cuda:0" if torch.cuda.is_available() else "cpu") 44 | 45 | def generatePerturbation(self, data, budget, minimal=False): 46 | """ 47 | Generate adversarial examples from a given batch of images. 48 | The input data should have already been loaded on an appropriate 49 | device. 50 | 51 | Arguments: 52 | data: pairs of a batch of images and a batch of labels. The batch 53 | of images should be a numpy array. The batch of labels should 54 | be a numpy array of integers. 55 | budget: the maximal size of perturbation allowed. This parameter 56 | is not used if minimal = True. 57 | minimal: whether the minimal adversarial perturbation is computed. 58 | If yes, the maximal size of perturbation is 1.0. Consequently, 59 | the budget parameter is overridden. 60 | """ 61 | 62 | images, _ = data 63 | images_adv = self.attack.generate(x=images.cpu().numpy( 64 | ), norm=self.norm, eps=budget, minimal=minimal, eps_step=budget / 50, eps_max=budget, batch_size=self.batch_size) 65 | images_adv = torch.from_numpy(images_adv) 66 | 67 | # The output to be returned should be loaded on an appropriate device. 68 | return images_adv.to(self.device) 69 | 70 | 71 | class FGSMNative: 72 | """ 73 | Class for manually implemented FGSM, unlike the above FGSM class in this 74 | module. 
For some unknown reason, this class produces a different 75 | performance in adversarial attacks from the FGSM class. The performance of 76 | FGSMNative is better than that of FGSM only in some cases (and not in all 77 | cases). Additionally, the difference between the FGSM class and the 78 | FGSMNative class is not significant. 79 | """ 80 | 81 | def __init__(self, model, loss_criterion, norm=np.inf, batch_size=128): 82 | self.model = model 83 | self.loss_criterion = loss_criterion 84 | self.norm = norm 85 | self.batch_size = batch_size 86 | 87 | # Use GPU for computation if it is available 88 | self.device = torch.device( 89 | "cuda:0" if torch.cuda.is_available() else "cpu") 90 | 91 | def generatePerturbation(self, data, budget, minimal=False): 92 | """ 93 | Generate adversarial examples from a given batch of images. 94 | The input data should have already been loaded on an appropriate 95 | device. 96 | 97 | Note that unlike the FGSM class, in this FGSMNative class, the 98 | computation of minimal perturbations is not supported. 99 | 100 | Arguments: 101 | data: pairs of a batch of images and a batch of labels. The batch 102 | of images should be a numpy array. The batch of labels should 103 | be a numpy array of integers. 104 | budget: the maximal size of perturbation allowed. This parameter 105 | is not used if minimal = True. 106 | minimal: whether the minimal adversarial perturbation is computed. 107 | If yes, the maximal size of perturbation is 1.0. Consequently, 108 | the budget parameter is overridden. 109 | """ 110 | 111 | images, labels = data 112 | images_adv = images.clone().detach().to(self.device) 113 | # We will never need to compute a gradient with respect to images_adv. 114 | images_adv.requires_grad_(False) 115 | 116 | images.requires_grad_(True) 117 | output = self.model(images) 118 | loss = self.loss_criterion(output, labels) 119 | loss.backward() 120 | images.requires_grad_(False) 121 | 122 | if self.norm == np.inf: 123 | direction = images.grad.data.sign() 124 | elif self.norm == 2: 125 | flattened_images = images_adv.view(-1, img_rows * img_cols) 126 | direction = F.normalize( 127 | flattened_images, p=2, dim=1).view(images.size()) 128 | else: 129 | raise ValueError("The norm is not valid.") 130 | 131 | if minimal: 132 | iterations = 50 133 | incremental_size = budget / iterations 134 | minimal_perturbations = torch.zeros(images.size()) 135 | for i in range(iterations): 136 | outputs = self.model( 137 | (images_adv + minimal_perturbations).clamp(0, 1)) 138 | _, predicted = torch.max(outputs.data, 1) 139 | for j in range(labels.size()[0]): 140 | # If the current adversarial exampels are correctly 141 | # classified, increase the size of the perturbations. 142 | if predicted[j] == labels[j]: 143 | minimal_perturbations[j].add_( 144 | incremental_size * direction[j]) 145 | images_adv.add_(minimal_perturbations) 146 | else: 147 | images_adv.add_(budget * direction) 148 | 149 | images_adv.clamp_(0, 1) 150 | 151 | # The output to be returned should be loaded on an appropriate device. 152 | return images_adv 153 | 154 | 155 | class PGD: 156 | """ 157 | Class for adversarial attacks based on projected gradient descent (PGD). 158 | The implementation of PGD in ART executes projection on a feasible region 159 | after each iteration. However, random restrating is not used in this 160 | implementation. Not using radom restarting is the difference between the 161 | PGD implemented in ART and the one described by Madry et al. 
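    Each call to generatePerturbation runs at most max_iter iterations with a
    step size of budget / 5.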
162 | 163 | This adversarial attack subsumes the iterative FGSM. 164 | """ 165 | 166 | def __init__(self, model, loss_criterion, norm=np.inf, batch_size=128): 167 | self.wrapped_pytorch_model = wrapModel(model, loss_criterion) 168 | self.norm = norm 169 | self.batch_size = batch_size 170 | self.attack = ProjectedGradientDescent( 171 | self.wrapped_pytorch_model, norm=norm, random_init=False, batch_size=batch_size) 172 | 173 | # Use GPU for computation if it is available 174 | self.device = torch.device( 175 | "cuda:0" if torch.cuda.is_available() else "cpu") 176 | 177 | def generatePerturbation(self, data, budget, max_iter=15): 178 | images, _ = data 179 | 180 | # eps_step is not allowed to be larger than budget according to the 181 | # documentation of ART. 182 | eps_step = budget / 5 183 | images_adv = self.attack.generate(x=images.cpu().numpy( 184 | ), norm=self.norm, eps=budget, eps_step=eps_step, max_iter=max_iter, batch_size=self.batch_size) 185 | images_adv = torch.from_numpy(images_adv) 186 | 187 | # The output to be returned should be loaded on an appropriate device. 188 | return images_adv.to(self.device) 189 | 190 | 191 | class DistributionalPGD: 192 | """ 193 | Class for a PGD-based distributional adversarial attack (as opposed to pointwise 194 | adversarial attacks such as FGSM and PGD). 195 | 196 | By default, we use the 2-Wasserstein distance (for the distributional distance) 197 | and the L-2 norm (for the underlying pointwise distance). 198 | """ 199 | 200 | def __init__(self, model, loss_criterion): 201 | self.model = model 202 | self.loss_criterion = loss_criterion 203 | self.training_module = ProjetcedDRO(model, loss_criterion) 204 | 205 | def generatePerturbation(self, data, budget, max_iter=15): 206 | images_adv, _ = self.training_module.attack(budget, data, steps=max_iter) 207 | 208 | # The output is already loaded on an appropriate device (i.e. GPU if available). 
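        # Note that ProjetcedDRO.attack interprets 'budget' as a per-example
        # radius and internally scales the batch-level Wasserstein constraint
        # by the square root of the batch size.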
209 | return images_adv 210 | 211 | 212 | if __name__ == "__main__": 213 | # Load a simple neural network 214 | model = SimpleNeuralNet() 215 | loadModel(model, "./ERM_models/SimpleModel.pt") 216 | 217 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 218 | model.to(device) # Load the neural network on GPU if it is available 219 | print("The neural network is now loaded on {}.".format(device)) 220 | 221 | # Create an object for PGD 222 | criterion = nn.CrossEntropyLoss() 223 | batch_size = 128 224 | pgd = PGD(model, criterion, batch_size=batch_size) 225 | pytorch_model = pgd.wrapped_pytorch_model 226 | 227 | # Read MNIST dataset 228 | test_loader = retrieveMNISTTestData(batch_size=1024) 229 | 230 | # Craft adversarial examples with PGD 231 | epsilon = 0.1 # Maximum perturbation 232 | total, correct = 0, 0 233 | for i, data in enumerate(test_loader): 234 | images, labels = data 235 | images, labels = images.to(device), labels.to(device) 236 | 237 | # images_adv is already loaded on GPU by generatePerturbation 238 | images_adv = pgd.generatePerturbation(data, epsilon) 239 | with torch.no_grad(): 240 | outputs = model(images_adv) 241 | _, predicted = torch.max(outputs.data, 1) 242 | total += labels.size(0) 243 | correct += (predicted == labels).sum().item() 244 | acc = (predicted == labels).sum().item() / labels.size(0) 245 | print("Iteration: {}; test accuracy on adversarial sample: {}".format(i+1, acc)) 246 | print("Overall accuracy on adversarial exampels: {}.".format(correct / total)) 247 | -------------------------------------------------------------------------------- /util_adversarial_training.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.optim as optim 4 | import torch.nn.functional as F 5 | import numpy as np 6 | from util_MNIST import retrieveMNISTTrainingData 7 | 8 | 9 | class AdversarialTraining: 10 | """ 11 | Base class for adversarial training. 12 | This class does not add any perturbation for adversarial attacks. 13 | Hence, this class is equivalent to empirical risk minimization (ERM). 14 | """ 15 | 16 | def __init__(self, model, loss_criterion): 17 | """ 18 | Initialize instance variables. 19 | 20 | Arguments: 21 | model: neural network to be trained 22 | loss_criterion: loss function 23 | """ 24 | 25 | self.model = model 26 | self.loss_criterion = loss_criterion 27 | 28 | # Use GPU for computation if it is available 29 | self.device = torch.device( 30 | "cuda:0" if torch.cuda.is_available() else "cpu") 31 | # Load a model on an appropriate device 32 | self.model.to(self.device) 33 | print("The neural network is now loaded on {}.".format(self.device)) 34 | 35 | def attack(self, budget, data, steps=15): 36 | """ 37 | Launch an adversarial attack. 38 | This is equivalent to solving the inner maximization problem in the 39 | formulation of RO or DRO. This specific method serves as an abstract 40 | method and hence does not launch an adversarial attack. In a derived 41 | class, this method needs to be overridden. 42 | 43 | Arguments: 44 | budget: limit on the size of adversarial perturbations. 45 | This normally corresponds to epsilon in Staib and Jegedlka's 46 | paper, but in the DRO developed by Sinha et al., the budget 47 | parameter refers to gamma in their paper. 
48 | steps: number of iterations in the adversarial attack 49 | 50 | Returns: 51 | images_adv: adversarially perturbed images (in batch) 52 | labels: labels of the adversarially perturbed images 53 | """ 54 | return data 55 | 56 | def train(self, budget, batch_size=128, epochs=25, steps_adv=15): 57 | """ 58 | Train a neural network (using an adversarial attack if it is defined). 59 | For optimization, Adam is used. 60 | 61 | Arguments: 62 | budget: limit on the size of adversarial perturbations 63 | batch_size: batch size for training 64 | epochs: number of epochs in training 65 | steps_adv: number of iterations inside adversarial attacks 66 | 67 | Returns: 68 | None 69 | """ 70 | 71 | data_loader = retrieveMNISTTrainingData(batch_size, shuffle=True) 72 | optimizer = optim.Adam(self.model.parameters()) 73 | for epoch in range(epochs): 74 | for i, data in enumerate(data_loader, 0): 75 | images, labels = data 76 | # Input images and labels are loaded by this method. 77 | # Hence, they do not need to be loaded by the attack method. 78 | images, labels = images.to(self.device), labels.to(self.device) 79 | data = (images, labels) 80 | 81 | # However, the attack method should load images_adv on GPU 82 | # before returning the output. 83 | images_adv, labels = self.attack(budget, data, steps=steps_adv) 84 | 85 | optimizer.zero_grad() 86 | outputs = self.model(images_adv) 87 | loss = self.loss_criterion(outputs, labels) 88 | loss.backward() 89 | optimizer.step() 90 | # if i % 100 == 99: 91 | # print("Epoch: {}, iteration: {}".format(epoch, i)) 92 | -------------------------------------------------------------------------------- /util_analysis.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import numpy as np 4 | import matplotlib.pyplot as plt 5 | from util_MNIST import retrieveMNISTTestData 6 | from util_model import loadModel, evaluateModelAccuracy 7 | from util_adversarial_attack import FGSM, PGD, FGSMNative, DistributionalPGD 8 | 9 | """ 10 | This module contains two base classes for analysis of the robustness of neural 11 | networks. The first class, Analysis, wraps a single neural network, and the 12 | second class, AnalysisMulitpleModels, supports analysis on a list of 13 | neural networks. 14 | """ 15 | 16 | class Analysis: 17 | 18 | """ 19 | Class for the robustness analysis on a single neural network. 20 | """ 21 | 22 | def __init__(self, skeleton_model, filepath): 23 | self.model = loadModel(skeleton_model, filepath) 24 | 25 | # Use GPU for computation if it is available 26 | self.device = torch.device( 27 | "cuda:0" if torch.cuda.is_available() else "cpu") 28 | self.model.to(self.device) 29 | print("The model is now loaded on {}.".format(self.device)) 30 | 31 | self.filepath = filepath 32 | 33 | def testAccuracy(self): 34 | """ 35 | Evaluate the accuracy of a neural network on the MNIST test data. 36 | """ 37 | 38 | return evaluateModelAccuracy(self.model) 39 | 40 | def adversarialAccuracy(self, adversarial_type, budget, norm): 41 | """ 42 | Evaluate the accuracy of a neural network on a set of adversarial 43 | examples. 
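        Arguments:
            adversarial_type: "FGSM", "PGD", or "distributional_PGD"
            budget: maximum size of adversarial perturbations
            norm: norm of pointwise perturbations (2 or np.inf); this argument
                is ignored by the distributional PGD attack
        Returns:
            correct: number of adversarial examples classified correctly
            total: total number of examples in the MNIST test data

        Illustrative usage (a sketch only; it assumes that MNISTClassifier is
        imported from util_model and that a trained model has been saved at the
        path below by util_model.py):
            analyzer = Analysis(MNISTClassifier(), "./ERM_models/MNISTClassifier_adam_relu")
            correct, total = analyzer.adversarialAccuracy("PGD", budget=0.1, norm=np.inf)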
44 | """ 45 | 46 | batch_size = 512 if adversarial_type == "distributional_PGD" else 128 47 | 48 | # Numbers of iterations for pointwise and distributional PGD attacks 49 | max_iter_point, max_iter_dist = 15, 40 50 | test_loader = retrieveMNISTTestData(batch_size=batch_size) 51 | criterion = nn.CrossEntropyLoss() 52 | if adversarial_type == "FGSM": 53 | adversarial_module = FGSM( 54 | self.model, criterion, norm=norm, batch_size=batch_size) 55 | elif adversarial_type == 'PGD': 56 | adversarial_module = PGD( 57 | self.model, criterion, norm=norm, batch_size=batch_size) 58 | elif adversarial_type == "distributional_PGD": 59 | adversarial_module = DistributionalPGD(self.model, criterion) 60 | else: 61 | raise ValueError("The type of adversarial attack is not valid.") 62 | 63 | # Craft adversarial examples 64 | total, correct = 0, 0 65 | for i, data in enumerate(test_loader): 66 | images, labels = data 67 | images, labels = images.to(self.device), labels.to(self.device) 68 | data = (images, labels) 69 | 70 | # images_adv is already loaded on GPU by generatePerturbation. 71 | # Also, if FGSM is used, we have minimal=False by default. 72 | if adversarial_type == "FGSM": 73 | images_adv = adversarial_module.generatePerturbation( 74 | data, budget) 75 | elif adversarial_type == "PGD": 76 | images_adv = adversarial_module.generatePerturbation( 77 | data, budget, max_iter=max_iter_point) 78 | else: 79 | # For distributional PGD attacks 80 | images_adv = adversarial_module.generatePerturbation( 81 | data, budget, max_iter=max_iter_dist) 82 | with torch.no_grad(): 83 | softmax = nn.Softmax(dim=1) 84 | outputs = softmax(self.model(images_adv)) 85 | 86 | _, predicted = torch.max(outputs.data, 1) 87 | total += labels.size(0) 88 | correct += (predicted == labels).sum().item() 89 | return correct, total 90 | 91 | 92 | class AnalysisMulitpleModels: 93 | 94 | """ 95 | Base class for the robustness analysis on multiple neural networks. 96 | """ 97 | 98 | def __init__(self): 99 | pass 100 | 101 | def printBasicResult(self, analyzer, budget_two, budget_inf): 102 | """ 103 | Print out (i) the accuracy of a neural network on MNIST and 104 | (ii) its robustness to FGSM and PGD. 105 | """ 106 | 107 | correct, total = analyzer.testAccuracy() 108 | print("Test accuracy: {} / {} = {}".format(correct, total, correct / total)) 109 | 110 | correct, total = analyzer.adversarialAccuracy( 111 | 'FGSM', budget=budget_two, norm=2) 112 | print("Adversarial accuracy with respect to FGSM-2: {} / {} = {}".format(correct, 113 | total, correct / total)) 114 | correct, total = analyzer.adversarialAccuracy( 115 | 'FGSM', budget=budget_inf, norm=np.inf) 116 | print("Adversarial accuracy with respect to FGSM-inf: {} / {} = {}".format( 117 | correct, total, correct / total)) 118 | 119 | correct, total = analyzer.adversarialAccuracy( 120 | 'PGD', budget=budget_two, norm=2) 121 | print("Adversarial accuracy with respect to PGD-2: {} / {} = {}".format(correct, 122 | total, correct / total)) 123 | correct, total = analyzer.adversarialAccuracy( 124 | 'PGD', budget=budget_inf, norm=np.inf) 125 | print("Adversarial accuracy with respect to PGD-inf: {} / {} = {}".format( 126 | correct, total, correct / total)) 127 | 128 | def plotPerturbationLineGraph(self, ax, analyzers, labels, adversarial_type, budget, norm, bins, record_file): 129 | """ 130 | Plot a line graph of the adversarial attack success rates with various 131 | budgets for an adversarial attack. 
132 | 133 | Arguments: 134 | ax: Axes object (in pyplot) where a plot is drawn 135 | analyzers: list of Analysis objects 136 | labels: list of labels of the Analysis objects in the input list 137 | bins: the number of different budgets to examine 138 | record_file: file object to be used to record the adversarial 139 | attack success rates 140 | """ 141 | 142 | length = len(analyzers) 143 | results = [[] for i in range(length)] 144 | increment_size = budget / bins if bins != 0 else 0  # avoid multiplying by None when bins == 0 145 | perturbations = [i * increment_size for i in range(bins+1)] 146 | assert length <= 10 147 | # Colours of lines in a graph; this colour map only has ten colours. 148 | cmap = plt.get_cmap("tab10") 149 | 150 | # Evaluate the test accuracy; i.e. robustness against adversarial 151 | # attacks with the adversarial budget of 0. 152 | for j in range(length): 153 | analyzer = analyzers[j] 154 | correct, total = analyzer.testAccuracy() 155 | results[j].append(1 - correct / total) 156 | print("0-th iteration complete") 157 | 158 | # Evaluate the robustness against adversarial attacks with non-zero 159 | # budget. 160 | for i in range(bins): 161 | for j in range(length): 162 | analyzer = analyzers[j] 163 | correct, total = analyzer.adversarialAccuracy( 164 | adversarial_type, increment_size * (i+1), norm) 165 | results[j].append(1 - correct / total) 166 | print("{}-th iteration complete".format(i+1)) 167 | 168 | # Record the results in a log if required 169 | if record_file is not None: 170 | for i in range(length): 171 | analyzer = analyzers[i] 172 | record_file.write( 173 | "Adversarial attack on {}\n".format(analyzer.filepath)) 174 | record_file.write( 175 | "Attack type: {}; Norm: {}\n".format(adversarial_type, norm)) 176 | record_file.write( 177 | "Budget: {}; Bins: {}\n".format(budget, bins)) 178 | zipped_result = list(zip(perturbations, results[i])) 179 | record_file.write(str(zipped_result) + "\n\n") 180 | 181 | for i in range(length): 182 | ax.plot(perturbations, results[i], color=cmap( 183 | i), linestyle='-', label=labels[i]) 184 | ax.legend() 185 | ax.set_xlabel("Perturbation size") 186 | ax.set_ylabel("Adversarial attack success rate") 187 | ax.set_xlim(0, budget) 188 | ax.set_yscale('log') 189 | -------------------------------------------------------------------------------- /util_model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.optim as optim 4 | import torch.nn.functional as F 5 | from util_MNIST import retrieveMNISTTrainingData, retrieveMNISTTestData, displayImage 6 | 7 | img_rows, img_cols = 28, 28 8 | 9 | 10 | class SimpleNeuralNet(nn.Module): 11 | """ 12 | Simple neural network consisting of one hidden layer for MNIST. 13 | This neural network is only used as a toy example. 14 | """ 15 | 16 | def __init__(self): 17 | super().__init__() 18 | self.conv1 = nn.Conv2d(1, 2, 4) 19 | self.fc1 = nn.Linear(2 * 25 * 25, 10) 20 | 21 | def forward(self, x): 22 | output = F.relu(self.conv1(x)) 23 | output = output.view(-1, self.num_flat_features(output)) 24 | output = self.fc1(output) 25 | return output 26 | 27 | def num_flat_features(self, x): 28 | size = x.size()[1:] 29 | num_features = 1 30 | for s in size: 31 | num_features *= s 32 | return num_features 33 | 34 | 35 | class MNISTClassifier(nn.Module): 36 | """ 37 | Convolutional neural network used in the tutorial for CleverHans. 38 | This neural network is also used in experiments by Staib et al. (2017) and 39 | Sinha et al. (2018).
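    For 1 x 28 x 28 MNIST inputs, the architecture is three convolutional
    layers with nb_filters, 2 * nb_filters, and 2 * nb_filters output channels
    (kernel sizes 8, 6, and 5; strides 2, 2, and 1), each followed by the
    chosen activation, and then a single fully connected layer producing
    10 logits.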
40 | """ 41 | 42 | def __init__(self, nb_filters=64, activation='relu'): 43 | """ 44 | The parameters in convolutional layers and a fully connected layer are 45 | initialized using the Glorot/Xavier initialization, which is the 46 | default initialization method in Keras. 47 | """ 48 | 49 | super().__init__() 50 | self.activation = activation 51 | self.conv1 = nn.Conv2d(1, nb_filters, kernel_size=( 52 | 8, 8), stride=(2, 2), padding=(3, 3)) 53 | nn.init.xavier_uniform_(self.conv1.weight) 54 | self.conv2 = nn.Conv2d(nb_filters, nb_filters * 2, 55 | kernel_size=(6, 6), stride=(2, 2)) 56 | nn.init.xavier_uniform_(self.conv2.weight) 57 | self.conv3 = nn.Conv2d( 58 | nb_filters * 2, nb_filters * 2, kernel_size=(5, 5), stride=(1, 1)) 59 | nn.init.xavier_uniform_(self.conv3.weight) 60 | self.fc1 = nn.Linear(nb_filters * 2, 10) 61 | nn.init.xavier_uniform_(self.fc1.weight) 62 | 63 | def forward(self, x): 64 | outputs = self.conv1(x) 65 | outputs = self.applyActivation(outputs) 66 | outputs = self.conv2(outputs) 67 | outputs = self.applyActivation(outputs) 68 | outputs = self.conv3(outputs) 69 | outputs = self.applyActivation(outputs) 70 | outputs = outputs.view((-1, self.num_flat_features(outputs))) 71 | outputs = self.fc1(outputs) 72 | # Note that because we use CrosEntropyLoss, which combines 73 | # nn.LogSoftmax and nn.NLLLoss, we do not need a softmax layer as the 74 | # last layer. 75 | return outputs 76 | 77 | def applyActivation(self, x): 78 | if self.activation == 'relu': 79 | return F.relu(x) 80 | elif self.activation == 'elu': 81 | return F.elu(x) 82 | else: 83 | raise ValueError("The activation function is not valid.") 84 | 85 | def num_flat_features(self, x): 86 | size = x.size()[1:] 87 | num_features = 1 88 | for s in size: 89 | num_features *= s 90 | return num_features 91 | 92 | 93 | def trainModel(model, loss_criterion, optimizer, epochs=25, filepath=None): 94 | # USe GPU for computation if it is available. 95 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 96 | model.to(device) # Load the neural network on GPU if it is available 97 | print("The neural network is now loaded on {}.".format(device)) 98 | 99 | running_loss = 0.0 100 | train_loader = retrieveMNISTTrainingData() 101 | period = 20 102 | for epoch in range(epochs): 103 | for i, data in enumerate(train_loader, 0): 104 | inputs, labels = data 105 | # Load images and labels on a device 106 | inputs, labels = inputs.to(device), labels.to(device) 107 | optimizer.zero_grad() 108 | outputs = model(inputs) 109 | loss = loss_criterion(outputs, labels) 110 | loss.backward() 111 | running_loss += loss.item() 112 | optimizer.step() 113 | if i % period == period - 1: 114 | print("Epoch: {}, iteration: {}, loss: {}".format( 115 | epoch, i, running_loss / period)) 116 | running_loss = 0.0 117 | print("Training is complete.") 118 | if filepath is not None: 119 | torch.save(model.state_dict(), filepath) 120 | print("The model is now saved at {}.".format(filepath)) 121 | 122 | 123 | def loadModel(model, filepath): 124 | """ 125 | Load the set of parameters into the given model. 126 | 127 | Arguments: 128 | model: a model whose paramters are to be loaded. If model is None, 129 | the file should contain information about the architecture of 130 | the model as well as its parameters. 131 | filepath: path to the .pt file that stores the parameters (and 132 | possibly also the neural network's architecutre) to be loaded 133 | """ 134 | 135 | # Load the model on GPU if it is available. 136 | # Otherwise, use CPU. 
137 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 138 | if model is None: 139 | model = torch.load(filepath) 140 | else: 141 | model.load_state_dict(torch.load(filepath, map_location=device)) 142 | return model 143 | 144 | 145 | def evaluateModelAccuracy(model): 146 | # Use GPU for computation if it is available 147 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 148 | 149 | test_loader = retrieveMNISTTestData(batch_size=128) 150 | correct = 0 151 | total = 0 152 | with torch.no_grad(): 153 | for data in test_loader: 154 | images, labels = data 155 | images, labels = images.to(device), labels.to(device) 156 | outputs = model(images) 157 | softmax = nn.Softmax(dim=1) 158 | _, predicted = torch.max(softmax(outputs).data, 1) 159 | total += labels.size(0) 160 | correct += (predicted == labels).sum().item() 161 | return correct, total 162 | 163 | 164 | def evaluateModelSingleInput(model, image): 165 | # Use GPU for computation if it is available 166 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 167 | 168 | input = image.view((1, 1, img_rows, img_cols)).to(device) 169 | output = model(input) 170 | _, prediction = torch.max(output.data, 1) 171 | return prediction.item() 172 | 173 | 174 | if __name__ == "__main__": 175 | 176 | def experiment(activation, optimizer_type): 177 | epochs = 25 178 | # Note that nn.CrossEntropyLoss combines nn.LogSoftmax and nn.NLLLoss. 179 | loss_criterion = nn.CrossEntropyLoss() 180 | learning_rate = 0.001 181 | 182 | model = MNISTClassifier(activation=activation) 183 | if optimizer_type == "adam": 184 | optimizer = optim.Adam(model.parameters(), lr=learning_rate) 185 | elif optimizer_type == "sgd": 186 | optimizer = optim.SGD(model.parameters(), lr=learning_rate) 187 | else: 188 | raise ValueError("The optimizer type is not recognized.") 189 | 190 | # The file paths are only valid on UNIX systems. 191 | folderpath = "./ERM_models/" 192 | filename = "MNISTClassifier_{}_{}".format(optimizer_type, activation) 193 | 194 | trainModel(model, loss_criterion, optimizer, 195 | epochs=epochs, filepath=folderpath + filename) 196 | 197 | experiment("elu", "adam") 198 | experiment("relu", "adam") 199 | experiment("elu", "sgd") 200 | experiment("relu", "sgd") 201 | --------------------------------------------------------------------------------