├── .gitignore
├── README.md
├── adversarial_attack_DRO.py
├── analysis_main.py
├── generate_sample_adversarial.py
├── loss_functions.py
├── sanity_check_attack.py
├── sanity_check_foolbox.py
├── sanity_check_keras.py
├── util_MNIST.py
├── util_adversarial_attack.py
├── util_adversarial_training.py
├── util_analysis.py
└── util_model.py

/.gitignore:
--------------------------------------------------------------------------------
1 | .vscode
2 | __pycache__/
3 | data/
4 | DRO_models/
5 | ERM_models/
6 | Loss_models/
7 | images/
8 | sample_images/
9 | records/
10 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Optimal loss functions for distributionally robust optimization (DRO) of neural networks
2 |
3 | ## Project overview
4 |
5 | The code in this repository is used to conduct empirical studies for the Part C Computer Science project at the University of Oxford.
6 |
7 | As neural networks are increasingly widely applied in safety-critical systems (e.g. autonomous vehicles), it is essential to ensure the safety of systems involving neural networks.
8 | It has been discovered that despite their stellar generalization performance, neural networks are surprisingly vulnerable to so-called adversarial perturbations in computer vision; i.e. small and oftentimes imperceptible perturbations to an input image that can trick the neural network into misclassifying the image.
9 | One promising approach to improving the robustness of neural networks to adversarial perturbations is adversarial training, whereby neural networks are trained using not only the original training data but also adversarial examples that can be generated from the training data.
10 |
11 | In this project, I investigate the relationship between (i) loss functions used in training feedforward neural networks and (ii) the robustness of neural networks that are trained by distributionally robust optimization (DRO), which is a variant of adversarial training.
12 |
13 | I specifically consider the following DRO algorithms:
14 | 1. WRM developed by Sinha et al. ([paper](https://arxiv.org/abs/1710.10571))
15 | 2. FWDRO developed by Staib and Jegelka ([paper](https://machine-learning-and-security.github.io/papers/mlsec17_paper_30.pdf))
16 | 3. Distributional projected gradient descent (PGD).
17 |
18 | The loss functions examined in this project come from the paper by Carlini and Wagner ([paper](https://arxiv.org/abs/1608.04644)).
19 |
--------------------------------------------------------------------------------
/adversarial_attack_DRO.py:
--------------------------------------------------------------------------------
1 | import math
2 | import torch
3 | import torch.nn as nn
4 | import torch.nn.functional as F
5 | import numpy as np
6 | from util_MNIST import retrieveMNISTTrainingData
7 | from util_model import SimpleNeuralNet, MNISTClassifier
8 | from util_adversarial_training import AdversarialTraining
9 |
10 | """
11 | This module contains the implementation of the three DRO algorithms
12 | studied in this project: FWDRO by Staib et al., WRM by Sinha et al.,
13 | and distributional PGD.
14 | """
15 |
16 | class ProjetcedDRO(AdversarialTraining):
17 |     """
18 |     Execute distributionally robust optimization (DRO) using the Euclidean
19 |     projection in the adversarial attack.
This class is applicable only when 20 | the underlying distance is the L2-norm and the distributional distance is 21 | the 2-Wasserstein distance (i.e. W2). 22 | """ 23 | 24 | def __init__(self, model, loss_criterion): 25 | super().__init__(model, loss_criterion) 26 | 27 | def attack(self, budget, data, steps=15): 28 | 29 | def randomStart(center, epsilon): 30 | """ 31 | Select a random point that is on the perimeter of a L2-ball. 32 | This point is where the L2-norm-ball constraint is tight. 33 | 34 | Arguments: 35 | center: origin of the L2-ball 36 | epsilon: radius of the L2-ball 37 | Returns: 38 | None 39 | 40 | The input 'center' is modified in place. 41 | """ 42 | 43 | direction = torch.rand(center.size()) * 2 - 1 44 | direction = direction.to(self.device) 45 | length = torch.norm(direction, p=2).item() 46 | center.data.add_(epsilon / length * direction) 47 | center.data.clamp_(0, 1) 48 | 49 | lr = budget / 5 50 | images, labels = data 51 | # Load an initialized batch of adversarial examples on a device 52 | images_adv = images.clone().detach().to(self.device) 53 | images_adv.requires_grad_(True) 54 | 55 | # images.size()[0] corresponds to the batch size. 56 | desirable_distance = budget * math.sqrt(images.size()[0]) 57 | 58 | # Choose a random strating point where the constraint for perturbations 59 | # is tight. Without randomly choosing a starting point, the adversarial 60 | # attack fails most of the time because the loss function is flat near 61 | # the training input, which was used in training the neural network. 62 | randomStart(images_adv, budget) 63 | for i in range(steps): 64 | if images_adv.grad is not None: 65 | images_adv.grad.data.zero_() 66 | outputs = self.model(images_adv) 67 | loss = self.loss_criterion(outputs, labels) 68 | loss.backward() 69 | images_adv.data.add_(lr * images_adv.grad) 70 | diff_tensor = images.detach() - images_adv.detach() 71 | diff_tensor = diff_tensor.to(self.device) 72 | distance = torch.norm(diff_tensor, p=2).item() 73 | 74 | # Inside this conditional statement, we can be certain that 75 | # distance > 0, provided that budget > 0. 76 | # Hence, there is no risk of division by 0. 77 | if distance > desirable_distance: 78 | images_adv.data.add_( 79 | (1 - (desirable_distance / distance)) * diff_tensor) 80 | images_adv.data.clamp_(0, 1) 81 | return images_adv, labels 82 | 83 | 84 | class LagrangianDRO(AdversarialTraining): 85 | """ 86 | Execute DRO using the Lagrangian relaxation of the original theoretical 87 | formulation of DRO. This approach is developed by Sinha, Namkoong, and 88 | Duchi (2018). 89 | """ 90 | 91 | def __init__(self, model, loss_criterion, cost_function): 92 | """ 93 | Initialize instance variables 94 | 95 | Arguments: 96 | cost_function: underlying distance metric for the instance space 97 | """ 98 | 99 | super().__init__(model, loss_criterion) 100 | self.cost_function = cost_function 101 | 102 | def attack(self, budget, data, steps=15): 103 | """ 104 | Launch an adversarial attack using the Lagrangian relaxation. 105 | 106 | Arguments: 107 | budget: gamma in the original paper. Note that this parameter is 108 | different from the budget parameter in other DRO classes. 
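        In each of the 'steps' iterations, the adversarial examples ascend
        the penalized objective
        loss(model(images_adv), labels) - budget * cost_function(images, images_adv)
        with a step size that decays as 1 / sqrt(t). A larger budget
        (i.e. gamma) therefore penalizes large perturbations more heavily
        and keeps the adversarial examples closer to the clean inputs.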
109 | """ 110 | 111 | images, labels = data 112 | images_adv = images.clone().detach().to(self.device) 113 | images_adv.requires_grad_(True) 114 | 115 | for i in range(steps): 116 | if images_adv.grad is not None: 117 | images_adv.grad.data.zero_() 118 | outputs = self.model(images_adv) 119 | loss = self.loss_criterion( 120 | outputs, labels) - budget * self.cost_function(images, images_adv) 121 | loss.backward() 122 | images_adv.data.add_(1 / math.sqrt(i+1) * images_adv.grad) 123 | images_adv.data.clamp_(0, 1) 124 | return images_adv, labels 125 | 126 | 127 | class FrankWolfeDRO(AdversarialTraining): 128 | """ 129 | Execute DRO using the Frank-Wolfe method together with the stochastic 130 | block coordinate descent (BCD). This approach is developed by Staib and 131 | Jegelka (2017). 132 | """ 133 | 134 | def __init__(self, model, loss_criterion, p, q): 135 | """ 136 | Initialize instance variables. 137 | 138 | Arguments: 139 | p: distributional distance will be Wp 140 | q: underlying distance for the instance space will be Lq 141 | """ 142 | 143 | super().__init__(model, loss_criterion) 144 | assert p > 1 and q > 1 145 | self.p = p 146 | self.q = q 147 | 148 | def attack(self, budget, data, steps=15): 149 | """ 150 | Launch an adversarial attack using the Frank-Wolfe method. 151 | The algorithm is taken from 'Convex Optimization: Algorithms and 152 | Complexity' by Bubeck. 153 | """ 154 | 155 | images, labels = data 156 | images_adv = images.clone().detach().to(self.device) 157 | images_adv.requires_grad_(True) 158 | 159 | for i in range(steps): 160 | if images_adv.grad is not None: 161 | images_adv.grad.zero_() 162 | outputs = self.model(images_adv) 163 | loss = self.loss_criterion(outputs, labels) 164 | loss.backward() 165 | 166 | # desitnation corresponds to y_t in the paper by Bubeck. 167 | destination = images_adv.data + \ 168 | self.getOptimalDirection(budget=budget, data=images_adv.grad) 169 | destination = destination.to(self.device) 170 | gamma = 2 / (i + 2) 171 | images_adv.data = (1 - gamma) * \ 172 | images_adv.data + gamma * destination 173 | images_adv.data.clamp_(0, 1) 174 | return images_adv, labels 175 | 176 | def getOptimalDirection(self, budget, data): 177 | """ 178 | Calculate the minimizer of a linear subproblem in the Frank-Wolfe 179 | method. The objective function is linear, and the constraint is 180 | a mixed-norm ball. 181 | 182 | Instead of calculating a local constraint, I use the same budget 183 | parameter in every iteration. 184 | 185 | Arguments: 186 | budget: epsilon in the paper by Staib et al. 187 | data: gradient of the total loss with respect to the current 188 | batch of adversarial examples. This corresponds to C in 189 | Appendix B of the paper by Staib et al. 190 | 191 | Returns: 192 | X in Appendix B of Staib et al.'s paper 193 | """ 194 | 195 | # The number of samples 196 | batch_size = data.size()[0] 197 | 198 | # 'directions' corresponds to v's in Staib et al.'s paper. 199 | directions = data.clone().detach().view((batch_size, -1)) 200 | directions = directions.to(self.device) 201 | 202 | if self.q == np.inf: 203 | directions = directions.sign() 204 | elif self.q > 1: 205 | normalize_dim = 1 / (self.q - 1) 206 | directions.pow_(normalize_dim) 207 | directions = F.normalize(directions, p=self.q, dim=1) 208 | else: 209 | raise ValueError("The value of q must be larger than 1.") 210 | 211 | # This corresponds to a's in the original paper. 
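        # For each sample, the inner product between its direction v_i and its
        # gradient C_i measures how much the loss can increase per unit step in
        # that direction; these products determine how the overall budget is
        # split across the batch below.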
212 | products = [] 213 | for i, direction in enumerate(directions): 214 | sample = data[i].view(-1) 215 | products.append(torch.dot(direction, sample)) 216 | products = torch.stack(products) 217 | products = products.to(self.device) 218 | 219 | # This corresponds to epsilons in the original paper. 220 | size_factors = products.clone().detach() 221 | size_factors = size_factors.to(self.device) 222 | if self.p == np.inf: 223 | size_factors = size_factors.sign() 224 | elif self.p > 1: 225 | normalize_dim = 1 / (self.p - 1) 226 | size_factors.pow_(normalize_dim) 227 | distance = torch.norm(size_factors, p=self.p).item() 228 | size_factors = size_factors / distance # This is now normalized. 229 | else: 230 | raise ValueError("The value of p must be larger than 1.") 231 | 232 | outputs = [] 233 | for i, size_factor in enumerate(size_factors): 234 | outputs.append(directions[i] * size_factor * budget) 235 | result = torch.stack(outputs).view(data.size()) 236 | return result.to(self.device) 237 | 238 | 239 | def trainDROModel(dro_type, epochs, steps_adv, budget, activation, batch_size, loss_criterion, cost_function=None): 240 | """ 241 | Train a neural network using one of the following DRO methods: 242 | - PGD 243 | - Lagrangian relaxation based method developed by Sinha et al. 244 | This is also called WRM. 245 | - the Frank-Wolfe method based approach developed by Staib et al. 246 | """ 247 | 248 | model = MNISTClassifier(activation=activation) 249 | if dro_type == 'PGD': 250 | train_module = ProjetcedDRO(model, loss_criterion) 251 | elif dro_type == 'Lag': 252 | assert cost_function is not None 253 | train_module = LagrangianDRO(model, loss_criterion, cost_function) 254 | elif dro_type == 'FW': 255 | train_module = FrankWolfeDRO(model, loss_criterion, p=2, q=2) 256 | else: 257 | raise ValueError("The type of DRO is not valid.") 258 | 259 | train_module.train(budget=budget, batch_size=batch_size, 260 | epochs=epochs, steps_adv=steps_adv) 261 | folderpath = "./DRO_models/" 262 | filepath = folderpath + \ 263 | "{}_DRO_activation={}_epsilon={}.pt".format( 264 | dro_type, activation, budget) 265 | torch.save(model.state_dict(), filepath) 266 | print("A neural network adversarially trained using {} is now saved at {}.".format( 267 | dro_type, filepath)) 268 | 269 | 270 | if __name__ == "__main__": 271 | epochs = 25 272 | steps_adv = 15 273 | epsilon = 2.8 274 | gammas = [0.0001, 0.0003, 0.001, 0.003, 0.01, 0.03, 0.1, 0.3, 1.0, 3.0] 275 | batch_size = 128 276 | loss_criterion = nn.CrossEntropyLoss() 277 | 278 | def cost_function(x, y): return torch.dist(x, y, p=2) ** 2 279 | 280 | trainDROModel('PGD', epochs, steps_adv, epsilon, 281 | 'relu', batch_size, loss_criterion) 282 | trainDROModel('FW', epochs, steps_adv, epsilon, 283 | 'relu', batch_size, loss_criterion) 284 | 285 | trainDROModel('PGD', epochs, steps_adv, epsilon, 286 | 'elu', batch_size, loss_criterion) 287 | trainDROModel('FW', epochs, steps_adv, epsilon, 288 | 'elu', batch_size, loss_criterion) 289 | 290 | for gamma in gammas: 291 | trainDROModel('Lag', epochs, steps_adv, gamma, 'relu', 292 | batch_size, loss_criterion, cost_function=cost_function) 293 | trainDROModel('Lag', epochs, steps_adv, gamma, 'elu', 294 | batch_size, loss_criterion, cost_function=cost_function) 295 | -------------------------------------------------------------------------------- /analysis_main.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | from util_model 
import MNISTClassifier 4 | from util_analysis import Analysis, AnalysisMulitpleModels 5 | 6 | """ 7 | This module contains classes for robustness analysis of neural networks. 8 | """ 9 | 10 | 11 | class ERMModelsAnalysis(AnalysisMulitpleModels): 12 | 13 | """ 14 | Class for the robustness analysis on neural networks trained by ERM. 15 | """ 16 | 17 | def __init__(self): 18 | model_relu = MNISTClassifier(activation='relu') 19 | model_elu = MNISTClassifier(activation='elu') 20 | model_sgd_relu = MNISTClassifier(activation='relu') 21 | model_sgd_elu = MNISTClassifier(activation='elu') 22 | 23 | # These file paths only work on UNIX. 24 | folderpath = "./ERM_models/" 25 | filename_relu = "MNISTClassifier_adam_relu.pt" 26 | filename_elu = "MNISTClassifier_adam_elu.pt" 27 | filename_sgd_relu = "MNISTClassifier_sgd_relu.pt" 28 | filename_sgd_elu = "MNISTClassifier_sgd_elu.pt" 29 | 30 | self.analyzer_relu = Analysis(model_relu, folderpath + filename_relu) 31 | self.analyzer_elu = Analysis(model_elu, folderpath + filename_elu) 32 | self.analyzer_sgd_relu = Analysis(model_sgd_relu, folderpath + filename_sgd_relu) 33 | self.analyzer_sgd_elu = Analysis(model_sgd_elu, folderpath + filename_sgd_elu) 34 | 35 | def plotERMModels(self, budget, norm, bins): 36 | """ 37 | Produce a line graph of adversarial attack success rates for various 38 | budgets. 39 | """ 40 | 41 | analyzers = [self.analyzer_relu, self.analyzer_elu, 42 | self.analyzer_sgd_relu, self.analyzer_sgd_elu] 43 | labels = ['ReLU Adam', 'ELU Adam', 'ReLU SGD', 'ELU SGD'] 44 | 45 | fig, (ax1, ax2) = plt.subplots(1, 2) 46 | 47 | record_filepath = "./records/ERM_analysis_norm={}.txt".format( 48 | "L2" if norm == 2 else "Linf") 49 | with open(record_filepath, mode='w') as f: 50 | self.plotPerturbationLineGraph( 51 | ax1, analyzers, labels, "FGSM", budget, norm, bins, f) 52 | self.plotPerturbationLineGraph( 53 | ax2, analyzers, labels, "PGD", budget, norm, bins, f) 54 | 55 | ax1.set_title("FGSM") 56 | ax2.set_title("PGD") 57 | plt.tight_layout() 58 | 59 | width, height = fig.get_size_inches() 60 | fig.set_size_inches(width * 1.8, height) 61 | 62 | # plt.show() 63 | filepath = "./images/ERM_norm={}.png".format( 64 | "L2" if norm == 2 else "Linf") 65 | plt.savefig(filepath, dpi=300) 66 | print("Graph now saved at {}".format(filepath)) 67 | plt.close() 68 | 69 | 70 | class DROModelsAnalysis(AnalysisMulitpleModels): 71 | 72 | """ 73 | Class for the robustness analysis on the neural networks trained by DRO. 74 | """ 75 | 76 | def __init__(self): 77 | self.gammas = [0.0001, 0.0003, 0.001, 78 | 0.003, 0.01, 0.03, 0.1, 0.3, 1.0, 3.0] 79 | 80 | def initializeLagAnalyzers(): 81 | """ 82 | Initialize Analysis objects for neural networks trained by the DRO 83 | algorithm proposed by Sinha et al. 
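            One Analysis object is created for each value of gamma in
            self.gammas, separately for the ReLU and ELU models, and the two
            lists of analyzers are returned.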
84 | """ 85 | 86 | folderpath = "./DRO_models/" 87 | Lag_relu_analyzers = [] 88 | Lag_elu_analyzers = [] 89 | length = len(self.gammas) 90 | for i in range(length): 91 | gamma = self.gammas[i] 92 | filepath_relu = folderpath + \ 93 | "{}_DRO_activation={}_epsilon={}.pt".format( 94 | "Lag", "relu", gamma) 95 | filepath_elu = folderpath + \ 96 | "{}_DRO_activation={}_epsilon={}.pt".format( 97 | "Lag", "elu", gamma) 98 | model_relu = MNISTClassifier(activation='relu') 99 | model_elu = MNISTClassifier(activation='elu') 100 | Lag_relu_analyzers.append(Analysis(model_relu, filepath_relu)) 101 | Lag_elu_analyzers.append(Analysis(model_elu, filepath_elu)) 102 | return Lag_relu_analyzers, Lag_elu_analyzers 103 | 104 | def initializeAnalyzers(dro_type, epsilon): 105 | """ 106 | Initialize Analysis objects for neural networks trained by the 107 | Frank-Wolfe method and PGD 108 | """ 109 | 110 | folderpath = "./DRO_models/" 111 | filepath_relu = folderpath + \ 112 | "{}_DRO_activation={}_epsilon={}.pt".format( 113 | dro_type, "relu", epsilon) 114 | filepath_elu = folderpath + \ 115 | "{}_DRO_activation={}_epsilon={}.pt".format( 116 | dro_type, "elu", epsilon) 117 | model_relu = MNISTClassifier(activation='relu') 118 | model_elu = MNISTClassifier(activation='elu') 119 | analyzer_relu = Analysis(model_relu, filepath_relu) 120 | analyzer_elu = Analysis(model_elu, filepath_elu) 121 | return analyzer_relu, analyzer_elu 122 | 123 | self.Lag_relu_analyzers, self.Lag_elu_analyzers = initializeLagAnalyzers() 124 | self.FW_relu_analyzer, self.FW_elu_analyzer = initializeAnalyzers( 125 | dro_type='FW', epsilon=2.8) 126 | self.PGD_relu_analyzer, self.PGD_elu_analyzer = initializeAnalyzers( 127 | dro_type='PGD', epsilon=2.8) 128 | 129 | def plotLagDROModels(self, adversarial_type, budget, norm, bins): 130 | """ 131 | Produce line graphs of adversarial attack success rates on neural 132 | networks trained by WRM with various values of gamma. 133 | """ 134 | 135 | # Pyplot supports LaTex syntax. 136 | labels = [r"$\gamma = {}$".format(gamma) for gamma in self.gammas] 137 | 138 | fig, (ax1, ax2) = plt.subplots(1, 2) 139 | 140 | record_filepath = "./records/DRO_analysis_{}_norm={}.txt".format( 141 | adversarial_type, "L2" if norm == 2 else "Linf") 142 | with open(record_filepath, mode='w') as f: 143 | self.plotPerturbationLineGraph( 144 | ax1, self.Lag_relu_analyzers, labels, adversarial_type, budget, norm, bins, f) 145 | self.plotPerturbationLineGraph( 146 | ax2, self.Lag_elu_analyzers, labels, adversarial_type, budget, norm, bins, f) 147 | print("Record stored at {}".format(record_filepath)) 148 | 149 | ax1.set_title("ReLU") 150 | ax2.set_title("ELU") 151 | plt.tight_layout() 152 | 153 | width, height = fig.get_size_inches() 154 | fig.set_size_inches(width * 1.8, height) 155 | 156 | # plt.show() 157 | filepath = "./images/Lag_{}_norm={}.png".format( 158 | adversarial_type, "L2" if norm == 2 else "Linf") 159 | plt.savefig(filepath, dpi=300) 160 | print("Graph now saved at {}".format(filepath)) 161 | plt.close() 162 | 163 | def compareLagDROModels(self, budget_two, budget_inf, bins): 164 | """ 165 | Compare the robustness of those neural networks trained by WRM with 166 | different values of gamma by using five types of adversarial attacks: 167 | - FGSM with the L-inf norm 168 | - FGSM with the L-2 norm 169 | - pointwise PGD with the L-inf norm 170 | - pointwise PGD with the L-2 norm 171 | - distributional PGD. 
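        The distributional PGD attack is run only with the L2 budget
        (budget_two), since it measures perturbations in the 2-Wasserstein
        distance induced by the L2 norm.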
172 | """ 173 | 174 | self.plotLagDROModels("FGSM", budget_inf, np.inf, bins) 175 | self.plotLagDROModels("FGSM", budget_two, 2, bins) 176 | 177 | self.plotLagDROModels("PGD", budget_inf, np.inf, bins) 178 | self.plotLagDROModels("PGD", budget_two, 2, bins) 179 | 180 | self.plotLagDROModels("distributional_PGD", budget_two, 2, bins) 181 | 182 | def plotDROModels(self, budget, norm, bins): 183 | """ 184 | Compare the robustness of neural networks trained by all three DRO 185 | algorithms: WRM, the Frank-Wolfe method, and PGD. 186 | """ 187 | 188 | # The optimal gamma for both ReLu and ELU has been determined to be 1.0. 189 | optimal_gamma = 1.0 190 | index_optimal_gamma = self.gammas.index(optimal_gamma) 191 | LagAnalyzers = [self.Lag_relu_analyzers[index_optimal_gamma], 192 | self.Lag_elu_analyzers[index_optimal_gamma]] 193 | FWandPGDanalyzers = [self.FW_relu_analyzer, self.FW_elu_analyzer, 194 | self.PGD_relu_analyzer, self.PGD_elu_analyzer] 195 | analyzers = LagAnalyzers + FWandPGDanalyzers 196 | labels = ["Lag ReLU", "Lag ELU", "FW ReLU", 197 | "FW ELU", "PGD ReLU", "PGD ELU"] 198 | 199 | fig, (ax1, ax2) = plt.subplots(1, 2) 200 | self.plotPerturbationLineGraph( 201 | ax1, analyzers, labels, "FGSM", budget, norm, bins, record_file=None) 202 | self.plotPerturbationLineGraph( 203 | ax2, analyzers, labels, "PGD", budget, norm, bins, record_file=None) 204 | 205 | ax1.set_title("FGSM") 206 | ax2.set_title("PGD") 207 | plt.tight_layout() 208 | 209 | width, height = fig.get_size_inches() 210 | fig.set_size_inches(width * 1.8, height) 211 | 212 | # plt.show() 213 | filepath = "./images/DRO_norm={}.png".format( 214 | "L2" if norm == 2 else "Linf") 215 | plt.savefig(filepath, dpi=300) 216 | print("Graph now saved at {}".format(filepath)) 217 | plt.close() 218 | 219 | 220 | class LossFunctionsAnalysis(AnalysisMulitpleModels): 221 | 222 | """ 223 | Class for the robustness analysis various loss functions 224 | """ 225 | 226 | def __init__(self): 227 | 228 | def initializeAnalyzers(dro_type, activation, budget): 229 | analyzers = [] 230 | filepath = folderpath = "./Loss_models/" 231 | for i in range(1, 8): 232 | filepath = folderpath + "{}_DRO_activation={}_epsilon={}_loss={}.pt".format( 233 | dro_type, activation, budget, "f_{}".format(i)) 234 | model = MNISTClassifier(activation=activation) 235 | analyzers.append(Analysis(model, filepath)) 236 | return analyzers 237 | 238 | epsilon = 2.8 239 | optimal_gamma = 1.0 240 | self.FWAnalyzers = initializeAnalyzers( 241 | "FW", activation='relu', budget=epsilon) 242 | self.PGDAnalyzers = initializeAnalyzers( 243 | "PGD", activation='relu', budget=epsilon) 244 | self.LagAnalyzers = initializeAnalyzers( 245 | "Lag", activation='relu', budget=optimal_gamma) 246 | 247 | def plotLosses(self, training_type, budget, norm, bins, record): 248 | labels = [r"$f_{}$".format(i) for i in range(1, 8)] 249 | 250 | fig, (ax1, ax2) = plt.subplots(1, 2) 251 | 252 | if training_type == "PGD": 253 | analyzers = self.PGDAnalyzers 254 | elif training_type == "FW": 255 | analyzers = self.FWAnalyzers 256 | elif training_type == "Lag": 257 | analyzers = self.LagAnalyzers 258 | else: 259 | raise ValueError("The type of DRO is invalid.") 260 | 261 | if record: 262 | record_filepath = "./records/Loss_analysis_DRO_type={}_norm={}budget={}.txt".format( 263 | training_type, "L2" if norm == 2 else "Linf", budget) 264 | with open(record_filepath, "w") as f: 265 | self.plotPerturbationLineGraph( 266 | ax1, analyzers, labels, "FGSM", budget, norm, bins, f) 267 | 
self.plotPerturbationLineGraph( 268 | ax2, analyzers, labels, "PGD", budget, norm, bins, f) 269 | print("Record stored at {}".format(record_filepath)) 270 | else: 271 | self.plotPerturbationLineGraph( 272 | ax1, analyzers, labels, "FGSM", budget, norm, bins, None) 273 | self.plotPerturbationLineGraph( 274 | ax2, analyzers, labels, "PGD", budget, norm, bins, None) 275 | 276 | ax1.set_title("FGSM") 277 | ax2.set_title("PGD") 278 | plt.tight_layout() 279 | 280 | width, height = fig.get_size_inches() 281 | fig.set_size_inches(width * 1.8, height) 282 | 283 | # plt.show() 284 | filepath = "./images/Loss_{}_norm={}.png".format( 285 | training_type, "L2" if norm == 2 else "Linf") 286 | plt.savefig(filepath, dpi=300) 287 | print("Graph now saved at {}".format(filepath)) 288 | plt.close() 289 | 290 | def compareLosses(self, budget_two, budget_inf, bins, record=True): 291 | """ 292 | Compare the seven loss functions in terms of robustness of the 293 | resulting neural networks. 294 | """ 295 | 296 | self.plotLosses("PGD", budget_inf, np.inf, bins, record) 297 | self.plotLosses("PGD", budget_two, 2, bins, record) 298 | 299 | self.plotLosses("FW", budget_inf, np.inf, bins, record) 300 | self.plotLosses("FW", budget_two, 2, bins, record) 301 | 302 | self.plotLosses("Lag", budget_inf, np.inf, bins, record) 303 | self.plotLosses("Lag", budget_two, 2, bins, record) 304 | 305 | if __name__ == '__main__': 306 | budget_two = 4.0 307 | budget_inf = 0.4 308 | bins = 20 309 | 310 | erm_analysis = ERMModelsAnalysis() 311 | erm_analysis.plotERMModels(budget=budget_two, norm=2, bins=bins) 312 | erm_analysis.plotERMModels(budget=budget_inf, norm=np.inf, bins=bins) 313 | 314 | dro_analysis = DROModelsAnalysis() 315 | dro_analysis.compareLagDROModels(budget_two=budget_two, budget_inf=budget_inf, bins=bins) 316 | # dro_analysis.compareLagDROModels(budget_two=10.0, budget_inf=None, bins=40) 317 | dro_analysis.plotDROModels(budget=budget_two, norm=2, bins=bins) 318 | dro_analysis.plotDROModels(budget=budget_inf, norm=np.inf, bins=bins) 319 | 320 | loss_analysis = LossFunctionsAnalysis() 321 | loss_analysis.compareLosses(budget_two=budget_two, budget_inf=budget_inf, bins=bins) 322 | -------------------------------------------------------------------------------- /generate_sample_adversarial.py: -------------------------------------------------------------------------------- 1 | """ 2 | The code is attributed to the GitHub page of foolbox: 3 | https://github.com/bethgelab/foolbox 4 | """ 5 | 6 | import foolbox 7 | import keras 8 | import numpy as np 9 | from keras.applications.resnet50 import ResNet50 10 | from keras.preprocessing import image 11 | from keras.applications.resnet50 import preprocess_input, decode_predictions 12 | import matplotlib.pyplot as plt 13 | 14 | """ 15 | This script generates a adversarial example from an ImageNet image. 
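It loads a ResNet50 model pretrained on ImageNet, attacks a sample image with
Foolbox's FGSM, and saves a figure showing the original image, the
perturbation, and the adversarial example under ./images/.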
16 | """ 17 | 18 | img_rows, img_cols = 224, 224 19 | nb_channels = 3 20 | img_shape = (img_rows, img_cols, nb_channels) 21 | 22 | # instantiate model 23 | keras.backend.set_learning_phase(0) 24 | kmodel = ResNet50(weights='imagenet') 25 | preprocessing = (np.array([104, 116, 123]), 1) 26 | fmodel = foolbox.models.KerasModel(kmodel, bounds=(0, 255), preprocessing=preprocessing) 27 | 28 | img_folderpath = "./sample_images/" 29 | img_path = img_folderpath + 'sample_image_2.jpg' # An image of a yellow cab/taxi 30 | x = image.load_img(img_path, color_mode='rgb', target_size=(img_rows, img_cols)) 31 | img = image.img_to_array(x) 32 | img = np.expand_dims(img, axis=0) 33 | img = img.reshape(img_shape) 34 | label = 468 # For the class of taxi and cab 35 | 36 | # Note that proprocess_input is an in-place operation. 37 | prediction = kmodel.predict(preprocess_input(np.copy(img)).reshape((1, img_rows, img_cols, nb_channels))) 38 | # decode the results into a list of tuples (class, description, probability) 39 | # (one such list for each sample in the batch) 40 | print('Prediction on the original example:', decode_predictions(prediction, top=3)[0]) 41 | # The original image is correctly classified as a cab with the confidence of 0.999. 42 | 43 | # apply attack on source image 44 | # ::-1 reverses the color channels, because Keras ResNet50 expects BGR instead of RGB 45 | attack = foolbox.attacks.FGSM(fmodel) 46 | adversarial = attack(img[:, :, ::-1], label)[:,:,::-1] 47 | # if the attack fails, adversarial will be None and a warning will be printed 48 | 49 | adversarial_prediction = kmodel.predict(adversarial.reshape((1, img_rows, img_cols, nb_channels))) 50 | print('Prediction on the adversarial example:', decode_predictions(adversarial_prediction, top=3)[0]) 51 | # The adversarial example is incorrectly classified as a jigsaw puzzle with the confidence of 0.629. 52 | 53 | plt.figure() 54 | 55 | plt.subplot(1, 3, 1) 56 | plt.title('Original') 57 | plt.imshow(img / 255) # division by 255 to convert [0, 255] to [0, 1] 58 | plt.axis('off') 59 | 60 | plt.subplot(1, 3, 2) 61 | plt.title('Difference') 62 | difference = adversarial - img 63 | plt.imshow(difference / abs(difference).max() * 0.2 + 0.5) 64 | plt.axis('off') 65 | 66 | plt.subplot(1, 3, 3) 67 | plt.title('Adversarial') 68 | plt.imshow(adversarial / 255) 69 | plt.axis('off') 70 | 71 | plt.tight_layout() 72 | #plt.show() 73 | 74 | folderpath = "./images/" 75 | # Filepath for the output adversarially perturbed image 76 | filepath = folderpath + "sample_adversarial_example.png" 77 | plt.savefig(filepath, dpi=300) 78 | plt.close() 79 | -------------------------------------------------------------------------------- /loss_functions.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | from util_MNIST import retrieveMNISTTrainingData 6 | from util_model import MNISTClassifier, trainModel 7 | from adversarial_attack_DRO import ProjetcedDRO, LagrangianDRO, FrankWolfeDRO 8 | 9 | """ 10 | This module contains the seven loss functions listed in Carlini and Wagneer 11 | (2017) with a minor modification in the constant in f_5. 
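As implemented below, f_1 is the standard cross-entropy loss; f_6 averages
(largest output - true-class output), and f_7 averages
softplus(largest non-true-class output - true-class output); f_2 and f_3
apply f_6 and f_7, respectively, to softmax probabilities rather than logits;
f_4 averages max(0, 0.5 - p_true); and f_5 averages log2(2.125 - 2 * p_true),
where p_true denotes the softmax probability of the correct class.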
12 | """
13 |
14 |
15 | def f_1(outputs, labels):
16 |     return F.cross_entropy(outputs, labels)
17 |
18 |
19 | def f_2(outputs, labels):
20 |     outputs = F.softmax(outputs, dim=1)
21 |     return f_6(outputs, labels)
22 |
23 |
24 | def f_3(outputs, labels):
25 |     outputs = F.softmax(outputs, dim=1)
26 |     return f_7(outputs, labels)
27 |
28 |
29 | def f_4(outputs, labels):
30 |     outputs = F.softmax(outputs, dim=1)
31 |     reference_outputs = torch.gather(
32 |         outputs, 1, labels.view(-1, 1).long()).view(-1)
33 |     return torch.mean(torch.clamp(0.5 - reference_outputs, min=0))
34 |
35 |
36 | def f_5(outputs, labels):
37 |     # Note that in the original version, the base of e is used instead of 2.
38 |
39 |     outputs = F.softmax(outputs, dim=1)
40 |     reference_outputs = torch.gather(
41 |         outputs, 1, labels.view(-1, 1).long()).view(-1)
42 |     return torch.mean(torch.log2(2.125 - 2 * reference_outputs))
43 |
44 |
45 | def f_6(outputs, labels):
46 |     max_outputs, _ = torch.max(outputs, dim=1)
47 |     reference_outputs = torch.gather(
48 |         outputs, 1, labels.view(-1, 1).long()).view(-1)
49 |     return torch.mean(max_outputs - reference_outputs)
50 |
51 |
52 | def f_7(outputs, labels, nb_classes=10):
53 |     # Use GPU for computation if it is available.
54 |     device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
55 |
56 |     batch_size = labels.size()[0]
57 |     indexes_row = torch.arange(0, nb_classes).to(device)
58 |     indexes = indexes_row.repeat(batch_size, 1)
59 |     labels_cloned = labels.view(-1, 1).repeat(1, nb_classes)
60 |
61 |     new_outputs = outputs[indexes != labels_cloned].view(
62 |         batch_size, nb_classes-1)
63 |     reference_outputs = torch.gather(
64 |         outputs, 1, labels.view(-1, 1).long()).view(-1)
65 |     difference = torch.max(new_outputs, dim=1)[0] - reference_outputs
66 |     return torch.mean(F.softplus(difference))
67 |
68 |
69 | def trainModelLoss(dro_type, epochs, steps_adv, budget, activation, batch_size, loss_criterion, cost_function=None):
70 |     """
71 |     Train a neural network with a specified loss function.
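    The trained model is saved under ./Loss_models/ with the DRO type,
    activation, budget, and the name of the loss function encoded in the
    file name.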
72 | """ 73 | 74 | model = MNISTClassifier(activation=activation) 75 | if dro_type == 'PGD': 76 | train_module = ProjetcedDRO(model, loss_criterion) 77 | elif dro_type == 'Lag': 78 | assert cost_function is not None 79 | train_module = LagrangianDRO(model, loss_criterion, cost_function) 80 | elif dro_type == 'FW': 81 | train_module = FrankWolfeDRO(model, loss_criterion, p=2, q=2) 82 | else: 83 | raise ValueError("The type of DRO is not valid.") 84 | 85 | train_module.train(budget=budget, batch_size=batch_size, 86 | epochs=epochs, steps_adv=steps_adv) 87 | folderpath = "./Loss_models/" 88 | filepath = folderpath + "{}_DRO_activation={}_epsilon={}_loss={}.pt".format( 89 | dro_type, activation, budget, loss_criterion.__name__) 90 | torch.save(model.state_dict(), filepath) 91 | print("A neural network adversarially trained using {} now saved at: {}".format( 92 | dro_type, filepath)) 93 | 94 | 95 | if __name__ == "__main__": 96 | epochs = 25 97 | steps_adv = 15 98 | epsilon = 2.8 99 | optimal_gamma = 1.0 100 | batch_size = 128 101 | loss_criterions = [f_1, f_2, f_3, f_4, f_5, f_6, f_7] 102 | 103 | def cost_function(x, y): return torch.dist(x, y, p=2) ** 2 104 | 105 | for loss_criterion in loss_criterions: 106 | trainModelLoss("FW", epochs, steps_adv, epsilon, 107 | "relu", batch_size, loss_criterion) 108 | trainModelLoss("PGD", epochs, steps_adv, epsilon, 109 | "relu", batch_size, loss_criterion) 110 | trainModelLoss("Lag", epochs, steps_adv, optimal_gamma, "relu", 111 | batch_size, loss_criterion, cost_function=cost_function) 112 | -------------------------------------------------------------------------------- /sanity_check_attack.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | import torch.optim as optim 6 | 7 | from util_MNIST import retrieveMNISTTestData 8 | from util_model import MNISTClassifier, loadModel 9 | 10 | """ 11 | This module is for sanity checking. Most of the code in this module is 12 | attributed to a tutorial in the documentation of PyTorch: 13 | https://pytorch.org/tutorials/beginner/fgsm_tutorial.html. 14 | """ 15 | 16 | epsilons = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9] 17 | use_cuda = True # If GPU is available, choose GPU over CPU. 18 | 19 | # FGSM attack code 20 | 21 | 22 | def fgsm_attack(image, epsilon, data_grad): 23 | 24 | # Collect the element-wise sign of the data gradient 25 | sign_data_grad = data_grad.sign() 26 | # Create the perturbed image by adjusting each pixel of the input image 27 | perturbed_image = image + epsilon*sign_data_grad 28 | # Adding clipping to maintain [0,1] range 29 | perturbed_image = torch.clamp(perturbed_image, 0, 1) 30 | # Return the perturbed image 31 | return perturbed_image 32 | 33 | 34 | def test(model, device, test_loader, epsilon): 35 | 36 | # Accuracy counter 37 | correct = 0 38 | 39 | # Loop over all examples in test set 40 | for i, (data, target) in enumerate(test_loader): 41 | # Send the data and label to the device 42 | data, target = data.to(device), target.to(device) 43 | 44 | # Set requires_grad attribute of tensor. 
Important for Attack 45 | data.requires_grad = True 46 | 47 | # Forward pass the data through the model 48 | output = model(data) 49 | # get the index of the max log-probability 50 | init_pred = output.max(1, keepdim=True)[1] 51 | 52 | # If the initial prediction is wrong, dont bother attacking, just move on 53 | if init_pred.item() != target.item(): 54 | continue 55 | 56 | # Calculate the loss 57 | loss = F.nll_loss(output, target) 58 | 59 | # Zero all existing gradients 60 | model.zero_grad() 61 | 62 | # Calculate gradients of model in backward pass 63 | loss.backward() 64 | 65 | # Collect datagrad 66 | data_grad = data.grad.detach() 67 | 68 | # Call FGSM Attack 69 | perturbed_data = fgsm_attack(data, epsilon, data_grad) 70 | 71 | # Re-classify the perturbed image 72 | output = model(perturbed_data) 73 | 74 | # Check for success 75 | # get the index of the max log-probability 76 | final_pred = output.max(1, keepdim=True)[1] 77 | if final_pred.item() == target.item(): 78 | correct += 1 79 | 80 | # Calculate final accuracy for this epsilon 81 | final_acc = correct/float(len(test_loader)) 82 | print("Epsilon: {}\tTest Accuracy = {} / {} = {}".format(epsilon, 83 | correct, len(test_loader), final_acc)) 84 | 85 | 86 | if __name__ == "__main__": 87 | # MNIST Test dataset and dataloader declaration 88 | test_loader = retrieveMNISTTestData(batch_size=1, shuffle=True) 89 | 90 | # Define what device we are using 91 | print("CUDA Available: ", torch.cuda.is_available()) 92 | device = torch.device("cuda" if ( 93 | use_cuda and torch.cuda.is_available()) else "cpu") 94 | 95 | # Initialize the network 96 | filepath_relu = "./experiment_models/MNISTClassifier_relu.pt" 97 | model_relu = MNISTClassifier(activation='relu') 98 | model_relu = loadModel(model_relu, filepath_relu) 99 | model_relu.to(device) 100 | 101 | # Set the model in evaluation mode. In this case this is for the Dropout layers 102 | model_relu.eval() 103 | 104 | # Run test for each epsilon 105 | for eps in epsilons: 106 | test(model_relu, device, test_loader, eps) 107 | -------------------------------------------------------------------------------- /sanity_check_foolbox.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import foolbox 3 | from foolbox.models import PyTorchModel 4 | from foolbox.criteria import Misclassification 5 | from util_MNIST import retrieveMNISTTestData 6 | from util_model import loadModel, MNISTClassifier 7 | 8 | from torchsummary import summary 9 | 10 | """ 11 | This module is for sanity check of Foolbox, a Python library for crafting 12 | adversarial exampples. We apply Foolbox's implementation of FGSM on a neural 13 | network trained by empirical risk minimization (ERM). 
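In its current form, the script runs Foolbox's L2BasicIterativeAttack; the
FGSM call is kept in the code but commented out.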
14 | """
15 |
16 |
17 | def wrapFoolboxModel(model):
18 |     return PyTorchModel(model, bounds=(0, 1), num_classes=10, channel_axis=1, preprocessing=(0, 1))
19 |
20 |
21 | def adversarialAccuracy(model):
22 |     # Use GPU for computation if it is available
23 |     device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
24 |     model.to(device)
25 |     print("The model is now loaded on {}.".format(device))
26 |
27 |     pytorch_model = wrapFoolboxModel(model)
28 |
29 |     # get source image and label
30 |     batch_size = 1
31 |     test_loader = retrieveMNISTTestData(batch_size=batch_size)
32 |     criterion = Misclassification()
33 |
34 |     wrong, total = 0, 0
35 |     period = 500
36 |     max_epsilon = 1.0
37 |     epsilons = 5
38 |     for i, (images, labels) in enumerate(test_loader):
39 |         if i == 10000:
40 |             break
41 |         image, label = images[0].numpy(), labels[0].numpy()
42 |
43 |         #fgsm = foolbox.attacks.FGSM(pytorch_model, criterion)
44 |         #image_adv = fgsm(image, label, epsilons=epsilons, max_epsilon=max_epsilon)
45 |         pgd2 = foolbox.attacks.L2BasicIterativeAttack(pytorch_model, criterion)
46 |         image_adv = pgd2(image, label, epsilon=max_epsilon,
47 |                          stepsize=max_epsilon / 5, iterations=15)
48 |
49 |         total += 1
50 |         if image_adv is not None:
51 |             wrong += 1
52 |         if i % period == period - 1:
53 |             print(
54 |                 "Cumulative adversarial attack success rate: {} / {} = {}".format(wrong, total, wrong / total))
55 |     print("Adversarial error rate: {} / {} = {}".format(wrong, total, wrong / total))
56 |
57 |
58 | if __name__ == "__main__":
59 |     model_relu = MNISTClassifier(activation='relu')
60 |     model_elu = MNISTClassifier(activation='elu')
61 |
62 |     # These file paths only work on UNIX.
63 |     filepath_relu = "./ERM_models/MNISTClassifier_relu.pt"
64 |     filepath_elu = "./ERM_models/MNISTClassifier_elu.pt"
65 |     model_relu = loadModel(model_relu, filepath_relu)
66 |     model_elu = loadModel(model_elu, filepath_elu)
67 |
68 |     # Display the architecture of the neural network
69 |     #summary(model_relu.cuda(), (1, 28, 28))
70 |
71 |     print("The result of relu is as follows.")
72 |     adversarialAccuracy(model_relu)
73 |     print("The result of elu is as follows.")
74 |     adversarialAccuracy(model_elu)
75 |
--------------------------------------------------------------------------------
/sanity_check_keras.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from numpy import linalg as LA
3 | import matplotlib.pyplot as plt
4 |
5 | from keras.utils import to_categorical
6 | from keras.datasets import mnist
7 | from keras.models import Sequential, load_model
8 | from keras.layers import Dense, Conv2D, Flatten, Activation
9 | from keras.optimizers import Adam
10 | from keras import backend as K
11 |
12 | import foolbox
13 | from foolbox.models import KerasModel
14 | from foolbox.criteria import Misclassification
15 |
16 | """
17 | This module is for sanity checking.
18 | It creates the neural network used by Staib et al. and Sinha et al.
19 | in Keras and evaluates its robustness (or vulnerability) against an
20 | FGSM adversary.
21 | """ 22 | 23 | nb_filters = 64 24 | epochs = 25 25 | batch_size = 128 26 | num_classes = 10 27 | 28 | # input image dimensions 29 | img_rows, img_cols = 28, 28 30 | 31 | # the data, split between train and test sets 32 | (x_train, y_train), (x_test, y_test) = mnist.load_data() 33 | 34 | if K.image_data_format() == 'channels_first': 35 | x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols) 36 | x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols) 37 | input_shape = (1, img_rows, img_cols) 38 | channel_axis = 1 39 | else: 40 | x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1) 41 | x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1) 42 | input_shape = (img_rows, img_cols, 1) 43 | channel_axis = 3 44 | 45 | x_train = x_train.astype('float32') 46 | x_test = x_test.astype('float32') 47 | x_train /= 255 48 | x_test /= 255 49 | print('x_train shape:', x_train.shape) 50 | print(x_train.shape[0], 'train samples') 51 | print(x_test.shape[0], 'test samples') 52 | 53 | # convert class vectors to binary class matrices 54 | y_train = to_categorical(y_train, num_classes) 55 | y_test_original = y_test 56 | y_test = to_categorical(y_test, num_classes) 57 | 58 | 59 | def trainModel(activation='relu'): 60 | model = Sequential() 61 | model.add(Conv2D(filters=nb_filters, kernel_size=(8, 8), 62 | strides=(2, 2), padding='same', input_shape=input_shape)) 63 | model.add(Activation(activation)) 64 | model.add(Conv2D(filters=nb_filters * 2, kernel_size=(6, 6), 65 | strides=(2, 2), padding='valid')) 66 | model.add(Activation(activation)) 67 | model.add(Conv2D(filters=nb_filters * 2, kernel_size=(5, 5), 68 | strides=(1, 1), padding='valid')) 69 | model.add(Activation(activation)) 70 | model.add(Flatten()) 71 | model.add(Dense(num_classes)) 72 | model.add(Activation('softmax')) 73 | 74 | optimizer = Adam(lr=0.001) 75 | model.compile(loss='categorical_crossentropy', 76 | optimizer=optimizer, metrics=['accuracy']) 77 | model.fit(x_train, y_train, 78 | batch_size=batch_size, 79 | epochs=epochs, 80 | verbose=1, 81 | validation_data=(x_test, y_test)) 82 | score = model.evaluate(x_test, y_test, verbose=0) 83 | print('Test loss:', score[0]) 84 | print('Test accuracy:', score[1]) 85 | 86 | filepath = './experiment_models/KerasMNISTClassifier_{}.h5'.format( 87 | activation) 88 | model.save(filepath) 89 | 90 | 91 | def adversarialAccuracy(model): 92 | keras_model = KerasModel(model, bounds=(0, 1), channel_axis=channel_axis) 93 | criterion = Misclassification() 94 | 95 | length = x_test.shape[0] 96 | wrong = 0 97 | period = 50 98 | for i in range(length): 99 | image, label = x_test[i], y_test_original[i] 100 | 101 | #attack = foolbox.attacks.FGSM(keras_model, criterion) 102 | #image_adv = attack(image, label, epsilons=5, max_epsilon=1.0) 103 | pgd2 = foolbox.attacks.L2BasicIterativeAttack(keras_model, criterion) 104 | image_adv = pgd2(image, label, epsilon=1.0, stepsize=1.0, 105 | iterations=1, binary_search=False) 106 | 107 | if image_adv is not None: 108 | prediction = np.argmax( 109 | keras_model.predictions_and_gradient(image_adv, label)[0]) 110 | assert prediction != label 111 | wrong += 1 112 | if i % period == period - 1: 113 | print( 114 | "Adversarial attack success rate: {} / {} = {}".format(wrong, i+1, wrong / (i+1))) 115 | if image_adv is not None: 116 | displayImage(image_adv, label) 117 | print("Size of perturbation: {}".format( 118 | LA.norm(image_adv - image, None))) 119 | 120 | print("Adversarial error rate: {} / {} = {}".format(wrong, length, wrong / 
length)) 121 | 122 | 123 | def displayImage(image, label): 124 | plt.imshow(image.reshape((img_rows, img_cols)), 125 | vmin=0.0, vmax=1.0, cmap='gray') 126 | plt.title("Predicted label is {}".format(label)) 127 | plt.show() 128 | 129 | 130 | if __name__ == "__main__": 131 | # Train Keras neural networks 132 | """ 133 | trainModel(activation='relu') 134 | trainModel(activation='elu') 135 | """ 136 | 137 | filepath_relu = './experiment_models/KerasMNISTClassifier_relu.h5' 138 | filepath_elu = './experiment_models/KerasMNISTClassifier_elu.h5' 139 | model_relu = load_model(filepath_relu) 140 | model_elu = load_model(filepath_elu) 141 | 142 | # Display the architecture of the neural network 143 | # model_relu.summary() 144 | 145 | loss_and_metrics = model_relu.evaluate(x_test, y_test, batch_size=128) 146 | print("Test accuracy of relu: {}".format(loss_and_metrics)) 147 | loss_and_metrics = model_elu.evaluate(x_test, y_test, batch_size=128) 148 | print("Test accuracy of elu: {}".format(loss_and_metrics)) 149 | 150 | # For some unknown reason, this raises an assertion error at the 400-th image. 151 | adversarialAccuracy(model_relu) 152 | adversarialAccuracy(model_elu) 153 | -------------------------------------------------------------------------------- /util_MNIST.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torchvision 3 | import torchvision.transforms as transforms 4 | 5 | import numpy as np 6 | import matplotlib.pyplot as plt 7 | 8 | img_rows, img_cols = 28, 28 9 | 10 | 11 | def retrieveMNISTTrainingData(batch_size=128, shuffle=True): 12 | """ 13 | Retrieve a training dataset of MNIST. 14 | 15 | Arguments: 16 | batch_size: batch size 17 | shuffle: whether the training data should be shuffled 18 | Returns: 19 | data loader for the MNIST training data 20 | """ 21 | 22 | transform = transforms.Compose([transforms.ToTensor()]) 23 | MNIST_train_data = torchvision.datasets.MNIST( 24 | root='./data', train=True, download=True, transform=transform) 25 | train_loader = torch.utils.data.DataLoader( 26 | MNIST_train_data, batch_size=batch_size, shuffle=shuffle, num_workers=0) 27 | return train_loader 28 | 29 | 30 | def retrieveMNISTTestData(batch_size=128, shuffle=False): 31 | """ 32 | Retrieve a test dataset of MNIST. 33 | 34 | Arguments: 35 | batch_size: batch size 36 | shuffle: whether the test data should be shuffled 37 | Returns: 38 | data loader for the MNIST test data 39 | """ 40 | 41 | transform = transforms.Compose([transforms.ToTensor()]) 42 | MNIST_test_data = torchvision.datasets.MNIST( 43 | root='./data', train=False, download=True, transform=transform) 44 | test_loader = torch.utils.data.DataLoader( 45 | MNIST_test_data, batch_size=batch_size, shuffle=shuffle, num_workers=0) 46 | return test_loader 47 | 48 | 49 | def displayImage(image, label): 50 | """ 51 | Display an image of a digit from MNIST. 52 | 53 | Arguments: 54 | image: input image. The shape of this input must be compatible 55 | with (img_rows, img_cols). 
56 | label: prediction on this input image 57 | """ 58 | 59 | image = image.view((img_rows, img_cols)) 60 | plt.imshow(image, vmin=0.0, vmax=1.0, cmap='gray') 61 | plt.title("Predicted label: {}".format(label)) 62 | plt.show() 63 | 64 | 65 | if __name__ == "__main__": 66 | train_loader = retrieveMNISTTrainingData(batch_size=1, shuffle=False) 67 | print("MNIST training data are loaded.") 68 | train_iterator = iter(train_loader) 69 | images, labels = train_iterator.next() 70 | print("The type of the image is {}.".format(type(images))) 71 | print("The size of the image is {}.".format(images.size())) 72 | -------------------------------------------------------------------------------- /util_adversarial_attack.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from torch import optim, nn 4 | import torch.nn.functional as F 5 | from util_MNIST import retrieveMNISTTestData 6 | from util_model import SimpleNeuralNet, loadModel 7 | from art.attacks import FastGradientMethod, ProjectedGradientDescent 8 | from art.classifiers import PyTorchClassifier 9 | from adversarial_attack_DRO import ProjetcedDRO 10 | 11 | img_rows, img_cols = 28, 28 12 | 13 | """ 14 | This module contains classes for adversarial attacks. 15 | """ 16 | 17 | def wrapModel(model, loss_criterion): 18 | """ 19 | Wrap a PyTorch model using a wrapper provided by ART (Adversarial 20 | Robustness Toolbox) by IBM. 21 | """ 22 | 23 | optimizer = optim.Adam(model.parameters()) 24 | input_shape = (1, img_rows, img_cols) 25 | return PyTorchClassifier((0, 1), model, loss_criterion, optimizer, input_shape, nb_classes=10) 26 | 27 | class FGSM: 28 | """ 29 | Class for the fast gradient sign method (FGSM). 30 | This class delegates the implementation of the attack to the ART library 31 | developed by IBM. 32 | """ 33 | 34 | def __init__(self, model, loss_criterion, norm, batch_size=128): 35 | self.wrapped_pytorch_model = wrapModel(model, loss_criterion) 36 | self.norm = norm 37 | self.batch_size = batch_size 38 | self.attack = FastGradientMethod( 39 | self.wrapped_pytorch_model, batch_size=batch_size) 40 | 41 | # Use GPU for computation if it is available 42 | self.device = torch.device( 43 | "cuda:0" if torch.cuda.is_available() else "cpu") 44 | 45 | def generatePerturbation(self, data, budget, minimal=False): 46 | """ 47 | Generate adversarial examples from a given batch of images. 48 | The input data should have already been loaded on an appropriate 49 | device. 50 | 51 | Arguments: 52 | data: pairs of a batch of images and a batch of labels. The batch 53 | of images should be a numpy array. The batch of labels should 54 | be a numpy array of integers. 55 | budget: the maximal size of perturbation allowed. This parameter 56 | is not used if minimal = True. 57 | minimal: whether the minimal adversarial perturbation is computed. 58 | If yes, the maximal size of perturbation is 1.0. Consequently, 59 | the budget parameter is overridden. 60 | """ 61 | 62 | images, _ = data 63 | images_adv = self.attack.generate(x=images.cpu().numpy( 64 | ), norm=self.norm, eps=budget, minimal=minimal, eps_step=budget / 50, eps_max=budget, batch_size=self.batch_size) 65 | images_adv = torch.from_numpy(images_adv) 66 | 67 | # The output to be returned should be loaded on an appropriate device. 68 | return images_adv.to(self.device) 69 | 70 | 71 | class FGSMNative: 72 | """ 73 | Class for manually implemented FGSM, unlike the above FGSM class in this 74 | module. 
For some unknown reason, this class produces a different 75 | performance in adversarial attacks from the FGSM class. The performance of 76 | FGSMNative is better than that of FGSM only in some cases (and not in all 77 | cases). Additionally, the difference between the FGSM class and the 78 | FGSMNative class is not significant. 79 | """ 80 | 81 | def __init__(self, model, loss_criterion, norm=np.inf, batch_size=128): 82 | self.model = model 83 | self.loss_criterion = loss_criterion 84 | self.norm = norm 85 | self.batch_size = batch_size 86 | 87 | # Use GPU for computation if it is available 88 | self.device = torch.device( 89 | "cuda:0" if torch.cuda.is_available() else "cpu") 90 | 91 | def generatePerturbation(self, data, budget, minimal=False): 92 | """ 93 | Generate adversarial examples from a given batch of images. 94 | The input data should have already been loaded on an appropriate 95 | device. 96 | 97 | Note that unlike the FGSM class, in this FGSMNative class, the 98 | computation of minimal perturbations is not supported. 99 | 100 | Arguments: 101 | data: pairs of a batch of images and a batch of labels. The batch 102 | of images should be a numpy array. The batch of labels should 103 | be a numpy array of integers. 104 | budget: the maximal size of perturbation allowed. This parameter 105 | is not used if minimal = True. 106 | minimal: whether the minimal adversarial perturbation is computed. 107 | If yes, the maximal size of perturbation is 1.0. Consequently, 108 | the budget parameter is overridden. 109 | """ 110 | 111 | images, labels = data 112 | images_adv = images.clone().detach().to(self.device) 113 | # We will never need to compute a gradient with respect to images_adv. 114 | images_adv.requires_grad_(False) 115 | 116 | images.requires_grad_(True) 117 | output = self.model(images) 118 | loss = self.loss_criterion(output, labels) 119 | loss.backward() 120 | images.requires_grad_(False) 121 | 122 | if self.norm == np.inf: 123 | direction = images.grad.data.sign() 124 | elif self.norm == 2: 125 | flattened_images = images_adv.view(-1, img_rows * img_cols) 126 | direction = F.normalize( 127 | flattened_images, p=2, dim=1).view(images.size()) 128 | else: 129 | raise ValueError("The norm is not valid.") 130 | 131 | if minimal: 132 | iterations = 50 133 | incremental_size = budget / iterations 134 | minimal_perturbations = torch.zeros(images.size()) 135 | for i in range(iterations): 136 | outputs = self.model( 137 | (images_adv + minimal_perturbations).clamp(0, 1)) 138 | _, predicted = torch.max(outputs.data, 1) 139 | for j in range(labels.size()[0]): 140 | # If the current adversarial exampels are correctly 141 | # classified, increase the size of the perturbations. 142 | if predicted[j] == labels[j]: 143 | minimal_perturbations[j].add_( 144 | incremental_size * direction[j]) 145 | images_adv.add_(minimal_perturbations) 146 | else: 147 | images_adv.add_(budget * direction) 148 | 149 | images_adv.clamp_(0, 1) 150 | 151 | # The output to be returned should be loaded on an appropriate device. 152 | return images_adv 153 | 154 | 155 | class PGD: 156 | """ 157 | Class for adversarial attacks based on projected gradient descent (PGD). 158 | The implementation of PGD in ART executes projection on a feasible region 159 | after each iteration. However, random restrating is not used in this 160 | implementation. Not using radom restarting is the difference between the 161 | PGD implemented in ART and the one described by Madry et al. 
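    Each call to generatePerturbation runs at most max_iter iterations with a
    step size of budget / 5.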
162 | 163 | This adversarial attack subsumes the iterative FGSM. 164 | """ 165 | 166 | def __init__(self, model, loss_criterion, norm=np.inf, batch_size=128): 167 | self.wrapped_pytorch_model = wrapModel(model, loss_criterion) 168 | self.norm = norm 169 | self.batch_size = batch_size 170 | self.attack = ProjectedGradientDescent( 171 | self.wrapped_pytorch_model, norm=norm, random_init=False, batch_size=batch_size) 172 | 173 | # Use GPU for computation if it is available 174 | self.device = torch.device( 175 | "cuda:0" if torch.cuda.is_available() else "cpu") 176 | 177 | def generatePerturbation(self, data, budget, max_iter=15): 178 | images, _ = data 179 | 180 | # eps_step is not allowed to be larger than budget according to the 181 | # documentation of ART. 182 | eps_step = budget / 5 183 | images_adv = self.attack.generate(x=images.cpu().numpy( 184 | ), norm=self.norm, eps=budget, eps_step=eps_step, max_iter=max_iter, batch_size=self.batch_size) 185 | images_adv = torch.from_numpy(images_adv) 186 | 187 | # The output to be returned should be loaded on an appropriate device. 188 | return images_adv.to(self.device) 189 | 190 | 191 | class DistributionalPGD: 192 | """ 193 | Class for a PGD-based distributional adversarial attack (as opposed to pointwise 194 | adversarial attacks such as FGSM and PGD). 195 | 196 | By default, we use the 2-Wasserstein distance (for the distributional distance) 197 | and the L-2 norm (for the underlying pointwise distance). 198 | """ 199 | 200 | def __init__(self, model, loss_criterion): 201 | self.model = model 202 | self.loss_criterion = loss_criterion 203 | self.training_module = ProjetcedDRO(model, loss_criterion) 204 | 205 | def generatePerturbation(self, data, budget, max_iter=15): 206 | images_adv, _ = self.training_module.attack(budget, data, steps=max_iter) 207 | 208 | # The output is already loaded on an appropriate device (i.e. GPU if available). 
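        # Note that ProjetcedDRO.attack interprets 'budget' as a per-example
        # radius and internally scales the batch-level Wasserstein constraint
        # by the square root of the batch size.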
209 | return images_adv 210 | 211 | 212 | if __name__ == "__main__": 213 | # Load a simple neural network 214 | model = SimpleNeuralNet() 215 | loadModel(model, "./ERM_models/SimpleModel.pt") 216 | 217 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 218 | model.to(device) # Load the neural network on GPU if it is available 219 | print("The neural network is now loaded on {}.".format(device)) 220 | 221 | # Create an object for PGD 222 | criterion = nn.CrossEntropyLoss() 223 | batch_size = 128 224 | pgd = PGD(model, criterion, batch_size=batch_size) 225 | pytorch_model = pgd.wrapped_pytorch_model 226 | 227 | # Read MNIST dataset 228 | test_loader = retrieveMNISTTestData(batch_size=1024) 229 | 230 | # Craft adversarial examples with PGD 231 | epsilon = 0.1 # Maximum perturbation 232 | total, correct = 0, 0 233 | for i, data in enumerate(test_loader): 234 | images, labels = data 235 | images, labels = images.to(device), labels.to(device) 236 | 237 | # images_adv is already loaded on GPU by generatePerturbation 238 | images_adv = pgd.generatePerturbation(data, epsilon) 239 | with torch.no_grad(): 240 | outputs = model(images_adv) 241 | _, predicted = torch.max(outputs.data, 1) 242 | total += labels.size(0) 243 | correct += (predicted == labels).sum().item() 244 | acc = (predicted == labels).sum().item() / labels.size(0) 245 | print("Iteration: {}; test accuracy on adversarial sample: {}".format(i+1, acc)) 246 | print("Overall accuracy on adversarial exampels: {}.".format(correct / total)) 247 | -------------------------------------------------------------------------------- /util_adversarial_training.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.optim as optim 4 | import torch.nn.functional as F 5 | import numpy as np 6 | from util_MNIST import retrieveMNISTTrainingData 7 | 8 | 9 | class AdversarialTraining: 10 | """ 11 | Base class for adversarial training. 12 | This class does not add any perturbation for adversarial attacks. 13 | Hence, this class is equivalent to empirical risk minimization (ERM). 14 | """ 15 | 16 | def __init__(self, model, loss_criterion): 17 | """ 18 | Initialize instance variables. 19 | 20 | Arguments: 21 | model: neural network to be trained 22 | loss_criterion: loss function 23 | """ 24 | 25 | self.model = model 26 | self.loss_criterion = loss_criterion 27 | 28 | # Use GPU for computation if it is available 29 | self.device = torch.device( 30 | "cuda:0" if torch.cuda.is_available() else "cpu") 31 | # Load a model on an appropriate device 32 | self.model.to(self.device) 33 | print("The neural network is now loaded on {}.".format(self.device)) 34 | 35 | def attack(self, budget, data, steps=15): 36 | """ 37 | Launch an adversarial attack. 38 | This is equivalent to solving the inner maximization problem in the 39 | formulation of RO or DRO. This specific method serves as an abstract 40 | method and hence does not launch an adversarial attack. In a derived 41 | class, this method needs to be overridden. 42 | 43 | Arguments: 44 | budget: limit on the size of adversarial perturbations. 45 | This normally corresponds to epsilon in Staib and Jegedlka's 46 | paper, but in the DRO developed by Sinha et al., the budget 47 | parameter refers to gamma in their paper. 
48 | steps: number of iterations in the adversarial attack 49 | 50 | Returns: 51 | images_adv: adversarially perturbed images (in batch) 52 | labels: labels of the adversarially perturbed images 53 | """ 54 | return data 55 | 56 | def train(self, budget, batch_size=128, epochs=25, steps_adv=15): 57 | """ 58 | Train a neural network (using an adversarial attack if it is defined). 59 | For optimization, Adam is used. 60 | 61 | Arguments: 62 | budget: limit on the size of adversarial perturbations 63 | batch_size: batch size for training 64 | epochs: number of epochs in training 65 | steps_adv: number of iterations inside adversarial attacks 66 | 67 | Returns: 68 | None 69 | """ 70 | 71 | data_loader = retrieveMNISTTrainingData(batch_size, shuffle=True) 72 | optimizer = optim.Adam(self.model.parameters()) 73 | for epoch in range(epochs): 74 | for i, data in enumerate(data_loader, 0): 75 | images, labels = data 76 | # Input images and labels are loaded by this method. 77 | # Hence, they do not need to be loaded by the attack method. 78 | images, labels = images.to(self.device), labels.to(self.device) 79 | data = (images, labels) 80 | 81 | # However, the attack method should load images_adv on GPU 82 | # before returning the output. 83 | images_adv, labels = self.attack(budget, data, steps=steps_adv) 84 | 85 | optimizer.zero_grad() 86 | outputs = self.model(images_adv) 87 | loss = self.loss_criterion(outputs, labels) 88 | loss.backward() 89 | optimizer.step() 90 | # if i % 100 == 99: 91 | # print("Epoch: {}, iteration: {}".format(epoch, i)) 92 | -------------------------------------------------------------------------------- /util_analysis.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import numpy as np 4 | import matplotlib.pyplot as plt 5 | from util_MNIST import retrieveMNISTTestData 6 | from util_model import loadModel, evaluateModelAccuracy 7 | from util_adversarial_attack import FGSM, PGD, FGSMNative, DistributionalPGD 8 | 9 | """ 10 | This module contains two base classes for analysis of the robustness of neural 11 | networks. The first class, Analysis, wraps a single neural network, and the 12 | second class, AnalysisMulitpleModels, supports analysis on a list of 13 | neural networks. 14 | """ 15 | 16 | class Analysis: 17 | 18 | """ 19 | Class for the robustness analysis on a single neural network. 20 | """ 21 | 22 | def __init__(self, skeleton_model, filepath): 23 | self.model = loadModel(skeleton_model, filepath) 24 | 25 | # Use GPU for computation if it is available 26 | self.device = torch.device( 27 | "cuda:0" if torch.cuda.is_available() else "cpu") 28 | self.model.to(self.device) 29 | print("The model is now loaded on {}.".format(self.device)) 30 | 31 | self.filepath = filepath 32 | 33 | def testAccuracy(self): 34 | """ 35 | Evaluate the accuracy of a neural network on the MNIST test data. 36 | """ 37 | 38 | return evaluateModelAccuracy(self.model) 39 | 40 | def adversarialAccuracy(self, adversarial_type, budget, norm): 41 | """ 42 | Evaluate the accuracy of a neural network on a set of adversarial 43 | examples. 
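        Arguments:
            adversarial_type: "FGSM", "PGD", or "distributional_PGD"
            budget: maximum size of adversarial perturbations
            norm: norm of pointwise perturbations (2 or np.inf); this argument
                is ignored by the distributional PGD attack
        Returns:
            correct: number of adversarial examples classified correctly
            total: total number of examples in the MNIST test data

        Illustrative usage (a sketch only; it assumes that MNISTClassifier is
        imported from util_model and that a trained model has been saved at the
        path below by util_model.py):
            analyzer = Analysis(MNISTClassifier(), "./ERM_models/MNISTClassifier_adam_relu")
            correct, total = analyzer.adversarialAccuracy("PGD", budget=0.1, norm=np.inf)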
44 | """ 45 | 46 | batch_size = 512 if adversarial_type == "distributional_PGD" else 128 47 | 48 | # Numbers of iterations for pointwise and distributional PGD attacks 49 | max_iter_point, max_iter_dist = 15, 40 50 | test_loader = retrieveMNISTTestData(batch_size=batch_size) 51 | criterion = nn.CrossEntropyLoss() 52 | if adversarial_type == "FGSM": 53 | adversarial_module = FGSM( 54 | self.model, criterion, norm=norm, batch_size=batch_size) 55 | elif adversarial_type == 'PGD': 56 | adversarial_module = PGD( 57 | self.model, criterion, norm=norm, batch_size=batch_size) 58 | elif adversarial_type == "distributional_PGD": 59 | adversarial_module = DistributionalPGD(self.model, criterion) 60 | else: 61 | raise ValueError("The type of adversarial attack is not valid.") 62 | 63 | # Craft adversarial examples 64 | total, correct = 0, 0 65 | for i, data in enumerate(test_loader): 66 | images, labels = data 67 | images, labels = images.to(self.device), labels.to(self.device) 68 | data = (images, labels) 69 | 70 | # images_adv is already loaded on GPU by generatePerturbation. 71 | # Also, if FGSM is used, we have minimal=False by default. 72 | if adversarial_type == "FGSM": 73 | images_adv = adversarial_module.generatePerturbation( 74 | data, budget) 75 | elif adversarial_type == "PGD": 76 | images_adv = adversarial_module.generatePerturbation( 77 | data, budget, max_iter=max_iter_point) 78 | else: 79 | # For distributional PGD attacks 80 | images_adv = adversarial_module.generatePerturbation( 81 | data, budget, max_iter=max_iter_dist) 82 | with torch.no_grad(): 83 | softmax = nn.Softmax(dim=1) 84 | outputs = softmax(self.model(images_adv)) 85 | 86 | _, predicted = torch.max(outputs.data, 1) 87 | total += labels.size(0) 88 | correct += (predicted == labels).sum().item() 89 | return correct, total 90 | 91 | 92 | class AnalysisMulitpleModels: 93 | 94 | """ 95 | Base class for the robustness analysis on multiple neural networks. 96 | """ 97 | 98 | def __init__(self): 99 | pass 100 | 101 | def printBasicResult(self, analyzer, budget_two, budget_inf): 102 | """ 103 | Print out (i) the accuracy of a neural network on MNIST and 104 | (ii) its robustness to FGSM and PGD. 105 | """ 106 | 107 | correct, total = analyzer.testAccuracy() 108 | print("Test accuracy: {} / {} = {}".format(correct, total, correct / total)) 109 | 110 | correct, total = analyzer.adversarialAccuracy( 111 | 'FGSM', budget=budget_two, norm=2) 112 | print("Adversarial accuracy with respect to FGSM-2: {} / {} = {}".format(correct, 113 | total, correct / total)) 114 | correct, total = analyzer.adversarialAccuracy( 115 | 'FGSM', budget=budget_inf, norm=np.inf) 116 | print("Adversarial accuracy with respect to FGSM-inf: {} / {} = {}".format( 117 | correct, total, correct / total)) 118 | 119 | correct, total = analyzer.adversarialAccuracy( 120 | 'PGD', budget=budget_two, norm=2) 121 | print("Adversarial accuracy with respect to PGD-2: {} / {} = {}".format(correct, 122 | total, correct / total)) 123 | correct, total = analyzer.adversarialAccuracy( 124 | 'PGD', budget=budget_inf, norm=np.inf) 125 | print("Adversarial accuracy with respect to PGD-inf: {} / {} = {}".format( 126 | correct, total, correct / total)) 127 | 128 | def plotPerturbationLineGraph(self, ax, analyzers, labels, adversarial_type, budget, norm, bins, record_file): 129 | """ 130 | Plot a line graph of the adversarial attack success rates with various 131 | budgets for an adversarial attack. 
132 | 133 | Arguments: 134 | ax: Axes object (in pyplot) where a plot is drawn 135 | analyzers: list of Analysis objects 136 | labels: list of labels of the Analysis objects in the input list 137 | bins: the number of different budgets to examine 138 | record_file: file object to be used to record the adversarial 139 | attack success rates 140 | """ 141 | 142 | length = len(analyzers) 143 | results = [[] for i in range(length)] 144 | increment_size = budget / bins if bins != 0 else 0  # avoid multiplying by None when bins == 0 145 | perturbations = [i * increment_size for i in range(bins+1)] 146 | assert length <= 10 147 | # Colours of lines in a graph; this colour map only has ten colours. 148 | cmap = plt.get_cmap("tab10") 149 | 150 | # Evaluate the test accuracy; i.e. robustness against adversarial 151 | # attacks with the adversarial budget of 0. 152 | for j in range(length): 153 | analyzer = analyzers[j] 154 | correct, total = analyzer.testAccuracy() 155 | results[j].append(1 - correct / total) 156 | print("0-th iteration complete") 157 | 158 | # Evaluate the robustness against adversarial attacks with non-zero 159 | # budget. 160 | for i in range(bins): 161 | for j in range(length): 162 | analyzer = analyzers[j] 163 | correct, total = analyzer.adversarialAccuracy( 164 | adversarial_type, increment_size * (i+1), norm) 165 | results[j].append(1 - correct / total) 166 | print("{}-th iteration complete".format(i+1)) 167 | 168 | # Record the results in a log if required 169 | if record_file is not None: 170 | for i in range(length): 171 | analyzer = analyzers[i] 172 | record_file.write( 173 | "Adversarial attack on {}\n".format(analyzer.filepath)) 174 | record_file.write( 175 | "Attack type: {}; Norm: {}\n".format(adversarial_type, norm)) 176 | record_file.write( 177 | "Budget: {}; Bins: {}\n".format(budget, bins)) 178 | zipped_result = list(zip(perturbations, results[i])) 179 | record_file.write(str(zipped_result) + "\n\n") 180 | 181 | for i in range(length): 182 | ax.plot(perturbations, results[i], color=cmap( 183 | i), linestyle='-', label=labels[i]) 184 | ax.legend() 185 | ax.set_xlabel("Perturbation size") 186 | ax.set_ylabel("Adversarial attack success rate") 187 | ax.set_xlim(0, budget) 188 | ax.set_yscale('log') 189 | -------------------------------------------------------------------------------- /util_model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.optim as optim 4 | import torch.nn.functional as F 5 | from util_MNIST import retrieveMNISTTrainingData, retrieveMNISTTestData, displayImage 6 | 7 | img_rows, img_cols = 28, 28 8 | 9 | 10 | class SimpleNeuralNet(nn.Module): 11 | """ 12 | Simple neural network consisting of one hidden layer for MNIST. 13 | This neural network is only used as a toy example. 14 | """ 15 | 16 | def __init__(self): 17 | super().__init__() 18 | self.conv1 = nn.Conv2d(1, 2, 4) 19 | self.fc1 = nn.Linear(2 * 25 * 25, 10) 20 | 21 | def forward(self, x): 22 | output = F.relu(self.conv1(x)) 23 | output = output.view(-1, self.num_flat_features(output)) 24 | output = self.fc1(output) 25 | return output 26 | 27 | def num_flat_features(self, x): 28 | size = x.size()[1:] 29 | num_features = 1 30 | for s in size: 31 | num_features *= s 32 | return num_features 33 | 34 | 35 | class MNISTClassifier(nn.Module): 36 | """ 37 | Convolutional neural network used in the tutorial for CleverHans. 38 | This neural network is also used in experiments by Staib et al. (2017) and 39 | Sinha et al. (2018).
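    For 1 x 28 x 28 MNIST inputs, the architecture is three convolutional
    layers with nb_filters, 2 * nb_filters, and 2 * nb_filters output channels
    (kernel sizes 8, 6, and 5; strides 2, 2, and 1), each followed by the
    chosen activation, and then a single fully connected layer producing
    10 logits.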
40 | """ 41 | 42 | def __init__(self, nb_filters=64, activation='relu'): 43 | """ 44 | The parameters in convolutional layers and a fully connected layer are 45 | initialized using the Glorot/Xavier initialization, which is the 46 | default initialization method in Keras. 47 | """ 48 | 49 | super().__init__() 50 | self.activation = activation 51 | self.conv1 = nn.Conv2d(1, nb_filters, kernel_size=( 52 | 8, 8), stride=(2, 2), padding=(3, 3)) 53 | nn.init.xavier_uniform_(self.conv1.weight) 54 | self.conv2 = nn.Conv2d(nb_filters, nb_filters * 2, 55 | kernel_size=(6, 6), stride=(2, 2)) 56 | nn.init.xavier_uniform_(self.conv2.weight) 57 | self.conv3 = nn.Conv2d( 58 | nb_filters * 2, nb_filters * 2, kernel_size=(5, 5), stride=(1, 1)) 59 | nn.init.xavier_uniform_(self.conv3.weight) 60 | self.fc1 = nn.Linear(nb_filters * 2, 10) 61 | nn.init.xavier_uniform_(self.fc1.weight) 62 | 63 | def forward(self, x): 64 | outputs = self.conv1(x) 65 | outputs = self.applyActivation(outputs) 66 | outputs = self.conv2(outputs) 67 | outputs = self.applyActivation(outputs) 68 | outputs = self.conv3(outputs) 69 | outputs = self.applyActivation(outputs) 70 | outputs = outputs.view((-1, self.num_flat_features(outputs))) 71 | outputs = self.fc1(outputs) 72 | # Note that because we use CrosEntropyLoss, which combines 73 | # nn.LogSoftmax and nn.NLLLoss, we do not need a softmax layer as the 74 | # last layer. 75 | return outputs 76 | 77 | def applyActivation(self, x): 78 | if self.activation == 'relu': 79 | return F.relu(x) 80 | elif self.activation == 'elu': 81 | return F.elu(x) 82 | else: 83 | raise ValueError("The activation function is not valid.") 84 | 85 | def num_flat_features(self, x): 86 | size = x.size()[1:] 87 | num_features = 1 88 | for s in size: 89 | num_features *= s 90 | return num_features 91 | 92 | 93 | def trainModel(model, loss_criterion, optimizer, epochs=25, filepath=None): 94 | # USe GPU for computation if it is available. 95 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 96 | model.to(device) # Load the neural network on GPU if it is available 97 | print("The neural network is now loaded on {}.".format(device)) 98 | 99 | running_loss = 0.0 100 | train_loader = retrieveMNISTTrainingData() 101 | period = 20 102 | for epoch in range(epochs): 103 | for i, data in enumerate(train_loader, 0): 104 | inputs, labels = data 105 | # Load images and labels on a device 106 | inputs, labels = inputs.to(device), labels.to(device) 107 | optimizer.zero_grad() 108 | outputs = model(inputs) 109 | loss = loss_criterion(outputs, labels) 110 | loss.backward() 111 | running_loss += loss.item() 112 | optimizer.step() 113 | if i % period == period - 1: 114 | print("Epoch: {}, iteration: {}, loss: {}".format( 115 | epoch, i, running_loss / period)) 116 | running_loss = 0.0 117 | print("Training is complete.") 118 | if filepath is not None: 119 | torch.save(model.state_dict(), filepath) 120 | print("The model is now saved at {}.".format(filepath)) 121 | 122 | 123 | def loadModel(model, filepath): 124 | """ 125 | Load the set of parameters into the given model. 126 | 127 | Arguments: 128 | model: a model whose paramters are to be loaded. If model is None, 129 | the file should contain information about the architecture of 130 | the model as well as its parameters. 131 | filepath: path to the .pt file that stores the parameters (and 132 | possibly also the neural network's architecutre) to be loaded 133 | """ 134 | 135 | # Load the model on GPU if it is available. 136 | # Otherwise, use CPU. 
137 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 138 | if model is None: 139 | model = torch.load(filepath) 140 | else: 141 | model.load_state_dict(torch.load(filepath, map_location=device)) 142 | return model 143 | 144 | 145 | def evaluateModelAccuracy(model): 146 | # Use GPU for computation if it is available 147 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 148 | 149 | test_loader = retrieveMNISTTestData(batch_size=128) 150 | correct = 0 151 | total = 0 152 | with torch.no_grad(): 153 | for data in test_loader: 154 | images, labels = data 155 | images, labels = images.to(device), labels.to(device) 156 | outputs = model(images) 157 | softmax = nn.Softmax(dim=1) 158 | _, predicted = torch.max(softmax(outputs).data, 1) 159 | total += labels.size(0) 160 | correct += (predicted == labels).sum().item() 161 | return correct, total 162 | 163 | 164 | def evaluateModelSingleInput(model, image): 165 | # Use GPU for computation if it is available 166 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 167 | 168 | input = image.view((1, 1, img_rows, img_cols)).to(device) 169 | output = model(input) 170 | _, prediction = torch.max(output.data, 1) 171 | return prediction.item() 172 | 173 | 174 | if __name__ == "__main__": 175 | 176 | def experiment(activation, optimizer_type): 177 | epochs = 25 178 | # Note that nn.CrossEntropyLoss combines nn.LogSoftmax and nn.NLLLoss. 179 | loss_criterion = nn.CrossEntropyLoss() 180 | learning_rate = 0.001 181 | 182 | model = MNISTClassifier(activation=activation) 183 | if optimizer_type == "adam": 184 | optimizer = optim.Adam(model.parameters(), lr=learning_rate) 185 | elif optimizer_type == "sgd": 186 | optimizer = optim.SGD(model.parameters(), lr=learning_rate) 187 | else: 188 | raise ValueError("The optimizer type is not recognized.") 189 | 190 | # The file paths are only valid on UNIX systems. 191 | folderpath = "./ERM_models/" 192 | filename = "MNISTClassifier_{}_{}".format(optimizer_type, activation) 193 | 194 | trainModel(model, loss_criterion, optimizer, 195 | epochs=epochs, filepath=folderpath + filename) 196 | 197 | experiment("elu", "adam") 198 | experiment("relu", "adam") 199 | experiment("elu", "sgd") 200 | experiment("relu", "sgd") 201 | --------------------------------------------------------------------------------