├── NIPS_EnResNet_Poster.pdf ├── README.md ├── ResNet20 ├── Attack_PGD_EnResNet_5_20.py ├── Readme.md ├── main_pgd_enresnet5_20.py ├── resnet_cifar.py └── utils.py ├── WideResNet34-10 ├── Attack_PGD_WideResNet.py ├── Readme.md ├── main_pgd_wideresnet34_10_Validation.py ├── resnet_cifar.py └── utils.py ├── fig1.png ├── fig2.png └── fig4.png /NIPS_EnResNet_Poster.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BaoWangMath/EnResNet/edc4faeefa66cc02c8f1ecda1b52d6e7a0d25b75/NIPS_EnResNet_Poster.pdf -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # EnResNet 2 | This repository contains PyTorch code for the paper: 3 | Bao Wang, Binjie Yuan, Zuoqiang Shi, Stanley J. Osher. EnResNet: ResNet Ensemble via the Feynman-Kac Formalism, arXiv:1811.10745, 2018 (https://arxiv.org/abs/1811.10745) 4 | 5 | The repo contains two subfolders for PGD adversarial training of ensembles of ResNet20 and WideResNet34-10, respectively. 6 | 7 | We interpret the adversarial vulnerability of ResNets as irregularity of the solution of the transport equation, and we propose to improve regularity of the decision boundary by adding diffusion to the transport equation. Please refer to Figure 4 of our [paper](https://arxiv.org/abs/1811.10745) for more details. 8 | 9 |

10 | 11 |

12 | 13 | The resulting convection-diffusion equation can be solved by using the Feynman-Kac formula, which can be approximated by an ensemble of modified ResNets. 14 | 15 |

16 | 17 |

18 | 19 |

20 | 21 |

22 | 23 | If you find this work useful and use it in your own research, please cite our [paper](https://arxiv.org/abs/1811.10745) 24 | 25 | ``` 26 | @ARTICLE{Wang2018EnResNet, 27 | author = {B. Wang and B. Yuan and Z. Shi and S. Osher}, 28 | title = "{ResNets Ensemble via the Feynman-Kac Formalism to Improve Natural and Robust Accuracies}", 29 | journal = {arXiv e-prints}, 30 | year = "2018", 31 | month = "Nov", 32 | eid = {arXiv:1811.10745}, 33 | pages = {arXiv:1811.10745}, 34 | archivePrefix = {arXiv}, 35 | eprint = {1811.10745}, 36 | primaryClass = {stat.ML} 37 | } 38 | ``` 39 | 40 | ## Dependencies 41 | PyTorch 0.4.1 42 | -------------------------------------------------------------------------------- /ResNet20/Attack_PGD_EnResNet_5_20.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | CW, FGSM, and IFGSM Attack CNN 4 | """ 5 | import torch._utils 6 | try: 7 | torch._utils._rebuild_tensor_v2 8 | except AttributeError: 9 | def _rebuild_tensor_v2(storage, storage_offset, size, stride, requires_grad, backward_hooks): 10 | tensor = torch._utils._rebuild_tensor(storage, storage_offset, size, stride) 11 | tensor.requires_grad = requires_grad 12 | tensor._backward_hooks = backward_hooks 13 | return tensor 14 | torch._utils._rebuild_tensor_v2 = _rebuild_tensor_v2 15 | 16 | import torch 17 | import torch.nn as nn 18 | import torch.nn.functional as F 19 | import torch.backends.cudnn as cudnn 20 | import torch.optim as optim 21 | import torchvision 22 | import torchvision.datasets as dset 23 | import torchvision.transforms as transforms 24 | from torch.autograd import Variable 25 | import copy 26 | import math 27 | import numpy as np 28 | import os 29 | import argparse 30 | 31 | #from utils import * 32 | 33 | import numpy.matlib 34 | import matplotlib.pyplot as plt 35 | import pickle 36 | import cPickle 37 | from collections import OrderedDict 38 | 39 | parser = 
argparse.ArgumentParser(description='Fool EnResNet') 40 | ap = parser.add_argument 41 | ap('-method', help='Attack Method', type=str, default="ifgsm") # fgsm, ifgsm, cwl2 42 | #ap('-epsilon', help='Attack Strength', type=float, default=0.007) 43 | #ap('-epsilon', help='Attack Strength', type=float, default=0.003) # May 2 44 | ap('-epsilon', help='Attack Strength', type=float, default=0.031) # May 2 45 | ap('--num-ensembles', '--ne', default=2, type=int, metavar='N') 46 | ap('--noise-coef', '--nc', default=0.1, type=float, metavar='W', help='forward noise (default: 0.0)') 47 | ap('--noise-coef-eval', '--nce', default=0.0, type=float, metavar='W', help='forward noise (default: 0.)') 48 | opt = vars(parser.parse_args()) 49 | 50 | 51 | def conv3x3(in_planes, out_planes, stride=1): 52 | """ 53 | 3x3 convolution with padding 54 | """ 55 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False) 56 | 57 | 58 | class PreActBasicBlock(nn.Module): 59 | expansion = 1 60 | 61 | def __init__(self, inplanes, planes, stride=1, downsample=None, noise_coef=None): 62 | super(PreActBasicBlock, self).__init__() 63 | self.bn1 = nn.BatchNorm2d(inplanes) 64 | self.relu = nn.ReLU(inplace=True) 65 | self.conv1 = conv3x3(inplanes, planes, stride) 66 | self.bn2 = nn.BatchNorm2d(planes) 67 | self.conv2 = conv3x3(planes, planes) 68 | self.downsample = downsample 69 | self.stride = stride 70 | self.noise_coef = noise_coef 71 | 72 | def forward(self, x): 73 | residual = x 74 | out = self.bn1(x) 75 | out = self.relu(out) 76 | 77 | if self.downsample is not None: 78 | residual = self.downsample(out) 79 | 80 | out = self.conv1(out) 81 | out = self.bn2(out) 82 | out = self.relu(out) 83 | out = self.conv2(out) 84 | 85 | out += residual 86 | 87 | if self.noise_coef is not None: # Test Variable and rand 88 | #return out + self.noise_coef * torch.std(out) + Variable(torch.randn(out.shape).cuda()) 89 | return out + self.noise_coef * torch.std(out) * 
torch.randn_like(out) 90 | else: 91 | return out 92 | 93 | 94 | class PreActBottleneck(nn.Module): 95 | expansion = 4 96 | 97 | def __init__(self, inplanes, planes, stride=1, downsample=None, noise_coef=None): 98 | super(PreActBottleneck, self).__init__() 99 | self.bn1 = nn.BatchNorm2d(inplanes) 100 | self.relu = nn.ReLU(inplace=True) 101 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 102 | self.bn2 = nn.BatchNorm2d(planes) 103 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) 104 | self.bn3 = nn.BatchNorm2d(planes) 105 | self.conv3 = nn.Conv2d(planes, planes*4, kernel_size=1, bias=False) 106 | self.downsample = downsample 107 | self.stride = stride 108 | self.noise_coef = noise_coef 109 | 110 | def forward(self, x): 111 | residual = x 112 | 113 | out = self.bn1(x) 114 | out = self.relu(out) 115 | 116 | if self.downsample is not None: 117 | residual = self.downsample(out) 118 | 119 | out = self.conv1(out) 120 | 121 | out = self.bn2(out) 122 | out = self.relu(out) 123 | out = self.conv2(out) 124 | 125 | out = self.bn3(out) 126 | out = self.relu(out) 127 | out = self.conv3(out) 128 | 129 | out += residual 130 | if self.noise_coef is not None: 131 | #return out + self.noise_coef * torch.std(out) * Variable(torch.randn(out.shape).cuda()) 132 | return out + self.noise_coef * torch.std(out) * torch.randn_like(out) 133 | else: 134 | return out 135 | 136 | 137 | class PreAct_ResNet_Cifar(nn.Module): 138 | def __init__(self, block, layers, num_classes=10, noise_coef=None): 139 | super(PreAct_ResNet_Cifar, self).__init__() 140 | self.inplanes = 16 141 | self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1, bias=False) 142 | self.layer1 = self._make_layer(block, 16, layers[0], noise_coef=noise_coef) 143 | self.layer2 = self._make_layer(block, 32, layers[1], stride=2, noise_coef=noise_coef) 144 | self.layer3 = self._make_layer(block, 64, layers[2], stride=2, noise_coef=noise_coef) 145 | self.bn = 
nn.BatchNorm2d(64*block.expansion) 146 | self.relu = nn.ReLU(inplace=True) 147 | self.avgpool = nn.AvgPool2d(8, stride=1) 148 | self.fc = nn.Linear(64*block.expansion, num_classes) 149 | 150 | #self.loss = nn.CrossEntropyLoss() 151 | 152 | for m in self.modules(): 153 | if isinstance(m, nn.Conv2d): 154 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 155 | m.weight.data.normal_(0, math.sqrt(2. / n)) 156 | elif isinstance(m, nn.BatchNorm2d): 157 | m.weight.data.fill_(1) 158 | m.bias.data.zero_() 159 | 160 | def _make_layer(self, block, planes, blocks, stride=1, noise_coef=None): 161 | downsample = None 162 | if stride != 1 or self.inplanes != planes*block.expansion: 163 | downsample = nn.Sequential( 164 | nn.Conv2d(self.inplanes, planes*block.expansion, kernel_size=1, stride=stride, bias=False) 165 | ) 166 | 167 | layers = [] 168 | layers.append(block(self.inplanes, planes, stride=stride, downsample=downsample, noise_coef=noise_coef)) 169 | self.inplanes = planes*block.expansion 170 | for _ in range(1, blocks): 171 | layers.append(block(self.inplanes, planes, noise_coef=noise_coef)) 172 | return nn.Sequential(*layers) 173 | 174 | #def forward(self, x, target): 175 | def forward(self, x): 176 | x = self.conv1(x) 177 | 178 | x = self.layer1(x) 179 | x = self.layer2(x) 180 | x = self.layer3(x) 181 | 182 | x = self.bn(x) 183 | x = self.relu(x) 184 | x = self.avgpool(x) 185 | x = x.view(x.size(0), -1) 186 | x = self.fc(x) 187 | 188 | #loss = self.loss(x, target) 189 | 190 | #return x, loss 191 | return x 192 | 193 | 194 | class Ensemble_PreAct_ResNet_Cifar(nn.Module): 195 | def __init__(self, block, layers, num_classes=10, num_ensembles=3, noise_coef=0.0): 196 | super(Ensemble_PreAct_ResNet_Cifar, self).__init__() 197 | self.num_ensembles = num_ensembles 198 | # for emsemble resnet we should use Noisy Blocks. 
199 | self.ensemble = nn.ModuleList([PreAct_ResNet_Cifar(block, layers, num_classes=num_classes, noise_coef=noise_coef) for i in range(num_ensembles)]) 200 | # self.ensemble = nn.ModuleList([ResNet_Cifar(block, layers, num_classes=num_classes) for i in range(num_ensembles)]) 201 | 202 | def forward(self, x): 203 | #def forward(self, x, target): 204 | ret = 0.0 205 | for net in self.ensemble: 206 | ret += net(x) 207 | #ret += net(x, target) 208 | ret /= self.num_ensembles 209 | 210 | return ret 211 | 212 | 213 | def en_preactresnet20_cifar(**kwargs): 214 | model = Ensemble_PreAct_ResNet_Cifar(PreActBasicBlock, [3, 3, 3], **kwargs) 215 | return model 216 | 217 | def en_preactresnet44_cifar(**kwargs): 218 | model = Ensemble_PreAct_ResNet_Cifar(PreActBasicBlock, [7, 7, 7], **kwargs) 219 | return model 220 | 221 | if __name__ == '__main__': 222 | """ 223 | Load the trained DNN, and attack the DNN, finally save the adversarial images 224 | """ 225 | # Load the model 226 | print '==> Resuming from checkpoint..' 
227 | checkpoint = torch.load('ckpt_PGD_ensemble_5_20.t7') 228 | net = checkpoint['net'] 229 | epsilon = opt['epsilon'] 230 | attack_type = opt['method'] 231 | 232 | # Load the original test data 233 | print '==> Load the clean image' 234 | root = './data' 235 | download = False 236 | 237 | test_set = torchvision.datasets.CIFAR10( 238 | root=root, 239 | train=False, 240 | download=download, 241 | transform=transforms.Compose([ 242 | transforms.ToTensor(), 243 | #normalize, 244 | ])) 245 | 246 | kwargs = {'num_workers':1, 'pin_memory':True} 247 | batchsize_test = 200 248 | if attack_type == 'cw': 249 | batchsize_test = 1 250 | print('Batch size of the test set: ', batchsize_test) 251 | test_loader = torch.utils.data.DataLoader(dataset=test_set, 252 | batch_size=batchsize_test, 253 | shuffle=False, **kwargs 254 | ) 255 | criterion = nn.CrossEntropyLoss() 256 | #-------------------------------------------------------------------------- 257 | # Testing 258 | # images: the original images 259 | # labels: labels of the original images 260 | # images_adv: adversarial image 261 | # labels_pred: the predicted labels of the adversarial images 262 | # noise: the added noise 263 | #-------------------------------------------------------------------------- 264 | images, labels, images_adv, labels_pred, noise = [], [], [], [], [] 265 | total_fooled = 0; total_correct_classified = 0 266 | 267 | if attack_type == 'fgsm': 268 | for batch_idx, (x1, y1_true) in enumerate(test_loader): 269 | #if batch_idx < 2: 270 | x_Test = x1.numpy() 271 | #print x_Test.min(), x_Test.max() 272 | #x_Test = ((x_Test - x_Test.min())/(x_Test.max() - x_Test.min()) - 0.5)*2 273 | #x_Test = (x_Test - x_Test.min() )/(x_Test.max() - x_Test.min()) 274 | y_Test = y1_true.numpy() 275 | 276 | #x = Variable(torch.cuda.FloatTensor(x_Test.reshape(1, 1, 28, 28)), requires_grad=True) 277 | x = Variable(torch.cuda.FloatTensor(x_Test.reshape(batchsize_test, 3, 32, 32)), requires_grad=True) 278 | y = 
Variable(torch.cuda.LongTensor(y_Test), requires_grad=False) 279 | 280 | # Classification before perturbation 281 | pred_tmp = net(x) 282 | y_pred = np.argmax(pred_tmp.cpu().data.numpy()) 283 | loss = criterion(pred_tmp, y) 284 | # Attack 285 | net.zero_grad() 286 | if x.grad is not None: 287 | x.grad.data.fill_(0) 288 | loss.backward() 289 | 290 | x_val_min = 0.0 291 | x_val_max = 1.0 292 | x.grad.sign_() 293 | 294 | x_adversarial = x + epsilon*x.grad 295 | x_adversarial = torch.clamp(x_adversarial, x_val_min, x_val_max) 296 | x_adversarial = x_adversarial.data 297 | 298 | # Classify the perturbed data 299 | x_adversarial_tmp = Variable(x_adversarial) 300 | pred_tmp = net(x_adversarial_tmp) 301 | y_pred_adversarial = np.argmax(pred_tmp.cpu().data.numpy(), axis=1) 302 | 303 | for i in range(len(x_Test)): 304 | #print y_pred_adversarial 305 | if y_Test[i] == y_pred_adversarial[i]: 306 | #if y_Test == y_pred_adversarial: 307 | total_correct_classified += 1 308 | 309 | for i in range(len(x_Test)): 310 | # Save the perturbed data 311 | images.append(x_Test[i, :, :, :]) # Original image 312 | images_adv.append(x_adversarial.cpu().numpy()[i, :, :, :]) # Perturbed image 313 | noise.append(x_adversarial.cpu().numpy()[i, :, :, :]-x_Test[i, :, :, :]) # Noise 314 | labels.append(y_Test[i]) 315 | labels_pred.append(y_pred_adversarial[i]) 316 | 317 | elif attack_type == 'ifgsm': 318 | for batch_idx, (x1, y1_true) in enumerate(test_loader): 319 | #if batch_idx < 100: 320 | x_Test = x1.numpy() 321 | y_Test = y1_true.numpy() 322 | 323 | x = Variable(torch.cuda.FloatTensor(x_Test.reshape(batchsize_test, 3, 32, 32)), requires_grad=True) 324 | y = Variable(torch.cuda.LongTensor(y_Test), requires_grad=False) 325 | 326 | # Classification before perturbation 327 | pred_tmp = net(x) 328 | y_pred = np.argmax(pred_tmp.cpu().data.numpy()) 329 | loss = criterion(pred_tmp, y) 330 | # Attack 331 | alpha = epsilon 332 | #iteration = 10 333 | iteration = 1#40 # May 2 334 | x_val_min = 0.; 
x_val_max = 1. 335 | epsilon1 = 0.031 336 | 337 | # Helper function 338 | def where(cond, x, y): 339 | """ 340 | code from : 341 | https://discuss.pytorch.org/t/how-can-i-do-the-operation-the-same-as-np-where/1329/8 342 | """ 343 | cond = cond.float() 344 | return (cond*x) + ((1-cond)*y) 345 | 346 | # Random perturbation 347 | #x = x + torch.zeros_like(x).uniform_(-epsilon1, epsilon1) # May 2 348 | x_adv = Variable(x.data, requires_grad=True) 349 | 350 | for i in range(iteration): 351 | h_adv = net(x_adv) 352 | loss = criterion(h_adv, y) 353 | net.zero_grad() 354 | if x_adv.grad is not None: 355 | x_adv.grad.data.fill_(0) 356 | loss.backward() 357 | 358 | x_adv.grad.sign_() 359 | x_adv = x_adv + alpha*x_adv.grad 360 | x_adv = where(x_adv > x+epsilon1, x+epsilon1, x_adv) 361 | x_adv = where(x_adv < x-epsilon1, x-epsilon1, x_adv) 362 | x_adv = torch.clamp(x_adv, x_val_min, x_val_max) 363 | x_adv = Variable(x_adv.data, requires_grad=True) 364 | 365 | x_adversarial = x_adv.data 366 | 367 | x_adversarial_tmp = Variable(x_adversarial) 368 | pred_tmp = net(x_adversarial_tmp) 369 | loss = criterion(pred_tmp, y) 370 | y_pred_adversarial = np.argmax(pred_tmp.cpu().data.numpy(), axis=1) 371 | 372 | #if y_Test == y_pred_adversarial: 373 | # total_correct_classified += 1 374 | for i in range(len(x_Test)): 375 | #print y_pred_adversarial 376 | if y_Test[i] == y_pred_adversarial[i]: 377 | #if y_Test == y_pred_adversarial: 378 | total_correct_classified += 1 379 | 380 | for i in range(len(x_Test)): 381 | # Save the perturbed data 382 | images.append(x_Test[i, :, :, :]) # Original image 383 | images_adv.append(x_adversarial.cpu().numpy()[i, :, :, :]) # Perturbed image 384 | noise.append(x_adversarial.cpu().numpy()[i, :, :, :]-x_Test[i, :, :, :]) # Noise 385 | labels.append(y_Test[i]) 386 | labels_pred.append(y_pred_adversarial[i]) 387 | 388 | elif attack_type == 'cw': 389 | for batch_idx, (x1, y1_true) in enumerate(test_loader): 390 | #if batch_idx < 10: 391 | if batch_idx - 
int(int(batch_idx/50.)*50) == 0: 392 | print batch_idx 393 | x_Test = x1.numpy() 394 | y_Test = y1_true.numpy() 395 | 396 | x = Variable(torch.cuda.FloatTensor(x_Test.reshape(batchsize_test, 3, 32, 32)), requires_grad=True) 397 | y = Variable(torch.cuda.LongTensor(y_Test), requires_grad=False) 398 | 399 | # Classification before perturbation 400 | pred_tmp = net(x) 401 | loss = criterion(pred_tmp, y) 402 | y_pred = np.argmax(pred_tmp.cpu().data.numpy()) 403 | 404 | # Attack 405 | cwl2_learning_rate = 0.0006#0.01 406 | max_iter = 50 407 | lambdaf = 10.0 408 | kappa = 0.0 409 | 410 | # The input image we will perturb 411 | input = torch.FloatTensor(x_Test.reshape(batchsize_test, 3, 32, 32)) 412 | input_var = Variable(input) 413 | 414 | # w is the variable we will optimize over. We will also save the best w and loss 415 | w = Variable(input, requires_grad=True) 416 | best_w = input.clone() 417 | best_loss = float('inf') 418 | 419 | # Use the Adam optimizer for the minimization 420 | optimizer = optim.Adam([w], lr=cwl2_learning_rate) 421 | 422 | # Get the top2 predictions of the model. Get the argmaxes for the objective function 423 | probs = net(input_var.cuda()) 424 | 425 | probs_data = probs.data.cpu() 426 | top1_idx = torch.max(probs_data, 1)[1] 427 | probs_data[0][top1_idx] = -1 # making the previous top1 the lowest so we get the top2 428 | top2_idx = torch.max(probs_data, 1)[1] 429 | 430 | # Set the argmax (but maybe argmax will just equal top2_idx always?) 
431 | argmax = top1_idx[0] 432 | if argmax == y_pred: 433 | argmax = top2_idx[0] 434 | 435 | # The iteration 436 | for i in range(0, max_iter): 437 | if i > 0: 438 | w.grad.data.fill_(0) 439 | 440 | # Zero grad (Only one line needed actually) 441 | net.zero_grad() 442 | optimizer.zero_grad() 443 | 444 | # Compute L2 Loss 445 | loss = torch.pow(w - input_var, 2).sum() 446 | 447 | # w variable 448 | w_data = w.data 449 | w_in = Variable(w_data, requires_grad=True) 450 | 451 | # Compute output 452 | output = net.forward(w_in.cuda()) #second argument is unneeded 453 | 454 | # Calculating the (hinge) loss 455 | loss += lambdaf * torch.clamp( output[0][y_pred] - output[0][argmax] + kappa, min=0).cpu() 456 | 457 | # Backprop the loss 458 | loss.backward() 459 | 460 | # Work on w (Don't think we need this) 461 | w.grad.data.add_(w_in.grad.data) 462 | 463 | # Optimizer step 464 | optimizer.step() 465 | 466 | # Save the best w and loss 467 | total_loss = loss.data.cpu()[0] 468 | 469 | if total_loss < best_loss: 470 | best_loss = total_loss 471 | 472 | ##best_w = torch.clamp(best_w, 0., 1.) # BW Added Aug 26 473 | 474 | best_w = w.data.clone() 475 | 476 | # Set final adversarial image as the best-found w 477 | x_adversarial = best_w 478 | 479 | ##x_adversarial = torch.clamp(x_adversarial, 0., 1.) 
# BW Added Aug 26 480 | 481 | #--------------- Add to introduce the noise 482 | noise_tmp = x_adversarial.cpu().numpy() - x_Test 483 | x_adversarial = x_Test + epsilon * noise_tmp 484 | #--------------- 485 | 486 | # Classify the perturbed data 487 | x_adversarial_tmp = Variable(torch.cuda.FloatTensor(x_adversarial), requires_grad=False) #Variable(x_adversarial).cuda() 488 | pred_tmp = net(x_adversarial_tmp) 489 | y_pred_adversarial = np.argmax(pred_tmp.cpu().data.numpy()) # axis=1 490 | 491 | if y_Test == y_pred_adversarial: 492 | total_correct_classified += 1 493 | 494 | # Save the perturbed data 495 | images.append(x_Test) # Original image 496 | images_adv.append(x_adversarial) # Perturbed image 497 | noise.append(x_adversarial-x_Test) # Noise 498 | labels.append(y_Test) 499 | labels_pred.append(y_pred_adversarial) 500 | else: 501 | ValueError('Unsupported Attack') 502 | 503 | print('Number of correctly classified images: ', total_correct_classified) 504 | # Save data 505 | #with open("Adversarial" + attack_type + str(int(10*epsilon)) + ".pkl", "w") as f: 506 | #with open("Adversarial" + attack_type + str(int(100*epsilon)) + ".pkl", "w") as f: 507 | # adv_data_dict = {"images":images_adv, "labels":labels} 508 | # cPickle.dump(adv_data_dict, f) 509 | images = np.array(images).squeeze() 510 | images_adv = np.array(images_adv).squeeze() 511 | noise = np.array(noise).squeeze() 512 | labels = np.array(labels).squeeze() 513 | labels_pred = np.array(labels_pred).squeeze() 514 | print images.shape, images_adv.shape, noise.shape, labels.shape, labels_pred.shape 515 | 516 | with open("fooled_EnResNet5_20_PGD_10iters_" + attack_type + str(int(1000*epsilon)) + ".pkl", "w") as f: 517 | #with open("fooled_EnResNet5_20_PGD_20iters_" + attack_type + str(int(1000*epsilon)) + ".pkl", "w") as f: 518 | adv_data_dict = { 519 | "images" : images, 520 | "images_adversarial" : images_adv, 521 | "y_trues" : labels, 522 | "noises" : noise, 523 | "y_preds_adversarial" : labels_pred 524 | 
} 525 | pickle.dump(adv_data_dict, f) 526 | -------------------------------------------------------------------------------- /ResNet20/Readme.md: -------------------------------------------------------------------------------- 1 | ## Reproducing the results of En_5ResNet20 on CIFAR10 2 | ### PGD adversarial training 3 | ``` 4 | python main_pgd_enresnet5_20.py --lr 0.1 --noise-coef 0.1 5 | ``` 6 | 7 | ### Attack the trained model 8 | ``` 9 | python Attack_PGD_EnResNet_5_20.py -method ifgsm 10 | ``` 11 | The method can be fgsm, ifgsm, or cw 12 | -------------------------------------------------------------------------------- /ResNet20/main_pgd_enresnet5_20.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | main pgd enresnet 4 | """ 5 | import argparse 6 | import os 7 | import shutil 8 | import time 9 | 10 | import torch.backends.cudnn as cudnn 11 | import torch.optim as optim 12 | import torchvision 13 | import torchvision.transforms as transforms 14 | from torch.autograd import Variable 15 | import torch.nn.functional as F 16 | 17 | import torch 18 | import torch.nn as nn 19 | import math 20 | 21 | from resnet_cifar import * 22 | from utils import * 23 | 24 | parser = argparse.ArgumentParser(description='PyTorch Cifar10 Training') 25 | parser.add_argument('--model_name', default='en_resnet20_cifar10', type=str, help='name of the model') 26 | parser.add_argument('--epochs', default=200, type=int, metavar='N', help='number of total epochs to run') 27 | parser.add_argument('--start-epoch', default=0, type=int, metavar='N', help='manual epoch number (useful on restarts)') 28 | parser.add_argument('-b', '--batch-size', default=128, type=int, metavar='N', 29 | help='mini-batch size (default: 128),only used for train') 30 | parser.add_argument('--lr', '--learning-rate', default=0.1, type=float, metavar='LR', help='initial learning rate') 31 | parser.add_argument('--momentum', default=0.9, 
type=float, metavar='M', help='momentum') 32 | parser.add_argument('--num-ensembles', '--ne', default=5, type=int, metavar='N') 33 | parser.add_argument('--weight-decay', '--wd', default=5e-4, type=float, metavar='W', 34 | help='weight decay (default: 5e-4)') 35 | parser.add_argument('--noise-coef', '--nc', default=0.1, type=float, metavar='W', help='forward noise (default: 0.1)') 36 | parser.add_argument('--noise-coef-eval', '--nce', default=0.0, type=float, metavar='W', help='forward noise (default: 0.)') 37 | parser.add_argument('--print-freq', '-p', default=10, type=int, metavar='N', help='print frequency (default: 10)') 38 | parser.add_argument('--resume', default='', type=str, metavar='PATH', help='path to latest checkpoint (default: none)') 39 | parser.add_argument('-e', '--evaluate', dest='evaluate', action='store_true', help='evaluate model on validation set') 40 | parser.add_argument('-ct', '--cifar-type', default='10', type=int, metavar='CT', 41 | help='10 for cifar10,100 for cifar100 (default: 10)') 42 | 43 | 44 | def conv3x3(in_planes, out_planes, stride=1): 45 | """ 46 | 3x3 convolution with padding 47 | """ 48 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False) 49 | 50 | 51 | class PreActBasicBlock(nn.Module): 52 | expansion = 1 53 | 54 | def __init__(self, inplanes, planes, stride=1, downsample=None, noise_coef=None): 55 | super(PreActBasicBlock, self).__init__() 56 | self.bn1 = nn.BatchNorm2d(inplanes) 57 | self.relu = nn.ReLU(inplace=True) 58 | self.conv1 = conv3x3(inplanes, planes, stride) 59 | self.bn2 = nn.BatchNorm2d(planes) 60 | self.conv2 = conv3x3(planes, planes) 61 | self.downsample = downsample 62 | self.stride = stride 63 | self.noise_coef = noise_coef 64 | 65 | def forward(self, x): 66 | residual = x 67 | out = self.bn1(x) 68 | out = self.relu(out) 69 | 70 | if self.downsample is not None: 71 | residual = self.downsample(out) 72 | 73 | out = self.conv1(out) 74 | out = self.bn2(out) 75 | out = 
self.relu(out) 76 | out = self.conv2(out) 77 | 78 | out += residual 79 | 80 | if self.noise_coef is not None: # Test Variable and rand 81 | #return out + self.noise_coef * torch.std(out) + Variable(torch.randn(out.shape).cuda()) 82 | return out + self.noise_coef * torch.std(out) * torch.randn_like(out) 83 | else: 84 | return out 85 | 86 | 87 | class PreActBottleneck(nn.Module): 88 | expansion = 4 89 | 90 | def __init__(self, inplanes, planes, stride=1, downsample=None, noise_coef=None): 91 | super(PreActBottleneck, self).__init__() 92 | self.bn1 = nn.BatchNorm2d(inplanes) 93 | self.relu = nn.ReLU(inplace=True) 94 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 95 | self.bn2 = nn.BatchNorm2d(planes) 96 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) 97 | self.bn3 = nn.BatchNorm2d(planes) 98 | self.conv3 = nn.Conv2d(planes, planes*4, kernel_size=1, bias=False) 99 | self.downsample = downsample 100 | self.stride = stride 101 | self.noise_coef = noise_coef 102 | 103 | def forward(self, x): 104 | residual = x 105 | 106 | out = self.bn1(x) 107 | out = self.relu(out) 108 | 109 | if self.downsample is not None: 110 | residual = self.downsample(out) 111 | 112 | out = self.conv1(out) 113 | 114 | out = self.bn2(out) 115 | out = self.relu(out) 116 | out = self.conv2(out) 117 | 118 | out = self.bn3(out) 119 | out = self.relu(out) 120 | out = self.conv3(out) 121 | 122 | out += residual 123 | if self.noise_coef is not None: 124 | #return out + self.noise_coef * torch.std(out) * Variable(torch.randn(out.shape).cuda()) 125 | return out + self.noise_coef * torch.std(out) * torch.randn_like(out) 126 | else: 127 | return out 128 | 129 | 130 | class PreAct_ResNet_Cifar(nn.Module): 131 | def __init__(self, block, layers, num_classes=10, noise_coef=None): 132 | super(PreAct_ResNet_Cifar, self).__init__() 133 | self.inplanes = 16 134 | self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1, bias=False) 135 | 
self.layer1 = self._make_layer(block, 16, layers[0], noise_coef=noise_coef) 136 | self.layer2 = self._make_layer(block, 32, layers[1], stride=2, noise_coef=noise_coef) 137 | self.layer3 = self._make_layer(block, 64, layers[2], stride=2, noise_coef=noise_coef) 138 | self.bn = nn.BatchNorm2d(64*block.expansion) 139 | self.relu = nn.ReLU(inplace=True) 140 | self.avgpool = nn.AvgPool2d(8, stride=1) 141 | self.fc = nn.Linear(64*block.expansion, num_classes) 142 | 143 | #self.loss = nn.CrossEntropyLoss() 144 | 145 | for m in self.modules(): 146 | if isinstance(m, nn.Conv2d): 147 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 148 | m.weight.data.normal_(0, math.sqrt(2. / n)) 149 | elif isinstance(m, nn.BatchNorm2d): 150 | m.weight.data.fill_(1) 151 | m.bias.data.zero_() 152 | 153 | def _make_layer(self, block, planes, blocks, stride=1, noise_coef=None): 154 | downsample = None 155 | if stride != 1 or self.inplanes != planes*block.expansion: 156 | downsample = nn.Sequential( 157 | nn.Conv2d(self.inplanes, planes*block.expansion, kernel_size=1, stride=stride, bias=False) 158 | ) 159 | 160 | layers = [] 161 | layers.append(block(self.inplanes, planes, stride=stride, downsample=downsample, noise_coef=noise_coef)) 162 | self.inplanes = planes*block.expansion 163 | for _ in range(1, blocks): 164 | layers.append(block(self.inplanes, planes, noise_coef=noise_coef)) 165 | return nn.Sequential(*layers) 166 | 167 | #def forward(self, x, target): 168 | def forward(self, x): 169 | x = self.conv1(x) 170 | 171 | x = self.layer1(x) 172 | x = self.layer2(x) 173 | x = self.layer3(x) 174 | 175 | x = self.bn(x) 176 | x = self.relu(x) 177 | x = self.avgpool(x) 178 | x = x.view(x.size(0), -1) 179 | x = self.fc(x) 180 | 181 | #loss = self.loss(x, target) 182 | 183 | #return x, loss 184 | return x 185 | 186 | 187 | class Ensemble_PreAct_ResNet_Cifar(nn.Module): 188 | def __init__(self, block, layers, num_classes=10, num_ensembles=3, noise_coef=0.0): 189 | 
super(Ensemble_PreAct_ResNet_Cifar, self).__init__() 190 | self.num_ensembles = num_ensembles 191 | # for emsemble resnet we should use Noisy Blocks. 192 | self.ensemble = nn.ModuleList([PreAct_ResNet_Cifar(block, layers, num_classes=num_classes, noise_coef=noise_coef) for i in range(num_ensembles)]) 193 | # self.ensemble = nn.ModuleList([ResNet_Cifar(block, layers, num_classes=num_classes) for i in range(num_ensembles)]) 194 | 195 | def forward(self, x): 196 | #def forward(self, x, target): 197 | ret = 0.0 198 | for net in self.ensemble: 199 | ret += net(x) 200 | #ret += net(x, target) 201 | ret /= self.num_ensembles 202 | 203 | return ret 204 | 205 | 206 | def en_preactresnet20_cifar(**kwargs): 207 | model = Ensemble_PreAct_ResNet_Cifar(PreActBasicBlock, [3, 3, 3], **kwargs) # 20 208 | #model = Ensemble_PreAct_ResNet_Cifar(PreActBasicBlock, [18, 18, 18], **kwargs) # 110 209 | return model 210 | 211 | 212 | class AttackPGD(nn.Module): 213 | """ 214 | PGD Adversarial training 215 | """ 216 | def __init__(self, basic_net, config): 217 | super(AttackPGD, self).__init__() 218 | self.basic_net = basic_net 219 | self.rand = config['random_start'] 220 | self.step_size = config['step_size'] 221 | self.epsilon = config['epsilon'] 222 | self.num_steps = config['num_steps'] 223 | assert config['loss_func'] == 'xent', 'Only xent supported for now.' 
224 | 225 | def forward(self, inputs, targets): 226 | x = inputs 227 | if self.rand: 228 | x = x + torch.zeros_like(x).uniform_(-self.epsilon, self.epsilon) 229 | for i in range(self.num_steps): # iFGSM attack 230 | x.requires_grad_() 231 | with torch.enable_grad(): 232 | logits = self.basic_net(x) 233 | loss = F.cross_entropy(logits, targets, size_average=False) 234 | grad = torch.autograd.grad(loss, [x])[0] 235 | x = x.detach() + self.step_size*torch.sign(grad.detach()) 236 | x = torch.min(torch.max(x, inputs - self.epsilon), inputs + self.epsilon) 237 | x = torch.clamp(x, 0, 1) 238 | 239 | return self.basic_net(x), x 240 | 241 | 242 | if __name__ == '__main__': 243 | use_cuda = torch.cuda.is_available 244 | global best_acc 245 | best_acc = 0 246 | start_epoch = 0 247 | args = parser.parse_args() 248 | best_count = 0 249 | #-------------------------------------------------------------------------- 250 | # Load Cifar data 251 | #-------------------------------------------------------------------------- 252 | print('==> Preparing data...') 253 | root = './data' 254 | download = True 255 | 256 | #normalize = transforms.Normalize(mean=[0.507, 0.487, 0.441], std=[0.267, 0.256, 0.276]) 257 | 258 | 259 | train_set = torchvision.datasets.CIFAR10( 260 | root=root, 261 | train=True, 262 | download=download, 263 | transform=transforms.Compose([ 264 | transforms.RandomCrop(32, padding=4), 265 | transforms.RandomHorizontalFlip(), 266 | transforms.ToTensor(), 267 | #normalize, 268 | ])) 269 | 270 | test_set = torchvision.datasets.CIFAR10( 271 | root=root, 272 | train=False, 273 | download=download, 274 | transform=transforms.Compose([ 275 | transforms.ToTensor(), 276 | #normalize, 277 | ])) 278 | 279 | 280 | kwargs = {'num_workers':1, 'pin_memory':True} 281 | batchsize_test = len(test_set)/40 #100 282 | print('Batch size of the test set: ', batchsize_test) 283 | test_loader = torch.utils.data.DataLoader(dataset=test_set, 284 | batch_size=batchsize_test, 285 | shuffle=False, 
**kwargs 286 | ) 287 | batchsize_train = 128 288 | print('Batch size of the train set: ', batchsize_train) 289 | train_loader = torch.utils.data.DataLoader(dataset=train_set, 290 | batch_size=batchsize_train, 291 | shuffle=True, **kwargs 292 | ) 293 | 294 | basic_net = en_preactresnet20_cifar(num_ensembles=args.num_ensembles, noise_coef=args.noise_coef).cuda() 295 | 296 | # From https://github.com/MadryLab/cifar10_challenge/blob/master/config.json 297 | config = { 298 | 'epsilon': 0.031, #8.0 / 255, # Test 1.0-8.0 299 | 'num_steps': 10, 300 | 'step_size': 0.007, #6.0 / 255, # 7.0 301 | 'random_start': True, 302 | 'loss_func': 'xent', 303 | } 304 | 305 | net = AttackPGD(basic_net, config).cuda() 306 | criterion = nn.CrossEntropyLoss() 307 | 308 | nepoch = 200 309 | for epoch in xrange(nepoch): 310 | print('Epoch ID', epoch) 311 | if epoch < 80: 312 | lr = 0.1 313 | elif epoch < 120: 314 | lr = 0.1/10 315 | elif epoch < 160: 316 | lr = 0.1/10/10 317 | else: 318 | lr = 0.1/10/10/10 319 | 320 | optimizer = optim.SGD(net.parameters(), lr=lr, momentum=0.9, weight_decay=5e-4, nesterov=True) 321 | 322 | #---------------------------------------------------------------------- 323 | # Training 324 | #---------------------------------------------------------------------- 325 | correct = 0; total = 0; train_loss = 0 326 | net.train() 327 | for batch_idx, (x, target) in enumerate(train_loader): 328 | #if batch_idx < 1: 329 | optimizer.zero_grad() 330 | x, target = Variable(x.cuda()), Variable(target.cuda()) 331 | 332 | score, pert_x = net(x, target) 333 | loss = criterion(score, target) 334 | loss.backward() 335 | optimizer.step() 336 | 337 | train_loss += loss.data[0] 338 | _, predicted = torch.max(score.data, 1) 339 | total += target.size(0) 340 | correct += predicted.eq(target.data).cpu().sum() 341 | progress_bar(batch_idx, len(train_loader), 'Loss: %.3f | Acc: %.3f%% (%d/%d)' 342 | % (train_loss/(batch_idx+1), 100.*correct/total, correct, total)) 343 | 344 | 
#---------------------------------------------------------------------- 345 | # Testing 346 | #---------------------------------------------------------------------- 347 | test_loss = 0; correct = 0; total = 0 348 | net.eval() 349 | for batch_idx, (x, target) in enumerate(test_loader): 350 | x, target = Variable(x.cuda(), volatile=True), Variable(target.cuda(), volatile=True) 351 | score, pert_x = net(x, target) 352 | 353 | loss = criterion(score, target) 354 | test_loss += loss.data[0] 355 | _, predicted = torch.max(score.data, 1) 356 | total += target.size(0) 357 | correct += predicted.eq(target.data).cpu().sum() 358 | progress_bar(batch_idx, len(test_loader), 'Loss: %.3f | Acc: %.3f%% (%d/%d)' 359 | % (test_loss/(batch_idx+1), 100.*correct/total, correct, total)) 360 | 361 | #---------------------------------------------------------------------- 362 | # Save the checkpoint 363 | #---------------------------------------------------------------------- 364 | acc = 100.*correct/total 365 | #if acc > best_acc: 366 | if correct > best_count: 367 | print('Saving model...') 368 | state = { 369 | 'net': basic_net, #net, 370 | 'acc': acc, 371 | 'epoch': epoch, 372 | } 373 | 374 | torch.save(state, './ckpt_PGD_ensemble_5_20.t7') 375 | best_acc = acc 376 | best_count = correct 377 | 378 | print('The best acc: ', best_acc) 379 | -------------------------------------------------------------------------------- /ResNet20/resnet_cifar.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | resnet for cifar in pytorch 4 | Reference: 5 | [1] K. He, X. Zhang, S. Ren, and J. Sun. Deep residual learning for image recognition. In CVPR, 2016. 6 | [2] K. He, X. Zhang, S. Ren, and J. Sun. Identity mappings in deep residual networks. In ECCV, 2016. 
class PreActBasicBlock(nn.Module):
    """
    Pre-activation basic residual block.

    Applies BN -> ReLU -> 3x3 conv twice and adds the shortcut.  When
    `downsample` is given, the shortcut is `downsample` applied to the first
    activated tensor; otherwise it is the raw input.  If `noise_coef` is not
    None, Gaussian noise scaled by `noise_coef * std(out)` is added to the
    result.
    """
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None, noise_coef=None):
        super(PreActBasicBlock, self).__init__()
        self.bn1 = nn.BatchNorm2d(inplanes)
        self.relu = nn.ReLU(inplace=True)
        # 3x3 convolutions with padding 1 and no bias.
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.downsample = downsample
        self.stride = stride
        self.noise_coef = noise_coef

    def forward(self, x):
        activated = self.relu(self.bn1(x))
        shortcut = x if self.downsample is None else self.downsample(activated)

        out = self.conv1(activated)
        out = self.conv2(self.relu(self.bn2(out)))
        out += shortcut

        if self.noise_coef is None:
            return out
        # Noise magnitude follows the spread of the block output.
        return out + self.noise_coef * torch.std(out) * torch.randn_like(out)
class PreAct_ResNet_Cifar(nn.Module):
    """
    Pre-activation ResNet with three stages of 16, 32 and 64 base planes.

    Arguments:
        block: residual block class; must expose an `expansion` attribute and
            accept (inplanes, planes, stride=, downsample=, noise_coef=).
        layers: number of blocks in each of the three stages.
        num_classes: size of the final linear layer's output.
        noise_coef: forwarded unchanged to every block.
    """
    def __init__(self, block, layers, num_classes=10, noise_coef=None):
        super(PreAct_ResNet_Cifar, self).__init__()
        self.inplanes = 16
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1, bias=False)
        self.layer1 = self._make_layer(block, 16, layers[0], noise_coef=noise_coef)
        self.layer2 = self._make_layer(block, 32, layers[1], stride=2, noise_coef=noise_coef)
        self.layer3 = self._make_layer(block, 64, layers[2], stride=2, noise_coef=noise_coef)
        feature_dim = 64 * block.expansion
        self.bn = nn.BatchNorm2d(feature_dim)
        self.relu = nn.ReLU(inplace=True)
        # NOTE(review): an 8x8 pool after two stride-2 stages presumably
        # targets 32x32 inputs -- confirm against the data pipeline.
        self.avgpool = nn.AvgPool2d(8, stride=1)
        self.fc = nn.Linear(feature_dim, num_classes)

        # Weight initialisation: scaled normal for convolutions, identity
        # affine transform for batch norm.
        for module in self.modules():
            if isinstance(module, nn.BatchNorm2d):
                module.weight.data.fill_(1)
                module.bias.data.zero_()
            elif isinstance(module, nn.Conv2d):
                k_h, k_w = module.kernel_size[0], module.kernel_size[1]
                fan_out = k_h * k_w * module.out_channels
                module.weight.data.normal_(0, math.sqrt(2. / fan_out))

    def _make_layer(self, block, planes, blocks, stride=1, noise_coef=None):
        """
        Build one stage of `blocks` residual blocks; only the first block
        receives `stride` and the (optional) 1x1 projection shortcut.
        """
        out_planes = planes * block.expansion
        if stride == 1 and self.inplanes == out_planes:
            downsample = None
        else:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, out_planes, kernel_size=1, stride=stride, bias=False)
            )

        stage = [block(self.inplanes, planes, stride=stride, downsample=downsample, noise_coef=noise_coef)]
        # Every following block consumes the widened feature map.
        self.inplanes = out_planes
        stage.extend(block(self.inplanes, planes, noise_coef=noise_coef) for _ in range(1, blocks))
        return nn.Sequential(*stage)

    def forward(self, x):
        features = self.conv1(x)
        for stage in (self.layer1, self.layer2, self.layer3):
            features = stage(features)

        pooled = self.avgpool(self.relu(self.bn(features)))
        flat = pooled.view(pooled.size(0), -1)
        return self.fc(flat)
def init_params(net):
    """
    Initial layer parameters.

    Re-initialises, in place, every Conv2d (Kaiming normal, fan_out),
    BatchNorm2d (weight 1, bias 0) and Linear (normal with std 1e-3, bias 0)
    module found in `net.modules()`.  Layers built without a bias, and batch
    norms without affine parameters, are skipped for the missing tensors.
    """
    for m in net.modules():
        if isinstance(m, nn.Conv2d):
            init.kaiming_normal_(m.weight, mode='fan_out')
            # `if m.bias:` would evaluate the truth value of a tensor, which
            # raises for any bias with more than one element.
            if m.bias is not None:
                init.constant_(m.bias, 0)
        elif isinstance(m, nn.BatchNorm2d):
            if m.weight is not None:
                init.constant_(m.weight, 1)
            if m.bias is not None:
                init.constant_(m.bias, 0)
        elif isinstance(m, nn.Linear):
            init.normal_(m.weight, std=1e-3)
            if m.bias is not None:
                init.constant_(m.bias, 0)
def format_time(seconds):
    """
    Render a duration in seconds as a short string.

    The duration is split into days (D), hours (h), minutes (m), seconds (s)
    and milliseconds (ms); only the first two non-zero units, from largest to
    smallest, are kept.  Returns '0ms' when every unit is zero.
    """
    days = int(seconds / 3600/24)
    remainder = seconds - days*3600*24
    hours = int(remainder / 3600)
    remainder = remainder - hours*3600
    minutes = int(remainder / 60)
    remainder = remainder - minutes*60
    whole_seconds = int(remainder)
    remainder = remainder - whole_seconds
    millis = int(remainder*1000)

    units = (
        (days, 'D'),
        (hours, 'h'),
        (minutes, 'm'),
        (whole_seconds, 's'),
        (millis, 'ms'),
    )
    pieces = []
    for amount, suffix in units:
        # At most two units are shown.
        if amount > 0 and len(pieces) < 2:
            pieces.append(str(amount) + suffix)

    if not pieces:
        return '0ms'
    return ''.join(pieces)
146 | """ 147 | for param in model.parameters(): 148 | param.requires_grad = True -------------------------------------------------------------------------------- /WideResNet34-10/Attack_PGD_WideResNet.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | CW, FGSM, and IFGSM Attack CNN 4 | """ 5 | import torch._utils 6 | try: 7 | torch._utils._rebuild_tensor_v2 8 | except AttributeError: 9 | def _rebuild_tensor_v2(storage, storage_offset, size, stride, requires_grad, backward_hooks): 10 | tensor = torch._utils._rebuild_tensor(storage, storage_offset, size, stride) 11 | tensor.requires_grad = requires_grad 12 | tensor._backward_hooks = backward_hooks 13 | return tensor 14 | torch._utils._rebuild_tensor_v2 = _rebuild_tensor_v2 15 | 16 | import torch 17 | import torch.nn as nn 18 | import torch.nn.functional as F 19 | import torch.backends.cudnn as cudnn 20 | import torch.optim as optim 21 | import torchvision 22 | import torchvision.datasets as dset 23 | import torchvision.transforms as transforms 24 | from torch.autograd import Variable 25 | import copy 26 | import math 27 | import numpy as np 28 | import os 29 | import argparse 30 | 31 | #from utils import * 32 | 33 | import numpy.matlib 34 | import matplotlib.pyplot as plt 35 | import pickle 36 | import cPickle 37 | from collections import OrderedDict 38 | 39 | parser = argparse.ArgumentParser(description='Fool ResNet_PGD ') 40 | ap = parser.add_argument 41 | ap('-method', help='Attack Method', type=str, default="ifgsm") # fgsm, ifgsm, cwl2 42 | #ap('-epsilon', help='Attack Strength', type=float, default=0.007) 43 | ap('-epsilon', help='Attack Strength', type=float, default=0.003) # May 2 44 | #ap('-epsilon', help='Attack Strength', type=float, default=0.031) # May 2 45 | ap('--num-ensembles', '--ne', default=1, type=int, metavar='N') 46 | ap('--noise-coef', '--nc', default=0.0, type=float, metavar='W', help='forward noise (default: 0.0)') 47 | 
class BasicBlock(nn.Module):
    """
    Wide-ResNet basic block: BN -> ReLU -> 3x3 conv, twice, plus shortcut.

    When `in_planes == out_planes` the shortcut is the raw input; otherwise
    it is a 1x1 convolution (with the block's stride) applied to the first
    activated tensor.  Dropout is applied between the two convolutions when
    `dropRate > 0`.  If `noise_coef` is not None, Gaussian noise scaled by
    `noise_coef * std(out)` is added to the result.
    """
    def __init__(self, in_planes, out_planes, stride, dropRate=0.0, noise_coef=None):  # BW
        super(BasicBlock, self).__init__()
        self.bn1 = nn.BatchNorm2d(in_planes)
        self.relu1 = nn.ReLU(inplace=True)
        self.conv1 = nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
                               padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_planes)
        self.relu2 = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(out_planes, out_planes, kernel_size=3, stride=1,
                               padding=1, bias=False)
        self.droprate = dropRate
        self.equalInOut = (in_planes == out_planes)
        if self.equalInOut:
            self.convShortcut = None
        else:
            # Projection needed because the channel count changes.
            self.convShortcut = nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride,
                                          padding=0, bias=False)
        self.noise_coef = noise_coef

    def forward(self, x):
        activated = self.relu1(self.bn1(x))

        out = self.conv1(activated)
        out = self.relu2(self.bn2(out))
        if self.droprate > 0:
            out = F.dropout(out, p=self.droprate, training=self.training)
        out = self.conv2(out)

        shortcut = x if self.equalInOut else self.convShortcut(activated)
        out = torch.add(shortcut, out)

        if self.noise_coef is None:
            return out
        # Noise magnitude follows the spread of the block output.
        return out + self.noise_coef * torch.std(out) * torch.randn_like(out)
class WideResNet(nn.Module):
    """
    Wide residual network built from three `NetworkBlock` stages.

    Arguments:
        depth: total depth; `(depth - 4)` must be divisible by 6.
        num_classes: size of the final linear layer's output.
        widen_factor: multiplier applied to the 16/32/64 base widths.
        dropRate: dropout rate forwarded to every block.
        noise_coef: noise coefficient forwarded to every block.
    """
    def __init__(self, depth=34, num_classes=10, widen_factor=10, dropRate=0.0, noise_coef=None):
        super(WideResNet, self).__init__()
        widths = [16] + [base * widen_factor for base in (16, 32, 64)]
        assert ((depth - 4) % 6 == 0)
        # NetworkBlock converts this to int itself.
        blocks_per_stage = (depth - 4) / 6
        block = BasicBlock
        # 1st conv before any network block
        self.conv1 = nn.Conv2d(3, widths[0], kernel_size=3, stride=1,
                               padding=1, bias=False)
        # 1st block
        self.block1 = NetworkBlock(blocks_per_stage, widths[0], widths[1], block, 1, dropRate,
                                   noise_coef=noise_coef)
        # 1st sub-block: never called in forward(), but it is a registered
        # submodule, so its parameters belong to the module's state.
        self.sub_block1 = NetworkBlock(blocks_per_stage, widths[0], widths[1], block, 1, dropRate,
                                       noise_coef=noise_coef)
        # 2nd block
        self.block2 = NetworkBlock(blocks_per_stage, widths[1], widths[2], block, 2, dropRate,
                                   noise_coef=noise_coef)
        # 3rd block
        self.block3 = NetworkBlock(blocks_per_stage, widths[2], widths[3], block, 2, dropRate,
                                   noise_coef=noise_coef)
        # global average pooling and classifier
        self.bn1 = nn.BatchNorm2d(widths[3])
        self.relu = nn.ReLU(inplace=True)
        self.fc = nn.Linear(widths[3], num_classes)
        self.nChannels = widths[3]

        # Weight initialisation.
        for module in self.modules():
            if isinstance(module, nn.Linear):
                module.bias.data.zero_()
            elif isinstance(module, nn.BatchNorm2d):
                module.weight.data.fill_(1)
                module.bias.data.zero_()
            elif isinstance(module, nn.Conv2d):
                fan_out = module.kernel_size[0] * module.kernel_size[1] * module.out_channels
                module.weight.data.normal_(0, math.sqrt(2. / fan_out))

    def forward(self, x):
        features = self.conv1(x)
        for stage in (self.block1, self.block2, self.block3):
            features = stage(features)
        features = self.relu(self.bn1(features))
        # NOTE(review): the 8x8 pool followed by view(-1, nChannels)
        # presumably expects 32x32 inputs -- confirm against the data loader.
        pooled = F.avg_pool2d(features, 8)
        flat = pooled.view(-1, self.nChannels)
        return self.fc(flat)
[], [] 190 | total_fooled = 0; total_correct_classified = 0 191 | 192 | if attack_type == 'fgsm': 193 | for batch_idx, (x1, y1_true) in enumerate(test_loader): 194 | #if batch_idx < 2: 195 | x_Test = x1.numpy() 196 | #print x_Test.min(), x_Test.max() 197 | #x_Test = ((x_Test - x_Test.min())/(x_Test.max() - x_Test.min()) - 0.5)*2 198 | #x_Test = (x_Test - x_Test.min() )/(x_Test.max() - x_Test.min()) 199 | y_Test = y1_true.numpy() 200 | 201 | #x = Variable(torch.cuda.FloatTensor(x_Test.reshape(1, 1, 28, 28)), requires_grad=True) 202 | x = Variable(torch.cuda.FloatTensor(x_Test.reshape(batchsize_test, 3, 32, 32)), requires_grad=True) 203 | y = Variable(torch.cuda.LongTensor(y_Test), requires_grad=False) 204 | 205 | # Classification before perturbation 206 | pred_tmp = net(x) 207 | y_pred = np.argmax(pred_tmp.cpu().data.numpy()) 208 | loss = criterion(pred_tmp, y) 209 | # Attack 210 | net.zero_grad() 211 | if x.grad is not None: 212 | x.grad.data.fill_(0) 213 | loss.backward() 214 | 215 | x_val_min = 0.0 216 | x_val_max = 1.0 217 | x.grad.sign_() 218 | 219 | x_adversarial = x + epsilon*x.grad 220 | x_adversarial = torch.clamp(x_adversarial, x_val_min, x_val_max) 221 | x_adversarial = x_adversarial.data 222 | 223 | # Classify the perturbed data 224 | x_adversarial_tmp = Variable(x_adversarial) 225 | pred_tmp = net(x_adversarial_tmp) 226 | y_pred_adversarial = np.argmax(pred_tmp.cpu().data.numpy(), axis=1) 227 | 228 | for i in range(len(x_Test)): 229 | #print y_pred_adversarial 230 | if y_Test[i] == y_pred_adversarial[i]: 231 | #if y_Test == y_pred_adversarial: 232 | total_correct_classified += 1 233 | 234 | for i in range(len(x_Test)): 235 | # Save the perturbed data 236 | images.append(x_Test[i, :, :, :]) # Original image 237 | images_adv.append(x_adversarial.cpu().numpy()[i, :, :, :]) # Perturbed image 238 | noise.append(x_adversarial.cpu().numpy()[i, :, :, :]-x_Test[i, :, :, :]) # Noise 239 | labels.append(y_Test[i]) 240 | labels_pred.append(y_pred_adversarial[i]) 
def where(cond, x, y):
    """
    Element-wise selection: takes `x` where `cond` is set and `y` elsewhere,
    computed as an arithmetic blend with the float-cast condition.

    code from :
        https://discuss.pytorch.org/t/how-can-i-do-the-operation-the-same-as-np-where/1329/8
    """
    keep = cond.float()
    drop = 1 - keep
    picked_x = keep * x
    picked_y = drop * y
    return picked_x + picked_y
278 | loss = criterion(h_adv, y) 279 | net.zero_grad() 280 | if x_adv.grad is not None: 281 | x_adv.grad.data.fill_(0) 282 | loss.backward() 283 | 284 | x_adv.grad.sign_() 285 | x_adv = x_adv + alpha*x_adv.grad 286 | x_adv = where(x_adv > x+epsilon1, x+epsilon1, x_adv) 287 | x_adv = where(x_adv < x-epsilon1, x-epsilon1, x_adv) 288 | x_adv = torch.clamp(x_adv, x_val_min, x_val_max) 289 | x_adv = Variable(x_adv.data, requires_grad=True) 290 | 291 | x_adversarial = x_adv.data 292 | 293 | x_adversarial_tmp = Variable(x_adversarial) 294 | pred_tmp = net(x_adversarial_tmp) 295 | loss = criterion(pred_tmp, y) 296 | y_pred_adversarial = np.argmax(pred_tmp.cpu().data.numpy(), axis=1) 297 | 298 | #if y_Test == y_pred_adversarial: 299 | # total_correct_classified += 1 300 | for i in range(len(x_Test)): 301 | #print y_pred_adversarial 302 | if y_Test[i] == y_pred_adversarial[i]: 303 | #if y_Test == y_pred_adversarial: 304 | total_correct_classified += 1 305 | 306 | for i in range(len(x_Test)): 307 | # Save the perturbed data 308 | images.append(x_Test[i, :, :, :]) # Original image 309 | images_adv.append(x_adversarial.cpu().numpy()[i, :, :, :]) # Perturbed image 310 | noise.append(x_adversarial.cpu().numpy()[i, :, :, :]-x_Test[i, :, :, :]) # Noise 311 | labels.append(y_Test[i]) 312 | labels_pred.append(y_pred_adversarial[i]) 313 | 314 | elif attack_type == 'cw': 315 | for batch_idx, (x1, y1_true) in enumerate(test_loader): 316 | #if batch_idx < 4000: 317 | if batch_idx - int(int(batch_idx/50.)*50) == 0: 318 | print batch_idx 319 | x_Test = x1.numpy() 320 | y_Test = y1_true.numpy() 321 | 322 | x = Variable(torch.cuda.FloatTensor(x_Test.reshape(batchsize_test, 3, 32, 32)), requires_grad=True) 323 | y = Variable(torch.cuda.LongTensor(y_Test), requires_grad=False) 324 | 325 | # Classification before perturbation 326 | pred_tmp = net(x) 327 | loss = criterion(pred_tmp, y) 328 | y_pred = np.argmax(pred_tmp.cpu().data.numpy()) 329 | 330 | # Attack 331 | cwl2_learning_rate = 
0.0006#0.01 332 | max_iter = 50 333 | lambdaf = 10.0 334 | kappa = 0.0 335 | 336 | # The input image we will perturb 337 | input = torch.FloatTensor(x_Test.reshape(batchsize_test, 3, 32, 32)) 338 | input_var = Variable(input) 339 | 340 | # w is the variable we will optimize over. We will also save the best w and loss 341 | w = Variable(input, requires_grad=True) 342 | best_w = input.clone() 343 | best_loss = float('inf') 344 | 345 | # Use the Adam optimizer for the minimization 346 | optimizer = optim.Adam([w], lr=cwl2_learning_rate) 347 | 348 | # Get the top2 predictions of the model. Get the argmaxes for the objective function 349 | probs = net(input_var.cuda()) 350 | 351 | probs_data = probs.data.cpu() 352 | top1_idx = torch.max(probs_data, 1)[1] 353 | probs_data[0][top1_idx] = -1 # making the previous top1 the lowest so we get the top2 354 | top2_idx = torch.max(probs_data, 1)[1] 355 | 356 | # Set the argmax (but maybe argmax will just equal top2_idx always?) 357 | argmax = top1_idx[0] 358 | if argmax == y_pred: 359 | argmax = top2_idx[0] 360 | 361 | # The iteration 362 | for i in range(0, max_iter): 363 | if i > 0: 364 | w.grad.data.fill_(0) 365 | 366 | # Zero grad (Only one line needed actually) 367 | net.zero_grad() 368 | optimizer.zero_grad() 369 | 370 | # Compute L2 Loss 371 | loss = torch.pow(w - input_var, 2).sum() 372 | 373 | # w variable 374 | w_data = w.data 375 | w_in = Variable(w_data, requires_grad=True) 376 | 377 | # Compute output 378 | output = net.forward(w_in.cuda()) #second argument is unneeded 379 | 380 | # Calculating the (hinge) loss 381 | loss += lambdaf * torch.clamp( output[0][y_pred] - output[0][argmax] + kappa, min=0).cpu() 382 | 383 | # Backprop the loss 384 | loss.backward() 385 | 386 | # Work on w (Don't think we need this) 387 | w.grad.data.add_(w_in.grad.data) 388 | 389 | # Optimizer step 390 | optimizer.step() 391 | 392 | # Save the best w and loss 393 | total_loss = loss.data.cpu()[0] 394 | 395 | if total_loss < best_loss: 396 
| best_loss = total_loss 397 | 398 | ##best_w = torch.clamp(best_w, 0., 1.) # BW Added Aug 26 399 | 400 | best_w = w.data.clone() 401 | 402 | # Set final adversarial image as the best-found w 403 | x_adversarial = best_w 404 | 405 | ##x_adversarial = torch.clamp(x_adversarial, 0., 1.) # BW Added Aug 26 406 | 407 | #--------------- Add to introduce the noise 408 | noise_tmp = x_adversarial.cpu().numpy() - x_Test 409 | x_adversarial = x_Test + epsilon * noise_tmp 410 | #--------------- 411 | 412 | # Classify the perturbed data 413 | x_adversarial_tmp = Variable(torch.cuda.FloatTensor(x_adversarial), requires_grad=False) #Variable(x_adversarial).cuda() 414 | pred_tmp = net(x_adversarial_tmp) 415 | y_pred_adversarial = np.argmax(pred_tmp.cpu().data.numpy()) # axis=1 416 | 417 | if y_Test == y_pred_adversarial: 418 | total_correct_classified += 1 419 | 420 | # Save the perturbed data 421 | images.append(x_Test) # Original image 422 | images_adv.append(x_adversarial) # Perturbed image 423 | noise.append(x_adversarial-x_Test) # Noise 424 | labels.append(y_Test) 425 | labels_pred.append(y_pred_adversarial) 426 | else: 427 | ValueError('Unsupported Attack') 428 | 429 | print('Number of correctly classified images: ', total_correct_classified) 430 | # Save data 431 | #with open("Adversarial" + attack_type + str(int(10*epsilon)) + ".pkl", "w") as f: 432 | #with open("Adversarial" + attack_type + str(int(100*epsilon)) + ".pkl", "w") as f: 433 | # adv_data_dict = {"images":images_adv, "labels":labels} 434 | # cPickle.dump(adv_data_dict, f) 435 | images = np.array(images).squeeze() 436 | images_adv = np.array(images_adv).squeeze() 437 | noise = np.array(noise).squeeze() 438 | labels = np.array(labels).squeeze() 439 | labels_pred = np.array(labels_pred).squeeze() 440 | print images.shape, images_adv.shape, noise.shape, labels.shape, labels_pred.shape 441 | 442 | with open("fooled_WideResNet_PGD_10iters_" + attack_type + str(int(1000*epsilon)) + ".pkl", "w") as f: 443 | #with 
open("fooled_WideResNet110_PGD_20iters_" + attack_type + str(int(1000*epsilon)) + ".pkl", "w") as f: 444 | adv_data_dict = { 445 | "images" : images, 446 | "images_adversarial" : images_adv, 447 | "y_trues" : labels, 448 | "noises" : noise, 449 | "y_preds_adversarial" : labels_pred 450 | } 451 | pickle.dump(adv_data_dict, f) 452 | 453 | -------------------------------------------------------------------------------- /WideResNet34-10/Readme.md: -------------------------------------------------------------------------------- 1 | ## For reproducing results of WideResNet34-10 on the CIFAR10 2 | ### PGD adversarial training 3 | ``` 4 | python main_pgd_wideresnet34_10_Validation.py --lr 0.1 --noise-coef 0.1 5 | ``` 6 | 7 | ### Attack the trained model 8 | ``` 9 | python Attack_PGD_WideResNet.py --method ifgsm 10 | ``` 11 | The method can be fgsm, ifgsm, and cw 12 | -------------------------------------------------------------------------------- /WideResNet34-10/main_pgd_wideresnet34_10_Validation.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | main pgd enresnet 4 | """ 5 | import argparse 6 | import os 7 | import shutil 8 | import time 9 | 10 | import torch.backends.cudnn as cudnn 11 | import torch.optim as optim 12 | import torchvision 13 | import torchvision.transforms as transforms 14 | from torch.autograd import Variable 15 | import torch.nn.functional as F 16 | 17 | import torch 18 | import torch.nn as nn 19 | import math 20 | 21 | from resnet_cifar import * 22 | from utils import * 23 | 24 | parser = argparse.ArgumentParser(description='PyTorch Cifar10 Training') 25 | parser.add_argument('--model_name', default='en_wideresnet34_10_cifar10', type=str, help='name of the model') 26 | parser.add_argument('--epochs', default=200, type=int, metavar='N', help='number of total epochs to run') 27 | parser.add_argument('--start-epoch', default=0, type=int, metavar='N', help='manual epoch number 
class BasicBlock(nn.Module):
    """Pre-activation wide-residual block with optional Gaussian noise injection.

    The block computes ``shortcut(x) + conv2(relu(bn2(conv1(relu(bn1(x))))))``.
    When ``in_planes == out_planes`` the shortcut is the identity on the raw
    input; otherwise it is a 1x1 convolution applied to the pre-activated input.

    Args:
        in_planes: number of input channels.
        out_planes: number of output channels.
        stride: stride of the first 3x3 convolution (and of the 1x1 shortcut).
        dropRate: dropout probability applied between the two convolutions;
            ``0`` disables dropout.
        noise_coef: if not ``None``, the output is perturbed by
            ``noise_coef * std(out) * N(0, 1)`` noise. The noise is added on
            every forward pass; this block does not check ``self.training``.
    """

    def __init__(self, in_planes, out_planes, stride, dropRate=0.0, noise_coef=None):  # BW
        super(BasicBlock, self).__init__()
        self.bn1 = nn.BatchNorm2d(in_planes)
        self.relu1 = nn.ReLU(inplace=True)
        self.conv1 = nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
                               padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_planes)
        self.relu2 = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(out_planes, out_planes, kernel_size=3, stride=1,
                               padding=1, bias=False)
        self.droprate = dropRate
        self.equalInOut = (in_planes == out_planes)
        # Explicit branch instead of the fragile ``cond and module or None`` idiom.
        if self.equalInOut:
            self.convShortcut = None
        else:
            self.convShortcut = nn.Conv2d(in_planes, out_planes, kernel_size=1,
                                          stride=stride, padding=0, bias=False)
        self.noise_coef = noise_coef

    def forward(self, x):
        """Apply the residual block to ``x`` and return the (possibly noisy) output."""
        if self.equalInOut:
            # Identity shortcut: keep the raw input for the skip connection.
            out = self.relu1(self.bn1(x))
        else:
            # Projection shortcut: both branches consume the pre-activated input.
            x = self.relu1(self.bn1(x))
            out = x

        out = self.relu2(self.bn2(self.conv1(out)))
        if self.droprate > 0:
            out = F.dropout(out, p=self.droprate, training=self.training)
        out = self.conv2(out)

        shortcut = x if self.equalInOut else self.convShortcut(x)
        out = torch.add(shortcut, out)

        if self.noise_coef is None:
            return out
        # Multiplicative-scale Gaussian noise: the std is taken over the whole tensor.
        return out + self.noise_coef * torch.std(out) * torch.randn_like(out)


class NetworkBlock(nn.Module):
    """A sequential stack of ``nb_layers`` residual blocks.

    Only the first block changes the channel count (``in_planes`` to
    ``out_planes``) and applies ``stride``; the remaining blocks map
    ``out_planes`` to ``out_planes`` with stride 1.

    Args:
        nb_layers: number of blocks; converted with ``int()`` so a float
            count (e.g. from true division) is accepted.
        in_planes: input channels of the first block.
        out_planes: output channels of every block.
        block: block class, called as
            ``block(in_planes, out_planes, stride, dropRate, noise_coef=...)``.
        stride: stride of the first block.
        dropRate: dropout probability forwarded to every block.
        noise_coef: noise coefficient forwarded to every block.
    """

    def __init__(self, nb_layers, in_planes, out_planes, block, stride, dropRate=0.0, noise_coef=None):
        super(NetworkBlock, self).__init__()
        self.layer = self._make_layer(block, in_planes, out_planes, nb_layers, stride, dropRate, noise_coef)
        self.noise_coef = noise_coef

    def _make_layer(self, block, in_planes, out_planes, nb_layers, stride, dropRate, noise_coef):
        """Build the ``nn.Sequential`` of blocks described in the class docstring."""
        layers = []
        for i in range(int(nb_layers)):
            is_first = (i == 0)
            # Conditional expressions (not ``and/or``) so falsy values are passed
            # through unchanged instead of silently selecting the fallback.
            layers.append(block(in_planes if is_first else out_planes,
                                out_planes,
                                stride if is_first else 1,
                                dropRate,
                                noise_coef=noise_coef))
        return nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through all blocks in order."""
        return self.layer(x)
class AttackPGD(nn.Module):
    """Wrap a classifier so that its forward pass runs on PGD-perturbed inputs.

    Used for PGD adversarial training: ``forward`` first crafts an L-infinity
    bounded adversarial example with iterated signed-gradient steps and then
    returns the wrapped network's logits on that example.

    Args:
        basic_net: the classifier being attacked/trained.
        config: dict with keys ``random_start`` (bool), ``step_size`` (float),
            ``epsilon`` (float, L-infinity radius), ``num_steps`` (int) and
            ``loss_func`` (must be ``'xent'``).

    Raises:
        AssertionError: if ``config['loss_func']`` is not ``'xent'``.
    """

    def __init__(self, basic_net, config):
        super(AttackPGD, self).__init__()
        self.basic_net = basic_net
        self.rand = config['random_start']
        self.step_size = config['step_size']
        self.epsilon = config['epsilon']
        self.num_steps = config['num_steps']
        assert config['loss_func'] == 'xent', 'Only xent supported for now.'

    def forward(self, inputs, targets):
        """Return ``(logits_on_adversarial, adversarial_inputs)``.

        The adversarial inputs stay within ``epsilon`` of ``inputs`` and are
        clamped to ``[0, 1]`` after every step.
        """
        x = inputs
        if self.rand:
            x = x + torch.zeros_like(x).uniform_(-self.epsilon, self.epsilon)
        for _ in range(self.num_steps):  # iFGSM attack
            # Detach into a fresh leaf before enabling grad so the caller's
            # ``inputs`` tensor never has its ``requires_grad`` flag flipped.
            x = x.detach().requires_grad_()
            with torch.enable_grad():
                logits = self.basic_net(x)
                # ``reduction='sum'`` is the replacement for ``size_average=False``.
                loss = F.cross_entropy(logits, targets, reduction='sum')
            grad = torch.autograd.grad(loss, [x])[0]
            x = x.detach() + self.step_size * torch.sign(grad.detach())
            # Project back onto the epsilon-ball, then onto the valid pixel range.
            x = torch.min(torch.max(x, inputs - self.epsilon), inputs + self.epsilon)
            x = torch.clamp(x, 0, 1)

        return self.basic_net(x), x


# The first 352 mini-batches of each pass over ``train_loader`` are used for
# training; the remaining ones are used as the validation split.
_NUM_TRAIN_BATCHES = 352


def _run_batches(net, criterion, loader, use_batch, optimizer=None):
    """Run ``net`` over the selected batches of ``loader`` on the GPU.

    Args:
        net: an ``AttackPGD``-style module returning ``(scores, perturbed_x)``.
        criterion: loss applied to ``(scores, target)``.
        loader: iterable of ``(x, target)`` mini-batches.
        use_batch: predicate on the batch index; batches for which it returns
            false are skipped.
        optimizer: if given, a gradient step is taken on every selected batch;
            if ``None`` the pass is evaluation-only and the final forward pass
            runs with autograd disabled.

    Returns:
        ``(correct, total)`` as Python ints.
    """
    training = optimizer is not None
    loss_sum = 0.0
    correct = 0
    total = 0
    batches_done = 0
    for batch_idx, (x, target) in enumerate(loader):
        if not use_batch(batch_idx):
            continue
        x, target = x.cuda(), target.cuda()
        if training:
            optimizer.zero_grad()

        # The attack re-enables autograd internally, so disabling it here only
        # affects the final forward pass (this replaces ``volatile=True``).
        with torch.set_grad_enabled(training):
            score, _ = net(x, target)
            loss = criterion(score, target)
        if training:
            loss.backward()
            optimizer.step()

        # ``.item()`` yields Python numbers, so the accuracy below is a true
        # float division rather than integer tensor arithmetic.
        loss_sum += loss.item()
        _, predicted = torch.max(score, 1)
        total += target.size(0)
        correct += predicted.eq(target).sum().item()
        batches_done += 1
        # Average over the batches actually processed, not the loader index.
        progress_bar(batch_idx, len(loader), 'Loss: %.3f | Acc: %.3f%% (%d/%d)'
                     % (loss_sum / batches_done, 100. * correct / total, correct, total))
    return correct, total


if __name__ == '__main__':
    # NOTE(review): the nesting of this script was reconstructed from a
    # flattened listing; in particular the test pass is assumed to run only
    # when validation improves and the final print to run once after training.
    use_cuda = torch.cuda.is_available()  # the original never called the function
    best_acc = 0
    start_epoch = 0
    args = parser.parse_args()
    best_count = 0

    #--------------------------------------------------------------------------
    # Load Cifar data
    #--------------------------------------------------------------------------
    print('==> Preparing data...')
    root = './data'
    download = True

    train_set = torchvision.datasets.CIFAR10(
        root=root,
        train=True,
        download=download,
        transform=transforms.Compose([
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
        ]))

    test_set = torchvision.datasets.CIFAR10(
        root=root,
        train=False,
        download=download,
        transform=transforms.Compose([
            transforms.ToTensor(),
        ]))

    kwargs = {'num_workers': 1, 'pin_memory': True}
    # Floor division: DataLoader needs an integer batch size on Python 3 too.
    batchsize_test = len(test_set) // 100
    print('Batch size of the test set: ', batchsize_test)
    test_loader = torch.utils.data.DataLoader(dataset=test_set,
                                              batch_size=batchsize_test,
                                              shuffle=False, **kwargs)
    batchsize_train = 128
    print('Batch size of the train set: ', batchsize_train)
    # NOTE(review): shuffle=True means the batches with index >= 352 that are
    # used for validation differ from epoch to epoch, so this is not a fixed
    # hold-out set.
    train_loader = torch.utils.data.DataLoader(dataset=train_set,
                                               batch_size=batchsize_train,
                                               shuffle=True, **kwargs)

    basic_net = WideResNet(noise_coef=args.noise_coef).cuda()

    # From https://github.com/MadryLab/cifar10_challenge/blob/master/config.json
    config = {
        'epsilon': 0.031,
        'num_steps': 10,
        'step_size': 0.007,
        'random_start': True,
        'loss_func': 'xent',
    }

    net = AttackPGD(basic_net, config).cuda()
    criterion = nn.CrossEntropyLoss()

    nepoch = 80
    for epoch in range(nepoch):
        print('Epoch ID', epoch)
        # Step learning-rate schedule.
        if epoch < 75:
            lr = 0.1
        elif epoch < 77:
            lr = 0.1/10
        elif epoch < 79:
            lr = 0.1/10/10
        else:
            lr = 0.1/10/10/10

        # A fresh optimizer per epoch (as in the original) also resets the
        # momentum buffers at every epoch boundary.
        optimizer = optim.SGD(net.parameters(), lr=lr, momentum=0.9, weight_decay=5e-4, nesterov=True)

        #----------------------------------------------------------------------
        # Training
        #----------------------------------------------------------------------
        net.train()
        _run_batches(net, criterion, train_loader,
                     lambda idx: idx < _NUM_TRAIN_BATCHES, optimizer=optimizer)

        #----------------------------------------------------------------------
        # Validation
        #----------------------------------------------------------------------
        net.eval()
        correct, total = _run_batches(net, criterion, train_loader,
                                      lambda idx: idx >= _NUM_TRAIN_BATCHES)

        #----------------------------------------------------------------------
        # Save the checkpoint and test when validation improves
        #----------------------------------------------------------------------
        acc = 100.*correct/total
        if correct > best_count:
            print('Saving model...')
            state = {
                'net': basic_net,
                'acc': acc,
                'epoch': epoch,
            }
            torch.save(state, './ckpt_PGD_ensemble_WideResNet.t7')

            best_count = correct
            net.eval()
            _run_batches(net, criterion, test_loader, lambda idx: True)
    print('The best acc: ', best_count)
import torch
import torch.nn as nn
import math
from torch.autograd import Variable


def conv3x3(in_planes, out_planes, stride=1):
    """Return a bias-free 3x3 convolution with padding 1."""
    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False)


def _inject_noise(out, noise_coef):
    """Return ``out`` plus Gaussian noise scaled by ``noise_coef * std(out)``.

    ``out`` is returned unchanged when ``noise_coef`` is ``None``. The standard
    deviation is taken over the whole tensor.
    """
    if noise_coef is None:
        return out
    return out + noise_coef * torch.std(out) * torch.randn_like(out)


class PreActBasicBlock(nn.Module):
    """Pre-activation residual block with two 3x3 convolutions.

    Args:
        inplanes: number of input channels.
        planes: number of output channels.
        stride: stride of the first convolution.
        downsample: optional module applied to the pre-activated input to
            produce the shortcut; if ``None`` the raw input is the shortcut.
        noise_coef: if not ``None``, Gaussian noise scaled by
            ``noise_coef * std(out)`` is added to the output on every forward
            pass (no ``self.training`` check is made).
    """
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None, noise_coef=None):
        super(PreActBasicBlock, self).__init__()
        self.bn1 = nn.BatchNorm2d(inplanes)
        self.relu = nn.ReLU(inplace=True)
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv2 = conv3x3(planes, planes)
        self.downsample = downsample
        self.stride = stride
        self.noise_coef = noise_coef

    def forward(self, x):
        """Apply the block to ``x``."""
        residual = x
        out = self.relu(self.bn1(x))

        if self.downsample is not None:
            # The projection shortcut is taken from the pre-activated input.
            residual = self.downsample(out)

        out = self.conv1(out)
        out = self.conv2(self.relu(self.bn2(out)))

        out += residual
        return _inject_noise(out, self.noise_coef)


class PreActBottleneck(nn.Module):
    """Pre-activation bottleneck block (1x1, 3x3, 1x1 convolutions).

    The output has ``planes * expansion`` channels. Arguments have the same
    meaning as for ``PreActBasicBlock``; ``stride`` is applied by the 3x3
    convolution.
    """
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None, noise_coef=None):
        super(PreActBottleneck, self).__init__()
        self.bn1 = nn.BatchNorm2d(inplanes)
        self.relu = nn.ReLU(inplace=True)
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn3 = nn.BatchNorm2d(planes)
        # Use the class attribute rather than a second hard-coded 4.
        self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1, bias=False)
        self.downsample = downsample
        self.stride = stride
        self.noise_coef = noise_coef

    def forward(self, x):
        """Apply the block to ``x``."""
        residual = x
        out = self.relu(self.bn1(x))

        if self.downsample is not None:
            residual = self.downsample(out)

        out = self.conv1(out)
        out = self.conv2(self.relu(self.bn2(out)))
        out = self.conv3(self.relu(self.bn3(out)))

        out += residual
        return _inject_noise(out, self.noise_coef)


class PreAct_ResNet_Cifar(nn.Module):
    """Pre-activation ResNet with three stages of 16/32/64 base channels.

    Args:
        block: block class (``PreActBasicBlock`` or ``PreActBottleneck``).
        layers: sequence of three ints, the number of blocks per stage.
        num_classes: size of the final linear layer's output.
        noise_coef: noise coefficient forwarded to every block.

    The final ``AvgPool2d(8)`` followed by a flatten feeds a linear layer with
    ``64 * block.expansion`` inputs, so the stage-3 feature map must be 8x8
    (which is the case for 32x32 inputs given the two stride-2 stages).
    """

    def __init__(self, block, layers, num_classes=10, noise_coef=None):
        super(PreAct_ResNet_Cifar, self).__init__()
        self.inplanes = 16
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1, bias=False)
        self.layer1 = self._make_layer(block, 16, layers[0], noise_coef=noise_coef)
        self.layer2 = self._make_layer(block, 32, layers[1], stride=2, noise_coef=noise_coef)
        self.layer3 = self._make_layer(block, 64, layers[2], stride=2, noise_coef=noise_coef)
        self.bn = nn.BatchNorm2d(64*block.expansion)
        self.relu = nn.ReLU(inplace=True)
        self.avgpool = nn.AvgPool2d(8, stride=1)
        self.fc = nn.Linear(64*block.expansion, num_classes)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                # He initialisation based on the fan-out of the convolution.
                fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / fan_out))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

    def _make_layer(self, block, planes, blocks, stride=1, noise_coef=None):
        """Build one stage of ``blocks`` blocks and update ``self.inplanes``.

        A 1x1 convolution shortcut is added to the first block whenever the
        stage changes resolution or channel count.
        """
        out_planes = planes * block.expansion
        downsample = None
        if stride != 1 or self.inplanes != out_planes:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, out_planes, kernel_size=1, stride=stride, bias=False)
            )

        layers = [block(self.inplanes, planes, stride=stride, downsample=downsample, noise_coef=noise_coef)]
        self.inplanes = out_planes
        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes, noise_coef=noise_coef))
        return nn.Sequential(*layers)

    def forward(self, x):
        """Return the class logits for a batch of images ``x``."""
        x = self.conv1(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)

        x = self.relu(self.bn(x))
        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        return self.fc(x)
import os
import sys
import time
import math

import torch.nn as nn
import torch.nn.init as init


def init_params(net):
    """Initialise the parameters of every conv, batch-norm and linear layer in ``net``.

    * ``nn.Conv2d``: Kaiming-normal weights (``fan_out`` mode), zero bias.
    * ``nn.BatchNorm2d``: weight 1, bias 0 (skipped when the layer has no
      affine parameters).
    * ``nn.Linear``: normal weights with std ``1e-3``, zero bias.
    """
    for m in net.modules():
        if isinstance(m, nn.Conv2d):
            init.kaiming_normal_(m.weight, mode='fan_out')
            # ``if m.bias:`` would raise for a multi-element tensor; test for
            # presence explicitly.
            if m.bias is not None:
                init.constant_(m.bias, 0)
        elif isinstance(m, nn.BatchNorm2d):
            if m.weight is not None:
                init.constant_(m.weight, 1)
            if m.bias is not None:
                init.constant_(m.bias, 0)
        elif isinstance(m, nn.Linear):
            init.normal_(m.weight, std=1e-3)
            if m.bias is not None:
                init.constant_(m.bias, 0)


def _terminal_width(default=80):
    """Return the terminal width reported by ``stty size``, or ``default``.

    ``stty`` prints nothing on stdout when there is no controlling terminal
    (e.g. output redirected to a file or a batch job), in which case the
    default is used instead of failing at import time.
    """
    try:
        _, columns = os.popen('stty size', 'r').read().split()
        return int(columns)
    except ValueError:
        return default


term_width = _terminal_width()

TOTAL_BAR_LENGTH = 65.
last_time = time.time()
begin_time = time.time()


def progress_bar(current, total, msg=None):
    """Draw a one-line text progress bar on stdout.

    Args:
        current: zero-based index of the step just completed.
        total: total number of steps.
        msg: optional text appended after the timing information.

    The line ends with ``\\r`` while ``current < total - 1`` (so the next call
    overwrites it) and with ``\\n`` on the last step.
    """
    global last_time, begin_time
    if current == 0:
        begin_time = time.time()  # Reset for new bar.

    cur_len = int(TOTAL_BAR_LENGTH*current/total)
    rest_len = int(TOTAL_BAR_LENGTH - cur_len) - 1

    cur_time = time.time()
    step_time = cur_time - last_time
    last_time = cur_time
    tot_time = cur_time - begin_time

    status = ' Step: %s' % format_time(step_time)
    status += ' | Tot: %s' % format_time(tot_time)
    if msg:
        status += ' | ' + msg

    # Negative repeat counts yield empty strings, matching an empty range().
    pieces = [
        ' [', '=' * cur_len, '>', '.' * rest_len, ']',
        status,
        ' ' * (term_width - int(TOTAL_BAR_LENGTH) - len(status) - 3),
        # Go back to the center of the bar.
        '\b' * (term_width - int(TOTAL_BAR_LENGTH/2) + 2),
        ' %d/%d ' % (current+1, total),
        '\r' if current < total-1 else '\n',
    ]
    sys.stdout.write(''.join(pieces))
    sys.stdout.flush()


def format_time(seconds):
    """Format a duration in seconds using its two most significant non-zero units.

    Units are ``D``, ``h``, ``m``, ``s`` and ``ms``; e.g. ``3661`` gives
    ``'1h1m'`` and ``1.5`` gives ``'1s500ms'``. A duration below one
    millisecond gives ``'0ms'``.
    """
    days = int(seconds / 3600/24)
    seconds = seconds - days*3600*24
    hours = int(seconds / 3600)
    seconds = seconds - hours*3600
    minutes = int(seconds / 60)
    seconds = seconds - minutes*60
    secondsf = int(seconds)
    seconds = seconds - secondsf
    millis = int(seconds*1000)

    parts = []
    for value, suffix in ((days, 'D'), (hours, 'h'), (minutes, 'm'),
                          (secondsf, 's'), (millis, 'ms')):
        if value > 0 and len(parts) < 2:
            parts.append(str(value) + suffix)
    return ''.join(parts) if parts else '0ms'


def _set_requires_grad(module, flag):
    """Set ``requires_grad`` to ``flag`` on every parameter of ``module``."""
    for param in module.parameters():
        param.requires_grad = flag


def freeze_layer(layer):
    """
    Freeze a layer: disable gradients for all of its parameters.
    #Argument: the layer (an nn.Module) to freeze.
    """
    _set_requires_grad(layer, False)


def freeze_All(model):
    """
    Freeze all the trainable parameters in the DNN.
    #Argument: the DNN model.
    """
    _set_requires_grad(model, False)


def Unfreeze_layer(layer):
    """
    Unfreeze a layer: enable gradients for all of its parameters.
    #Argument: the layer (an nn.Module) to unfreeze.
    """
    _set_requires_grad(layer, True)


def Unfreeze_All(model):
    """
    Unfreeze all the parameters in the DNN.
    #Argument: the DNN model.
    """
    _set_requires_grad(model, True)