├── NIPS_EnResNet_Poster.pdf
├── README.md
├── ResNet20
├── Attack_PGD_EnResNet_5_20.py
├── Readme.md
├── main_pgd_enresnet5_20.py
├── resnet_cifar.py
└── utils.py
├── WideResNet34-10
├── Attack_PGD_WideResNet.py
├── Readme.md
├── main_pgd_wideresnet34_10_Validation.py
├── resnet_cifar.py
└── utils.py
├── fig1.png
├── fig2.png
└── fig4.png
/NIPS_EnResNet_Poster.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BaoWangMath/EnResNet/edc4faeefa66cc02c8f1ecda1b52d6e7a0d25b75/NIPS_EnResNet_Poster.pdf
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # EnResNet
2 | This repository contains PyTorch code for the paper:
3 | Bao Wang, Binjie Yuan, Zuoqiang Shi, Stanley J. Osher. EnResNet: ResNet Ensemble via the Feynman-Kac Formalism, arXiv:1811.10745, 2018 (https://arxiv.org/abs/1811.10745)
4 |
5 | The repo contains two subfolders for PGD adversarial training of ensembles of ResNet20 and WideResNet34-10, respectively.
6 |
7 | We interpret the adversarial vulnerability of ResNets as irregularity of the solution of the transport equation, and we propose to improve the regularity of the decision boundary by adding diffusion to the transport equation. Please refer to Figure 4 of our [paper](https://arxiv.org/abs/1811.10745) for more details.
8 |
9 |
10 |
11 |
12 |
13 | The resulting convection-diffusion equation can be solved by using the Feynman-Kac formula, which can be approximated by an ensemble of modified ResNets.
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 | If you find this work useful and use it in your own research, please cite our [paper](https://arxiv.org/abs/1811.10745)
24 |
25 | ```
26 | @ARTICLE{Wang2018EnResNet,
27 | author = {B. Wang and B. Yuan and Z. Shi and S. Osher},
28 | title = "{ResNets Ensemble via the Feynman-Kac Formalism to Improve Natural and Robust Accuracies}",
29 | journal = {arXiv e-prints},
30 | year = "2018",
31 | month = "Nov",
32 | eid = {arXiv:1811.10745},
33 | pages = {arXiv:1811.10745},
34 | archivePrefix = {arXiv},
35 | eprint = {1811.10745},
36 | primaryClass = {stat.ML}
37 | }
38 | ```
39 |
40 | ## Dependencies
41 | PyTorch 0.4.1
42 |
--------------------------------------------------------------------------------
/ResNet20/Attack_PGD_EnResNet_5_20.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | CW, FGSM, and IFGSM Attack CNN
4 | """
5 | import torch._utils
# Compatibility shim: when this PyTorch build does not expose
# ``torch._utils._rebuild_tensor_v2``, install a fallback that is built on
# top of ``torch._utils._rebuild_tensor``.
if not hasattr(torch._utils, '_rebuild_tensor_v2'):
    def _rebuild_tensor_v2(storage, storage_offset, size, stride, requires_grad, backward_hooks):
        """Rebuild a tensor with the legacy helper, then restore its grad metadata."""
        rebuilt = torch._utils._rebuild_tensor(storage, storage_offset, size, stride)
        rebuilt.requires_grad = requires_grad
        rebuilt._backward_hooks = backward_hooks
        return rebuilt

    torch._utils._rebuild_tensor_v2 = _rebuild_tensor_v2
15 |
16 | import torch
17 | import torch.nn as nn
18 | import torch.nn.functional as F
19 | import torch.backends.cudnn as cudnn
20 | import torch.optim as optim
21 | import torchvision
22 | import torchvision.datasets as dset
23 | import torchvision.transforms as transforms
24 | from torch.autograd import Variable
25 | import copy
26 | import math
27 | import numpy as np
28 | import os
29 | import argparse
30 |
31 | #from utils import *
32 |
33 | import numpy.matlib
34 | import matplotlib.pyplot as plt
35 | import pickle
36 | import cPickle
37 | from collections import OrderedDict
38 |
# Command-line options of the attack script.  Note that the attack options
# use a single leading dash (``-method``, ``-epsilon``).
parser = argparse.ArgumentParser(description='Fool EnResNet')
ap = parser.add_argument
# The script dispatches on exactly these method names: fgsm, ifgsm, cw.
ap('-method', help='Attack Method (fgsm, ifgsm, or cw)', type=str, default="ifgsm")
# Earlier experiments used 0.007 and 0.003 as the attack strength.
ap('-epsilon', help='Attack Strength', type=float, default=0.031)
ap('--num-ensembles', '--ne', default=2, type=int, metavar='N')
# The help text now reports the real default (it previously claimed 0.0).
ap('--noise-coef', '--nc', default=0.1, type=float, metavar='W', help='forward noise (default: 0.1)')
ap('--noise-coef-eval', '--nce', default=0.0, type=float, metavar='W', help='forward noise (default: 0.)')
# Parsed eagerly at import time; options are exposed as a plain dict.
opt = vars(parser.parse_args())
49 |
50 |
def conv3x3(in_planes, out_planes, stride=1):
    """Build a bias-free 3x3 ``nn.Conv2d`` with padding 1 and the given stride."""
    conv_options = dict(kernel_size=3, stride=stride, padding=1, bias=False)
    return nn.Conv2d(in_planes, out_planes, **conv_options)
56 |
57 |
class PreActBasicBlock(nn.Module):
    """Pre-activation residual block made of two 3x3 convolutions.

    The forward pass runs BN -> ReLU -> conv twice and adds the shortcut.
    When ``noise_coef`` is not None, Gaussian noise scaled by
    ``noise_coef * std(output)`` is added to the result on every call.
    """
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None, noise_coef=None):
        super(PreActBasicBlock, self).__init__()
        # Sub-modules are registered in a fixed order (bn1, relu, conv1, bn2, conv2).
        self.bn1 = nn.BatchNorm2d(inplanes)
        self.relu = nn.ReLU(inplace=True)
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv2 = conv3x3(planes, planes)
        self.downsample = downsample
        self.stride = stride
        self.noise_coef = noise_coef

    def forward(self, x):
        activated = self.relu(self.bn1(x))

        # A projection shortcut, when present, consumes the pre-activated
        # input; otherwise the raw input is the shortcut.
        shortcut = x if self.downsample is None else self.downsample(activated)

        out = self.conv1(activated)
        out = self.conv2(self.relu(self.bn2(out)))
        out += shortcut

        if self.noise_coef is None:
            return out
        return out + self.noise_coef * torch.std(out) * torch.randn_like(out)
92 |
93 |
class PreActBottleneck(nn.Module):
    """Pre-activation bottleneck block (1x1 -> 3x3 -> 1x1 convolutions).

    The last convolution widens the features to ``planes * 4`` channels.
    When ``noise_coef`` is not None, Gaussian noise scaled by
    ``noise_coef * std(output)`` is added to the result on every call.
    """
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None, noise_coef=None):
        super(PreActBottleneck, self).__init__()
        self.bn1 = nn.BatchNorm2d(inplanes)
        self.relu = nn.ReLU(inplace=True)
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        # Only the 3x3 convolution carries the block's stride.
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn3 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, planes*4, kernel_size=1, bias=False)
        self.downsample = downsample
        self.stride = stride
        self.noise_coef = noise_coef

    def forward(self, x):
        activated = self.relu(self.bn1(x))

        # A projection shortcut, when present, consumes the pre-activated
        # input; otherwise the raw input is the shortcut.
        shortcut = x if self.downsample is None else self.downsample(activated)

        out = self.conv1(activated)
        out = self.conv2(self.relu(self.bn2(out)))
        out = self.conv3(self.relu(self.bn3(out)))
        out += shortcut

        if self.noise_coef is None:
            return out
        return out + self.noise_coef * torch.std(out) * torch.randn_like(out)
135 |
136 |
class PreAct_ResNet_Cifar(nn.Module):
    """Pre-activation ResNet with three stages of 16, 32 and 64 base channels.

    ``block`` is the residual block class, ``layers`` holds the number of
    blocks in each of the three stages, and ``noise_coef`` is forwarded to
    every block.  The 8x8 average pooling fits the feature map obtained from
    32x32 inputs after the two stride-2 stages.
    """

    def __init__(self, block, layers, num_classes=10, noise_coef=None):
        super(PreAct_ResNet_Cifar, self).__init__()
        self.inplanes = 16
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1, bias=False)
        self.layer1 = self._make_layer(block, 16, layers[0], noise_coef=noise_coef)
        self.layer2 = self._make_layer(block, 32, layers[1], stride=2, noise_coef=noise_coef)
        self.layer3 = self._make_layer(block, 64, layers[2], stride=2, noise_coef=noise_coef)
        final_width = 64*block.expansion
        self.bn = nn.BatchNorm2d(final_width)
        self.relu = nn.ReLU(inplace=True)
        self.avgpool = nn.AvgPool2d(8, stride=1)
        self.fc = nn.Linear(final_width, num_classes)

        self._init_weights()

    def _init_weights(self):
        """Re-initialise conv weights (normal, std sqrt(2/n)) and BN affine parameters."""
        for module in self.modules():
            if isinstance(module, nn.BatchNorm2d):
                module.weight.data.fill_(1)
                module.bias.data.zero_()
            elif isinstance(module, nn.Conv2d):
                kernel_h, kernel_w = module.kernel_size
                n = kernel_h * kernel_w * module.out_channels
                module.weight.data.normal_(0, math.sqrt(2. / n))

    def _make_layer(self, block, planes, blocks, stride=1, noise_coef=None):
        """Stack ``blocks`` residual blocks; only the first one strides/projects."""
        out_planes = planes*block.expansion

        # A 1x1 projection is needed whenever the shortcut changes shape.
        downsample = None
        if stride != 1 or self.inplanes != out_planes:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, out_planes, kernel_size=1, stride=stride, bias=False)
            )

        stage = [block(self.inplanes, planes, stride=stride, downsample=downsample, noise_coef=noise_coef)]
        self.inplanes = out_planes
        for _ in range(blocks - 1):
            stage.append(block(self.inplanes, planes, noise_coef=noise_coef))
        return nn.Sequential(*stage)

    def forward(self, x):
        features = self.conv1(x)
        for stage in (self.layer1, self.layer2, self.layer3):
            features = stage(features)

        pooled = self.avgpool(self.relu(self.bn(features)))
        flattened = pooled.view(pooled.size(0), -1)
        return self.fc(flattened)
192 |
193 |
class Ensemble_PreAct_ResNet_Cifar(nn.Module):
    """Average the outputs of ``num_ensembles`` independently built PreAct ResNets.

    Every member is a ``PreAct_ResNet_Cifar`` constructed with the same block
    type, layer counts, class count and ``noise_coef``.
    """

    def __init__(self, block, layers, num_classes=10, num_ensembles=3, noise_coef=0.0):
        super(Ensemble_PreAct_ResNet_Cifar, self).__init__()
        self.num_ensembles = num_ensembles
        # The ensemble members use the noisy blocks (noise_coef is forwarded).
        members = []
        for _ in range(num_ensembles):
            members.append(
                PreAct_ResNet_Cifar(block, layers, num_classes=num_classes, noise_coef=noise_coef)
            )
        self.ensemble = nn.ModuleList(members)

    def forward(self, x):
        # Sum the member outputs in order, then divide by the ensemble size.
        total = 0.0
        for member in self.ensemble:
            total = total + member(x)
        return total / self.num_ensembles
211 |
212 |
def en_preactresnet20_cifar(**kwargs):
    """Build an ensemble of basic-block PreAct ResNets with 3 blocks per stage."""
    return Ensemble_PreAct_ResNet_Cifar(PreActBasicBlock, [3, 3, 3], **kwargs)
216 |
def en_preactresnet44_cifar(**kwargs):
    """Build an ensemble of basic-block PreAct ResNets with 7 blocks per stage."""
    return Ensemble_PreAct_ResNet_Cifar(PreActBasicBlock, [7, 7, 7], **kwargs)
220 |
if __name__ == '__main__':
    # Load the trained DNN, attack it, and finally save the adversarial images.

    # Load the model: the network object is read from the checkpoint's 'net' entry.
    print('==> Resuming from checkpoint..')
    checkpoint = torch.load('ckpt_PGD_ensemble_5_20.t7')
    net = checkpoint['net']
    epsilon = opt['epsilon']
    attack_type = opt['method']

    # Load the original (clean) test data.  Only ToTensor() is applied, and
    # the attacks below clamp adversarial pixels to [0, 1].
    print('==> Load the clean image')
    root = './data'
    download = False

    test_set = torchvision.datasets.CIFAR10(
        root=root,
        train=False,
        download=download,
        transform=transforms.Compose([
            transforms.ToTensor(),
        ]))

    kwargs = {'num_workers':1, 'pin_memory':True}
    batchsize_test = 200
    if attack_type == 'cw':
        # The CW branch indexes sample 0 only, so it runs one image at a time.
        batchsize_test = 1
    print('Batch size of the test set: ', batchsize_test)
    test_loader = torch.utils.data.DataLoader(dataset=test_set,
                                              batch_size=batchsize_test,
                                              shuffle=False, **kwargs
                                             )
    criterion = nn.CrossEntropyLoss()
    #--------------------------------------------------------------------------
    # Testing
    # images: the original images
    # labels: labels of the original images
    # images_adv: adversarial image
    # labels_pred: the predicted labels of the adversarial images
    # noise: the added noise
    #--------------------------------------------------------------------------
    images, labels, images_adv, labels_pred, noise = [], [], [], [], []
    total_correct_classified = 0

    if attack_type == 'fgsm':
        for batch_idx, (x1, y1_true) in enumerate(test_loader):
            x_Test = x1.numpy()
            y_Test = y1_true.numpy()

            # reshape(-1, ...) also accepts a final batch smaller than batchsize_test.
            x = Variable(torch.cuda.FloatTensor(x_Test.reshape(-1, 3, 32, 32)), requires_grad=True)
            y = Variable(torch.cuda.LongTensor(y_Test), requires_grad=False)

            # Loss on the clean batch; its gradient w.r.t. the input drives the attack.
            pred_tmp = net(x)
            loss = criterion(pred_tmp, y)

            # Attack
            net.zero_grad()
            if x.grad is not None:
                x.grad.data.fill_(0)
            loss.backward()

            x_val_min = 0.0
            x_val_max = 1.0
            x.grad.sign_()

            # Single signed-gradient step of size epsilon, clamped to the pixel range.
            x_adversarial = x + epsilon*x.grad
            x_adversarial = torch.clamp(x_adversarial, x_val_min, x_val_max)
            x_adversarial = x_adversarial.data

            # Classify the perturbed data
            x_adversarial_tmp = Variable(x_adversarial)
            pred_tmp = net(x_adversarial_tmp)
            y_pred_adversarial = np.argmax(pred_tmp.cpu().data.numpy(), axis=1)

            total_correct_classified += int(np.sum(y_Test == y_pred_adversarial))

            # Save the perturbed data (one host copy per batch, not per sample).
            x_adversarial_np = x_adversarial.cpu().numpy()
            for i in range(len(x_Test)):
                images.append(x_Test[i, :, :, :])                              # Original image
                images_adv.append(x_adversarial_np[i, :, :, :])                # Perturbed image
                noise.append(x_adversarial_np[i, :, :, :]-x_Test[i, :, :, :])  # Noise
                labels.append(y_Test[i])
                labels_pred.append(y_pred_adversarial[i])

    elif attack_type == 'ifgsm':
        # Helper function, defined once instead of once per batch.
        def where(cond, x, y):
            """
            Element-wise select: x where cond holds, y elsewhere.
            code from :
            https://discuss.pytorch.org/t/how-can-i-do-the-operation-the-same-as-np-where/1329/8
            """
            cond = cond.float()
            return (cond*x) + ((1-cond)*y)

        for batch_idx, (x1, y1_true) in enumerate(test_loader):
            x_Test = x1.numpy()
            y_Test = y1_true.numpy()

            # reshape(-1, ...) also accepts a final batch smaller than batchsize_test.
            x = Variable(torch.cuda.FloatTensor(x_Test.reshape(-1, 3, 32, 32)), requires_grad=True)
            y = Variable(torch.cuda.LongTensor(y_Test), requires_grad=False)

            # Classification before perturbation
            pred_tmp = net(x)
            loss = criterion(pred_tmp, y)

            # Attack
            alpha = epsilon          # step size of every iteration
            iteration = 1            # earlier runs used 10 and 40 iterations
            x_val_min = 0.; x_val_max = 1.
            epsilon1 = 0.031         # radius of the L-infinity ball around x

            # No random start: the attack begins at the clean image.
            x_adv = Variable(x.data, requires_grad=True)

            for i in range(iteration):
                h_adv = net(x_adv)
                loss = criterion(h_adv, y)
                net.zero_grad()
                if x_adv.grad is not None:
                    x_adv.grad.data.fill_(0)
                loss.backward()

                x_adv.grad.sign_()
                x_adv = x_adv + alpha*x_adv.grad
                # Project back into [x - epsilon1, x + epsilon1], then into the pixel range.
                x_adv = where(x_adv > x+epsilon1, x+epsilon1, x_adv)
                x_adv = where(x_adv < x-epsilon1, x-epsilon1, x_adv)
                x_adv = torch.clamp(x_adv, x_val_min, x_val_max)
                x_adv = Variable(x_adv.data, requires_grad=True)

            x_adversarial = x_adv.data

            # Classify the perturbed data
            x_adversarial_tmp = Variable(x_adversarial)
            pred_tmp = net(x_adversarial_tmp)
            loss = criterion(pred_tmp, y)
            y_pred_adversarial = np.argmax(pred_tmp.cpu().data.numpy(), axis=1)

            total_correct_classified += int(np.sum(y_Test == y_pred_adversarial))

            # Save the perturbed data (one host copy per batch, not per sample).
            x_adversarial_np = x_adversarial.cpu().numpy()
            for i in range(len(x_Test)):
                images.append(x_Test[i, :, :, :])                              # Original image
                images_adv.append(x_adversarial_np[i, :, :, :])                # Perturbed image
                noise.append(x_adversarial_np[i, :, :, :]-x_Test[i, :, :, :])  # Noise
                labels.append(y_Test[i])
                labels_pred.append(y_pred_adversarial[i])

    elif attack_type == 'cw':
        for batch_idx, (x1, y1_true) in enumerate(test_loader):
            # Progress report every 50 images.
            if batch_idx % 50 == 0:
                print(batch_idx)
            x_Test = x1.numpy()
            y_Test = y1_true.numpy()

            x = Variable(torch.cuda.FloatTensor(x_Test.reshape(-1, 3, 32, 32)), requires_grad=True)
            y = Variable(torch.cuda.LongTensor(y_Test), requires_grad=False)

            # Classification before perturbation
            pred_tmp = net(x)
            loss = criterion(pred_tmp, y)
            y_pred = np.argmax(pred_tmp.cpu().data.numpy())

            # Attack
            cwl2_learning_rate = 0.0006  # earlier runs used 0.01
            max_iter = 50
            lambdaf = 10.0
            kappa = 0.0

            # The input image we will perturb (kept on the CPU)
            clean_input = torch.FloatTensor(x_Test.reshape(-1, 3, 32, 32))
            input_var = Variable(clean_input)

            # w is the variable we will optimize over. We will also save the best w and loss
            w = Variable(clean_input, requires_grad=True)
            best_w = clean_input.clone()
            best_loss = float('inf')

            # Use the Adam optimizer for the minimization
            optimizer = optim.Adam([w], lr=cwl2_learning_rate)

            # Get the top2 predictions of the model. Get the argmaxes for the objective function
            probs = net(input_var.cuda())

            probs_data = probs.data.cpu()
            top1_idx = torch.max(probs_data, 1)[1]
            probs_data[0][top1_idx] = -1 # making the previous top1 the lowest so we get the top2
            top2_idx = torch.max(probs_data, 1)[1]

            # Set the argmax (but maybe argmax will just equal top2_idx always?)
            argmax = top1_idx[0]
            if argmax == y_pred:
                argmax = top2_idx[0]

            # The iteration
            for i in range(0, max_iter):
                if i > 0:
                    w.grad.data.fill_(0)

                # Zero grad (Only one line needed actually)
                net.zero_grad()
                optimizer.zero_grad()

                # Compute L2 Loss
                loss = torch.pow(w - input_var, 2).sum()

                # w_in is a fresh leaf sharing w's data; the hinge term's
                # gradient lands on it and is merged into w.grad below.
                w_data = w.data
                w_in = Variable(w_data, requires_grad=True)

                # Compute output
                output = net.forward(w_in.cuda())

                # Calculating the (hinge) loss
                loss += lambdaf * torch.clamp( output[0][y_pred] - output[0][argmax] + kappa, min=0).cpu()

                # Backprop the loss
                loss.backward()

                # Combine the L2 gradient (on w) with the hinge gradient (on w_in)
                w.grad.data.add_(w_in.grad.data)

                # Optimizer step
                optimizer.step()

                # Save the best w and loss.  item() reads the scalar without
                # indexing a 0-dim tensor.
                total_loss = loss.item()

                if total_loss < best_loss:
                    best_loss = total_loss
                    best_w = w.data.clone()

            # Set final adversarial image as the best-found w
            x_adversarial = best_w

            #--------------- Scale the found perturbation by epsilon
            noise_tmp = x_adversarial.cpu().numpy() - x_Test
            x_adversarial = x_Test + epsilon * noise_tmp
            #---------------

            # Classify the perturbed data
            x_adversarial_tmp = Variable(torch.cuda.FloatTensor(x_adversarial), requires_grad=False)
            pred_tmp = net(x_adversarial_tmp)
            y_pred_adversarial = np.argmax(pred_tmp.cpu().data.numpy())

            if y_Test == y_pred_adversarial:
                total_correct_classified += 1

            # Save the perturbed data
            images.append(x_Test) # Original image
            images_adv.append(x_adversarial) # Perturbed image
            noise.append(x_adversarial-x_Test) # Noise
            labels.append(y_Test)
            labels_pred.append(y_pred_adversarial)
    else:
        # The exception used to be constructed but never raised, so an unknown
        # method silently produced an empty result file.
        raise ValueError('Unsupported Attack')

    print('Number of correctly classified images: ', total_correct_classified)

    # Save data
    images = np.array(images).squeeze()
    images_adv = np.array(images_adv).squeeze()
    noise = np.array(noise).squeeze()
    labels = np.array(labels).squeeze()
    labels_pred = np.array(labels_pred).squeeze()
    print('%s %s %s %s %s' % (images.shape, images_adv.shape, noise.shape, labels.shape, labels_pred.shape))

    # Pickle data is binary, so the file is opened in binary mode.
    with open("fooled_EnResNet5_20_PGD_10iters_" + attack_type + str(int(1000*epsilon)) + ".pkl", "wb") as f:
        adv_data_dict = {
            "images" : images,
            "images_adversarial" : images_adv,
            "y_trues" : labels,
            "noises" : noise,
            "y_preds_adversarial" : labels_pred
        }
        pickle.dump(adv_data_dict, f)
526 |
--------------------------------------------------------------------------------
/ResNet20/Readme.md:
--------------------------------------------------------------------------------
1 | ## For reproducing results of En_5ResNet20 on the CIFAR10
2 | ### PGD adversarial training
3 | ```
4 | python main_pgd_enresnet5_20.py --lr 0.1 --noise-coef 0.1
5 | ```
6 |
7 | ### Attack the trained model
8 | ```
9 | python Attack_PGD_EnResNet_5_20.py -method ifgsm
10 | ```
11 | The method can be fgsm, ifgsm, and cw
12 |
--------------------------------------------------------------------------------
/ResNet20/main_pgd_enresnet5_20.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | main pgd enresnet
4 | """
5 | import argparse
6 | import os
7 | import shutil
8 | import time
9 |
10 | import torch.backends.cudnn as cudnn
11 | import torch.optim as optim
12 | import torchvision
13 | import torchvision.transforms as transforms
14 | from torch.autograd import Variable
15 | import torch.nn.functional as F
16 |
17 | import torch
18 | import torch.nn as nn
19 | import math
20 |
21 | from resnet_cifar import *
22 | from utils import *
23 |
# Command-line interface of the training script.
parser = argparse.ArgumentParser(description='PyTorch Cifar10 Training')
_add_arg = parser.add_argument

# Model / schedule
_add_arg('--model_name', default='en_resnet20_cifar10', type=str, help='name of the model')
_add_arg('--epochs', default=200, type=int, metavar='N', help='number of total epochs to run')
_add_arg('--start-epoch', default=0, type=int, metavar='N', help='manual epoch number (useful on restarts)')
_add_arg('-b', '--batch-size', default=128, type=int, metavar='N',
         help='mini-batch size (default: 128),only used for train')

# Optimizer
_add_arg('--lr', '--learning-rate', default=0.1, type=float, metavar='LR', help='initial learning rate')
_add_arg('--momentum', default=0.9, type=float, metavar='M', help='momentum')
_add_arg('--num-ensembles', '--ne', default=5, type=int, metavar='N')
_add_arg('--weight-decay', '--wd', default=5e-4, type=float, metavar='W',
         help='weight decay (default: 5e-4)')

# Noise injection
_add_arg('--noise-coef', '--nc', default=0.1, type=float, metavar='W', help='forward noise (default: 0.1)')
_add_arg('--noise-coef-eval', '--nce', default=0.0, type=float, metavar='W', help='forward noise (default: 0.)')

# Logging / checkpoints / evaluation
_add_arg('--print-freq', '-p', default=10, type=int, metavar='N', help='print frequency (default: 10)')
_add_arg('--resume', default='', type=str, metavar='PATH', help='path to latest checkpoint (default: none)')
_add_arg('-e', '--evaluate', dest='evaluate', action='store_true', help='evaluate model on validation set')
_add_arg('-ct', '--cifar-type', default='10', type=int, metavar='CT',
         help='10 for cifar10,100 for cifar100 (default: 10)')
42 |
43 |
def conv3x3(in_planes, out_planes, stride=1):
    """Return a 3x3 convolution layer (padding 1, no bias) with the given stride."""
    layer = nn.Conv2d(
        in_planes,
        out_planes,
        kernel_size=3,
        stride=stride,
        padding=1,
        bias=False,
    )
    return layer
49 |
50 |
class PreActBasicBlock(nn.Module):
    """Two-convolution pre-activation residual block with optional output noise.

    Each 3x3 convolution is preceded by BatchNorm and ReLU.  If ``noise_coef``
    is not None the block output is perturbed by
    ``noise_coef * std(out) * N(0, 1)`` noise on every forward call.
    """
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None, noise_coef=None):
        super(PreActBasicBlock, self).__init__()
        self.bn1 = nn.BatchNorm2d(inplanes)
        self.relu = nn.ReLU(inplace=True)
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv2 = conv3x3(planes, planes)
        self.downsample = downsample
        self.stride = stride
        self.noise_coef = noise_coef

    def forward(self, x):
        pre = self.bn1(x)
        pre = self.relu(pre)

        # The identity shortcut uses the raw input; a projection shortcut is
        # computed from the pre-activated features instead.
        if self.downsample is None:
            skip = x
        else:
            skip = self.downsample(pre)

        branch = self.conv1(pre)
        branch = self.relu(self.bn2(branch))
        branch = self.conv2(branch)
        branch += skip

        if self.noise_coef is None:
            return branch

        noise_scale = self.noise_coef * torch.std(branch)
        return branch + noise_scale * torch.randn_like(branch)
85 |
86 |
class PreActBottleneck(nn.Module):
    """Three-convolution pre-activation bottleneck with optional output noise.

    The block applies 1x1, 3x3 (strided) and 1x1 convolutions, each preceded
    by BatchNorm and ReLU, and outputs ``planes * 4`` channels.  If
    ``noise_coef`` is not None the output is perturbed by
    ``noise_coef * std(out) * N(0, 1)`` noise on every forward call.
    """
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None, noise_coef=None):
        super(PreActBottleneck, self).__init__()
        self.bn1 = nn.BatchNorm2d(inplanes)
        self.relu = nn.ReLU(inplace=True)
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn3 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, planes*4, kernel_size=1, bias=False)
        self.downsample = downsample
        self.stride = stride
        self.noise_coef = noise_coef

    def forward(self, x):
        pre = self.bn1(x)
        pre = self.relu(pre)

        # The identity shortcut uses the raw input; a projection shortcut is
        # computed from the pre-activated features instead.
        if self.downsample is None:
            skip = x
        else:
            skip = self.downsample(pre)

        branch = self.conv1(pre)
        branch = self.conv2(self.relu(self.bn2(branch)))
        branch = self.conv3(self.relu(self.bn3(branch)))
        branch += skip

        if self.noise_coef is None:
            return branch

        noise_scale = self.noise_coef * torch.std(branch)
        return branch + noise_scale * torch.randn_like(branch)
128 |
129 |
class PreAct_ResNet_Cifar(nn.Module):
    """Three-stage pre-activation ResNet (16/32/64 base channels) with a linear head.

    ``block`` is the residual block class, ``layers`` gives the block count of
    each stage, and ``noise_coef`` is handed to every block.  Stages two and
    three use stride 2, and the head pools with an 8x8 average window.
    """

    def __init__(self, block, layers, num_classes=10, noise_coef=None):
        super(PreAct_ResNet_Cifar, self).__init__()
        self.inplanes = 16
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1, bias=False)
        self.layer1 = self._make_layer(block, 16, layers[0], noise_coef=noise_coef)
        self.layer2 = self._make_layer(block, 32, layers[1], stride=2, noise_coef=noise_coef)
        self.layer3 = self._make_layer(block, 64, layers[2], stride=2, noise_coef=noise_coef)
        head_channels = 64*block.expansion
        self.bn = nn.BatchNorm2d(head_channels)
        self.relu = nn.ReLU(inplace=True)
        self.avgpool = nn.AvgPool2d(8, stride=1)
        self.fc = nn.Linear(head_channels, num_classes)

        # Conv weights ~ N(0, sqrt(2/n)) with n = kernel area * out_channels;
        # BatchNorm starts as the identity affine map.
        for layer in self.modules():
            if isinstance(layer, nn.BatchNorm2d):
                layer.weight.data.fill_(1)
                layer.bias.data.zero_()
                continue
            if isinstance(layer, nn.Conv2d):
                n = layer.kernel_size[0] * layer.kernel_size[1] * layer.out_channels
                layer.weight.data.normal_(0, math.sqrt(2. / n))

    def _make_layer(self, block, planes, blocks, stride=1, noise_coef=None):
        """Build one stage: a (possibly projecting) first block plus ``blocks - 1`` plain ones."""
        width = planes*block.expansion
        shape_changes = stride != 1 or self.inplanes != width

        downsample = None
        if shape_changes:
            projection = nn.Conv2d(self.inplanes, width, kernel_size=1, stride=stride, bias=False)
            downsample = nn.Sequential(projection)

        first = block(self.inplanes, planes, stride=stride, downsample=downsample, noise_coef=noise_coef)
        self.inplanes = width
        rest = [block(self.inplanes, planes, noise_coef=noise_coef) for _ in range(1, blocks)]
        return nn.Sequential(first, *rest)

    def forward(self, x):
        out = self.layer3(self.layer2(self.layer1(self.conv1(x))))

        out = self.relu(self.bn(out))
        out = self.avgpool(out)
        batch = out.size(0)
        return self.fc(out.view(batch, -1))
185 |
186 |
class Ensemble_PreAct_ResNet_Cifar(nn.Module):
    """Ensemble whose prediction is the mean output of its member networks.

    ``num_ensembles`` copies of ``PreAct_ResNet_Cifar`` are created with the
    same configuration; ``noise_coef`` is passed to each of them.
    """

    def __init__(self, block, layers, num_classes=10, num_ensembles=3, noise_coef=0.0):
        super(Ensemble_PreAct_ResNet_Cifar, self).__init__()
        self.num_ensembles = num_ensembles
        # Ensemble members are built from the noisy blocks.
        self.ensemble = nn.ModuleList()
        for _ in range(num_ensembles):
            member = PreAct_ResNet_Cifar(block, layers, num_classes=num_classes, noise_coef=noise_coef)
            self.ensemble.append(member)

    def forward(self, x):
        # Accumulate the member outputs in order and average them.
        accumulated = 0.0
        for member in self.ensemble:
            accumulated = accumulated + member(x)
        return accumulated / self.num_ensembles
204 |
205 |
def en_preactresnet20_cifar(**kwargs):
    """Build the ensemble of 20-layer PreAct ResNets (3 basic blocks per stage).

    Swapping the layer list for [18, 18, 18] would give the 110-layer variant.
    """
    return Ensemble_PreAct_ResNet_Cifar(PreActBasicBlock, [3, 3, 3], **kwargs)
210 |
211 |
class AttackPGD(nn.Module):
    """
    PGD adversarial training wrapper.

    ``forward`` first crafts an L-infinity bounded adversarial example with
    iterated signed-gradient steps and then evaluates ``basic_net`` on it.

    ``config`` must provide:
      * 'random_start': start from a uniformly perturbed input when truthy
      * 'step_size':    size of each signed-gradient step
      * 'epsilon':      radius of the L-infinity ball around the clean input
      * 'num_steps':    number of attack iterations
      * 'loss_func':    must be 'xent' (cross entropy)
    """
    def __init__(self, basic_net, config):
        super(AttackPGD, self).__init__()
        self.basic_net = basic_net
        self.rand = config['random_start']
        self.step_size = config['step_size']
        self.epsilon = config['epsilon']
        self.num_steps = config['num_steps']
        assert config['loss_func'] == 'xent', 'Only xent supported for now.'

    def forward(self, inputs, targets):
        """Return ``(logits, x_adv)``: the network output on the adversarial batch and the batch itself."""
        # Work on a detached view so the attack neither flips requires_grad on
        # the caller's tensor nor ties the adversarial example to its graph.
        clean = inputs.detach()
        # The projection bounds do not change between steps.
        lower = clean - self.epsilon
        upper = clean + self.epsilon

        x = clean
        if self.rand:
            x = x + torch.zeros_like(x).uniform_(-self.epsilon, self.epsilon)
        for _ in range(self.num_steps): # iFGSM attack
            x.requires_grad_()
            # Gradients are needed even when the caller runs under no_grad().
            with torch.enable_grad():
                logits = self.basic_net(x)
                # reduction='sum' replaces the deprecated size_average=False.
                loss = F.cross_entropy(logits, targets, reduction='sum')
            grad = torch.autograd.grad(loss, [x])[0]
            x = x.detach() + self.step_size*torch.sign(grad.detach())
            # Project back into the epsilon ball, then into the valid pixel range.
            x = torch.min(torch.max(x, lower), upper)
            x = torch.clamp(x, 0, 1)

        return self.basic_net(x), x
240 |
241 |
if __name__ == '__main__':
    # Entry point: PGD adversarial training of an ensemble of pre-activation
    # ResNet20 models on CIFAR10. After every epoch the model is evaluated on
    # PGD-perturbed test images and checkpointed when it beats the best count
    # of correctly classified adversarial test images so far.
    use_cuda = torch.cuda.is_available()  # was missing the call parentheses
    best_acc = 0
    start_epoch = 0
    args = parser.parse_args()
    best_count = 0
    #--------------------------------------------------------------------------
    # Load Cifar data
    #--------------------------------------------------------------------------
    print('==> Preparing data...')
    root = './data'
    download = True

    # Inputs stay in [0, 1] (no mean/std normalisation): the attack clamps
    # perturbed images to that range.
    train_set = torchvision.datasets.CIFAR10(
        root=root,
        train=True,
        download=download,
        transform=transforms.Compose([
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
        ]))

    test_set = torchvision.datasets.CIFAR10(
        root=root,
        train=False,
        download=download,
        transform=transforms.Compose([
            transforms.ToTensor(),
        ]))

    kwargs = {'num_workers':1, 'pin_memory':True}
    # Floor division keeps the batch size an int on Python 3 as well.
    batchsize_test = len(test_set) // 40
    print('Batch size of the test set: ', batchsize_test)
    test_loader = torch.utils.data.DataLoader(dataset=test_set,
                                              batch_size=batchsize_test,
                                              shuffle=False, **kwargs
                                             )
    batchsize_train = 128
    print('Batch size of the train set: ', batchsize_train)
    train_loader = torch.utils.data.DataLoader(dataset=train_set,
                                               batch_size=batchsize_train,
                                               shuffle=True, **kwargs
                                              )

    basic_net = en_preactresnet20_cifar(num_ensembles=args.num_ensembles, noise_coef=args.noise_coef).cuda()

    # From https://github.com/MadryLab/cifar10_challenge/blob/master/config.json
    config = {
        'epsilon': 0.031,
        'num_steps': 10,
        'step_size': 0.007,
        'random_start': True,
        'loss_func': 'xent',
    }

    net = AttackPGD(basic_net, config).cuda()
    criterion = nn.CrossEntropyLoss()

    nepoch = 200
    for epoch in range(nepoch):
        print('Epoch ID', epoch)
        # Step schedule: divide the learning rate by 10 at epochs 80, 120, 160.
        if epoch < 80:
            lr = 0.1
        elif epoch < 120:
            lr = 0.1/10
        elif epoch < 160:
            lr = 0.1/10/10
        else:
            lr = 0.1/10/10/10

        # NOTE(review): a fresh optimizer per epoch also resets the momentum
        # buffers; kept as-is to preserve the original training behaviour.
        optimizer = optim.SGD(net.parameters(), lr=lr, momentum=0.9, weight_decay=5e-4, nesterov=True)

        #----------------------------------------------------------------------
        # Training
        #----------------------------------------------------------------------
        correct = 0; total = 0; train_loss = 0
        net.train()
        for batch_idx, (x, target) in enumerate(train_loader):
            optimizer.zero_grad()
            x, target = x.cuda(), target.cuda()

            # net returns the logits on the adversarial batch and the batch itself.
            score, pert_x = net(x, target)
            loss = criterion(score, target)
            loss.backward()
            optimizer.step()

            # .item() yields Python numbers, so the running statistics below
            # use true division rather than integer tensor arithmetic.
            train_loss += loss.item()
            _, predicted = torch.max(score.data, 1)
            total += target.size(0)
            correct += predicted.eq(target).sum().item()
            progress_bar(batch_idx, len(train_loader), 'Loss: %.3f | Acc: %.3f%% (%d/%d)'
                % (train_loss/(batch_idx+1), 100.*correct/total, correct, total))

        #----------------------------------------------------------------------
        # Testing
        #----------------------------------------------------------------------
        test_loss = 0; correct = 0; total = 0
        net.eval()
        # no_grad replaces the removed `volatile=True`; the attack re-enables
        # gradients internally where it needs them.
        with torch.no_grad():
            for batch_idx, (x, target) in enumerate(test_loader):
                x, target = x.cuda(), target.cuda()
                score, pert_x = net(x, target)

                loss = criterion(score, target)
                test_loss += loss.item()
                _, predicted = torch.max(score.data, 1)
                total += target.size(0)
                correct += predicted.eq(target).sum().item()
                progress_bar(batch_idx, len(test_loader), 'Loss: %.3f | Acc: %.3f%% (%d/%d)'
                    % (test_loss/(batch_idx+1), 100.*correct/total, correct, total))

        #----------------------------------------------------------------------
        # Save the checkpoint
        #----------------------------------------------------------------------
        acc = 100.*correct/total
        if correct > best_count:
            print('Saving model...')
            # The bare classifier (not the attack wrapper) is stored as a
            # whole module object under the 'net' key.
            state = {
                'net': basic_net,
                'acc': acc,
                'epoch': epoch,
            }

            torch.save(state, './ckpt_PGD_ensemble_5_20.t7')
            best_acc = acc
            best_count = correct

    print('The best acc: ', best_acc)
379 |
--------------------------------------------------------------------------------
/ResNet20/resnet_cifar.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | resnet for cifar in pytorch
4 | Reference:
5 | [1] K. He, X. Zhang, S. Ren, and J. Sun. Deep residual learning for image recognition. In CVPR, 2016.
6 | [2] K. He, X. Zhang, S. Ren, and J. Sun. Identity mappings in deep residual networks. In ECCV, 2016.
7 | """
8 | import torch
9 | import torch.nn as nn
10 | import math
11 | from torch.autograd import Variable
12 |
def conv3x3(in_planes, out_planes, stride=1):
    """
    Build a bias-free 3x3 convolution with one pixel of zero padding.
    """
    return nn.Conv2d(
        in_channels=in_planes,
        out_channels=out_planes,
        kernel_size=3,
        stride=stride,
        padding=1,
        bias=False,
    )
18 |
19 |
class PreActBasicBlock(nn.Module):
    """
    Pre-activation residual block with two 3x3 convolutions.

    BatchNorm and ReLU are applied before each convolution. When
    ``noise_coef`` is not None, Gaussian noise scaled by
    ``noise_coef * std(out)`` is added to the block output on every
    forward pass.
    """
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None, noise_coef=None):
        super(PreActBasicBlock, self).__init__()
        self.bn1 = nn.BatchNorm2d(inplanes)
        self.relu = nn.ReLU(inplace=True)
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv2 = conv3x3(planes, planes)
        self.downsample = downsample
        self.stride = stride
        self.noise_coef = noise_coef

    def forward(self, x):
        pre_act = self.relu(self.bn1(x))

        # The projection shortcut, when present, consumes the pre-activated
        # tensor; otherwise the raw input is the identity shortcut.
        if self.downsample is None:
            shortcut = x
        else:
            shortcut = self.downsample(pre_act)

        out = self.conv1(pre_act)
        out = self.conv2(self.relu(self.bn2(out)))
        out += shortcut

        if self.noise_coef is None:
            return out
        return out + self.noise_coef * torch.std(out) * torch.randn_like(out)
54 |
55 |
class PreActBottleneck(nn.Module):
    """
    Pre-activation bottleneck residual block (1x1 -> 3x3 -> 1x1 convolutions).

    The block outputs ``planes * 4`` channels. When ``noise_coef`` is not
    None, Gaussian noise scaled by ``noise_coef * std(out)`` is added to the
    block output on every forward pass.
    """
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None, noise_coef=None):
        super(PreActBottleneck, self).__init__()
        self.bn1 = nn.BatchNorm2d(inplanes)
        self.relu = nn.ReLU(inplace=True)
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn3 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, planes*4, kernel_size=1, bias=False)
        self.downsample = downsample
        self.stride = stride
        self.noise_coef = noise_coef

    def forward(self, x):
        pre_act = self.relu(self.bn1(x))

        # Projection shortcut (if any) is fed the pre-activated tensor.
        if self.downsample is None:
            shortcut = x
        else:
            shortcut = self.downsample(pre_act)

        out = self.conv1(pre_act)
        out = self.conv2(self.relu(self.bn2(out)))
        out = self.conv3(self.relu(self.bn3(out)))

        out += shortcut
        if self.noise_coef is None:
            return out
        return out + self.noise_coef * torch.std(out) * torch.randn_like(out)
97 |
98 |
class PreAct_ResNet_Cifar(nn.Module):
    """
    Three-stage pre-activation ResNet producing ``num_classes`` logits.

    Arguments:
        block: residual block class; it must expose an ``expansion``
            attribute and accept
            ``(inplanes, planes, stride, downsample, noise_coef)``.
        layers: number of blocks in each of the three stages.
        num_classes: size of the final linear layer.
        noise_coef: forwarded unchanged to every block.
    """
    def __init__(self, block, layers, num_classes=10, noise_coef=None):
        super(PreAct_ResNet_Cifar, self).__init__()
        self.inplanes = 16
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1, bias=False)
        self.layer1 = self._make_layer(block, 16, layers[0], noise_coef=noise_coef)
        self.layer2 = self._make_layer(block, 32, layers[1], stride=2, noise_coef=noise_coef)
        self.layer3 = self._make_layer(block, 64, layers[2], stride=2, noise_coef=noise_coef)
        self.bn = nn.BatchNorm2d(64*block.expansion)
        self.relu = nn.ReLU(inplace=True)
        self.avgpool = nn.AvgPool2d(8, stride=1)
        self.fc = nn.Linear(64*block.expansion, num_classes)

        # Conv weights: zero-mean normal with std sqrt(2 / (kh*kw*out_channels));
        # BatchNorm: unit scale, zero shift.
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                kernel_h, kernel_w = module.kernel_size
                fan_out = kernel_h * kernel_w * module.out_channels
                module.weight.data.normal_(0, math.sqrt(2. / fan_out))
            elif isinstance(module, nn.BatchNorm2d):
                module.weight.data.fill_(1)
                module.bias.data.zero_()

    def _make_layer(self, block, planes, blocks, stride=1, noise_coef=None):
        """Stack ``blocks`` residual blocks; only the first one may stride/project."""
        out_planes = planes*block.expansion

        # A 1x1 projection is needed whenever the shortcut changes shape.
        downsample = None
        if stride != 1 or self.inplanes != out_planes:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, out_planes, kernel_size=1, stride=stride, bias=False)
            )

        stage = [block(self.inplanes, planes, stride=stride, downsample=downsample, noise_coef=noise_coef)]
        self.inplanes = out_planes
        for _ in range(1, blocks):
            stage.append(block(self.inplanes, planes, noise_coef=noise_coef))
        return nn.Sequential(*stage)

    def forward(self, x):
        features = self.conv1(x)
        for stage in (self.layer1, self.layer2, self.layer3):
            features = stage(features)

        pooled = self.avgpool(self.relu(self.bn(features)))
        flat = pooled.view(pooled.size(0), -1)
        return self.fc(flat)
154 |
155 |
class Ensemble_PreAct_ResNet_Cifar(nn.Module):
    """
    Ensemble of ``num_ensembles`` independently initialised
    ``PreAct_ResNet_Cifar`` members; the forward pass returns the mean of
    the members' outputs.
    """
    def __init__(self, block, layers, num_classes=10, num_ensembles=3, noise_coef=0.0):
        super(Ensemble_PreAct_ResNet_Cifar, self).__init__()
        self.num_ensembles = num_ensembles
        # Every member receives the same noise coefficient, so its blocks
        # inject noise (noisy blocks are what the ensemble is built from).
        members = [
            PreAct_ResNet_Cifar(block, layers, num_classes=num_classes, noise_coef=noise_coef)
            for _ in range(num_ensembles)
        ]
        self.ensemble = nn.ModuleList(members)

    def forward(self, x):
        # Average the member outputs (the running sum starts from 0.0).
        total = sum((member(x) for member in self.ensemble), 0.0)
        return total / self.num_ensembles
173 |
174 |
def en_preactresnet20_cifar(**kwargs):
    """Build an ensemble of pre-activation ResNets with 3 basic blocks per stage."""
    return Ensemble_PreAct_ResNet_Cifar(PreActBasicBlock, [3, 3, 3], **kwargs)
178 |
def en_preactresnet44_cifar(**kwargs):
    """Build an ensemble of pre-activation ResNets with 7 basic blocks per stage."""
    return Ensemble_PreAct_ResNet_Cifar(PreActBasicBlock, [7, 7, 7], **kwargs)
182 |
def en_preactresnet32_cifar(**kwargs):
    """Build an ensemble of pre-activation ResNets with 5 basic blocks per stage."""
    return Ensemble_PreAct_ResNet_Cifar(PreActBasicBlock, [5, 5, 5], **kwargs)
186 |
def en_preactresnet110_cifar(**kwargs):
    """Build an ensemble of pre-activation ResNets with 18 basic blocks per stage."""
    return Ensemble_PreAct_ResNet_Cifar(PreActBasicBlock, [18, 18, 18], **kwargs)
190 |
191 |
if __name__ == '__main__':
    # Smoke test: build the default ensemble, run one random 32x32 RGB image
    # through it, then print the architecture and the output size.
    net = en_preactresnet20_cifar()
    sample = torch.autograd.Variable(torch.randn(1, 3, 32, 32))
    y = net(sample)
    print(net)
    print(y.size())
197 |
--------------------------------------------------------------------------------
/ResNet20/utils.py:
--------------------------------------------------------------------------------
1 | """
2 | Some helper functions for PyTorch.
3 | """
4 | import os
5 | import sys
6 | import time
7 | import math
8 |
9 | import torch.nn as nn
10 | import torch.nn.init as init
11 |
def init_params(net):
    """
    Initialise the parameters of every Conv2d, BatchNorm2d and Linear layer.

    Conv2d weights get Kaiming-normal (fan-out) initialisation, BatchNorm2d
    gets unit scale and zero shift, Linear weights are drawn from a normal
    distribution with std 1e-3, and every bias that exists is zeroed.

    Argument: the DNN model (any nn.Module); it is modified in place.
    """
    for m in net.modules():
        if isinstance(m, nn.Conv2d):
            init.kaiming_normal_(m.weight, mode='fan_out')
            # `if m.bias:` raised for any bias with more than one element
            # (ambiguous tensor truth value); test for presence instead.
            if m.bias is not None:
                init.constant_(m.bias, 0)
        elif isinstance(m, nn.BatchNorm2d):
            init.constant_(m.weight, 1)
            init.constant_(m.bias, 0)
        elif isinstance(m, nn.Linear):
            init.normal_(m.weight, std=1e-3)
            if m.bias is not None:
                init.constant_(m.bias, 0)
28 |
# Terminal width used to pad the progress bar. `stty size` prints nothing
# when stdout is not attached to a terminal (pipes, cron, CI), which used to
# make the tuple unpacking raise at import time; fall back to 80 columns.
try:
    _, term_width = os.popen('stty size', 'r').read().split()
    term_width = int(term_width)
except ValueError:
    term_width = 80

TOTAL_BAR_LENGTH = 65.  # width of the bar itself, in characters
# Timestamps of the previous progress_bar call and of the current bar's start.
last_time = time.time()
begin_time = time.time()
35 |
def progress_bar(current, total, msg=None):
    """
    Draw a one-line progress bar for step ``current`` (0-based) of ``total``.

    The line shows the bar, the time since the previous call, the time since
    the bar was started and, if given, ``msg``. The line is overwritten in
    place until the last step, which ends it with a newline.
    """
    global last_time, begin_time
    if current == 0:
        begin_time = time.time()  # Reset for new bar.

    done_len = int(TOTAL_BAR_LENGTH*current/total)
    todo_len = int(TOTAL_BAR_LENGTH - done_len) - 1

    emit = sys.stdout.write
    emit(' [')
    emit('=' * done_len)
    emit('>')
    emit('.' * todo_len)
    emit(']')

    now = time.time()
    step_time = now - last_time
    last_time = now
    tot_time = now - begin_time

    pieces = [
        '  Step: %s' % format_time(step_time),
        ' | Tot: %s' % format_time(tot_time),
    ]
    if msg:
        pieces.append(' | ' + msg)
    status = ''.join(pieces)

    emit(status)
    # Pad the rest of the terminal line with spaces.
    emit(' ' * (term_width-int(TOTAL_BAR_LENGTH)-len(status)-3))

    # Go back to the center of the bar.
    emit('\b' * (term_width-int(TOTAL_BAR_LENGTH/2)+2))
    emit(' %d/%d ' % (current+1, total))

    emit('\r' if current < total-1 else '\n')
    sys.stdout.flush()
81 |
82 |
def format_time(seconds):
    """
    Format a duration in seconds using its two largest non-zero units.

    Units are days (D), hours (h), minutes (m), seconds (s) and
    milliseconds (ms); a duration with no positive unit yields '0ms'.
    """
    days = int(seconds / 3600/24)
    seconds = seconds - days*3600*24
    hours = int(seconds / 3600)
    seconds = seconds - hours*3600
    minutes = int(seconds / 60)
    seconds = seconds - minutes*60
    whole_seconds = int(seconds)
    seconds = seconds - whole_seconds
    millis = int(seconds*1000)

    units = (
        (days, 'D'),
        (hours, 'h'),
        (minutes, 'm'),
        (whole_seconds, 's'),
        (millis, 'ms'),
    )
    shown = []
    for amount, suffix in units:
        # Keep at most two units, largest first.
        if amount > 0 and len(shown) < 2:
            shown.append(str(amount) + suffix)

    if not shown:
        return '0ms'
    return ''.join(shown)
114 |
115 |
def freeze_layer(layer):
    """
    Stop gradient computation for every parameter of one layer.
    #Argument: an object exposing parameters(), e.g. a layer of the DNN.
    """
    for weight in layer.parameters():
        weight.requires_grad = False
123 |
124 |
def freeze_All(model):
    """
    Stop gradient computation for every parameter of the model.
    #Argument: the DNN model.
    """
    for weight in model.parameters():
        weight.requires_grad = False
132 |
133 |
def Unfreeze_layer(layer):
    """
    Re-enable gradient computation for every parameter of one layer.
    #Argument: an object exposing parameters(), e.g. a layer of the DNN.
    """
    for weight in layer.parameters():
        weight.requires_grad = True
141 |
def Unfreeze_All(model):
    """
    Re-enable gradient computation for every parameter of the model.
    #Argument: DNN model.
    """
    for weight in model.parameters():
        weight.requires_grad = True
--------------------------------------------------------------------------------
/WideResNet34-10/Attack_PGD_WideResNet.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | CW, FGSM, and IFGSM Attack CNN
4 | """
5 | import torch._utils
# Compatibility shim: when this torch build has no
# torch._utils._rebuild_tensor_v2, install one built on _rebuild_tensor.
if not hasattr(torch._utils, '_rebuild_tensor_v2'):
    def _rebuild_tensor_v2(storage, storage_offset, size, stride, requires_grad, backward_hooks):
        rebuilt = torch._utils._rebuild_tensor(storage, storage_offset, size, stride)
        rebuilt.requires_grad = requires_grad
        rebuilt._backward_hooks = backward_hooks
        return rebuilt
    torch._utils._rebuild_tensor_v2 = _rebuild_tensor_v2
15 |
16 | import torch
17 | import torch.nn as nn
18 | import torch.nn.functional as F
19 | import torch.backends.cudnn as cudnn
20 | import torch.optim as optim
21 | import torchvision
22 | import torchvision.datasets as dset
23 | import torchvision.transforms as transforms
24 | from torch.autograd import Variable
25 | import copy
26 | import math
27 | import numpy as np
28 | import os
29 | import argparse
30 |
31 | #from utils import *
32 |
33 | import numpy.matlib
34 | import matplotlib.pyplot as plt
35 | import pickle
36 | import cPickle
37 | from collections import OrderedDict
38 |
# Command-line options of the attack script; parsed at import time into `opt`.
parser = argparse.ArgumentParser(description='Fool ResNet_PGD ')
ap = parser.add_argument
# Attack to run: fgsm, ifgsm or cw.
parser.add_argument('-method', help='Attack Method', type=str, default="ifgsm")
# Attack strength; 0.007 and 0.031 were earlier defaults.
parser.add_argument('-epsilon', help='Attack Strength', type=float, default=0.003)
parser.add_argument('--num-ensembles', '--ne', default=1, type=int, metavar='N')
parser.add_argument('--noise-coef', '--nc', default=0.0, type=float, metavar='W',
                    help='forward noise (default: 0.0)')
parser.add_argument('--noise-coef-eval', '--nce', default=0.0, type=float, metavar='W',
                    help='forward noise (default: 0.)')
opt = vars(parser.parse_args())
49 |
50 |
class BasicBlock(nn.Module):
    """
    Pre-activation wide-ResNet block with two 3x3 convolutions.

    When the channel counts differ, a 1x1 convolution (``convShortcut``)
    projects the pre-activated input for the shortcut. When ``noise_coef``
    is not None, Gaussian noise scaled by ``noise_coef * std(out)`` is added
    to the output on every forward pass.
    """
    def __init__(self, in_planes, out_planes, stride, dropRate=0.0, noise_coef=None):
        super(BasicBlock, self).__init__()
        self.bn1 = nn.BatchNorm2d(in_planes)
        self.relu1 = nn.ReLU(inplace=True)
        self.conv1 = nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
                               padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_planes)
        self.relu2 = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(out_planes, out_planes, kernel_size=3, stride=1,
                               padding=1, bias=False)
        self.droprate = dropRate
        self.equalInOut = (in_planes == out_planes)
        if self.equalInOut:
            self.convShortcut = None
        else:
            self.convShortcut = nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride,
                                          padding=0, bias=False)
        self.noise_coef = noise_coef

    def forward(self, x):
        activated = self.relu1(self.bn1(x))
        if self.equalInOut:
            shortcut_input = x
        else:
            # With a projection shortcut, both branches start from the
            # pre-activated tensor.
            shortcut_input = activated

        out = self.relu2(self.bn2(self.conv1(activated)))
        if self.droprate > 0:
            out = F.dropout(out, p=self.droprate, training=self.training)
        out = self.conv2(out)

        if self.equalInOut:
            shortcut = shortcut_input
        else:
            shortcut = self.convShortcut(shortcut_input)
        out = torch.add(shortcut, out)

        if self.noise_coef is None:
            return out
        return out + self.noise_coef * torch.std(out) * torch.randn_like(out)
84 |
85 |
class NetworkBlock(nn.Module):
    """
    A stage made of ``nb_layers`` blocks applied one after another.

    The first block maps ``in_planes`` to ``out_planes`` with the given
    stride; the remaining blocks keep ``out_planes`` channels and stride 1.
    """
    def __init__(self, nb_layers, in_planes, out_planes, block, stride, dropRate=0.0, noise_coef=None):
        super(NetworkBlock, self).__init__()
        self.layer = self._make_layer(block, in_planes, out_planes, nb_layers, stride, dropRate, noise_coef)
        self.noise_coef = noise_coef

    def _make_layer(self, block, in_planes, out_planes, nb_layers, stride, dropRate, noise_coef):
        stack = []
        for idx in range(int(nb_layers)):
            is_first = idx == 0
            # A falsy in_planes/stride falls back to out_planes/1, exactly as
            # the former `cond and a or b` expressions did.
            block_in = in_planes if (is_first and in_planes) else out_planes
            block_stride = stride if (is_first and stride) else 1
            stack.append(block(block_in, out_planes, block_stride, dropRate, noise_coef=noise_coef))
        return nn.Sequential(*stack)

    def forward(self, x):
        return self.layer(x)
100 |
101 |
class WideResNet(nn.Module):
    """
    Wide ResNet classifier: a 3x3 stem convolution, three stages of
    ``BasicBlock``s, BatchNorm + ReLU, 8x8 average pooling and a linear
    layer producing ``num_classes`` logits.

    ``depth`` must satisfy ``(depth - 4) % 6 == 0``; ``noise_coef`` is
    forwarded to every block.
    """
    def __init__(self, depth=34, num_classes=10, widen_factor=10, dropRate=0.0, noise_coef=None):
        super(WideResNet, self).__init__()
        widths = [16, 16 * widen_factor, 32 * widen_factor, 64 * widen_factor]
        assert ((depth - 4) % 6 == 0)
        # Blocks per stage; NetworkBlock converts this with int().
        per_stage = (depth - 4) / 6
        block = BasicBlock
        # Stem convolution before any network block.
        self.conv1 = nn.Conv2d(3, widths[0], kernel_size=3, stride=1,
                               padding=1, bias=False)
        # Stage 1.
        self.block1 = NetworkBlock(per_stage, widths[0], widths[1], block, 1, dropRate, noise_coef=noise_coef)
        # Duplicate of stage 1: registered as a submodule but never called in
        # forward(). NOTE(review): presumably kept so saved checkpoints keep
        # loading — confirm before removing.
        self.sub_block1 = NetworkBlock(per_stage, widths[0], widths[1], block, 1, dropRate, noise_coef=noise_coef)
        # Stage 2.
        self.block2 = NetworkBlock(per_stage, widths[1], widths[2], block, 2, dropRate, noise_coef=noise_coef)
        # Stage 3.
        self.block3 = NetworkBlock(per_stage, widths[2], widths[3], block, 2, dropRate, noise_coef=noise_coef)
        # Final normalisation, activation and classifier.
        self.bn1 = nn.BatchNorm2d(widths[3])
        self.relu = nn.ReLU(inplace=True)
        self.fc = nn.Linear(widths[3], num_classes)
        self.nChannels = widths[3]

        # Conv weights: zero-mean normal with std sqrt(2 / (kh*kw*out_channels));
        # BatchNorm: unit scale, zero shift; Linear: zero bias.
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                kernel_h, kernel_w = module.kernel_size
                fan_out = kernel_h * kernel_w * module.out_channels
                module.weight.data.normal_(0, math.sqrt(2. / fan_out))
            elif isinstance(module, nn.BatchNorm2d):
                module.weight.data.fill_(1)
                module.bias.data.zero_()
            elif isinstance(module, nn.Linear):
                module.bias.data.zero_()

    def forward(self, x):
        feats = self.conv1(x)
        for stage in (self.block1, self.block2, self.block3):
            feats = stage(feats)
        feats = self.relu(self.bn1(feats))
        pooled = F.avg_pool2d(feats, 8)
        flat = pooled.view(-1, self.nChannels)
        return self.fc(flat)
145 |
if __name__ == '__main__':
    # Load the trained DNN, attack it on the CIFAR10 test set with the chosen
    # method (fgsm, ifgsm or cw), and finally save the clean images, the
    # adversarial images, the noise and the labels to a pickle file.
    epsilon = opt['epsilon']
    attack_type = opt['method']
    # Fail fast on an unknown method. Previously the exception object was
    # built but never raised, so the script silently saved empty arrays.
    if attack_type not in ('fgsm', 'ifgsm', 'cw'):
        raise ValueError('Unsupported Attack')

    # Load the model.
    # NOTE(security): torch.load unpickles arbitrary objects; only load
    # checkpoints from a trusted source.
    print('==> Resuming from checkpoint..')
    checkpoint = torch.load('ckpt_PGD_ensemble_WideResNet.t7')
    net = checkpoint['net']

    # Load the original test data
    print('==> Load the clean image')
    root = './data'
    download = False

    test_set = torchvision.datasets.CIFAR10(
        root=root,
        train=False,
        download=download,
        transform=transforms.Compose([
            transforms.ToTensor(),
        ]))

    kwargs = {'num_workers':1, 'pin_memory':True}
    batchsize_test = 50
    if attack_type == 'cw':
        # The CW branch below indexes a single sample (output[0], top1_idx[0]).
        batchsize_test = 1
    print('Batch size of the test set: ', batchsize_test)
    test_loader = torch.utils.data.DataLoader(dataset=test_set,
                                              batch_size=batchsize_test,
                                              shuffle=False, **kwargs
                                             )
    criterion = nn.CrossEntropyLoss()
    #--------------------------------------------------------------------------
    # Testing
    # images: the original images
    # labels: labels of the original images
    # images_adv: adversarial image
    # labels_pred: the predicted labels of the adversarial images
    # noise: the added noise
    #--------------------------------------------------------------------------
    images, labels, images_adv, labels_pred, noise = [], [], [], [], []
    total_fooled = 0; total_correct_classified = 0

    if attack_type == 'fgsm':
        for batch_idx, (x1, y1_true) in enumerate(test_loader):
            x_Test = x1.numpy()
            y_Test = y1_true.numpy()

            x = Variable(torch.cuda.FloatTensor(x_Test.reshape(batchsize_test, 3, 32, 32)), requires_grad=True)
            y = Variable(torch.cuda.LongTensor(y_Test), requires_grad=False)

            # Classification before perturbation
            pred_tmp = net(x)
            y_pred = np.argmax(pred_tmp.cpu().data.numpy())
            loss = criterion(pred_tmp, y)
            # Attack
            net.zero_grad()
            if x.grad is not None:
                x.grad.data.fill_(0)
            loss.backward()

            x_val_min = 0.0
            x_val_max = 1.0
            x.grad.sign_()

            # Single signed-gradient step, clamped to the valid pixel range.
            x_adversarial = x + epsilon*x.grad
            x_adversarial = torch.clamp(x_adversarial, x_val_min, x_val_max)
            x_adversarial = x_adversarial.data

            # Classify the perturbed data
            x_adversarial_tmp = Variable(x_adversarial)
            pred_tmp = net(x_adversarial_tmp)
            y_pred_adversarial = np.argmax(pred_tmp.cpu().data.numpy(), axis=1)

            for i in range(len(x_Test)):
                if y_Test[i] == y_pred_adversarial[i]:
                    total_correct_classified += 1

            for i in range(len(x_Test)):
                # Save the perturbed data
                images.append(x_Test[i, :, :, :]) # Original image
                images_adv.append(x_adversarial.cpu().numpy()[i, :, :, :]) # Perturbed image
                noise.append(x_adversarial.cpu().numpy()[i, :, :, :]-x_Test[i, :, :, :]) # Noise
                labels.append(y_Test[i])
                labels_pred.append(y_pred_adversarial[i])

    elif attack_type == 'ifgsm':
        # Helper function (hoisted out of the batch loop; it was redefined
        # on every batch).
        def where(cond, x, y):
            """
            code from :
            https://discuss.pytorch.org/t/how-can-i-do-the-operation-the-same-as-np-where/1329/8
            """
            cond = cond.float()
            return (cond*x) + ((1-cond)*y)

        for batch_idx, (x1, y1_true) in enumerate(test_loader):
            x_Test = x1.numpy()
            y_Test = y1_true.numpy()

            x = Variable(torch.cuda.FloatTensor(x_Test.reshape(batchsize_test, 3, 32, 32)), requires_grad=True)
            y = Variable(torch.cuda.LongTensor(y_Test), requires_grad=False)

            # Classification before perturbation
            pred_tmp = net(x)
            y_pred = np.argmax(pred_tmp.cpu().data.numpy())
            loss = criterion(pred_tmp, y)
            # Attack
            alpha = epsilon          # per-step size
            iteration = 20
            x_val_min = 0.; x_val_max = 1.
            epsilon1 = 0.031         # radius of the allowed perturbation

            # No random start: the attack begins at the clean image.
            x_adv = Variable(x.data, requires_grad=True)

            for i in range(iteration):
                # Average the logits of 10 forward passes of the same input
                # before taking the gradient.
                h_adv = sum(net(x_adv) for _ in range(10))/10.
                loss = criterion(h_adv, y)
                net.zero_grad()
                if x_adv.grad is not None:
                    x_adv.grad.data.fill_(0)
                loss.backward()

                x_adv.grad.sign_()
                x_adv = x_adv + alpha*x_adv.grad
                # Project back into the epsilon1-ball and the pixel range.
                x_adv = where(x_adv > x+epsilon1, x+epsilon1, x_adv)
                x_adv = where(x_adv < x-epsilon1, x-epsilon1, x_adv)
                x_adv = torch.clamp(x_adv, x_val_min, x_val_max)
                x_adv = Variable(x_adv.data, requires_grad=True)

            x_adversarial = x_adv.data

            x_adversarial_tmp = Variable(x_adversarial)
            pred_tmp = net(x_adversarial_tmp)
            loss = criterion(pred_tmp, y)
            y_pred_adversarial = np.argmax(pred_tmp.cpu().data.numpy(), axis=1)

            for i in range(len(x_Test)):
                if y_Test[i] == y_pred_adversarial[i]:
                    total_correct_classified += 1

            for i in range(len(x_Test)):
                # Save the perturbed data
                images.append(x_Test[i, :, :, :]) # Original image
                images_adv.append(x_adversarial.cpu().numpy()[i, :, :, :]) # Perturbed image
                noise.append(x_adversarial.cpu().numpy()[i, :, :, :]-x_Test[i, :, :, :]) # Noise
                labels.append(y_Test[i])
                labels_pred.append(y_pred_adversarial[i])

    elif attack_type == 'cw':
        for batch_idx, (x1, y1_true) in enumerate(test_loader):
            # Report progress every 50 images.
            if batch_idx % 50 == 0:
                print(batch_idx)
            x_Test = x1.numpy()
            y_Test = y1_true.numpy()

            x = Variable(torch.cuda.FloatTensor(x_Test.reshape(batchsize_test, 3, 32, 32)), requires_grad=True)
            y = Variable(torch.cuda.LongTensor(y_Test), requires_grad=False)

            # Classification before perturbation
            pred_tmp = net(x)
            loss = criterion(pred_tmp, y)
            y_pred = np.argmax(pred_tmp.cpu().data.numpy())

            # Attack
            cwl2_learning_rate = 0.0006
            max_iter = 50
            lambdaf = 10.0
            kappa = 0.0

            # The input image we will perturb
            input = torch.FloatTensor(x_Test.reshape(batchsize_test, 3, 32, 32))
            input_var = Variable(input)

            # w is the variable we will optimize over. We will also save the best w and loss
            w = Variable(input, requires_grad=True)
            best_w = input.clone()
            best_loss = float('inf')

            # Use the Adam optimizer for the minimization
            optimizer = optim.Adam([w], lr=cwl2_learning_rate)

            # Get the top2 predictions of the model. Get the argmaxes for the objective function
            probs = net(input_var.cuda())

            probs_data = probs.data.cpu()
            top1_idx = torch.max(probs_data, 1)[1]
            probs_data[0][top1_idx] = -1 # making the previous top1 the lowest so we get the top2
            top2_idx = torch.max(probs_data, 1)[1]

            # Set the argmax (but maybe argmax will just equal top2_idx always?)
            argmax = top1_idx[0]
            if argmax == y_pred:
                argmax = top2_idx[0]

            # The iteration
            for i in range(0, max_iter):
                if i > 0:
                    w.grad.data.fill_(0)

                # Zero grad (Only one line needed actually)
                net.zero_grad()
                optimizer.zero_grad()

                # Compute L2 Loss
                loss = torch.pow(w - input_var, 2).sum()

                # w variable
                w_data = w.data
                w_in = Variable(w_data, requires_grad=True)

                # Compute output
                output = net.forward(w_in.cuda())

                # Calculating the (hinge) loss
                loss += lambdaf * torch.clamp( output[0][y_pred] - output[0][argmax] + kappa, min=0).cpu()

                # Backprop the loss
                loss.backward()

                # The hinge term reaches w_in rather than w, so its gradient
                # is added onto w's gradient by hand.
                w.grad.data.add_(w_in.grad.data)

                # Optimizer step
                optimizer.step()

                # Save the best w and loss (.item() replaces indexing a
                # 0-dim tensor with [0]).
                total_loss = loss.item()

                if total_loss < best_loss:
                    best_loss = total_loss
                    best_w = w.data.clone()

            # Set final adversarial image as the best-found w
            x_adversarial = best_w

            #--------------- Add to introduce the noise
            # Rescale the perturbation that was found by epsilon.
            noise_tmp = x_adversarial.cpu().numpy() - x_Test
            x_adversarial = x_Test + epsilon * noise_tmp
            #---------------

            # Classify the perturbed data
            x_adversarial_tmp = Variable(torch.cuda.FloatTensor(x_adversarial), requires_grad=False)
            pred_tmp = net(x_adversarial_tmp)
            y_pred_adversarial = np.argmax(pred_tmp.cpu().data.numpy())

            if y_Test == y_pred_adversarial:
                total_correct_classified += 1

            # Save the perturbed data
            images.append(x_Test) # Original image
            images_adv.append(x_adversarial) # Perturbed image
            noise.append(x_adversarial-x_Test) # Noise
            labels.append(y_Test)
            labels_pred.append(y_pred_adversarial)

    print('Number of correctly classified images: ', total_correct_classified)
    # Save data
    images = np.array(images).squeeze()
    images_adv = np.array(images_adv).squeeze()
    noise = np.array(noise).squeeze()
    labels = np.array(labels).squeeze()
    labels_pred = np.array(labels_pred).squeeze()
    print(' '.join(str(s) for s in (images.shape, images_adv.shape, noise.shape, labels.shape, labels_pred.shape)))

    # Pickle output is bytes, so the file is opened in binary mode.
    # NOTE(review): the file name says "10iters" while the ifgsm branch runs
    # 20 iterations; the name is kept for compatibility with existing files.
    with open("fooled_WideResNet_PGD_10iters_" + attack_type + str(int(1000*epsilon)) + ".pkl", "wb") as f:
        adv_data_dict = {
            "images" : images,
            "images_adversarial" : images_adv,
            "y_trues" : labels,
            "noises" : noise,
            "y_preds_adversarial" : labels_pred
        }
        pickle.dump(adv_data_dict, f)
452 |
453 |
--------------------------------------------------------------------------------
/WideResNet34-10/Readme.md:
--------------------------------------------------------------------------------
1 | ## Reproducing the results of WideResNet34-10 on CIFAR10
2 | ### PGD adversarial training
3 | ```
4 | python main_pgd_wideresnet34_10_Validation.py --lr 0.1 --noise-coef 0.1
5 | ```
6 |
7 | ### Attack the trained model
8 | ```
9 | python Attack_PGD_WideResNet.py --method ifgsm
10 | ```
11 | The method can be `fgsm`, `ifgsm`, or `cw`.
12 |
--------------------------------------------------------------------------------
/WideResNet34-10/main_pgd_wideresnet34_10_Validation.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | main pgd enresnet
4 | """
5 | import argparse
6 | import os
7 | import shutil
8 | import time
9 |
10 | import torch.backends.cudnn as cudnn
11 | import torch.optim as optim
12 | import torchvision
13 | import torchvision.transforms as transforms
14 | from torch.autograd import Variable
15 | import torch.nn.functional as F
16 |
17 | import torch
18 | import torch.nn as nn
19 | import math
20 |
21 | from resnet_cifar import *
22 | from utils import *
23 |
# Command-line interface of the training script.
parser = argparse.ArgumentParser(description='PyTorch Cifar10 Training')

# Model identity and training schedule.
parser.add_argument(
    '--model_name',
    type=str,
    default='en_wideresnet34_10_cifar10',
    help='name of the model')
parser.add_argument(
    '--epochs',
    type=int,
    default=200,
    metavar='N',
    help='number of total epochs to run')
parser.add_argument(
    '--start-epoch',
    type=int,
    default=0,
    metavar='N',
    help='manual epoch number (useful on restarts)')
parser.add_argument(
    '-b', '--batch-size',
    type=int,
    default=128,
    metavar='N',
    help='mini-batch size (default: 128),only used for train')

# Optimizer hyper-parameters.
parser.add_argument(
    '--lr', '--learning-rate',
    type=float,
    default=0.1,
    metavar='LR',
    help='initial learning rate')
parser.add_argument(
    '--momentum',
    type=float,
    default=0.9,
    metavar='M',
    help='momentum')
parser.add_argument(
    '--num-ensembles', '--ne',
    type=int,
    default=1,
    metavar='N')
parser.add_argument(
    '--weight-decay', '--wd',
    type=float,
    default=5e-4,
    metavar='W',
    help='weight decay (default: 5e-4)')

# Noise injected by the residual blocks.
parser.add_argument(
    '--noise-coef', '--nc',
    type=float,
    default=0.1,
    metavar='W',
    help='forward noise (default: 0.1)')
parser.add_argument(
    '--noise-coef-eval', '--nce',
    type=float,
    default=0.0,
    metavar='W',
    help='forward noise (default: 0.)')

# Logging, checkpoint and evaluation switches.
parser.add_argument(
    '--print-freq', '-p',
    type=int,
    default=10,
    metavar='N',
    help='print frequency (default: 10)')
parser.add_argument(
    '--resume',
    type=str,
    default='',
    metavar='PATH',
    help='path to latest checkpoint (default: none)')
parser.add_argument(
    '-e', '--evaluate',
    dest='evaluate',
    action='store_true',
    help='evaluate model on validation set')
# The string default is converted by argparse through type=int.
parser.add_argument(
    '-ct', '--cifar-type',
    type=int,
    default='10',
    metavar='CT',
    help='10 for cifar10,100 for cifar100 (default: 10)')
42 |
43 |
class BasicBlock(nn.Module):
    """Pre-activation residual block (BN-ReLU-conv twice) with optional output noise.

    Args:
        in_planes: number of input channels.
        out_planes: number of output channels.
        stride: stride of the first 3x3 convolution and of the 1x1 shortcut.
        dropRate: dropout probability applied between the two convolutions;
            dropout is skipped when it is not positive.
        noise_coef: when not ``None``, Gaussian noise scaled by
            ``noise_coef * std(out)`` is added to the block output. The noise
            is added regardless of the module's train/eval mode.
    """

    def __init__(self, in_planes, out_planes, stride, dropRate=0.0, noise_coef=None):  # BW
        super(BasicBlock, self).__init__()
        self.bn1 = nn.BatchNorm2d(in_planes)
        self.relu1 = nn.ReLU(inplace=True)
        self.conv1 = nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
                               padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_planes)
        self.relu2 = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(out_planes, out_planes, kernel_size=3, stride=1,
                               padding=1, bias=False)
        self.droprate = dropRate
        self.equalInOut = (in_planes == out_planes)
        # A 1x1 projection is only needed when the channel count changes;
        # otherwise the shortcut is the identity and no module is created.
        if self.equalInOut:
            self.convShortcut = None
        else:
            self.convShortcut = nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride,
                                          padding=0, bias=False)
        self.noise_coef = noise_coef

    def forward(self, x):
        """Return ``shortcut(x) + residual(x)``, plus scaled Gaussian noise if enabled."""
        if self.equalInOut:
            # Identity shortcut: keep the raw input and pre-activate a copy.
            out = self.relu1(self.bn1(x))
            out = self.conv1(out)
        else:
            # Projection shortcut: both branches consume the pre-activated input.
            x = self.relu1(self.bn1(x))
            out = self.conv1(x)
        out = self.relu2(self.bn2(out))
        if self.droprate > 0:
            out = F.dropout(out, p=self.droprate, training=self.training)
        out = self.conv2(out)
        shortcut = x if self.equalInOut else self.convShortcut(x)
        out = torch.add(shortcut, out)

        if self.noise_coef is not None:  # Test Variable and rand
            # Noise magnitude follows the standard deviation of the whole output tensor.
            return out + self.noise_coef * torch.std(out) * torch.randn_like(out)
        return out
77 |
78 |
class NetworkBlock(nn.Module):
    """A stage made of ``nb_layers`` residual blocks applied one after another.

    Args:
        nb_layers: number of blocks in the stage (converted with ``int``).
        in_planes: input channels of the first block.
        out_planes: output channels of every block.
        block: callable building one block, invoked as
            ``block(in_planes, out_planes, stride, dropRate, noise_coef=...)``.
        stride: stride of the first block; later blocks use stride 1.
        dropRate: dropout rate forwarded to every block.
        noise_coef: noise coefficient forwarded to every block.
    """

    def __init__(self, nb_layers, in_planes, out_planes, block, stride, dropRate=0.0, noise_coef=None):
        super(NetworkBlock, self).__init__()
        self.layer = self._make_layer(block, in_planes, out_planes, nb_layers, stride, dropRate, noise_coef)
        self.noise_coef = noise_coef

    def _make_layer(self, block, in_planes, out_planes, nb_layers, stride, dropRate, noise_coef):
        """Build the ``nn.Sequential`` holding the stage's blocks."""
        layers = []
        for i in range(int(nb_layers)):
            # Only the first block changes the channel count and applies the stride.
            # Explicit conditionals avoid the `a and b or c` pitfall with falsy values.
            is_first = (i == 0)
            layers.append(block(in_planes if is_first else out_planes,
                                out_planes,
                                stride if is_first else 1,
                                dropRate,
                                noise_coef=noise_coef))
        return nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through all blocks of the stage."""
        return self.layer(x)
93 |
94 |
class WideResNet(nn.Module):
    """Wide residual network assembled from three ``NetworkBlock`` stages.

    Args:
        depth: total depth; ``depth - 4`` must be divisible by 6.
        num_classes: size of the output layer.
        widen_factor: multiplier applied to the base widths 16/32/64.
        dropRate: dropout rate forwarded to every block.
        noise_coef: noise coefficient forwarded to every block (``None``
            disables the noise).

    The fixed 8x8 average pool followed by the flatten in ``forward`` assumes
    an 8x8 final feature map, i.e. 32x32 inputs given the two stride-2 stages.
    """

    def __init__(self, depth=34, num_classes=10, widen_factor=10, dropRate=0.0, noise_coef=None):
        super(WideResNet, self).__init__()
        nChannels = [16, 16 * widen_factor, 32 * widen_factor, 64 * widen_factor]
        assert ((depth - 4) % 6 == 0)
        # Floor division keeps the per-stage block count an integer on
        # Python 3 as well (true division would produce a float there).
        n = (depth - 4) // 6
        block = BasicBlock
        # 1st conv before any network block
        self.conv1 = nn.Conv2d(3, nChannels[0], kernel_size=3, stride=1,
                               padding=1, bias=False)
        # 1st block
        self.block1 = NetworkBlock(n, nChannels[0], nChannels[1], block, 1, dropRate, noise_coef=noise_coef)
        # 1st sub-block: registered (so it appears in the state dict) but not
        # called by forward(); kept so existing checkpoints keep loading.
        self.sub_block1 = NetworkBlock(n, nChannels[0], nChannels[1], block, 1, dropRate, noise_coef=noise_coef)
        # 2nd block
        self.block2 = NetworkBlock(n, nChannels[1], nChannels[2], block, 2, dropRate, noise_coef=noise_coef)
        # 3rd block
        self.block3 = NetworkBlock(n, nChannels[2], nChannels[3], block, 2, dropRate, noise_coef=noise_coef)
        # global average pooling and classifier
        self.bn1 = nn.BatchNorm2d(nChannels[3])
        self.relu = nn.ReLU(inplace=True)
        self.fc = nn.Linear(nChannels[3], num_classes)
        self.nChannels = nChannels[3]

        # Weight initialisation: scaled normal for convolutions, unit scale
        # and zero shift for batch norm, zero bias for the classifier.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / fan_out))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                m.bias.data.zero_()

    def forward(self, x):
        """Return the class scores for a batch of images."""
        out = self.conv1(x)
        out = self.block1(out)
        out = self.block2(out)
        out = self.block3(out)
        out = self.relu(self.bn1(out))
        out = F.avg_pool2d(out, 8)
        out = out.view(-1, self.nChannels)
        return self.fc(out)
138 |
139 |
class AttackPGD(nn.Module):
    """
    PGD Adversarial training

    Wraps a classifier; ``forward`` first crafts adversarial inputs with
    iterated signed-gradient steps and then classifies them.

    Args:
        basic_net: the classifier being attacked / trained.
        config: dict with the keys ``random_start`` (bool), ``step_size``,
            ``epsilon``, ``num_steps`` and ``loss_func`` (must be ``'xent'``).
    """
    def __init__(self, basic_net, config):
        super(AttackPGD, self).__init__()
        self.basic_net = basic_net
        self.rand = config['random_start']
        self.step_size = config['step_size']
        self.epsilon = config['epsilon']
        self.num_steps = config['num_steps']
        assert config['loss_func'] == 'xent', 'Only xent supported for now.'

    def forward(self, inputs, targets):
        """Return ``(logits_on_adversarial_inputs, adversarial_inputs)``.

        The perturbation is kept within ``epsilon`` of ``inputs`` element-wise
        and the perturbed inputs are clamped to ``[0, 1]`` after every step.
        """
        # Work on a detached alias: requires_grad_() below would otherwise
        # switch on gradient tracking on the caller's tensor when no random
        # start replaces x with a fresh tensor.
        x = inputs.detach()
        if self.rand:
            x = x + torch.zeros_like(x).uniform_(-self.epsilon, self.epsilon)
        for i in range(self.num_steps):  # iFGSM attack
            x.requires_grad_()
            # Gradients w.r.t. the input are needed even under torch.no_grad().
            with torch.enable_grad():
                logits = self.basic_net(x)
                loss = F.cross_entropy(logits, targets, size_average=False)
            grad = torch.autograd.grad(loss, [x])[0]
            # Ascend the loss along the gradient sign, then project back
            # onto the epsilon box around the clean inputs and the valid range.
            x = x.detach() + self.step_size*torch.sign(grad.detach())
            x = torch.min(torch.max(x, inputs - self.epsilon), inputs + self.epsilon)
            x = torch.clamp(x, 0, 1)

        return self.basic_net(x), x
168 |
169 |
if __name__ == '__main__':
    # Script entry point: builds the CIFAR10 loaders, wraps a noisy WideResNet
    # in AttackPGD, then for every epoch trains, validates, checkpoints and
    # (when validation improved) evaluates on the test set.
    # NOTE(review): torch.cuda.is_available is not called here, so use_cuda
    # holds the function object; it is not read again in this block.
    use_cuda = torch.cuda.is_available
    global best_acc
    best_acc = 0
    start_epoch = 0
    args = parser.parse_args()
    best_count = 0  # best number of correctly classified validation samples so far

    #--------------------------------------------------------------------------
    # Load Cifar data
    #--------------------------------------------------------------------------
    print('==> Preparing data...')
    root = './data'
    download = True

    #normalize = transforms.Normalize(mean=[0.507, 0.487, 0.441], std=[0.267, 0.256, 0.276])


    # Training images are augmented with random crops and horizontal flips;
    # no normalisation is applied, so pixel values stay as ToTensor() emits them.
    train_set = torchvision.datasets.CIFAR10(
        root=root,
        train=True,
        download=download,
        transform=transforms.Compose([
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            #normalize,
        ]))

    #train_set_tmp = train_set[:45000]
    #val_set = train_set[45000:]
    #train_set = train_set_tmp
    '''
    train_set_tmp = []; val_set = []
    for i in range(45000):
        train_set_tmp.append(train_set[i])
    for i in range(45000, 50000):
        val_set.append(train_set[i])
    train_set = train_set_tmp
    '''

    test_set = torchvision.datasets.CIFAR10(
        root=root,
        train=False,
        download=download,
        transform=transforms.Compose([
            transforms.ToTensor(),
            #normalize,
        ]))


    kwargs = {'num_workers':1, 'pin_memory':True}
    # NOTE(review): `/` yields a float under Python 3 true division; confirm
    # the interpreter this script targets (xrange below suggests Python 2).
    batchsize_test = len(test_set)/100#50 #100
    print('Batch size of the test set: ', batchsize_test)
    test_loader = torch.utils.data.DataLoader(dataset=test_set,
                                              batch_size=batchsize_test,
                                              shuffle=False, **kwargs
                                             )
    # NOTE(review): the batch size is hard-coded; args.batch_size is not used.
    batchsize_train = 128
    print('Batch size of the train set: ', batchsize_train)
    train_loader = torch.utils.data.DataLoader(dataset=train_set,
                                               batch_size=batchsize_train,
                                               shuffle=True, **kwargs
                                              )
    '''
    batchsize_val = 100
    print('Batch size of the validation set: ', batchsize_val)
    val_loader = torch.utils.data.DataLoader(dataset=val_set,
                                             batch_size=batchsize_val,
                                             shuffle=False, **kwargs
                                            )
    '''

    # The only command-line value consumed below is args.noise_coef.
    basic_net = WideResNet(noise_coef=args.noise_coef).cuda()

    # From https://github.com/MadryLab/cifar10_challenge/blob/master/config.json
    config = {
        'epsilon': 0.031, #8.0 / 255, # Test 1.0-8.0
        'num_steps': 10,
        'step_size': 0.007, #6.0 / 255, # 7.0
        'random_start': True,
        'loss_func': 'xent',
    }

    # net(x, target) returns (scores on the PGD-perturbed batch, perturbed batch).
    net = AttackPGD(basic_net, config).cuda()
    criterion = nn.CrossEntropyLoss()

    # NOTE(review): the epoch count and learning-rate schedule are hard-coded;
    # args.epochs and args.lr are not read.
    nepoch = 80
    for epoch in xrange(nepoch):
        print('Epoch ID', epoch)
        '''
        if epoch < 60:
            lr = 0.1
        elif epoch < 75:
            lr = 0.1/10
        elif epoch < 85:
            lr = 0.1/10/10
        else:
            lr = 0.1/10/10/10
        '''
        # Step schedule: 0.1 until epoch 74, then a tenfold decay at epochs 75, 77 and 79.
        if epoch < 75:
            lr = 0.1
        elif epoch < 77:
            lr = 0.1/10
        elif epoch < 79:
            lr = 0.1/10/10
        else:
            lr = 0.1/10/10/10

        # A new SGD optimizer object is constructed at the start of every
        # epoch, so no optimizer state is carried over between epochs.
        optimizer = optim.SGD(net.parameters(), lr=lr, momentum=0.9, weight_decay=5e-4, nesterov=True)
        #optimizer = optim.SGD(net.parameters(), lr=lr, momentum=0.9, weight_decay=1e-2, nesterov=True)

        #----------------------------------------------------------------------
        # Training
        #----------------------------------------------------------------------
        correct = 0; total = 0; train_loss = 0
        net.train()
        for batch_idx, (x, target) in enumerate(train_loader):
            # Only the first 352 mini-batches of the pass are used for optimisation.
            if batch_idx < 352:
                optimizer.zero_grad()
                x, target = Variable(x.cuda()), Variable(target.cuda())

                # Loss is computed on the adversarially perturbed batch.
                score, pert_x = net(x, target)
                loss = criterion(score, target)
                loss.backward()
                optimizer.step()

                train_loss += loss.data[0]
                _, predicted = torch.max(score.data, 1)
                total += target.size(0)
                correct += predicted.eq(target.data).cpu().sum()
                progress_bar(batch_idx, len(train_loader), 'Loss: %.3f | Acc: %.3f%% (%d/%d)'
                             % (train_loss/(batch_idx+1), 100.*correct/total, correct, total))

        #----------------------------------------------------------------------
        # Validation
        #----------------------------------------------------------------------
        val_loss = 0; correct = 0; total = 0
        net.eval()
        # A second pass over the same train_loader; batches with index >= 352
        # serve as the validation split.
        # NOTE(review): train_loader is built with shuffle=True, so the samples
        # that land in these batches differ from pass to pass and are not a
        # fixed held-out subset - confirm this is intended.
        for batch_idx, (x, target) in enumerate(train_loader):
            if batch_idx >= 352:
                x, target = Variable(x.cuda(), volatile=True), Variable(target.cuda(), volatile=True)
                score, pert_x = net(x, target)

                loss = criterion(score, target)
                val_loss += loss.data[0]
                _, predicted = torch.max(score.data, 1)
                total += target.size(0)
                correct += predicted.eq(target.data).cpu().sum()
                progress_bar(batch_idx, len(train_loader), 'Loss: %.3f | Acc: %.3f%% (%d/%d)'
                             % (val_loss/(batch_idx+1), 100.*correct/total, correct, total))

        #----------------------------------------------------------------------
        # Save the checkpoint
        #----------------------------------------------------------------------
        acc = 100.*correct/total
        #if acc > best_acc:
        # Model selection uses the raw count of correct validation predictions.
        if correct > best_count:
            print('Saving model...')
            # The bare classifier (basic_net) is stored, not the AttackPGD wrapper.
            state = {
                'net': basic_net, #net,
                'acc': acc,
                'epoch': epoch,
            }

            torch.save(state, './ckpt_PGD_ensemble_WideResNet.t7')
            #best_acc = acc
            #best_count = correct

        #----------------------------------------------------------------------
        # Testing
        #----------------------------------------------------------------------
        # best_count is updated here (not in the checkpoint branch above), and
        # the test set is evaluated only when validation improved.
        if correct > best_count:
            best_count = correct
            test_loss = 0; correct = 0; total = 0
            net.eval()
            for batch_idx, (x, target) in enumerate(test_loader):
                x, target = Variable(x.cuda(), volatile=True), Variable(target.cuda(), volatile=True)
                score, pert_x = net(x, target)

                loss = criterion(score, target)
                test_loss += loss.data[0]
                _, predicted = torch.max(score.data, 1)
                total += target.size(0)
                correct += predicted.eq(target.data).cpu().sum()
                progress_bar(batch_idx, len(test_loader), 'Loss: %.3f | Acc: %.3f%% (%d/%d)'
                             % (test_loss/(batch_idx+1), 100.*correct/total, correct, total))
    # Despite the label, the printed value is a count of correct validation samples.
    print('The best acc: ', best_count)
358 |
--------------------------------------------------------------------------------
/WideResNet34-10/resnet_cifar.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | resnet for cifar in pytorch
4 | Reference:
5 | [1] K. He, X. Zhang, S. Ren, and J. Sun. Deep residual learning for image recognition. In CVPR, 2016.
6 | [2] K. He, X. Zhang, S. Ren, and J. Sun. Identity mappings in deep residual networks. In ECCV, 2016.
7 | """
8 | import torch
9 | import torch.nn as nn
10 | import math
11 | from torch.autograd import Variable
12 |
def conv3x3(in_planes, out_planes, stride=1):
    """
    Build a bias-free 3x3 convolution whose borders are padded by one pixel.
    """
    layer = nn.Conv2d(
        in_planes,
        out_planes,
        kernel_size=3,
        stride=stride,
        padding=1,
        bias=False)
    return layer
18 |
19 |
class PreActBasicBlock(nn.Module):
    """Two-convolution pre-activation residual block with optional output noise.

    ``downsample``, when given, is applied to the pre-activated input to form
    the shortcut; otherwise the raw input is the shortcut. When ``noise_coef``
    is not ``None``, Gaussian noise scaled by ``noise_coef * std(out)`` is
    added to the result.
    """
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None, noise_coef=None):
        super(PreActBasicBlock, self).__init__()
        # First BN-ReLU-conv unit (carries the stride).
        self.bn1 = nn.BatchNorm2d(inplanes)
        self.relu = nn.ReLU(inplace=True)
        self.conv1 = conv3x3(inplanes, planes, stride)
        # Second BN-ReLU-conv unit; the ReLU module is shared.
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv2 = conv3x3(planes, planes)
        self.downsample = downsample
        self.stride = stride
        self.noise_coef = noise_coef

    def forward(self, x):
        activated = self.relu(self.bn1(x))

        # The shortcut is projected from the pre-activated tensor when a
        # downsample module was supplied.
        if self.downsample is None:
            shortcut = x
        else:
            shortcut = self.downsample(activated)

        out = self.conv1(activated)
        out = self.conv2(self.relu(self.bn2(out)))
        out += shortcut

        if self.noise_coef is None:
            return out
        # Test Variable and rand
        return out + self.noise_coef * torch.std(out) * torch.randn_like(out)
54 |
55 |
class PreActBottleneck(nn.Module):
    """Pre-activation bottleneck block (1x1, 3x3, 1x1) with optional output noise.

    The block outputs ``planes * 4`` channels. ``downsample``, when given, is
    applied to the pre-activated input to form the shortcut. When
    ``noise_coef`` is not ``None``, Gaussian noise scaled by
    ``noise_coef * std(out)`` is added to the result.
    """
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None, noise_coef=None):
        super(PreActBottleneck, self).__init__()
        # 1x1 reduction.
        self.bn1 = nn.BatchNorm2d(inplanes)
        self.relu = nn.ReLU(inplace=True)
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        # 3x3 convolution carrying the stride.
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        # 1x1 expansion to planes*4 channels.
        self.bn3 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, planes*4, kernel_size=1, bias=False)
        self.downsample = downsample
        self.stride = stride
        self.noise_coef = noise_coef

    def forward(self, x):
        activated = self.relu(self.bn1(x))

        if self.downsample is None:
            shortcut = x
        else:
            shortcut = self.downsample(activated)

        out = self.conv1(activated)
        out = self.conv2(self.relu(self.bn2(out)))
        out = self.conv3(self.relu(self.bn3(out)))

        out += shortcut
        if self.noise_coef is None:
            return out
        return out + self.noise_coef * torch.std(out) * torch.randn_like(out)
97 |
98 |
class PreAct_ResNet_Cifar(nn.Module):
    """Three-stage pre-activation ResNet with base widths 16/32/64.

    Args:
        block: block class exposing an ``expansion`` attribute and accepting
            ``(inplanes, planes, stride=, downsample=, noise_coef=)``.
        layers: number of blocks in each of the three stages.
        num_classes: size of the final linear layer.
        noise_coef: forwarded to every block.
    """

    def __init__(self, block, layers, num_classes=10, noise_coef=None):
        super(PreAct_ResNet_Cifar, self).__init__()
        self.inplanes = 16
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1, bias=False)
        self.layer1 = self._make_layer(block, 16, layers[0], noise_coef=noise_coef)
        self.layer2 = self._make_layer(block, 32, layers[1], stride=2, noise_coef=noise_coef)
        self.layer3 = self._make_layer(block, 64, layers[2], stride=2, noise_coef=noise_coef)
        final_width = 64*block.expansion
        self.bn = nn.BatchNorm2d(final_width)
        self.relu = nn.ReLU(inplace=True)
        self.avgpool = nn.AvgPool2d(8, stride=1)
        self.fc = nn.Linear(final_width, num_classes)

        #self.loss = nn.CrossEntropyLoss()

        # Scaled-normal init for convolutions, unit scale / zero shift for batch norm.
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                kh, kw = module.kernel_size[0], module.kernel_size[1]
                fan_out = kh * kw * module.out_channels
                module.weight.data.normal_(0, math.sqrt(2. / fan_out))
            elif isinstance(module, nn.BatchNorm2d):
                module.weight.data.fill_(1)
                module.bias.data.zero_()

    def _make_layer(self, block, planes, blocks, stride=1, noise_coef=None):
        """Build one stage of ``blocks`` blocks and update ``self.inplanes``."""
        out_width = planes*block.expansion
        needs_projection = stride != 1 or self.inplanes != out_width
        if needs_projection:
            # 1x1 projection so the shortcut matches the stage's shape.
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, out_width, kernel_size=1, stride=stride, bias=False)
            )
        else:
            downsample = None

        # Only the first block strides / projects; the rest keep the shape.
        stage = [block(self.inplanes, planes, stride=stride, downsample=downsample, noise_coef=noise_coef)]
        self.inplanes = out_width
        remaining = blocks - 1
        while remaining > 0:
            stage.append(block(self.inplanes, planes, noise_coef=noise_coef))
            remaining -= 1
        return nn.Sequential(*stage)

    #def forward(self, x, target):
    def forward(self, x):
        """Return the class scores for a batch of images."""
        features = self.conv1(x)
        for stage in (self.layer1, self.layer2, self.layer3):
            features = stage(features)

        pooled = self.avgpool(self.relu(self.bn(features)))
        flat = pooled.view(pooled.size(0), -1)

        #loss = self.loss(x, target)

        #return x, loss
        return self.fc(flat)
154 |
155 |
class Ensemble_PreAct_ResNet_Cifar(nn.Module):
    """Average the outputs of ``num_ensembles`` independently built ``PreAct_ResNet_Cifar`` nets."""

    def __init__(self, block, layers, num_classes=10, num_ensembles=3, noise_coef=0.0):
        super(Ensemble_PreAct_ResNet_Cifar, self).__init__()
        self.num_ensembles = num_ensembles
        # for emsemble resnet we should use Noisy Blocks.
        members = []
        for _ in range(num_ensembles):
            members.append(PreAct_ResNet_Cifar(block, layers, num_classes=num_classes, noise_coef=noise_coef))
        self.ensemble = nn.ModuleList(members)
        # self.ensemble = nn.ModuleList([ResNet_Cifar(block, layers, num_classes=num_classes) for i in range(num_ensembles)])

    def forward(self, x):
    #def forward(self, x, target):
        """Return the mean of the member networks' outputs for ``x``."""
        total = 0.0
        for member in self.ensemble:
            total = total + member(x)
            #ret += net(x, target)
        return total / self.num_ensembles
173 |
174 |
def en_preactresnet20_cifar(**kwargs):
    """Return an ensemble of basic-block pre-activation ResNets with layer configuration [3, 3, 3]."""
    return Ensemble_PreAct_ResNet_Cifar(PreActBasicBlock, [3, 3, 3], **kwargs)
178 |
def en_preactresnet44_cifar(**kwargs):
    """Return an ensemble of basic-block pre-activation ResNets with layer configuration [7, 7, 7]."""
    return Ensemble_PreAct_ResNet_Cifar(PreActBasicBlock, [7, 7, 7], **kwargs)
182 |
def en_preactresnet32_cifar(**kwargs):
    """Return an ensemble of basic-block pre-activation ResNets with layer configuration [5, 5, 5]."""
    return Ensemble_PreAct_ResNet_Cifar(PreActBasicBlock, [5, 5, 5], **kwargs)
186 |
def en_preactresnet110_cifar(**kwargs):
    """Return an ensemble of basic-block pre-activation ResNets with layer configuration [18, 18, 18]."""
    return Ensemble_PreAct_ResNet_Cifar(PreActBasicBlock, [18, 18, 18], **kwargs)
190 |
191 |
if __name__ == '__main__':
    # Smoke test: build the default 20-layer ensemble, push one random
    # 3x32x32 image through it and print the model and the output size.
    model = en_preactresnet20_cifar()
    sample = torch.autograd.Variable(torch.randn(1, 3, 32, 32))
    scores = model(sample)
    print(model)
    print(scores.size())
197 |
--------------------------------------------------------------------------------
/WideResNet34-10/utils.py:
--------------------------------------------------------------------------------
1 | """
2 | Some helper functions for PyTorch.
3 | """
4 | import os
5 | import sys
6 | import time
7 | import math
8 |
9 | import torch.nn as nn
10 | import torch.nn.init as init
11 |
def init_params(net):
    """
    Initial layer parameters.

    Walks every sub-module of ``net`` and re-initialises, in place:
    convolutions (Kaiming normal, fan-out mode, zero bias), 2-D batch norm
    (unit weight, zero bias) and linear layers (normal weights with std 1e-3,
    zero bias). Other module types are left untouched.
    """
    for m in net.modules():
        if isinstance(m, nn.Conv2d):
            init.kaiming_normal_(m.weight, mode='fan_out')
            # Layers built with bias=False have bias None. Testing the tensor's
            # truth value instead would raise for multi-element biases.
            if m.bias is not None:
                init.constant_(m.bias, 0)
        elif isinstance(m, nn.BatchNorm2d):
            init.constant_(m.weight, 1)
            init.constant_(m.bias, 0)
        elif isinstance(m, nn.Linear):
            init.normal_(m.weight, std=1e-3)
            if m.bias is not None:
                init.constant_(m.bias, 0)
28 |
# Width of the attached terminal, used to pad the progress bar. `stty size`
# prints nothing when there is no terminal (e.g. output redirected to a file
# or a batch job), which previously made importing this module fail with a
# ValueError; fall back to a conventional 80 columns in that case.
try:
    _, term_width = os.popen('stty size', 'r').read().split()
    term_width = int(term_width)
except ValueError:
    term_width = 80

TOTAL_BAR_LENGTH = 65.
# Timestamps shared with progress_bar(): time of the previous call and time
# the current bar was started.
last_time = time.time()
begin_time = time.time()
35 |
def progress_bar(current, total, msg=None):
    """
    Draw a one-line text progress bar on stdout.

    ``current`` is the zero-based step index, ``total`` the number of steps
    and ``msg`` an optional text appended after the step / total timings.
    The line ends with a carriage return until the last step, which ends it
    with a newline.
    """
    global last_time, begin_time
    if current == 0:
        begin_time = time.time()  # Reset for new bar.

    done_len = int(TOTAL_BAR_LENGTH*current/total)
    todo_len = int(TOTAL_BAR_LENGTH - done_len) - 1

    out = sys.stdout
    out.write(' [' + '=' * done_len + '>' + '.' * todo_len + ']')

    # Time since the previous call and since the bar was started.
    now = time.time()
    step_time = now - last_time
    last_time = now
    tot_time = now - begin_time

    pieces = [' Step: %s' % format_time(step_time),
              ' | Tot: %s' % format_time(tot_time)]
    if msg:
        pieces.append(' | ' + msg)
    status = ''.join(pieces)
    out.write(status)

    # Pad with blanks up to the terminal width.
    out.write(' ' * (term_width-int(TOTAL_BAR_LENGTH)-len(status)-3))

    # Go back to the center of the bar.
    out.write('\b' * (term_width-int(TOTAL_BAR_LENGTH/2)+2))
    out.write(' %d/%d ' % (current+1, total))

    out.write('\r' if current < total-1 else '\n')
    out.flush()
81 |
82 |
def format_time(seconds):
    """
    Render a duration in seconds using at most its two largest non-zero units.

    Units are D, h, m, s and ms; ``'0ms'`` is returned when every unit is zero.
    """
    days = int(seconds / 3600/24)
    seconds = seconds - days*3600*24
    hours = int(seconds / 3600)
    seconds = seconds - hours*3600
    minutes = int(seconds / 60)
    seconds = seconds - minutes*60
    whole_seconds = int(seconds)
    seconds = seconds - whole_seconds
    millis = int(seconds*1000)

    units = (
        (days, 'D'),
        (hours, 'h'),
        (minutes, 'm'),
        (whole_seconds, 's'),
        (millis, 'ms'),
    )
    shown = []
    for amount, suffix in units:
        if len(shown) == 2:
            break
        if amount > 0:
            shown.append(str(amount) + suffix)
    return ''.join(shown) if shown else '0ms'
114 |
115 |
def freeze_layer(layer):
    """
    Turn off gradient tracking for every parameter of ``layer``.
    #Argument: the layer (anything exposing ``parameters()``) to freeze.
    """
    for weight in layer.parameters():
        weight.requires_grad = False
123 |
124 |
def freeze_All(model):
    """
    Turn off gradient tracking for every parameter of ``model``.
    #Argument: the DNN model.
    """
    for weight in model.parameters():  # parameters() is a method of nn.Module
        weight.requires_grad = False
132 |
133 |
def Unfreeze_layer(layer):
    """
    Turn gradient tracking back on for every parameter of ``layer``.
    #Argument: the layer (anything exposing ``parameters()``) to unfreeze.
    """
    for weight in layer.parameters():
        weight.requires_grad = True
141 |
def Unfreeze_All(model):
    """
    Turn gradient tracking back on for every parameter of ``model``.
    #Argument: DNN model.
    """
    for weight in model.parameters():
        weight.requires_grad = True
--------------------------------------------------------------------------------
/fig1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BaoWangMath/EnResNet/edc4faeefa66cc02c8f1ecda1b52d6e7a0d25b75/fig1.png
--------------------------------------------------------------------------------
/fig2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BaoWangMath/EnResNet/edc4faeefa66cc02c8f1ecda1b52d6e7a0d25b75/fig2.png
--------------------------------------------------------------------------------
/fig4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BaoWangMath/EnResNet/edc4faeefa66cc02c8f1ecda1b52d6e7a0d25b75/fig4.png
--------------------------------------------------------------------------------