├── A3C ├── adv_attacks │ ├── __init__.py │ ├── __pycache__ │ │ ├── PGD.cpython-37.pyc │ │ ├── base.cpython-37.pyc │ │ ├── __init__.cpython-37.pyc │ │ ├── adv_model.cpython-37.pyc │ │ ├── adversary.cpython-37.pyc │ │ └── gradient_method.cpython-37.pyc │ ├── base.py │ ├── PGD.py │ ├── adversary.py │ ├── adv_model.py │ └── gradient_method.py ├── videos │ ├── PongNoFrameskip-v4_trained.gif │ ├── PongNoFrameskip-v4_robust_pgd_0.0118.gif │ └── PongNoFrameskip-v4_trained_pgd_0.0039.gif ├── utils.py ├── config.json ├── ibp.py ├── model.py ├── Result_viewer.ipynb ├── player_util.py ├── main.py ├── environment.py ├── shared_optim.py ├── test.py ├── train.py ├── pong_absolute_worst_case.ipynb └── evaluate.py ├── DQN ├── adv_attacks │ ├── __init__.py │ ├── __pycache__ │ │ ├── PGD.cpython-37.pyc │ │ ├── base.cpython-37.pyc │ │ ├── __init__.cpython-37.pyc │ │ ├── adv_model.cpython-37.pyc │ │ ├── adversary.cpython-37.pyc │ │ └── gradient_method.cpython-37.pyc │ ├── base.py │ ├── PGD.py │ ├── adversary.py │ ├── adv_model.py │ └── gradient_method.py ├── utils.py ├── plotter.py ├── config.json ├── model.py ├── ibp.py ├── Result_viewer.ipynb ├── main.py ├── environment.py └── train.py ├── requirements.txt ├── README.md └── LICENSE /A3C/adv_attacks/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /DQN/adv_attacks/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /A3C/videos/PongNoFrameskip-v4_trained.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tuomaso/radial_rl/HEAD/A3C/videos/PongNoFrameskip-v4_trained.gif -------------------------------------------------------------------------------- /A3C/adv_attacks/__pycache__/PGD.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tuomaso/radial_rl/HEAD/A3C/adv_attacks/__pycache__/PGD.cpython-37.pyc -------------------------------------------------------------------------------- /A3C/adv_attacks/__pycache__/base.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tuomaso/radial_rl/HEAD/A3C/adv_attacks/__pycache__/base.cpython-37.pyc -------------------------------------------------------------------------------- /DQN/adv_attacks/__pycache__/PGD.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tuomaso/radial_rl/HEAD/DQN/adv_attacks/__pycache__/PGD.cpython-37.pyc -------------------------------------------------------------------------------- /DQN/adv_attacks/__pycache__/base.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tuomaso/radial_rl/HEAD/DQN/adv_attacks/__pycache__/base.cpython-37.pyc -------------------------------------------------------------------------------- /A3C/adv_attacks/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tuomaso/radial_rl/HEAD/A3C/adv_attacks/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- 
/A3C/adv_attacks/__pycache__/adv_model.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tuomaso/radial_rl/HEAD/A3C/adv_attacks/__pycache__/adv_model.cpython-37.pyc -------------------------------------------------------------------------------- /A3C/adv_attacks/__pycache__/adversary.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tuomaso/radial_rl/HEAD/A3C/adv_attacks/__pycache__/adversary.cpython-37.pyc -------------------------------------------------------------------------------- /A3C/videos/PongNoFrameskip-v4_robust_pgd_0.0118.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tuomaso/radial_rl/HEAD/A3C/videos/PongNoFrameskip-v4_robust_pgd_0.0118.gif -------------------------------------------------------------------------------- /A3C/videos/PongNoFrameskip-v4_trained_pgd_0.0039.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tuomaso/radial_rl/HEAD/A3C/videos/PongNoFrameskip-v4_trained_pgd_0.0039.gif -------------------------------------------------------------------------------- /DQN/adv_attacks/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tuomaso/radial_rl/HEAD/DQN/adv_attacks/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /DQN/adv_attacks/__pycache__/adv_model.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tuomaso/radial_rl/HEAD/DQN/adv_attacks/__pycache__/adv_model.cpython-37.pyc -------------------------------------------------------------------------------- /DQN/adv_attacks/__pycache__/adversary.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tuomaso/radial_rl/HEAD/DQN/adv_attacks/__pycache__/adversary.cpython-37.pyc -------------------------------------------------------------------------------- /A3C/adv_attacks/__pycache__/gradient_method.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tuomaso/radial_rl/HEAD/A3C/adv_attacks/__pycache__/gradient_method.cpython-37.pyc -------------------------------------------------------------------------------- /DQN/adv_attacks/__pycache__/gradient_method.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tuomaso/radial_rl/HEAD/DQN/adv_attacks/__pycache__/gradient_method.cpython-37.pyc -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.18.1 2 | gym==0.17.1 3 | atari-py==0.2.6 4 | Pillow==8.3.2 5 | jupyter==1.0.0 6 | notebook==6.4.1 7 | opencv-python==4.2.0.34 8 | setproctitle==1.1.10 9 | matplotlib==3.2.1 10 | -------------------------------------------------------------------------------- /DQN/utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import numpy as np 3 | import torch 4 | import json 5 | import logging 6 | 7 | 8 | 
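# (Editorial note, illustrative only.) The helpers below are shared utilities; their actual call
# sites are in the DQN training scripts, which are not shown at this point in the dump. A typical
# usage might look roughly like:
#   setup_logger('DQN_log', 'logs/DQN_log')            # log to file and stdout
#   logging.getLogger('DQN_log').info('starting run')
#   env_conf = read_config('config.json')['Pong']      # per-game crop settings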
def setup_logger(logger_name, log_file, level=logging.INFO): 9 | l = logging.getLogger(logger_name) 10 | formatter = logging.Formatter('%(asctime)s : %(message)s') 11 | fileHandler = logging.FileHandler(log_file, mode='w') 12 | fileHandler.setFormatter(formatter) 13 | streamHandler = logging.StreamHandler() 14 | streamHandler.setFormatter(formatter) 15 | 16 | l.setLevel(level) 17 | l.addHandler(fileHandler) 18 | l.addHandler(streamHandler) 19 | 20 | 21 | def read_config(file_path): 22 | """Read JSON config.""" 23 | json_object = json.load(open(file_path, 'r')) 24 | return json_object 25 | 26 | 27 | -------------------------------------------------------------------------------- /DQN/plotter.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import numpy as np 4 | import matplotlib.pyplot as plt 5 | 6 | 7 | def _moving_avg(data, window=1000): 8 | cum_data = np.cumsum(data) 9 | return (cum_data[window:] - cum_data[:-window]) / window 10 | 11 | def plot(frame_idx, rewards, losses, standard_losses, worst_case_losses, args, start_time): 12 | f = plt.figure(figsize=(20,5)) 13 | ax = f.add_subplot(131) 14 | ax.title.set_text('frame {}. last 10 avg reward: {}'.format(frame_idx, np.mean(rewards[-10:]))) 15 | ax.plot(_moving_avg(rewards, window=10), label='training reward') 16 | ax.legend() 17 | 18 | 19 | ax2 = f.add_subplot(132) 20 | ax2.title.set_text('Average of loss of last 1000 steps') 21 | ax2.plot(_moving_avg(losses), label='loss') 22 | ax2.plot(_moving_avg(standard_losses), label='standard') 23 | ax2.plot(_moving_avg(worst_case_losses), label='worst_case') 24 | 25 | ax2.set_yscale('log') 26 | ax2.legend() 27 | if not os.path.exists('figures'): 28 | os.mkdir('figures') 29 | plt.savefig('figures/{}_training_{}.png'.format(args.env, start_time)) 30 | plt.close() -------------------------------------------------------------------------------- /A3C/utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import numpy as np 3 | import torch 4 | import json 5 | import logging 6 | 7 | 8 | def setup_logger(logger_name, log_file, level=logging.INFO): 9 | l = logging.getLogger(logger_name) 10 | formatter = logging.Formatter('%(asctime)s : %(message)s') 11 | fileHandler = logging.FileHandler(log_file, mode='w') 12 | fileHandler.setFormatter(formatter) 13 | streamHandler = logging.StreamHandler() 14 | streamHandler.setFormatter(formatter) 15 | 16 | l.setLevel(level) 17 | l.addHandler(fileHandler) 18 | l.addHandler(streamHandler) 19 | 20 | 21 | def read_config(file_path): 22 | """Read JSON config.""" 23 | json_object = json.load(open(file_path, 'r')) 24 | return json_object 25 | 26 | 27 | def ensure_shared_grads(model, shared_model, gpu=False): 28 | for param, shared_param in zip(model.parameters(), 29 | shared_model.parameters()): 30 | if shared_param.grad is not None and not gpu: 31 | return 32 | elif not gpu: 33 | shared_param._grad = param.grad 34 | else: 35 | shared_param._grad = param.grad.cpu() 36 | 37 | 38 | -------------------------------------------------------------------------------- /A3C/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "Default": { 3 | "crop1": 34, 4 | "crop2": 34, 5 | "dimension2": 80 6 | }, 7 | "Asteroids": { 8 | "crop1": 16, 9 | "crop2": 34, 10 | "dimension2": 94 11 | }, 12 | "BeamRider": { 13 | "crop1": 20, 14 | "crop2": 20, 15 | "dimension2": 80 16 | }, 17 | "Breakout": { 18 | "crop1": 34, 
19 | "crop2": 34, 20 | "dimension2": 80 21 | }, 22 | "Centipede": { 23 | "crop1": 36, 24 | "crop2": 56, 25 | "dimension2": 90 26 | }, 27 | "MsPacman": { 28 | "crop1": 2, 29 | "crop2": 10, 30 | "dimension2": 84 31 | }, 32 | "Pong": { 33 | "crop1": 34, 34 | "crop2": 34, 35 | "dimension2": 80 36 | }, 37 | "Seaquest": { 38 | "crop1": 30, 39 | "crop2": 30, 40 | "dimension2": 80 41 | }, 42 | "SpaceInvaders": { 43 | "crop1": 8, 44 | "crop2": 36, 45 | "dimension2": 94 46 | }, 47 | "VideoPinball": { 48 | "crop1": 42, 49 | "crop2": 60, 50 | "dimension2": 89 51 | }, 52 | "Qbert": { 53 | "crop1": 12, 54 | "crop2": 40, 55 | "dimension2": 94 56 | }, 57 | "Boxing": { 58 | "crop1": 30, 59 | "crop2": 30, 60 | "dimension2": 80 61 | } 62 | } 63 | 64 | -------------------------------------------------------------------------------- /DQN/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "Default": { 3 | "crop1": 34, 4 | "crop2": 34, 5 | "dimension2": 80 6 | }, 7 | "Asteroids": { 8 | "crop1": 16, 9 | "crop2": 34, 10 | "dimension2": 94 11 | }, 12 | "BeamRider": { 13 | "crop1": 20, 14 | "crop2": 20, 15 | "dimension2": 80 16 | }, 17 | "Breakout": { 18 | "crop1": 34, 19 | "crop2": 34, 20 | "dimension2": 80 21 | }, 22 | "Centipede": { 23 | "crop1": 36, 24 | "crop2": 56, 25 | "dimension2": 90 26 | }, 27 | "MsPacman": { 28 | "crop1": 2, 29 | "crop2": 10, 30 | "dimension2": 84 31 | }, 32 | "Pong": { 33 | "crop1": 34, 34 | "crop2": 34, 35 | "dimension2": 80 36 | }, 37 | "Seaquest": { 38 | "crop1": 30, 39 | "crop2": 30, 40 | "dimension2": 80 41 | }, 42 | "SpaceInvaders": { 43 | "crop1": 8, 44 | "crop2": 36, 45 | "dimension2": 94 46 | }, 47 | "VideoPinball": { 48 | "crop1": 42, 49 | "crop2": 60, 50 | "dimension2": 89 51 | }, 52 | "Qbert": { 53 | "crop1": 12, 54 | "crop2": 40, 55 | "dimension2": 94 56 | }, 57 | "Boxing": { 58 | "crop1": 30, 59 | "crop2": 30, 60 | "dimension2": 80 61 | } 62 | } 63 | 64 | -------------------------------------------------------------------------------- /DQN/model.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import random 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | 7 | class CnnDQN(nn.Module): 8 | def __init__(self, num_channels, action_space): 9 | super(CnnDQN, self).__init__() 10 | 11 | self.num_actions = action_space.n 12 | self.model = nn.Sequential( 13 | nn.Conv2d(num_channels, 32, kernel_size=8, stride=4), 14 | nn.ReLU(), 15 | nn.Conv2d(32, 64, kernel_size=4, stride=2), 16 | nn.ReLU(), 17 | nn.Conv2d(64, 64, kernel_size=3, stride=1), 18 | nn.ReLU(), 19 | nn.Flatten(start_dim=1), 20 | nn.Linear(64*6*6, 512), 21 | nn.ReLU(), 22 | nn.Linear(512, self.num_actions) 23 | ) 24 | self.train() 25 | 26 | def forward(self, x): 27 | x = self.model(x) 28 | return x 29 | 30 | def act(self, state, epsilon): 31 | with torch.no_grad(): 32 | if random.random() > epsilon: 33 | q_value = self.forward(state) 34 | #print(q_value) 35 | action = torch.argmax(q_value, dim=1)[0] 36 | else: 37 | action = random.randrange(self.num_actions) 38 | return action -------------------------------------------------------------------------------- /A3C/ibp.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | def initial_bounds(x0, epsilon): 6 | ''' 7 | x0 = input, b x c x h x w 8 | ''' 9 | upper = x0+epsilon 10 | lower = x0-epsilon 11 | 
return upper, lower 12 | 13 | def weighted_bound(layer, prev_upper, prev_lower): 14 | prev_mu = (prev_upper + prev_lower)/2 15 | prev_r = (prev_upper - prev_lower)/2 16 | mu = layer(prev_mu) 17 | if type(layer)==nn.Linear: 18 | r = F.linear(prev_r, torch.abs(layer.weight)) 19 | elif type(layer)==nn.Conv2d: 20 | r = F.conv2d(prev_r, torch.abs(layer.weight), stride=layer.stride, padding=layer.padding) 21 | 22 | upper = mu + r 23 | lower = mu - r 24 | return upper, lower 25 | 26 | def activation_bound(layer, prev_upper, prev_lower): 27 | upper = layer(prev_upper) 28 | lower = layer(prev_lower) 29 | return upper, lower 30 | 31 | def network_bounds(model, x0, epsilon): 32 | ''' 33 | Get interval bound propagation upper and lower bounds for the activations of a model. 34 | 35 | model: a nn.Sequential module 36 | x0: input, b x input_shape 37 | epsilon: float, the l_inf perturbation radius the bounds are computed over 38 | ''' 39 | upper, lower = initial_bounds(x0, epsilon) 40 | for layer in model.modules(): 41 | if type(layer) in (nn.Sequential,): 42 | pass 43 | elif type(layer) in (nn.ReLU, nn.Sigmoid, nn.Tanh, nn.MaxPool2d, nn.Flatten): 44 | upper, lower = activation_bound(layer, upper, lower) 45 | elif type(layer) in (nn.Linear, nn.Conv2d): 46 | upper, lower = weighted_bound(layer, upper, lower) 47 | else: 48 | print('Unsupported layer:', type(layer)) 49 | return upper, lower 50 | -------------------------------------------------------------------------------- /DQN/ibp.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | def initial_bounds(x0, epsilon): 6 | ''' 7 | x0 = input, b x c x h x w 8 | ''' 9 | upper = x0+epsilon 10 | lower = x0-epsilon 11 | return upper, lower 12 | 13 | def weighted_bound(layer, prev_upper, prev_lower): 14 | prev_mu = (prev_upper + prev_lower)/2 15 | prev_r = (prev_upper - prev_lower)/2 16 | mu = layer(prev_mu) 17 | if type(layer)==nn.Linear: 18 | r = F.linear(prev_r, torch.abs(layer.weight)) 19 | elif type(layer)==nn.Conv2d: 20 | r = F.conv2d(prev_r, torch.abs(layer.weight), stride=layer.stride, padding=layer.padding) 21 | 22 | upper = mu + r 23 | lower = mu - r 24 | return upper, lower 25 | 26 | def activation_bound(layer, prev_upper, prev_lower): 27 | upper = layer(prev_upper) 28 | lower = layer(prev_lower) 29 | return upper, lower 30 | 31 | def network_bounds(model, x0, epsilon): 32 | ''' 33 | Get interval bound propagation upper and lower bounds for the activations of a model. 34 | 35 | model: a nn.Sequential module 36 | x0: input, b x input_shape 37 | epsilon: float, the l_inf perturbation radius the bounds are computed over 38 | ''' 39 | upper, lower = initial_bounds(x0, epsilon) 40 | for layer in model.modules(): 41 | if type(layer) in (nn.Sequential,): 42 | pass 43 | elif type(layer) in (nn.ReLU, nn.Sigmoid, nn.Tanh, nn.MaxPool2d, nn.Flatten): 44 | upper, lower = activation_bound(layer, upper, lower) 45 | elif type(layer) in (nn.Linear, nn.Conv2d): 46 | upper, lower = weighted_bound(layer, upper, lower) 47 | else: 48 | print('Unsupported layer:', type(layer)) 49 | return upper, lower 50 | -------------------------------------------------------------------------------- /A3C/model.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | 6 | 7 | class A3Cff(torch.nn.Module): 8 | def __init__(self, num_inputs, action_space): 9 | super(A3Cff, 
self).__init__() 10 | self.model = nn.Sequential( 11 | nn.Conv2d(num_inputs, 32, kernel_size=8, stride=4), 12 | nn.ReLU(), 13 | nn.Conv2d(32, 64, kernel_size=4, stride=2), 14 | nn.ReLU(), 15 | nn.Conv2d(64, 64, kernel_size=3, stride=1), 16 | nn.ReLU(), 17 | nn.Flatten(start_dim=1), 18 | nn.Linear(64*6*6, 512), 19 | nn.ReLU(), 20 | nn.Linear(512, action_space.n + 1) 21 | ) 22 | self.train() 23 | 24 | def forward(self, inputs): 25 | x = self.model(inputs) 26 | value = x[:, 0:1] 27 | actions = x[:, 1:] 28 | 29 | return value, actions 30 | 31 | 32 | class A3Cff_old(torch.nn.Module): 33 | def __init__(self, num_inputs, action_space): 34 | super(A3Cff, self).__init__() 35 | self.model = nn.Sequential(nn.Conv2d(num_inputs, 32, 5, stride=1, padding=2), 36 | nn.MaxPool2d(2, 2), 37 | nn.ReLU(), 38 | nn.Conv2d(32, 32, 5, stride=1, padding=1), 39 | nn.MaxPool2d(2, 2), 40 | nn.ReLU(), 41 | nn.Conv2d(32, 64, 4, stride=1, padding=1), 42 | nn.MaxPool2d(2, 2), 43 | nn.ReLU(), 44 | nn.Conv2d(64, 64, 3, stride=1, padding=1), 45 | nn.MaxPool2d(2, 2), 46 | nn.ReLU(), 47 | nn.Flatten(), 48 | nn.Linear(1024, 512), 49 | nn.ReLU(), 50 | nn.Linear(512, action_space.n + 1) 51 | ) 52 | self.train() 53 | 54 | def forward(self, inputs): 55 | x = self.model(inputs) 56 | value = x[:, 0:1] 57 | actions = x[:, 1:] 58 | 59 | return value, actions 60 | -------------------------------------------------------------------------------- /A3C/adv_attacks/base.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 - 2018 Baidu Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """ 15 | The base model of the model. 16 | """ 17 | import logging 18 | from abc import ABCMeta 19 | from abc import abstractmethod 20 | 21 | import numpy as np 22 | 23 | 24 | class Attack(object): 25 | """ 26 | Abstract base class for adversarial attacks. `Attack` represent an 27 | adversarial attack which search an adversarial example. subclass should 28 | implement the _apply() method. 29 | 30 | Args: 31 | model(Model): an instance of the class advbox.base.Model. 32 | 33 | """ 34 | __metaclass__ = ABCMeta 35 | 36 | def __init__(self, model): 37 | self.model = model 38 | 39 | def __call__(self, adversary, **kwargs): 40 | """ 41 | Generate the adversarial sample. 42 | 43 | Args: 44 | adversary(object): The adversary object. 45 | **kwargs: Other named arguments. 46 | """ 47 | self._preprocess(adversary) 48 | return self._apply(adversary, **kwargs) 49 | 50 | @abstractmethod 51 | def _apply(self, adversary, **kwargs): 52 | """ 53 | Search an adversarial example. 54 | 55 | Args: 56 | adversary(object): The adversary object. 57 | **kwargs: Other named arguments. 58 | """ 59 | raise NotImplementedError 60 | 61 | def _preprocess(self, adversary): 62 | """ 63 | Preprocess the adversary object. 
64 | 65 | :param adversary: adversary 66 | :return: None 67 | """ 68 | #assert self.model.channel_axis() == adversary.original.ndim 69 | 70 | if adversary.original_label is None: 71 | adversary.original_label = np.argmax( 72 | self.model.predict(adversary.original)) 73 | if adversary.is_targeted_attack and adversary.target_label is None: 74 | if adversary.target is None: 75 | raise ValueError( 76 | 'When adversary.is_targeted_attack is true, ' 77 | 'adversary.target_label or adversary.target must be set.') 78 | else: 79 | adversary.target_label = np.argmax( 80 | self.model.predict(adversary.target)) 81 | 82 | logging.info('adversary:' 83 | '\n original_label: {}' 84 | '\n target_label: {}' 85 | '\n is_targeted_attack: {}' 86 | ''.format(adversary.original_label, adversary.target_label, 87 | adversary.is_targeted_attack)) 88 | -------------------------------------------------------------------------------- /DQN/adv_attacks/base.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 - 2018 Baidu Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """ 15 | The base model of the model. 16 | """ 17 | import logging 18 | from abc import ABCMeta 19 | from abc import abstractmethod 20 | 21 | import numpy as np 22 | 23 | 24 | class Attack(object): 25 | """ 26 | Abstract base class for adversarial attacks. `Attack` represent an 27 | adversarial attack which search an adversarial example. subclass should 28 | implement the _apply() method. 29 | 30 | Args: 31 | model(Model): an instance of the class advbox.base.Model. 32 | 33 | """ 34 | __metaclass__ = ABCMeta 35 | 36 | def __init__(self, model): 37 | self.model = model 38 | 39 | def __call__(self, adversary, **kwargs): 40 | """ 41 | Generate the adversarial sample. 42 | 43 | Args: 44 | adversary(object): The adversary object. 45 | **kwargs: Other named arguments. 46 | """ 47 | self._preprocess(adversary) 48 | return self._apply(adversary, **kwargs) 49 | 50 | @abstractmethod 51 | def _apply(self, adversary, **kwargs): 52 | """ 53 | Search an adversarial example. 54 | 55 | Args: 56 | adversary(object): The adversary object. 57 | **kwargs: Other named arguments. 58 | """ 59 | raise NotImplementedError 60 | 61 | def _preprocess(self, adversary): 62 | """ 63 | Preprocess the adversary object. 
64 | 65 | :param adversary: adversary 66 | :return: None 67 | """ 68 | #assert self.model.channel_axis() == adversary.original.ndim 69 | 70 | if adversary.original_label is None: 71 | adversary.original_label = np.argmax( 72 | self.model.predict(adversary.original)) 73 | if adversary.is_targeted_attack and adversary.target_label is None: 74 | if adversary.target is None: 75 | raise ValueError( 76 | 'When adversary.is_targeted_attack is true, ' 77 | 'adversary.target_label or adversary.target must be set.') 78 | else: 79 | adversary.target_label = np.argmax( 80 | self.model.predict(adversary.target)) 81 | 82 | logging.info('adversary:' 83 | '\n original_label: {}' 84 | '\n target_label: {}' 85 | '\n is_targeted_attack: {}' 86 | ''.format(adversary.original_label, adversary.target_label, 87 | adversary.is_targeted_attack)) 88 | -------------------------------------------------------------------------------- /A3C/adv_attacks/PGD.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | import logging 4 | from collections import Iterable 5 | 6 | import numpy as np 7 | 8 | from adv_attacks.base import Attack 9 | 10 | class PGDAttack(Attack): 11 | """ 12 | This class implements gradient attack method, and is the base of FGSM, BIM, 13 | ILCM, etc. 14 | """ 15 | 16 | def __init__(self, model, support_targeted=True): 17 | """ 18 | :param model(model): The model to be attacked. 19 | :param support_targeted(bool): Does this attack method support targeted. 20 | """ 21 | super(PGDAttack, self).__init__(model) 22 | self.support_targeted = support_targeted 23 | 24 | def _apply(self, 25 | adversary, 26 | norm_ord = np.inf, 27 | epsilon = 0.01, 28 | steps = 10, 29 | relative_step_size = 0.2): 30 | """ 31 | Apply the gradient attack method. 32 | :param adversary(Adversary): 33 | The Adversary object. 34 | :param norm_ord(int): 35 | Order of the norm, such as np.inf, 1, 2, etc. It can't be 0. 36 | :param epsilons(list|tuple|int): 37 | Attack step size (input variation). 38 | Largest step size if epsilons is not iterable. 39 | :param steps: 40 | The number of attack iteration. 41 | :param epsilon_steps: 42 | The number of Epsilons' iteration for each attack iteration. 43 | :return: 44 | adversary(Adversary): The Adversary object. 45 | """ 46 | 47 | if norm_ord != np.inf: 48 | raise ValueError("only linf norm is supported!") 49 | 50 | if not self.support_targeted: 51 | if adversary.is_targeted_attack: 52 | raise ValueError( 53 | "This attack method doesn't support targeted attack!") 54 | 55 | logging.info('epsilon={0},steps={1}'. 
56 | format(epsilon,steps)) 57 | 58 | pre_label = adversary.original_label 59 | min_, max_ = self.model.bounds() 60 | #project to correct space 61 | min_ = np.maximum(min_, adversary.original-epsilon) 62 | max_ = np.minimum(max_, adversary.original+epsilon) 63 | 64 | #assert self.model.channel_axis() == adversary.original.ndim 65 | assert (self.model.channel_axis() == 1 or 66 | self.model.channel_axis() == adversary.original.shape[0] or 67 | self.model.channel_axis() == adversary.original.shape[-1]) 68 | 69 | 70 | 71 | step = 1 72 | adv_img = np.copy(adversary.original) 73 | if epsilon == 0.0: 74 | adversary.try_accept_the_example(adv_img, adv_label) 75 | return adversary 76 | for i in range(steps): 77 | if adversary.is_targeted_attack: 78 | gradient = +self.model.gradient(adv_img, 79 | adversary.target_label) 80 | else: 81 | gradient = -self.model.gradient(adv_img, 82 | adversary.original_label) 83 | if norm_ord == np.inf: 84 | gradient_norm = np.sign(gradient) 85 | else: 86 | gradient_norm = gradient / self._norm( 87 | gradient, ord=norm_ord) 88 | 89 | adv_img = adv_img + epsilon * relative_step_size * gradient_norm 90 | 91 | adv_img = np.clip(adv_img, min_, max_) 92 | step += 1 93 | 94 | 95 | adv_label = np.argmax(self.model.predict(adv_img)) 96 | logging.info('step={}, epsilon = {:.5f}, pre_label = {}, adv_label={} logits={}'. 97 | format(step, epsilon, pre_label,adv_label,self.model.predict(adv_img)[adv_label])) 98 | 99 | adversary.try_accept_the_example(adv_img, adv_label) 100 | 101 | return adversary 102 | 103 | @staticmethod 104 | def _norm(a, ord): 105 | if a.ndim == 1: 106 | return np.linalg.norm(a, ord=ord) 107 | if a.ndim == a.shape[0]: 108 | norm_shape = (a.ndim, reduce(np.dot, a.shape[1:])) 109 | norm_axis = 1 110 | else: 111 | norm_shape = (reduce(np.dot, a.shape[:-1]), a.ndim) 112 | norm_axis = 0 113 | return np.linalg.norm(a.reshape(norm_shape), ord=ord, axis=norm_axis) -------------------------------------------------------------------------------- /DQN/adv_attacks/PGD.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | import logging 4 | from collections import Iterable 5 | 6 | import numpy as np 7 | 8 | from adv_attacks.base import Attack 9 | 10 | class PGDAttack(Attack): 11 | """ 12 | This class implements gradient attack method, and is the base of FGSM, BIM, 13 | ILCM, etc. 14 | """ 15 | 16 | def __init__(self, model, support_targeted=True): 17 | """ 18 | :param model(model): The model to be attacked. 19 | :param support_targeted(bool): Does this attack method support targeted. 20 | """ 21 | super(PGDAttack, self).__init__(model) 22 | self.support_targeted = support_targeted 23 | 24 | def _apply(self, 25 | adversary, 26 | norm_ord = np.inf, 27 | epsilon = 0.01, 28 | steps = 10, 29 | relative_step_size = 0.2): 30 | """ 31 | Apply the gradient attack method. 32 | :param adversary(Adversary): 33 | The Adversary object. 34 | :param norm_ord(int): 35 | Order of the norm, such as np.inf, 1, 2, etc. It can't be 0. 36 | :param epsilons(list|tuple|int): 37 | Attack step size (input variation). 38 | Largest step size if epsilons is not iterable. 39 | :param steps: 40 | The number of attack iteration. 41 | :param epsilon_steps: 42 | The number of Epsilons' iteration for each attack iteration. 43 | :return: 44 | adversary(Adversary): The Adversary object. 
45 | """ 46 | 47 | if norm_ord != np.inf: 48 | raise ValueError("only linf norm is supported!") 49 | 50 | if not self.support_targeted: 51 | if adversary.is_targeted_attack: 52 | raise ValueError( 53 | "This attack method doesn't support targeted attack!") 54 | 55 | logging.info('epsilon={0},steps={1}'. 56 | format(epsilon,steps)) 57 | 58 | pre_label = adversary.original_label 59 | min_, max_ = self.model.bounds() 60 | #project to correct space 61 | min_ = np.maximum(min_, adversary.original-epsilon) 62 | max_ = np.minimum(max_, adversary.original+epsilon) 63 | 64 | #assert self.model.channel_axis() == adversary.original.ndim 65 | assert (self.model.channel_axis() == 1 or 66 | self.model.channel_axis() == adversary.original.shape[0] or 67 | self.model.channel_axis() == adversary.original.shape[-1]) 68 | 69 | 70 | 71 | step = 1 72 | adv_img = np.copy(adversary.original) 73 | if epsilon == 0.0: 74 | adversary.try_accept_the_example(adv_img, adv_label) 75 | return adversary 76 | for i in range(steps): 77 | if adversary.is_targeted_attack: 78 | gradient = +self.model.gradient(adv_img, 79 | adversary.target_label) 80 | else: 81 | gradient = -self.model.gradient(adv_img, 82 | adversary.original_label) 83 | if norm_ord == np.inf: 84 | gradient_norm = np.sign(gradient) 85 | else: 86 | gradient_norm = gradient / self._norm( 87 | gradient, ord=norm_ord) 88 | 89 | adv_img = adv_img + epsilon * relative_step_size * gradient_norm 90 | 91 | adv_img = np.clip(adv_img, min_, max_) 92 | step += 1 93 | 94 | 95 | adv_label = np.argmax(self.model.predict(adv_img)) 96 | logging.info('step={}, epsilon = {:.5f}, pre_label = {}, adv_label={} logits={}'. 97 | format(step, epsilon, pre_label,adv_label,self.model.predict(adv_img)[adv_label])) 98 | 99 | adversary.try_accept_the_example(adv_img, adv_label) 100 | 101 | return adversary 102 | 103 | @staticmethod 104 | def _norm(a, ord): 105 | if a.ndim == 1: 106 | return np.linalg.norm(a, ord=ord) 107 | if a.ndim == a.shape[0]: 108 | norm_shape = (a.ndim, reduce(np.dot, a.shape[1:])) 109 | norm_axis = 1 110 | else: 111 | norm_shape = (reduce(np.dot, a.shape[:-1]), a.ndim) 112 | norm_axis = 0 113 | return np.linalg.norm(a.reshape(norm_shape), ord=ord, axis=norm_axis) -------------------------------------------------------------------------------- /A3C/Result_viewer.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import numpy as np" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 2, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "path = 'figures/BankHeistNoFrameskip-v4_robust/'" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 3, 24 | "metadata": {}, 25 | "outputs": [ 26 | { 27 | "name": "stdout", 28 | "output_type": "stream", 29 | "text": [ 30 | "852.0\n", 31 | "[7.33484833]\n" 32 | ] 33 | } 34 | ], 35 | "source": [ 36 | "nominal = np.load(path+'nominal.npy')\n", 37 | "print(np.mean(nominal))\n", 38 | "sem = np.std(nominal)/np.sqrt(np.shape(nominal))\n", 39 | "print(sem)" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 4, 45 | "metadata": {}, 46 | "outputs": [ 47 | { 48 | "name": "stdout", 49 | "output_type": "stream", 50 | "text": [ 51 | "[0.00117647 0.00392157 0.01176471 0.03137255]\n", 52 | "[846. 863.5 854. 
502.5]\n", 53 | "[ 3.63318042 9.59882805 8.0746517 61.91879763]\n" 54 | ] 55 | } 56 | ], 57 | "source": [ 58 | "eps = np.load(path + 'fgsm_epsilons.npy')\n", 59 | "print(eps)\n", 60 | "vals = np.load(path + 'fgsm.npy')\n", 61 | "print(np.mean(vals, axis=1))\n", 62 | "sem = np.std(vals, axis=1)/np.sqrt(np.shape(vals)[1])\n", 63 | "print(sem)" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": 5, 69 | "metadata": {}, 70 | "outputs": [ 71 | { 72 | "name": "stdout", 73 | "output_type": "stream", 74 | "text": [ 75 | "[0.00117647 0.00392157 0.01176471 0.03137255]\n", 76 | "[855. 848. 827. 4.]\n", 77 | "[7.29725976 3.84707681 6.00416522 1.78885438]\n" 78 | ] 79 | } 80 | ], 81 | "source": [ 82 | "eps = np.load(path + 'pgd_epsilons.npy')\n", 83 | "print(eps)\n", 84 | "vals = np.load(path + 'pgd.npy')\n", 85 | "print(np.mean(vals, axis=1))\n", 86 | "sem = np.std(vals, axis=1)/np.sqrt(np.shape(vals)[1])\n", 87 | "print(sem)" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": 6, 93 | "metadata": {}, 94 | "outputs": [ 95 | { 96 | "name": "stdout", 97 | "output_type": "stream", 98 | "text": [ 99 | "[0.00117647 0.00392157 0.01176471 0.03137255]\n", 100 | "[856. 832.5 1.5 0. ]\n", 101 | "[10.10940156 4.05431869 0.79843597 0. ]\n" 102 | ] 103 | } 104 | ], 105 | "source": [ 106 | "eps = np.load(path + 'greedy_wc_epsilons.npy')\n", 107 | "print(eps)\n", 108 | "vals = np.load(path + 'greedy_wc.npy')\n", 109 | "print(np.mean(vals, axis=1))\n", 110 | "sem = np.std(vals, axis=1)/np.sqrt(np.shape(vals)[1])\n", 111 | "print(sem)" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": 7, 117 | "metadata": {}, 118 | "outputs": [ 119 | { 120 | "name": "stdout", 121 | "output_type": "stream", 122 | "text": [ 123 | "[0.00117647 0.00392157 0.01176471 0.03137255]\n", 124 | "[0.99955922 0.99091493 0. 0. ]\n", 125 | "[0.00015696 0.00174197 0. 0. ]\n" 126 | ] 127 | } 128 | ], 129 | "source": [ 130 | "eps = np.load(path + 'acr_epsilons.npy')\n", 131 | "print(eps)\n", 132 | "vals = np.load(path + 'acr.npy')\n", 133 | "print(np.mean(vals, axis=1))\n", 134 | "sem = np.std(vals, axis=1)/np.sqrt(np.shape(vals)[1])\n", 135 | "print(sem)" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": 8, 141 | "metadata": {}, 142 | "outputs": [ 143 | { 144 | "name": "stdout", 145 | "output_type": "stream", 146 | "text": [ 147 | "[0.01 0.02 0.05 0.1 ]\n", 148 | "[259.5 146. 54. 45. 
]\n", 149 | "[21.18342512 15.03994681 6.5345237 6.38357267]\n" 150 | ] 151 | } 152 | ], 153 | "source": [ 154 | "eps = np.load(path + 'action_pert_epsilons.npy')\n", 155 | "print(eps)\n", 156 | "vals = np.load(path + 'action_pert.npy')\n", 157 | "print(np.mean(vals, axis=1))\n", 158 | "sem = np.std(vals, axis=1)/np.sqrt(np.shape(vals)[1])\n", 159 | "print(sem)" 160 | ] 161 | }, 162 | { 163 | "cell_type": "code", 164 | "execution_count": null, 165 | "metadata": {}, 166 | "outputs": [], 167 | "source": [] 168 | } 169 | ], 170 | "metadata": { 171 | "kernelspec": { 172 | "display_name": "Python 3", 173 | "language": "python", 174 | "name": "python3" 175 | }, 176 | "language_info": { 177 | "codemirror_mode": { 178 | "name": "ipython", 179 | "version": 3 180 | }, 181 | "file_extension": ".py", 182 | "mimetype": "text/x-python", 183 | "name": "python", 184 | "nbconvert_exporter": "python", 185 | "pygments_lexer": "ipython3", 186 | "version": "3.7.6" 187 | } 188 | }, 189 | "nbformat": 4, 190 | "nbformat_minor": 4 191 | } 192 | -------------------------------------------------------------------------------- /DQN/Result_viewer.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import numpy as np" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 2, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "#path = 'figures/BankHeistNoFrameskip-v4_2020-05-25_10:08:01_last/' #sadqn 0.02\n", 19 | "#path = 'figures/PongNoFrameskip-v4_2020-05-25_23:17:07_last/'\n", 20 | "#path = 'figures/PongNoFrameskip-v4_2020-05-26_11:55:51_best/'\n", 21 | "#path = 'figures/RoadRunnerNoFrameskip-v4_2020-05-25_10:08:46_last/' #sadqn 0.02\n", 22 | "#path = 'figures/PongNoFrameskip-v4_2020-05-29_13:16:04_last/' #sadqn 0.02\n", 23 | "path = 'figures/RoadRunnerNoFrameskip-v4_robust_ours/'" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 3, 29 | "metadata": {}, 30 | "outputs": [ 31 | { 32 | "name": "stdout", 33 | "output_type": "stream", 34 | "text": [ 35 | "41720.0\n", 36 | "[3288.10431708]\n" 37 | ] 38 | } 39 | ], 40 | "source": [ 41 | "nominal = np.load(path+'nominal.npy')\n", 42 | "print(np.mean(nominal))\n", 43 | "sem = np.std(nominal)/np.sqrt(np.shape(nominal))\n", 44 | "print(sem)" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": 4, 50 | "metadata": {}, 51 | "outputs": [ 52 | { 53 | "name": "stdout", 54 | "output_type": "stream", 55 | "text": [ 56 | "[0.00117647 0.00392157 0.01176471 0.03137255]\n", 57 | "[42005. 44515. 26055. 
1965.]\n", 58 | "[1509.06386545 1570.87992858 1924.58404597 337.88126613]\n" 59 | ] 60 | } 61 | ], 62 | "source": [ 63 | "eps = np.load(path + 'fgsm_epsilons.npy')\n", 64 | "print(eps)\n", 65 | "vals = np.load(path + 'fgsm.npy')\n", 66 | "print(np.mean(vals, axis=1))\n", 67 | "sem = np.std(vals, axis=1)/np.sqrt(np.shape(vals)[1])\n", 68 | "print(sem)" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": 5, 74 | "metadata": {}, 75 | "outputs": [ 76 | { 77 | "name": "stdout", 78 | "output_type": "stream", 79 | "text": [ 80 | "[0.00117647 0.00392157 0.01176471 0.03137255]\n", 81 | "[3.909e+04 4.392e+04 1.248e+04 5.000e+00]\n", 82 | "[2168.01637448 1238.23664943 901.23803737 4.87339717]\n" 83 | ] 84 | } 85 | ], 86 | "source": [ 87 | "eps = np.load(path + 'pgd_epsilons.npy')\n", 88 | "print(eps)\n", 89 | "vals = np.load(path + 'pgd.npy')\n", 90 | "print(np.mean(vals, axis=1))\n", 91 | "sem = np.std(vals, axis=1)/np.sqrt(np.shape(vals)[1])\n", 92 | "print(sem)" 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": 6, 98 | "metadata": {}, 99 | "outputs": [ 100 | { 101 | "name": "stdout", 102 | "output_type": "stream", 103 | "text": [ 104 | "[0.00117647 0.00392157 0.01176471 0.03137255]\n", 105 | "[38885. 33745. 0. 0.]\n", 106 | "[2017.93551681 2349.16447913 0. 0. ]\n" 107 | ] 108 | } 109 | ], 110 | "source": [ 111 | "eps = np.load(path + 'greedy_wc_epsilons.npy')\n", 112 | "print(eps)\n", 113 | "vals = np.load(path + 'greedy_wc.npy')\n", 114 | "print(np.mean(vals, axis=1))\n", 115 | "sem = np.std(vals, axis=1)/np.sqrt(np.shape(vals)[1])\n", 116 | "print(sem)" 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": 7, 122 | "metadata": {}, 123 | "outputs": [ 124 | { 125 | "name": "stdout", 126 | "output_type": "stream", 127 | "text": [ 128 | "[0.00117647 0.00392157 0.01176471 0.03137255]\n", 129 | "[0.88293945 0.70004686 0. 0. ]\n", 130 | "[0.00775984 0.00585064 0. 0. ]\n" 131 | ] 132 | } 133 | ], 134 | "source": [ 135 | "eps = np.load(path + 'acr_epsilons.npy')\n", 136 | "print(eps)\n", 137 | "vals = np.load(path + 'acr.npy')\n", 138 | "print(np.mean(vals, axis=1))\n", 139 | "sem = np.std(vals, axis=1)/np.sqrt(np.shape(vals)[1])\n", 140 | "print(sem)" 141 | ] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": 8, 146 | "metadata": {}, 147 | "outputs": [ 148 | { 149 | "name": "stdout", 150 | "output_type": "stream", 151 | "text": [ 152 | "[0.01 0.02 0.05 0.1 ]\n", 153 | "[36070. 34095. 22225. 
15345.]\n", 154 | "[2049.47676249 1281.43425504 1599.04463665 1296.23445024]\n" 155 | ] 156 | } 157 | ], 158 | "source": [ 159 | "eps = np.load(path + 'action_pert_epsilons.npy')\n", 160 | "print(eps)\n", 161 | "vals = np.load(path + 'action_pert.npy')\n", 162 | "print(np.mean(vals, axis=1))\n", 163 | "sem = np.std(vals, axis=1)/np.sqrt(np.shape(vals)[1])\n", 164 | "print(sem)" 165 | ] 166 | }, 167 | { 168 | "cell_type": "code", 169 | "execution_count": null, 170 | "metadata": {}, 171 | "outputs": [], 172 | "source": [] 173 | } 174 | ], 175 | "metadata": { 176 | "kernelspec": { 177 | "display_name": "Python 3", 178 | "language": "python", 179 | "name": "python3" 180 | }, 181 | "language_info": { 182 | "codemirror_mode": { 183 | "name": "ipython", 184 | "version": 3 185 | }, 186 | "file_extension": ".py", 187 | "mimetype": "text/x-python", 188 | "name": "python", 189 | "nbconvert_exporter": "python", 190 | "pygments_lexer": "ipython3", 191 | "version": "3.7.6" 192 | } 193 | }, 194 | "nbformat": 4, 195 | "nbformat_minor": 4 196 | } 197 | -------------------------------------------------------------------------------- /A3C/player_util.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import torch 3 | import torch.nn.functional as F 4 | from torch.autograd import Variable 5 | from ibp import network_bounds 6 | 7 | class Agent(object): 8 | def __init__(self, model, env, args, state): 9 | self.model = model 10 | self.env = env 11 | self.state = state 12 | self.eps_len = 0 13 | self.args = args 14 | self.values = [] 15 | self.log_probs = [] 16 | self.rewards = [] 17 | self.entropies = [] 18 | 19 | self.min_log_probs = [] 20 | self.max_log_probs = [] 21 | 22 | self.done = True 23 | self.info = None 24 | self.reward = 0 25 | self.noclip_reward = 0 26 | self.gpu_id = -1 27 | 28 | def action_train(self, bound_epsilon = None): 29 | value, logit = self.model(Variable(self.state.unsqueeze(0))) 30 | prob = torch.clamp(F.softmax(logit, dim=1), 1e-6, 1) 31 | log_prob = torch.clamp(F.log_softmax(logit, dim=1), -30, -1e-6) 32 | entropy = -(log_prob * prob).sum(1) 33 | self.entropies.append(entropy) 34 | #print(prob) 35 | action = prob.multinomial(1).data 36 | #avoid issues with zero 37 | 38 | if bound_epsilon: 39 | upper, lower = network_bounds(self.model.model, Variable(self.state.unsqueeze(0)), 40 | epsilon=bound_epsilon) 41 | upper, lower = upper[:,1:], lower[:,1:] 42 | if self.gpu_id>=0: 43 | with torch.cuda.device(self.gpu_id): 44 | onehot_action = torch.zeros(upper.shape).cuda() 45 | else: 46 | onehot_action = torch.zeros(upper.shape) 47 | onehot_action[range(upper.shape[0]), action] = 1 48 | min_prob = torch.clamp(F.log_softmax(onehot_action*lower+(1-onehot_action)*upper, dim=1), -30, -1e-6) 49 | max_prob = torch.clamp(F.log_softmax((1-onehot_action)*lower+onehot_action*upper, dim=1), -30, -1e-6) 50 | 51 | self.max_log_probs.append(max_prob.gather(1, Variable(action))) 52 | self.min_log_probs.append(min_prob.gather(1, Variable(action))) 53 | log_prob = log_prob.gather(1, Variable(action)) 54 | state, self.noclip_reward, self.done, self.info = self.env.step( 55 | action.cpu().numpy()) 56 | self.state = torch.from_numpy(state).float() 57 | if self.gpu_id >= 0: 58 | with torch.cuda.device(self.gpu_id): 59 | self.state = self.state.cuda() 60 | self.reward = max(min(self.noclip_reward, 1), -1) 61 | self.values.append(value) 62 | self.log_probs.append(log_prob) 63 | self.rewards.append(self.reward) 64 | self.eps_len += 1 65 | return 
self 66 | 67 | def action_test(self): 68 | with torch.no_grad(): 69 | value, logit= self.model(Variable( 70 | self.state.unsqueeze(0))) 71 | prob = F.softmax(logit, dim=1) 72 | action = prob.max(1)[1].data.cpu().numpy() 73 | state, self.noclip_reward, self.done, self.info = self.env.step(action[0]) 74 | self.reward = max(min(self.noclip_reward, 1), -1) 75 | self.state = torch.from_numpy(state).float() 76 | if self.gpu_id >= 0: 77 | with torch.cuda.device(self.gpu_id): 78 | self.state = self.state.cuda() 79 | self.eps_len += 1 80 | return self 81 | 82 | def action_test_losses(self, bound_epsilon=None): 83 | with torch.no_grad(): 84 | value, logit= self.model(Variable( 85 | self.state.unsqueeze(0))) 86 | prob = torch.clamp(F.softmax(logit, dim=1), 1e-6, 1) 87 | log_prob = torch.clamp(F.log_softmax(logit, dim=1), -30, -1e-6) 88 | entropy = -(log_prob * prob).sum(1) 89 | self.entropies.append(entropy) 90 | 91 | action = prob.argmax(1, keepdim=True).data 92 | 93 | if bound_epsilon: 94 | upper, lower = network_bounds(self.model.model, Variable(self.state.unsqueeze(0)), 95 | epsilon=bound_epsilon) 96 | upper, lower = upper[:,1:], lower[:,1:] 97 | with torch.cuda.device(self.gpu_id): 98 | onehot_action = torch.zeros(upper.shape).cuda() 99 | onehot_action[range(upper.shape[0]), action] = 1 100 | min_prob = torch.clamp(F.log_softmax(onehot_action*lower+(1-onehot_action)*upper, dim=1), -30, -1e-6) 101 | max_prob = torch.clamp(F.log_softmax((1-onehot_action)*lower+onehot_action*upper, dim=1), -30, -1e-6) 102 | 103 | self.max_log_probs.append(max_prob.gather(1, Variable(action))) 104 | self.min_log_probs.append(min_prob.gather(1, Variable(action))) 105 | 106 | log_prob = log_prob.gather(1, Variable(action)) 107 | state, self.noclip_reward, self.done, self.info = self.env.step( 108 | action.cpu().numpy()) 109 | self.reward = max(min(self.noclip_reward, 1), -1) 110 | self.state = torch.from_numpy(state).float() 111 | if self.gpu_id >= 0: 112 | with torch.cuda.device(self.gpu_id): 113 | self.state = self.state.cuda() 114 | 115 | self.values.append(value) 116 | self.log_probs.append(log_prob) 117 | self.rewards.append(self.reward) 118 | self.eps_len += 1 119 | return self 120 | 121 | def clear_actions(self): 122 | self.values = [] 123 | self.log_probs = [] 124 | self.rewards = [] 125 | self.entropies = [] 126 | self.max_log_probs = [] 127 | self.min_log_probs = [] 128 | return self 129 | -------------------------------------------------------------------------------- /A3C/main.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | import os 3 | os.environ["OMP_NUM_THREADS"] = "1" 4 | import argparse 5 | import torch 6 | import torch.multiprocessing as mp 7 | from environment import atari_env 8 | from utils import read_config 9 | from model import A3Cff 10 | from train import train, train_robust 11 | from test import test 12 | from shared_optim import SharedRMSprop, SharedAdam 13 | #from gym.configuration import undo_logger_setup 14 | import time 15 | 16 | #undo_logger_setup() 17 | parser = argparse.ArgumentParser(description='A3C') 18 | parser.add_argument( 19 | '--lr', 20 | type=float, 21 | default=0.0001, 22 | metavar='LR', 23 | help='learning rate (default: 0.0001)') 24 | parser.add_argument( 25 | '--gamma', 26 | type=float, 27 | default=0.99, 28 | metavar='G', 29 | help='discount factor for rewards (default: 0.99)') 30 | parser.add_argument( 31 | '--tau', 32 | type=float, 33 | default=1.00, 34 | metavar='T', 35 | 
help='parameter for GAE (default: 1.00)') 36 | parser.add_argument( 37 | '--seed', 38 | type=int, 39 | default=None, 40 | metavar='S', 41 | help='random seed (default: 1)') 42 | parser.add_argument( 43 | '--workers', 44 | type=int, 45 | default=16, 46 | metavar='W', 47 | help='how many training processes to use (default: 16)') 48 | parser.add_argument( 49 | '--num-steps', 50 | type=int, 51 | default=20, 52 | metavar='NS', 53 | help='number of forward steps in A3C (default: 20)') 54 | parser.add_argument( 55 | '--total-frames', 56 | type=int, 57 | default=20000000, 58 | metavar='TS', 59 | help='How many frames to train for before finishing (default: 20000000)') 60 | parser.add_argument( 61 | '--max-episode-length', 62 | type=int, 63 | default=10000, 64 | metavar='M', 65 | help='maximum length of an episode (default: 10000)') 66 | parser.add_argument( 67 | '--env', 68 | default='PongNoFrameskip-v4', 69 | metavar='ENV', 70 | help='environment to train on (default: PongNoFrameskip-v4)') 71 | parser.add_argument( 72 | '--env-config', 73 | default='config.json', 74 | metavar='EC', 75 | help='environment to crop and resize info (default: config.json)') 76 | 77 | parser.add_argument( 78 | '--save-max', 79 | default=True, 80 | metavar='SM', 81 | help='Save model on every test run high score matched or bested') 82 | parser.add_argument( 83 | '--optimizer', 84 | default='Adam', 85 | metavar='OPT', 86 | help='shares optimizer choice of Adam or RMSprop') 87 | parser.add_argument( 88 | '--save-model-dir', 89 | default='trained_models/', 90 | metavar='SMD', 91 | help='folder to save trained models') 92 | parser.add_argument( 93 | '--log-dir', default='logs/', metavar='LG', help='folder to save logs') 94 | parser.add_argument( 95 | '--gpu-ids', 96 | type=int, 97 | default=-1, 98 | nargs='+', 99 | help='GPUs to use [-1 CPU only] (default: -1)') 100 | parser.add_argument( 101 | '--amsgrad', 102 | default=True, 103 | metavar='AM', 104 | help='Adam optimizer amsgrad parameter') 105 | parser.add_argument( 106 | '--skip-rate', 107 | type=int, 108 | default=4, 109 | metavar='SR', 110 | help='frame skip rate (default: 4)') 111 | parser.add_argument( 112 | '--kappa-end', 113 | type=float, 114 | default=0.5, 115 | metavar='SR', 116 | help='final value of the variable controlling importance of standard loss (default: 0.5)') 117 | parser.add_argument('--robust', 118 | dest='robust', 119 | action='store_true', 120 | help='train the model to be verifiably robust') 121 | parser.add_argument( 122 | '--load-path', 123 | default=None, 124 | help='Path to load a model from. 
By default starts training a new model') 125 | parser.add_argument( 126 | '--epsilon-end', 127 | type=float, 128 | default= 1/255, 129 | metavar='EPS', 130 | help='max size of perturbation trained on') 131 | 132 | parser.set_defaults(robust=False) 133 | 134 | 135 | if __name__ == '__main__': 136 | args = parser.parse_args() 137 | 138 | if not os.path.exists(args.log_dir): 139 | os.mkdir(args.log_dir) 140 | if not os.path.exists(args.save_model_dir): 141 | os.mkdir(args.save_model_dir) 142 | 143 | if args.seed: 144 | torch.manual_seed(args.seed) 145 | if args.gpu_ids == -1: 146 | args.gpu_ids = [-1] 147 | else: 148 | if args.seed: 149 | torch.cuda.manual_seed(args.seed) 150 | mp.set_start_method('spawn') 151 | setup_json = read_config(args.env_config) 152 | env_conf = setup_json["Default"] 153 | for i in setup_json.keys(): 154 | if i in args.env: 155 | env_conf = setup_json[i] 156 | env = atari_env(args.env, env_conf, args) 157 | shared_model = A3Cff(env.observation_space.shape[0], env.action_space) 158 | if args.load_path: 159 | 160 | saved_state = torch.load(args.load_path, 161 | map_location=lambda storage, loc: storage) 162 | shared_model.load_state_dict(saved_state) 163 | 164 | shared_model.share_memory() 165 | 166 | if args.optimizer == 'RMSprop': 167 | optimizer = SharedRMSprop(shared_model.parameters(), lr=args.lr) 168 | if args.optimizer == 'Adam': 169 | optimizer = SharedAdam( 170 | shared_model.parameters(), lr=args.lr, amsgrad=args.amsgrad) 171 | optimizer.share_memory() 172 | 173 | processes = [] 174 | 175 | p = mp.Process(target=test, args=(args, shared_model, optimizer, env_conf)) 176 | p.start() 177 | processes.append(p) 178 | time.sleep(0.1) 179 | for rank in range(0, args.workers): 180 | if args.robust: 181 | p = mp.Process(target=train_robust, args=(rank, args, shared_model, optimizer, env_conf)) 182 | else: 183 | p = mp.Process(target=train, args=(rank, args, shared_model, optimizer, env_conf)) 184 | p.start() 185 | processes.append(p) 186 | time.sleep(0.1) 187 | for p in processes: 188 | time.sleep(0.1) 189 | p.join() 190 | -------------------------------------------------------------------------------- /A3C/adv_attacks/adversary.py: -------------------------------------------------------------------------------- 1 | #coding=utf-8 2 | # Copyright 2017 - 2018 Baidu Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | """ 16 | Defines a class that contains the original object, the target and the 17 | adversarial example. 18 | 19 | """ 20 | import numpy as np 21 | import logging 22 | logger=logging.getLogger(__name__) 23 | 24 | class Adversary(object): 25 | """ 26 | Adversary contains the original object, the target and the adversarial 27 | example. 28 | """ 29 | 30 | def __init__(self, original, original_label=None): 31 | """ 32 | :param original: The original instance, such as an image. 33 | :param original_label: The original instance's label. 
34 | """ 35 | assert original is not None 36 | 37 | self.original_label = original_label 38 | #定向攻击的目标 39 | self.target_label = None 40 | self.adversarial_label = None 41 | #保存原始数据 强制拷贝 42 | self.__original = np.copy(original) 43 | self.__target = None 44 | self.__is_targeted_attack = False 45 | #保存生成的对抗样本 46 | self.__adversarial_example = None 47 | self.__bad_adversarial_example = None 48 | 49 | def set_target(self, is_targeted_attack, target=None, target_label=None): 50 | """ 51 | Set the target be targeted or untargeted. 52 | 53 | :param is_targeted_attack: bool 54 | :param target: The target. 55 | :param target_label: If is_targeted_attack is true and target_label is 56 | None, self.target_label will be set by the Attack class. 57 | If is_targeted_attack is false, target_label must be None. 58 | """ 59 | assert (target_label is None) or is_targeted_attack 60 | self.__is_targeted_attack = is_targeted_attack 61 | self.target_label = target_label 62 | self.__target = target 63 | if not is_targeted_attack: 64 | self.target_label = None 65 | self.__target = None 66 | 67 | def set_original(self, original, original_label=None): 68 | """ 69 | Reset the original. 70 | 71 | :param original: Original instance. 72 | :param original_label: Original instance's label. 73 | """ 74 | if original != self.__original: 75 | self.__original = original 76 | self.original_label = original_label 77 | self.__adversarial_example = None 78 | self.__bad_adversarial_example = None 79 | if original is None: 80 | self.original_label = None 81 | 82 | def _is_successful(self, adversarial_label): 83 | """ 84 | Is the adversarial_label is the expected adversarial label. 85 | 86 | :param adversarial_label: adversarial label. 87 | :return: bool 88 | """ 89 | if self.target_label is not None: 90 | return adversarial_label == self.target_label 91 | else: 92 | return (adversarial_label is not None) and \ 93 | (adversarial_label != self.original_label) 94 | 95 | def is_successful(self): 96 | """ 97 | Has the adversarial example been found. 98 | 99 | :return: bool 100 | """ 101 | return self._is_successful(self.adversarial_label) 102 | 103 | def try_accept_the_example(self, adversarial_example, adversarial_label): 104 | """ 105 | If adversarial_label the target label that we are finding. 106 | The adversarial_example and adversarial_label will be accepted and 107 | True will be returned. 108 | 109 | :return: bool 110 | """ 111 | assert adversarial_example is not None 112 | assert self.__original.shape == adversarial_example.shape 113 | 114 | ok = self._is_successful(adversarial_label) 115 | if ok: 116 | self.__adversarial_example = np.copy(adversarial_example) 117 | self.adversarial_label = adversarial_label 118 | else: 119 | self.__bad_adversarial_example = np.copy(adversarial_example) 120 | return ok 121 | 122 | def perturbation(self, multiplying_factor=1.0): 123 | """ 124 | The perturbation that the adversarial_example is added. 125 | 126 | :param multiplying_factor: float. 127 | :return: The perturbation that is multiplied by multiplying_factor. 
128 | """ 129 | assert self.__original is not None 130 | assert (self.__adversarial_example is not None) or \ 131 | (self.__bad_adversarial_example is not None) 132 | if self.__adversarial_example is not None: 133 | return multiplying_factor * ( 134 | self.__adversarial_example - self.__original) 135 | else: 136 | return multiplying_factor * ( 137 | self.__bad_adversarial_example - self.__original) 138 | 139 | @property 140 | def is_targeted_attack(self): 141 | """ 142 | :property: is_targeted_attack 143 | """ 144 | return self.__is_targeted_attack 145 | 146 | @property 147 | def target(self): 148 | """ 149 | :property: target 150 | """ 151 | return self.__target 152 | 153 | @property 154 | def original(self): 155 | """ 156 | :property: original 157 | """ 158 | return self.__original 159 | 160 | @property 161 | def adversarial_example(self): 162 | """ 163 | :property: adversarial_example 164 | """ 165 | return self.__adversarial_example 166 | 167 | @property 168 | def bad_adversarial_example(self): 169 | """ 170 | :property: bad_adversarial_example 171 | """ 172 | return self.__bad_adversarial_example 173 | -------------------------------------------------------------------------------- /DQN/adv_attacks/adversary.py: -------------------------------------------------------------------------------- 1 | #coding=utf-8 2 | # Copyright 2017 - 2018 Baidu Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | """ 16 | Defines a class that contains the original object, the target and the 17 | adversarial example. 18 | 19 | """ 20 | import numpy as np 21 | import logging 22 | logger=logging.getLogger(__name__) 23 | 24 | class Adversary(object): 25 | """ 26 | Adversary contains the original object, the target and the adversarial 27 | example. 28 | """ 29 | 30 | def __init__(self, original, original_label=None): 31 | """ 32 | :param original: The original instance, such as an image. 33 | :param original_label: The original instance's label. 34 | """ 35 | assert original is not None 36 | 37 | self.original_label = original_label 38 | #定向攻击的目标 39 | self.target_label = None 40 | self.adversarial_label = None 41 | #保存原始数据 强制拷贝 42 | self.__original = np.copy(original) 43 | self.__target = None 44 | self.__is_targeted_attack = False 45 | #保存生成的对抗样本 46 | self.__adversarial_example = None 47 | self.__bad_adversarial_example = None 48 | 49 | def set_target(self, is_targeted_attack, target=None, target_label=None): 50 | """ 51 | Set the target be targeted or untargeted. 52 | 53 | :param is_targeted_attack: bool 54 | :param target: The target. 55 | :param target_label: If is_targeted_attack is true and target_label is 56 | None, self.target_label will be set by the Attack class. 57 | If is_targeted_attack is false, target_label must be None. 
58 | """ 59 | assert (target_label is None) or is_targeted_attack 60 | self.__is_targeted_attack = is_targeted_attack 61 | self.target_label = target_label 62 | self.__target = target 63 | if not is_targeted_attack: 64 | self.target_label = None 65 | self.__target = None 66 | 67 | def set_original(self, original, original_label=None): 68 | """ 69 | Reset the original. 70 | 71 | :param original: Original instance. 72 | :param original_label: Original instance's label. 73 | """ 74 | if original != self.__original: 75 | self.__original = original 76 | self.original_label = original_label 77 | self.__adversarial_example = None 78 | self.__bad_adversarial_example = None 79 | if original is None: 80 | self.original_label = None 81 | 82 | def _is_successful(self, adversarial_label): 83 | """ 84 | Is the adversarial_label is the expected adversarial label. 85 | 86 | :param adversarial_label: adversarial label. 87 | :return: bool 88 | """ 89 | if self.target_label is not None: 90 | return adversarial_label == self.target_label 91 | else: 92 | return (adversarial_label is not None) and \ 93 | (adversarial_label != self.original_label) 94 | 95 | def is_successful(self): 96 | """ 97 | Has the adversarial example been found. 98 | 99 | :return: bool 100 | """ 101 | return self._is_successful(self.adversarial_label) 102 | 103 | def try_accept_the_example(self, adversarial_example, adversarial_label): 104 | """ 105 | If adversarial_label the target label that we are finding. 106 | The adversarial_example and adversarial_label will be accepted and 107 | True will be returned. 108 | 109 | :return: bool 110 | """ 111 | assert adversarial_example is not None 112 | assert self.__original.shape == adversarial_example.shape 113 | 114 | ok = self._is_successful(adversarial_label) 115 | if ok: 116 | self.__adversarial_example = np.copy(adversarial_example) 117 | self.adversarial_label = adversarial_label 118 | else: 119 | self.__bad_adversarial_example = np.copy(adversarial_example) 120 | return ok 121 | 122 | def perturbation(self, multiplying_factor=1.0): 123 | """ 124 | The perturbation that the adversarial_example is added. 125 | 126 | :param multiplying_factor: float. 127 | :return: The perturbation that is multiplied by multiplying_factor. 
128 | """ 129 | assert self.__original is not None 130 | assert (self.__adversarial_example is not None) or \ 131 | (self.__bad_adversarial_example is not None) 132 | if self.__adversarial_example is not None: 133 | return multiplying_factor * ( 134 | self.__adversarial_example - self.__original) 135 | else: 136 | return multiplying_factor * ( 137 | self.__bad_adversarial_example - self.__original) 138 | 139 | @property 140 | def is_targeted_attack(self): 141 | """ 142 | :property: is_targeted_attack 143 | """ 144 | return self.__is_targeted_attack 145 | 146 | @property 147 | def target(self): 148 | """ 149 | :property: target 150 | """ 151 | return self.__target 152 | 153 | @property 154 | def original(self): 155 | """ 156 | :property: original 157 | """ 158 | return self.__original 159 | 160 | @property 161 | def adversarial_example(self): 162 | """ 163 | :property: adversarial_example 164 | """ 165 | return self.__adversarial_example 166 | 167 | @property 168 | def bad_adversarial_example(self): 169 | """ 170 | :property: bad_adversarial_example 171 | """ 172 | return self.__bad_adversarial_example 173 | -------------------------------------------------------------------------------- /DQN/main.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | import os 3 | import argparse 4 | import torch 5 | from environment import atari_env 6 | from utils import read_config 7 | from model import CnnDQN 8 | from train import train 9 | #from gym.configuration import undo_logger_setup 10 | import time 11 | 12 | #undo_logger_setup() 13 | parser = argparse.ArgumentParser(description='A3C') 14 | parser.add_argument( 15 | '--lr', 16 | type=float, 17 | default=0.000125, 18 | metavar='LR', 19 | help='learning rate (default: 0.000125)') 20 | parser.add_argument( 21 | '--gamma', 22 | type=float, 23 | default=0.99, 24 | metavar='G', 25 | help='discount factor for rewards (default: 0.99)') 26 | parser.add_argument( 27 | '--seed', 28 | type=int, 29 | default=None, 30 | metavar='S', 31 | help='random seed (default: None)') 32 | parser.add_argument( 33 | '--total-frames', 34 | type=int, 35 | default=6000000, 36 | metavar='TS', 37 | help='How many frames to train with') 38 | parser.add_argument( 39 | '--max-episode-length', 40 | type=int, 41 | default=10000, 42 | metavar='M', 43 | help='maximum length of an episode (default: 10000)') 44 | parser.add_argument( 45 | '--env', 46 | default='PongNoFrameskip-v4', 47 | metavar='ENV', 48 | help='environment to train on (default: PongNoFrameskip-v4)') 49 | parser.add_argument( 50 | '--env-config', 51 | default='config.json', 52 | metavar='EC', 53 | help='environment to crop and resize info (default: config.json)') 54 | 55 | parser.add_argument( 56 | '--save-max', 57 | default=True, 58 | metavar='SM', 59 | help='Save model on every test run high score matched or bested') 60 | parser.add_argument( 61 | '--optimizer', 62 | default='Adam', 63 | metavar='OPT', 64 | help='optimizer to use, one of {Adam, RMSprop}') 65 | parser.add_argument( 66 | '--save-model-dir', 67 | default='trained_models/', 68 | metavar='SMD', 69 | help='folder to save trained models') 70 | parser.add_argument( 71 | '--log-dir', default='logs/', metavar='LG', help='folder to save logs') 72 | parser.add_argument( 73 | '--gpu-id', 74 | type=int, 75 | default=-1, 76 | help='GPUs to use [-1 CPU only] (default: -1)') 77 | parser.add_argument( 78 | '--amsgrad', 79 | default=False, 80 | metavar='AM', 81 | help='Adam optimizer 
amsgrad parameter') 82 | parser.add_argument( 83 | '--worse-bound', 84 | default=True, 85 | help='if this is selected worst case loss uses bound that is further away from mean') 86 | 87 | parser.add_argument( 88 | '--skip-rate', 89 | type=int, 90 | default=4, 91 | metavar='SR', 92 | help='frame skip rate (default: 4)') 93 | parser.add_argument( 94 | '--kappa-end', 95 | type=float, 96 | default=0.5, 97 | metavar='SR', 98 | help='final value of the variable controlling importance of standard loss (default: 0.5)') 99 | parser.add_argument('--robust', 100 | dest='robust', 101 | action='store_true', 102 | help='train the model to be verifiably robust') 103 | parser.add_argument( 104 | '--load-path', 105 | type=str, 106 | default=None, 107 | help='Path to load a model from. By default starts training a new model') 108 | 109 | parser.add_argument( 110 | '--attack-epsilon-end', 111 | type=float, 112 | default=1/255, 113 | metavar='EPS', 114 | help='max size of perturbation trained on') 115 | parser.add_argument( 116 | '--attack-epsilon-schedule', 117 | type=int, 118 | default=3000000, 119 | help='The frame by which to reach final perturbation') 120 | parser.add_argument( 121 | '--exp-epsilon-end', 122 | type=float, 123 | default=0.01, 124 | help='for epsilon-greedy exploration') 125 | parser.add_argument( 126 | '--exp-epsilon-decay', 127 | type=int, 128 | default=500000, 129 | help='controls linear decay of exploration epsilon') 130 | parser.add_argument( 131 | '--replay-initial', 132 | type=int, 133 | default=50000, 134 | help='How many frames of experience to collect before starting to learn') 135 | parser.add_argument( 136 | '--batch-size', 137 | type=int, 138 | default=128, 139 | help='Batch size for updating agent') 140 | parser.add_argument( 141 | '--updates-per-frame', 142 | type=int, 143 | default=32, 144 | help='How many gradient updates per new frame') 145 | parser.add_argument( 146 | '--buffer-size', 147 | type=int, 148 | default=200000, 149 | help='How frames to store in replay buffer') 150 | 151 | 152 | parser.set_defaults(robust=False) 153 | 154 | if __name__ == '__main__': 155 | args = parser.parse_args() 156 | if args.seed: 157 | torch.manual_seed(args.seed) 158 | if args.gpu_id>=0: 159 | torch.cuda.manual_seed(args.seed) 160 | setup_json = read_config(args.env_config) 161 | env_conf = setup_json["Default"] 162 | for i in setup_json.keys(): 163 | if i in args.env: 164 | env_conf = setup_json[i] 165 | env = atari_env(args.env, env_conf, args) 166 | curr_model = CnnDQN(env.observation_space.shape[0], env.action_space) 167 | 168 | if not os.path.exists(args.log_dir): 169 | os.mkdir(args.log_dir) 170 | if not os.path.exists(args.save_model_dir): 171 | os.mkdir(args.save_model_dir) 172 | 173 | if args.load_path: 174 | saved_state = torch.load( 175 | args.load_path, 176 | map_location=lambda storage, loc: storage) 177 | curr_model.load_state_dict(saved_state) 178 | 179 | target_model = CnnDQN(env.observation_space.shape[0], env.action_space) 180 | target_model.load_state_dict(curr_model.state_dict()) 181 | if args.gpu_id >= 0: 182 | with torch.cuda.device(args.gpu_id): 183 | curr_model.cuda() 184 | target_model.cuda() 185 | 186 | if args.optimizer == 'RMSprop': 187 | optimizer = torch.optim.RMSprop(curr_model.parameters(), lr=args.lr, momentum=0.95, alpha=0.95, eps=1e-2) 188 | elif args.optimizer == 'Adam': 189 | optimizer = torch.optim.Adam(curr_model.parameters(), lr=args.lr, amsgrad=args.amsgrad) 190 | 191 | train(curr_model, target_model, env, optimizer, args) 192 | 193 | 
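# How the arguments above interact during training (see train.py):
# - exploration epsilon decays linearly from 1.0 to --exp-epsilon-end over the first
#   --exp-epsilon-decay frames (with the defaults: 1.0 -> 0.01 over 500,000 frames);
# - once --replay-initial frames have been collected, a gradient update is made every
#   batch_size/updates_per_frame frames (every 4 frames with the defaults 128/32), and
#   the target network is synced to the current network every 1000 such updates;
# - with --robust, the attack epsilon ramps linearly to --attack-epsilon-end over
#   max(--attack-epsilon-schedule, --total-frames) frames, while kappa anneals from 1
#   to --kappa-end on the same schedule.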
-------------------------------------------------------------------------------- /A3C/environment.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import gym 3 | import numpy as np 4 | from collections import deque 5 | from gym.spaces.box import Box 6 | #from skimage.color import rgb2gray 7 | from cv2 import resize 8 | #from skimage.transform import resize 9 | #from scipy.misc import imresize as resize 10 | import random 11 | 12 | 13 | def atari_env(env_id, env_conf, args): 14 | env = gym.make(env_id) 15 | if 'NoFrameskip' in env_id: 16 | assert 'NoFrameskip' in env.spec.id 17 | env._max_episode_steps = args.max_episode_length * args.skip_rate 18 | env = NoopResetEnv(env, noop_max=30) 19 | env = MaxAndSkipEnv(env, skip=args.skip_rate) 20 | else: 21 | env._max_episode_steps = args.max_episode_length 22 | env = EpisodicLifeEnv(env) 23 | if 'FIRE' in env.unwrapped.get_action_meanings(): 24 | env = FireResetEnv(env) 25 | env._max_episode_steps = args.max_episode_length 26 | env = AtariRescale(env, env_conf) 27 | return env 28 | 29 | 30 | def process_frame(frame, conf): 31 | frame = frame[conf["crop1"]:conf["crop2"] + 160, :160] 32 | frame = frame.mean(2) 33 | frame = frame.astype(np.float32) 34 | frame *= (1.0 / 255.0) 35 | frame = resize(frame, (80, conf["dimension2"])) 36 | frame = resize(frame, (80, 80)) 37 | frame = np.reshape(frame, [1, 80, 80]) 38 | return frame 39 | 40 | 41 | class AtariRescale(gym.ObservationWrapper): 42 | def __init__(self, env, env_conf): 43 | gym.ObservationWrapper.__init__(self, env) 44 | self.observation_space = Box(0.0, 1.0, [1, 80, 80], dtype=np.uint8) 45 | self.conf = env_conf 46 | 47 | def observation(self, observation): 48 | return process_frame(observation, self.conf) 49 | 50 | 51 | 52 | class NoopResetEnv(gym.Wrapper): 53 | def __init__(self, env, noop_max=30): 54 | """Sample initial states by taking random number of no-ops on reset. 55 | No-op is assumed to be action 0. 
56 | """ 57 | gym.Wrapper.__init__(self, env) 58 | self.noop_max = noop_max 59 | self.override_num_noops = None 60 | self.noop_action = 0 61 | assert env.unwrapped.get_action_meanings()[0] == 'NOOP' 62 | 63 | def reset(self, **kwargs): 64 | """ Do no-op action for a number of steps in [1, noop_max].""" 65 | self.env.reset(**kwargs) 66 | if self.override_num_noops is not None: 67 | noops = self.override_num_noops 68 | else: 69 | noops = self.unwrapped.np_random.randint(1, self.noop_max + 1) #pylint: disable=E1101 70 | assert noops > 0 71 | obs = None 72 | for _ in range(noops): 73 | obs, _, done, _ = self.env.step(self.noop_action) 74 | if done: 75 | obs = self.env.reset(**kwargs) 76 | return obs 77 | 78 | def step(self, ac): 79 | return self.env.step(ac) 80 | 81 | 82 | class FireResetEnv(gym.Wrapper): 83 | def __init__(self, env): 84 | """Take action on reset for environments that are fixed until firing.""" 85 | gym.Wrapper.__init__(self, env) 86 | assert env.unwrapped.get_action_meanings()[1] == 'FIRE' 87 | assert len(env.unwrapped.get_action_meanings()) >= 3 88 | 89 | def reset(self, **kwargs): 90 | self.env.reset(**kwargs) 91 | obs, _, done, _ = self.env.step(1) 92 | if done: 93 | self.env.reset(**kwargs) 94 | obs, _, done, _ = self.env.step(2) 95 | if done: 96 | self.env.reset(**kwargs) 97 | return obs 98 | 99 | def step(self, ac): 100 | return self.env.step(ac) 101 | 102 | 103 | class EpisodicLifeEnv(gym.Wrapper): 104 | def __init__(self, env): 105 | """Make end-of-life == end-of-episode, but only reset on true game over. 106 | Done by DeepMind for the DQN and co. since it helps value estimation. 107 | """ 108 | gym.Wrapper.__init__(self, env) 109 | self.lives = 0 110 | self.was_real_done = True 111 | 112 | def step(self, action): 113 | obs, reward, done, info = self.env.step(action) 114 | self.was_real_done = done 115 | # check current lives, make loss of life terminal, 116 | # then update lives to handle bonus lives 117 | lives = self.env.unwrapped.ale.lives() 118 | if lives < self.lives and lives > 0: 119 | # for Qbert sometimes we stay in lives == 0 condtion for a few frames 120 | # so its important to keep lives > 0, so that we only reset once 121 | # the environment advertises done. 122 | done = True 123 | self.lives = lives 124 | return obs, reward, done, self.was_real_done 125 | 126 | def reset(self, **kwargs): 127 | """Reset only when lives are exhausted. 128 | This way all states are still reachable even though lives are episodic, 129 | and the learner need not know about any of this behind-the-scenes. 
130 | """ 131 | if self.was_real_done: 132 | obs = self.env.reset(**kwargs) 133 | else: 134 | # no-op step to advance from terminal/lost life state 135 | obs, _, _, _ = self.env.step(0) 136 | self.lives = self.env.unwrapped.ale.lives() 137 | return obs 138 | 139 | 140 | class MaxAndSkipEnv(gym.Wrapper): 141 | def __init__(self, env=None, skip=4): 142 | """Return only every `skip`-th frame""" 143 | super(MaxAndSkipEnv, self).__init__(env) 144 | # most recent raw observations (for max pooling across time steps) 145 | self._obs_buffer = deque(maxlen=3) 146 | self._skip = skip 147 | 148 | def step(self, action): 149 | total_reward = 0.0 150 | done = None 151 | for _ in range(self._skip): 152 | obs, reward, done, info = self.env.step(action) 153 | self._obs_buffer.append(obs) 154 | total_reward += reward 155 | if done: 156 | break 157 | 158 | max_frame = np.max(np.stack(self._obs_buffer), axis=0) 159 | 160 | return max_frame, total_reward, done, info 161 | 162 | def reset(self, **kwargs): 163 | """Clear past frame buffer and init. to first obs. from inner env.""" 164 | self._obs_buffer.clear() 165 | obs = self.env.reset(**kwargs) 166 | self._obs_buffer.append(obs) 167 | return obs 168 | 169 | -------------------------------------------------------------------------------- /DQN/environment.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import gym 3 | import numpy as np 4 | from collections import deque 5 | from gym.spaces.box import Box 6 | #from skimage.color import rgb2gray 7 | from cv2 import resize 8 | #from skimage.transform import resize 9 | #from scipy.misc import imresize as resize 10 | import random 11 | 12 | 13 | def atari_env(env_id, env_conf, args): 14 | env = gym.make(env_id) 15 | if 'NoFrameskip' in env_id: 16 | assert 'NoFrameskip' in env.spec.id 17 | env._max_episode_steps = args.max_episode_length * args.skip_rate 18 | env = NoopResetEnv(env, noop_max=30) 19 | env = MaxAndSkipEnv(env, skip=args.skip_rate) 20 | else: 21 | env._max_episode_steps = args.max_episode_length 22 | env = EpisodicLifeEnv(env) 23 | if 'FIRE' in env.unwrapped.get_action_meanings(): 24 | env = FireResetEnv(env) 25 | env._max_episode_steps = args.max_episode_length 26 | env = AtariRescale(env, env_conf) 27 | return env 28 | 29 | 30 | def process_frame(frame, conf): 31 | frame = frame[conf["crop1"]:conf["crop2"] + 160, :160] 32 | frame = frame.mean(2) 33 | frame = frame.astype(np.float32) 34 | frame *= (1.0 / 255.0) 35 | frame = resize(frame, (80, conf["dimension2"])) 36 | frame = resize(frame, (80, 80)) 37 | frame = np.reshape(frame, [1, 80, 80]) 38 | return frame 39 | 40 | 41 | class AtariRescale(gym.ObservationWrapper): 42 | def __init__(self, env, env_conf): 43 | gym.ObservationWrapper.__init__(self, env) 44 | self.observation_space = Box(0.0, 1.0, [1, 80, 80], dtype=np.uint8) 45 | self.conf = env_conf 46 | 47 | def observation(self, observation): 48 | return process_frame(observation, self.conf) 49 | 50 | 51 | 52 | class NoopResetEnv(gym.Wrapper): 53 | def __init__(self, env, noop_max=30): 54 | """Sample initial states by taking random number of no-ops on reset. 55 | No-op is assumed to be action 0. 
56 | """ 57 | gym.Wrapper.__init__(self, env) 58 | self.noop_max = noop_max 59 | self.override_num_noops = None 60 | self.noop_action = 0 61 | assert env.unwrapped.get_action_meanings()[0] == 'NOOP' 62 | 63 | def reset(self, **kwargs): 64 | """ Do no-op action for a number of steps in [1, noop_max].""" 65 | self.env.reset(**kwargs) 66 | if self.override_num_noops is not None: 67 | noops = self.override_num_noops 68 | else: 69 | noops = self.unwrapped.np_random.randint(1, self.noop_max + 1) #pylint: disable=E1101 70 | assert noops > 0 71 | obs = None 72 | for _ in range(noops): 73 | obs, _, done, _ = self.env.step(self.noop_action) 74 | if done: 75 | obs = self.env.reset(**kwargs) 76 | return obs 77 | 78 | def step(self, ac): 79 | return self.env.step(ac) 80 | 81 | 82 | class FireResetEnv(gym.Wrapper): 83 | def __init__(self, env): 84 | """Take action on reset for environments that are fixed until firing.""" 85 | gym.Wrapper.__init__(self, env) 86 | assert env.unwrapped.get_action_meanings()[1] == 'FIRE' 87 | assert len(env.unwrapped.get_action_meanings()) >= 3 88 | 89 | def reset(self, **kwargs): 90 | self.env.reset(**kwargs) 91 | obs, _, done, _ = self.env.step(1) 92 | if done: 93 | self.env.reset(**kwargs) 94 | obs, _, done, _ = self.env.step(2) 95 | if done: 96 | self.env.reset(**kwargs) 97 | return obs 98 | 99 | def step(self, ac): 100 | return self.env.step(ac) 101 | 102 | 103 | class EpisodicLifeEnv(gym.Wrapper): 104 | def __init__(self, env): 105 | """Make end-of-life == end-of-episode, but only reset on true game over. 106 | Done by DeepMind for the DQN and co. since it helps value estimation. 107 | """ 108 | gym.Wrapper.__init__(self, env) 109 | self.lives = 0 110 | self.was_real_done = True 111 | 112 | def step(self, action): 113 | obs, reward, done, info = self.env.step(action) 114 | self.was_real_done = done 115 | # check current lives, make loss of life terminal, 116 | # then update lives to handle bonus lives 117 | lives = self.env.unwrapped.ale.lives() 118 | if lives < self.lives and lives > 0: 119 | # for Qbert sometimes we stay in lives == 0 condtion for a few frames 120 | # so its important to keep lives > 0, so that we only reset once 121 | # the environment advertises done. 122 | done = True 123 | self.lives = lives 124 | return obs, reward, done, self.was_real_done 125 | 126 | def reset(self, **kwargs): 127 | """Reset only when lives are exhausted. 128 | This way all states are still reachable even though lives are episodic, 129 | and the learner need not know about any of this behind-the-scenes. 
130 | """ 131 | if self.was_real_done: 132 | obs = self.env.reset(**kwargs) 133 | else: 134 | # no-op step to advance from terminal/lost life state 135 | obs, _, _, _ = self.env.step(0) 136 | self.lives = self.env.unwrapped.ale.lives() 137 | return obs 138 | 139 | 140 | class MaxAndSkipEnv(gym.Wrapper): 141 | def __init__(self, env=None, skip=4): 142 | """Return only every `skip`-th frame""" 143 | super(MaxAndSkipEnv, self).__init__(env) 144 | # most recent raw observations (for max pooling across time steps) 145 | self._obs_buffer = deque(maxlen=3) 146 | self._skip = skip 147 | 148 | def step(self, action): 149 | total_reward = 0.0 150 | done = None 151 | for _ in range(self._skip): 152 | obs, reward, done, info = self.env.step(action) 153 | self._obs_buffer.append(obs) 154 | total_reward += reward 155 | if done: 156 | break 157 | 158 | max_frame = np.max(np.stack(self._obs_buffer), axis=0) 159 | 160 | return max_frame, total_reward, done, info 161 | 162 | def reset(self, **kwargs): 163 | """Clear past frame buffer and init. to first obs. from inner env.""" 164 | self._obs_buffer.clear() 165 | obs = self.env.reset(**kwargs) 166 | self._obs_buffer.append(obs) 167 | return obs 168 | 169 | -------------------------------------------------------------------------------- /A3C/shared_optim.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import math 3 | import torch 4 | import torch.optim as optim 5 | from collections import defaultdict 6 | 7 | 8 | class SharedRMSprop(optim.Optimizer): 9 | """Implements RMSprop algorithm with shared states. 10 | """ 11 | 12 | def __init__(self, 13 | params, 14 | lr=7e-4, 15 | alpha=0.99, 16 | eps=0.1, 17 | weight_decay=0, 18 | momentum=0, 19 | centered=False): 20 | defaults = defaultdict( 21 | lr=lr, 22 | alpha=alpha, 23 | eps=eps, 24 | weight_decay=weight_decay, 25 | momentum=momentum, 26 | centered=centered) 27 | super(SharedRMSprop, self).__init__(params, defaults) 28 | 29 | for group in self.param_groups: 30 | for p in group['params']: 31 | state = self.state[p] 32 | state['step'] = torch.zeros(1) 33 | state['grad_avg'] = p.data.new().resize_as_(p.data).zero_() 34 | state['square_avg'] = p.data.new().resize_as_(p.data).zero_() 35 | state['momentum_buffer'] = p.data.new().resize_as_( 36 | p.data).zero_() 37 | 38 | def share_memory(self): 39 | for group in self.param_groups: 40 | for p in group['params']: 41 | state = self.state[p] 42 | state['square_avg'].share_memory_() 43 | state['step'].share_memory_() 44 | state['grad_avg'].share_memory_() 45 | state['momentum_buffer'].share_memory_() 46 | 47 | def step(self, closure=None): 48 | """Performs a single optimization step. 49 | Arguments: 50 | closure (callable, optional): A closure that reevaluates the model 51 | and returns the loss. 
52 | """ 53 | loss = None 54 | if closure is not None: 55 | loss = closure() 56 | 57 | for group in self.param_groups: 58 | for p in group['params']: 59 | if p.grad is None: 60 | continue 61 | grad = p.grad.data 62 | if grad.is_sparse: 63 | raise RuntimeError( 64 | 'RMSprop does not support sparse gradients') 65 | state = self.state[p] 66 | 67 | square_avg = state['square_avg'] 68 | alpha = group['alpha'] 69 | 70 | state['step'] += 1 71 | 72 | if group['weight_decay'] != 0: 73 | grad = grad.add(group['weight_decay'], p.data) 74 | 75 | square_avg.mul_(alpha).addcmul_(1 - alpha, grad, grad) 76 | 77 | if group['centered']: 78 | grad_avg = state['grad_avg'] 79 | grad_avg.mul_(alpha).add_(1 - alpha, grad) 80 | avg = square_avg.addcmul(-1, grad_avg, 81 | grad_avg).sqrt().add_( 82 | group['eps']) 83 | else: 84 | avg = square_avg.sqrt().add_(group['eps']) 85 | 86 | if group['momentum'] > 0: 87 | buf = state['momentum_buffer'] 88 | buf.mul_(group['momentum']).addcdiv_(grad, avg) 89 | p.data.add_(-group['lr'], buf) 90 | else: 91 | p.data.addcdiv_(-group['lr'], grad, avg) 92 | 93 | return loss 94 | 95 | 96 | class SharedAdam(optim.Optimizer): 97 | """Implements Adam algorithm with shared states. 98 | """ 99 | 100 | def __init__(self, 101 | params, 102 | lr=1e-3, 103 | betas=(0.9, 0.999), 104 | eps=1e-3, 105 | weight_decay=0, 106 | amsgrad=False): 107 | defaults = defaultdict( 108 | lr=lr, 109 | betas=betas, 110 | eps=eps, 111 | weight_decay=weight_decay, 112 | amsgrad=amsgrad) 113 | super(SharedAdam, self).__init__(params, defaults) 114 | 115 | for group in self.param_groups: 116 | for p in group['params']: 117 | state = self.state[p] 118 | state['step'] = torch.zeros(1) 119 | state['exp_avg'] = p.data.new().resize_as_(p.data).zero_() 120 | state['exp_avg_sq'] = p.data.new().resize_as_(p.data).zero_() 121 | state['max_exp_avg_sq'] = p.data.new().resize_as_( 122 | p.data).zero_() 123 | 124 | def share_memory(self): 125 | for group in self.param_groups: 126 | for p in group['params']: 127 | state = self.state[p] 128 | state['step'].share_memory_() 129 | state['exp_avg'].share_memory_() 130 | state['exp_avg_sq'].share_memory_() 131 | state['max_exp_avg_sq'].share_memory_() 132 | 133 | def step(self, closure=None): 134 | """Performs a single optimization step. 135 | Arguments: 136 | closure (callable, optional): A closure that reevaluates the model 137 | and returns the loss. 138 | """ 139 | loss = None 140 | if closure is not None: 141 | loss = closure() 142 | 143 | for group in self.param_groups: 144 | for p in group['params']: 145 | if p.grad is None: 146 | continue 147 | grad = p.grad.data 148 | if grad.is_sparse: 149 | raise RuntimeError( 150 | 'Adam does not support sparse gradients, please consider SparseAdam instead' 151 | ) 152 | amsgrad = group['amsgrad'] 153 | 154 | state = self.state[p] 155 | 156 | exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq'] 157 | if amsgrad: 158 | max_exp_avg_sq = state['max_exp_avg_sq'] 159 | beta1, beta2 = group['betas'] 160 | 161 | state['step'] += 1 162 | 163 | if group['weight_decay'] != 0: 164 | grad = grad.add(group['weight_decay'], p.data) 165 | 166 | # Decay the first and second moment running average coefficient 167 | exp_avg.mul_(beta1).add_(1 - beta1, grad) 168 | exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad) 169 | 170 | if amsgrad: 171 | # Maintains the maximum of all 2nd moment running avg. till 172 | # now 173 | torch.max(max_exp_avg_sq, exp_avg_sq, out=max_exp_avg_sq) 174 | # Use the max. for normalizing running avg. 
of gradient 175 | denom = max_exp_avg_sq.sqrt().add_(group['eps']) 176 | else: 177 | denom = exp_avg_sq.sqrt().add_(group['eps']) 178 | 179 | bias_correction1 = 1 - beta1**state['step'].item() 180 | bias_correction2 = 1 - beta2**state['step'].item() 181 | step_size = group['lr'] * \ 182 | math.sqrt(bias_correction2) / bias_correction1 183 | 184 | p.data.addcdiv_(-step_size, exp_avg, denom) 185 | return loss 186 | -------------------------------------------------------------------------------- /A3C/test.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from setproctitle import setproctitle as ptitle 3 | import torch 4 | from environment import atari_env 5 | from utils import setup_logger 6 | from model import A3Cff 7 | from player_util import Agent 8 | from torch.autograd import Variable 9 | import time 10 | from datetime import datetime 11 | import os 12 | import logging 13 | 14 | 15 | def test(args, shared_model, optimizer, env_conf): 16 | ptitle('Test Agent') 17 | gpu_id = args.gpu_ids[-1] 18 | start_time = datetime.now().strftime('%Y-%m-%d_%H_%M_%S') 19 | log = {} 20 | 21 | setup_logger('{}_log'.format(args.env), r'{0}{1}_{2}_log'.format( 22 | args.log_dir, args.env, start_time)) 23 | log['{}_log'.format(args.env)] = logging.getLogger('{}_log'.format( 24 | args.env)) 25 | d_args = vars(args) 26 | for k in d_args.keys(): 27 | log['{}_log'.format(args.env)].info('{0}: {1}'.format(k, d_args[k])) 28 | if not os.path.exists(args.save_model_dir): 29 | os.mkdir(args.save_model_dir) 30 | if args.seed: 31 | torch.manual_seed(args.seed) 32 | if gpu_id >= 0: 33 | torch.cuda.manual_seed(args.seed) 34 | 35 | env = atari_env(args.env, env_conf, args) 36 | reward_sum = 0 37 | start = time.time() 38 | num_tests = 0 39 | reward_total_sum = 0 40 | player = Agent(None, env, args, None) 41 | player.gpu_id = gpu_id 42 | player.model = A3Cff(player.env.observation_space.shape[0], 43 | player.env.action_space) 44 | 45 | player.state = player.env.reset() 46 | player.eps_len += 2 47 | player.state = torch.from_numpy(player.state).float() 48 | if gpu_id >= 0: 49 | with torch.cuda.device(gpu_id): 50 | player.model = player.model.cuda() 51 | player.state = player.state.cuda() 52 | flag = True 53 | max_score = -10000 54 | 55 | while True: 56 | p = optimizer.param_groups[0]['params'][0] 57 | step = optimizer.state[p]['step'] 58 | player.model.eval() 59 | 60 | if flag: 61 | if gpu_id >= 0: 62 | with torch.cuda.device(gpu_id): 63 | player.model.load_state_dict(shared_model.state_dict()) 64 | else: 65 | player.model.load_state_dict(shared_model.state_dict()) 66 | 67 | flag = False 68 | 69 | with torch.no_grad(): 70 | if args.robust: 71 | #player.action_test_losses(args.epsilon_end) 72 | lin_coeff = min(1, (1.5*int(step)+1)/(args.total_frames/args.num_steps)) 73 | epsilon = lin_coeff*args.epsilon_end 74 | player.action_train(epsilon) 75 | else: 76 | player.action_train() 77 | #player.action_test_losses() 78 | 79 | reward_sum += player.noclip_reward 80 | 81 | if player.done and not player.info: 82 | state = player.env.reset() 83 | player.eps_len += 2 84 | player.state = torch.from_numpy(state).float() 85 | if gpu_id >= 0: 86 | with torch.cuda.device(gpu_id): 87 | player.state = player.state.cuda() 88 | elif player.info: 89 | # calculate losses for tracking 90 | R = torch.zeros(1, 1) 91 | if gpu_id >= 0: 92 | with torch.cuda.device(gpu_id): 93 | R = R.cuda() 94 | player.values.append(R) 95 | gae = torch.zeros(1, 1) 96 | if gpu_id >= 0: 97 | with 
torch.cuda.device(gpu_id): 98 | gae = gae.cuda() 99 | R = Variable(R) 100 | 101 | standard_loss = 0 102 | worst_case_loss = 0 103 | value_loss = 0 104 | entropy = 0 105 | 106 | for i in reversed(range(len(player.rewards))): 107 | R = args.gamma * R + player.rewards[i] 108 | advantage = R - player.values[i] 109 | 110 | value_loss += 0.5 * advantage.pow(2) 111 | 112 | # Generalized Advantage Estimataion 113 | delta_t = player.rewards[i] + args.gamma * \ 114 | player.values[i + 1].data - player.values[i].data 115 | 116 | gae = gae * args.gamma * args.tau + delta_t 117 | if args.robust: 118 | if advantage >= 0: 119 | worst_case_loss += - player.min_log_probs[i] * Variable(gae) 120 | else: 121 | worst_case_loss += - player.max_log_probs[i] * Variable(gae) 122 | 123 | standard_loss += -player.log_probs[i] * Variable(gae) 124 | entropy += player.entropies[i] 125 | 126 | standard_loss = standard_loss/len(player.rewards) 127 | worst_case_loss = worst_case_loss/len(player.rewards) 128 | value_loss = value_loss/len(player.rewards) 129 | entropy = entropy/len(player.rewards) 130 | player.clear_actions() 131 | 132 | flag = True 133 | num_tests += 1 134 | reward_total_sum += reward_sum 135 | reward_mean = reward_total_sum / num_tests 136 | log['{}_log'.format(args.env)].info( 137 | ("Time {0}, steps {1}/{2}, ep reward {3}, ep length {4}, reward mean {5:.3f} \n"+ 138 | "Losses: Policy:{6:.3f}, Worst case: {7:.3f}, Value: {8:.3f}, Entropy: {9:.3f}"). 139 | format(time.strftime("%Hh %Mm %Ss", time.gmtime(time.time() - start)), 140 | int(step), args.total_frames/args.num_steps, reward_sum, player.eps_len, reward_mean, 141 | float(standard_loss), float(worst_case_loss), float(value_loss), float(entropy))) 142 | 143 | if args.save_max and reward_sum >= max_score: 144 | max_score = reward_sum 145 | if gpu_id >= 0: 146 | with torch.cuda.device(gpu_id): 147 | state_to_save = player.model.state_dict() 148 | torch.save(state_to_save, '{0}{1}_{2}_best.pt'.format( 149 | args.save_model_dir, args.env, start_time)) 150 | else: 151 | state_to_save = player.model.state_dict() 152 | torch.save(state_to_save, '{0}{1}_{2}_best.pt'.format( 153 | args.save_model_dir, args.env, start_time)) 154 | 155 | reward_sum = 0 156 | player.eps_len = 0 157 | state = player.env.reset() 158 | player.eps_len += 2 159 | 160 | #stop after total steps gradient updates have passed 161 | if step >= args.total_frames/args.num_steps: 162 | if gpu_id >= 0: 163 | with torch.cuda.device(gpu_id): 164 | state_to_save = player.model.state_dict() 165 | torch.save(state_to_save, '{0}{1}_{2}_last.pt'.format( 166 | args.save_model_dir, args.env, start_time)) 167 | else: 168 | state_to_save = player.model.state_dict() 169 | torch.save(state_to_save, '{0}{1}_{2}_last.pt'.format( 170 | args.save_model_dir, args.env, start_time)) 171 | return 172 | 173 | time.sleep(10) 174 | player.state = torch.from_numpy(state).float() 175 | if gpu_id >= 0: 176 | with torch.cuda.device(gpu_id): 177 | player.state = player.state.cuda() 178 | -------------------------------------------------------------------------------- /A3C/train.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from setproctitle import setproctitle as ptitle 3 | import torch 4 | import torch.optim as optim 5 | from environment import atari_env 6 | from utils import ensure_shared_grads 7 | from model import A3Cff 8 | from player_util import Agent 9 | from torch.autograd import Variable 10 | from ibp import network_bounds 11 | 12 | 
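# train_robust below mirrors the standard train() loop further down, with two
# differences visible in its body: (1) the perturbation bound grows linearly with the
# shared optimizer's step count, reaching args.epsilon_end two thirds of the way
# through training (lin_coeff = min(1, (1.5*step + 1)/(total_frames/num_steps))), and
# (2) the policy loss mixes the standard policy-gradient term (weight kappa) with a
# worst-case term (weight 1 - kappa) that uses the lower bound on the log-probability
# (min_log_probs) when the advantage estimate (gae) is non-negative and the upper
# bound (max_log_probs) when it is negative, i.e. always the bound that makes the
# loss largest under the allowed perturbation.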
def train_robust(rank, args, shared_model, optimizer, env_conf): 13 | ptitle('Training Agent: {}'.format(rank)) 14 | gpu_id = args.gpu_ids[rank % len(args.gpu_ids)] 15 | if args.seed: 16 | torch.manual_seed(args.seed + rank) 17 | if gpu_id >= 0: 18 | torch.cuda.manual_seed(args.seed + rank) 19 | env = atari_env(args.env, env_conf, args) 20 | if optimizer is None: 21 | if args.optimizer == 'RMSprop': 22 | optimizer = optim.RMSprop(shared_model.parameters(), lr=args.lr) 23 | if args.optimizer == 'Adam': 24 | optimizer = optim.Adam( 25 | shared_model.parameters(), lr=args.lr, amsgrad=args.amsgrad) 26 | if args.seed: 27 | env.seed(args.seed + rank) 28 | player = Agent(None, env, args, None) 29 | player.gpu_id = gpu_id 30 | player.model = A3Cff(player.env.observation_space.shape[0], 31 | player.env.action_space) 32 | 33 | player.state = player.env.reset() 34 | player.state = torch.from_numpy(player.state).float() 35 | if gpu_id >= 0: 36 | with torch.cuda.device(gpu_id): 37 | player.state = player.state.cuda() 38 | player.model = player.model.cuda() 39 | player.model.train() 40 | player.eps_len += 2 41 | while True: 42 | if gpu_id >= 0: 43 | with torch.cuda.device(gpu_id): 44 | player.model.load_state_dict(shared_model.state_dict()) 45 | else: 46 | player.model.load_state_dict(shared_model.state_dict()) 47 | 48 | p = optimizer.param_groups[0]['params'][0] 49 | step = optimizer.state[p]['step'] 50 | if step >= (args.total_frames/args.num_steps): 51 | return 52 | #increase linearly until 2/3 through halfway 53 | lin_coeff = min(1, (1.5*int(step)+1)/(args.total_frames/args.num_steps)) 54 | epsilon = lin_coeff*args.epsilon_end 55 | kappa = args.kappa_end#(1-lin_coeff)*1 + lin_coeff*args.kappa_end 56 | for step in range(args.num_steps): 57 | player.action_train(bound_epsilon = epsilon) 58 | if player.done: 59 | break 60 | 61 | if player.done: 62 | state = player.env.reset() 63 | player.state = torch.from_numpy(state).float() 64 | if gpu_id >= 0: 65 | with torch.cuda.device(gpu_id): 66 | player.state = player.state.cuda() 67 | 68 | R = torch.zeros(1, 1) 69 | if not player.done: 70 | value, _ = player.model(Variable(player.state.unsqueeze(0))) 71 | R = value.data 72 | 73 | if gpu_id >= 0: 74 | with torch.cuda.device(gpu_id): 75 | R = R.cuda() 76 | 77 | player.values.append(Variable(R)) 78 | policy_loss = 0 79 | value_loss = 0 80 | gae = torch.zeros(1, 1) 81 | if gpu_id >= 0: 82 | with torch.cuda.device(gpu_id): 83 | gae = gae.cuda() 84 | R = Variable(R) 85 | for i in reversed(range(len(player.rewards))): 86 | R = args.gamma * R + player.rewards[i] 87 | advantage = R - player.values[i] 88 | 89 | value_loss = value_loss + 0.5 * advantage.pow(2) 90 | 91 | # Generalized Advantage Estimataion 92 | delta_t = player.rewards[i] + args.gamma * \ 93 | player.values[i + 1].data - player.values[i].data 94 | 95 | gae = gae * args.gamma * args.tau + delta_t 96 | 97 | if gae >= 0: 98 | worst_case_loss = - player.min_log_probs[i] * Variable(gae) 99 | else: 100 | worst_case_loss = - player.max_log_probs[i] * Variable(gae) 101 | standard_loss = -player.log_probs[i] * Variable(gae) 102 | 103 | policy_loss = policy_loss +kappa*standard_loss +(1-kappa)*worst_case_loss - 0.01 * player.entropies[i] 104 | #print(policy_loss + 0.5 * value_loss) 105 | player.model.zero_grad() 106 | (policy_loss + 0.5 * value_loss).backward() 107 | ensure_shared_grads(player.model, shared_model, gpu=gpu_id >= 0) 108 | optimizer.step() 109 | 110 | player.clear_actions() 111 | 112 | 113 | def train(rank, args, shared_model, optimizer, env_conf): 
114 | ptitle('Training Agent: {}'.format(rank)) 115 | gpu_id = args.gpu_ids[rank % len(args.gpu_ids)] 116 | if args.seed: 117 | torch.manual_seed(args.seed + rank) 118 | if gpu_id >= 0: 119 | torch.cuda.manual_seed(args.seed + rank) 120 | env = atari_env(args.env, env_conf, args) 121 | if optimizer is None: 122 | if args.optimizer == 'RMSprop': 123 | optimizer = optim.RMSprop(shared_model.parameters(), lr=args.lr) 124 | if args.optimizer == 'Adam': 125 | optimizer = optim.Adam( 126 | shared_model.parameters(), lr=args.lr, amsgrad=args.amsgrad) 127 | if args.seed: 128 | env.seed(args.seed + rank) 129 | player = Agent(None, env, args, None) 130 | player.gpu_id = gpu_id 131 | player.model = A3Cff(player.env.observation_space.shape[0], 132 | player.env.action_space) 133 | 134 | player.state = player.env.reset() 135 | player.state = torch.from_numpy(player.state).float() 136 | if gpu_id >= 0: 137 | with torch.cuda.device(gpu_id): 138 | player.state = player.state.cuda() 139 | player.model = player.model.cuda() 140 | player.model.train() 141 | player.eps_len += 2 142 | while True: 143 | if gpu_id >= 0: 144 | with torch.cuda.device(gpu_id): 145 | player.model.load_state_dict(shared_model.state_dict()) 146 | else: 147 | player.model.load_state_dict(shared_model.state_dict()) 148 | 149 | p = optimizer.param_groups[0]['params'][0] 150 | step = optimizer.state[p]['step'] 151 | if step >= (args.total_frames/args.num_steps): 152 | return 153 | 154 | for step in range(args.num_steps): 155 | player.action_train() 156 | if player.done: 157 | break 158 | 159 | if player.done: 160 | state = player.env.reset() 161 | player.state = torch.from_numpy(state).float() 162 | if gpu_id >= 0: 163 | with torch.cuda.device(gpu_id): 164 | player.state = player.state.cuda() 165 | 166 | R = torch.zeros(1, 1) 167 | if not player.done: 168 | value, _ = player.model(Variable(player.state.unsqueeze(0))) 169 | R = value.data 170 | 171 | if gpu_id >= 0: 172 | with torch.cuda.device(gpu_id): 173 | R = R.cuda() 174 | 175 | player.values.append(Variable(R)) 176 | policy_loss = 0 177 | value_loss = 0 178 | gae = torch.zeros(1, 1) 179 | if gpu_id >= 0: 180 | with torch.cuda.device(gpu_id): 181 | gae = gae.cuda() 182 | R = Variable(R) 183 | for i in reversed(range(len(player.rewards))): 184 | R = args.gamma * R + player.rewards[i] 185 | advantage = R - player.values[i] 186 | value_loss = value_loss + 0.5 * advantage.pow(2) 187 | 188 | # Generalized Advantage Estimataion 189 | delta_t = player.rewards[i] + args.gamma * \ 190 | player.values[i + 1].data - player.values[i].data 191 | 192 | gae = gae * args.gamma * args.tau + delta_t 193 | 194 | policy_loss = policy_loss - \ 195 | player.log_probs[i] * \ 196 | Variable(gae) - 0.01 * player.entropies[i] 197 | 198 | player.model.zero_grad() 199 | (policy_loss + 0.5 * value_loss).backward() 200 | ensure_shared_grads(player.model, shared_model, gpu=gpu_id >= 0) 201 | optimizer.step() 202 | player.clear_actions() 203 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Robust Deep Reinforcement Learning through Adversarial Loss 2 | 3 | Updated code release here: [https://github.com/tuomaso/radial_rl_v2](https://github.com/tuomaso/radial_rl_v2). 4 | 5 | This repository is an old version of our implementation for [Robust Deep Reinforcement Learning through Adversarial Loss](https://arxiv.org/abs/2008.01976). See up to date code above. 
6 | 7 | # Overview 8 | 9 | **RADIAL**(**R**obust **AD**versar**IA**l **L**oss)-RL is a framework for training more robust deep RL agents. It leverages algorithms for calculating certified output bounds, such as Interval Bound Propagation, to minimize an upper bound on the original algorithm's loss function under the worst possible (bounded) adversarial perturbation (a short code sketch of this idea follows the Training section below). This framework significantly increases neural network robustness against PGD attacks. 10 | 11 | In addition we propose *Greedy Worst-Case Reward (GWC)*, an efficient method for estimating an agent's performance under the worst possible sequence of adversarial attacks. 12 | 13 | 14 | 15 | Trained A3C agents playing a game of Pong (the trained agents control the right paddle). From left to right: 16 | * A3C under no perturbation, reward 21.0 17 | * A3C under a continuous 1/255 PGD attack, reward -19.0. The standard agent fails under almost imperceptible perturbations. 18 | * RADIAL-A3C under a 3/255 PGD attack, reward 20.0. After RADIAL training it retains high performance even against a 3x larger perturbation. 19 | 20 | Our code builds on top of various existing implementations, most notably: 21 | 22 | * A3C implementation and overall flow based on https://github.com/dgriff777/rl_a3c_pytorch. 23 | * DQN implementation based on https://github.com/higgsfield/RL-Adventure 24 | * Adversarial attack implementations based on https://github.com/advboxes/AdvBox/blob/master/advbox.md. 25 | 26 | 27 | 28 | ## Requirements 29 | To run our code you need Python 3 (>=3.7) and pip installed on your system. Additionally we require PyTorch>=1.4, which should be installed using the instructions from https://pytorch.org/get-started/locally/. 30 | 31 | To install requirements: 32 | 33 | ```setup 34 | pip install -r requirements.txt 35 | ``` 36 | 37 | ## Pre-trained Models 38 | 39 | You can download our trained models from Dropbox: [DQN trained models](https://www.dropbox.com/s/5xo4pa02v20s3iq/DQN_trained_models.zip?dl=0), [A3C trained models](https://www.dropbox.com/s/xdwulv6mhbvgrk9/A3C_trained_models.zip?dl=0). We suggest unpacking these to `radial_rl/DQN/trained_models/` and `radial_rl/A3C/trained_models/` respectively. 40 | 41 | ## Training 42 | 43 | To train a standard DQN model on Pong like the one used in our paper, run this command: 44 | 45 | ```train DQN 46 | cd DQN 47 | python main.py 48 | ``` 49 | To speed up training using GPU x (in a system with one GPU, x=0), add the argument `--gpu-id x`. 50 | To train on another game, like RoadRunner, use `--env RoadRunnerNoFrameskip-v4`. Other games used in the paper are `FreewayNoFrameskip-v4` and `BankHeistNoFrameskip-v4`. 51 | 52 | ```train A3C 53 | cd A3C 54 | python main.py 55 | ``` 56 | Additionally you can use the `--gpu-ids` argument to train with one or more GPUs, for example GPUs 0 and 1 with `--gpu-ids 0 1`. Note that the default number of A3C workers is 16; for maximum efficiency you may want to match it to the number of CPU cores in your system, for example with `--workers 4`. This may affect results, however. 57 | 58 | Models are saved in args.save_model_dir, named after their environment and the date and time training started. Each run produces two models; we used \_last.pt for all experiments, while \_best.pt is mostly useful as an intermediate checkpoint if training is disrupted.
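As a concrete illustration of the interval bound propagation step mentioned in the Overview, the snippet below is a minimal, self-contained sketch of how certified output bounds can be computed for a toy fully-connected network. The function name `interval_bounds` and the toy architecture are illustrative only and are not part of this repo; the implementation actually used for training is `network_bounds` in `ibp.py`.

```python
# Minimal IBP sketch: propagate the box [x - eps, x + eps] through Linear/ReLU layers.
import torch
import torch.nn as nn

def interval_bounds(model, x, epsilon):
    upper, lower = x + epsilon, x - epsilon
    for layer in model:
        if isinstance(layer, nn.Linear):
            mid = (upper + lower) / 2          # center of the box
            radius = (upper - lower) / 2       # half-width of the box
            new_mid = layer(mid)               # an affine layer maps the center exactly
            new_radius = radius.matmul(layer.weight.abs().t())  # |W| bounds the spread
            upper, lower = new_mid + new_radius, new_mid - new_radius
        elif isinstance(layer, nn.ReLU):
            # ReLU is monotonic, so applying it to both bounds keeps them valid
            upper, lower = torch.relu(upper), torch.relu(lower)
    return upper, lower

net = nn.Sequential(nn.Linear(4, 16), nn.ReLU(), nn.Linear(16, 3))
state = torch.rand(1, 4)
up, low = interval_bounds(net, state, epsilon=1/255)
# Every output reachable under a perturbation of size <= epsilon lies in [low, up];
# RADIAL uses such bounds to upper-bound the training loss under attack.
print(low, up)
```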
59 | 60 | 61 | ## Robust training 62 | 63 | To train a robust DQN model on RoadRunner like the one used in our paper, starting from our pre-trained RoadRunner model, use the following: 64 | 65 | ```Radial DQN 66 | cd DQN 67 | python main.py --env RoadRunnerNoFrameskip-v4 --robust --load-path "trained_models/RoadRunnerNoFrameskip-v4_trained.pt" --total-frames 4500000 --exp-epsilon-decay 1 --replay-initial 256 68 | ``` 69 | 70 | 71 | ```Radial A3C 72 | cd A3C 73 | python main.py --env RoadRunnerNoFrameskip-v4 --robust --load-path "trained_models/RoadRunnerNoFrameskip-v4_trained.pt" --total-frames 10000000 74 | ``` 75 | 76 | 77 | 78 | ## Evaluation 79 | 80 | To evaluate our robustly trained BankHeist model using the metrics described in the paper, use the following command in the DQN or A3C directory: 81 | 82 | ``` 83 | python evaluate.py --env BankHeistNoFrameskip-v4 --load-path "trained_models/BankHeistNoFrameskip-v4_robust.pt" --pgd --gwc --nominal 84 | ``` 85 | Additionally you can use the `--gpu-id x` argument to use a GPU to speed up evaluation. Note that PGD evaluation takes much longer to run than the other metrics, so you can replace it with a much faster evaluation against FGSM attacks by switching `--pgd` to `--fgsm`. 86 | 87 | Results are saved as numpy arrays, and Result_viewer.ipynb provides a convenient way to view them. 88 | 89 | 90 | ## Results 91 | 92 | ### Robustness on Atari games 93 | | Game | Model\Metric | PGD attack | | GWC reward | 94 | |--------------|--------------|:------------:|:-----------:|:------------:| 95 | | | epsilon | 1/255 | 3/255 | 1/255 | 96 | | Pong | RS-DQN | 18.13 | - | - | 97 | | | SA-DQN | 20.1+-0.0 | - | - | 98 | | | RADIAL-DQN | 20.8+-0.09 | 20.8+-0.09 | -1.85+-4.62 | 99 | | | RADIAL-A3C | 20.0+-0.0 | 20.0+-0.0 | 20.0+-0.0 | 100 | | Freeway | RS-DQN | 32.53 | - | - | 101 | | | SA-DQN | 30.36+-0.7 | - | - | 102 | | | RADIAL-DQN | 21.95+-0.40 | 21.55+-0.26 | 21.7+-0.39 | 103 | | BankHeist | RS-DQN | 190.67 | - | - | 104 | | | SA-DQN | 1043.6+-9.5 | - | - | 105 | | | RADIAL-DQN | 1038.0+-23.0 | 833.5+-45.2 | 1048.0+-32.3 | 106 | | | RADIAL-A3C | 848.0+-3.8 | 827.0+-6.0 | 832.5+-4.1 | 107 | | RoadRunner | RS-DQN | 5753.33 | - | - | 108 | | | SA-DQN | 15280+-828 | - | - | 109 | | | RADIAL-DQN | 43920+-1238 | 12480+-901 | 33745+-2389 | 110 | | | RADIAL-A3C | 30435+-1504 | 30620+-1141 | 29595+-1428 | 111 | 112 | ### Training commands for models above 113 | For DQN models, make sure you are in the `radial_rl/DQN` directory before issuing commands, and in the `radial_rl/A3C` directory for A3C models, and make sure you have downloaded the pretrained models to the specified directories.
114 | 115 | | Game | Model | Command | 116 | |------------|------------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:| 117 | | Pong | RADIAL-DQN | python main.py --robust --load-path "trained_models/PongNoFrameskip-v4_trained.pt" --total-frames 4500000 --exp-epsilon-decay 1 --replay-initial 256 --amsgrad | 118 | | | RADIAL-A3C | python main.py --robust --load-path "trained_models/PongNoFrameskip-v4_trained.pt" --total-frames 10000000 | 119 | | Freeway | RADIAL-DQN | python main.py --env FreewayNoFrameskip-v4 --robust --load-path "trained_models/FreewayNoFrameskip-v4_trained.pt" --total-frames 4500000 --exp-epsilon-decay 1 --replay-initial 256 | 120 | | BankHeist | RADIAL-DQN | python main.py --env BankHeistNoFrameskip-v4 --robust --load-path "trained_models/BankHeistNoFrameskip-v4_trained.pt" --total-frames 4500000 --exp-epsilon-decay 1 --replay-initial 256 | 121 | | | RADIAL-A3C | python main.py --env BankHeistNoFrameskip-v4 --robust --load-path "trained_models/BankHeistNoFrameskip-v4_trained.pt" --total-frames 10000000 | 122 | | RoadRunner | RADIAL-DQN | python main.py --env RoadRunnerNoFrameskip-v4 --robust --load-path "trained_models/RoadRunnerNoFrameskip-v4_trained.pt" --total-frames 4500000 --exp-epsilon-decay 1 --replay-initial 256 | 123 | | | RADIAL-A3C | python main.py --env RoadRunnerNoFrameskip-v4 --robust --load-path "trained_models/RoadRunnerNoFrameskip-v4_trained.pt" --total-frames 10000000 124 | 125 | 126 | ## Common issues 127 | 128 | On some machines you might get the following error ImportError: libSM.so.6: cannot open shared object file: No such file or directory, 129 | which can be fixed by running the following line: 130 | ``` 131 | sudo apt-get install libsm6 libxrender1 libfontconfig1 132 | ``` 133 | -------------------------------------------------------------------------------- /DQN/train.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import time 3 | import torch 4 | import torch.nn.functional as F 5 | import random 6 | import logging 7 | from datetime import datetime 8 | 9 | from utils import setup_logger 10 | from plotter import plot 11 | from ibp import network_bounds 12 | 13 | 14 | def _compute_robust_loss(curr_model, target_model, data, epsilon, kappa, gamma, device, args): 15 | state, action, reward, next_state, done = data 16 | 17 | q_values = curr_model(state) 18 | next_q_values = curr_model(next_state) 19 | next_q_state_values = target_model(next_state) 20 | 21 | q_value = q_values.gather(1, action.unsqueeze(1)).squeeze(1) 22 | next_q_value = next_q_state_values.gather(1, torch.argmax(next_q_values, 1, keepdim=True)).squeeze(1) 23 | expected_q_value = reward + gamma * next_q_value * (1 - done) 24 | 25 | standard_loss = torch.min((q_value - expected_q_value.detach()).pow(2), torch.abs(q_value - expected_q_value.detach())) 26 | 27 | upper, lower = network_bounds(curr_model.model, state, epsilon) 28 | onehot_labels = torch.zeros(upper.shape).to(device) 29 | onehot_labels[range(state.shape[0]), action] = 1 30 | 31 | if args.worse_bound: 32 | upper_diff = upper - q_values*(1-onehot_labels) - expected_q_value.detach().unsqueeze(1)*onehot_labels 33 | lower_diff = lower - q_values*(1-onehot_labels) - expected_q_value.detach().unsqueeze(1)*onehot_labels 34 | wc_diff = torch.max(torch.abs(upper_diff), torch.abs(lower_diff)) 35 | else: 36 | worst_case 
= onehot_labels*lower+(1-onehot_labels)*upper 37 | wc_diff = torch.abs(worst_case - q_values*(1-onehot_labels) - expected_q_value.detach().unsqueeze(1)*onehot_labels) 38 | 39 | #sum over output layer, mean only in batch dimension 40 | worst_case_loss = torch.sum(torch.min(wc_diff.pow(2), wc_diff), dim=1).mean() 41 | 42 | standard_loss = standard_loss.mean() 43 | 44 | loss = (kappa*(standard_loss)+(1-kappa)*(worst_case_loss)) 45 | 46 | return loss, standard_loss, worst_case_loss 47 | 48 | def _compute_loss(curr_model, target_model, data, gamma, device): 49 | state, action, reward, next_state, done = data 50 | 51 | q_values = curr_model(state) 52 | next_q_values = curr_model(next_state) 53 | next_q_state_values = target_model(next_state) 54 | 55 | q_value = q_values.gather(1, action.unsqueeze(1)).squeeze(1) 56 | next_q_value = next_q_state_values.gather(1, torch.argmax(next_q_values, 1, keepdim=True)).squeeze(1) 57 | expected_q_value = reward + gamma * next_q_value * (1 - done) 58 | #Huber loss 59 | standard_loss = torch.min((q_value - expected_q_value.detach()).pow(2), torch.abs(q_value - expected_q_value.detach())) 60 | standard_loss = standard_loss.mean() 61 | 62 | return standard_loss, standard_loss, standard_loss 63 | 64 | def train(current_model, target_model, env, optimizer, args): 65 | start_time = datetime.now().strftime('%Y-%m-%d_%H_%M_%S') 66 | log = {} 67 | setup_logger('{}_log'.format(args.env), r'{}{}_{}_log'.format( 68 | args.log_dir, args.env, start_time)) 69 | log['{}_log'.format(args.env)] = logging.getLogger('{}_log'.format( 70 | args.env)) 71 | d_args = vars(args) 72 | for k in d_args.keys(): 73 | log['{}_log'.format(args.env)].info('{0}: {1}'.format(k, d_args[k])) 74 | 75 | 76 | #linearly decrease epsilon from 1 to epsilon end over epsilon decay steps 77 | epsilon_start = 1.0 78 | epsilon_by_frame = lambda frame_idx: (args.exp_epsilon_end + 79 | max(0, 1-frame_idx / args.exp_epsilon_decay)*(epsilon_start-args.exp_epsilon_end)) 80 | 81 | if args.gpu_id >= 0: 82 | device = torch.device('cuda:{}'.format(args.gpu_id)) 83 | else: 84 | device = torch.device('cpu') 85 | 86 | replay_buffer = ReplayBuffer(args.buffer_size, device) 87 | start = time.time() 88 | 89 | losses = [] 90 | standard_losses = [] 91 | worst_case_losses = [] 92 | all_rewards = [] 93 | worst_case_rewards = [] 94 | #initialize as a large negative number to save first 95 | max_score = -10000 96 | episode_reward = 0 97 | 98 | state = env.reset() 99 | state = torch.FloatTensor(state).unsqueeze(0).to(device) 100 | 101 | for frame_idx in range(1, args.total_frames + 1): 102 | action_epsilon = epsilon_by_frame(frame_idx) 103 | action = current_model.act(state, action_epsilon) 104 | next_state, reward, done, info = env.step(action) 105 | episode_reward += reward 106 | 107 | next_state = torch.FloatTensor(next_state).unsqueeze(0).to(device) 108 | action = torch.LongTensor([action]).to(device) 109 | #scale rewards between -1 and 1 110 | reward = torch.clamp(torch.FloatTensor([reward]).to(device), min=-1, max=1) 111 | done = torch.FloatTensor([info]).to(device) 112 | 113 | replay_buffer.push(state, action, reward, next_state, done) 114 | 115 | state = next_state 116 | 117 | if done and not info: 118 | state = env.reset() 119 | state = torch.FloatTensor(state).unsqueeze(0).to(device) 120 | 121 | elif info: 122 | state = env.reset() 123 | state = torch.FloatTensor(state).unsqueeze(0).to(device) 124 | all_rewards.append(episode_reward) 125 | episode_reward = 0 126 | plot(frame_idx, all_rewards, losses, standard_losses, 
worst_case_losses, args, start_time) 127 | 128 | if frame_idx%5==0: 129 | test_reward = test(args, current_model, env, device) 130 | log['{}_log'.format(args.env)].info("Steps: {}, Test reward: {}, Time taken: {:.3f}s".format(frame_idx, 131 | test_reward, time.time()-start)) 132 | if args.save_max and test_reward >= max_score: 133 | max_score = test_reward 134 | state_to_save = current_model.state_dict() 135 | torch.save(state_to_save, '{}{}_{}_best.pt'.format( 136 | args.save_model_dir, args.env, start_time)) 137 | 138 | 139 | if frame_idx > args.replay_initial and frame_idx%(args.batch_size/args.updates_per_frame)==0: 140 | 141 | lin_coeff = min(1, (frame_idx+1)/max(args.attack_epsilon_schedule, args.total_frames)) 142 | 143 | attack_epsilon = lin_coeff*args.attack_epsilon_end 144 | kappa = (1-lin_coeff)*1 + lin_coeff*args.kappa_end 145 | 146 | data = replay_buffer.sample(args.batch_size) 147 | if args.robust: 148 | loss, standard_loss, worst_case_loss = _compute_robust_loss(current_model, target_model, data, attack_epsilon, 149 | kappa, args.gamma, device, args) 150 | else: 151 | loss, standard_loss, worst_case_loss = _compute_loss(current_model, target_model, data, args.gamma, device) 152 | 153 | optimizer.zero_grad() 154 | loss.backward() 155 | optimizer.step() 156 | losses.append(loss.data.item()) 157 | standard_losses.append(standard_loss.data.item()) 158 | worst_case_losses.append(worst_case_loss.data.item()) 159 | 160 | if frame_idx % (1000*(args.batch_size/args.updates_per_frame)) == 0: 161 | target_model.load_state_dict(current_model.state_dict()) 162 | #save final model 163 | state_to_save = current_model.state_dict() 164 | torch.save(state_to_save, '{}{}_{}_last.pt'.format( 165 | args.save_model_dir, args.env, start_time)) 166 | 167 | log['{}_log'.format(args.env)].info("Done in {:.3f}s".format(time.time()-start)) 168 | 169 | def test(args, model, env, device): 170 | episode_reward = 0 171 | state = env.reset() 172 | 173 | with torch.no_grad(): 174 | while True: 175 | state = torch.FloatTensor(state).unsqueeze(0).to(device) 176 | output = model.forward(state) 177 | 178 | action = torch.argmax(output, dim=1) 179 | 180 | next_state, reward, done, info = env.step(action) 181 | episode_reward += reward 182 | state = next_state 183 | if done and not info: 184 | state = env.reset() 185 | 186 | elif info: 187 | state = env.reset() 188 | return episode_reward 189 | 190 | 191 | class ReplayBuffer(object): 192 | def __init__(self, capacity, device): 193 | self.capacity = capacity 194 | self.device = device 195 | self.buffer = [] 196 | 197 | def push(self, state, action, reward, next_state, done): 198 | self.buffer.append((state, action, reward, next_state, done)) 199 | if len(self.buffer) > self.capacity: 200 | self.buffer.pop(0) 201 | 202 | def sample(self, batch_size): 203 | state, action, reward, next_state, done = zip(*random.sample(self.buffer, batch_size)) 204 | return (torch.cat(state, dim=0), torch.cat(action, dim=0), torch.cat(reward, dim=0), 205 | torch.cat(next_state, dim =0), torch.cat(done, dim=0)) 206 | 207 | 208 | def __len__(self): 209 | return len(self.buffer) 210 | -------------------------------------------------------------------------------- /A3C/adv_attacks/adv_model.py: -------------------------------------------------------------------------------- 1 | #coding=utf-8 2 | # Copyright 2017 - 2018 Baidu Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | from __future__ import absolute_import 17 | 18 | import numpy as np 19 | import os 20 | 21 | from abc import ABCMeta 22 | from abc import abstractmethod 23 | 24 | import logging 25 | logger=logging.getLogger(__name__) 26 | 27 | 28 | 29 | import torchvision 30 | from torch.autograd import Variable 31 | import torch.nn as nn 32 | 33 | """ 34 | The base model of the model. 35 | """ 36 | 37 | """ 38 | Pytorch model 39 | """ 40 | 41 | class Model(object): 42 | """ 43 | Base class of model to provide attack. 44 | 45 | Args: 46 | bounds(tuple): The lower and upper bound for the image pixel. 47 | channel_axis(int): The index of the axis that represents the color 48 | channel. 49 | preprocess(tuple): Two element tuple used to preprocess the input. 50 | First substract the first element, then divide the second element. 51 | """ 52 | __metaclass__ = ABCMeta 53 | 54 | def __init__(self, bounds, channel_axis, preprocess=None): 55 | assert len(bounds) == 2 56 | assert channel_axis in [0, 1, 2, 3] 57 | 58 | self._bounds = bounds 59 | self._channel_axis = channel_axis 60 | 61 | # Make self._preprocess to be (0,1) if possible, so that don't need 62 | # to do substract or divide. 63 | if preprocess is not None: 64 | sub, div = np.array(preprocess) 65 | if not np.any(sub): 66 | sub = 0 67 | if np.all(div == 1): 68 | div = 1 69 | assert (div is None) or np.all(div) 70 | self._preprocess = (sub, div) 71 | else: 72 | self._preprocess = (0, 1) 73 | 74 | def bounds(self): 75 | """ 76 | Return the upper and lower bounds of the model. 77 | """ 78 | return self._bounds 79 | 80 | def channel_axis(self): 81 | """ 82 | Return the channel axis of the model. 83 | """ 84 | return self._channel_axis 85 | 86 | def _process_input(self, input_): 87 | res = None 88 | sub, div = self._preprocess 89 | if np.any(sub != 0): 90 | res = input_ - sub 91 | if not np.all(sub == 1): 92 | if res is None: # "res = input_ - sub" is not executed! 93 | res = input_ / div 94 | else: 95 | res /= div 96 | if res is None: # "res = (input_ - sub)/ div" is not executed! 97 | return input_ 98 | return res 99 | 100 | @abstractmethod 101 | def predict(self, data): 102 | """ 103 | Calculate the prediction of the data. 104 | 105 | Args: 106 | data(numpy.ndarray): input data with shape (size, 107 | height, width, channels). 108 | 109 | Return: 110 | numpy.ndarray: predictions of the data with shape (batch_size, 111 | num_of_classes). 112 | """ 113 | raise NotImplementedError 114 | 115 | @abstractmethod 116 | def num_classes(self): 117 | """ 118 | Determine the number of the classes 119 | 120 | Return: 121 | int: the number of the classes 122 | """ 123 | raise NotImplementedError 124 | 125 | @abstractmethod 126 | def gradient(self, data, label): 127 | """ 128 | Calculate the gradient of the cross-entropy loss w.r.t the image. 129 | 130 | Args: 131 | data(numpy.ndarray): input data with shape (size, height, width, 132 | channels). 133 | label(int): Label used to calculate the gradient. 
134 | 135 | Return: 136 | numpy.ndarray: gradient of the cross-entropy loss w.r.t the image 137 | with the shape (height, width, channel). 138 | """ 139 | raise NotImplementedError 140 | 141 | @abstractmethod 142 | def predict_name(self): 143 | """ 144 | Get the predict name, such as "softmax",etc. 145 | :return: string 146 | """ 147 | raise NotImplementedError 148 | 149 | #直接加载pb文件 150 | class PytorchModel(Model): 151 | 152 | 153 | def __init__(self, 154 | model, 155 | loss, 156 | bounds, 157 | channel_axis=3, 158 | nb_classes=10, 159 | preprocess=None, 160 | device=None): 161 | 162 | import torch 163 | 164 | 165 | if preprocess is None: 166 | preprocess = (0, 1) 167 | 168 | super(PytorchModel, self).__init__( 169 | bounds=bounds, channel_axis=channel_axis, preprocess=preprocess) 170 | 171 | 172 | self._model = model 173 | 174 | #暂时不支持自定义loss 175 | self._loss=loss 176 | 177 | self._nb_classes=nb_classes 178 | if not device: 179 | self._device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 180 | elif device == -1: 181 | self._device = torch.device("cpu") 182 | else: 183 | self._device = torch.device("cuda:{}".format(device)) 184 | 185 | print(self._device) 186 | 187 | logger.info("Finish PytorchModel init") 188 | 189 | #返回值为标量 190 | def predict(self, data): 191 | """ 192 | Calculate the prediction of the data. 193 | Args: 194 | data(numpy.ndarray): input data with shape (size, 195 | height, width, channels). 196 | Return: 197 | numpy.ndarray: predictions of the data with shape (batch_size, 198 | num_of_classes). 199 | """ 200 | 201 | import torch 202 | 203 | scaled_data = self._process_input(data) 204 | 205 | scaled_data = torch.from_numpy(scaled_data).to(self._device) 206 | 207 | 208 | # Run prediction 209 | predict = self._model(scaled_data) 210 | #if A3C choose action output, don't care about value for evaluation 211 | if type(predict)==tuple: 212 | predict = predict[1] 213 | predict = np.squeeze(predict, axis=0) 214 | 215 | predict=predict.detach() 216 | 217 | predict=predict.cpu().numpy().copy() 218 | 219 | #logging.info(predict) 220 | 221 | return predict 222 | 223 | #返回值为tensor 224 | def predict_tensor(self, data): 225 | """ 226 | Calculate the prediction of the data. 227 | Args: 228 | data(numpy.ndarray): input data with shape (size, 229 | height, width, channels). 230 | Return: 231 | numpy.ndarray: predictions of the data with shape (batch_size, 232 | num_of_classes). 233 | """ 234 | 235 | import torch 236 | 237 | scaled_data = self._process_input(data).to(self._device) 238 | 239 | #scaled_data = torch.from_numpy(scaled_data) 240 | 241 | 242 | # Run prediction 243 | predict = self._model(scaled_data) 244 | #predict = np.squeeze(predict, axis=0) 245 | 246 | #predict=predict.detach() 247 | 248 | #predict=predict.numpy() 249 | 250 | #logging.info(predict) 251 | 252 | return predict 253 | 254 | def num_classes(self): 255 | """ 256 | Calculate the number of classes of the output label. 257 | Return: 258 | int: the number of classes 259 | """ 260 | 261 | return self._nb_classes 262 | 263 | def gradient(self, data, label): 264 | """ 265 | Calculate the gradient of the cross-entropy loss w.r.t the image. 266 | Args: 267 | data(numpy.ndarray): input data with shape (size, height, width, 268 | channels). 269 | label(int): Label used to calculate the gradient. 270 | Return: 271 | numpy.ndarray: gradient of the cross-entropy loss w.r.t the image 272 | with the shape (height, width, channel). 
273 | """ 274 | 275 | import torch 276 | 277 | scaled_data = self._process_input(data) 278 | 279 | #logging.info(scaled_data) 280 | 281 | scaled_data = torch.from_numpy(scaled_data).to(self._device) 282 | scaled_data.requires_grad = True 283 | 284 | label = np.array([label]) 285 | label = torch.from_numpy(label).to(self._device) 286 | #deal with multiple outputs 287 | try: 288 | output=self.predict_tensor(scaled_data).to(self._device) 289 | except(AttributeError): 290 | output = self.predict_tensor(scaled_data)[1].to(self._device) 291 | #loss=self._loss(output, label) 292 | ce = nn.CrossEntropyLoss() 293 | loss=-ce(output, label) 294 | 295 | #计算梯度 296 | # Zero all existing gradients 297 | self._model.zero_grad() 298 | loss.backward() 299 | 300 | #技巧 梯度也是tensor 需要转换成np 301 | grad = scaled_data.grad.cpu().numpy().copy() 302 | 303 | 304 | return grad.reshape(scaled_data.shape) 305 | 306 | def predict_name(self): 307 | """ 308 | Get the predict name, such as "softmax",etc. 309 | :return: string 310 | """ 311 | return self._predict_program.block(0).var(self._predict_name).op.type 312 | -------------------------------------------------------------------------------- /DQN/adv_attacks/adv_model.py: -------------------------------------------------------------------------------- 1 | #coding=utf-8 2 | # Copyright 2017 - 2018 Baidu Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | from __future__ import absolute_import 17 | 18 | import numpy as np 19 | import os 20 | 21 | from abc import ABCMeta 22 | from abc import abstractmethod 23 | 24 | import logging 25 | logger=logging.getLogger(__name__) 26 | 27 | 28 | 29 | import torchvision 30 | from torch.autograd import Variable 31 | import torch.nn as nn 32 | 33 | """ 34 | The base model of the model. 35 | """ 36 | 37 | """ 38 | Pytorch model 39 | """ 40 | 41 | class Model(object): 42 | """ 43 | Base class of model to provide attack. 44 | 45 | Args: 46 | bounds(tuple): The lower and upper bound for the image pixel. 47 | channel_axis(int): The index of the axis that represents the color 48 | channel. 49 | preprocess(tuple): Two element tuple used to preprocess the input. 50 | First substract the first element, then divide the second element. 51 | """ 52 | __metaclass__ = ABCMeta 53 | 54 | def __init__(self, bounds, channel_axis, preprocess=None): 55 | assert len(bounds) == 2 56 | assert channel_axis in [0, 1, 2, 3] 57 | 58 | self._bounds = bounds 59 | self._channel_axis = channel_axis 60 | 61 | # Make self._preprocess to be (0,1) if possible, so that don't need 62 | # to do substract or divide. 63 | if preprocess is not None: 64 | sub, div = np.array(preprocess) 65 | if not np.any(sub): 66 | sub = 0 67 | if np.all(div == 1): 68 | div = 1 69 | assert (div is None) or np.all(div) 70 | self._preprocess = (sub, div) 71 | else: 72 | self._preprocess = (0, 1) 73 | 74 | def bounds(self): 75 | """ 76 | Return the upper and lower bounds of the model. 
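# A minimal sketch of the input-gradient pattern implemented by PytorchModel.gradient above:
# mark the input as requiring gradients, run the model, backpropagate the (negated, as in the
# code above) cross-entropy loss, and read the gradient off the input tensor. The model and
# input shapes here are assumptions, not part of the repo.
import torch
import torch.nn as nn

def input_gradient(model, x, label):
    x = x.clone().detach().requires_grad_(True)      # leaf tensor we can differentiate w.r.t.
    logits = model(x)                                 # assumes a single-output classifier
    loss = -nn.CrossEntropyLoss()(logits, torch.tensor([label], device=x.device))
    model.zero_grad()
    loss.backward()
    return x.grad.detach().cpu().numpy()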
77 | """ 78 | return self._bounds 79 | 80 | def channel_axis(self): 81 | """ 82 | Return the channel axis of the model. 83 | """ 84 | return self._channel_axis 85 | 86 | def _process_input(self, input_): 87 | res = None 88 | sub, div = self._preprocess 89 | if np.any(sub != 0): 90 | res = input_ - sub 91 | if not np.all(sub == 1): 92 | if res is None: # "res = input_ - sub" is not executed! 93 | res = input_ / div 94 | else: 95 | res /= div 96 | if res is None: # "res = (input_ - sub)/ div" is not executed! 97 | return input_ 98 | return res 99 | 100 | @abstractmethod 101 | def predict(self, data): 102 | """ 103 | Calculate the prediction of the data. 104 | 105 | Args: 106 | data(numpy.ndarray): input data with shape (size, 107 | height, width, channels). 108 | 109 | Return: 110 | numpy.ndarray: predictions of the data with shape (batch_size, 111 | num_of_classes). 112 | """ 113 | raise NotImplementedError 114 | 115 | @abstractmethod 116 | def num_classes(self): 117 | """ 118 | Determine the number of the classes 119 | 120 | Return: 121 | int: the number of the classes 122 | """ 123 | raise NotImplementedError 124 | 125 | @abstractmethod 126 | def gradient(self, data, label): 127 | """ 128 | Calculate the gradient of the cross-entropy loss w.r.t the image. 129 | 130 | Args: 131 | data(numpy.ndarray): input data with shape (size, height, width, 132 | channels). 133 | label(int): Label used to calculate the gradient. 134 | 135 | Return: 136 | numpy.ndarray: gradient of the cross-entropy loss w.r.t the image 137 | with the shape (height, width, channel). 138 | """ 139 | raise NotImplementedError 140 | 141 | @abstractmethod 142 | def predict_name(self): 143 | """ 144 | Get the predict name, such as "softmax",etc. 145 | :return: string 146 | """ 147 | raise NotImplementedError 148 | 149 | #直接加载pb文件 150 | class PytorchModel(Model): 151 | 152 | 153 | def __init__(self, 154 | model, 155 | loss, 156 | bounds, 157 | channel_axis=3, 158 | nb_classes=10, 159 | preprocess=None, 160 | device=None): 161 | 162 | import torch 163 | 164 | 165 | if preprocess is None: 166 | preprocess = (0, 1) 167 | 168 | super(PytorchModel, self).__init__( 169 | bounds=bounds, channel_axis=channel_axis, preprocess=preprocess) 170 | 171 | 172 | self._model = model 173 | 174 | #暂时不支持自定义loss 175 | self._loss=loss 176 | 177 | self._nb_classes=nb_classes 178 | if not device: 179 | self._device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 180 | elif device == -1: 181 | self._device = torch.device("cpu") 182 | else: 183 | self._device = torch.device("cuda:{}".format(device)) 184 | 185 | print(self._device) 186 | 187 | logger.info("Finish PytorchModel init") 188 | 189 | #返回值为标量 190 | def predict(self, data): 191 | """ 192 | Calculate the prediction of the data. 193 | Args: 194 | data(numpy.ndarray): input data with shape (size, 195 | height, width, channels). 196 | Return: 197 | numpy.ndarray: predictions of the data with shape (batch_size, 198 | num_of_classes). 
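# A minimal sketch of the (subtract, then divide) preprocessing convention that
# Model._process_input above implements; with the default preprocess=(0, 1) the
# input is returned unchanged. The dtype is an assumption for illustration.
import numpy as np

def process_input(x, preprocess=(0, 1)):
    sub, div = preprocess
    return (np.asarray(x, dtype=np.float32) - sub) / div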
199 | """ 200 | 201 | import torch 202 | 203 | scaled_data = self._process_input(data) 204 | 205 | scaled_data = torch.from_numpy(scaled_data).to(self._device) 206 | 207 | 208 | # Run prediction 209 | predict = self._model(scaled_data) 210 | #if A3C choose action output, don't care about value for evaluation 211 | if type(predict)==tuple: 212 | predict = predict[1] 213 | predict = np.squeeze(predict, axis=0) 214 | 215 | predict=predict.detach() 216 | 217 | predict=predict.cpu().numpy().copy() 218 | 219 | #logging.info(predict) 220 | 221 | return predict 222 | 223 | #返回值为tensor 224 | def predict_tensor(self, data): 225 | """ 226 | Calculate the prediction of the data. 227 | Args: 228 | data(numpy.ndarray): input data with shape (size, 229 | height, width, channels). 230 | Return: 231 | numpy.ndarray: predictions of the data with shape (batch_size, 232 | num_of_classes). 233 | """ 234 | 235 | import torch 236 | 237 | scaled_data = self._process_input(data).to(self._device) 238 | 239 | #scaled_data = torch.from_numpy(scaled_data) 240 | 241 | 242 | # Run prediction 243 | predict = self._model(scaled_data) 244 | #predict = np.squeeze(predict, axis=0) 245 | 246 | #predict=predict.detach() 247 | 248 | #predict=predict.numpy() 249 | 250 | #logging.info(predict) 251 | 252 | return predict 253 | 254 | def num_classes(self): 255 | """ 256 | Calculate the number of classes of the output label. 257 | Return: 258 | int: the number of classes 259 | """ 260 | 261 | return self._nb_classes 262 | 263 | def gradient(self, data, label): 264 | """ 265 | Calculate the gradient of the cross-entropy loss w.r.t the image. 266 | Args: 267 | data(numpy.ndarray): input data with shape (size, height, width, 268 | channels). 269 | label(int): Label used to calculate the gradient. 270 | Return: 271 | numpy.ndarray: gradient of the cross-entropy loss w.r.t the image 272 | with the shape (height, width, channel). 273 | """ 274 | 275 | import torch 276 | 277 | scaled_data = self._process_input(data) 278 | 279 | #logging.info(scaled_data) 280 | 281 | scaled_data = torch.from_numpy(scaled_data).to(self._device) 282 | scaled_data.requires_grad = True 283 | 284 | label = np.array([label]) 285 | label = torch.from_numpy(label).to(self._device) 286 | #deal with multiple outputs 287 | try: 288 | output=self.predict_tensor(scaled_data).to(self._device) 289 | except(AttributeError): 290 | output = self.predict_tensor(scaled_data)[1].to(self._device) 291 | #loss=self._loss(output, label) 292 | ce = nn.CrossEntropyLoss() 293 | loss=-ce(output, label) 294 | 295 | #计算梯度 296 | # Zero all existing gradients 297 | self._model.zero_grad() 298 | loss.backward() 299 | 300 | #技巧 梯度也是tensor 需要转换成np 301 | grad = scaled_data.grad.cpu().numpy().copy() 302 | 303 | 304 | return grad.reshape(scaled_data.shape) 305 | 306 | def predict_name(self): 307 | """ 308 | Get the predict name, such as "softmax",etc. 309 | :return: string 310 | """ 311 | return self._predict_program.block(0).var(self._predict_name).op.type 312 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 
11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 
134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 
193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /A3C/pong_absolute_worst_case.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import math\n", 10 | "\n", 11 | "import gym\n", 12 | "import matplotlib\n", 13 | "import matplotlib.pyplot as plt\n", 14 | "import numpy as np\n", 15 | "\n", 16 | "import torch\n", 17 | "import torch.nn as nn\n", 18 | "import torch.optim as optim\n", 19 | "import torch.nn.functional as F\n", 20 | "\n", 21 | "from utils import read_config\n", 22 | "from model import A3Cff\n", 23 | "from environment import atari_env\n", 24 | "from ibp import network_bounds\n" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": null, 30 | "metadata": {}, 31 | "outputs": [], 32 | "source": [ 33 | "%matplotlib inline\n", 34 | "\n", 35 | "class ArgHelper(object):\n", 36 | " def __init__(self, env, gpu_id, skip_rate, max_episode_length, load_path, env_config):\n", 37 | " self.env = env\n", 38 | " self.gpu_id = gpu_id\n", 39 | " self.skip_rate = skip_rate\n", 40 | " self.max_episode_length = max_episode_length\n", 41 | " self.load_path = load_path\n", 42 | " self.env_config = env_config\n", 43 | " " 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": null, 49 | "metadata": {}, 50 | "outputs": [], 51 | "source": [ 52 | "args = ArgHelper(env = 'PongNoFrameskip-v4',\n", 53 | " gpu_id = 0,\n", 54 | " skip_rate = 4,\n", 55 | " max_episode_length = 10000,\n", 56 | " load_path = 'trained_models/PongNoFrameskip-v4_robust.pt',\n", 57 | " env_config = 'config.json')" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": null, 63 | "metadata": {}, 64 | "outputs": [], 65 | "source": [ 66 | "def create_env():\n", 67 | " setup_json = read_config(args.env_config)\n", 68 | " env_conf = setup_json[\"Default\"]\n", 69 | " for i in setup_json.keys():\n", 70 | " if i in args.env:\n", 71 | " env_conf = setup_json[i]\n", 72 | " env = atari_env(args.env, env_conf, args)\n", 73 | " return env" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": null, 79 | "metadata": {}, 80 | "outputs": [], 81 | "source": [ 82 | "if args.gpu_id < 0:\n", 83 | " device = torch.device('cpu')\n", 84 | "else:\n", 85 | " device = torch.device('cuda:{}'.format(args.gpu_id))\n", 86 | "\n", 87 | "env = create_env()" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": null, 93 | "metadata": {}, 94 | "outputs": [], 95 | "source": [ 96 | "current_model = A3Cff(env.observation_space.shape[0], env.action_space)\n", 97 | "new_dict = torch.load(args.load_path, map_location=device)\n", 98 | "current_model.load_state_dict(new_dict)\n", 99 | "\n", 100 | "current_model = current_model.to(device)" 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": null, 106 | "metadata": {}, 107 | "outputs": [], 108 | "source": [ 109 | "def get_next(curr_model, env, epsilon, state):\n", 110 | " 
next_snapshots = []\n", 111 | " next_states = []\n", 112 | " input_x = torch.FloatTensor(state).unsqueeze(0).to(device)\n", 113 | " _, output = curr_model.forward(input_x)\n", 114 | " #print(output)\n", 115 | " action = torch.argmax(output, dim=1)\n", 116 | "\n", 117 | " upper, lower = network_bounds(curr_model.model, input_x, epsilon=epsilon)\n", 118 | " upper, lower = upper[:,1:], lower[:, 1:]\n", 119 | " impossible = upper < torch.max(lower, dim=1)[0]\n", 120 | " \n", 121 | " snapshot = env.ale.cloneState()\n", 122 | " for i in range(impossible.shape[1]):\n", 123 | " if (not impossible[0, i]):\n", 124 | " next_state, reward, done, _ = env.step(i)\n", 125 | " #Won the game, no need to check future states\n", 126 | " if reward > 1e-5:\n", 127 | " env.ale.restoreState(snapshot)\n", 128 | " continue\n", 129 | " elif reward < -1e-5:\n", 130 | " return -1\n", 131 | " else:\n", 132 | " next_snapshots.append(env.ale.cloneState())\n", 133 | " next_states.append(next_state)\n", 134 | " env.ale.restoreState(snapshot)\n", 135 | " return next_snapshots, next_states\n", 136 | "\n", 137 | "def get_greedy_worst_case(curr_model, env, epsilon, state):\n", 138 | " orig_env = env.ale.cloneState()\n", 139 | " while True:\n", 140 | " input_x = torch.FloatTensor(state).unsqueeze(0).to(device)\n", 141 | " _, output = curr_model.forward(input_x)\n", 142 | " action = torch.argmax(output, dim=1)\n", 143 | "\n", 144 | " upper, lower = network_bounds(curr_model.model, input_x, epsilon=epsilon)\n", 145 | " upper, lower = upper[:,1:], lower[:, 1:]\n", 146 | " impossible = upper < torch.max(lower, dim=1)[0]\n", 147 | " worst_case_action = torch.argmin(output+1e6*impossible, dim=1)\n", 148 | " next_state, reward, done, _ = env.step(worst_case_action[0])\n", 149 | " \n", 150 | " if abs(reward) > 1e-5:\n", 151 | " env.ale.restoreState(orig_env)\n", 152 | " #print(\"Greedy worst case reward: {}\".format(reward))\n", 153 | " return reward\n", 154 | " \n", 155 | " else:\n", 156 | " state = next_state\n", 157 | "\n", 158 | "def get_action_cert_rate(curr_model, env, epsilon, state):\n", 159 | " certified = 0\n", 160 | " total = 0\n", 161 | " orig_env = env.ale.cloneState()\n", 162 | " while True:\n", 163 | " input_x = torch.FloatTensor(state).unsqueeze(0).to(device)\n", 164 | " _, output = curr_model.forward(input_x)\n", 165 | " #print(output)\n", 166 | " action = torch.argmax(output, dim=1)\n", 167 | "\n", 168 | " upper, lower = network_bounds(curr_model.model, input_x, epsilon=epsilon)\n", 169 | " upper, lower = upper[:,1:], lower[:, 1:]\n", 170 | " \n", 171 | " upper[:, action] = -1e10 \n", 172 | " max_other = torch.max(upper, dim=1)[0]\n", 173 | " if lower[:, action] > max_other:\n", 174 | " certified += 1\n", 175 | " total += 1\n", 176 | " \n", 177 | " action = torch.argmax(output, dim=1)\n", 178 | " \n", 179 | " next_state, reward, done, _ = env.step(action[0])\n", 180 | " \n", 181 | " if abs(reward) > 1e-5:\n", 182 | " env.ale.restoreState(orig_env)\n", 183 | " return certified/total\n", 184 | " \n", 185 | " else:\n", 186 | " state = next_state\n", 187 | "\n", 188 | "def worst_case_reward(curr_model, env, epsilon):\n", 189 | " envs_to_check = []\n", 190 | " states_to_check = []\n", 191 | " \n", 192 | " state = env.reset()\n", 193 | " pos_rewards = 0\n", 194 | " neg_rewards = 0\n", 195 | " paths = 1\n", 196 | " with torch.no_grad():\n", 197 | " #play through first game\n", 198 | " while True:\n", 199 | " input_x = torch.FloatTensor(state).unsqueeze(0).to(device)\n", 200 | " _, output = curr_model.forward(input_x)\n", 
201 | " #print(output)\n", 202 | " action = torch.argmax(output, dim=1)\n", 203 | " next_state, reward, done, _ = env.step(action)\n", 204 | " if reward > 1e-5:\n", 205 | " pos_rewards += 1\n", 206 | " elif reward < -1e-5:\n", 207 | " neg_rewards += 1\n", 208 | " #early stopping\n", 209 | " if max(pos_rewards, abs(neg_rewards)) >= 1:\n", 210 | " envs_to_check.append(env.ale.cloneState())\n", 211 | " states_to_check.append(next_state)\n", 212 | " break\n", 213 | " state = next_state\n", 214 | " \n", 215 | " greedy_reward = get_greedy_worst_case(curr_model, env, epsilon, state)\n", 216 | " acr = get_action_cert_rate(curr_model, env, epsilon, state)\n", 217 | " \n", 218 | " while len(envs_to_check)>0:\n", 219 | " \n", 220 | " env.ale.restoreState(envs_to_check.pop(-1))\n", 221 | " out = get_next(curr_model, env, epsilon, states_to_check.pop(-1)) \n", 222 | " if out == -1:\n", 223 | " return -1, greedy_reward, paths, acr\n", 224 | " else:\n", 225 | " next_envs, next_states = out\n", 226 | " envs_to_check.extend(next_envs)\n", 227 | " states_to_check.extend(next_states)\n", 228 | " paths += max(0,len(next_envs)-1)\n", 229 | " #if (len(next_envs)-1) > 0 and paths%500==0:\n", 230 | " \n", 231 | " if paths > 5000:\n", 232 | " print(paths, len(envs_to_check))\n", 233 | " return 0, greedy_reward, paths, acr\n", 234 | " return 1, greedy_reward, paths, acr" 235 | ] 236 | }, 237 | { 238 | "cell_type": "code", 239 | "execution_count": null, 240 | "metadata": {}, 241 | "outputs": [], 242 | "source": [ 243 | "%%time\n", 244 | "verified_rewards = []\n", 245 | "greedy_rewards = []\n", 246 | "acrs = []\n", 247 | "epsilons = np.array([1, 1.1, 1.15, 1.2, 1.3])/255\n", 248 | "#np.array([0.1, 0.3, 1, 1.1, 1.15, 1.2, 1.3, 3, 8])/255\n", 249 | "\n", 250 | "\n", 251 | "for epsilon in epsilons:\n", 252 | " verified = []\n", 253 | " greedy = []\n", 254 | " acr = []\n", 255 | " print('Epsilon: {}'.format(epsilon))\n", 256 | " for j in range(20):\n", 257 | " env = create_env()\n", 258 | " reward, greedy_reward, paths, acr_res = worst_case_reward(current_model, env, epsilon)\n", 259 | " print('Greedy: {}, Absolute worst case reward:{}, paths checked:{}, action cert rate:{:.4f}'.format(greedy_reward,\n", 260 | " reward, paths, acr_res))\n", 261 | " #only count ones where we get absolute worst case result\n", 262 | " if reward != 0:\n", 263 | " verified.append(reward)\n", 264 | " greedy.append(greedy_reward)\n", 265 | " acr.append(acr_res)\n", 266 | " verified_rewards.append(verified)\n", 267 | " greedy_rewards.append(greedy)\n", 268 | " acrs.append(acr)" 269 | ] 270 | }, 271 | { 272 | "cell_type": "code", 273 | "execution_count": null, 274 | "metadata": {}, 275 | "outputs": [], 276 | "source": [ 277 | "font = {'size' : 18}\n", 278 | "\n", 279 | "matplotlib.rc('font', **font)\n", 280 | "\n", 281 | "greed = [np.mean(i) for i in greedy_rewards]\n", 282 | "ver = [np.mean(i) for i in verified_rewards]\n", 283 | "acr_ = [np.mean(i) for i in acrs]\n", 284 | "\n", 285 | "plt.plot(epsilons*255, greed, marker='o', label='Greedy worst case reward')\n", 286 | "plt.plot(epsilons*255, ver, marker='.', label='Absolute worst case reward')\n", 287 | "plt.plot(epsilons*255, np.array(acr_)*2-1, marker='s', label='(Action certification rate)*2-1')\n", 288 | "\n", 289 | "plt.legend(bbox_to_anchor=[1,1.5])\n", 290 | "plt.xlabel('epsilon*255')\n", 291 | "plt.ylabel('Average result')\n", 292 | "#plt.xscale('log')\n", 293 | "plt.show()" 294 | ] 295 | }, 296 | { 297 | "cell_type": "code", 298 | "execution_count": null, 299 | "metadata": {}, 300 | 
"outputs": [], 301 | "source": [ 302 | "font = {'size' : 14}\n", 303 | "matplotlib.rc('font', **font)\n", 304 | "\n", 305 | "x = epsilons*255 # the label locations\n", 306 | "width = 0.01 # the width of the bars\n", 307 | "\n", 308 | "fig, ax = plt.subplots()\n", 309 | "\n", 310 | "rects1 = ax.bar(x - width - 0.001, (np.array(greed)+1)/2, width, label='GWC', color=np.array((255,153,51))/255)\n", 311 | "rects2 = ax.bar(x, (np.array(ver)+1)/2, width, label='AWC', color=np.array((30,144,255))/255)\n", 312 | "rects3 = ax.bar(x + width + 0.001, acr_, width, label='ACR', color=np.array((40,164,40))/255)\n", 313 | "\n", 314 | "# Add some text for labels, title and custom x-axis tick labels, etc.\n", 315 | "ax.set_ylabel('Average result')\n", 316 | "ax.set_xlabel('epsilon*255')\n", 317 | "plt.ylim(0,1)\n", 318 | "plt.xlim(0.95,1.35)\n", 319 | "#ax.set_title('Scores by group and gender')\n", 320 | "ax.set_xticks(x)\n", 321 | "ax.legend()\n", 322 | "\n", 323 | "def label(rect, color=(0,0,0), offset=(0,3)):\n", 324 | " height = rect.get_height()\n", 325 | " ax.annotate('{:.2f}'.format(rect.get_height()),\n", 326 | " xy=(rect.get_x() + rect.get_width() / 2, rect.get_height()),\n", 327 | " xytext=offset, # 3 points vertical offset\n", 328 | " textcoords=\"offset points\", color=color,\n", 329 | " ha='center', va='bottom')\n", 330 | " \n", 331 | "label(rects1[2], offset=(-2,3))\n", 332 | "label(rects1[3], offset=(-2,3))\n", 333 | "label(rects1[4], offset=(-5,3))\n", 334 | "\n", 335 | "label(rects3[2], np.array((40,164,40))/255)\n", 336 | "label(rects3[3], np.array((40,164,40))/255)\n", 337 | "label(rects3[4], np.array((40,164,40))/255, (9,3))\n", 338 | "\n", 339 | "plt.show()" 340 | ] 341 | }, 342 | { 343 | "cell_type": "code", 344 | "execution_count": null, 345 | "metadata": {}, 346 | "outputs": [], 347 | "source": [ 348 | "for i in range(len(epsilons)):\n", 349 | " print(epsilons[i]*255, len(greedy_rewards[i]))\n", 350 | " " 351 | ] 352 | }, 353 | { 354 | "cell_type": "code", 355 | "execution_count": null, 356 | "metadata": {}, 357 | "outputs": [], 358 | "source": [ 359 | "def result_with_eps(eps_index):\n", 360 | " pos_acrs = []\n", 361 | " pos_gwcs = []\n", 362 | " neg_acrs = []\n", 363 | " neg_gwcs = []\n", 364 | "\n", 365 | " for i in range(len(verified_rewards[eps_index])):\n", 366 | " if verified_rewards[eps_index][i]==1:\n", 367 | " pos_acrs.append(acrs[eps_index][i])\n", 368 | " pos_gwcs.append(greedy_rewards[eps_index][i])\n", 369 | " \n", 370 | " elif verified_rewards[eps_index][i]==-1:\n", 371 | " neg_acrs.append(acrs[eps_index][i])\n", 372 | " neg_gwcs.append(greedy_rewards[eps_index][i])\n", 373 | " \n", 374 | " print('Epsilon: {}/255'.format(epsilons[eps_index]*255))\n", 375 | " print('Average total AWC:{} GWC:{} ACR:{}'.format(np.mean(verified_rewards[eps_index]), \n", 376 | " np.mean(greedy_rewards[eps_index]), np.mean(acrs[eps_index])))\n", 377 | " print('Positive AWC: {}/20'.format(len(pos_acrs)))\n", 378 | " print('Average pos GWC:{} ACR:{}'.format(np.mean(pos_gwcs), np.mean(pos_acrs)))\n", 379 | " print('Negative AWC: {}/20'.format(len(neg_acrs)))\n", 380 | " print('Average neg GWC:{} ACR:{}'.format(np.mean(neg_gwcs), np.mean(neg_acrs)))\n", 381 | " print('')" 382 | ] 383 | }, 384 | { 385 | "cell_type": "code", 386 | "execution_count": null, 387 | "metadata": {}, 388 | "outputs": [], 389 | "source": [ 390 | "for i in range(len(epsilons)):\n", 391 | " result_with_eps(i)" 392 | ] 393 | }, 394 | { 395 | "cell_type": "code", 396 | "execution_count": null, 397 | "metadata": {}, 398 | 
"outputs": [], 399 | "source": [] 400 | } 401 | ], 402 | "metadata": { 403 | "kernelspec": { 404 | "display_name": "Python 3", 405 | "language": "python", 406 | "name": "python3" 407 | }, 408 | "language_info": { 409 | "codemirror_mode": { 410 | "name": "ipython", 411 | "version": 3 412 | }, 413 | "file_extension": ".py", 414 | "mimetype": "text/x-python", 415 | "name": "python", 416 | "nbconvert_exporter": "python", 417 | "pygments_lexer": "ipython3", 418 | "version": "3.7.6" 419 | } 420 | }, 421 | "nbformat": 4, 422 | "nbformat_minor": 2 423 | } 424 | -------------------------------------------------------------------------------- /A3C/adv_attacks/gradient_method.py: -------------------------------------------------------------------------------- 1 | #coding=utf-8 2 | # Copyright 2017 - 2018 Baidu Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | """ 16 | This module provide the attack method for Iterator FGSM's implement. 17 | """ 18 | from __future__ import division 19 | 20 | import logging 21 | from collections import Iterable 22 | 23 | import numpy as np 24 | 25 | 26 | from .base import Attack 27 | 28 | __all__ = [ 29 | 'GradientMethodAttack', 'FastGradientSignMethodAttack', 'FGSM', 30 | 'FastGradientSignMethodTargetedAttack', 'FGSMT', 31 | 'BasicIterativeMethodAttack', 'BIM', 32 | 'IterativeLeastLikelyClassMethodAttack', 'ILCM', 'MomentumIteratorAttack', 33 | 'MIFGSM','FGSM_static' 34 | ] 35 | 36 | 37 | 38 | 39 | class GradientMethodAttack(Attack): 40 | """ 41 | This class implements gradient attack method, and is the base of FGSM, BIM, 42 | ILCM, etc. 43 | """ 44 | 45 | def __init__(self, model, support_targeted=True): 46 | """ 47 | :param model(model): The model to be attacked. 48 | :param support_targeted(bool): Does this attack method support targeted. 49 | """ 50 | super(GradientMethodAttack, self).__init__(model) 51 | self.support_targeted = support_targeted 52 | 53 | def _apply(self, 54 | adversary, 55 | norm_ord=np.inf, 56 | epsilons=0.01, 57 | epsilons_max=0.5, 58 | steps=10, 59 | epsilon_steps=100): 60 | #epsilons_max 为动态调整epsilon时的上限 静态epsilon算法时epsilon_steps=1 epsilons_max=epsilons即可 61 | """ 62 | Apply the gradient attack method. 63 | :param adversary(Adversary): 64 | The Adversary object. 65 | :param norm_ord(int): 66 | Order of the norm, such as np.inf, 1, 2, etc. It can't be 0. 67 | :param epsilons(list|tuple|int): 68 | Attack step size (input variation). 69 | Largest step size if epsilons is not iterable. 70 | :param steps: 71 | The number of attack iteration. 72 | :param epsilon_steps: 73 | The number of Epsilons' iteration for each attack iteration. 74 | :return: 75 | adversary(Adversary): The Adversary object. 
76 | """ 77 | if norm_ord == 0: 78 | raise ValueError("L0 norm is not supported!") 79 | 80 | if not self.support_targeted: 81 | if adversary.is_targeted_attack: 82 | raise ValueError( 83 | "This attack method doesn't support targeted attack!") 84 | 85 | logging.info('epsilons={0},epsilons_max={1},steps={2},epsilon_steps={3}'. 86 | format(epsilons,epsilons_max,steps,epsilon_steps)) 87 | 88 | if not isinstance(epsilons, Iterable): 89 | #从epsilons到0.5逐步增大 90 | epsilons = np.linspace(epsilons, epsilons_max, num=epsilon_steps) 91 | 92 | pre_label = adversary.original_label 93 | min_, max_ = self.model.bounds() 94 | 95 | 96 | #assert self.model.channel_axis() == adversary.original.ndim 97 | assert (self.model.channel_axis() == 1 or 98 | self.model.channel_axis() == adversary.original.shape[0] or 99 | self.model.channel_axis() == adversary.original.shape[-1]) 100 | 101 | 102 | 103 | #从[epsilon,0.5]动态调整epsilon 直到攻击成功 104 | for epsilon in epsilons[:]: 105 | step = 1 106 | #强制拷贝 避免针对adv_img的修改也影响adversary.original 107 | adv_img = np.copy(adversary.original) 108 | if epsilon == 0.0: 109 | continue 110 | for i in range(steps): 111 | if adversary.is_targeted_attack: 112 | gradient = +self.model.gradient(adv_img, 113 | adversary.target_label) 114 | else: 115 | gradient = -self.model.gradient(adv_img, 116 | adversary.original_label) 117 | if norm_ord == np.inf: 118 | gradient_norm = np.sign(gradient) 119 | else: 120 | gradient_norm = gradient / self._norm( 121 | gradient, ord=norm_ord) 122 | 123 | #logging.info('epsilon * gradient_norm={0}'.format(gradient_norm * epsilon)) 124 | #logging.info('epsilon * gradient_norm* (max_ - min_)={0}'.format(gradient_norm * epsilon* (max_ - min_))) 125 | #改进的实现 不用考虑特征取值范围 126 | #adv_img = adv_img + epsilon * gradient_norm * (max_ - min_) 127 | #按照论文实现 128 | adv_img = adv_img + epsilon * gradient_norm 129 | 130 | adv_img = np.clip(adv_img, min_, max_) 131 | adv_label = np.argmax(self.model.predict(adv_img)) 132 | logging.info('step={}, epsilon = {:.5f}, pre_label = {}, adv_label={} logits={}'. 133 | format(step, epsilon, pre_label,adv_label,self.model.predict(adv_img)[adv_label])) 134 | if adversary.try_accept_the_example(adv_img, adv_label): 135 | return adversary 136 | step += 1 137 | return adversary 138 | 139 | @staticmethod 140 | def _norm(a, ord): 141 | if a.ndim == 1: 142 | return np.linalg.norm(a, ord=ord) 143 | if a.ndim == a.shape[0]: 144 | norm_shape = (a.ndim, reduce(np.dot, a.shape[1:])) 145 | norm_axis = 1 146 | else: 147 | norm_shape = (reduce(np.dot, a.shape[:-1]), a.ndim) 148 | norm_axis = 0 149 | return np.linalg.norm(a.reshape(norm_shape), ord=ord, axis=norm_axis) 150 | 151 | 152 | class FastGradientSignMethodTargetedAttack(GradientMethodAttack): 153 | """ 154 | "Fast Gradient Sign Method" is extended to support targeted attack. 155 | "Fast Gradient Sign Method" was originally implemented by Goodfellow et 156 | al. (2015) with the infinity norm. 157 | 158 | Paper link: https://arxiv.org/abs/1412.6572 159 | """ 160 | 161 | #硬编码了epsilons=0.01 162 | def _apply(self, adversary, epsilons=0.01,epsilons_max=0.5,epsilon_steps=100,steps=10): 163 | return GradientMethodAttack._apply( 164 | self, 165 | adversary=adversary, 166 | norm_ord=np.inf, 167 | epsilons=epsilons, 168 | epsilons_max=epsilons_max, 169 | steps=steps, 170 | epsilon_steps=epsilon_steps) 171 | 172 | 173 | 174 | class FastGradientSignMethodAttack(FastGradientSignMethodTargetedAttack): 175 | """ 176 | This attack was originally implemented by Goodfellow et al. 
(2015) with the 177 | infinity norm, and is known as the "Fast Gradient Sign Method". 178 | 179 | Paper link: https://arxiv.org/abs/1412.6572 180 | """ 181 | 182 | def __init__(self, model): 183 | super(FastGradientSignMethodAttack, self).__init__(model, False) 184 | 185 | 186 | class IterativeLeastLikelyClassMethodAttack(GradientMethodAttack): 187 | """ 188 | "Iterative Least-likely Class Method (ILCM)" extends "BIM" to support 189 | targeted attack. 190 | "The Basic Iterative Method (BIM)" is to extend "FSGM". "BIM" iteratively 191 | take multiple small steps while adjusting the direction after each step. 192 | 193 | Paper link: https://arxiv.org/abs/1607.02533 194 | """ 195 | 196 | def _apply(self, adversary, epsilons=0.01, epsilons_max=0.5,steps=1000,epsilon_steps=1000): 197 | return GradientMethodAttack._apply( 198 | self, 199 | adversary=adversary, 200 | norm_ord=np.inf, 201 | epsilons=epsilons, 202 | epsilons_max=epsilons_max, 203 | steps=steps, 204 | epsilon_steps=epsilon_steps) 205 | 206 | 207 | class BasicIterativeMethodAttack(IterativeLeastLikelyClassMethodAttack): 208 | """ 209 | FGSM is a one-step method. "The Basic Iterative Method (BIM)" iteratively 210 | take multiple small steps while adjusting the direction after each step. 211 | Paper link: https://arxiv.org/abs/1607.02533 212 | """ 213 | 214 | def __init__(self, model): 215 | super(BasicIterativeMethodAttack, self).__init__(model, False) 216 | 217 | 218 | class MomentumIteratorAttack(GradientMethodAttack): 219 | """ 220 | The Momentum Iterative Fast Gradient Sign Method (Dong et al. 2017). 221 | This method won the first places in NIPS 2017 Non-targeted Adversarial 222 | Attacks and Targeted Adversarial Attacks. The original paper used 223 | hard labels for this attack; no label smoothing. inf norm. 224 | Paper link: https://arxiv.org/pdf/1710.06081.pdf 225 | """ 226 | 227 | def __init__(self, model, support_targeted=True): 228 | """ 229 | :param model(model): The model to be attacked. 230 | :param support_targeted(bool): Does this attack method support targeted. 231 | """ 232 | super(MomentumIteratorAttack, self).__init__(model) 233 | self.support_targeted = support_targeted 234 | 235 | def _apply(self, 236 | adversary, 237 | norm_ord=np.inf, 238 | epsilons=0.1, 239 | epsilons_max=0.5, 240 | steps=100, 241 | epsilon_steps=100, 242 | decay_factor=1): 243 | """ 244 | Apply the momentum iterative gradient attack method. 245 | :param adversary(Adversary): 246 | The Adversary object. 247 | :param norm_ord(int): 248 | Order of the norm, such as np.inf, 1, 2, etc. It can't be 0. 249 | :param epsilons(list|tuple|float): 250 | Attack step size (input variation). 251 | Largest step size if epsilons is not iterable. 252 | :param epsilon_steps: 253 | The number of Epsilons' iteration for each attack iteration. 254 | :param steps: 255 | The number of attack iteration. 256 | :param decay_factor: 257 | The decay factor for the momentum term. 258 | :return: 259 | adversary(Adversary): The Adversary object. 
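# A minimal numpy sketch of the single signed-gradient step at the core of
# GradientMethodAttack._apply above (untargeted case). `model` follows the Model
# interface defined earlier in this repo: gradient() returns the input gradient
# and bounds() the valid pixel range; epsilon is an illustrative step size.
import numpy as np

def fgsm_step(model, x, label, epsilon):
    grad = -model.gradient(x, label)        # untargeted: same sign convention as the code above
    adv = x + epsilon * np.sign(grad)
    lo, hi = model.bounds()
    return np.clip(adv, lo, hi)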
260 | """ 261 | if norm_ord == 0: 262 | raise ValueError("L0 norm is not supported!") 263 | 264 | if not self.support_targeted: 265 | if adversary.is_targeted_attack: 266 | raise ValueError( 267 | "This attack method doesn't support targeted attack!") 268 | 269 | assert self.model.channel_axis() == adversary.original.ndim 270 | assert (self.model.channel_axis() == 1 or 271 | self.model.channel_axis() == adversary.original.shape[0] or 272 | self.model.channel_axis() == adversary.original.shape[-1]) 273 | 274 | if not isinstance(epsilons, Iterable): 275 | #epsilons = np.linspace(0, epsilons, num=epsilon_steps) 276 | #从epsilons到epsilons_max逐步增大 277 | epsilons = np.linspace(epsilons, epsilons_max, num=epsilon_steps) 278 | 279 | min_, max_ = self.model.bounds() 280 | pre_label = adversary.original_label 281 | 282 | for epsilon in epsilons[:]: 283 | if epsilon == 0.0: 284 | continue 285 | step = 1 286 | adv_img = adversary.original 287 | momentum = 0 288 | for i in range(steps): 289 | if adversary.is_targeted_attack: 290 | gradient = -self.model.gradient(adv_img, 291 | adversary.target_label) 292 | else: 293 | gradient = self.model.gradient(adv_img, pre_label) 294 | 295 | # normalize gradient 296 | velocity = gradient / self._norm(gradient, ord=1) 297 | momentum = decay_factor * momentum + velocity 298 | if norm_ord == np.inf: 299 | normalized_grad = np.sign(momentum) 300 | else: 301 | normalized_grad = self._norm(momentum, ord=norm_ord) 302 | perturbation = epsilon * normalized_grad 303 | adv_img = adv_img + perturbation 304 | adv_img = np.clip(adv_img, min_, max_) 305 | adv_label = np.argmax(self.model.predict(adv_img)) 306 | logging.info( 307 | 'step={}, epsilon = {:.5f}, pre_label = {}, adv_label={}' 308 | .format(step, epsilon, pre_label, adv_label)) 309 | if adversary.try_accept_the_example(adv_img, adv_label): 310 | return adversary 311 | step += 1 312 | 313 | return adversary 314 | 315 | 316 | 317 | 318 | class FGSMSAttack(Attack): 319 | """ 320 | 静态FGSM epsilon静态 321 | """ 322 | 323 | def __init__(self, model, support_targeted=True): 324 | """ 325 | :param model(model): The model to be attacked. 326 | :param support_targeted(bool): Does this attack method support targeted. 327 | """ 328 | super(FGSMSAttack, self).__init__(model) 329 | self.support_targeted = support_targeted 330 | 331 | def _apply(self, 332 | adversary, 333 | norm_ord=np.inf, 334 | epsilon=0.01, 335 | steps=10): 336 | """ 337 | Apply the gradient attack method. 338 | :param adversary(Adversary): 339 | The Adversary object. 340 | :param norm_ord(int): 341 | Order of the norm, such as np.inf, 1, 2, etc. It can't be 0. 342 | :param epsilons(list|tuple|int): 343 | Attack step size (input variation). 344 | Largest step size if epsilons is not iterable. 345 | :param steps: 346 | The number of attack iteration. 347 | :param epsilon_steps: 348 | The number of Epsilons' iteration for each attack iteration. 349 | :return: 350 | adversary(Adversary): The Adversary object. 
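# A minimal numpy sketch of the momentum update in MomentumIteratorAttack._apply above:
# each gradient is L1-normalized, accumulated into a decayed momentum term, and the step
# follows the sign of the momentum (the l-inf case). `model` follows the same Model
# interface as above; epsilon, steps and decay_factor are illustrative values.
import numpy as np

def mifgsm(model, x, label, epsilon, steps=10, decay_factor=1.0):
    lo, hi = model.bounds()
    adv, momentum = np.copy(x), 0.0
    for _ in range(steps):
        grad = model.gradient(adv, label)
        momentum = decay_factor * momentum + grad / np.sum(np.abs(grad))   # L1-normalized velocity
        adv = np.clip(adv + epsilon * np.sign(momentum), lo, hi)
    return adv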
351 | """ 352 | if norm_ord == 0: 353 | raise ValueError("L0 norm is not supported!") 354 | 355 | if not self.support_targeted: 356 | if adversary.is_targeted_attack: 357 | raise ValueError( 358 | "This attack method doesn't support targeted attack!") 359 | 360 | pre_label = adversary.original_label 361 | min_, max_ = self.model.bounds() 362 | 363 | #assert self.model.channel_axis() == adversary.original.ndim 364 | assert (self.model.channel_axis() == 1 or 365 | self.model.channel_axis() == adversary.original.shape[0] or 366 | self.model.channel_axis() == adversary.original.shape[-1]) 367 | 368 | step = 1 369 | #强制拷贝 避免针对adv_img的修改也影响adversary.original 370 | adv_img = np.copy(adversary.original) 371 | for i in range(steps): 372 | if adversary.is_targeted_attack: 373 | gradient = -self.model.gradient(adv_img, 374 | adversary.target_label) 375 | else: 376 | gradient = self.model.gradient(adv_img, 377 | adversary.original_label) 378 | if norm_ord == np.inf: 379 | gradient_norm = np.sign(gradient) 380 | else: 381 | gradient_norm = gradient / self._norm( 382 | gradient, ord=norm_ord) 383 | 384 | adv_img = adv_img + epsilon * gradient_norm * (max_ - min_) 385 | adv_img = np.clip(adv_img, min_, max_) 386 | adv_label = np.argmax(self.model.predict(adv_img)) 387 | logging.info('step={}, epsilon = {:.5f}, pre_label = {}, ' 388 | 'adv_label={}'.format(step, epsilon, pre_label, 389 | adv_label)) 390 | if adversary.try_accept_the_example(adv_img, adv_label): 391 | return adversary 392 | step += 1 393 | 394 | 395 | return adversary 396 | 397 | @staticmethod 398 | def _norm(a, ord): 399 | if a.ndim == 1: 400 | return np.linalg.norm(a, ord=ord) 401 | if a.ndim == a.shape[0]: 402 | norm_shape = (a.ndim, reduce(np.dot, a.shape[1:])) 403 | norm_axis = 1 404 | else: 405 | norm_shape = (reduce(np.dot, a.shape[:-1]), a.ndim) 406 | norm_axis = 0 407 | return np.linalg.norm(a.reshape(norm_shape), ord=ord, axis=norm_axis) 408 | 409 | 410 | FGSM = FastGradientSignMethodAttack 411 | FGSMT = FastGradientSignMethodTargetedAttack 412 | BIM = BasicIterativeMethodAttack 413 | ILCM = IterativeLeastLikelyClassMethodAttack 414 | MIFGSM = MomentumIteratorAttack 415 | FGSM_static = FGSMSAttack 416 | -------------------------------------------------------------------------------- /DQN/adv_attacks/gradient_method.py: -------------------------------------------------------------------------------- 1 | #coding=utf-8 2 | # Copyright 2017 - 2018 Baidu Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | """ 16 | This module provide the attack method for Iterator FGSM's implement. 
17 | """ 18 | from __future__ import division 19 | 20 | import logging 21 | from collections import Iterable 22 | 23 | import numpy as np 24 | 25 | 26 | from .base import Attack 27 | 28 | __all__ = [ 29 | 'GradientMethodAttack', 'FastGradientSignMethodAttack', 'FGSM', 30 | 'FastGradientSignMethodTargetedAttack', 'FGSMT', 31 | 'BasicIterativeMethodAttack', 'BIM', 32 | 'IterativeLeastLikelyClassMethodAttack', 'ILCM', 'MomentumIteratorAttack', 33 | 'MIFGSM','FGSM_static' 34 | ] 35 | 36 | 37 | 38 | 39 | class GradientMethodAttack(Attack): 40 | """ 41 | This class implements gradient attack method, and is the base of FGSM, BIM, 42 | ILCM, etc. 43 | """ 44 | 45 | def __init__(self, model, support_targeted=True): 46 | """ 47 | :param model(model): The model to be attacked. 48 | :param support_targeted(bool): Does this attack method support targeted. 49 | """ 50 | super(GradientMethodAttack, self).__init__(model) 51 | self.support_targeted = support_targeted 52 | 53 | def _apply(self, 54 | adversary, 55 | norm_ord=np.inf, 56 | epsilons=0.01, 57 | epsilons_max=0.5, 58 | steps=10, 59 | epsilon_steps=100): 60 | #epsilons_max 为动态调整epsilon时的上限 静态epsilon算法时epsilon_steps=1 epsilons_max=epsilons即可 61 | """ 62 | Apply the gradient attack method. 63 | :param adversary(Adversary): 64 | The Adversary object. 65 | :param norm_ord(int): 66 | Order of the norm, such as np.inf, 1, 2, etc. It can't be 0. 67 | :param epsilons(list|tuple|int): 68 | Attack step size (input variation). 69 | Largest step size if epsilons is not iterable. 70 | :param steps: 71 | The number of attack iteration. 72 | :param epsilon_steps: 73 | The number of Epsilons' iteration for each attack iteration. 74 | :return: 75 | adversary(Adversary): The Adversary object. 76 | """ 77 | if norm_ord == 0: 78 | raise ValueError("L0 norm is not supported!") 79 | 80 | if not self.support_targeted: 81 | if adversary.is_targeted_attack: 82 | raise ValueError( 83 | "This attack method doesn't support targeted attack!") 84 | 85 | logging.info('epsilons={0},epsilons_max={1},steps={2},epsilon_steps={3}'. 
86 | format(epsilons,epsilons_max,steps,epsilon_steps))
87 |
88 | if not isinstance(epsilons, Iterable):
89 | # gradually increase epsilon from epsilons up to epsilons_max
90 | epsilons = np.linspace(epsilons, epsilons_max, num=epsilon_steps)
91 |
92 | pre_label = adversary.original_label
93 | min_, max_ = self.model.bounds()
94 |
95 |
96 | #assert self.model.channel_axis() == adversary.original.ndim
97 | assert (self.model.channel_axis() == 1 or
98 | self.model.channel_axis() == adversary.original.shape[0] or
99 | self.model.channel_axis() == adversary.original.shape[-1])
100 |
101 |
102 |
103 | # adjust epsilon dynamically over [epsilons, epsilons_max] until the attack succeeds
104 | for epsilon in epsilons[:]:
105 | step = 1
106 | # force a copy so that modifying adv_img does not also modify adversary.original
107 | adv_img = np.copy(adversary.original)
108 | if epsilon == 0.0:
109 | continue
110 | for i in range(steps):
111 | if adversary.is_targeted_attack:
112 | gradient = +self.model.gradient(adv_img,
113 | adversary.target_label)
114 | else:
115 | gradient = -self.model.gradient(adv_img,
116 | adversary.original_label)
117 | if norm_ord == np.inf:
118 | gradient_norm = np.sign(gradient)
119 | else:
120 | gradient_norm = gradient / self._norm(
121 | gradient, ord=norm_ord)
122 |
123 | #logging.info('epsilon * gradient_norm={0}'.format(gradient_norm * epsilon))
124 | #logging.info('epsilon * gradient_norm* (max_ - min_)={0}'.format(gradient_norm * epsilon* (max_ - min_)))
125 | # improved variant that removes the need to consider the feature value range:
126 | #adv_img = adv_img + epsilon * gradient_norm * (max_ - min_)
127 | # implementation following the paper:
128 | adv_img = adv_img + epsilon * gradient_norm
129 |
130 | adv_img = np.clip(adv_img, min_, max_)
131 | adv_label = np.argmax(self.model.predict(adv_img))
132 | logging.info('step={}, epsilon = {:.5f}, pre_label = {}, adv_label={} logits={}'.
133 | format(step, epsilon, pre_label,adv_label,self.model.predict(adv_img)[adv_label]))
134 | if adversary.try_accept_the_example(adv_img, adv_label):
135 | return adversary
136 | step += 1
137 | return adversary
138 |
139 | @staticmethod
140 | def _norm(a, ord):
141 | if a.ndim == 1:
142 | return np.linalg.norm(a, ord=ord)
143 | if a.ndim == a.shape[0]:
144 | norm_shape = (a.ndim, reduce(np.dot, a.shape[1:]))
145 | norm_axis = 1
146 | else:
147 | norm_shape = (reduce(np.dot, a.shape[:-1]), a.ndim)
148 | norm_axis = 0
149 | return np.linalg.norm(a.reshape(norm_shape), ord=ord, axis=norm_axis)
150 |
151 |
152 | class FastGradientSignMethodTargetedAttack(GradientMethodAttack):
153 | """
154 | "Fast Gradient Sign Method" is extended to support targeted attack.
155 | "Fast Gradient Sign Method" was originally implemented by Goodfellow et
156 | al. (2015) with the infinity norm.
157 |
158 | Paper link: https://arxiv.org/abs/1412.6572
159 | """
160 |
161 | # the default epsilons=0.01 is hard-coded here
162 | def _apply(self, adversary, epsilons=0.01,epsilons_max=0.5,epsilon_steps=100,steps=10):
163 | return GradientMethodAttack._apply(
164 | self,
165 | adversary=adversary,
166 | norm_ord=np.inf,
167 | epsilons=epsilons,
168 | epsilons_max=epsilons_max,
169 | steps=steps,
170 | epsilon_steps=epsilon_steps)
171 |
172 |
173 |
174 | class FastGradientSignMethodAttack(FastGradientSignMethodTargetedAttack):
175 | """
176 | This attack was originally implemented by Goodfellow et al. (2015) with the
177 | infinity norm, and is known as the "Fast Gradient Sign Method".
178 |
179 | Paper link: https://arxiv.org/abs/1412.6572
180 | """
181 |
182 | def __init__(self, model):
183 | super(FastGradientSignMethodAttack, self).__init__(model, False)
184 |
185 |
186 | class IterativeLeastLikelyClassMethodAttack(GradientMethodAttack):
187 | """
188 | "Iterative Least-likely Class Method (ILCM)" extends "BIM" to support
189 | targeted attack.
190 | "The Basic Iterative Method (BIM)" extends "FGSM". "BIM" iteratively
191 | takes multiple small steps while adjusting the direction after each step.
192 |
193 | Paper link: https://arxiv.org/abs/1607.02533
194 | """
195 |
196 | def _apply(self, adversary, epsilons=0.01, epsilons_max=0.5,steps=1000,epsilon_steps=1000):
197 | return GradientMethodAttack._apply(
198 | self,
199 | adversary=adversary,
200 | norm_ord=np.inf,
201 | epsilons=epsilons,
202 | epsilons_max=epsilons_max,
203 | steps=steps,
204 | epsilon_steps=epsilon_steps)
205 |
206 |
207 | class BasicIterativeMethodAttack(IterativeLeastLikelyClassMethodAttack):
208 | """
209 | FGSM is a one-step method. "The Basic Iterative Method (BIM)" iteratively
210 | takes multiple small steps while adjusting the direction after each step.
211 | Paper link: https://arxiv.org/abs/1607.02533
212 | """
213 |
214 | def __init__(self, model):
215 | super(BasicIterativeMethodAttack, self).__init__(model, False)
216 |
217 |
218 | class MomentumIteratorAttack(GradientMethodAttack):
219 | """
220 | The Momentum Iterative Fast Gradient Sign Method (Dong et al. 2017).
221 | This method won first place in the NIPS 2017 Non-targeted Adversarial
222 | Attacks and Targeted Adversarial Attacks competitions. The original paper
223 | used hard labels for this attack, no label smoothing, and the inf norm.
224 | Paper link: https://arxiv.org/pdf/1710.06081.pdf
225 | """
226 |
227 | def __init__(self, model, support_targeted=True):
228 | """
229 | :param model(model): The model to be attacked.
230 | :param support_targeted(bool): Does this attack method support targeted attacks.
231 | """
232 | super(MomentumIteratorAttack, self).__init__(model)
233 | self.support_targeted = support_targeted
234 |
235 | def _apply(self,
236 | adversary,
237 | norm_ord=np.inf,
238 | epsilons=0.1,
239 | epsilons_max=0.5,
240 | steps=100,
241 | epsilon_steps=100,
242 | decay_factor=1):
243 | """
244 | Apply the momentum iterative gradient attack method.
245 | :param adversary(Adversary):
246 | The Adversary object.
247 | :param norm_ord(int):
248 | Order of the norm, such as np.inf, 1, 2, etc. It can't be 0.
249 | :param epsilons(list|tuple|float):
250 | Attack step size (input variation).
251 | Starting step size if epsilons is not iterable; it is swept up to epsilons_max.
252 | :param epsilon_steps:
253 | The number of epsilon values swept between epsilons and epsilons_max.
254 | :param steps:
255 | The number of attack iterations.
256 | :param decay_factor:
257 | The decay factor for the momentum term.
258 | :return:
259 | adversary(Adversary): The Adversary object.
260 | """ 261 | if norm_ord == 0: 262 | raise ValueError("L0 norm is not supported!") 263 | 264 | if not self.support_targeted: 265 | if adversary.is_targeted_attack: 266 | raise ValueError( 267 | "This attack method doesn't support targeted attack!") 268 | 269 | assert self.model.channel_axis() == adversary.original.ndim 270 | assert (self.model.channel_axis() == 1 or 271 | self.model.channel_axis() == adversary.original.shape[0] or 272 | self.model.channel_axis() == adversary.original.shape[-1]) 273 | 274 | if not isinstance(epsilons, Iterable): 275 | #epsilons = np.linspace(0, epsilons, num=epsilon_steps) 276 | #从epsilons到epsilons_max逐步增大 277 | epsilons = np.linspace(epsilons, epsilons_max, num=epsilon_steps) 278 | 279 | min_, max_ = self.model.bounds() 280 | pre_label = adversary.original_label 281 | 282 | for epsilon in epsilons[:]: 283 | if epsilon == 0.0: 284 | continue 285 | step = 1 286 | adv_img = adversary.original 287 | momentum = 0 288 | for i in range(steps): 289 | if adversary.is_targeted_attack: 290 | gradient = -self.model.gradient(adv_img, 291 | adversary.target_label) 292 | else: 293 | gradient = self.model.gradient(adv_img, pre_label) 294 | 295 | # normalize gradient 296 | velocity = gradient / self._norm(gradient, ord=1) 297 | momentum = decay_factor * momentum + velocity 298 | if norm_ord == np.inf: 299 | normalized_grad = np.sign(momentum) 300 | else: 301 | normalized_grad = self._norm(momentum, ord=norm_ord) 302 | perturbation = epsilon * normalized_grad 303 | adv_img = adv_img + perturbation 304 | adv_img = np.clip(adv_img, min_, max_) 305 | adv_label = np.argmax(self.model.predict(adv_img)) 306 | logging.info( 307 | 'step={}, epsilon = {:.5f}, pre_label = {}, adv_label={}' 308 | .format(step, epsilon, pre_label, adv_label)) 309 | if adversary.try_accept_the_example(adv_img, adv_label): 310 | return adversary 311 | step += 1 312 | 313 | return adversary 314 | 315 | 316 | 317 | 318 | class FGSMSAttack(Attack): 319 | """ 320 | 静态FGSM epsilon静态 321 | """ 322 | 323 | def __init__(self, model, support_targeted=True): 324 | """ 325 | :param model(model): The model to be attacked. 326 | :param support_targeted(bool): Does this attack method support targeted. 327 | """ 328 | super(FGSMSAttack, self).__init__(model) 329 | self.support_targeted = support_targeted 330 | 331 | def _apply(self, 332 | adversary, 333 | norm_ord=np.inf, 334 | epsilon=0.01, 335 | steps=10): 336 | """ 337 | Apply the gradient attack method. 338 | :param adversary(Adversary): 339 | The Adversary object. 340 | :param norm_ord(int): 341 | Order of the norm, such as np.inf, 1, 2, etc. It can't be 0. 342 | :param epsilons(list|tuple|int): 343 | Attack step size (input variation). 344 | Largest step size if epsilons is not iterable. 345 | :param steps: 346 | The number of attack iteration. 347 | :param epsilon_steps: 348 | The number of Epsilons' iteration for each attack iteration. 349 | :return: 350 | adversary(Adversary): The Adversary object. 
351 | """ 352 | if norm_ord == 0: 353 | raise ValueError("L0 norm is not supported!") 354 | 355 | if not self.support_targeted: 356 | if adversary.is_targeted_attack: 357 | raise ValueError( 358 | "This attack method doesn't support targeted attack!") 359 | 360 | pre_label = adversary.original_label 361 | min_, max_ = self.model.bounds() 362 | 363 | #assert self.model.channel_axis() == adversary.original.ndim 364 | assert (self.model.channel_axis() == 1 or 365 | self.model.channel_axis() == adversary.original.shape[0] or 366 | self.model.channel_axis() == adversary.original.shape[-1]) 367 | 368 | step = 1 369 | #强制拷贝 避免针对adv_img的修改也影响adversary.original 370 | adv_img = np.copy(adversary.original) 371 | for i in range(steps): 372 | if adversary.is_targeted_attack: 373 | gradient = -self.model.gradient(adv_img, 374 | adversary.target_label) 375 | else: 376 | gradient = self.model.gradient(adv_img, 377 | adversary.original_label) 378 | if norm_ord == np.inf: 379 | gradient_norm = np.sign(gradient) 380 | else: 381 | gradient_norm = gradient / self._norm( 382 | gradient, ord=norm_ord) 383 | 384 | adv_img = adv_img + epsilon * gradient_norm * (max_ - min_) 385 | adv_img = np.clip(adv_img, min_, max_) 386 | adv_label = np.argmax(self.model.predict(adv_img)) 387 | logging.info('step={}, epsilon = {:.5f}, pre_label = {}, ' 388 | 'adv_label={}'.format(step, epsilon, pre_label, 389 | adv_label)) 390 | if adversary.try_accept_the_example(adv_img, adv_label): 391 | return adversary 392 | step += 1 393 | 394 | 395 | return adversary 396 | 397 | @staticmethod 398 | def _norm(a, ord): 399 | if a.ndim == 1: 400 | return np.linalg.norm(a, ord=ord) 401 | if a.ndim == a.shape[0]: 402 | norm_shape = (a.ndim, reduce(np.dot, a.shape[1:])) 403 | norm_axis = 1 404 | else: 405 | norm_shape = (reduce(np.dot, a.shape[:-1]), a.ndim) 406 | norm_axis = 0 407 | return np.linalg.norm(a.reshape(norm_shape), ord=ord, axis=norm_axis) 408 | 409 | 410 | FGSM = FastGradientSignMethodAttack 411 | FGSMT = FastGradientSignMethodTargetedAttack 412 | BIM = BasicIterativeMethodAttack 413 | ILCM = IterativeLeastLikelyClassMethodAttack 414 | MIFGSM = MomentumIteratorAttack 415 | FGSM_static = FGSMSAttack 416 | -------------------------------------------------------------------------------- /A3C/evaluate.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import math 3 | import os 4 | import random 5 | 6 | import gym 7 | import matplotlib.pyplot as plt 8 | import numpy as np 9 | from PIL import Image 10 | 11 | import torch 12 | import torch.nn as nn 13 | import torch.nn.functional as F 14 | 15 | from model import A3Cff 16 | from environment import atari_env 17 | from utils import read_config 18 | 19 | from torch.autograd import Variable 20 | 21 | from adv_attacks.adversary import Adversary 22 | from adv_attacks.gradient_method import FGSM 23 | from adv_attacks.adv_model import PytorchModel 24 | from adv_attacks.PGD import PGDAttack 25 | from ibp import network_bounds 26 | 27 | parser = argparse.ArgumentParser(description='A3C') 28 | 29 | 30 | parser.add_argument( 31 | '--max-episode-length', 32 | type=int, 33 | default=10000, 34 | metavar='M', 35 | help='maximum length of an episode (default: 10000)') 36 | parser.add_argument( 37 | '--env', 38 | default='PongNoFrameskip-v4', 39 | metavar='ENV', 40 | help='environment to train on (default: PongNoFrameskip-v4)') 41 | parser.add_argument( 42 | '--env-config', 43 | default='config.json', 44 | metavar='EC', 45 | help='environment to 
crop and resize info (default: config.json)')
46 | parser.add_argument(
47 | '--load-path',
48 | default='trained_models/PongNoFrameskip-v4_robust.pt',
49 | metavar='LMD',
50 | help='path to load a trained model from')
51 | parser.add_argument(
52 | '--gpu-id',
53 | type=int,
54 | default=-1,
55 | help='GPU to use [-1 CPU only] (default: -1)')
56 | parser.add_argument(
57 | '--skip-rate',
58 | type=int,
59 | default=4,
60 | metavar='SR',
61 | help='frame skip rate (default: 4)')
62 | parser.add_argument(
63 | '--fgsm-video',
64 | type=float,
65 | default=None,
66 | metavar='FV',
67 | help='whether to produce a video of the agent performing under FGSM attack with given epsilon')
68 | parser.add_argument(
69 | '--pgd-video',
70 | type=float,
71 | default=None,
72 | metavar='PV',
73 | help='whether to produce a video of the agent performing under PGD attack with given epsilon')
74 | parser.add_argument('--video',
75 | dest='video',
76 | action='store_true',
77 | help = 'saves a video of standard eval run of model')
78 | parser.add_argument('--fgsm',
79 | dest='fgsm',
80 | action='store_true',
81 | help = 'evaluate against fast gradient sign attack')
82 | parser.add_argument('--pgd',
83 | dest='pgd',
84 | action='store_true',
85 | help='evaluate against projected gradient descent attack')
86 | parser.add_argument('--gwc',
87 | dest='gwc',
88 | action='store_true',
89 | help='whether to evaluate worst possible (greedy) outcome under any epsilon bounded attack')
90 | parser.add_argument('--action-pert',
91 | dest='action_pert',
92 | action='store_true',
93 | help='whether to evaluate performance under action perturbations')
94 | parser.add_argument('--acr',
95 | dest='acr',
96 | action='store_true',
97 | help='whether to evaluate the action certification rate of an agent')
98 | parser.add_argument('--nominal',
99 | dest='nominal',
100 | action='store_true',
101 | help="evaluate the agent's nominal performance without any adversaries")
102 |
103 | parser.set_defaults(video=False, fgsm=False, pgd=False, gwc=False, action_pert=False, acr=False)
104 |
105 |
106 | def record_game(curr_model, env, args):
107 |
108 | state = env.reset()
109 | if args.gpu_id >= 0:
110 | with torch.cuda.device(args.gpu_id):
111 | curr_model = curr_model.cuda()
112 |
113 | states = [state*255]
114 | episode_reward = 0
115 |
116 | with torch.no_grad():
117 | while True:
118 | input_x = torch.FloatTensor(state).unsqueeze(0)
119 | if args.gpu_id >= 0:
120 | with torch.cuda.device(args.gpu_id):
121 | input_x = input_x.cuda()
122 | _, output = curr_model.forward(input_x)
123 | action = torch.argmax(output, dim=1)
124 | next_state, reward, done, info = env.step(action[0])
125 |
126 | episode_reward += reward
127 | state = next_state
128 | states.append(state*255)
129 |
130 | if done and not info:
131 | state = env.reset()
132 | elif info:
133 | env.reset()
134 | states = np.array(states)
135 | print(states.shape)
136 | return episode_reward, np.array(states, dtype=np.uint8)
137 |
138 |
139 | def attack_eval(curr_model, env, args, epsilon=1e-4, attack_type='FGSM', record=False):
140 | assert attack_type in ('FGSM', 'PGD'), 'Invalid attack type'
141 | loss_func = torch.nn.CrossEntropyLoss()
142 | m = PytorchModel(curr_model, loss_func,(0, 1), channel_axis=1, nb_classes=env.action_space, device=args.gpu_id)
143 |
144 | if attack_type=='FGSM':
145 | attack = FGSM(m)
146 | attack_config = {"epsilons": [epsilon], 'steps': 1}
147 | elif attack_type == 'PGD':
148 | attack = PGDAttack(m)
149 | attack_config = {"epsilon": epsilon,
"steps": 30, "relative_step_size":0.1} 150 | 151 | total_count = 0 152 | fooling_count = 0 153 | 154 | episode_reward = 0 155 | state = env.reset() 156 | if record: 157 | states = [] 158 | 159 | while True: 160 | total_count += 1 161 | input_x = torch.FloatTensor(state).unsqueeze(0) 162 | if args.gpu_id >= 0: 163 | with torch.cuda.device(args.gpu_id): 164 | input_x = input_x.cuda() 165 | _, output = curr_model.forward(input_x) 166 | #print(output) 167 | action = torch.argmax(output, dim=1) 168 | inputs, labels= input_x.cpu().numpy(), action.cpu().numpy() 169 | adversary = Adversary(inputs, labels[0]) 170 | adversary = attack(adversary, **attack_config) 171 | 172 | if adversary.is_successful(): 173 | fooling_count += 1 174 | if record: 175 | states.append(adversary.adversarial_example[0]*255) 176 | next_state, reward, done, info = env.step(adversary.adversarial_label) 177 | else: 178 | if record: 179 | states.append(adversary.bad_adversarial_example[0]*255) 180 | next_state, reward, done, info = env.step(action[0]) 181 | 182 | episode_reward += reward 183 | state = next_state 184 | if done and not info: 185 | state = env.reset() 186 | 187 | elif info: 188 | state = env.reset() 189 | print("[TEST_DATASET]: fooling_count={}, total_count={}, fooling_rate={:.3f}".format( 190 | fooling_count, total_count, float(fooling_count) / total_count)) 191 | print('Reward under {} attack {}'.format(attack_type, episode_reward)) 192 | if record: 193 | return episode_reward, np.array(states, dtype=np.uint8) 194 | else: 195 | return episode_reward 196 | 197 | 198 | def eval_greedy_wc(curr_model, env, args, epsilon=1e-4): 199 | episode_reward = 0 200 | state = env.reset() 201 | 202 | with torch.no_grad(): 203 | while True: 204 | input_x = torch.FloatTensor(state).unsqueeze(0) 205 | if args.gpu_id >= 0: 206 | with torch.cuda.device(args.gpu_id): 207 | input_x = input_x.cuda() 208 | _, output = curr_model.forward(input_x) 209 | #print(output) 210 | 211 | upper, lower = network_bounds(curr_model.model, input_x, epsilon=epsilon) 212 | upper, lower = upper[:,1:], lower[:, 1:] 213 | 214 | impossible = upper < torch.max(lower, dim=1)[0] 215 | #add a large number to ignore impossible ones, choose possible action with smallest q-value 216 | worst_case_action = torch.argmin(output+1e6*impossible, dim=1) 217 | next_state, reward, done, info = env.step(worst_case_action[0]) 218 | episode_reward += reward 219 | state = next_state 220 | if done and not info: 221 | state = env.reset() 222 | elif info: 223 | state = env.reset() 224 | print('Worst case reward {}'.format(episode_reward)) 225 | return episode_reward 226 | 227 | def eval_action_pert(curr_model, env, args, epsilon=0.01): 228 | episode_reward = 0 229 | state = env.reset() 230 | 231 | with torch.no_grad(): 232 | while True: 233 | input_x = torch.FloatTensor(state).unsqueeze(0) 234 | if args.gpu_id >= 0: 235 | with torch.cuda.device(args.gpu_id): 236 | input_x = input_x.cuda() 237 | _, output = curr_model.forward(input_x) 238 | #print(output) 239 | if random.random() < epsilon: 240 | action = random.randint(0, output.shape[1]-1) 241 | else: 242 | action = torch.argmax(output[0]) 243 | next_state, reward, done, info = env.step(action) 244 | episode_reward += reward 245 | state = next_state 246 | if done and not info: 247 | state = env.reset() 248 | elif info: 249 | state = env.reset() 250 | print('Reward under {} action perturbation:{}'.format(epsilon, episode_reward)) 251 | return episode_reward 252 | 253 | def eval_action_cert_rate(curr_model, env, args, 
epsilon=1e-4): 254 | episode_reward = 0 255 | state = env.reset() 256 | total = 0 257 | certified = 0 258 | with torch.no_grad(): 259 | while True: 260 | input_x = torch.FloatTensor(state).unsqueeze(0) 261 | if args.gpu_id >= 0: 262 | with torch.cuda.device(args.gpu_id): 263 | input_x = input_x.cuda() 264 | _, output = curr_model.forward(input_x) 265 | action = torch.argmax(output, dim=1) 266 | 267 | upper, lower = network_bounds(curr_model.model, input_x, epsilon=epsilon) 268 | upper, lower = upper[:,1:], lower[:, 1:] 269 | #remove the action selected from calculations 270 | upper[:, action] = -1e10 271 | 272 | max_other = torch.max(upper, dim=1)[0] 273 | if lower[:, action] > max_other: 274 | certified += 1 275 | total += 1 276 | 277 | next_state, reward, done, info = env.step(action) 278 | episode_reward += reward 279 | state = next_state 280 | if done and not info: 281 | state = env.reset() 282 | elif info: 283 | state = env.reset() 284 | print('Reward:{}, action certification rate {:.4f}'.format(episode_reward, certified/total)) 285 | return certified/total 286 | 287 | 288 | if __name__ == '__main__': 289 | args = parser.parse_args() 290 | setup_json = read_config(args.env_config) 291 | env_conf = setup_json["Default"] 292 | for i in setup_json.keys(): 293 | if i in args.env: 294 | env_conf = setup_json[i] 295 | env = atari_env(args.env, env_conf, args) 296 | model = A3Cff(env.observation_space.shape[0], env.action_space) 297 | 298 | if args.gpu_id >= 0: 299 | weights = torch.load(args.load_path, map_location=torch.device('cuda:{}'.format(args.gpu_id))) 300 | model.load_state_dict(weights) 301 | with torch.cuda.device(args.gpu_id): 302 | model.cuda() 303 | else: 304 | weights = torch.load(args.load_path, map_location=torch.device('cpu')) 305 | model.load_state_dict(weights) 306 | model.eval() 307 | 308 | save_name = (args.load_path.split('/')[-1]).split('.')[0] 309 | if not os.path.exists('videos'): 310 | os.mkdir('videos') 311 | if not os.path.exists('figures'): 312 | os.mkdir('figures') 313 | if not os.path.exists('figures/'+save_name): 314 | os.mkdir('figures/'+save_name) 315 | 316 | if args.video: 317 | reward, states = record_game(model, env, args) 318 | print(reward) 319 | 320 | im = Image.fromarray(states[0,0], mode='L') 321 | im.save('videos/{}.gif'.format(save_name), save_all=True, optimize=True, duration=40, mode='L', loop=0, 322 | append_images=[Image.fromarray(state[0]) for state in states[1:]]) 323 | 324 | if args.fgsm_video: 325 | reward, states = attack_eval(model, env, args, args.fgsm_video, 'FGSM', record=True) 326 | print(reward) 327 | 328 | width = env.observation_space.shape[1] 329 | height = env.observation_space.shape[2] 330 | im = Image.fromarray(states[0,0], mode='L') 331 | im.save('videos/{}_fgsm_{}.gif'.format(save_name, args.fgsm_video), save_all=True, duration=40, mode='L', loop=0, 332 | append_images=[Image.fromarray(state[0]) for state in states[1:]]) 333 | 334 | if args.pgd_video: 335 | reward, states = attack_eval(model, env, args, args.pgd_video, 'PGD',record=True) 336 | print(reward) 337 | 338 | width = env.observation_space.shape[1] 339 | height = env.observation_space.shape[2] 340 | im = Image.fromarray(states[0,0], mode='L') 341 | im.save('videos/{}_pgd_{}.gif'.format(save_name, args.pgd_video), save_all=True, duration=40, mode='L', loop=0, 342 | append_images=[Image.fromarray(state[0]) for state in states[1:]]) 343 | 344 | epsilons = [0.3/255, 1/255, 3/255, 8/255] 345 | if args.fgsm: 346 | np.save('figures/{}/fgsm_epsilons.npy'.format(save_name), 
epsilons) 347 | rewards = [] 348 | for epsilon in epsilons: 349 | print(epsilon) 350 | curr_rewards = [] 351 | for i in range(20): 352 | reward = attack_eval(model, env, args, epsilon, 'FGSM') 353 | curr_rewards.append(reward) 354 | rewards.append(curr_rewards) 355 | 356 | rewards = np.sort(rewards, axis=1) 357 | plt.plot(epsilons, np.mean(rewards, axis=1), label='mean') 358 | plt.fill_between(epsilons, rewards[:, -1], rewards[:, 0], alpha=0.2, label='interval') 359 | plt.legend() 360 | plt.xlabel('l-inf perturbation') 361 | plt.ylabel('reward') 362 | plt.xscale('log') 363 | np.save('figures/{}/fgsm.npy'.format(save_name), rewards) 364 | plt.savefig('figures/{}/fgsm.png'.format(save_name)) 365 | plt.close() 366 | 367 | if args.pgd: 368 | np.save('figures/{}/pgd_epsilons.npy'.format(save_name), epsilons) 369 | rewards = [] 370 | for epsilon in epsilons: 371 | print(epsilon) 372 | curr_rewards = [] 373 | for i in range(20): 374 | reward = attack_eval(model, env, args, epsilon, 'PGD') 375 | curr_rewards.append(reward) 376 | rewards.append(curr_rewards) 377 | 378 | rewards = np.sort(rewards, axis=1) 379 | plt.plot(epsilons, np.mean(rewards, axis=1), label='mean') 380 | plt.fill_between(epsilons, rewards[:, -1], rewards[:, 0], alpha=0.2, label='interval') 381 | plt.legend() 382 | plt.xlabel('l-inf perturbation') 383 | plt.ylabel('reward') 384 | plt.xscale('log') 385 | np.save('figures/{}/pgd.npy'.format(save_name), rewards) 386 | plt.savefig('figures/{}/pgd.png'.format(save_name)) 387 | plt.close() 388 | 389 | if args.gwc: 390 | np.save('figures/{}/greedy_wc_epsilons.npy'.format(save_name), epsilons) 391 | rewards = [] 392 | for epsilon in epsilons: 393 | print(epsilon) 394 | curr_rewards = [] 395 | for i in range(20): 396 | reward = eval_greedy_wc(model, env, args, epsilon) 397 | curr_rewards.append(reward) 398 | rewards.append(curr_rewards) 399 | 400 | rewards = np.sort(rewards, axis=1) 401 | plt.plot(epsilons, np.mean(rewards, axis=1), label=' mean') 402 | plt.fill_between(epsilons, rewards[:, -1], rewards[:, 0], alpha=0.2, label='interval') 403 | plt.legend() 404 | plt.xlabel('l-inf perturbation') 405 | plt.ylabel('reward') 406 | plt.xscale('log') 407 | np.save('figures/{}/greedy_wc.npy'.format(save_name), rewards) 408 | plt.savefig('figures/{}/greedy_wc.png'.format(save_name)) 409 | plt.close() 410 | 411 | if args.acr: 412 | np.save('figures/{}/acr_epsilons.npy'.format(save_name), epsilons) 413 | rates = [] 414 | for epsilon in epsilons: 415 | print(epsilon) 416 | curr_rates = [] 417 | for i in range(20): 418 | rate = eval_action_cert_rate(model, env, args, epsilon) 419 | curr_rates.append(rate) 420 | rates.append(curr_rates) 421 | 422 | rates = np.sort(rates, axis=1) 423 | plt.plot(epsilons, np.mean(rates, axis=1), label='mean') 424 | plt.fill_between(epsilons, rates[:, -1], rates[:, 0], alpha=0.2, label='interval') 425 | plt.legend() 426 | plt.xlabel('l-inf perturbation') 427 | plt.ylabel('Action certification rate') 428 | plt.xscale('log') 429 | np.save('figures/{}/acr.npy'.format(save_name), rates) 430 | plt.savefig('figures/{}/acr.png'.format(save_name)) 431 | plt.close() 432 | 433 | if args.action_pert: 434 | epsilons = [0.01, 0.02, 0.05, 0.1] 435 | np.save('figures/{}/action_pert_epsilons.npy'.format(save_name), epsilons) 436 | rewards = [] 437 | for epsilon in epsilons: 438 | print(epsilon) 439 | curr_rewards = [] 440 | for i in range(20): 441 | reward = eval_action_pert(model, env, args, epsilon) 442 | curr_rewards.append(reward) 443 | rewards.append(curr_rewards) 444 | 445 | 
rewards = np.sort(rewards, axis=1) 446 | plt.plot(epsilons, np.mean(rewards, axis=1), label='mean reward') 447 | plt.fill_between(epsilons, rewards[:, -1], rewards[:, 0], alpha=0.2, label='reward interval') 448 | plt.legend() 449 | plt.xlabel('action perturbation') 450 | plt.ylabel('reward') 451 | plt.xscale('log') 452 | np.save('figures/{}/action_pert.npy'.format(save_name), rewards) 453 | plt.savefig('figures/{}/action_pert.png'.format(save_name)) 454 | plt.close() 455 | 456 | if args.nominal: 457 | curr_rewards = [] 458 | for i in range(20): 459 | reward = eval_action_pert(model, env, args, epsilon=0) 460 | curr_rewards.append(reward) 461 | rewards = np.sort(curr_rewards) 462 | plt.hist(rewards, bins=10) 463 | plt.title('Nominal mean reward:{:.1f}'.format(np.mean(rewards))) 464 | np.save('figures/{}/nominal.npy'.format(save_name), rewards) 465 | plt.savefig('figures/{}/nominal.png'.format(save_name)) 466 | plt.close() 467 | --------------------------------------------------------------------------------
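Usage note: the snippet below is a minimal, hypothetical sketch of how the attack classes from adv_attacks/gradient_method.py are wired to an agent, mirroring the per-step logic of attack_eval in A3C/evaluate.py. It assumes `model` is a loaded A3Cff agent and `env` an atari_env instance, both constructed as in evaluate.py; the epsilon value is an arbitrary placeholder, not a setting taken from the repository.

import numpy as np
import torch

from adv_attacks.adv_model import PytorchModel
from adv_attacks.adversary import Adversary
from adv_attacks.gradient_method import FGSM

# Assumptions: `model` is a loaded A3Cff agent and `env` an atari_env instance,
# built exactly as in evaluate.py; epsilon is a placeholder perturbation budget.
epsilon = 1 / 255

# Wrap the agent the same way attack_eval does (device=-1 means CPU).
wrapped = PytorchModel(model, torch.nn.CrossEntropyLoss(), (0, 1),
                       channel_axis=1, nb_classes=env.action_space, device=-1)
attack = FGSM(wrapped)

state = env.reset()
input_x = torch.FloatTensor(state).unsqueeze(0)
_, output = model.forward(input_x)          # A3Cff returns (value, policy logits)
action = torch.argmax(output, dim=1)

# The adversary records the clean observation and the greedy action;
# a successful attack finds a bounded perturbation that changes that action.
adversary = Adversary(input_x.cpu().numpy(), action.cpu().numpy()[0])
adversary = attack(adversary, epsilons=[epsilon], steps=1)

if adversary.is_successful():
    perturbed_obs = adversary.adversarial_example[0]
    print('attack succeeded, new action:', adversary.adversarial_label)
else:
    print('attack failed at epsilon', epsilon)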