├── model
│   └── .gitkeep
├── src
│   ├── __init__.py
│   ├── data
│   │   ├── __init__.py
│   │   ├── data.py
│   │   ├── DataGenerator.py
│   │   └── cifar10.py
│   ├── model
│   │   ├── __init__.py
│   │   ├── model.py
│   │   ├── cifar10.py
│   │   └── cifar_res.py
│   ├── attacks
│   │   ├── __init__.py
│   │   ├── backdoor_generator.py
│   │   ├── Deepfool.py
│   │   ├── universal_perturbation.py
│   │   └── CW.py
│   ├── classifiers
│   │   ├── __init__.py
│   │   └── classifier.py
│   ├── poison_detection
│   │   ├── __init__.py
│   │   ├── poison_filtering_defence.py
│   │   ├── ground_truth_evaluator.py
│   │   └── clustering_analyzer.py
│   ├── poison.py
│   ├── activations.py
│   ├── conf.py
│   ├── defences
│   │   ├── spectral.py
│   │   └── activation_clustering.py
│   ├── utils.py
│   ├── test_specific_pair.py
│   ├── visualization.py
│   └── backdoor.py
├── perturbation
│   └── .gitkeep
├── .gitattributes
├── imgs
│   ├── example.png
│   ├── Res2VGG_CW.png
│   ├── VGG2Res_CW.png
│   ├── generation.png
│   ├── injection.png
│   ├── Res2VGG_Deepfool.png
│   └── VGG2Res_Deepfool.png
├── .gitignore
├── json
│   └── cifar.json
├── requirements.txt
└── README.md

/model/.gitkeep:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/src/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/perturbation/.gitkeep:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/src/data/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/src/model/__init__.py:
--------------------------------------------------------------------------------
1 | from model.model import CNNModel
2 | 
--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | * text=auto
3 | 
--------------------------------------------------------------------------------
/imgs/example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ZQ-Struggle/AdvDoor/HEAD/imgs/example.png
--------------------------------------------------------------------------------
/imgs/Res2VGG_CW.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ZQ-Struggle/AdvDoor/HEAD/imgs/Res2VGG_CW.png
--------------------------------------------------------------------------------
/imgs/VGG2Res_CW.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ZQ-Struggle/AdvDoor/HEAD/imgs/VGG2Res_CW.png
--------------------------------------------------------------------------------
/imgs/generation.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ZQ-Struggle/AdvDoor/HEAD/imgs/generation.png
--------------------------------------------------------------------------------
/imgs/injection.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ZQ-Struggle/AdvDoor/HEAD/imgs/injection.png
--------------------------------------------------------------------------------
/imgs/Res2VGG_Deepfool.png:
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZQ-Struggle/AdvDoor/HEAD/imgs/Res2VGG_Deepfool.png -------------------------------------------------------------------------------- /imgs/VGG2Res_Deepfool.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZQ-Struggle/AdvDoor/HEAD/imgs/VGG2Res_Deepfool.png -------------------------------------------------------------------------------- /src/attacks/__init__.py: -------------------------------------------------------------------------------- 1 | from attacks.backdoor_generator import BackdoorGenerator 2 | from attacks.Deepfool import Deepfool 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea/* 2 | src/img/* 3 | data/* 4 | model/* 5 | gen_img/* 6 | perturbation/* 7 | log/* 8 | .ipynb_checkpoints/* 9 | src/.ipynb_checkpoints/* 10 | src/__pycache__/* 11 | *.pyc 12 | vis/* 13 | clustering_result/* 14 | *.h5 15 | .vscode/* 16 | !.gitkeep 17 | -------------------------------------------------------------------------------- /src/classifiers/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Classifier API for applying all attacks. Use the :class:`.Classifier` wrapper to be able to apply an attack to a 3 | preexisting model. 4 | """ 5 | from classifiers.classifier import Classifier 6 | from classifiers.keras import KerasClassifier 7 | # from classifiers.mxnet import MXClassifier 8 | # from classifiers.pytorch import PyTorchClassifier 9 | # from classifiers.tensorflow import TFClassifier 10 | # from classifiers.ensemble import EnsembleClassifier 11 | -------------------------------------------------------------------------------- /src/poison_detection/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Poison detection defence API. Use the :class:`.PoisonFilteringDefence` wrapper to be able to apply a defence for a 3 | preexisting model. 
4 | """ 5 | from poison_detection.poison_filtering_defence import PoisonFilteringDefence 6 | from poison_detection.activation_defence import ActivationDefence 7 | 8 | from poison_detection.clustering_analyzer import ClusteringAnalyzer 9 | 10 | from poison_detection.ground_truth_evaluator import GroundTruthEvaluator 11 | -------------------------------------------------------------------------------- /json/cifar.json: -------------------------------------------------------------------------------- 1 | { 2 | "data_path": "", 3 | "auto_encoder_data_path": "../data/mnist.npz", 4 | "save_dir": "../model/", 5 | "auto_encoder_save_dir": "../model/", 6 | "perturbation_dir": "../perturbation/", 7 | "method": "universal", 8 | "model_prefix": "cifar", 9 | "backdoor_type": "adversarial", 10 | "pert_path": "../perturbation/cifar_universal_5to6_20210430-142209.pkl", 11 | "result_path": "../log/20191103/mnist_train_poison_rate_20191104.pkl", 12 | "train_epoch": 20, 13 | "save_interval": 5, 14 | "num_selection": 50000, 15 | "train_poison_rate": 0.3, 16 | "test_poison_rate": 0.3, 17 | "poison_label_source": 5, 18 | "poison_label_target": 6, 19 | "alpha_pert": 1, 20 | "pert_xi": 30, 21 | "num_classes": 10, 22 | "batch_size": 128, 23 | "train_image_size": 32, 24 | "model_path": "../model/cifar_clean_20210430-145041.pkl", 25 | "model_path_backdoor": "../model/cifar_20210427-181241_poison_5to6.pkl", 26 | "model_path_finetune": "../model/cifar_20210430-141935_clean.pkl", 27 | "num_gpu": 1 28 | } 29 | -------------------------------------------------------------------------------- /src/poison.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | 4 | # class for 5 | # 1. number of poison 6 | # 2. indices to be poisoned 7 | class Poison: 8 | def __init__(self, num, indices, backdoor_type, sources, targets, percent_poison): 9 | self.num_poison = num 10 | self.indices_to_be_poisoned = indices 11 | self.backdoor_type = backdoor_type 12 | self.sources = sources 13 | self.targets = targets 14 | self.percent_poison = percent_poison 15 | self.random_selection_indices = None 16 | self.shuffled_indices = None 17 | 18 | def get_num_poison(self): 19 | return self.num_poison 20 | 21 | def set_num_poison(self, num): 22 | self.num_poison = num 23 | 24 | def get_indices_to_be_poisoned(self): 25 | return self.indices_to_be_poisoned 26 | 27 | def set_indices_to_be_poisoned(self, indices): 28 | self.indices_to_be_poisoned = indices 29 | 30 | def get_backdoor_type(self): 31 | return self.backdoor_type 32 | 33 | def set_backdoor_type(self, backdoor_type): 34 | self.backdoor_type = backdoor_type 35 | 36 | def get_sources(self): 37 | return self.sources 38 | 39 | def set_sources(self, sources): 40 | self.sources = sources 41 | 42 | def get_targets(self): 43 | return self.targets 44 | 45 | def set_targets(self, targets): 46 | self.targets = targets 47 | 48 | def get_percent_poison(self): 49 | return self.percent_poison 50 | 51 | def set_percent_poison(self, percent_poison): 52 | self.percent_poison = percent_poison 53 | 54 | def get_random_selection_indices(self): 55 | return self.random_selection_indices 56 | 57 | def set_random_selection_indices(self, indices): 58 | self.random_selection_indices = indices 59 | 60 | def get_shuffled_indices(self): 61 | return self.shuffled_indices 62 | 63 | def set_shuffled_indices(self, indices): 64 | self.shuffled_indices = indices 65 | -------------------------------------------------------------------------------- 
/src/activations.py:
--------------------------------------------------------------------------------
1 | import os
2 | 
3 | from poison_detection import ActivationDefence
4 | 
5 | 
6 | class Activations:
7 |     def __init__(self, model, para):
8 |         # Here `model` already holds the extracted activations; to build them
9 |         # from a classifier instead, use the commented path below.
10 |         # self.poison = model.get_train_poison()
11 |         # defence = ActivationDefence(model.classifier, data.x_train, data.y_train,
12 |         #                             data_path=os.path.join(data.data_path, 'train'), batch_size=data.batch_size)
13 |         # self.activations = self._get_activations(defence)
14 |         self.activations = model
15 |         self.para = para
16 |         self.poison = None  # must be set (e.g. from model.get_train_poison()) before restore_data() is called
17 | 
18 |     def _get_activations(self, defences):
19 |         nb_layers = len(defences.classifier.layer_names)
20 |         activations_by_layers = []
21 |         '''
22 |         for i in range(nb_layers):
23 |             activations_by_layers.append(
24 |                 defences.classifier.get_activations(defences.x_train, layer=i, data_path=defences.data_path,
25 |                                                     batch_size=defences.batch_size))
26 |         '''
27 | 
28 |         # only the activations of the second-to-last layer are used
29 |         activations_by_layers.append(
30 |             defences.classifier.get_activations(defences.x_train, layer=nb_layers - 2, data_path=defences.data_path,
31 |                                                 batch_size=defences.batch_size))
32 |         nb_layers = 1
33 |         activations = [[] for i in range(len(defences.x_train))]
34 |         for i in range(nb_layers):
35 |             for j in range(len(defences.x_train)):
36 |                 activations[j].append(activations_by_layers[i][j])
37 |         # print(len(activations[0]))
38 |         return activations
39 | 
40 |     def restore_data(self, data_cls):
41 |         data = data_cls(self.para)  # data_cls is a Data subclass, e.g. CifarData
42 |         data.load_data()
43 |         data.restore_train_backdoor(self.poison)
44 |         # self.shuffle_activations(data.shuffled_indices)
45 |         data.gen_test_backdoor()
46 |         return data
47 | 
48 |     def shuffle_activations(self, shuffled_index):
49 |         self.activations = [self.activations[i] for i in shuffled_index]
50 | 
--------------------------------------------------------------------------------
/src/data/data.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 | 
3 | import abc
4 | 
5 | 
6 | class Data(metaclass=abc.ABCMeta):
7 |     def __init__(self, param):
8 |         self.param = param
9 |         self.init()
10 |         self.batch_size = None
11 | 
12 |     @abc.abstractmethod
13 |     def init(self):
14 |         pass
15 | 
16 |     @abc.abstractmethod
17 |     def load_data(self, is_add_channel=False):
18 |         pass
19 | 
20 |     @abc.abstractmethod
21 |     def add_channel_axis(self):
22 |         pass
23 | 
24 |     @abc.abstractmethod
25 |     def gen_indices(self):
26 |         pass
27 | 
28 |     @abc.abstractmethod
29 |     def gen_train_data(self):
30 |         pass
31 | 
32 |     @abc.abstractmethod
33 |     def gen_train_backdoor_data(self):
34 |         pass
35 | 
36 |     @abc.abstractmethod
37 |     def gen_shuffled_indices(self):
38 |         pass
39 | 
40 |     @abc.abstractmethod
41 |     def gen_shuffle_train_data(self):
42 |         pass
43 | 
44 |     @abc.abstractmethod
45 |     def print_backdoor_info(self):
46 |         pass
47 | 
48 |     @abc.abstractmethod
49 |     def gen_train_backdoor(self):
50 |         pass
51 | 
52 |     @abc.abstractmethod
53 |     def gen_test_backdoor_data(self):
54 |         pass
55 | 
56 |     @abc.abstractmethod
57 |     def gen_test_backdoor(self):
58 |         pass
59 | 
60 |     @abc.abstractmethod
61 |     def gen_backdoor(self, model):
62 |         pass
63 | 
64 |     @abc.abstractmethod
65 |     def restore_train_backdoor_data(self, poison):
66 |         pass
67 | 
68 |     @abc.abstractmethod
69 |     def restore_train_backdoor(self, poison):
70 |         pass
71 | 
72 |     @abc.abstractmethod
73 |     def restore_test_backdoor_data(self, poison):
74 |         pass
75 | 
76 |     @abc.abstractmethod
77 |     def restore_test_backdoor(self, poison):
78 |         pass
79 | 
80 |     @abc.abstractmethod
81 |     def restore_backdoor(self, model):
82 |         pass
83 | 
84 | 
@abc.abstractmethod
85 |     def visiualize_img_by_idx(self, shuffled_idx, pre_label, is_train=True):
86 |         pass
87 | 
88 |     @abc.abstractmethod
89 |     def cal_index(self, idx, is_train=True):
90 |         pass
91 | 
--------------------------------------------------------------------------------
/src/data/DataGenerator.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 | import math
3 | import os
4 | 
5 | import cv2
6 | import keras
7 | import numpy as np
8 | 
9 | from keras.utils import to_categorical
10 | 
11 | from utils import preprocess_input_vgg
12 | 
13 | 
14 | class DataGenerator(keras.utils.Sequence):
15 | 
16 |     def __init__(self, x, param, y=None, batch_size=1, shuffle=True, preprocess=preprocess_input_vgg, postfix=None):
17 |         self.batch_size = batch_size
18 |         self.x = x
19 |         self.y = y
20 |         self.indexes = np.arange(len(self.x))
21 |         self.shuffle = shuffle
22 |         self.preprocess = preprocess
23 |         self.param = param
24 |         if postfix is None:
25 |             self.data_path = self.param.get_conf('data_path')
26 |         else:
27 |             self.data_path = os.path.join(self.param.get_conf('data_path'), postfix)
28 |         self.train_size = param.get_conf('train_image_size')
29 |         self.nb_class = self.param.get_conf('classes_num')
30 | 
31 |     def __len__(self):
32 |         return math.ceil(len(self.x) / float(self.batch_size))
33 | 
34 |     def __getitem__(self, index):
35 |         batch_indexs = self.indexes[index * self.batch_size:(index + 1) * self.batch_size]
36 |         batch_datas = [self.x[k] for k in batch_indexs]
37 |         y = None
38 |         if self.y is not None:
39 |             y = [self.y[k] for k in batch_indexs]
40 |         return self.data_generation(batch_datas, y)
41 | 
42 |     def on_epoch_end(self):
43 |         if self.shuffle:
44 |             np.random.shuffle(self.indexes)
45 | 
46 |     def data_generation(self, batch_datas, y=None):
47 |         images = []
48 |         labels = []
49 |         for i, data in enumerate(batch_datas):
50 | 
51 |             img = cv2.imread(os.path.join(self.data_path, data))[:, :, ::-1]
52 |             img = cv2.resize(img, (self.train_size, self.train_size))
53 | 
54 |             images.append(img)
55 |             if self.y is not None:
56 |                 labels.append(to_categorical(y[i], self.nb_class))  # one label per sample
57 | 
58 |         images = np.array(images)
59 | 
60 |         if self.preprocess is not None:
61 |             images = self.preprocess(images)
62 |         # print(images.shape)
63 |         if self.y is None:
64 |             return np.array(images)
65 | 
66 |         return np.array(images), np.array(labels)
67 | 
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | # This file may be used to create an environment using:
2 | # $ conda create --name <env> --file <this file>
3 | # platform: linux-64
4 | _libgcc_mutex=0.1=conda_forge
5 | _openmp_mutex=4.5=1_gnu
6 | _tflow_select=2.1.0=gpu
7 | absl-py=0.12.0=pyhd8ed1ab_0
8 | astor=0.8.1=pyh9f0ad1d_0
9 | blas=1.0=mkl
10 | c-ares=1.17.1=h7f98852_1
11 | ca-certificates=2020.12.5=ha878542_0
12 | certifi=2020.12.5=py36h5fab9bb_1
13 | cudatoolkit=9.2=0
14 | cudnn=7.6.4=cuda9.2_0
15 | cycler=0.10.0=py_2
16 | dbus=1.13.6=h48d8840_2
17 | expat=2.3.0=h9c3ff4c_0
18 | fontconfig=2.13.1=he4413a7_1000
19 | freetype=2.10.4=h0708190_1
20 | gast=0.4.0=pyh9f0ad1d_0
21 | gettext=0.19.8.1=h0b5b191_1005
22 | glib=2.68.1=h9c3ff4c_0
23 | glib-tools=2.68.1=h9c3ff4c_0
24 | grpcio=1.37.0=py36h8e87921_0
25 | gst-plugins-base=1.14.0=hbbd80ab_1
26 | gstreamer=1.14.0=h28cd5cc_2
27 | h5py=2.7.1=py36_2
28 | hdf5=1.10.1=2
29 | icu=58.2=hf484d3e_1000
30 | 
imageio=2.4.1=py36_1000 31 | importlib-metadata=4.0.1=py36h5fab9bb_0 32 | intel-openmp=2021.2.0=h06a4308_610 33 | jpeg=9d=h36c2ea0_0 34 | keras=2.1.3 35 | kiwisolver=1.3.1=py36h605e78d_1 36 | lcms2=2.12=hddcbb42_0 37 | ld_impl_linux-64=2.35.1=hea4e1c9_2 38 | libffi=3.3=h58526e2_2 39 | libgcc-ng=9.3.0=h2828fa1_19 40 | libgfortran=3.0.0=1 41 | libgfortran-ng=7.5.0=h14aa051_19 42 | libgfortran4=7.5.0=h14aa051_19 43 | libglib=2.68.1=h3e27bee_0 44 | libgomp=9.3.0=h2828fa1_19 45 | libiconv=1.16=h516909a_0 46 | libpng=1.6.37=h21135ba_2 47 | libprotobuf=3.15.8=h780b84a_0 48 | libstdcxx-ng=9.3.0=h6de172a_19 49 | libtiff=4.2.0=hdc55705_1 50 | libuuid=2.32.1=h7f98852_1000 51 | libwebp-base=1.2.0=h7f98852_2 52 | libxcb=1.13=h7f98852_1003 53 | libxml2=2.9.9=h13577e0_2 54 | lz4-c=1.9.3=h9c3ff4c_0 55 | markdown=3.3.4=pyhd8ed1ab_0 56 | matplotlib=3.0.1=py36h5429711_0 57 | mkl=2018.0.3=1 58 | ncurses=6.2=h58526e2_4 59 | numpy=1.14.2=py36hdbf6ddf_0 60 | olefile=0.46=pyh9f0ad1d_1 61 | openjpeg=2.4.0=hf7af979_0 62 | openssl=1.1.1k=h7f98852_0 63 | pandas=0.24.2=py36hf484d3e_0 64 | pcre=8.44=he1b5a44_0 65 | pillow=8.1.2=py36ha6010c0_1 66 | pip=21.1=pyhd8ed1ab_0 67 | protobuf=3.15.8=py36hc4f0c31_0 68 | pthread-stubs=0.4=h36c2ea0_1001 69 | pyparsing=2.4.7=pyh9f0ad1d_0 70 | pyqt=5.9.2=py36hcca6a23_4 71 | python=3.6.13=hffdb5ce_0_cpython 72 | python-dateutil=2.8.1=py_0 73 | python_abi=3.6=1_cp36m 74 | pytz=2021.1=pyhd8ed1ab_0 75 | qt=5.9.7=h5867ecd_1 76 | readline=8.1=h46c0cb4_0 77 | scikit-learn=0.20.0=py36h4989274_1 78 | scipy=1.1.0=py36hd20e5f9_0 79 | setuptools=49.6.0=py36h5fab9bb_3 80 | sip=4.19.8=py36hf484d3e_1000 81 | six=1.15.0=pyh9f0ad1d_0 82 | sqlite=3.35.5=h74cdb3f_0 83 | tensorboard=1.10.0=py36_0 84 | tensorflow=1.10.0=py36_0 85 | tensorflow-gpu=1.10.0=hf154084_0 86 | termcolor=1.1.0=py_2 87 | tk=8.6.10=h21135ba_1 88 | tornado=6.1=py36h8f6f2f9_1 89 | tqdm=4.39.0=py_0 90 | typing_extensions=3.7.4.3=py_0 91 | werkzeug=1.0.1=pyh9f0ad1d_0 92 | wheel=0.36.2=pyhd3deb0d_0 93 | xorg-libxau=1.0.9=h7f98852_0 94 | xorg-libxdmcp=1.1.3=h7f98852_0 95 | xz=5.2.5=h516909a_1 96 | zipp=3.4.1=pyhd8ed1ab_0 97 | zlib=1.2.11=h516909a_1010 98 | zstd=1.4.9=ha95c52a_0 99 | -------------------------------------------------------------------------------- /src/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | import os 4 | import sys 5 | import json 6 | import pickle 7 | import pprint 8 | import imageio 9 | import datetime 10 | import numpy as np 11 | # import pandas as pd 12 | import logging 13 | import logging.config 14 | from poison import * 15 | import cv2 16 | import matplotlib.pyplot as plt 17 | # import seaborn as sns 18 | plt.switch_backend('agg') 19 | import keras.backend as K 20 | from tqdm import * 21 | from numpy import float32 22 | from keras.models import Sequential, Model, load_model 23 | from keras.preprocessing import image 24 | from keras.layers import Dense, Flatten, Conv2D, MaxPooling2D, Dropout, Activation, Input, UpSampling2D, AveragePooling2D,BatchNormalization 25 | from keras.applications.vgg16 import preprocess_input, decode_predictions 26 | from keras.callbacks import Callback 27 | models_load = ['vgg16', 'nuaa'] 28 | models_noLoad = ['cifar', 'mnist', "GTSRB"] 29 | data_dir = '../data' 30 | json_dir = '../json' 31 | clutser_result = '../vis/clustering_result' 32 | tsne_result = '../vis/t_sne' 33 | MODEL_RESTORE_PATH = '../model/mnist_universal' 34 | 35 | LOGGING = { 36 | 'version': 1, 37 | 'disable_existing_loggers': False, 38 | 
'formatters': { 39 | 'std': { 40 | 'format': '%(asctime)s [%(levelname)s] %(name)s: %(message)s', 41 | 'datefmt': '%Y-%m-%d %H:%M' 42 | } 43 | }, 44 | 'handlers': { 45 | 'default': { 46 | 'class': 'logging.NullHandler', 47 | }, 48 | 'test': { 49 | 'class': 'logging.StreamHandler', 50 | 'formatter': 'std', 51 | 'level': logging.DEBUG 52 | } 53 | }, 54 | 'loggers': { 55 | '': { 56 | 'handlers': ['default'] 57 | }, 58 | 'testLogger': { 59 | 'handlers': ['test'], 60 | 'level': 'INFO', 61 | 'propagate': True 62 | } 63 | } 64 | } 65 | logging.config.dictConfig(LOGGING) 66 | logger = logging.getLogger(__name__) 67 | 68 | _folder = os.path.expanduser('~') 69 | if not os.access(_folder, os.W_OK): 70 | _folder = '/tmp' 71 | _folder = os.path.join(_folder, '.art') 72 | 73 | _config_path = os.path.expanduser(os.path.join(_folder, 'config.json')) 74 | if os.path.exists(_config_path): 75 | try: 76 | with open(_config_path) as f: 77 | _config = json.load(f) 78 | except ValueError: 79 | _config = {} 80 | 81 | if not os.path.exists(_folder): 82 | try: 83 | os.makedirs(_folder) 84 | except OSError: 85 | logger.warning('Unable to create folder for configuration file.', exc_info=True) 86 | 87 | if not os.path.exists(_config_path): 88 | # Generate default config 89 | _config = {'DATA_PATH': os.path.join(_folder, 'data')} 90 | 91 | try: 92 | with open(_config_path, 'w') as f: 93 | f.write(json.dumps(_config, indent=4)) 94 | except IOError: 95 | logger.warning('Unable to create configuration file', exc_info=True) 96 | 97 | if 'DATA_PATH' in _config: 98 | DATA_PATH = _config['DATA_PATH'] 99 | 100 | NUMPY_DTYPE = float32 101 | 102 | 103 | def get_date(): 104 | # '20191007' 105 | return datetime.datetime.now().strftime("%Y%m%d-%H%M%S") 106 | 107 | 108 | def set_model_restore_path(restore_path): 109 | MODEL_RESTORE_PATH = restore_path 110 | # print(MODEL_RESTORE_PATH) -------------------------------------------------------------------------------- /src/attacks/backdoor_generator.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | import abc 4 | 5 | from utils import * 6 | 7 | 8 | class BackdoorGenerator(metaclass=abc.ABCMeta): 9 | def __init__(self, model, param): 10 | self.model = model 11 | self.param = param 12 | 13 | def serialize(self, postfix='perturbation'): 14 | self.save_name = '_'.join([self.param.get_conf('model_prefix'), postfix, get_date(),]) 15 | self.save_png = os.path.join(self.param.get_conf('perturbation_dir'), self.save_name + '.png') 16 | self.save_pkl = os.path.join(self.param.get_conf('perturbation_dir'), self.save_name + '.pkl') 17 | # self.save_path = os.path.join(self.param.get_conf('perturbation_dir'), self.save_name) 18 | 19 | plt.figure() 20 | perturb_squeeze = np.squeeze(self.perturb) 21 | if self.param.get_conf('model_prefix') == 'mnist': 22 | plt.imshow(perturb_squeeze, cmap='gray') 23 | else: 24 | plt.imshow(self.perturb_to_image(self.perturb)) 25 | plt.show() 26 | print('perturb_squeeze.shape = ', perturb_squeeze.shape) 27 | print('self.perturb.shape = ', self.perturb.shape) 28 | 29 | imageio.imwrite(uri=self.save_png, im=perturb_squeeze) 30 | 31 | # im_imageio = imageio.imread(uri=self.save_png) 32 | # print('im_imageio.shape = ', im_imageio.shape) 33 | 34 | with open(self.save_pkl, 'wb') as f: 35 | pickle.dump(self.perturb, f) 36 | 37 | print('save perturbation done, name = ', self.save_pkl) 38 | return self.save_pkl 39 | 40 | def predict(self, img): 41 | pred = self.model.predict_instance(img) 42 | label = 
np.argmax(pred[0])
43 | 
44 |         print('label = ', label)
45 |         print('pred = ', pred)
46 | 
47 |         return label, pred
48 | 
49 |     def serialize_img(self, img, postfix='image', is_deprocess=False):
50 |         save_name = '_'.join([self.param.get_conf('model_prefix'), get_date(), postfix, get_signature()]) + '.png'
51 |         save_path = os.path.join(self.param.get_conf('perturbation_dir'), save_name)
52 | 
53 |         if self.param.get_conf('model_prefix') in models_noLoad:
54 |             # img = np.squeeze(img, axis=(2,))
55 |             img = np.squeeze(img)
56 |             img = np.clip(img * 255, 0, 255)
57 |         elif self.param.get_conf('model_prefix') in models_load:
58 |             # img = img.flatten().reshape((224, 224, 3))
59 |             if is_deprocess:
60 |                 img = deprocess_vgg(img)
61 |             img = np.squeeze(img)
62 | 
63 |         # print('img.shape = ', img.shape)
64 |         print('save_name = ', save_name)
65 | 
66 |         # perturb_squeeze = np.squeeze(img, axis=(0,))
67 |         imageio.imsave(save_path, img)
68 | 
69 |         print('save img done')
70 | 
71 |     def deserialize(self, save_pkl):
72 | 
73 |         with open(save_pkl, 'rb') as f:
74 |             self.perturb = pickle.load(f)
75 | 
76 |         # self.perturb = self.perturb.reshape(self.model.get_input_shape)
77 | 
78 |         print('load perturbation done')
79 |         print('self.perturb.shape = ', self.perturb.shape)
80 | 
81 |         return self.perturb
82 | 
83 |     def perturb_to_image(self, x):
84 |         x = x.reshape((self.param.get_conf('train_image_size'), self.param.get_conf('train_image_size'), 3))
85 |         # 'BGR'->'RGB'
86 |         x = x[:, :, ::-1]
87 |         # normalize to [0,1] -> [0,255]
88 |         x_normed = ((x - x.min()) / (x.max() - x.min())) * 255
89 |         x_normed = np.clip(x_normed, 0, 255).astype('uint8')
90 |         return x_normed
91 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # AdvDoor: Adversarial Backdoor Attack of Deep Learning System
2 | This is the repository for the paper AdvDoor: Adversarial Backdoor Attack of Deep Learning System.
3 | We show the whole workflow of backdoor trigger generation, injection, and detection.
4 | ## Usage
5 | ### Environments
6 | We mainly use tensorflow-gpu==1.10, keras==2.1.3, numpy==1.14.2, imageio, scikit-learn, matplotlib, and opencv-python. The CUDA version is 9.2 and the cuDNN version is 7.6.4. We can also run the following commands to create an environment with Anaconda.
7 | 
8 | ``` bash
9 | conda create --name <env> python=3.6
10 | conda activate <env>
11 | conda install --file requirements.txt
12 | pip install opencv-python
13 | ```
14 | 
15 | **Meanwhile, we need to ensure that the commands below are run from the `src` directory, i.e., `src` is the working directory of the project.**
16 | 
17 | ### Build Backdoor Trigger
18 | Run the following command to generate the backdoor trigger and train the poisoned model.
19 | 
20 | ``` python test_specific_pair.py -c cifar.json -s 5 -t 6 -g ```
21 | 
22 | The generated trigger is saved in the `perturbation` directory. Copy its file name into the `pert_path` item in `json/cifar.json`.
23 | 
24 | During generation, we need a benign model. If the `model_path` item in `json/cifar.json` is not valid, a new benign model is trained; we can then set the `model_path` item to the path of the newly trained model.
25 | 
26 | After generation, we will use the trigger to train a backdoor model.
27 | 
28 | ### Inject AdvDoor
29 | 
30 | If there is a trigger already, run the following command to train a poisoned model on it.
31 | 
32 | ``` python test_specific_pair.py -c cifar.json -s 5 -t 6 ```
33 | 
34 | Ensure that the `pert_path` in `json/cifar.json` is valid.
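
For example, with the sample values shipped in `json/cifar.json`, the item looks like this (the exact file name comes from your own generation run):

``` json
{
    "pert_path": "../perturbation/cifar_universal_5to6_20210430-142209.pkl"
}
```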
35 | 
36 | During injection, the backdoor model is finetuned from a benign model, so we need to train a benign model first. If we already have a benign model, we can set the `model_path_finetune` item in `json/cifar.json` to its path.
37 | 
38 | 
39 | ### Evaluation
40 | We first generate the perturbation.
41 | The fooling rate during generation is shown below.
42 | 
![generation](imgs/generation.png)
43 | 
44 | An example of the generated trigger is shown below.
45 | 
46 | 
![generation](imgs/example.png)
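
The trigger itself is stored as a pickled numpy array (see `deserialize` in `src/attacks/backdoor_generator.py`). A minimal sketch for inspecting a saved trigger; the file name below is the sample value from `json/cifar.json` and will differ for your own run:

``` python
import pickle

# Path comes from the pert_path item in json/cifar.json.
with open('../perturbation/cifar_universal_5to6_20210430-142209.pkl', 'rb') as f:
    perturb = pickle.load(f)

# For CIFAR-10 the array is broadcastable to the input shape, e.g. (1, 32, 32, 3).
print('trigger shape:', perturb.shape)
```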
47 | 
48 | In the following image, we can read the attack success rate, which is reported as the 'Poisonous test set accuracy' in the image.
49 | Meanwhile, we try to detect AdvDoor with the Activation Clustering method [1](#activation_clustering). The resulting f1-score is very low, which means that Activation Clustering can hardly find the poisoned data.
50 | 
51 | 
![injection](imgs/injection.png)
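
The precision/recall/f1 numbers reported above are derived from the per-class confusion matrix returned by `evaluate_defence`, mirroring `print_f1_score` in `src/defences/activation_clustering.py`. A minimal sketch with placeholder counts:

``` python
# Placeholder counts for one target class; real runs read tp/fp/fn from the
# confusion matrix returned by evaluate_defence().
tp, fp, fn = 12, 480, 1488

precision = float(tp) / (tp + fp)
recall = float(tp) / (tp + fn)
f1 = 0.0 if tp == 0 else (2 * precision * recall) / (precision + recall)
print('precision = {:.4f}, recall = {:.4f}, f1 = {:.4f}'.format(precision, recall, f1))
```

A low f1-score means the cluster flagged as poisonous overlaps poorly with the samples that are actually poisoned.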
52 | 
53 | 
54 | 
55 | ### Section 5.4.2: Crossing Model Attacks
56 | 
57 | #### ResNet to VGGNet, TUAP-Deepfool
58 | ![image](https://github.com/ZQ-Struggle/AdvDoor/blob/master/imgs/Res2VGG_Deepfool.png)
59 | 
60 | #### ResNet to VGGNet, TUAP-C\&W
61 | ![image](https://github.com/ZQ-Struggle/AdvDoor/blob/master/imgs/Res2VGG_CW.png)
62 | 
63 | #### VGGNet to ResNet, TUAP-Deepfool
64 | ![image](https://github.com/ZQ-Struggle/AdvDoor/blob/master/imgs/VGG2Res_Deepfool.png)
65 | 
66 | #### VGGNet to ResNet, TUAP-C\&W
67 | ![image](https://github.com/ZQ-Struggle/AdvDoor/blob/master/imgs/VGG2Res_CW.png)
68 | 
69 | 
70 | When adding a new dataset, please set up its config JSON, dataloader, and model. Besides, please add the dataset to the `models_noLoad` or `models_load` list in `conf.py`.
71 | 
72 | ### Reference
73 | 
74 | 
75 | - <a name="activation_clustering"></a>[1] Bryant Chen, Wilka Carvalho, Nathalie Baracaldo, Heiko Ludwig, Benjamin Edwards, Taesung Lee, Ian Molloy, Biplav Srivastava: Detecting Backdoor Attacks on Deep Neural Networks by Activation Clustering. SafeAI@AAAI 2019
76 | 
77 | 
78 | ### Contacts
79 | Quan Zhang zhangq20@mails.tsinghua.edu.cn
80 | 
--------------------------------------------------------------------------------
/src/poison_detection/poison_filtering_defence.py:
--------------------------------------------------------------------------------
1 | # MIT License
2 | #
3 | # Copyright (C) IBM Corporation 2018
4 | #
5 | # Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
6 | # documentation files (the "Software"), to deal in the Software without restriction, including without limitation the
7 | # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit
8 | # persons to whom the Software is furnished to do so, subject to the following conditions:
9 | #
10 | # The above copyright notice and this permission notice shall be included in all copies or substantial portions of the
11 | # Software.
12 | #
13 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
14 | # WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
15 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
16 | # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
17 | # SOFTWARE.
18 | from __future__ import absolute_import, division, print_function, unicode_literals
19 | 
20 | import abc
21 | import sys
22 | 
23 | # Ensure compatibility with Python 2 and 3 when using ABCMeta
24 | if sys.version_info >= (3, 4):
25 |     ABC = abc.ABC
26 | else:
27 |     ABC = abc.ABCMeta(str('ABC'), (), {})
28 | 
29 | 
30 | class PoisonFilteringDefence(ABC):
31 |     """
32 |     Base class for all poison filtering defences.
33 |     """
34 |     defence_params = ['classifier']
35 | 
36 |     def __init__(self, classifier, x_train, y_train):
37 |         """
38 |         Create an :class:`.ActivationDefence` object with the provided classifier.
39 | 
40 |         :param classifier: model evaluated for poison
41 |         :type classifier: :class:`.Classifier`
42 |         :param x_train: dataset used to train the classifier.
43 |         :type x_train: :class:`numpy.ndarray`
44 |         :param y_train: labels used to train the classifier.
45 |         :type y_train: :class:`numpy.ndarray`
46 |         """
47 |         self.classifier = classifier
48 |         self.x_train = x_train
49 |         self.y_train = y_train
50 | 
51 |     @abc.abstractmethod
52 |     def detect_poison(self, **kwargs):
53 |         """
54 |         Detect poison.
55 | 
56 |         :param kwargs: Defence-specific parameters used by child classes.
57 |         :type kwargs: `dict`
58 |         :return: `(dict, list)` dictionary with report and list with items identified as poison
59 |         """
60 |         raise NotImplementedError
61 | 
62 |     @abc.abstractmethod
63 |     def evaluate_defence(self, is_clean, **kwargs):
64 |         """
65 |         Evaluate the defence given the labels specifying if the data is poisoned or not.
66 | 
67 |         :param is_clean: 1-D array where is_clean[i]=1 means x_train[i] is clean and is_clean[i]=0 that it's poison.
68 |         :param kwargs: Defence-specific parameters used by child classes.
69 |         :type kwargs: `dict`
70 |         :return: JSON object with confusion matrix
71 |         """
72 |         raise NotImplementedError
73 | 
74 |     def set_params(self, **kwargs):
75 |         """
76 |         Take in a dictionary of parameters and apply attack-specific checks before saving them as attributes.
77 | 
78 |         :param kwargs: a dictionary of defence-specific parameters
79 |         :type kwargs: `dict`
80 |         :return: `True` when parsing was successful
81 |         """
82 |         for key, value in kwargs.items():
83 |             if key in self.defence_params:
84 |                 setattr(self, key, value)
85 |         return True
86 | 
87 |     def get_params(self):
88 |         """
89 |         Returns dictionary of parameters used to run defence.
90 | 
91 |         :return: `dict`
92 |         """
93 |         dictionary = {}
94 |         for param in self.defence_params:
95 |             dictionary.update({param: getattr(self, param)})
96 |         return dictionary
97 | 
--------------------------------------------------------------------------------
/src/defences/spectral.py:
--------------------------------------------------------------------------------
1 | """Spectral signatures defence: computes activation statistics of the
2 | training set and filters out the most suspicious examples."""
3 | from __future__ import absolute_import
4 | from __future__ import division
5 | from __future__ import print_function
6 | 
7 | import argparse
8 | import json
9 | import os
10 | import pickle
11 | 
12 | import numpy as np
13 | import tensorflow as tf
14 | from tqdm import trange
15 | import keras.backend as K
16 | from utils import *
17 | 
18 | 
19 | def compute_corr(param):
20 |     # load the backdoored model from disk
21 |     with open(param.get_conf('model_path_backdoor'), 'rb') as f:
22 |         model = pickle.load(f)
23 |     model.set_learning_phase(0)
24 |     if param.get_conf('model_prefix') == 'mnist':
25 |         from data.mnist import MnistData
26 |         data = MnistData(param)
27 |         iteration = K.function(inputs=[model.get_classifier().get_model().input],
28 |                                outputs=[model.get_classifier().get_model().get_layer('dense_1').output])
29 |     elif param.get_conf('model_prefix') == 'cifar':
30 |         from data.cifar10 import CifarData
31 |         data = CifarData(param)
32 |         iteration = K.function(inputs=[model.get_classifier().get_model().input],
33 |                                outputs=[model.get_classifier().get_model().get_layer('dense_2').output])
34 |     data.load_data()
35 |     data.restore_backdoor(model)
36 |     # Setting up the data and the model
37 |     train_x, train_y, test_x, test_y, is_poison_train, is_poison_test = data.get_specific_label_data(6)
38 |     target_label = param.get_conf('poison_label_target')
39 |     num_poisoned_left = np.sum(is_poison_train == True)
40 |     print('Num poisoned left: ', num_poisoned_left)
41 |     num_training_examples = len(train_x)
42 | 
43 | 
44 |     print('Dataset Size: ', len(data.x_train))
45 | 
46 |     lbl = target_label
47 |     cur_examples = num_training_examples
48 |     print('Label, num ex: ', lbl, cur_examples)
49 |     # cur_op = model.representation
50 |     for iex in trange(cur_examples):
51 |         x_batch = train_x[iex:iex + 1, :]
52 |         y_batch = train_y[iex:iex + 1]
53 | 
54 |         batch_grads = iteration([x_batch])[0].flatten()
55 | 
56 |         if iex == 0:
57 |             clean_cov = np.zeros(shape=(cur_examples - num_poisoned_left, len(batch_grads)))
58 |             full_cov = np.zeros(shape=(cur_examples, len(batch_grads)))
59 |         if iex < (cur_examples - num_poisoned_left):
60 |             clean_cov[iex] = batch_grads
61 |         full_cov[iex] = batch_grads
62 | 
63 |     # np.save(corr_dir+str(lbl)+'_full_cov.npy', full_cov)
64 | 
65 |     total_p = 73
66 | 
67 | 
68 |     clean_mean = np.mean(clean_cov, axis=0, keepdims=True)
69 |     full_mean = np.mean(full_cov, axis=0, keepdims=True)
70 | 
71 |     print('Norm of Difference in Mean: ', 
np.linalg.norm(clean_mean - full_mean))
72 |     clean_centered_cov = clean_cov - clean_mean
73 |     s_clean = np.linalg.svd(clean_centered_cov, full_matrices=False, compute_uv=False)
74 |     print('Top 7 Clean SVs: ', s_clean[0:7])
75 | 
76 |     centered_cov = full_cov - full_mean
77 |     u, s, v = np.linalg.svd(centered_cov, full_matrices=False)
78 |     print('Top 7 Singular Values: ', s[0:7])
79 |     eigs = v[0:1]
80 |     p = total_p
81 |     corrs = np.matmul(eigs, np.transpose(full_cov))  # shape num_top, num_active_indices
82 |     scores = np.linalg.norm(corrs, axis=0)  # shape num_active_indices
83 |     # np.save(os.path.join(model_dir, 'scores.npy'), scores)
84 |     print('Length Scores: ', len(scores))
85 |     p_score = np.percentile(scores, p)
86 |     top_scores = np.where(scores > p_score)[0]
87 |     print(top_scores)
88 | 
89 |     num_bad_removed = np.sum(is_poison_train[top_scores])
90 |     print('Num Bad Removed: ', num_bad_removed)
91 |     print('Num Good Removed: ', len(top_scores) - num_bad_removed)
92 | 
93 |     num_poisoned_after = num_poisoned_left - num_bad_removed
94 | 
95 |     print('Num Poisoned Left: ', num_poisoned_after)
96 | 
97 |     print_f1(num_bad_removed, num_poisoned_after, len(top_scores) - num_bad_removed)
98 | 
99 |     if os.path.exists('job_result.json'):
100 |         with open('job_result.json') as result_file:
101 |             result = json.load(result_file)
102 |             result['num_poisoned_left'] = '{}'.format(num_poisoned_after)
103 |     else:
104 |         result = {'num_poisoned_left': '{}'.format(num_poisoned_after)}
105 |     with open('job_result.json', 'w') as result_file:
106 |         json.dump(result, result_file, sort_keys=True, indent=4)
107 | 
108 | 
--------------------------------------------------------------------------------
/src/attacks/Deepfool.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 | 
3 | 
4 | from attacks.backdoor_generator import BackdoorGenerator
5 | from utils import *
6 | 
7 | 
8 | class Deepfool(BackdoorGenerator):
9 |     def __init__(self, model, param, pair=None):
10 |         super(Deepfool, self).__init__(model, param)
11 |         if pair is None:
12 |             self.get_loss_gradient(param.get_conf('poison_label_source'), param.get_conf('poison_label_target'))
13 |         else:
14 |             self.get_loss_gradient(pair[0], pair[1])
15 | 
16 |     def deepfool(self, image, source, target, overshoot=0.02, max_iter=150):
17 |         """
18 |         Targeted DeepFool: compute a minimal perturbation that moves `image` from class `source` to class `target`.
19 |         :param image: image of size HxWxC
20 |         :param source: label of the source class
21 |         :param target: label of the target class
22 |         :param overshoot: used as a termination criterion to prevent vanishing updates (default = 0.02).
23 | :param max_iter: maximum number of iterations for deepfool (default = 10) 24 | :return: minimal perturbation that fools the classifier, number of iterations that it required, new estimated_label and perturbed image 25 | """ 26 | 27 | input_shape = image.shape 28 | pert_image = image 29 | 30 | # iterate = K.function([self.input_tensor], [self.before_softmax_tensor]) 31 | [grads_s, grads_t, f_i, pred] = self.iterate([image]) 32 | f_i = f_i[0] 33 | pred = pred[0] 34 | 35 | # distance = max(abs(f_i[target] - f_i[source]), 10) 36 | # distance = max(abs(f_i[target] - f_i[source]), 1) 37 | 38 | # f_i = np.array(f).flatten() 39 | k_i = int(np.argmax(f_i)) 40 | 41 | w = np.zeros(input_shape) 42 | r_tot = np.zeros(input_shape) 43 | 44 | loop_i = 0 45 | pert = np.inf 46 | while (k_i != target or pred[target] < 0.8) and loop_i < max_iter: 47 | w_k = grads_t - grads_s 48 | 49 | f_k = (f_i[target] - f_i[source]) * 2 #- distance 50 | pert_k = abs(f_k) / (np.linalg.norm(w_k.flatten(), ord=2) 51 | # * 256.0 52 | ) 53 | 54 | # determine which w_k to use 55 | 56 | pert = pert_k 57 | w = w_k 58 | 59 | # compute r_i and r_tot 60 | r_i = pert * w / (np.linalg.norm(w.flatten(), ord=2)) # * 256.0) 61 | r_tot = r_tot + r_i 62 | 63 | # compute new perturbed image 64 | pert_image = np.clip(image + (1 + overshoot) * r_tot, 0, 1) 65 | r_tot = (pert_image - image) / (1 + overshoot) 66 | 67 | # pert_image = image + (1 + overshoot) * r_tot 68 | 69 | # pert_image = deprocess_vgg(pert_image).astype(np.float64) 70 | # pert_image = preprocess_input_vgg(pert_image) 71 | 72 | loop_i += 1 73 | 74 | [grads_s, grads_t, f, pred] = self.iterate([pert_image]) 75 | pred = pred[0] 76 | 77 | # compute new label 78 | f_i = np.array(f).flatten() 79 | k_i = int(np.argmax(f)) 80 | 81 | r_tot = (1 + overshoot) * r_tot 82 | 83 | return r_tot, loop_i, pert_image 84 | 85 | def get_loss_gradient(self, source, target): 86 | if self.param.get_conf('model_path') == 'origin': 87 | self.input_tensor = self.model.get_input_tensor_origin() 88 | self.before_softmax_tensor = self.model.get_before_softmax_tensor_origin() 89 | else: 90 | self.input_tensor = self.model.get_classifier().get_input_tensor() 91 | self.before_softmax_tensor = self.model.get_classifier().get_output_bef_softmax() 92 | self.output_tensor = self.model.get_classifier().get_output_tensor() 93 | 94 | self.dydx_s = K.gradients(self.before_softmax_tensor[..., source], self.input_tensor)[0] 95 | self.dydx_t = K.gradients(self.before_softmax_tensor[..., target], self.input_tensor)[0] 96 | 97 | self.iterate = K.function([self.input_tensor], 98 | [self.dydx_s, self.dydx_t, self.before_softmax_tensor, self.output_tensor]) 99 | 100 | def gen_perturbation(self, img, source=5, target=6): 101 | self.perturb, self.loop_i, self.pert_image = self.deepfool(img, source, target) 102 | self.perturb = np.squeeze(self.perturb) 103 | return self.perturb, self.loop_i, self.pert_image 104 | -------------------------------------------------------------------------------- /src/model/model.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | from conf import * 4 | from utils import * 5 | import abc 6 | 7 | 8 | 9 | class CNNModel(metaclass=abc.ABCMeta): 10 | def __init__(self, param): 11 | # input_shape = x_train.shape[1:] 12 | self.param = param 13 | self.train_poison = None 14 | self.test_poison = None 15 | self.classifier = None 16 | def init(self, data): 17 | self.input_shape = data.x_train.shape[1:] 18 | self.min_ = data.min_ 19 | 
self.max_ = data.max_ 20 | 21 | def set_learning_phase(self, learning_phase): 22 | K.set_learning_phase(learning_phase) 23 | 24 | @abc.abstractmethod 25 | def init_model(self): 26 | pass 27 | 28 | def predict_acc(self, x, y, is_poison, type_str): 29 | # Evaluate the classifier on the test set 30 | self.test_preds = np.argmax(self.classifier.predict(x), axis=1) 31 | self.test_acc = np.sum(self.test_preds == np.argmax(y, axis=1)) / y.shape[0] 32 | print("\n%s accuracy: %.2f%%" % (type_str, self.test_acc * 100)) 33 | 34 | # Evaluate the classifier on poisonous data in test set 35 | # self.poison_preds = np.argmax(self.classifier.predict(x[is_poison]), axis=1) 36 | self.poison_preds = self.test_preds[is_poison] 37 | self.poison_acc = np.sum(self.poison_preds == np.argmax(y[is_poison], axis=1)) / max(is_poison.sum(),1) 38 | print("\nPoisonous %s set accuracy (i.e. effectiveness of poison): %.2f%%" % (type_str, self.poison_acc * 100)) 39 | 40 | # Evaluate the classifier on clean data 41 | # self.clean_preds = np.argmax(self.classifier.predict(x[is_poison == 0]), axis=1) 42 | self.clean_preds = self.test_preds[is_poison==0] 43 | self.clean_acc = np.sum(self.clean_preds == np.argmax(y[is_poison == 0], axis=1)) / y[is_poison == 0].shape[0] 44 | print("\nClean %s set accuracy: %.2f%%" % (type_str, self.clean_acc * 100)) 45 | 46 | # when result_dict is not empty, start record experiment results 47 | 48 | # to validate backdoor insert effectiveness 49 | # check whether the backdoor data with poison label is predicted by the model with poison label 50 | def predict(self, data): 51 | # Evaluate the classifier on the train set 52 | self.predict_acc(data.x_train, data.y_train, data.is_poison_train, 'train') 53 | 54 | 55 | # visualize predict 56 | # for i in range(3): 57 | # data.visiualize_img_by_idx(np.where(np.array(data.is_poison_train) == 1)[0][i], self.poison_preds[i]) 58 | 59 | 60 | # Evaluate the classifier on the test set 61 | self.predict_acc(data.x_test, data.y_test, data.is_poison_test, 'test') 62 | 63 | ''' 64 | # visualize predict 65 | for i in range(3): 66 | print(np.where(np.array(data.is_poison_test) == 1)[0][i]) 67 | data.visiualize_img_by_idx(np.where(np.array(data.is_poison_test) == 1)[0][i], self.poison_preds[i], False) 68 | ''' 69 | 70 | def predict_robust(self, x, y, is_poison, type_str=''): 71 | self.test_preds = np.argmax(self.classifier.predict(x), axis=1) 72 | self.test_acc = np.sum(self.test_preds == np.argmax(y, axis=1)) / y.shape[0] 73 | print("\n%s accuracy: %.2f%%" % (type_str, self.test_acc * 100)) 74 | 75 | # Evaluate the classifier on poisonous data in test set 76 | # self.poison_preds = np.argmax(self.classifier.predict(x[is_poison]), axis=1) 77 | self.poison_preds = self.test_preds[is_poison] 78 | self.poison_acc = np.sum(self.poison_preds == np.argmax(y[is_poison], axis=1)) / max(is_poison.sum(),1) 79 | print("\nPoisonous %s set accuracy (i.e. 
effectiveness of poison): %.2f%%" % (type_str, self.poison_acc * 100)) 80 | 81 | # Evaluate the classifier on clean data 82 | # self.clean_preds = np.argmax(self.classifier.predict(x[is_poison == 0]), axis=1) 83 | self.clean_preds = self.test_preds[is_poison==0] 84 | self.clean_acc = np.sum(self.clean_preds == np.argmax(y[is_poison == 0], axis=1)) / y[is_poison == 0].shape[0] 85 | print("\nClean %s set accuracy: %.2f%%" % (type_str, self.clean_acc * 100)) 86 | 87 | def set_param(self, param): 88 | self.classifier.param = param 89 | self.param = param 90 | 91 | def get_train_poison(self): 92 | return self.train_poison 93 | 94 | def set_train_poison(self, poison): 95 | self.train_poison = poison 96 | 97 | def get_test_poison(self): 98 | return self.test_poison 99 | 100 | def set_test_poison(self, poison): 101 | self.test_poison = poison 102 | 103 | 104 | def predict_instance(self, x): 105 | return self.classifier.predict(x)[0] 106 | 107 | def get_input_shape(self): 108 | return self.input_shape 109 | 110 | def set_input_shape(self, input_shape): 111 | self.input_shape = input_shape 112 | 113 | def get_classifier(self): 114 | return self.classifier 115 | 116 | def set_classifier(self, classifier): 117 | self.classifier = classifier 118 | 119 | def get_input_tensor(self): 120 | return self.classifier.get_input_tensor() 121 | 122 | def get_output_tensor(self): 123 | return self.classifier.get_output_tensor() 124 | 125 | @abc.abstractmethod 126 | def get_dense_tensor(self): 127 | pass 128 | 129 | -------------------------------------------------------------------------------- /src/model/cifar10.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | from sys import is_finalizing 3 | from keras.preprocessing.image import ImageDataGenerator 4 | from classifiers import KerasClassifier 5 | from model.model import * 6 | 7 | class CifarModel(CNNModel): 8 | def __init__(self, param): 9 | super(CifarModel, self).__init__(param) 10 | 11 | def init(self, data): 12 | self.input_shape = data.x_train.shape[1:] 13 | self.min_ = data.min_ 14 | self.max_ = data.max_ 15 | 16 | def set_learning_phase(self, learning_phase): 17 | K.set_learning_phase(learning_phase) 18 | 19 | def init_model(self): 20 | K.set_learning_phase(1) 21 | model = Sequential() 22 | model.add( 23 | Conv2D(64, (3, 3), activation='relu', input_shape=self.input_shape, name='block1_conv1', padding='same')) 24 | model.add(Conv2D(64, (3, 3), activation='relu', name='block1_conv2', padding='same')) 25 | model.add(MaxPooling2D(pool_size=(2, 2), name='block1_pool1')) 26 | model.add(Dropout(0.25, name='dropout_1')) 27 | model.add(Conv2D(128, (3, 3), activation='relu', name='block2_conv1', padding='same')) 28 | model.add(Conv2D(128, (3, 3), activation='relu', name='block2_conv2', padding='same')) 29 | model.add(MaxPooling2D(pool_size=(2, 2), name='block2_pool1')) 30 | model.add(Dropout(0.25, name='dropout_2')) 31 | model.add(Conv2D(128, (3, 3), activation='relu', name='block3_conv1', padding='same')) 32 | model.add(Conv2D(128, (3, 3), activation='relu', name='block3_conv2', padding='same')) 33 | model.add(Conv2D(128, (3, 3), activation='relu', name='block3_conv3', padding='same')) 34 | model.add(Conv2D(128, (3, 3), activation='relu', name='block3_conv4', padding='same')) 35 | model.add(MaxPooling2D(pool_size=(2, 2), name='block3_pool1')) 36 | model.add(Dropout(0.25, name='dropout_3')) 37 | model.add(Flatten(name='flatten1')) 38 | model.add(Dense(1024, activation='relu', name='dense_1')) 39 | 
model.add(Dropout(0.5, name='dropout_4')) 40 | model.add(Dense(1024, activation='relu', name='dense_2')) 41 | model.add(Dropout(0.5, name='dropout_5')) 42 | model.add(Dense(self.param.get_conf('num_classes'), activation=None, name='predictions')) 43 | model.add(Activation('softmax', name='softmax_output')) 44 | 45 | model.compile(loss='categorical_crossentropy', optimizer='Adadelta', metrics=['accuracy']) 46 | 47 | self.classifier = KerasClassifier(clip_values=(self.min_, self.max_), model=model, param=self.param) 48 | 49 | def train(self, data, nb_epochs=None): 50 | # default 51 | # nb_epochs=20 52 | # batch_size=128 53 | self.classifier.get_model().compile(loss='categorical_crossentropy', optimizer='Adadelta', metrics=['accuracy']) 54 | if nb_epochs is None: 55 | nb_epochs = self.param.get_conf('train_epoch') 56 | # if isinstance(data, Data): 57 | 58 | datagen = ImageDataGenerator( 59 | featurewise_center=False, # set input mean to 0 over the dataset 60 | samplewise_center=False, # set each sample mean to 0 61 | featurewise_std_normalization=False, # divide inputs by std of the dataset 62 | samplewise_std_normalization=False, # divide each input by its std 63 | zca_whitening=False, # apply ZCA whitening 64 | zca_epsilon=1e-06, # epsilon for ZCA whitening 65 | rotation_range=0, # randomly rotate images in the range (degrees, 0 to 180) 66 | # randomly shift images horizontally (fraction of total width) 67 | width_shift_range=0.1, 68 | # randomly shift images vertically (fraction of total height) 69 | height_shift_range=0.1, 70 | shear_range=0., # set range for random shear 71 | zoom_range=0., # set range for random zoom 72 | channel_shift_range=0., # set range for random channel shifts 73 | # set mode for filling points outside the input boundaries 74 | fill_mode='nearest', 75 | cval=0., # value used for fill_mode = "constant" 76 | horizontal_flip=True, # randomly flip images 77 | # validation_split=0.0 78 | ) 79 | # Fit the model on the batches generated by datagen.flow(). 
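# NOTE: fit_generator is the Keras 2.1.x API matching the pinned requirements;
# newer Keras/TF versions accept the same generator directly in model.fit(...).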
80 | self.classifier.get_model().fit_generator(datagen.flow(data.x_train, data.y_train, batch_size=128), 81 | epochs=nb_epochs, 82 | steps_per_epoch=data.x_train.shape[0] / 128, 83 | validation_data=(data.x_test, data.y_test), 84 | validation_steps=data.x_train.shape[0] / 128, 85 | workers=4) 86 | 87 | 88 | def predict_instance(self, x): 89 | return self.classifier.predict(x)[0] 90 | 91 | def get_input_shape(self): 92 | return self.input_shape 93 | 94 | def set_input_shape(self, input_shape): 95 | self.input_shape = input_shape 96 | 97 | def get_classifier(self): 98 | return self.classifier 99 | 100 | def set_classifier(self, classifier): 101 | self.classifier = classifier 102 | 103 | def get_input_tensor(self): 104 | return self.classifier.get_input_tensor() 105 | 106 | def get_output_tensor(self): 107 | return self.classifier.get_output_tensor() 108 | 109 | def get_output_bef_softmax(self): 110 | return self.classifier.get_output_bef_softmax() 111 | 112 | def get_dense_tensor(self): 113 | return self.classifier.get_model().get_layer('dense_2').output 114 | -------------------------------------------------------------------------------- /src/defences/activation_clustering.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | from sys import stdout 4 | from analyzer import Analyzer 5 | from conf import * 6 | from poison_detection import ActivationDefence 7 | 8 | 9 | class ActivationClustering: 10 | def __init__(self, data, param, model=None, activations=None): 11 | # model is activations when is_resume is True, and is DNN model when is_resume is False 12 | if model is None and activations is None: 13 | raise ("You must supply either model or activations") 14 | self.data = data 15 | self.param = param 16 | 17 | if activations is not None: 18 | self.defence = Analyzer(activations, self.data.x_train, self.data.y_train, self.param) 19 | else: 20 | self.model = model 21 | self.defence = ActivationDefence(self.model.classifier, self.data.x_train, self.data.y_train, self.param) 22 | 23 | def size_metric(self , log_file=None): 24 | # End-to-end method: 25 | print("------------------- Results using size metric -------------------") 26 | print(self.defence.get_params()) 27 | self.defence.detect_poison(n_clusters=2, ndims=10, reduce="PCA") 28 | 29 | confusion_matrix = self.defence.evaluate_defence(self.data.is_clean) 30 | print("Evaluation defence results for size-based metric: ") 31 | jsonObject = json.loads(confusion_matrix) 32 | 33 | 34 | label = 'class_{}'.format(self.param.get_conf('poison_label_target')) 35 | print(label) 36 | pprint.pprint(jsonObject[label]) 37 | 38 | self.print_f1_score(jsonObject, label) 39 | if log_file: 40 | savedStdout = sys.stdout 41 | sys.stdout = log_file 42 | for label in jsonObject: 43 | print(label) 44 | pprint.pprint(jsonObject[label]) 45 | self.print_f1_score(jsonObject, label) 46 | sys.stdout= savedStdout 47 | 48 | 49 | def size_metric_visualize(self): 50 | # Visualize clusters: 51 | print("Visualize clusters") 52 | sprites_by_class = self.defence.visualize_clusters(self.data.x_train, 'mnist_poison_demo') 53 | # Show plots for clusters of class 5 54 | n_class = self.param.get_conf('poison_label_target') 55 | try: 56 | import matplotlib.pyplot as plt 57 | plt.imshow(sprites_by_class[n_class][0]) 58 | plt.title("Class " + str(n_class) + " cluster: 0") 59 | plt.show() 60 | plt.imshow(sprites_by_class[n_class][1]) 61 | plt.title("Class " + str(n_class) + " cluster: 1") 62 | plt.show() 63 | except: 64 | 
print("matplotlib not installed. For this reason, cluster visualization was not displayed") 65 | 66 | def distance_metric(self): 67 | print("------------------- Results using distance metric -------------------") 68 | print(self.defence.get_params()) 69 | self.defence.detect_poison(n_clusters=2, ndims=10, reduce="PCA", cluster_analysis='distance') 70 | confusion_matrix = self.defence.evaluate_defence(self.data.is_clean) 71 | print("Evaluation defence results for distance-based metric: ") 72 | jsonObject = json.loads(confusion_matrix) 73 | 74 | # when result_dict is not empty, start record experiment results 75 | 76 | 77 | for label in jsonObject: 78 | print(label) 79 | pprint.pprint(jsonObject[label]) 80 | 81 | self.print_f1_score(jsonObject, label) 82 | 83 | # Other ways to invoke the defence: 84 | self.defence.cluster_activations(n_clusters=2, ndims=10, reduce='PCA') 85 | 86 | self.defence.analyze_clusters(cluster_analysis='distance') 87 | self.defence.evaluate_defence(self.data.is_clean) 88 | 89 | self.defence.analyze_clusters(cluster_analysis='smaller') 90 | self.defence.evaluate_defence(self.data.is_clean) 91 | 92 | def relative_size_metric(self): 93 | print("------------------- Results using relative-size metric -------------------") 94 | print(self.defence.get_params()) 95 | self.defence.detect_poison(n_clusters=2, ndims=10, reduce="PCA", cluster_analysis='relative-size') 96 | confusion_matrix = self.defence.evaluate_defence(self.data.is_clean) 97 | print("Evaluation defence results for relative-size metric: ") 98 | jsonObject = json.loads(confusion_matrix) 99 | 100 | # when result_dict is not empty, start record experiment results 101 | 102 | 103 | if type(self.param.get_conf('poison_label_target')) is list: 104 | for lab in self.param.get_conf('poison_label_target'): 105 | lab = 'class_{}'.format(lab) 106 | print(lab) 107 | pprint.pprint(jsonObject[lab]) 108 | self.print_f1_score(jsonObject, lab) 109 | else: 110 | label = 'class_{}'.format(self.param.get_conf('poison_label_target')) 111 | print(label) 112 | pprint.pprint(jsonObject[label]) 113 | self.print_f1_score(jsonObject, label) 114 | 115 | def print_f1_score(self, jsonObject, label): 116 | tp = jsonObject[label]['TruePositive']['numerator'] 117 | fn = jsonObject[label]['FalseNegative']['numerator'] 118 | tn = jsonObject[label]['TrueNegative']['numerator'] 119 | fp = jsonObject[label]['FalsePositive']['numerator'] 120 | 121 | if tp + fp == 0 or tp + fn == 0: 122 | print('escape the detection') 123 | return 124 | 125 | precision = float(tp) / (tp + fp) 126 | recall = float(tp) / (tp + fn) 127 | if tp==0: 128 | f1 = 0.0 129 | else: 130 | f1 = (2 * precision * recall) / (precision + recall) 131 | 132 | print('precision = ', precision) 133 | print('recall = ', recall) 134 | print('f1 = ', f1) 135 | -------------------------------------------------------------------------------- /src/attacks/universal_perturbation.py: -------------------------------------------------------------------------------- 1 | from attacks.Deepfool import Deepfool 2 | from attacks.backdoor_generator import BackdoorGenerator 3 | from conf import * 4 | from utils import preprocess_input_vgg 5 | 6 | 7 | class Universal_perturbation(BackdoorGenerator): 8 | def __init__(self, model, param): 9 | super(Universal_perturbation, self).__init__(model, param) 10 | self.deepfool = Deepfool(self.model, self.param) 11 | self.data_path = os.path.join(self.param.get_conf('data_path'), 'train') 12 | self.image_size = (self.param.get_conf('train_image_size'), 
-------------------------------------------------------------------------------- /src/attacks/universal_perturbation.py: -------------------------------------------------------------------------------- 1 | from attacks.Deepfool import Deepfool 2 | from attacks.backdoor_generator import BackdoorGenerator 3 | from conf import * 4 | from utils import preprocess_input_vgg 5 | 6 | 7 | class Universal_perturbation(BackdoorGenerator): 8 | def __init__(self, model, param): 9 | super(Universal_perturbation, self).__init__(model, param) 10 | self.deepfool = Deepfool(self.model, self.param) 11 | self.data_path = os.path.join(self.param.get_conf('data_path'), 'train') 12 | self.image_size = (self.param.get_conf('train_image_size'), self.param.get_conf('train_image_size')) 13 | 14 | def proj_lp(self, v, xi, p): 15 | 16 | # Project on the lp ball centered at 0 and of radius xi 17 | 18 | # SUPPORTS only p = 2 and p = Inf for now 19 | if p == 2: 20 | v = v * min(1, xi / np.linalg.norm(v.flatten())) 21 | # v = v / np.linalg.norm(v.flatten()) * xi 22 | elif p == np.inf: 23 | v = np.sign(v) * np.minimum(abs(v), xi) 24 | else: 25 | raise ValueError('Values of p other than 2 and Inf are currently not supported...') 26 | 27 | return v 28 | 29 | def universal_perturbation(self, dataset, source, target, delta=0.2, max_iter_uni=5, xi=15.0/255.0, p=np.inf, 30 | overshoot=0.02, max_iter_df=20): 31 | 32 | """ 33 | :param dataset: data object whose `x_train`/`y_train` hold the images of size MxHxWxC (M: number of images) 34 | 35 | :param source: label of the class whose images receive the perturbation 36 | 37 | :param target: label the perturbed images should be misclassified as 38 | 39 | :param delta: controls the desired fooling rate; the loop stops once the rate exceeds 1 - delta (default = 0.2, i.e. an 80% fooling rate) 40 | 41 | :param max_iter_uni: optional other termination criterion (maximum number of passes over the data, default = 5) 42 | 43 | :param xi: controls the l_p magnitude of the perturbation (default = 15/255) 44 | 45 | :param p: norm to be used (FOR NOW, ONLY p = 2, and p = np.inf ARE ACCEPTED!) (default = np.inf) 46 | 47 | :param overshoot: used as a termination criterion to prevent vanishing updates (default = 0.02). 48 | 49 | :param max_iter_df: maximum number of iterations for deepfool (default = 20) 50 | 51 | :return: the universal perturbation. 52 | """ 53 | 54 | v = 0 55 | fooling_rate = 0.0 56 | batch_size = self.param.get_conf('batch_size') 57 | if self.param.get_conf('model_prefix') in models_load: 58 | source_imgs = dataset.x_train 59 | source_imgs = [source_imgs[i] for i in np.where(dataset.y_train == source)[0]] 60 | y_train = dataset.y_train[(dataset.y_train == source)] 61 | else: 62 | y_train = dataset.y_train[(dataset.y_train.argmax(axis=1).flatten() == source)] 63 | source_imgs = dataset.x_train 64 | source_imgs = [source_imgs[i] for i in np.where(dataset.y_train.argmax(axis=1).flatten() == source)[0]] 65 | source_imgs = np.array(source_imgs) 66 | num_images = len(source_imgs) 67 | num_selection = min(num_images, 5000) 68 | 69 | print('num_selection = ', num_selection) 70 | 71 | itr = 0 72 | index = np.arange(num_selection) 73 | while fooling_rate < 1 - delta and itr < max_iter_uni: 74 | # Shuffle the dataset 75 | np.random.shuffle(index) 76 | 77 | print('Starting pass number ', itr) 78 | 79 | # Go through the data set and compute the perturbation increments sequentially 80 | for idx, k in enumerate(index): 81 | if self.param.get_conf('model_prefix') in models_load: 82 | cur_img = source_imgs[k] 83 | cur_img = cv2.imread(os.path.join(self.data_path, cur_img))[:, :, ::-1] 84 | cur_img = cv2.resize(cur_img, self.image_size) 85 | cur_img = preprocess_input_vgg(cur_img) 86 | else: 87 | cur_img = source_imgs[k:k + 1] 88 | if idx % 1000 == 999: 89 | print('>> k = ', idx, ', img_idx = ', k, ', pass #', itr) 90 | if target != int(np.argmax(np.array(self.deepfool.iterate([cur_img + v])[2]).flatten())): 91 | 92 | 93 | # Compute adversarial perturbation 94 | dr, df_iter, _ = self.deepfool.deepfool(cur_img + v, source=source, target=target, overshoot=overshoot, 95 | max_iter=max_iter_df) 96 | 97 | # Make sure DeepFool converged before accepting the increment 
98 | if df_iter < max_iter_df - 1: 99 | v = v + dr 100 | 101 | # Project on l_p ball 102 | v = self.proj_lp(v, xi, p) 103 | 104 | itr = itr + 1 105 | # v *= 0.99 106 | est_labels_pert = np.zeros(num_selection) 107 | 108 | num_batches = int(np.ceil(float(num_selection) / float(batch_size))) 109 | np.random.shuffle(index) 110 | imgs_test = [source_imgs[i] for i in index[:num_selection]] 111 | for ii in range(0, num_batches): 112 | m = (ii * batch_size) 113 | M = min((ii + 1) * batch_size, num_selection) 114 | if self.param.get_conf('model_prefix') in models_load: 115 | imgs = [] 116 | for fi in imgs_test[m:M]: 117 | img = cv2.imread(os.path.join(self.data_path, fi))[:, :, ::-1] 118 | img = cv2.resize(img, self.image_size) 119 | imgs.append(img) 120 | imgs = preprocess_input_vgg(np.array(imgs)) 121 | else: 122 | imgs = np.array(imgs_test[m:M]) 123 | # imgs[:] += v 124 | imgs = np.clip(imgs[:] + v, 0, 1) 125 | est_labels_pert[m:M] = np.argmax(self.deepfool.iterate([imgs])[2], axis=1).flatten() 126 | # Compute the fooling rate 127 | if self.param.get_conf('model_prefix') in models_load: 128 | fooling_rate = float(np.sum(est_labels_pert != y_train[index[:num_selection]]) / 129 | float(num_selection)) 130 | else: 131 | fooling_rate = float(np.sum(est_labels_pert != y_train[index[:num_selection]].argmax(axis=1)) / 132 | float(num_selection)) 133 | print('FOOLING RATE = ', fooling_rate) 134 | self.perturb = v 135 | print('magnitude of pert is', np.linalg.norm(v)) 136 | return v 137 |
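To make the projection step concrete, a small hedged illustration of proj_lp for p = inf (toy numbers; only xi = 15/255 matches the budget actually used above):

import numpy as np
v = np.array([0.08, -0.02, 0.05])   # accumulated perturbation (made-up values)
xi = 15.0 / 255.0                   # l_inf budget, as in universal_perturbation
v_proj = np.sign(v) * np.minimum(np.abs(v), xi)
# v_proj ~ [0.0588, -0.02, 0.05]: each entry is clamped element-wise into [-xi, xi]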
-------------------------------------------------------------------------------- /src/utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | from conf import * 4 | 5 | 6 | def print_f1(tp, fn, fp): 7 | # tp = num_bad_removed 8 | # fn = num_poisoned_after 9 | # fp = len(top_scores) - num_bad_removed 10 | precision = float(tp) / (tp + fp) 11 | recall = float(tp) / (tp + fn) 12 | f1 = (2 * precision * recall) / (precision + recall) if (precision + recall) else 0.0 13 | 14 | print('precision = {:.2f}'.format(precision * 100)) 15 | print('recall = {:.2f}'.format(recall * 100)) 16 | print('f1-score = {:.2f}'.format(f1 * 100)) 17 | 18 | 19 | def normalize(x): 20 | # utility function to normalize a tensor by its L2 norm 21 | return x / (K.sqrt(K.mean(K.square(x))) + 1e-5) 22 | 23 | 24 | def preprocess_image(img_path): 25 | img = image.load_img(img_path, target_size=(28, 28), grayscale=True) 26 | input_img_data = image.img_to_array(img) 27 | input_img_data = input_img_data.reshape(1, 28, 28, 1) 28 | 29 | input_img_data = input_img_data.astype('float32') 30 | input_img_data /= 255 31 | 32 | # input_img_data = preprocess_input(input_img_data) # final input shape = (1,224,224,3) 33 | return input_img_data 34 | 35 | 36 | def get_signature(): 37 | now = datetime.datetime.now() 38 | past = datetime.datetime(2015, 6, 6, 0, 0, 0, 0) 39 | timespan = now - past 40 | time_sig = int(timespan.total_seconds() * 1000) 41 | 42 | return str(time_sig) 43 | 44 | 45 | def serialize_img(img, param): 46 | save_name = '_'.join([param.get_conf('model_prefix'), get_date(), 'image', get_signature()]) 47 | save_path = os.path.join(param.get_conf('perturbation_dir'), save_name + '.png') 48 | save_pkl = os.path.join(param.get_conf('perturbation_dir'), save_name + '.pkl') 49 | 50 | img = img.flatten().reshape((28, 28)) 51 | print('img.shape = ', img.shape) 52 | 53 | plt.figure() 54 | plt.imshow(img, cmap='gray') 55 | plt.show() 56 | 57 | imageio.imwrite(uri=save_path, im=img) 58 | 59 | with open(save_pkl, 'wb') as f: 60 | pickle.dump(img, f) 61 | 62 | print('save img done') 63 | 64 | 65 | def deserialize_pert(save_pkl, alpha): 66 | with open(save_pkl, 'rb') as f: 67 | perturb = pickle.load(f) 68 | 69 | print('load perturbation done', save_pkl) 70 | print('self.perturb.shape = {}, magnitude of pert is {}'.format(perturb.shape, np.linalg.norm(perturb))) 71 | 72 | print('alpha = ', alpha) 73 | 74 | perturb = perturb * alpha 75 | # round the float part, e.g. 3.7 -> 4, 3.1 -> 3 76 | # perturb = (perturb*255).astype(np.int32) 77 | # perturb = perturb.astype(np.uint8) 78 | 79 | return perturb 80 | 81 | 82 | def to_categorical(labels, nb_classes=None): 83 | """ 84 | Convert an array of labels to binary class matrix. 85 | 86 | :param labels: An array of integer labels of shape `(nb_samples,)` 87 | :type labels: `np.ndarray` 88 | :param nb_classes: The number of classes (possible labels) 89 | :type nb_classes: `int` 90 | :return: A binary matrix representation of `labels` in the shape `(nb_samples, nb_classes)` 91 | :rtype: `np.ndarray` 92 | """ 93 | labels = np.array(labels, dtype=np.int32) 94 | if not nb_classes: 95 | nb_classes = np.max(labels) + 1 96 | categorical = np.zeros((labels.shape[0], nb_classes), dtype=np.float32) 97 | categorical[np.arange(labels.shape[0]), np.squeeze(labels)] = 1 98 | return categorical 99 | 100 | 101 | def preprocess_mnist(x, y, nb_classes=10, clip_values=None): 102 | """Scales `x` to [0, 1] and converts `y` to class categorical confidences. 103 | 104 | :param x: Data instances. 105 | :type x: `np.ndarray` 106 | :param y: Labels. 107 | :type y: `np.ndarray` 108 | :param nb_classes: Number of classes in dataset. 109 | :type nb_classes: `int` 110 | :param clip_values: Original data range allowed value for features, either one respective scalar or one value per 111 | feature. 112 | :type clip_values: `tuple(float, float)` or `tuple(np.ndarray, np.ndarray)` 113 | :return: Rescaled values of `x`, `y` 114 | :rtype: `tuple` 115 | """ 116 | if clip_values is None: 117 | min_, max_ = np.amin(x), np.amax(x) 118 | else: 119 | min_, max_ = clip_values 120 | 121 | normalized_x = (x - min_) / (max_ - min_) 122 | categorical_y = to_categorical(y, nb_classes) 123 | 124 | return normalized_x, categorical_y 125 |
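A self-contained toy example of the two helpers above (made-up arrays):

import numpy as np
labels = np.array([0, 2, 1])
to_categorical(labels, nb_classes=3)
# -> [[1. 0. 0.], [0. 0. 1.], [0. 1. 0.]]
x = np.array([[0., 127.5, 255.]])
x_norm, y_cat = preprocess_mnist(x, labels[:1], nb_classes=3)
# x_norm -> [[0. 0.5 1.]] (min=0 and max=255 are inferred from x when clip_values is None)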
126 | 127 | def preprocess_x_mnist(x, clip_values=None): 128 | """Scales `x` to [0, 1], adding batch and channel axes when they are missing. 129 | 130 | :param x: Data instances. 131 | :type x: `np.ndarray` 132 | :param clip_values: Original data range allowed value for features, either one respective scalar or one value per 133 | feature. 134 | :type clip_values: `tuple(float, float)` or `tuple(np.ndarray, np.ndarray)` 135 | :return: Rescaled values of `x` 136 | :rtype: `np.ndarray` 137 | """ 138 | if len(x.shape) == 2: 139 | x = np.expand_dims(x, axis=2) 140 | if len(x.shape) == 3: 141 | x = np.expand_dims(x, axis=0) 142 | if clip_values is None: 143 | min_, max_ = np.amin(x), np.amax(x) 144 | else: 145 | min_, max_ = clip_values 146 | 147 | normalized_x = (x - min_) / (max_ - min_) 148 | 149 | return normalized_x 150 | 151 | 152 | def preprocess_input_vgg(x): 153 | if (len(x.shape) == 3): 154 | x = np.expand_dims(x, axis=0) 155 | 156 | x = x.astype(np.float64) 157 | x = preprocess_input(x) 158 | return x 159 | 160 | 161 | def deprocess_vgg(x): 162 | x = x.reshape((224, 224, 3)) 163 | # Remove zero-center by mean pixel 164 | x[:, :, 0] += 103.939 165 | x[:, :, 1] += 116.779 166 | x[:, :, 2] += 123.68 167 | # 'BGR'->'RGB' 168 | x = x[:, :, ::-1] 169 | x = np.clip(x, 0, 255).astype('uint8') 170 | return x 171 | 172 | 173 | def load_img(img_path): 174 | img = cv2.imread(img_path)[:, :, ::-1] 175 | img = cv2.resize(img, (224, 224)) 176 | img = preprocess_input_vgg(img) 177 | return img 178 | 179 | 180 | def dump_model(model, param, prefix='model_prefix'): 181 | # concat dump name 182 | serialize_name = '_'.join([param.get_conf()[prefix], get_date()]) + '.pkl' 183 | print('serialize_name = ', serialize_name) 184 | 185 | # concat dump path 186 | serialize_path = os.path.join(param.get_conf('save_dir'), serialize_name) 187 | with open(serialize_path, 'wb') as f: 188 | pickle.dump(model, f) 189 | 190 | print('model dump success') 191 | 192 | return serialize_path 193 | 194 | 195 | def deserialize_model(path): 196 | with open(path, 'rb') as f: 197 | model = pickle.load(f) 198 | 199 | print('model load success') 200 | 201 | return model 202 | 203 | 204 | def check_dir(path): 205 | if not os.path.exists(path): 206 | os.mkdir(path) 207 | 208 | 209 | class Param: 210 | def __init__(self, json_file): 211 | self.conf = None 212 | self.json_file = json_file 213 | 214 | def load_json(self, prefix=None): 215 | if prefix: 216 | self.json_path = os.path.join(prefix, self.json_file) 217 | else: 218 | self.json_path = os.path.join(json_dir, self.json_file) 219 | 220 | with open(self.json_path, 'r') as f: 221 | self.conf = json.load(f) 222 | 223 | for key, val in self.conf.items(): 224 | print(key, ':', val) 225 | 226 | def get_conf(self, key_value=None): 227 | if key_value is None: 228 | return self.conf 229 | return self.conf[key_value] 230 | 231 | def set_conf(self, key, value): 232 | self.conf[key] = value 233 | 234 | def print_conf(self): 235 | for key, val in self.conf.items(): 236 | print(key, ':', val) 237 |
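A hedged sketch of how Param is driven by the scripts in this repository (json/cifar.json is the shipped configuration; the keys mirror the ones read throughout this file):

param = Param('cifar.json')               # file name, resolved against json_dir
param.load_json()                         # parses the JSON and echoes every key
prefix = param.get_conf('model_prefix')   # e.g. 'cifar'
param.set_conf('poison_label_source', 5)  # override a single key at runtime
conf = param.get_conf()                   # no key -> the whole config dict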
-------------------------------------------------------------------------------- /src/test_specific_pair.py: -------------------------------------------------------------------------------- 1 | 2 | import os 3 | os.environ["CUDA_VISIBLE_DEVICES"] = "-1" 4 | os.environ['KERAS_BACKEND'] = 'tensorflow' 5 | from keras.layers import serialize 6 | from attacks.universal_perturbation import Universal_perturbation 7 | from attacks.CW import CarliniWagnerL2 8 | from data.cifar10 import CifarData 9 | # from data.GTSRB import GTSRBData 10 | from defences.activation_clustering import ActivationClustering 11 | from defences.spectral import compute_corr 12 | from model.cifar10 import CifarModel 13 | # from model.GTSRBModel import GTSRBModel 14 | from visualization import * 15 | import numpy as np 16 | import glob 17 | import argparse 18 | import cv2 19 | 20 | def test_deserialize_model(param, args): 21 | # the json configuration can be accessed via model.param 22 | K.set_learning_phase(1) 23 | if param.get_conf('model_prefix') == 'cifar': 24 | Data = CifarData 25 | Model = CifarModel 26 | elif param.get_conf('model_prefix') == 'GTSRB': 27 | Data = GTSRBData 28 | Model = GTSRBModel 29 | 30 | path = param.get_conf('model_path_finetune') 31 | if os.path.exists(path) and 'pkl' in path: 32 | with open(path, 'rb') as f: 33 | model = pickle.load(f) 34 | model.set_param(param) 35 | print('model load success') 36 | else: 37 | model = Model(param) 38 | data_clean = Data(param) 39 | data_clean.load_data(is_add_channel=True) 40 | model.init(data_clean) 41 | model.init_model() 42 | model.train(data_clean, nb_epochs=180) 43 | serialize_name = '_'.join( 44 | [param.get_conf('model_prefix'), get_date(), 'clean']) + '.pkl' 45 | serialize_path = os.path.join(param.get_conf('save_dir'), serialize_name) 46 | print('serialize_name = ', serialize_name) 47 | with open(serialize_path, 'wb') as f: 48 | pickle.dump(model, f) 49 | param.set_conf('model_path_finetune', serialize_name) 50 | 51 | data = Data(param) 52 | data.load_data() 53 | data.gen_backdoor(model) 54 | model.train(data) 55 | 56 | 57 | serialize_name = '_'.join( 58 | [param.get_conf('model_prefix'), get_date()]) + \ 59 | '_poison_{}to{}'.format(param.get_conf('poison_label_source'), param.get_conf('poison_label_target')) + '.pkl' 60 | print('serialize_name = ', serialize_name) 61 | serialize_path = os.path.join(param.get_conf('save_dir'), serialize_name) 62 | with open(serialize_path, 'wb') as f: 63 | pickle.dump(model, f) 64 | 65 | K.set_learning_phase(0) 66 | with open(serialize_path, 'rb') as f: 67 | model2 = pickle.load(f) 68 | 69 | model2.predict(data) 70 | ac = ActivationClustering(data, param, model2) 71 | ac.size_metric() 72 | # ac.relative_size_metric() 73 | 74 | 75 | 76 | def test_resume_model(param, args): 77 | # the json configuration can be accessed via model.param 78 | K.set_learning_phase(0) 79 | with open(param.get_conf('model_path_backdoor'), 'rb') as f: 80 | model = pickle.load(f) 81 | model.param.set_conf('pert_path', param.get_conf('pert_path')) 82 | print("model", param.get_conf('model_path_backdoor'), 'load success') 83 | 84 | if model.param.get_conf('model_prefix') == 'GTSRB': 85 | data = GTSRBData(model.param) 86 | elif model.param.get_conf('model_prefix') == 'cifar': 87 | data = CifarData(model.param) 88 | 89 | data.load_data() 90 | data.restore_backdoor(model) 91 | # data.gen_backdoor() 92 | model.param.print_conf() 93 | print('model load success') 94 | 95 | model.predict(data) 96 | if args.spectral: 97 | compute_corr(model.param, model, data) 98 | else: 99 | ac = ActivationClustering(data, model.param, model) 100 | ac.size_metric() 101 | # ac.relative_size_metric() 102 |
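# Pipeline summary of the two entry points above: test_deserialize_model trains
# (or loads) a clean model, injects the backdoor into the training data,
# retrains, and runs activation clustering on the poisoned model;
# test_resume_model reloads an already-poisoned model and re-runs detection only
# (activation clustering by default, spectral signatures with --spectral).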
103 | def gen_perturbation(model, data, method, param): 104 | 105 | # serialize perturbation 106 | 107 | if method == 'universal': 108 | universal = Universal_perturbation(model, param) 109 | v = universal.universal_perturbation(data, param.get_conf('poison_label_source'), 110 | param.get_conf('poison_label_target'), xi=param.get_conf('pert_xi') / 255.0) 111 | serialize_name = 'universal_{}to{}'.format(param.get_conf('poison_label_source'), 112 | param.get_conf('poison_label_target')) 113 | serialize_name = universal.serialize(serialize_name) 114 | elif method == 'CW': 115 | cw = CarliniWagnerL2(model, param) 116 | cw.attack(data, xi=param.get_conf('pert_xi') / 255.0) 117 | serialize_name = 'CW_{}to{}'.format(param.get_conf('poison_label_source'), 118 | param.get_conf('poison_label_target')) 119 | serialize_name = cw.serialize(serialize_name) 120 | # 3. test perturbation on some input cases 121 | return serialize_name 122 | 123 | 124 | 125 | 126 | def gen_rand_perturbation(param, args): 127 | model_path = param.get_conf('model_path') 128 | K.set_learning_phase(0) 129 | if param.get_conf('model_prefix') == 'GTSRB': 130 | data = GTSRBData(param) 131 | model = GTSRBModel 132 | else: 133 | data = CifarData(param) 134 | model = CifarModel 135 | data.load_data(is_add_channel=True) 136 | if os.path.exists(model_path) and 'pkl' in model_path: 137 | # 2. load model and generate perturbation 138 | serialize_path = model_path 139 | with open(serialize_path, 'rb') as f: 140 | model = pickle.load(f) 141 | else: 142 | 143 | model = model(param) 144 | model.init(data) 145 | model.init_model() 146 | model.train(data, nb_epochs=180) 147 | serialize_name = '_'.join( 148 | [param.get_conf('model_prefix'), get_date()]) + '_clean.pkl' 149 | print('serialize_name = ', serialize_name) 150 | serialize_path = os.path.join( 151 | param.get_conf('save_dir'), serialize_name) 152 | 153 | with open(serialize_path, 'wb') as f: 154 | pickle.dump(model, f) 155 | print('model dump success') 156 | # model.predict(data) 157 | print('model load success') 158 | 159 | param.set_conf('poison_label_source', args.source) 160 | param.set_conf('poison_label_target', args.target) 161 | print('source number is {}, target number is {}'.format(args.source, args.target)) 162 | method = param.get_conf('method') 163 | 164 | return gen_perturbation(model, data, method=method, param=param) 165 | 166 | 167 | def experiment_on_pair(args, param, pair): 168 | 169 | param.set_conf('poison_label_source', pair[0]) 170 | param.set_conf('poison_label_target', pair[1]) 171 | test_deserialize_model(param, args) 172 | 173 | K.clear_session() 174 | 175 | 176 | if __name__ == '__main__': 177 | parser = argparse.ArgumentParser(description='random pair testing') 178 | parser.add_argument('-c', '--config', default='train.json', type=str, help='config file') 179 | parser.add_argument('-g', '--gen', action='store_true', help='generate perturbations') 180 | parser.add_argument('-s', '--source', type=int, default=5, help='attack source') 181 | parser.add_argument('-t', '--target', type=int, default=6, help='attack target') 182 | parser.add_argument('-r', '--resume', action='store_true', help="resume and test trained models") 183 | parser.add_argument('-sp', '--spectral', action='store_true', help="use the spectral signatures defence to detect") 184 | args = parser.parse_args() 185 | json_name = args.config 186 | param = Param(json_name) 187 | param.load_json() 188 | if args.gen: 189 | pert_path = gen_rand_perturbation(param, args) 190 | param.set_conf("pert_path", pert_path) 191 | if args.resume: 192 | test_resume_model(param, args) 193 | else: 194 | experiment_on_pair(args, param, (args.source, args.target)) 195 | 196 | 197 | 198 | -------------------------------------------------------------------------------- /src/poison_detection/ground_truth_evaluator.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | # 3 | # Copyright (C) IBM Corporation 2018 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 6 | # documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 7 | # rights to use, copy, 
modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit 8 | # persons to whom the Software is furnished to do so, subject to the following conditions: 9 | # 10 | # The above copyright notice and this permission notice shall be included in all copies or substantial portions of the 11 | # Software. 12 | # 13 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE 14 | # WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 15 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 16 | # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 17 | # SOFTWARE. 18 | from __future__ import absolute_import, division, print_function, unicode_literals 19 | 20 | import json 21 | import logging 22 | 23 | import numpy as np 24 | 25 | logger = logging.getLogger(__name__) 26 | 27 | 28 | class GroundTruthEvaluator: 29 | """ 30 | Class to evaluate the performance of the poison detection method. 31 | """ 32 | def __init__(self): 33 | """ 34 | Evaluates ground truth constructor 35 | """ 36 | 37 | def analyze_correctness(self, assigned_clean_by_class, is_clean_by_class): 38 | """ 39 | For each training sample, determine whether the activation clustering method was correct. 40 | 41 | :param assigned_clean_by_class: Result of clustering 42 | :type assigned_clean_by_class `list` 43 | :param is_clean_by_class: is clean separated by class 44 | :type is_clean_by_class `list` 45 | :return: Two variables are returned: 46 | 1) all_errors_by_class[i]: an array indicating the correctness of each assignment 47 | in the ith class. 
Such that: 48 | all_errors_by_class[i] = 0 if marked poison, is poison 49 | all_errors_by_class[i] = 1 if marked clean, is clean 50 | all_errors_by_class[i] = 2 if marked poison, is clean 51 | all_errors_by_class[i] = 3 marked clean, is poison 52 | 2) Json object with confusion matrix per-class 53 | """ 54 | 55 | all_errors_by_class = [] 56 | poison = 0 57 | clean = 1 58 | dic_json = {} 59 | 60 | logger.debug("Error rates per class:") 61 | for class_i, (assigned_clean, is_clean) in enumerate(zip(assigned_clean_by_class, is_clean_by_class)): 62 | errors = [] 63 | for assignment, bl in zip(assigned_clean, is_clean): 64 | bl = int(bl) 65 | # marked poison, is poison = 0 66 | # true positive 67 | if assignment == poison and bl == poison: 68 | errors.append(0) 69 | 70 | # marked clean, is clean = 1 71 | # true negative 72 | elif assignment == clean and bl == clean: 73 | errors.append(1) 74 | 75 | # marked poison, is clean = 2 76 | # false positive 77 | elif assignment == poison and bl == clean: 78 | errors.append(2) 79 | 80 | # marked clean, is poison = 3 81 | # false negative 82 | elif assignment == clean and bl == poison: 83 | errors.append(3) 84 | else: 85 | print(assignment, bl, type(assignment), type(bl),clean, poison) 86 | raise Exception('Analyze_correctness entered wrong class') 87 | 88 | errors = np.asarray(errors) 89 | logger.debug('-------------------%d---------------', class_i) 90 | key_i = "class_" + str(class_i) 91 | matrix_i = self.get_confusion_matrix(errors) 92 | dic_json.update({key_i: matrix_i}) 93 | all_errors_by_class.append(errors) 94 | 95 | all_errors_by_class = np.asarray(all_errors_by_class) 96 | conf_matrix_json = json.dumps(dic_json) 97 | 98 | return all_errors_by_class, conf_matrix_json 99 | 100 | def get_confusion_matrix(self, values): 101 | """ 102 | Computes and returns a json object that contains the confusion matrix for each class. 
103 | 104 | :param values: Array indicating the correctness of each assignment in the ith class 105 | :type values `array` 106 | :return: Json object with confusion matrix per-class 107 | """ 108 | dic_class = {} 109 | true_positive = np.where(values == 0)[0].shape[0] 110 | true_negative = np.where(values == 1)[0].shape[0] 111 | false_positive = np.where(values == 2)[0].shape[0] 112 | false_negative = np.where(values == 3)[0].shape[0] 113 | 114 | tp = self.calculate_and_print(true_positive, 115 | true_positive + false_negative, 116 | "true-positive rate") 117 | tn = self.calculate_and_print(true_negative, 118 | false_positive + true_negative, 119 | "true-negative rate") 120 | fp = self.calculate_and_print(false_positive, 121 | false_positive + true_negative, 122 | "false-positive rate") 123 | fn = self.calculate_and_print(false_negative, 124 | true_positive + false_negative, 125 | "false-negative rate") 126 | 127 | dic_tp = dict(rate=round(tp, 2), numerator=true_positive, denominator=(true_positive + false_negative)) 128 | if (true_positive + false_negative) == 0: 129 | dic_tp = dict(rate='N/A', numerator=true_positive, denominator=(true_positive + false_negative)) 130 | 131 | dic_tn = dict(rate=round(tn, 2), numerator=true_negative, denominator=(false_positive + true_negative)) 132 | if (false_positive + true_negative) == 0: 133 | dic_tn = dict(rate='N/A', numerator=true_negative, denominator=(false_positive + true_negative)) 134 | 135 | dic_fp = dict(rate=round(fp, 2), numerator=false_positive, denominator=(false_positive + true_negative)) 136 | if (false_positive + true_negative) == 0: 137 | dic_fp = dict(rate='N/A', numerator=false_positive, denominator=(false_positive + true_negative)) 138 | 139 | dic_fn = dict(rate=round(fn, 2), numerator=false_negative, denominator=(true_positive + false_negative)) 140 | if (true_positive + false_negative) == 0: 141 | dic_fn = dict(rate='N/A', numerator=false_negative, denominator=(true_positive + false_negative)) 142 | 143 | dic_class.update(dict(TruePositive=dic_tp)) 144 | dic_class.update(dict(TrueNegative=dic_tn)) 145 | dic_class.update(dict(FalsePositive=dic_fp)) 146 | dic_class.update(dict(FalseNegative=dic_fn)) 147 | 148 | return dic_class 149 | 150 | @staticmethod 151 | def calculate_and_print(numerator, denominator, name): 152 | """ 153 | Computes and prints the rates based on the denominator provided. 154 | 155 | :param numerator: number used to compute the rate 156 | :type numerator `int` 157 | :param denominator: number used to compute the rate 158 | :type denominator `int` 159 | :param name: Rate name being computed e.g., false-positive rate 160 | :type name `str` 161 | :return: Computed rate 162 | """ 163 | try: 164 | res = 100 * (numerator / float(denominator)) 165 | logger.debug("%s: %d/%d=%.3g", name, numerator, denominator, res) 166 | return res 167 | except Exception: 168 | logger.debug("%s: couldn't calculate %d/%d", name, numerator, denominator) 169 | return 0 170 | -------------------------------------------------------------------------------- /src/model/cifar_res.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | '''ResNet50 model for Keras. 3 | # Reference: 4 | - [Deep Residual Learning for Image Recognition](https://arxiv.org/abs/1512.03385) 5 | Adapted from code contributed by BigMoyan. 
6 | ''' 7 | from __future__ import print_function 8 | 9 | import numpy as np 10 | import warnings 11 | 12 | from keras.layers import Input 13 | from keras import layers 14 | from keras.layers import Dense 15 | from keras.layers import Activation 16 | from keras.layers import Flatten 17 | from keras.layers import Conv2D 18 | from keras.layers import MaxPooling2D 19 | from keras.layers import GlobalMaxPooling2D 20 | from keras.layers import ZeroPadding2D 21 | from keras.layers import AveragePooling2D 22 | from keras.layers import GlobalAveragePooling2D 23 | from keras.layers import BatchNormalization 24 | from keras.models import Model 25 | from keras.preprocessing.image import ImageDataGenerator 26 | import keras.backend as K 27 | from keras.utils import layer_utils 28 | # from keras.utils.data_utils import get_file 29 | # from keras.applications.imagenet_utils import decode_predictions 30 | # from keras_applications.imagenet_utils import preprocess_input 31 | # from keras_applications.imagenet_utils import _obtain_input_shape 32 | # from keras.engine.topology import get_source_inputs 33 | from model.model import * 34 | from classifiers import KerasClassifier 35 | import record 36 | 37 | 38 | WEIGHTS_PATH = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.2/resnet50_weights_tf_dim_ordering_tf_kernels.h5' 39 | WEIGHTS_PATH_NO_TOP = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.2/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5' 40 | 41 | 42 | def identity_block(input_tensor, kernel_size, filters, stage, block): 43 | """The identity block is the block that has no conv layer at shortcut. 44 | # Arguments 45 | input_tensor: input tensor 46 | kernel_size: default 3, the kernel size of middle conv layer at main path 47 | filters: list of integers, the filters of 3 conv layer at main path 48 | stage: integer, current stage label, used for generating layer names 49 | block: 'a','b'..., current block label, used for generating layer names 50 | # Returns 51 | Output tensor for the block. 52 | """ 53 | filters1, filters2, filters3 = filters 54 | if K.image_data_format() == 'channels_last': 55 | bn_axis = 3 56 | else: 57 | bn_axis = 1 58 | conv_name_base = 'res' + str(stage) + block + '_branch' 59 | bn_name_base = 'bn' + str(stage) + block + '_branch' 60 | 61 | x = Conv2D(filters1, (1, 1), name=conv_name_base + '2a')(input_tensor) 62 | x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2a')(x) 63 | x = Activation('relu')(x) 64 | 65 | x = Conv2D(filters2, kernel_size, 66 | padding='same', name=conv_name_base + '2b')(x) 67 | x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2b')(x) 68 | x = Activation('relu')(x) 69 | 70 | x = Conv2D(filters3, (1, 1), name=conv_name_base + '2c')(x) 71 | x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2c')(x) 72 | 73 | x = layers.add([x, input_tensor]) 74 | x = Activation('relu')(x) 75 | return x 76 | 77 | 
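A hedged sketch of how these blocks compose (toy 32x32x64 input; conv_block is defined just below; this mirrors the stage-2 wiring in CifarModel.init_model):

inp = Input(shape=(32, 32, 64))
y = conv_block(inp, 3, [64, 64, 256], stage=2, block='a', strides=(1, 1))  # projection shortcut
y = identity_block(y, 3, [64, 64, 256], stage=2, block='b')                # identity shortcut
# identity_block requires matching channel counts (256 here), because the
# shortcut is added to the main path without a projection convolution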
78 | def conv_block(input_tensor, kernel_size, filters, stage, block, strides=(2, 2)): 79 | """conv_block is the block that has a conv layer at shortcut 80 | # Arguments 81 | input_tensor: input tensor 82 | kernel_size: default 3, the kernel size of middle conv layer at main path 83 | filters: list of integers, the filters of 3 conv layer at main path 84 | stage: integer, current stage label, used for generating layer names 85 | block: 'a','b'..., current block label, used for generating layer names 86 | # Returns 87 | Output tensor for the block. 88 | Note that from stage 3, the first conv layer at main path is with strides=(2,2) 89 | And the shortcut should have strides=(2,2) as well 90 | """ 91 | filters1, filters2, filters3 = filters 92 | if K.image_data_format() == 'channels_last': 93 | bn_axis = 3 94 | else: 95 | bn_axis = 1 96 | conv_name_base = 'res' + str(stage) + block + '_branch' 97 | bn_name_base = 'bn' + str(stage) + block + '_branch' 98 | 99 | x = Conv2D(filters1, (1, 1), strides=strides, 100 | name=conv_name_base + '2a')(input_tensor) 101 | x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2a')(x) 102 | x = Activation('relu')(x) 103 | 104 | x = Conv2D(filters2, kernel_size, padding='same', 105 | name=conv_name_base + '2b')(x) 106 | x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2b')(x) 107 | x = Activation('relu')(x) 108 | 109 | x = Conv2D(filters3, (1, 1), name=conv_name_base + '2c')(x) 110 | x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2c')(x) 111 | 112 | shortcut = Conv2D(filters3, (1, 1), strides=strides, 113 | name=conv_name_base + '1')(input_tensor) 114 | shortcut = BatchNormalization(axis=bn_axis, name=bn_name_base + '1')(shortcut) 115 | 116 | x = layers.add([x, shortcut]) 117 | x = Activation('relu')(x) 118 | return x 119 | 120 | class CifarModel(CNNModel): 121 | 122 | def __init__(self, param): 123 | super(CifarModel, self).__init__(param) 124 | 125 | 126 | def init(self, data): 127 | self.input_shape = data.x_train.shape[1:] 128 | self.min_ = data.min_ 129 | self.max_ = data.max_ 130 | 131 | def set_learning_phase(self, learning_phase): 132 | K.set_learning_phase(learning_phase) 133 | 134 | def init_model(self, include_top=False, weights=None, 135 | input_tensor=None, input_shape=None, 136 | pooling=None): 137 | """Instantiates the ResNet50 architecture. 138 | Optionally loads weights pre-trained 139 | on ImageNet. Note that when using TensorFlow, 140 | for best performance you should set 141 | `image_data_format="channels_last"` in your Keras config 142 | at ~/.keras/keras.json. 143 | The model and the weights are compatible with both 144 | TensorFlow and Theano. The data format 145 | convention used by the model is the one 146 | specified in your Keras config file. 147 | # Arguments 148 | include_top: whether to include the fully-connected 149 | layer at the top of the network. 150 | weights: one of `None` (random initialization) 151 | or "imagenet" (pre-training on ImageNet). 152 | input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) 153 | to use as image input for the model. 154 | input_shape: optional shape tuple, only to be specified 155 | if `include_top` is False (otherwise the input shape 156 | has to be `(224, 224, 3)` (with `channels_last` data format) 157 | or `(3, 224, 224)` (with `channels_first` data format). 158 | It should have exactly 3 input channels, 159 | and width and height should be no smaller than 197. 160 | E.g. `(200, 200, 3)` would be one valid value. 161 | pooling: Optional pooling mode for feature extraction 162 | when `include_top` is `False`. 163 | - `None` means that the output of the model will be 164 | the 4D tensor output of the 165 | last convolutional layer. 166 | - `avg` means that global average pooling 167 | will be applied to the output of the 168 | last convolutional layer, and thus 169 | the output of the model will be a 2D tensor. 170 | - `max` means that global max pooling will 171 | be applied. 
172 | classes: optional number of classes to classify images 173 | into, only to be specified if `include_top` is True, and 174 | if no `weights` argument is specified. 175 | # Returns 176 | A Keras model instance. 177 | # Raises 178 | ValueError: in case of invalid argument for `weights`, 179 | or invalid input shape. 180 | """ 181 | 182 | 183 | # Determine proper input shape 184 | # input_shape = _obtain_input_shape(self.input_shape, 185 | # default_size=224, 186 | # min_size=32, #for cifar10 compatibility; 187 | # data_format=K.image_data_format(), 188 | # require_flatten=include_top) #Look keras 2.0+ version change logs 189 | 190 | if input_tensor is None: 191 | img_input = Input(shape=self.input_shape) 192 | else: 193 | if not K.is_keras_tensor(input_tensor): 194 | img_input = Input(tensor=input_tensor, shape=input_shape) 195 | else: 196 | img_input = input_tensor 197 | if K.image_data_format() == 'channels_last': 198 | bn_axis = 3 199 | else: 200 | bn_axis = 1 201 | 202 | x = ZeroPadding2D((3, 3))(img_input) 203 | x = Conv2D(64, (7, 7), strides=(2, 2), name='conv1')(x) 204 | x = BatchNormalization(axis=bn_axis, name='bn_conv1')(x) 205 | x = Activation('relu')(x) 206 | x = MaxPooling2D((3, 3), strides=(2, 2))(x) 207 | 208 | x = conv_block(x, 3, [64, 64, 256], stage=2, block='a', strides=(1, 1)) 209 | x = identity_block(x, 3, [64, 64, 256], stage=2, block='b') 210 | x = identity_block(x, 3, [64, 64, 256], stage=2, block='c') 211 | 212 | x = conv_block(x, 3, [128, 128, 512], stage=3, block='a') 213 | x = identity_block(x, 3, [128, 128, 512], stage=3, block='b') 214 | x = identity_block(x, 3, [128, 128, 512], stage=3, block='c') 215 | # x = identity_block(x, 3, [128, 128, 512], stage=3, block='d') 216 | 217 | x = conv_block(x, 3, [256, 256, 1024], stage=4, block='a') 218 | x = identity_block(x, 3, [256, 256, 1024], stage=4, block='b') 219 | x = identity_block(x, 3, [256, 256, 1024], stage=4, block='c') 220 | x = identity_block(x, 3, [256, 256, 1024], stage=4, block='d') 221 | x = identity_block(x, 3, [256, 256, 1024], stage=4, block='e') 222 | x = identity_block(x, 3, [256, 256, 1024], stage=4, block='f') 223 | 224 | x = conv_block(x, 3, [512, 512, 2048], stage=5, block='a') 225 | x = identity_block(x, 3, [512, 512, 2048], stage=5, block='b') 226 | x = identity_block(x, 3, [512, 512, 2048], stage=5, block='c') 227 | 228 | # x = AveragePooling2D((7, 7), name='avg_pool')(x) 229 | 230 | # if include_top: 231 | x = Flatten(name='dense_2')(x) 232 | x = Dense(self.param.get_conf('num_classes'), activation=None, name='predictions')(x) 233 | x = Activation('softmax', name='softmax_output')(x) 234 | # Ensure that the model takes into account 235 | # any potential predecessors of `input_tensor`. 236 | # if input_tensor is not None: 237 | # inputs = get_source_inputs(input_tensor) 238 | # else: 239 | # inputs = img_input 240 | # Create model. 
241 | model = Model(img_input, x, name='resnet50') 242 | # model.summary() 243 | # load weights 244 | # weights_path = '../model/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5' 245 | # model.load_weights(weights_path,strict=False) 246 | 247 | if K.image_data_format() == 'channels_first': 248 | if include_top: 249 | maxpool = model.get_layer(name='avg_pool') 250 | shape = maxpool.output_shape[1:] 251 | dense = model.get_layer(name='fc1000') 252 | layer_utils.convert_dense_weights_data_format(dense, shape, 'channels_first') 253 | model.compile(loss='categorical_crossentropy', optimizer='adadelta', metrics=['accuracy']) 254 | self.classifier = KerasClassifier(clip_values=(self.min_, self.max_), model=model, param=self.param) 255 | 256 | def train(self, data, nb_epochs=None): 257 | # default 258 | # nb_epochs=20 259 | # batch_size=128 260 | 261 | datagen = ImageDataGenerator( 262 | featurewise_center=False, # set input mean to 0 over the dataset 263 | samplewise_center=False, # set each sample mean to 0 264 | featurewise_std_normalization=False, # divide inputs by std of the dataset 265 | samplewise_std_normalization=False, # divide each input by its std 266 | zca_whitening=False, # apply ZCA whitening 267 | zca_epsilon=1e-06, # epsilon for ZCA whitening 268 | rotation_range=0, # randomly rotate images in the range (degrees, 0 to 180) 269 | # randomly shift images horizontally (fraction of total width) 270 | width_shift_range=0.1, 271 | # randomly shift images vertically (fraction of total height) 272 | height_shift_range=0.1, 273 | shear_range=0., # set range for random shear 274 | zoom_range=0., # set range for random zoom 275 | channel_shift_range=0., # set range for random channel shifts 276 | # set mode for filling points outside the input boundaries 277 | fill_mode='nearest', 278 | cval=0., # value used for fill_mode = "constant" 279 | horizontal_flip=True, # randomly flip images 280 | # validation_split=0.0 281 | ) 282 | if nb_epochs is None: 283 | nb_epochs = self.param.get_conf()['train_epoch'] 284 | # Fit the model on the batches generated by datagen.flow(). 
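# (steps below are whole 128-sample batches; with an in-memory tuple as validation_data, Keras does not iterate a validation generator, so the x_train-based validation_steps value appears to be unused)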
285 | self.classifier.get_model().fit_generator(datagen.flow(data.x_train, data.y_train, batch_size=128), 286 | epochs=nb_epochs, 287 | steps_per_epoch=data.x_train.shape[0] // 128, 288 | validation_data=(data.x_test, data.y_test), 289 | validation_steps=data.x_train.shape[0] // 128, 290 | workers=4) 291 | 292 | def predict_instance(self, x): 293 | return self.classifier.predict(x)[0] 294 | 295 | def get_input_shape(self): 296 | return self.input_shape 297 | 298 | def set_input_shape(self, input_shape): 299 | self.input_shape = input_shape 300 | 301 | def get_classifier(self): 302 | return self.classifier 303 | 304 | def set_classifier(self, classifier): 305 | self.classifier = classifier 306 | 307 | def get_input_tensor(self): 308 | return self.classifier.get_input_tensor() 309 | 310 | def get_output_tensor(self): 311 | return self.classifier.get_output_tensor() 312 | 313 | def get_output_bef_softmax(self): 314 | return self.classifier.get_output_bef_softmax() 315 | 316 | def get_dense_tensor(self): 317 | return self.classifier.get_model().get_layer('dense_2').output 318 | 319 | 320 | if __name__ == '__main__': 321 | # stand-alone sanity check, unrelated to CifarModel above; it needs the stock Keras ResNet50 application 322 | from keras.applications.resnet50 import ResNet50, preprocess_input, decode_predictions 323 | from keras.preprocessing import image 324 | model = ResNet50(include_top=True, weights='imagenet') 325 | 326 | img_path = 'elephant.jpg' 327 | img = image.load_img(img_path, target_size=(224, 224)) 328 | x = image.img_to_array(img) 329 | x = np.expand_dims(x, axis=0) 330 | x = preprocess_input(x) 331 | print('Input image shape:', x.shape) 332 | 333 | preds = model.predict(x) 334 | print('Predicted:', decode_predictions(preds)) -------------------------------------------------------------------------------- /src/data/cifar10.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | from keras.datasets import cifar10 4 | 5 | from backdoor import Backdoor 6 | from data.data import Data 7 | from utils import * 8 | from visualization import visualize_img_without_backdoor, visualize_img_with_backdoor 9 | 10 | 11 | class CifarData(Data): 12 | 13 | def __init__(self, param): 14 | super(CifarData, self).__init__(param) 15 | 16 | def init(self): 17 | self.x_train_ordered = None 18 | self.y_train_ordered = None 19 | self.x_test = None 20 | self.y_test = None 21 | self.min_ = None 22 | self.max_ = None 23 | self.random_selection_indices = None 24 | self.train_poisoned_index = None 25 | self.test_poisoned_index = None 26 | self.data_path = self.param.get_conf('data_path') 27 | self.backdoor = Backdoor(self.param.get_conf()) 28 | if type(self.param.get_conf('poison_label_source')) is list: 29 | self.source_num = self.param.get_conf('poison_label_source') 30 | else: 31 | self.source_num = np.array([int(self.param.get_conf('poison_label_source'))]) 32 | if type(self.param.get_conf('poison_label_target')) is list: 33 | self.target_num = self.param.get_conf('poison_label_target') 34 | 35 | else: 36 | self.target_num = np.array([int(self.param.get_conf('poison_label_target'))]) 37 | 38 | def load_data(self, is_add_channel=False): 39 | # print('self.data_path = ', self.data_path) 40 | 41 | (self.x_train_ordered, self.y_train_ordered), (self.x_test, self.y_test) = cifar10.load_data() 42 | self.y_train_ordered = self.y_train_ordered.squeeze() 43 | self.y_test = self.y_test.squeeze() 44 | # serialize_img(self.x_test[0], self.param) 45 | 46 | # raw data range of the CIFAR-10 images 47 | self.min_, self.max_ = 0, 255 48 | 49 | self.n_train = np.shape(self.x_train_ordered)[0] 50 | 51 | if is_add_channel: 52 | self.gen_indices() 53 | self.shuffled_indices = np.arange(min(len(self.x_train_ordered), 
self.param.get_conf('num_selection'))) 54 | self.gen_train_data() # train_data got 55 | self.add_channel_axis() 56 | 57 | print('after reading data') 58 | print('x_train.shape = ', self.x_train_ordered.shape) 59 | print('y_train.shape = ', self.y_train_ordered.shape) 60 | print('x_test.shape = ', self.x_test.shape) 61 | print('y_test.shape = ', self.y_test.shape) 62 | 63 | def add_channel_axis(self): 64 | self.is_poison_train_ordered = np.zeros_like(self.y_train_ordered) == 1 65 | self.is_poison_test = np.zeros_like(self.y_test) == 1 66 | self.x_train_ordered, self.y_train_ordered = preprocess_mnist(self.x_train_ordered, 67 | self.y_train_ordered) 68 | self.x_test, self.y_test = preprocess_mnist(self.x_test, self.y_test) 69 | 70 | def gen_indices(self): 71 | # self.param.get_conf('num_selection') means number of input case selected 72 | # self.n_train means total number of input case 73 | # self.random_selection_indices = np.random.choice(self.n_train, self.param.get_conf('num_selection')) 74 | self.random_selection_indices = np.arange(self.n_train) 75 | np.random.shuffle(self.random_selection_indices) 76 | self.random_selection_indices = self.random_selection_indices[:self.param.get_conf('num_selection')] 77 | 78 | def gen_train_data(self): 79 | # data.n_train = 60000 80 | # param.get_conf('num_selection') = 5000 81 | # random_selection_indices = np.random.choice(self.n_train, self.param.get_conf('num_selection')) 82 | 83 | # update random train data 84 | self.x_train_ordered = self.x_train_ordered[self.random_selection_indices] 85 | self.y_train_ordered = self.y_train_ordered[self.random_selection_indices] 86 | 87 | def gen_train_backdoor_data(self): 88 | # start creating backdoor data 89 | # the backdoor method can be changed 90 | 91 | self.is_poison_train_ordered, \ 92 | self.x_poisoned_raw, \ 93 | self.y_poisoned_raw = self.backdoor.generate_backdoor(self.x_train_ordered, 94 | self.y_train_ordered, 95 | self.backdoor.train_poison_rate, 96 | sources=self.source_num, 97 | targets=self.target_num) 98 | 99 | self.x_train_ordered, self.y_train_ordered = preprocess_mnist(self.x_poisoned_raw, self.y_poisoned_raw) 100 | 101 | # Add channel axis: 102 | # self.x_train_ordered = np.expand_dims(self.x_train_ordered, axis=3) 103 | 104 | def gen_shuffled_indices(self): 105 | # Shuffle training data so poison is not together 106 | n_train = np.shape(self.y_train_ordered)[0] 107 | self.shuffled_indices = np.arange(n_train) 108 | np.random.shuffle(self.shuffled_indices) 109 | 110 | def gen_shuffle_train_data(self): 111 | 112 | # self.x_train_ordered = self.x_train_ordered[self.shuffled_indices] 113 | # self.y_train_ordered = self.y_train_ordered[self.shuffled_indices] 114 | # self.is_poison_train_ordered = self.is_poison_train_ordered[self.shuffled_indices] 115 | 116 | self.is_clean_ordered = (self.is_poison_train_ordered == 0) 117 | 118 | def print_backdoor_info(self,info): 119 | print('after',info,'backdoor') 120 | print('x_train.shape = ', self.x_train_ordered.shape) 121 | print('y_train.shape = ', self.y_train_ordered.shape) 122 | print('x_poisoned_raw.shape = ', self.x_poisoned_raw.shape) 123 | print('y_poisoned_raw.shape = ', self.y_poisoned_raw.shape) 124 | 125 | ''' 126 | after generating backdoor 127 | x_train.shape = (5000, 28, 28) 128 | y_train.shape = (5000,) 129 | x_poisoned_raw.shape = (5209, 28, 28) 130 | y_poisoned_raw.shape = (5209,) 131 | 132 | 5000 -> 5209 133 | 134 | increasing number depends on the number of cases of sources 135 | generate extra test case from sources to targets 
136 | ''' 137 | 138 | def gen_train_backdoor(self): 139 | self.gen_indices() 140 | self.gen_train_data() # train_data got 141 | self.gen_train_backdoor_data() 142 | 143 | self.gen_shuffled_indices() 144 | self.gen_shuffle_train_data() 145 | # print("self.shuffled_indices = ", self.shuffled_indices) 146 | 147 | # test data 148 | def gen_test_backdoor_data(self): 149 | # Poison test data 150 | self.is_poison_test, \ 151 | self.x_poisoned_raw_test, \ 152 | self.y_poisoned_raw_test = self.backdoor.generate_backdoor(self.x_test, 153 | self.y_test, 154 | self.backdoor.test_poison_rate, 155 | sources=self.source_num, 156 | targets=self.target_num) 157 | 158 | self.x_test, self.y_test = preprocess_mnist(self.x_poisoned_raw_test, self.y_poisoned_raw_test) 159 | # Add channel axis: 160 | # self.x_test = np.expand_dims(self.x_test, axis=3) 161 | 162 | def gen_test_backdoor(self): 163 | self.gen_test_backdoor_data() 164 | 165 | self.print_backdoor_info("generate") 166 | 167 | def gen_backdoor(self, model=None): 168 | # self.gen_indices() 169 | 170 | self.gen_train_backdoor() 171 | # 1. input case index 172 | # 2. train poison meta data 173 | # should be stored in model 174 | self.backdoor.get_poison().set_random_selection_indices(self.random_selection_indices) 175 | self.backdoor.get_poison().set_shuffled_indices(self.shuffled_indices) 176 | self.train_poisoned_index = self.backdoor.get_poison().get_indices_to_be_poisoned() 177 | if model: 178 | model.set_train_poison(self.backdoor.get_poison()) 179 | 180 | self.gen_test_backdoor() 181 | 182 | # test poison meta data 183 | self.test_poisoned_index = self.backdoor.get_poison().get_indices_to_be_poisoned() 184 | if model: 185 | model.set_test_poison(self.backdoor.get_poison()) 186 | 187 | # restore train data 188 | def restore_train_backdoor_data(self, poison): 189 | self.is_poison_train_ordered, \ 190 | self.x_poisoned_raw, \ 191 | self.y_poisoned_raw = self.backdoor.restore_backdoor(self.x_train_ordered, 192 | self.y_train_ordered, 193 | poison) 194 | # Add channel axis: 195 | # self.x_poisoned_raw = np.expand_dims(self.x_poisoned_raw, axis=3) 196 | self.x_train_ordered, self.y_train_ordered = preprocess_mnist(self.x_poisoned_raw, self.y_poisoned_raw) 197 | 198 | def restore_train_backdoor(self, poison): 199 | self.random_selection_indices = poison.get_random_selection_indices() 200 | self.shuffled_indices = poison.get_shuffled_indices() 201 | self.train_poisoned_index = poison.get_indices_to_be_poisoned() 202 | self.gen_train_data() 203 | self.restore_train_backdoor_data(poison) 204 | 205 | self.gen_shuffle_train_data() 206 | 207 | def restore_test_backdoor_data(self, poison): 208 | # Poison test data 209 | self.is_poison_test, \ 210 | self.x_poisoned_raw_test, \ 211 | self.y_poisoned_raw_test = self.backdoor.restore_backdoor(self.x_test, 212 | self.y_test, 213 | poison) 214 | 215 | self.x_test, self.y_test = preprocess_mnist(self.x_poisoned_raw_test, self.y_poisoned_raw_test) 216 | 217 | # Add channel axis: 218 | # self.x_test = np.expand_dims(self.x_test, axis=3) 219 | 220 | @property 221 | def x_train(self): 222 | return self.x_train_ordered[self.shuffled_indices] 223 | 224 | @property 225 | def y_train(self): 226 | return self.y_train_ordered[self.shuffled_indices] 227 | 228 | @property 229 | def is_poison_train(self): 230 | return self.is_poison_train_ordered[self.shuffled_indices] 231 | 232 | @property 233 | def is_clean(self): 234 | return self.is_clean_ordered[self.shuffled_indices] 235 | 236 | def restore_test_backdoor(self, poison): 237 | 
self.test_poisoned_index = poison.get_indices_to_be_poisoned() 238 | self.restore_test_backdoor_data(poison) 239 | self.print_backdoor_info("restore") 240 | 241 | 242 | def restore_backdoor(self, model): 243 | self.restore_train_backdoor(model.get_train_poison()) 244 | self.restore_test_backdoor(model.get_test_poison()) 245 | 246 | def get_backdoor(self): 247 | return self.backdoor 248 | 249 | def set_backdoor(self, backdoor): 250 | self.backdoor = backdoor 251 | 252 | def visiualize_img_by_idx(self, shuffled_idx, pre_label, is_train=True): 253 | 254 | if is_train: 255 | idx = self.shuffled_indices[shuffled_idx] 256 | if self.is_poison_train_ordered[idx]: 257 | # print("idx of poison in train set", self.train_poisoned_index[self.cal_index(idx)], self.cal_index(idx)) 258 | visualize_img_with_backdoor( 259 | self.x_poisoned_raw[self.train_poisoned_index[self.cal_index(idx)]], 260 | self.y_train_ordered[self.train_poisoned_index[self.cal_index(idx)]].argmax(), 261 | pre_label, 262 | self.x_poisoned_raw[idx], 263 | np.argmax(self.y_train_ordered[idx]) 264 | ) 265 | else: 266 | visualize_img_without_backdoor( 267 | self.x_poisoned_raw[idx], 268 | self.y_train_ordered[idx].argmax(), 269 | pre_label, 270 | None) 271 | else: 272 | idx = shuffled_idx 273 | if self.is_poison_test[idx]: 274 | # print("idx of poison in test set", self.test_poisoned_index[self.cal_index(idx, False)], 275 | # self.cal_index(idx, False)) 276 | visualize_img_with_backdoor( 277 | self.x_poisoned_raw_test[self.test_poisoned_index[self.cal_index(idx, False)]], 278 | self.y_test[self.test_poisoned_index[self.cal_index(idx, False)]].argmax(), 279 | pre_label, 280 | self.x_poisoned_raw_test[idx], 281 | np.argmax(self.y_test[idx]), 282 | "Test") 283 | 284 | else: 285 | visualize_img_without_backdoor(self.x_poisoned_raw_test[idx], self.y_test[idx].argmax(), pre_label, 286 | "Test" ) 287 | 288 | def cal_index(self, idx, is_train=True): 289 | if is_train: 290 | return idx - len(self.random_selection_indices) 291 | else: 292 | return idx - len(self.y_test) + len(self.test_poisoned_index) 293 | 294 | def get_clean_data(self): 295 | if not hasattr(self, "is_poison_train"): 296 | return self.x_train, self.y_train, self.x_test, self.y_test 297 | return self.x_train[self.is_poison_train == 0], \ 298 | self.y_train[self.is_poison_train == 0], \ 299 | self.x_test[self.is_poison_test == 0], \ 300 | self.y_test[self.is_poison_test == 0] 301 | 302 | def get_poison_data(self): 303 | return self.x_train[self.is_poison_train == 1], \ 304 | self.y_train[self.is_poison_train == 1], \ 305 | self.x_test[self.is_poison_test == 1], \ 306 | self.y_test[self.is_poison_test == 1] 307 | 308 | def get_specific_label_clean_data(self, label): 309 | x_train, y_train, x_test, y_test = self.get_clean_data() 310 | y_train_label = np.argmax(y_train, axis=1) 311 | y_test_label = np.argmax(y_test, axis=1) 312 | return x_train[y_train_label == label], \ 313 | y_train[y_train_label == label], \ 314 | x_test[y_test_label == label], \ 315 | y_test[y_test_label == label] 316 | 317 | def get_specific_label_poison_data(self, label): 318 | x_train, y_train, x_test, y_test = self.get_poison_data() 319 | y_train_label = np.argmax(y_train, axis=1) 320 | y_test_label = np.argmax(y_test, axis=1) 321 | return x_train[y_train_label == label], \ 322 | y_train[y_train_label == label], \ 323 | x_test[y_test_label == label], \ 324 | y_test[y_test_label == label] 325 | 326 | def get_specific_label_data(self, label): 327 | y_train = self.y_train.argmax(axis=1) 328 | y_test = 
self.y_test.argmax(axis=1) 329 | return self.x_train[y_train == label], \ 330 | self.y_train[y_train == label], \ 331 | self.x_test[y_test == label], \ 332 | self.y_test[y_test == label], \ 333 | self.is_poison_train[y_train == label], \ 334 | self.is_poison_test[y_test == label] 335 | 336 | 337 | if __name__ == '__main__': 338 | json_name = sys.argv[1] 339 | param = Param(json_name) 340 | param.load_json() 341 | data = CifarData(param) 342 | data.load_data() 343 | data.gen_backdoor() 344 | -------------------------------------------------------------------------------- /src/classifiers/classifier.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | # 3 | # Copyright (C) IBM Corporation 2018 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 6 | # documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 7 | # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit 8 | # persons to whom the Software is furnished to do so, subject to the following conditions: 9 | # 10 | # The above copyright notice and this permission notice shall be included in all copies or substantial portions of the 11 | # Software. 12 | # 13 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE 14 | # WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 15 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 16 | # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 17 | # SOFTWARE. 18 | from __future__ import absolute_import, division, print_function, unicode_literals 19 | 20 | import abc 21 | import os 22 | import sys 23 | 24 | sys.path.append(os.path.dirname(__file__) + os.sep + '../') 25 | 26 | import numpy as np 27 | 28 | # Ensure compatibility with Python 2 and 3 when using ABCMeta 29 | if sys.version_info >= (3, 4): 30 | ABC = abc.ABC 31 | else: 32 | ABC = abc.ABCMeta(str('ABC'), (), {}) 33 | 34 | 35 | class Classifier(ABC): 36 | """ 37 | Base class for all classifiers. 38 | """ 39 | 40 | def __init__(self, channel_index, clip_values=None, defences=None, preprocessing=(0, 1), param=None): 41 | """ 42 | Initialize a `Classifier` object. 43 | 44 | :param channel_index: Index of the axis in data containing the color channels or features. 45 | :type channel_index: `int` 46 | :param clip_values: Tuple of the form `(min, max)` of floats or `np.ndarray` representing the minimum and 47 | maximum values allowed for features. If floats are provided, these will be used as the range of all 48 | features. If arrays are provided, each value will be considered the bound for a feature, thus 49 | the shape of clip values needs to match the total number of features. 50 | :type clip_values: `tuple` 51 | :param defences: Defence(s) to be activated with the classifier. 52 | :type defences: :class:`.Preprocessor` or `list(Preprocessor)` instances 53 | :param preprocessing: Tuple of the form `(subtrahend, divisor)` of floats or `np.ndarray` of values to be 54 | used for data preprocessing. The first value will be subtracted from the input. The input will then 55 | be divided by the second one. 
56 | :type preprocessing: `tuple` 57 | """ 58 | self.param = param 59 | if clip_values is not None: 60 | if len(clip_values) != 2: 61 | raise ValueError('`clip_values` should be a tuple of 2 floats or arrays containing the allowed ' 62 | 'data range.') 63 | if np.array(clip_values[0] >= clip_values[1]).any(): 64 | raise ValueError('Invalid `clip_values`: min >= max.') 65 | self._clip_values = clip_values 66 | 67 | self._channel_index = channel_index 68 | 69 | self.defences = defences 70 | 71 | if len(preprocessing) != 2: 72 | raise ValueError('`preprocessing` should be a tuple of 2 floats with the subtract and divide values for ' 73 | 'the model inputs.') 74 | self.preprocessing = preprocessing 75 | 76 | @abc.abstractmethod 77 | def predict(self, x, logits=False, batch_size=128): 78 | """ 79 | Perform prediction for a batch of inputs. 80 | 81 | :param x: Test set. 82 | :type x: `np.ndarray` 83 | :param logits: `True` if the prediction should be done at the logits layer. 84 | :type logits: `bool` 85 | :param batch_size: Size of batches. 86 | :type batch_size: `int` 87 | :return: Array of predictions of shape `(nb_inputs, self.nb_classes)`. 88 | :rtype: `np.ndarray` 89 | """ 90 | raise NotImplementedError 91 | 92 | @abc.abstractmethod 93 | def fit(self, x, y, batch_size=128, nb_epochs=20, **kwargs): 94 | """ 95 | Fit the classifier on the training set `(x, y)`. 96 | 97 | :param x: Training data. 98 | :type x: `np.ndarray` 99 | :param y: Labels, one-vs-rest encoding. 100 | :type y: `np.ndarray` 101 | :param batch_size: Size of batches. 102 | :type batch_size: `int` 103 | :param nb_epochs: Number of epochs to use for training. 104 | :type nb_epochs: `int` 105 | :param kwargs: Dictionary of framework-specific arguments. 106 | :type kwargs: `dict` 107 | :return: `None` 108 | """ 109 | raise NotImplementedError 110 | 111 | def fit_generator(self, generator, nb_epochs=20, **kwargs): 112 | """ 113 | Fit the classifier using the generator `generator` that yields batches as specified. Framework implementations can 114 | provide framework-specific versions of this function to speed up computation. 115 | 116 | :param generator: Batch generator providing `(x, y)` for each epoch. 117 | :type generator: :class:`.DataGenerator` 118 | :param nb_epochs: Number of epochs to use for training. 119 | :type nb_epochs: `int` 120 | :param kwargs: Dictionary of framework-specific arguments. 121 | :type kwargs: `dict` 122 | :return: `None` 123 | """ 124 | from ..data_generators import DataGenerator  # note: this relative import escapes the package; in this repository the generator appears to live in src/data/DataGenerator.py 125 | 126 | if not isinstance(generator, DataGenerator): 127 | raise ValueError('Expected instance of `DataGenerator` for `fit_generator`, got %s instead.' 128 | % str(type(generator))) 129 | 130 | for _ in range(nb_epochs): 131 | x, y = generator.get_batch() 132 | 133 | # Apply preprocessing and defences 134 | x = self._apply_processing(x) 135 | x, y = self._apply_defences(x, y, fit=True) 136 | 137 | # Fit for current batch 138 | self.fit(x, y, nb_epochs=1, batch_size=len(x), **kwargs) 139 | 140 | @property 141 | def nb_classes(self): 142 | """ 143 | Return the number of output classes. 144 | 145 | :return: Number of classes in the data. 146 | :rtype: `int` 147 | """ 148 | return self._nb_classes 149 | 150 | @property 151 | def input_shape(self): 152 | """ 153 | Return the shape of one input. 154 | 155 | :return: Shape of one input for the classifier.
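For a CIFAR-10 convolutional model this is typically `(32, 32, 3)`; for MNIST, `(28, 28, 1)`.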
156 | :rtype: `tuple` 157 | """ 158 | return self._input_shape 159 | 160 | @property 161 | def clip_values(self): 162 | """ 163 | :return: Tuple of the form `(min, max)` representing the minimum and maximum values allowed for features. 164 | :rtype: `tuple` 165 | """ 166 | return self._clip_values 167 | 168 | @property 169 | def channel_index(self): 170 | """ 171 | :return: Index of the axis in data containing the color channels or features. 172 | :rtype: `int` 173 | """ 174 | return self._channel_index 175 | 176 | @property 177 | def learning_phase(self): 178 | """ 179 | Return the learning phase set by the user for the current classifier. Possible values are `True` for training, 180 | `False` for prediction and `None` if it has not been set through the library. In the latter case, the library 181 | does not do any explicit learning phase manipulation and the current value of the backend framework is used. 182 | If a value has been set by the user for this property, it will impact all following computations for 183 | model fitting, prediction and gradients. 184 | 185 | :return: Value of the learning phase. 186 | :rtype: `bool` or `None` 187 | """ 188 | return self._learning_phase if hasattr(self, '_learning_phase') else None 189 | 190 | @abc.abstractmethod 191 | def class_gradient(self, x, label=None, logits=False): 192 | """ 193 | Compute per-class derivatives w.r.t. `x`. 194 | 195 | :param x: Sample input with shape as expected by the model. 196 | :type x: `np.ndarray` 197 | :param label: Index of a specific per-class derivative. If an integer is provided, the gradient of that class 198 | output is computed for all samples. If multiple values as provided, the first dimension should 199 | match the batch size of `x`, and each value will be used as target for its corresponding sample in 200 | `x`. If `None`, then gradients for all classes will be computed for each sample. 201 | :type label: `int` or `list` 202 | :param logits: `True` if the prediction should be done at the logits layer. 203 | :type logits: `bool` 204 | :return: Array of gradients of input features w.r.t. each class in the form 205 | `(batch_size, nb_classes, input_shape)` when computing for all classes, otherwise shape becomes 206 | `(batch_size, 1, input_shape)` when `label` parameter is specified. 207 | :rtype: `np.ndarray` 208 | """ 209 | raise NotImplementedError 210 | 211 | @abc.abstractmethod 212 | def loss_gradient(self, x, y): 213 | """ 214 | Compute the gradient of the loss function w.r.t. `x`. 215 | 216 | :param x: Sample input with shape as expected by the model. 217 | :type x: `np.ndarray` 218 | :param y: Correct labels, one-vs-rest encoding. 219 | :type y: `np.ndarray` 220 | :return: Array of gradients of the same shape as `x`. 221 | :rtype: `np.ndarray` 222 | """ 223 | raise NotImplementedError 224 | 225 | @property 226 | def layer_names(self): 227 | """ 228 | Return the hidden layers in the model, if applicable. 229 | 230 | :return: The hidden layers in the model, input and output layers excluded. 231 | :rtype: `list` 232 | 233 | .. warning:: `layer_names` tries to infer the internal structure of the model. 234 | This feature comes with no guarantees on the correctness of the result. 235 | The intended order of the layers tries to match their order in the model, but this is not 236 | guaranteed either. 237 | """ 238 | raise NotImplementedError 239 | 240 | @abc.abstractmethod 241 | def get_activations(self, x, layer, batch_size): 242 | """ 243 | Return the output of the specified layer for input `x`. 
`layer` is specified by layer index (between 0 and 244 | `nb_layers - 1`) or by name. The number of layers can be determined by counting the results returned by 245 | calling `layer_names`. 246 | 247 | :param x: Input for computing the activations. 248 | :type x: `np.ndarray` 249 | :param layer: Layer for computing the activations 250 | :type layer: `int` or `str` 251 | :param batch_size: Size of batches. 252 | :type batch_size: `int` 253 | :return: The output of `layer`, where the first dimension is the batch size corresponding to `x`. 254 | :rtype: `np.ndarray` 255 | """ 256 | raise NotImplementedError 257 | 258 | @abc.abstractmethod 259 | def set_learning_phase(self, train): 260 | """ 261 | Set the learning phase for the backend framework. 262 | 263 | :param train: True to set the learning phase to training, False to set it to prediction. 264 | :type train: `bool` 265 | """ 266 | raise NotImplementedError 267 | 268 | @abc.abstractmethod 269 | def save(self, filename, path=None): 270 | """ 271 | Save a model to file in the format specific to the backend framework. 272 | 273 | :param filename: Name of the file where to store the model. 274 | :type filename: `str` 275 | :param path: Path of the folder where to store the model. If no path is specified, the model will be stored in 276 | the default data location of the library `DATA_PATH`. 277 | :type path: `str` 278 | :return: None 279 | """ 280 | raise NotImplementedError 281 | 282 | def _apply_defences(self, x, y, fit=False): 283 | """ 284 | Apply the defences specified for the classifier in inputs `(x, y)`. 285 | 286 | :param x: Input data, where first dimension is the batch size. 287 | :type x: `np.ndarray` 288 | :param y: Labels for input data, where first dimension is the batch size. 289 | :type y: `np.ndarray` 290 | :param fit: `True` if the defences are applied during training. 291 | :return: Value of the data after applying the defences. 292 | :rtype: `np.ndarray` 293 | """ 294 | if self.defences is not None: 295 | for defence in self.defences: 296 | if fit: 297 | if defence.apply_fit: 298 | x, y = defence(x, y) 299 | else: 300 | if defence.apply_predict: 301 | x, y = defence(x, y) 302 | 303 | return x, y 304 | 305 | def _apply_defences_gradient(self, x, grad, fit=False): 306 | """ 307 | Apply the backward pass through the preprocessing defences. 308 | 309 | :param x: Input data for which the gradient is estimated. First dimension is the batch size. 310 | :type x: `np.ndarray` 311 | :param grad: Gradient value so far. 312 | :type grad: `np.ndarray` 313 | :param fit: `True` if the gradient is computed during training. 314 | :return: Value of the gradient. 315 | :rtype: `np.ndarray` 316 | """ 317 | if self.defences is not None: 318 | for defence in self.defences[::-1]: 319 | if fit: 320 | if defence.apply_fit: 321 | grad = defence.estimate_gradient(x, grad) 322 | else: 323 | if defence.apply_predict: 324 | grad = defence.estimate_gradient(x, grad) 325 | 326 | return grad 327 | 328 | def _apply_processing(self, x): 329 | """ 330 | Apply the data preprocessing / normalization steps specified for the classifier on `x`. 331 | 332 | :param x: Input data, where first dimension is the batch size. 333 | :type x: `np.ndarray` 334 | :return: Value of the preprocessed data. 
335 | :rtype: `np.ndarray` 336 | """ 337 | sub, div = self.preprocessing 338 | sub = np.asarray(sub, dtype=x.dtype) 339 | div = np.asarray(div, dtype=x.dtype) 340 | 341 | res = x - sub 342 | res = res / div 343 | 344 | return res 345 | 346 | def _apply_processing_gradient(self, grad): 347 | """ 348 | Apply the backward pass through the data preprocessing / normalization steps. 349 | 350 | :param grad: Gradient value so far. 351 | :type grad: `np.ndarray` 352 | :return: Value of the gradient. 353 | :rtype: `np.ndarray` 354 | """ 355 | _, div = self.preprocessing 356 | div = np.asarray(div, dtype=grad.dtype) 357 | res = grad / div 358 | return res 359 | 360 | def __repr__(self): 361 | repr_ = "%s(channel_index=%r, clip_values=%r, defences=%r, preprocessing=%r)" \ 362 | % (self.__module__ + '.' + self.__class__.__name__, 363 | self.channel_index, self.clip_values, self.defences, self.preprocessing) 364 | 365 | return repr_ 366 | -------------------------------------------------------------------------------- /src/visualization.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | # 3 | # Copyright (C) IBM Corporation 2018 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 6 | # documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 7 | # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit 8 | # persons to whom the Software is furnished to do so, subject to the following conditions: 9 | # 10 | # The above copyright notice and this permission notice shall be included in all copies or substantial portions of the 11 | # Software. 12 | # 13 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE 14 | # WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 15 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 16 | # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 17 | # SOFTWARE. 18 | """ 19 | Module providing visualization functions. 20 | """ 21 | from __future__ import absolute_import, division, print_function, unicode_literals 22 | 23 | import logging 24 | import os.path 25 | 26 | from sklearn.manifold import TSNE 27 | from sklearn.datasets import load_iris, load_digits 28 | from sklearn.decomposition import PCA 29 | import matplotlib.pyplot as plt 30 | import numpy as np 31 | 32 | from utils import * 33 | 34 | # import sys 35 | # sys.path.append(os.path.dirname(__file__) + os.sep + './') 36 | 37 | logger = logging.getLogger(__name__) 38 | 39 | 40 | def create_sprite(images): 41 | """ 42 | Creates a sprite of provided images. 43 | 44 | :param images: Images to construct the sprite. 45 | :type images: `np.array` 46 | :return: An image array containing the sprite. 
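For `N` input images the sprite is a `ceil(sqrt(N)) x ceil(sqrt(N))` grid of thumbnails, padded with black images when `N` is not a perfect square.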
47 | :rtype: `np.ndarray` 48 | """ 49 | shape = np.shape(images) 50 | 51 | if len(shape) < 3 or len(shape) > 4: 52 | raise ValueError('Images provided for sprite have wrong dimensions ' + str(len(shape))) 53 | 54 | if len(shape) == 3: 55 | # Check to see if it's mnist type of images and add axis to show image is gray-scale 56 | images = np.expand_dims(images, axis=3) 57 | shape = np.shape(images) 58 | 59 | # Change black and white images to RGB 60 | if shape[3] == 1: 61 | images = convert_to_rgb(images) 62 | 63 | n = int(np.ceil(np.sqrt(len(images)))) 64 | padding = ((0, n ** 2 - images.shape[0]), (0, 0), (0, 0)) + ((0, 0),) * (images.ndim - 3) 65 | images = np.pad(images, padding, mode='constant', constant_values=0) 66 | 67 | # Tile the individual thumbnails into an image 68 | images = images.reshape((n, n) + images.shape[1:]).transpose((0, 2, 1, 3) + tuple(range(4, images.ndim + 1))) 69 | images = images.reshape((n * images.shape[1], n * images.shape[3]) + images.shape[4:]) 70 | 71 | if images.max() > 2: 72 | sprite = images 73 | else: 74 | sprite = (images * 255).astype(np.uint8) 75 | 76 | return np.array(sprite) 77 | 78 | 79 | def convert_to_rgb(images): 80 | """ 81 | Converts grayscale images to RGB. It changes NxHxWx1 to a NxHxWx3 array, where N is the number of figures, 82 | H is the height and W the width. 83 | 84 | :param images: Grayscale images of shape (NxHxWx1). 85 | :type images: `np.ndarray` 86 | :return: Images in RGB format of shape (NxHxWx3). 87 | :rtype: `np.ndarray` 88 | """ 89 | dims = np.shape(images) 90 | if not ((len(dims) == 4 and dims[-1] == 1) or len(dims) == 3): 91 | raise ValueError('Unexpected shape for grayscale images:' + str(dims)) 92 | 93 | if dims[-1] == 1: 94 | # Squeeze channel axis if it exists 95 | rgb_images = np.squeeze(images, axis=-1) 96 | else: 97 | rgb_images = images 98 | rgb_images = np.stack((rgb_images,) * 3, axis=-1) 99 | 100 | return rgb_images 101 | 102 | 103 | def save_image(image, f_name): 104 | """ 105 | Saves image into a file inside the clustering result directory (`clutser_result`) with the name `f_name`. 106 | 107 | :param image: Image to be saved 108 | :type image: `np.ndarray` 109 | :param f_name: File name containing extension e.g., my_img.jpg, my_img.png, my_images/my_img.png 110 | :type f_name: `str` 111 | :return: `None` 112 | """ 113 | file_name = os.path.join(clutser_result, f_name) 114 | folder = os.path.split(file_name)[0] 115 | if not os.path.exists(folder): 116 | os.makedirs(folder) 117 | 118 | from PIL import Image 119 | im = Image.fromarray(image) 120 | im.save(file_name) 121 | logger.info('Image saved to %s.', file_name) 122 | 123 | 124 | def plot_3d(points, labels, colors=None, save=True, f_name=''): 125 | """ 126 | Generates a 3-D plot of the provided points, where the labels define the 127 | color that will be used to color each data point. 128 | Concretely, the color of points[i] is defined by colors(labels[i]). 129 | Thus, there should be as many labels as colors. 130 | 131 | :param points: arrays with 3-D coordinates of the plots to be plotted 132 | :type points: `np.ndarray` 133 | :param labels: array of integers that determines the color used in the plot for the data point. 134 | Need to start from 0 and be sequential from there on. 135 | :type labels: `lst` 136 | :param colors: Optional argument to specify colors to be used in the plot. If provided, this array should contain 137 | as many colors as labels. 138 | :type colors: `lst` 139 | :param save: When set to True, saves image into a file inside the clustering result directory with the name `f_name`.
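A typical call (variable names hypothetical): `plot_3d(reduced_activations, cluster_labels, save=True, f_name='class_0_clusters.png')`.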
140 | :type save: `bool` 141 | :param f_name: Name used to save the file when save is set to True 142 | :type f_name: `str` 143 | :return: fig 144 | :rtype: `matplotlib.figure.Figure` 145 | """ 146 | try: 147 | import matplotlib 148 | import matplotlib.pyplot as plt 149 | from mpl_toolkits import mplot3d 150 | 151 | if colors is None: 152 | colors = [] 153 | for i in range(len(np.unique(labels))): 154 | colors.append('C' + str(i)) 155 | else: 156 | if len(colors) != len(np.unique(labels)): 157 | raise ValueError('The amount of provided colors should match the number of labels in the 3-D plot.') 158 | 159 | fig = plt.figure() 160 | ax = plt.axes(projection='3d') 161 | 162 | for i, coord in enumerate(points): 163 | try: 164 | color_point = labels[i] 165 | ax.scatter3D(coord[0], coord[1], coord[2], color=colors[color_point]) 166 | except IndexError: 167 | raise ValueError('Labels outside the range. Should start from zero and be sequential thereafter') 168 | if save: 169 | file_name = os.path.realpath(os.path.join(clutser_result, f_name)) 170 | folder = os.path.split(file_name)[0] 171 | 172 | if not os.path.exists(folder): 173 | os.makedirs(folder) 174 | fig.savefig(file_name, bbox_inches='tight') 175 | logger.info('3d-plot saved to %s.', file_name) 176 | 177 | return fig 178 | except ImportError: 179 | logger.warning("matplotlib not installed. For this reason, cluster visualization was not displayed.") 180 | 181 | 182 | def visualize_img_without_backdoor(img, label_org, label_pre, is_train="Train"): 183 | try: 184 | import matplotlib 185 | import matplotlib.pyplot as plt 186 | except ImportError: 187 | print("matplotlib not installed. For this reason, cluster visualization was not displayed") 188 | img = np.squeeze(img) 189 | plt.figure() 190 | plt.subplot(1, 2, 1) 191 | plt.axis('off') 192 | # print(img.shape) 193 | if len(img.shape) == 2: 194 | plt.imshow(img, cmap="gray") 195 | else: 196 | plt.imshow(img) 197 | plt.subplot(1, 2, 2) 198 | plt.axis('off') 199 | plt.text(0, 0.65, 'data set: ' + is_train, fontsize=20) 200 | plt.text(0, 0.55, 'original label: ' + str(label_org), fontsize=20) 201 | plt.text(0, 0.45, 'predicted label: ' + str(label_pre), fontsize=20) 202 | plt.show() 203 | 204 | 205 | def save_png(img, idx): 206 | plt.figure() 207 | plt.axis('off') 208 | if len(img.shape) == 2: 209 | plt.imshow(img, cmap="gray") 210 | else: 211 | plt.imshow(img) 212 | plt.savefig('../log/20200115/' + '_'.join([get_date(), get_signature(), str(idx)]) + '.png', format='png') 213 | 214 | 215 | def save_eps(img_backdoor): 216 | plt.figure() 217 | plt.axis('off') 218 | if len(img_backdoor.shape) == 2: 219 | plt.imshow(img_backdoor, cmap="gray") 220 | else: 221 | plt.imshow(img_backdoor) 222 | plt.savefig('../log/20191218/' + '_'.join([get_date(), get_signature()]) + '.eps', format='eps') 223 | 224 | 225 | def visualize_img_with_backdoor(img_orig, label_org, label_pre, img_backdoor, backdoor, is_train='Train'): 226 | try: 227 | import matplotlib 228 | import matplotlib.pyplot as plt 229 | except ImportError: 230 | print("matplotlib not installed.
For this reason, cluster visualization was not displayed") 231 | 232 | img_orig = np.squeeze(img_orig) 233 | img_backdoor = np.squeeze(img_backdoor) 234 | 235 | save_eps(img_backdoor) 236 | 237 | plt.figure() 238 | plt.subplot(1, 3, 1) 239 | plt.axis('off') 240 | # print(img.shape) 241 | if len(img_orig.shape) == 2: 242 | plt.imshow(img_orig, cmap="gray") 243 | else: 244 | plt.imshow(img_orig) 245 | 246 | plt.subplot(1, 3, 2) 247 | plt.axis("off") 248 | if len(img_backdoor.shape) == 2: 249 | plt.imshow(img_backdoor, cmap="gray") 250 | else: 251 | plt.imshow(img_backdoor) 252 | plt.subplot(1, 3, 3) 253 | plt.axis('off') 254 | plt.text(0, 0.65, 'data set: ' + is_train, fontsize=18) 255 | plt.text(0, 0.55, 'original label: ' + str(label_org), fontsize=18) 256 | plt.text(0, 0.45, 'predicted label: ' + str(label_pre), fontsize=18) 257 | plt.text(0, 0.35, str(label_org) + " --> " + str(backdoor), fontsize=18) 258 | 259 | plt.show() 260 | 261 | 262 | def cal_index(self, idx, is_train=True): 263 | if is_train: 264 | return idx - len(self.random_selection_indices) 265 | else: 266 | return idx - len(self.y_test) + len(self.test_poisoned_index) 267 | 268 | 269 | def t_sne(digits_data=None, digits_target=None): 270 | # digits = load_digits() 271 | # X_tsne = TSNE(n_components=2, random_state=33).fit_transform(digits.data) 272 | # X_pca = PCA(n_components=2).fit_transform(digits.data) 273 | 274 | X_tsne = TSNE(n_components=2, random_state=33).fit_transform(digits_data) 275 | X_pca = PCA(n_components=2).fit_transform(digits_data) 276 | 277 | font = {"color": "darkred", 278 | "size": 13, 279 | "family": "serif"} 280 | 281 | plt.style.use("ggplot") 282 | plt.figure(figsize=(8.5, 4)) 283 | plt.subplot(1, 2, 1) 284 | 285 | # plt.scatter(X_tsne[:, 0], X_tsne[:, 1], c=digits.target, alpha=0.6, 286 | # cmap=plt.cm.get_cmap('rainbow', 10)) 287 | 288 | plt.scatter(X_tsne[:, 0], X_tsne[:, 1], c=digits_target, alpha=0.6, 289 | cmap=plt.cm.get_cmap('rainbow', 10)) 290 | 291 | plt.title("t-SNE", fontdict=font) 292 | cbar = plt.colorbar(ticks=range(10)) 293 | cbar.set_label(label='digit value', fontdict=font) 294 | plt.clim(-0.5, 9.5) 295 | plt.subplot(1, 2, 2) 296 | 297 | # plt.scatter(X_pca[:, 0], X_pca[:, 1], c=digits.target, alpha=0.6, 298 | # cmap=plt.cm.get_cmap('rainbow', 10)) 299 | 300 | plt.scatter(X_pca[:, 0], X_pca[:, 1], c=digits_target, alpha=0.6, 301 | cmap=plt.cm.get_cmap('rainbow', 10)) 302 | 303 | plt.title("PCA", fontdict=font) 304 | cbar = plt.colorbar(ticks=range(10)) 305 | cbar.set_label(label='digit value', fontdict=font) 306 | plt.clim(-0.5, 9.5) 307 | plt.tight_layout() 308 | 309 | check_dir(tsne_result) 310 | 311 | plt.savefig(os.path.join(tsne_result, '_'.join(['t_sne', get_date(), get_signature()]))) 312 | plt.show() 313 | 314 | 315 | def t_sne_vis(digits_data=None, digits_target=None): 316 | # digits = load_digits() 317 | # X_tsne = TSNE(n_components=2, random_state=33).fit_transform(digits.data) 318 | # X_pca = PCA(n_components=2).fit_transform(digits.data) 319 | 320 | 321 | X_tsne = TSNE(n_components=2, random_state=33).fit_transform(digits_data) 322 | 323 | font = {"color": "darkred", 324 | "size": 13, 325 | "family": "serif"} 326 | 327 | plt.style.use("ggplot") 328 | plt.figure(figsize=(8.5, 8.5)) 329 | # plt.axis('off') 330 | # plt.subplot(1, 2, 1) 331 | 332 | colors = ['b', 'c', 'y', 'm', 'r'] 333 | 334 | # plt.scatter(X_tsne[:, 0], X_tsne[:, 1], c=digits.target, alpha=0.6, 335 | # cmap=plt.cm.get_cmap('rainbow', 10)) 336 | 337 | lo = plt.scatter(X_tsne[:, 
0][np.where(digits_target==0)[0]], 338 | X_tsne[:, 1][np.where(digits_target==0)[0]], 339 | alpha=0.6, 340 | color=colors[0]) 341 | 342 | ll = plt.scatter(X_tsne[:, 0][np.where(digits_target == 1)[0]], 343 | X_tsne[:, 1][np.where(digits_target == 1)[0]], 344 | alpha=0.6, 345 | color=colors[1]) 346 | 347 | # plt.scatter(X_tsne[:, 0], X_tsne[:, 1], c=digits_target, alpha=0.6, 348 | # cmap=plt.cm.get_cmap('rainbow', 10) 349 | # ) 350 | 351 | plt.legend((lo, ll), 352 | ('clean','poison'), 353 | scatterpoints=1, 354 | loc='upper left') 355 | # plt.title("t-SNE", fontdict=font) 356 | # cbar = plt.colorbar(ticks=range(10)) 357 | # cbar.set_label(label='digit value', fontdict=font) 358 | # plt.clim(-0.5, 9.5) 359 | # plt.subplot(1, 2, 2) 360 | 361 | # plt.scatter(X_pca[:, 0], X_pca[:, 1], c=digits.target, alpha=0.6, 362 | # cmap=plt.cm.get_cmap('rainbow', 10)) 363 | 364 | # plt.scatter(X_pca[:, 0], X_pca[:, 1], c=digits_target, alpha=0.6, 365 | # cmap=plt.cm.get_cmap('rainbow', 10)) 366 | 367 | # plt.title("PCA", fontdict=font) 368 | # cbar = plt.colorbar(ticks=range(10)) 369 | # cbar.set_label(label='digit value', fontdict=font) 370 | # plt.clim(-0.5, 9.5) 371 | # plt.tight_layout() 372 | 373 | check_dir(tsne_result) 374 | 375 | plt.savefig(os.path.join(tsne_result, '_'.join(['t_sne', get_date(), get_signature()])) + '.eps', format='eps') 376 | plt.show() 377 | # plt.savefig(os.path.join(tsne_result, '_'.join(['t_sne', get_date(), get_signature()]))) 378 | 379 | 380 | def pca_vis(digits_data=None, digits_target=None): 381 | X_pca = PCA(n_components=2).fit_transform(digits_data) 382 | 383 | plt.style.use("ggplot") 384 | plt.figure(figsize=(8.5, 8.5)) 385 | 386 | colors = ['b', 'c', 'y', 'm', 'r'] 387 | 388 | lo = plt.scatter(X_pca[:, 0][np.where(digits_target == 0)[0]], 389 | X_pca[:, 1][np.where(digits_target == 0)[0]], 390 | alpha=0.6, 391 | color=colors[0]) 392 | 393 | ll = plt.scatter(X_pca[:, 0][np.where(digits_target == 1)[0]], 394 | X_pca[:, 1][np.where(digits_target == 1)[0]], 395 | alpha=0.6, 396 | color=colors[1]) 397 | 398 | plt.legend((lo, ll), 399 | ('clean', 'poison'), 400 | scatterpoints=1, 401 | loc='upper left') 402 | 403 | check_dir(tsne_result) 404 | 405 | plt.savefig(os.path.join(tsne_result, '_'.join(['pca', get_date(), get_signature()])) + '.eps', format='eps') 406 | plt.show() 407 | # plt.savefig(os.path.join(tsne_result, '_'.join(['pca', get_date(), get_signature()]))) 408 | 409 | 410 | def save_visualize_autoencoder(x_test, decoded_imgs): 411 | n = 10 412 | for i in range(1, n + 1): 413 | save_eps(np.squeeze(x_test[i])) 414 | save_eps(np.squeeze(decoded_imgs[i])) 415 | 416 | -------------------------------------------------------------------------------- /src/backdoor.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | import numpy as np 4 | 5 | from utils import * 6 | import copy 7 | 8 | 9 | class Backdoor: 10 | def __init__(self, conf): 11 | self.train_poison_rate = conf['train_poison_rate'] 12 | self.test_poison_rate = conf['test_poison_rate'] 13 | self.backdoor_type = conf['backdoor_type'] 14 | self.pert_path = conf['pert_path'] 15 | self.poison = None 16 | self.pert = None 17 | self.conf = conf 18 | self.distortion = [] 19 | def generate_backdoor(self, 20 | x_clean, 21 | y_clean, 22 | percent_poison, 23 | sources=np.arange(10), 24 | targets=(np.arange(10) + 1) % 10, 25 | data_dir=None): 26 | """ 27 | Creates a backdoor in MNIST images by adding a pattern or pixel to the image and changing the label to 
a targeted 28 | class. Default parameters poison each digit so that it gets classified to the next digit. 29 | 30 | :param x_clean: Original raw data 31 | :type x_clean: `np.ndarray` 32 | :param y_clean: Original labels 33 | :type y_clean: `np.ndarray` 34 | :param percent_poison: After poisoning, the target class should contain this percentage of poison 35 | :type percent_poison: `float` 36 | :param backdoor_type: Backdoor type can be `pixel`, `pattern` or `adversarial`; note that it is read from the configuration rather than passed as an argument. 37 | :type backdoor_type: `str` 38 | :param sources: Array that holds the source classes for each backdoor. Poison is 39 | generated by taking images from the source class, adding the backdoor trigger, and labeling as the target class. 40 | Poisonous images from sources[i] will be labeled as targets[i]. 41 | :type sources: `np.ndarray` 42 | :param targets: This array holds the target classes for each backdoor. Poisonous images from sources[i] will be 43 | labeled as targets[i]. 44 | :return: Returns is_poison, which is a boolean array indicating which points are poisonous, poison_x, which 45 | contains all of the data both legitimate and poisoned, and poison_y, which contains all of the labels 46 | both legitimate and poisoned. 47 | :rtype: `tuple` 48 | """ 49 | 50 | 51 | y_poison = np.copy(y_clean) 52 | is_poison = np.zeros(np.shape(y_poison)) 53 | 54 | # for i, (src, tgt) in enumerate(zip(sources, targets)): 55 | 56 | 57 | 58 | # num_poison = round((percent_poison * n_points_in_tgt) / (1 - percent_poison)) 59 | if type(sources) is list: 60 | y_clean_position = [] 61 | n_points_in_src = 0 62 | for sou in sources: 63 | n_points_in_src += np.sum(y_clean == sou) 64 | y_clean_position.append(np.where(y_clean == sou)[0]) 65 | y_clean_position = np.concatenate(y_clean_position, axis=0) 66 | else: 67 | n_points_in_src = np.sum(y_clean == sources) 68 | y_clean_position = np.where(y_clean == sources)[0] 69 | if type(targets) is list: 70 | n_points_in_tgt = 0 71 | for tar in targets: 72 | n_points_in_tgt += np.sum(y_clean == tar) 73 | else: 74 | n_points_in_tgt = np.sum(y_clean == targets) 75 | 76 | self.percent_poison = percent_poison 77 | self.n_points_in_tgt = n_points_in_tgt 78 | self.n_points_in_src = n_points_in_src 79 | # self.backdoor_type = backdoor_type 80 | 81 | self.sources = sources 82 | self.targets = targets 83 | 84 | # generate 85 | # 1. number of poison 86 | # 2.
indices to be poisoned 87 | 88 | 89 | self.gen_poison(y_clean_position) 90 | 91 | if isinstance(x_clean[0], str): 92 | x_poison = x_clean 93 | imgs_p = [x_clean[i] for i in self.poison.get_indices_to_be_poisoned()] 94 | inds_save = np.setdiff1d(np.arange(len(x_clean)), self.poison.get_indices_to_be_poisoned()) 95 | x_poison = x_poison[inds_save] 96 | for f in imgs_p: 97 | # BGR->RGB 98 | img = cv2.imread(os.path.join(data_dir, f))[:, :, ::-1] 99 | img = cv2.resize(img, (self.conf['train_image_size'], self.conf['train_image_size'])) 100 | img = preprocess_input_vgg(img) 101 | img = self.add_backdoor_on_imgs(img) 102 | img = deprocess_vgg(img) 103 | poison_f = f[:-4] + '_poison' + f[-4:] 104 | poison_f = os.path.join(self.conf['poison_target_name'], os.path.split(poison_f)[1]) 105 | x_poison.append(poison_f) 106 | # RGB->BGR 107 | cv2.imwrite(os.path.join(data_dir, poison_f), img[:, :, ::-1]) 108 | 109 | else: 110 | x_poison = np.copy(x_clean) 111 | imgs_p = np.copy(x_clean[self.poison.get_indices_to_be_poisoned()]) 112 | max_val = np.max(x_clean) 113 | # inds_save = np.setdiff1d(np.arange(len(x_clean)), self.poison.get_indices_to_be_poisoned()) 114 | imgs_to_be_poisoned = self.add_backdoor_on_imgs(imgs_p) 115 | # x_poison = x_poison[inds_save] 116 | x_poison = np.append(x_poison, imgs_to_be_poisoned, axis=0) 117 | # label_p = np.copy(y_clean[self.poison.get_indices_to_be_poisoned()]) 118 | y_poison = np.append(y_poison, np.ones((self.poison.get_num_poison())) * self.targets) 119 | is_poison = np.append(is_poison, np.ones(self.poison.get_num_poison())) 120 | 121 | is_poison = is_poison != 0 122 | 123 | return is_poison, x_poison, y_poison 124 | 125 | # restore poison from serialized model 126 | def restore_backdoor(self, 127 | x_clean, 128 | y_clean, 129 | poison, 130 | data_dir=None): 131 | if isinstance(x_clean[0], str): 132 | imgs_poison = [x_clean[i] for i in poison.get_indices_to_be_poisoned()] 133 | x_poison = x_clean 134 | imgs_to_be_poisoned = [] 135 | is_poison = np.zeros(np.shape(y_clean), dtype=np.int32) 136 | for f in imgs_poison: 137 | poison_f = f[:-4] + '_poison' + f[-4:] 138 | poison_f = os.path.join(self.conf['poison_target_name'], os.path.split(poison_f)[1]) 139 | if not os.path.exists(os.path.join(data_dir, poison_f)): 140 | img = cv2.imread(os.path.join(data_dir, f))[:, :, ::-1] 141 | img = cv2.resize(img, (self.conf['train_image_size'], self.conf['train_image_size'])) 142 | img = preprocess_input_vgg(img) 143 | img = self.add_backdoor_on_imgs(img) 144 | img = deprocess_vgg(img) 145 | cv2.imwrite(os.path.join(data_dir, poison_f), img[:, :, ::-1]) 146 | imgs_to_be_poisoned.append(poison_f) 147 | 148 | inds_save = np.setdiff1d(np.arange(len(x_clean)), poison.get_indices_to_be_poisoned()) 149 | x_poison = x_poison[inds_save] 150 | x_poison += imgs_to_be_poisoned 151 | y_poison = np.append(y_clean, np.ones(poison.get_num_poison()) * poison.get_targets(), axis=0) 152 | is_poison = np.append(is_poison, np.ones(poison.get_num_poison())) 153 | else: 154 | x_poison = np.copy(x_clean) 155 | if len(y_clean.shape) == 1: 156 | y_poison = np.copy(y_clean) 157 | else: 158 | y_poison = np.argmax(y_clean, axis=1) 159 | 160 | is_poison = np.zeros(np.shape(y_poison)) 161 | # print(y_clean) 162 | # print(poison.get_sources()) 163 | 164 | # no need to generate poison 165 | # we get the poison from the serialized model directly 166 | 167 | imgs_to_be_poisoned = np.copy(x_clean[poison.get_indices_to_be_poisoned()]) 168 | # inds_save = np.setdiff1d(np.arange(len(x_clean)),
poison.get_indices_to_be_poisoned()) 169 | imgs_to_be_poisoned = self.add_backdoor_on_imgs(imgs_to_be_poisoned) 170 | # label_p = np.copy(y_clean[self.poison.get_indices_to_be_poisoned()]) 171 | # x_poison = x_poison[inds_save] 172 | x_poison = np.append(x_poison, imgs_to_be_poisoned, axis=0) 173 | y_poison = np.append(y_poison, np.ones(poison.get_num_poison()) * poison.get_targets(), axis=0) 174 | is_poison = np.append(is_poison, np.ones(poison.get_num_poison())) 175 | 176 | is_poison = is_poison != 0 177 | 178 | return is_poison, x_poison, y_poison 179 | 180 | def add_backdoor_on_imgs(self, imgs_to_be_poisoned, max_val=255): 181 | if self.backdoor_type == 'pattern': 182 | imgs_to_be_poisoned = self.add_pattern_bd(x=imgs_to_be_poisoned, pixel_value=max_val) 183 | elif self.backdoor_type == 'pixel': 184 | imgs_to_be_poisoned = self.add_single_bd(x=imgs_to_be_poisoned, pixel_value=max_val) 185 | elif self.backdoor_type == 'adversarial': 186 | # load perturbation 187 | if self.pert is None: 188 | self.pert = deserialize_pert(self.pert_path, self.conf['alpha_pert']) 189 | 190 | if self.conf['model_prefix'] in models_noLoad: 191 | self.pert = (self.pert * 255).astype(np.int32) 192 | 193 | 194 | imgs_to_be_poisoned = self.add_adversarial_perturbation(x=imgs_to_be_poisoned) 195 | return imgs_to_be_poisoned 196 | 197 | def gen_poison(self, y_idx): 198 | num_poison = int(self.percent_poison * self.n_points_in_tgt) 199 | num_poison = min(num_poison, self.n_points_in_src) 200 | indices_to_be_poisoned = np.arange(self.n_points_in_src) 201 | np.random.shuffle(indices_to_be_poisoned) 202 | indices_to_be_poisoned = y_idx[indices_to_be_poisoned[:num_poison]] 203 | self.poison = Poison(num_poison, 204 | indices_to_be_poisoned, 205 | self.backdoor_type, 206 | self.sources, 207 | self.targets, 208 | self.percent_poison) 209 | 210 | def add_single_bd(self, x, distance=2, pixel_value=1): 211 | """ 212 | Augments a matrix by setting a value some `distance` away from the bottom-right edge to 1. Works for single images 213 | or a batch of images. 214 | :param x: N X W X H matrix or W X H matrix. Will apply to the last two dimensions. 215 | :type x: `np.ndarray` 216 | 217 | :param distance: distance from bottom-right walls. defaults to 2 218 | :type distance: `int` 219 | 220 | :param pixel_value: Value used to replace the entries of the image matrix 221 | :type pixel_value: `int` 222 | 223 | :return: augmented matrix 224 | :rtype: `np.ndarray` 225 | """ 226 | x = np.array(x) 227 | shape = x.shape 228 | if len(shape) == 4: 229 | width, height = x.shape[1:3] 230 | # x[:, width - distance, height - distance] = pixel_value 231 | x = x.astype(np.int32) 232 | x[:, 0::2, 0::2, :] += 5 233 | x = np.clip(x, 0, 255) 234 | x = x.astype(np.uint8) 235 | elif len(shape) == 3: 236 | width, height, c = x.shape 237 | x[width - distance, height - distance, :] = pixel_value 238 | else: 239 | raise RuntimeError('Do not support numpy arrays of shape ' + str(shape)) 240 | return x 241 | 242 | def add_pattern_bd(self, x, distance=4, pixel_value=1): 243 | """ 244 | Augments a matrix by setting a checkerboard-like pattern of values some `distance` away from the bottom-right 245 | edge to 1. Works for single images or a batch of images. 246 | :param x: N X W X H matrix or W X H matrix. Will apply to the last two dimensions. 247 | :type x: `np.ndarray` 248 | :param distance: distance from bottom-right walls.
defaults to 2 249 | :type distance: `int` 250 | :param pixel_value: Value used to replace the entries of the image matrix 251 | :type pixel_value: `int` 252 | :return: augmented matrix 253 | :rtype: np.ndarray 254 | """ 255 | x = np.array(x) 256 | shape = x.shape 257 | if len(shape) == 4: 258 | width, height = x.shape[1:-1] 259 | # x[:, width - distance, height - distance,:] = pixel_value 260 | # x[:, width - distance - 1, height - distance - 1,:] = pixel_value 261 | # x[:, width - distance , height - distance - 1,:] = pixel_value 262 | # x[:, width - distance - 1, height - distance,:] = pixel_value 263 | # x[:, width - distance, height - distance - 2,:] = pixel_value 264 | # x[:, width - distance - 2, height - distance,:] = pixel_value 265 | if self.conf['model_prefix'] == "GTSRB": 266 | x[:, width-distance - 2 : width-distance + 2, height - distance - 2 : height - distance + 2, :] = [255,255,0] 267 | # x[:, width-distance - 2 : width-distance + 2, height - distance - 2 : height - distance + 2, 2] = pixel_value 268 | distance = 15 269 | # x[:, width-distance + 9: width-distance + 12, height - distance - 2 : height - distance + 1, 0:2] = pixel_value 270 | # x[:, width-distance + 9 : width-distance + 12, height - distance - 2 : height - distance + 1, :] = [255, 255, 0] 271 | else: 272 | x[:, width-distance - 2 : width-distance + 2, height - distance - 2 : height - distance + 2,:] = pixel_value 273 | elif len(shape) == 3: 274 | width, height = x.shape[1:] 275 | x[:, width - distance, height - distance] = pixel_value 276 | x[:, width - distance - 1, height - distance - 1] = pixel_value 277 | x[:, width - distance, height - distance - 2] = pixel_value 278 | x[:, width - distance - 2, height - distance] = pixel_value 279 | elif len(shape) == 2: 280 | width, height = x.shape 281 | x[width - distance, height - distance] = pixel_value 282 | x[width - distance - 1, height - distance - 1] = pixel_value 283 | x[width - distance, height - distance - 2] = pixel_value 284 | x[width - distance - 2, height - distance] = pixel_value 285 | else: 286 | raise RuntimeError('Do not support numpy arrays of shape ' + str(shape)) 287 | return x 288 | 289 | def add_adversarial_perturbation(self, x): 290 | 291 | x = np.array(x) 292 | origin_x = copy.copy(x) 293 | ''' 294 | for i in range(20): 295 | save_png(np.squeeze(x[i]), i) 296 | ''' 297 | # x = x.astype(np.int32) 298 | shape = x.shape 299 | 300 | if self.conf['model_prefix'] in models_noLoad: 301 | x = x.astype(np.int32) 302 | 303 | if len(shape) == 3: 304 | # x.shape = (140,28,28) 305 | # self.pert.shape = (1,28,28,1) 306 | x[:, ] += np.squeeze(self.pert) 307 | elif len(shape) == 2: 308 | # x.shape = (140,28,28) 309 | # self.pert.shape = (1,28,28,1) 310 | x += self.pert 311 | elif len(shape) == 4: 312 | x += self.pert 313 | # make sure the value range [0,255] 314 | 315 | if self.conf['model_prefix'] in models_noLoad: 316 | x = np.clip(x, 0, 255) 317 | # dis = np.abs(x - origin_x) 318 | # self.distortion.append(dis) 319 | ''' 320 | for i in range(20): 321 | save_png(np.squeeze(x[i]), i) 322 | ''' 323 | 324 | return x #, dis 325 | 326 | def get_poison(self): 327 | return self.poison 328 | 329 | def set_poison(self, poison): 330 | self.poison = poison 331 | 332 | def get_pert_path(self): 333 | return self.pert_path 334 | 335 | def set_pert_path(self, pert_path): 336 | self.pert_path = pert_path 337 | -------------------------------------------------------------------------------- /src/poison_detection/clustering_analyzer.py: 
-------------------------------------------------------------------------------- 1 | # MIT License 2 | # 3 | # Copyright (C) IBM Corporation 2018 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 6 | # documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 7 | # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit 8 | # persons to whom the Software is furnished to do so, subject to the following conditions: 9 | # 10 | # The above copyright notice and this permission notice shall be included in all copies or substantial portions of the 11 | # Software. 12 | # 13 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE 14 | # WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 15 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 16 | # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 17 | # SOFTWARE. 18 | from __future__ import absolute_import, division, print_function, unicode_literals 19 | 20 | import logging 21 | 22 | import numpy as np 23 | 24 | logger = logging.getLogger(__name__) 25 | 26 | 27 | class ClusteringAnalyzer: 28 | """ 29 | Class for all methodologies implemented to analyze clusters and determine whether they are poisonous. 30 | """ 31 | 32 | def __init__(self): 33 | """ 34 | Constructor 35 | """ 36 | 37 | @staticmethod 38 | def assign_class(clusters, clean_clusters, poison_clusters): 39 | """ 40 | Determines whether each data point in the class is in a clean or poisonous cluster. 41 | 42 | :param clusters: clusters[i] indicates which cluster the i'th data point is in 43 | :type clusters: `list` 44 | :param clean_clusters: list containing the clusters designated as clean 45 | :type clean_clusters: `list` 46 | :param poison_clusters: list containing the clusters designated as poisonous 47 | :type poison_clusters: `list` 48 | :return: assigned_clean: assigned_clean[i] is a boolean indicating whether the ith data point is clean 49 | """ 50 | assigned_clean = np.empty(np.shape(clusters)) 51 | assigned_clean[np.isin(clusters, clean_clusters)] = 1 52 | assigned_clean[np.isin(clusters, poison_clusters)] = 0 53 | return assigned_clean 54 | 55 | def analyze_by_size(self, separated_clusters): 56 | """ 57 | Designates as poisonous the cluster with the fewest items in it.
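For example, with two classes whose cluster assignments are `[0, 0, 0, 1]` and `[1, 1, 0, 1]`, cluster 1 of class 0 and cluster 0 of class 1 are flagged as poisonous, since each is the smaller cluster of its class.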
58 | 59 | :param separated_clusters: list where separated_clusters[i] is the cluster assignments for the ith class 60 | :type separated_clusters: `list` 61 | :return: all_assigned_clean, summary_poison_clusters, report: 62 | where all_assigned_clean[i] is a 1D boolean array indicating whether 63 | a given data point was determined to be clean (as opposed to poisonous) and 64 | summary_poison_clusters: array, where summary_poison_clusters[i][j]=1 if cluster j of class i was classified as 65 | poison, otherwise 0 66 | report: Dictionary with summary of the analysis 67 | :rtype: all_assigned_clean: `ndarray`, summary_poison_clusters: `list`, report" `dic` 68 | """ 69 | report = {'cluster_analysis': 'smaller', 70 | 'suspicious_clusters': 0 71 | } 72 | 73 | all_assigned_clean = [] 74 | nb_classes = len(separated_clusters) 75 | nb_clusters = len(np.unique(separated_clusters[0])) 76 | summary_poison_clusters = [[[] for x in range(nb_clusters)] for y in range(nb_classes)] 77 | 78 | for i, clusters in enumerate(separated_clusters): 79 | 80 | # assume that smallest cluster is poisonous and all others are clean 81 | sizes = np.bincount(clusters) 82 | total_dp_in_class = np.sum(sizes) 83 | poison_clusters = [np.argmin(sizes)] 84 | clean_clusters = list(set(clusters) - set(poison_clusters)) 85 | 86 | for p_id in poison_clusters: 87 | summary_poison_clusters[i][p_id] = 1 88 | for c_id in clean_clusters: 89 | summary_poison_clusters[i][c_id] = 0 90 | 91 | assigned_clean = self.assign_class(clusters, clean_clusters, poison_clusters) 92 | all_assigned_clean.append(assigned_clean) 93 | 94 | # Generate report for this class: 95 | report_class = dict() 96 | for cluster_id in range(nb_clusters): 97 | ptc = sizes[cluster_id]/total_dp_in_class 98 | susp = (cluster_id in poison_clusters) 99 | dict_i = dict(ptc_data_in_cluster=round(ptc, 2), suspicious_cluster=susp) 100 | 101 | dict_cluster = {'cluster_'+str(cluster_id): dict_i} 102 | report_class.update(dict_cluster) 103 | 104 | report['Class_'+str(i)] = report_class 105 | 106 | report['suspicious_clusters'] = report['suspicious_clusters'] + np.sum(summary_poison_clusters).item() 107 | return np.asarray(all_assigned_clean), summary_poison_clusters, report 108 | 109 | def analyze_by_distance(self, separated_clusters, separated_activations): 110 | """ 111 | Assigns a cluster as poisonous if its median activation is closer to the median activation for another class 112 | than it is to the median activation of its own class. Currently, this function assumes there are only 113 | two clusters per class. 
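Concretely, cluster 0 of class i is flagged when it lies closer to some other class's median activation than to its own class's median while cluster 1 stays closer to its own class, and vice versa for cluster 1.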
114 | 115 | :param separated_clusters: list where separated_clusters[i] is the cluster assignments for the ith class 116 | :type separated_clusters: `list` 117 | :param separated_activations: list where separated_activations[i] contains the activations for the ith class 118 | :type separated_activations: `list` 119 | :return: all_assigned_clean, summary_poison_clusters, report: 120 | where all_assigned_clean[i] is a 1D boolean array indicating whether 121 | a given data point was determined to be clean (as opposed to poisonous) and 122 | summary_poison_clusters: array, where summary_poison_clusters[i][j]=1 if cluster j of class i was classified as 123 | poison, otherwise 0 124 | report: Dictionary with summary of the analysis 125 | :rtype: all_assigned_clean: `ndarray`, summary_poison_clusters: `list`, report: `dict` 126 | """ 127 | report = {'cluster_analysis': 'distance' 128 | } 129 | 130 | all_assigned_clean = [] 131 | cluster_centers = [] 132 | 133 | nb_classes = len(separated_clusters) 134 | nb_clusters = len(np.unique(separated_clusters[0])) 135 | summary_poison_clusters = [[[] for x in range(nb_clusters)] for y in range(nb_classes)] 136 | 137 | # assign centers 138 | for t, activations in enumerate(separated_activations): 139 | cluster_centers.append(np.median(activations, axis=0)) 140 | 141 | for i, (clusters, ac) in enumerate(zip(separated_clusters, separated_activations)): 142 | clusters = np.array(clusters) 143 | 144 | cluster0_center = np.median(ac[np.where(clusters == 0)], axis=0) 145 | cluster1_center = np.median(ac[np.where(clusters == 1)], axis=0) 146 | 147 | cluster0_distance = np.linalg.norm(cluster0_center - cluster_centers[i]) 148 | cluster1_distance = np.linalg.norm(cluster1_center - cluster_centers[i]) 149 | 150 | cluster0_is_poison = False 151 | cluster1_is_poison = False 152 | 153 | dict_k = dict() 154 | dict_cluster_0 = dict(cluster0_distance_to_its_class=str(cluster0_distance)) 155 | dict_cluster_1 = dict(cluster1_distance_to_its_class=str(cluster1_distance)) 156 | for k, center in enumerate(cluster_centers): 157 | if k == i: 158 | pass 159 | else: 160 | cluster0_distance_to_k = np.linalg.norm(cluster0_center - center) 161 | cluster1_distance_to_k = np.linalg.norm(cluster1_center - center) 162 | 163 | if cluster0_distance_to_k < cluster0_distance and cluster1_distance_to_k > cluster1_distance: 164 | cluster0_is_poison = True 165 | if cluster1_distance_to_k < cluster1_distance and cluster0_distance_to_k > cluster0_distance: 166 | cluster1_is_poison = True 167 | 168 | dict_cluster_0['distance_to_class_'+str(k)] = str(cluster0_distance_to_k) 169 | dict_cluster_0['suspicious'] = str(cluster0_is_poison) 170 | 171 | dict_cluster_1['distance_to_class_'+str(k)] = str(cluster1_distance_to_k) 172 | dict_cluster_1['suspicious'] = str(cluster1_is_poison) 173 | 174 | dict_k.update(dict_cluster_0) 175 | dict_k.update(dict_cluster_1) 176 | 177 | report_class = dict(cluster_0=dict_cluster_0, cluster_1=dict_cluster_1) 178 | report['Class_' + str(i)] = report_class 179 | 180 | poison_clusters = [] 181 | if cluster0_is_poison: 182 | poison_clusters.append(0) 183 | summary_poison_clusters[i][0] = 1 184 | else: 185 | summary_poison_clusters[i][0] = 0 186 | 187 | if cluster1_is_poison: 188 | poison_clusters.append(1) 189 | summary_poison_clusters[i][1] = 1 190 | else: 191 | summary_poison_clusters[i][1] = 0 192 | 193 | clean_clusters = list(set(clusters) - set(poison_clusters)) 194 | assigned_clean = self.assign_class(clusters, clean_clusters, poison_clusters) 195 |
all_assigned_clean.append(assigned_clean) 196 | 197 | all_assigned_clean = np.asarray(all_assigned_clean) 198 | return all_assigned_clean, summary_poison_clusters, report 199 | 200 | def analyze_by_relative_size(self, separated_clusters, size_threshold=0.35, r_size=2): 201 | """ 202 | Assigns a cluster as poisonous if the smaller one contains less than threshold of the data. 203 | This method assumes only 2 clusters 204 | 205 | :param separated_clusters: list where separated_clusters[i] is the cluster assignments for the ith class 206 | :type separated_clusters: `list` 207 | :param size_threshold: (optional) threshold used to define when a cluster is substantially smaller. A default 208 | value is used if the parameter is not provided. 209 | :type size_threshold: `float` 210 | :param r_size: Round number used for size rate comparisons. 211 | :type r_size `int` 212 | :return: all_assigned_clean, summary_poison_clusters, report: 213 | where all_assigned_clean[i] is a 1D boolean array indicating whether 214 | a given data point was determined to be clean (as opposed to poisonous) and 215 | summary_poison_clusters: array, where summary_poison_clusters[i][j]=1 if cluster j of class i was classified as 216 | poison, otherwise 0 217 | report: Dictionary with summary of the analysis 218 | :rtype: all_assigned_clean: `ndarray`, summary_poison_clusters: `list`, report" `dic` 219 | """ 220 | size_threshold = round(size_threshold, r_size) 221 | report = {'cluster_analysis': 'relative_size', 222 | 'suspicious_clusters': 0, 223 | 'size_threshold': size_threshold 224 | } 225 | 226 | all_assigned_clean = [] 227 | nb_classes = len(separated_clusters) 228 | nb_clusters = len(np.unique(separated_clusters[0])) 229 | summary_poison_clusters = [[[] for x in range(nb_clusters)] for y in range(nb_classes)] 230 | 231 | for i, clusters in enumerate(separated_clusters): 232 | sizes = np.bincount(clusters) 233 | total_dp_in_class = np.sum(sizes) 234 | 235 | if np.size(sizes) > 2: 236 | raise ValueError(" RelativeSizeAnalyzer does not support more than two clusters.") 237 | percentages = np.round(sizes / float(np.sum(sizes)), r_size) 238 | poison_clusters = np.where(percentages < size_threshold) 239 | clean_clusters = np.where(percentages >= size_threshold) 240 | 241 | for p_id in poison_clusters[0]: 242 | summary_poison_clusters[i][p_id] = 1 243 | for c_id in clean_clusters[0]: 244 | summary_poison_clusters[i][c_id] = 0 245 | 246 | assigned_clean = self.assign_class(clusters, clean_clusters, poison_clusters) 247 | all_assigned_clean.append(assigned_clean) 248 | 249 | # Generate report for this class: 250 | report_class = dict() 251 | for cluster_id in range(nb_clusters): 252 | ptc = sizes[cluster_id] / total_dp_in_class 253 | susp = (cluster_id in poison_clusters) 254 | dict_i = dict(ptc_data_in_cluster=round(ptc, 2), suspicious_cluster=susp) 255 | 256 | dict_cluster = {'cluster_' + str(cluster_id): dict_i} 257 | report_class.update(dict_cluster) 258 | 259 | report['Class_' + str(i)] = report_class 260 | 261 | report['suspicious_clusters'] = report['suspicious_clusters'] + np.sum(summary_poison_clusters).item() 262 | return np.asarray(all_assigned_clean), summary_poison_clusters, report 263 | 264 | def analyze_by_silhouette_score(self, separated_clusters, reduced_activations_by_class, size_threshold=0.35, 265 | silhouette_threshold=0.1, r_size=2, r_silhouette=4): 266 | """ 267 | Analyzes clusters to determine level of suspiciousness of poison based on the cluster's relative size 268 | and silhouette score. 
269 | Computes a silhouette score for each class to determine how cohesive resulting clusters are. 270 | A low silhouette score indicates that the clustering does not fit the data well, and the class can be considered 271 | to be unpoisoned. Conversely, a high silhouette score indicates that the clusters reflect true splits in the data. 272 | The method concludes that a cluster is poison based on the silhouette score and the cluster relative size. 273 | If the relative size is too small, below a size_threshold and at the same time 274 | the silhouette score is higher than silhouette_threshold, the cluster is classified as poisonous. 275 | If the above thresholds are not provided, the default ones will be used. 276 | 277 | :param separated_clusters: list where separated_clusters[i] is the cluster assignments for the ith class 278 | :type separated_clusters: `list` 279 | :param reduced_activations_by_class: list where separated_activations[i] is a 1D array of [0,1] for [poison,clean] 280 | :type reduced_activations_by_class: `list` 281 | :param size_threshold: (optional) threshold used to define when a cluster is substantially smaller. A default 282 | value is used if the parameter is not provided. 283 | :type size_threshold: `float` 284 | :param silhouette_threshold: (optional) threshold used to define when a cluster is cohesive. Default 285 | value is used if the parameter is not provided. 286 | :type silhouette_threshold: `float` 287 | :param r_size: Round number used for size rate comparisons. 288 | :type r_size `int` 289 | :param r_silhouette: Round number used for silhouette rate comparisons. 290 | :type r_silhouette: `int` 291 | :return: all_assigned_clean, summary_poison_clusters, report: 292 | where all_assigned_clean[i] is a 1D boolean array indicating whether 293 | a given data point was determined to be clean (as opposed to poisonous) 294 | summary_poison_clusters: array, where summary_poison_clusters[i][j]=1 if cluster j of class j was classified as 295 | poison 296 | report: Dictionary with summary of the analysis 297 | :rtype: all_assigned_clean: `ndarray`, summary_poison_clusters: `list`, report" `dic` 298 | """ 299 | from sklearn.metrics import silhouette_score 300 | size_threshold = round(size_threshold, r_size) 301 | silhouette_threshold = round(silhouette_threshold, r_silhouette) 302 | report = {'cluster_analysis': 'silhouette_score', 'size_threshold': str(size_threshold), 303 | 'silhouette_threshold': str(silhouette_threshold)} 304 | all_assigned_clean = [] 305 | nb_classes = len(separated_clusters) 306 | nb_clusters = len(np.unique(separated_clusters[0])) 307 | summary_poison_clusters = [[[] for x in range(nb_clusters)] for y in range(nb_classes)] 308 | 309 | for i, (clusters, activations) in enumerate(zip(separated_clusters, reduced_activations_by_class)): 310 | bins = np.bincount(clusters) 311 | if np.size(bins) > 2: 312 | raise ValueError("Analyzer does not support more than two clusters.") 313 | percentages = np.round(bins / float(np.sum(bins)), r_size) 314 | poison_clusters = np.where(percentages < size_threshold) 315 | clean_clusters = np.where(percentages >= size_threshold) 316 | 317 | # Generate report for class 318 | silhouette_avg = round(silhouette_score(activations, clusters), r_silhouette) 319 | dict_i = dict(sizes_clusters=str(bins), 320 | ptc_cluster=str(percentages), 321 | avg_silhouette_score=str(silhouette_avg)) 322 | 323 | if np.shape(poison_clusters)[1] != 0: 324 | # Relative size of the clusters is suspicious 325 | if silhouette_avg > 
silhouette_threshold: 326 | # In this case the cluster is considered poisonous 327 | clean_clusters = np.where(percentages < size_threshold) 328 | logger.info('computed silhouette score: %s', silhouette_avg) 329 | dict_i.update(suspicious=True) 330 | else: 331 | poison_clusters = [[]] 332 | clean_clusters = np.where(percentages >= 0) 333 | dict_i.update(suspicious=False) 334 | else: 335 | # If the relative size of the clusters is not suspicious, we conclude the class is not suspicious. 336 | dict_i.update(suspicious=False) 337 | 338 | report_class = {'class_' + str(i): dict_i} 339 | 340 | for p_id in poison_clusters[0]: 341 | summary_poison_clusters[i][p_id] = 1 342 | for c_id in clean_clusters[0]: 343 | summary_poison_clusters[i][c_id] = 0 344 | 345 | assigned_clean = self.assign_class(clusters, clean_clusters, poison_clusters) 346 | all_assigned_clean.append(assigned_clean) 347 | report.update(report_class) 348 | 349 | return np.asarray(all_assigned_clean), summary_poison_clusters, report 350 | -------------------------------------------------------------------------------- /src/attacks/CW.py: -------------------------------------------------------------------------------- 1 | """The CarliniWagnerL2 attack 2 | """ 3 | # pylint: disable=missing-docstring 4 | import logging 5 | 6 | import numpy as np 7 | from numpy.lib.npyio import save 8 | import tensorflow as tf 9 | from utils import * 10 | from attacks.backdoor_generator import BackdoorGenerator 11 | import math as m 12 | from tqdm.gui import trange 13 | 14 | MAX_ITER = 5 15 | 16 | 17 | def create_logger(name): 18 | """ 19 | Create a logger object; the given name is currently unused and the shared 'reforcement' base logger is configured and returned. 20 | 21 | If this is the first time that we call this method, then initialize the 22 | formatter. 23 | """ 24 | base = logging.getLogger("reforcement") 25 | if len(base.handlers) == 0: 26 | ch = logging.StreamHandler(stream=sys.stdout) 27 | formatter = logging.Formatter('[%(levelname)s %(asctime)s %(name)s] ' + 28 | '%(message)s') 29 | ch.setFormatter(formatter) 30 | base.addHandler(ch) 31 | 32 | return base 33 | 34 | np_dtype = np.dtype('float32') 35 | tf_dtype = tf.as_dtype('float32') 36 | 37 | _logger = create_logger("cleverhans.attacks.carlini_wagner_l2") 38 | _logger.setLevel(logging.INFO) 39 | 40 | cw_params = { 41 | 'batch_size': 1, 42 | 'confidence': 10, 43 | 'learning_rate': 0.1, 44 | 'binary_search_steps': 5, 45 | 'max_iterations': 300, 46 | 'abort_early': True, 47 | 'initial_const': 0.01, 48 | 'clip_min': 0, 49 | 'clip_max': 1, 50 | 'targeted': True} 51 | 52 | 53 | class CarliniWagnerL2(BackdoorGenerator): 54 | """ 55 | This attack was originally proposed by Carlini and Wagner. It is an 56 | iterative attack that finds adversarial examples on many defenses that 57 | are robust to other attacks. 58 | Paper link: https://arxiv.org/abs/1608.04644 59 | 60 | At a high level, this attack is an iterative attack using Adam and 61 | a specially-chosen loss function to find adversarial examples with 62 | lower distortion than other attacks. This comes at the cost of speed, 63 | as this attack is often much slower than others. 64 | 65 | :param model: cleverhans.model.Model 66 | 67 | :param dtypestr: dtype of the data 68 | :param kwargs: passed through to super constructor 69 | """ 70 | 71 | def __init__(self, model, param, args=cw_params): 72 | """ 73 | Note: the model parameter should be an instance of the 74 | cleverhans.model.Model abstraction provided by CleverHans.
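A hypothetical construction, following this repository's conventions: `attack = CarliniWagnerL2(model, param, args=cw_params)`, where `param` must expose `poison_label_source`, `poison_label_target` and `num_classes` via `get_conf`.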


class CarliniWagnerL2(BackdoorGenerator):
    """
    This attack was originally proposed by Carlini and Wagner. It is an
    iterative attack that finds adversarial examples on many defenses that
    are robust to other attacks.
    Paper link: https://arxiv.org/abs/1608.04644

    At a high level, this attack is an iterative attack using Adam and
    a specially-chosen loss function to find adversarial examples with
    lower distortion than other attacks. This comes at the cost of speed,
    as this attack is often much slower than others.

    :param model: cleverhans.model.Model

    :param dtypestr: dtype of the data
    :param kwargs: passed through to super constructor
    """

    def __init__(self, model, param, args=cw_params):
        """
        Note: the model parameter should be an instance of the
        cleverhans.model.Model abstraction provided by CleverHans.
        """
        # if not isinstance(model, Model):
        #     wrapper_warning_logits()
        #     model = CallableModelWrapper(model, 'logits')
        super(CarliniWagnerL2, self).__init__(model, param)

        self.feedable_kwargs = ('y', 'y_target')
        self.structural_kwargs = [
            'batch_size', 'confidence', 'targeted', 'learning_rate',
            'binary_search_steps', 'max_iterations', 'abort_early',
            'initial_const', 'clip_min', 'clip_max'
        ]
        self.sess = K.get_session()
        fixed = dict(
            (k, v) for k, v in args.items() if k in self.structural_kwargs)
        feedable_names = self.feedable_kwargs
        self.feedable = {k: v for k, v in args.items() if k in feedable_names}
        hash_key = tuple(sorted(fixed.items()))
        self.new_kwargs = dict(x for x in fixed.items())
        self.build_attack(**self.new_kwargs)
        self.source = int(self.param.get_conf('poison_label_source'))
        self.target = int(self.param.get_conf('poison_label_target'))

    def build_attack(self, **kwargs):
        """
        Build the underlying CWL2 attack graph from the structural
        parameters and store it in ``self.CW``. See `parse_params` for the
        accepted keyword arguments.
        """
        self.parse_params(**kwargs)
        # preds = self.model.get_output_tensor()
        # preds_max = tf.reduce_max(preds, 1, keepdims=True)
        # original_predictions = tf.to_float(tf.equal(preds, preds_max))
        # labels = tf.stop_gradient(original_predictions)

        self.CW = CWL2(self.param, self.model, self.batch_size, self.confidence,
                       self.targeted, self.learning_rate,
                       self.binary_search_steps, self.max_iterations,
                       self.abort_early, self.initial_const, self.clip_min,
                       self.clip_max, self.param.get_conf('num_classes'),
                       (self.model.get_input_tensor().get_shape()[1:]))

        # def cw_wrap(x_val, y_val):
        #     return np.array(, dtype=np.float32)

        # wrap = tf.py_func(cw_wrap, [x, labels], tf.float32)
        # wrap.set_shape(x.get_shape())

        # return wrap

    def attack(self, data, xi=30.0/255.0, **kwargs):
        """
        Generate a universal, targeted perturbation from clean samples of the
        source class and store it in ``self.perturb``.
        Sub-classes *should not* implement this method unless they must
        perform special handling of arguments.
        :param data: the dataset wrapper used to fetch clean source-class samples.
        :param xi: l-infinity budget for the accumulated perturbation.
        :param **kwargs: optional parameters used by child classes.
        :return: None; the accumulated perturbation is kept in ``self.perturb``.
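
        Illustrative usage (a sketch, assuming ``model``, ``param``, and
        ``data`` follow this project's wrappers and the config keys
        ``poison_label_source``/``poison_label_target`` are set):

            cw = CarliniWagnerL2(model, param)    # builds the TF attack graph
            cw.attack(data, xi=30.0 / 255.0)      # runs the iterative attack
            pert = cw.perturb                     # universal trigger pattern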
        """

        if self.sess is None:
            raise ValueError("Cannot use `attack` when no `sess` was"
                             " provided")

        num_selection = 5000
        x_val, y_val, _, _ = data.get_specific_label_clean_data(self.source)
        x_val = x_val[:num_selection]
        y_val = y_val[:num_selection]
        # if hash_key not in self.graphs:
        #     self.construct_graph(fixed, feedable, x_val, hash_key)
        # else:
        #     # remove the None arguments, they are just left blank
        #     for k in list(feedable.keys()):
        #         if feedable[k] is None:
        #             del feedable[k]

        # feed_dict = {self.input_tensor: x_val, self.labels_tensor: to_categorical(y_val)}
        targets = np.zeros_like(y_val)
        targets[:, self.target] = 1
        self.perturb = self.CW.attack(x_val, targets, xi=xi)
        # for name in self.feedable:
        #     feed_dict[new_kwargs[name]] = self.feedable[name]

        # return pert

    def parse_params(self,
                     batch_size=1,
                     confidence=10,
                     learning_rate=5e-3,
                     binary_search_steps=5,
                     max_iterations=1000,
                     abort_early=True,
                     initial_const=1e-2,
                     clip_min=0,
                     clip_max=1,
                     targeted=True):
        """
        :param y: (optional) A tensor with the true labels for an untargeted
                  attack. If None (and y_target is None) then use the
                  original labels the classifier assigns.
        :param y_target: (optional) A tensor with the target labels for a
                  targeted attack.
        :param confidence: Confidence of adversarial examples: higher produces
                  examples with larger l2 distortion, but more
                  strongly classified as adversarial.
        :param batch_size: Number of attacks to run simultaneously.
        :param learning_rate: The learning rate for the attack algorithm.
                  Smaller values produce better results but are
                  slower to converge.
        :param binary_search_steps: The number of times we perform binary
                  search to find the optimal tradeoff-constant between the
                  norm of the perturbation and the confidence of the
                  classification.
        :param max_iterations: The maximum number of iterations. Setting this
                  to a larger value will produce lower distortion
                  results. Using only a few iterations requires
                  a larger learning rate, and will produce larger
                  distortion results.
        :param abort_early: If true, allows early aborts if gradient descent
                  is unable to make progress (i.e., gets stuck in
                  a local minimum).
        :param initial_const: The initial tradeoff-constant used to tune the
                  relative importance of the size of the perturbation
                  and the confidence of classification.
                  If binary_search_steps is large, the initial
                  constant is not important. A smaller value of
                  this constant gives lower distortion results.
        :param clip_min: (optional float) Minimum input component value
        :param clip_max: (optional float) Maximum input component value
        """

        # ignore the y and y_target arguments
        self.batch_size = batch_size
        self.confidence = confidence
        self.learning_rate = learning_rate
        self.binary_search_steps = binary_search_steps
        self.max_iterations = max_iterations
        self.abort_early = abort_early
        self.initial_const = initial_const
        self.clip_min = clip_min
        self.clip_max = clip_max
        self.targeted = targeted
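
# The binary search that `binary_search_steps` controls can be summarized in
# a few lines. This is a schematic sketch, not the code used below:
# `succeeded` stands for "the attack found an adversarial example at this
# tradeoff constant".
def _binary_search_const_sketch(succeeded, initial_const=1e-2, steps=5):
    lower, const, upper = 0.0, initial_const, 1e10
    for _ in range(steps):
        if succeeded(const):
            # success: try a smaller constant, trading confidence for distortion
            upper = min(upper, const)
            const = (lower + upper) / 2
        else:
            # failure: raise the constant (x10 until an upper bound is known)
            lower = max(lower, const)
            const = (lower + upper) / 2 if upper < 1e9 else const * 10
    return const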

def ZERO():
    return np.asarray(0., dtype=np_dtype)


class CWL2(object):
    def __init__(self, param, model, batch_size, confidence, targeted,
                 learning_rate, binary_search_steps, max_iterations,
                 abort_early, initial_const, clip_min, clip_max, num_labels,
                 shape):
        """
        Build the TF graph that constructs adversarial examples for the given
        input; the session is taken from the Keras backend.

        :param param: the configuration object.
        :param model: the wrapped Keras model under attack.
        :param batch_size: Number of attacks to run simultaneously.
        :param confidence: Confidence of adversarial examples: higher produces
                  examples with larger l2 distortion, but more
                  strongly classified as adversarial.
        :param targeted: boolean controlling the behavior of the adversarial
                  examples produced. If set to False, they will be
                  misclassified in any wrong class. If set to True,
                  they will be misclassified in a chosen target class.
        :param learning_rate: The learning rate for the attack algorithm.
                  Smaller values produce better results but are
                  slower to converge.
        :param binary_search_steps: The number of times we perform binary
                  search to find the optimal tradeoff-constant between the
                  norm of the perturbation and the confidence of the
                  classification.
        :param max_iterations: The maximum number of iterations. Setting this
                  to a larger value will produce lower distortion
                  results. Using only a few iterations requires
                  a larger learning rate, and will produce larger
                  distortion results.
        :param abort_early: If true, allows early aborts if gradient descent
                  is unable to make progress (i.e., gets stuck in
                  a local minimum).
        :param initial_const: The initial tradeoff-constant used to tune the
                  relative importance of the size of the perturbation
                  and the confidence of classification.
                  If binary_search_steps is large, the initial
                  constant is not important. A smaller value of
                  this constant gives lower distortion results.
        :param clip_min: (optional float) Minimum input component value.
        :param clip_max: (optional float) Maximum input component value.
        :param num_labels: the number of classes in the model's output.
        :param shape: the shape of the model's input tensor.
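
        To keep iterates inside the valid pixel box, the attack optimizes an
        unconstrained variable w (``modifier`` below) and maps it through tanh:

            x' = (tanh(w + x_tanh) + 1) / 2 * (clip_max - clip_min) + clip_min

        where x_tanh is the arctanh-rescaled input, so x' always lies in
        [clip_min, clip_max] and Adam can run without projection. (Informal
        summary of the graph built below.)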
        """

        self.param = param
        self.sess = K.get_session()
        self.TARGETED = targeted
        self.LEARNING_RATE = learning_rate
        self.MAX_ITERATIONS = max_iterations
        self.BINARY_SEARCH_STEPS = binary_search_steps
        self.ABORT_EARLY = abort_early
        self.CONFIDENCE = confidence
        self.initial_const = initial_const
        self.batch_size = batch_size
        self.clip_min = clip_min
        self.clip_max = clip_max
        self.model = model

        self.repeat = binary_search_steps >= 10

        self.shape = shape = tuple([batch_size] + list(shape))

        # the variable we're going to optimize over
        modifier = tf.Variable(np.zeros(shape, dtype=np_dtype))

        # these are variables to be more efficient in sending data to tf
        self.timg = tf.Variable(np.zeros(shape), dtype=tf_dtype, name='timg')
        self.tlab = tf.Variable(np.zeros((batch_size, num_labels)),
                                dtype=tf_dtype,
                                name='tlab')
        self.const = tf.Variable(np.zeros(batch_size),
                                 dtype=tf_dtype,
                                 name='const')

        # and here's what we use to assign them
        self.assign_timg = tf.placeholder(tf_dtype, shape, name='assign_timg')
        self.assign_tlab = tf.placeholder(tf_dtype, (batch_size, num_labels),
                                          name='assign_tlab')
        self.assign_const = tf.placeholder(tf_dtype, [batch_size],
                                           name='assign_const')

        # the resulting instance, tanh'd to keep bounded from clip_min
        # to clip_max
        self.newimg = (tf.tanh(modifier + self.timg) + 1) / 2
        self.newimg = self.newimg * (clip_max - clip_min) + clip_min

        # prediction BEFORE-SOFTMAX of the model
        # model = Model(self.model.get_input_tensor(), [self.model.get_output_bef_softmax()])
        # self.output = model([self.newimg, self.model.get_input_tensor()[1]])

        model = Model([self.model.get_input_tensor()], [self.model.get_output_bef_softmax()])
        self.output = model(self.newimg)

        # distance to the input data
        self.other = (tf.tanh(self.timg) + 1) / \
            2 * (clip_max - clip_min) + clip_min
        self.l2dist = tf.reduce_sum(tf.square(self.newimg - self.other),
                                    list(range(1, len(shape))))

        # compute the probability of the label class versus the maximum other
        real = tf.reduce_sum((self.tlab) * self.output, 1)
        other = tf.reduce_max((1 - self.tlab) * self.output - self.tlab * 10000,
                              1)

        if self.TARGETED:
            # if targeted, optimize for making the other class most likely
            loss1 = tf.maximum(ZERO(), other - real + self.CONFIDENCE)
        else:
            # if untargeted, optimize for making this class least likely.
            loss1 = tf.maximum(ZERO(), real - other + self.CONFIDENCE)

        # sum up the losses
        self.loss_out = self.l2dist + self.const * loss1
        self.loss2 = tf.reduce_sum(self.l2dist)
        self.loss1 = tf.reduce_sum(self.const * loss1)
        self.loss = self.loss1 + self.loss2

        # Setup the adam optimizer and keep track of variables we're creating
        start_vars = set(x.name for x in tf.global_variables())
        optimizer = tf.train.AdamOptimizer(self.LEARNING_RATE)
        self.train = optimizer.minimize(self.loss, var_list=[modifier])
        end_vars = tf.global_variables()
        new_vars = [x for x in end_vars if x.name not in start_vars]

        # these are the variables to initialize when we run
        self.setup = []
        self.setup.append(self.timg.assign(self.assign_timg))
        self.setup.append(self.tlab.assign(self.assign_tlab))
        self.setup.append(self.const.assign(self.assign_const))
        # self.setup.append(self.input_tensor.assign(self.new_imgs))
        self.init = tf.variables_initializer(var_list=[modifier] + new_vars)

    def add_pert(self, img, pert):
        return np.clip(img + pert, self.clip_min, self.clip_max)

    def attack(self, imgs, targets, xi=15.0/255):
        """
        Perform the L_2 attack on the given instances for the given targets,
        accumulating a single universal perturbation across all of them.

        If self.targeted is true, then targets represents the target labels.
        If self.targeted is false, then targets are the original class labels.
        """
        # origin_imgs = copy.deepcopy(imgs)
        num_images = len(imgs)
        imgs = np.array(imgs, dtype=np.float32)
        l2_dis = np.zeros(num_images)
        loss = np.zeros(num_images)
        num_iter = np.zeros(num_images)
        score = np.zeros(num_images)
        # r = []
        tot_pert = np.zeros_like(imgs[0])
        index = np.arange(len(imgs))

        for it in range(MAX_ITER):
            print('starting iter', it)
            np.random.shuffle(index)
            for k, i in enumerate(index):
                _logger.debug(
                    "Running CWL2 attack on instance %s of %s", i, len(imgs))
                # skip images the current universal perturbation already fools
                if self.model.classifier.predict(self.add_pert(imgs[i:i+1], tot_pert)).argmax(axis=1)[0] == targets[i].argmax(axis=0):
                    continue

                adv_imgs, l2_dis[i:i+1], loss[i:i+1], \
                    score[i:i+1], num_iter[i:i+1] = \
                    self.attack_batch(self.add_pert(imgs[i:i+1], tot_pert), targets[i:i + 1])
                # r.extend(adv_imgs)
                # fold this instance's residual perturbation into the universal
                # one and project back onto the l-infinity ball of radius xi
                pert = np.squeeze(adv_imgs - self.add_pert(imgs[i], tot_pert))
                tot_pert = self.proj_lp(tot_pert + pert, xi=xi, p=np.inf)
                print('>> k = ', k, ', img_idx = ', i, ', pass #', it, "size =", np.mean(abs(tot_pert)))
            # slight decay keeps the perturbation from saturating the budget
            tot_pert *= 0.998

            # imgs = origin_imgs + tot_pert
            fooling_rate = 0
            for i in range(0, len(imgs), 128):
                up_i = min(i + 128, len(imgs))
                preds = self.model.classifier.predict(self.add_pert(imgs[i:up_i], tot_pert)).argmax(axis=1)
                fooling_rate += np.sum(preds == targets[i: up_i].argmax(axis=1))
                # print(i, np.sum(preds == targets[i:up_i].argmax(axis=1)), fooling_rate, len(preds))
            fooling_rate /= (1.0 * len(imgs))
            print("fool rate is", fooling_rate)
            if fooling_rate > 0.8:
                break

        # r = np.array(r)
        # save_name = '_'.join([self.param.get_conf('model_prefix'), get_date(), postfix])
        # save_pkl = os.path.join(self.param.get_conf('perturbation_dir'), save_name + '.pkl')

        # with open(save_pkl, 'wb') as f:
        #     pickle.dump(tot_pert, f)

        return tot_pert
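
    # attack_batch below is essentially the original C&W optimization: an
    # outer binary search over the tradeoff constant c, and an inner Adam
    # loop minimizing  ||x' - x||_2^2 + c * f(x')  in tanh space, with an
    # optional early abort when the loss plateaus. compare() applies the
    # confidence margin before deciding whether a logit vector counts as a
    # successful (targeted or untargeted) misclassification.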
    def attack_batch(self, imgs, labs):
        """
        Run the attack on a batch of instances and labels.
        """
        def compare(x, y):
            if not isinstance(x, (float, int, np.int64)):
                x = np.copy(x)
                if self.TARGETED:
                    x[y] -= self.CONFIDENCE
                else:
                    x[y] += self.CONFIDENCE
                x = np.argmax(x)
            if self.TARGETED:
                return x == y
            else:
                return x != y

        batch_size = self.batch_size

        oimgs = np.clip(imgs, self.clip_min, self.clip_max)

        # re-scale instances to be within range [0, 1]
        imgs = (imgs - self.clip_min) / (self.clip_max - self.clip_min)
        imgs = np.clip(imgs, 0, 1)
        # now convert to [-1, 1]
        imgs = (imgs * 2) - 1
        # convert to tanh-space
        imgs = np.arctanh(imgs * .999999)

        # set the lower and upper bounds accordingly
        lower_bound = np.zeros(batch_size)
        CONST = np.ones(batch_size) * self.initial_const
        upper_bound = np.ones(batch_size) * 1e10

        # placeholders for the best l2, score, and instance attack found so far
        o_bestl2 = [1e10] * batch_size
        o_bestscore = [-1] * batch_size
        o_iter = [0] * batch_size
        o_bestloss = [1e10] * batch_size
        o_bestattack = np.copy(oimgs)

        for outer_step in range(self.BINARY_SEARCH_STEPS):
            # print("search iteration ", outer_step)
            # completely reset adam's internal state
            self.sess.run(self.init)
            batch = imgs[:batch_size]
            batchlab = labs[:batch_size]

            bestl2 = [1e10] * batch_size
            bestscore = [-1] * batch_size
            _logger.debug("  Binary search step %s of %s", outer_step,
                          self.BINARY_SEARCH_STEPS)

            # The last iteration (if we run many steps) repeats the search once.
            if self.repeat and outer_step == self.BINARY_SEARCH_STEPS - 1:
                CONST = upper_bound

            # set the variables so that we don't have to send them over again
            self.sess.run(
                self.setup, {
                    self.assign_timg: batch,
                    self.assign_tlab: batchlab,
                    self.assign_const: CONST
                })

            prev = 1e6
            iter_num = 0
            for iteration in range(self.MAX_ITERATIONS):
                # perform one attack step
                _, l, l_o, l2s, scores, nimg = self.sess.run([
                    self.train, self.loss, self.loss_out, self.l2dist, self.output,
                    self.newimg],
                    # feed_dict={self.model.get_input_tensor()[1]: np.random.randint(0, 2, size=(len(batch), 7))}
                    )

                if iteration % ((self.MAX_ITERATIONS // 10) or 1) == 0:
                    _logger.debug(
                        ("    Iteration {} of {}: loss={:.3g} " +
                         "l2={:.3g} f={:.3g}").format(iteration,
                                                      self.MAX_ITERATIONS, l,
                                                      np.mean(l2s),
                                                      np.mean(scores)))

                # check if we should abort search if we're getting nowhere
                if self.ABORT_EARLY and \
                   iteration % ((self.MAX_ITERATIONS // 10) or 1) == 0:
                    if l > prev * .9999:
                        msg = "    Failed to make progress; stopping early"
                        _logger.debug(msg)
                        break
                    prev = l

                # adjust the best result found so far
                for e, (l2, sc, ii, l_oi) in enumerate(zip(l2s, scores, nimg, l_o)):
                    lab = np.argmax(batchlab[e])
                    if l2 < bestl2[e] and compare(sc, lab):
                        bestl2[e] = l2
                        bestscore[e] = np.argmax(sc)
                    if l2 < o_bestl2[e] and compare(sc, lab):
                        o_bestl2[e] = l2
                        o_bestscore[e] = np.argmax(sc)
                        o_bestattack[e] = ii
                        o_iter[e] = iter_num
                        o_bestloss[e] = l_oi
                iter_num += 1

            # adjust the constant as needed
            for e in range(batch_size):
                if compare(bestscore[e], np.argmax(batchlab[e])) and \
                   bestscore[e] != -1:
                    # success, divide const by two
                    upper_bound[e] = min(upper_bound[e], CONST[e])
                    if upper_bound[e] < 1e9:
                        CONST[e] = (lower_bound[e] + upper_bound[e]) / 2
                else:
                    # failure, either multiply by 10 if no solution found yet
                    # or do binary search with the known upper bound
                    lower_bound[e] = max(lower_bound[e], CONST[e])
                    if upper_bound[e] < 1e9:
                        CONST[e] = (lower_bound[e] + upper_bound[e]) / 2
                    else:
                        CONST[e] *= 10
            _logger.debug("  Successfully generated adversarial examples " +
                          "on {} of {} instances.".format(
                              sum(upper_bound < 1e9), batch_size))
            o_bestl2 = np.array(o_bestl2)
            mean = np.mean(np.sqrt(o_bestl2[o_bestl2 < 1e9]))
            _logger.debug("  Mean successful distortion: {:.4g}".format(mean))

        # return the best solution found
        o_bestl2 = np.array(o_bestl2)
        return o_bestattack, o_bestl2, o_bestloss, o_bestscore, o_iter
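
    # proj_lp implements the projection step used by attack() above: it maps
    # an arbitrary perturbation v back onto the l_p ball of radius xi.
    # For p = inf this is an element-wise clamp,
    #     proj(v) = sign(v) * min(|v|, xi),
    # and for p = 2 a rescaling, proj(v) = v * min(1, xi / ||v||_2).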
    def proj_lp(self, v, xi, p):
        """
        Project v onto the l_p ball centered at 0 with radius xi.
        Supports only p = 2 and p = inf for now.
        """
        # print('xi is', xi)
        if p == 2:
            # flatten() order is irrelevant for the norm
            v = v * min(1, xi / np.linalg.norm(v.flatten()))
            # v = v / np.linalg.norm(v.flatten()) * xi
        elif p == np.inf:
            v = np.sign(v) * np.minimum(abs(v), xi)
        else:
            raise ValueError('Values of p other than 2 and inf are currently not supported.')

        return v
--------------------------------------------------------------------------------