├── model
│   └── .gitkeep
├── src
│   ├── __init__.py
│   ├── data
│   │   ├── __init__.py
│   │   ├── data.py
│   │   ├── DataGenerator.py
│   │   └── cifar10.py
│   ├── model
│   │   ├── __init__.py
│   │   ├── model.py
│   │   ├── cifar10.py
│   │   └── cifar_res.py
│   ├── attacks
│   │   ├── __init__.py
│   │   ├── backdoor_generator.py
│   │   ├── Deepfool.py
│   │   ├── universal_perturbation.py
│   │   └── CW.py
│   ├── classifiers
│   │   ├── __init__.py
│   │   └── classifier.py
│   ├── poison_detection
│   │   ├── __init__.py
│   │   ├── poison_filtering_defence.py
│   │   ├── ground_truth_evaluator.py
│   │   └── clustering_analyzer.py
│   ├── poison.py
│   ├── activations.py
│   ├── conf.py
│   ├── defences
│   │   ├── spectral.py
│   │   └── activation_clustering.py
│   ├── utils.py
│   ├── test_specific_pair.py
│   ├── visualization.py
│   └── backdoor.py
├── perturbation
│   └── .gitkeep
├── .gitattributes
├── imgs
│   ├── example.png
│   ├── Res2VGG_CW.png
│   ├── VGG2Res_CW.png
│   ├── generation.png
│   ├── injection.png
│   ├── Res2VGG_Deepfool.png
│   └── VGG2Res_Deepfool.png
├── .gitignore
├── json
│   └── cifar.json
├── requirements.txt
└── README.md

/model/.gitkeep:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/src/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/perturbation/.gitkeep:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/src/data/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/src/model/__init__.py:
--------------------------------------------------------------------------------
1 | from model.model import CNNModel
2 | 
--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | * text=auto
3 | 
--------------------------------------------------------------------------------
/imgs/example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ZQ-Struggle/AdvDoor/HEAD/imgs/example.png
--------------------------------------------------------------------------------
/imgs/Res2VGG_CW.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ZQ-Struggle/AdvDoor/HEAD/imgs/Res2VGG_CW.png
--------------------------------------------------------------------------------
/imgs/VGG2Res_CW.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ZQ-Struggle/AdvDoor/HEAD/imgs/VGG2Res_CW.png
--------------------------------------------------------------------------------
/imgs/generation.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ZQ-Struggle/AdvDoor/HEAD/imgs/generation.png
--------------------------------------------------------------------------------
/imgs/injection.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ZQ-Struggle/AdvDoor/HEAD/imgs/injection.png
--------------------------------------------------------------------------------
/imgs/Res2VGG_Deepfool.png:
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZQ-Struggle/AdvDoor/HEAD/imgs/Res2VGG_Deepfool.png -------------------------------------------------------------------------------- /imgs/VGG2Res_Deepfool.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZQ-Struggle/AdvDoor/HEAD/imgs/VGG2Res_Deepfool.png -------------------------------------------------------------------------------- /src/attacks/__init__.py: -------------------------------------------------------------------------------- 1 | from attacks.backdoor_generator import BackdoorGenerator 2 | from attacks.Deepfool import Deepfool 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea/* 2 | src/img/* 3 | data/* 4 | model/* 5 | gen_img/* 6 | perturbation/* 7 | log/* 8 | .ipynb_checkpoints/* 9 | src/.ipynb_checkpoints/* 10 | src/__pycache__/* 11 | *.pyc 12 | vis/* 13 | clustering_result/* 14 | *.h5 15 | .vscode/* 16 | !.gitkeep 17 | -------------------------------------------------------------------------------- /src/classifiers/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Classifier API for applying all attacks. Use the :class:`.Classifier` wrapper to be able to apply an attack to a 3 | preexisting model. 4 | """ 5 | from classifiers.classifier import Classifier 6 | from classifiers.keras import KerasClassifier 7 | # from classifiers.mxnet import MXClassifier 8 | # from classifiers.pytorch import PyTorchClassifier 9 | # from classifiers.tensorflow import TFClassifier 10 | # from classifiers.ensemble import EnsembleClassifier 11 | -------------------------------------------------------------------------------- /src/poison_detection/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Poison detection defence API. Use the :class:`.PoisonFilteringDefence` wrapper to be able to apply a defence for a 3 | preexisting model. 
4 | """ 5 | from poison_detection.poison_filtering_defence import PoisonFilteringDefence 6 | from poison_detection.activation_defence import ActivationDefence 7 | 8 | from poison_detection.clustering_analyzer import ClusteringAnalyzer 9 | 10 | from poison_detection.ground_truth_evaluator import GroundTruthEvaluator 11 | -------------------------------------------------------------------------------- /json/cifar.json: -------------------------------------------------------------------------------- 1 | { 2 | "data_path": "", 3 | "auto_encoder_data_path": "../data/mnist.npz", 4 | "save_dir": "../model/", 5 | "auto_encoder_save_dir": "../model/", 6 | "perturbation_dir": "../perturbation/", 7 | "method": "universal", 8 | "model_prefix": "cifar", 9 | "backdoor_type": "adversarial", 10 | "pert_path": "../perturbation/cifar_universal_5to6_20210430-142209.pkl", 11 | "result_path": "../log/20191103/mnist_train_poison_rate_20191104.pkl", 12 | "train_epoch": 20, 13 | "save_interval": 5, 14 | "num_selection": 50000, 15 | "train_poison_rate": 0.3, 16 | "test_poison_rate": 0.3, 17 | "poison_label_source": 5, 18 | "poison_label_target": 6, 19 | "alpha_pert": 1, 20 | "pert_xi": 30, 21 | "num_classes": 10, 22 | "batch_size": 128, 23 | "train_image_size": 32, 24 | "model_path": "../model/cifar_clean_20210430-145041.pkl", 25 | "model_path_backdoor": "../model/cifar_20210427-181241_poison_5to6.pkl", 26 | "model_path_finetune": "../model/cifar_20210430-141935_clean.pkl", 27 | "num_gpu": 1 28 | } 29 | -------------------------------------------------------------------------------- /src/poison.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | 4 | # class for 5 | # 1. number of poison 6 | # 2. indices to be poisoned 7 | class Poison: 8 | def __init__(self, num, indices, backdoor_type, sources, targets, percent_poison): 9 | self.num_poison = num 10 | self.indices_to_be_poisoned = indices 11 | self.backdoor_type = backdoor_type 12 | self.sources = sources 13 | self.targets = targets 14 | self.percent_poison = percent_poison 15 | self.random_selection_indices = None 16 | self.shuffled_indices = None 17 | 18 | def get_num_poison(self): 19 | return self.num_poison 20 | 21 | def set_num_poison(self, num): 22 | self.num_poison = num 23 | 24 | def get_indices_to_be_poisoned(self): 25 | return self.indices_to_be_poisoned 26 | 27 | def set_indices_to_be_poisoned(self, indices): 28 | self.indices_to_be_poisoned = indices 29 | 30 | def get_backdoor_type(self): 31 | return self.backdoor_type 32 | 33 | def set_backdoor_type(self, backdoor_type): 34 | self.backdoor_type = backdoor_type 35 | 36 | def get_sources(self): 37 | return self.sources 38 | 39 | def set_sources(self, sources): 40 | self.sources = sources 41 | 42 | def get_targets(self): 43 | return self.targets 44 | 45 | def set_targets(self, targets): 46 | self.targets = targets 47 | 48 | def get_percent_poison(self): 49 | return self.percent_poison 50 | 51 | def set_percent_poison(self, percent_poison): 52 | self.percent_poison = percent_poison 53 | 54 | def get_random_selection_indices(self): 55 | return self.random_selection_indices 56 | 57 | def set_random_selection_indices(self, indices): 58 | self.random_selection_indices = indices 59 | 60 | def get_shuffled_indices(self): 61 | return self.shuffled_indices 62 | 63 | def set_shuffled_indices(self, indices): 64 | self.shuffled_indices = indices 65 | -------------------------------------------------------------------------------- 
/src/activations.py:
--------------------------------------------------------------------------------
1 | import os
2 | 
3 | from poison_detection import ActivationDefence
4 | 
5 | 
6 | class Activations:
7 |     def __init__(self, model, para):
8 |         # Here `model` already holds the extracted activations; to build them
9 |         # from a classifier instead, use the commented path below.
10 |         # self.poison = model.get_train_poison()
11 |         # defence = ActivationDefence(model.classifier, data.x_train, data.y_train,
12 |         #                             data_path=os.path.join(data.data_path, 'train'), batch_size=data.batch_size)
13 |         # self.activations = self._get_activations(defence)
14 |         self.activations = model
15 |         self.para = para
16 |         self.poison = None  # must be set (e.g. from model.get_train_poison()) before restore_data() is called
17 | 
18 |     def _get_activations(self, defences):
19 |         nb_layers = len(defences.classifier.layer_names)
20 |         activations_by_layers = []
21 |         '''
22 |         for i in range(nb_layers):
23 |             activations_by_layers.append(
24 |                 defences.classifier.get_activations(defences.x_train, layer=i, data_path=defences.data_path,
25 |                                                     batch_size=defences.batch_size))
26 |         '''
27 | 
28 |         # only the activations of the second-to-last layer are used
29 |         activations_by_layers.append(
30 |             defences.classifier.get_activations(defences.x_train, layer=nb_layers - 2, data_path=defences.data_path,
31 |                                                 batch_size=defences.batch_size))
32 |         nb_layers = 1
33 |         activations = [[] for i in range(len(defences.x_train))]
34 |         for i in range(nb_layers):
35 |             for j in range(len(defences.x_train)):
36 |                 activations[j].append(activations_by_layers[i][j])
37 |         # print(len(activations[0]))
38 |         return activations
39 | 
40 |     def restore_data(self, data_cls):
41 |         data = data_cls(self.para)  # data_cls is a Data subclass, e.g. CifarData
42 |         data.load_data()
43 |         data.restore_train_backdoor(self.poison)
44 |         # self.shuffle_activations(data.shuffled_indices)
45 |         data.gen_test_backdoor()
46 |         return data
47 | 
48 |     def shuffle_activations(self, shuffled_index):
49 |         self.activations = [self.activations[i] for i in shuffled_index]
50 | 
--------------------------------------------------------------------------------
/src/data/data.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 | 
3 | import abc
4 | 
5 | 
6 | class Data(metaclass=abc.ABCMeta):
7 |     def __init__(self, param):
8 |         self.param = param
9 |         self.init()
10 |         self.batch_size = None
11 | 
12 |     @abc.abstractmethod
13 |     def init(self):
14 |         pass
15 | 
16 |     @abc.abstractmethod
17 |     def load_data(self, is_add_channel=False):
18 |         pass
19 | 
20 |     @abc.abstractmethod
21 |     def add_channel_axis(self):
22 |         pass
23 | 
24 |     @abc.abstractmethod
25 |     def gen_indices(self):
26 |         pass
27 | 
28 |     @abc.abstractmethod
29 |     def gen_train_data(self):
30 |         pass
31 | 
32 |     @abc.abstractmethod
33 |     def gen_train_backdoor_data(self):
34 |         pass
35 | 
36 |     @abc.abstractmethod
37 |     def gen_shuffled_indices(self):
38 |         pass
39 | 
40 |     @abc.abstractmethod
41 |     def gen_shuffle_train_data(self):
42 |         pass
43 | 
44 |     @abc.abstractmethod
45 |     def print_backdoor_info(self):
46 |         pass
47 | 
48 |     @abc.abstractmethod
49 |     def gen_train_backdoor(self):
50 |         pass
51 | 
52 |     @abc.abstractmethod
53 |     def gen_test_backdoor_data(self):
54 |         pass
55 | 
56 |     @abc.abstractmethod
57 |     def gen_test_backdoor(self):
58 |         pass
59 | 
60 |     @abc.abstractmethod
61 |     def gen_backdoor(self, model):
62 |         pass
63 | 
64 |     @abc.abstractmethod
65 |     def restore_train_backdoor_data(self, poison):
66 |         pass
67 | 
68 |     @abc.abstractmethod
69 |     def restore_train_backdoor(self, poison):
70 |         pass
71 | 
72 |     @abc.abstractmethod
73 |     def restore_test_backdoor_data(self, poison):
74 |         pass
75 | 
76 |     @abc.abstractmethod
77 |     def restore_test_backdoor(self, poison):
78 |         pass
79 | 
80 |     @abc.abstractmethod
81 |     def restore_backdoor(self, model):
82 |         pass
83 | 
84 | 
@abc.abstractmethod
85 |     def visiualize_img_by_idx(self, shuffled_idx, pre_label, is_train=True):
86 |         pass
87 | 
88 |     @abc.abstractmethod
89 |     def cal_index(self, idx, is_train=True):
90 |         pass
91 | 
--------------------------------------------------------------------------------
/src/data/DataGenerator.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 | import math
3 | import os
4 | 
5 | import cv2
6 | import keras
7 | import numpy as np
8 | 
9 | from keras.utils import to_categorical
10 | 
11 | from utils import preprocess_input_vgg
12 | 
13 | 
14 | class DataGenerator(keras.utils.Sequence):
15 | 
16 |     def __init__(self, x, param, y=None, batch_size=1, shuffle=True, preprocess=preprocess_input_vgg, postfix=None):
17 |         self.batch_size = batch_size
18 |         self.x = x
19 |         self.y = y
20 |         self.indexes = np.arange(len(self.x))
21 |         self.shuffle = shuffle
22 |         self.preprocess = preprocess
23 |         self.param = param
24 |         if postfix is None:
25 |             self.data_path = self.param.get_conf('data_path')
26 |         else:
27 |             self.data_path = os.path.join(self.param.get_conf('data_path'), postfix)
28 |         self.train_size = param.get_conf('train_image_size')
29 |         self.nb_class = self.param.get_conf('classes_num')
30 | 
31 |     def __len__(self):
32 |         return math.ceil(len(self.x) / float(self.batch_size))
33 | 
34 |     def __getitem__(self, index):
35 |         batch_indexs = self.indexes[index * self.batch_size:(index + 1) * self.batch_size]
36 |         batch_datas = [self.x[k] for k in batch_indexs]
37 |         y = None
38 |         if self.y is not None:
39 |             y = [self.y[k] for k in batch_indexs]
40 |         return self.data_generation(batch_datas, y)
41 | 
42 |     def on_epoch_end(self):
43 |         if self.shuffle:
44 |             np.random.shuffle(self.indexes)
45 | 
46 |     def data_generation(self, batch_datas, y=None):
47 |         images = []
48 |         labels = []
49 |         for i, data in enumerate(batch_datas):
50 | 
51 |             img = cv2.imread(os.path.join(self.data_path, data))[:, :, ::-1]
52 |             img = cv2.resize(img, (self.train_size, self.train_size))
53 | 
54 |             images.append(img)
55 |             if self.y is not None:
56 |                 labels.append(to_categorical(y[i], self.nb_class))  # one label per sample
57 | 
58 |         images = np.array(images)
59 | 
60 |         if self.preprocess is not None:
61 |             images = self.preprocess(images)
62 |         # print(images.shape)
63 |         if self.y is None:
64 |             return np.array(images)
65 | 
66 |         return np.array(images), np.array(labels)
67 | 
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | # This file may be used to create an environment using:
2 | # $ conda create --name <env> --file <this file>
3 | # platform: linux-64
4 | _libgcc_mutex=0.1=conda_forge
5 | _openmp_mutex=4.5=1_gnu
6 | _tflow_select=2.1.0=gpu
7 | absl-py=0.12.0=pyhd8ed1ab_0
8 | astor=0.8.1=pyh9f0ad1d_0
9 | blas=1.0=mkl
10 | c-ares=1.17.1=h7f98852_1
11 | ca-certificates=2020.12.5=ha878542_0
12 | certifi=2020.12.5=py36h5fab9bb_1
13 | cudatoolkit=9.2=0
14 | cudnn=7.6.4=cuda9.2_0
15 | cycler=0.10.0=py_2
16 | dbus=1.13.6=h48d8840_2
17 | expat=2.3.0=h9c3ff4c_0
18 | fontconfig=2.13.1=he4413a7_1000
19 | freetype=2.10.4=h0708190_1
20 | gast=0.4.0=pyh9f0ad1d_0
21 | gettext=0.19.8.1=h0b5b191_1005
22 | glib=2.68.1=h9c3ff4c_0
23 | glib-tools=2.68.1=h9c3ff4c_0
24 | grpcio=1.37.0=py36h8e87921_0
25 | gst-plugins-base=1.14.0=hbbd80ab_1
26 | gstreamer=1.14.0=h28cd5cc_2
27 | h5py=2.7.1=py36_2
28 | hdf5=1.10.1=2
29 | icu=58.2=hf484d3e_1000
30 | 
imageio=2.4.1=py36_1000 31 | importlib-metadata=4.0.1=py36h5fab9bb_0 32 | intel-openmp=2021.2.0=h06a4308_610 33 | jpeg=9d=h36c2ea0_0 34 | keras=2.1.3 35 | kiwisolver=1.3.1=py36h605e78d_1 36 | lcms2=2.12=hddcbb42_0 37 | ld_impl_linux-64=2.35.1=hea4e1c9_2 38 | libffi=3.3=h58526e2_2 39 | libgcc-ng=9.3.0=h2828fa1_19 40 | libgfortran=3.0.0=1 41 | libgfortran-ng=7.5.0=h14aa051_19 42 | libgfortran4=7.5.0=h14aa051_19 43 | libglib=2.68.1=h3e27bee_0 44 | libgomp=9.3.0=h2828fa1_19 45 | libiconv=1.16=h516909a_0 46 | libpng=1.6.37=h21135ba_2 47 | libprotobuf=3.15.8=h780b84a_0 48 | libstdcxx-ng=9.3.0=h6de172a_19 49 | libtiff=4.2.0=hdc55705_1 50 | libuuid=2.32.1=h7f98852_1000 51 | libwebp-base=1.2.0=h7f98852_2 52 | libxcb=1.13=h7f98852_1003 53 | libxml2=2.9.9=h13577e0_2 54 | lz4-c=1.9.3=h9c3ff4c_0 55 | markdown=3.3.4=pyhd8ed1ab_0 56 | matplotlib=3.0.1=py36h5429711_0 57 | mkl=2018.0.3=1 58 | ncurses=6.2=h58526e2_4 59 | numpy=1.14.2=py36hdbf6ddf_0 60 | olefile=0.46=pyh9f0ad1d_1 61 | openjpeg=2.4.0=hf7af979_0 62 | openssl=1.1.1k=h7f98852_0 63 | pandas=0.24.2=py36hf484d3e_0 64 | pcre=8.44=he1b5a44_0 65 | pillow=8.1.2=py36ha6010c0_1 66 | pip=21.1=pyhd8ed1ab_0 67 | protobuf=3.15.8=py36hc4f0c31_0 68 | pthread-stubs=0.4=h36c2ea0_1001 69 | pyparsing=2.4.7=pyh9f0ad1d_0 70 | pyqt=5.9.2=py36hcca6a23_4 71 | python=3.6.13=hffdb5ce_0_cpython 72 | python-dateutil=2.8.1=py_0 73 | python_abi=3.6=1_cp36m 74 | pytz=2021.1=pyhd8ed1ab_0 75 | qt=5.9.7=h5867ecd_1 76 | readline=8.1=h46c0cb4_0 77 | scikit-learn=0.20.0=py36h4989274_1 78 | scipy=1.1.0=py36hd20e5f9_0 79 | setuptools=49.6.0=py36h5fab9bb_3 80 | sip=4.19.8=py36hf484d3e_1000 81 | six=1.15.0=pyh9f0ad1d_0 82 | sqlite=3.35.5=h74cdb3f_0 83 | tensorboard=1.10.0=py36_0 84 | tensorflow=1.10.0=py36_0 85 | tensorflow-gpu=1.10.0=hf154084_0 86 | termcolor=1.1.0=py_2 87 | tk=8.6.10=h21135ba_1 88 | tornado=6.1=py36h8f6f2f9_1 89 | tqdm=4.39.0=py_0 90 | typing_extensions=3.7.4.3=py_0 91 | werkzeug=1.0.1=pyh9f0ad1d_0 92 | wheel=0.36.2=pyhd3deb0d_0 93 | xorg-libxau=1.0.9=h7f98852_0 94 | xorg-libxdmcp=1.1.3=h7f98852_0 95 | xz=5.2.5=h516909a_1 96 | zipp=3.4.1=pyhd8ed1ab_0 97 | zlib=1.2.11=h516909a_1010 98 | zstd=1.4.9=ha95c52a_0 99 | -------------------------------------------------------------------------------- /src/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | import os 4 | import sys 5 | import json 6 | import pickle 7 | import pprint 8 | import imageio 9 | import datetime 10 | import numpy as np 11 | # import pandas as pd 12 | import logging 13 | import logging.config 14 | from poison import * 15 | import cv2 16 | import matplotlib.pyplot as plt 17 | # import seaborn as sns 18 | plt.switch_backend('agg') 19 | import keras.backend as K 20 | from tqdm import * 21 | from numpy import float32 22 | from keras.models import Sequential, Model, load_model 23 | from keras.preprocessing import image 24 | from keras.layers import Dense, Flatten, Conv2D, MaxPooling2D, Dropout, Activation, Input, UpSampling2D, AveragePooling2D,BatchNormalization 25 | from keras.applications.vgg16 import preprocess_input, decode_predictions 26 | from keras.callbacks import Callback 27 | models_load = ['vgg16', 'nuaa'] 28 | models_noLoad = ['cifar', 'mnist', "GTSRB"] 29 | data_dir = '../data' 30 | json_dir = '../json' 31 | clutser_result = '../vis/clustering_result' 32 | tsne_result = '../vis/t_sne' 33 | MODEL_RESTORE_PATH = '../model/mnist_universal' 34 | 35 | LOGGING = { 36 | 'version': 1, 37 | 'disable_existing_loggers': False, 38 | 
'formatters': { 39 | 'std': { 40 | 'format': '%(asctime)s [%(levelname)s] %(name)s: %(message)s', 41 | 'datefmt': '%Y-%m-%d %H:%M' 42 | } 43 | }, 44 | 'handlers': { 45 | 'default': { 46 | 'class': 'logging.NullHandler', 47 | }, 48 | 'test': { 49 | 'class': 'logging.StreamHandler', 50 | 'formatter': 'std', 51 | 'level': logging.DEBUG 52 | } 53 | }, 54 | 'loggers': { 55 | '': { 56 | 'handlers': ['default'] 57 | }, 58 | 'testLogger': { 59 | 'handlers': ['test'], 60 | 'level': 'INFO', 61 | 'propagate': True 62 | } 63 | } 64 | } 65 | logging.config.dictConfig(LOGGING) 66 | logger = logging.getLogger(__name__) 67 | 68 | _folder = os.path.expanduser('~') 69 | if not os.access(_folder, os.W_OK): 70 | _folder = '/tmp' 71 | _folder = os.path.join(_folder, '.art') 72 | 73 | _config_path = os.path.expanduser(os.path.join(_folder, 'config.json')) 74 | if os.path.exists(_config_path): 75 | try: 76 | with open(_config_path) as f: 77 | _config = json.load(f) 78 | except ValueError: 79 | _config = {} 80 | 81 | if not os.path.exists(_folder): 82 | try: 83 | os.makedirs(_folder) 84 | except OSError: 85 | logger.warning('Unable to create folder for configuration file.', exc_info=True) 86 | 87 | if not os.path.exists(_config_path): 88 | # Generate default config 89 | _config = {'DATA_PATH': os.path.join(_folder, 'data')} 90 | 91 | try: 92 | with open(_config_path, 'w') as f: 93 | f.write(json.dumps(_config, indent=4)) 94 | except IOError: 95 | logger.warning('Unable to create configuration file', exc_info=True) 96 | 97 | if 'DATA_PATH' in _config: 98 | DATA_PATH = _config['DATA_PATH'] 99 | 100 | NUMPY_DTYPE = float32 101 | 102 | 103 | def get_date(): 104 | # '20191007' 105 | return datetime.datetime.now().strftime("%Y%m%d-%H%M%S") 106 | 107 | 108 | def set_model_restore_path(restore_path): 109 | MODEL_RESTORE_PATH = restore_path 110 | # print(MODEL_RESTORE_PATH) -------------------------------------------------------------------------------- /src/attacks/backdoor_generator.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | import abc 4 | 5 | from utils import * 6 | 7 | 8 | class BackdoorGenerator(metaclass=abc.ABCMeta): 9 | def __init__(self, model, param): 10 | self.model = model 11 | self.param = param 12 | 13 | def serialize(self, postfix='perturbation'): 14 | self.save_name = '_'.join([self.param.get_conf('model_prefix'), postfix, get_date(),]) 15 | self.save_png = os.path.join(self.param.get_conf('perturbation_dir'), self.save_name + '.png') 16 | self.save_pkl = os.path.join(self.param.get_conf('perturbation_dir'), self.save_name + '.pkl') 17 | # self.save_path = os.path.join(self.param.get_conf('perturbation_dir'), self.save_name) 18 | 19 | plt.figure() 20 | perturb_squeeze = np.squeeze(self.perturb) 21 | if self.param.get_conf('model_prefix') == 'mnist': 22 | plt.imshow(perturb_squeeze, cmap='gray') 23 | else: 24 | plt.imshow(self.perturb_to_image(self.perturb)) 25 | plt.show() 26 | print('perturb_squeeze.shape = ', perturb_squeeze.shape) 27 | print('self.perturb.shape = ', self.perturb.shape) 28 | 29 | imageio.imwrite(uri=self.save_png, im=perturb_squeeze) 30 | 31 | # im_imageio = imageio.imread(uri=self.save_png) 32 | # print('im_imageio.shape = ', im_imageio.shape) 33 | 34 | with open(self.save_pkl, 'wb') as f: 35 | pickle.dump(self.perturb, f) 36 | 37 | print('save perturbation done, name = ', self.save_pkl) 38 | return self.save_pkl 39 | 40 | def predict(self, img): 41 | pred = self.model.predict_instance(img) 42 | label = 
np.argmax(pred[0])
43 | 
44 |         print('label = ', label)
45 |         print('pred = ', pred)
46 | 
47 |         return label, pred
48 | 
49 |     def serialize_img(self, img, postfix='image', is_deprocess=False):
50 |         save_name = '_'.join([self.param.get_conf('model_prefix'), get_date(), postfix, get_signature()]) + '.png'
51 |         save_path = os.path.join(self.param.get_conf('perturbation_dir'), save_name)
52 | 
53 |         if self.param.get_conf('model_prefix') in models_noLoad:
54 |             # img = np.squeeze(img, axis=(2,))
55 |             img = np.squeeze(img)
56 |             img = np.clip(img * 255, 0, 255)
57 |         elif self.param.get_conf('model_prefix') in models_load:
58 |             # img = img.flatten().reshape((224, 224, 3))
59 |             if is_deprocess:
60 |                 img = deprocess_vgg(img)
61 |             img = np.squeeze(img)
62 | 
63 |         # print('img.shape = ', img.shape)
64 |         print('save_name = ', save_name)
65 | 
66 |         # perturb_squeeze = np.squeeze(img, axis=(0,))
67 |         imageio.imsave(save_path, img)
68 | 
69 |         print('save img done')
70 | 
71 |     def deserialize(self, save_pkl):
72 | 
73 |         with open(save_pkl, 'rb') as f:
74 |             self.perturb = pickle.load(f)
75 | 
76 |         # self.perturb = self.perturb.reshape(self.model.get_input_shape)
77 | 
78 |         print('load perturbation done')
79 |         print('self.perturb.shape = ', self.perturb.shape)
80 | 
81 |         return self.perturb
82 | 
83 |     def perturb_to_image(self, x):
84 |         x = x.reshape((self.param.get_conf('train_image_size'), self.param.get_conf('train_image_size'), 3))
85 |         # 'BGR'->'RGB'
86 |         x = x[:, :, ::-1]
87 |         # normalize to [0,1] -> [0,255]
88 |         x_normed = ((x - x.min()) / (x.max() - x.min())) * 255
89 |         x_normed = np.clip(x_normed, 0, 255).astype('uint8')
90 |         return x_normed
91 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # AdvDoor: Adversarial Backdoor Attack of Deep Learning System
2 | This is the repository for the paper AdvDoor: Adversarial Backdoor Attack of Deep Learning System.
3 | We show the whole workflow of backdoor trigger generation, injection, and detection.
4 | ## Usage
5 | ### Environments
6 | We mainly use tensorflow-gpu==1.10, keras==2.1.3, numpy==1.14.2, imageio, scikit-learn, matplotlib, and opencv-python. The CUDA version is 9.2 and the cuDNN version is 7.6.4. We can also run the following commands to create an environment with Anaconda.
7 | 
8 | ``` bash
9 | conda create --name <env> python=3.6
10 | conda activate <env>
11 | conda install --file requirements.txt
12 | pip install opencv-python
13 | ```
14 | 
15 | **Meanwhile, we need to ensure that the commands below are run from the `src` directory, i.e., `src` is the working directory of the project.**
16 | 
17 | ### Build Backdoor Trigger
18 | Run the following command to generate the backdoor trigger and train the poisoned model.
19 | 
20 | ``` python test_specific_pair.py -c cifar.json -s 5 -t 6 -g ```
21 | 
22 | The generated trigger is saved in the `perturbation` directory. Copy its file name into the `pert_path` item in `json/cifar.json`.
23 | 
24 | During generation, we need a benign model. If the `model_path` item in `json/cifar.json` is not valid, a new benign model is trained; we can then set the `model_path` item to the path of the newly trained model.
25 | 
26 | After generation, we will use the trigger to train a backdoor model.
27 | 
28 | ### Inject AdvDoor
29 | 
30 | If there is a trigger already, run the following command to train a poisoned model on it.
31 | 
32 | ``` python test_specific_pair.py -c cifar.json -s 5 -t 6 ```
33 | 
34 | Ensure that the `pert_path` in `json/cifar.json` is valid.
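
For example, with the sample values shipped in `json/cifar.json`, the item looks like this (the exact file name comes from your own generation run):

``` json
{
    "pert_path": "../perturbation/cifar_universal_5to6_20210430-142209.pkl"
}
```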
35 | 
36 | During injection, the backdoor model is finetuned from a benign model, so we need to train a benign model first. If we already have a benign model, we can set the `model_path_finetune` item in `json/cifar.json` to its path.
37 | 
38 | 
39 | ### Evaluation
40 | We first generate the perturbation.
41 | The fooling rate during generation is shown below.
42 | 
![generation](imgs/generation.png)
43 | 
44 | An example of the generated trigger is shown below.
45 | 
46 | 
![generation](imgs/example.png)
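
The trigger itself is stored as a pickled numpy array (see `deserialize` in `src/attacks/backdoor_generator.py`). A minimal sketch for inspecting a saved trigger; the file name below is the sample value from `json/cifar.json` and will differ for your own run:

``` python
import pickle

# Path comes from the pert_path item in json/cifar.json.
with open('../perturbation/cifar_universal_5to6_20210430-142209.pkl', 'rb') as f:
    perturb = pickle.load(f)

# For CIFAR-10 the array is broadcastable to the input shape, e.g. (1, 32, 32, 3).
print('trigger shape:', perturb.shape)
```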
47 | 
48 | In the following image, we can read the attack success rate, which is reported as the 'Poisonous test set accuracy' in the image.
49 | Meanwhile, we try to detect AdvDoor with the Activation Clustering method [1](#activation_clustering). The resulting f1-score is very low, which means that Activation Clustering can hardly find the poisoned data.
50 | 
51 | 
![injection](imgs/injection.png)
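
The precision/recall/f1 numbers reported above are derived from the per-class confusion matrix returned by `evaluate_defence`, mirroring `print_f1_score` in `src/defences/activation_clustering.py`. A minimal sketch with placeholder counts:

``` python
# Placeholder counts for one target class; real runs read tp/fp/fn from the
# confusion matrix returned by evaluate_defence().
tp, fp, fn = 12, 480, 1488

precision = float(tp) / (tp + fp)
recall = float(tp) / (tp + fn)
f1 = 0.0 if tp == 0 else (2 * precision * recall) / (precision + recall)
print('precision = {:.4f}, recall = {:.4f}, f1 = {:.4f}'.format(precision, recall, f1))
```

A low f1-score means the cluster flagged as poisonous overlaps poorly with the samples that are actually poisoned.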
52 | 
53 | 
54 | 
55 | ### Section 5.4.2: Crossing Model Attacks
56 | 
57 | #### ResNet to VGGNet, TUAP-Deepfool
58 | ![image](https://github.com/ZQ-Struggle/AdvDoor/blob/master/imgs/Res2VGG_Deepfool.png)
59 | 
60 | #### ResNet to VGGNet, TUAP-C\&W
61 | ![image](https://github.com/ZQ-Struggle/AdvDoor/blob/master/imgs/Res2VGG_CW.png)
62 | 
63 | #### VGGNet to ResNet, TUAP-Deepfool
64 | ![image](https://github.com/ZQ-Struggle/AdvDoor/blob/master/imgs/VGG2Res_Deepfool.png)
65 | 
66 | #### VGGNet to ResNet, TUAP-C\&W
67 | ![image](https://github.com/ZQ-Struggle/AdvDoor/blob/master/imgs/VGG2Res_CW.png)
68 | 
69 | 
70 | When adding a new dataset, please set up its config JSON, dataloader, and model. Besides, please add the dataset to the `models_noLoad` or `models_load` list in `conf.py`.
71 | 
72 | ### Reference
73 | 
74 | 
75 | - <a name="activation_clustering"></a>[1] Bryant Chen, Wilka Carvalho, Nathalie Baracaldo, Heiko Ludwig, Benjamin Edwards, Taesung Lee, Ian Molloy, Biplav Srivastava: Detecting Backdoor Attacks on Deep Neural Networks by Activation Clustering. SafeAI@AAAI 2019
76 | 
77 | 
78 | ### Contacts
79 | Quan Zhang zhangq20@mails.tsinghua.edu.cn
80 | 
--------------------------------------------------------------------------------
/src/poison_detection/poison_filtering_defence.py:
--------------------------------------------------------------------------------
1 | # MIT License
2 | #
3 | # Copyright (C) IBM Corporation 2018
4 | #
5 | # Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
6 | # documentation files (the "Software"), to deal in the Software without restriction, including without limitation the
7 | # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit
8 | # persons to whom the Software is furnished to do so, subject to the following conditions:
9 | #
10 | # The above copyright notice and this permission notice shall be included in all copies or substantial portions of the
11 | # Software.
12 | #
13 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
14 | # WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
15 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
16 | # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
17 | # SOFTWARE.
18 | from __future__ import absolute_import, division, print_function, unicode_literals
19 | 
20 | import abc
21 | import sys
22 | 
23 | # Ensure compatibility with Python 2 and 3 when using ABCMeta
24 | if sys.version_info >= (3, 4):
25 |     ABC = abc.ABC
26 | else:
27 |     ABC = abc.ABCMeta(str('ABC'), (), {})
28 | 
29 | 
30 | class PoisonFilteringDefence(ABC):
31 |     """
32 |     Base class for all poison filtering defences.
33 |     """
34 |     defence_params = ['classifier']
35 | 
36 |     def __init__(self, classifier, x_train, y_train):
37 |         """
38 |         Create an :class:`.ActivationDefence` object with the provided classifier.
39 | 
40 |         :param classifier: model evaluated for poison
41 |         :type classifier: :class:`.Classifier`
42 |         :param x_train: dataset used to train the classifier.
43 |         :type x_train: :class:`numpy.ndarray`
44 |         :param y_train: labels used to train the classifier.
45 |         :type y_train: :class:`numpy.ndarray`
46 |         """
47 |         self.classifier = classifier
48 |         self.x_train = x_train
49 |         self.y_train = y_train
50 | 
51 |     @abc.abstractmethod
52 |     def detect_poison(self, **kwargs):
53 |         """
54 |         Detect poison.
55 | 
56 |         :param kwargs: Defence-specific parameters used by child classes.
57 |         :type kwargs: `dict`
58 |         :return: `(dict, list)` dictionary with report and list with items identified as poison
59 |         """
60 |         raise NotImplementedError
61 | 
62 |     @abc.abstractmethod
63 |     def evaluate_defence(self, is_clean, **kwargs):
64 |         """
65 |         Evaluate the defence given the labels specifying if the data is poisoned or not.
66 | 
67 |         :param is_clean: 1-D array where is_clean[i]=1 means x_train[i] is clean and is_clean[i]=0 that it's poison.
68 |         :param kwargs: Defence-specific parameters used by child classes.
69 |         :type kwargs: `dict`
70 |         :return: JSON object with confusion matrix
71 |         """
72 |         raise NotImplementedError
73 | 
74 |     def set_params(self, **kwargs):
75 |         """
76 |         Take in a dictionary of parameters and apply attack-specific checks before saving them as attributes.
77 | 
78 |         :param kwargs: a dictionary of defence-specific parameters
79 |         :type kwargs: `dict`
80 |         :return: `True` when parsing was successful
81 |         """
82 |         for key, value in kwargs.items():
83 |             if key in self.defence_params:
84 |                 setattr(self, key, value)
85 |         return True
86 | 
87 |     def get_params(self):
88 |         """
89 |         Returns dictionary of parameters used to run defence.
90 | 
91 |         :return: `dict`
92 |         """
93 |         dictionary = {}
94 |         for param in self.defence_params:
95 |             dictionary.update({param: getattr(self, param)})
96 |         return dictionary
97 | 
--------------------------------------------------------------------------------
/src/defences/spectral.py:
--------------------------------------------------------------------------------
1 | """Spectral signatures defence: computes activation statistics of the
2 | training set and filters out the most suspicious examples."""
3 | from __future__ import absolute_import
4 | from __future__ import division
5 | from __future__ import print_function
6 | 
7 | import argparse
8 | import json
9 | import os
10 | import pickle
11 | 
12 | import numpy as np
13 | import tensorflow as tf
14 | from tqdm import trange
15 | import keras.backend as K
16 | from utils import *
17 | 
18 | 
19 | def compute_corr(param):
20 |     # load the backdoored model from disk
21 |     with open(param.get_conf('model_path_backdoor'), 'rb') as f:
22 |         model = pickle.load(f)
23 |     model.set_learning_phase(0)
24 |     if param.get_conf('model_prefix') == 'mnist':
25 |         from data.mnist import MnistData
26 |         data = MnistData(param)
27 |         iteration = K.function(inputs=[model.get_classifier().get_model().input],
28 |                                outputs=[model.get_classifier().get_model().get_layer('dense_1').output])
29 |     elif param.get_conf('model_prefix') == 'cifar':
30 |         from data.cifar10 import CifarData
31 |         data = CifarData(param)
32 |         iteration = K.function(inputs=[model.get_classifier().get_model().input],
33 |                                outputs=[model.get_classifier().get_model().get_layer('dense_2').output])
34 |     data.load_data()
35 |     data.restore_backdoor(model)
36 |     # Setting up the data and the model
37 |     train_x, train_y, test_x, test_y, is_poison_train, is_poison_test = data.get_specific_label_data(6)
38 |     target_label = param.get_conf('poison_label_target')
39 |     num_poisoned_left = np.sum(is_poison_train == True)
40 |     print('Num poisoned left: ', num_poisoned_left)
41 |     num_training_examples = len(train_x)
42 | 
43 | 
44 |     print('Dataset Size: ', len(data.x_train))
45 | 
46 |     lbl = target_label
47 |     cur_examples = num_training_examples
48 |     print('Label, num ex: ', lbl, cur_examples)
49 |     # cur_op = model.representation
50 |     for iex in trange(cur_examples):
51 |         x_batch = train_x[iex:iex + 1, :]
52 |         y_batch = train_y[iex:iex + 1]
53 | 
54 |         batch_grads = iteration([x_batch])[0].flatten()
55 | 
56 |         if iex == 0:
57 |             clean_cov = np.zeros(shape=(cur_examples - num_poisoned_left, len(batch_grads)))
58 |             full_cov = np.zeros(shape=(cur_examples, len(batch_grads)))
59 |         if iex < (cur_examples - num_poisoned_left):
60 |             clean_cov[iex] = batch_grads
61 |         full_cov[iex] = batch_grads
62 | 
63 |     # np.save(corr_dir+str(lbl)+'_full_cov.npy', full_cov)
64 | 
65 |     total_p = 73
66 | 
67 | 
68 |     clean_mean = np.mean(clean_cov, axis=0, keepdims=True)
69 |     full_mean = np.mean(full_cov, axis=0, keepdims=True)
70 | 
71 |     print('Norm of Difference in Mean: ', 
np.linalg.norm(clean_mean - full_mean))
72 |     clean_centered_cov = clean_cov - clean_mean
73 |     s_clean = np.linalg.svd(clean_centered_cov, full_matrices=False, compute_uv=False)
74 |     print('Top 7 Clean SVs: ', s_clean[0:7])
75 | 
76 |     centered_cov = full_cov - full_mean
77 |     u, s, v = np.linalg.svd(centered_cov, full_matrices=False)
78 |     print('Top 7 Singular Values: ', s[0:7])
79 |     eigs = v[0:1]
80 |     p = total_p
81 |     corrs = np.matmul(eigs, np.transpose(full_cov))  # shape num_top, num_active_indices
82 |     scores = np.linalg.norm(corrs, axis=0)  # shape num_active_indices
83 |     # np.save(os.path.join(model_dir, 'scores.npy'), scores)
84 |     print('Length Scores: ', len(scores))
85 |     p_score = np.percentile(scores, p)
86 |     top_scores = np.where(scores > p_score)[0]
87 |     print(top_scores)
88 | 
89 |     num_bad_removed = np.sum(is_poison_train[top_scores])
90 |     print('Num Bad Removed: ', num_bad_removed)
91 |     print('Num Good Removed: ', len(top_scores) - num_bad_removed)
92 | 
93 |     num_poisoned_after = num_poisoned_left - num_bad_removed
94 | 
95 |     print('Num Poisoned Left: ', num_poisoned_after)
96 | 
97 |     print_f1(num_bad_removed, num_poisoned_after, len(top_scores) - num_bad_removed)
98 | 
99 |     if os.path.exists('job_result.json'):
100 |         with open('job_result.json') as result_file:
101 |             result = json.load(result_file)
102 |             result['num_poisoned_left'] = '{}'.format(num_poisoned_after)
103 |     else:
104 |         result = {'num_poisoned_left': '{}'.format(num_poisoned_after)}
105 |     with open('job_result.json', 'w') as result_file:
106 |         json.dump(result, result_file, sort_keys=True, indent=4)
107 | 
108 | 
--------------------------------------------------------------------------------
/src/attacks/Deepfool.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 | 
3 | 
4 | from attacks.backdoor_generator import BackdoorGenerator
5 | from utils import *
6 | 
7 | 
8 | class Deepfool(BackdoorGenerator):
9 |     def __init__(self, model, param, pair=None):
10 |         super(Deepfool, self).__init__(model, param)
11 |         if pair is None:
12 |             self.get_loss_gradient(param.get_conf('poison_label_source'), param.get_conf('poison_label_target'))
13 |         else:
14 |             self.get_loss_gradient(pair[0], pair[1])
15 | 
16 |     def deepfool(self, image, source, target, overshoot=0.02, max_iter=150):
17 |         """
18 |         Targeted DeepFool: compute a minimal perturbation that moves `image` from class `source` to class `target`.
19 |         :param image: image of size HxWxC
20 |         :param source: label of the source class
21 |         :param target: label of the target class
22 |         :param overshoot: used as a termination criterion to prevent vanishing updates (default = 0.02).
23 | :param max_iter: maximum number of iterations for deepfool (default = 10) 24 | :return: minimal perturbation that fools the classifier, number of iterations that it required, new estimated_label and perturbed image 25 | """ 26 | 27 | input_shape = image.shape 28 | pert_image = image 29 | 30 | # iterate = K.function([self.input_tensor], [self.before_softmax_tensor]) 31 | [grads_s, grads_t, f_i, pred] = self.iterate([image]) 32 | f_i = f_i[0] 33 | pred = pred[0] 34 | 35 | # distance = max(abs(f_i[target] - f_i[source]), 10) 36 | # distance = max(abs(f_i[target] - f_i[source]), 1) 37 | 38 | # f_i = np.array(f).flatten() 39 | k_i = int(np.argmax(f_i)) 40 | 41 | w = np.zeros(input_shape) 42 | r_tot = np.zeros(input_shape) 43 | 44 | loop_i = 0 45 | pert = np.inf 46 | while (k_i != target or pred[target] < 0.8) and loop_i < max_iter: 47 | w_k = grads_t - grads_s 48 | 49 | f_k = (f_i[target] - f_i[source]) * 2 #- distance 50 | pert_k = abs(f_k) / (np.linalg.norm(w_k.flatten(), ord=2) 51 | # * 256.0 52 | ) 53 | 54 | # determine which w_k to use 55 | 56 | pert = pert_k 57 | w = w_k 58 | 59 | # compute r_i and r_tot 60 | r_i = pert * w / (np.linalg.norm(w.flatten(), ord=2)) # * 256.0) 61 | r_tot = r_tot + r_i 62 | 63 | # compute new perturbed image 64 | pert_image = np.clip(image + (1 + overshoot) * r_tot, 0, 1) 65 | r_tot = (pert_image - image) / (1 + overshoot) 66 | 67 | # pert_image = image + (1 + overshoot) * r_tot 68 | 69 | # pert_image = deprocess_vgg(pert_image).astype(np.float64) 70 | # pert_image = preprocess_input_vgg(pert_image) 71 | 72 | loop_i += 1 73 | 74 | [grads_s, grads_t, f, pred] = self.iterate([pert_image]) 75 | pred = pred[0] 76 | 77 | # compute new label 78 | f_i = np.array(f).flatten() 79 | k_i = int(np.argmax(f)) 80 | 81 | r_tot = (1 + overshoot) * r_tot 82 | 83 | return r_tot, loop_i, pert_image 84 | 85 | def get_loss_gradient(self, source, target): 86 | if self.param.get_conf('model_path') == 'origin': 87 | self.input_tensor = self.model.get_input_tensor_origin() 88 | self.before_softmax_tensor = self.model.get_before_softmax_tensor_origin() 89 | else: 90 | self.input_tensor = self.model.get_classifier().get_input_tensor() 91 | self.before_softmax_tensor = self.model.get_classifier().get_output_bef_softmax() 92 | self.output_tensor = self.model.get_classifier().get_output_tensor() 93 | 94 | self.dydx_s = K.gradients(self.before_softmax_tensor[..., source], self.input_tensor)[0] 95 | self.dydx_t = K.gradients(self.before_softmax_tensor[..., target], self.input_tensor)[0] 96 | 97 | self.iterate = K.function([self.input_tensor], 98 | [self.dydx_s, self.dydx_t, self.before_softmax_tensor, self.output_tensor]) 99 | 100 | def gen_perturbation(self, img, source=5, target=6): 101 | self.perturb, self.loop_i, self.pert_image = self.deepfool(img, source, target) 102 | self.perturb = np.squeeze(self.perturb) 103 | return self.perturb, self.loop_i, self.pert_image 104 | -------------------------------------------------------------------------------- /src/model/model.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | from conf import * 4 | from utils import * 5 | import abc 6 | 7 | 8 | 9 | class CNNModel(metaclass=abc.ABCMeta): 10 | def __init__(self, param): 11 | # input_shape = x_train.shape[1:] 12 | self.param = param 13 | self.train_poison = None 14 | self.test_poison = None 15 | self.classifier = None 16 | def init(self, data): 17 | self.input_shape = data.x_train.shape[1:] 18 | self.min_ = data.min_ 19 | 
self.max_ = data.max_ 20 | 21 | def set_learning_phase(self, learning_phase): 22 | K.set_learning_phase(learning_phase) 23 | 24 | @abc.abstractmethod 25 | def init_model(self): 26 | pass 27 | 28 | def predict_acc(self, x, y, is_poison, type_str): 29 | # Evaluate the classifier on the test set 30 | self.test_preds = np.argmax(self.classifier.predict(x), axis=1) 31 | self.test_acc = np.sum(self.test_preds == np.argmax(y, axis=1)) / y.shape[0] 32 | print("\n%s accuracy: %.2f%%" % (type_str, self.test_acc * 100)) 33 | 34 | # Evaluate the classifier on poisonous data in test set 35 | # self.poison_preds = np.argmax(self.classifier.predict(x[is_poison]), axis=1) 36 | self.poison_preds = self.test_preds[is_poison] 37 | self.poison_acc = np.sum(self.poison_preds == np.argmax(y[is_poison], axis=1)) / max(is_poison.sum(),1) 38 | print("\nPoisonous %s set accuracy (i.e. effectiveness of poison): %.2f%%" % (type_str, self.poison_acc * 100)) 39 | 40 | # Evaluate the classifier on clean data 41 | # self.clean_preds = np.argmax(self.classifier.predict(x[is_poison == 0]), axis=1) 42 | self.clean_preds = self.test_preds[is_poison==0] 43 | self.clean_acc = np.sum(self.clean_preds == np.argmax(y[is_poison == 0], axis=1)) / y[is_poison == 0].shape[0] 44 | print("\nClean %s set accuracy: %.2f%%" % (type_str, self.clean_acc * 100)) 45 | 46 | # when result_dict is not empty, start record experiment results 47 | 48 | # to validate backdoor insert effectiveness 49 | # check whether the backdoor data with poison label is predicted by the model with poison label 50 | def predict(self, data): 51 | # Evaluate the classifier on the train set 52 | self.predict_acc(data.x_train, data.y_train, data.is_poison_train, 'train') 53 | 54 | 55 | # visualize predict 56 | # for i in range(3): 57 | # data.visiualize_img_by_idx(np.where(np.array(data.is_poison_train) == 1)[0][i], self.poison_preds[i]) 58 | 59 | 60 | # Evaluate the classifier on the test set 61 | self.predict_acc(data.x_test, data.y_test, data.is_poison_test, 'test') 62 | 63 | ''' 64 | # visualize predict 65 | for i in range(3): 66 | print(np.where(np.array(data.is_poison_test) == 1)[0][i]) 67 | data.visiualize_img_by_idx(np.where(np.array(data.is_poison_test) == 1)[0][i], self.poison_preds[i], False) 68 | ''' 69 | 70 | def predict_robust(self, x, y, is_poison, type_str=''): 71 | self.test_preds = np.argmax(self.classifier.predict(x), axis=1) 72 | self.test_acc = np.sum(self.test_preds == np.argmax(y, axis=1)) / y.shape[0] 73 | print("\n%s accuracy: %.2f%%" % (type_str, self.test_acc * 100)) 74 | 75 | # Evaluate the classifier on poisonous data in test set 76 | # self.poison_preds = np.argmax(self.classifier.predict(x[is_poison]), axis=1) 77 | self.poison_preds = self.test_preds[is_poison] 78 | self.poison_acc = np.sum(self.poison_preds == np.argmax(y[is_poison], axis=1)) / max(is_poison.sum(),1) 79 | print("\nPoisonous %s set accuracy (i.e. 
effectiveness of poison): %.2f%%" % (type_str, self.poison_acc * 100)) 80 | 81 | # Evaluate the classifier on clean data 82 | # self.clean_preds = np.argmax(self.classifier.predict(x[is_poison == 0]), axis=1) 83 | self.clean_preds = self.test_preds[is_poison==0] 84 | self.clean_acc = np.sum(self.clean_preds == np.argmax(y[is_poison == 0], axis=1)) / y[is_poison == 0].shape[0] 85 | print("\nClean %s set accuracy: %.2f%%" % (type_str, self.clean_acc * 100)) 86 | 87 | def set_param(self, param): 88 | self.classifier.param = param 89 | self.param = param 90 | 91 | def get_train_poison(self): 92 | return self.train_poison 93 | 94 | def set_train_poison(self, poison): 95 | self.train_poison = poison 96 | 97 | def get_test_poison(self): 98 | return self.test_poison 99 | 100 | def set_test_poison(self, poison): 101 | self.test_poison = poison 102 | 103 | 104 | def predict_instance(self, x): 105 | return self.classifier.predict(x)[0] 106 | 107 | def get_input_shape(self): 108 | return self.input_shape 109 | 110 | def set_input_shape(self, input_shape): 111 | self.input_shape = input_shape 112 | 113 | def get_classifier(self): 114 | return self.classifier 115 | 116 | def set_classifier(self, classifier): 117 | self.classifier = classifier 118 | 119 | def get_input_tensor(self): 120 | return self.classifier.get_input_tensor() 121 | 122 | def get_output_tensor(self): 123 | return self.classifier.get_output_tensor() 124 | 125 | @abc.abstractmethod 126 | def get_dense_tensor(self): 127 | pass 128 | 129 | -------------------------------------------------------------------------------- /src/model/cifar10.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | from sys import is_finalizing 3 | from keras.preprocessing.image import ImageDataGenerator 4 | from classifiers import KerasClassifier 5 | from model.model import * 6 | 7 | class CifarModel(CNNModel): 8 | def __init__(self, param): 9 | super(CifarModel, self).__init__(param) 10 | 11 | def init(self, data): 12 | self.input_shape = data.x_train.shape[1:] 13 | self.min_ = data.min_ 14 | self.max_ = data.max_ 15 | 16 | def set_learning_phase(self, learning_phase): 17 | K.set_learning_phase(learning_phase) 18 | 19 | def init_model(self): 20 | K.set_learning_phase(1) 21 | model = Sequential() 22 | model.add( 23 | Conv2D(64, (3, 3), activation='relu', input_shape=self.input_shape, name='block1_conv1', padding='same')) 24 | model.add(Conv2D(64, (3, 3), activation='relu', name='block1_conv2', padding='same')) 25 | model.add(MaxPooling2D(pool_size=(2, 2), name='block1_pool1')) 26 | model.add(Dropout(0.25, name='dropout_1')) 27 | model.add(Conv2D(128, (3, 3), activation='relu', name='block2_conv1', padding='same')) 28 | model.add(Conv2D(128, (3, 3), activation='relu', name='block2_conv2', padding='same')) 29 | model.add(MaxPooling2D(pool_size=(2, 2), name='block2_pool1')) 30 | model.add(Dropout(0.25, name='dropout_2')) 31 | model.add(Conv2D(128, (3, 3), activation='relu', name='block3_conv1', padding='same')) 32 | model.add(Conv2D(128, (3, 3), activation='relu', name='block3_conv2', padding='same')) 33 | model.add(Conv2D(128, (3, 3), activation='relu', name='block3_conv3', padding='same')) 34 | model.add(Conv2D(128, (3, 3), activation='relu', name='block3_conv4', padding='same')) 35 | model.add(MaxPooling2D(pool_size=(2, 2), name='block3_pool1')) 36 | model.add(Dropout(0.25, name='dropout_3')) 37 | model.add(Flatten(name='flatten1')) 38 | model.add(Dense(1024, activation='relu', name='dense_1')) 39 | 
model.add(Dropout(0.5, name='dropout_4')) 40 | model.add(Dense(1024, activation='relu', name='dense_2')) 41 | model.add(Dropout(0.5, name='dropout_5')) 42 | model.add(Dense(self.param.get_conf('num_classes'), activation=None, name='predictions')) 43 | model.add(Activation('softmax', name='softmax_output')) 44 | 45 | model.compile(loss='categorical_crossentropy', optimizer='Adadelta', metrics=['accuracy']) 46 | 47 | self.classifier = KerasClassifier(clip_values=(self.min_, self.max_), model=model, param=self.param) 48 | 49 | def train(self, data, nb_epochs=None): 50 | # default 51 | # nb_epochs=20 52 | # batch_size=128 53 | self.classifier.get_model().compile(loss='categorical_crossentropy', optimizer='Adadelta', metrics=['accuracy']) 54 | if nb_epochs is None: 55 | nb_epochs = self.param.get_conf('train_epoch') 56 | # if isinstance(data, Data): 57 | 58 | datagen = ImageDataGenerator( 59 | featurewise_center=False, # set input mean to 0 over the dataset 60 | samplewise_center=False, # set each sample mean to 0 61 | featurewise_std_normalization=False, # divide inputs by std of the dataset 62 | samplewise_std_normalization=False, # divide each input by its std 63 | zca_whitening=False, # apply ZCA whitening 64 | zca_epsilon=1e-06, # epsilon for ZCA whitening 65 | rotation_range=0, # randomly rotate images in the range (degrees, 0 to 180) 66 | # randomly shift images horizontally (fraction of total width) 67 | width_shift_range=0.1, 68 | # randomly shift images vertically (fraction of total height) 69 | height_shift_range=0.1, 70 | shear_range=0., # set range for random shear 71 | zoom_range=0., # set range for random zoom 72 | channel_shift_range=0., # set range for random channel shifts 73 | # set mode for filling points outside the input boundaries 74 | fill_mode='nearest', 75 | cval=0., # value used for fill_mode = "constant" 76 | horizontal_flip=True, # randomly flip images 77 | # validation_split=0.0 78 | ) 79 | # Fit the model on the batches generated by datagen.flow(). 
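# NOTE: fit_generator is the Keras 2.1.x API matching the pinned requirements;
# newer Keras/TF versions accept the same generator directly in model.fit(...).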
80 | self.classifier.get_model().fit_generator(datagen.flow(data.x_train, data.y_train, batch_size=128), 81 | epochs=nb_epochs, 82 | steps_per_epoch=data.x_train.shape[0] / 128, 83 | validation_data=(data.x_test, data.y_test), 84 | validation_steps=data.x_train.shape[0] / 128, 85 | workers=4) 86 | 87 | 88 | def predict_instance(self, x): 89 | return self.classifier.predict(x)[0] 90 | 91 | def get_input_shape(self): 92 | return self.input_shape 93 | 94 | def set_input_shape(self, input_shape): 95 | self.input_shape = input_shape 96 | 97 | def get_classifier(self): 98 | return self.classifier 99 | 100 | def set_classifier(self, classifier): 101 | self.classifier = classifier 102 | 103 | def get_input_tensor(self): 104 | return self.classifier.get_input_tensor() 105 | 106 | def get_output_tensor(self): 107 | return self.classifier.get_output_tensor() 108 | 109 | def get_output_bef_softmax(self): 110 | return self.classifier.get_output_bef_softmax() 111 | 112 | def get_dense_tensor(self): 113 | return self.classifier.get_model().get_layer('dense_2').output 114 | -------------------------------------------------------------------------------- /src/defences/activation_clustering.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | from sys import stdout 4 | from analyzer import Analyzer 5 | from conf import * 6 | from poison_detection import ActivationDefence 7 | 8 | 9 | class ActivationClustering: 10 | def __init__(self, data, param, model=None, activations=None): 11 | # model is activations when is_resume is True, and is DNN model when is_resume is False 12 | if model is None and activations is None: 13 | raise ("You must supply either model or activations") 14 | self.data = data 15 | self.param = param 16 | 17 | if activations is not None: 18 | self.defence = Analyzer(activations, self.data.x_train, self.data.y_train, self.param) 19 | else: 20 | self.model = model 21 | self.defence = ActivationDefence(self.model.classifier, self.data.x_train, self.data.y_train, self.param) 22 | 23 | def size_metric(self , log_file=None): 24 | # End-to-end method: 25 | print("------------------- Results using size metric -------------------") 26 | print(self.defence.get_params()) 27 | self.defence.detect_poison(n_clusters=2, ndims=10, reduce="PCA") 28 | 29 | confusion_matrix = self.defence.evaluate_defence(self.data.is_clean) 30 | print("Evaluation defence results for size-based metric: ") 31 | jsonObject = json.loads(confusion_matrix) 32 | 33 | 34 | label = 'class_{}'.format(self.param.get_conf('poison_label_target')) 35 | print(label) 36 | pprint.pprint(jsonObject[label]) 37 | 38 | self.print_f1_score(jsonObject, label) 39 | if log_file: 40 | savedStdout = sys.stdout 41 | sys.stdout = log_file 42 | for label in jsonObject: 43 | print(label) 44 | pprint.pprint(jsonObject[label]) 45 | self.print_f1_score(jsonObject, label) 46 | sys.stdout= savedStdout 47 | 48 | 49 | def size_metric_visualize(self): 50 | # Visualize clusters: 51 | print("Visualize clusters") 52 | sprites_by_class = self.defence.visualize_clusters(self.data.x_train, 'mnist_poison_demo') 53 | # Show plots for clusters of class 5 54 | n_class = self.param.get_conf('poison_label_target') 55 | try: 56 | import matplotlib.pyplot as plt 57 | plt.imshow(sprites_by_class[n_class][0]) 58 | plt.title("Class " + str(n_class) + " cluster: 0") 59 | plt.show() 60 | plt.imshow(sprites_by_class[n_class][1]) 61 | plt.title("Class " + str(n_class) + " cluster: 1") 62 | plt.show() 63 | except: 64 | 
print("matplotlib not installed. For this reason, cluster visualization was not displayed") 65 | 66 | def distance_metric(self): 67 | print("------------------- Results using distance metric -------------------") 68 | print(self.defence.get_params()) 69 | self.defence.detect_poison(n_clusters=2, ndims=10, reduce="PCA", cluster_analysis='distance') 70 | confusion_matrix = self.defence.evaluate_defence(self.data.is_clean) 71 | print("Evaluation defence results for distance-based metric: ") 72 | jsonObject = json.loads(confusion_matrix) 73 | 74 | # when result_dict is not empty, start record experiment results 75 | 76 | 77 | for label in jsonObject: 78 | print(label) 79 | pprint.pprint(jsonObject[label]) 80 | 81 | self.print_f1_score(jsonObject, label) 82 | 83 | # Other ways to invoke the defence: 84 | self.defence.cluster_activations(n_clusters=2, ndims=10, reduce='PCA') 85 | 86 | self.defence.analyze_clusters(cluster_analysis='distance') 87 | self.defence.evaluate_defence(self.data.is_clean) 88 | 89 | self.defence.analyze_clusters(cluster_analysis='smaller') 90 | self.defence.evaluate_defence(self.data.is_clean) 91 | 92 | def relative_size_metric(self): 93 | print("------------------- Results using relative-size metric -------------------") 94 | print(self.defence.get_params()) 95 | self.defence.detect_poison(n_clusters=2, ndims=10, reduce="PCA", cluster_analysis='relative-size') 96 | confusion_matrix = self.defence.evaluate_defence(self.data.is_clean) 97 | print("Evaluation defence results for relative-size metric: ") 98 | jsonObject = json.loads(confusion_matrix) 99 | 100 | # when result_dict is not empty, start record experiment results 101 | 102 | 103 | if type(self.param.get_conf('poison_label_target')) is list: 104 | for lab in self.param.get_conf('poison_label_target'): 105 | lab = 'class_{}'.format(lab) 106 | print(lab) 107 | pprint.pprint(jsonObject[lab]) 108 | self.print_f1_score(jsonObject, lab) 109 | else: 110 | label = 'class_{}'.format(self.param.get_conf('poison_label_target')) 111 | print(label) 112 | pprint.pprint(jsonObject[label]) 113 | self.print_f1_score(jsonObject, label) 114 | 115 | def print_f1_score(self, jsonObject, label): 116 | tp = jsonObject[label]['TruePositive']['numerator'] 117 | fn = jsonObject[label]['FalseNegative']['numerator'] 118 | tn = jsonObject[label]['TrueNegative']['numerator'] 119 | fp = jsonObject[label]['FalsePositive']['numerator'] 120 | 121 | if tp + fp == 0 or tp + fn == 0: 122 | print('escape the detection') 123 | return 124 | 125 | precision = float(tp) / (tp + fp) 126 | recall = float(tp) / (tp + fn) 127 | if tp==0: 128 | f1 = 0.0 129 | else: 130 | f1 = (2 * precision * recall) / (precision + recall) 131 | 132 | print('precision = ', precision) 133 | print('recall = ', recall) 134 | print('f1 = ', f1) 135 | -------------------------------------------------------------------------------- /src/attacks/universal_perturbation.py: -------------------------------------------------------------------------------- 1 | from attacks.Deepfool import Deepfool 2 | from attacks.backdoor_generator import BackdoorGenerator 3 | from conf import * 4 | from utils import preprocess_input_vgg 5 | 6 | 7 | class Universal_perturbation(BackdoorGenerator): 8 | def __init__(self, model, param): 9 | super(Universal_perturbation, self).__init__(model, param) 10 | self.deepfool = Deepfool(self.model, self.param) 11 | self.data_path = os.path.join(self.param.get_conf('data_path'), 'train') 12 | self.image_size = (self.param.get_conf('train_image_size'), 
-------------------------------------------------------------------------------- /src/attacks/universal_perturbation.py: -------------------------------------------------------------------------------- 1 | from attacks.Deepfool import Deepfool 2 | from attacks.backdoor_generator import BackdoorGenerator 3 | from conf import * 4 | from utils import preprocess_input_vgg 5 | 6 | 7 | class Universal_perturbation(BackdoorGenerator): 8 | def __init__(self, model, param): 9 | super(Universal_perturbation, self).__init__(model, param) 10 | self.deepfool = Deepfool(self.model, self.param) 11 | self.data_path = os.path.join(self.param.get_conf('data_path'), 'train') 12 | self.image_size = (self.param.get_conf('train_image_size'), self.param.get_conf('train_image_size')) 13 | 14 | def proj_lp(self, v, xi, p): 15 | 16 | # Project on the lp ball centered at 0 and of radius xi 17 | 18 | # SUPPORTS only p = 2 and p = Inf for now 19 | if p == 2: 20 | v = v * min(1, xi / np.linalg.norm(v.flatten())) 21 | # v = v / np.linalg.norm(v.flatten()) * xi 22 | elif p == np.inf: 23 | v = np.sign(v) * np.minimum(abs(v), xi) 24 | else: 25 | raise ValueError('Values of p other than 2 and Inf are currently not supported...') 26 | 27 | return v 28 | 29 | def universal_perturbation(self, dataset, source, target, delta=0.2, max_iter_uni=5, xi=15.0/255.0, p=np.inf, 30 | overshoot=0.02, max_iter_df=20): 31 | 32 | """ 33 | :param dataset: data object whose `x_train`/`y_train` hold the images of size MxHxWxC (M: number of images) 34 | 35 | :param source: label of the class whose images receive the perturbation 36 | 37 | :param target: label the perturbed images should be misclassified as 38 | 39 | :param delta: controls the desired fooling rate; the loop stops once the rate exceeds 1 - delta (default = 0.2, i.e. an 80% fooling rate) 40 | 41 | :param max_iter_uni: optional other termination criterion (maximum number of passes over the data, default = 5) 42 | 43 | :param xi: controls the l_p magnitude of the perturbation (default = 15/255) 44 | 45 | :param p: norm to be used (FOR NOW, ONLY p = 2, and p = np.inf ARE ACCEPTED!) (default = np.inf) 46 | 47 | :param overshoot: used as a termination criterion to prevent vanishing updates (default = 0.02). 48 | 49 | :param max_iter_df: maximum number of iterations for deepfool (default = 20) 50 | 51 | :return: the universal perturbation. 52 | """ 53 | 54 | v = 0 55 | fooling_rate = 0.0 56 | batch_size = self.param.get_conf('batch_size') 57 | if self.param.get_conf('model_prefix') in models_load: 58 | source_imgs = dataset.x_train 59 | source_imgs = [source_imgs[i] for i in np.where(dataset.y_train == source)[0]] 60 | y_train = dataset.y_train[(dataset.y_train == source)] 61 | else: 62 | y_train = dataset.y_train[(dataset.y_train.argmax(axis=1).flatten() == source)] 63 | source_imgs = dataset.x_train 64 | source_imgs = [source_imgs[i] for i in np.where(dataset.y_train.argmax(axis=1).flatten() == source)[0]] 65 | source_imgs = np.array(source_imgs) 66 | num_images = len(source_imgs) 67 | num_selection = min(num_images, 5000) 68 | 69 | print('num_selection = ', num_selection) 70 | 71 | itr = 0 72 | index = np.arange(num_selection) 73 | while fooling_rate < 1 - delta and itr < max_iter_uni: 74 | # Shuffle the dataset 75 | np.random.shuffle(index) 76 | 77 | print('Starting pass number ', itr) 78 | 79 | # Go through the data set and compute the perturbation increments sequentially 80 | for idx, k in enumerate(index): 81 | if self.param.get_conf('model_prefix') in models_load: 82 | cur_img = source_imgs[k] 83 | cur_img = cv2.imread(os.path.join(self.data_path, cur_img))[:, :, ::-1] 84 | cur_img = cv2.resize(cur_img, self.image_size) 85 | cur_img = preprocess_input_vgg(cur_img) 86 | else: 87 | cur_img = source_imgs[k:k + 1] 88 | if idx % 1000 == 999: 89 | print('>> k = ', idx, ', img_idx = ', k, ', pass #', itr) 90 | if target != int(np.argmax(np.array(self.deepfool.iterate([cur_img + v])[2]).flatten())): 91 | 92 | 93 | # Compute adversarial perturbation 94 | dr, df_iter, _ = self.deepfool.deepfool(cur_img + v, source=source, target=target, overshoot=overshoot, 95 | max_iter=max_iter_df) 96 | 97 | # Make sure DeepFool converged before accepting the increment 
98 | if df_iter < max_iter_df - 1: 99 | v = v + dr 100 | 101 | # Project on l_p ball 102 | v = self.proj_lp(v, xi, p) 103 | 104 | itr = itr + 1 105 | # v *= 0.99 106 | est_labels_pert = np.zeros(num_selection) 107 | 108 | num_batches = int(np.ceil(float(num_selection) / float(batch_size))) 109 | np.random.shuffle(index) 110 | imgs_test = [source_imgs[i] for i in index[:num_selection]] 111 | for ii in range(0, num_batches): 112 | m = (ii * batch_size) 113 | M = min((ii + 1) * batch_size, num_selection) 114 | if self.param.get_conf('model_prefix') in models_load: 115 | imgs = [] 116 | for fi in imgs_test[m:M]: 117 | img = cv2.imread(os.path.join(self.data_path, fi))[:, :, ::-1] 118 | img = cv2.resize(img, self.image_size) 119 | imgs.append(img) 120 | imgs = preprocess_input_vgg(np.array(imgs)) 121 | else: 122 | imgs = np.array(imgs_test[m:M]) 123 | # imgs[:] += v 124 | imgs = np.clip(imgs[:] + v, 0, 1) 125 | est_labels_pert[m:M] = np.argmax(self.deepfool.iterate([imgs])[2], axis=1).flatten() 126 | # Compute the fooling rate 127 | if self.param.get_conf('model_prefix') in models_load: 128 | fooling_rate = float(np.sum(est_labels_pert != y_train[index[:num_selection]]) / 129 | float(num_selection)) 130 | else: 131 | fooling_rate = float(np.sum(est_labels_pert != y_train[index[:num_selection]].argmax(axis=1)) / 132 | float(num_selection)) 133 | print('FOOLING RATE = ', fooling_rate) 134 | self.perturb = v 135 | print('magnitude of pert is', np.linalg.norm(v)) 136 | return v 137 |
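To make the projection step concrete, a small hedged illustration of proj_lp for p = inf (toy numbers; only xi = 15/255 matches the budget actually used above):

import numpy as np
v = np.array([0.08, -0.02, 0.05])   # accumulated perturbation (made-up values)
xi = 15.0 / 255.0                   # l_inf budget, as in universal_perturbation
v_proj = np.sign(v) * np.minimum(np.abs(v), xi)
# v_proj ~ [0.0588, -0.02, 0.05]: each entry is clamped element-wise into [-xi, xi]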
-------------------------------------------------------------------------------- /src/utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | from conf import * 4 | 5 | 6 | def print_f1(tp, fn, fp): 7 | # tp = num_bad_removed 8 | # fn = num_poisoned_after 9 | # fp = len(top_scores) - num_bad_removed 10 | precision = float(tp) / (tp + fp) 11 | recall = float(tp) / (tp + fn) 12 | f1 = (2 * precision * recall) / (precision + recall) if (precision + recall) else 0.0 13 | 14 | print('precision = {:.2f}'.format(precision * 100)) 15 | print('recall = {:.2f}'.format(recall * 100)) 16 | print('f1-score = {:.2f}'.format(f1 * 100)) 17 | 18 | 19 | def normalize(x): 20 | # utility function to normalize a tensor by its L2 norm 21 | return x / (K.sqrt(K.mean(K.square(x))) + 1e-5) 22 | 23 | 24 | def preprocess_image(img_path): 25 | img = image.load_img(img_path, target_size=(28, 28), grayscale=True) 26 | input_img_data = image.img_to_array(img) 27 | input_img_data = input_img_data.reshape(1, 28, 28, 1) 28 | 29 | input_img_data = input_img_data.astype('float32') 30 | input_img_data /= 255 31 | 32 | # input_img_data = preprocess_input(input_img_data) # final input shape = (1,224,224,3) 33 | return input_img_data 34 | 35 | 36 | def get_signature(): 37 | now = datetime.datetime.now() 38 | past = datetime.datetime(2015, 6, 6, 0, 0, 0, 0) 39 | timespan = now - past 40 | time_sig = int(timespan.total_seconds() * 1000) 41 | 42 | return str(time_sig) 43 | 44 | 45 | def serialize_img(img, param): 46 | save_name = '_'.join([param.get_conf('model_prefix'), get_date(), 'image', get_signature()]) 47 | save_path = os.path.join(param.get_conf('perturbation_dir'), save_name + '.png') 48 | save_pkl = os.path.join(param.get_conf('perturbation_dir'), save_name + '.pkl') 49 | 50 | img = img.flatten().reshape((28, 28)) 51 | print('img.shape = ', img.shape) 52 | 53 | plt.figure() 54 | plt.imshow(img, cmap='gray') 55 | plt.show() 56 | 57 | imageio.imwrite(uri=save_path, im=img) 58 | 59 | with open(save_pkl, 'wb') as f: 60 | pickle.dump(img, f) 61 | 62 | print('save img done') 63 | 64 | 65 | def deserialize_pert(save_pkl, alpha): 66 | with open(save_pkl, 'rb') as f: 67 | perturb = pickle.load(f) 68 | 69 | print('load perturbation done', save_pkl) 70 | print('self.perturb.shape = {}, magnitude of pert is {}'.format(perturb.shape, np.linalg.norm(perturb))) 71 | 72 | print('alpha = ', alpha) 73 | 74 | perturb = perturb * alpha 75 | # round the float part, e.g. 3.7 -> 4, 3.1 -> 3 76 | # perturb = (perturb*255).astype(np.int32) 77 | # perturb = perturb.astype(np.uint8) 78 | 79 | return perturb 80 | 81 | 82 | def to_categorical(labels, nb_classes=None): 83 | """ 84 | Convert an array of labels to binary class matrix. 85 | 86 | :param labels: An array of integer labels of shape `(nb_samples,)` 87 | :type labels: `np.ndarray` 88 | :param nb_classes: The number of classes (possible labels) 89 | :type nb_classes: `int` 90 | :return: A binary matrix representation of `labels` in the shape `(nb_samples, nb_classes)` 91 | :rtype: `np.ndarray` 92 | """ 93 | labels = np.array(labels, dtype=np.int32) 94 | if not nb_classes: 95 | nb_classes = np.max(labels) + 1 96 | categorical = np.zeros((labels.shape[0], nb_classes), dtype=np.float32) 97 | categorical[np.arange(labels.shape[0]), np.squeeze(labels)] = 1 98 | return categorical 99 | 100 | 101 | def preprocess_mnist(x, y, nb_classes=10, clip_values=None): 102 | """Scales `x` to [0, 1] and converts `y` to class categorical confidences. 103 | 104 | :param x: Data instances. 105 | :type x: `np.ndarray` 106 | :param y: Labels. 107 | :type y: `np.ndarray` 108 | :param nb_classes: Number of classes in dataset. 109 | :type nb_classes: `int` 110 | :param clip_values: Original data range allowed value for features, either one respective scalar or one value per 111 | feature. 112 | :type clip_values: `tuple(float, float)` or `tuple(np.ndarray, np.ndarray)` 113 | :return: Rescaled values of `x`, `y` 114 | :rtype: `tuple` 115 | """ 116 | if clip_values is None: 117 | min_, max_ = np.amin(x), np.amax(x) 118 | else: 119 | min_, max_ = clip_values 120 | 121 | normalized_x = (x - min_) / (max_ - min_) 122 | categorical_y = to_categorical(y, nb_classes) 123 | 124 | return normalized_x, categorical_y 125 |
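A self-contained toy example of the two helpers above (made-up arrays):

import numpy as np
labels = np.array([0, 2, 1])
to_categorical(labels, nb_classes=3)
# -> [[1. 0. 0.], [0. 0. 1.], [0. 1. 0.]]
x = np.array([[0., 127.5, 255.]])
x_norm, y_cat = preprocess_mnist(x, labels[:1], nb_classes=3)
# x_norm -> [[0. 0.5 1.]] (min=0 and max=255 are inferred from x when clip_values is None)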
126 | 127 | def preprocess_x_mnist(x, clip_values=None): 128 | """Scales `x` to [0, 1], adding batch and channel axes when they are missing. 129 | 130 | :param x: Data instances. 131 | :type x: `np.ndarray` 132 | :param clip_values: Original data range allowed value for features, either one respective scalar or one value per 133 | feature. 134 | :type clip_values: `tuple(float, float)` or `tuple(np.ndarray, np.ndarray)` 135 | :return: Rescaled values of `x` 136 | :rtype: `np.ndarray` 137 | """ 138 | if len(x.shape) == 2: 139 | x = np.expand_dims(x, axis=2) 140 | if len(x.shape) == 3: 141 | x = np.expand_dims(x, axis=0) 142 | if clip_values is None: 143 | min_, max_ = np.amin(x), np.amax(x) 144 | else: 145 | min_, max_ = clip_values 146 | 147 | normalized_x = (x - min_) / (max_ - min_) 148 | 149 | return normalized_x 150 | 151 | 152 | def preprocess_input_vgg(x): 153 | if (len(x.shape) == 3): 154 | x = np.expand_dims(x, axis=0) 155 | 156 | x = x.astype(np.float64) 157 | x = preprocess_input(x) 158 | return x 159 | 160 | 161 | def deprocess_vgg(x): 162 | x = x.reshape((224, 224, 3)) 163 | # Remove zero-center by mean pixel 164 | x[:, :, 0] += 103.939 165 | x[:, :, 1] += 116.779 166 | x[:, :, 2] += 123.68 167 | # 'BGR'->'RGB' 168 | x = x[:, :, ::-1] 169 | x = np.clip(x, 0, 255).astype('uint8') 170 | return x 171 | 172 | 173 | def load_img(img_path): 174 | img = cv2.imread(img_path)[:, :, ::-1] 175 | img = cv2.resize(img, (224, 224)) 176 | img = preprocess_input_vgg(img) 177 | return img 178 | 179 | 180 | def dump_model(model, param, prefix='model_prefix'): 181 | # concat dump name 182 | serialize_name = '_'.join([param.get_conf()[prefix], get_date()]) + '.pkl' 183 | print('serialize_name = ', serialize_name) 184 | 185 | # concat dump path 186 | serialize_path = os.path.join(param.get_conf('save_dir'), serialize_name) 187 | with open(serialize_path, 'wb') as f: 188 | pickle.dump(model, f) 189 | 190 | print('model dump success') 191 | 192 | return serialize_path 193 | 194 | 195 | def deserialize_model(path): 196 | with open(path, 'rb') as f: 197 | model = pickle.load(f) 198 | 199 | print('model load success') 200 | 201 | return model 202 | 203 | 204 | def check_dir(path): 205 | if not os.path.exists(path): 206 | os.mkdir(path) 207 | 208 | 209 | class Param: 210 | def __init__(self, json_file): 211 | self.conf = None 212 | self.json_file = json_file 213 | 214 | def load_json(self, prefix=None): 215 | if prefix: 216 | self.json_path = os.path.join(prefix, self.json_file) 217 | else: 218 | self.json_path = os.path.join(json_dir, self.json_file) 219 | 220 | with open(self.json_path, 'r') as f: 221 | self.conf = json.load(f) 222 | 223 | for key, val in self.conf.items(): 224 | print(key, ':', val) 225 | 226 | def get_conf(self, key_value=None): 227 | if key_value is None: 228 | return self.conf 229 | return self.conf[key_value] 230 | 231 | def set_conf(self, key, value): 232 | self.conf[key] = value 233 | 234 | def print_conf(self): 235 | for key, val in self.conf.items(): 236 | print(key, ':', val) 237 |
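A hedged sketch of how Param is driven by the scripts in this repository (json/cifar.json is the shipped configuration; the keys mirror the ones read throughout this file):

param = Param('cifar.json')               # file name, resolved against json_dir
param.load_json()                         # parses the JSON and echoes every key
prefix = param.get_conf('model_prefix')   # e.g. 'cifar'
param.set_conf('poison_label_source', 5)  # override a single key at runtime
conf = param.get_conf()                   # no key -> the whole config dict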
-------------------------------------------------------------------------------- /src/test_specific_pair.py: -------------------------------------------------------------------------------- 1 | 2 | import os 3 | os.environ["CUDA_VISIBLE_DEVICES"] = "-1" 4 | os.environ['KERAS_BACKEND'] = 'tensorflow' 5 | from keras.layers import serialize 6 | from attacks.universal_perturbation import Universal_perturbation 7 | from attacks.CW import CarliniWagnerL2 8 | from data.cifar10 import CifarData 9 | # from data.GTSRB import GTSRBData 10 | from defences.activation_clustering import ActivationClustering 11 | from defences.spectral import compute_corr 12 | from model.cifar10 import CifarModel 13 | # from model.GTSRBModel import GTSRBModel 14 | from visualization import * 15 | import numpy as np 16 | import glob 17 | import argparse 18 | import cv2 19 | 20 | def test_deserialize_model(param, args): 21 | # the json configuration can be accessed via model.param 22 | K.set_learning_phase(1) 23 | if param.get_conf('model_prefix') == 'cifar': 24 | Data = CifarData 25 | Model = CifarModel 26 | elif param.get_conf('model_prefix') == 'GTSRB': 27 | Data = GTSRBData 28 | Model = GTSRBModel 29 | 30 | path = param.get_conf('model_path_finetune') 31 | if os.path.exists(path) and 'pkl' in path: 32 | with open(path, 'rb') as f: 33 | model = pickle.load(f) 34 | model.set_param(param) 35 | print('model load success') 36 | else: 37 | model = Model(param) 38 | data_clean = Data(param) 39 | data_clean.load_data(is_add_channel=True) 40 | model.init(data_clean) 41 | model.init_model() 42 | model.train(data_clean, nb_epochs=180) 43 | serialize_name = '_'.join( 44 | [param.get_conf('model_prefix'), get_date(), 'clean']) + '.pkl' 45 | serialize_path = os.path.join(param.get_conf('save_dir'), serialize_name) 46 | print('serialize_name = ', serialize_name) 47 | with open(serialize_path, 'wb') as f: 48 | pickle.dump(model, f) 49 | param.set_conf('model_path_finetune', serialize_name) 50 | 51 | data = Data(param) 52 | data.load_data() 53 | data.gen_backdoor(model) 54 | model.train(data) 55 | 56 | 57 | serialize_name = '_'.join( 58 | [param.get_conf('model_prefix'), get_date()]) + \ 59 | '_poison_{}to{}'.format(param.get_conf('poison_label_source'), param.get_conf('poison_label_target')) + '.pkl' 60 | print('serialize_name = ', serialize_name) 61 | serialize_path = os.path.join(param.get_conf('save_dir'), serialize_name) 62 | with open(serialize_path, 'wb') as f: 63 | pickle.dump(model, f) 64 | 65 | K.set_learning_phase(0) 66 | with open(serialize_path, 'rb') as f: 67 | model2 = pickle.load(f) 68 | 69 | model2.predict(data) 70 | ac = ActivationClustering(data, param, model2) 71 | ac.size_metric() 72 | # ac.relative_size_metric() 73 | 74 | 75 | 76 | def test_resume_model(param, args): 77 | # the json configuration can be accessed via model.param 78 | K.set_learning_phase(0) 79 | with open(param.get_conf('model_path_backdoor'), 'rb') as f: 80 | model = pickle.load(f) 81 | model.param.set_conf('pert_path', param.get_conf('pert_path')) 82 | print("model", param.get_conf('model_path_backdoor'), 'load success') 83 | 84 | if model.param.get_conf('model_prefix') == 'GTSRB': 85 | data = GTSRBData(model.param) 86 | elif model.param.get_conf('model_prefix') == 'cifar': 87 | data = CifarData(model.param) 88 | 89 | data.load_data() 90 | data.restore_backdoor(model) 91 | # data.gen_backdoor() 92 | model.param.print_conf() 93 | print('model load success') 94 | 95 | model.predict(data) 96 | if args.spectral: 97 | compute_corr(model.param, model, data) 98 | else: 99 | ac = ActivationClustering(data, model.param, model) 100 | ac.size_metric() 101 | # ac.relative_size_metric() 102 |
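# Pipeline summary of the two entry points above: test_deserialize_model trains
# (or loads) a clean model, injects the backdoor into the training data,
# retrains, and runs activation clustering on the poisoned model;
# test_resume_model reloads an already-poisoned model and re-runs detection only
# (activation clustering by default, spectral signatures with --spectral).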
103 | def gen_perturbation(model, data, method, param): 104 | 105 | # serialize perturbation 106 | 107 | if method == 'universal': 108 | universal = Universal_perturbation(model, param) 109 | v = universal.universal_perturbation(data, param.get_conf('poison_label_source'), 110 | param.get_conf('poison_label_target'), xi=param.get_conf('pert_xi') / 255.0) 111 | serialize_name = 'universal_{}to{}'.format(param.get_conf('poison_label_source'), 112 | param.get_conf('poison_label_target')) 113 | serialize_name = universal.serialize(serialize_name) 114 | elif method == 'CW': 115 | cw = CarliniWagnerL2(model, param) 116 | cw.attack(data, xi=param.get_conf('pert_xi') / 255.0) 117 | serialize_name = 'CW_{}to{}'.format(param.get_conf('poison_label_source'), 118 | param.get_conf('poison_label_target')) 119 | serialize_name = cw.serialize(serialize_name) 120 | # 3. test perturbation on some input cases 121 | return serialize_name 122 | 123 | 124 | 125 | 126 | def gen_rand_perturbation(param, args): 127 | model_path = param.get_conf('model_path') 128 | K.set_learning_phase(0) 129 | if param.get_conf('model_prefix') == 'GTSRB': 130 | data = GTSRBData(param) 131 | model = GTSRBModel 132 | else: 133 | data = CifarData(param) 134 | model = CifarModel 135 | data.load_data(is_add_channel=True) 136 | if os.path.exists(model_path) and 'pkl' in model_path: 137 | # 2. load model and generate perturbation 138 | serialize_path = model_path 139 | with open(serialize_path, 'rb') as f: 140 | model = pickle.load(f) 141 | else: 142 | 143 | model = model(param) 144 | model.init(data) 145 | model.init_model() 146 | model.train(data, nb_epochs=180) 147 | serialize_name = '_'.join( 148 | [param.get_conf('model_prefix'), get_date()]) + '_clean.pkl' 149 | print('serialize_name = ', serialize_name) 150 | serialize_path = os.path.join( 151 | param.get_conf('save_dir'), serialize_name) 152 | 153 | with open(serialize_path, 'wb') as f: 154 | pickle.dump(model, f) 155 | print('model dump success') 156 | # model.predict(data) 157 | print('model load success') 158 | 159 | param.set_conf('poison_label_source', args.source) 160 | param.set_conf('poison_label_target', args.target) 161 | print('source number is {}, target number is {}'.format(args.source, args.target)) 162 | method = param.get_conf('method') 163 | 164 | return gen_perturbation(model, data, method=method, param=param) 165 | 166 | 167 | def experiment_on_pair(args, param, pair): 168 | 169 | param.set_conf('poison_label_source', pair[0]) 170 | param.set_conf('poison_label_target', pair[1]) 171 | test_deserialize_model(param, args) 172 | 173 | K.clear_session() 174 | 175 | 176 | if __name__ == '__main__': 177 | parser = argparse.ArgumentParser(description='random pair testing') 178 | parser.add_argument('-c', '--config', default='train.json', type=str, help='config file') 179 | parser.add_argument('-g', '--gen', action='store_true', help='generate perturbations') 180 | parser.add_argument('-s', '--source', type=int, default=5, help='attack source') 181 | parser.add_argument('-t', '--target', type=int, default=6, help='attack target') 182 | parser.add_argument('-r', '--resume', action='store_true', help="resume and test trained models") 183 | parser.add_argument('-sp', '--spectral', action='store_true', help="use the spectral signatures defence to detect") 184 | args = parser.parse_args() 185 | json_name = args.config 186 | param = Param(json_name) 187 | param.load_json() 188 | if args.gen: 189 | pert_path = gen_rand_perturbation(param, args) 190 | param.set_conf("pert_path", pert_path) 191 | if args.resume: 192 | test_resume_model(param, args) 193 | else: 194 | experiment_on_pair(args, param, (args.source, args.target)) 195 | 196 | 197 | 198 | -------------------------------------------------------------------------------- /src/poison_detection/ground_truth_evaluator.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | # 3 | # Copyright (C) IBM Corporation 2018 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 6 | # documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 7 | # rights to use, copy, 
modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit 8 | # persons to whom the Software is furnished to do so, subject to the following conditions: 9 | # 10 | # The above copyright notice and this permission notice shall be included in all copies or substantial portions of the 11 | # Software. 12 | # 13 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE 14 | # WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 15 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 16 | # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 17 | # SOFTWARE. 18 | from __future__ import absolute_import, division, print_function, unicode_literals 19 | 20 | import json 21 | import logging 22 | 23 | import numpy as np 24 | 25 | logger = logging.getLogger(__name__) 26 | 27 | 28 | class GroundTruthEvaluator: 29 | """ 30 | Class to evaluate the performance of the poison detection method. 31 | """ 32 | def __init__(self): 33 | """ 34 | Evaluates ground truth constructor 35 | """ 36 | 37 | def analyze_correctness(self, assigned_clean_by_class, is_clean_by_class): 38 | """ 39 | For each training sample, determine whether the activation clustering method was correct. 40 | 41 | :param assigned_clean_by_class: Result of clustering 42 | :type assigned_clean_by_class `list` 43 | :param is_clean_by_class: is clean separated by class 44 | :type is_clean_by_class `list` 45 | :return: Two variables are returned: 46 | 1) all_errors_by_class[i]: an array indicating the correctness of each assignment 47 | in the ith class. 
Such that: 48 | all_errors_by_class[i] = 0 if marked poison, is poison 49 | all_errors_by_class[i] = 1 if marked clean, is clean 50 | all_errors_by_class[i] = 2 if marked poison, is clean 51 | all_errors_by_class[i] = 3 marked clean, is poison 52 | 2) Json object with confusion matrix per-class 53 | """ 54 | 55 | all_errors_by_class = [] 56 | poison = 0 57 | clean = 1 58 | dic_json = {} 59 | 60 | logger.debug("Error rates per class:") 61 | for class_i, (assigned_clean, is_clean) in enumerate(zip(assigned_clean_by_class, is_clean_by_class)): 62 | errors = [] 63 | for assignment, bl in zip(assigned_clean, is_clean): 64 | bl = int(bl) 65 | # marked poison, is poison = 0 66 | # true positive 67 | if assignment == poison and bl == poison: 68 | errors.append(0) 69 | 70 | # marked clean, is clean = 1 71 | # true negative 72 | elif assignment == clean and bl == clean: 73 | errors.append(1) 74 | 75 | # marked poison, is clean = 2 76 | # false positive 77 | elif assignment == poison and bl == clean: 78 | errors.append(2) 79 | 80 | # marked clean, is poison = 3 81 | # false negative 82 | elif assignment == clean and bl == poison: 83 | errors.append(3) 84 | else: 85 | print(assignment, bl, type(assignment), type(bl),clean, poison) 86 | raise Exception('Analyze_correctness entered wrong class') 87 | 88 | errors = np.asarray(errors) 89 | logger.debug('-------------------%d---------------', class_i) 90 | key_i = "class_" + str(class_i) 91 | matrix_i = self.get_confusion_matrix(errors) 92 | dic_json.update({key_i: matrix_i}) 93 | all_errors_by_class.append(errors) 94 | 95 | all_errors_by_class = np.asarray(all_errors_by_class) 96 | conf_matrix_json = json.dumps(dic_json) 97 | 98 | return all_errors_by_class, conf_matrix_json 99 | 100 | def get_confusion_matrix(self, values): 101 | """ 102 | Computes and returns a json object that contains the confusion matrix for each class. 
103 | 104 | :param values: Array indicating the correctness of each assignment in the ith class 105 | :type values `array` 106 | :return: Json object with confusion matrix per-class 107 | """ 108 | dic_class = {} 109 | true_positive = np.where(values == 0)[0].shape[0] 110 | true_negative = np.where(values == 1)[0].shape[0] 111 | false_positive = np.where(values == 2)[0].shape[0] 112 | false_negative = np.where(values == 3)[0].shape[0] 113 | 114 | tp = self.calculate_and_print(true_positive, 115 | true_positive + false_negative, 116 | "true-positive rate") 117 | tn = self.calculate_and_print(true_negative, 118 | false_positive + true_negative, 119 | "true-negative rate") 120 | fp = self.calculate_and_print(false_positive, 121 | false_positive + true_negative, 122 | "false-positive rate") 123 | fn = self.calculate_and_print(false_negative, 124 | true_positive + false_negative, 125 | "false-negative rate") 126 | 127 | dic_tp = dict(rate=round(tp, 2), numerator=true_positive, denominator=(true_positive + false_negative)) 128 | if (true_positive + false_negative) == 0: 129 | dic_tp = dict(rate='N/A', numerator=true_positive, denominator=(true_positive + false_negative)) 130 | 131 | dic_tn = dict(rate=round(tn, 2), numerator=true_negative, denominator=(false_positive + true_negative)) 132 | if (false_positive + true_negative) == 0: 133 | dic_tn = dict(rate='N/A', numerator=true_negative, denominator=(false_positive + true_negative)) 134 | 135 | dic_fp = dict(rate=round(fp, 2), numerator=false_positive, denominator=(false_positive + true_negative)) 136 | if (false_positive + true_negative) == 0: 137 | dic_fp = dict(rate='N/A', numerator=false_positive, denominator=(false_positive + true_negative)) 138 | 139 | dic_fn = dict(rate=round(fn, 2), numerator=false_negative, denominator=(true_positive + false_negative)) 140 | if (true_positive + false_negative) == 0: 141 | dic_fn = dict(rate='N/A', numerator=false_negative, denominator=(true_positive + false_negative)) 142 | 143 | dic_class.update(dict(TruePositive=dic_tp)) 144 | dic_class.update(dict(TrueNegative=dic_tn)) 145 | dic_class.update(dict(FalsePositive=dic_fp)) 146 | dic_class.update(dict(FalseNegative=dic_fn)) 147 | 148 | return dic_class 149 | 150 | @staticmethod 151 | def calculate_and_print(numerator, denominator, name): 152 | """ 153 | Computes and prints the rates based on the denominator provided. 154 | 155 | :param numerator: number used to compute the rate 156 | :type numerator `int` 157 | :param denominator: number used to compute the rate 158 | :type denominator `int` 159 | :param name: Rate name being computed e.g., false-positive rate 160 | :type name `str` 161 | :return: Computed rate 162 | """ 163 | try: 164 | res = 100 * (numerator / float(denominator)) 165 | logger.debug("%s: %d/%d=%.3g", name, numerator, denominator, res) 166 | return res 167 | except Exception: 168 | logger.debug("%s: couldn't calculate %d/%d", name, numerator, denominator) 169 | return 0 170 | -------------------------------------------------------------------------------- /src/model/cifar_res.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | '''ResNet50 model for Keras. 3 | # Reference: 4 | - [Deep Residual Learning for Image Recognition](https://arxiv.org/abs/1512.03385) 5 | Adapted from code contributed by BigMoyan. 
6 | ''' 7 | from __future__ import print_function 8 | 9 | import numpy as np 10 | import warnings 11 | 12 | from keras.layers import Input 13 | from keras import layers 14 | from keras.layers import Dense 15 | from keras.layers import Activation 16 | from keras.layers import Flatten 17 | from keras.layers import Conv2D 18 | from keras.layers import MaxPooling2D 19 | from keras.layers import GlobalMaxPooling2D 20 | from keras.layers import ZeroPadding2D 21 | from keras.layers import AveragePooling2D 22 | from keras.layers import GlobalAveragePooling2D 23 | from keras.layers import BatchNormalization 24 | from keras.models import Model 25 | from keras.preprocessing.image import ImageDataGenerator 26 | import keras.backend as K 27 | from keras.utils import layer_utils 28 | # from keras.utils.data_utils import get_file 29 | # from keras.applications.imagenet_utils import decode_predictions 30 | # from keras_applications.imagenet_utils import preprocess_input 31 | # from keras_applications.imagenet_utils import _obtain_input_shape 32 | # from keras.engine.topology import get_source_inputs 33 | from model.model import * 34 | from classifiers import KerasClassifier 35 | import record 36 | 37 | 38 | WEIGHTS_PATH = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.2/resnet50_weights_tf_dim_ordering_tf_kernels.h5' 39 | WEIGHTS_PATH_NO_TOP = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.2/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5' 40 | 41 | 42 | def identity_block(input_tensor, kernel_size, filters, stage, block): 43 | """The identity block is the block that has no conv layer at shortcut. 44 | # Arguments 45 | input_tensor: input tensor 46 | kernel_size: default 3, the kernel size of middle conv layer at main path 47 | filters: list of integers, the filters of 3 conv layer at main path 48 | stage: integer, current stage label, used for generating layer names 49 | block: 'a','b'..., current block label, used for generating layer names 50 | # Returns 51 | Output tensor for the block. 52 | """ 53 | filters1, filters2, filters3 = filters 54 | if K.image_data_format() == 'channels_last': 55 | bn_axis = 3 56 | else: 57 | bn_axis = 1 58 | conv_name_base = 'res' + str(stage) + block + '_branch' 59 | bn_name_base = 'bn' + str(stage) + block + '_branch' 60 | 61 | x = Conv2D(filters1, (1, 1), name=conv_name_base + '2a')(input_tensor) 62 | x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2a')(x) 63 | x = Activation('relu')(x) 64 | 65 | x = Conv2D(filters2, kernel_size, 66 | padding='same', name=conv_name_base + '2b')(x) 67 | x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2b')(x) 68 | x = Activation('relu')(x) 69 | 70 | x = Conv2D(filters3, (1, 1), name=conv_name_base + '2c')(x) 71 | x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2c')(x) 72 | 73 | x = layers.add([x, input_tensor]) 74 | x = Activation('relu')(x) 75 | return x 76 | 77 | 
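A hedged sketch of how these blocks compose (toy 32x32x64 input; conv_block is defined just below; this mirrors the stage-2 wiring in CifarModel.init_model):

inp = Input(shape=(32, 32, 64))
y = conv_block(inp, 3, [64, 64, 256], stage=2, block='a', strides=(1, 1))  # projection shortcut
y = identity_block(y, 3, [64, 64, 256], stage=2, block='b')                # identity shortcut
# identity_block requires matching channel counts (256 here), because the
# shortcut is added to the main path without a projection convolution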
78 | def conv_block(input_tensor, kernel_size, filters, stage, block, strides=(2, 2)): 79 | """conv_block is the block that has a conv layer at shortcut 80 | # Arguments 81 | input_tensor: input tensor 82 | kernel_size: default 3, the kernel size of middle conv layer at main path 83 | filters: list of integers, the filters of 3 conv layer at main path 84 | stage: integer, current stage label, used for generating layer names 85 | block: 'a','b'..., current block label, used for generating layer names 86 | # Returns 87 | Output tensor for the block. 88 | Note that from stage 3, the first conv layer at main path is with strides=(2,2) 89 | And the shortcut should have strides=(2,2) as well 90 | """ 91 | filters1, filters2, filters3 = filters 92 | if K.image_data_format() == 'channels_last': 93 | bn_axis = 3 94 | else: 95 | bn_axis = 1 96 | conv_name_base = 'res' + str(stage) + block + '_branch' 97 | bn_name_base = 'bn' + str(stage) + block + '_branch' 98 | 99 | x = Conv2D(filters1, (1, 1), strides=strides, 100 | name=conv_name_base + '2a')(input_tensor) 101 | x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2a')(x) 102 | x = Activation('relu')(x) 103 | 104 | x = Conv2D(filters2, kernel_size, padding='same', 105 | name=conv_name_base + '2b')(x) 106 | x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2b')(x) 107 | x = Activation('relu')(x) 108 | 109 | x = Conv2D(filters3, (1, 1), name=conv_name_base + '2c')(x) 110 | x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2c')(x) 111 | 112 | shortcut = Conv2D(filters3, (1, 1), strides=strides, 113 | name=conv_name_base + '1')(input_tensor) 114 | shortcut = BatchNormalization(axis=bn_axis, name=bn_name_base + '1')(shortcut) 115 | 116 | x = layers.add([x, shortcut]) 117 | x = Activation('relu')(x) 118 | return x 119 | 120 | class CifarModel(CNNModel): 121 | 122 | def __init__(self, param): 123 | super(CifarModel, self).__init__(param) 124 | 125 | 126 | def init(self, data): 127 | self.input_shape = data.x_train.shape[1:] 128 | self.min_ = data.min_ 129 | self.max_ = data.max_ 130 | 131 | def set_learning_phase(self, learning_phase): 132 | K.set_learning_phase(learning_phase) 133 | 134 | def init_model(self, include_top=False, weights=None, 135 | input_tensor=None, input_shape=None, 136 | pooling=None): 137 | """Instantiates the ResNet50 architecture. 138 | Optionally loads weights pre-trained 139 | on ImageNet. Note that when using TensorFlow, 140 | for best performance you should set 141 | `image_data_format="channels_last"` in your Keras config 142 | at ~/.keras/keras.json. 143 | The model and the weights are compatible with both 144 | TensorFlow and Theano. The data format 145 | convention used by the model is the one 146 | specified in your Keras config file. 147 | # Arguments 148 | include_top: whether to include the fully-connected 149 | layer at the top of the network. 150 | weights: one of `None` (random initialization) 151 | or "imagenet" (pre-training on ImageNet). 152 | input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) 153 | to use as image input for the model. 154 | input_shape: optional shape tuple, only to be specified 155 | if `include_top` is False (otherwise the input shape 156 | has to be `(224, 224, 3)` (with `channels_last` data format) 157 | or `(3, 224, 224)` (with `channels_first` data format). 158 | It should have exactly 3 input channels, 159 | and width and height should be no smaller than 197. 160 | E.g. `(200, 200, 3)` would be one valid value. 161 | pooling: Optional pooling mode for feature extraction 162 | when `include_top` is `False`. 163 | - `None` means that the output of the model will be 164 | the 4D tensor output of the 165 | last convolutional layer. 166 | - `avg` means that global average pooling 167 | will be applied to the output of the 168 | last convolutional layer, and thus 169 | the output of the model will be a 2D tensor. 170 | - `max` means that global max pooling will 171 | be applied. 
172 | classes: optional number of classes to classify images 173 | into, only to be specified if `include_top` is True, and 174 | if no `weights` argument is specified. 175 | # Returns 176 | A Keras model instance. 177 | # Raises 178 | ValueError: in case of invalid argument for `weights`, 179 | or invalid input shape. 180 | """ 181 | 182 | 183 | # Determine proper input shape 184 | # input_shape = _obtain_input_shape(self.input_shape, 185 | # default_size=224, 186 | # min_size=32, #for cifar10 compatibility; 187 | # data_format=K.image_data_format(), 188 | # require_flatten=include_top) #Look keras 2.0+ version change logs 189 | 190 | if input_tensor is None: 191 | img_input = Input(shape=self.input_shape) 192 | else: 193 | if not K.is_keras_tensor(input_tensor): 194 | img_input = Input(tensor=input_tensor, shape=input_shape) 195 | else: 196 | img_input = input_tensor 197 | if K.image_data_format() == 'channels_last': 198 | bn_axis = 3 199 | else: 200 | bn_axis = 1 201 | 202 | x = ZeroPadding2D((3, 3))(img_input) 203 | x = Conv2D(64, (7, 7), strides=(2, 2), name='conv1')(x) 204 | x = BatchNormalization(axis=bn_axis, name='bn_conv1')(x) 205 | x = Activation('relu')(x) 206 | x = MaxPooling2D((3, 3), strides=(2, 2))(x) 207 | 208 | x = conv_block(x, 3, [64, 64, 256], stage=2, block='a', strides=(1, 1)) 209 | x = identity_block(x, 3, [64, 64, 256], stage=2, block='b') 210 | x = identity_block(x, 3, [64, 64, 256], stage=2, block='c') 211 | 212 | x = conv_block(x, 3, [128, 128, 512], stage=3, block='a') 213 | x = identity_block(x, 3, [128, 128, 512], stage=3, block='b') 214 | x = identity_block(x, 3, [128, 128, 512], stage=3, block='c') 215 | # x = identity_block(x, 3, [128, 128, 512], stage=3, block='d') 216 | 217 | x = conv_block(x, 3, [256, 256, 1024], stage=4, block='a') 218 | x = identity_block(x, 3, [256, 256, 1024], stage=4, block='b') 219 | x = identity_block(x, 3, [256, 256, 1024], stage=4, block='c') 220 | x = identity_block(x, 3, [256, 256, 1024], stage=4, block='d') 221 | x = identity_block(x, 3, [256, 256, 1024], stage=4, block='e') 222 | x = identity_block(x, 3, [256, 256, 1024], stage=4, block='f') 223 | 224 | x = conv_block(x, 3, [512, 512, 2048], stage=5, block='a') 225 | x = identity_block(x, 3, [512, 512, 2048], stage=5, block='b') 226 | x = identity_block(x, 3, [512, 512, 2048], stage=5, block='c') 227 | 228 | # x = AveragePooling2D((7, 7), name='avg_pool')(x) 229 | 230 | # if include_top: 231 | x = Flatten(name='dense_2')(x) 232 | x = Dense(self.param.get_conf('num_classes'), activation=None, name='predictions')(x) 233 | x = Activation('softmax', name='softmax_output')(x) 234 | # Ensure that the model takes into account 235 | # any potential predecessors of `input_tensor`. 236 | # if input_tensor is not None: 237 | # inputs = get_source_inputs(input_tensor) 238 | # else: 239 | # inputs = img_input 240 | # Create model. 
241 | model = Model(img_input, x, name='resnet50') 242 | # model.summary() 243 | # load weights 244 | # weights_path = '../model/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5' 245 | # model.load_weights(weights_path,strict=False) 246 | 247 | if K.image_data_format() == 'channels_first': 248 | if include_top: 249 | maxpool = model.get_layer(name='avg_pool') 250 | shape = maxpool.output_shape[1:] 251 | dense = model.get_layer(name='fc1000') 252 | layer_utils.convert_dense_weights_data_format(dense, shape, 'channels_first') 253 | model.compile(loss='categorical_crossentropy', optimizer='adadelta', metrics=['accuracy']) 254 | self.classifier = KerasClassifier(clip_values=(self.min_, self.max_), model=model, param=self.param) 255 | 256 | def train(self, data, nb_epochs=None): 257 | # default 258 | # nb_epochs=20 259 | # batch_size=128 260 | 261 | datagen = ImageDataGenerator( 262 | featurewise_center=False, # set input mean to 0 over the dataset 263 | samplewise_center=False, # set each sample mean to 0 264 | featurewise_std_normalization=False, # divide inputs by std of the dataset 265 | samplewise_std_normalization=False, # divide each input by its std 266 | zca_whitening=False, # apply ZCA whitening 267 | zca_epsilon=1e-06, # epsilon for ZCA whitening 268 | rotation_range=0, # randomly rotate images in the range (degrees, 0 to 180) 269 | # randomly shift images horizontally (fraction of total width) 270 | width_shift_range=0.1, 271 | # randomly shift images vertically (fraction of total height) 272 | height_shift_range=0.1, 273 | shear_range=0., # set range for random shear 274 | zoom_range=0., # set range for random zoom 275 | channel_shift_range=0., # set range for random channel shifts 276 | # set mode for filling points outside the input boundaries 277 | fill_mode='nearest', 278 | cval=0., # value used for fill_mode = "constant" 279 | horizontal_flip=True, # randomly flip images 280 | # validation_split=0.0 281 | ) 282 | if nb_epochs is None: 283 | nb_epochs = self.param.get_conf()['train_epoch'] 284 | # Fit the model on the batches generated by datagen.flow(). 
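# (steps below are whole 128-sample batches; with an in-memory tuple as validation_data, Keras does not iterate a validation generator, so the x_train-based validation_steps value appears to be unused)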
285 | self.classifier.get_model().fit_generator(datagen.flow(data.x_train, data.y_train, batch_size=128), 286 | epochs=nb_epochs, 287 | steps_per_epoch=data.x_train.shape[0] // 128, 288 | validation_data=(data.x_test, data.y_test), 289 | validation_steps=data.x_train.shape[0] // 128, 290 | workers=4) 291 | 292 | def predict_instance(self, x): 293 | return self.classifier.predict(x)[0] 294 | 295 | def get_input_shape(self): 296 | return self.input_shape 297 | 298 | def set_input_shape(self, input_shape): 299 | self.input_shape = input_shape 300 | 301 | def get_classifier(self): 302 | return self.classifier 303 | 304 | def set_classifier(self, classifier): 305 | self.classifier = classifier 306 | 307 | def get_input_tensor(self): 308 | return self.classifier.get_input_tensor() 309 | 310 | def get_output_tensor(self): 311 | return self.classifier.get_output_tensor() 312 | 313 | def get_output_bef_softmax(self): 314 | return self.classifier.get_output_bef_softmax() 315 | 316 | def get_dense_tensor(self): 317 | return self.classifier.get_model().get_layer('dense_2').output 318 | 319 | 320 | if __name__ == '__main__': 321 | # stand-alone sanity check, unrelated to CifarModel above; it needs the stock Keras ResNet50 application 322 | from keras.applications.resnet50 import ResNet50, preprocess_input, decode_predictions 323 | from keras.preprocessing import image 324 | model = ResNet50(include_top=True, weights='imagenet') 325 | 326 | img_path = 'elephant.jpg' 327 | img = image.load_img(img_path, target_size=(224, 224)) 328 | x = image.img_to_array(img) 329 | x = np.expand_dims(x, axis=0) 330 | x = preprocess_input(x) 331 | print('Input image shape:', x.shape) 332 | 333 | preds = model.predict(x) 334 | print('Predicted:', decode_predictions(preds)) -------------------------------------------------------------------------------- /src/data/cifar10.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | from keras.datasets import cifar10 4 | 5 | from backdoor import Backdoor 6 | from data.data import Data 7 | from utils import * 8 | from visualization import visualize_img_without_backdoor, visualize_img_with_backdoor 9 | 10 | 11 | class CifarData(Data): 12 | 13 | def __init__(self, param): 14 | super(CifarData, self).__init__(param) 15 | 16 | def init(self): 17 | self.x_train_ordered = None 18 | self.y_train_ordered = None 19 | self.x_test = None 20 | self.y_test = None 21 | self.min_ = None 22 | self.max_ = None 23 | self.random_selection_indices = None 24 | self.train_poisoned_index = None 25 | self.test_poisoned_index = None 26 | self.data_path = self.param.get_conf('data_path') 27 | self.backdoor = Backdoor(self.param.get_conf()) 28 | if type(self.param.get_conf('poison_label_source')) is list: 29 | self.source_num = self.param.get_conf('poison_label_source') 30 | else: 31 | self.source_num = np.array([int(self.param.get_conf('poison_label_source'))]) 32 | if type(self.param.get_conf('poison_label_target')) is list: 33 | self.target_num = self.param.get_conf('poison_label_target') 34 | 35 | else: 36 | self.target_num = np.array([int(self.param.get_conf('poison_label_target'))]) 37 | 38 | def load_data(self, is_add_channel=False): 39 | # print('self.data_path = ', self.data_path) 40 | 41 | (self.x_train_ordered, self.y_train_ordered), (self.x_test, self.y_test) = cifar10.load_data() 42 | self.y_train_ordered = self.y_train_ordered.squeeze() 43 | self.y_test = self.y_test.squeeze() 44 | # serialize_img(self.x_test[0], self.param) 45 | 46 | # raw data range of the CIFAR-10 images 47 | self.min_, self.max_ = 0, 255 48 | 49 | self.n_train = np.shape(self.x_train_ordered)[0] 50 | 51 | if is_add_channel: 52 | self.gen_indices() 53 | self.shuffled_indices = np.arange(min(len(self.x_train_ordered), 
self.param.get_conf('num_selection'))) 54 | self.gen_train_data() # train_data got 55 | self.add_channel_axis() 56 | 57 | print('after reading data') 58 | print('x_train.shape = ', self.x_train_ordered.shape) 59 | print('y_train.shape = ', self.y_train_ordered.shape) 60 | print('x_test.shape = ', self.x_test.shape) 61 | print('y_test.shape = ', self.y_test.shape) 62 | 63 | def add_channel_axis(self): 64 | self.is_poison_train_ordered = np.zeros_like(self.y_train_ordered) == 1 65 | self.is_poison_test = np.zeros_like(self.y_test) == 1 66 | self.x_train_ordered, self.y_train_ordered = preprocess_mnist(self.x_train_ordered, 67 | self.y_train_ordered) 68 | self.x_test, self.y_test = preprocess_mnist(self.x_test, self.y_test) 69 | 70 | def gen_indices(self): 71 | # self.param.get_conf('num_selection') means number of input case selected 72 | # self.n_train means total number of input case 73 | # self.random_selection_indices = np.random.choice(self.n_train, self.param.get_conf('num_selection')) 74 | self.random_selection_indices = np.arange(self.n_train) 75 | np.random.shuffle(self.random_selection_indices) 76 | self.random_selection_indices = self.random_selection_indices[:self.param.get_conf('num_selection')] 77 | 78 | def gen_train_data(self): 79 | # data.n_train = 60000 80 | # param.get_conf('num_selection') = 5000 81 | # random_selection_indices = np.random.choice(self.n_train, self.param.get_conf('num_selection')) 82 | 83 | # update random train data 84 | self.x_train_ordered = self.x_train_ordered[self.random_selection_indices] 85 | self.y_train_ordered = self.y_train_ordered[self.random_selection_indices] 86 | 87 | def gen_train_backdoor_data(self): 88 | # start creating backdoor data 89 | # the backdoor method can be changed 90 | 91 | self.is_poison_train_ordered, \ 92 | self.x_poisoned_raw, \ 93 | self.y_poisoned_raw = self.backdoor.generate_backdoor(self.x_train_ordered, 94 | self.y_train_ordered, 95 | self.backdoor.train_poison_rate, 96 | sources=self.source_num, 97 | targets=self.target_num) 98 | 99 | self.x_train_ordered, self.y_train_ordered = preprocess_mnist(self.x_poisoned_raw, self.y_poisoned_raw) 100 | 101 | # Add channel axis: 102 | # self.x_train_ordered = np.expand_dims(self.x_train_ordered, axis=3) 103 | 104 | def gen_shuffled_indices(self): 105 | # Shuffle training data so poison is not together 106 | n_train = np.shape(self.y_train_ordered)[0] 107 | self.shuffled_indices = np.arange(n_train) 108 | np.random.shuffle(self.shuffled_indices) 109 | 110 | def gen_shuffle_train_data(self): 111 | 112 | # self.x_train_ordered = self.x_train_ordered[self.shuffled_indices] 113 | # self.y_train_ordered = self.y_train_ordered[self.shuffled_indices] 114 | # self.is_poison_train_ordered = self.is_poison_train_ordered[self.shuffled_indices] 115 | 116 | self.is_clean_ordered = (self.is_poison_train_ordered == 0) 117 | 118 | def print_backdoor_info(self,info): 119 | print('after',info,'backdoor') 120 | print('x_train.shape = ', self.x_train_ordered.shape) 121 | print('y_train.shape = ', self.y_train_ordered.shape) 122 | print('x_poisoned_raw.shape = ', self.x_poisoned_raw.shape) 123 | print('y_poisoned_raw.shape = ', self.y_poisoned_raw.shape) 124 | 125 | ''' 126 | after generating backdoor 127 | x_train.shape = (5000, 28, 28) 128 | y_train.shape = (5000,) 129 | x_poisoned_raw.shape = (5209, 28, 28) 130 | y_poisoned_raw.shape = (5209,) 131 | 132 | 5000 -> 5209 133 | 134 | increasing number depends on the number of cases of sources 135 | generate extra test case from sources to targets 
136 | ''' 137 | 138 | def gen_train_backdoor(self): 139 | self.gen_indices() 140 | self.gen_train_data() # train_data got 141 | self.gen_train_backdoor_data() 142 | 143 | self.gen_shuffled_indices() 144 | self.gen_shuffle_train_data() 145 | # print("self.shuffled_indices = ", self.shuffled_indices) 146 | 147 | # test data 148 | def gen_test_backdoor_data(self): 149 | # Poison test data 150 | self.is_poison_test, \ 151 | self.x_poisoned_raw_test, \ 152 | self.y_poisoned_raw_test = self.backdoor.generate_backdoor(self.x_test, 153 | self.y_test, 154 | self.backdoor.test_poison_rate, 155 | sources=self.source_num, 156 | targets=self.target_num) 157 | 158 | self.x_test, self.y_test = preprocess_mnist(self.x_poisoned_raw_test, self.y_poisoned_raw_test) 159 | # Add channel axis: 160 | # self.x_test = np.expand_dims(self.x_test, axis=3) 161 | 162 | def gen_test_backdoor(self): 163 | self.gen_test_backdoor_data() 164 | 165 | self.print_backdoor_info("generate") 166 | 167 | def gen_backdoor(self, model=None): 168 | # self.gen_indices() 169 | 170 | self.gen_train_backdoor() 171 | # 1. input case index 172 | # 2. train poison meta data 173 | # should be stored in model 174 | self.backdoor.get_poison().set_random_selection_indices(self.random_selection_indices) 175 | self.backdoor.get_poison().set_shuffled_indices(self.shuffled_indices) 176 | self.train_poisoned_index = self.backdoor.get_poison().get_indices_to_be_poisoned() 177 | if model: 178 | model.set_train_poison(self.backdoor.get_poison()) 179 | 180 | self.gen_test_backdoor() 181 | 182 | # test poison meta data 183 | self.test_poisoned_index = self.backdoor.get_poison().get_indices_to_be_poisoned() 184 | if model: 185 | model.set_test_poison(self.backdoor.get_poison()) 186 | 187 | # restore train data 188 | def restore_train_backdoor_data(self, poison): 189 | self.is_poison_train_ordered, \ 190 | self.x_poisoned_raw, \ 191 | self.y_poisoned_raw = self.backdoor.restore_backdoor(self.x_train_ordered, 192 | self.y_train_ordered, 193 | poison) 194 | # Add channel axis: 195 | # self.x_poisoned_raw = np.expand_dims(self.x_poisoned_raw, axis=3) 196 | self.x_train_ordered, self.y_train_ordered = preprocess_mnist(self.x_poisoned_raw, self.y_poisoned_raw) 197 | 198 | def restore_train_backdoor(self, poison): 199 | self.random_selection_indices = poison.get_random_selection_indices() 200 | self.shuffled_indices = poison.get_shuffled_indices() 201 | self.train_poisoned_index = poison.get_indices_to_be_poisoned() 202 | self.gen_train_data() 203 | self.restore_train_backdoor_data(poison) 204 | 205 | self.gen_shuffle_train_data() 206 | 207 | def restore_test_backdoor_data(self, poison): 208 | # Poison test data 209 | self.is_poison_test, \ 210 | self.x_poisoned_raw_test, \ 211 | self.y_poisoned_raw_test = self.backdoor.restore_backdoor(self.x_test, 212 | self.y_test, 213 | poison) 214 | 215 | self.x_test, self.y_test = preprocess_mnist(self.x_poisoned_raw_test, self.y_poisoned_raw_test) 216 | 217 | # Add channel axis: 218 | # self.x_test = np.expand_dims(self.x_test, axis=3) 219 | 220 | @property 221 | def x_train(self): 222 | return self.x_train_ordered[self.shuffled_indices] 223 | 224 | @property 225 | def y_train(self): 226 | return self.y_train_ordered[self.shuffled_indices] 227 | 228 | @property 229 | def is_poison_train(self): 230 | return self.is_poison_train_ordered[self.shuffled_indices] 231 | 232 | @property 233 | def is_clean(self): 234 | return self.is_clean_ordered[self.shuffled_indices] 235 | 236 | def restore_test_backdoor(self, poison): 237 | 
self.test_poisoned_index = poison.get_indices_to_be_poisoned() 238 | self.restore_test_backdoor_data(poison) 239 | self.print_backdoor_info("restore") 240 | 241 | 242 | def restore_backdoor(self, model): 243 | self.restore_train_backdoor(model.get_train_poison()) 244 | self.restore_test_backdoor(model.get_test_poison()) 245 | 246 | def get_backdoor(self): 247 | return self.backdoor 248 | 249 | def set_backdoor(self, backdoor): 250 | self.backdoor = backdoor 251 | 252 | def visiualize_img_by_idx(self, shuffled_idx, pre_label, is_train=True): 253 | 254 | if is_train: 255 | idx = self.shuffled_indices[shuffled_idx] 256 | if self.is_poison_train_ordered[idx]: 257 | # print("idx of poison in train set", self.train_poisoned_index[self.cal_index(idx)], self.cal_index(idx)) 258 | visualize_img_with_backdoor( 259 | self.x_poisoned_raw[self.train_poisoned_index[self.cal_index(idx)]], 260 | self.y_train_ordered[self.train_poisoned_index[self.cal_index(idx)]].argmax(), 261 | pre_label, 262 | self.x_poisoned_raw[idx], 263 | np.argmax(self.y_train_ordered[idx]) 264 | ) 265 | else: 266 | visualize_img_without_backdoor( 267 | self.x_poisoned_raw[idx], 268 | self.y_train_ordered[idx].argmax(), 269 | pre_label, 270 | None) 271 | else: 272 | idx = shuffled_idx 273 | if self.is_poison_test[idx]: 274 | # print("idx of poison in test set", self.test_poisoned_index[self.cal_index(idx, False)], 275 | # self.cal_index(idx, False)) 276 | visualize_img_with_backdoor( 277 | self.x_poisoned_raw_test[self.test_poisoned_index[self.cal_index(idx, False)]], 278 | self.y_test[self.test_poisoned_index[self.cal_index(idx, False)]].argmax(), 279 | pre_label, 280 | self.x_poisoned_raw_test[idx], 281 | np.argmax(self.y_test[idx]), 282 | "Test") 283 | 284 | else: 285 | visualize_img_without_backdoor(self.x_poisoned_raw_test[idx], self.y_test[idx].argmax(), pre_label, 286 | "Test" ) 287 | 288 | def cal_index(self, idx, is_train=True): 289 | if is_train: 290 | return idx - len(self.random_selection_indices) 291 | else: 292 | return idx - len(self.y_test) + len(self.test_poisoned_index) 293 | 294 | def get_clean_data(self): 295 | if not hasattr(self, "is_poison_train"): 296 | return self.x_train, self.y_train, self.x_test, self.y_test 297 | return self.x_train[self.is_poison_train == 0], \ 298 | self.y_train[self.is_poison_train == 0], \ 299 | self.x_test[self.is_poison_test == 0], \ 300 | self.y_test[self.is_poison_test == 0] 301 | 302 | def get_poison_data(self): 303 | return self.x_train[self.is_poison_train == 1], \ 304 | self.y_train[self.is_poison_train == 1], \ 305 | self.x_test[self.is_poison_test == 1], \ 306 | self.y_test[self.is_poison_test == 1] 307 | 308 | def get_specific_label_clean_data(self, label): 309 | x_train, y_train, x_test, y_test = self.get_clean_data() 310 | y_train_label = np.argmax(y_train, axis=1) 311 | y_test_label = np.argmax(y_test, axis=1) 312 | return x_train[y_train_label == label], \ 313 | y_train[y_train_label == label], \ 314 | x_test[y_test_label == label], \ 315 | y_test[y_test_label == label] 316 | 317 | def get_specific_label_poison_data(self, label): 318 | x_train, y_train, x_test, y_test = self.get_poison_data() 319 | y_train_label = np.argmax(y_train, axis=1) 320 | y_test_label = np.argmax(y_test, axis=1) 321 | return x_train[y_train_label == label], \ 322 | y_train[y_train_label == label], \ 323 | x_test[y_test_label == label], \ 324 | y_test[y_test_label == label] 325 | 326 | def get_specific_label_data(self, label): 327 | y_train = self.y_train.argmax(axis=1) 328 | y_test = 
self.y_test.argmax(axis=1) 329 | return self.x_train[y_train == label], \ 330 | self.y_train[y_train == label], \ 331 | self.x_test[y_test == label], \ 332 | self.y_test[y_test == label], \ 333 | self.is_poison_train[y_train == label], \ 334 | self.is_poison_test[y_test == label] 335 | 336 | 337 | if __name__ == '__main__': 338 | json_name = sys.argv[1] 339 | param = Param(json_name) 340 | param.load_json() 341 | data = CifarData(param) 342 | data.load_data() 343 | data.gen_backdoor() 344 | -------------------------------------------------------------------------------- /src/classifiers/classifier.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | # 3 | # Copyright (C) IBM Corporation 2018 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 6 | # documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 7 | # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit 8 | # persons to whom the Software is furnished to do so, subject to the following conditions: 9 | # 10 | # The above copyright notice and this permission notice shall be included in all copies or substantial portions of the 11 | # Software. 12 | # 13 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE 14 | # WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 15 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 16 | # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 17 | # SOFTWARE. 18 | from __future__ import absolute_import, division, print_function, unicode_literals 19 | 20 | import abc 21 | import os 22 | import sys 23 | 24 | sys.path.append(os.path.dirname(__file__) + os.sep + '../') 25 | 26 | import numpy as np 27 | 28 | # Ensure compatibility with Python 2 and 3 when using ABCMeta 29 | if sys.version_info >= (3, 4): 30 | ABC = abc.ABC 31 | else: 32 | ABC = abc.ABCMeta(str('ABC'), (), {}) 33 | 34 | 35 | class Classifier(ABC): 36 | """ 37 | Base class for all classifiers. 38 | """ 39 | 40 | def __init__(self, channel_index, clip_values=None, defences=None, preprocessing=(0, 1), param=None): 41 | """ 42 | Initialize a `Classifier` object. 43 | 44 | :param channel_index: Index of the axis in data containing the color channels or features. 45 | :type channel_index: `int` 46 | :param clip_values: Tuple of the form `(min, max)` of floats or `np.ndarray` representing the minimum and 47 | maximum values allowed for features. If floats are provided, these will be used as the range of all 48 | features. If arrays are provided, each value will be considered the bound for a feature, thus 49 | the shape of clip values needs to match the total number of features. 50 | :type clip_values: `tuple` 51 | :param defences: Defence(s) to be activated with the classifier. 52 | :type defences: :class:`.Preprocessor` or `list(Preprocessor)` instances 53 | :param preprocessing: Tuple of the form `(subtrahend, divisor)` of floats or `np.ndarray` of values to be 54 | used for data preprocessing. The first value will be subtracted from the input. The input will then 55 | be divided by the second one. 
56 | :type preprocessing: `tuple` 57 | """ 58 | self.param = param 59 | if clip_values is not None: 60 | if len(clip_values) != 2: 61 | raise ValueError('`clip_values` should be a tuple of 2 floats or arrays containing the allowed ' 62 | 'data range.') 63 | if np.array(clip_values[0] >= clip_values[1]).any(): 64 | raise ValueError('Invalid `clip_values`: min >= max.') 65 | self._clip_values = clip_values 66 | 67 | self._channel_index = channel_index 68 | 69 | self.defences = defences 70 | 71 | if len(preprocessing) != 2: 72 | raise ValueError('`preprocessing` should be a tuple of 2 floats with the subtract and divide values for ' 73 | 'the model inputs.') 74 | self.preprocessing = preprocessing 75 | 76 | @abc.abstractmethod 77 | def predict(self, x, logits=False, batch_size=128): 78 | """ 79 | Perform prediction for a batch of inputs. 80 | 81 | :param x: Test set. 82 | :type x: `np.ndarray` 83 | :param logits: `True` if the prediction should be done at the logits layer. 84 | :type logits: `bool` 85 | :param batch_size: Size of batches. 86 | :type batch_size: `int` 87 | :return: Array of predictions of shape `(nb_inputs, self.nb_classes)`. 88 | :rtype: `np.ndarray` 89 | """ 90 | raise NotImplementedError 91 | 92 | @abc.abstractmethod 93 | def fit(self, x, y, batch_size=128, nb_epochs=20, **kwargs): 94 | """ 95 | Fit the classifier on the training set `(x, y)`. 96 | 97 | :param x: Training data. 98 | :type x: `np.ndarray` 99 | :param y: Labels, one-vs-rest encoding. 100 | :type y: `np.ndarray` 101 | :param batch_size: Size of batches. 102 | :type batch_size: `int` 103 | :param nb_epochs: Number of epochs to use for training. 104 | :type nb_epochs: `int` 105 | :param kwargs: Dictionary of framework-specific arguments. 106 | :type kwargs: `dict` 107 | :return: `None` 108 | """ 109 | raise NotImplementedError 110 | 111 | def fit_generator(self, generator, nb_epochs=20, **kwargs): 112 | """ 113 | Fit the classifier using the generator `generator` that yields batches as specified. Framework implementations can 114 | provide framework-specific versions of this function to speed up computation. 115 | 116 | :param generator: Batch generator providing `(x, y)` for each epoch. 117 | :type generator: :class:`.DataGenerator` 118 | :param nb_epochs: Number of epochs to use for training. 119 | :type nb_epochs: `int` 120 | :param kwargs: Dictionary of framework-specific arguments. 121 | :type kwargs: `dict` 122 | :return: `None` 123 | """ 124 | from ..data_generators import DataGenerator  # note: this relative import escapes the package; in this repository the generator appears to live in src/data/DataGenerator.py 125 | 126 | if not isinstance(generator, DataGenerator): 127 | raise ValueError('Expected instance of `DataGenerator` for `fit_generator`, got %s instead.' 128 | % str(type(generator))) 129 | 130 | for _ in range(nb_epochs): 131 | x, y = generator.get_batch() 132 | 133 | # Apply preprocessing and defences 134 | x = self._apply_processing(x) 135 | x, y = self._apply_defences(x, y, fit=True) 136 | 137 | # Fit for current batch 138 | self.fit(x, y, nb_epochs=1, batch_size=len(x), **kwargs) 139 | 140 | @property 141 | def nb_classes(self): 142 | """ 143 | Return the number of output classes. 144 | 145 | :return: Number of classes in the data. 146 | :rtype: `int` 147 | """ 148 | return self._nb_classes 149 | 150 | @property 151 | def input_shape(self): 152 | """ 153 | Return the shape of one input. 154 | 155 | :return: Shape of one input for the classifier.
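For a CIFAR-10 convolutional model this is typically `(32, 32, 3)`; for MNIST, `(28, 28, 1)`.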
156 | :rtype: `tuple` 157 | """ 158 | return self._input_shape 159 | 160 | @property 161 | def clip_values(self): 162 | """ 163 | :return: Tuple of the form `(min, max)` representing the minimum and maximum values allowed for features. 164 | :rtype: `tuple` 165 | """ 166 | return self._clip_values 167 | 168 | @property 169 | def channel_index(self): 170 | """ 171 | :return: Index of the axis in data containing the color channels or features. 172 | :rtype: `int` 173 | """ 174 | return self._channel_index 175 | 176 | @property 177 | def learning_phase(self): 178 | """ 179 | Return the learning phase set by the user for the current classifier. Possible values are `True` for training, 180 | `False` for prediction and `None` if it has not been set through the library. In the latter case, the library 181 | does not do any explicit learning phase manipulation and the current value of the backend framework is used. 182 | If a value has been set by the user for this property, it will impact all following computations for 183 | model fitting, prediction and gradients. 184 | 185 | :return: Value of the learning phase. 186 | :rtype: `bool` or `None` 187 | """ 188 | return self._learning_phase if hasattr(self, '_learning_phase') else None 189 | 190 | @abc.abstractmethod 191 | def class_gradient(self, x, label=None, logits=False): 192 | """ 193 | Compute per-class derivatives w.r.t. `x`. 194 | 195 | :param x: Sample input with shape as expected by the model. 196 | :type x: `np.ndarray` 197 | :param label: Index of a specific per-class derivative. If an integer is provided, the gradient of that class 198 | output is computed for all samples. If multiple values as provided, the first dimension should 199 | match the batch size of `x`, and each value will be used as target for its corresponding sample in 200 | `x`. If `None`, then gradients for all classes will be computed for each sample. 201 | :type label: `int` or `list` 202 | :param logits: `True` if the prediction should be done at the logits layer. 203 | :type logits: `bool` 204 | :return: Array of gradients of input features w.r.t. each class in the form 205 | `(batch_size, nb_classes, input_shape)` when computing for all classes, otherwise shape becomes 206 | `(batch_size, 1, input_shape)` when `label` parameter is specified. 207 | :rtype: `np.ndarray` 208 | """ 209 | raise NotImplementedError 210 | 211 | @abc.abstractmethod 212 | def loss_gradient(self, x, y): 213 | """ 214 | Compute the gradient of the loss function w.r.t. `x`. 215 | 216 | :param x: Sample input with shape as expected by the model. 217 | :type x: `np.ndarray` 218 | :param y: Correct labels, one-vs-rest encoding. 219 | :type y: `np.ndarray` 220 | :return: Array of gradients of the same shape as `x`. 221 | :rtype: `np.ndarray` 222 | """ 223 | raise NotImplementedError 224 | 225 | @property 226 | def layer_names(self): 227 | """ 228 | Return the hidden layers in the model, if applicable. 229 | 230 | :return: The hidden layers in the model, input and output layers excluded. 231 | :rtype: `list` 232 | 233 | .. warning:: `layer_names` tries to infer the internal structure of the model. 234 | This feature comes with no guarantees on the correctness of the result. 235 | The intended order of the layers tries to match their order in the model, but this is not 236 | guaranteed either. 237 | """ 238 | raise NotImplementedError 239 | 240 | @abc.abstractmethod 241 | def get_activations(self, x, layer, batch_size): 242 | """ 243 | Return the output of the specified layer for input `x`. 
`layer` is specified by layer index (between 0 and 244 | `nb_layers - 1`) or by name. The number of layers can be determined by counting the results returned by 245 | calling `layer_names`. 246 | 247 | :param x: Input for computing the activations. 248 | :type x: `np.ndarray` 249 | :param layer: Layer for computing the activations 250 | :type layer: `int` or `str` 251 | :param batch_size: Size of batches. 252 | :type batch_size: `int` 253 | :return: The output of `layer`, where the first dimension is the batch size corresponding to `x`. 254 | :rtype: `np.ndarray` 255 | """ 256 | raise NotImplementedError 257 | 258 | @abc.abstractmethod 259 | def set_learning_phase(self, train): 260 | """ 261 | Set the learning phase for the backend framework. 262 | 263 | :param train: True to set the learning phase to training, False to set it to prediction. 264 | :type train: `bool` 265 | """ 266 | raise NotImplementedError 267 | 268 | @abc.abstractmethod 269 | def save(self, filename, path=None): 270 | """ 271 | Save a model to file in the format specific to the backend framework. 272 | 273 | :param filename: Name of the file where to store the model. 274 | :type filename: `str` 275 | :param path: Path of the folder where to store the model. If no path is specified, the model will be stored in 276 | the default data location of the library `DATA_PATH`. 277 | :type path: `str` 278 | :return: None 279 | """ 280 | raise NotImplementedError 281 | 282 | def _apply_defences(self, x, y, fit=False): 283 | """ 284 | Apply the defences specified for the classifier in inputs `(x, y)`. 285 | 286 | :param x: Input data, where first dimension is the batch size. 287 | :type x: `np.ndarray` 288 | :param y: Labels for input data, where first dimension is the batch size. 289 | :type y: `np.ndarray` 290 | :param fit: `True` if the defences are applied during training. 291 | :return: Value of the data after applying the defences. 292 | :rtype: `np.ndarray` 293 | """ 294 | if self.defences is not None: 295 | for defence in self.defences: 296 | if fit: 297 | if defence.apply_fit: 298 | x, y = defence(x, y) 299 | else: 300 | if defence.apply_predict: 301 | x, y = defence(x, y) 302 | 303 | return x, y 304 | 305 | def _apply_defences_gradient(self, x, grad, fit=False): 306 | """ 307 | Apply the backward pass through the preprocessing defences. 308 | 309 | :param x: Input data for which the gradient is estimated. First dimension is the batch size. 310 | :type x: `np.ndarray` 311 | :param grad: Gradient value so far. 312 | :type grad: `np.ndarray` 313 | :param fit: `True` if the gradient is computed during training. 314 | :return: Value of the gradient. 315 | :rtype: `np.ndarray` 316 | """ 317 | if self.defences is not None: 318 | for defence in self.defences[::-1]: 319 | if fit: 320 | if defence.apply_fit: 321 | grad = defence.estimate_gradient(x, grad) 322 | else: 323 | if defence.apply_predict: 324 | grad = defence.estimate_gradient(x, grad) 325 | 326 | return grad 327 | 328 | def _apply_processing(self, x): 329 | """ 330 | Apply the data preprocessing / normalization steps specified for the classifier on `x`. 331 | 332 | :param x: Input data, where first dimension is the batch size. 333 | :type x: `np.ndarray` 334 | :return: Value of the preprocessed data. 
335 | :rtype: `np.ndarray` 336 | """ 337 | sub, div = self.preprocessing 338 | sub = np.asarray(sub, dtype=x.dtype) 339 | div = np.asarray(div, dtype=x.dtype) 340 | 341 | res = x - sub 342 | res = res / div 343 | 344 | return res 345 | 346 | def _apply_processing_gradient(self, grad): 347 | """ 348 | Apply the backward pass through the data preprocessing / normalization steps. 349 | 350 | :param grad: Gradient value so far. 351 | :type grad: `np.ndarray` 352 | :return: Value of the gradient. 353 | :rtype: `np.ndarray` 354 | """ 355 | _, div = self.preprocessing 356 | div = np.asarray(div, dtype=grad.dtype) 357 | res = grad / div 358 | return res 359 | 360 | def __repr__(self): 361 | repr_ = "%s(channel_index=%r, clip_values=%r, defences=%r, preprocessing=%r)" \ 362 | % (self.__module__ + '.' + self.__class__.__name__, 363 | self.channel_index, self.clip_values, self.defences, self.preprocessing) 364 | 365 | return repr_ 366 | -------------------------------------------------------------------------------- /src/visualization.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | # 3 | # Copyright (C) IBM Corporation 2018 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 6 | # documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 7 | # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit 8 | # persons to whom the Software is furnished to do so, subject to the following conditions: 9 | # 10 | # The above copyright notice and this permission notice shall be included in all copies or substantial portions of the 11 | # Software. 12 | # 13 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE 14 | # WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 15 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 16 | # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 17 | # SOFTWARE. 18 | """ 19 | Module providing visualization functions. 20 | """ 21 | from __future__ import absolute_import, division, print_function, unicode_literals 22 | 23 | import logging 24 | import os.path 25 | 26 | from sklearn.manifold import TSNE 27 | from sklearn.datasets import load_iris, load_digits 28 | from sklearn.decomposition import PCA 29 | import matplotlib.pyplot as plt 30 | import numpy as np 31 | 32 | from utils import * 33 | 34 | # import sys 35 | # sys.path.append(os.path.dirname(__file__) + os.sep + './') 36 | 37 | logger = logging.getLogger(__name__) 38 | 39 | 40 | def create_sprite(images): 41 | """ 42 | Creates a sprite of provided images. 43 | 44 | :param images: Images to construct the sprite. 45 | :type images: `np.array` 46 | :return: An image array containing the sprite. 
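For `N` input images the sprite is a `ceil(sqrt(N)) x ceil(sqrt(N))` grid of thumbnails, padded with black images when `N` is not a perfect square.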
47 | :rtype: `np.ndarray` 48 | """ 49 | shape = np.shape(images) 50 | 51 | if len(shape) < 3 or len(shape) > 4: 52 | raise ValueError('Images provided for sprite have wrong dimensions ' + str(len(shape))) 53 | 54 | if len(shape) == 3: 55 | # Check to see if it's mnist type of images and add axis to show image is gray-scale 56 | images = np.expand_dims(images, axis=3) 57 | shape = np.shape(images) 58 | 59 | # Change black and white images to RGB 60 | if shape[3] == 1: 61 | images = convert_to_rgb(images) 62 | 63 | n = int(np.ceil(np.sqrt(len(images)))) 64 | padding = ((0, n ** 2 - images.shape[0]), (0, 0), (0, 0)) + ((0, 0),) * (images.ndim - 3) 65 | images = np.pad(images, padding, mode='constant', constant_values=0) 66 | 67 | # Tile the individual thumbnails into an image 68 | images = images.reshape((n, n) + images.shape[1:]).transpose((0, 2, 1, 3) + tuple(range(4, images.ndim + 1))) 69 | images = images.reshape((n * images.shape[1], n * images.shape[3]) + images.shape[4:]) 70 | 71 | if images.max() > 2: 72 | sprite = images 73 | else: 74 | sprite = (images * 255).astype(np.uint8) 75 | 76 | return np.array(sprite) 77 | 78 | 79 | def convert_to_rgb(images): 80 | """ 81 | Converts grayscale images to RGB. It changes NxHxWx1 to a NxHxWx3 array, where N is the number of figures, 82 | H is the height and W the width. 83 | 84 | :param images: Grayscale images of shape (NxHxWx1). 85 | :type images: `np.ndarray` 86 | :return: Images in RGB format of shape (NxHxWx3). 87 | :rtype: `np.ndarray` 88 | """ 89 | dims = np.shape(images) 90 | if not ((len(dims) == 4 and dims[-1] == 1) or len(dims) == 3): 91 | raise ValueError('Unexpected shape for grayscale images:' + str(dims)) 92 | 93 | if dims[-1] == 1: 94 | # Squeeze channel axis if it exists 95 | rgb_images = np.squeeze(images, axis=-1) 96 | else: 97 | rgb_images = images 98 | rgb_images = np.stack((rgb_images,) * 3, axis=-1) 99 | 100 | return rgb_images 101 | 102 | 103 | def save_image(image, f_name): 104 | """ 105 | Saves image into a file inside the clustering result directory (`clutser_result`) with the name `f_name`. 106 | 107 | :param image: Image to be saved 108 | :type image: `np.ndarray` 109 | :param f_name: File name containing extension e.g., my_img.jpg, my_img.png, my_images/my_img.png 110 | :type f_name: `str` 111 | :return: `None` 112 | """ 113 | file_name = os.path.join(clutser_result, f_name) 114 | folder = os.path.split(file_name)[0] 115 | if not os.path.exists(folder): 116 | os.makedirs(folder) 117 | 118 | from PIL import Image 119 | im = Image.fromarray(image) 120 | im.save(file_name) 121 | logger.info('Image saved to %s.', file_name) 122 | 123 | 124 | def plot_3d(points, labels, colors=None, save=True, f_name=''): 125 | """ 126 | Generates a 3-D plot of the provided points, where the labels define the 127 | color that will be used to color each data point. 128 | Concretely, the color of points[i] is defined by colors(labels[i]). 129 | Thus, there should be as many labels as colors. 130 | 131 | :param points: arrays with 3-D coordinates of the plots to be plotted 132 | :type points: `np.ndarray` 133 | :param labels: array of integers that determines the color used in the plot for the data point. 134 | Need to start from 0 and be sequential from there on. 135 | :type labels: `lst` 136 | :param colors: Optional argument to specify colors to be used in the plot. If provided, this array should contain 137 | as many colors as labels. 138 | :type colors: `lst` 139 | :param save: When set to True, saves image into a file inside the clustering result directory with the name `f_name`.
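A typical call (variable names hypothetical): `plot_3d(reduced_activations, cluster_labels, save=True, f_name='class_0_clusters.png')`.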
140 | :type save: `bool` 141 | :param f_name: Name used to save the file when save is set to True 142 | :type f_name: `str` 143 | :return: fig 144 | :rtype: `matplotlib.figure.Figure` 145 | """ 146 | try: 147 | import matplotlib 148 | import matplotlib.pyplot as plt 149 | from mpl_toolkits import mplot3d 150 | 151 | if colors is None: 152 | colors = [] 153 | for i in range(len(np.unique(labels))): 154 | colors.append('C' + str(i)) 155 | else: 156 | if len(colors) != len(np.unique(labels)): 157 | raise ValueError('The amount of provided colors should match the number of labels in the 3-D plot.') 158 | 159 | fig = plt.figure() 160 | ax = plt.axes(projection='3d') 161 | 162 | for i, coord in enumerate(points): 163 | try: 164 | color_point = labels[i] 165 | ax.scatter3D(coord[0], coord[1], coord[2], color=colors[color_point]) 166 | except IndexError: 167 | raise ValueError('Labels outside the range. Should start from zero and be sequential thereafter') 168 | if save: 169 | file_name = os.path.realpath(os.path.join(clutser_result, f_name)) 170 | folder = os.path.split(file_name)[0] 171 | 172 | if not os.path.exists(folder): 173 | os.makedirs(folder) 174 | fig.savefig(file_name, bbox_inches='tight') 175 | logger.info('3d-plot saved to %s.', file_name) 176 | 177 | return fig 178 | except ImportError: 179 | logger.warning("matplotlib not installed. For this reason, cluster visualization was not displayed.") 180 | 181 | 182 | def visualize_img_without_backdoor(img, label_org, label_pre, is_train="Train"): 183 | try: 184 | import matplotlib 185 | import matplotlib.pyplot as plt 186 | except ImportError: 187 | print("matplotlib not installed. For this reason, cluster visualization was not displayed") 188 | img = np.squeeze(img) 189 | plt.figure() 190 | plt.subplot(1, 2, 1) 191 | plt.axis('off') 192 | # print(img.shape) 193 | if len(img.shape) == 2: 194 | plt.imshow(img, cmap="gray") 195 | else: 196 | plt.imshow(img) 197 | plt.subplot(1, 2, 2) 198 | plt.axis('off') 199 | plt.text(0, 0.65, 'data set: ' + is_train, fontsize=20) 200 | plt.text(0, 0.55, 'original label: ' + str(label_org), fontsize=20) 201 | plt.text(0, 0.45, 'predicted label: ' + str(label_pre), fontsize=20) 202 | plt.show() 203 | 204 | 205 | def save_png(img, idx): 206 | plt.figure() 207 | plt.axis('off') 208 | if len(img.shape) == 2: 209 | plt.imshow(img, cmap="gray") 210 | else: 211 | plt.imshow(img) 212 | plt.savefig('../log/20200115/' + '_'.join([get_date(), get_signature(), str(idx)]) + '.png', format='png') 213 | 214 | 215 | def save_eps(img_backdoor): 216 | plt.figure() 217 | plt.axis('off') 218 | if len(img_backdoor.shape) == 2: 219 | plt.imshow(img_backdoor, cmap="gray") 220 | else: 221 | plt.imshow(img_backdoor) 222 | plt.savefig('../log/20191218/' + '_'.join([get_date(), get_signature()]) + '.eps', format='eps') 223 | 224 | 225 | def visualize_img_with_backdoor(img_orig, label_org, label_pre, img_backdoor, backdoor, is_train='Train'): 226 | try: 227 | import matplotlib 228 | import matplotlib.pyplot as plt 229 | except ImportError: 230 | print("matplotlib not installed.
For this reason, cluster visualization was not displayed") 231 | 232 | img_orig = np.squeeze(img_orig) 233 | img_backdoor = np.squeeze(img_backdoor) 234 | 235 | save_eps(img_backdoor) 236 | 237 | plt.figure() 238 | plt.subplot(1, 3, 1) 239 | plt.axis('off') 240 | # print(img.shape) 241 | if len(img_orig.shape) == 2: 242 | plt.imshow(img_orig, cmap="gray") 243 | else: 244 | plt.imshow(img_orig) 245 | 246 | plt.subplot(1, 3, 2) 247 | plt.axis("off") 248 | if len(img_backdoor.shape) == 2: 249 | plt.imshow(img_backdoor, cmap="gray") 250 | else: 251 | plt.imshow(img_backdoor) 252 | plt.subplot(1, 3, 3) 253 | plt.axis('off') 254 | plt.text(0, 0.65, 'data set: ' + is_train, fontsize=18) 255 | plt.text(0, 0.55, 'original label: ' + str(label_org), fontsize=18) 256 | plt.text(0, 0.45, 'predicted label: ' + str(label_pre), fontsize=18) 257 | plt.text(0, 0.35, str(label_org) + " --> " + str(backdoor), fontsize=18) 258 | 259 | plt.show() 260 | 261 | 262 | def cal_index(self, idx, is_train=True): 263 | if is_train: 264 | return idx - len(self.random_selection_indices) 265 | else: 266 | return idx - len(self.y_test) + len(self.test_poisoned_index) 267 | 268 | 269 | def t_sne(digits_data=None, digits_target=None): 270 | # digits = load_digits() 271 | # X_tsne = TSNE(n_components=2, random_state=33).fit_transform(digits.data) 272 | # X_pca = PCA(n_components=2).fit_transform(digits.data) 273 | 274 | X_tsne = TSNE(n_components=2, random_state=33).fit_transform(digits_data) 275 | X_pca = PCA(n_components=2).fit_transform(digits_data) 276 | 277 | font = {"color": "darkred", 278 | "size": 13, 279 | "family": "serif"} 280 | 281 | plt.style.use("ggplot") 282 | plt.figure(figsize=(8.5, 4)) 283 | plt.subplot(1, 2, 1) 284 | 285 | # plt.scatter(X_tsne[:, 0], X_tsne[:, 1], c=digits.target, alpha=0.6, 286 | # cmap=plt.cm.get_cmap('rainbow', 10)) 287 | 288 | plt.scatter(X_tsne[:, 0], X_tsne[:, 1], c=digits_target, alpha=0.6, 289 | cmap=plt.cm.get_cmap('rainbow', 10)) 290 | 291 | plt.title("t-SNE", fontdict=font) 292 | cbar = plt.colorbar(ticks=range(10)) 293 | cbar.set_label(label='digit value', fontdict=font) 294 | plt.clim(-0.5, 9.5) 295 | plt.subplot(1, 2, 2) 296 | 297 | # plt.scatter(X_pca[:, 0], X_pca[:, 1], c=digits.target, alpha=0.6, 298 | # cmap=plt.cm.get_cmap('rainbow', 10)) 299 | 300 | plt.scatter(X_pca[:, 0], X_pca[:, 1], c=digits_target, alpha=0.6, 301 | cmap=plt.cm.get_cmap('rainbow', 10)) 302 | 303 | plt.title("PCA", fontdict=font) 304 | cbar = plt.colorbar(ticks=range(10)) 305 | cbar.set_label(label='digit value', fontdict=font) 306 | plt.clim(-0.5, 9.5) 307 | plt.tight_layout() 308 | 309 | check_dir(tsne_result) 310 | 311 | plt.savefig(os.path.join(tsne_result, '_'.join(['t_sne', get_date(), get_signature()]))) 312 | plt.show() 313 | 314 | 315 | def t_sne_vis(digits_data=None, digits_target=None): 316 | # digits = load_digits() 317 | # X_tsne = TSNE(n_components=2, random_state=33).fit_transform(digits.data) 318 | # X_pca = PCA(n_components=2).fit_transform(digits.data) 319 | 320 | 321 | X_tsne = TSNE(n_components=2, random_state=33).fit_transform(digits_data) 322 | 323 | font = {"color": "darkred", 324 | "size": 13, 325 | "family": "serif"} 326 | 327 | plt.style.use("ggplot") 328 | plt.figure(figsize=(8.5, 8.5)) 329 | # plt.axis('off') 330 | # plt.subplot(1, 2, 1) 331 | 332 | colors = ['b', 'c', 'y', 'm', 'r'] 333 | 334 | # plt.scatter(X_tsne[:, 0], X_tsne[:, 1], c=digits.target, alpha=0.6, 335 | # cmap=plt.cm.get_cmap('rainbow', 10)) 336 | 337 | lo = plt.scatter(X_tsne[:, 
0][np.where(digits_target==0)[0]], 338 | X_tsne[:, 1][np.where(digits_target==0)[0]], 339 | alpha=0.6, 340 | color=colors[0]) 341 | 342 | ll = plt.scatter(X_tsne[:, 0][np.where(digits_target == 1)[0]], 343 | X_tsne[:, 1][np.where(digits_target == 1)[0]], 344 | alpha=0.6, 345 | color=colors[1]) 346 | 347 | # plt.scatter(X_tsne[:, 0], X_tsne[:, 1], c=digits_target, alpha=0.6, 348 | # cmap=plt.cm.get_cmap('rainbow', 10) 349 | # ) 350 | 351 | plt.legend((lo, ll), 352 | ('clean','poison'), 353 | scatterpoints=1, 354 | loc='upper left') 355 | # plt.title("t-SNE", fontdict=font) 356 | # cbar = plt.colorbar(ticks=range(10)) 357 | # cbar.set_label(label='digit value', fontdict=font) 358 | # plt.clim(-0.5, 9.5) 359 | # plt.subplot(1, 2, 2) 360 | 361 | # plt.scatter(X_pca[:, 0], X_pca[:, 1], c=digits.target, alpha=0.6, 362 | # cmap=plt.cm.get_cmap('rainbow', 10)) 363 | 364 | # plt.scatter(X_pca[:, 0], X_pca[:, 1], c=digits_target, alpha=0.6, 365 | # cmap=plt.cm.get_cmap('rainbow', 10)) 366 | 367 | # plt.title("PCA", fontdict=font) 368 | # cbar = plt.colorbar(ticks=range(10)) 369 | # cbar.set_label(label='digit value', fontdict=font) 370 | # plt.clim(-0.5, 9.5) 371 | # plt.tight_layout() 372 | 373 | check_dir(tsne_result) 374 | 375 | plt.savefig(os.path.join(tsne_result, '_'.join(['t_sne', get_date(), get_signature()])) + '.eps', format='eps') 376 | plt.show() 377 | # plt.savefig(os.path.join(tsne_result, '_'.join(['t_sne', get_date(), get_signature()]))) 378 | 379 | 380 | def pca_vis(digits_data=None, digits_target=None): 381 | X_pca = PCA(n_components=2).fit_transform(digits_data) 382 | 383 | plt.style.use("ggplot") 384 | plt.figure(figsize=(8.5, 8.5)) 385 | 386 | colors = ['b', 'c', 'y', 'm', 'r'] 387 | 388 | lo = plt.scatter(X_pca[:, 0][np.where(digits_target == 0)[0]], 389 | X_pca[:, 1][np.where(digits_target == 0)[0]], 390 | alpha=0.6, 391 | color=colors[0]) 392 | 393 | ll = plt.scatter(X_pca[:, 0][np.where(digits_target == 1)[0]], 394 | X_pca[:, 1][np.where(digits_target == 1)[0]], 395 | alpha=0.6, 396 | color=colors[1]) 397 | 398 | plt.legend((lo, ll), 399 | ('clean', 'poison'), 400 | scatterpoints=1, 401 | loc='upper left') 402 | 403 | check_dir(tsne_result) 404 | 405 | plt.savefig(os.path.join(tsne_result, '_'.join(['pca', get_date(), get_signature()])) + '.eps', format='eps') 406 | plt.show() 407 | # plt.savefig(os.path.join(tsne_result, '_'.join(['pca', get_date(), get_signature()]))) 408 | 409 | 410 | def save_visualize_autoencoder(x_test, decoded_imgs): 411 | n = 10 412 | for i in range(1, n + 1): 413 | save_eps(np.squeeze(x_test[i])) 414 | save_eps(np.squeeze(decoded_imgs[i])) 415 | 416 | -------------------------------------------------------------------------------- /src/backdoor.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | import numpy as np 4 | 5 | from utils import * 6 | import copy 7 | 8 | 9 | class Backdoor: 10 | def __init__(self, conf): 11 | self.train_poison_rate = conf['train_poison_rate'] 12 | self.test_poison_rate = conf['test_poison_rate'] 13 | self.backdoor_type = conf['backdoor_type'] 14 | self.pert_path = conf['pert_path'] 15 | self.poison = None 16 | self.pert = None 17 | self.conf = conf 18 | self.distortion = [] 19 | def generate_backdoor(self, 20 | x_clean, 21 | y_clean, 22 | percent_poison, 23 | sources=np.arange(10), 24 | targets=(np.arange(10) + 1) % 10, 25 | data_dir=None): 26 | """ 27 | Creates a backdoor in MNIST images by adding a pattern or pixel to the image and changing the label to 
a targeted 28 | class. Default parameters poison each digit so that it gets classified to the next digit. 29 | 30 | :param x_clean: Original raw data 31 | :type x_clean: `np.ndarray` 32 | :param y_clean: Original labels 33 | :type y_clean: `np.ndarray` 34 | :param percent_poison: After poisoning, the target class should contain this percentage of poison 35 | :type percent_poison: `float` 36 | :param backdoor_type: Backdoor type can be `pixel`, `pattern` or `adversarial`; note that it is read from the configuration rather than passed as an argument. 37 | :type backdoor_type: `str` 38 | :param sources: Array that holds the source classes for each backdoor. Poison is 39 | generated by taking images from the source class, adding the backdoor trigger, and labeling as the target class. 40 | Poisonous images from sources[i] will be labeled as targets[i]. 41 | :type sources: `np.ndarray` 42 | :param targets: This array holds the target classes for each backdoor. Poisonous images from sources[i] will be 43 | labeled as targets[i]. 44 | :return: Returns is_poison, which is a boolean array indicating which points are poisonous, poison_x, which 45 | contains all of the data both legitimate and poisoned, and poison_y, which contains all of the labels 46 | both legitimate and poisoned. 47 | :rtype: `tuple` 48 | """ 49 | 50 | 51 | y_poison = np.copy(y_clean) 52 | is_poison = np.zeros(np.shape(y_poison)) 53 | 54 | # for i, (src, tgt) in enumerate(zip(sources, targets)): 55 | 56 | 57 | 58 | # num_poison = round((percent_poison * n_points_in_tgt) / (1 - percent_poison)) 59 | if type(sources) is list: 60 | y_clean_position = [] 61 | n_points_in_src = 0 62 | for sou in sources: 63 | n_points_in_src += np.sum(y_clean == sou) 64 | y_clean_position.append(np.where(y_clean == sou)[0]) 65 | y_clean_position = np.concatenate(y_clean_position, axis=0) 66 | else: 67 | n_points_in_src = np.sum(y_clean == sources) 68 | y_clean_position = np.where(y_clean == sources)[0] 69 | if type(targets) is list: 70 | n_points_in_tgt = 0 71 | for tar in targets: 72 | n_points_in_tgt += np.sum(y_clean == tar) 73 | else: 74 | n_points_in_tgt = np.sum(y_clean == targets) 75 | 76 | self.percent_poison = percent_poison 77 | self.n_points_in_tgt = n_points_in_tgt 78 | self.n_points_in_src = n_points_in_src 79 | # self.backdoor_type = backdoor_type 80 | 81 | self.sources = sources 82 | self.targets = targets 83 | 84 | # generate 85 | # 1. number of poison 86 | # 2.
indices to be poisoned 87 | 88 | 89 | self.gen_poison(y_clean_position) 90 | 91 | if isinstance(x_clean[0], str): 92 | x_poison = x_clean 93 | imgs_p = [x_clean[i] for i in self.poison.get_indices_to_be_poisoned()] 94 | inds_save = np.setdiff1d(np.arange(len(x_clean)), self.poison.get_indices_to_be_poisoned()) 95 | x_poison = x_poison[inds_save] 96 | for f in imgs_p: 97 | # BGR->RGB 98 | img = cv2.imread(os.path.join(data_dir, f))[:, :, ::-1] 99 | img = cv2.resize(img, (self.conf['train_image_size'], self.conf['train_image_size'])) 100 | img = preprocess_input_vgg(img) 101 | img = self.add_backdoor_on_imgs(img) 102 | img = deprocess_vgg(img) 103 | poison_f = f[:-4] + '_poison' + f[-4:] 104 | poison_f = os.path.join(self.conf['poison_target_name'], os.path.split(poison_f)[1]) 105 | x_poison.append(poison_f) 106 | # RGB->BGR 107 | cv2.imwrite(os.path.join(data_dir, poison_f), img[:, :, ::-1]) 108 | 109 | else: 110 | x_poison = np.copy(x_clean) 111 | imgs_p = np.copy(x_clean[self.poison.get_indices_to_be_poisoned()]) 112 | max_val = np.max(x_clean) 113 | # inds_save = np.setdiff1d(np.arange(len(x_clean)), self.poison.get_indices_to_be_poisoned()) 114 | imgs_to_be_poisoned = self.add_backdoor_on_imgs(imgs_p) 115 | # x_poison = x_poison[inds_save] 116 | x_poison = np.append(x_poison, imgs_to_be_poisoned, axis=0) 117 | # label_p = np.copy(y_clean[self.poison.get_indices_to_be_poisoned()]) 118 | y_poison = np.append(y_poison, np.ones((self.poison.get_num_poison())) * self.targets) 119 | is_poison = np.append(is_poison, np.ones(self.poison.get_num_poison())) 120 | 121 | is_poison = is_poison != 0 122 | 123 | return is_poison, x_poison, y_poison 124 | 125 | # restore poison from serialized model 126 | def restore_backdoor(self, 127 | x_clean, 128 | y_clean, 129 | poison, 130 | data_dir=None): 131 | if isinstance(x_clean[0], str): 132 | imgs_poison = [x_clean[i] for i in poison.get_indices_to_be_poisoned()] 133 | x_poison = x_clean 134 | imgs_to_be_poisoned = [] 135 | is_poison = np.zeros(np.shape(y_clean), dtype=np.int32) 136 | for f in imgs_poison: 137 | poison_f = f[:-4] + '_poison' + f[-4:] 138 | poison_f = os.path.join(self.conf['poison_target_name'], os.path.split(poison_f)[1]) 139 | if not os.path.exists(os.path.join(data_dir, poison_f)): 140 | img = cv2.imread(os.path.join(data_dir, f))[:, :, ::-1] 141 | img = cv2.resize(img, (self.conf['train_image_size'], self.conf['train_image_size'])) 142 | img = preprocess_input_vgg(img) 143 | img = self.add_backdoor_on_imgs(img) 144 | img = deprocess_vgg(img) 145 | cv2.imwrite(os.path.join(data_dir, poison_f), img[:, :, ::-1]) 146 | imgs_to_be_poisoned.append(poison_f) 147 | 148 | inds_save = np.setdiff1d(np.arange(len(x_clean)), poison.get_indices_to_be_poisoned()) 149 | x_poison = x_poison[inds_save] 150 | x_poison += imgs_to_be_poisoned 151 | y_poison = np.append(y_clean, np.ones(poison.get_num_poison()) * poison.get_targets(), axis=0) 152 | is_poison = np.append(is_poison, np.ones(poison.get_num_poison())) 153 | else: 154 | x_poison = np.copy(x_clean) 155 | if len(y_clean.shape) == 1: 156 | y_poison = np.copy(y_clean) 157 | else: 158 | y_poison = np.argmax(y_clean, axis=1) 159 | 160 | is_poison = np.zeros(np.shape(y_poison)) 161 | # print(y_clean) 162 | # print(poison.get_sources()) 163 | 164 | # no need to generate poison 165 | # we get the poison from the serialized model directly 166 | 167 | imgs_to_be_poisoned = np.copy(x_clean[poison.get_indices_to_be_poisoned()]) 168 | # inds_save = np.setdiff1d(np.arange(len(x_clean)),
poison.get_indices_to_be_poisoned()) 169 | imgs_to_be_poisoned = self.add_backdoor_on_imgs(imgs_to_be_poisoned) 170 | # label_p = np.copy(y_clean[self.poison.get_indices_to_be_poisoned()]) 171 | # x_poison = x_poison[inds_save] 172 | x_poison = np.append(x_poison, imgs_to_be_poisoned, axis=0) 173 | y_poison = np.append(y_poison, np.ones(poison.get_num_poison()) * poison.get_targets(), axis=0) 174 | is_poison = np.append(is_poison, np.ones(poison.get_num_poison())) 175 | 176 | is_poison = is_poison != 0 177 | 178 | return is_poison, x_poison, y_poison 179 | 180 | def add_backdoor_on_imgs(self, imgs_to_be_poisoned, max_val=255): 181 | if self.backdoor_type == 'pattern': 182 | imgs_to_be_poisoned = self.add_pattern_bd(x=imgs_to_be_poisoned, pixel_value=max_val) 183 | elif self.backdoor_type == 'pixel': 184 | imgs_to_be_poisoned = self.add_single_bd(x=imgs_to_be_poisoned, pixel_value=max_val) 185 | elif self.backdoor_type == 'adversarial': 186 | # load perturbation 187 | if self.pert is None: 188 | self.pert = deserialize_pert(self.pert_path, self.conf['alpha_pert']) 189 | 190 | if self.conf['model_prefix'] in models_noLoad: 191 | self.pert = (self.pert * 255).astype(np.int32) 192 | 193 | 194 | imgs_to_be_poisoned = self.add_adversarial_perturbation(x=imgs_to_be_poisoned) 195 | return imgs_to_be_poisoned 196 | 197 | def gen_poison(self, y_idx): 198 | num_poison = int(self.percent_poison * self.n_points_in_tgt) 199 | num_poison = min(num_poison, self.n_points_in_src) 200 | indices_to_be_poisoned = np.arange(self.n_points_in_src) 201 | np.random.shuffle(indices_to_be_poisoned) 202 | indices_to_be_poisoned = y_idx[indices_to_be_poisoned[:num_poison]] 203 | self.poison = Poison(num_poison, 204 | indices_to_be_poisoned, 205 | self.backdoor_type, 206 | self.sources, 207 | self.targets, 208 | self.percent_poison) 209 | 210 | def add_single_bd(self, x, distance=2, pixel_value=1): 211 | """ 212 | Augments a matrix by setting a value some `distance` away from the bottom-right edge to 1. Works for single images 213 | or a batch of images. 214 | :param x: N X W X H matrix or W X H matrix. Will apply to the last two dimensions. 215 | :type x: `np.ndarray` 216 | 217 | :param distance: distance from bottom-right walls. defaults to 2 218 | :type distance: `int` 219 | 220 | :param pixel_value: Value used to replace the entries of the image matrix 221 | :type pixel_value: `int` 222 | 223 | :return: augmented matrix 224 | :rtype: `np.ndarray` 225 | """ 226 | x = np.array(x) 227 | shape = x.shape 228 | if len(shape) == 4: 229 | width, height = x.shape[1:3] 230 | # x[:, width - distance, height - distance] = pixel_value 231 | x = x.astype(np.int32) 232 | x[:, 0::2, 0::2, :] += 5 233 | x = np.clip(x, 0, 255) 234 | x = x.astype(np.uint8) 235 | elif len(shape) == 3: 236 | width, height, c = x.shape 237 | x[width - distance, height - distance, :] = pixel_value 238 | else: 239 | raise RuntimeError('Do not support numpy arrays of shape ' + str(shape)) 240 | return x 241 | 242 | def add_pattern_bd(self, x, distance=4, pixel_value=1): 243 | """ 244 | Augments a matrix by setting a checkerboard-like pattern of values some `distance` away from the bottom-right 245 | edge to 1. Works for single images or a batch of images. 246 | :param x: N X W X H matrix or W X H matrix. Will apply to the last two dimensions. 247 | :type x: `np.ndarray` 248 | :param distance: distance from bottom-right walls.
defaults to 2 249 | :type distance: `int` 250 | :param pixel_value: Value used to replace the entries of the image matrix 251 | :type pixel_value: `int` 252 | :return: augmented matrix 253 | :rtype: np.ndarray 254 | """ 255 | x = np.array(x) 256 | shape = x.shape 257 | if len(shape) == 4: 258 | width, height = x.shape[1:-1] 259 | # x[:, width - distance, height - distance,:] = pixel_value 260 | # x[:, width - distance - 1, height - distance - 1,:] = pixel_value 261 | # x[:, width - distance , height - distance - 1,:] = pixel_value 262 | # x[:, width - distance - 1, height - distance,:] = pixel_value 263 | # x[:, width - distance, height - distance - 2,:] = pixel_value 264 | # x[:, width - distance - 2, height - distance,:] = pixel_value 265 | if self.conf['model_prefix'] == "GTSRB": 266 | x[:, width-distance - 2 : width-distance + 2, height - distance - 2 : height - distance + 2, :] = [255,255,0] 267 | # x[:, width-distance - 2 : width-distance + 2, height - distance - 2 : height - distance + 2, 2] = pixel_value 268 | distance = 15 269 | # x[:, width-distance + 9: width-distance + 12, height - distance - 2 : height - distance + 1, 0:2] = pixel_value 270 | # x[:, width-distance + 9 : width-distance + 12, height - distance - 2 : height - distance + 1, :] = [255, 255, 0] 271 | else: 272 | x[:, width-distance - 2 : width-distance + 2, height - distance - 2 : height - distance + 2,:] = pixel_value 273 | elif len(shape) == 3: 274 | width, height = x.shape[1:] 275 | x[:, width - distance, height - distance] = pixel_value 276 | x[:, width - distance - 1, height - distance - 1] = pixel_value 277 | x[:, width - distance, height - distance - 2] = pixel_value 278 | x[:, width - distance - 2, height - distance] = pixel_value 279 | elif len(shape) == 2: 280 | width, height = x.shape 281 | x[width - distance, height - distance] = pixel_value 282 | x[width - distance - 1, height - distance - 1] = pixel_value 283 | x[width - distance, height - distance - 2] = pixel_value 284 | x[width - distance - 2, height - distance] = pixel_value 285 | else: 286 | raise RuntimeError('Do not support numpy arrays of shape ' + str(shape)) 287 | return x 288 | 289 | def add_adversarial_perturbation(self, x): 290 | 291 | x = np.array(x) 292 | origin_x = copy.copy(x) 293 | ''' 294 | for i in range(20): 295 | save_png(np.squeeze(x[i]), i) 296 | ''' 297 | # x = x.astype(np.int32) 298 | shape = x.shape 299 | 300 | if self.conf['model_prefix'] in models_noLoad: 301 | x = x.astype(np.int32) 302 | 303 | if len(shape) == 3: 304 | # x.shape = (140,28,28) 305 | # self.pert.shape = (1,28,28,1) 306 | x[:, ] += np.squeeze(self.pert) 307 | elif len(shape) == 2: 308 | # x.shape = (140,28,28) 309 | # self.pert.shape = (1,28,28,1) 310 | x += self.pert 311 | elif len(shape) == 4: 312 | x += self.pert 313 | # make sure the value range [0,255] 314 | 315 | if self.conf['model_prefix'] in models_noLoad: 316 | x = np.clip(x, 0, 255) 317 | # dis = np.abs(x - origin_x) 318 | # self.distortion.append(dis) 319 | ''' 320 | for i in range(20): 321 | save_png(np.squeeze(x[i]), i) 322 | ''' 323 | 324 | return x #, dis 325 | 326 | def get_poison(self): 327 | return self.poison 328 | 329 | def set_poison(self, poison): 330 | self.poison = poison 331 | 332 | def get_pert_path(self): 333 | return self.pert_path 334 | 335 | def set_pert_path(self, pert_path): 336 | self.pert_path = pert_path 337 | -------------------------------------------------------------------------------- /src/poison_detection/clustering_analyzer.py: 
-------------------------------------------------------------------------------- 1 | # MIT License 2 | # 3 | # Copyright (C) IBM Corporation 2018 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 6 | # documentation files (the "Software"), to deal in the Software without restriction, including without limitation the 7 | # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit 8 | # persons to whom the Software is furnished to do so, subject to the following conditions: 9 | # 10 | # The above copyright notice and this permission notice shall be included in all copies or substantial portions of the 11 | # Software. 12 | # 13 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE 14 | # WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 15 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 16 | # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 17 | # SOFTWARE. 18 | from __future__ import absolute_import, division, print_function, unicode_literals 19 | 20 | import logging 21 | 22 | import numpy as np 23 | 24 | logger = logging.getLogger(__name__) 25 | 26 | 27 | class ClusteringAnalyzer: 28 | """ 29 | Class for all methodologies implemented to analyze clusters and determine whether they are poisonous. 30 | """ 31 | 32 | def __init__(self): 33 | """ 34 | Constructor 35 | """ 36 | 37 | @staticmethod 38 | def assign_class(clusters, clean_clusters, poison_clusters): 39 | """ 40 | Determines whether each data point in the class is in a clean or poisonous cluster. 41 | 42 | :param clusters: clusters[i] indicates which cluster the i'th data point is in 43 | :type clusters: `list` 44 | :param clean_clusters: list containing the clusters designated as clean 45 | :type clean_clusters: `list` 46 | :param poison_clusters: list containing the clusters designated as poisonous 47 | :type poison_clusters: `list` 48 | :return: assigned_clean: assigned_clean[i] is a boolean indicating whether the ith data point is clean 49 | """ 50 | assigned_clean = np.empty(np.shape(clusters)) 51 | assigned_clean[np.isin(clusters, clean_clusters)] = 1 52 | assigned_clean[np.isin(clusters, poison_clusters)] = 0 53 | return assigned_clean 54 | 55 | def analyze_by_size(self, separated_clusters): 56 | """ 57 | Designates as poisonous the cluster with the fewest items in it.
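For example, with two classes whose cluster assignments are `[0, 0, 0, 1]` and `[1, 1, 0, 1]`, cluster 1 of class 0 and cluster 0 of class 1 are flagged as poisonous, since each is the smaller cluster of its class.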
58 | 59 | :param separated_clusters: list where separated_clusters[i] is the cluster assignments for the ith class 60 | :type separated_clusters: `list` 61 | :return: all_assigned_clean, summary_poison_clusters, report: 62 | where all_assigned_clean[i] is a 1D boolean array indicating whether 63 | a given data point was determined to be clean (as opposed to poisonous) and 64 | summary_poison_clusters: array, where summary_poison_clusters[i][j]=1 if cluster j of class i was classified as 65 | poison, otherwise 0 66 | report: Dictionary with summary of the analysis 67 | :rtype: all_assigned_clean: `ndarray`, summary_poison_clusters: `list`, report" `dic` 68 | """ 69 | report = {'cluster_analysis': 'smaller', 70 | 'suspicious_clusters': 0 71 | } 72 | 73 | all_assigned_clean = [] 74 | nb_classes = len(separated_clusters) 75 | nb_clusters = len(np.unique(separated_clusters[0])) 76 | summary_poison_clusters = [[[] for x in range(nb_clusters)] for y in range(nb_classes)] 77 | 78 | for i, clusters in enumerate(separated_clusters): 79 | 80 | # assume that smallest cluster is poisonous and all others are clean 81 | sizes = np.bincount(clusters) 82 | total_dp_in_class = np.sum(sizes) 83 | poison_clusters = [np.argmin(sizes)] 84 | clean_clusters = list(set(clusters) - set(poison_clusters)) 85 | 86 | for p_id in poison_clusters: 87 | summary_poison_clusters[i][p_id] = 1 88 | for c_id in clean_clusters: 89 | summary_poison_clusters[i][c_id] = 0 90 | 91 | assigned_clean = self.assign_class(clusters, clean_clusters, poison_clusters) 92 | all_assigned_clean.append(assigned_clean) 93 | 94 | # Generate report for this class: 95 | report_class = dict() 96 | for cluster_id in range(nb_clusters): 97 | ptc = sizes[cluster_id]/total_dp_in_class 98 | susp = (cluster_id in poison_clusters) 99 | dict_i = dict(ptc_data_in_cluster=round(ptc, 2), suspicious_cluster=susp) 100 | 101 | dict_cluster = {'cluster_'+str(cluster_id): dict_i} 102 | report_class.update(dict_cluster) 103 | 104 | report['Class_'+str(i)] = report_class 105 | 106 | report['suspicious_clusters'] = report['suspicious_clusters'] + np.sum(summary_poison_clusters).item() 107 | return np.asarray(all_assigned_clean), summary_poison_clusters, report 108 | 109 | def analyze_by_distance(self, separated_clusters, separated_activations): 110 | """ 111 | Assigns a cluster as poisonous if its median activation is closer to the median activation for another class 112 | than it is to the median activation of its own class. Currently, this function assumes there are only 113 | two clusters per class. 
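Concretely, cluster 0 of class i is flagged when it lies closer to some other class's median activation than to its own class's median while cluster 1 stays closer to its own class, and vice versa for cluster 1.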
114 | 115 | :param separated_clusters: list where separated_clusters[i] is the cluster assignments for the ith class 116 | :type separated_clusters: `list` 117 | :param separated_activations: list where separated_activations[i] contains the activations for the ith class 118 | :type separated_activations: `list` 119 | :return: all_assigned_clean, summary_poison_clusters, report: 120 | where all_assigned_clean[i] is a 1D boolean array indicating whether 121 | a given data point was determined to be clean (as opposed to poisonous) and 122 | summary_poison_clusters: array, where summary_poison_clusters[i][j]=1 if cluster j of class i was classified as 123 | poison, otherwise 0 124 | report: Dictionary with summary of the analysis 125 | :rtype: all_assigned_clean: `ndarray`, summary_poison_clusters: `list`, report: `dict` 126 | """ 127 | report = {'cluster_analysis': 'distance' 128 | } 129 | 130 | all_assigned_clean = [] 131 | cluster_centers = [] 132 | 133 | nb_classes = len(separated_clusters) 134 | nb_clusters = len(np.unique(separated_clusters[0])) 135 | summary_poison_clusters = [[[] for x in range(nb_clusters)] for y in range(nb_classes)] 136 | 137 | # assign centers 138 | for t, activations in enumerate(separated_activations): 139 | cluster_centers.append(np.median(activations, axis=0)) 140 | 141 | for i, (clusters, ac) in enumerate(zip(separated_clusters, separated_activations)): 142 | clusters = np.array(clusters) 143 | 144 | cluster0_center = np.median(ac[np.where(clusters == 0)], axis=0) 145 | cluster1_center = np.median(ac[np.where(clusters == 1)], axis=0) 146 | 147 | cluster0_distance = np.linalg.norm(cluster0_center - cluster_centers[i]) 148 | cluster1_distance = np.linalg.norm(cluster1_center - cluster_centers[i]) 149 | 150 | cluster0_is_poison = False 151 | cluster1_is_poison = False 152 | 153 | dict_k = dict() 154 | dict_cluster_0 = dict(cluster0_distance_to_its_class=str(cluster0_distance)) 155 | dict_cluster_1 = dict(cluster1_distance_to_its_class=str(cluster1_distance)) 156 | for k, center in enumerate(cluster_centers): 157 | if k == i: 158 | pass 159 | else: 160 | cluster0_distance_to_k = np.linalg.norm(cluster0_center - center) 161 | cluster1_distance_to_k = np.linalg.norm(cluster1_center - center) 162 | 163 | if cluster0_distance_to_k < cluster0_distance and cluster1_distance_to_k > cluster1_distance: 164 | cluster0_is_poison = True 165 | if cluster1_distance_to_k < cluster1_distance and cluster0_distance_to_k > cluster0_distance: 166 | cluster1_is_poison = True 167 | 168 | dict_cluster_0['distance_to_class_'+str(k)] = str(cluster0_distance_to_k) 169 | dict_cluster_0['suspicious'] = str(cluster0_is_poison) 170 | 171 | dict_cluster_1['distance_to_class_'+str(k)] = str(cluster1_distance_to_k) 172 | dict_cluster_1['suspicious'] = str(cluster1_is_poison) 173 | 174 | dict_k.update(dict_cluster_0) 175 | dict_k.update(dict_cluster_1) 176 | 177 | report_class = dict(cluster_0=dict_cluster_0, cluster_1=dict_cluster_1) 178 | report['Class_' + str(i)] = report_class 179 | 180 | poison_clusters = [] 181 | if cluster0_is_poison: 182 | poison_clusters.append(0) 183 | summary_poison_clusters[i][0] = 1 184 | else: 185 | summary_poison_clusters[i][0] = 0 186 | 187 | if cluster1_is_poison: 188 | poison_clusters.append(1) 189 | summary_poison_clusters[i][1] = 1 190 | else: 191 | summary_poison_clusters[i][1] = 0 192 | 193 | clean_clusters = list(set(clusters) - set(poison_clusters)) 194 | assigned_clean = self.assign_class(clusters, clean_clusters, poison_clusters) 195 |
all_assigned_clean.append(assigned_clean) 196 | 197 | all_assigned_clean = np.asarray(all_assigned_clean) 198 | return all_assigned_clean, summary_poison_clusters, report 199 | 200 | def analyze_by_relative_size(self, separated_clusters, size_threshold=0.35, r_size=2): 201 | """ 202 | Assigns a cluster as poisonous if the smaller one contains less than threshold of the data. 203 | This method assumes only 2 clusters 204 | 205 | :param separated_clusters: list where separated_clusters[i] is the cluster assignments for the ith class 206 | :type separated_clusters: `list` 207 | :param size_threshold: (optional) threshold used to define when a cluster is substantially smaller. A default 208 | value is used if the parameter is not provided. 209 | :type size_threshold: `float` 210 | :param r_size: Round number used for size rate comparisons. 211 | :type r_size `int` 212 | :return: all_assigned_clean, summary_poison_clusters, report: 213 | where all_assigned_clean[i] is a 1D boolean array indicating whether 214 | a given data point was determined to be clean (as opposed to poisonous) and 215 | summary_poison_clusters: array, where summary_poison_clusters[i][j]=1 if cluster j of class i was classified as 216 | poison, otherwise 0 217 | report: Dictionary with summary of the analysis 218 | :rtype: all_assigned_clean: `ndarray`, summary_poison_clusters: `list`, report" `dic` 219 | """ 220 | size_threshold = round(size_threshold, r_size) 221 | report = {'cluster_analysis': 'relative_size', 222 | 'suspicious_clusters': 0, 223 | 'size_threshold': size_threshold 224 | } 225 | 226 | all_assigned_clean = [] 227 | nb_classes = len(separated_clusters) 228 | nb_clusters = len(np.unique(separated_clusters[0])) 229 | summary_poison_clusters = [[[] for x in range(nb_clusters)] for y in range(nb_classes)] 230 | 231 | for i, clusters in enumerate(separated_clusters): 232 | sizes = np.bincount(clusters) 233 | total_dp_in_class = np.sum(sizes) 234 | 235 | if np.size(sizes) > 2: 236 | raise ValueError(" RelativeSizeAnalyzer does not support more than two clusters.") 237 | percentages = np.round(sizes / float(np.sum(sizes)), r_size) 238 | poison_clusters = np.where(percentages < size_threshold) 239 | clean_clusters = np.where(percentages >= size_threshold) 240 | 241 | for p_id in poison_clusters[0]: 242 | summary_poison_clusters[i][p_id] = 1 243 | for c_id in clean_clusters[0]: 244 | summary_poison_clusters[i][c_id] = 0 245 | 246 | assigned_clean = self.assign_class(clusters, clean_clusters, poison_clusters) 247 | all_assigned_clean.append(assigned_clean) 248 | 249 | # Generate report for this class: 250 | report_class = dict() 251 | for cluster_id in range(nb_clusters): 252 | ptc = sizes[cluster_id] / total_dp_in_class 253 | susp = (cluster_id in poison_clusters) 254 | dict_i = dict(ptc_data_in_cluster=round(ptc, 2), suspicious_cluster=susp) 255 | 256 | dict_cluster = {'cluster_' + str(cluster_id): dict_i} 257 | report_class.update(dict_cluster) 258 | 259 | report['Class_' + str(i)] = report_class 260 | 261 | report['suspicious_clusters'] = report['suspicious_clusters'] + np.sum(summary_poison_clusters).item() 262 | return np.asarray(all_assigned_clean), summary_poison_clusters, report 263 | 264 | def analyze_by_silhouette_score(self, separated_clusters, reduced_activations_by_class, size_threshold=0.35, 265 | silhouette_threshold=0.1, r_size=2, r_silhouette=4): 266 | """ 267 | Analyzes clusters to determine level of suspiciousness of poison based on the cluster's relative size 268 | and silhouette score. 
269 | Computes a silhouette score for each class to determine how cohesive resulting clusters are. 270 | A low silhouette score indicates that the clustering does not fit the data well, and the class can be considered 271 | to be unpoisoned. Conversely, a high silhouette score indicates that the clusters reflect true splits in the data. 272 | The method concludes that a cluster is poison based on the silhouette score and the cluster relative size. 273 | If the relative size is too small, below a size_threshold and at the same time 274 | the silhouette score is higher than silhouette_threshold, the cluster is classified as poisonous. 275 | If the above thresholds are not provided, the default ones will be used. 276 | 277 | :param separated_clusters: list where separated_clusters[i] is the cluster assignments for the ith class 278 | :type separated_clusters: `list` 279 | :param reduced_activations_by_class: list where separated_activations[i] is a 1D array of [0,1] for [poison,clean] 280 | :type reduced_activations_by_class: `list` 281 | :param size_threshold: (optional) threshold used to define when a cluster is substantially smaller. A default 282 | value is used if the parameter is not provided. 283 | :type size_threshold: `float` 284 | :param silhouette_threshold: (optional) threshold used to define when a cluster is cohesive. Default 285 | value is used if the parameter is not provided. 286 | :type silhouette_threshold: `float` 287 | :param r_size: Round number used for size rate comparisons. 288 | :type r_size `int` 289 | :param r_silhouette: Round number used for silhouette rate comparisons. 290 | :type r_silhouette: `int` 291 | :return: all_assigned_clean, summary_poison_clusters, report: 292 | where all_assigned_clean[i] is a 1D boolean array indicating whether 293 | a given data point was determined to be clean (as opposed to poisonous) 294 | summary_poison_clusters: array, where summary_poison_clusters[i][j]=1 if cluster j of class j was classified as 295 | poison 296 | report: Dictionary with summary of the analysis 297 | :rtype: all_assigned_clean: `ndarray`, summary_poison_clusters: `list`, report" `dic` 298 | """ 299 | from sklearn.metrics import silhouette_score 300 | size_threshold = round(size_threshold, r_size) 301 | silhouette_threshold = round(silhouette_threshold, r_silhouette) 302 | report = {'cluster_analysis': 'silhouette_score', 'size_threshold': str(size_threshold), 303 | 'silhouette_threshold': str(silhouette_threshold)} 304 | all_assigned_clean = [] 305 | nb_classes = len(separated_clusters) 306 | nb_clusters = len(np.unique(separated_clusters[0])) 307 | summary_poison_clusters = [[[] for x in range(nb_clusters)] for y in range(nb_classes)] 308 | 309 | for i, (clusters, activations) in enumerate(zip(separated_clusters, reduced_activations_by_class)): 310 | bins = np.bincount(clusters) 311 | if np.size(bins) > 2: 312 | raise ValueError("Analyzer does not support more than two clusters.") 313 | percentages = np.round(bins / float(np.sum(bins)), r_size) 314 | poison_clusters = np.where(percentages < size_threshold) 315 | clean_clusters = np.where(percentages >= size_threshold) 316 | 317 | # Generate report for class 318 | silhouette_avg = round(silhouette_score(activations, clusters), r_silhouette) 319 | dict_i = dict(sizes_clusters=str(bins), 320 | ptc_cluster=str(percentages), 321 | avg_silhouette_score=str(silhouette_avg)) 322 | 323 | if np.shape(poison_clusters)[1] != 0: 324 | # Relative size of the clusters is suspicious 325 | if silhouette_avg > 
silhouette_threshold: 326 | # In this case the cluster is considered poisonous 327 | clean_clusters = np.where(percentages < size_threshold) 328 | logger.info('computed silhouette score: %s', silhouette_avg) 329 | dict_i.update(suspicious=True) 330 | else: 331 | poison_clusters = [[]] 332 | clean_clusters = np.where(percentages >= 0) 333 | dict_i.update(suspicious=False) 334 | else: 335 | # If the relative size of the clusters is not suspicious, we conclude the class is not suspicious. 336 | dict_i.update(suspicious=False) 337 | 338 | report_class = {'class_' + str(i): dict_i} 339 | 340 | for p_id in poison_clusters[0]: 341 | summary_poison_clusters[i][p_id] = 1 342 | for c_id in clean_clusters[0]: 343 | summary_poison_clusters[i][c_id] = 0 344 | 345 | assigned_clean = self.assign_class(clusters, clean_clusters, poison_clusters) 346 | all_assigned_clean.append(assigned_clean) 347 | report.update(report_class) 348 | 349 | return np.asarray(all_assigned_clean), summary_poison_clusters, report 350 | -------------------------------------------------------------------------------- /src/attacks/CW.py: -------------------------------------------------------------------------------- 1 | """The CarliniWagnerL2 attack 2 | """ 3 | # pylint: disable=missing-docstring 4 | import logging 5 | 6 | import numpy as np 7 | from numpy.lib.npyio import save 8 | import tensorflow as tf 9 | from utils import * 10 | from attacks.backdoor_generator import BackdoorGenerator 11 | import math as m 12 | from tqdm.gui import trange 13 | 14 | MAX_ITER = 5 15 | 16 | 17 | def create_logger(name): 18 | """ 19 | Create a logger object; the given name is currently unused and the shared 'reforcement' base logger is configured and returned. 20 | 21 | If this is the first time that we call this method, then initialize the 22 | formatter. 23 | """ 24 | base = logging.getLogger("reforcement") 25 | if len(base.handlers) == 0: 26 | ch = logging.StreamHandler(stream=sys.stdout) 27 | formatter = logging.Formatter('[%(levelname)s %(asctime)s %(name)s] ' + 28 | '%(message)s') 29 | ch.setFormatter(formatter) 30 | base.addHandler(ch) 31 | 32 | return base 33 | 34 | np_dtype = np.dtype('float32') 35 | tf_dtype = tf.as_dtype('float32') 36 | 37 | _logger = create_logger("cleverhans.attacks.carlini_wagner_l2") 38 | _logger.setLevel(logging.INFO) 39 | 40 | cw_params = { 41 | 'batch_size': 1, 42 | 'confidence': 10, 43 | 'learning_rate': 0.1, 44 | 'binary_search_steps': 5, 45 | 'max_iterations': 300, 46 | 'abort_early': True, 47 | 'initial_const': 0.01, 48 | 'clip_min': 0, 49 | 'clip_max': 1, 50 | 'targeted': True} 51 | 52 | 53 | class CarliniWagnerL2(BackdoorGenerator): 54 | """ 55 | This attack was originally proposed by Carlini and Wagner. It is an 56 | iterative attack that finds adversarial examples on many defenses that 57 | are robust to other attacks. 58 | Paper link: https://arxiv.org/abs/1608.04644 59 | 60 | At a high level, this attack is an iterative attack using Adam and 61 | a specially-chosen loss function to find adversarial examples with 62 | lower distortion than other attacks. This comes at the cost of speed, 63 | as this attack is often much slower than others. 64 | 65 | :param model: cleverhans.model.Model 66 | 67 | :param dtypestr: dtype of the data 68 | :param kwargs: passed through to super constructor 69 | """ 70 | 71 | def __init__(self, model, param, args=cw_params): 72 | """ 73 | Note: the model parameter should be an instance of the 74 | cleverhans.model.Model abstraction provided by CleverHans.
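A hypothetical construction, following this repository's conventions: `attack = CarliniWagnerL2(model, param, args=cw_params)`, where `param` must expose `poison_label_source`, `poison_label_target` and `num_classes` via `get_conf`.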


class CarliniWagnerL2(BackdoorGenerator):
    """
    This attack was originally proposed by Carlini and Wagner. It is an
    iterative attack that finds adversarial examples on many defenses that
    are robust to other attacks.
    Paper link: https://arxiv.org/abs/1608.04644

    At a high level, this attack is an iterative attack using Adam and
    a specially-chosen loss function to find adversarial examples with
    lower distortion than other attacks. This comes at the cost of speed,
    as this attack is often much slower than others.

    :param model: cleverhans.model.Model

    :param dtypestr: dtype of the data
    :param kwargs: passed through to super constructor
    """

    def __init__(self, model, param, args=cw_params):
        """
        Note: the model parameter should be an instance of the
        cleverhans.model.Model abstraction provided by CleverHans.
        """
        # if not isinstance(model, Model):
        #     wrapper_warning_logits()
        #     model = CallableModelWrapper(model, 'logits')
        super(CarliniWagnerL2, self).__init__(model, param)

        self.feedable_kwargs = ('y', 'y_target')
        self.structural_kwargs = [
            'batch_size', 'confidence', 'targeted', 'learning_rate',
            'binary_search_steps', 'max_iterations', 'abort_early',
            'initial_const', 'clip_min', 'clip_max'
        ]
        self.sess = K.get_session()
        fixed = dict(
            (k, v) for k, v in args.items() if k in self.structural_kwargs)
        feedable_names = self.feedable_kwargs
        self.feedable = {k: v for k, v in args.items() if k in feedable_names}
        hash_key = tuple(sorted(fixed.items()))
        self.new_kwargs = dict(x for x in fixed.items())
        self.build_attack(**self.new_kwargs)
        self.source = int(self.param.get_conf('poison_label_source'))
        self.target = int(self.param.get_conf('poison_label_target'))

    def build_attack(self, **kwargs):
        """
        Build the underlying CWL2 attack graph from the structural
        parameters and store it in ``self.CW``. See `parse_params` for the
        accepted keyword arguments.
        """
        self.parse_params(**kwargs)
        # preds = self.model.get_output_tensor()
        # preds_max = tf.reduce_max(preds, 1, keepdims=True)
        # original_predictions = tf.to_float(tf.equal(preds, preds_max))
        # labels = tf.stop_gradient(original_predictions)

        self.CW = CWL2(self.param, self.model, self.batch_size, self.confidence,
                       self.targeted, self.learning_rate,
                       self.binary_search_steps, self.max_iterations,
                       self.abort_early, self.initial_const, self.clip_min,
                       self.clip_max, self.param.get_conf('num_classes'),
                       (self.model.get_input_tensor().get_shape()[1:]))

        # def cw_wrap(x_val, y_val):
        #     return np.array(, dtype=np.float32)

        # wrap = tf.py_func(cw_wrap, [x, labels], tf.float32)
        # wrap.set_shape(x.get_shape())

        # return wrap

    def attack(self, data, xi=30.0/255.0, **kwargs):
        """
        Generate a universal, targeted perturbation from clean samples of the
        source class and store it in ``self.perturb``.
        Sub-classes *should not* implement this method unless they must
        perform special handling of arguments.
        :param data: the dataset wrapper used to fetch clean source-class samples.
        :param xi: l-infinity budget for the accumulated perturbation.
        :param **kwargs: optional parameters used by child classes.
        :return: None; the accumulated perturbation is kept in ``self.perturb``.
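
        Illustrative usage (a sketch, assuming ``model``, ``param``, and
        ``data`` follow this project's wrappers and the config keys
        ``poison_label_source``/``poison_label_target`` are set):

            cw = CarliniWagnerL2(model, param)    # builds the TF attack graph
            cw.attack(data, xi=30.0 / 255.0)      # runs the iterative attack
            pert = cw.perturb                     # universal trigger pattern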
        """

        if self.sess is None:
            raise ValueError("Cannot use `attack` when no `sess` was"
                             " provided")

        num_selection = 5000
        x_val, y_val, _, _ = data.get_specific_label_clean_data(self.source)
        x_val = x_val[:num_selection]
        y_val = y_val[:num_selection]
        # if hash_key not in self.graphs:
        #     self.construct_graph(fixed, feedable, x_val, hash_key)
        # else:
        #     # remove the None arguments, they are just left blank
        #     for k in list(feedable.keys()):
        #         if feedable[k] is None:
        #             del feedable[k]

        # feed_dict = {self.input_tensor: x_val, self.labels_tensor: to_categorical(y_val)}
        targets = np.zeros_like(y_val)
        targets[:, self.target] = 1
        self.perturb = self.CW.attack(x_val, targets, xi=xi)
        # for name in self.feedable:
        #     feed_dict[new_kwargs[name]] = self.feedable[name]

        # return pert

    def parse_params(self,
                     batch_size=1,
                     confidence=10,
                     learning_rate=5e-3,
                     binary_search_steps=5,
                     max_iterations=1000,
                     abort_early=True,
                     initial_const=1e-2,
                     clip_min=0,
                     clip_max=1,
                     targeted=True):
        """
        :param y: (optional) A tensor with the true labels for an untargeted
                  attack. If None (and y_target is None) then use the
                  original labels the classifier assigns.
        :param y_target: (optional) A tensor with the target labels for a
                  targeted attack.
        :param confidence: Confidence of adversarial examples: higher produces
                  examples with larger l2 distortion, but more
                  strongly classified as adversarial.
        :param batch_size: Number of attacks to run simultaneously.
        :param learning_rate: The learning rate for the attack algorithm.
                  Smaller values produce better results but are
                  slower to converge.
        :param binary_search_steps: The number of times we perform binary
                  search to find the optimal tradeoff-constant between the
                  norm of the perturbation and the confidence of the
                  classification.
        :param max_iterations: The maximum number of iterations. Setting this
                  to a larger value will produce lower distortion
                  results. Using only a few iterations requires
                  a larger learning rate, and will produce larger
                  distortion results.
        :param abort_early: If true, allows early aborts if gradient descent
                  is unable to make progress (i.e., gets stuck in
                  a local minimum).
        :param initial_const: The initial tradeoff-constant used to tune the
                  relative importance of the size of the perturbation
                  and the confidence of classification.
                  If binary_search_steps is large, the initial
                  constant is not important. A smaller value of
                  this constant gives lower distortion results.
        :param clip_min: (optional float) Minimum input component value
        :param clip_max: (optional float) Maximum input component value
        """

        # ignore the y and y_target arguments
        self.batch_size = batch_size
        self.confidence = confidence
        self.learning_rate = learning_rate
        self.binary_search_steps = binary_search_steps
        self.max_iterations = max_iterations
        self.abort_early = abort_early
        self.initial_const = initial_const
        self.clip_min = clip_min
        self.clip_max = clip_max
        self.targeted = targeted
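
# The binary search that `binary_search_steps` controls can be summarized in
# a few lines. This is a schematic sketch, not the code used below:
# `succeeded` stands for "the attack found an adversarial example at this
# tradeoff constant".
def _binary_search_const_sketch(succeeded, initial_const=1e-2, steps=5):
    lower, const, upper = 0.0, initial_const, 1e10
    for _ in range(steps):
        if succeeded(const):
            # success: try a smaller constant, trading confidence for distortion
            upper = min(upper, const)
            const = (lower + upper) / 2
        else:
            # failure: raise the constant (x10 until an upper bound is known)
            lower = max(lower, const)
            const = (lower + upper) / 2 if upper < 1e9 else const * 10
    return const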

def ZERO():
    return np.asarray(0., dtype=np_dtype)


class CWL2(object):
    def __init__(self, param, model, batch_size, confidence, targeted,
                 learning_rate, binary_search_steps, max_iterations,
                 abort_early, initial_const, clip_min, clip_max, num_labels,
                 shape):
        """
        Build the TF graph that constructs adversarial examples for the given
        input; the session is taken from the Keras backend.

        :param param: the configuration object.
        :param model: the wrapped Keras model under attack.
        :param batch_size: Number of attacks to run simultaneously.
        :param confidence: Confidence of adversarial examples: higher produces
                  examples with larger l2 distortion, but more
                  strongly classified as adversarial.
        :param targeted: boolean controlling the behavior of the adversarial
                  examples produced. If set to False, they will be
                  misclassified in any wrong class. If set to True,
                  they will be misclassified in a chosen target class.
        :param learning_rate: The learning rate for the attack algorithm.
                  Smaller values produce better results but are
                  slower to converge.
        :param binary_search_steps: The number of times we perform binary
                  search to find the optimal tradeoff-constant between the
                  norm of the perturbation and the confidence of the
                  classification.
        :param max_iterations: The maximum number of iterations. Setting this
                  to a larger value will produce lower distortion
                  results. Using only a few iterations requires
                  a larger learning rate, and will produce larger
                  distortion results.
        :param abort_early: If true, allows early aborts if gradient descent
                  is unable to make progress (i.e., gets stuck in
                  a local minimum).
        :param initial_const: The initial tradeoff-constant used to tune the
                  relative importance of the size of the perturbation
                  and the confidence of classification.
                  If binary_search_steps is large, the initial
                  constant is not important. A smaller value of
                  this constant gives lower distortion results.
        :param clip_min: (optional float) Minimum input component value.
        :param clip_max: (optional float) Maximum input component value.
        :param num_labels: the number of classes in the model's output.
        :param shape: the shape of the model's input tensor.
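
        To keep iterates inside the valid pixel box, the attack optimizes an
        unconstrained variable w (``modifier`` below) and maps it through tanh:

            x' = (tanh(w + x_tanh) + 1) / 2 * (clip_max - clip_min) + clip_min

        where x_tanh is the arctanh-rescaled input, so x' always lies in
        [clip_min, clip_max] and Adam can run without projection. (Informal
        summary of the graph built below.)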
        """

        self.param = param
        self.sess = K.get_session()
        self.TARGETED = targeted
        self.LEARNING_RATE = learning_rate
        self.MAX_ITERATIONS = max_iterations
        self.BINARY_SEARCH_STEPS = binary_search_steps
        self.ABORT_EARLY = abort_early
        self.CONFIDENCE = confidence
        self.initial_const = initial_const
        self.batch_size = batch_size
        self.clip_min = clip_min
        self.clip_max = clip_max
        self.model = model

        self.repeat = binary_search_steps >= 10

        self.shape = shape = tuple([batch_size] + list(shape))

        # the variable we're going to optimize over
        modifier = tf.Variable(np.zeros(shape, dtype=np_dtype))

        # these are variables to be more efficient in sending data to tf
        self.timg = tf.Variable(np.zeros(shape), dtype=tf_dtype, name='timg')
        self.tlab = tf.Variable(np.zeros((batch_size, num_labels)),
                                dtype=tf_dtype,
                                name='tlab')
        self.const = tf.Variable(np.zeros(batch_size),
                                 dtype=tf_dtype,
                                 name='const')

        # and here's what we use to assign them
        self.assign_timg = tf.placeholder(tf_dtype, shape, name='assign_timg')
        self.assign_tlab = tf.placeholder(tf_dtype, (batch_size, num_labels),
                                          name='assign_tlab')
        self.assign_const = tf.placeholder(tf_dtype, [batch_size],
                                           name='assign_const')

        # the resulting instance, tanh'd to keep bounded from clip_min
        # to clip_max
        self.newimg = (tf.tanh(modifier + self.timg) + 1) / 2
        self.newimg = self.newimg * (clip_max - clip_min) + clip_min

        # prediction BEFORE-SOFTMAX of the model
        # model = Model(self.model.get_input_tensor(), [self.model.get_output_bef_softmax()])
        # self.output = model([self.newimg, self.model.get_input_tensor()[1]])

        model = Model([self.model.get_input_tensor()], [self.model.get_output_bef_softmax()])
        self.output = model(self.newimg)

        # distance to the input data
        self.other = (tf.tanh(self.timg) + 1) / \
            2 * (clip_max - clip_min) + clip_min
        self.l2dist = tf.reduce_sum(tf.square(self.newimg - self.other),
                                    list(range(1, len(shape))))

        # compute the probability of the label class versus the maximum other
        real = tf.reduce_sum((self.tlab) * self.output, 1)
        other = tf.reduce_max((1 - self.tlab) * self.output - self.tlab * 10000,
                              1)

        if self.TARGETED:
            # if targeted, optimize for making the other class most likely
            loss1 = tf.maximum(ZERO(), other - real + self.CONFIDENCE)
        else:
            # if untargeted, optimize for making this class least likely.
            loss1 = tf.maximum(ZERO(), real - other + self.CONFIDENCE)

        # sum up the losses
        self.loss_out = self.l2dist + self.const * loss1
        self.loss2 = tf.reduce_sum(self.l2dist)
        self.loss1 = tf.reduce_sum(self.const * loss1)
        self.loss = self.loss1 + self.loss2

        # Setup the adam optimizer and keep track of variables we're creating
        start_vars = set(x.name for x in tf.global_variables())
        optimizer = tf.train.AdamOptimizer(self.LEARNING_RATE)
        self.train = optimizer.minimize(self.loss, var_list=[modifier])
        end_vars = tf.global_variables()
        new_vars = [x for x in end_vars if x.name not in start_vars]

        # these are the variables to initialize when we run
        self.setup = []
        self.setup.append(self.timg.assign(self.assign_timg))
        self.setup.append(self.tlab.assign(self.assign_tlab))
        self.setup.append(self.const.assign(self.assign_const))
        # self.setup.append(self.input_tensor.assign(self.new_imgs))
        self.init = tf.variables_initializer(var_list=[modifier] + new_vars)

    def add_pert(self, img, pert):
        return np.clip(img + pert, self.clip_min, self.clip_max)

    def attack(self, imgs, targets, xi=15.0/255):
        """
        Perform the L_2 attack on the given instances for the given targets,
        accumulating a single universal perturbation across all of them.

        If self.targeted is true, then targets represents the target labels.
        If self.targeted is false, then targets are the original class labels.
        """
        # origin_imgs = copy.deepcopy(imgs)
        num_images = len(imgs)
        imgs = np.array(imgs, dtype=np.float32)
        l2_dis = np.zeros(num_images)
        loss = np.zeros(num_images)
        num_iter = np.zeros(num_images)
        score = np.zeros(num_images)
        # r = []
        tot_pert = np.zeros_like(imgs[0])
        index = np.arange(len(imgs))

        for it in range(MAX_ITER):
            print('starting iter', it)
            np.random.shuffle(index)
            for k, i in enumerate(index):
                _logger.debug(
                    "Running CWL2 attack on instance %s of %s", i, len(imgs))
                # skip images the current universal perturbation already fools
                if self.model.classifier.predict(self.add_pert(imgs[i:i+1], tot_pert)).argmax(axis=1)[0] == targets[i].argmax(axis=0):
                    continue

                adv_imgs, l2_dis[i:i+1], loss[i:i+1], \
                    score[i:i+1], num_iter[i:i+1] = \
                    self.attack_batch(self.add_pert(imgs[i:i+1], tot_pert), targets[i:i + 1])
                # r.extend(adv_imgs)
                # fold this instance's residual perturbation into the universal
                # one and project back onto the l-infinity ball of radius xi
                pert = np.squeeze(adv_imgs - self.add_pert(imgs[i], tot_pert))
                tot_pert = self.proj_lp(tot_pert + pert, xi=xi, p=np.inf)
                print('>> k = ', k, ', img_idx = ', i, ', pass #', it, "size =", np.mean(abs(tot_pert)))
            # slight decay keeps the perturbation from saturating the budget
            tot_pert *= 0.998

            # imgs = origin_imgs + tot_pert
            fooling_rate = 0
            for i in range(0, len(imgs), 128):
                up_i = min(i + 128, len(imgs))
                preds = self.model.classifier.predict(self.add_pert(imgs[i:up_i], tot_pert)).argmax(axis=1)
                fooling_rate += np.sum(preds == targets[i: up_i].argmax(axis=1))
                # print(i, np.sum(preds == targets[i:up_i].argmax(axis=1)), fooling_rate, len(preds))
            fooling_rate /= (1.0 * len(imgs))
            print("fool rate is", fooling_rate)
            if fooling_rate > 0.8:
                break

        # r = np.array(r)
        # save_name = '_'.join([self.param.get_conf('model_prefix'), get_date(), postfix])
        # save_pkl = os.path.join(self.param.get_conf('perturbation_dir'), save_name + '.pkl')

        # with open(save_pkl, 'wb') as f:
        #     pickle.dump(tot_pert, f)

        return tot_pert
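
    # attack_batch below is essentially the original C&W optimization: an
    # outer binary search over the tradeoff constant c, and an inner Adam
    # loop minimizing  ||x' - x||_2^2 + c * f(x')  in tanh space, with an
    # optional early abort when the loss plateaus. compare() applies the
    # confidence margin before deciding whether a logit vector counts as a
    # successful (targeted or untargeted) misclassification.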
    def attack_batch(self, imgs, labs):
        """
        Run the attack on a batch of instances and labels.
        """
        def compare(x, y):
            if not isinstance(x, (float, int, np.int64)):
                x = np.copy(x)
                if self.TARGETED:
                    x[y] -= self.CONFIDENCE
                else:
                    x[y] += self.CONFIDENCE
                x = np.argmax(x)
            if self.TARGETED:
                return x == y
            else:
                return x != y

        batch_size = self.batch_size

        oimgs = np.clip(imgs, self.clip_min, self.clip_max)

        # re-scale instances to be within range [0, 1]
        imgs = (imgs - self.clip_min) / (self.clip_max - self.clip_min)
        imgs = np.clip(imgs, 0, 1)
        # now convert to [-1, 1]
        imgs = (imgs * 2) - 1
        # convert to tanh-space
        imgs = np.arctanh(imgs * .999999)

        # set the lower and upper bounds accordingly
        lower_bound = np.zeros(batch_size)
        CONST = np.ones(batch_size) * self.initial_const
        upper_bound = np.ones(batch_size) * 1e10

        # placeholders for the best l2, score, and instance attack found so far
        o_bestl2 = [1e10] * batch_size
        o_bestscore = [-1] * batch_size
        o_iter = [0] * batch_size
        o_bestloss = [1e10] * batch_size
        o_bestattack = np.copy(oimgs)

        for outer_step in range(self.BINARY_SEARCH_STEPS):
            # print("search iteration ", outer_step)
            # completely reset adam's internal state
            self.sess.run(self.init)
            batch = imgs[:batch_size]
            batchlab = labs[:batch_size]

            bestl2 = [1e10] * batch_size
            bestscore = [-1] * batch_size
            _logger.debug("  Binary search step %s of %s", outer_step,
                          self.BINARY_SEARCH_STEPS)

            # The last iteration (if we run many steps) repeats the search once.
            if self.repeat and outer_step == self.BINARY_SEARCH_STEPS - 1:
                CONST = upper_bound

            # set the variables so that we don't have to send them over again
            self.sess.run(
                self.setup, {
                    self.assign_timg: batch,
                    self.assign_tlab: batchlab,
                    self.assign_const: CONST
                })

            prev = 1e6
            iter_num = 0
            for iteration in range(self.MAX_ITERATIONS):
                # perform one attack step
                _, l, l_o, l2s, scores, nimg = self.sess.run([
                    self.train, self.loss, self.loss_out, self.l2dist, self.output,
                    self.newimg],
                    # feed_dict={self.model.get_input_tensor()[1]: np.random.randint(0, 2, size=(len(batch), 7))}
                    )

                if iteration % ((self.MAX_ITERATIONS // 10) or 1) == 0:
                    _logger.debug(
                        ("    Iteration {} of {}: loss={:.3g} " +
                         "l2={:.3g} f={:.3g}").format(iteration,
                                                      self.MAX_ITERATIONS, l,
                                                      np.mean(l2s),
                                                      np.mean(scores)))

                # check if we should abort search if we're getting nowhere
                if self.ABORT_EARLY and \
                   iteration % ((self.MAX_ITERATIONS // 10) or 1) == 0:
                    if l > prev * .9999:
                        msg = "    Failed to make progress; stopping early"
                        _logger.debug(msg)
                        break
                    prev = l

                # adjust the best result found so far
                for e, (l2, sc, ii, l_oi) in enumerate(zip(l2s, scores, nimg, l_o)):
                    lab = np.argmax(batchlab[e])
                    if l2 < bestl2[e] and compare(sc, lab):
                        bestl2[e] = l2
                        bestscore[e] = np.argmax(sc)
                    if l2 < o_bestl2[e] and compare(sc, lab):
                        o_bestl2[e] = l2
                        o_bestscore[e] = np.argmax(sc)
                        o_bestattack[e] = ii
                        o_iter[e] = iter_num
                        o_bestloss[e] = l_oi
                iter_num += 1

            # adjust the constant as needed
            for e in range(batch_size):
                if compare(bestscore[e], np.argmax(batchlab[e])) and \
                   bestscore[e] != -1:
                    # success, divide const by two
                    upper_bound[e] = min(upper_bound[e], CONST[e])
                    if upper_bound[e] < 1e9:
                        CONST[e] = (lower_bound[e] + upper_bound[e]) / 2
                else:
                    # failure, either multiply by 10 if no solution found yet
                    # or do binary search with the known upper bound
                    lower_bound[e] = max(lower_bound[e], CONST[e])
                    if upper_bound[e] < 1e9:
                        CONST[e] = (lower_bound[e] + upper_bound[e]) / 2
                    else:
                        CONST[e] *= 10
            _logger.debug("  Successfully generated adversarial examples " +
                          "on {} of {} instances.".format(
                              sum(upper_bound < 1e9), batch_size))
            o_bestl2 = np.array(o_bestl2)
            mean = np.mean(np.sqrt(o_bestl2[o_bestl2 < 1e9]))
            _logger.debug("  Mean successful distortion: {:.4g}".format(mean))

        # return the best solution found
        o_bestl2 = np.array(o_bestl2)
        return o_bestattack, o_bestl2, o_bestloss, o_bestscore, o_iter
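
    # proj_lp implements the projection step used by attack() above: it maps
    # an arbitrary perturbation v back onto the l_p ball of radius xi.
    # For p = inf this is an element-wise clamp,
    #     proj(v) = sign(v) * min(|v|, xi),
    # and for p = 2 a rescaling, proj(v) = v * min(1, xi / ||v||_2).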
    def proj_lp(self, v, xi, p):
        """
        Project v onto the l_p ball centered at 0 with radius xi.
        Supports only p = 2 and p = inf for now.
        """
        # print('xi is', xi)
        if p == 2:
            # flatten() order is irrelevant for the norm
            v = v * min(1, xi / np.linalg.norm(v.flatten()))
            # v = v / np.linalg.norm(v.flatten()) * xi
        elif p == np.inf:
            v = np.sign(v) * np.minimum(abs(v), xi)
        else:
            raise ValueError('Values of p other than 2 and inf are currently not supported.')

        return v
--------------------------------------------------------------------------------