├── powerline_pattern.npy ├── models ├── __pycache__ │ ├── fcn.cpython-39.pyc │ └── noise_gan.cpython-39.pyc ├── noise_gan.py ├── noise_gan_expdim.py ├── fcn.py ├── noise_gan_3L.py └── resnet.py ├── utils ├── __pycache__ │ ├── utils.cpython-39.pyc │ └── constants.cpython-39.pyc ├── grad_cam.py ├── constants.py └── utils.py ├── requirements.txt ├── README.md └── main.py /powerline_pattern.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yujingmarkjiang/Time_Series_Backdoor_Attack/HEAD/powerline_pattern.npy -------------------------------------------------------------------------------- /models/__pycache__/fcn.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yujingmarkjiang/Time_Series_Backdoor_Attack/HEAD/models/__pycache__/fcn.cpython-39.pyc -------------------------------------------------------------------------------- /utils/__pycache__/utils.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yujingmarkjiang/Time_Series_Backdoor_Attack/HEAD/utils/__pycache__/utils.cpython-39.pyc -------------------------------------------------------------------------------- /utils/__pycache__/constants.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yujingmarkjiang/Time_Series_Backdoor_Attack/HEAD/utils/__pycache__/constants.cpython-39.pyc -------------------------------------------------------------------------------- /models/__pycache__/noise_gan.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yujingmarkjiang/Time_Series_Backdoor_Attack/HEAD/models/__pycache__/noise_gan.cpython-39.pyc -------------------------------------------------------------------------------- /requirements.txt: 
-------------------------------------------------------------------------------- 1 | scikit-learn==0.24.1 2 | matplotlib==3.3.4 3 | keras-nightly==2.5.0.dev2021032900 4 | h5py==3.1.0 5 | pandas==1.2.4 6 | numpy==1.19.5 7 | scipy==1.6.2 8 | tensorflow==2.5.0 -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # TSBA: Time Series Backdoor Attack 2 | 3 | This is the code for the [SaTML'23 paper](https://arxiv.org/pdf/2211.07915.pdf) "Backdoor Attacks on Time Series: A Generative Approach" by Yujing Jiang, Xingjun Ma, Sarah Monazam Erfani, and James Bailey. 4 | 5 | ## Prerequisites 6 | * Python (3.9.7) 7 | * TensorFlow (2.5.0) 8 | * CUDA (with 4 GPUs) 9 | 10 | ## Data 11 | The data used in this project comes from two sources: 12 | * The [UCR/UEA archive](http://timeseriesclassification.com/TSC.zip), which contains 85 **univariate** time series datasets. 13 | * The [MTS archive](https://drive.google.com/drive/folders/1FgIPN3uUT-b1tiHG-ONi5B31iPN3BMNy?usp=sharing), which contains 13 **multivariate** time series datasets. 14 | 15 | ## How to run 16 | 17 | To run the clean model: 18 | ``` 19 | python main.py run_baseline 20 | ``` 21 | 22 | To run the vanilla backdoor method: 23 | ``` 24 | python main.py run_backdoor vanilla 25 | ``` 26 | 27 | To run the static noise backdoor method: 28 | ``` 29 | python main.py run_backdoor powerline 30 | ``` 31 | 32 | To run our proposed TSBA: 33 | ``` 34 | python main.py run_backdoor generator 35 | ``` 36 | 37 | To test the generator from a trained TSBA model: 38 | ``` 39 | python main.py run_backdoor generative_test 40 | ``` 41 | 42 | ## Reference 43 | For technical details and full experimental results, please check [the paper](https://arxiv.org/pdf/2211.07915.pdf). 
def make_gradcam_heatmap(dataset, model, last_conv_layer_name='activation_8', pred_index=None):
    """Compute a Grad-CAM heatmap for the first sample of ``dataset``.

    Args:
        dataset: Indexable batch of inputs; only ``dataset[0:1, :]`` is used.
        model: Keras model exposing ``inputs``, ``output`` and ``get_layer``.
        last_conv_layer_name: Name of the layer whose activations are
            explained.
        pred_index: Index of the output unit to explain. When ``None`` the
            top predicted class of the sample is used.

    Returns:
        A NumPy array holding the heatmap, with negative contributions
        clipped to 0 and the result divided by the heatmap maximum. When the
        maximum is 0 the heatmap is all zeros instead of NaN.
    """
    # This module has no top-level TensorFlow import, so bring `tf` into
    # scope here; without it every call failed with NameError.
    import tensorflow as tf

    # Only the first sample is explained; the slice keeps the batch axis.
    img_array = dataset[0:1, :]

    # Model mapping the input to both the chosen layer's activations and the
    # final predictions.
    grad_model = tf.keras.models.Model(
        [model.inputs], [model.get_layer(last_conv_layer_name).output, model.output]
    )

    # Record the forward pass so the class score can be differentiated with
    # respect to the chosen layer's activations.
    with tf.GradientTape() as tape:
        last_conv_layer_output, preds = grad_model(img_array)
        if pred_index is None:
            pred_index = tf.argmax(preds[0])
        class_channel = preds[:, pred_index]

    # Gradient of the selected output unit w.r.t. the layer's feature map.
    grads = tape.gradient(class_channel, last_conv_layer_output)

    # Mean gradient per channel: one importance weight per feature map.
    # NOTE(review): axis=(0, 1) assumes rank-3 activations
    # (batch, time, channels) — confirm for the layer being explained.
    pooled_grads = tf.reduce_mean(grads, axis=(0, 1))

    # Weight every channel by its importance and sum over channels.
    last_conv_layer_output = last_conv_layer_output[0]
    heatmap = last_conv_layer_output @ pooled_grads[..., tf.newaxis]
    heatmap = tf.squeeze(heatmap)

    # Clip negatives and normalise by the maximum. divide_no_nan yields 0
    # where the maximum is 0, where a plain division produced NaN.
    heatmap = tf.math.divide_no_nan(tf.maximum(heatmap, 0), tf.math.reduce_max(heatmap))
    return heatmap.numpy()
'ProximalPhalanxOutlineCorrect': 'IMAGE', 'MiddlePhalanxOutlineCorrect': 'IMAGE', 43 | 'DistalPhalanxOutlineCorrect': 'IMAGE', 'FaceAll': 'IMAGE', 44 | 'ECG5000': 'ECG', 'SwedishLeaf': 'IMAGE', 'ChlorineConcentration': 'SIMULATED', 45 | '50words': 'IMAGE', 'ProximalPhalanxTW': 'IMAGE', 'ProximalPhalanxOutlineAgeGroup': 'IMAGE', 46 | 'MiddlePhalanxOutlineAgeGroup': 'IMAGE', 'DistalPhalanxTW': 'IMAGE', 47 | 'DistalPhalanxOutlineAgeGroup': 'IMAGE', 'MiddlePhalanxTW': 'IMAGE', 48 | 'Cricket_Z': 'MOTION', 'Cricket_Y': 'MOTION', 49 | 'Cricket_X': 'MOTION', 'Adiac': 'IMAGE', 50 | 'MedicalImages': 'IMAGE', 'SmallKitchenAppliances': 'DEVICE', 51 | 'ScreenType': 'DEVICE', 'RefrigerationDevices': 'DEVICE', 52 | 'LargeKitchenAppliances': 'DEVICE', 'Earthquakes': 'SENSOR', 53 | 'yoga': 'IMAGE', 'synthetic_control': 'SIMULATED', 54 | 'WordsSynonyms': 'IMAGE', 'Computers': 'DEVICE', 55 | 'InsectWingbeatSound': 'SENSOR', 'Phoneme': 'SENSOR', 56 | 'OSULeaf': 'IMAGE', 'FacesUCR': 'IMAGE', 57 | 'WormsTwoClass': 'MOTION', 'Worms': 'MOTION', 58 | 'FISH': 'IMAGE', 'Haptics': 'MOTION', 59 | 'Epilepsy': 'HAR', 'Ham': 'SPECTRO', 60 | 'Plane': 'SENSOR', 'InlineSkate': 'MOTION', 61 | 'Trace': 'SENSOR', 'ECG200': 'ECG', 62 | 'Lighting7': 'SENSOR', 'ItalyPowerDemand': 'SENSOR', 63 | 'Herring': 'IMAGE', 'Lighting2': 'SENSOR', 64 | 'Car': 'SENSOR', 'Meat': 'SPECTRO', 65 | 'Wine': 'SPECTRO', 'MALLAT': 'SIMULATED', 66 | 'Gun_Point': 'MOTION', 'CinC_ECG_torso': 'ECG', 67 | 'ToeSegmentation1': 'MOTION', 'ToeSegmentation2': 'MOTION', 68 | 'ArrowHead': 'IMAGE', 'OliveOil': 'SPECTRO', 69 | 'Beef': 'SPECTRO', 'CBF': 'SIMULATED', 70 | 'Coffee': 'SPECTRO', 'SonyAIBORobotSurfaceII': 'SENSOR', 71 | 'Symbols': 'IMAGE', 'FaceFour': 'IMAGE', 72 | 'ECGFiveDays': 'ECG', 'TwoLeadECG': 'ECG', 73 | 'BirdChicken': 'IMAGE', 'BeetleFly': 'IMAGE', 74 | 'ShapeletSim': 'SIMULATED', 'MoteStrain': 'SENSOR', 75 | 'SonyAIBORobotSurface': 'SENSOR', 'DiatomSizeReduction': 'IMAGE'} 76 | 77 | themes_colors = {'IMAGE': 'red', 
class Classifier_Noise_GAN:
    """Trigger-pattern generator trained jointly with a backdoor classifier.

    The generator (``self.model``) maps an input series to a pattern of the
    same channel count; ``clip_add`` turns that pattern into a multiplicative
    perturbation of the input, and ``get_full_model`` chains the perturbed
    input into the classifier so both can be trained end to end.
    """

    def __init__(self, output_directory, input_shape, verbose=False, build=True, c_loss=None):
        """Optionally build the generator and save its initial weights.

        Args:
            output_directory: Path prefix used for every file written by this
                class; it is concatenated directly with file names.
            input_shape: Shape passed to ``build_model``.
            verbose: Stored as ``self.verbose``.
            build: When true, build the model, print its summary and write
                ``generator_init.hdf5``.
            c_loss: Forwarded to ``build_model`` (unused there).
        """
        self.output_directory = output_directory
        # NOTE(review): with build=False neither self.model nor self.verbose
        # is set here — presumably the caller assigns them; verify.
        if build == True:
            self.model = self.build_model(input_shape, c_loss)
            self.model.summary()
            self.verbose = verbose
            self.model.save_weights(self.output_directory + 'generator_init.hdf5')
        return

    def build_model(self, input_shape, c_loss=None):
        """Build the (uncompiled) generator network.

        Two Conv1D + BatchNorm + ReLU blocks whose filter counts scale with
        ``input_shape[1]``, followed by two Dense layers. The last layer has
        ``input_shape[1]`` units with ``tanh`` activation, so the output lies
        in [-1, 1].

        Args:
            input_shape: Input shape without the batch axis; index 1 is used
                as the channel count.
            c_loss: Accepted for signature compatibility; not used.

        Returns:
            The Keras generator model.
        """
        input_layer = keras.layers.Input(input_shape)

        conv1 = keras.layers.Conv1D(filters=128*input_shape[1], kernel_size=15, padding='same', name='conv1')(input_layer)
        conv1 = keras.layers.BatchNormalization()(conv1)
        conv1 = keras.layers.Activation(activation='relu')(conv1)

        conv2 = keras.layers.Conv1D(filters=512*input_shape[1], kernel_size=21, padding='same', name='conv2')(conv1)
        conv2 = keras.layers.BatchNormalization()(conv2)
        conv2 = keras.layers.Activation('relu')(conv2)

        # Dense layers act on the last axis, i.e. independently per time step.
        fc1 = keras.layers.Dense(256, activation='relu')(conv2)
        fc2 = keras.layers.Dense(input_shape[1], activation='tanh')(fc1)
        output_layer = fc2
        model = keras.models.Model(inputs=input_layer, outputs=output_layer)

        return model

    def clip_add(self, pattern, ori_data):
        """Apply ``pattern`` to ``ori_data`` as a multiplicative perturbation.

        Returns ``(1 + 0.1 * pattern) * ori_data``. With the generator's tanh
        output in [-1, 1] each value is scaled by a factor in [0.9, 1.1].
        """
        return (1 + pattern* 0.1) * ori_data

    def get_full_model(self, backdoor_clf, gen_trainable=True, bd_trainable=True):
        """Build and compile the generator -> perturbation -> classifier chain.

        Args:
            backdoor_clf: Object exposing the classifier as ``.model``.
            gen_trainable: Value assigned to the generator's ``trainable``.
            bd_trainable: Value assigned to the classifier's ``trainable``.

        Returns:
            A compiled Keras model (categorical cross-entropy, Adam,
            accuracy metric) taking the generator's input.
        """
        final_out = backdoor_clf.model(self.clip_add(self.model.outputs[0], self.model.inputs[0]))
        full_model = keras.models.Model(inputs=self.model.input, outputs=final_out)
        # The flags are set on the shared sub-models, so they stay in effect
        # on backdoor_clf.model and self.model after this call returns.
        backdoor_clf.model.trainable = bd_trainable
        self.model.trainable = gen_trainable
        full_model.compile(loss='categorical_crossentropy', optimizer=keras.optimizers.Adam(),
                           metrics=['accuracy'])
        return full_model

    def _fit_backdoor(self, backdoor_clf, x_train, y_train, x_test, y_test, y_target, y_test_classlabel,
                      poison_rate, clean_label):
        """Alternate generator and classifier training for 49 rounds.

        Each round trains the generator with the classifier frozen, then the
        classifier with the generator frozen, then re-fits the classifier on
        the clean data, printing metrics and saving both sets of weights.

        ``self.process_instances`` must have been set (see ``fit``).
        NOTE(review): it presumably selects/relabels samples toward
        ``y_target`` — its exact semantics are defined by the caller.
        """
        # Evaluation sets built with poison_rate=1.0 for test and train data.
        x_test_backdoor, y_test_backdoor = self.process_instances(x_test, y_test, y_target,
                                                                  poison_rate=1.0, clean_label=clean_label,
                                                                  one_hot=True)
        x_train_backdoor_f, y_train_backdoor_f = self.process_instances(x_train, y_train, y_target,
                                                                        poison_rate=1.0, clean_label=clean_label,
                                                                        one_hot=True)

        # Clean test metric of the pre-trained classifier (index 1 of the
        # evaluate() result, i.e. the first compiled metric).
        print(backdoor_clf.model.evaluate(x_test, y_test)[1])

        # Training set for the alternating loop, built with the caller's
        # poison_rate and only_target=True.
        x_train_backdoor, y_train_backdoor = self.process_instances(x_train, y_train, y_target,
                                                                    poison_rate, clean_label, one_hot=True,
                                                                    only_target=True)

        # mm = keras.models.Model(inputs=self.model.input, outputs=self.model.outputs[0])
        # xx = x_train[0:1,:,:]
        # np.save('sample1.npy', xx)
        # np.save('pattern1.npy', xx * mm(xx))
        for e in range(1, 50):
            print("Epoch:", e)
            '''
            for layer in backdoor_clf.model.layers:
                layer.trainable = False
            '''

            # Train noise generator
            # (generator trainable, classifier frozen)
            full_model = self.get_full_model(backdoor_clf, True, False)
            #K.set_value(full_model.optimizer.learning_rate, 0.001)

            full_model.fit(x_train_backdoor, y_train_backdoor, batch_size=4, epochs=20)

            # Train backdoor classifier
            # (generator frozen, classifier trainable)
            full_model = self.get_full_model(backdoor_clf, False, True)

            full_model.fit(x_train_backdoor, y_train_backdoor, batch_size=16, epochs=5)

            # Metrics right after the backdoor step: classifier alone on
            # clean test data, full chain on the fully poisoned sets.
            val_clean_acc = backdoor_clf.model.evaluate(x_test, y_test)[1]
            val_ASR = full_model.evaluate(x_test_backdoor, y_test_backdoor)[1]
            val_ASR_train = full_model.evaluate(x_train_backdoor_f, y_train_backdoor_f)[1]
            print('#' * 10 + ' Now ' + '#' * 10)
            print("Clean acc.:", val_clean_acc)
            print("ASR:", val_ASR)
            print("ASR_train:", val_ASR_train)
            print('#' * 10 + ' Now ' + '#' * 10)

            #K.set_value(backdoor_clf.model.optimizer.learning_rate, 0.005)
            # Re-fit the classifier on the clean data; the epoch count grows
            # slowly with the round index (int(4 * e ** 0.3)).
            backdoor_clf.fit(x_train, y_train, x_test, y_test, y_test_classlabel, nb_epochs=int(4*e**0.3))

            # Same metrics again after the clean re-fit; these values go into
            # the checkpoint file names.
            val_clean_acc = backdoor_clf.model.evaluate(x_test, y_test)[1]
            val_ASR = full_model.evaluate(x_test_backdoor, y_test_backdoor)[1]
            val_ASR_train = full_model.evaluate(x_train_backdoor_f, y_train_backdoor_f)[1]
            print('#' * 20)
            print("Clean acc.:", val_clean_acc)
            print("ASR:", val_ASR)
            print("ASR_train:", val_ASR_train)
            print('#' * 20)
            # NOTE(review): the 'generator_save/' and 'backdoor_save/'
            # sub-directories are not created here — presumably the caller
            # creates them; verify.
            self.model.save_weights(self.output_directory + 'generator_save/' + \
                                    f'epoch{e}_{val_clean_acc:.3f}_{val_ASR:.3f}_{val_ASR_train:.3f}.hdf5')
            backdoor_clf.model.save_weights(self.output_directory + 'backdoor_save/' + \
                                            f'epoch{e}_{val_clean_acc:.3f}_{val_ASR:.3f}_{val_ASR_train:.3f}.hdf5')

        return

    def fit(self, x_train, y_train, x_test, y_test, y_test_classlabel, backdoor_clf, process_instances,
            y_target=0, poison_rate=0.1, clean_label=False):
        """Pre-train the classifier, run the alternating training, save weights.

        Args:
            x_train, y_train: Training inputs and labels.
            x_test, y_test: Test inputs and labels.
            y_test_classlabel: Forwarded to ``backdoor_clf.fit``.
            backdoor_clf: Classifier wrapper exposing ``.model`` and ``.fit``.
            process_instances: Callable stored on ``self`` and used by
                ``_fit_backdoor`` to build the poisoned data sets.
            y_target: Target label passed to ``process_instances``.
            poison_rate: Poison rate used for the training set.
            clean_label: Forwarded to ``process_instances``.
        """
        self.process_instances = process_instances

        print("Pre-training...")
        # 10 epochs of clean pre-training before the alternating rounds.
        backdoor_clf.fit(x_train, y_train, x_test, y_test, y_test_classlabel, nb_epochs=10)
        self._fit_backdoor(backdoor_clf, x_train, y_train, x_test, y_test, y_target, y_test_classlabel,
                           poison_rate, clean_label)
        self.model.save_weights(self.output_directory + 'generator_final.hdf5')
        backdoor_clf.model.save_weights(self.output_directory + 'backdoor_final.hdf5')

        return
class Classifier_Noise_GAN:
    """Trigger-pattern generator variant working on inputs with an extra axis.

    The generator input has a trailing axis of size 1 appended to
    ``input_shape`` and the network ends in a single ReLU unit, so the
    generated pattern is non-negative. ``clip_add`` applies it as a
    multiplicative perturbation and ``get_full_model`` chains the perturbed
    input into the backdoor classifier.
    """

    def __init__(self, output_directory, input_shape, verbose=False, build=True, c_loss=None):
        """Optionally build the generator and save its initial weights.

        Args:
            output_directory: Path prefix used for every file written by this
                class; it is concatenated directly with file names.
            input_shape: Shape passed to ``build_model``.
            verbose: Stored as ``self.verbose``.
            build: When true, build the model, print its summary and write
                ``generator_init.hdf5``.
            c_loss: Forwarded to ``build_model`` (unused there).
        """
        self.output_directory = output_directory
        # NOTE(review): with build=False neither self.model nor self.verbose
        # is set here — presumably the caller assigns them; verify.
        if build == True:
            self.model = self.build_model(input_shape, c_loss)
            self.model.summary()
            self.verbose = verbose
            self.model.save_weights(self.output_directory + 'generator_init.hdf5')
        return

    def build_model(self, input_shape, c_loss=None):
        """Build the (uncompiled) generator network.

        Three Conv1D + BatchNorm + ReLU blocks followed by two Dense layers;
        the last Dense layer has one unit with ReLU activation.

        Args:
            input_shape: Input shape without the batch axis; a trailing axis
                of size 1 is appended, and index 1 scales the filter counts
                of the first two convolutions.
            c_loss: Accepted for signature compatibility; not used.

        Returns:
            The Keras generator model.
        """
        # Append a trailing axis of size 1 to the requested input shape.
        # NOTE(review): callers presumably expand the data the same way
        # before feeding it — confirm.
        input_layer = keras.layers.Input(tuple(list(input_shape) + [1]))

        conv1 = keras.layers.Conv1D(filters=128*input_shape[1], kernel_size=15, padding='same', name='conv1')(input_layer)
        conv1 = keras.layers.BatchNormalization()(conv1)
        conv1 = keras.layers.Activation(activation='relu')(conv1)

        conv2 = keras.layers.Conv1D(filters=512*input_shape[1], kernel_size=21, padding='same', name='conv2')(conv1)
        conv2 = keras.layers.BatchNormalization()(conv2)
        conv2 = keras.layers.Activation('relu')(conv2)

        conv3 = keras.layers.Conv1D(filters=512, kernel_size=15, padding='same', name='conv3')(conv2)
        conv3 = keras.layers.BatchNormalization()(conv3)
        conv3 = keras.layers.Activation('relu')(conv3)

        # Dense layers act on the last axis; the single ReLU output unit
        # mirrors the trailing size-1 axis of the input.
        fc1 = keras.layers.Dense(256, activation='relu')(conv3)
        fc2 = keras.layers.Dense(1, activation='relu')(fc1)
        output_layer = fc2
        model = keras.models.Model(inputs=input_layer, outputs=output_layer)

        return model

    def clip_add(self, pattern, ori_data):
        """Apply ``pattern`` to ``ori_data`` as a multiplicative perturbation.

        Returns ``(1 + 0.2 * pattern) * ori_data``. The generator's ReLU
        output is non-negative, so the scaling factor is at least 1.
        """
        return (1 + pattern* 0.2) * ori_data

    def get_full_model(self, backdoor_clf, gen_trainable=True, bd_trainable=True):
        """Build and compile the generator -> perturbation -> classifier chain.

        Args:
            backdoor_clf: Object exposing the classifier as ``.model``.
            gen_trainable: Value assigned to the generator's ``trainable``.
            bd_trainable: Value assigned to the classifier's ``trainable``.

        Returns:
            A compiled Keras model (categorical cross-entropy, Adam,
            accuracy metric) taking the generator's input.
        """
        final_out = backdoor_clf.model(self.clip_add(self.model.outputs[0], self.model.inputs[0]))
        full_model = keras.models.Model(inputs=self.model.input, outputs=final_out)
        # The flags are set on the shared sub-models, so they stay in effect
        # on backdoor_clf.model and self.model after this call returns.
        backdoor_clf.model.trainable = bd_trainable
        self.model.trainable = gen_trainable
        full_model.compile(loss='categorical_crossentropy', optimizer=keras.optimizers.Adam(),
                           metrics=['accuracy'])
        return full_model

    def _fit_backdoor(self, backdoor_clf, x_train, y_train, x_test, y_test, y_target, y_test_classlabel,
                      poison_rate, clean_label):
        """Alternate generator and classifier training for 39 rounds.

        Each round trains the generator with the classifier frozen, then the
        classifier with the generator frozen (10 epochs each), then re-fits
        the classifier on the clean data, printing metrics and saving both
        sets of weights.

        ``self.process_instances`` must have been set (see ``fit``).
        NOTE(review): it presumably selects/relabels samples toward
        ``y_target`` — its exact semantics are defined by the caller.
        """
        # Evaluation sets built with poison_rate=1.0 for test and train data.
        x_test_backdoor, y_test_backdoor = self.process_instances(x_test, y_test, y_target,
                                                                  poison_rate=1.0, clean_label=clean_label,
                                                                  one_hot=True)
        x_train_backdoor_f, y_train_backdoor_f = self.process_instances(x_train, y_train, y_target,
                                                                        poison_rate=1.0, clean_label=clean_label,
                                                                        one_hot=True)

        # Clean test metric of the pre-trained classifier (index 1 of the
        # evaluate() result, i.e. the first compiled metric).
        print(backdoor_clf.model.evaluate(x_test, y_test)[1])

        # Training set for the alternating loop, built with the caller's
        # poison_rate and only_target=True.
        x_train_backdoor, y_train_backdoor = self.process_instances(x_train, y_train, y_target,
                                                                    poison_rate, clean_label, one_hot=True,
                                                                    only_target=True)
        for e in range(1, 40):
            print("Epoch:", e)
            '''
            for layer in backdoor_clf.model.layers:
                layer.trainable = False
            '''

            # Train noise generator
            # (generator trainable, classifier frozen)
            full_model = self.get_full_model(backdoor_clf, True, False)
            #K.set_value(full_model.optimizer.learning_rate, 0.01)

            full_model.fit(x_train_backdoor, y_train_backdoor, batch_size=4, epochs=10)

            # Train backdoor classifier
            # (generator frozen, classifier trainable)
            full_model = self.get_full_model(backdoor_clf, False, True)

            full_model.fit(x_train_backdoor, y_train_backdoor, batch_size=16, epochs=10)

            # Metrics right after the backdoor step: classifier alone on
            # clean test data, full chain on the fully poisoned sets.
            val_clean_acc = backdoor_clf.model.evaluate(x_test, y_test)[1]
            val_ASR = full_model.evaluate(x_test_backdoor, y_test_backdoor)[1]
            val_ASR_train = full_model.evaluate(x_train_backdoor_f, y_train_backdoor_f)[1]
            print('!' * 10 + ' In Progress ' + '!' * 10)
            print("Clean acc.:", val_clean_acc)
            print("ASR:", val_ASR)
            print("ASR_train:", val_ASR_train)
            print('!' * 10 + ' In Progress ' + '!' * 10)

            #K.set_value(backdoor_clf.model.optimizer.learning_rate, 0.002)
            # Re-fit the classifier on the clean data; the epoch count grows
            # slowly with the round index (int(4 * e ** 0.3)).
            backdoor_clf.fit(x_train, y_train, x_test, y_test, y_test_classlabel, nb_epochs=int(4*e**0.3))

            # Same metrics again after the clean re-fit; these values go into
            # the checkpoint file names.
            val_clean_acc = backdoor_clf.model.evaluate(x_test, y_test)[1]
            val_ASR = full_model.evaluate(x_test_backdoor, y_test_backdoor)[1]
            val_ASR_train = full_model.evaluate(x_train_backdoor_f, y_train_backdoor_f)[1]
            print('#' * 20)
            print("Clean acc.:", val_clean_acc)
            print("ASR:", val_ASR)
            print("ASR_train:", val_ASR_train)
            print('#' * 20)
            # NOTE(review): the 'generator_save/' and 'backdoor_save/'
            # sub-directories are not created here — presumably the caller
            # creates them; verify.
            self.model.save_weights(self.output_directory + 'generator_save/' + \
                                    f'epoch{e}_{val_clean_acc:.3f}_{val_ASR:.3f}_{val_ASR_train:.3f}.hdf5')
            backdoor_clf.model.save_weights(self.output_directory + 'backdoor_save/' + \
                                            f'epoch{e}_{val_clean_acc:.3f}_{val_ASR:.3f}_{val_ASR_train:.3f}.hdf5')

        return

    def fit(self, x_train, y_train, x_test, y_test, y_test_classlabel, backdoor_clf, process_instances,
            y_target=0, poison_rate=0.1, clean_label=False):
        """Pre-train the classifier, run the alternating training, save weights.

        Args:
            x_train, y_train: Training inputs and labels.
            x_test, y_test: Test inputs and labels.
            y_test_classlabel: Forwarded to ``backdoor_clf.fit``.
            backdoor_clf: Classifier wrapper exposing ``.model`` and ``.fit``.
            process_instances: Callable stored on ``self`` and used by
                ``_fit_backdoor`` to build the poisoned data sets.
            y_target: Target label passed to ``process_instances``.
            poison_rate: Poison rate used for the training set.
            clean_label: Forwarded to ``process_instances``.
        """
        self.process_instances = process_instances

        print("Pre-training...")
        # 100 epochs of clean pre-training before the alternating rounds.
        backdoor_clf.fit(x_train, y_train, x_test, y_test, y_test_classlabel, nb_epochs=100)
        self._fit_backdoor(backdoor_clf, x_train, y_train, x_test, y_test, y_target, y_test_classlabel,
                           poison_rate, clean_label)
        self.model.save_weights(self.output_directory + 'generator_final.hdf5')
        backdoor_clf.model.save_weights(self.output_directory + 'backdoor_final.hdf5')

        return
class callback_val_ASR(tf.keras.callbacks.Callback):
    """Keras callback that evaluates two fixed data sets after every epoch.

    The model being trained is evaluated on ``(x_ASR, y_ASR)`` and on
    ``(x_ASR_train, y_ASR_train)``. Index 1 of each ``evaluate`` result (the
    first compiled metric) is printed, and the value for the first set is
    stored in the epoch logs under the key ``'ASR'``.
    """

    def __init__(self, x_ASR, y_ASR, x_ASR_train, y_ASR_train):
        # Initialise the Keras base class so its bookkeeping attributes exist.
        super().__init__()
        self.x_ASR = x_ASR
        self.y_ASR = y_ASR
        self.x_ASR_train = x_ASR_train
        self.y_ASR_train = y_ASR_train

    def on_epoch_end(self, epoch, logs=None):
        """Evaluate both sets, record ``logs['ASR']`` and print the metrics."""
        val_ASR = self.model.evaluate(self.x_ASR, self.y_ASR, verbose=0)
        val_ASR_train = self.model.evaluate(self.x_ASR_train, self.y_ASR_train, verbose=0)
        # `logs` defaults to None; only record the metric when a dict exists.
        if logs is not None:
            logs['ASR'] = val_ASR[1]
        print('ASR_test:', val_ASR[1])
        print('ASR_train:', val_ASR_train[1])


class Classifier_FCN:
    """Fully convolutional classifier with clean and backdoor training loops."""

    def __init__(self, output_directory, input_shape, nb_classes, verbose=False, build=True, c_loss=None):
        """Optionally build the network and save its initial weights.

        Args:
            output_directory: Path prefix for every file written by this
                class; it is concatenated directly with file names.
            input_shape: Input shape without the batch axis.
            nb_classes: Number of softmax output units.
            verbose: Verbosity passed to ``model.fit``; when truthy the model
                summary is printed after building.
            build: When true, build and compile the model and write
                ``model_init.hdf5``.
            c_loss: Optional loss overriding categorical cross-entropy.
        """
        self.output_directory = output_directory
        # Always set, so _fit_model can read it even when build is false.
        self.verbose = verbose
        if build:
            self.model = self.build_model(input_shape, nb_classes, c_loss)
            if verbose:
                self.model.summary()
            self.model.save_weights(self.output_directory + 'model_init.hdf5')

    def build_model(self, input_shape, nb_classes, c_loss=None):
        """Build and compile the FCN and set up ``self.callbacks``.

        Three Conv1D + BatchNorm + ReLU blocks (128, 256, 128 filters),
        global average pooling and a softmax layer. ``self.callbacks`` is set
        to a ReduceLROnPlateau and a ModelCheckpoint that keeps the
        lowest-training-loss model in ``best_model.hdf5``.

        Returns:
            The compiled Keras model.
        """
        input_layer = keras.layers.Input(input_shape)

        conv1 = keras.layers.Conv1D(filters=128, kernel_size=8, padding='same')(input_layer)
        conv1 = keras.layers.BatchNormalization()(conv1)
        conv1 = keras.layers.Activation(activation='relu')(conv1)

        conv2 = keras.layers.Conv1D(filters=256, kernel_size=5, padding='same')(conv1)
        conv2 = keras.layers.BatchNormalization()(conv2)
        conv2 = keras.layers.Activation('relu')(conv2)

        conv3 = keras.layers.Conv1D(128, kernel_size=3, padding='same')(conv2)
        conv3 = keras.layers.BatchNormalization()(conv3)
        conv3 = keras.layers.Activation('relu')(conv3)

        gap_layer = keras.layers.GlobalAveragePooling1D()(conv3)

        output_layer = keras.layers.Dense(nb_classes, activation='softmax')(gap_layer)

        model = keras.models.Model(inputs=input_layer, outputs=output_layer)

        # A custom loss replaces categorical cross-entropy when supplied.
        loss = 'categorical_crossentropy' if c_loss is None else c_loss
        model.compile(loss=loss, optimizer=keras.optimizers.Adam(), metrics=['accuracy'])

        reduce_lr = keras.callbacks.ReduceLROnPlateau(monitor='loss', factor=0.5, patience=50,
                                                      min_lr=0.0001)

        file_path = self.output_directory + 'best_model.hdf5'

        model_checkpoint = keras.callbacks.ModelCheckpoint(filepath=file_path, monitor='loss',
                                                           save_best_only=True)

        self.callbacks = [reduce_lr, model_checkpoint]

        return model

    def _fit_model(self, x_train, y_train, x_val, y_val, eval_val_ASR, batch_size, nb_epochs):
        """Train ``self.model`` and return the best checkpoint.

        Args:
            x_train, y_train: Training data.
            x_val, y_val: Used only to monitor validation metrics.
            eval_val_ASR: List of extra callbacks appended to
                ``self.callbacks``.
            batch_size: Upper bound for the mini-batch size.
            nb_epochs: Number of epochs to train.

        Returns:
            ``(model, hist, duration)``: the model reloaded from
            ``best_model.hdf5``, the Keras History and the training time in
            seconds.
        """
        # The previous guard tested the function object
        # `tf.test.is_gpu_available`, which is always truthy, so it never
        # fired. Query the devices properly, but only warn so that CPU-only
        # runs keep working as before.
        if not tf.config.list_physical_devices('GPU'):
            print('warning: no GPU detected by TensorFlow; training will run on CPU')

        # x_val and y_val are only used to monitor the test loss and NOT for training

        # At most a tenth of the training set per batch, but never 0, which
        # Keras rejects when there are fewer than 10 training samples.
        mini_batch_size = max(1, int(min(x_train.shape[0] / 10, batch_size)))

        start_time = time.time()

        hist = self.model.fit(x_train, y_train, batch_size=mini_batch_size, epochs=nb_epochs,
                              verbose=self.verbose, validation_data=(x_val, y_val),
                              callbacks=self.callbacks + eval_val_ASR)

        duration = time.time() - start_time

        self.model.save(self.output_directory + 'last_model.hdf5')

        # Predictions are made with the checkpoint written by ModelCheckpoint,
        # not with the weights from the last epoch.
        model = keras.models.load_model(self.output_directory + 'best_model.hdf5')

        return model, hist, duration

    def fit(self, x_train, y_train, x_val, y_val, y_test_classlabel, batch_size=16, nb_epochs=50):
        """Train on clean data, log the results and clear the Keras session.

        Args:
            x_train, y_train: Training data with one-hot labels.
            x_val, y_val: Validation data with one-hot labels.
            y_test_classlabel: Integer class labels matching ``x_val``,
                passed to ``save_logs``.
            batch_size: Upper bound for the mini-batch size.
            nb_epochs: Number of training epochs.
        """
        model, hist, duration = self._fit_model(x_train, y_train, x_val, y_val, [], batch_size, nb_epochs)

        y_pred = model.predict(x_val)

        # Convert class probabilities to integer class labels.
        y_pred = np.argmax(y_pred, axis=1)

        save_logs(self.output_directory, hist, y_pred, y_test_classlabel, duration, calc_ASR=False)

        keras.backend.clear_session()

    def fit_backdoor(self, x_train, y_train, x_val, y_val,
                     pattern_generator, y_target=0, poison_rate=0.1,
                     clean_label=True, batch_size=16, nb_rounds=500, epochs_per_round=2):
        """Train on poisoned data that is regenerated every round.

        Args:
            x_train, y_train: Clean training data with one-hot labels.
            x_val, y_val: Clean validation data with one-hot labels.
            pattern_generator: Callable building poisoned ``(x, y)`` pairs; it
                is called with ``(x, y, y_target)`` plus the keyword arguments
                used below.
            y_target: Target label handed to ``pattern_generator``.
            poison_rate: Poison rate for the per-round training set.
            clean_label: Forwarded to ``pattern_generator`` for training data.
            batch_size: Upper bound for the mini-batch size.
            nb_rounds: Number of regenerate-and-train rounds (default 500,
                the previously hard-coded value).
            epochs_per_round: Epochs trained per round (default 2, the
                previously hard-coded value).

        Raises:
            ValueError: If ``nb_rounds`` is smaller than 1.
        """
        if nb_rounds < 1:
            raise ValueError('nb_rounds must be at least 1')

        y_val_classlabel = np.argmax(y_val, axis=1)

        # Fixed, fully poisoned evaluation sets used by the per-epoch callback.
        x_ASR, y_ASR = pattern_generator(x_val, y_val, y_target, poison_rate=1.0,
                                         clean_label=False, one_hot=True, exclude_target=True)
        x_ASR_train, y_ASR_train = pattern_generator(x_train, y_train, y_target, poison_rate=1.0,
                                                     clean_label=False, one_hot=True, exclude_target=True)
        eval_val_ASR = callback_val_ASR(x_ASR, y_ASR, x_ASR_train, y_ASR_train)

        for e in range(nb_rounds):
            # Build a fresh poisoned training set every round.
            x_train_backdoor, y_train_backdoor = pattern_generator(x_train, y_train, y_target,
                                                                   poison_rate=poison_rate, clean_label=clean_label,
                                                                   one_hot=True)
            print("Epoch:", e + 1)
            model, hist, duration = self._fit_model(x_train_backdoor, y_train_backdoor,
                                                    x_val, y_val, [eval_val_ASR], batch_size,
                                                    nb_epochs=epochs_per_round)

        # NOTE(review): evaluation and logging run once, after the last round
        # — confirm this matches the intended layout of the original loop.
        y_pred = model.predict(x_val)

        # Convert class probabilities to integer class labels.
        y_pred_classlabel = np.argmax(y_pred, axis=1)

        # Backdoor attack: predictions on the fully poisoned validation set.
        x_val_backdoor, y_val_backdoor = pattern_generator(x_val, y_val, y_target,
                                                           poison_rate=1.0, clean_label=False, one_hot=False)
        y_pred_backdoor = np.argmax(model.predict(x_val_backdoor), axis=1)

        save_logs(self.output_directory, hist, y_pred_classlabel, y_val_classlabel, duration,
                  calc_ASR=True, y_pred_backdoor=y_pred_backdoor, y_target=y_val_backdoor)

        keras.backend.clear_session()
class Classifier_Noise_GAN:
    """Convolutional trigger generator trained against a backdoored classifier.

    The generator maps an input series to a tanh-bounded pattern with the same
    number of channels as the input; ``clip_add`` turns that pattern into a
    multiplicative perturbation of the input, and ``get_full_model`` chains
    generator, perturbation and classifier into one trainable Keras model.
    """

    def __init__(self, output_directory, input_shape, verbose=False, build=True, c_loss=None):
        """Build the generator and save its initial weights.

        Args:
            output_directory: path prefix (with trailing separator) for all files.
            input_shape: shape of one sample; index 1 is used as channel count.
            verbose: stored on the instance.
            build: when false, no model is created.
            c_loss: forwarded to ``build_model`` (unused there).
        """
        self.output_directory = output_directory
        # Assigned unconditionally so the attribute also exists when build=False.
        self.verbose = verbose
        if build:
            self.model = self.build_model(input_shape, c_loss)
            self.model.summary()
            self.model.save_weights(self.output_directory + 'generator_init.hdf5')

    def build_model(self, input_shape, c_loss=None):
        """Return the (uncompiled) generator: three conv blocks plus two dense layers."""
        n_channels = input_shape[1]
        input_layer = keras.layers.Input(input_shape)

        conv1 = keras.layers.Conv1D(filters=128 * n_channels, kernel_size=15, padding='same',
                                    name='conv1')(input_layer)
        conv1 = keras.layers.BatchNormalization()(conv1)
        conv1 = keras.layers.Activation(activation='relu')(conv1)

        conv2 = keras.layers.Conv1D(filters=512 * n_channels, kernel_size=21, padding='same',
                                    name='conv2')(conv1)
        conv2 = keras.layers.BatchNormalization()(conv2)
        conv2 = keras.layers.Activation('relu')(conv2)

        conv3 = keras.layers.Conv1D(filters=1024, kernel_size=8, padding='same', name='conv3')(conv2)
        conv3 = keras.layers.BatchNormalization()(conv3)
        conv3 = keras.layers.Activation('relu')(conv3)

        fc1 = keras.layers.Dense(512, activation='relu')(conv3)
        # tanh keeps every pattern value inside [-1, 1].
        output_layer = keras.layers.Dense(n_channels, activation='tanh')(fc1)

        return keras.models.Model(inputs=input_layer, outputs=output_layer)

    def clip_add(self, pattern, ori_data, amplitude=0.2):
        """Apply ``pattern`` to ``ori_data`` as a relative (multiplicative) change.

        Returns ``(1 + pattern * amplitude) * ori_data``; ``amplitude`` defaults
        to the previously hard-coded 0.2.
        """
        return (1 + pattern * amplitude) * ori_data

    def get_full_model(self, backdoor_clf, gen_trainable=True, bd_trainable=True):
        """Return a compiled model: input -> generator -> clip_add -> classifier.

        The ``trainable`` flags of both sub-models are set before compiling, so
        the returned model trains only the requested part.
        """
        final_out = backdoor_clf.model(self.clip_add(self.model.outputs[0], self.model.inputs[0]))
        full_model = keras.models.Model(inputs=self.model.input, outputs=final_out)
        backdoor_clf.model.trainable = bd_trainable
        self.model.trainable = gen_trainable
        full_model.compile(loss='categorical_crossentropy', optimizer=keras.optimizers.Adam(),
                           metrics=['accuracy'])
        return full_model

    def _report_and_checkpoint(self, backdoor_clf, full_model, clean_set, test_backdoor_set,
                               train_backdoor_set, banner, tag):
        """Evaluate clean accuracy / ASR, print them and save both weight files."""
        val_clean_acc = backdoor_clf.model.evaluate(*clean_set)[1]
        val_ASR = full_model.evaluate(*test_backdoor_set)[1]
        val_ASR_train = full_model.evaluate(*train_backdoor_set)[1]
        print(banner)
        print("Clean acc.:", val_clean_acc)
        print("ASR:", val_ASR)
        print("ASR_train:", val_ASR_train)
        print(banner)
        file_name = f'{tag}_{val_clean_acc:.3f}_{val_ASR:.3f}_{val_ASR_train:.3f}.hdf5'
        self.model.save_weights(self.output_directory + 'generator_save/' + file_name)
        backdoor_clf.model.save_weights(self.output_directory + 'backdoor_save/' + file_name)

    def _fit_backdoor(self, backdoor_clf, x_train, y_train, x_test, y_test, y_target, y_test_classlabel,
                      poison_rate, clean_label, nb_rounds=39):
        """Alternate generator training, classifier poisoning and clean re-training.

        ``nb_rounds`` defaults to 39, the number of rounds the previous
        ``range(1, 40)`` loop executed.
        """
        test_backdoor_set = self.process_instances(x_test, y_test, y_target,
                                                   poison_rate=1.0, clean_label=clean_label,
                                                   one_hot=True)
        train_backdoor_set = self.process_instances(x_train, y_train, y_target,
                                                    poison_rate=1.0, clean_label=clean_label,
                                                    one_hot=True)
        clean_set = (x_test, y_test)

        print(backdoor_clf.model.evaluate(x_test, y_test)[1])

        # Training subset for the attack: only instances labelled y_target
        # after processing.
        x_train_backdoor, y_train_backdoor = self.process_instances(x_train, y_train, y_target,
                                                                    poison_rate, clean_label,
                                                                    one_hot=True, only_target=True)
        for e in range(1, nb_rounds + 1):
            print("Epoch:", e)

            # Train noise generator (classifier frozen).
            full_model = self.get_full_model(backdoor_clf, True, False)
            full_model.fit(x_train_backdoor, y_train_backdoor, batch_size=4, epochs=10)

            # Train backdoor classifier (generator frozen).
            full_model = self.get_full_model(backdoor_clf, False, True)
            full_model.fit(x_train_backdoor, y_train_backdoor, batch_size=16, epochs=4)

            self._report_and_checkpoint(backdoor_clf, full_model, clean_set, test_backdoor_set,
                                        train_backdoor_set,
                                        '!' * 10 + ' In Progress ' + '!' * 10, f'epoch{e}p')

            # Re-train on clean data; the epoch budget grows slowly with e.
            backdoor_clf.fit(x_train, y_train, x_test, y_test, y_test_classlabel,
                             nb_epochs=int(3 * e ** 0.3))

            self._report_and_checkpoint(backdoor_clf, full_model, clean_set, test_backdoor_set,
                                        train_backdoor_set, '#' * 20, f'epoch{e}')

    def fit(self, x_train, y_train, x_test, y_test, y_test_classlabel, backdoor_clf, process_instances,
            y_target=0, poison_rate=0.1, clean_label=False):
        """Pre-train the classifier, run the joint backdoor training and save final weights.

        ``process_instances`` is stored on the instance and used to build the
        poisoned data sets.
        """
        self.process_instances = process_instances

        print("Pre-training...")
        backdoor_clf.fit(x_train, y_train, x_test, y_test, y_test_classlabel, nb_epochs=100)
        self._fit_backdoor(backdoor_clf, x_train, y_train, x_test, y_test, y_target, y_test_classlabel,
                           poison_rate, clean_label)
        self.model.save_weights(self.output_directory + 'generator_final.hdf5')
        backdoor_clf.model.save_weights(self.output_directory + 'backdoor_final.hdf5')
class Classifier_RESNET:
    """1-D residual network classifier with clean and backdoor training loops."""

    def __init__(self, output_directory, input_shape, nb_classes, verbose=False, build=True, load_weights=False):
        """Build the network and either load or save its initial weights.

        Args:
            output_directory: path prefix (with trailing separator) for all files.
            input_shape: shape of one input sample.
            nb_classes: size of the softmax output.
            verbose: Keras verbosity; also controls printing the model summary.
            build: when false, no model is created.
            load_weights: load ``model_init.hdf5`` from the matching
                non-augmented results directory instead of saving fresh weights.
        """
        self.output_directory = output_directory
        # Assigned unconditionally so the attribute also exists when build=False.
        self.verbose = verbose
        if build:
            self.model = self.build_model(input_shape, nb_classes)
            if verbose:
                self.model.summary()
            if load_weights:
                init_dir = (self.output_directory
                            .replace('resnet_augment', 'resnet')
                            .replace('TSC_itr_augment_x_10', 'TSC_itr_10'))
                self.model.load_weights(init_dir + '/model_init.hdf5')
            else:
                self.model.save_weights(self.output_directory + 'model_init.hdf5')

    @staticmethod
    def _mini_batch_size(n_samples, batch_size):
        """Return a tenth of ``n_samples`` capped by ``batch_size``, never below 1."""
        return max(1, int(min(n_samples / 10, batch_size)))

    @staticmethod
    def _warn_if_no_gpu():
        """Print a warning when TensorFlow sees no GPU.

        The former ``if not tf.test.is_gpu_available:`` tested the function
        object itself and could never fire. Only warn, so that CPU-only runs
        keep working as before.
        """
        if not tf.config.list_physical_devices('GPU'):
            print('warning: no GPU visible to TensorFlow, training on CPU')

    def _residual_block(self, block_input, n_filters, project_shortcut=True):
        """Return conv(8)-conv(5)-conv(3) with batch norm, added to a shortcut.

        With ``project_shortcut`` the shortcut goes through a 1x1 convolution
        to expand the channels for the sum; otherwise it is only batch
        normalized. Layers are created in the same order as in the original
        unrolled code so automatic layer names stay unchanged.
        """
        conv_x = keras.layers.Conv1D(filters=n_filters, kernel_size=8, padding='same')(block_input)
        conv_x = keras.layers.BatchNormalization()(conv_x)
        conv_x = keras.layers.Activation('relu')(conv_x)

        conv_y = keras.layers.Conv1D(filters=n_filters, kernel_size=5, padding='same')(conv_x)
        conv_y = keras.layers.BatchNormalization()(conv_y)
        conv_y = keras.layers.Activation('relu')(conv_y)

        conv_z = keras.layers.Conv1D(filters=n_filters, kernel_size=3, padding='same')(conv_y)
        conv_z = keras.layers.BatchNormalization()(conv_z)

        if project_shortcut:
            shortcut_y = keras.layers.Conv1D(filters=n_filters, kernel_size=1, padding='same')(block_input)
            shortcut_y = keras.layers.BatchNormalization()(shortcut_y)
        else:
            shortcut_y = keras.layers.BatchNormalization()(block_input)

        block_output = keras.layers.add([shortcut_y, conv_z])
        return keras.layers.Activation('relu')(block_output)

    def build_model(self, input_shape, nb_classes):
        """Return the compiled network and register the training callbacks."""
        n_feature_maps = 64

        input_layer = keras.layers.Input(input_shape)

        output_block_1 = self._residual_block(input_layer, n_feature_maps)
        output_block_2 = self._residual_block(output_block_1, n_feature_maps * 2)
        # Channel counts already match here, so no 1x1 projection is needed.
        output_block_3 = self._residual_block(output_block_2, n_feature_maps * 2,
                                              project_shortcut=False)

        gap_layer = keras.layers.GlobalAveragePooling1D()(output_block_3)
        output_layer = keras.layers.Dense(nb_classes, activation='softmax')(gap_layer)

        model = keras.models.Model(inputs=input_layer, outputs=output_layer)
        model.compile(loss='categorical_crossentropy', optimizer=keras.optimizers.Adam(),
                      metrics=['accuracy'])

        reduce_lr = keras.callbacks.ReduceLROnPlateau(monitor='loss', factor=0.5, patience=50, min_lr=0.0001)

        file_path = self.output_directory + 'best_model.hdf5'
        model_checkpoint = keras.callbacks.ModelCheckpoint(filepath=file_path, monitor='loss',
                                                           save_best_only=True)

        self.callbacks = [reduce_lr, model_checkpoint]

        return model

    def fit(self, x_train, y_train, x_val, y_val, y_true, nb_epochs=50):
        """Train on clean data, log the results and return the metrics from ``save_logs``.

        ``x_val`` / ``y_val`` are only used to monitor the loss and NOT for
        training. ``y_true`` holds the integer class labels of ``x_val``.
        """
        self._warn_if_no_gpu()
        batch_size = 64
        mini_batch_size = self._mini_batch_size(x_train.shape[0], batch_size)

        start_time = time.time()

        hist = self.model.fit(x_train, y_train, batch_size=mini_batch_size, epochs=nb_epochs,
                              verbose=self.verbose, validation_data=(x_val, y_val), callbacks=self.callbacks)

        duration = time.time() - start_time

        self.model.save(self.output_directory + 'last_model.hdf5')

        y_pred = self.predict(x_val, y_true, x_train, y_train, y_val,
                              return_df_metrics=False)

        # save predictions
        np.save(self.output_directory + 'y_pred.npy', y_pred)

        # convert class probabilities to integer class labels
        y_pred = np.argmax(y_pred, axis=1)

        df_metrics = save_logs(self.output_directory, hist, y_pred, y_true, duration)

        keras.backend.clear_session()

        return df_metrics

    def fit_backdoor(self, x_train, y_train, x_val, y_val,
                     pattern_generator, y_target=0, poison_rate=0.1,
                     clean_label=True, batch_size=64, nb_epochs=50, nb_rounds=500):
        """Train on freshly poisoned data each round and log clean accuracy and ASR.

        Args:
            x_train, y_train: clean training inputs and one-hot labels.
            x_val, y_val: clean validation inputs and one-hot labels (monitoring only).
            pattern_generator: callable
                ``(x, y, y_target, poison_rate=..., clean_label=..., one_hot=...,
                exclude_target=...) -> (x_poisoned, y_poisoned)``.
            y_target: class index the backdoor should trigger.
            poison_rate, clean_label: forwarded to ``pattern_generator`` for the
                training data.
            batch_size: upper bound for the mini-batch size.
            nb_epochs: epochs per round.
            nb_rounds: number of re-poison/train rounds (previously fixed at 500).

        Returns:
            The metrics object returned by ``save_logs``.

        Raises:
            ValueError: if ``nb_rounds`` is smaller than 1.
        """
        if nb_rounds < 1:
            raise ValueError('nb_rounds must be at least 1')
        self._warn_if_no_gpu()
        mini_batch_size = self._mini_batch_size(x_train.shape[0], batch_size)

        y_val_classlabel = np.argmax(y_val, axis=1)

        # Fully poisoned, target-class-free copies used by the ASR callback.
        x_ASR, y_ASR = pattern_generator(x_val, y_val, y_target, poison_rate=1.0,
                                         clean_label=False, one_hot=True, exclude_target=True)
        x_ASR_train, y_ASR_train = pattern_generator(x_train, y_train, y_target, poison_rate=1.0,
                                                     clean_label=False, one_hot=True, exclude_target=True)
        eval_val_ASR = callback_val_ASR(x_ASR, y_ASR, x_ASR_train, y_ASR_train)

        start_time = time.time()
        for e in range(nb_rounds):
            x_train_backdoor, y_train_backdoor = pattern_generator(x_train, y_train, y_target,
                                                                   poison_rate=poison_rate,
                                                                   clean_label=clean_label,
                                                                   one_hot=True)
            print("Epoch:", e + 1)
            hist = self.model.fit(x_train_backdoor, y_train_backdoor, batch_size=mini_batch_size,
                                  epochs=nb_epochs, verbose=self.verbose,
                                  validation_data=(x_val, y_val),
                                  callbacks=self.callbacks + [eval_val_ASR])
        # Measured after training; previously the timer was stopped right
        # after it was started, so the logged duration was always ~0.
        duration = time.time() - start_time

        # Clean predictions: class probabilities -> integer class labels.
        y_pred_classlabel = np.argmax(self.model.predict(x_val), axis=1)

        # Backdoor attack on the whole validation set (integer labels).
        x_val_backdoor, y_val_backdoor = pattern_generator(x_val, y_val, y_target,
                                                           poison_rate=1.0, clean_label=False,
                                                           one_hot=False)
        y_pred_backdoor = np.argmax(self.model.predict(x_val_backdoor), axis=1)

        df_metrics = save_logs(self.output_directory, hist, y_pred_classlabel, y_val_classlabel, duration,
                               calc_ASR=True, y_pred_backdoor=y_pred_backdoor, y_target=y_val_backdoor)

        keras.backend.clear_session()

        return df_metrics

    def predict(self, x_test, y_true, x_train, y_train, y_test, return_df_metrics=True):
        """Predict with the checkpoint stored in ``best_model.hdf5``.

        Returns the metrics from ``calculate_metrics`` when ``return_df_metrics``
        is true; otherwise writes ``test_duration.csv`` and returns the raw
        model output.
        """
        start_time = time.time()
        model = keras.models.load_model(self.output_directory + 'best_model.hdf5')
        y_pred = model.predict(x_test)
        if return_df_metrics:
            y_pred = np.argmax(y_pred, axis=1)
            return calculate_metrics(y_true, y_pred, 0.0)
        test_duration = time.time() - start_time
        save_test_duration(self.output_directory + 'test_duration.csv', test_duration)
        return y_pred
def gen_powerline_noise(x, y, y_target, poison_rate, clean_label, one_hot=False, exclude_target=False):
    """Add the stored powerline pattern to every series and poison the labels.

    The pattern is standardised, sub-sampled for short series, resized to the
    series length and scaled per instance and channel to a tenth of that
    channel's value range before being added.

    Args:
        x: array indexed as (instance, time step, channel).
        y: one-hot labels, forwarded to ``process_instances``.
        y_target, poison_rate, clean_label, one_hot, exclude_target:
            forwarded unchanged to ``process_instances``.

    Returns:
        Tuple ``(x_backdoor, y_backdoor)``.
    """
    PATTERN_FILE = './powerline_pattern.npy'
    x, y_backdoor = process_instances(x, y, y_target, poison_rate, clean_label, one_hot, exclude_target)
    pattern = np.load(PATTERN_FILE)
    pattern = (pattern - np.mean(pattern)) / np.std(pattern)

    series_length = x.shape[1]
    if series_length < pattern.shape[0] * 5:
        # The integer division is 0 when the series is longer than the pattern;
        # a zero slice step raises ValueError, so fall back to a step of 1.
        stride = max(1, pattern.shape[0] // series_length * 5)
        # NOTE(review): indexing column 0 assumes the stored pattern is 2-D.
        pattern = pattern[::stride, 0]
    # np.resize repeats or truncates the flattened pattern to the series length.
    pattern = np.resize(pattern, (1, series_length, 1))

    # Per-instance, per-channel amplitude: a tenth of the value range.
    amplitude = (np.max(x, axis=1) - np.min(x, axis=1))[:, np.newaxis, :] / 10

    # Broadcasting replaces the explicit repeat() calls; x itself is not modified.
    x_backdoor = x + pattern * amplitude

    return x_backdoor, y_backdoor
def process_instances(x, y, y_target, poison_rate, clean_label, one_hot=False, exclude_target=False,
                      only_target=False):
    """Select instances and relabel them for a backdoor attack.

    Args:
        x: instances; only indexed along the first axis.
        y: one-hot labels of shape (n_instances, n_classes).
        y_target: integer class index of the attack target.
        poison_rate: fraction of instances to poison (dirty-label mode) or
            the rate the target class share is compared against (clean-label).
        clean_label: if true, only instances already labelled ``y_target``
            are selected and no label changes; otherwise instances of other
            classes are relabelled to ``y_target``.
        one_hot: return labels one-hot encoded instead of as integers.
        exclude_target: drop instances of the target class first.
        only_target: keep only instances labelled ``y_target`` afterwards.

    Returns:
        Tuple ``(x, y_backdoor)`` with the (possibly filtered) instances and
        their new labels.

    Raises:
        ValueError: if ``one_hot`` is requested and ``y_target`` is not a
            valid column index of ``y``.
    """
    # The encoding width follows y itself. Fitting an encoder on the labels
    # that happen to be present produced too few columns whenever a class was
    # missing from this split.
    n_classes = y.shape[1]
    y_classlabel = np.argmax(y, axis=1)

    if exclude_target:
        keep = np.where(y_classlabel != y_target)[0]
        x = x[keep]
        y_classlabel = y_classlabel[keep]

    n_instances = len(y_classlabel)
    if clean_label:
        index = np.where(y_classlabel == y_target)[0]
        # Guard against an empty set instead of dividing by zero.
        if n_instances and len(index) / n_instances < poison_rate:
            print('!!!ACTUAL POISON RATE:', len(index) / n_instances)
    else:
        index = np.where(y_classlabel != y_target)[0]
        if poison_rate < 1.0 and len(index):
            # Never ask for more samples than there are candidates; sampling
            # without replacement would raise otherwise.
            n_poison = min(int(n_instances * poison_rate), len(index))
            index = np.random.choice(index, size=n_poison, replace=False)

    y_backdoor = y_classlabel.copy()
    y_backdoor[index] = y_target

    if only_target:
        keep = np.where(y_backdoor == y_target)[0]
        x = x[keep]
        y_backdoor = y_backdoor[keep]

    if one_hot:
        if not 0 <= y_target < n_classes:
            raise ValueError('y_target %r is not a valid class index for %d classes'
                             % (y_target, n_classes))
        y_backdoor = np.eye(n_classes)[y_backdoor]

    return x, y_backdoor
binary 119 | y_test_classlabel = np.argmax(y_test, axis=1) 120 | 121 | if len(x_train.shape) == 2: # if univariate 122 | # add a dimension to make it multivariate with one dimension 123 | x_train = x_train.reshape((x_train.shape[0], x_train.shape[1], 1)) 124 | x_test = x_test.reshape((x_test.shape[0], x_test.shape[1], 1)) 125 | 126 | input_shape = x_train.shape[1:] 127 | 128 | classifier = create_classifier(classifier_name, input_shape, nb_classes, output_directory) 129 | 130 | # print(dataset_name, x_train.shape) 131 | 132 | if backdoor is None: 133 | classifier.fit(x_train, y_train, x_test, y_test, y_test_classlabel) 134 | 135 | elif backdoor == 'vanilla': 136 | classifier.fit_backdoor(x_train, y_train, x_test, y_test, 137 | gen_vanilla_pattern, y_target=0, poison_rate=0.1, 138 | clean_label=clean_label) 139 | elif backdoor == 'powerline': 140 | classifier.fit_backdoor(x_train, y_train, x_test, y_test, 141 | gen_powerline_noise, y_target=0, poison_rate=0.1, 142 | clean_label=clean_label) 143 | elif backdoor == 'generator': 144 | if classifier_name == 'fcn': 145 | from models import noise_gan 146 | if classifier_name == 'resnet': 147 | if archive_name == 'mts_archive': 148 | from models import noise_gan_expdim as noise_gan 149 | elif archive_name == 'UCRArchive_2018': 150 | from models import noise_gan_3L as noise_gan 151 | noise_generator = noise_gan.Classifier_Noise_GAN(output_directory, input_shape, verbose=False) 152 | noise_generator.fit(x_train, y_train, x_test, y_test, y_test_classlabel, classifier, process_instances, 153 | y_target=0, poison_rate=0.1, clean_label=clean_label) 154 | elif backdoor == 'generative_test': 155 | global NOISE_GEN_INS 156 | from models import noise_gan 157 | NOISE_GEN_INS = noise_gan.Classifier_Noise_GAN(output_directory, input_shape, verbose=False) 158 | 159 | else: 160 | print('NOT IMPLEMENTED!!!') 161 | return None 162 | 163 | 164 | def create_classifier(classifier_name, input_shape, nb_classes, output_directory, verbose=True): 
############################################### main

# change this directory for your machine
root_dir = '.'

_USAGE = ('usage: main.py run_baseline | run_backdoor <method> | '
          'transform_mts_to_ucr_format | generate_results_csv | '
          '<archive_name> <dataset_name> <classifier_name> <itr>')

if len(sys.argv) < 2:
    sys.exit(_USAGE)

command = sys.argv[1]

# NOTE: classifier_name, archive_name, dataset_name, datasets_dict and
# output_directory stay module-level names on purpose; fit_classifier()
# reads them as globals.
if command in ('run_baseline', 'run_backdoor'):
    if command == 'run_backdoor':
        if len(sys.argv) < 3:
            sys.exit(_USAGE)
        attack_method = sys.argv[2]
        result_string = '_' + attack_method
    else:
        attack_method = None
        result_string = ''
    for classifier_name in CLASSIFIERS[0:]:
        print('classifier_name', classifier_name)

        for archive_name in ARCHIVE_NAMES[1:]:
            print('\tarchive_name', archive_name)

            datasets_dict = read_all_datasets(root_dir, archive_name)

            for iteration in range(ITERATIONS):
                print('\t\titer', iteration)

                trr = ''
                if iteration != 0:
                    trr = '_itr_' + str(iteration)

                tmp_output_directory = root_dir + '/results/' + classifier_name + result_string + '/' \
                    + archive_name + trr + '/'

                for dataset_name in utils.constants.dataset_names_for_archive[archive_name]:
                    print('\t\t\tdataset_name: ', dataset_name)

                    output_directory = tmp_output_directory + dataset_name + '/'

                    create_directory(output_directory)
                    # Compare the parsed method rather than sys.argv[2]:
                    # `run_baseline` has no third argument, so the old lookup
                    # raised IndexError.
                    if attack_method == 'generator':
                        create_directory(output_directory + 'generator_save/')
                        create_directory(output_directory + 'backdoor_save/')

                    fit_classifier(backdoor=attack_method)

                    print('\t\t\t\tDONE')

                    # the DONE directory marks this dataset as finished
                    create_directory(output_directory + '/DONE')

elif command == 'transform_mts_to_ucr_format':
    transform_mts_to_ucr_format()
elif command == 'generate_results_csv':
    res = generate_results_csv('results.csv', root_dir)
    print(res.to_string())
else:
    # this is the code used to launch an experiment on a single dataset
    if len(sys.argv) < 5:
        sys.exit(_USAGE)
    archive_name = sys.argv[1]
    dataset_name = sys.argv[2]
    classifier_name = sys.argv[3]
    itr = sys.argv[4]

    if itr == '_itr_0':
        itr = ''

    output_directory = root_dir + '/results/' + classifier_name + '/' + archive_name + itr + '/' + \
        dataset_name + '/'

    test_dir_df_metrics = output_directory + 'df_metrics.csv'

    print('Method: ', archive_name, dataset_name, classifier_name, itr)

    if os.path.exists(test_dir_df_metrics):
        print('Already done')
    else:

        create_directory(output_directory)
        datasets_dict = read_dataset(root_dir, archive_name, dataset_name)

        fit_classifier()

        print('DONE')

        # the DONE directory marks this dataset as finished
        create_directory(output_directory + '/DONE')
def create_directory(directory_path):
    """Create ``directory_path`` (including parents) if it does not exist yet.

    Returns:
        ``directory_path`` when this call created the directory; ``None`` when
        it already existed or could not be created.
    """
    if os.path.exists(directory_path):
        return None
    try:
        os.makedirs(directory_path)
    except OSError:
        # Best effort: another process or machine may have created the path in
        # the meantime. Only file-system errors are swallowed, so
        # KeyboardInterrupt and SystemExit are no longer caught.
        return None
    return directory_path
x_train.columns = range(x_train.shape[1]) 87 | x_test.columns = range(x_test.shape[1]) 88 | 89 | x_train = x_train.values 90 | x_test = x_test.values 91 | 92 | # znorm 93 | std_ = x_train.std(axis=1, keepdims=True) 94 | std_[std_ == 0] = 1.0 95 | x_train = (x_train - x_train.mean(axis=1, keepdims=True)) / std_ 96 | 97 | std_ = x_test.std(axis=1, keepdims=True) 98 | std_[std_ == 0] = 1.0 99 | x_test = (x_test - x_test.mean(axis=1, keepdims=True)) / std_ 100 | 101 | datasets_dict[dataset_name] = (x_train.copy(), y_train.copy(), x_test.copy(), 102 | y_test.copy()) 103 | else: 104 | file_name = cur_root_dir + '/archives/' + archive_name + '/' + dataset_name + '/' + dataset_name 105 | x_train, y_train = readucr(file_name + '_TRAIN') 106 | x_test, y_test = readucr(file_name + '_TEST') 107 | datasets_dict[dataset_name] = (x_train.copy(), y_train.copy(), x_test.copy(), 108 | y_test.copy()) 109 | 110 | return datasets_dict 111 | 112 | 113 | def read_all_datasets(root_dir, archive_name, split_val=False): 114 | datasets_dict = {} 115 | cur_root_dir = root_dir.replace('-temp', '') 116 | dataset_names_to_sort = [] 117 | 118 | if archive_name == 'mts_archive': 119 | 120 | for dataset_name in MTS_DATASET_NAMES: 121 | root_dir_dataset = cur_root_dir + '/archives/' + archive_name + '/' + dataset_name + '/' 122 | 123 | x_train = np.load(root_dir_dataset + 'x_train.npy') 124 | y_train = np.load(root_dir_dataset + 'y_train.npy') 125 | x_test = np.load(root_dir_dataset + 'x_test.npy') 126 | y_test = np.load(root_dir_dataset + 'y_test.npy') 127 | 128 | datasets_dict[dataset_name] = (x_train.copy(), y_train.copy(), x_test.copy(), 129 | y_test.copy()) 130 | elif archive_name == 'UCRArchive_2018': 131 | for dataset_name in DATASET_NAMES_2018: 132 | root_dir_dataset = cur_root_dir + '/archives/' + archive_name + '/' + dataset_name + '/' 133 | 134 | df_train = pd.read_csv(root_dir_dataset + '/' + dataset_name + '_TRAIN.tsv', sep='\t', header=None) 135 | 136 | df_test = 
def transform_to_same_length(x, n_var, max_length):
    """Resample every multivariate series in ``x`` to ``max_length`` time steps.

    Args:
        x: sequence of ``n`` arrays, each indexed as (variable, time step);
            the series may have different lengths.
        n_var: number of variables (rows) to take from each series.
        max_length: number of time steps of every output series.

    Returns:
        ``float64`` array of shape ``(n, max_length, n_var)``.

    Raises:
        ValueError: if a series has no time steps.
    """
    n = x.shape[0]

    # the new set in ucr form np array
    ucr_x = np.zeros((n, max_length, n_var), dtype=np.float64)

    for i in range(n):
        mts = np.asarray(x[i])[:n_var]
        curr_length = mts.shape[1]
        if curr_length == 0:
            raise ValueError('series %d has no time steps' % i)
        if curr_length == 1:
            # A single observation cannot be interpolated; hold it constant.
            ucr_x[i] = np.repeat(mts.T, max_length, axis=0)
            continue

        idx = np.arange(curr_length)
        idx_new = np.linspace(0, idx.max(), max_length)
        # Cubic spline interpolation needs at least four points; use the
        # highest order the series length allows for shorter series.
        if curr_length >= 4:
            kind = 'cubic'
        elif curr_length == 3:
            kind = 'quadratic'
        else:
            kind = 'linear'
        # Interpolate all variables at once along the time axis.
        resampled = interp1d(idx, mts, kind=kind, axis=1)(idx_new)
        ucr_x[i] = resampled.T

    return ucr_x
n_var, max_length) 266 | 267 | if os.path.exists(out_dir): 268 | return None 269 | else: 270 | os.makedirs(out_dir) 271 | 272 | # save them 273 | np.save(out_dir + 'x_train.npy', x_train) 274 | np.save(out_dir + 'y_train.npy', y_train) 275 | np.save(out_dir + 'x_test.npy', x_test) 276 | np.save(out_dir + 'y_test.npy', y_test) 277 | 278 | print('Done') 279 | 280 | 281 | def calculate_metrics(y_true, y_pred, duration, y_true_val=None, y_pred_val=None): 282 | res = pd.DataFrame(data=np.zeros((1, 4), dtype=np.float), index=[0], 283 | columns=['precision', 'accuracy', 'recall', 'duration']) 284 | res['precision'] = precision_score(y_true, y_pred, average='macro') 285 | res['accuracy'] = accuracy_score(y_true, y_pred) 286 | 287 | if not y_true_val is None: 288 | # this is useful when transfer learning is used with cross validation 289 | res['accuracy_val'] = accuracy_score(y_true_val, y_pred_val) 290 | 291 | res['recall'] = recall_score(y_true, y_pred, average='macro') 292 | res['duration'] = duration 293 | return res 294 | 295 | 296 | def save_test_duration(file_name, test_duration): 297 | res = pd.DataFrame(data=np.zeros((1, 1), dtype=np.float), index=[0], 298 | columns=['test_duration']) 299 | res['test_duration'] = test_duration 300 | res.to_csv(file_name, index=False) 301 | 302 | 303 | def generate_results_csv(output_file_name, root_dir): 304 | res = pd.DataFrame(data=np.zeros((0, 7), dtype=np.float), index=[], 305 | columns=['classifier_name', 'archive_name', 'dataset_name', 306 | 'precision', 'accuracy', 'recall', 'duration']) 307 | for classifier_name in CLASSIFIERS: 308 | for archive_name in ARCHIVE_NAMES: 309 | datasets_dict = read_all_datasets(root_dir, archive_name) 310 | for it in range(ITERATIONS): 311 | curr_archive_name = archive_name 312 | if it != 0: 313 | curr_archive_name = curr_archive_name + '_itr_' + str(it) 314 | for dataset_name in datasets_dict.keys(): 315 | output_dir = root_dir + '/results/' + classifier_name + '/' \ 316 | + curr_archive_name 
+ '/' + dataset_name + '/' + 'df_metrics.csv' 317 | if not os.path.exists(output_dir): 318 | continue 319 | df_metrics = pd.read_csv(output_dir) 320 | df_metrics['classifier_name'] = classifier_name 321 | df_metrics['archive_name'] = archive_name 322 | df_metrics['dataset_name'] = dataset_name 323 | res = pd.concat((res, df_metrics), axis=0, sort=False) 324 | 325 | res.to_csv(root_dir + output_file_name, index=False) 326 | # aggreagte the accuracy for iterations on same dataset 327 | res = pd.DataFrame({ 328 | 'accuracy': res.groupby( 329 | ['classifier_name', 'archive_name', 'dataset_name'])['accuracy'].mean() 330 | }).reset_index() 331 | 332 | return res 333 | 334 | 335 | def plot_epochs_metric(hist, file_name, metric='loss'): 336 | plt.figure() 337 | plt.plot(hist.history[metric]) 338 | plt.plot(hist.history['val_' + metric]) 339 | plt.title('model ' + metric) 340 | plt.ylabel(metric, fontsize='large') 341 | plt.xlabel('epoch', fontsize='large') 342 | plt.legend(['train', 'val'], loc='upper left') 343 | plt.savefig(file_name, bbox_inches='tight') 344 | plt.close() 345 | 346 | 347 | def save_logs(output_directory, hist, y_pred, y_true, duration, 348 | lr=True, y_true_val=None, y_pred_val=None, 349 | calc_ASR=False, y_pred_backdoor=None, y_target=None): 350 | hist_df = pd.DataFrame(hist.history) 351 | hist_df.to_csv(output_directory + 'history.csv', index=False) 352 | 353 | df_metrics = calculate_metrics(y_true, y_pred, duration, y_true_val, y_pred_val) 354 | df_metrics.to_csv(output_directory + 'df_metrics.csv', index=False) 355 | 356 | index_best_model = hist_df['val_accuracy'].idxmax() 357 | row_best_model = hist_df.loc[index_best_model] 358 | 359 | df_best_model = pd.DataFrame(data=np.zeros((1, 6), dtype=np.float), index=[0], 360 | columns=['best_model_train_loss', 'best_model_val_loss', 'best_model_train_acc', 361 | 'best_model_val_acc', 'best_model_learning_rate', 'best_model_nb_epoch']) 362 | 363 | if calc_ASR: 364 | df_best_model['best_model_ASR'] = 
accuracy_score(y_pred_backdoor, y_target) 365 | 366 | df_best_model['best_model_train_loss'] = row_best_model['loss'] 367 | df_best_model['best_model_val_loss'] = row_best_model['val_loss'] 368 | df_best_model['best_model_train_acc'] = row_best_model['accuracy'] 369 | df_best_model['best_model_val_acc'] = row_best_model['val_accuracy'] 370 | if lr == True: 371 | df_best_model['best_model_learning_rate'] = row_best_model['lr'] 372 | df_best_model['best_model_nb_epoch'] = index_best_model 373 | 374 | df_best_model.to_csv(output_directory + 'df_best_model.csv', index=False) 375 | 376 | # for FCN there is no hyperparameters fine tuning - everything is static in code 377 | 378 | return df_metrics 379 | --------------------------------------------------------------------------------