├── powerline_pattern.npy
├── models
    ├── __pycache__
    │   ├── fcn.cpython-39.pyc
    │   └── noise_gan.cpython-39.pyc
    ├── noise_gan.py
    ├── noise_gan_expdim.py
    ├── fcn.py
    ├── noise_gan_3L.py
    └── resnet.py
├── utils
    ├── __pycache__
    │   ├── utils.cpython-39.pyc
    │   └── constants.cpython-39.pyc
    ├── grad_cam.py
    ├── constants.py
    └── utils.py
├── requirements.txt
├── README.md
└── main.py


/powerline_pattern.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yujingmarkjiang/Time_Series_Backdoor_Attack/HEAD/powerline_pattern.npy


--------------------------------------------------------------------------------
/models/__pycache__/fcn.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yujingmarkjiang/Time_Series_Backdoor_Attack/HEAD/models/__pycache__/fcn.cpython-39.pyc


--------------------------------------------------------------------------------
/utils/__pycache__/utils.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yujingmarkjiang/Time_Series_Backdoor_Attack/HEAD/utils/__pycache__/utils.cpython-39.pyc


--------------------------------------------------------------------------------
/utils/__pycache__/constants.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yujingmarkjiang/Time_Series_Backdoor_Attack/HEAD/utils/__pycache__/constants.cpython-39.pyc


--------------------------------------------------------------------------------
/models/__pycache__/noise_gan.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yujingmarkjiang/Time_Series_Backdoor_Attack/HEAD/models/__pycache__/noise_gan.cpython-39.pyc


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | scikit-learn==0.24.1
2 | matplotlib==3.3.4
3 | keras-nightly==2.5.0.dev2021032900
4 | h5py==3.1.0
5 | pandas==1.2.4
6 | numpy==1.19.5
7 | scipy==1.6.2
8 | tensorflow==2.5.0


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # TSBA: Time Series Backdoor Attack
 2 | 
 3 | This is the code for the [SaTML'23 paper](https://arxiv.org/pdf/2211.07915.pdf) "Backdoor Attacks on Time Series: A Generative Approach" by Yujing Jiang, Xingjun Ma, Sarah Monazam Erfani, and James Bailey.
 4 | 
 5 | ## Prerequisites
 6 | * Python (3.9.7)
 7 | * TensorFlow (2.5.0)
 8 | * CUDA (with 4 GPUs)
 9 | 
10 | ## Data 
11 | The data used in this project comes from two sources: 
12 | * The [UCR/UEA archive](http://timeseriesclassification.com/TSC.zip), which contains the 85 **univariate** time series datasets. 
13 | * The [MTS archive](https://drive.google.com/drive/folders/1FgIPN3uUT-b1tiHG-ONi5B31iPN3BMNy?usp=sharing), which contains the 13 **multivariate** time series datasets.
14 | 
15 | ## How to run
16 | 
17 | To run the clean model:
18 | ```
19 | python main.py run_baseline
20 | ```
21 | 
22 | To run the vanilla backdoor method:
23 | ```
24 | python main.py run_backdoor vanilla
25 | ```
26 | 
27 | To run the static noise backdoor method:
28 | ```
29 | python main.py run_backdoor powerline
30 | ```
31 | 
32 | To run our proposed TSBA:
33 | ```
34 | python main.py run_backdoor generator
35 | ```
36 | 
37 | To test the generator from a trained TSBA model:
38 | ```
39 | python main.py run_backdoor generative_test
40 | ```
41 | 
42 | ## Reference
43 | For technical details and full experimental results, please check [the paper](https://arxiv.org/pdf/2211.07915.pdf).
44 | ```
45 | @inproceedings{xxxxx,
46 |   title={Backdoor Attacks on Time Series: A Generative Approach},
47 |   author={Jiang, Yujing and Ma, Xingjun and Erfani, Sarah Monazam and Bailey, James},
48 | }
49 | ```
50 | 
51 | 
52 | 
53 | 


--------------------------------------------------------------------------------
/utils/grad_cam.py:
--------------------------------------------------------------------------------
 1 | 
 2 | ##  make_gradcam_heatmap(x_train_backdoor, self.model)
 3 | ##  import matplotlib.pyplot as plt
 4 | 
 5 | def make_gradcam_heatmap(dataset, model, last_conv_layer_name='activation_8', pred_index=None):
 6 |     # First, we create a model that maps the input image to the activations
 7 |     # of the last conv layer as well as the output predictions
 8 |     img_array = dataset[0:1,:]
 9 | 
10 |     grad_model = tf.keras.models.Model(
11 |         [model.inputs], [model.get_layer(last_conv_layer_name).output, model.output]
12 |     )
13 | 
14 |     # Then, we compute the gradient of the top predicted class for our input image
15 |     # with respect to the activations of the last conv layer
16 |     with tf.GradientTape() as tape:
17 |         last_conv_layer_output, preds = grad_model(img_array)
18 |         if pred_index is None:
19 |             pred_index = tf.argmax(preds[0])
20 |         class_channel = preds[:, pred_index]
21 | 
22 |     # This is the gradient of the output neuron (top predicted or chosen)
23 |     # with regard to the output feature map of the last conv layer
24 |     grads = tape.gradient(class_channel, last_conv_layer_output)
25 | 
26 |     # This is a vector where each entry is the mean intensity of the gradient
27 |     # over a specific feature map channel
28 |     pooled_grads = tf.reduce_mean(grads, axis=(0, 1))
29 | 
30 |     # We multiply each channel in the feature map array
31 |     # by "how important this channel is" with regard to the top predicted class
32 |     # then sum all the channels to obtain the heatmap class activation
33 |     last_conv_layer_output = last_conv_layer_output[0]
34 |     heatmap = last_conv_layer_output @ pooled_grads[..., tf.newaxis]
35 |     heatmap = tf.squeeze(heatmap)
36 | 
37 |     # For visualization purpose, we will also normalize the heatmap between 0 & 1
38 |     heatmap = tf.maximum(heatmap, 0) / tf.math.reduce_max(heatmap)
39 |     return heatmap.numpy()


--------------------------------------------------------------------------------
/utils/constants.py:
--------------------------------------------------------------------------------
 1 | UNIVARIATE_DATASET_NAMES = ['BirdChicken', 'ECG5000', 'Earthquakes', 'ElectricDevices', 'Haptics', 'PowerCons',
 2 |                             'ShapeletSim', 'Wine']
 3 | 
 4 | UNIVARIATE_DATASET_NAMES_2018 = ['BirdChicken', 'ECG5000', 'Earthquakes', 'ElectricDevices', 'Haptics', 'PowerCons',
 5 |                                  'ShapeletSim', 'Wine']
 6 | 
 7 | MTS_DATASET_NAMES = ['ArabicDigits', 'ECG', 'KickvsPunch', 'NetFlow', 'UWave']
 8 | 
 9 | UNIVARIATE_TEST = ['BirdChicken',  #Image
10 |                    'ECG5000',  #ECG
11 |                    'Earthquakes',  #Sensor
12 |                    'ElectricDevices',  #Device
13 |                    'Haptics',  #Motion
14 |                    'PowerCons',  #Power
15 |                    'ShapeletSim',  #Simulated
16 |                    'Wine'  #Spectro
17 |                    ]
18 | 
19 | MTS_TEST = ['ArabicDigits',
20 |             'ECG',
21 |             'KickvsPunch',
22 |             'NetFlow',
23 |             'UWave']
24 | 
25 | ITERATIONS = 1  # nb of random runs for random initializations
26 | 
27 | ARCHIVE_NAMES = ['UCRArchive_2018', 'mts_archive']
28 | 
29 | dataset_names_for_archive = {'UCRArchive_2018': UNIVARIATE_TEST,
30 |                              'mts_archive': MTS_TEST}
31 | 
32 | CLASSIFIERS = ['fcn', 'resnet']
33 | 
34 | dataset_types = {'ElectricDevices': 'DEVICE', 'FordB': 'SENSOR',
35 |                  'FordA': 'SENSOR', 'NonInvasiveFatalECG_Thorax2': 'ECG',
36 |                  'NonInvasiveFatalECG_Thorax1': 'ECG', 'PhalangesOutlinesCorrect': 'IMAGE',
37 |                  'HandOutlines': 'IMAGE', 'StarLightCurves': 'SENSOR',
38 |                  'wafer': 'SENSOR', 'Two_Patterns': 'SIMULATED',
39 |                  'UWaveGestureLibraryAll': 'MOTION', 'uWaveGestureLibrary_Z': 'MOTION',
40 |                  'uWaveGestureLibrary_Y': 'MOTION', 'uWaveGestureLibrary_X': 'MOTION',
41 |                  'Strawberry': 'SPECTRO', 'ShapesAll': 'IMAGE',
42 |                  'ProximalPhalanxOutlineCorrect': 'IMAGE', 'MiddlePhalanxOutlineCorrect': 'IMAGE',
43 |                  'DistalPhalanxOutlineCorrect': 'IMAGE', 'FaceAll': 'IMAGE',
44 |                  'ECG5000': 'ECG', 'SwedishLeaf': 'IMAGE', 'ChlorineConcentration': 'SIMULATED',
45 |                  '50words': 'IMAGE', 'ProximalPhalanxTW': 'IMAGE', 'ProximalPhalanxOutlineAgeGroup': 'IMAGE',
46 |                  'MiddlePhalanxOutlineAgeGroup': 'IMAGE', 'DistalPhalanxTW': 'IMAGE',
47 |                  'DistalPhalanxOutlineAgeGroup': 'IMAGE', 'MiddlePhalanxTW': 'IMAGE',
48 |                  'Cricket_Z': 'MOTION', 'Cricket_Y': 'MOTION',
49 |                  'Cricket_X': 'MOTION', 'Adiac': 'IMAGE',
50 |                  'MedicalImages': 'IMAGE', 'SmallKitchenAppliances': 'DEVICE',
51 |                  'ScreenType': 'DEVICE', 'RefrigerationDevices': 'DEVICE',
52 |                  'LargeKitchenAppliances': 'DEVICE', 'Earthquakes': 'SENSOR',
53 |                  'yoga': 'IMAGE', 'synthetic_control': 'SIMULATED',
54 |                  'WordsSynonyms': 'IMAGE', 'Computers': 'DEVICE',
55 |                  'InsectWingbeatSound': 'SENSOR', 'Phoneme': 'SENSOR',
56 |                  'OSULeaf': 'IMAGE', 'FacesUCR': 'IMAGE',
57 |                  'WormsTwoClass': 'MOTION', 'Worms': 'MOTION',
58 |                  'FISH': 'IMAGE', 'Haptics': 'MOTION',
59 |                  'Epilepsy': 'HAR', 'Ham': 'SPECTRO',
60 |                  'Plane': 'SENSOR', 'InlineSkate': 'MOTION',
61 |                  'Trace': 'SENSOR', 'ECG200': 'ECG',
62 |                  'Lighting7': 'SENSOR', 'ItalyPowerDemand': 'SENSOR',
63 |                  'Herring': 'IMAGE', 'Lighting2': 'SENSOR',
64 |                  'Car': 'SENSOR', 'Meat': 'SPECTRO',
65 |                  'Wine': 'SPECTRO', 'MALLAT': 'SIMULATED',
66 |                  'Gun_Point': 'MOTION', 'CinC_ECG_torso': 'ECG',
67 |                  'ToeSegmentation1': 'MOTION', 'ToeSegmentation2': 'MOTION',
68 |                  'ArrowHead': 'IMAGE', 'OliveOil': 'SPECTRO',
69 |                  'Beef': 'SPECTRO', 'CBF': 'SIMULATED',
70 |                  'Coffee': 'SPECTRO', 'SonyAIBORobotSurfaceII': 'SENSOR',
71 |                  'Symbols': 'IMAGE', 'FaceFour': 'IMAGE',
72 |                  'ECGFiveDays': 'ECG', 'TwoLeadECG': 'ECG',
73 |                  'BirdChicken': 'IMAGE', 'BeetleFly': 'IMAGE',
74 |                  'ShapeletSim': 'SIMULATED', 'MoteStrain': 'SENSOR',
75 |                  'SonyAIBORobotSurface': 'SENSOR', 'DiatomSizeReduction': 'IMAGE'}
76 | 
77 | themes_colors = {'IMAGE': 'red', 'SENSOR': 'blue', 'ECG': 'green',
78 |                  'SIMULATED': 'yellow', 'SPECTRO': 'orange',
79 |                  'MOTION': 'purple', 'DEVICE': 'gray'}
80 | 


--------------------------------------------------------------------------------
/models/noise_gan.py:
--------------------------------------------------------------------------------
  1 | # Noise-GAN model
  2 | import tensorflow.keras as keras
  3 | from keras import backend as K
  4 | import tensorflow as tf
  5 | import numpy as np
  6 | import time
  7 | 
  8 | from utils.utils import save_logs
  9 | from utils.utils import calculate_metrics
 10 | 
 11 | from models.fcn import callback_val_ASR
 12 | 
 13 | 
 14 | class Classifier_Noise_GAN:
 15 |     def __init__(self, output_directory, input_shape, verbose=False, build=True, c_loss=None):
 16 |         self.output_directory = output_directory
 17 |         if build == True:
 18 |             self.model = self.build_model(input_shape, c_loss)
 19 |             self.model.summary()
 20 |             self.verbose = verbose
 21 |             self.model.save_weights(self.output_directory + 'generator_init.hdf5')
 22 |         return
 23 | 
 24 |     def build_model(self, input_shape, c_loss=None):
 25 |         input_layer = keras.layers.Input(input_shape)
 26 | 
 27 |         conv1 = keras.layers.Conv1D(filters=128*input_shape[1], kernel_size=15, padding='same', name='conv1')(input_layer)
 28 |         conv1 = keras.layers.BatchNormalization()(conv1)
 29 |         conv1 = keras.layers.Activation(activation='relu')(conv1)
 30 | 
 31 |         conv2 = keras.layers.Conv1D(filters=512*input_shape[1], kernel_size=21, padding='same', name='conv2')(conv1)
 32 |         conv2 = keras.layers.BatchNormalization()(conv2)
 33 |         conv2 = keras.layers.Activation('relu')(conv2)
 34 | 
 35 |         fc1 = keras.layers.Dense(256, activation='relu')(conv2)
 36 |         fc2 = keras.layers.Dense(input_shape[1], activation='tanh')(fc1)
 37 |         output_layer = fc2
 38 |         model = keras.models.Model(inputs=input_layer, outputs=output_layer)
 39 | 
 40 |         return model
 41 | 
 42 |     def clip_add(self, pattern, ori_data):
 43 |         return (1 + pattern* 0.1) * ori_data
 44 | 
 45 |     def get_full_model(self, backdoor_clf, gen_trainable=True, bd_trainable=True):
 46 |         final_out = backdoor_clf.model(self.clip_add(self.model.outputs[0], self.model.inputs[0]))
 47 |         full_model = keras.models.Model(inputs=self.model.input, outputs=final_out)
 48 |         backdoor_clf.model.trainable = bd_trainable
 49 |         self.model.trainable = gen_trainable
 50 |         full_model.compile(loss='categorical_crossentropy', optimizer=keras.optimizers.Adam(),
 51 |                            metrics=['accuracy'])
 52 |         return full_model
 53 | 
 54 |     def _fit_backdoor(self, backdoor_clf, x_train, y_train, x_test, y_test, y_target, y_test_classlabel,
 55 |                       poison_rate, clean_label):
 56 |         x_test_backdoor, y_test_backdoor = self.process_instances(x_test, y_test, y_target,
 57 |                                                                   poison_rate=1.0, clean_label=clean_label,
 58 |                                                                   one_hot=True)
 59 |         x_train_backdoor_f, y_train_backdoor_f = self.process_instances(x_train, y_train, y_target,
 60 |                                                                         poison_rate=1.0, clean_label=clean_label,
 61 |                                                                         one_hot=True)
 62 | 
 63 |         print(backdoor_clf.model.evaluate(x_test, y_test)[1])
 64 | 
 65 |         x_train_backdoor, y_train_backdoor = self.process_instances(x_train, y_train, y_target,
 66 |                                                                     poison_rate, clean_label, one_hot=True,
 67 |                                                                     only_target=True)
 68 | 
 69 |         # mm = keras.models.Model(inputs=self.model.input, outputs=self.model.outputs[0])
 70 |         # xx = x_train[0:1,:,:]
 71 |         # np.save('sample1.npy', xx)
 72 |         # np.save('pattern1.npy', xx * mm(xx))
 73 |         for e in range(1, 50):
 74 |             print("Epoch:", e)
 75 |             '''
 76 |             for layer in backdoor_clf.model.layers:
 77 |                 layer.trainable = False
 78 |             '''
 79 | 
 80 |             # Train noise generator
 81 |             full_model = self.get_full_model(backdoor_clf, True, False)
 82 |             #K.set_value(full_model.optimizer.learning_rate, 0.001)
 83 | 
 84 |             full_model.fit(x_train_backdoor, y_train_backdoor, batch_size=4, epochs=20)
 85 | 
 86 |             # Train backdoor classifier
 87 |             full_model = self.get_full_model(backdoor_clf, False, True)
 88 | 
 89 |             full_model.fit(x_train_backdoor, y_train_backdoor, batch_size=16, epochs=5)
 90 | 
 91 |             val_clean_acc = backdoor_clf.model.evaluate(x_test, y_test)[1]
 92 |             val_ASR = full_model.evaluate(x_test_backdoor, y_test_backdoor)[1]
 93 |             val_ASR_train = full_model.evaluate(x_train_backdoor_f, y_train_backdoor_f)[1]
 94 |             print('#' * 10 + ' Now ' + '#' * 10)
 95 |             print("Clean acc.:", val_clean_acc)
 96 |             print("ASR:", val_ASR)
 97 |             print("ASR_train:", val_ASR_train)
 98 |             print('#' * 10 + ' Now ' + '#' * 10)
 99 | 
100 |             #K.set_value(backdoor_clf.model.optimizer.learning_rate, 0.005)
101 |             backdoor_clf.fit(x_train, y_train, x_test, y_test, y_test_classlabel, nb_epochs=int(4*e**0.3))
102 | 
103 |             val_clean_acc = backdoor_clf.model.evaluate(x_test, y_test)[1]
104 |             val_ASR = full_model.evaluate(x_test_backdoor, y_test_backdoor)[1]
105 |             val_ASR_train = full_model.evaluate(x_train_backdoor_f, y_train_backdoor_f)[1]
106 |             print('#' * 20)
107 |             print("Clean acc.:", val_clean_acc)
108 |             print("ASR:", val_ASR)
109 |             print("ASR_train:", val_ASR_train)
110 |             print('#' * 20)
111 |             self.model.save_weights(self.output_directory + 'generator_save/' + \
112 |                                     f'epoch{e}_{val_clean_acc:.3f}_{val_ASR:.3f}_{val_ASR_train:.3f}.hdf5')
113 |             backdoor_clf.model.save_weights(self.output_directory + 'backdoor_save/' + \
114 |                                             f'epoch{e}_{val_clean_acc:.3f}_{val_ASR:.3f}_{val_ASR_train:.3f}.hdf5')
115 | 
116 |         return
117 | 
118 |     def fit(self, x_train, y_train, x_test, y_test, y_test_classlabel, backdoor_clf, process_instances,
119 |             y_target=0, poison_rate=0.1, clean_label=False):
120 |         self.process_instances = process_instances
121 | 
122 |         print("Pre-training...")
123 |         backdoor_clf.fit(x_train, y_train, x_test, y_test, y_test_classlabel, nb_epochs=10)
124 |         self._fit_backdoor(backdoor_clf, x_train, y_train, x_test, y_test, y_target, y_test_classlabel,
125 |                            poison_rate, clean_label)
126 |         self.model.save_weights(self.output_directory + 'generator_final.hdf5')
127 |         backdoor_clf.model.save_weights(self.output_directory + 'backdoor_final.hdf5')
128 | 
129 |         return
130 | 


--------------------------------------------------------------------------------
/models/noise_gan_expdim.py:
--------------------------------------------------------------------------------
  1 | # Noise-GAN model
  2 | import tensorflow.keras as keras
  3 | from keras import backend as K
  4 | import tensorflow as tf
  5 | import numpy as np
  6 | import time
  7 | 
  8 | from utils.utils import save_logs
  9 | from utils.utils import calculate_metrics
 10 | 
 11 | from models.fcn import callback_val_ASR
 12 | 
 13 | 
 14 | class Classifier_Noise_GAN:
 15 |     def __init__(self, output_directory, input_shape, verbose=False, build=True, c_loss=None):
 16 |         self.output_directory = output_directory
 17 |         if build == True:
 18 |             self.model = self.build_model(input_shape, c_loss)
 19 |             self.model.summary()
 20 |             self.verbose = verbose
 21 |             self.model.save_weights(self.output_directory + 'generator_init.hdf5')
 22 |         return
 23 | 
 24 |     def build_model(self, input_shape, c_loss=None):
 25 |         input_layer = keras.layers.Input(tuple(list(input_shape) + [1]))
 26 | 
 27 |         conv1 = keras.layers.Conv1D(filters=128*input_shape[1], kernel_size=15, padding='same', name='conv1')(input_layer)
 28 |         conv1 = keras.layers.BatchNormalization()(conv1)
 29 |         conv1 = keras.layers.Activation(activation='relu')(conv1)
 30 | 
 31 |         conv2 = keras.layers.Conv1D(filters=512*input_shape[1], kernel_size=21, padding='same', name='conv2')(conv1)
 32 |         conv2 = keras.layers.BatchNormalization()(conv2)
 33 |         conv2 = keras.layers.Activation('relu')(conv2)
 34 | 
 35 |         conv3 = keras.layers.Conv1D(filters=512, kernel_size=15, padding='same', name='conv3')(conv2)
 36 |         conv3 = keras.layers.BatchNormalization()(conv3)
 37 |         conv3 = keras.layers.Activation('relu')(conv3)
 38 | 
 39 |         fc1 = keras.layers.Dense(256, activation='relu')(conv3)
 40 |         fc2 = keras.layers.Dense(1, activation='relu')(fc1)
 41 |         output_layer = fc2
 42 |         model = keras.models.Model(inputs=input_layer, outputs=output_layer)
 43 | 
 44 |         return model
 45 | 
 46 |     def clip_add(self, pattern, ori_data):
 47 |         return (1 + pattern* 0.2) * ori_data
 48 | 
 49 |     def get_full_model(self, backdoor_clf, gen_trainable=True, bd_trainable=True):
 50 |         final_out = backdoor_clf.model(self.clip_add(self.model.outputs[0], self.model.inputs[0]))
 51 |         full_model = keras.models.Model(inputs=self.model.input, outputs=final_out)
 52 |         backdoor_clf.model.trainable = bd_trainable
 53 |         self.model.trainable = gen_trainable
 54 |         full_model.compile(loss='categorical_crossentropy', optimizer=keras.optimizers.Adam(),
 55 |                            metrics=['accuracy'])
 56 |         return full_model
 57 | 
 58 |     def _fit_backdoor(self, backdoor_clf, x_train, y_train, x_test, y_test, y_target, y_test_classlabel,
 59 |                       poison_rate, clean_label):
 60 |         x_test_backdoor, y_test_backdoor = self.process_instances(x_test, y_test, y_target,
 61 |                                                                   poison_rate=1.0, clean_label=clean_label,
 62 |                                                                   one_hot=True)
 63 |         x_train_backdoor_f, y_train_backdoor_f = self.process_instances(x_train, y_train, y_target,
 64 |                                                                         poison_rate=1.0, clean_label=clean_label,
 65 |                                                                         one_hot=True)
 66 | 
 67 |         print(backdoor_clf.model.evaluate(x_test, y_test)[1])
 68 | 
 69 |         x_train_backdoor, y_train_backdoor = self.process_instances(x_train, y_train, y_target,
 70 |                                                                     poison_rate, clean_label, one_hot=True,
 71 |                                                                     only_target=True)
 72 |         for e in range(1, 40):
 73 |             print("Epoch:", e)
 74 |             '''
 75 |             for layer in backdoor_clf.model.layers:
 76 |                 layer.trainable = False
 77 |             '''
 78 | 
 79 |             # Train noise generator
 80 |             full_model = self.get_full_model(backdoor_clf, True, False)
 81 |             #K.set_value(full_model.optimizer.learning_rate, 0.01)
 82 | 
 83 |             full_model.fit(x_train_backdoor, y_train_backdoor, batch_size=4, epochs=10)
 84 | 
 85 |             # Train backdoor classifier
 86 |             full_model = self.get_full_model(backdoor_clf, False, True)
 87 | 
 88 |             full_model.fit(x_train_backdoor, y_train_backdoor, batch_size=16, epochs=10)
 89 | 
 90 |             val_clean_acc = backdoor_clf.model.evaluate(x_test, y_test)[1]
 91 |             val_ASR = full_model.evaluate(x_test_backdoor, y_test_backdoor)[1]
 92 |             val_ASR_train = full_model.evaluate(x_train_backdoor_f, y_train_backdoor_f)[1]
 93 |             print('!' * 10 + ' In Progress ' + '!' * 10)
 94 |             print("Clean acc.:", val_clean_acc)
 95 |             print("ASR:", val_ASR)
 96 |             print("ASR_train:", val_ASR_train)
 97 |             print('!' * 10 + ' In Progress ' + '!' * 10)
 98 | 
 99 |             #K.set_value(backdoor_clf.model.optimizer.learning_rate, 0.002)
100 |             backdoor_clf.fit(x_train, y_train, x_test, y_test, y_test_classlabel, nb_epochs=int(4*e**0.3))
101 | 
102 |             val_clean_acc = backdoor_clf.model.evaluate(x_test, y_test)[1]
103 |             val_ASR = full_model.evaluate(x_test_backdoor, y_test_backdoor)[1]
104 |             val_ASR_train = full_model.evaluate(x_train_backdoor_f, y_train_backdoor_f)[1]
105 |             print('#' * 20)
106 |             print("Clean acc.:", val_clean_acc)
107 |             print("ASR:", val_ASR)
108 |             print("ASR_train:", val_ASR_train)
109 |             print('#' * 20)
110 |             self.model.save_weights(self.output_directory + 'generator_save/' + \
111 |                                     f'epoch{e}_{val_clean_acc:.3f}_{val_ASR:.3f}_{val_ASR_train:.3f}.hdf5')
112 |             backdoor_clf.model.save_weights(self.output_directory + 'backdoor_save/' + \
113 |                                             f'epoch{e}_{val_clean_acc:.3f}_{val_ASR:.3f}_{val_ASR_train:.3f}.hdf5')
114 | 
115 |         return
116 | 
117 |     def fit(self, x_train, y_train, x_test, y_test, y_test_classlabel, backdoor_clf, process_instances,
118 |             y_target=0, poison_rate=0.1, clean_label=False):
119 |         self.process_instances = process_instances
120 | 
121 |         print("Pre-training...")
122 |         backdoor_clf.fit(x_train, y_train, x_test, y_test, y_test_classlabel, nb_epochs=100)
123 |         self._fit_backdoor(backdoor_clf, x_train, y_train, x_test, y_test, y_target, y_test_classlabel,
124 |                            poison_rate, clean_label)
125 |         self.model.save_weights(self.output_directory + 'generator_final.hdf5')
126 |         backdoor_clf.model.save_weights(self.output_directory + 'backdoor_final.hdf5')
127 | 
128 |         return
129 | 


--------------------------------------------------------------------------------
/models/fcn.py:
--------------------------------------------------------------------------------
  1 | # FCN model
  2 | # when tuning start with learning rate->mini_batch_size ->
  3 | # momentum-> #hidden_units -> # learning_rate_decay -> #layers
  4 | import tensorflow.keras as keras
  5 | import tensorflow as tf
  6 | import numpy as np
  7 | import time
  8 | 
  9 | from utils.utils import save_logs
 10 | from utils.utils import calculate_metrics
 11 | 
 12 | 
 13 | class callback_val_ASR(tf.keras.callbacks.Callback):
 14 |     def __init__(self, x_ASR, y_ASR, x_ASR_train, y_ASR_train):
 15 |         self.x_ASR = x_ASR
 16 |         self.y_ASR = y_ASR
 17 |         self.x_ASR_train = x_ASR_train
 18 |         self.y_ASR_train = y_ASR_train
 19 | 
 20 |     def on_epoch_end(self, epoch, logs=None):
 21 |         val_ASR = self.model.evaluate(self.x_ASR, self.y_ASR, verbose=0)
 22 |         val_ASR_train = self.model.evaluate(self.x_ASR_train, self.y_ASR_train, verbose=0)
 23 |         logs['ASR'] = val_ASR[1]
 24 |         print('ASR_test:', val_ASR[1])
 25 |         print('ASR_train:', val_ASR_train[1])
 26 | 
 27 | 
 28 | class Classifier_FCN:
 29 |     def __init__(self, output_directory, input_shape, nb_classes, verbose=False, build=True, c_loss=None):
 30 |         self.output_directory = output_directory
 31 |         if build == True:
 32 |             self.model = self.build_model(input_shape, nb_classes, c_loss)
 33 |             if (verbose == True):
 34 |                 self.model.summary()
 35 |             self.verbose = verbose
 36 |             self.model.save_weights(self.output_directory + 'model_init.hdf5')
 37 |         return
 38 | 
 39 |     def build_model(self, input_shape, nb_classes, c_loss=None):
 40 |         input_layer = keras.layers.Input(input_shape)
 41 | 
 42 |         conv1 = keras.layers.Conv1D(filters=128, kernel_size=8, padding='same')(input_layer)
 43 |         conv1 = keras.layers.BatchNormalization()(conv1)
 44 |         conv1 = keras.layers.Activation(activation='relu')(conv1)
 45 | 
 46 |         conv2 = keras.layers.Conv1D(filters=256, kernel_size=5, padding='same')(conv1)
 47 |         conv2 = keras.layers.BatchNormalization()(conv2)
 48 |         conv2 = keras.layers.Activation('relu')(conv2)
 49 | 
 50 |         conv3 = keras.layers.Conv1D(128, kernel_size=3, padding='same')(conv2)
 51 |         conv3 = keras.layers.BatchNormalization()(conv3)
 52 |         conv3 = keras.layers.Activation('relu')(conv3)
 53 | 
 54 |         gap_layer = keras.layers.GlobalAveragePooling1D()(conv3)
 55 | 
 56 |         output_layer = keras.layers.Dense(nb_classes, activation='softmax')(gap_layer)
 57 | 
 58 |         model = keras.models.Model(inputs=input_layer, outputs=output_layer)
 59 |         if c_loss is None:
 60 |             model.compile(loss='categorical_crossentropy', optimizer=keras.optimizers.Adam(),
 61 |                           metrics=['accuracy'])
 62 |         else:
 63 |             model.compile(loss=c_loss, optimizer=keras.optimizers.Adam(), metrics=['accuracy'])
 64 | 
 65 |         reduce_lr = keras.callbacks.ReduceLROnPlateau(monitor='loss', factor=0.5, patience=50,
 66 |                                                       min_lr=0.0001)
 67 | 
 68 |         file_path = self.output_directory + 'best_model.hdf5'
 69 | 
 70 |         model_checkpoint = keras.callbacks.ModelCheckpoint(filepath=file_path, monitor='loss',
 71 |                                                            save_best_only=True)
 72 | 
 73 |         self.callbacks = [reduce_lr, model_checkpoint]
 74 | 
 75 |         return model
 76 | 
 77 |     def _fit_model(self, x_train, y_train, x_val, y_val, eval_val_ASR, batch_size, nb_epochs):
 78 |         if not tf.test.is_gpu_available:
 79 |             print('error')
 80 |             exit()
 81 |         # x_val and y_val are only used to monitor the test loss and NOT for training
 82 | 
 83 |         mini_batch_size = int(min(x_train.shape[0] / 10, batch_size))
 84 | 
 85 |         start_time = time.time()
 86 | 
 87 |         hist = self.model.fit(x_train, y_train, batch_size=mini_batch_size, epochs=nb_epochs,
 88 |                               verbose=self.verbose, validation_data=(x_val, y_val),
 89 |                               callbacks=self.callbacks + eval_val_ASR)
 90 | 
 91 |         duration = time.time() - start_time
 92 | 
 93 |         self.model.save(self.output_directory + 'last_model.hdf5')
 94 | 
 95 |         model = keras.models.load_model(self.output_directory + 'best_model.hdf5')
 96 | 
 97 |         return model, hist, duration
 98 | 
 99 |     def fit(self, x_train, y_train, x_val, y_val, y_test_classlabel, batch_size=16, nb_epochs=50):
100 | 
101 |         model, hist, duration = self._fit_model(x_train, y_train, x_val, y_val, [], batch_size, nb_epochs)
102 | 
103 |         y_pred = model.predict(x_val)
104 | 
105 |         # convert the predicted from binary to integer
106 |         y_pred = np.argmax(y_pred, axis=1)
107 | 
108 |         save_logs(self.output_directory, hist, y_pred, y_test_classlabel, duration, calc_ASR=False)
109 | 
110 |         keras.backend.clear_session()
111 | 
112 |     def fit_backdoor(self, x_train, y_train, x_val, y_val,
113 |                      pattern_generator, y_target=0, poison_rate=0.1,
114 |                      clean_label=True, batch_size=16):
115 |         y_val_classlabel = np.argmax(y_val, axis=1)
116 | 
117 |         x_ASR, y_ASR = pattern_generator(x_val, y_val, y_target, poison_rate=1.0,
118 |                                          clean_label=False, one_hot=True, exclude_target=True)
119 |         x_ASR_train, y_ASR_train = pattern_generator(x_train, y_train, y_target, poison_rate=1.0,
120 |                                                      clean_label=False, one_hot=True, exclude_target=True)
121 |         eval_val_ASR = callback_val_ASR(x_ASR, y_ASR, x_ASR_train, y_ASR_train)
122 | 
123 |         for e in range(500):
124 |             x_train_backdoor, y_train_backdoor = pattern_generator(x_train, y_train, y_target,
125 |                                                                    poison_rate=poison_rate, clean_label=clean_label,
126 |                                                                    one_hot=True)
127 |             print("Epoch:", e + 1)
128 |             model, hist, duration = self._fit_model(x_train_backdoor, y_train_backdoor,
129 |                                                     x_val, y_val, [eval_val_ASR], batch_size, nb_epochs=2)
130 | 
131 |         y_pred = model.predict(x_val)
132 | 
133 |         # convert the predicted from binary to integer
134 |         y_pred_classlabel = np.argmax(y_pred, axis=1)
135 | 
136 |         # Backdoor attack
137 |         x_val_backdoor, y_val_backdoor = pattern_generator(x_val, y_val, y_target,
138 |                                                            poison_rate=1.0, clean_label=False, one_hot=False)
139 |         y_pred_backdoor = np.argmax(model.predict(x_val_backdoor), axis=1)
140 | 
141 |         save_logs(self.output_directory, hist, y_pred_classlabel, y_val_classlabel, duration,
142 |                   calc_ASR=True, y_pred_backdoor=y_pred_backdoor, y_target=y_val_backdoor)
143 | 
144 |         keras.backend.clear_session()
145 | 


--------------------------------------------------------------------------------
/models/noise_gan_3L.py:
--------------------------------------------------------------------------------
  1 | # Noise-GAN model
  2 | import tensorflow.keras as keras
  3 | from keras import backend as K
  4 | import tensorflow as tf
  5 | import numpy as np
  6 | import time
  7 | 
  8 | from utils.utils import save_logs
  9 | from utils.utils import calculate_metrics
 10 | 
 11 | from models.fcn import callback_val_ASR
 12 | 
 13 | 
 14 | class Classifier_Noise_GAN:
 15 |     def __init__(self, output_directory, input_shape, verbose=False, build=True, c_loss=None):
 16 |         self.output_directory = output_directory
 17 |         if build == True:
 18 |             self.model = self.build_model(input_shape, c_loss)
 19 |             self.model.summary()
 20 |             self.verbose = verbose
 21 |             self.model.save_weights(self.output_directory + 'generator_init.hdf5')
 22 |         return
 23 | 
 24 |     def build_model(self, input_shape, c_loss=None):
 25 |         input_layer = keras.layers.Input(input_shape)
 26 | 
 27 |         conv1 = keras.layers.Conv1D(filters=128*input_shape[1], kernel_size=15, padding='same', name='conv1')(input_layer)
 28 |         conv1 = keras.layers.BatchNormalization()(conv1)
 29 |         conv1 = keras.layers.Activation(activation='relu')(conv1)
 30 | 
 31 |         conv2 = keras.layers.Conv1D(filters=512*input_shape[1], kernel_size=21, padding='same', name='conv2')(conv1)
 32 |         conv2 = keras.layers.BatchNormalization()(conv2)
 33 |         conv2 = keras.layers.Activation('relu')(conv2)
 34 | 
 35 |         conv3 = keras.layers.Conv1D(filters=1024, kernel_size=8, padding='same', name='conv3')(conv2)
 36 |         conv3 = keras.layers.BatchNormalization()(conv3)
 37 |         conv3 = keras.layers.Activation('relu')(conv3)
 38 | 
 39 |         fc1 = keras.layers.Dense(512, activation='relu')(conv3)
 40 |         fc2 = keras.layers.Dense(input_shape[1], activation='tanh')(fc1)
 41 |         output_layer = fc2
 42 |         model = keras.models.Model(inputs=input_layer, outputs=output_layer)
 43 | 
 44 |         return model
 45 | 
 46 |     def clip_add(self, pattern, ori_data):
 47 |         return (1 + pattern* 0.2) * ori_data
 48 | 
 49 |     def get_full_model(self, backdoor_clf, gen_trainable=True, bd_trainable=True):
 50 |         final_out = backdoor_clf.model(self.clip_add(self.model.outputs[0], self.model.inputs[0]))
 51 |         full_model = keras.models.Model(inputs=self.model.input, outputs=final_out)
 52 |         backdoor_clf.model.trainable = bd_trainable
 53 |         self.model.trainable = gen_trainable
 54 |         full_model.compile(loss='categorical_crossentropy', optimizer=keras.optimizers.Adam(),
 55 |                            metrics=['accuracy'])
 56 |         return full_model
 57 | 
 58 |     def _fit_backdoor(self, backdoor_clf, x_train, y_train, x_test, y_test, y_target, y_test_classlabel,
 59 |                       poison_rate, clean_label):
 60 |         x_test_backdoor, y_test_backdoor = self.process_instances(x_test, y_test, y_target,
 61 |                                                                   poison_rate=1.0, clean_label=clean_label,
 62 |                                                                   one_hot=True)
 63 |         x_train_backdoor_f, y_train_backdoor_f = self.process_instances(x_train, y_train, y_target,
 64 |                                                                         poison_rate=1.0, clean_label=clean_label,
 65 |                                                                         one_hot=True)
 66 | 
 67 |         print(backdoor_clf.model.evaluate(x_test, y_test)[1])
 68 | 
 69 |         x_train_backdoor, y_train_backdoor = self.process_instances(x_train, y_train, y_target,
 70 |                                                                     poison_rate, clean_label, one_hot=True,
 71 |                                                                     only_target=True)
 72 |         for e in range(1, 40):
 73 |             print("Epoch:", e)
 74 |             '''
 75 |             for layer in backdoor_clf.model.layers:
 76 |                 layer.trainable = False
 77 |             '''
 78 | 
 79 |             # Train noise generator
 80 |             full_model = self.get_full_model(backdoor_clf, True, False)
 81 |             #K.set_value(full_model.optimizer.learning_rate, 0.01)
 82 | 
 83 |             full_model.fit(x_train_backdoor, y_train_backdoor, batch_size=4, epochs=10)
 84 | 
 85 |             # Train backdoor classifier
 86 |             full_model = self.get_full_model(backdoor_clf, False, True)
 87 | 
 88 |             full_model.fit(x_train_backdoor, y_train_backdoor, batch_size=16, epochs=4)
 89 | 
 90 |             val_clean_acc = backdoor_clf.model.evaluate(x_test, y_test)[1]
 91 |             val_ASR = full_model.evaluate(x_test_backdoor, y_test_backdoor)[1]
 92 |             val_ASR_train = full_model.evaluate(x_train_backdoor_f, y_train_backdoor_f)[1]
 93 |             print('!' * 10 + ' In Progress ' + '!' * 10)
 94 |             print("Clean acc.:", val_clean_acc)
 95 |             print("ASR:", val_ASR)
 96 |             print("ASR_train:", val_ASR_train)
 97 |             print('!' * 10 + ' In Progress ' + '!' * 10)
 98 |             self.model.save_weights(self.output_directory + 'generator_save/' + \
 99 |                                     f'epoch{e}p_{val_clean_acc:.3f}_{val_ASR:.3f}_{val_ASR_train:.3f}.hdf5')
100 |             backdoor_clf.model.save_weights(self.output_directory + 'backdoor_save/' + \
101 |                                             f'epoch{e}p_{val_clean_acc:.3f}_{val_ASR:.3f}_{val_ASR_train:.3f}.hdf5')
102 | 
103 |             #K.set_value(backdoor_clf.model.optimizer.learning_rate, 0.002)
104 |             backdoor_clf.fit(x_train, y_train, x_test, y_test, y_test_classlabel, nb_epochs=int(3*e**0.3))
105 | 
106 |             val_clean_acc = backdoor_clf.model.evaluate(x_test, y_test)[1]
107 |             val_ASR = full_model.evaluate(x_test_backdoor, y_test_backdoor)[1]
108 |             val_ASR_train = full_model.evaluate(x_train_backdoor_f, y_train_backdoor_f)[1]
109 |             print('#' * 20)
110 |             print("Clean acc.:", val_clean_acc)
111 |             print("ASR:", val_ASR)
112 |             print("ASR_train:", val_ASR_train)
113 |             print('#' * 20)
114 |             self.model.save_weights(self.output_directory + 'generator_save/' + \
115 |                                     f'epoch{e}_{val_clean_acc:.3f}_{val_ASR:.3f}_{val_ASR_train:.3f}.hdf5')
116 |             backdoor_clf.model.save_weights(self.output_directory + 'backdoor_save/' + \
117 |                                             f'epoch{e}_{val_clean_acc:.3f}_{val_ASR:.3f}_{val_ASR_train:.3f}.hdf5')
118 | 
119 |         return
120 | 
121 |     def fit(self, x_train, y_train, x_test, y_test, y_test_classlabel, backdoor_clf, process_instances,
122 |             y_target=0, poison_rate=0.1, clean_label=False):
123 |         self.process_instances = process_instances
124 | 
125 |         print("Pre-training...")
126 |         backdoor_clf.fit(x_train, y_train, x_test, y_test, y_test_classlabel, nb_epochs=100)
127 |         self._fit_backdoor(backdoor_clf, x_train, y_train, x_test, y_test, y_target, y_test_classlabel,
128 |                            poison_rate, clean_label)
129 |         self.model.save_weights(self.output_directory + 'generator_final.hdf5')
130 |         backdoor_clf.model.save_weights(self.output_directory + 'backdoor_final.hdf5')
131 | 
132 |         return
133 | 


--------------------------------------------------------------------------------
/models/resnet.py:
--------------------------------------------------------------------------------
  1 | # resnet model 
  2 | # when tuning start with learning rate->mini_batch_size -> 
  3 | # momentum-> #hidden_units -> # learning_rate_decay -> #layers 
  4 | import tensorflow.keras as keras
  5 | import tensorflow as tf
  6 | import numpy as np
  7 | import time
  8 | 
  9 | import matplotlib
 10 | from utils.utils import save_test_duration
 11 | 
 12 | matplotlib.use('agg')
 13 | import matplotlib.pyplot as plt
 14 | 
 15 | from utils.utils import save_logs
 16 | from utils.utils import calculate_metrics
 17 | from models.fcn import callback_val_ASR
 18 | 
 19 | 
 20 | class Classifier_RESNET:
 21 | 
 22 |     def __init__(self, output_directory, input_shape, nb_classes, verbose=False, build=True, load_weights=False):
 23 |         self.output_directory = output_directory
 24 |         if build == True:
 25 |             self.model = self.build_model(input_shape, nb_classes)
 26 |             if (verbose == True):
 27 |                 self.model.summary()
 28 |             self.verbose = verbose
 29 |             if load_weights == True:
 30 |                 self.model.load_weights(self.output_directory
 31 |                                         .replace('resnet_augment', 'resnet')
 32 |                                         .replace('TSC_itr_augment_x_10', 'TSC_itr_10')
 33 |                                         + '/model_init.hdf5')
 34 |             else:
 35 |                 self.model.save_weights(self.output_directory + 'model_init.hdf5')
 36 |         return
 37 | 
 38 |     def build_model(self, input_shape, nb_classes):
 39 |         n_feature_maps = 64
 40 | 
 41 |         input_layer = keras.layers.Input(input_shape)
 42 | 
 43 |         # BLOCK 1
 44 | 
 45 |         conv_x = keras.layers.Conv1D(filters=n_feature_maps, kernel_size=8, padding='same')(input_layer)
 46 |         conv_x = keras.layers.BatchNormalization()(conv_x)
 47 |         conv_x = keras.layers.Activation('relu')(conv_x)
 48 | 
 49 |         conv_y = keras.layers.Conv1D(filters=n_feature_maps, kernel_size=5, padding='same')(conv_x)
 50 |         conv_y = keras.layers.BatchNormalization()(conv_y)
 51 |         conv_y = keras.layers.Activation('relu')(conv_y)
 52 | 
 53 |         conv_z = keras.layers.Conv1D(filters=n_feature_maps, kernel_size=3, padding='same')(conv_y)
 54 |         conv_z = keras.layers.BatchNormalization()(conv_z)
 55 | 
 56 |         # expand channels for the sum
 57 |         shortcut_y = keras.layers.Conv1D(filters=n_feature_maps, kernel_size=1, padding='same')(input_layer)
 58 |         shortcut_y = keras.layers.BatchNormalization()(shortcut_y)
 59 | 
 60 |         output_block_1 = keras.layers.add([shortcut_y, conv_z])
 61 |         output_block_1 = keras.layers.Activation('relu')(output_block_1)
 62 | 
 63 |         # BLOCK 2
 64 | 
 65 |         conv_x = keras.layers.Conv1D(filters=n_feature_maps * 2, kernel_size=8, padding='same')(output_block_1)
 66 |         conv_x = keras.layers.BatchNormalization()(conv_x)
 67 |         conv_x = keras.layers.Activation('relu')(conv_x)
 68 | 
 69 |         conv_y = keras.layers.Conv1D(filters=n_feature_maps * 2, kernel_size=5, padding='same')(conv_x)
 70 |         conv_y = keras.layers.BatchNormalization()(conv_y)
 71 |         conv_y = keras.layers.Activation('relu')(conv_y)
 72 | 
 73 |         conv_z = keras.layers.Conv1D(filters=n_feature_maps * 2, kernel_size=3, padding='same')(conv_y)
 74 |         conv_z = keras.layers.BatchNormalization()(conv_z)
 75 | 
 76 |         # expand channels for the sum
 77 |         shortcut_y = keras.layers.Conv1D(filters=n_feature_maps * 2, kernel_size=1, padding='same')(output_block_1)
 78 |         shortcut_y = keras.layers.BatchNormalization()(shortcut_y)
 79 | 
 80 |         output_block_2 = keras.layers.add([shortcut_y, conv_z])
 81 |         output_block_2 = keras.layers.Activation('relu')(output_block_2)
 82 | 
 83 |         # BLOCK 3
 84 | 
 85 |         conv_x = keras.layers.Conv1D(filters=n_feature_maps * 2, kernel_size=8, padding='same')(output_block_2)
 86 |         conv_x = keras.layers.BatchNormalization()(conv_x)
 87 |         conv_x = keras.layers.Activation('relu')(conv_x)
 88 | 
 89 |         conv_y = keras.layers.Conv1D(filters=n_feature_maps * 2, kernel_size=5, padding='same')(conv_x)
 90 |         conv_y = keras.layers.BatchNormalization()(conv_y)
 91 |         conv_y = keras.layers.Activation('relu')(conv_y)
 92 | 
 93 |         conv_z = keras.layers.Conv1D(filters=n_feature_maps * 2, kernel_size=3, padding='same')(conv_y)
 94 |         conv_z = keras.layers.BatchNormalization()(conv_z)
 95 | 
 96 |         # no need to expand channels because they are equal
 97 |         shortcut_y = keras.layers.BatchNormalization()(output_block_2)
 98 | 
 99 |         output_block_3 = keras.layers.add([shortcut_y, conv_z])
100 |         output_block_3 = keras.layers.Activation('relu')(output_block_3)
101 | 
102 |         # FINAL
103 | 
104 |         gap_layer = keras.layers.GlobalAveragePooling1D()(output_block_3)
105 | 
106 |         output_layer = keras.layers.Dense(nb_classes, activation='softmax')(gap_layer)
107 | 
108 |         model = keras.models.Model(inputs=input_layer, outputs=output_layer)
109 | 
110 |         model.compile(loss='categorical_crossentropy', optimizer=keras.optimizers.Adam(),
111 |                       metrics=['accuracy'])
112 | 
113 |         reduce_lr = keras.callbacks.ReduceLROnPlateau(monitor='loss', factor=0.5, patience=50, min_lr=0.0001)
114 | 
115 |         file_path = self.output_directory + 'best_model.hdf5'
116 | 
117 |         model_checkpoint = keras.callbacks.ModelCheckpoint(filepath=file_path, monitor='loss',
118 |                                                            save_best_only=True)
119 | 
120 |         self.callbacks = [reduce_lr, model_checkpoint]
121 | 
122 |         return model
123 | 
124 |     def fit(self, x_train, y_train, x_val, y_val, y_true, nb_epochs=50):
125 |         if not tf.test.is_gpu_available:
126 |             print('error')
127 |             exit()
128 |         # x_val and y_val are only used to monitor the test loss and NOT for training
129 |         batch_size = 64
130 | 
131 |         mini_batch_size = int(min(x_train.shape[0] / 10, batch_size))
132 | 
133 |         start_time = time.time()
134 | 
135 |         hist = self.model.fit(x_train, y_train, batch_size=mini_batch_size, epochs=nb_epochs,
136 |                               verbose=self.verbose, validation_data=(x_val, y_val), callbacks=self.callbacks)
137 | 
138 |         duration = time.time() - start_time
139 | 
140 |         self.model.save(self.output_directory + 'last_model.hdf5')
141 | 
142 |         y_pred = self.predict(x_val, y_true, x_train, y_train, y_val,
143 |                               return_df_metrics=False)
144 | 
145 |         # save predictions
146 |         np.save(self.output_directory + 'y_pred.npy', y_pred)
147 | 
148 |         # convert the predicted from binary to integer
149 |         y_pred = np.argmax(y_pred, axis=1)
150 | 
151 |         df_metrics = save_logs(self.output_directory, hist, y_pred, y_true, duration)
152 | 
153 |         keras.backend.clear_session()
154 | 
155 |         return df_metrics
156 | 
157 |     def fit_backdoor(self, x_train, y_train, x_val, y_val,
158 |                      pattern_generator, y_target=0, poison_rate=0.1,
159 |                      clean_label=True, batch_size=64, nb_epochs=50):
160 |         if not tf.test.is_gpu_available:
161 |             print('error')
162 |             exit()
163 |         # x_val and y_val are only used to monitor the test loss and NOT for training
164 |         mini_batch_size = int(min(x_train.shape[0] / 10, batch_size))
165 | 
166 |         start_time = time.time()
167 | 
168 |         duration = time.time() - start_time
169 | 
170 |         y_val_classlabel = np.argmax(y_val, axis=1)
171 | 
172 |         x_ASR, y_ASR = pattern_generator(x_val, y_val, y_target, poison_rate=1.0,
173 |                                          clean_label=False, one_hot=True, exclude_target=True)
174 |         x_ASR_train, y_ASR_train = pattern_generator(x_train, y_train, y_target, poison_rate=1.0,
175 |                                                      clean_label=False, one_hot=True, exclude_target=True)
176 |         eval_val_ASR = callback_val_ASR(x_ASR, y_ASR, x_ASR_train, y_ASR_train)
177 | 
178 |         for e in range(500):
179 |             x_train_backdoor, y_train_backdoor = pattern_generator(x_train, y_train, y_target,
180 |                                                                    poison_rate=poison_rate, clean_label=clean_label,
181 |                                                                    one_hot=True)
182 |             print("Epoch:", e + 1)
183 |             hist = self.model.fit(x_train_backdoor, y_train_backdoor, batch_size=mini_batch_size, epochs=nb_epochs,
184 |                                   verbose=self.verbose, validation_data=(x_val, y_val),
185 |                                   callbacks=self.callbacks + [eval_val_ASR])
186 | 
187 |         y_pred = self.model.predict(x_val)
188 | 
189 |         # convert the predicted from binary to integer
190 |         y_pred_classlabel = np.argmax(y_pred, axis=1)
191 | 
192 |         # Backdoor attack
193 |         x_val_backdoor, y_val_backdoor = pattern_generator(x_val, y_val, y_target,
194 |                                                            poison_rate=1.0, clean_label=False, one_hot=False)
195 |         y_pred_backdoor = np.argmax(self.model.predict(x_val_backdoor), axis=1)
196 | 
197 |         df_metrics = save_logs(self.output_directory, hist, y_pred_classlabel, y_val_classlabel, duration,
198 |                                calc_ASR=True, y_pred_backdoor=y_pred_backdoor, y_target=y_val_backdoor)
199 | 
200 |         keras.backend.clear_session()
201 | 
202 |         return df_metrics
203 | 
204 |     def predict(self, x_test, y_true, x_train, y_train, y_test, return_df_metrics=True):
205 |         start_time = time.time()
206 |         model_path = self.output_directory + 'best_model.hdf5'
207 |         model = keras.models.load_model(model_path)
208 |         y_pred = model.predict(x_test)
209 |         if return_df_metrics:
210 |             y_pred = np.argmax(y_pred, axis=1)
211 |             df_metrics = calculate_metrics(y_true, y_pred, 0.0)
212 |             return df_metrics
213 |         else:
214 |             test_duration = time.time() - start_time
215 |             save_test_duration(self.output_directory + 'test_duration.csv', test_duration)
216 |             return y_pred
217 | 


--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
  1 | from utils.utils import generate_results_csv
  2 | from utils.utils import create_directory
  3 | from utils.utils import read_dataset
  4 | from utils.utils import transform_mts_to_ucr_format
  5 | 
  6 | import os
  7 | import numpy as np
  8 | import sys
  9 | import sklearn
 10 | import utils
 11 | from utils.constants import CLASSIFIERS
 12 | from utils.constants import ARCHIVE_NAMES
 13 | from utils.constants import ITERATIONS
 14 | from utils.utils import read_all_datasets
 15 | 
 16 | 
 17 | def gen_vanilla_pattern(x, y, y_target, poison_rate, clean_label, one_hot=False, exclude_target=False):
 18 |     # num of instance in target class < poison_rate * total num of instances
 19 |     INTENSITY = 0.02
 20 |     x, y_backdoor = process_instances(x, y, y_target, poison_rate, clean_label, one_hot, exclude_target)
 21 | 
 22 |     pattern_max = np.max(x, axis=1)
 23 |     pattern_max = pattern_max.reshape(pattern_max.shape[0], 1, pattern_max.shape[1])
 24 |     pattern_min = np.min(x, axis=1)
 25 |     pattern_min = pattern_min.reshape(pattern_min.shape[0], 1, pattern_min.shape[1])
 26 | 
 27 |     pattern = np.concatenate((pattern_max, pattern_min), axis=1)
 28 |     #pattern[:, 1, :] = -pattern[:, 1, :]
 29 |     pattern = np.tile(pattern, (int(INTENSITY * x.shape[1] / 2), 1))
 30 |     x_backdoor = x.copy()
 31 |     x_backdoor[:, 0:int(INTENSITY * x.shape[1] / 2) * 2, :] = pattern
 32 | 
 33 |     return x_backdoor, y_backdoor
 34 | 
 35 | 
 36 | def gen_powerline_noise(x, y, y_target, poison_rate, clean_label, one_hot=False, exclude_target=False):
 37 |     PATTERN_FILE = './powerline_pattern.npy'
 38 |     x, y_backdoor = process_instances(x, y, y_target, poison_rate, clean_label, one_hot, exclude_target)
 39 |     pattern = np.load(PATTERN_FILE)
 40 |     pattern = (pattern - np.mean(pattern)) / np.std(pattern)
 41 | 
 42 |     if x.shape[1] < pattern.shape[0] * 5:
 43 |         pattern = pattern[::pattern.shape[0] // x.shape[1] * 5, 0]
 44 |     pattern = np.resize(pattern, (1, x.shape[1], 1)).repeat(x.shape[2], axis=2).repeat(x.shape[0], axis=0)
 45 |     normal_mul = (np.max(x, axis=1) - np.min(x, axis=1)).reshape(x.shape[0], 1, x.shape[2]).repeat(pattern.shape[1],
 46 |                                                                                                    axis=1) / 10
 47 | 
 48 |     pattern *= normal_mul
 49 |     x_backdoor = x.copy() + pattern
 50 | 
 51 |     return x_backdoor, y_backdoor
 52 | 
 53 | 
 54 | def generative_pattern(x, y, y_target, poison_rate, clean_label, one_hot=False, exclude_target=False):
 55 |     global NOISE_GEN_INS
 56 |     noise_generator = NOISE_GEN_INS
 57 | 
 58 |     x, y_backdoor = process_instances(x, y, y_target, poison_rate, clean_label, one_hot, exclude_target)
 59 |     #noise_generator.model.load_weights('./results/fcn_generator/mts_archive/ECG/generator_final.hdf5')
 60 | 
 61 |     pattern = noise_generator.model(x)
 62 |     pattern = (pattern - pattern.numpy().mean()) / pattern.numpy().std()
 63 |     data_std = np.resize(x.std(axis=1), (x.shape[0], 1, x.shape[2])).repeat(x.shape[1], axis=1)
 64 |     data_mean = np.resize(x.mean(axis=1), (x.shape[0], 1, x.shape[2])).repeat(x.shape[1], axis=1)
 65 |     x_backdoor = x.copy() + pattern * data_std + data_mean
 66 |     print(f'Generative rate: {poison_rate}')
 67 |     return x_backdoor, y_backdoor
 68 | 
 69 | 
 70 | def process_instances(x, y, y_target, poison_rate, clean_label, one_hot=False, exclude_target=False, only_target=False):
 71 |     y_classlabel = np.argmax(y, axis=1)
 72 |     enc = sklearn.preprocessing.OneHotEncoder(categories='auto')
 73 |     enc.fit(y_classlabel.reshape(-1, 1))
 74 | 
 75 |     if exclude_target:
 76 |         index_exclude = np.where(y_classlabel != y_target)[0]
 77 |         x = x[index_exclude]
 78 |         y_classlabel = y_classlabel[index_exclude]
 79 | 
 80 |     if clean_label:
 81 |         index = np.where(y_classlabel == y_target)[0]
 82 |         if len(index) / len(y_classlabel) < poison_rate:
 83 |             print('!!!ACTUAL POISON RATE:', len(index) / len(y_classlabel))
 84 | 
 85 |     else:
 86 |         index = np.where(y_classlabel != y_target)[0]
 87 |         if poison_rate < 1.0:
 88 |             index = np.random.choice(index, size=int(len(y_classlabel) * poison_rate), replace=False)
 89 | 
 90 |     y_backdoor = y_classlabel.copy()
 91 |     y_backdoor[index] = y_target
 92 | 
 93 |     if only_target:
 94 |         index_target = np.where(y_backdoor == y_target)[0]
 95 |         x = x[index_target]
 96 |         y_backdoor = y_backdoor[index_target]
 97 | 
 98 |     if one_hot:
 99 |         y_backdoor = enc.transform(y_backdoor.reshape(-1, 1)).toarray()
100 | 
101 |     return x, y_backdoor
102 | 
103 | 
def fit_classifier(backdoor=None, clean_label=False):
    """Train one classifier on the currently selected dataset.

    Reads the module-level `datasets_dict`, `dataset_name`, `classifier_name`,
    `output_directory` and (for the 'generator' mode) `archive_name`.

    `backdoor` selects the mode: None (clean training), 'vanilla',
    'powerline', 'generator' or 'generative_test'. Any other value prints a
    notice and returns None.
    """
    global NOISE_GEN_INS

    dataset = datasets_dict[dataset_name]
    x_train, y_train = dataset[0], dataset[1]
    x_test, y_test = dataset[2], dataset[3]

    all_labels = np.concatenate((y_train, y_test), axis=0)
    nb_classes = len(np.unique(all_labels))

    # transform the labels from integers to one hot vectors
    enc = sklearn.preprocessing.OneHotEncoder(categories='auto')
    enc.fit(all_labels.reshape(-1, 1))
    y_train = enc.transform(y_train.reshape(-1, 1)).toarray()
    y_test = enc.transform(y_test.reshape(-1, 1)).toarray()

    # keep the integer test labels; the one-hot version is used for training
    y_test_classlabel = np.argmax(y_test, axis=1)

    if len(x_train.shape) == 2:
        # univariate data: append a channel axis of size 1
        x_train = x_train.reshape((x_train.shape[0], x_train.shape[1], 1))
        x_test = x_test.reshape((x_test.shape[0], x_test.shape[1], 1))

    input_shape = x_train.shape[1:]

    classifier = create_classifier(classifier_name, input_shape, nb_classes, output_directory)

    if backdoor is None:
        classifier.fit(x_train, y_train, x_test, y_test, y_test_classlabel)

    elif backdoor in ('vanilla', 'powerline'):
        # Both fixed-pattern attacks share the same call; only the generator differs.
        pattern_generator = gen_vanilla_pattern if backdoor == 'vanilla' else gen_powerline_noise
        classifier.fit_backdoor(x_train, y_train, x_test, y_test,
                                pattern_generator, y_target=0, poison_rate=0.1,
                                clean_label=clean_label)

    elif backdoor == 'generator':
        # Pick the generator module matching the classifier / archive.
        # NOTE(review): any other combination leaves `noise_gan` unbound and
        # the next statement raises.
        if classifier_name == 'fcn':
            from models import noise_gan
        if classifier_name == 'resnet':
            if archive_name == 'mts_archive':
                from models import noise_gan_expdim as noise_gan
            elif archive_name == 'UCRArchive_2018':
                from models import noise_gan_3L as noise_gan
        noise_generator = noise_gan.Classifier_Noise_GAN(output_directory, input_shape, verbose=False)
        noise_generator.fit(x_train, y_train, x_test, y_test, y_test_classlabel, classifier, process_instances,
                            y_target=0, poison_rate=0.1, clean_label=clean_label)

    elif backdoor == 'generative_test':
        # Only instantiates the generator and publishes it through the global.
        from models import noise_gan
        NOISE_GEN_INS = noise_gan.Classifier_Noise_GAN(output_directory, input_shape, verbose=False)

    else:
        print('NOT IMPLEMENTED!!!')
        return None
162 | 
163 | 
def create_classifier(classifier_name, input_shape, nb_classes, output_directory, verbose=True):
    """Instantiate the classifier named `classifier_name` ('fcn' or 'resnet').

    Returns None for any other name. The model module is imported only when
    its classifier is requested.
    """
    if classifier_name == 'fcn':
        from models.fcn import Classifier_FCN
        return Classifier_FCN(output_directory, input_shape, nb_classes, verbose)

    if classifier_name == 'resnet':
        from models.resnet import Classifier_RESNET
        return Classifier_RESNET(output_directory, input_shape, nb_classes, verbose)

    return None
171 | 
172 | 
############################################### main
# Command-line modes (first argument):
#   run_baseline                 train every classifier on every dataset, clean data
#   run_backdoor <attack>        same, with the given attack method
#   transform_mts_to_ucr_format  convert the MTS archive
#   generate_results_csv         aggregate results into results.csv
#   <archive> <dataset> <classifier> <itr>   single clean experiment

# change this directory for your machine
root_dir = '.'

if sys.argv[1] in ['run_baseline', 'run_backdoor']:
    if sys.argv[1] == 'run_backdoor':
        attack_method = sys.argv[2]
        result_string = '_' + attack_method
    else:
        attack_method = None
        result_string = ''
    for classifier_name in CLASSIFIERS[0:]:
        print('classifier_name', classifier_name)

        for archive_name in ARCHIVE_NAMES[1:]:
            print('\tarchive_name', archive_name)

            datasets_dict = read_all_datasets(root_dir, archive_name)

            for iteration in range(ITERATIONS):
                print('\t\titer', iteration)

                # The first iteration writes to the un-suffixed results directory.
                trr = ''
                if iteration != 0:
                    trr = '_itr_' + str(iteration)

                tmp_output_directory = root_dir + '/results/' + classifier_name + result_string + '/' + archive_name + trr + '/'

                for dataset_name in utils.constants.dataset_names_for_archive[archive_name]:
                    print('\t\t\tdataset_name: ', dataset_name)

                    output_directory = tmp_output_directory + dataset_name + '/'

                    create_directory(output_directory)
                    # Use attack_method rather than sys.argv[2]: 'run_baseline' has no
                    # second argument, so indexing argv here raised IndexError.
                    if attack_method == 'generator':
                        create_directory(output_directory + 'generator_save/')
                        create_directory(output_directory + 'backdoor_save/')

                    fit_classifier(backdoor=attack_method)

                    print('\t\t\t\tDONE')

                    # marker directory created once this dataset's run has finished
                    create_directory(output_directory + '/DONE')

elif sys.argv[1] == 'transform_mts_to_ucr_format':
    transform_mts_to_ucr_format()
elif sys.argv[1] == 'generate_results_csv':
    res = generate_results_csv('results.csv', root_dir)
    print(res.to_string())
else:
    # this is the code used to launch an experiment on a dataset
    archive_name = sys.argv[1]
    dataset_name = sys.argv[2]
    classifier_name = sys.argv[3]
    itr = sys.argv[4]

    if itr == '_itr_0':
        itr = ''

    output_directory = root_dir + '/results/' + classifier_name + '/' + archive_name + itr + '/' + \
                       dataset_name + '/'

    test_dir_df_metrics = output_directory + 'df_metrics.csv'

    print('Method: ', archive_name, dataset_name, classifier_name, itr)

    # An existing metrics file means this experiment was already run.
    if os.path.exists(test_dir_df_metrics):
        print('Already done')
    else:

        create_directory(output_directory)
        datasets_dict = read_dataset(root_dir, archive_name, dataset_name)

        fit_classifier()

        print('DONE')

        # marker directory created once the run has finished
        create_directory(output_directory + '/DONE')
254 | 


--------------------------------------------------------------------------------
/utils/utils.py:
--------------------------------------------------------------------------------
  1 | from builtins import print
  2 | import numpy as np
  3 | import pandas as pd
  4 | import matplotlib
  5 | 
  6 | matplotlib.use('agg')
  7 | import matplotlib.pyplot as plt
  8 | 
  9 | matplotlib.rcParams['font.family'] = 'sans-serif'
 10 | matplotlib.rcParams['font.sans-serif'] = 'Arial'
 11 | import os
 12 | import operator
 13 | 
 14 | import utils
 15 | 
 16 | from utils.constants import UNIVARIATE_DATASET_NAMES as DATASET_NAMES
 17 | from utils.constants import UNIVARIATE_DATASET_NAMES_2018 as DATASET_NAMES_2018
 18 | from utils.constants import ARCHIVE_NAMES as ARCHIVE_NAMES
 19 | from utils.constants import CLASSIFIERS
 20 | from utils.constants import ITERATIONS
 21 | from utils.constants import MTS_DATASET_NAMES
 22 | 
 23 | from sklearn.metrics import accuracy_score
 24 | from sklearn.metrics import precision_score
 25 | from sklearn.metrics import recall_score
 26 | from sklearn.preprocessing import LabelEncoder
 27 | 
 28 | from scipy.interpolate import interp1d
 29 | from scipy.io import loadmat
 30 | 
 31 | 
 32 | def readucr(filename):
 33 |     data = np.loadtxt(filename, delimiter=',')
 34 |     Y = data[:, 0]
 35 |     X = data[:, 1:]
 36 |     return X, Y
 37 | 
 38 | 
 39 | def create_directory(directory_path):
 40 |     if os.path.exists(directory_path):
 41 |         return None
 42 |     else:
 43 |         try:
 44 |             os.makedirs(directory_path)
 45 |         except:
 46 |             # in case another machine created the path meanwhile !:(
 47 |             return None
 48 |         return directory_path
 49 | 
 50 | 
 51 | def reate_path(root_dir, classifier_name, archive_name):
 52 |     output_directory = root_dir + '/results/' + classifier_name + '/' + archive_name + '/'
 53 |     if os.path.exists(output_directory):
 54 |         return None
 55 |     else:
 56 |         os.makedirs(output_directory)
 57 |         return output_directory
 58 | 
 59 | 
 60 | def read_dataset(root_dir, archive_name, dataset_name):
 61 |     datasets_dict = {}
 62 |     cur_root_dir = root_dir.replace('-temp', '')
 63 | 
 64 |     if archive_name == 'mts_archive':
 65 |         file_name = cur_root_dir + '/archives/' + archive_name + '/' + dataset_name + '/'
 66 |         x_train = np.load(file_name + 'x_train.npy')
 67 |         y_train = np.load(file_name + 'y_train.npy')
 68 |         x_test = np.load(file_name + 'x_test.npy')
 69 |         y_test = np.load(file_name + 'y_test.npy')
 70 | 
 71 |         datasets_dict[dataset_name] = (x_train.copy(), y_train.copy(), x_test.copy(),
 72 |                                        y_test.copy())
 73 | 
 74 |     elif archive_name == 'UCRArchive_2018':
 75 |         root_dir_dataset = cur_root_dir + '/archives/' + archive_name + '/' + dataset_name + '/'
 76 |         df_train = pd.read_csv(root_dir_dataset + '/' + dataset_name + '_TRAIN.tsv', sep='\t', header=None)
 77 | 
 78 |         df_test = pd.read_csv(root_dir_dataset + '/' + dataset_name + '_TEST.tsv', sep='\t', header=None)
 79 | 
 80 |         y_train = df_train.values[:, 0]
 81 |         y_test = df_test.values[:, 0]
 82 | 
 83 |         x_train = df_train.drop(columns=[0])
 84 |         x_test = df_test.drop(columns=[0])
 85 | 
 86 |         x_train.columns = range(x_train.shape[1])
 87 |         x_test.columns = range(x_test.shape[1])
 88 | 
 89 |         x_train = x_train.values
 90 |         x_test = x_test.values
 91 | 
 92 |         # znorm
 93 |         std_ = x_train.std(axis=1, keepdims=True)
 94 |         std_[std_ == 0] = 1.0
 95 |         x_train = (x_train - x_train.mean(axis=1, keepdims=True)) / std_
 96 | 
 97 |         std_ = x_test.std(axis=1, keepdims=True)
 98 |         std_[std_ == 0] = 1.0
 99 |         x_test = (x_test - x_test.mean(axis=1, keepdims=True)) / std_
100 | 
101 |         datasets_dict[dataset_name] = (x_train.copy(), y_train.copy(), x_test.copy(),
102 |                                        y_test.copy())
103 |     else:
104 |         file_name = cur_root_dir + '/archives/' + archive_name + '/' + dataset_name + '/' + dataset_name
105 |         x_train, y_train = readucr(file_name + '_TRAIN')
106 |         x_test, y_test = readucr(file_name + '_TEST')
107 |         datasets_dict[dataset_name] = (x_train.copy(), y_train.copy(), x_test.copy(),
108 |                                        y_test.copy())
109 | 
110 |     return datasets_dict
111 | 
112 | 
def read_all_datasets(root_dir, archive_name, split_val=False):
    """Load every dataset of an archive into one dictionary.

    Any ``'-temp'`` substring is removed from ``root_dir`` before paths are
    built. The dataset list and file layout depend on ``archive_name``:

    * ``'mts_archive'``: names from ``MTS_DATASET_NAMES``, four ``.npy``
      files per dataset.
    * ``'UCRArchive_2018'``: names from ``DATASET_NAMES_2018``, tab-separated
      ``_TRAIN.tsv`` / ``_TEST.tsv`` files, each series z-normalised.
    * anything else: names from ``DATASET_NAMES``, files read with
      ``readucr``. In this branch ``DATASET_NAMES`` is additionally
      re-ordered in place by increasing training-set size.

    ``split_val`` is accepted but not used by this function.

    Returns
    -------
    dict
        ``{dataset_name: (x_train, y_train, x_test, y_test)}`` holding copies
        of the loaded arrays.
    """

    def _znorm_rows(values):
        # Row-wise z-normalisation; constant rows keep a divisor of 1.
        sigma = values.std(axis=1, keepdims=True)
        sigma[sigma == 0] = 1.0
        return (values - values.mean(axis=1, keepdims=True)) / sigma

    def _split_frame(frame):
        # Column 0 carries the labels, the remaining columns the series.
        labels = frame.values[:, 0]
        series = frame.drop(columns=[0])
        series.columns = range(series.shape[1])
        return series.values, labels

    def _dataset_dir(name):
        return archive_root + name + '/'

    def _as_entry(x_tr, y_tr, x_te, y_te):
        return (x_tr.copy(), y_tr.copy(), x_te.copy(), y_te.copy())

    archive_root = root_dir.replace('-temp', '') + '/archives/' + archive_name + '/'
    loaded = {}

    if archive_name == 'mts_archive':
        for dataset_name in MTS_DATASET_NAMES:
            folder = _dataset_dir(dataset_name)
            x_train, y_train, x_test, y_test = [
                np.load(folder + part + '.npy')
                for part in ('x_train', 'y_train', 'x_test', 'y_test')
            ]
            loaded[dataset_name] = _as_entry(x_train, y_train, x_test, y_test)

    elif archive_name == 'UCRArchive_2018':
        for dataset_name in DATASET_NAMES_2018:
            folder = _dataset_dir(dataset_name)
            train_frame = pd.read_csv(folder + '/' + dataset_name + '_TRAIN.tsv',
                                      sep='\t', header=None)
            test_frame = pd.read_csv(folder + '/' + dataset_name + '_TEST.tsv',
                                     sep='\t', header=None)

            x_train, y_train = _split_frame(train_frame)
            x_test, y_test = _split_frame(test_frame)

            loaded[dataset_name] = _as_entry(_znorm_rows(x_train), y_train,
                                             _znorm_rows(x_test), y_test)

    else:
        sizes = []
        for dataset_name in DATASET_NAMES:
            prefix = _dataset_dir(dataset_name) + dataset_name
            x_train, y_train = readucr(prefix + '_TRAIN')
            x_test, y_test = readucr(prefix + '_TEST')

            loaded[dataset_name] = _as_entry(x_train, y_train, x_test, y_test)
            sizes.append((dataset_name, len(x_train)))

        # Stable sort by number of training series, then write the new order
        # back into the shared DATASET_NAMES list position by position.
        sizes.sort(key=operator.itemgetter(1))
        for position in range(len(DATASET_NAMES)):
            DATASET_NAMES[position] = sizes[position][0]

    return loaded
180 | 
181 | 
def get_func_length(x_train, x_test, func):
    """Fold ``func`` over the second-axis length of every series.

    Each element ``x[i]`` of ``x_train`` and ``x_test`` must expose
    ``shape[1]``; that value is combined with the running result through
    ``func(running, length)``.

    Parameters
    ----------
    x_train, x_test : indexable collections exposing ``shape[0]``.
    func : two-argument reducer, typically the builtin ``min`` or ``max``.

    Returns
    -------
    The reduced length. The running value starts at ``np.inf`` when ``func``
    is ``min`` and at ``0`` otherwise, so those are the results for empty
    inputs.
    """
    func_length = np.inf if func == min else 0

    for collection in (x_train, x_test):
        for k in range(collection.shape[0]):
            func_length = func(func_length, collection[k].shape[1])

    return func_length
197 | 
198 | 
def transform_to_same_length(x, n_var, max_length):
    """Resample every multivariate series of ``x`` to ``max_length`` steps.

    Each ``x[i]`` is indexed as ``(variable, time)``. Every one of its first
    ``n_var`` rows is resampled with a cubic ``interp1d`` onto ``max_length``
    evenly spaced positions spanning the original time index.

    Returns
    -------
    np.ndarray
        Float64 array of shape ``(len(x), max_length, n_var)``.
    """
    n_series = x.shape[0]
    resampled = np.zeros((n_series, max_length, n_var), dtype=np.float64)

    for series_idx in range(n_series):
        series = x[series_idx]
        old_steps = np.array(range(series.shape[1]))
        # Target positions cover the same [0, last index] range.
        new_steps = np.linspace(0, old_steps.max(), max_length)

        for var_idx in range(n_var):
            # Cubic interpolation of one variable onto the new time grid.
            interpolator = interp1d(old_steps, series[var_idx], kind='cubic')
            resampled[series_idx, :, var_idx] = interpolator(new_steps)

    return resampled
219 | 
220 | 
def transform_mts_to_ucr_format():
    """Convert the ``.mat`` multivariate archives into ``.npy`` arrays.

    For every name in ``MTS_DATASET_NAMES`` the file
    ``./archives/_mts_mat/<name>/<name>.mat`` is loaded, its ``train``,
    ``test``, ``trainlabels`` and ``testlabels`` fields are extracted, all
    series are resampled to the longest series length found in the dataset,
    and the result is written as ``x_train.npy``, ``y_train.npy``,
    ``x_test.npy`` and ``y_test.npy`` under
    ``./archives/mts_archive/<name>/``.

    A dataset whose output directory already exists is skipped; the
    remaining datasets are still processed.
    """
    mts_root_dir = './archives/_mts_mat/'
    mts_out_dir = './archives/mts_archive/'
    for dataset_name in MTS_DATASET_NAMES:
        print('dataset_name', dataset_name)

        out_dir = mts_out_dir + dataset_name + '/'

        a = loadmat(mts_root_dir + dataset_name + '/' + dataset_name + '.mat')
        a = a['mts']
        a = a[0, 0]

        # Fields are addressed by position; each one is flattened to a 1-D
        # array whose length is the largest dimension of the stored value.
        for i, field in enumerate(a.dtype.names):
            if field == 'train':
                x_train = a[i].reshape(max(a[i].shape))
            elif field == 'test':
                x_test = a[i].reshape(max(a[i].shape))
            elif field == 'trainlabels':
                y_train = a[i].reshape(max(a[i].shape))
            elif field == 'testlabels':
                y_test = a[i].reshape(max(a[i].shape))

        # Number of variables is read from the first training series.
        n_var = x_train[0].shape[0]

        max_length = get_func_length(x_train, x_test, func=max)
        min_length = get_func_length(x_train, x_test, func=min)

        print(dataset_name, 'max', max_length, 'min', min_length)
        print()

        x_train = transform_to_same_length(x_train, n_var, max_length)
        x_test = transform_to_same_length(x_test, n_var, max_length)

        if os.path.exists(out_dir):
            # Already converted: skip this dataset only. Returning here
            # would silently drop every dataset that follows it in the list.
            continue
        os.makedirs(out_dir)

        # save them
        np.save(out_dir + 'x_train.npy', x_train)
        np.save(out_dir + 'y_train.npy', y_train)
        np.save(out_dir + 'x_test.npy', x_test)
        np.save(out_dir + 'y_test.npy', y_test)

        print('Done')
279 | 
280 | 
def calculate_metrics(y_true, y_pred, duration, y_true_val=None, y_pred_val=None):
    """Build a one-row DataFrame of classification metrics.

    Parameters
    ----------
    y_true, y_pred : true and predicted labels passed to the sklearn scorers.
    duration : value stored unchanged in the ``duration`` column.
    y_true_val, y_pred_val : optional validation labels; when ``y_true_val``
        is given an ``accuracy_val`` column is appended.

    Returns
    -------
    pd.DataFrame
        Single row with columns ``precision`` and ``recall`` (macro
        averaged), ``accuracy``, ``duration`` and, optionally,
        ``accuracy_val``.
    """
    # ``float`` replaces the ``np.float`` alias, which NumPy 1.24 removed.
    res = pd.DataFrame(data=np.zeros((1, 4), dtype=float), index=[0],
                       columns=['precision', 'accuracy', 'recall', 'duration'])
    res['precision'] = precision_score(y_true, y_pred, average='macro')
    res['accuracy'] = accuracy_score(y_true, y_pred)

    if y_true_val is not None:
        # this is useful when transfer learning is used with cross validation
        res['accuracy_val'] = accuracy_score(y_true_val, y_pred_val)

    res['recall'] = recall_score(y_true, y_pred, average='macro')
    res['duration'] = duration
    return res
294 | 
295 | 
def save_test_duration(file_name, test_duration):
    """Write ``test_duration`` to ``file_name`` as a one-row CSV.

    The file has a single ``test_duration`` column and no index column.
    """
    # ``float`` replaces the ``np.float`` alias, which NumPy 1.24 removed.
    res = pd.DataFrame(data=np.zeros((1, 1), dtype=float), index=[0],
                       columns=['test_duration'])
    res['test_duration'] = test_duration
    res.to_csv(file_name, index=False)
301 | 
302 | 
def generate_results_csv(output_file_name, root_dir):
    """Collect every ``df_metrics.csv`` under ``<root_dir>/results``.

    For each classifier in ``CLASSIFIERS``, archive in ``ARCHIVE_NAMES``,
    iteration below ``ITERATIONS`` and dataset of the archive, the file
    ``<root_dir>/results/<classifier>/<archive>[_itr_<n>]/<dataset>/df_metrics.csv``
    is read when it exists (iteration 0 has no ``_itr_`` suffix). All rows
    are concatenated and written to ``root_dir + output_file_name``.

    Parameters
    ----------
    output_file_name : name appended directly to ``root_dir`` (no separator
        is inserted) to form the output CSV path.
    root_dir : base directory of the archives and results.

    Returns
    -------
    pd.DataFrame
        Mean ``accuracy`` per ``(classifier_name, archive_name,
        dataset_name)``, i.e. averaged over iterations.
    """
    # ``float`` replaces the ``np.float`` alias, which NumPy 1.24 removed.
    res = pd.DataFrame(data=np.zeros((0, 7), dtype=float), index=[],
                       columns=['classifier_name', 'archive_name', 'dataset_name',
                                'precision', 'accuracy', 'recall', 'duration'])
    for classifier_name in CLASSIFIERS:
        for archive_name in ARCHIVE_NAMES:
            # Only the dataset names of the archive are used below.
            datasets_dict = read_all_datasets(root_dir, archive_name)
            for it in range(ITERATIONS):
                curr_archive_name = archive_name
                if it != 0:
                    curr_archive_name = curr_archive_name + '_itr_' + str(it)
                for dataset_name in datasets_dict:
                    metrics_path = root_dir + '/results/' + classifier_name + '/' \
                                   + curr_archive_name + '/' + dataset_name + '/' + 'df_metrics.csv'
                    if not os.path.exists(metrics_path):
                        continue
                    df_metrics = pd.read_csv(metrics_path)
                    df_metrics['classifier_name'] = classifier_name
                    # The base archive name is stored so that all iterations
                    # of a dataset fall into the same group below.
                    df_metrics['archive_name'] = archive_name
                    df_metrics['dataset_name'] = dataset_name
                    res = pd.concat((res, df_metrics), axis=0, sort=False)

    res.to_csv(root_dir + output_file_name, index=False)
    # aggregate the accuracy for iterations on same dataset
    res = pd.DataFrame({
        'accuracy': res.groupby(
            ['classifier_name', 'archive_name', 'dataset_name'])['accuracy'].mean()
    }).reset_index()

    return res
333 | 
334 | 
def plot_epochs_metric(hist, file_name, metric='loss'):
    """Plot a training metric and its ``val_`` counterpart and save the figure.

    ``hist.history[metric]`` and ``hist.history['val_' + metric]`` are drawn
    as two curves labelled 'train' and 'val'; the figure is written to
    ``file_name`` and then closed.
    """
    fig, ax = plt.subplots()

    for curve in (metric, 'val_' + metric):
        ax.plot(hist.history[curve])

    ax.set_title('model ' + metric)
    ax.set_ylabel(metric, fontsize='large')
    ax.set_xlabel('epoch', fontsize='large')
    ax.legend(['train', 'val'], loc='upper left')

    fig.savefig(file_name, bbox_inches='tight')
    plt.close(fig)
345 | 
346 | 
def save_logs(output_directory, hist, y_pred, y_true, duration,
              lr=True, y_true_val=None, y_pred_val=None,
              calc_ASR=False, y_pred_backdoor=None, y_target=None):
    """Write the training history, test metrics and best-epoch summary.

    Three CSV files are written by appending their names directly to
    ``output_directory``: ``history.csv`` (``hist.history`` as a table),
    ``df_metrics.csv`` (output of ``calculate_metrics``) and
    ``df_best_model.csv`` (values of the epoch with the highest
    ``val_accuracy``).

    Parameters
    ----------
    output_directory : path prefix for the output files; no separator is
        inserted between it and the file names.
    hist : object whose ``history`` mapping provides ``loss``, ``val_loss``,
        ``accuracy``, ``val_accuracy`` and, when ``lr`` is true, ``lr``.
    y_pred, y_true, duration, y_true_val, y_pred_val : forwarded to
        ``calculate_metrics``.
    lr : when ``True`` the learning rate of the best epoch is recorded,
        otherwise that column keeps its initial 0.0.
    calc_ASR : when true a ``best_model_ASR`` column is added, computed as
        ``accuracy_score(y_pred_backdoor, y_target)``.
    y_pred_backdoor, y_target : labels used only when ``calc_ASR`` is true.

    Returns
    -------
    pd.DataFrame
        The metrics frame that was written to ``df_metrics.csv``.
    """
    hist_df = pd.DataFrame(hist.history)
    hist_df.to_csv(output_directory + 'history.csv', index=False)

    df_metrics = calculate_metrics(y_true, y_pred, duration, y_true_val, y_pred_val)
    df_metrics.to_csv(output_directory + 'df_metrics.csv', index=False)

    # The "best" epoch is the first one reaching the maximum val_accuracy.
    index_best_model = hist_df['val_accuracy'].idxmax()
    row_best_model = hist_df.loc[index_best_model]

    # ``float`` replaces the ``np.float`` alias, which NumPy 1.24 removed.
    df_best_model = pd.DataFrame(data=np.zeros((1, 6), dtype=float), index=[0],
                                 columns=['best_model_train_loss', 'best_model_val_loss', 'best_model_train_acc',
                                          'best_model_val_acc', 'best_model_learning_rate', 'best_model_nb_epoch'])

    if calc_ASR:
        df_best_model['best_model_ASR'] = accuracy_score(y_pred_backdoor, y_target)

    df_best_model['best_model_train_loss'] = row_best_model['loss']
    df_best_model['best_model_val_loss'] = row_best_model['val_loss']
    df_best_model['best_model_train_acc'] = row_best_model['accuracy']
    df_best_model['best_model_val_acc'] = row_best_model['val_accuracy']
    # Equality (not truthiness) is kept on purpose so that non-bool values
    # passed as ``lr`` behave exactly as before.
    if lr == True:  # noqa: E712
        df_best_model['best_model_learning_rate'] = row_best_model['lr']
    df_best_model['best_model_nb_epoch'] = index_best_model

    df_best_model.to_csv(output_directory + 'df_best_model.csv', index=False)

    # for FCN there is no hyperparameters fine tuning - everything is static in code

    return df_metrics
379 | 


--------------------------------------------------------------------------------