├── README.md
└── anonimyGAN.py

/README.md:
--------------------------------------------------------------------------------
# Prerequisites
- scikit-learn (version 0.18)
- Keras (version 2.0.6)
- TensorFlow (version 1.11.0)
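# Usage
Fill in the dataset paths and target-classifier hooks marked `TODO` in
`anonimyGAN.py` (`self.testfile`, `self.data`, `get_pretrainModel`,
`get_target_features`), then run:

    python anonimyGAN.py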
--------------------------------------------------------------------------------
/anonimyGAN.py:
--------------------------------------------------------------------------------
from keras.models import Sequential

from scipy import stats
from scipy.signal import butter, lfilter

from sklearn.preprocessing import MaxAbsScaler
from sklearn.metrics import roc_curve, auc

import pandas as pd  # CSV file I/O (e.g. pd.read_csv) for the dataset hooks
import numpy as np
import tensorflow as tf

import matplotlib.pyplot as plt

import random
import os

GPUID = 1
os.environ["CUDA_VISIBLE_DEVICES"] = str(GPUID)

class ANOMIGAN():
    def __init__(self):
        self.testfile = None  # TODO: set path to TEST_FILE
        self.data = None      # TODO: set path to TRAIN_FILE
        self.num_feature = 30
        self.X_test = 0
        self.X_gen = 0
        self.scaler = MaxAbsScaler()
        self.input_shape = (-1, -1)
        self.latent_dim = 100

        # graph placeholders, created in main() after tf.reset_default_graph()
        self.C = None
        self.C_prime = None
        self.random = None
        self.loss = None

        # pretrained target classifier, loaded in get_pretrainModel()
        self.preTrainedModel = Sequential()

        # hyperparameters for the loss
        self.lambda_a = 0.5
        self.lambda_b = 1 - self.lambda_a
        self.confidence = 1.0
        self.batch_size = 32
        self.num_variance = 5

        # temp lists
        self.bList = []
        self.aList = []

        self.bFpr = []
        self.bTpr = []
        self.bThresholds = []

        self.aFpr = []
        self.aTpr = []
        self.aThresholds = []

        self.t_var = {}

    ######## drawing functions ###############
    def butter_lowpass_filter(self, data, cutoff, fs, order=5):
        b, a = self.butter_lowpass(cutoff, fs, order=order)
        y = lfilter(b, a, data)
        return y

    def butter_lowpass(self, cutoff, fs, order=5):
        nyq = 0.5 * fs
        normal_cutoff = cutoff / nyq
        b, a = butter(order, normal_cutoff, btype='low', analog=False)
        return b, a

    def drawLoss(self, S_loss_list, E_loss_list):
        # Filter requirements.
        order = 6
        fs = 30.0       # sample rate, Hz
        cutoff = 3.667  # desired cutoff frequency of the filter, Hz

        # low-pass filter the raw loss curves so the plot is readable
        s_filter = self.butter_lowpass_filter(S_loss_list, cutoff, fs, order)
        d_filter = self.butter_lowpass_filter(E_loss_list, cutoff, fs, order)

        ylim = [0, 3]
        f = plt.figure(tight_layout=True)
        ax = f.add_subplot(111, ylim=ylim)
        ax.set_xlabel("Epochs", fontsize=20)
        ax.set_ylabel("Loss", fontsize=20)
        ax.plot(s_filter, label='Discriminator', color='blue', linewidth=1, linestyle='--')
        ax.plot(d_filter, label='Encoder', color='green', linewidth=1, alpha=0.5)
        ax.legend(loc=1, fontsize=15)

        plt.show()

    def drawAccuracyPlot(self):
        ylim = [0, 105]
        f = plt.figure(tight_layout=True)
        ax = f.add_subplot(111, ylim=ylim)
        ax.set_xlabel("Random Iterative Steps", fontsize=20)
        ax.set_ylabel("Accuracy", fontsize=20)
        plt.plot(self.bList, label='Original Samples', color='blue', linewidth=1, linestyle='--')
        plt.plot(self.aList, label='Generated Samples', color='green', linewidth=1)
        plt.legend()

        plt.show()

    def drawRocPlot(self):
        fpr1, tpr1, thresholds1 = self.bFpr, self.bTpr, self.bThresholds
        roc_auc1 = auc(fpr1, tpr1)

        fpr2, tpr2, thresholds2 = self.aFpr, self.aTpr, self.aThresholds
        roc_auc2 = auc(fpr2, tpr2)

        plt.figure()
        plt.plot(fpr1, tpr1, color='blue', linestyle='--', linewidth=2,
                 label='ROC curve with original samples (area = %0.2f)' % roc_auc1)
        plt.plot(fpr2, tpr2, color='green', linewidth=1,
                 label='ROC curve with generated samples (area = %0.2f)' % roc_auc2)
        plt.plot([0, 1], [0, 1], color='black', lw=1, linestyle=':')
        plt.xlim([0.0, 1.0])
        plt.ylim([0.0, 1.05])
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.legend(loc="lower right")
        plt.show()

    ######## target classifier model functions ###############
    def get_pretrainModel(self):
        # TODO: use your model API to load the pretrained target classifier
        # into self.preTrainedModel (see the sketch below for one option)
        pass

    def get_target_features(self):
        # TODO: define the input features X and labels Y of the target data
        # (see the sketch below for one option)
        X = 1  # define input features X
        Y = 1  # define labels of input features X
        return X, Y
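    # The two hooks above are left unimplemented in this repository. Below is
    # a minimal, hypothetical sketch of one way to fill them in -- not the
    # authors' actual setup. It assumes sklearn's breast-cancer dataset
    # (30 features, matching self.num_feature) and a small Keras classifier;
    # the 'pretrained.h5' weights file is a made-up name.
    #
    #     from keras.layers import Dense
    #     from sklearn.datasets import load_breast_cancer
    #
    #     def get_pretrainModel(self):
    #         model = Sequential()
    #         model.add(Dense(16, activation='relu', input_dim=self.num_feature))
    #         model.add(Dense(1, activation='sigmoid'))
    #         model.compile(loss='binary_crossentropy', optimizer='adam',
    #                       metrics=['accuracy'])
    #         model.load_weights('pretrained.h5')  # hypothetical weights file
    #         self.preTrainedModel = model
    #
    #     def get_target_features(self):
    #         data = load_breast_cancer()
    #         # fitting the scaler here lets get_inversed() undo the scaling
    #         X = self.scaler.fit_transform(data.data)
    #         Y = data.target
    #         return X, Y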
    ######## AnomiGAN model functions ###############
    def discriminator(self, x):
        # 1-D convolutional discriminator over the feature vector
        with tf.variable_scope("discriminator"):
            x_reshaped = tf.reshape(x, (-1, self.num_feature, 1))
            conv1 = tf.layers.conv1d(x_reshaped, filters=32, kernel_size=4,
                                     strides=2,
                                     padding='VALID',
                                     activation=tf.nn.relu)
            conv2 = tf.layers.conv1d(conv1, filters=10,
                                     kernel_size=2,
                                     strides=1,
                                     padding='SAME',
                                     activation=tf.nn.tanh)
            conv3 = tf.layers.conv1d(conv2, filters=20,
                                     kernel_size=2,
                                     strides=1,
                                     padding='SAME',
                                     activation=tf.nn.tanh)
            conv4 = tf.layers.conv1d(conv3, filters=30,
                                     kernel_size=2,
                                     strides=1,
                                     padding='SAME',
                                     activation=tf.nn.tanh)
            flatten = tf.layers.flatten(conv4)
            out = tf.layers.dense(flatten, self.num_feature, activation=tf.nn.relu)
            return out

    def operation_mode(self, x, message, mode):
        # mix the sample x with the random message: bitwise XOR of the
        # integer casts (mode 1) or an elementwise product modulo max(x)
        if mode == 1:
            dtype = x.dtype
            x_btensor = tf.cast(x, tf.int32)
            m_btensor = tf.cast(message, tf.int32)
            xor = tf.bitwise.bitwise_xor(x_btensor, m_btensor)
            random = tf.cast(xor, dtype)
        else:
            random = x * message % tf.reduce_max(x)
        return random
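    # A toy numpy illustration (not part of the model) of what the two
    # modes above compute; the input values are made up:
    #
    #     import numpy as np
    #     x = np.array([3.0, 7.0, 2.0])
    #     message = np.array([5.0, 2.0, 4.0])
    #     # mode 1: XOR of the integer casts, cast back to float
    #     xor = np.bitwise_xor(x.astype(np.int32), message.astype(np.int32))
    #     print(xor.astype(x.dtype))       # [6. 5. 6.]
    #     # other modes: elementwise product, modulo the max of x
    #     print(x * message % np.amax(x))  # [1. 0. 1.]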
    def encoder(self, x, message, mode):
        # mixes x with the random message, then passes it through a 1-D
        # convolutional bottleneck back to num_feature outputs
        with tf.variable_scope("encoder"):
            random = self.operation_mode(x, message, mode)
            x_flatten = tf.layers.flatten(random)
            fc1 = tf.reshape(x_flatten, (-1, self.num_feature, 1))
            conv1d_t1 = tf.layers.conv1d(fc1, filters=64, kernel_size=4,
                                         strides=2,
                                         padding='VALID',
                                         activation=tf.nn.relu)
            bn1 = tf.layers.batch_normalization(conv1d_t1)
            conv1d_t2 = tf.layers.conv1d(bn1, filters=32,
                                         kernel_size=2,
                                         strides=1,
                                         padding='SAME',
                                         activation=tf.nn.tanh)

            bn2 = tf.layers.batch_normalization(conv1d_t2)
            conv1d_t3 = tf.layers.conv1d(bn2, filters=16,
                                         kernel_size=2,
                                         strides=1,
                                         padding='SAME',
                                         activation=tf.nn.tanh)

            bn3 = tf.layers.batch_normalization(conv1d_t3)
            conv1d_t4 = tf.layers.conv1d(bn3, filters=8,
                                         kernel_size=2,
                                         strides=1,
                                         padding='SAME',
                                         activation=tf.nn.tanh)

            bn4 = tf.layers.batch_normalization(conv1d_t4)
            conv1d_t5 = tf.layers.conv1d(bn4, filters=4,
                                         kernel_size=2,
                                         strides=1,
                                         padding='SAME',
                                         activation=tf.nn.tanh)

            bn5 = tf.layers.batch_normalization(conv1d_t5)
            conv1d_t6 = tf.layers.conv1d(bn5, filters=8,
                                         kernel_size=2,
                                         strides=1,
                                         padding='SAME',
                                         activation=tf.nn.tanh)

            bn6 = tf.layers.batch_normalization(conv1d_t6)
            conv1d_t7 = tf.layers.conv1d(bn6, filters=16,
                                         kernel_size=2,
                                         strides=1,
                                         padding='SAME',
                                         activation=tf.nn.tanh)

            bn7 = tf.layers.batch_normalization(conv1d_t7)
            conv1d_t8 = tf.layers.conv1d(bn7, filters=self.num_feature,
                                         kernel_size=2,
                                         strides=1,
                                         padding='SAME',
                                         activation=tf.nn.tanh)
            flatten = tf.layers.flatten(conv1d_t8)
            out = tf.layers.dense(flatten, self.num_feature, activation=tf.nn.relu)
            return out

    def get_solvers(self, learning_rate=1e-3, beta1=0.5):
        E_solver = tf.train.AdamOptimizer(learning_rate, beta1)
        S_solver = tf.train.AdamOptimizer(learning_rate, beta1)
        return E_solver, S_solver

    def train(self, sess, E_train_step, S_train_step, E_loss, S_loss, epochs=3000, batch_size=10):
        X, Y = self.get_target_features()
        S_loss_list, E_loss_list = [], []
        for it in range(epochs):
            minibatch, labels = self.get_shuffle_batch(X, Y, batch_size)
            minibatch = minibatch.reshape(batch_size, -1)

            # snapshot encoder weights over the later training iterations
            if it > (epochs - 2000):
                self.store_parameters(sess)

            # randomize original data
            fake = np.random.normal(0, 1, (batch_size, self.num_feature))
            randomized = sess.run(self.C_prime, feed_dict={self.C: minibatch, self.random: fake})
            loss = self.target_classifier(randomized, labels, batch_size)

            _, S_loss_curr = sess.run([S_train_step, S_loss],
                                      feed_dict={self.C: minibatch, self.random: fake, self.loss: loss})

            _, E_loss_curr = sess.run([E_train_step, E_loss],
                                      feed_dict={self.C: minibatch, self.random: fake, self.loss: loss})

            S_loss_list.append(S_loss_curr)
            E_loss_list.append(np.mean(E_loss_curr))

        #self.drawLoss(S_loss_list, E_loss_list)
        print("Train finished")

    def target_classifier(self, fake, fake_label, batch_size=32):
        # accuracy of the pretrained target classifier on the anonymized batch
        scores = self.preTrainedModel.evaluate(fake, fake_label, verbose=0)
        output = np.mean(scores[1])
        return output

    def calculate_loss(self, C, C_prime, logit_real, logit_fake, loss):
        real_label = tf.ones_like(logit_real)
        fake_label = tf.zeros_like(logit_fake)

        loss_S_real = tf.nn.sigmoid_cross_entropy_with_logits(
            labels=real_label, logits=logit_real)
        loss_S_fake = tf.nn.sigmoid_cross_entropy_with_logits(
            labels=fake_label, logits=logit_fake)

        # discriminator loss, with the fake term down-weighted by the
        # target classifier's accuracy on the anonymized batch
        loss_S = tf.reduce_mean(loss_S_real) + (tf.reduce_mean(loss_S_fake) * (1 - tf.reduce_mean(loss)))

        # encoder loss: weighted sum of the L2 distance between original and
        # anonymized samples and the discriminator loss
        C_flatten = tf.layers.flatten(C)
        C_prime_flatten = tf.layers.flatten(C_prime)
        distance = tf.sqrt(tf.reduce_sum(tf.square(C_flatten - C_prime_flatten), axis=1))
        distance = tf.reduce_mean(distance)
        loss_E = (self.lambda_a * (distance * self.confidence)) + (self.lambda_b * loss_S)
        return loss_E, loss_S

    def get_session(self):
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        session = tf.Session(config=config)
        return session

    def get_shuffle_batch(self, X, Y, batch_size=32):
        # pick a random contiguous window of batch_size samples
        idx = random.randint(1, len(X) - batch_size)
        return X[idx:idx + batch_size], Y[idx:idx + batch_size]

    def get_next_batch(self, X, Y, start, end, batch_size=32):
        # sliding windows of batch_size samples, advancing one sample at a time
        X_train = []
        Y_train = []
        for i in range(len(X) - batch_size):
            X_train.append(X[i:i + batch_size])
            Y_train.append(Y[i:i + batch_size])
        return X_train, Y_train

    def anonymize_sample(self, sess, batch_size):
        minibatch, labels = self.get_target_features()
        batch_size = len(labels)
        fake = np.random.normal(0, 1, (batch_size, self.num_feature))
        randomized = sess.run(self.C_prime, feed_dict={self.C: minibatch, self.random: fake})
        scores = self.preTrainedModel.evaluate(randomized, labels, verbose=0)
        self.aList.append(scores[1] * 100)  # accuracy (%) on generated samples
        self.get_inversed(randomized)

    def get_inversed(self, normalized):
        # map anonymized samples back to the original feature scale
        # (requires self.scaler to have been fitted on the training data)
        np.set_printoptions(precision=6, suppress=True)
        inversed = self.scaler.inverse_transform(normalized)
        np.savetxt('fileout.txt', inversed, delimiter=',', fmt='%1.3f')
        return inversed

    def store_parameters(self, sess):
        # snapshot the encoder conv kernels so add_variance() can perturb them
        for i in range(1, 7):
            name = 'encoder/conv1d_' + str(i) + '/kernel:0'
            conv = sess.graph.get_tensor_by_name(name)
            self.t_var[name] = sess.run(conv)

    def add_variance(self, sess, num_var):
        # perturb num_var randomly chosen encoder conv kernels by the
        # variance of their stored snapshots
        for i in range(num_var):
            num = random.randint(1, 6)
            name = 'encoder/conv1d_' + str(num) + '/kernel:0'
            conv = sess.graph.get_tensor_by_name(name)
            var = np.var(self.t_var.get(name), axis=0)
            sess.run(tf.assign(conv, conv + var))

    def get_pvalue(self, a, b):
        # Pearson correlation between original and anonymized samples
        a = a.flatten()
        b = b.flatten()
        r, p = stats.pearsonr(a, b)
        return p
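    # A standalone illustration of get_pvalue() on made-up arrays; r close
    # to 1.0 means the anonymized samples still track the originals:
    #
    #     from scipy import stats
    #     import numpy as np
    #     a = np.array([[1.0, 2.0], [3.0, 4.0]])  # e.g. original samples
    #     b = np.array([[1.1, 1.9], [2.8, 4.2]])  # e.g. anonymized samples
    #     r, p = stats.pearsonr(a.flatten(), b.flatten())
    #     print(r, p)  # r ~ 0.99 for these values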
    def main(self):
        self.get_pretrainModel()

        tf.reset_default_graph()

        self.C = tf.placeholder(tf.float32, [None, self.num_feature])
        self.random = tf.placeholder(tf.float32, [None, self.num_feature])

        self.C_prime = self.encoder(self.C, self.random, mode=2)
        self.loss = tf.placeholder(tf.float32)

        # run the discriminator on real and anonymized samples, sharing weights
        with tf.variable_scope("") as scope:
            logit_real = self.discriminator(self.C)
            scope.reuse_variables()
            logit_fake = self.discriminator(self.C_prime)

        encoder_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, "encoder")
        steganalyzer_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, "discriminator")

        E_solver, S_solver = self.get_solvers()

        E_loss, S_loss = self.calculate_loss(self.C, self.C_prime, logit_real, logit_fake, self.loss)
        E_train_step = E_solver.minimize(E_loss, var_list=encoder_vars)
        S_train_step = S_solver.minimize(S_loss, var_list=steganalyzer_vars)

        sess = self.get_session()
        sess.run(tf.global_variables_initializer())
        self.train(sess, E_train_step, S_train_step, E_loss, S_loss)

        self.add_variance(sess, self.num_variance)
        self.anonymize_sample(sess, self.batch_size)


if __name__ == '__main__':
    anomigan = ANOMIGAN()
    anomigan.main()
--------------------------------------------------------------------------------