├── README.md
└── anonimyGAN.py

/README.md:
--------------------------------------------------------------------------------
# Prerequisites
- scikit-learn (version 0.18)
- Keras (version 2.0.6)
- TensorFlow (version 1.11.0)
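# Usage
Fill in the dataset paths and target-classifier hooks marked `TODO` in
`anonimyGAN.py` (`self.testfile`, `self.data`, `get_pretrainModel`,
`get_target_features`), then run:

    python anonimyGAN.py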
--------------------------------------------------------------------------------
/anonimyGAN.py:
--------------------------------------------------------------------------------
from keras.models import Sequential

from scipy import stats
from scipy.signal import butter, lfilter

from sklearn.preprocessing import MaxAbsScaler
from sklearn.metrics import roc_curve, auc

import pandas as pd  # CSV file I/O (e.g. pd.read_csv) for the dataset hooks
import numpy as np
import tensorflow as tf

import matplotlib.pyplot as plt

import random
import os

GPUID = 1
os.environ["CUDA_VISIBLE_DEVICES"] = str(GPUID)

class ANOMIGAN():
    def __init__(self):
        self.testfile = None  # TODO: set path to TEST_FILE
        self.data = None      # TODO: set path to TRAIN_FILE
        self.num_feature = 30
        self.X_test = 0
        self.X_gen = 0
        self.scaler = MaxAbsScaler()
        self.input_shape = (-1, -1)
        self.latent_dim = 100

        # graph placeholders, created in main() after tf.reset_default_graph()
        self.C = None
        self.C_prime = None
        self.random = None
        self.loss = None

        # pretrained target classifier, loaded in get_pretrainModel()
        self.preTrainedModel = Sequential()

        # hyperparameters for the loss
        self.lambda_a = 0.5
        self.lambda_b = 1 - self.lambda_a
        self.confidence = 1.0
        self.batch_size = 32
        self.num_variance = 5

        # temp lists
        self.bList = []
        self.aList = []

        self.bFpr = []
        self.bTpr = []
        self.bThresholds = []

        self.aFpr = []
        self.aTpr = []
        self.aThresholds = []

        self.t_var = {}

    ######## drawing functions ###############
    def butter_lowpass_filter(self, data, cutoff, fs, order=5):
        b, a = self.butter_lowpass(cutoff, fs, order=order)
        y = lfilter(b, a, data)
        return y

    def butter_lowpass(self, cutoff, fs, order=5):
        nyq = 0.5 * fs
        normal_cutoff = cutoff / nyq
        b, a = butter(order, normal_cutoff, btype='low', analog=False)
        return b, a

    def drawLoss(self, S_loss_list, E_loss_list):
        # Filter requirements.
        order = 6
        fs = 30.0       # sample rate, Hz
        cutoff = 3.667  # desired cutoff frequency of the filter, Hz

        # low-pass filter the raw loss curves so the plot is readable
        s_filter = self.butter_lowpass_filter(S_loss_list, cutoff, fs, order)
        d_filter = self.butter_lowpass_filter(E_loss_list, cutoff, fs, order)

        ylim = [0, 3]
        f = plt.figure(tight_layout=True)
        ax = f.add_subplot(111, ylim=ylim)
        ax.set_xlabel("Epochs", fontsize=20)
        ax.set_ylabel("Loss", fontsize=20)
        ax.plot(s_filter, label='Discriminator', color='blue', linewidth=1, linestyle='--')
        ax.plot(d_filter, label='Encoder', color='green', linewidth=1, alpha=0.5)
        ax.legend(loc=1, fontsize=15)

        plt.show()

    def drawAccuracyPlot(self):
        ylim = [0, 105]
        f = plt.figure(tight_layout=True)
        ax = f.add_subplot(111, ylim=ylim)
        ax.set_xlabel("Random Iterative Steps", fontsize=20)
        ax.set_ylabel("Accuracy", fontsize=20)
        plt.plot(self.bList, label='Original Samples', color='blue', linewidth=1, linestyle='--')
        plt.plot(self.aList, label='Generated Samples', color='green', linewidth=1)
        plt.legend()

        plt.show()

    def drawRocPlot(self):
        fpr1, tpr1, thresholds1 = self.bFpr, self.bTpr, self.bThresholds
        roc_auc1 = auc(fpr1, tpr1)

        fpr2, tpr2, thresholds2 = self.aFpr, self.aTpr, self.aThresholds
        roc_auc2 = auc(fpr2, tpr2)

        plt.figure()
        plt.plot(fpr1, tpr1, color='blue', linestyle='--', linewidth=2,
                 label='ROC curve with original samples (area = %0.2f)' % roc_auc1)
        plt.plot(fpr2, tpr2, color='green', linewidth=1,
                 label='ROC curve with generated samples (area = %0.2f)' % roc_auc2)
        plt.plot([0, 1], [0, 1], color='black', lw=1, linestyle=':')
        plt.xlim([0.0, 1.0])
        plt.ylim([0.0, 1.05])
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.legend(loc="lower right")
        plt.show()

    ######## target classifier model functions ###############
    def get_pretrainModel(self):
        # TODO: use your model API to load the pretrained target classifier
        # into self.preTrainedModel (see the sketch below for one option)
        pass

    def get_target_features(self):
        # TODO: define the input features X and labels Y of the target data
        # (see the sketch below for one option)
        X = 1  # define input features X
        Y = 1  # define labels of input features X
        return X, Y
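    # The two hooks above are left unimplemented in this repository. Below is
    # a minimal, hypothetical sketch of one way to fill them in -- not the
    # authors' actual setup. It assumes sklearn's breast-cancer dataset
    # (30 features, matching self.num_feature) and a small Keras classifier;
    # the 'pretrained.h5' weights file is a made-up name.
    #
    #     from keras.layers import Dense
    #     from sklearn.datasets import load_breast_cancer
    #
    #     def get_pretrainModel(self):
    #         model = Sequential()
    #         model.add(Dense(16, activation='relu', input_dim=self.num_feature))
    #         model.add(Dense(1, activation='sigmoid'))
    #         model.compile(loss='binary_crossentropy', optimizer='adam',
    #                       metrics=['accuracy'])
    #         model.load_weights('pretrained.h5')  # hypothetical weights file
    #         self.preTrainedModel = model
    #
    #     def get_target_features(self):
    #         data = load_breast_cancer()
    #         # fitting the scaler here lets get_inversed() undo the scaling
    #         X = self.scaler.fit_transform(data.data)
    #         Y = data.target
    #         return X, Y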
    ######## AnomiGAN model functions ###############
    def discriminator(self, x):
        # 1-D convolutional discriminator over the feature vector
        with tf.variable_scope("discriminator"):
            x_reshaped = tf.reshape(x, (-1, self.num_feature, 1))
            conv1 = tf.layers.conv1d(x_reshaped, filters=32, kernel_size=4,
                                     strides=2,
                                     padding='VALID',
                                     activation=tf.nn.relu)
            conv2 = tf.layers.conv1d(conv1, filters=10,
                                     kernel_size=2,
                                     strides=1,
                                     padding='SAME',
                                     activation=tf.nn.tanh)
            conv3 = tf.layers.conv1d(conv2, filters=20,
                                     kernel_size=2,
                                     strides=1,
                                     padding='SAME',
                                     activation=tf.nn.tanh)
            conv4 = tf.layers.conv1d(conv3, filters=30,
                                     kernel_size=2,
                                     strides=1,
                                     padding='SAME',
                                     activation=tf.nn.tanh)
            flatten = tf.layers.flatten(conv4)
            out = tf.layers.dense(flatten, self.num_feature, activation=tf.nn.relu)
            return out

    def operation_mode(self, x, message, mode):
        # mix the sample x with the random message: bitwise XOR of the
        # integer casts (mode 1) or an elementwise product modulo max(x)
        if mode == 1:
            dtype = x.dtype
            x_btensor = tf.cast(x, tf.int32)
            m_btensor = tf.cast(message, tf.int32)
            xor = tf.bitwise.bitwise_xor(x_btensor, m_btensor)
            random = tf.cast(xor, dtype)
        else:
            random = x * message % tf.reduce_max(x)
        return random
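    # A toy numpy illustration (not part of the model) of what the two
    # modes above compute; the input values are made up:
    #
    #     import numpy as np
    #     x = np.array([3.0, 7.0, 2.0])
    #     message = np.array([5.0, 2.0, 4.0])
    #     # mode 1: XOR of the integer casts, cast back to float
    #     xor = np.bitwise_xor(x.astype(np.int32), message.astype(np.int32))
    #     print(xor.astype(x.dtype))       # [6. 5. 6.]
    #     # other modes: elementwise product, modulo the max of x
    #     print(x * message % np.amax(x))  # [1. 0. 1.]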
    def encoder(self, x, message, mode):
        # mixes x with the random message, then passes it through a 1-D
        # convolutional bottleneck back to num_feature outputs
        with tf.variable_scope("encoder"):
            random = self.operation_mode(x, message, mode)
            x_flatten = tf.layers.flatten(random)
            fc1 = tf.reshape(x_flatten, (-1, self.num_feature, 1))
            conv1d_t1 = tf.layers.conv1d(fc1, filters=64, kernel_size=4,
                                         strides=2,
                                         padding='VALID',
                                         activation=tf.nn.relu)
            bn1 = tf.layers.batch_normalization(conv1d_t1)
            conv1d_t2 = tf.layers.conv1d(bn1, filters=32,
                                         kernel_size=2,
                                         strides=1,
                                         padding='SAME',
                                         activation=tf.nn.tanh)

            bn2 = tf.layers.batch_normalization(conv1d_t2)
            conv1d_t3 = tf.layers.conv1d(bn2, filters=16,
                                         kernel_size=2,
                                         strides=1,
                                         padding='SAME',
                                         activation=tf.nn.tanh)

            bn3 = tf.layers.batch_normalization(conv1d_t3)
            conv1d_t4 = tf.layers.conv1d(bn3, filters=8,
                                         kernel_size=2,
                                         strides=1,
                                         padding='SAME',
                                         activation=tf.nn.tanh)

            bn4 = tf.layers.batch_normalization(conv1d_t4)
            conv1d_t5 = tf.layers.conv1d(bn4, filters=4,
                                         kernel_size=2,
                                         strides=1,
                                         padding='SAME',
                                         activation=tf.nn.tanh)

            bn5 = tf.layers.batch_normalization(conv1d_t5)
            conv1d_t6 = tf.layers.conv1d(bn5, filters=8,
                                         kernel_size=2,
                                         strides=1,
                                         padding='SAME',
                                         activation=tf.nn.tanh)

            bn6 = tf.layers.batch_normalization(conv1d_t6)
            conv1d_t7 = tf.layers.conv1d(bn6, filters=16,
                                         kernel_size=2,
                                         strides=1,
                                         padding='SAME',
                                         activation=tf.nn.tanh)

            bn7 = tf.layers.batch_normalization(conv1d_t7)
            conv1d_t8 = tf.layers.conv1d(bn7, filters=self.num_feature,
                                         kernel_size=2,
                                         strides=1,
                                         padding='SAME',
                                         activation=tf.nn.tanh)
            flatten = tf.layers.flatten(conv1d_t8)
            out = tf.layers.dense(flatten, self.num_feature, activation=tf.nn.relu)
            return out

    def get_solvers(self, learning_rate=1e-3, beta1=0.5):
        E_solver = tf.train.AdamOptimizer(learning_rate, beta1)
        S_solver = tf.train.AdamOptimizer(learning_rate, beta1)
        return E_solver, S_solver

    def train(self, sess, E_train_step, S_train_step, E_loss, S_loss, epochs=3000, batch_size=10):
        X, Y = self.get_target_features()
        S_loss_list, E_loss_list = [], []
        for it in range(epochs):
            minibatch, labels = self.get_shuffle_batch(X, Y, batch_size)
            minibatch = minibatch.reshape(batch_size, -1)

            # snapshot encoder weights over the later training iterations
            if it > (epochs - 2000):
                self.store_parameters(sess)

            # randomize original data
            fake = np.random.normal(0, 1, (batch_size, self.num_feature))
            randomized = sess.run(self.C_prime, feed_dict={self.C: minibatch, self.random: fake})
            loss = self.target_classifier(randomized, labels, batch_size)

            _, S_loss_curr = sess.run([S_train_step, S_loss],
                                      feed_dict={self.C: minibatch, self.random: fake, self.loss: loss})

            _, E_loss_curr = sess.run([E_train_step, E_loss],
                                      feed_dict={self.C: minibatch, self.random: fake, self.loss: loss})

            S_loss_list.append(S_loss_curr)
            E_loss_list.append(np.mean(E_loss_curr))

        #self.drawLoss(S_loss_list, E_loss_list)
        print("Train finished")

    def target_classifier(self, fake, fake_label, batch_size=32):
        # accuracy of the pretrained target classifier on the anonymized batch
        scores = self.preTrainedModel.evaluate(fake, fake_label, verbose=0)
        output = np.mean(scores[1])
        return output

    def calculate_loss(self, C, C_prime, logit_real, logit_fake, loss):
        real_label = tf.ones_like(logit_real)
        fake_label = tf.zeros_like(logit_fake)

        loss_S_real = tf.nn.sigmoid_cross_entropy_with_logits(
            labels=real_label, logits=logit_real)
        loss_S_fake = tf.nn.sigmoid_cross_entropy_with_logits(
            labels=fake_label, logits=logit_fake)

        # discriminator loss, with the fake term down-weighted by the
        # target classifier's accuracy on the anonymized batch
        loss_S = tf.reduce_mean(loss_S_real) + (tf.reduce_mean(loss_S_fake) * (1 - tf.reduce_mean(loss)))

        # encoder loss: weighted sum of the L2 distance between original and
        # anonymized samples and the discriminator loss
        C_flatten = tf.layers.flatten(C)
        C_prime_flatten = tf.layers.flatten(C_prime)
        distance = tf.sqrt(tf.reduce_sum(tf.square(C_flatten - C_prime_flatten), axis=1))
        distance = tf.reduce_mean(distance)
        loss_E = (self.lambda_a * (distance * self.confidence)) + (self.lambda_b * loss_S)
        return loss_E, loss_S

    def get_session(self):
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        session = tf.Session(config=config)
        return session

    def get_shuffle_batch(self, X, Y, batch_size=32):
        # pick a random contiguous window of batch_size samples
        idx = random.randint(1, len(X) - batch_size)
        return X[idx:idx + batch_size], Y[idx:idx + batch_size]

    def get_next_batch(self, X, Y, start, end, batch_size=32):
        # sliding windows of batch_size samples, advancing one sample at a time
        X_train = []
        Y_train = []
        for i in range(len(X) - batch_size):
            X_train.append(X[i:i + batch_size])
            Y_train.append(Y[i:i + batch_size])
        return X_train, Y_train

    def anonymize_sample(self, sess, batch_size):
        minibatch, labels = self.get_target_features()
        batch_size = len(labels)
        fake = np.random.normal(0, 1, (batch_size, self.num_feature))
        randomized = sess.run(self.C_prime, feed_dict={self.C: minibatch, self.random: fake})
        scores = self.preTrainedModel.evaluate(randomized, labels, verbose=0)
        self.aList.append(scores[1] * 100)  # accuracy (%) on generated samples
        self.get_inversed(randomized)

    def get_inversed(self, normalized):
        # map anonymized samples back to the original feature scale
        # (requires self.scaler to have been fitted on the training data)
        np.set_printoptions(precision=6, suppress=True)
        inversed = self.scaler.inverse_transform(normalized)
        np.savetxt('fileout.txt', inversed, delimiter=',', fmt='%1.3f')
        return inversed

    def store_parameters(self, sess):
        # snapshot the encoder conv kernels so add_variance() can perturb them
        for i in range(1, 7):
            name = 'encoder/conv1d_' + str(i) + '/kernel:0'
            conv = sess.graph.get_tensor_by_name(name)
            self.t_var[name] = sess.run(conv)

    def add_variance(self, sess, num_var):
        # perturb num_var randomly chosen encoder conv kernels by the
        # variance of their stored snapshots
        for i in range(num_var):
            num = random.randint(1, 6)
            name = 'encoder/conv1d_' + str(num) + '/kernel:0'
            conv = sess.graph.get_tensor_by_name(name)
            var = np.var(self.t_var.get(name), axis=0)
            sess.run(tf.assign(conv, conv + var))

    def get_pvalue(self, a, b):
        # Pearson correlation between original and anonymized samples
        a = a.flatten()
        b = b.flatten()
        r, p = stats.pearsonr(a, b)
        return p
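    # A standalone illustration of get_pvalue() on made-up arrays; r close
    # to 1.0 means the anonymized samples still track the originals:
    #
    #     from scipy import stats
    #     import numpy as np
    #     a = np.array([[1.0, 2.0], [3.0, 4.0]])  # e.g. original samples
    #     b = np.array([[1.1, 1.9], [2.8, 4.2]])  # e.g. anonymized samples
    #     r, p = stats.pearsonr(a.flatten(), b.flatten())
    #     print(r, p)  # r ~ 0.99 for these values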
    def main(self):
        self.get_pretrainModel()

        tf.reset_default_graph()

        self.C = tf.placeholder(tf.float32, [None, self.num_feature])
        self.random = tf.placeholder(tf.float32, [None, self.num_feature])

        self.C_prime = self.encoder(self.C, self.random, mode=2)
        self.loss = tf.placeholder(tf.float32)

        # run the discriminator on real and anonymized samples, sharing weights
        with tf.variable_scope("") as scope:
            logit_real = self.discriminator(self.C)
            scope.reuse_variables()
            logit_fake = self.discriminator(self.C_prime)

        encoder_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, "encoder")
        steganalyzer_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, "discriminator")

        E_solver, S_solver = self.get_solvers()

        E_loss, S_loss = self.calculate_loss(self.C, self.C_prime, logit_real, logit_fake, self.loss)
        E_train_step = E_solver.minimize(E_loss, var_list=encoder_vars)
        S_train_step = S_solver.minimize(S_loss, var_list=steganalyzer_vars)

        sess = self.get_session()
        sess.run(tf.global_variables_initializer())
        self.train(sess, E_train_step, S_train_step, E_loss, S_loss)

        self.add_variance(sess, self.num_variance)
        self.anonymize_sample(sess, self.batch_size)


if __name__ == '__main__':
    anomigan = ANOMIGAN()
    anomigan.main()
--------------------------------------------------------------------------------