├── README.md
└── android malware
    ├── dynamic
    │   ├── newandroid
    │   │   ├── all.py
    │   │   ├── lstm1.py
    │   │   ├── lstm1test.py
    │   │   ├── lstm2.py
    │   │   ├── lstm2test.py
    │   │   ├── lstm3.py
    │   │   ├── lstm3test.py
    │   │   ├── lstm4.py
    │   │   ├── lstm4test.py
    │   │   ├── lstm6.py
    │   │   └── lstm6test.py
    │   └── newandroidrnn
    │       ├── lstm1.py
    │       ├── lstm1test.py
    │       ├── lstm2.py
    │       ├── lstm2test.py
    │       ├── lstm3.py
    │       ├── lstm3test.py
    │       ├── lstm4.py
    │       ├── lstm4test.py
    │       ├── lstm6.py
    │       └── lstm6test.py
    └── static
        └── android malware classification
            ├── crossval1.py
            ├── crossval2.py
            ├── ker1.py
            ├── ker2.py
            ├── ker3.py
            ├── val.py
            ├── val1.py
            └── val2.py
/README.md:
--------------------------------------------------------------------------------
1 | # Android-Malware-Detection
2 | 
3 | Please cite the following paper if you use this code as part of your research:
4 | 
5 | "Detecting Android Malware using Long Short-term Memory (LSTM)," Journal of Intelligent and Fuzzy Systems, IOS Press.
6 | 
7 | DOI
8 | 
--------------------------------------------------------------------------------
/android malware/dynamic/newandroid/all.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pandas as pd
6 | from sklearn import svm
8 | from sklearn import metrics
9 | from sklearn.linear_model import LogisticRegression
11 | from sklearn.neighbors import KNeighborsClassifier
12 | from sklearn.tree import DecisionTreeClassifier
13 | from sklearn.metrics import (precision_score, recall_score, f1_score, accuracy_score)
14 | from sklearn.ensemble import AdaBoostClassifier
15 | from sklearn.ensemble import RandomForestClassifier
16 | from sklearn.preprocessing import Normalizer
17 | 
18 | # NOTE: the file names appear to be swapped -- the models are fitted on new/Testing.csv and scored on new/Training.csv.
19 | traindata = pd.read_csv('new/Testing.csv', header=None)
20 | testdata = pd.read_csv('new/Training.csv', header=None)
21 | 
22 | 
23 | X = traindata.iloc[:,0:42]
24 | Y = traindata.iloc[:,42]
25 | C = testdata.iloc[:,42]
26 | T = testdata.iloc[:,0:42]
27 | 
28 | scaler = Normalizer().fit(X)
29 | traindata = scaler.transform(X)
30 | 
31 | scaler = Normalizer().fit(T)
32 | testdata = scaler.transform(T)
33 | 
34 | trainlabel = np.array(Y)
35 | testlabel = np.array(C)
36 | 
37 | 
38 | model = KNeighborsClassifier()
39 | model.fit(traindata, trainlabel)
40 | expected = testlabel
41 | predicted = model.predict(testdata)
42 | print("--------------------k-nearest predicted--------------------")
52 | accuracy = accuracy_score(expected, predicted)
53 | recall = recall_score(expected, predicted, average="binary")
average="binary") 54 | precision = precision_score(expected, predicted) 55 | f1 = f1_score(expected, predicted , average="binary") 56 | print("--------------------------------------") 57 | cm = metrics.confusion_matrix(expected, predicted) 58 | print("==============================================") 59 | print(cm) 60 | tpr = float(cm[0][0])/np.sum(cm[0]) 61 | fpr = float(cm[1][1])/np.sum(cm[1]) 62 | print(tpr) 63 | print(fpr) 64 | print("==================================================") 65 | print(metrics.confusion_matrix(expected, predicted)) 66 | print("**************************k-nearest**************************") 67 | print("Accuracy") 68 | print("%.3f" %accuracy) 69 | print("precision") 70 | print("%.3f" % precision) 71 | print("((((((((((((((((((((((((") 72 | print("recall") 73 | print("%.3f" %recall) 74 | print("f-score") 75 | print("%.3f" %f1) 76 | print("fpr") 77 | print("%.3f" %fpr) 78 | print("tpr") 79 | print("%.3f" %tpr) 80 | print("***************************************************************") 81 | 82 | 83 | 84 | model = AdaBoostClassifier(n_estimators=500) 85 | model.fit(traindata, trainlabel) 86 | 87 | # make predictions 88 | expected = testlabel 89 | predicted = model.predict(testdata) 90 | 91 | predicted1 = model.predict_proba(testdata) 92 | np.savetxt("res/rnn.csv", predicted1[:,1]) 93 | # summarize the fit of the model 94 | #predicted1 = model.predict_probs(testdata) 95 | accuracy = accuracy_score(expected, predicted) 96 | recall = recall_score(expected, predicted, average="binary") 97 | precision = precision_score(expected, predicted , average="binary") 98 | f1 = f1_score(expected, predicted , average="binary") 99 | 100 | 101 | print("**************************Adaboost**************************") 102 | print("==============================================") 103 | cm = metrics.confusion_matrix(expected, predicted) 104 | 105 | print(cm) 106 | tpr = float(cm[0][0])/np.sum(cm[0]) 107 | fpr = float(cm[1][1])/np.sum(cm[1]) 108 | print(tpr) 109 | print(fpr) 110 | print("==================================================") 111 | 112 | print("Accuracy") 113 | print("%.3f" %accuracy) 114 | print("precision") 115 | print("%.3f" %precision) 116 | print("recall") 117 | print("%.3f" %recall) 118 | print("f-score") 119 | print("%.3f" %f1) 120 | print("fpr") 121 | print("%.3f" %fpr) 122 | print("tpr") 123 | print("%.3f" %tpr) 124 | print("***************************************************************") 125 | 126 | 127 | model = RandomForestClassifier(n_estimators=500) 128 | model.fit(traindata, trainlabel) 129 | 130 | # make predictions 131 | expected = testlabel 132 | predicted = model.predict(testdata) 133 | # summarize the fit of the model 134 | predicted1 = model.predict_proba(testdata) 135 | np.savetxt("res/svm.csv", predicted1[:,1]) 136 | accuracy = accuracy_score(expected, predicted) 137 | recall = recall_score(expected, predicted, average="binary") 138 | precision = precision_score(expected, predicted , average="binary") 139 | f1 = f1_score(expected, predicted , average="binary") 140 | 141 | 142 | print("**************************Rf**************************") 143 | print("==============================================") 144 | cm = metrics.confusion_matrix(expected, predicted) 145 | 146 | print(cm) 147 | tpr = float(cm[0][0])/np.sum(cm[0]) 148 | fpr = float(cm[1][1])/np.sum(cm[1]) 149 | print(tpr) 150 | print(fpr) 151 | print("==================================================") 152 | 153 | print("Accuracy") 154 | print("%.3f" %accuracy) 155 | print("%.3f" %accuracy) 
156 | print("precision") 157 | print("%.3f" %precision) 158 | print("recall") 159 | print("%.3f" %recall) 160 | print("f-score") 161 | print("%.3f" %f1) 162 | print("fpr") 163 | print("%.3f" %fpr) 164 | print("tpr") 165 | print("%.3f" %tpr) 166 | print("***************************************************************") 167 | 168 | 169 | # fit a CART model to the data 170 | model = DecisionTreeClassifier() 171 | model.fit(traindata, trainlabel) 172 | print(model) 173 | # make predictions 174 | expected = testlabel 175 | predicted = model.predict(testdata) 176 | # summarize the fit of the model 177 | predicted1 = model.predict_proba(testdata) 178 | np.savetxt("res/mlpnew.csv", predicted1[:,1]) 179 | accuracy = accuracy_score(expected, predicted) 180 | recall = recall_score(expected, predicted, average="binary") 181 | precision = precision_score(expected, predicted , average="binary") 182 | f1 = f1_score(expected, predicted , average="weighted") 183 | print("**************************CART**************************") 184 | cm = metrics.confusion_matrix(expected, predicted) 185 | print("==============================================") 186 | print(cm) 187 | tpr = float(cm[0][0])/np.sum(cm[0]) 188 | fpr = float(cm[1][1])/np.sum(cm[1]) 189 | print("%.3f" %tpr) 190 | print("%.3f" %fpr) 191 | print("Accuracy") 192 | print("%.3f" %accuracy) 193 | print("precision") 194 | print("%.3f" %precision) 195 | print("recall") 196 | print("%.3f" %recall) 197 | print("f-score") 198 | print("%.3f" %f1) 199 | print("fpr") 200 | print("%.3f" %fpr) 201 | print("tpr") 202 | print("%.3f" %tpr) 203 | print("***************************************************************") 204 | 205 | model = svm.SVC(kernel='linear', C=0.0001) 206 | model.fit(traindata, trainlabel) 207 | print(model) 208 | # make predictions 209 | expected = testlabel 210 | predicted = model.predict(testdata) 211 | # summarize the fit of the model 212 | 213 | accuracy = accuracy_score(expected, predicted) 214 | recall = recall_score(expected, predicted, average="binary") 215 | precision = precision_score(expected, predicted , average="binary") 216 | f1 = f1_score(expected, predicted , average="binary") 217 | print("**************************SVM linear**************************") 218 | cm = metrics.confusion_matrix(expected, predicted) 219 | print("==============================================") 220 | print(cm) 221 | tpr = float(cm[0][0])/np.sum(cm[0]) 222 | fpr = float(cm[1][1])/np.sum(cm[1]) 223 | print("%.3f" %tpr) 224 | print("%.3f" %fpr) 225 | print("Accuracy") 226 | print("%.3f" %accuracy) 227 | print("precision") 228 | print("%.3f" %precision) 229 | print("recall") 230 | print("%.3f" %recall) 231 | print("f-score") 232 | print("%.3f" %f1) 233 | print("fpr") 234 | print("%.3f" %fpr) 235 | print("tpr") 236 | print("%.3f" %tpr) 237 | print("***************************************************************") 238 | 239 | model = svm.SVC(kernel='rbf') 240 | model = model.fit(traindata, trainlabel) 241 | 242 | # make predictions 243 | expected = testlabel 244 | predicted = model.predict(testdata) 245 | # summarize the fit of the model 246 | 247 | accuracy = accuracy_score(expected, predicted) 248 | recall = recall_score(expected, predicted, average="binary") 249 | precision = precision_score(expected, predicted , average="binary") 250 | f1 = f1_score(expected, predicted , average="binary") 251 | print("**************************SVM rbf**************************") 252 | cm = metrics.confusion_matrix(expected, predicted) 253 | 
print("==============================================") 254 | print(cm) 255 | tpr = float(cm[0][0])/np.sum(cm[0]) 256 | fpr = float(cm[1][1])/np.sum(cm[1]) 257 | print("%.3f" %tpr) 258 | print("%.3f" %fpr) 259 | print("Accuracy") 260 | print("%.3f" %accuracy) 261 | print("precision") 262 | print("%.3f" %precision) 263 | print("recall") 264 | print("%.3f" %recall) 265 | print("f-score") 266 | print("%.3f" %f1) 267 | print("fpr") 268 | print("%.3f" %fpr) 269 | print("tpr") 270 | print("%.3f" %tpr) 271 | print("***************************************************************") 272 | 273 | 274 | model = LogisticRegression() 275 | model = model.fit(traindata, trainlabel) 276 | 277 | # make predictions 278 | expected = testlabel 279 | predicted = model.predict(testdata) 280 | # summarize the fit of the model 281 | 282 | accuracy = accuracy_score(expected, predicted) 283 | recall = recall_score(expected, predicted, average="binary") 284 | precision = precision_score(expected, predicted , average="binary") 285 | f1 = f1_score(expected, predicted , average="binary") 286 | print("**************************SVM rbf**************************") 287 | cm = metrics.confusion_matrix(expected, predicted) 288 | print("==============================================") 289 | print(cm) 290 | tpr = float(cm[0][0])/np.sum(cm[0]) 291 | fpr = float(cm[1][1])/np.sum(cm[1]) 292 | print("%.3f" %tpr) 293 | print("%.3f" %fpr) 294 | print("Accuracy") 295 | print("%.3f" %accuracy) 296 | recall = recall_score(expected, predicted, average="binary") 297 | precision = precision_score(expected, predicted , average="binary") 298 | f1 = f1_score(expected, predicted , average="binary") 299 | print("**************************SVM rbf**************************") 300 | cm = metrics.confusion_matrix(expected, predicted) 301 | print("==============================================") 302 | print(cm) 303 | tpr = float(cm[0][0])/np.sum(cm[0]) 304 | fpr = float(cm[1][1])/np.sum(cm[1]) 305 | print("%.3f" %tpr) 306 | print("%.3f" %fpr) 307 | print("Accuracy") 308 | print("%.3f" %accuracy) 309 | print("precision") 310 | print("%.3f" %precision) 311 | print("recall") 312 | print("%.3f" %recall) 313 | print("f-score") 314 | print("%.3f" %f1) 315 | print("fpr") 316 | print("%.3f" %fpr) 317 | print("tpr") 318 | print("%.3f" %tpr) 319 | print("***************************************************************") 320 | 321 | 322 | 323 | -------------------------------------------------------------------------------- /android malware/dynamic/newandroid/lstm1.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from sklearn.cross_validation import train_test_split 3 | import pandas as pd 4 | import numpy as np 5 | np.random.seed(1337) # for reproducibility 6 | from keras.preprocessing import sequence 7 | from keras.utils import np_utils 8 | from keras.models import Sequential 9 | from keras.layers import Dense, Dropout, Activation, Embedding 10 | from keras.layers import LSTM, SimpleRNN, GRU 11 | from keras.datasets import imdb 12 | from keras.utils.np_utils import to_categorical 13 | from sklearn.metrics import (precision_score, recall_score,f1_score, accuracy_score,mean_squared_error,mean_absolute_error) 14 | from sklearn import metrics 15 | from sklearn.preprocessing import Normalizer 16 | import h5py 17 | from keras import callbacks 18 | from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, CSVLogger 19 | 20 | traindata = pd.read_csv('new/Testing.csv', 
21 | testdata = pd.read_csv('new/Training.csv', header=None)
22 | 
23 | 
24 | X = traindata.iloc[:,0:42]
25 | Y = traindata.iloc[:,42]
26 | C = testdata.iloc[:,42]
27 | T = testdata.iloc[:,0:42]
28 | 
29 | scaler = Normalizer().fit(X)
30 | trainX = scaler.transform(X)
31 | # summarize transformed data
32 | np.set_printoptions(precision=3)
33 | #print(trainX[0:5,:])
34 | 
35 | scaler = Normalizer().fit(T)
36 | testT = scaler.transform(T)
37 | # summarize transformed data
38 | np.set_printoptions(precision=3)
39 | #print(testT[0:5,:])
40 | 
41 | 
42 | y_train = np.array(Y)
43 | y_test = np.array(C)
44 | 
45 | 
46 | # reshape input to be [samples, time steps, features]
47 | X_train = np.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1]))
48 | X_test = np.reshape(testT, (testT.shape[0], 1, testT.shape[1]))
49 | 
50 | 
51 | batch_size = 32
52 | 
53 | # 1. define the network
54 | model = Sequential()
55 | model.add(LSTM(4, input_dim=42))  # try using a GRU instead, for fun
56 | model.add(Dropout(0.1))
57 | model.add(Dense(1))
58 | model.add(Activation('sigmoid'))
59 | 
61 | # try using different optimizers and different optimizer configs
62 | model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy'])
63 | checkpointer = callbacks.ModelCheckpoint(filepath="logs/1/checkpoint-{epoch:02d}.hdf5", verbose=1, save_best_only=True, monitor='val_acc',mode='max')
64 | csv_logger = CSVLogger('logs/1/training_set_iranalysis.csv',separator=',', append=False)
65 | model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=1000, validation_data=(X_test, y_test),callbacks=[checkpointer,csv_logger])
66 | model.save("logs/1/lstm1layer_model.hdf5")
67 | 
--------------------------------------------------------------------------------
/android malware/dynamic/newandroid/lstm1test.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
3 | import pandas as pd
4 | import numpy as np
5 | np.random.seed(1337)  # for reproducibility
6 | from keras.preprocessing import sequence
7 | from keras.utils import np_utils
8 | from keras.models import Sequential
9 | from keras.layers import Dense, Dropout, Activation, Embedding
10 | from keras.layers import LSTM, SimpleRNN, GRU
12 | from keras.utils.np_utils import to_categorical
13 | from sklearn.metrics import (precision_score, recall_score, f1_score, accuracy_score)
14 | from sklearn import metrics
15 | from sklearn.preprocessing import Normalizer
16 | import h5py
17 | from keras import callbacks
18 | from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, CSVLogger
20 | # NOTE: both frames read Training.csv, so the metrics below are computed on the training data.
21 | traindata = pd.read_csv('new/Training.csv', header=None)
22 | testdata = pd.read_csv('new/Training.csv', header=None)
23 | 
24 | 
25 | X = traindata.iloc[:,0:42]
26 | Y = traindata.iloc[:,42]
27 | C = testdata.iloc[:,42]
28 | T = testdata.iloc[:,0:42]
29 | 
30 | scaler = Normalizer().fit(X)
31 | trainX = scaler.transform(X)
32 | # summarize transformed data
33 | np.set_printoptions(precision=3)
34 | #print(trainX[0:5,:])
35 | 
36 | scaler = Normalizer().fit(T)
37 | testT = scaler.transform(T)
38 | # summarize transformed data
39 | 
np.set_printoptions(precision=3) 40 | #print(testT[0:5,:]) 41 | 42 | 43 | y_train = np.array(Y) 44 | y_test = np.array(C) 45 | 46 | 47 | # reshape input to be [samples, time steps, features] 48 | X_train = np.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1])) 49 | X_test = np.reshape(testT, (testT.shape[0], 1, testT.shape[1])) 50 | 51 | 52 | batch_size = 32 53 | 54 | # 1. define the network 55 | model = Sequential() 56 | model.add(LSTM(4,input_dim=42)) # try using a GRU instead, for fun 57 | model.add(Dropout(0.1)) 58 | model.add(Dense(1)) 59 | model.add(Activation('sigmoid')) 60 | model.load_weights("logs/1/lstm1layer_model.hdf5") 61 | 62 | 63 | # try using different optimizers and different optimizer configs 64 | model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy']) 65 | ''' 66 | checkpointer = callbacks.ModelCheckpoint(filepath="logs/1/checkpoint-{epoch:02d}.hdf5", verbose=1, save_best_only=True, monitor='val_acc',mode='max') 67 | csv_logger = CSVLogger('logs/1/training_set_iranalysis.csv',separator=',', append=False) 68 | model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=10000, validation_data=(X_test, y_test),callbacks=[checkpointer,csv_logger]) 69 | model.save("logs/1/lstm1layer_model.hdf5") 70 | loss,accuracy = model.evaluate(X_train,y_train) 71 | print(accuracy) 72 | ''' 73 | loss, accuracy = model.evaluate(X_train, y_train) 74 | print("\nLoss: %.2f, Accuracy: %.2f%%" % (loss, accuracy*100)) 75 | 76 | 77 | 78 | expected = y_train 79 | predicted = model.predict_classes(X_train) 80 | accuracy = accuracy_score(expected, predicted) 81 | precision = precision_score(expected, predicted) 82 | recall = recall_score(expected, predicted, average="binary") 83 | f1 = f1_score(expected, predicted , average="binary") 84 | 85 | print("") 86 | print("Accuracy") 87 | print("%.3f" %accuracy) 88 | print("precision") 89 | print("%.3f" % precision) 90 | print("recall") 91 | print("%.3f" %recall) 92 | print("f-score") 93 | print("%.3f" %f1) 94 | 95 | 96 | -------------------------------------------------------------------------------- /android malware/dynamic/newandroid/lstm2.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from sklearn.cross_validation import train_test_split 3 | import pandas as pd 4 | import numpy as np 5 | np.random.seed(1337) # for reproducibility 6 | from keras.preprocessing import sequence 7 | from keras.utils import np_utils 8 | from keras.models import Sequential 9 | from keras.layers import Dense, Dropout, Activation, Embedding 10 | from keras.layers import LSTM, SimpleRNN, GRU 11 | from keras.datasets import imdb 12 | from keras.utils.np_utils import to_categorical 13 | from sklearn.metrics import (precision_score, recall_score,f1_score, accuracy_score,mean_squared_error,mean_absolute_error) 14 | from sklearn import metrics 15 | from sklearn.preprocessing import Normalizer 16 | import h5py 17 | from keras import callbacks 18 | from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, CSVLogger 19 | 20 | traindata = pd.read_csv('new/Testing.csv', header=None) 21 | testdata = pd.read_csv('new/Training.csv', header=None) 22 | 23 | 24 | X = traindata.iloc[:,0:42] 25 | Y = traindata.iloc[:,42] 26 | C = testdata.iloc[:,42] 27 | T = testdata.iloc[:,0:42] 28 | 29 | scaler = Normalizer().fit(X) 30 | trainX = scaler.transform(X) 31 | # summarize transformed data 32 | np.set_printoptions(precision=3) 33 | #print(trainX[0:5,:]) 34 | 35 | scaler = 
Normalizer().fit(T) 36 | testT = scaler.transform(T) 37 | # summarize transformed data 38 | np.set_printoptions(precision=3) 39 | #print(testT[0:5,:]) 40 | 41 | 42 | y_train = np.array(Y) 43 | y_test = np.array(C) 44 | 45 | 46 | # reshape input to be [samples, time steps, features] 47 | X_train = np.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1])) 48 | X_test = np.reshape(testT, (testT.shape[0], 1, testT.shape[1])) 49 | 50 | 51 | batch_size = 32 52 | 53 | # 1. define the network 54 | model = Sequential() 55 | model.add(LSTM(8,input_dim=42, return_sequences=True)) # try using a GRU instead, for fun 56 | model.add(Dropout(0.1)) 57 | model.add(LSTM(8, return_sequences=False)) # try using a GRU instead, for fun 58 | model.add(Dropout(0.1)) 59 | model.add(Dense(1)) 60 | model.add(Activation('sigmoid')) 61 | 62 | # try using different optimizers and different optimizer configs 63 | model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy']) 64 | checkpointer = callbacks.ModelCheckpoint(filepath="logs/2/checkpoint-{epoch:02d}.hdf5", verbose=1, save_best_only=True, monitor='val_acc',mode='max') 65 | csv_logger = CSVLogger('logs/2/training_set_iranalysis.csv',separator=',', append=False) 66 | model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=1000, validation_data=(X_test, y_test),callbacks=[checkpointer,csv_logger]) 67 | model.save("logs/2/lstm1layer_model.hdf5") 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | -------------------------------------------------------------------------------- /android malware/dynamic/newandroid/lstm2test.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from sklearn.cross_validation import train_test_split 3 | import pandas as pd 4 | import numpy as np 5 | np.random.seed(1337) # for reproducibility 6 | from keras.preprocessing import sequence 7 | from keras.utils import np_utils 8 | from keras.models import Sequential 9 | from keras.layers import Dense, Dropout, Activation, Embedding 10 | from keras.layers import LSTM, SimpleRNN, GRU 11 | from keras.datasets import imdb 12 | from keras.utils.np_utils import to_categorical 13 | from sklearn.metrics import (precision_score, recall_score,f1_score, accuracy_score,mean_squared_error,mean_absolute_error) 14 | from sklearn import metrics 15 | from sklearn.preprocessing import Normalizer 16 | import h5py 17 | from keras import callbacks 18 | from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, CSVLogger 19 | 20 | traindata = pd.read_csv('new/Training.csv', header=None) 21 | testdata = pd.read_csv('new/Training.csv', header=None) 22 | 23 | 24 | X = traindata.iloc[:,0:42] 25 | Y = traindata.iloc[:,42] 26 | C = testdata.iloc[:,42] 27 | T = testdata.iloc[:,0:42] 28 | 29 | scaler = Normalizer().fit(X) 30 | trainX = scaler.transform(X) 31 | # summarize transformed data 32 | np.set_printoptions(precision=3) 33 | #print(trainX[0:5,:]) 34 | 35 | scaler = Normalizer().fit(T) 36 | testT = scaler.transform(T) 37 | # summarize transformed data 38 | np.set_printoptions(precision=3) 39 | #print(testT[0:5,:]) 40 | 41 | 42 | y_train = np.array(Y) 43 | y_test = np.array(C) 44 | 45 | 46 | # reshape input to be [samples, time steps, features] 47 | X_train = np.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1])) 48 | X_test = np.reshape(testT, (testT.shape[0], 1, testT.shape[1])) 49 | 50 | 51 | batch_size = 32 52 | 53 | # 1. 
define the network 54 | model = Sequential() 55 | model.add(LSTM(8,input_dim=42, return_sequences=True)) # try using a GRU instead, for fun 56 | model.add(Dropout(0.1)) 57 | model.add(LSTM(8, return_sequences=False)) # try using a GRU instead, for fun 58 | model.add(Dropout(0.1)) 59 | model.add(Dense(1)) 60 | model.add(Activation('sigmoid')) 61 | model.load_weights("logs/2/checkpoint-29.hdf5") 62 | ''' 63 | # try using different optimizers and different optimizer configs 64 | model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy']) 65 | checkpointer = callbacks.ModelCheckpoint(filepath="logs/2/checkpoint-{epoch:02d}.hdf5", verbose=1, save_best_only=True, monitor='val_acc',mode='max') 66 | csv_logger = CSVLogger('logs/2/training_set_iranalysis.csv',separator=',', append=False) 67 | model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=1000, validation_data=(X_test, y_test),callbacks=[checkpointer,csv_logger]) 68 | model.save("logs/2/lstm1layer_model.hdf5") 69 | ''' 70 | 71 | model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy']) 72 | loss, accuracy = model.evaluate(X_train, y_train) 73 | print("\nLoss: %.2f, Accuracy: %.2f%%" % (loss, accuracy*100)) 74 | 75 | 76 | 77 | from sklearn.metrics import (precision_score, recall_score,f1_score, accuracy_score,mean_squared_error,mean_absolute_error) 78 | 79 | expected = y_train 80 | predicted = model.predict_classes(X_train) 81 | 82 | accuracy = accuracy_score(expected, predicted) 83 | precision = precision_score(expected, predicted) 84 | recall = recall_score(expected, predicted, average="binary") 85 | f1 = f1_score(expected, predicted , average="binary") 86 | 87 | print("Accuracy") 88 | print("%.3f" %accuracy) 89 | print("precision") 90 | print("%.3f" % precision) 91 | print("recall") 92 | print("%.3f" %recall) 93 | print("f-score") 94 | print("%.3f" %f1) 95 | 96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /android malware/dynamic/newandroid/lstm3.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from sklearn.cross_validation import train_test_split 3 | import pandas as pd 4 | import numpy as np 5 | np.random.seed(1337) # for reproducibility 6 | from keras.preprocessing import sequence 7 | from keras.utils import np_utils 8 | from keras.models import Sequential 9 | from keras.layers import Dense, Dropout, Activation, Embedding 10 | from keras.layers import LSTM, SimpleRNN, GRU 11 | from keras.datasets import imdb 12 | from keras.utils.np_utils import to_categorical 13 | from sklearn.metrics import (precision_score, recall_score,f1_score, accuracy_score,mean_squared_error,mean_absolute_error) 14 | from sklearn import metrics 15 | from sklearn.preprocessing import Normalizer 16 | import h5py 17 | from keras import callbacks 18 | from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, CSVLogger 19 | 20 | traindata = pd.read_csv('new/Testing.csv', header=None) 21 | testdata = pd.read_csv('new/Training.csv', header=None) 22 | 23 | 24 | X = traindata.iloc[:,0:42] 25 | Y = traindata.iloc[:,42] 26 | C = testdata.iloc[:,42] 27 | T = testdata.iloc[:,0:42] 28 | 29 | scaler = Normalizer().fit(X) 30 | trainX = scaler.transform(X) 31 | # summarize transformed data 32 | np.set_printoptions(precision=3) 33 | #print(trainX[0:5,:]) 34 | 35 | scaler = Normalizer().fit(T) 36 | testT = scaler.transform(T) 37 | # summarize transformed data 38 | 
np.set_printoptions(precision=3) 39 | #print(testT[0:5,:]) 40 | 41 | 42 | y_train = np.array(Y) 43 | y_test = np.array(C) 44 | 45 | 46 | # reshape input to be [samples, time steps, features] 47 | X_train = np.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1])) 48 | X_test = np.reshape(testT, (testT.shape[0], 1, testT.shape[1])) 49 | 50 | 51 | batch_size = 32 52 | 53 | # 1. define the network 54 | model = Sequential() 55 | model.add(LSTM(16,input_dim=42, return_sequences=True)) # try using a GRU instead, for fun 56 | model.add(Dropout(0.1)) 57 | model.add(LSTM(16, return_sequences=True)) # try using a GRU instead, for fun 58 | model.add(Dropout(0.1)) 59 | model.add(LSTM(16, return_sequences=False)) # try using a GRU instead, for fun 60 | model.add(Dropout(0.1)) 61 | model.add(Dense(1)) 62 | model.add(Activation('sigmoid')) 63 | 64 | # try using different optimizers and different optimizer configs 65 | model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy']) 66 | checkpointer = callbacks.ModelCheckpoint(filepath="logs/3/checkpoint-{epoch:02d}.hdf5", verbose=1, save_best_only=True, monitor='val_acc',mode='max') 67 | csv_logger = CSVLogger('logs/3/training_set_iranalysis.csv',separator=',', append=False) 68 | model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=1000, validation_data=(X_test, y_test),callbacks=[checkpointer,csv_logger]) 69 | model.save("logs/3/lstm1layer_model.hdf5") 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | -------------------------------------------------------------------------------- /android malware/dynamic/newandroid/lstm3test.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from sklearn.cross_validation import train_test_split 3 | import pandas as pd 4 | import numpy as np 5 | np.random.seed(1337) # for reproducibility 6 | from keras.preprocessing import sequence 7 | from keras.utils import np_utils 8 | from keras.models import Sequential 9 | from keras.layers import Dense, Dropout, Activation, Embedding 10 | from keras.layers import LSTM, SimpleRNN, GRU 11 | from keras.datasets import imdb 12 | from keras.utils.np_utils import to_categorical 13 | from sklearn.metrics import (precision_score, recall_score,f1_score, accuracy_score,mean_squared_error,mean_absolute_error) 14 | from sklearn import metrics 15 | from sklearn.preprocessing import Normalizer 16 | import h5py 17 | from keras import callbacks 18 | from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, CSVLogger 19 | 20 | traindata = pd.read_csv('new/Training.csv', header=None) 21 | testdata = pd.read_csv('new/Training.csv', header=None) 22 | 23 | 24 | X = traindata.iloc[:,0:42] 25 | Y = traindata.iloc[:,42] 26 | C = testdata.iloc[:,42] 27 | T = testdata.iloc[:,0:42] 28 | 29 | scaler = Normalizer().fit(X) 30 | trainX = scaler.transform(X) 31 | # summarize transformed data 32 | np.set_printoptions(precision=3) 33 | #print(trainX[0:5,:]) 34 | 35 | scaler = Normalizer().fit(T) 36 | testT = scaler.transform(T) 37 | # summarize transformed data 38 | np.set_printoptions(precision=3) 39 | #print(testT[0:5,:]) 40 | 41 | 42 | y_train = np.array(Y) 43 | y_test = np.array(C) 44 | 45 | 46 | # reshape input to be [samples, time steps, features] 47 | X_train = np.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1])) 48 | X_test = np.reshape(testT, (testT.shape[0], 1, testT.shape[1])) 49 | 50 | 51 | batch_size = 32 52 | 53 | # 1. 
define the network 54 | model = Sequential() 55 | model.add(LSTM(16,input_dim=42, return_sequences=True)) # try using a GRU instead, for fun 56 | model.add(Dropout(0.1)) 57 | model.add(LSTM(16, return_sequences=True)) # try using a GRU instead, for fun 58 | model.add(Dropout(0.1)) 59 | model.add(LSTM(16, return_sequences=False)) # try using a GRU instead, for fun 60 | model.add(Dropout(0.1)) 61 | model.add(Dense(1)) 62 | model.add(Activation('sigmoid')) 63 | 64 | ''' 65 | # try using different optimizers and different optimizer configs 66 | model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy']) 67 | checkpointer = callbacks.ModelCheckpoint(filepath="logs/3/checkpoint-{epoch:02d}.hdf5", verbose=1, save_best_only=True, monitor='val_acc',mode='max') 68 | csv_logger = CSVLogger('logs/3/training_set_iranalysis.csv',separator=',', append=False) 69 | model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=1000, validation_data=(X_test, y_test),callbacks=[checkpointer,csv_logger]) 70 | model.save("logs/3/lstm1layer_model.hdf5") 71 | ''' 72 | 73 | model.load_weights("logs/3/checkpoint-291.hdf5") 74 | model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy']) 75 | loss, accuracy = model.evaluate(X_train, y_train) 76 | print("\nLoss: %.2f, Accuracy: %.2f%%" % (loss, accuracy*100)) 77 | 78 | 79 | 80 | 81 | 82 | 83 | from sklearn.metrics import (precision_score, recall_score,f1_score, accuracy_score,mean_squared_error,mean_absolute_error) 84 | 85 | expected = y_train 86 | predicted = model.predict_classes(X_train) 87 | 88 | accuracy = accuracy_score(expected, predicted) 89 | precision = precision_score(expected, predicted) 90 | recall = recall_score(expected, predicted, average="binary") 91 | f1 = f1_score(expected, predicted , average="binary") 92 | 93 | print("Accuracy") 94 | print("%.3f" %accuracy) 95 | print("precision") 96 | print("%.3f" % precision) 97 | print("recall") 98 | print("%.3f" %recall) 99 | print("f-score") 100 | print("%.3f" %f1) 101 | 102 | -------------------------------------------------------------------------------- /android malware/dynamic/newandroid/lstm4.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from sklearn.cross_validation import train_test_split 3 | import pandas as pd 4 | import numpy as np 5 | np.random.seed(1337) # for reproducibility 6 | from keras.preprocessing import sequence 7 | from keras.utils import np_utils 8 | from keras.models import Sequential 9 | from keras.layers import Dense, Dropout, Activation, Embedding 10 | from keras.layers import LSTM, SimpleRNN, GRU 11 | from keras.datasets import imdb 12 | from keras.utils.np_utils import to_categorical 13 | from sklearn.metrics import (precision_score, recall_score,f1_score, accuracy_score,mean_squared_error,mean_absolute_error) 14 | from sklearn import metrics 15 | from sklearn.preprocessing import Normalizer 16 | import h5py 17 | from keras import callbacks 18 | from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, CSVLogger 19 | 20 | traindata = pd.read_csv('new/Testing.csv', header=None) 21 | testdata = pd.read_csv('new/Training.csv', header=None) 22 | 23 | 24 | X = traindata.iloc[:,0:42] 25 | Y = traindata.iloc[:,42] 26 | C = testdata.iloc[:,42] 27 | T = testdata.iloc[:,0:42] 28 | 29 | scaler = Normalizer().fit(X) 30 | trainX = scaler.transform(X) 31 | # summarize transformed data 32 | np.set_printoptions(precision=3) 33 | #print(trainX[0:5,:]) 34 | 
35 | scaler = Normalizer().fit(T) 36 | testT = scaler.transform(T) 37 | # summarize transformed data 38 | np.set_printoptions(precision=3) 39 | #print(testT[0:5,:]) 40 | 41 | 42 | y_train = np.array(Y) 43 | y_test = np.array(C) 44 | 45 | 46 | # reshape input to be [samples, time steps, features] 47 | X_train = np.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1])) 48 | X_test = np.reshape(testT, (testT.shape[0], 1, testT.shape[1])) 49 | 50 | 51 | batch_size = 32 52 | 53 | # 1. define the network 54 | model = Sequential() 55 | model.add(LSTM(32,input_dim=42, return_sequences=True)) # try using a GRU instead, for fun 56 | model.add(Dropout(0.1)) 57 | model.add(LSTM(32, return_sequences=True)) # try using a GRU instead, for fun 58 | model.add(Dropout(0.1)) 59 | model.add(LSTM(32, return_sequences=True)) # try using a GRU instead, for fun 60 | model.add(Dropout(0.1)) 61 | model.add(LSTM(32, return_sequences=False)) # try using a GRU instead, for fun 62 | model.add(Dropout(0.1)) 63 | model.add(Dense(1)) 64 | model.add(Activation('sigmoid')) 65 | 66 | # try using different optimizers and different optimizer configs 67 | model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy']) 68 | checkpointer = callbacks.ModelCheckpoint(filepath="logs/4/checkpoint-{epoch:02d}.hdf5", verbose=1, save_best_only=True, monitor='val_acc',mode='max') 69 | csv_logger = CSVLogger('logs/4/training_set_iranalysis.csv',separator=',', append=False) 70 | model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=1000, validation_data=(X_test, y_test),callbacks=[checkpointer,csv_logger]) 71 | model.save("logs/4/lstm1layer_model.hdf5") 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | -------------------------------------------------------------------------------- /android malware/dynamic/newandroid/lstm4test.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from sklearn.cross_validation import train_test_split 3 | import pandas as pd 4 | import numpy as np 5 | np.random.seed(1337) # for reproducibility 6 | from keras.preprocessing import sequence 7 | from keras.utils import np_utils 8 | from keras.models import Sequential 9 | from keras.layers import Dense, Dropout, Activation, Embedding 10 | from keras.layers import LSTM, SimpleRNN, GRU 11 | from keras.datasets import imdb 12 | from keras.utils.np_utils import to_categorical 13 | from sklearn.metrics import (precision_score, recall_score,f1_score, accuracy_score,mean_squared_error,mean_absolute_error) 14 | from sklearn import metrics 15 | from sklearn.preprocessing import Normalizer 16 | import h5py 17 | from keras import callbacks 18 | from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, CSVLogger 19 | 20 | traindata = pd.read_csv('new/Training.csv', header=None) 21 | testdata = pd.read_csv('new/Training.csv', header=None) 22 | 23 | 24 | X = traindata.iloc[:,0:42] 25 | Y = traindata.iloc[:,42] 26 | C = testdata.iloc[:,42] 27 | T = testdata.iloc[:,0:42] 28 | 29 | scaler = Normalizer().fit(X) 30 | trainX = scaler.transform(X) 31 | # summarize transformed data 32 | np.set_printoptions(precision=3) 33 | #print(trainX[0:5,:]) 34 | 35 | scaler = Normalizer().fit(T) 36 | testT = scaler.transform(T) 37 | # summarize transformed data 38 | np.set_printoptions(precision=3) 39 | #print(testT[0:5,:]) 40 | 41 | 42 | y_train = np.array(Y) 43 | y_test = np.array(C) 44 | 45 | 46 | # reshape input to be [samples, time steps, features] 47 | X_train = 
np.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1])) 48 | X_test = np.reshape(testT, (testT.shape[0], 1, testT.shape[1])) 49 | 50 | 51 | batch_size = 32 52 | 53 | # 1. define the network 54 | model = Sequential() 55 | model.add(LSTM(32,input_dim=42, return_sequences=True)) # try using a GRU instead, for fun 56 | model.add(Dropout(0.1)) 57 | model.add(LSTM(32, return_sequences=True)) # try using a GRU instead, for fun 58 | model.add(Dropout(0.1)) 59 | model.add(LSTM(32, return_sequences=True)) # try using a GRU instead, for fun 60 | model.add(Dropout(0.1)) 61 | model.add(LSTM(32, return_sequences=False)) # try using a GRU instead, for fun 62 | model.add(Dropout(0.1)) 63 | model.add(Dense(1)) 64 | model.add(Activation('sigmoid')) 65 | 66 | ''' 67 | # try using different optimizers and different optimizer configs 68 | model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy']) 69 | checkpointer = callbacks.ModelCheckpoint(filepath="logs/4/checkpoint-{epoch:02d}.hdf5", verbose=1, save_best_only=True, monitor='val_acc',mode='max') 70 | csv_logger = CSVLogger('logs/4/training_set_iranalysis.csv',separator=',', append=False) 71 | model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=1000, validation_data=(X_test, y_test),callbacks=[checkpointer,csv_logger]) 72 | model.save("logs/4/lstm1layer_model.hdf5") 73 | 74 | ''' 75 | 76 | model.load_weights("logs/4/checkpoint-10.hdf5") 77 | model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy']) 78 | loss, accuracy = model.evaluate(X_train, y_train) 79 | print("\nLoss: %.2f, Accuracy: %.2f%%" % (loss, accuracy*100)) 80 | 81 | 82 | 83 | 84 | from sklearn.metrics import (precision_score, recall_score,f1_score, accuracy_score,mean_squared_error,mean_absolute_error) 85 | 86 | expected = y_train 87 | predicted = model.predict_classes(X_train) 88 | 89 | accuracy = accuracy_score(expected, predicted) 90 | precision = precision_score(expected, predicted) 91 | recall = recall_score(expected, predicted, average="binary") 92 | f1 = f1_score(expected, predicted , average="binary") 93 | 94 | print("Accuracy") 95 | print("%.3f" %accuracy) 96 | print("precision") 97 | print("%.3f" % precision) 98 | print("recall") 99 | print("%.3f" %recall) 100 | print("f-score") 101 | print("%.3f" %f1) 102 | 103 | 104 | -------------------------------------------------------------------------------- /android malware/dynamic/newandroid/lstm6.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from sklearn.cross_validation import train_test_split 3 | import pandas as pd 4 | import numpy as np 5 | np.random.seed(1337) # for reproducibility 6 | from keras.preprocessing import sequence 7 | from keras.utils import np_utils 8 | from keras.models import Sequential 9 | from keras.layers import Dense, Dropout, Activation, Embedding 10 | from keras.layers import LSTM, SimpleRNN, GRU 11 | from keras.datasets import imdb 12 | from keras.utils.np_utils import to_categorical 13 | from sklearn.metrics import (precision_score, recall_score,f1_score, accuracy_score,mean_squared_error,mean_absolute_error) 14 | from sklearn import metrics 15 | from sklearn.preprocessing import Normalizer 16 | import h5py 17 | from keras import callbacks 18 | from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, CSVLogger 19 | 20 | traindata = pd.read_csv('new/Testing.csv', header=None) 21 | testdata = pd.read_csv('new/Training.csv', header=None) 22 | 23 | 24 | X = 
traindata.iloc[:,0:42] 25 | Y = traindata.iloc[:,42] 26 | C = testdata.iloc[:,42] 27 | T = testdata.iloc[:,0:42] 28 | 29 | scaler = Normalizer().fit(X) 30 | trainX = scaler.transform(X) 31 | # summarize transformed data 32 | np.set_printoptions(precision=3) 33 | #print(trainX[0:5,:]) 34 | 35 | scaler = Normalizer().fit(T) 36 | testT = scaler.transform(T) 37 | # summarize transformed data 38 | np.set_printoptions(precision=3) 39 | #print(testT[0:5,:]) 40 | 41 | 42 | y_train = np.array(Y) 43 | y_test = np.array(C) 44 | 45 | 46 | # reshape input to be [samples, time steps, features] 47 | X_train = np.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1])) 48 | X_test = np.reshape(testT, (testT.shape[0], 1, testT.shape[1])) 49 | 50 | 51 | batch_size = 32 52 | 53 | # 1. define the network 54 | model = Sequential() 55 | model.add(LSTM(64,input_dim=42, return_sequences=True)) # try using a GRU instead, for fun 56 | model.add(Dropout(0.1)) 57 | model.add(LSTM(64, return_sequences=True)) # try using a GRU instead, for fun 58 | model.add(Dropout(0.1)) 59 | model.add(LSTM(64, return_sequences=True)) # try using a GRU instead, for fun 60 | model.add(Dropout(0.1)) 61 | model.add(LSTM(64, return_sequences=True)) # try using a GRU instead, for fun 62 | model.add(Dropout(0.1)) 63 | model.add(LSTM(64, return_sequences=True)) # try using a GRU instead, for fun 64 | model.add(Dropout(0.1)) 65 | #model.add(LSTM(32, return_sequences=True)) # try using a GRU instead, for fun 66 | #model.add(Dropout(0.1)) 67 | #model.add(LSTM(32, return_sequences=True)) # try using a GRU instead, for fun 68 | #model.add(Dropout(0.1)) 69 | 70 | model.add(LSTM(64, return_sequences=False)) # try using a GRU instead, for fun 71 | model.add(Dropout(0.1)) 72 | model.add(Dense(1)) 73 | model.add(Activation('sigmoid')) 74 | 75 | # try using different optimizers and different optimizer configs 76 | model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy']) 77 | checkpointer = callbacks.ModelCheckpoint(filepath="logs/5/checkpoint-{epoch:02d}.hdf5", verbose=1, save_best_only=True, monitor='val_acc',mode='max') 78 | csv_logger = CSVLogger('logs/5/training_set_iranalysis.csv',separator=',', append=False) 79 | model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=1000, validation_data=(X_test, y_test),callbacks=[checkpointer,csv_logger]) 80 | model.save("logs/5/lstm1layer_model.hdf5") 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | -------------------------------------------------------------------------------- /android malware/dynamic/newandroid/lstm6test.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from sklearn.cross_validation import train_test_split 3 | import pandas as pd 4 | import numpy as np 5 | np.random.seed(1337) # for reproducibility 6 | from keras.preprocessing import sequence 7 | from keras.utils import np_utils 8 | from keras.models import Sequential 9 | from keras.layers import Dense, Dropout, Activation, Embedding 10 | from keras.layers import LSTM, SimpleRNN, GRU 11 | from keras.datasets import imdb 12 | from keras.utils.np_utils import to_categorical 13 | from sklearn.metrics import (precision_score, recall_score,f1_score, accuracy_score,mean_squared_error,mean_absolute_error) 14 | from sklearn import metrics 15 | from sklearn.preprocessing import Normalizer 16 | import h5py 17 | from keras import callbacks 18 | from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, CSVLogger 19 | 20 | 
traindata = pd.read_csv('new/Training.csv', header=None) 21 | testdata = pd.read_csv('new/Training.csv', header=None) 22 | 23 | 24 | X = traindata.iloc[:,0:42] 25 | Y = traindata.iloc[:,42] 26 | C = testdata.iloc[:,42] 27 | T = testdata.iloc[:,0:42] 28 | 29 | scaler = Normalizer().fit(X) 30 | trainX = scaler.transform(X) 31 | # summarize transformed data 32 | np.set_printoptions(precision=3) 33 | #print(trainX[0:5,:]) 34 | 35 | scaler = Normalizer().fit(T) 36 | testT = scaler.transform(T) 37 | # summarize transformed data 38 | np.set_printoptions(precision=3) 39 | #print(testT[0:5,:]) 40 | 41 | 42 | y_train = np.array(Y) 43 | y_test = np.array(C) 44 | 45 | 46 | # reshape input to be [samples, time steps, features] 47 | X_train = np.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1])) 48 | X_test = np.reshape(testT, (testT.shape[0], 1, testT.shape[1])) 49 | 50 | 51 | batch_size = 32 52 | 53 | # 1. define the network 54 | model = Sequential() 55 | model.add(LSTM(64,input_dim=42, return_sequences=True)) # try using a GRU instead, for fun 56 | model.add(Dropout(0.1)) 57 | model.add(LSTM(64, return_sequences=True)) # try using a GRU instead, for fun 58 | model.add(Dropout(0.1)) 59 | model.add(LSTM(64, return_sequences=True)) # try using a GRU instead, for fun 60 | model.add(Dropout(0.1)) 61 | model.add(LSTM(64, return_sequences=True)) # try using a GRU instead, for fun 62 | model.add(Dropout(0.1)) 63 | model.add(LSTM(64, return_sequences=True)) # try using a GRU instead, for fun 64 | model.add(Dropout(0.1)) 65 | #model.add(LSTM(32, return_sequences=True)) # try using a GRU instead, for fun 66 | #model.add(Dropout(0.1)) 67 | #model.add(LSTM(32, return_sequences=True)) # try using a GRU instead, for fun 68 | #model.add(Dropout(0.1)) 69 | 70 | model.add(LSTM(64, return_sequences=False)) # try using a GRU instead, for fun 71 | model.add(Dropout(0.1)) 72 | model.add(Dense(1)) 73 | model.add(Activation('sigmoid')) 74 | 75 | ''' 76 | # try using different optimizers and different optimizer configs 77 | model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy']) 78 | checkpointer = callbacks.ModelCheckpoint(filepath="logs/5/checkpoint-{epoch:02d}.hdf5", verbose=1, save_best_only=True, monitor='val_acc',mode='max') 79 | csv_logger = CSVLogger('logs/5/training_set_iranalysis.csv',separator=',', append=False) 80 | model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=1000, validation_data=(X_test, y_test),callbacks=[checkpointer,csv_logger]) 81 | model.save("logs/5/lstm1layer_model.hdf5") 82 | ''' 83 | 84 | model.load_weights("logs/5/checkpoint-499.hdf5") 85 | model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy']) 86 | loss, accuracy = model.evaluate(X_train, y_train) 87 | print("\nLoss: %.2f, Accuracy: %.2f%%" % (loss, accuracy*100)) 88 | 89 | 90 | from sklearn.metrics import (precision_score, recall_score,f1_score, accuracy_score,mean_squared_error,mean_absolute_error) 91 | 92 | expected = y_train 93 | predicted = model.predict_classes(X_train) 94 | 95 | accuracy = accuracy_score(expected, predicted) 96 | precision = precision_score(expected, predicted) 97 | recall = recall_score(expected, predicted, average="binary") 98 | f1 = f1_score(expected, predicted , average="binary") 99 | 100 | print("Accuracy") 101 | print("%.3f" %accuracy) 102 | print("precision") 103 | print("%.3f" % precision) 104 | print("recall") 105 | print("%.3f" %recall) 106 | print("f-score") 107 | print("%.3f" %f1) 108 | 109 | 110 | 111 | 112 | 113 | 114 | 
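
Every lstm*test.py in this directory (and each *test.py in newandroidrnn below) repeats one evaluate-a-checkpoint pattern: rebuild the architecture, load the saved weights, and print accuracy, precision, recall and F1; only the recurrent stack differs between files. The following is a minimal, self-contained sketch of that shared pattern, written against the same Keras 1.x-era API the scripts above use. The CSV layout (42 features, label in column 42) matches the repository's data; the checkpoint path is a stand-in for whichever logs/*/checkpoint-*.hdf5 file is under test. It also derives TPR and FPR from the confusion matrix with class 1 (malware) as the positive class, matching the corrected computation in all.py.

import numpy as np
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, LSTM
from sklearn import metrics
from sklearn.metrics import (precision_score, recall_score, f1_score, accuracy_score)
from sklearn.preprocessing import Normalizer

# Load the split to score; the test scripts above evaluate on new/Training.csv.
data = pd.read_csv('new/Training.csv', header=None)
X = Normalizer().fit_transform(data.iloc[:, 0:42])
y = np.array(data.iloc[:, 42])
# reshape input to be [samples, time steps, features]
X = np.reshape(X, (X.shape[0], 1, X.shape[1]))

# Rebuild the network exactly as it was trained, then load the checkpoint weights.
model = Sequential()
model.add(LSTM(4, input_dim=42))
model.add(Dropout(0.1))
model.add(Dense(1))
model.add(Activation('sigmoid'))
model.load_weights("logs/1/lstm1layer_model.hdf5")  # stand-in path: point at the checkpoint under test
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Score and report.
predicted = model.predict_classes(X).ravel()
cm = metrics.confusion_matrix(y, predicted)
print(cm)
tpr = float(cm[1][1]) / np.sum(cm[1])  # TP / (TP + FN), class 1 = malware
fpr = float(cm[0][1]) / np.sum(cm[0])  # FP / (FP + TN)
print("accuracy  %.3f" % accuracy_score(y, predicted))
print("precision %.3f" % precision_score(y, predicted))
print("recall    %.3f" % recall_score(y, predicted))
print("f-score   %.3f" % f1_score(y, predicted))
print("tpr       %.3f" % tpr)
print("fpr       %.3f" % fpr)

Swapping the model.add(...) lines for any of the deeper LSTM stacks above (or the SimpleRNN variants below) reproduces the corresponding test script.
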
-------------------------------------------------------------------------------- /android malware/dynamic/newandroidrnn/lstm1.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from sklearn.cross_validation import train_test_split 3 | import pandas as pd 4 | import numpy as np 5 | np.random.seed(1337) # for reproducibility 6 | from keras.preprocessing import sequence 7 | from keras.utils import np_utils 8 | from keras.models import Sequential 9 | from keras.layers import Dense, Dropout, Activation, Embedding 10 | from keras.layers import LSTM, SimpleRNN, GRU 11 | from keras.datasets import imdb 12 | from keras.utils.np_utils import to_categorical 13 | from sklearn.metrics import (precision_score, recall_score,f1_score, accuracy_score,mean_squared_error,mean_absolute_error) 14 | from sklearn import metrics 15 | from sklearn.preprocessing import Normalizer 16 | import h5py 17 | from keras import callbacks 18 | from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, CSVLogger 19 | 20 | traindata = pd.read_csv('new/Testing.csv', header=None) 21 | testdata = pd.read_csv('new/Training.csv', header=None) 22 | 23 | 24 | X = traindata.iloc[:,0:42] 25 | Y = traindata.iloc[:,42] 26 | C = testdata.iloc[:,42] 27 | T = testdata.iloc[:,0:42] 28 | 29 | scaler = Normalizer().fit(X) 30 | trainX = scaler.transform(X) 31 | # summarize transformed data 32 | np.set_printoptions(precision=3) 33 | #print(trainX[0:5,:]) 34 | 35 | scaler = Normalizer().fit(T) 36 | testT = scaler.transform(T) 37 | # summarize transformed data 38 | np.set_printoptions(precision=3) 39 | #print(testT[0:5,:]) 40 | 41 | 42 | y_train = np.array(Y) 43 | y_test = np.array(C) 44 | 45 | 46 | # reshape input to be [samples, time steps, features] 47 | X_train = np.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1])) 48 | X_test = np.reshape(testT, (testT.shape[0], 1, testT.shape[1])) 49 | 50 | 51 | batch_size = 32 52 | 53 | # 1. 
define the network 54 | model = Sequential() 55 | model.add(SimpleRNN(4,input_dim=42)) # try using a GRU instead, for fun 56 | model.add(Dropout(0.1)) 57 | model.add(Dense(1)) 58 | model.add(Activation('sigmoid')) 59 | 60 | # try using different optimizers and different optimizer configs 61 | model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy']) 62 | checkpointer = callbacks.ModelCheckpoint(filepath="logs/1/checkpoint-{epoch:02d}.hdf5", verbose=1, save_best_only=True, monitor='val_acc',mode='max') 63 | csv_logger = CSVLogger('logs/1/training_set_iranalysis.csv',separator=',', append=False) 64 | model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=10000, validation_data=(X_test, y_test),callbacks=[checkpointer,csv_logger]) 65 | model.save("logs/1/lstm1layer_model.hdf5") 66 | loss,accuracy = model.evaluate(X_train,y_train) 67 | print(accuracy) 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | -------------------------------------------------------------------------------- /android malware/dynamic/newandroidrnn/lstm1test.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from sklearn.cross_validation import train_test_split 3 | import pandas as pd 4 | import numpy as np 5 | np.random.seed(1337) # for reproducibility 6 | from keras.preprocessing import sequence 7 | from keras.utils import np_utils 8 | from keras.models import Sequential 9 | from keras.layers import Dense, Dropout, Activation, Embedding 10 | from keras.layers import LSTM, SimpleRNN, GRU 11 | from keras.datasets import imdb 12 | from keras.utils.np_utils import to_categorical 13 | from sklearn.metrics import (precision_score, recall_score,f1_score, accuracy_score,mean_squared_error,mean_absolute_error) 14 | from sklearn import metrics 15 | from sklearn.preprocessing import Normalizer 16 | import h5py 17 | from keras import callbacks 18 | from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, CSVLogger 19 | 20 | traindata = pd.read_csv('new/Training.csv', header=None) 21 | testdata = pd.read_csv('new/Training.csv', header=None) 22 | 23 | 24 | X = traindata.iloc[:,0:42] 25 | Y = traindata.iloc[:,42] 26 | C = testdata.iloc[:,42] 27 | T = testdata.iloc[:,0:42] 28 | 29 | scaler = Normalizer().fit(X) 30 | trainX = scaler.transform(X) 31 | # summarize transformed data 32 | np.set_printoptions(precision=3) 33 | #print(trainX[0:5,:]) 34 | 35 | scaler = Normalizer().fit(T) 36 | testT = scaler.transform(T) 37 | # summarize transformed data 38 | np.set_printoptions(precision=3) 39 | #print(testT[0:5,:]) 40 | 41 | 42 | y_train = np.array(Y) 43 | y_test = np.array(C) 44 | 45 | 46 | # reshape input to be [samples, time steps, features] 47 | X_train = np.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1])) 48 | X_test = np.reshape(testT, (testT.shape[0], 1, testT.shape[1])) 49 | 50 | 51 | batch_size = 32 52 | 53 | # 1. 
define the network 54 | model = Sequential() 55 | model.add(SimpleRNN(4,input_dim=42)) # try using a GRU instead, for fun 56 | model.add(Dropout(0.1)) 57 | model.add(Dense(1)) 58 | model.add(Activation('sigmoid')) 59 | 60 | ''' 61 | # try using different optimizers and different optimizer configs 62 | model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy']) 63 | checkpointer = callbacks.ModelCheckpoint(filepath="logs/1/checkpoint-{epoch:02d}.hdf5", verbose=1, save_best_only=True, monitor='val_acc',mode='max') 64 | csv_logger = CSVLogger('logs/1/training_set_iranalysis.csv',separator=',', append=False) 65 | model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=10000, validation_data=(X_test, y_test),callbacks=[checkpointer,csv_logger]) 66 | model.save("logs/1/lstm1layer_model.hdf5") 67 | loss,accuracy = model.evaluate(X_train,y_train) 68 | print(accuracy) 69 | ''' 70 | 71 | model.load_weights("logs/1/checkpoint-13.hdf5") 72 | model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy']) 73 | loss, accuracy = model.evaluate(X_train, y_train) 74 | print("\nLoss: %.2f, Accuracy: %.2f%%" % (loss, accuracy*100)) 75 | 76 | from sklearn.metrics import (precision_score, recall_score,f1_score, accuracy_score,mean_squared_error,mean_absolute_error) 77 | 78 | expected = y_train 79 | predicted = model.predict_classes(X_train) 80 | 81 | accuracy = accuracy_score(expected, predicted) 82 | precision = precision_score(expected, predicted) 83 | recall = recall_score(expected, predicted, average="binary") 84 | f1 = f1_score(expected, predicted , average="binary") 85 | 86 | print("Accuracy") 87 | print("%.3f" %accuracy) 88 | print("precision") 89 | print("%.3f" % precision) 90 | print("recall") 91 | print("%.3f" %recall) 92 | print("f-score") 93 | print("%.3f" %f1) 94 | 95 | 96 | 97 | 98 | 99 | 100 | -------------------------------------------------------------------------------- /android malware/dynamic/newandroidrnn/lstm2.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from sklearn.cross_validation import train_test_split 3 | import pandas as pd 4 | import numpy as np 5 | np.random.seed(1337) # for reproducibility 6 | from keras.preprocessing import sequence 7 | from keras.utils import np_utils 8 | from keras.models import Sequential 9 | from keras.layers import Dense, Dropout, Activation, Embedding 10 | from keras.layers import LSTM, SimpleRNN, GRU 11 | from keras.datasets import imdb 12 | from keras.utils.np_utils import to_categorical 13 | from sklearn.metrics import (precision_score, recall_score,f1_score, accuracy_score,mean_squared_error,mean_absolute_error) 14 | from sklearn import metrics 15 | from sklearn.preprocessing import Normalizer 16 | import h5py 17 | from keras import callbacks 18 | from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, CSVLogger 19 | 20 | traindata = pd.read_csv('new/Testing.csv', header=None) 21 | testdata = pd.read_csv('new/Training.csv', header=None) 22 | 23 | 24 | X = traindata.iloc[:,0:42] 25 | Y = traindata.iloc[:,42] 26 | C = testdata.iloc[:,42] 27 | T = testdata.iloc[:,0:42] 28 | 29 | scaler = Normalizer().fit(X) 30 | trainX = scaler.transform(X) 31 | # summarize transformed data 32 | np.set_printoptions(precision=3) 33 | #print(trainX[0:5,:]) 34 | 35 | scaler = Normalizer().fit(T) 36 | testT = scaler.transform(T) 37 | # summarize transformed data 38 | np.set_printoptions(precision=3) 39 | #print(testT[0:5,:]) 40 
| 41 | 42 | y_train = np.array(Y) 43 | y_test = np.array(C) 44 | 45 | 46 | # reshape input to be [samples, time steps, features] 47 | X_train = np.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1])) 48 | X_test = np.reshape(testT, (testT.shape[0], 1, testT.shape[1])) 49 | 50 | 51 | batch_size = 32 52 | 53 | # 1. define the network 54 | model = Sequential() 55 | model.add(SimpleRNN(8,input_dim=42, return_sequences=True)) # try using a GRU instead, for fun 56 | model.add(Dropout(0.1)) 57 | model.add(SimpleRNN(8, return_sequences=False)) # try using a GRU instead, for fun 58 | model.add(Dropout(0.1)) 59 | model.add(Dense(1)) 60 | model.add(Activation('sigmoid')) 61 | 62 | # try using different optimizers and different optimizer configs 63 | model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy']) 64 | checkpointer = callbacks.ModelCheckpoint(filepath="logs/2/checkpoint-{epoch:02d}.hdf5", verbose=1, save_best_only=True, monitor='val_acc',mode='max') 65 | csv_logger = CSVLogger('logs/2/training_set_iranalysis.csv',separator=',', append=False) 66 | model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=1000, validation_data=(X_test, y_test),callbacks=[checkpointer,csv_logger]) 67 | model.save("logs/2/lstm1layer_model.hdf5") 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | -------------------------------------------------------------------------------- /android malware/dynamic/newandroidrnn/lstm2test.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from sklearn.cross_validation import train_test_split 3 | import pandas as pd 4 | import numpy as np 5 | np.random.seed(1337) # for reproducibility 6 | from keras.preprocessing import sequence 7 | from keras.utils import np_utils 8 | from keras.models import Sequential 9 | from keras.layers import Dense, Dropout, Activation, Embedding 10 | from keras.layers import LSTM, SimpleRNN, GRU 11 | from keras.datasets import imdb 12 | from keras.utils.np_utils import to_categorical 13 | from sklearn.metrics import (precision_score, recall_score,f1_score, accuracy_score,mean_squared_error,mean_absolute_error) 14 | from sklearn import metrics 15 | from sklearn.preprocessing import Normalizer 16 | import h5py 17 | from keras import callbacks 18 | from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, CSVLogger 19 | from sklearn.metrics import (precision_score, recall_score,f1_score, accuracy_score,mean_squared_error,mean_absolute_error) 20 | 21 | traindata = pd.read_csv('new/Training.csv', header=None) 22 | testdata = pd.read_csv('new/Training.csv', header=None) 23 | 24 | 25 | X = traindata.iloc[:,0:42] 26 | Y = traindata.iloc[:,42] 27 | C = testdata.iloc[:,42] 28 | T = testdata.iloc[:,0:42] 29 | 30 | scaler = Normalizer().fit(X) 31 | trainX = scaler.transform(X) 32 | # summarize transformed data 33 | np.set_printoptions(precision=3) 34 | #print(trainX[0:5,:]) 35 | 36 | scaler = Normalizer().fit(T) 37 | testT = scaler.transform(T) 38 | # summarize transformed data 39 | np.set_printoptions(precision=3) 40 | #print(testT[0:5,:]) 41 | 42 | 43 | y_train = np.array(Y) 44 | y_test = np.array(C) 45 | 46 | 47 | # reshape input to be [samples, time steps, features] 48 | X_train = np.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1])) 49 | X_test = np.reshape(testT, (testT.shape[0], 1, testT.shape[1])) 50 | 51 | 52 | batch_size = 32 53 | 54 | # 1. 
define the network 55 | model = Sequential() 56 | model.add(SimpleRNN(8,input_dim=42, return_sequences=True)) # try using a GRU instead, for fun 57 | model.add(Dropout(0.1)) 58 | model.add(SimpleRNN(8, return_sequences=False)) # try using a GRU instead, for fun 59 | model.add(Dropout(0.1)) 60 | model.add(Dense(1)) 61 | model.add(Activation('sigmoid')) 62 | ''' 63 | # try using different optimizers and different optimizer configs 64 | model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy']) 65 | checkpointer = callbacks.ModelCheckpoint(filepath="logs/2/checkpoint-{epoch:02d}.hdf5", verbose=1, save_best_only=True, monitor='val_acc',mode='max') 66 | csv_logger = CSVLogger('logs/2/training_set_iranalysis.csv',separator=',', append=False) 67 | model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=1000, validation_data=(X_test, y_test),callbacks=[checkpointer,csv_logger]) 68 | model.save("logs/2/lstm1layer_model.hdf5") 69 | ''' 70 | 71 | model.load_weights("logs/2/checkpoint-05.hdf5") 72 | model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy']) 73 | loss, accuracy = model.evaluate(X_train, y_train) 74 | print("\nLoss: %.2f, Accuracy: %.2f%%" % (loss, accuracy*100)) 75 | 76 | expected = y_train 77 | predicted = model.predict_classes(X_train) 78 | 79 | accuracy = accuracy_score(expected, predicted) 80 | precision = precision_score(expected, predicted) 81 | recall = recall_score(expected, predicted, average="binary") 82 | f1 = f1_score(expected, predicted , average="binary") 83 | 84 | print("Accuracy") 85 | print("%.3f" %accuracy) 86 | print("precision") 87 | print("%.3f" % precision) 88 | print("recall") 89 | print("%.3f" %recall) 90 | print("f-score") 91 | print("%.3f" %f1) 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /android malware/dynamic/newandroidrnn/lstm3.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from sklearn.cross_validation import train_test_split 3 | import pandas as pd 4 | import numpy as np 5 | np.random.seed(1337) # for reproducibility 6 | from keras.preprocessing import sequence 7 | from keras.utils import np_utils 8 | from keras.models import Sequential 9 | from keras.layers import Dense, Dropout, Activation, Embedding 10 | from keras.layers import LSTM, SimpleRNN, GRU 11 | from keras.datasets import imdb 12 | from keras.utils.np_utils import to_categorical 13 | from sklearn.metrics import (precision_score, recall_score,f1_score, accuracy_score,mean_squared_error,mean_absolute_error) 14 | from sklearn import metrics 15 | from sklearn.preprocessing import Normalizer 16 | import h5py 17 | from keras import callbacks 18 | from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, CSVLogger 19 | 20 | traindata = pd.read_csv('new/Testing.csv', header=None) 21 | testdata = pd.read_csv('new/Training.csv', header=None) 22 | 23 | 24 | X = traindata.iloc[:,0:42] 25 | Y = traindata.iloc[:,42] 26 | C = testdata.iloc[:,42] 27 | T = testdata.iloc[:,0:42] 28 | 29 | scaler = Normalizer().fit(X) 30 | trainX = scaler.transform(X) 31 | # summarize transformed data 32 | np.set_printoptions(precision=3) 33 | #print(trainX[0:5,:]) 34 | 35 | scaler = Normalizer().fit(T) 36 | testT = scaler.transform(T) 37 | # summarize transformed data 38 | np.set_printoptions(precision=3) 39 | #print(testT[0:5,:]) 40 | 41 | 42 | y_train = np.array(Y) 43 | y_test = np.array(C) 44 | 45 | 
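# --- Editor's note (not part of the original script) ---
# The reshape just below turns each 42-feature row into a length-1 sequence,
# i.e. [samples, time steps, features] with time steps = 1, which is the input
# layout Keras recurrent layers expect. `SimpleRNN(..., input_dim=42)` further
# down is Keras 1.x shorthand for the same thing; a hedged sketch of the
# equivalent spelling under the newer Keras API (an assumption -- these
# scripts target Keras 1.x) would be:
#
#   X_train = trainX.reshape((trainX.shape[0], 1, trainX.shape[1]))
#   model.add(SimpleRNN(16, input_shape=(1, 42), return_sequences=True))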
46 | # reshape input to be [samples, time steps, features] 47 | X_train = np.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1])) 48 | X_test = np.reshape(testT, (testT.shape[0], 1, testT.shape[1])) 49 | 50 | 51 | batch_size = 32 52 | 53 | # 1. define the network 54 | model = Sequential() 55 | model.add(SimpleRNN(16,input_dim=42, return_sequences=True)) # try using a GRU instead, for fun 56 | model.add(Dropout(0.1)) 57 | model.add(SimpleRNN(16, return_sequences=True)) # try using a GRU instead, for fun 58 | model.add(Dropout(0.1)) 59 | model.add(SimpleRNN(16, return_sequences=False)) # try using a GRU instead, for fun 60 | model.add(Dropout(0.1)) 61 | model.add(Dense(1)) 62 | model.add(Activation('sigmoid')) 63 | 64 | # try using different optimizers and different optimizer configs 65 | model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy']) 66 | checkpointer = callbacks.ModelCheckpoint(filepath="logs/3/checkpoint-{epoch:02d}.hdf5", verbose=1, save_best_only=True, monitor='val_acc',mode='max') 67 | csv_logger = CSVLogger('logs/3/training_set_iranalysis.csv',separator=',', append=False) 68 | model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=1000, validation_data=(X_test, y_test),callbacks=[checkpointer,csv_logger]) 69 | model.save("logs/3/lstm1layer_model.hdf5") 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | -------------------------------------------------------------------------------- /android malware/dynamic/newandroidrnn/lstm3test.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from sklearn.cross_validation import train_test_split 3 | import pandas as pd 4 | import numpy as np 5 | np.random.seed(1337) # for reproducibility 6 | from keras.preprocessing import sequence 7 | from keras.utils import np_utils 8 | from keras.models import Sequential 9 | from keras.layers import Dense, Dropout, Activation, Embedding 10 | from keras.layers import LSTM, SimpleRNN, GRU 11 | from keras.datasets import imdb 12 | from keras.utils.np_utils import to_categorical 13 | from sklearn.metrics import (precision_score, recall_score,f1_score, accuracy_score,mean_squared_error,mean_absolute_error) 14 | from sklearn import metrics 15 | from sklearn.preprocessing import Normalizer 16 | import h5py 17 | from keras import callbacks 18 | from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, CSVLogger 19 | 20 | traindata = pd.read_csv('new/Testing.csv', header=None) 21 | testdata = pd.read_csv('new/Training.csv', header=None) 22 | 23 | 24 | X = traindata.iloc[:,0:42] 25 | Y = traindata.iloc[:,42] 26 | C = testdata.iloc[:,42] 27 | T = testdata.iloc[:,0:42] 28 | 29 | scaler = Normalizer().fit(X) 30 | trainX = scaler.transform(X) 31 | # summarize transformed data 32 | np.set_printoptions(precision=3) 33 | #print(trainX[0:5,:]) 34 | 35 | scaler = Normalizer().fit(T) 36 | testT = scaler.transform(T) 37 | # summarize transformed data 38 | np.set_printoptions(precision=3) 39 | #print(testT[0:5,:]) 40 | 41 | 42 | y_train = np.array(Y) 43 | y_test = np.array(C) 44 | 45 | 46 | # reshape input to be [samples, time steps, features] 47 | X_train = np.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1])) 48 | X_test = np.reshape(testT, (testT.shape[0], 1, testT.shape[1])) 49 | 50 | 51 | batch_size = 32 52 | 53 | # 1. 
define the network 54 | model = Sequential() 55 | model.add(SimpleRNN(16,input_dim=42, return_sequences=True)) # try using a GRU instead, for fun 56 | model.add(Dropout(0.1)) 57 | model.add(SimpleRNN(16, return_sequences=True)) # try using a GRU instead, for fun 58 | model.add(Dropout(0.1)) 59 | model.add(SimpleRNN(16, return_sequences=False)) # try using a GRU instead, for fun 60 | model.add(Dropout(0.1)) 61 | model.add(Dense(1)) 62 | model.add(Activation('sigmoid')) 63 | 64 | ''' 65 | # try using different optimizers and different optimizer configs 66 | model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy']) 67 | checkpointer = callbacks.ModelCheckpoint(filepath="logs/3/checkpoint-{epoch:02d}.hdf5", verbose=1, save_best_only=True, monitor='val_acc',mode='max') 68 | csv_logger = CSVLogger('logs/3/training_set_iranalysis.csv',separator=',', append=False) 69 | model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=1000, validation_data=(X_test, y_test),callbacks=[checkpointer,csv_logger]) 70 | model.save("logs/3/lstm1layer_model.hdf5") 71 | ''' 72 | 73 | model.load_weights("logs/3/lstm1layer_model.hdf5") 74 | model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy']) 75 | loss, accuracy = model.evaluate(X_train, y_train) 76 | print("\nLoss: %.2f, Accuracy: %.2f%%" % (loss, accuracy*100)) 77 | 78 | 79 | from sklearn.metrics import (precision_score, recall_score,f1_score, accuracy_score,mean_squared_error,mean_absolute_error) 80 | 81 | expected = y_train 82 | predicted = model.predict_classes(X_train) 83 | 84 | accuracy = accuracy_score(expected, predicted) 85 | precision = precision_score(expected, predicted) 86 | recall = recall_score(expected, predicted, average="binary") 87 | f1 = f1_score(expected, predicted , average="binary") 88 | 89 | print("Accuracy") 90 | print("%.3f" %accuracy) 91 | print("precision") 92 | print("%.3f" % precision) 93 | print("recall") 94 | print("%.3f" %recall) 95 | print("f-score") 96 | print("%.3f" %f1) 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | -------------------------------------------------------------------------------- /android malware/dynamic/newandroidrnn/lstm4.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from sklearn.cross_validation import train_test_split 3 | import pandas as pd 4 | import numpy as np 5 | np.random.seed(1337) # for reproducibility 6 | from keras.preprocessing import sequence 7 | from keras.utils import np_utils 8 | from keras.models import Sequential 9 | from keras.layers import Dense, Dropout, Activation, Embedding 10 | from keras.layers import LSTM, SimpleRNN, GRU 11 | from keras.datasets import imdb 12 | from keras.utils.np_utils import to_categorical 13 | from sklearn.metrics import (precision_score, recall_score,f1_score, accuracy_score,mean_squared_error,mean_absolute_error) 14 | from sklearn import metrics 15 | from sklearn.preprocessing import Normalizer 16 | import h5py 17 | from keras import callbacks 18 | from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, CSVLogger 19 | 20 | traindata = pd.read_csv('new/Testing.csv', header=None) 21 | testdata = pd.read_csv('new/Training.csv', header=None) 22 | 23 | 24 | X = traindata.iloc[:,0:42] 25 | Y = traindata.iloc[:,42] 26 | C = testdata.iloc[:,42] 27 | T = testdata.iloc[:,0:42] 28 | 29 | scaler = Normalizer().fit(X) 30 | trainX = scaler.transform(X) 31 | # summarize transformed data 32 | np.set_printoptions(precision=3) 
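# --- Editor's note (not part of the original script) ---
# Normalizer is stateless: it rescales each row to unit L2 norm independently,
# so fit() learns nothing from the data, and fitting separate scalers on the
# train and test frames, as these scripts do, is harmless. A quick sanity
# check (assuming no all-zero rows in the data) would be:
#
#   assert np.allclose(np.linalg.norm(trainX, axis=1), 1.0)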
33 | #print(trainX[0:5,:]) 34 | 35 | scaler = Normalizer().fit(T) 36 | testT = scaler.transform(T) 37 | # summarize transformed data 38 | np.set_printoptions(precision=3) 39 | #print(testT[0:5,:]) 40 | 41 | 42 | y_train = np.array(Y) 43 | y_test = np.array(C) 44 | 45 | 46 | # reshape input to be [samples, time steps, features] 47 | X_train = np.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1])) 48 | X_test = np.reshape(testT, (testT.shape[0], 1, testT.shape[1])) 49 | 50 | 51 | batch_size = 32 52 | 53 | # 1. define the network 54 | model = Sequential() 55 | model.add(SimpleRNN(32,input_dim=42, return_sequences=True)) # try using a GRU instead, for fun 56 | model.add(Dropout(0.1)) 57 | model.add(SimpleRNN(32, return_sequences=True)) # try using a GRU instead, for fun 58 | model.add(Dropout(0.1)) 59 | model.add(SimpleRNN(32, return_sequences=True)) # try using a GRU instead, for fun 60 | model.add(Dropout(0.1)) 61 | model.add(SimpleRNN(32, return_sequences=False)) # try using a GRU instead, for fun 62 | model.add(Dropout(0.1)) 63 | model.add(Dense(1)) 64 | model.add(Activation('sigmoid')) 65 | 66 | # try using different optimizers and different optimizer configs 67 | model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy']) 68 | checkpointer = callbacks.ModelCheckpoint(filepath="logs/4/checkpoint-{epoch:02d}.hdf5", verbose=1, save_best_only=True, monitor='val_acc',mode='max') 69 | csv_logger = CSVLogger('logs/4/training_set_iranalysis.csv',separator=',', append=False) 70 | model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=1000, validation_data=(X_test, y_test),callbacks=[checkpointer,csv_logger]) 71 | model.save("logs/4/lstm1layer_model.hdf5") 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | -------------------------------------------------------------------------------- /android malware/dynamic/newandroidrnn/lstm4test.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from sklearn.cross_validation import train_test_split 3 | import pandas as pd 4 | import numpy as np 5 | np.random.seed(1337) # for reproducibility 6 | from keras.preprocessing import sequence 7 | from keras.utils import np_utils 8 | from keras.models import Sequential 9 | from keras.layers import Dense, Dropout, Activation, Embedding 10 | from keras.layers import LSTM, SimpleRNN, GRU 11 | from keras.datasets import imdb 12 | from keras.utils.np_utils import to_categorical 13 | from sklearn.metrics import (precision_score, recall_score,f1_score, accuracy_score,mean_squared_error,mean_absolute_error) 14 | from sklearn import metrics 15 | from sklearn.preprocessing import Normalizer 16 | import h5py 17 | from keras import callbacks 18 | from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, CSVLogger 19 | 20 | traindata = pd.read_csv('new/Training.csv', header=None) 21 | testdata = pd.read_csv('new/Training.csv', header=None) 22 | 23 | 24 | X = traindata.iloc[:,0:42] 25 | Y = traindata.iloc[:,42] 26 | C = testdata.iloc[:,42] 27 | T = testdata.iloc[:,0:42] 28 | 29 | scaler = Normalizer().fit(X) 30 | trainX = scaler.transform(X) 31 | # summarize transformed data 32 | np.set_printoptions(precision=3) 33 | #print(trainX[0:5,:]) 34 | 35 | scaler = Normalizer().fit(T) 36 | testT = scaler.transform(T) 37 | # summarize transformed data 38 | np.set_printoptions(precision=3) 39 | #print(testT[0:5,:]) 40 | 41 | 42 | y_train = np.array(Y) 43 | y_test = np.array(C) 44 | 45 | 46 | # reshape input to be 
[samples, time steps, features] 47 | X_train = np.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1])) 48 | X_test = np.reshape(testT, (testT.shape[0], 1, testT.shape[1])) 49 | 50 | 51 | batch_size = 32 52 | 53 | # 1. define the network 54 | model = Sequential() 55 | model.add(SimpleRNN(32,input_dim=42, return_sequences=True)) # try using a GRU instead, for fun 56 | model.add(Dropout(0.1)) 57 | model.add(SimpleRNN(32, return_sequences=True)) # try using a GRU instead, for fun 58 | model.add(Dropout(0.1)) 59 | model.add(SimpleRNN(32, return_sequences=True)) # try using a GRU instead, for fun 60 | model.add(Dropout(0.1)) 61 | model.add(SimpleRNN(32, return_sequences=False)) # try using a GRU instead, for fun 62 | model.add(Dropout(0.1)) 63 | model.add(Dense(1)) 64 | model.add(Activation('sigmoid')) 65 | ''' 66 | # try using different optimizers and different optimizer configs 67 | model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy']) 68 | checkpointer = callbacks.ModelCheckpoint(filepath="logs/4/checkpoint-{epoch:02d}.hdf5", verbose=1, save_best_only=True, monitor='val_acc',mode='max') 69 | csv_logger = CSVLogger('logs/4/training_set_iranalysis.csv',separator=',', append=False) 70 | model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=1000, validation_data=(X_test, y_test),callbacks=[checkpointer,csv_logger]) 71 | model.save("logs/4/lstm1layer_model.hdf5") 72 | ''' 73 | 74 | model.load_weights("logs/4/checkpoint-00.hdf5") 75 | model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy']) 76 | loss, accuracy = model.evaluate(X_train, y_train) 77 | print("\nLoss: %.2f, Accuracy: %.2f%%" % (loss, accuracy*100)) 78 | 79 | from sklearn.metrics import (precision_score, recall_score,f1_score, accuracy_score,mean_squared_error,mean_absolute_error) 80 | 81 | expected = y_train 82 | predicted = model.predict_classes(X_train) 83 | 84 | accuracy = accuracy_score(expected, predicted) 85 | precision = precision_score(expected, predicted) 86 | recall = recall_score(expected, predicted, average="binary") 87 | f1 = f1_score(expected, predicted , average="binary") 88 | 89 | print("Accuracy") 90 | print("%.3f" %accuracy) 91 | print("precision") 92 | print("%.3f" % precision) 93 | print("recall") 94 | print("%.3f" %recall) 95 | print("f-score") 96 | print("%.3f" %f1) 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | -------------------------------------------------------------------------------- /android malware/dynamic/newandroidrnn/lstm6.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from sklearn.cross_validation import train_test_split 3 | import pandas as pd 4 | import numpy as np 5 | np.random.seed(1337) # for reproducibility 6 | from keras.preprocessing import sequence 7 | from keras.utils import np_utils 8 | from keras.models import Sequential 9 | from keras.layers import Dense, Dropout, Activation, Embedding 10 | from keras.layers import LSTM, SimpleRNN, GRU 11 | from keras.datasets import imdb 12 | from keras.utils.np_utils import to_categorical 13 | from sklearn.metrics import (precision_score, recall_score,f1_score, accuracy_score,mean_squared_error,mean_absolute_error) 14 | from sklearn import metrics 15 | from sklearn.preprocessing import Normalizer 16 | import h5py 17 | from keras import callbacks 18 | from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, CSVLogger 19 | 20 | traindata = pd.read_csv('new/Testing.csv', header=None) 21 | testdata 
= pd.read_csv('new/Training.csv', header=None) 22 | 23 | 24 | X = traindata.iloc[:,0:42] 25 | Y = traindata.iloc[:,42] 26 | C = testdata.iloc[:,42] 27 | T = testdata.iloc[:,0:42] 28 | 29 | scaler = Normalizer().fit(X) 30 | trainX = scaler.transform(X) 31 | # summarize transformed data 32 | np.set_printoptions(precision=3) 33 | #print(trainX[0:5,:]) 34 | 35 | scaler = Normalizer().fit(T) 36 | testT = scaler.transform(T) 37 | # summarize transformed data 38 | np.set_printoptions(precision=3) 39 | #print(testT[0:5,:]) 40 | 41 | 42 | y_train = np.array(Y) 43 | y_test = np.array(C) 44 | 45 | 46 | # reshape input to be [samples, time steps, features] 47 | X_train = np.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1])) 48 | X_test = np.reshape(testT, (testT.shape[0], 1, testT.shape[1])) 49 | 50 | 51 | batch_size = 32 52 | 53 | # 1. define the network 54 | model = Sequential() 55 | model.add(SimpleRNN(64,input_dim=42, return_sequences=True)) # try using a GRU instead, for fun 56 | model.add(Dropout(0.1)) 57 | model.add(SimpleRNN(64, return_sequences=True)) # try using a GRU instead, for fun 58 | model.add(Dropout(0.1)) 59 | model.add(SimpleRNN(64, return_sequences=True)) # try using a GRU instead, for fun 60 | model.add(Dropout(0.1)) 61 | model.add(SimpleRNN(64, return_sequences=True)) # try using a GRU instead, for fun 62 | model.add(Dropout(0.1)) 63 | model.add(SimpleRNN(64, return_sequences=True)) # try using a GRU instead, for fun 64 | model.add(Dropout(0.1)) 65 | model.add(SimpleRNN(64, return_sequences=False)) # try using a GRU instead, for fun 66 | model.add(Dropout(0.1)) 67 | model.add(Dense(1)) 68 | model.add(Activation('sigmoid')) 69 | 70 | # try using different optimizers and different optimizer configs 71 | model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy']) 72 | checkpointer = callbacks.ModelCheckpoint(filepath="logs/5/checkpoint-{epoch:02d}.hdf5", verbose=1, save_best_only=True, monitor='val_acc',mode='max') 73 | csv_logger = CSVLogger('logs/5/training_set_iranalysis.csv',separator=',', append=False) 74 | model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=1000, validation_data=(X_test, y_test),callbacks=[checkpointer,csv_logger]) 75 | model.save("logs/5/lstm1layer_model.hdf5") 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | -------------------------------------------------------------------------------- /android malware/dynamic/newandroidrnn/lstm6test.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from sklearn.cross_validation import train_test_split 3 | import pandas as pd 4 | import numpy as np 5 | np.random.seed(1337) # for reproducibility 6 | from keras.preprocessing import sequence 7 | from keras.utils import np_utils 8 | from keras.models import Sequential 9 | from keras.layers import Dense, Dropout, Activation, Embedding 10 | from keras.layers import LSTM, SimpleRNN, GRU 11 | from keras.datasets import imdb 12 | from keras.utils.np_utils import to_categorical 13 | from sklearn.metrics import (precision_score, recall_score,f1_score, accuracy_score,mean_squared_error,mean_absolute_error) 14 | from sklearn import metrics 15 | from sklearn.preprocessing import Normalizer 16 | import h5py 17 | from keras import callbacks 18 | from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, CSVLogger 19 | 20 | traindata = pd.read_csv('new/Training.csv', header=None) 21 | testdata = pd.read_csv('new/Training.csv', header=None) 22 | 23 | 24 | X = 
traindata.iloc[:,0:42] 25 | Y = traindata.iloc[:,42] 26 | C = testdata.iloc[:,42] 27 | T = testdata.iloc[:,0:42] 28 | 29 | scaler = Normalizer().fit(X) 30 | trainX = scaler.transform(X) 31 | # summarize transformed data 32 | np.set_printoptions(precision=3) 33 | #print(trainX[0:5,:]) 34 | 35 | scaler = Normalizer().fit(T) 36 | testT = scaler.transform(T) 37 | # summarize transformed data 38 | np.set_printoptions(precision=3) 39 | #print(testT[0:5,:]) 40 | 41 | 42 | y_train = np.array(Y) 43 | y_test = np.array(C) 44 | 45 | 46 | # reshape input to be [samples, time steps, features] 47 | X_train = np.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1])) 48 | X_test = np.reshape(testT, (testT.shape[0], 1, testT.shape[1])) 49 | 50 | 51 | batch_size = 32 52 | 53 | # 1. define the network 54 | model = Sequential() 55 | model.add(SimpleRNN(64,input_dim=42, return_sequences=True)) # try using a GRU instead, for fun 56 | model.add(Dropout(0.1)) 57 | model.add(SimpleRNN(64, return_sequences=True)) # try using a GRU instead, for fun 58 | model.add(Dropout(0.1)) 59 | model.add(SimpleRNN(64, return_sequences=True)) # try using a GRU instead, for fun 60 | model.add(Dropout(0.1)) 61 | model.add(SimpleRNN(64, return_sequences=True)) # try using a GRU instead, for fun 62 | model.add(Dropout(0.1)) 63 | model.add(SimpleRNN(64, return_sequences=True)) # try using a GRU instead, for fun 64 | model.add(Dropout(0.1)) 65 | model.add(SimpleRNN(64, return_sequences=False)) # try using a GRU instead, for fun 66 | model.add(Dropout(0.1)) 67 | model.add(Dense(1)) 68 | model.add(Activation('sigmoid')) 69 | ''' 70 | # try using different optimizers and different optimizer configs 71 | model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy']) 72 | checkpointer = callbacks.ModelCheckpoint(filepath="logs/5/checkpoint-{epoch:02d}.hdf5", verbose=1, save_best_only=True, monitor='val_acc',mode='max') 73 | csv_logger = CSVLogger('logs/5/training_set_iranalysis.csv',separator=',', append=False) 74 | model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=1000, validation_data=(X_test, y_test),callbacks=[checkpointer,csv_logger]) 75 | model.save("logs/5/lstm1layer_model.hdf5") 76 | ''' 77 | 78 | model.load_weights("logs/5/lstm1layer_model.hdf5") 79 | model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy']) 80 | loss, accuracy = model.evaluate(X_train, y_train) 81 | print("\nLoss: %.2f, Accuracy: %.2f%%" % (loss, accuracy*100)) 82 | 83 | from sklearn.metrics import (precision_score, recall_score,f1_score, accuracy_score,mean_squared_error,mean_absolute_error) 84 | 85 | expected = y_train 86 | predicted = model.predict_classes(X_train) 87 | 88 | accuracy = accuracy_score(expected, predicted) 89 | precision = precision_score(expected, predicted) 90 | recall = recall_score(expected, predicted, average="binary") 91 | f1 = f1_score(expected, predicted , average="binary") 92 | 93 | print("Accuracy") 94 | print("%.3f" %accuracy) 95 | print("precision") 96 | print("%.3f" % precision) 97 | print("recall") 98 | print("%.3f" %recall) 99 | print("f-score") 100 | print("%.3f" %f1) 101 | 102 | 103 | 104 | 105 | 106 | 107 | -------------------------------------------------------------------------------- /android malware/static/android malware classification/crossval1.py: -------------------------------------------------------------------------------- 1 | import keras.preprocessing.text 2 | import numpy as np 3 | import pandas as pd 4 | np.random.seed(1337) # for reproducibility 5 | from 
keras.preprocessing import sequence 6 | from keras.models import Sequential 7 | from keras.layers.core import Dense, Activation 8 | from keras.layers.embeddings import Embedding 9 | from keras.layers.recurrent import LSTM 10 | from sklearn.metrics import (precision_score, recall_score,f1_score, accuracy_score,mean_squared_error,mean_absolute_error) 11 | from sklearn import metrics 12 | from sklearn.metrics import roc_auc_score 13 | from keras.utils.np_utils import to_categorical 14 | from sklearn.cross_validation import train_test_split 15 | from keras.layers import Dropout 16 | from sklearn.cross_validation import StratifiedKFold 17 | from sklearn.cross_validation import cross_val_score 18 | from keras.wrappers.scikit_learn import KerasClassifier 19 | 20 | print("Loading") 21 | 22 | traindata = pd.read_csv('space/CDMC2016_AndroidLabel.Train.csv', header=None) 23 | 24 | 25 | x = traindata.iloc[:,1] 26 | y = traindata.iloc[:,0] 27 | 28 | 29 | tk = keras.preprocessing.text.Tokenizer(nb_words=5000, filters=keras.preprocessing.text.base_filter(), lower=True, split=",") 30 | tk.fit_on_texts(x) 31 | X_train = tk.texts_to_sequences(x) 32 | 33 | 34 | X_train=np.array(X_train) 35 | y_train = np.array(y) 36 | 37 | 38 | batch_size = 64 39 | max_len = 500 40 | print "max_len ", max_len 41 | print('Pad sequences (samples x time)') 42 | 43 | X_train = sequence.pad_sequences(X_train, maxlen=max_len) 44 | 45 | max_features = 5000 46 | model = Sequential() 47 | print('Build model...') 48 | embedding_vecor_length = 32 49 | 50 | def create_model(): 51 | model = Sequential() 52 | model.add(Embedding(max_features, embedding_vecor_length, input_length=max_len)) 53 | model.add(Dropout(0.2)) 54 | model.add(LSTM(100)) 55 | model.add(Dropout(0.2)) 56 | model.add(Dense(1)) 57 | model.add(Activation('sigmoid')) 58 | model.compile(loss='binary_crossentropy', optimizer='adam',metrics=['accuracy']) 59 | print(model.summary()) 60 | return model 61 | 62 | 63 | # fix random seed for reproducibility 64 | seed = 7 65 | np.random.seed(seed) 66 | 67 | model = KerasClassifier(build_fn=create_model, nb_epoch=20, batch_size=32) 68 | 69 | # evaluate using 10-fold cross validation 70 | kfold = StratifiedKFold(y=y_train, n_folds=10, shuffle=True, random_state=seed) 71 | results = cross_val_score(model, X_train, y_train, cv=kfold) 72 | print(results.mean()) 73 | 74 | 75 | -------------------------------------------------------------------------------- /android malware/static/android malware classification/crossval2.py: -------------------------------------------------------------------------------- 1 | import keras.preprocessing.text 2 | import numpy as np 3 | import pandas as pd 4 | np.random.seed(1337) # for reproducibility 5 | from keras.preprocessing import sequence 6 | from keras.models import Sequential 7 | from keras.layers.core import Dense, Activation 8 | from keras.layers.embeddings import Embedding 9 | from keras.layers.recurrent import LSTM 10 | from sklearn.metrics import (precision_score, recall_score,f1_score, accuracy_score,mean_squared_error,mean_absolute_error) 11 | from sklearn import metrics 12 | from sklearn.metrics import roc_auc_score 13 | from keras.utils.np_utils import to_categorical 14 | from sklearn.cross_validation import train_test_split 15 | from keras.layers import Dropout 16 | from keras.layers import LSTM 17 | from keras.layers.convolutional import Convolution1D 18 | from keras.layers.convolutional import MaxPooling1D 19 | from keras.layers.embeddings import Embedding 20 | from keras.preprocessing 
import sequence 21 | from theano.tensor.shared_randomstreams import RandomStreams 22 | from sklearn.cross_validation import StratifiedKFold 23 | from sklearn.cross_validation import cross_val_score 24 | from keras.wrappers.scikit_learn import KerasClassifier 25 | 26 | 27 | # fix random seed for reproducibility 28 | np.random.seed(7) 29 | srng = RandomStreams(7) 30 | 31 | print("Loading") 32 | 33 | traindata = pd.read_csv('space/CDMC2016_AndroidLabel.Train.csv', header=None) 34 | 35 | 36 | x = traindata.iloc[:,1] 37 | y = traindata.iloc[:,0] 38 | 39 | 40 | tk = keras.preprocessing.text.Tokenizer(nb_words=5000, filters=keras.preprocessing.text.base_filter(), lower=True, split=",") 41 | tk.fit_on_texts(x) 42 | X_train = tk.texts_to_sequences(x) 43 | 44 | 45 | X_train=np.array(X_train) 46 | y_train = np.array(y) 47 | 48 | 49 | batch_size = 64 50 | max_len = 500 51 | print "max_len ", max_len 52 | print('Pad sequences (samples x time)') 53 | 54 | X_train = sequence.pad_sequences(X_train, maxlen=max_len) 55 | 56 | 57 | 58 | max_features = 5000 59 | embedding_vecor_length = 32 60 | 61 | 62 | def create_model(): 63 | model = Sequential() 64 | model.add(Embedding(max_features, embedding_vecor_length, input_length=max_len)) 65 | model.add(Convolution1D(nb_filter=32, filter_length=3, border_mode='same', activation='relu')) 66 | model.add(MaxPooling1D(pool_length=2)) 67 | model.add(LSTM(100)) 68 | model.add(Dense(1, activation='sigmoid')) 69 | model.compile(loss='binary_crossentropy', optimizer='adam',metrics=['accuracy']) 70 | print(model.summary()) 71 | return model 72 | 73 | # fix random seed for reproducibility 74 | seed = 7 75 | np.random.seed(seed) 76 | 77 | model = KerasClassifier(build_fn=create_model, nb_epoch=20, batch_size=32) 78 | 79 | # evaluate using 10-fold cross validation 80 | kfold = StratifiedKFold(y=y_train, n_folds=10, shuffle=True, random_state=seed) 81 | results = cross_val_score(model, X_train, y_train, cv=kfold) 82 | print(results.mean()) 83 | 84 | -------------------------------------------------------------------------------- /android malware/static/android malware classification/ker1.py: -------------------------------------------------------------------------------- 1 | import keras.preprocessing.text 2 | import numpy as np 3 | import pandas as pd 4 | np.random.seed(1337) # for reproducibility 5 | from keras.preprocessing import sequence 6 | from keras.models import Sequential 7 | from keras.layers.core import Dense, Activation 8 | from keras.layers.embeddings import Embedding 9 | from keras.layers.recurrent import LSTM 10 | from sklearn.metrics import (precision_score, recall_score,f1_score, accuracy_score,mean_squared_error,mean_absolute_error) 11 | from sklearn import metrics 12 | from sklearn.metrics import roc_auc_score 13 | 14 | 15 | print("Loading") 16 | 17 | #traindata = pd.read_csv('CDMC2016_AndroidLabel.Train.csv', header=None) 18 | #testdata = pd.read_csv('CDMC2016_AndroidPermissions.Test.csv', header=None) 19 | 20 | traindata = pd.read_csv('space/CDMC2016_AndroidLabel.Train.csv', header=None) 21 | testdata = pd.read_csv('space/CDMC2016_AndroidPermissions.Test.csv', header=None) 22 | 23 | x = traindata.iloc[:,1] 24 | y = traindata.iloc[:,0] 25 | t = testdata.iloc[:,0] 26 | 27 | 28 | tk = keras.preprocessing.text.Tokenizer(nb_words=500,filters=keras.preprocessing.text.base_filter(), lower=True, split=" ") 29 | tk.fit_on_texts(x) 30 | 31 | x = tk.texts_to_sequences(x) 32 | print(x) 33 | ''' 34 | tk = keras.preprocessing.text.Tokenizer(nb_words=500, 
filters=keras.preprocessing.text.base_filter(), lower=True, split=" ") 35 | tk.fit_on_texts(t) 36 | t = tk.texts_to_sequences(t) 37 | print(t) 38 | ''' 39 | ''' 40 | max_len = 200 41 | print "max_len ", max_len 42 | print('Pad sequences (samples x time)') 43 | 44 | x = sequence.pad_sequences(x, maxlen=max_len) 45 | t = sequence.pad_sequences(t, maxlen=max_len) 46 | 47 | max_features = 500 48 | 49 | model = Sequential() 50 | print('Build model...') 51 | 52 | model = Sequential() 53 | model.add(Embedding(max_features, 128, input_length=max_len, dropout=0.1)) 54 | model.add(LSTM(128, dropout_W=0.1, dropout_U=0.1)) 55 | model.add(Dense(1)) 56 | model.add(Activation('sigmoid')) 57 | 58 | model.compile(loss='binary_crossentropy', optimizer='adam',metrics=['accuracy']) 59 | model.fit(x, y, batch_size=32, nb_epoch=30) 60 | score, acc = model.evaluate(x, y, batch_size=32) 61 | print('Test score:', score) 62 | print('Test accuracy:', acc) 63 | 64 | y_pred = model.predict_classes(t) 65 | 66 | np.savetxt('output.txt', y_pred, fmt='%01d') 67 | 68 | 69 | 70 | ''' 71 | -------------------------------------------------------------------------------- /android malware/static/android malware classification/ker2.py: -------------------------------------------------------------------------------- 1 | import keras.preprocessing.text 2 | import numpy as np 3 | import pandas as pd 4 | np.random.seed(1337) # for reproducibility 5 | from keras.preprocessing import sequence 6 | from keras.models import Sequential 7 | from keras.layers.core import Dense, Activation 8 | from keras.layers.embeddings import Embedding 9 | from keras.layers.recurrent import LSTM 10 | from sklearn.metrics import (precision_score, recall_score,f1_score, accuracy_score,mean_squared_error,mean_absolute_error) 11 | from sklearn import metrics 12 | from sklearn.metrics import roc_auc_score 13 | from keras.utils.np_utils import to_categorical 14 | from sklearn.cross_validation import train_test_split 15 | from keras.layers import Dropout 16 | 17 | print("Loading") 18 | 19 | traindata = pd.read_csv('space/CDMC2016_AndroidLabel.Train.csv', header=None) 20 | 21 | 22 | x = traindata.iloc[:,1] 23 | y = traindata.iloc[:,0] 24 | 25 | 26 | tk = keras.preprocessing.text.Tokenizer(nb_words=5000, filters=keras.preprocessing.text.base_filter(), lower=True, split=",") 27 | tk.fit_on_texts(x) 28 | X_train = tk.texts_to_sequences(x) 29 | 30 | 31 | 32 | 33 | X_train=np.array(X_train) 34 | 35 | 36 | 37 | y_train = np.array(y) 38 | 39 | 40 | batch_size = 64 41 | max_len = 500 42 | print "max_len ", max_len 43 | print('Pad sequences (samples x time)') 44 | 45 | X_train = sequence.pad_sequences(X_train, maxlen=max_len) 46 | 47 | 48 | #y_train= to_categorical(y_train) 49 | #y_test = to_categorical(y_test) 50 | 51 | 52 | max_features = 5000 53 | model = Sequential() 54 | print('Build model...') 55 | embedding_vecor_length = 32 56 | 57 | model = Sequential() 58 | model.add(Embedding(max_features, embedding_vecor_length, input_length=max_len)) 59 | model.add(Dropout(0.2)) 60 | model.add(LSTM(100)) 61 | model.add(Dropout(0.2)) 62 | model.add(Dense(1)) 63 | model.add(Activation('sigmoid')) 64 | 65 | model.compile(loss='binary_crossentropy', optimizer='adam',metrics=['accuracy']) 66 | print(model.summary()) 67 | 68 | model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=15, 69 | validation_split=0.20, shuffle=True) 70 | score, acc = model.evaluate(X_train, y_train, 71 | batch_size=64) 72 | print('Test score:', score) 73 | print('Test accuracy:', acc) 74 | 75 | 76 | 
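# --- Editor's note (not part of the original script) ---
# ker3.py below (like val.py, val1.py and val2.py later on) fits a *second*
# Tokenizer on the test text, so the same permission string can be mapped to
# different integer ids in train and test. The usual approach is to fit one
# tokenizer on the training text and reuse its vocabulary; a sketch under the
# newer Keras API (an assumption -- `nb_words` and `base_filter()` are
# Keras 1.x spellings; the modern argument is `num_words`, and the default
# `filters` string matches what base_filter() returned):
#
#   tk = keras.preprocessing.text.Tokenizer(num_words=5000, lower=True, split=",")
#   tk.fit_on_texts(x)                    # fit on the training text only
#   X_train = tk.texts_to_sequences(x)
#   X_test = tk.texts_to_sequences(xt)    # same vocabulary for the test split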
-------------------------------------------------------------------------------- /android malware/static/android malware classification/ker3.py: -------------------------------------------------------------------------------- 1 | import keras.preprocessing.text 2 | import numpy as np 3 | import pandas as pd 4 | np.random.seed(1337) # for reproducibility 5 | from keras.preprocessing import sequence 6 | from keras.models import Sequential 7 | from keras.layers.core import Dense, Activation 8 | from keras.layers.embeddings import Embedding 9 | from keras.layers.recurrent import LSTM 10 | from sklearn.metrics import (precision_score, recall_score,f1_score, accuracy_score,mean_squared_error,mean_absolute_error) 11 | from sklearn import metrics 12 | from sklearn.metrics import roc_auc_score 13 | from keras.utils.np_utils import to_categorical 14 | from sklearn.cross_validation import train_test_split 15 | from keras.layers import Dropout 16 | 17 | print("Loading") 18 | 19 | traindata = pd.read_csv('space/CDMC2016_AndroidLabel.Train.csv', header=None) 20 | testdata = pd.read_csv('train_test.csv', header=None) 21 | 22 | x = traindata.iloc[:,1] 23 | y = traindata.iloc[:,0] 24 | xt = testdata.iloc[:,1] 25 | yt = testdata.iloc[:,0] 26 | 27 | 28 | tk = keras.preprocessing.text.Tokenizer(nb_words=5000, filters=keras.preprocessing.text.base_filter(), lower=True, split=",") 29 | tk.fit_on_texts(x) 30 | X_train = tk.texts_to_sequences(x) 31 | 32 | 33 | tk = keras.preprocessing.text.Tokenizer(nb_words=5000, filters=keras.preprocessing.text.base_filter(), lower=True, split=",") 34 | tk.fit_on_texts(xt) 35 | X_test = tk.texts_to_sequences(xt) 36 | 37 | 38 | 39 | X_train=np.array(X_train) 40 | X_test=np.array(X_test) 41 | 42 | 43 | y_train = np.array(y) 44 | y_test = np.array(yt) 45 | 46 | batch_size = 64 47 | max_len = 500 48 | print "max_len ", max_len 49 | print('Pad sequences (samples x time)') 50 | 51 | X_train = sequence.pad_sequences(X_train, maxlen=max_len) 52 | X_test = sequence.pad_sequences(X_test, maxlen=max_len) 53 | 54 | max_features = 5000 55 | model = Sequential() 56 | print('Build model...') 57 | embedding_vecor_length = 32 58 | 59 | model = Sequential() 60 | model.add(Embedding(max_features, embedding_vecor_length, input_length=max_len)) 61 | model.add(Dropout(0.2)) 62 | model.add(LSTM(100)) 63 | model.add(Dropout(0.2)) 64 | model.add(Dense(1)) 65 | model.add(Activation('sigmoid')) 66 | 67 | model.compile(loss='binary_crossentropy', optimizer='adam',metrics=['accuracy']) 68 | print(model.summary()) 69 | 70 | model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=50,validation_data=(X_test, y_test),shuffle=True) 71 | score, acc = model.evaluate(X_test, y_test) 72 | 73 | print('Test score:', score) 74 | print('Test accuracy:', acc) 75 | y_pred = model.predict_classes(X_test) 76 | np.savetxt('output.txt', np.transpose([y_test,y_pred]), fmt='%01d') 77 | 78 | -------------------------------------------------------------------------------- /android malware/static/android malware classification/val.py: -------------------------------------------------------------------------------- 1 | import keras.preprocessing.text 2 | import numpy as np 3 | import pandas as pd 4 | np.random.seed(1337) # for reproducibility 5 | from keras.preprocessing import sequence 6 | from keras.models import Sequential 7 | from keras.layers.core import Dense, Activation 8 | from keras.layers.embeddings import Embedding 9 | from keras.layers.recurrent import LSTM 10 | from sklearn.metrics import (precision_score, 
recall_score,f1_score, accuracy_score,mean_squared_error,mean_absolute_error) 11 | from sklearn import metrics 12 | from sklearn.metrics import roc_auc_score 13 | from keras.utils.np_utils import to_categorical 14 | from sklearn.cross_validation import train_test_split 15 | 16 | 17 | print("Loading") 18 | 19 | traindata = pd.read_csv('space/CDMC2016_AndroidLabel.Train.csv', header=None) 20 | 21 | 22 | x = traindata.iloc[:,1] 23 | y = traindata.iloc[:,0] 24 | 25 | 26 | X_train, X_test, y_train, y_test = train_test_split(x, y,test_size=0.1,random_state=42) 27 | 28 | 29 | tk = keras.preprocessing.text.Tokenizer(nb_words=2000, filters=keras.preprocessing.text.base_filter(), lower=True, split=",") 30 | tk.fit_on_texts(X_train) 31 | X_train = tk.texts_to_sequences(X_train) 32 | 33 | 34 | tk = keras.preprocessing.text.Tokenizer(nb_words=2000, filters=keras.preprocessing.text.base_filter(), lower=True, split=",") 35 | tk.fit_on_texts(X_test) 36 | X_test = tk.texts_to_sequences(X_test) 37 | 38 | X_train=np.array(X_train) 39 | X_test=np.array(X_test) 40 | 41 | 42 | y_train = np.array(y_train) 43 | y_test = np.array(y_test) 44 | 45 | batch_size = 2 46 | max_len = 400 47 | print "max_len ", max_len 48 | print('Pad sequences (samples x time)') 49 | 50 | X_train = sequence.pad_sequences(X_train, maxlen=max_len) 51 | X_test = sequence.pad_sequences(X_test, maxlen=max_len) 52 | 53 | #y_train= to_categorical(y_train) 54 | #y_test = to_categorical(y_test) 55 | 56 | 57 | max_features = 2000 58 | model = Sequential() 59 | print('Build model...') 60 | 61 | model = Sequential() 62 | model.add(Embedding(max_features, 128, input_length=max_len)) 63 | model.add(LSTM(128, dropout_W=0.4, dropout_U=0.4)) 64 | model.add(Dense(1)) 65 | model.add(Activation('sigmoid'))  # editor's fix: softmax over a single unit is constant 1.0; sigmoid matches binary_crossentropy 66 | 67 | model.compile(loss='binary_crossentropy', optimizer='adam',metrics=['accuracy']) 68 | 69 | 70 | model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=30, 71 | validation_data=(X_test, y_test), shuffle=True) 72 | score, acc = model.evaluate(X_test, y_test, 73 | batch_size=batch_size) 74 | print('Test score:', score) 75 | print('Test accuracy:', acc) 76 | 77 | 78 | -------------------------------------------------------------------------------- /android malware/static/android malware classification/val1.py: -------------------------------------------------------------------------------- 1 | import keras.preprocessing.text 2 | import numpy as np 3 | import pandas as pd 4 | np.random.seed(1337) # for reproducibility 5 | from keras.preprocessing import sequence 6 | from keras.models import Sequential 7 | from keras.layers.core import Dense, Activation 8 | from keras.layers.embeddings import Embedding 9 | from keras.layers.recurrent import LSTM 10 | from sklearn.metrics import (precision_score, recall_score,f1_score, accuracy_score,mean_squared_error,mean_absolute_error) 11 | from sklearn import metrics 12 | from sklearn.metrics import roc_auc_score 13 | from keras.utils.np_utils import to_categorical 14 | from sklearn.cross_validation import train_test_split 15 | from keras.layers import Dropout 16 | 17 | print("Loading") 18 | 19 | traindata = pd.read_csv('space/CDMC2016_AndroidLabel.Train.csv', header=None) 20 | 21 | 22 | x = traindata.iloc[:,1] 23 | y = traindata.iloc[:,0] 24 | 25 | 26 | X_train, X_test, y_train, y_test = train_test_split(x, y,test_size=0.1,random_state=42) 27 | 28 | 29 | tk = keras.preprocessing.text.Tokenizer(nb_words=5000, filters=keras.preprocessing.text.base_filter(), lower=True, split=",") 30 | tk.fit_on_texts(X_train)
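# --- Editor's note (not part of the original script) ---
# `dropout_W` / `dropout_U` in val.py above are Keras 1.x arguments for dropout
# on the input and recurrent connections of the LSTM; in later Keras versions
# (an assumption -- this repository targets Keras 1.x) the same layer reads:
#
#   model.add(LSTM(128, dropout=0.4, recurrent_dropout=0.4))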
31 | X_train = tk.texts_to_sequences(X_train) 32 | 33 | 34 | tk = keras.preprocessing.text.Tokenizer(nb_words=5000, filters=keras.preprocessing.text.base_filter(), lower=True, split=",") 35 | tk.fit_on_texts(X_test) 36 | X_test = tk.texts_to_sequences(X_test) 37 | 38 | X_train=np.array(X_train) 39 | X_test=np.array(X_test) 40 | 41 | 42 | y_train = np.array(y_train) 43 | y_test = np.array(y_test) 44 | 45 | batch_size = 64 46 | max_len = 500 47 | print "max_len ", max_len 48 | print('Pad sequences (samples x time)') 49 | 50 | X_train = sequence.pad_sequences(X_train, maxlen=max_len) 51 | X_test = sequence.pad_sequences(X_test, maxlen=max_len) 52 | 53 | #y_train= to_categorical(y_train) 54 | #y_test = to_categorical(y_test) 55 | 56 | 57 | max_features = 5000 58 | model = Sequential() 59 | print('Build model...') 60 | embedding_vecor_length = 32 61 | 62 | model = Sequential() 63 | model.add(Embedding(max_features, embedding_vecor_length, input_length=max_len)) 64 | model.add(Dropout(0.2)) 65 | model.add(LSTM(100)) 66 | model.add(Dropout(0.2)) 67 | model.add(Dense(1)) 68 | model.add(Activation('sigmoid')) 69 | 70 | model.compile(loss='binary_crossentropy', optimizer='adam',metrics=['accuracy']) 71 | print(model.summary()) 72 | 73 | model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=1, 74 | validation_data=(X_test, y_test), shuffle=True) 75 | score, acc = model.evaluate(X_test, y_test, 76 | batch_size=64) 77 | print('Test score:', score) 78 | print('Test accuracy:', acc) 79 | 80 | 81 | -------------------------------------------------------------------------------- /android malware/static/android malware classification/val2.py: -------------------------------------------------------------------------------- 1 | import keras.preprocessing.text 2 | import numpy as np 3 | import pandas as pd 4 | np.random.seed(1337) # for reproducibility 5 | from keras.preprocessing import sequence 6 | from keras.models import Sequential 7 | from keras.layers.core import Dense, Activation 8 | from keras.layers.embeddings import Embedding 9 | from keras.layers.recurrent import LSTM 10 | from sklearn.metrics import (precision_score, recall_score,f1_score, accuracy_score,mean_squared_error,mean_absolute_error) 11 | from sklearn import metrics 12 | from sklearn.metrics import roc_auc_score 13 | from keras.utils.np_utils import to_categorical 14 | from sklearn.cross_validation import train_test_split 15 | from keras.layers import Dropout 16 | from keras.layers import LSTM 17 | from keras.layers.convolutional import Convolution1D 18 | from keras.layers.convolutional import MaxPooling1D 19 | from keras.layers.embeddings import Embedding 20 | from keras.preprocessing import sequence 21 | from theano.tensor.shared_randomstreams import RandomStreams 22 | # fix random seed for reproducibility 23 | np.random.seed(7) 24 | srng = RandomStreams(7) 25 | 26 | print("Loading") 27 | 28 | traindata = pd.read_csv('space/CDMC2016_AndroidLabel.Train.csv', header=None) 29 | 30 | 31 | x = traindata.iloc[:,1] 32 | y = traindata.iloc[:,0] 33 | 34 | 35 | X_train, X_test, y_train, y_test = train_test_split(x, y,test_size=0.1,random_state=42) 36 | 37 | 38 | tk = keras.preprocessing.text.Tokenizer(nb_words=5000, filters=keras.preprocessing.text.base_filter(), lower=True, split=",") 39 | tk.fit_on_texts(X_train) 40 | X_train = tk.texts_to_sequences(X_train) 41 | 42 | 43 | tk = keras.preprocessing.text.Tokenizer(nb_words=5000, filters=keras.preprocessing.text.base_filter(), lower=True, split=",") 44 | tk.fit_on_texts(X_test) 45 | X_test = 
tk.texts_to_sequences(X_test) 46 | 47 | X_train=np.array(X_train) 48 | X_test=np.array(X_test) 49 | 50 | 51 | y_train = np.array(y_train) 52 | y_test = np.array(y_test) 53 | 54 | batch_size = 64 55 | max_len = 500 56 | print "max_len ", max_len 57 | print('Pad sequences (samples x time)') 58 | 59 | X_train = sequence.pad_sequences(X_train, maxlen=max_len) 60 | X_test = sequence.pad_sequences(X_test, maxlen=max_len) 61 | 62 | #y_train= to_categorical(y_train) 63 | #y_test = to_categorical(y_test) 64 | 65 | 66 | max_features = 5000 67 | model = Sequential() 68 | print('Build model...') 69 | embedding_vecor_length = 32 70 | 71 | model = Sequential() 72 | model.add(Embedding(max_features, embedding_vecor_length, input_length=max_len)) 73 | model.add(Convolution1D(nb_filter=32, filter_length=3, border_mode='same', activation='relu')) 74 | model.add(MaxPooling1D(pool_length=2)) 75 | model.add(LSTM(100)) 76 | model.add(Dense(1, activation='sigmoid')) 77 | 78 | model.compile(loss='binary_crossentropy', optimizer='adam',metrics=['accuracy']) 79 | print(model.summary()) 80 | 81 | model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=1, 82 | validation_data=(X_test, y_test), shuffle=True) 83 | score, acc = model.evaluate(X_test, y_test, 84 | batch_size=64) 85 | print('Test score:', score) 86 | print('Test accuracy:', acc) 87 | 88 | 89 | --------------------------------------------------------------------------------
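Editor's closing note: these scripts were written against Keras 1.x (Theano backend) and scikit-learn < 0.20, and several APIs they rely on -- sklearn.cross_validation, nb_epoch, nb_words, base_filter(), predict_classes(), the Python 2 print statements such as `print "max_len ", max_len`, and keras.wrappers.scikit_learn.KerasClassifier -- have since been renamed or removed. The sketch below is not part of the repository; it rewrites the single-layer dynamic-analysis RNN under the assumption of TensorFlow 2.x Keras and a current scikit-learn, with the renamed parameters called out in comments.

import pandas as pd
from sklearn.model_selection import train_test_split   # replaces sklearn.cross_validation
from sklearn.preprocessing import Normalizer
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNN, Dense, Dropout
from tensorflow.keras.callbacks import ModelCheckpoint, CSVLogger

# Same assumed layout as the scripts above: 42 feature columns, then a binary label.
data = pd.read_csv('new/Training.csv', header=None)
X = Normalizer().fit_transform(data.iloc[:, 0:42])
y = data.iloc[:, 42].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)

# [samples, time steps, features] with a single time step, as in the originals
X_train = X_train.reshape((X_train.shape[0], 1, 42))
X_test = X_test.reshape((X_test.shape[0], 1, 42))

model = Sequential([
    SimpleRNN(4, input_shape=(1, 42)),        # input_dim=42 in the Keras 1.x originals
    Dropout(0.1),
    Dense(1, activation='sigmoid'),
])
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

checkpointer = ModelCheckpoint('checkpoint-{epoch:02d}.hdf5', save_best_only=True,
                               monitor='val_accuracy', mode='max')   # was monitor='val_acc'
csv_logger = CSVLogger('training.csv')
model.fit(X_train, y_train, batch_size=32, epochs=100,               # nb_epoch -> epochs
          validation_data=(X_test, y_test), callbacks=[checkpointer, csv_logger])

# predict_classes() was removed; threshold the sigmoid output instead
predicted = (model.predict(X_test) > 0.5).astype('int32')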