├── Feature_extraction_&_selection ├── Bfeatures_jsons.json ├── Feature_selection │ ├── MSGmalware_analysis_dataset_all_features.csv │ ├── MSGmalware_analysis_dataset_if.csv │ ├── M_Bfeatures_jsons.json │ ├── get_important_features.py │ ├── script3_create_final_combined_dataset_important_features.py │ └── significant_features.py ├── MSGmalware_analysis_dataset_all_features.csv ├── M_Bfeatures_jsons.json ├── Mfeatures_jsons.json └── script2_create_combined_dataset_all_features.py ├── Generating adversarial examples with GAN ├── GAN_4_SmartAM.py ├── MSGmalware_analysis_dataset_if.csv ├── advers │ ├── adverV1.npz │ ├── adverV2.npz │ ├── adverV3.npz │ ├── adverV4.npz │ ├── adverV5.npz │ ├── adverV6.npz │ ├── adverV7.npz │ ├── adverV8.npz │ ├── adver_dataset_if_test_1.csv │ ├── adver_dataset_if_train_1.csv │ └── prepare_adver_dataset.py ├── create_input_for_gan.py └── dataset_if.npz ├── README.md └── SmartAM_ANN_Model ├── MSGmalware_analysis_dataset_if.csv ├── SmartAM1_ANN.py ├── SmartAM2_ANN.py ├── adver_dataset_if_test_1.csv └── adver_dataset_if_train_1.csv /Feature_extraction_&_selection/Feature_selection/get_important_features.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Mon Jan 14 10:16:05 2019 4 | @author: MSG 5 | =============================================================================== 6 | ****this script is to determine importance of each feature 7 | such that less significant features can be ignored****** 8 | =============================================================================== 9 | """ 10 | import json 11 | import pandas as pd 12 | import numpy as np 13 | 14 | #**************POPULATE GENERAL FEATURES LISTS********************************* 15 | #general lists upon which feature selection will be made basing on feature importance 16 | 17 | Glist_of_receivers_actions = [] 18 | Glist_of_permissions = [] 19 | Glist_of_apis = [] 20 | Gruntime_registered_receivers = [] 21 | Glist_of_fingerprints = [] 22 | #populate lists 23 | with open("M_Bfeatures_jsons.json", "r") as d: 24 | json_dataset = json.load(d) 25 | for key, value in json_dataset.items(): 26 | list_of_receivers_actions = value['list_of_receivers_actions'] 27 | list_of_permissions = value['list_of_permissions'] 28 | apis = value['apis'] 29 | runtime_registered_receivers = value['runtime_registered_receivers'] 30 | list_of_fingerprints = value['list_of_fingerprints'] 31 | 32 | 33 | #POPULATE GENERAL LISTS UPON WHICH FEATURE PRESENCE CHECK WILL BE DONE 34 | #list_of_receivers_actions 35 | for i in range(len(list_of_receivers_actions)): 36 | r = list_of_receivers_actions[i] 37 | if r not in Glist_of_receivers_actions: 38 | if r.startswith("android.intent.action."): 39 | Glist_of_receivers_actions.append(r) 40 | 41 | #list_of_permissions 42 | for i in range(len(list_of_permissions)): 43 | s = list_of_permissions[i] 44 | if s not in Glist_of_permissions: 45 | if s.startswith('android.permission.'): 46 | Glist_of_permissions.append(s) 47 | 48 | #list_of_api_names 49 | for key in apis.keys(): 50 | if key not in Glist_of_apis: 51 | Glist_of_apis.append(key) 52 | 53 | #registered_receivers 54 | for i in range(len(runtime_registered_receivers)): 55 | rt = runtime_registered_receivers[i] 56 | if rt not in Gruntime_registered_receivers: 57 | if rt.startswith("android.intent.action."): 58 | Gruntime_registered_receivers.append(rt) 59 | 60 | #list_of_fingerprints 61 | for i in range(len(list_of_fingerprints)): 62 | if list_of_fingerprints[i] not in Glist_of_fingerprints: 63 | Glist_of_fingerprints.append(list_of_fingerprints[i]) 64 | 65 | 66 | others = ['malware'] 67 | 68 | all_features = (Glist_of_permissions + Glist_of_receivers_actions + Gruntime_registered_receivers+ 69 | Glist_of_fingerprints + Glist_of_apis + others) 70 | 71 | 72 | #****GET FEATURE IMPORTANCES ************************************************ 73 | # Importing the dataset 74 | dataset = pd.read_csv("MSGmalware_analysis_dataset_all_features.csv", delimiter=",") 75 | # split into input (X) and output (Y) variables 76 | X = dataset.iloc[:, 1:714].values 77 | y = dataset.iloc[:, 714].values 78 | # Splitting the dataset into the Training set and Test set 79 | from sklearn.model_selection import train_test_split 80 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, 81 | random_state = 0) 82 | ## Import the random forest model. 83 | from sklearn.ensemble import RandomForestClassifier 84 | ## This line instantiates the model. 85 | rf = RandomForestClassifier() 86 | ## Fit the model on your training data. 87 | rf.fit(X_train, y_train) 88 | ## And score it on your testing data. 89 | rf.score(X_test, y_test) 90 | 91 | importances = rf.feature_importances_ 92 | std = np.std([tree.feature_importances_ for tree in rf.estimators_], 93 | axis=0) 94 | indices = np.argsort(importances)[::-1] 95 | # Print the feature ranking 96 | print("Feature ranking:") 97 | important_features = [] 98 | important_features_scores = [] 99 | for f in range(X.shape[1]): 100 | print("%d. %-*s %f" % (f + 1, 0, all_features[indices[f]], importances[indices[f]])) 101 | #store important features 102 | if importances[indices[f]] > 0.000004: 103 | important_features.append(all_features[indices[f]]) 104 | important_features_scores.append(all_features[indices[f]]+' >>> '+str(round(importances[indices[f]],6))) 105 | 106 | #new general lists of important features(if) 107 | if_list_of_receivers_actions = [] 108 | if_list_of_permissions = [] 109 | if_list_of_apis = [] 110 | if_runtime_registered_receivers = [] 111 | if_list_of_fingerprints = [] 112 | if_others = [] 113 | 114 | #update if_list_of_receivers_actions 115 | for i in range (len(Glist_of_receivers_actions)): 116 | if Glist_of_receivers_actions[i] in important_features: 117 | if_list_of_receivers_actions.append(Glist_of_receivers_actions[i]) 118 | #update if_others 119 | for i in range(len(others)): 120 | if others[i] in important_features: 121 | if_others.append(others[i]) 122 | #update if_list_of_permissions 123 | for i in range(len(Glist_of_permissions)): 124 | if Glist_of_permissions[i] in important_features: 125 | if_list_of_permissions.append(Glist_of_permissions[i]) 126 | #update if_list_of_apis 127 | for i in range(len(Glist_of_apis)): 128 | if Glist_of_apis[i] in important_features: 129 | if_list_of_apis.append(Glist_of_apis[i]) 130 | #update if_runtime_registered_receivers 131 | for i in range(len(Gruntime_registered_receivers)): 132 | if Gruntime_registered_receivers[i] in important_features: 133 | if_runtime_registered_receivers.append(Gruntime_registered_receivers[i]) 134 | #update if_list_of_fingerprints 135 | for i in range(len(Glist_of_fingerprints)): 136 | if Glist_of_fingerprints[i] in important_features: 137 | if_list_of_fingerprints.append(Glist_of_fingerprints[i]) 138 | """******************************END****************************************""" -------------------------------------------------------------------------------- /Feature_extraction_&_selection/Feature_selection/script3_create_final_combined_dataset_important_features.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Sat Mar 2 18:43:35 2019 4 | 5 | @author: MSG 6 | =============================================================================== 7 | ***this script creates final dataset that considers most important features 8 | =============================================================================== 9 | """ 10 | import json 11 | import pandas as pd 12 | import time 13 | 14 | start_time = time.time() 15 | #LOAD DATA 16 | #import important feature lists 17 | from significant_features import (if_list_of_permissions, if_list_of_receivers_actions , 18 | if_runtime_registered_receivers ,if_list_of_fingerprints , if_list_of_apis) 19 | 20 | 21 | #CREATE PANDAS DATAFRAME 22 | # pandas Dataframe with the columns from the json 23 | #dataset for all samples(malware & benign) 24 | other = ['malware'] 25 | data = pd.DataFrame(columns = if_list_of_permissions 26 | + if_list_of_receivers_actions + if_runtime_registered_receivers+ 27 | if_list_of_fingerprints + if_list_of_apis + other) 28 | 29 | #initialize dataset with 0 values for all features 30 | with open("M_Bfeatures_jsons.json", "r") as d: 31 | json_dataset = json.load(d) 32 | #add rows with only index values(md5) 33 | for key in json_dataset.keys(): 34 | data.loc[key] = 0 35 | 36 | #check for features and update corresponding dataset values 37 | with open("M_Bfeatures_jsons.json", "r") as d: 38 | json_dataset = json.load(d) 39 | for key, value in json_dataset.items(): 40 | # append data to a pandas DataFrame 41 | # each key(sample) forms a row 42 | list_of_permissions = value['list_of_permissions'] 43 | list_of_receivers = value['list_of_receivers'] 44 | list_of_receivers_actions = value['list_of_receivers_actions'] 45 | runtime_registered_receivers = value['runtime_registered_receivers'] 46 | list_of_fingerprints = value['list_of_fingerprints'] 47 | apis = value['apis'] 48 | malware = value['malware'] 49 | 50 | #update presence of given permission for sample(key) 51 | for i in range(len(list_of_permissions)): 52 | m = list_of_permissions[i] 53 | if m in if_list_of_permissions: 54 | data.loc[key, m] = 1 55 | 56 | #update presence of given receivers_action for sample(key) 57 | for i in range(len(list_of_receivers_actions)): 58 | m = list_of_receivers_actions[i] 59 | if m in if_list_of_receivers_actions: 60 | data.loc[key, m] = 1 61 | 62 | #update presence of given registered_receiver for sample(key) 63 | for i in range(len(runtime_registered_receivers)): 64 | m = runtime_registered_receivers[i] 65 | if m in if_runtime_registered_receivers: 66 | data.loc[key, m] = 1 67 | 68 | #update presence of given fingerprint for sample(key) 69 | for i in range(len(list_of_fingerprints)): 70 | m = list_of_fingerprints[i] 71 | if m in if_list_of_fingerprints: 72 | data.loc[key, m] = 1 73 | 74 | #update presence of a given api call for sample(key) 75 | for api in apis.keys(): 76 | m = api 77 | if m in if_list_of_apis: 78 | data.loc[key, m] = 1 79 | 80 | #update others features 81 | data.loc[key, 'malware'] = malware 82 | 83 | #save dataset in csv format 84 | data.to_csv("MSGmalware_analysis_dataset_if.csv", encoding='utf8') 85 | 86 | end_time=time.time() 87 | print('Execution time: '+str(round( end_time -start_time, 3))+'seconds') 88 | """******************************END****************************************""" 89 | 90 | 91 | -------------------------------------------------------------------------------- /Feature_extraction_&_selection/Feature_selection/significant_features.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Mon Jan 14 12:42:36 2019 4 | @author: MSG 5 | =============================================================================== 6 | *******contains lists of importanat features which are used for 7 | feature presence check while generating the final combined dataset******* 8 | =============================================================================== 9 | """ 10 | 11 | 12 | if_list_of_receivers_actions = ['android.intent.action.NEW_OUTGOING_CALL', 13 | 'android.intent.action.BOOT_COMPLETED', 14 | 'android.intent.action.USER_PRESENT', 15 | 'android.intent.action.PHONE_STATE', 16 | 'android.intent.action.AIRPLANE_MODE', 17 | 'android.intent.action.SERVICE_STATE', 18 | 'android.intent.action.ACTION_POWER_CONNECTED', 19 | 'android.intent.action.ACTION_POWER_DISCONNECTED', 20 | 'android.intent.action.PACKAGE_ADDED', 21 | 'android.intent.action.PACKAGE_REMOVED', 22 | 'android.intent.action.DATA_SMS_RECEIVED', 23 | 'android.intent.action.QUICKBOOT_POWERON', 24 | 'android.intent.action.PACKAGE_REPLACED', 25 | 'android.intent.action.MEDIA_BUTTON', 26 | 'android.intent.action.DOWNLOAD_NOTIFICATION_CLICKED', 27 | 'android.intent.action.DOWNLOAD_COMPLETE', 28 | 'android.intent.action.MEDIA_MOUNTED', 29 | 'android.intent.action.UMS_CONNECTED', 30 | 'android.intent.action.PACKAGE_CHANGED', 31 | 'android.intent.action.PACKAGE_RESTARTED', 32 | 'android.intent.action.PACKAGE_INSTALL', 33 | 'android.intent.action.PACKAGE_DATA_CLEARED', 34 | 'android.intent.action.WALLPAPER_CHANGED', 35 | 'android.intent.action.SCREEN_ON', 36 | 'android.intent.action.SCREEN_OFF', 37 | 'android.intent.action.USER_INITIALIZE', 38 | 'android.intent.action.DATE_CHANGED', 39 | 'android.intent.action.SIG_STR', 40 | 'android.intent.action.MEDIA_CHECKING', 41 | 'android.intent.action.BATTERY_CHANGED', 42 | 'android.intent.action.TIME_TICK', 43 | 'android.intent.action.TIME_SET', 44 | 'android.intent.action.TIMEZONE_CHANGED', 45 | 'android.intent.action.ACTION_EXTERNAL_APPLICATIONS_AVAILABLE', 46 | 'android.intent.action.ANY_DATA_STATE', 47 | 'android.intent.action.REBOOT', 48 | 'android.intent.action.ACTION_SHUTDOWN', 49 | 'android.intent.action.LOCALE_CHANGED', 50 | 'android.intent.action.BATTERY_LOW', 51 | 'android.intent.action.MY_PACKAGE_REPLACED', 52 | 'android.intent.action.PACKAGE_FULLY_REMOVED', 53 | 'android.intent.action.PROVIDER_CHANGED', 54 | 'android.intent.action.BATTERY_OKAY', 55 | 'android.intent.action.DEVICE_STORAGE_LOW', 56 | 'android.intent.action.DEVICE_STORAGE_OK', 57 | 'android.intent.action.ACTION_BOOT_COMPLETED', 58 | 'android.intent.action.NOTIFICATION_UPDATE', 59 | 'android.intent.action.CLOSE_SYSTEM_DIALOGS', 60 | 'android.intent.action.CONFIGURATION_CHANGED', 61 | 'android.intent.action.EXTERNAL_APPLICATIONS_AVAILABLE', 62 | 'android.intent.action.BATTERY_CHANGED_ACTION', 63 | 'android.intent.action.MEDIA_UNMOUNTED', 64 | 'android.intent.action.MEDIA_REMOVED', 65 | 'android.intent.action.MEDIA_EJECT', 66 | 'android.intent.action.MEDIA_BAD_REMOVAL', 67 | 'android.intent.action.HEADSET_PLUG', 68 | 'android.intent.action.ALARM_CHANGED', 69 | 'android.intent.action.CHECK', 70 | 'android.intent.action.PACKAGE_FIRST_LAUNCH', 71 | 'android.intent.action.HEART_CODE', 72 | 'android.intent.action.UNINSTALL_PACKAGE', 73 | 'android.intent.action.MEDIA_SHARED', 74 | 'android.intent.action.MEDIA_SCANNER_STARTED', 75 | 'android.intent.action.MEDIA_SCANNER_FINISHED', 76 | 'android.intent.action.VIEW', 77 | 'android.intent.action.DOCK_EVENT', 78 | 'android.intent.action.media_checking', 79 | 'android.intent.action.RUN', 80 | 'android.intent.action.JUMP_TICKER_LOCAL', 81 | 'android.intent.action.gg_903.c2dm_notification', 82 | 'android.intent.action.c2dm_notification.gg_1331', 83 | 'android.intent.action.c2dm_rereceve.gg_1331', 84 | 'android.intent.action.ACTION_BATTERY_LOW', 85 | 'android.intent.action.CAMERA_BUTTON'] 86 | 87 | if_list_of_permissions=['android.permission.INTERNET', 88 | 'android.permission.ACCESS_NETWORK_STATE', 89 | 'android.permission.READ_PHONE_STATE', 90 | 'android.permission.MOUNT_UNMOUNT_FILESYSTEMS', 91 | 'android.permission.WRITE_EXTERNAL_STORAGE', 92 | 'android.permission.ACCESS_WIFI_STATE', 93 | 'android.permission.SEND_SMS', 94 | 'android.permission.RECEIVE_SMS', 95 | 'android.permission.WAKE_LOCK', 96 | 'android.permission.DELETE_PACKAGES', 97 | 'android.permission.READ_SMS', 98 | 'android.permission.MODIFY_PHONE_STATE', 99 | 'android.permission.CALL_PHONE', 100 | 'android.permission.PROCESS_OUTGOING_CALLS', 101 | 'android.permission.WRITE_CONTACTS', 102 | 'android.permission.READ_CONTACTS', 103 | 'android.permission.RECEIVE_BOOT_COMPLETED', 104 | 'android.permission.BATTERY_STATS', 105 | 'android.permission.BLUETOOTH', 106 | 'android.permission.CAMERA', 107 | 'android.permission.FLASHLIGHT', 108 | 'android.permission.VIBRATE', 109 | 'android.permission.GET_TASKS', 110 | 'android.permission.SET_WALLPAPER', 111 | 'android.permission.ACCESS_COARSE_LOCATION', 112 | 'android.permission.RAISED_THREAD_PRIORITY', 113 | 'android.permission.WRITE_SECURE_SETTINGS', 114 | 'android.permission.CHANGE_WIFI_STATE', 115 | 'android.permission.CHANGE_NETWORK_STATE', 116 | 'android.permission.WRITE_APN_SETTINGS', 117 | 'android.permission.RECEIVE_MMS', 118 | 'android.permission.RECEIVE_WAP_PUSH', 119 | 'android.permission.WRITE_SETTINGS', 120 | 'android.permission.GET_ACCOUNTS', 121 | 'android.permission.WRITE_SMS', 122 | 'android.permission.INSTALL_PACKAGES', 123 | 'android.permission.ACCESS_FINE_LOCATION', 124 | 'android.permission.ACCESS_COARSE_UPDATES', 125 | 'android.permission.SYSTEM_ALERT_WINDOW', 126 | 'android.permission.RESTART_PACKAGES', 127 | 'android.permission.KILL_BACKGROUND_PROCESSES', 128 | 'android.permission.READ_LOGS', 129 | 'android.permission.RECEIVE_USER_PRESENT', 130 | 'android.permission.SYSTEM_OVERLAY_WINDOW', 131 | 'android.permission.READ_EXTERNAL_STORAGE', 132 | 'android.permission.MODIFY_AUDIO_SETTINGS', 133 | 'android.permission.DISABLE_KEYGUARD', 134 | 'android.permission.BIND_APPWIDGET', 135 | 'android.permission.EXPAND_STATUS_BAR', 136 | 'android.permission.SET_WALLPAPER_HINTS', 137 | 'android.permission.FORCE_STOP_PACKAGES', 138 | 'android.permission.RECORD_AUDIO', 139 | 'android.permission.ACCESS_LOCATION_EXTRA_COMMANDS', 140 | 'android.permission.MANAGE_ACCOUNTS', 141 | 'android.permission.ACCOUNT_MANAGER', 142 | 'android.permission.AUTHENTICATE_ACCOUNTS', 143 | 'android.permission.CLEAR_APP_CACHE', 144 | 'android.permission.WRITE_INTERNAL_STORAGE', 145 | 'android.permission.DELETE_CACHE_FILES', 146 | 'android.permission.BROADCAST_STICKY', 147 | 'android.permission.CHANGE_CONFIGURATION', 148 | 'android.permission.DEVICE_POWER', 149 | 'android.permission.WRITE_OWNER_DATA', 150 | 'android.permission.DOWNLOAD_WITHOUT_NOTIFICATION', 151 | 'android.permission.UPDATE_DEVICE_STATS', 152 | 'android.permission.GET_PACKAGE_SIZE', 153 | 'android.permission.REORDER_TASKS', 154 | 'android.permission.ACCESS_LOCATION', 155 | 'android.permission.ACCESS_GPS', 156 | 'android.permission.ACCESS_MOCK_LOCATION', 157 | 'android.permission.BLUETOOTH_ADMIN', 158 | 'android.permission.WRITE_SYNC_SETTINGS', 159 | 'android.permission.READ_OWNER_DATA', 160 | 'android.permission.CLEAR_APP_USER_DATA', 161 | 'android.permission.READ_CALL_LOG', 162 | 'android.permission.MOUNT_FORMAT_FILESYSTEMS', 163 | 'android.permission.WRITE_CALL_LOG', 164 | 'android.permission.READ_PROFILE', 165 | 'android.permission.BIND_DEVICE_ADMIN', 166 | 'android.permission.DUMP', 167 | 'android.permission.FORCE_BACK', 168 | 'android.permission.SET_ALWAYS_FINISH', 169 | 'android.permission.SET_DEBUG_APP', 170 | 'android.permission.SET_PREFERRED_APPLICATIONS', 171 | 'android.permission.SET_TIME_ZONE', 172 | 'android.permission.STATUS_BAR', 173 | 'android.permission.USE_CREDENTIALS', 174 | 'android.permission.USE_SIP', 175 | 'android.permission.CHANGE_WIFI_MULTICAST_STATE', 176 | 'android.permission.READ_SYNC_SETTINGS', 177 | 'android.permission.READ_CALENDAR', 178 | 'android.permission.WRITE_CALENDAR', 179 | 'android.permission.INTERACT_ACROSS_USERS_FULL', 180 | 'android.permission.READ_SYNC_STATS', 181 | 'android.permission.READ_SECURE_SETTINGS', 182 | 'android.permission.ACCESS_SUPERUSER', 183 | 'android.permission.NFC', 184 | 'android.permission.SUBSCRIBED_FEEDS_WRITE', 185 | 'android.permission.RECORD_VIDEO', 186 | 'android.permission.BAIDU_LOCATION_SERVICE', 187 | 'android.permission.USE_FINGERPRINT', 188 | 'android.permission.ACCESS_WEATHERCLOCK_PROVIDER', 189 | 'android.permission.GET_ACCOUNTS_PRIVILEGED', 190 | 'android.permission.CONTROL_INCALL_EXPERIENCE', 191 | 'android.permission.FOREGROUND_SERVICE', 192 | 'android.permission.READ_APP_BADGE', 193 | 'android.permission.PACKAGE_USAGE_STATS', 194 | 'android.permission.BIND_ACCESSIBILITY_SERVICE', 195 | 'android.permission.BROADCAST_PACKAGE_ADDED', 196 | 'android.permission.INSTALL_SHORTCUT', 197 | 'android.permission.ACCESS_WIMAX_STATE', 198 | 'android.permission.BACKUP', 199 | 'android.permission.SEND_DOWNLOAD_COMPLETED_INTENTS', 200 | 'android.permission.MOVE_PACKAGE', 201 | 'android.permission.START_BACKGROUND_SERVICE', 202 | 'android.permission.SENDTO', 203 | 'android.permission.CAPTURE_SECURE_VIDEO_OUTPUT', 204 | 'android.permission.CAPTURE_VIDEO_OUTPUT', 205 | 'android.permission.ACCESS_ALL_DOWNLOADS', 206 | 'android.permission.UPDATE_APP_OPS_STATS', 207 | 'android.permission.REQUEST_INSTALL_PACKAGES', 208 | 'android.permission.READ_USER_DICTIONARY', 209 | 'android.permission.WRITE_USER_DICTIONARY', 210 | 'android.permission.TYPE_KEYGUARD_DIALOG', 211 | 'android.permission.MANAGE_DOCUMENTS', 212 | 'android.permission.PROCESS_INCOMING_CALLS', 213 | 'android.permission.WRITE_SETTING', 214 | 'android.permission.ALLOCATE_AGGRESSIVE', 215 | 'android.permission.GET_INTENT_SENDER_INTENT', 216 | 'android.permission.LOCATION_HARDWARE', 217 | 'android.permission.NETWORK_STACK', 218 | 'android.permission.CONFIGURE_SIP', 219 | 'android.permission.EXT', 220 | 'android.permission.SHOW_WHEN_LOCK', 221 | 'android.permission.LOCATION', 222 | 'android.permission.PREVENT_POWER_KEY'] 223 | 224 | if_list_of_apis=['android_telephony_TelephonyManager_getLine1Number', 225 | 'android_telephony_TelephonyManager_getDeviceId', 226 | 'android_app_SharedPreferencesImpl_EditorImpl_putString', 227 | 'java_lang_reflect_Method_invoke', 228 | 'java_net_ProxySelectorImpl_select', 229 | 'android_telephony_TelephonyManager_getNetworkOperatorName', 230 | 'android_telephony_TelephonyManager_getSimSerialNumber', 231 | 'org_apache_http_impl_client_AbstractHttpClient_execute', 232 | 'android_app_ContextImpl_registerReceiver', 233 | 'java_io_File_exists', 234 | 'android_app_SharedPreferencesImpl_EditorImpl_putLong', 235 | 'libcore_io_IoBridge_open', 236 | 'java_net_URL_openConnection', 237 | 'android_os_SystemProperties_get', 238 | 'android_app_SharedPreferencesImpl_EditorImpl_putInt', 239 | 'android_telephony_SmsManager_sendTextMessage', 240 | 'android_app_Activity_startActivity', 241 | 'android_app_ApplicationPackageManager_setComponentEnabledSetting', 242 | 'android_webkit_WebView_setWebViewClient', 243 | 'android_app_SharedPreferencesImpl_EditorImpl_putBoolean', 244 | 'android_webkit_WebView_addJavascriptInterface', 245 | 'android_telephony_TelephonyManager_getSimCountryIso', 246 | 'android_telephony_TelephonyManager_getSubscriberId', 247 | 'android_content_ContextWrapper_startService', 248 | 'android_content_ContentValues_put', 249 | 'android_content_ContentResolver_insert', 250 | 'android_webkit_WebView_setWebChromeClient', 251 | 'android_content_ContentResolver_query', 252 | 'android_util_Base64_decode', 253 | 'android_telephony_TelephonyManager_getNetworkCountryIso', 254 | 'dalvik_system_DexFile_openDexFile', 255 | 'android_telephony_TelephonyManager_getNetworkOperator', 256 | 'android_content_ContextWrapper_sendBroadcast', 257 | 'android_app_NotificationManager_notify', 258 | 'javax_crypto_Cipher_doFinal', 259 | 'android_app_ActivityManager_getRunningTasks', 260 | 'dalvik_system_DexClassLoader_dalvik_system_DexClassLoader', 261 | 'android_app_ApplicationPackageManager_getInstalledPackages', 262 | 'dalvik_system_DexFile_loadDex', 263 | 'dalvik_system_DexFile_dalvik_system_DexFile', 264 | 'javax_crypto_Mac_doFinal', 265 | 'android_util_Base64_encode', 266 | 'android_util_Base64_encodeToString', 267 | 'android_content_ContentResolver_registerContentObserver', 268 | 'javax_crypto_spec_SecretKeySpec_javax_crypto_spec_SecretKeySpec', 269 | 'android_app_AlarmManager_set', 270 | 'dalvik_system_BaseDexClassLoader_findResource', 271 | 'android_net_wifi_WifiInfo_getMacAddress', 272 | 'android_content_ContextWrapper_openFileOutput', 273 | 'java_lang_Runtime_exec', 274 | 'java_io_FileInputStream_read', 275 | 'dalvik_system_BaseDexClassLoader_findLibrary', 276 | 'android_telephony_TelephonyManager_listen', 277 | 'android_location_Location_getLatitude', 278 | 'android_content_ContextWrapper_startActivity', 279 | 'android_telephony_TelephonyManager_getSimOperatorName', 280 | 'android_app_ActivityManager_getRunningAppProcesses', 281 | 'android_app_ActivityThread_handleReceiver', 282 | 'android_location_Location_getLongitude', 283 | 'android_telephony_TelephonyManager_getDeviceSoftwareVersion', 284 | 'android_app_SharedPreferencesImpl_EditorImpl_putFloat', 285 | 'dalvik_system_BaseDexClassLoader_findResources', 286 | 'android_accounts_AccountManager_getAccountsByType', 287 | 'java_lang_ProcessBuilder_start', 288 | 'android_content_ContentResolver_delete', 289 | 'java_io_FileOutputStream_write', 290 | 'dalvik_system_DexFile_loadClass', 291 | 'android_os_Debug_isDebuggerConnected', 292 | 'dalvik_system_PathClassLoader_dalvik_system_PathClassLoader', 293 | 'java_lang_Runtime_load', 294 | 'android_accounts_AccountManager_getAccounts', 295 | 'android_media_AudioRecord_startRecording'] 296 | 297 | if_runtime_registered_receivers= ['android.intent.action.PACKAGE_ADDED', 298 | 'android.intent.action.PROXY_CHANGE', 299 | 'android.intent.action.PACKAGE_REMOVED', 300 | 'android.intent.action.PACKAGE_CHANGED', 301 | 'android.intent.action.PACKAGE_INSTALL', 302 | 'android.intent.action.BATTERY_CHANGED', 303 | 'android.intent.action.CONFIGURATION_CHANGED', 304 | 'android.intent.action.MEDIA_CHECKING', 305 | 'android.intent.action.MEDIA_UNMOUNTED', 306 | 'android.intent.action.HEADSET_PLUG', 307 | 'android.intent.action.MEDIA_MOUNTED', 308 | 'android.intent.action.MEDIA_REMOVED', 309 | 'android.intent.action.MEDIA_BUTTON', 310 | 'android.intent.action.MEDIA_SHARED', 311 | 'android.intent.action.SCREEN_OFF', 312 | 'android.intent.action.USER_PRESENT', 313 | 'android.intent.action.TIME_SET', 314 | 'android.intent.action.TIMEZONE_CHANGED', 315 | 'android.intent.action.TIME_TICK', 316 | 'android.intent.action.AIRPLANE_MODE', 317 | 'android.intent.action.MEDIA_SCANNER_FINISHED', 318 | 'android.intent.action.NEW_OUTGOING_CALL', 319 | 'android.intent.action.PHONE_STATE', 320 | 'android.intent.action.SCREEN_ON', 321 | 'android.intent.action.MEDIA_BAD_REMOVAL', 322 | 'android.intent.action.MEDIA_EJECT', 323 | 'android.intent.action.MEDIA_SCANNER_STARTED', 324 | 'android.intent.action.CLOSE_SYSTEM_DIALOGS', 325 | 'android.intent.action.LOCALE_CHANGED', 326 | 'android.intent.action.EXTERNAL_APPLICATIONS_AVAILABLE', 327 | 'android.intent.action.BOOT_COMPLETED', 328 | 'android.intent.action.WALLPAPER_CHANGED', 329 | 'android.intent.action.ACTION_POWER_DISCONNECTED', 330 | 'android.intent.action.PACKAGE_REPLACED', 331 | 'android.intent.action.DEVICE_STORAGE_LOW', 332 | 'android.intent.action.ACTION_POWER_CONNECTED', 333 | 'android.intent.action.ANY_DATA_STATE', 334 | 'android.intent.action.SERVICE_STATE', 335 | 'android.intent.action.BATTERY_OKAY', 336 | 'android.intent.action.BATTERY_LOW', 337 | 'android.intent.action.ALARM_CHANGED', 338 | 'android.intent.action.DOWNLOAD_COMPLETE', 339 | 'android.intent.action.PACKAGE_RESTARTED', 340 | 'android.intent.action.update', 341 | 'android.intent.action.DATE_CHANGED', 342 | 'android.intent.action.DOWNLOAD_NOTIFICATION_CLICKED', 343 | 'android.intent.action.ACTION_SHUTDOWN', 344 | 'android.intent.action.BCAST_UPDATE_NOTE', 345 | 'android.intent.action.VIEW', 346 | 'android.intent.action.DEVICE_STORAGE_OK', 347 | 'android.intent.action.PACKAGE_FIRST_LAUNCH', 348 | 'android.intent.action.MY_PACKAGE_REPLACED', 349 | 'android.intent.action.CAMERA_BUTTON', 350 | 'android.intent.action.PROVIDER_CHANGED', 351 | 'android.intent.action.UMS_CONNECTED', 352 | 'android.intent.action.DATA_SMS_RECEIVED', 353 | 'android.intent.action.REBOOT', 354 | 'android.intent.action.PACKAGE_DATA_CLEARED', 355 | 'android.intent.action.PACKAGE_FULLY_REMOVED', 356 | 'android.intent.action.DOCK_EVENT', 357 | 'android.intent.action.ggee.ticketloader.update.903', 358 | 'android.intent.action.ggee.ticketloader.update.1331', 359 | 'android.intent.action.SEND_MESSAGE', 360 | 'android.intent.action.UNINSTALL_PACKAGE'] 361 | 362 | if_runtime_registered_receivers=['android.intent.action.PACKAGE_ADDED', 363 | 'android.intent.action.PROXY_CHANGE', 364 | 'android.intent.action.PACKAGE_REMOVED', 365 | 'android.intent.action.PACKAGE_CHANGED', 366 | 'android.intent.action.PACKAGE_INSTALL', 367 | 'android.intent.action.BATTERY_CHANGED', 368 | 'android.intent.action.CONFIGURATION_CHANGED', 369 | 'android.intent.action.MEDIA_CHECKING', 370 | 'android.intent.action.MEDIA_UNMOUNTED', 371 | 'android.intent.action.HEADSET_PLUG', 372 | 'android.intent.action.MEDIA_MOUNTED', 373 | 'android.intent.action.MEDIA_REMOVED', 374 | 'android.intent.action.MEDIA_BUTTON', 375 | 'android.intent.action.MEDIA_SHARED', 376 | 'android.intent.action.SCREEN_OFF', 377 | 'android.intent.action.USER_PRESENT', 378 | 'android.intent.action.TIME_SET', 379 | 'android.intent.action.TIMEZONE_CHANGED', 380 | 'android.intent.action.TIME_TICK', 381 | 'android.intent.action.AIRPLANE_MODE', 382 | 'android.intent.action.MEDIA_SCANNER_FINISHED', 383 | 'android.intent.action.NEW_OUTGOING_CALL', 384 | 'android.intent.action.PHONE_STATE', 385 | 'android.intent.action.SCREEN_ON', 386 | 'android.intent.action.MEDIA_BAD_REMOVAL', 387 | 'android.intent.action.MEDIA_EJECT', 388 | 'android.intent.action.MEDIA_SCANNER_STARTED', 389 | 'android.intent.action.CLOSE_SYSTEM_DIALOGS', 390 | 'android.intent.action.LOCALE_CHANGED', 391 | 'android.intent.action.EXTERNAL_APPLICATIONS_AVAILABLE', 392 | 'android.intent.action.BOOT_COMPLETED', 393 | 'android.intent.action.WALLPAPER_CHANGED', 394 | 'android.intent.action.ACTION_POWER_DISCONNECTED', 395 | 'android.intent.action.PACKAGE_REPLACED', 396 | 'android.intent.action.DEVICE_STORAGE_LOW', 397 | 'android.intent.action.ACTION_POWER_CONNECTED', 398 | 'android.intent.action.ANY_DATA_STATE', 399 | 'android.intent.action.SERVICE_STATE', 400 | 'android.intent.action.BATTERY_OKAY', 401 | 'android.intent.action.BATTERY_LOW', 402 | 'android.intent.action.ALARM_CHANGED', 403 | 'android.intent.action.DOWNLOAD_COMPLETE', 404 | 'android.intent.action.PACKAGE_RESTARTED', 405 | 'android.intent.action.update', 406 | 'android.intent.action.DATE_CHANGED', 407 | 'android.intent.action.DOWNLOAD_NOTIFICATION_CLICKED', 408 | 'android.intent.action.ACTION_SHUTDOWN', 409 | 'android.intent.action.BCAST_UPDATE_NOTE', 410 | 'android.intent.action.VIEW', 411 | 'android.intent.action.DEVICE_STORAGE_OK', 412 | 'android.intent.action.PACKAGE_FIRST_LAUNCH', 413 | 'android.intent.action.MY_PACKAGE_REPLACED', 414 | 'android.intent.action.CAMERA_BUTTON', 415 | 'android.intent.action.PROVIDER_CHANGED', 416 | 'android.intent.action.UMS_CONNECTED', 417 | 'android.intent.action.DATA_SMS_RECEIVED', 418 | 'android.intent.action.REBOOT', 419 | 'android.intent.action.PACKAGE_DATA_CLEARED', 420 | 'android.intent.action.PACKAGE_FULLY_REMOVED', 421 | 'android.intent.action.DOCK_EVENT', 422 | 'android.intent.action.ggee.ticketloader.update.903', 423 | 'android.intent.action.ggee.ticketloader.update.1331', 424 | 'android.intent.action.SEND_MESSAGE', 425 | 'android.intent.action.UNINSTALL_PACKAGE'] 426 | 427 | if_list_of_fingerprints=['getSimSerialNumber', 428 | 'getDeviceId', 429 | 'getNetworkOperatorName', 430 | 'getLine1Number', 431 | 'getSimCountryIso', 432 | 'getSubscriberId', 433 | 'getNetworkCountryIso', 434 | 'getNetworkOperator', 435 | 'getMacAddress', 436 | 'getSimOperatorName', 437 | 'getDeviceSoftwareVersion'] 438 | 439 | """******************************END****************************************""" 440 | -------------------------------------------------------------------------------- /Feature_extraction_&_selection/script2_create_combined_dataset_all_features.py: -------------------------------------------------------------------------------- 1 | 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Sat Oct 13 12:49:34 2018 5 | @author: MSG 6 | =============================================================================== 7 | ***this script creates a dataset with all features*** 8 | =============================================================================== 9 | """ 10 | import json 11 | import pandas as pd 12 | import time 13 | #LOAD DATA 14 | start_time = time.time() 15 | #load .json datasets for malware and benign 16 | #concatenate them 17 | with open("Mfeatures_jsons.json", "r") as mal: 18 | data_m = json.load(mal) 19 | with open("Bfeatures_jsons.json", "r") as beg: 20 | data_b = json.load(beg) 21 | 22 | data_m.update(data_b) 23 | 24 | with open("M_Bfeatures_jsons.json", "w") as fo: 25 | json.dump(data_m, fo, indent = 2) 26 | 27 | #POPULATE FEATURES LISTS 28 | #general lists upon which feature presence check will be made 29 | #these are the features under investigation for each app 30 | 31 | Glist_of_receivers_actions = [] 32 | Glist_of_permissions = [] 33 | Glist_of_apis = [] 34 | Gruntime_registered_receivers = [] 35 | Glist_of_fingerprints = [] 36 | 37 | #populate lists 38 | with open("M_Bfeatures_jsons.json", "r") as d: 39 | json_dataset = json.load(d) 40 | 41 | 42 | for key, value in json_dataset.items(): 43 | 44 | # here I push a list of data into a pandas DataFrame 45 | # each key(sample) forms a row 46 | #this function will be transfered to the final script that 47 | #reads the json file created here 48 | 49 | list_of_receivers_actions = value['list_of_receivers_actions'] 50 | list_of_permissions = value['list_of_permissions'] 51 | 52 | apis = value['apis'] 53 | runtime_registered_receivers = value['runtime_registered_receivers'] 54 | list_of_fingerprints = value['list_of_fingerprints'] 55 | 56 | 57 | #POPULATE GENERAL LISTS UPON WHICH FEATURE PRESENCE CHECK WILL BE DONE 58 | #list_of_receivers_actions 59 | for i in range(len(list_of_receivers_actions)): 60 | r = list_of_receivers_actions[i] 61 | if r not in Glist_of_receivers_actions: 62 | if r.startswith("android.intent.action."): 63 | Glist_of_receivers_actions.append(r) 64 | 65 | #list_of_permissions 66 | for i in range(len(list_of_permissions)): 67 | s = list_of_permissions[i] 68 | if s not in Glist_of_permissions: 69 | if s.startswith('android.permission.'): 70 | Glist_of_permissions.append(s) 71 | 72 | #list_of_api_names 73 | for key in apis.keys(): 74 | if key not in Glist_of_apis: 75 | Glist_of_apis.append(key) 76 | 77 | #registered_receivers 78 | for i in range(len(runtime_registered_receivers)): 79 | rt = runtime_registered_receivers[i] 80 | if rt not in Gruntime_registered_receivers: 81 | if rt.startswith("android.intent.action."): 82 | Gruntime_registered_receivers.append(rt) 83 | 84 | #list_of_fingerprints 85 | for i in range(len(list_of_fingerprints)): 86 | if list_of_fingerprints[i] not in Glist_of_fingerprints: 87 | Glist_of_fingerprints.append(list_of_fingerprints[i]) 88 | 89 | 90 | #CREATE PANDAS DATAFRAME 91 | #here I define my pandas Dataframe with the columns I want to get from the json 92 | #dataset for all samples(malware & benign) 93 | 94 | others = ['malware'] 95 | 96 | #create dataset(1527 columns of features) 97 | 98 | data = pd.DataFrame(columns = Glist_of_permissions 99 | + Glist_of_receivers_actions + Gruntime_registered_receivers+ 100 | Glist_of_fingerprints + Glist_of_apis + others) 101 | 102 | #initialize dataset with 0 values for all features 103 | with open("M_Bfeatures_jsons.json", "r") as d: 104 | json_dataset = json.load(d) 105 | 106 | #add rows with only index values(md5) 107 | for key in json_dataset.keys(): 108 | data.loc[key] = 0 109 | 110 | #CHECK FOR Features and update corresponding dataset values 111 | with open("M_Bfeatures_jsons.json", "r") as d: 112 | json_dataset = json.load(d) 113 | 114 | for key, value in json_dataset.items(): 115 | 116 | # here I push append data to a pandas DataFrame 117 | # each key(sample) forms a row 118 | 119 | list_of_permissions = value['list_of_permissions'] 120 | list_of_receivers = value['list_of_receivers'] 121 | list_of_receivers_actions = value['list_of_receivers_actions'] 122 | runtime_registered_receivers = value['runtime_registered_receivers'] 123 | list_of_fingerprints = value['list_of_fingerprints'] 124 | apis = value['apis'] 125 | malware = value['malware'] 126 | 127 | #update presence of given permission for sample(key) 128 | for i in range(len(list_of_permissions)): 129 | m = list_of_permissions[i] 130 | if m in Glist_of_permissions: 131 | data.loc[key, m] = 1 132 | 133 | #update presence of given receivers_action for sample(key) 134 | for i in range(len(list_of_receivers_actions)): 135 | m = list_of_receivers_actions[i] 136 | if m in Glist_of_receivers_actions: 137 | data.loc[key, m] = 1 138 | 139 | #update presence of given registered_receiver for sample(key) 140 | for i in range(len(runtime_registered_receivers)): 141 | m = runtime_registered_receivers[i] 142 | if m in Gruntime_registered_receivers: 143 | data.loc[key, m] = 1 144 | 145 | #update presence of given fingerprint for sample(key) 146 | for i in range(len(list_of_fingerprints)): 147 | m = list_of_fingerprints[i] 148 | if m in Glist_of_fingerprints: 149 | data.loc[key, m] = 1 150 | 151 | #update presence of a given api call for sample(key) 152 | for api in apis.keys(): 153 | m = api 154 | if m in Glist_of_apis: 155 | data.loc[key, m] = 1 156 | 157 | #update others features 158 | data.loc[key, 'malware'] = malware 159 | 160 | #save dataset in csv format 161 | data.to_csv("MSGmalware_analysis_dataset_all_features.csv", encoding='utf8') 162 | end_time = time.time() 163 | print('Execution time: '+str(round( end_time -start_time, 3))+'seconds') 164 | """******************************END****************************************""" 165 | 166 | 167 | 168 | -------------------------------------------------------------------------------- /Generating adversarial examples with GAN/GAN_4_SmartAM.py: -------------------------------------------------------------------------------- 1 | """GAN built here is to generate adversarial malware samples that will be used 2 | to test SmartAM1, train SmartAM2, test SmartAM2""" 3 | 4 | from __future__ import print_function, division 5 | from keras.layers import Input, Dense, Activation 6 | from keras.layers.merge import Maximum, Concatenate 7 | from keras.models import Model 8 | from keras.optimizers import Adam 9 | from sklearn.ensemble import RandomForestClassifier 10 | from sklearn.neural_network import MLPClassifier 11 | from sklearn.model_selection import train_test_split 12 | import matplotlib.pyplot as plt 13 | import pandas as pd 14 | import numpy as np 15 | import time 16 | start_time=time.time() 17 | 18 | class SmartAM(): 19 | def __init__(self): 20 | self.feature_dims = 357 21 | self.noise_dims = 40 22 | self.hide_layers = 700 23 | self.generator_layers = [self.feature_dims+self.noise_dims, 24 | self.hide_layers, self.feature_dims] 25 | self.discriminator_layers = [self.feature_dims, self.hide_layers, 1] 26 | self.blackbox = 'MLP' 27 | optimizer = Adam(lr=0.001) 28 | 29 | 30 | # Build and Train blackbox_detector 31 | self.blackbox_detector = self.build_blackbox_detector() 32 | 33 | # Build and compile the discriminator 34 | self.discriminator = self.build_discriminator() 35 | self.discriminator.compile(loss='binary_crossentropy', 36 | optimizer=optimizer, metrics=['accuracy']) 37 | 38 | # Build the generator 39 | self.generator = self.build_generator() 40 | 41 | # The generator takes malware and noise as input and 42 | #generates adversarial malware examples 43 | example = Input(shape=(self.feature_dims,)) 44 | noise = Input(shape=(self.noise_dims,)) 45 | input = [example, noise] 46 | malware_examples = self.generator(input) 47 | 48 | # For the combined model we will only train the generator 49 | self.discriminator.trainable = False 50 | 51 | # The discriminator takes generated samples as input 52 | #and determines validity 53 | validity = self.discriminator(malware_examples) 54 | 55 | # The combined model (stacked generator and discriminator) 56 | # Trains the generator to fool the discriminator 57 | self.combined = Model(input, validity) 58 | self.combined.compile(loss='binary_crossentropy', optimizer=optimizer) 59 | 60 | def build_blackbox_detector(self): 61 | 62 | if self.blackbox is 'MLP': 63 | blackbox_detector = MLPClassifier(hidden_layer_sizes=(20,), 64 | max_iter=10, alpha=1e-4, 65 | solver='sgd', verbose=0, 66 | tol=1e-4, random_state=1, 67 | learning_rate_init=.1) 68 | return blackbox_detector 69 | 70 | def build_generator(self): 71 | 72 | example = Input(shape=(self.feature_dims,)) 73 | noise = Input(shape=(self.noise_dims,)) 74 | x = Concatenate(axis=1)([example, noise]) 75 | for dim in self.generator_layers[1:]: 76 | x = Dense(dim)(x) 77 | x = Activation(activation='sigmoid')(x) 78 | x = Maximum()([example, x]) 79 | generator = Model([example, noise], x, name='generator') 80 | generator.summary() 81 | return generator 82 | 83 | def build_discriminator(self): 84 | 85 | input = Input(shape=(self.discriminator_layers[0],)) 86 | x = input 87 | for dim in self.discriminator_layers[1:]: 88 | x = Dense(dim)(x) 89 | x = Activation(activation='sigmoid')(x) 90 | discriminator = Model(input, x, name='discriminator') 91 | discriminator.summary() 92 | return discriminator 93 | 94 | def load_data(self, filename): 95 | 96 | data = np.load(filename) 97 | xmal, ymal, xben, yben = data['xmal'], data['ymal'], data['xben'],data['yben'] 98 | return (xmal, ymal), (xben, yben) 99 | 100 | def train(self, epochs, batch_size=80): 101 | 102 | # Load the dataset 103 | (xmal, ymal), (xben, yben) = self.load_data('dataset_if.npz') 104 | xtrain_mal, xtest_mal, ytrain_mal, ytest_mal = train_test_split(xmal, ymal, test_size=0.25) 105 | xtrain_ben, xtest_ben, ytrain_ben, ytest_ben = train_test_split(xben, yben, test_size=0.25) 106 | 107 | # Train blackbox_detctor 108 | self.blackbox_detector.fit(np.concatenate([xmal, xben]), 109 | np.concatenate([ymal, yben])) 110 | 111 | ytrain_ben_blackbox = self.blackbox_detector.predict(xtrain_ben) 112 | Original_Train_TPR = self.blackbox_detector.score(xtrain_mal, ytrain_mal) 113 | Original_Test_TPR = self.blackbox_detector.score(xtest_mal, ytest_mal) 114 | Train_TPR, Test_TPR = [], [] 115 | 116 | for epoch in range(epochs): 117 | 118 | for step in range(1):#range(xtrain_mal.shape[0] // batch_size): 119 | # --------------------- 120 | # Train discriminator 121 | # --------------------- 122 | 123 | # Select a random batch of malware examples 124 | idx = np.random.randint(0, xtrain_mal.shape[0], batch_size) 125 | xmal_batch = xtrain_mal[idx] 126 | noise = np.random.uniform(0, 1, (batch_size, self.noise_dims)) 127 | idx = np.random.randint(0, xmal_batch.shape[0], batch_size) 128 | xben_batch = xtrain_ben[idx] 129 | yben_batch = ytrain_ben_blackbox[idx] 130 | 131 | # Generate a batch of new malware examples 132 | gen_examples = self.generator.predict([xmal_batch, noise]) 133 | ymal_batch = self.blackbox_detector.predict(np.ones 134 | (gen_examples.shape)* 135 | (gen_examples > 0.7)) 136 | 137 | #save adversarial samples 138 | np.savez("adverV8", xmal_adver = gen_examples) 139 | 140 | # Train the discriminator 141 | d_loss_real = self.discriminator.train_on_batch(gen_examples, ymal_batch) 142 | d_loss_fake = self.discriminator.train_on_batch(xben_batch, yben_batch) 143 | d_loss = 0.5 * np.add(d_loss_real, d_loss_fake) 144 | 145 | # --------------------- 146 | # Train Generator 147 | # --------------------- 148 | 149 | idx = np.random.randint(0, xtrain_mal.shape[0], batch_size) 150 | xmal_batch = xtrain_mal[idx] 151 | noise = np.random.uniform(0, 1, (batch_size, self.noise_dims)) 152 | 153 | # Train the generator 154 | g_loss = self.combined.train_on_batch([xmal_batch, noise], 155 | np.zeros((batch_size, 1))) 156 | 157 | # Compute Train TRR 158 | noise = np.random.uniform(0, 1, (xtrain_mal.shape[0], self.noise_dims)) 159 | gen_examples = self.generator.predict([xtrain_mal, noise]) 160 | TPR = self.blackbox_detector.score(np.ones(gen_examples.shape) * 161 | (gen_examples > 0.5), ytrain_mal) 162 | Train_TPR.append(TPR) 163 | 164 | # Compute Test TRR 165 | noise = np.random.uniform(0, 1, (xtest_mal.shape[0], self.noise_dims)) 166 | gen_examples = self.generator.predict([xtest_mal, noise]) 167 | TPR = self.blackbox_detector.score(np.ones(gen_examples.shape) * 168 | (gen_examples > 0.5), ytest_mal) 169 | Test_TPR.append(TPR) 170 | 171 | # Plot the progress 172 | print("%d [D loss: %f, acc.: %.2f%%] [G loss: %f]" % (epoch, d_loss[0], 173 | 100*d_loss[1], g_loss)) 174 | 175 | print('Original_Train_TPR: {0}, Adver_Train_TPR: {1}'.format(Original_Train_TPR, Train_TPR[-1])) 176 | print('Original_Test_TPR: {0}, Adver_Test_TPR: {1}'.format(Original_Test_TPR, Test_TPR[-1])) 177 | 178 | # Plot TRR 179 | plt.figure() 180 | plt.plot(range(epochs), Train_TPR, c='g', label='Training Set', linewidth=2) 181 | plt.plot(range(epochs), Test_TPR, c='r', linestyle='--', label='Validation Set', linewidth=2) 182 | plt.xlabel("Epoch") 183 | plt.ylabel("TPR") 184 | plt.legend() 185 | plt.show() 186 | 187 | if __name__ == '__main__': 188 | model = SmartAM() 189 | model.train(epochs=1000, batch_size=300) 190 | 191 | end_time=time.time() 192 | print('Execution time: '+str(round( end_time -start_time, 3))+'seconds') 193 | -------------------------------------------------------------------------------- /Generating adversarial examples with GAN/advers/adverV1.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MSG-Xtra/Deep_learning_for_android_malware_detection/3de760a99a05dc558af430f48fe3ace6d16b34d2/Generating adversarial examples with GAN/advers/adverV1.npz -------------------------------------------------------------------------------- /Generating adversarial examples with GAN/advers/adverV2.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MSG-Xtra/Deep_learning_for_android_malware_detection/3de760a99a05dc558af430f48fe3ace6d16b34d2/Generating adversarial examples with GAN/advers/adverV2.npz -------------------------------------------------------------------------------- /Generating adversarial examples with GAN/advers/adverV3.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MSG-Xtra/Deep_learning_for_android_malware_detection/3de760a99a05dc558af430f48fe3ace6d16b34d2/Generating adversarial examples with GAN/advers/adverV3.npz -------------------------------------------------------------------------------- /Generating adversarial examples with GAN/advers/adverV4.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MSG-Xtra/Deep_learning_for_android_malware_detection/3de760a99a05dc558af430f48fe3ace6d16b34d2/Generating adversarial examples with GAN/advers/adverV4.npz -------------------------------------------------------------------------------- /Generating adversarial examples with GAN/advers/adverV5.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MSG-Xtra/Deep_learning_for_android_malware_detection/3de760a99a05dc558af430f48fe3ace6d16b34d2/Generating adversarial examples with GAN/advers/adverV5.npz -------------------------------------------------------------------------------- /Generating adversarial examples with GAN/advers/adverV6.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MSG-Xtra/Deep_learning_for_android_malware_detection/3de760a99a05dc558af430f48fe3ace6d16b34d2/Generating adversarial examples with GAN/advers/adverV6.npz -------------------------------------------------------------------------------- /Generating adversarial examples with GAN/advers/adverV7.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MSG-Xtra/Deep_learning_for_android_malware_detection/3de760a99a05dc558af430f48fe3ace6d16b34d2/Generating adversarial examples with GAN/advers/adverV7.npz -------------------------------------------------------------------------------- /Generating adversarial examples with GAN/advers/adverV8.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MSG-Xtra/Deep_learning_for_android_malware_detection/3de760a99a05dc558af430f48fe3ace6d16b34d2/Generating adversarial examples with GAN/advers/adverV8.npz -------------------------------------------------------------------------------- /Generating adversarial examples with GAN/advers/prepare_adver_dataset.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Wed Jan 16 23:30:46 2019 4 | 5 | @author: MSG 6 | """ 7 | import numpy as np 8 | import pandas as pd 9 | 10 | data = np.load('adverV7.npz') 11 | adver_dataset = data['xmal_adver'] 12 | adver_dataset= pd.DataFrame(adver_dataset) 13 | #adver_dataset to csv 14 | adver_dataset.to_csv("adver_dataset_test.csv", encoding='utf8') 15 | 16 | 17 | #load adver_dataset.csv 18 | adver_dataset = pd.read_csv('adver_dataset_test.csv') 19 | #assign 1 to dependent variable 'malware' 20 | adver_dataset = adver_dataset.replace(np.nan, 1) 21 | #for each feature value > 0.5, ssign it 1 else 0 22 | adver_dataset_if = adver_dataset.where(adver_dataset<.5, 1) 23 | adver_dataset_if = adver_dataset_if.where(adver_dataset_if>.5, 0) 24 | 25 | #adver_dataset_if to csv 26 | adver_dataset_if.to_csv("adver_dataset_if_test_2.csv", encoding='utf8') 27 | #adver_dataset_if.to_csv("adver_dataset_if_train2.csv", encoding='utf8') 28 | """******************************END****************************************""" 29 | -------------------------------------------------------------------------------- /Generating adversarial examples with GAN/create_input_for_gan.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Mon Jan 14 12:39:22 2019 4 | @author: MSG 5 | =============================================================================== 6 | **********Prepare data for GAN usage********** 7 | *******This script converts csv dataset to .npz format for GAN input******* 8 | =============================================================================== 9 | """ 10 | 11 | import pandas as pd 12 | import numpy as np 13 | import time 14 | 15 | start_time =time.time() 16 | dataset = pd.read_csv('MSGmalware_analysis_dataset_if.csv') 17 | 18 | #spliting dataset into x and y 19 | dataset_b = dataset[dataset['malware']==0] 20 | dataset_m = dataset[dataset['malware']==1] 21 | 22 | xben = dataset_b.iloc[:, 1:358].values 23 | yben = dataset_b.iloc[:, 358].values 24 | 25 | xmal = dataset_m.iloc[:, 1:358].values 26 | ymal = dataset_m.iloc[:, 358].values 27 | 28 | #Save data in .npz format......under test 29 | 30 | np.savez('dataset_if.npz', xmal=xmal, ymal=ymal, xben=xben, yben=yben) 31 | end_time=time.time() 32 | 33 | print('Execution time: '+str(round( end_time -start_time, 3))+'seconds') 34 | """******************************END****************************************""" 35 | 36 | 37 | -------------------------------------------------------------------------------- /Generating adversarial examples with GAN/dataset_if.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MSG-Xtra/Deep_learning_for_android_malware_detection/3de760a99a05dc558af430f48fe3ace6d16b34d2/Generating adversarial examples with GAN/dataset_if.npz -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Deep_learning_for_android_malware_detection 2 | Effectiveness of additional training of an ANN based model in detecting android malware. 3 | 4 | ![proposed_scheme](https://user-images.githubusercontent.com/36197370/59243894-db837100-8c44-11e9-8fe3-d9c8d1b2a980.PNG) 5 | 6 | **The goal of this project is to show the weakness of an ANN based malware detection model in detecting adversarial samples and how to boost its performance.** 7 | 8 | 9 | *In this project, I build an ANN based android malware detection model. It is trained and tested on data extracted from both benign and malware samples.* 10 | 11 | *Using a GAN, we generate adversarial malware samples that we use to attack the model.* 12 | *To boost the model's performance, we retrain it using adversarial samples.* 13 | 14 | 15 | # ----------Content Description-------------------------- 16 | 17 | **Feature_Extraction_&_Selection--Directory** 18 | 19 | In *Feature_Selection folder*, we do feature selection with get_important_features.py and generated final dataset with script3_create_final_combined_dataset_important_features.py. 20 | 21 | *Bfeatures_jsons.json*--contains data for 3090 benign samples analysed. 22 | 23 | *Mfeatures_jsons.json*--contains data for 3090 malware samples analysed. 24 | 25 | *M_Bfeatures_jsons.json*---combined raw data for all samples(Benign and Malware). 26 | 27 | *script2_create_combined_dataset_all_features.py* --for generating dataset with all features for each sample(observation). 28 | 29 | **Generating adversarial examples with GAN---Directory** 30 | 31 | *Code contained here is to generate adversarial malware examples that are used 32 | to test SmartAM1, train SmartAM2, test SmartAM2.* 33 | 34 | *Step1* 35 | 36 | Create input for GAN using create_input_for_gan.py which loads MSGmalware_analysis_dataset_if.csv and returns dataset_if.npz which is the input for GAN(GAN_4_SmartAM.py). 37 | 38 | *Step2* 39 | 40 | Execute GAN_4_SmartAM.py. At each run, it returns an adversarial batch e.g. adverV1-8.npz(as in advers directory). 41 | 42 | *step3* 43 | 44 | In advers directory, execute prepare_adver_dataset.py. Its input is adverVX.npz and output is adver_dataset.csv (adversarial dataset like adver_dataset_if_test_1 and adver_dataset_if_train_1 that are used in the next phase). 45 | 46 | **SmartAM_ANN_Model--Directory** 47 | 48 | *Contains original samples dataset(MSGmalware_analysis_dataset_if.csv), adversarial examples datasets for training , and testing( 49 | adver_dataset_if_train_1.csv, adver_dataset_if_test_1.csv), SmartAM1_ANN.py(weak model), and SmartAM2_ANN.py(boosted model).* 50 | 51 | 52 | 53 | 54 | 55 | -------------------------------------------------------------------------------- /SmartAM_ANN_Model/SmartAM1_ANN.py: -------------------------------------------------------------------------------- 1 | """In this version(SmartAM1), we train and test on origonal samples, 2 | for validation, we use unseen batch of adversarial samples""" 3 | 4 | # Importing the libraries 5 | import numpy as np 6 | import matplotlib.pyplot as plt 7 | import pandas as pd 8 | from keras.models import Sequential 9 | from keras.layers import Dense 10 | from keras.layers import Dropout 11 | from sklearn.metrics import roc_curve 12 | import time 13 | 14 | start_time = time.time() 15 | #****PART1---DATA PREPROCESSING************************************************ 16 | # Importing the dataset 17 | dataset = pd.read_csv("MSGmalware_analysis_dataset_if.csv", delimiter=",") 18 | # split into input (X) and output (Y) variables 19 | X = dataset.iloc[:, 1:358].values 20 | y = dataset.iloc[:, 358].values 21 | # Splitting the dataset into the Training set and Test set 22 | from sklearn.model_selection import train_test_split 23 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, 24 | random_state = 0) 25 | 26 | #***PART2----BUILDING THE ANN(SmartAM2)**************************************** 27 | # Initialising the ANN 28 | model = Sequential() 29 | # Adding the input layer and the first hidden layer 30 | model.add(Dense(units = 300, kernel_initializer = 'uniform', 31 | activation = 'relu', input_dim = 357)) 32 | # Adding the second hidden layer 33 | model.add(Dense(units = 250, kernel_initializer = 'uniform', 34 | activation = 'relu')) 35 | # Adding the third hidden layer 36 | model.add(Dense(units = 50, kernel_initializer = 'uniform', 37 | activation = 'relu')) 38 | # Adding the output layer 39 | model.add(Dense(units = 1, kernel_initializer = 'uniform', 40 | activation = 'sigmoid')) 41 | # Compiling the ANN 42 | model.compile(optimizer = 'adam', loss = 'binary_crossentropy', 43 | metrics = ['accuracy']) 44 | # Fitting the ANN to the Training set 45 | model.fit(X_train, y_train, batch_size = 100, epochs = 700) 46 | 47 | #****PART3----MAKING PREDICTIONS & EVALUATING THE MODEL************************ 48 | # Predicting the Test set results 49 | y_pred = model.predict(X_test) 50 | y_pred = (y_pred > 0.5) 51 | # Making the Confusion Matrix 52 | from sklearn.metrics import confusion_matrix 53 | cm_org = confusion_matrix(y_test, y_pred) 54 | # evaluate the model 55 | scores = model.evaluate(X_train, y_train) 56 | print("\n%s: %.2f%%" % (model.metrics_names[1], scores[1]*100)) 57 | 58 | #ROC 59 | y_org_pred = model.predict(X_test).ravel() 60 | fpr_org, tpr_org, thresholds_org = roc_curve(y_test, y_org_pred) 61 | 62 | #AUC value can also be calculated like this. 63 | from sklearn.metrics import auc 64 | auc_org = auc(fpr_org, tpr_org) 65 | 66 | #*****PART4----VALIDATING WITH UNSEEN ADVERSARIAL SAMPLES********************** 67 | dataset_adver = pd.read_csv('adver_dataset_if_test_1.csv') 68 | x_adver = dataset_adver.iloc[:, 1:-1].values 69 | y_adver = dataset_adver.iloc[:, 358].values 70 | # Predicting the Test set results for adver 71 | y_adver_pred = model.predict(x_adver) 72 | y_adver_pred = (y_adver_pred > 0.5) 73 | # Making the Confusion Matrix 74 | from sklearn.metrics import confusion_matrix 75 | cm_adver = confusion_matrix(y_adver, y_adver_pred) 76 | 77 | #Now, let’s plot the ROC for the MODEL; 78 | plt.figure(1) 79 | plt.plot([0, 1], [0, 1], 'k--') 80 | plt.plot(fpr_org, tpr_org, label='Original samples (area = {:.3f})'.format(auc_org)) 81 | plt.xlabel('False positive rate') 82 | plt.ylabel('True positive rate') 83 | plt.title('ROC curve') 84 | plt.legend(loc='best') 85 | plt.show() 86 | 87 | end_time=time.time() 88 | print('Execution time: '+str(round( end_time -start_time, 3))+'seconds') 89 | 90 | #******SAVING THE MODEL******************************************************** 91 | #save json 92 | model_json = model.to_json() 93 | with open("SmartAM1.json", "w") as json_file: 94 | json_file.write(model_json) 95 | # serialize weights to HDF5 96 | model.save_weights("SmartAM1_1.h5") 97 | print("Saved model to disk") 98 | """ 99 | 100 | #visualize the ann 101 | from ann_visualizer.visualize import ann_viz 102 | ann_viz(model, view=True, filename="SmartAM1.gv", title="SmartAM1") 103 | """ 104 | -------------------------------------------------------------------------------- /SmartAM_ANN_Model/SmartAM2_ANN.py: -------------------------------------------------------------------------------- 1 | """In this version(SmartAM2), we train and test on a combination of 2 | origonal samples and adversarial samples, for validation, we use unseen batch 3 | of adversarial samples""" 4 | 5 | # Importing the libraries 6 | import numpy as np 7 | import matplotlib.pyplot as plt 8 | import pandas as pd 9 | from keras.models import Sequential 10 | from keras.layers import Dense 11 | from keras.layers import Dropout 12 | from sklearn.metrics import roc_curve 13 | import time 14 | 15 | start_time = time.time() 16 | #****PART1---DATA PREPROCESSING************************************************ 17 | # Importing the datasets 18 | dataset_org = pd.read_csv("MSGmalware_analysis_dataset_if.csv", delimiter=",") 19 | dataset_adver = pd.read_csv('adver_dataset_if_train_1.csv', delimiter=",") 20 | #concatenate original and adversarial samples 21 | dataset_retrain = pd.concat([dataset_org, dataset_adver], axis=0) 22 | # split into input (X) and output (Y) variables 23 | X = dataset_retrain.iloc[:, 1:358].values 24 | y = dataset_retrain.iloc[:, 358].values 25 | # Splitting the dataset into the Training set and Test set 26 | from sklearn.model_selection import train_test_split 27 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, 28 | random_state = 0) 29 | 30 | #***PART2----BUILDING THE ANN(SmartAM2)**************************************** 31 | # Initialising the ANN 32 | model = Sequential() 33 | # Adding the input layer and the first hidden layer 34 | model.add(Dense(units = 300, kernel_initializer = 'uniform', 35 | activation = 'relu', input_dim = 357)) 36 | # Adding the second hidden layer 37 | model.add(Dense(units = 200, kernel_initializer = 'uniform', 38 | activation = 'relu')) 39 | # Adding the third hidden layer 40 | model.add(Dense(units = 80, kernel_initializer = 'uniform', 41 | activation = 'relu')) 42 | # Adding the output layer 43 | model.add(Dense(units = 1, kernel_initializer = 'uniform', 44 | activation = 'sigmoid')) 45 | # Compiling the ANN 46 | model.compile(optimizer = 'adam', loss = 'binary_crossentropy', 47 | metrics = ['accuracy']) 48 | # Fitting the ANN to the Training set 49 | model.fit(X_train, y_train, batch_size = 100, epochs = 800) 50 | 51 | 52 | #****PART3----MAKING PREDICTIONS & EVALUATING THE MODEL************************ 53 | # Predicting the Test set results 54 | y_pred = model.predict(X_test) 55 | y_pred = (y_pred > 0.5) 56 | # Making the Confusion Matrix 57 | from sklearn.metrics import confusion_matrix 58 | cm_org = confusion_matrix(y_test, y_pred) 59 | # evaluate the model 60 | scores = model.evaluate(X_train, y_train) 61 | print("\n%s: %.2f%%" % (model.metrics_names[1], scores[1]*100)) 62 | 63 | #ROC 64 | y_org_pred = model.predict(X_test).ravel() 65 | fpr_org, tpr_org, thresholds_org = roc_curve(y_test, y_org_pred) 66 | 67 | #AUC value can also be calculated like this. 68 | from sklearn.metrics import auc 69 | auc_org = auc(fpr_org, tpr_org) 70 | 71 | 72 | #*****PART4----VALIDATING WITH UNSEEN ADVERSARIAL SAMPLES********************** 73 | dataset_adver = pd.read_csv('adver_dataset_if_test_1.csv') 74 | x_adver = dataset_adver.iloc[:, 1:-1].values 75 | y_adver = dataset_adver.iloc[:, 358].values 76 | # Predicting the Test set results for adver 77 | y_adver_pred = model.predict(x_adver) 78 | y_adver_pred = (y_adver_pred > 0.5) 79 | # Making the Confusion Matrix 80 | from sklearn.metrics import confusion_matrix 81 | cm_adver = confusion_matrix(y_adver, y_adver_pred) 82 | 83 | 84 | #Now, let’s plot the ROC for the MODEL; 85 | plt.figure(1) 86 | plt.plot([0, 1], [0, 1], 'k--') 87 | plt.plot(fpr_org, tpr_org, label='Original+Adversarial samples (area = {:.3f})'.format(auc_org)) 88 | plt.xlabel('False positive rate') 89 | plt.ylabel('True positive rate') 90 | plt.title('ROC curve') 91 | plt.legend(loc='best') 92 | plt.show() 93 | 94 | 95 | 96 | end_time=time.time() 97 | print('Execution time: '+str(round( end_time -start_time, 3))+'seconds') 98 | 99 | #******SAVING THE MODEL******************************************************** 100 | #save json 101 | model_json = model.to_json() 102 | with open("SmartAM2.json", "w") as json_file: 103 | json_file.write(model_json) 104 | # serialize weights to HDF5 105 | model.save_weights("SmartAM2.h5") 106 | print("Saved model to disk") 107 | 108 | 109 | #visualize the ann 110 | from ann_visualizer.visualize import ann_viz 111 | ann_viz(model, view=True, filename="SmartAM2.gv", 112 | title="SmartAM2") 113 | 114 | --------------------------------------------------------------------------------