├── _config.yml ├── conf ├── __init__.py └── config.py ├── shared ├── __init__.py ├── App.py └── constants.py ├── utils ├── __init__.py ├── graphics.py ├── data.py ├── misc.py └── db.py ├── data_generation ├── collection │ ├── __init__.py │ └── playStoreCrawler.py ├── reconstruction │ ├── __init__.py │ ├── Numerical.py │ └── Trace.py ├── __init__.py └── stimulation │ ├── __init__.py │ ├── DroidbotTest.py │ ├── DroidutanTest.py │ └── Garfield.py ├── data_inference ├── extraction │ ├── __init__.py │ ├── StringKernelSVM.py │ └── featureExtraction.py ├── visualization │ ├── __init__.py │ └── visualizeData.py ├── learning │ ├── __init__.py │ └── HMM.py └── __init__.py ├── __init__.py ├── .gitignore ├── docs ├── dbrecovery2.sql ├── google_plugin.txt ├── dbrecovery.sql ├── custom_hooks_introspy.txt └── hooks.json ├── tools ├── extractStaticFeatures.py ├── staticResults.py ├── visualizeFeatureVectors.py ├── downloadAPKPlayStore.py ├── staticExperimentI.py └── runExperimentII.py └── README.md /_config.yml: -------------------------------------------------------------------------------- 1 | theme: jekyll-theme-hacker -------------------------------------------------------------------------------- /conf/__init__.py: -------------------------------------------------------------------------------- 1 | __all__ = ["config"] 2 | -------------------------------------------------------------------------------- /shared/__init__.py: -------------------------------------------------------------------------------- 1 | __all__ = ["App", "constants"] 2 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- 1 | __all__ = [ "db", "data", "graphics", "misc" ] 2 | -------------------------------------------------------------------------------- /data_generation/collection/__init__.py: -------------------------------------------------------------------------------- 1 
| __all__ = ["playStoreCrawler"] 2 | -------------------------------------------------------------------------------- /data_inference/extraction/__init__.py: -------------------------------------------------------------------------------- 1 | __all__ = ["featureExtraction"] 2 | -------------------------------------------------------------------------------- /data_inference/visualization/__init__.py: -------------------------------------------------------------------------------- 1 | __all__ = ["visualizeData"] 2 | -------------------------------------------------------------------------------- /data_inference/learning/__init__.py: -------------------------------------------------------------------------------- 1 | __all__ = ["HMM", "ScikitLearners"] 2 | -------------------------------------------------------------------------------- /data_generation/reconstruction/__init__.py: -------------------------------------------------------------------------------- 1 | __all__ = ["Trace", "Numerical"] 2 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- 1 | __all__ = ["conf", "utils", "data_generation", "data_inference", "shared"] 2 | -------------------------------------------------------------------------------- /data_generation/__init__.py: -------------------------------------------------------------------------------- 1 | __all__ = [ "collection", "stimulation", "reconstruction" ] 2 | -------------------------------------------------------------------------------- /data_generation/stimulation/__init__.py: -------------------------------------------------------------------------------- 1 | __all__ = ["DroidbotTest", "DroidutanTest", "Garfield"] 2 | -------------------------------------------------------------------------------- /data_inference/__init__.py: -------------------------------------------------------------------------------- 1 | 
def loadNumericalFeatures(featuresFile, delimiter=","):
    """Load numerical features from a file and return them as a list.

    Two file layouts are understood:

    * a Python list literal, e.g. ``[1, 0, 3.5]`` (detected by the presence
      of both ``[`` and ``]`` in the content);
    * a flat sequence of numbers separated by ``delimiter``, e.g. ``1,0,3.5``.

    :param featuresFile: Path to the file holding the features
    :type featuresFile: str
    :param delimiter: Separator used by the flat layout (default: ",")
    :type delimiter: str
    :return: The parsed features, or an empty list if the file is missing or
             cannot be parsed
    """
    # Imported locally so the block does not depend on a new file-level import.
    import ast

    try:
        if not os.path.exists(featuresFile):
            prettyPrint("Unable to find the features file \"%s\"" % featuresFile, "warning")
            return []
        # The context manager closes the handle even if reading fails.
        with open(featuresFile) as featuresHandle:
            content = featuresHandle.read()
        if "[" in content and "]" in content:
            # SECURITY: this used to be eval(content), which executes arbitrary
            # code stored in the features file. literal_eval only accepts
            # Python literals, which is all a serialized feature list needs.
            features = ast.literal_eval(content.strip())
        else:
            features = [float(f) for f in content.replace(' ', '').split(delimiter)]

    except Exception as e:
        prettyPrintError(e)
        return []

    return features
PRIMARY KEY AUTOINCREMENT, 17 | dpLearner TEXT, 18 | dpIteration INTEGER, 19 | dpRun INTEGER, 20 | dpTimestamp TEXT, 21 | dpFeature TEXT, 22 | dpType TEXT, 23 | dpAccuracy REAL, 24 | dpRecall REAL, 25 | dpSpecificity REAL, 26 | dpPrecision REAL, 27 | dpFscore REAL, 28 | FOREIGN KEY (dpLearner) REFERENCES parent(learnerID), 29 | FOREIGN KEY (dpRun) REFERENCES parent(runID) 30 | ); 31 | 32 | CREATE TABLE testapp( 33 | taName TEXT, 34 | taRun INTEGER, 35 | taIteration INTEGER, 36 | taType TEXT, 37 | taClassified TEXT, 38 | taLog TEXT, 39 | PRIMARY KEY (taName, taRun, taIteration), 40 | FOREIGN KEY (taRun) REFERENCES parent(runID) 41 | ); 42 | 43 | -------------------------------------------------------------------------------- /docs/google_plugin.txt: -------------------------------------------------------------------------------- 1 | =================================== 2 | | Using Google Play API in Python | 3 | =================================== 4 | 5 | [*] from googleplay_api.googleplay import GooglePlayAPI 6 | 7 | [*] Instantiate an object: api = GooglePlayAPI() 8 | [*] Login "GooglePlay": api.login(GOOGLE_LOGIN, GOOGLE_PASSWORD, AUTH_TOKEN) 9 | [*] Browse categories "api.browse()": 10 | > returns categories e.g. "cats" 11 | > "cats.category" --> a list of categories as "BrowseLink" objects 12 | > "for x in cats" --> "x.name", "x.dataUrl" e.g. "browse?c=3&cat=ANDROID" 13 | 14 | [*] Browse subcategories "api.list([category])": 15 | > returns subcategories e.g. "sub" 16 | > "sub.doc" --> a list of subcategories as "DocV2" objects 17 | > "for x in sub.doc" --> "x.title" e.g. "TopApps", "x.docid" e.g. "apps_topselling_free" 18 | 19 | [*] Browse apps in subcategory "api.list([category], [subcategory])": 20 | > "apps = api.list("c", "s").doc" --> one DocV2 element 21 | > "for x in apps.doc[0].child" --> "x.title" = app name, "x.aggregateRating.starRating", "x.docid" = app id e.g. 
#!/usr/bin/python
"""Extract the static features of every APK in a directory.

Usage: python extractStaticFeatures.py [app_dir]

For each "<name>.apk" found directly under [app_dir], the features returned
by extractStaticFeatures() are saved as text to "<name>.static" next to it.
"""

import glob, os, sys, timeout_decorator
from Aion.data_inference.extraction.featureExtraction import *

@timeout_decorator.timeout(120) # Two minutes
def analyze(a):
    """Run the static feature extraction on the APK at path "a".

    The call is aborted by the timeout decorator after 120 seconds.
    """
    return extractStaticFeatures(a)


if len(sys.argv) < 2:
    print("[Usage]: python extractStaticFeatures.py [app_dir]")
    sys.exit(0)

app_dir = sys.argv[1]

alldata = glob.glob("%s/*.apk" % app_dir)

if len(alldata) < 1:
    print("[*] Unable to retrieve APK's from the directories \"%s\"" % (app_dir))
    sys.exit(0)

print("[*] Successfully retrieved %s APK's from the directories \"%s\"" % (len(alldata), app_dir))

# Commence analysis
for counter, a in enumerate(alldata, 1):
    try:
        print("Analyzing app #%s out of %s apps" % (counter, len(alldata)))
        basic, permissions, apicalls, allfeatures = analyze(a)
        # Swap only the file extension; str.replace() would also rewrite
        # any ".apk" that occurs in a directory name along the path.
        featuresPath = os.path.splitext(a)[0] + ".static"
        print("[*] Saving all features to \"%s\"" % featuresPath)
        with open(featuresPath, "w") as f:
            f.write(str(allfeatures))

    except Exception as e:
        # Best effort: report the failure (including timeouts) and move on
        # to the next APK.
        print("Error encountered: %s" % e)
4 | 5 | In a nutshell, the framework is developed as a set of tools and utilities categorized according to their objective. For example, [data_inference] contains different machine learning feature extraction, feature selection, and classification modules and methods. Those utilities are used as an API by tools residing under the [tools] directory. 6 | 7 | We are still experimenting with the applicability of such an idea, hence the lack of proper documentation. 8 | 9 | ### Requirements 10 | 11 | Aion utilizes various tools including: 12 | 13 | - [androguard](https://github.com/androguard/androguard): for static analysis of APK's and retrieval of components and other metadata 14 | - [Genymotion](https://www.genymotion.com/fun-zone/): we rely on Genymotion to run the AVDs on which apps are tested and monitored. 15 | - [Droidbot](http://honeynet.github.io/droidbot): used as an option for randomly interacting with an APK-under-test. 16 | - [droidmon](https://github.com/idanr1986/droidmon): keeps track of the app's runtime behavior in the form of API calls it issues. 17 | - [Droidutan](https://github.com/aleisalem/droidutan): a "homemade", less fancy equivalent to Droidbot. 18 | - [scikit-learn](https://scikit-learn.org): the main provider of machine learning algorithms. 19 | ### Support or Contact 20 | 21 | Please feel free to pull/fork the repository. We kindly ask you to cite us if anything useful comes out of your endeavors. 22 | 23 | You can get in touch with the contributor of this repository via [salem@in.tum.de]. 24 | 25 | Happy hunting.
def prettyPrint(msg, mode="info"):
    """ Pretty prints a colored message. "info": Green, "error": Red, "warning": Yellow, "info2": Blue, "output": Magenta, "debug": White

    The message is printed as "[*] <msg>. <timestamp>" wrapped in ANSI color
    codes. Every mode other than "info" is also written to the log file when
    logging is enabled.

    :param msg: The message to display
    :type msg: str
    :param mode: The kind of message, which selects the color (unknown modes are printed in green)
    :type mode: str
    """
    modeColors = {
        "info": "32",    # Green
        "error": "31",   # Red
        "warning": "33", # Yellow
        "info2": "34",   # Blue
        "output": "35",  # Magenta
        "debug": "37",   # White
    }
    color = modeColors.get(mode, "32")
    msg = "[*] %s. %s" % (msg, getTimestamp(includeDate=True))
    print("\033[1;%sm%s\033[1;m" % (color, msg))
    # Log the message if LOGGING is enabled
    if loggingON() and mode != "info":
        logEvent("%s: %s" % (getTimestamp(includeDate=True), msg))

def prettyPrintError(ex):
    """ Pretty prints an error/exception message

    Meant to be called from inside an "except" block: the file name and line
    number are taken from the traceback of the exception being handled.

    :param ex: The exception (or error message) to report
    """
    exc_type, exc_obj, exc_tb = sys.exc_info()
    if exc_tb is None:
        # No exception is currently being handled (e.g. called outside of an
        # "except" block), so there is no traceback to read a location from.
        prettyPrint("Error \"%s\" encountered: %s" % (type(ex), ex), "error")
        return
    fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
    msg = "Error \"%s\" encountered in \"%s\" line %s: %s" % (exc_type, fname, exc_tb.tb_lineno, ex)
    prettyPrint(msg, "error")
INTO learner (learnerName) VALUES ("KNN100"); 45 | INSERT INTO learner (learnerName) VALUES ("KNN250"); 46 | INSERT INTO learner (learnerName) VALUES ("KNN500"); 47 | INSERT INTO learner (learnerName) VALUES ("Trees10"); 48 | INSERT INTO learner (learnerName) VALUES ("Trees25"); 49 | INSERT INTO learner (learnerName) VALUES ("Trees50"); 50 | INSERT INTO learner (learnerName) VALUES ("Trees75"); 51 | INSERT INTO learner (learnerName) VALUES ("Trees100"); 52 | INSERT INTO learner (learnerName) VALUES ("SVM"); 53 | INSERT INTO learner (learnerName) VALUES ("Ensemble"); 54 | -------------------------------------------------------------------------------- /utils/data.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | from Aion.conf import config 4 | 5 | def getAdminEmail(): 6 | """Returns the email address of the admin for notifications""" 7 | return config.ADMIN_EMAIL 8 | 9 | def getAionDBPath(): 10 | """Returns the path to the Aion DB""" 11 | return config.AION_DB 12 | 13 | def getAionDBRecovery(): 14 | """Returns the path to the Aion DB recovery script""" 15 | return config.DB_RECOVERY 16 | 17 | def getGoogleCreds(): 18 | """Returns the Google Account credentials""" 19 | return config.GOOGLE_LOGIN, config.GOOGLE_PASSWORD 20 | 21 | def getHashesDBPath(): 22 | """Returns the path to the hashes DB for quick lookup of app package names""" 23 | return config.HASHES_DB 24 | 25 | def getSDKDir(): 26 | """Returns the Android SDK directory path""" 27 | return config.ANDROID_SDK 28 | 29 | def getADBPath(): 30 | """Returns the path to the adb tool""" 31 | return config.ANDROID_ADB 32 | 33 | def getGenymotionPlayer(): 34 | """Returns the path to the Genymotion VM player""" 35 | return config.GENYMOTION_PLAYER 36 | 37 | def getProjectDir(): 38 | """Returns the absolute path of the project""" 39 | return config.AION_DIR 40 | 41 | def loggingON(): 42 | """Returns whether logging is on""" 43 | on = True if 
def loadDirs():
    """Loads the directories' paths from the config.py file

    :return: A dictionary with the keys "Aion_DIR", "DOWNLOADS_DIR", and "ANDROID_SDK"
    :rtype: dict
    """
    # config.py names the project directory AION_DIR (all upper-case);
    # reading "config.Aion_DIR" raised AttributeError on every call.
    # The dictionary key keeps its original spelling for existing callers.
    return {
        "Aion_DIR": config.AION_DIR,
        "DOWNLOADS_DIR": config.DOWNLOADS_DIR,
        "ANDROID_SDK": config.ANDROID_SDK,
    }
int, int) 28 | 29 | 30 | [*] android.content.BroadcastReceiver: 31 | > goAsync() 32 | > onReceive(Context, Intent) 33 | 34 | [*] android.content.ContentProvider: 35 | > delete(Uri, String, String[]) 36 | > insert(Uri, ContentValues) 37 | > openFile(Uri, String) 38 | > query(Uri, String[], String, String[], String [, CancellationSignal]) 39 | > update(Uri, ContentValues, String, String[]) 40 | 41 | [*] android.content.ContextWrapper: 42 | > bindService(Intent, ServiceConnection, int) 43 | > deleteFile(String) 44 | > moveDatabaseFrom(Context, String) 45 | > openFileInput(String) 46 | > openFileOutput(String) 47 | > openOrCreateDatabase(String, int, SQLiteDatabase.CursorFactory [, DatabaseErrorHandler]) 48 | > registerReceiver(BroadcastReceiver, IntentFilter [, String, Handler]) 49 | > sendBroadcast(Intent [, String]) 50 | > startActivity(Intent [, Bundle]) 51 | > startService(Intent) 52 | > stopService(Intent) 53 | > unbindService(ServiceConnection) 54 | > unregisterReceiver(BroadcastReceiver) 55 | 56 | [*] android.hardware.Camera: 57 | > open([int]) 58 | > reconnect() 59 | > release() 60 | > startPreview() 61 | > stopPreview() 62 | > takePicture(Camera.ShutterCallback, Camera.PictureCallback, Camera.PictureCallback [, Camera.PictureCallback]) 63 | 64 | [*] android.location.Location: 65 | > getLatitude() 66 | > getLongitude() 67 | 68 | 69 | [*] android.media.AudioRecord: 70 | > read(short[], int, int) 71 | > read(ByteBuffer, int) 72 | > read(byte[], int, int) 73 | > startRecording() 74 | > stop() 75 | 76 | [*] android.media.MediaRecorder: 77 | > prepare() 78 | > setCamera(Camera) 79 | > start() 80 | > stop() 81 | 82 | [*] android.net.wifi.WifiManager: 83 | > disconnect() 84 | > getScanResults() 85 | > getWifiState() 86 | > reconnect() 87 | > startScan() 88 | 89 | [*] android.os.Process: 90 | > killProcess(int) 91 | 92 | 93 | [*] android.telephony.SmsManager: 94 | > sendDataMessage(String, String, short, byte[], PendingIntent, PendingIntent) 95 | > 
sendTextMessage(String, String, String, PendingIntent, PendingItent) 96 | 97 | -------------------------------------------------------------------------------- /data_generation/collection/playStoreCrawler.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | # Python modules 4 | import sys, os, shutil, glob, io 5 | 6 | # Aion modules 7 | from Aion.utils.graphics import * 8 | from Aion.utils.data import * 9 | from Aion.shared.App import App 10 | 11 | # Third-party modules 12 | from googleplay_api.googleplay import GooglePlayAPI 13 | 14 | class PlayStoreCrawler: 15 | 16 | def __init__(self): 17 | try: 18 | creds = loadPlayStoreConfig() 19 | self.googleLogin = creds['GOOGLE_LOGIN'] 20 | self.googlePassword = creds['GOOGLE_PASSWORD'] 21 | self.androidID = creds['ANDROID_ID'] 22 | self.authToken = creds['AUTH_TOKEN'] 23 | self.api = GooglePlayAPI(self.androidID) # Login to the Play Store 24 | except Exception as e: 25 | prettyPrintError(e) 26 | 27 | def login(self): 28 | """ Logs into the Google account using the received Google credentials """ 29 | try: 30 | self.api.login(self.googleLogin, self.googlePassword, self.authToken) 31 | except Exception as e: 32 | prettyPrintError(e) 33 | return False 34 | 35 | return True 36 | 37 | def getCategories(self): 38 | """ Returns a list of app categories available on Google Play Store """ 39 | try: 40 | cats = self.api.browse() 41 | categories = [c.dataUrl[c.dataUrl.rfind('=')+1:] for c in cats.category] 42 | except Exception as e: 43 | prettyPrintError(e) 44 | return [] 45 | 46 | return categories 47 | 48 | 49 | def getSubCategories(self, category): 50 | """ Returns a list of app sub-categories available on Google Play Store """ 51 | try: 52 | sub = self.api.list(category) 53 | subcategories = [s.docid for s in sub.doc] 54 | except Exception as e: 55 | prettyPrintError(e) 56 | return [] 57 | 58 | return subcategories 59 | 60 | 61 | def getApps(self, category, 
def loadJSONTraces(jsonFiles, filesType="malware"):
    """Loads and parses JSON files in a list and returns a list of labelled traces

    :param jsonFiles: The paths of the Introspy-generated JSON trace files
    :type jsonFiles: list
    :param filesType: "malware" (label 1) or "goodware" (label 0); anything else is labelled -1
    :type filesType: str
    :return: A list of (trace, label, appName) tuples, one per file that exists
    """
    allTraces = []
    try:
        # The original compared against the undefined names "filesTypes" and
        # "fileTypes", so labelling always failed with a NameError.
        if filesType == "malware":
            label = 1
        elif filesType == "goodware":
            label = 0
        else:
            label = -1
        for jsonFile in jsonFiles:
            currentAppName = jsonFile[jsonFile.rfind("/")+1:].replace(".json", "")
            # Check whether file exists
            if not os.path.exists(jsonFile):
                prettyPrint("JSON file \"%s\" could not be found. Skipping" % jsonFile, "warning")
                continue
            # Convert the JSON trace to a '|' separated string and keep it
            currentTrace = introspyJSONToTrace(jsonFile)
            allTraces.append((currentTrace, label, currentAppName))

    except Exception as e:
        prettyPrintError(e)

    return allTraces

def introspyJSONToTrace(jsonTraceFile):
    """Converts an Introspy-generated JSON trace to a '|' separated trace of API calls
    :param jsonTraceFile: The file containing the JSON trace
    :type jsonTraceFile: str
    :return: A '|' separated augmentation of Introspy-logged API calls, or an
             empty string if the file is missing, malformed, or has no "calls" key.
    """
    try:
        if not os.path.exists(jsonTraceFile):
            prettyPrint("Could not find the file \"%s\"" % jsonTraceFile, "warning")
            return ""
        # Load the trace from the file (the handle is closed right after reading)
        with open(jsonTraceFile) as traceHandle:
            jsonTrace = json.loads(traceHandle.read())
        trace = []
        if "calls" not in jsonTrace:
            prettyPrint("Could not find the key \"calls\" in current trace. Returning empty string", "warning")
            return ""
        # Iterate over the calls and append them to "trace"
        for call in jsonTrace["calls"]:
            callClass = call["clazz"] # A "typo" in introspy's DBAnalyzer
            # Strip the " - [WARNING..." suffix some method names carry.
            # Test for the same marker that is used to cut, so a name that
            # merely contains "WARNING" does not lose its last character.
            callMethod = call["method"]
            warningIndex = callMethod.find(" - [WARNING")
            if warningIndex != -1:
                callMethod = callMethod[:warningIndex]
            # Reset for every call: a call without arguments must not reuse the
            # arguments of the previous call (or hit an unbound name on the first).
            callArgs = ""
            if "arguments" in call["argsAndReturnValue"]:
                arguments = call["argsAndReturnValue"]["arguments"]
                arguments = _cleanUpArgs(arguments, callClass, callMethod)
                callArgs = ",".join(arguments) if arguments else ""
            # Append call to trace list
            trace.append(str("%s.%s(%s)" % (callClass, callMethod, callArgs)))

    except Exception as e:
        prettyPrintError(e)
        return ""

    return "|".join(trace)

def _cleanUpArgs(arguments, className="", methodName=""):
    """Parses and cleans up the arguments of a logged method call

    :param arguments: Maps argument names to their logged (string) values
    :type arguments: dict
    :param className: The class the method belongs to (currently unused)
    :type className: str
    :param methodName: The name of the method (currently unused)
    :type methodName: str
    :return: A list of 'name="value"' strings; the unmodified "arguments" if cleaning up fails
    """
    try:
        newArguments = []
        # The default method of extracting arguments
        for argKey in arguments:
            newVal = arguments[argKey]
            newKey = argKey.lower().replace(" ", "_")
            if newVal.lower().find("intent") != -1:
                # Keep the span from the first "com." up to the last space
                newVal = newVal[newVal.find("com."):newVal.rfind(" ")]

            newVal = newVal.replace("[","").replace("]","")
            newArguments.append("%s=\"%s\"" % (newKey, newVal))

    except Exception as e:
        prettyPrintError(e)
        return arguments

    return newArguments
| 9 | 10 | 11 | def defineArguments(): 12 | parser = argparse.ArgumentParser(prog="staticResults.py", description="A tool to average the results from X runs of Aion's static experiment I") 13 | parser.add_argument("-d", "--resultdir", help="The directory containing results text files", required=True) 14 | parser.add_argument("-t", "--featuretype", help="The type of the features used in classification", required=True) 15 | parser.add_argument("-n", "--datasetname", help="The name of the dataset to which the results belong", required=True) 16 | parser.add_argument("-e", "--experiment", help="Whether the experiment is static or dynamic", choices=["static", "dynamic"], default="static", required=False) 17 | return parser 18 | 19 | def main(): 20 | try: 21 | argumentParser = defineArguments() 22 | arguments = argumentParser.parse_args() 23 | prettyPrint("Welcome to the \"Aion\"'s static experiment I printer") 24 | 25 | # 1. Retrieve files 26 | allFiles = glob.glob("%s/*.txt" % arguments.resultdir) 27 | if len(allFiles) < 1: 28 | prettyPrint("Unable to retrieve any results files. Exiting", "error") 29 | return False 30 | 31 | prettyPrint("Successfully retrieved %s result files" % len(allFiles)) 32 | # 2. Parse files 33 | results = {"training": {}, "test": {}} 34 | for f in allFiles: 35 | prettyPrint("Processing \"%s\"" % f) 36 | lines = open(f).read().split('\n') 37 | mode, classifier, result = "", "", {} 38 | for line in lines: 39 | # 2.a. Get mode i.e. training/test 40 | mode = "training" if line.lower().find("training") != -1 else mode 41 | mode = "test" if line.lower().find("test") != -1 else mode 42 | # 2.b. Get the classifier's name 43 | classifier = line[line.rfind(' ')+1:-1] if line.lower().find("results") != -1 else classifier 44 | # 2.c. 
Lastly, get the results 45 | result = eval(line) if line.lower().find("f1score") != -1 else result 46 | if mode != "" and classifier != "" and len(result) > 0: 47 | if classifier not in results[mode].keys(): 48 | # Add results to directionary 49 | results[mode][classifier] = {"accuracy": [result["accuracy"]], "recall": [result["recall"]], "specificity": [result["specificity"]], "precision": [result["precision"]], "f1score": [result["f1score"]]} 50 | else: 51 | # Append results 52 | results[mode][classifier]["accuracy"].append(result["accuracy"]) 53 | results[mode][classifier]["recall"].append(result["recall"]) 54 | results[mode][classifier]["specificity"].append(result["specificity"]) 55 | results[mode][classifier]["precision"].append(result["precision"]) 56 | results[mode][classifier]["f1score"].append(result["f1score"]) 57 | mode, classifier, result = "", "", {} 58 | 59 | # 3. Average the results 60 | training, test = results["training"], results["test"] 61 | resultsFile = open("avg_results_%s_%s_%s.txt" % (arguments.datasetname, arguments.featuretype, arguments.experiment), "w") 62 | learners = training.keys() 63 | learners.sort() 64 | for learner in learners: 65 | accuracy = float(sum(training[learner]["accuracy"])/float(len(allFiles))) 66 | recall = float(sum(training[learner]["recall"])/float(len(allFiles))) 67 | specificity = float(sum(training[learner]["specificity"])/float(len(allFiles))) 68 | precision = float(sum(training[learner]["precision"])/float(len(allFiles))) 69 | f1score = float(sum(training[learner]["f1score"])/float(len(allFiles))) 70 | resultsFile.write("[Training: %s]\n" % learner) 71 | resultsFile.write("Accuracy: %s, Recall: %s, Specificity: %s, Precision: %s, F1Score: %s\n\n" % (accuracy, recall, specificity, precision, f1score)) 72 | 73 | learners = test.keys() 74 | learners.sort() 75 | for learner in learners: 76 | accuracy = float(sum(test[learner]["accuracy"])/float(len(allFiles))) 77 | recall = 
class DroidbotAnalysis(Process):
    """
    Represents a Droidbot-driven test of an APK
    """
    def __init__(self, pID, pName, pVM, pTarget, pSt="", pDuration=60):
        """
        Initialize the test
        :param pID: Used to identify the process
        :type pID: int
        :param pName: A unique name given to a process
        :type pName: str
        :param pVM: The Genymotion AVD name to run the test on
        :type pVM: str
        :param pTarget: The path to the APK under test
        :type pTarget: str
        :param pSt: The snapshot of the AVD in case restoring is needed
        :type pSt: str
        :param pDuration: The duration of the Droidbot test in seconds (default: 60s)
        :type pDuration: int
        """
        Process.__init__(self, name=pName)
        self.processID = pID
        self.processName = pName
        self.processVM = pVM
        self.processTarget = pTarget
        self.processSnapshot = pSt
        self.processDuration = pDuration

    def run(self):
        """
        Runs the Droidbot test against the [processTarget] for [processDuration]
        :return: A bool depicting the success/failure of the test
        """
        t = None
        try:
            # A timer to guarantee the process exits
            if verboseON():
                prettyPrint("Setting timer for %s seconds" % str(float(self.processDuration)*5.0), "debug")
            t = threading.Timer(float(self.processDuration)*5.0, self.stop)
            t.start()
            # Step 1. Analyze APK
            if verboseON():
                prettyPrint("Analyzing APK: \"%s\"" % self.processTarget, "debug")
            s = Session()
            # APKs are binary archives: read them as bytes and release the handle right away
            with open(self.processTarget, "rb") as apkFile:
                s.add(self.processTarget, apkFile.read())
            analyzed = list(s.analyzed_apk.values())
            if len(analyzed) > 0:
                apk = analyzed[0]
                if isinstance(apk, list):
                    apk = apk[0]
            else:
                prettyPrint("Could not retrieve an APK to analyze. Skipping", "warning")
                return False

            # Step 2. Get the Ip address assigned to the AVD
            getAVDIPCmd = ["VBoxManage", "guestproperty", "enumerate", self.processVM]
            result = subprocess.Popen(getAVDIPCmd, stderr=subprocess.STDOUT, stdout=subprocess.PIPE, universal_newlines=True).communicate()[0].replace(' ', '')
            if result.lower().find("error") != -1:
                prettyPrint("Unable to retrieve the IP address of the AVD", "error")
                print(result)
                return False
            marker = "androvm_ip_management,value:"
            start = result.find(marker)
            if start == -1:
                # Without the property there is no address to parse
                prettyPrint("Unable to retrieve the IP address of the AVD", "error")
                return False
            start += len(marker)
            end = result.find(',', start)
            avdIP = result[start:end] if end != -1 else result[start:]
            adbID = "%s:5555" % avdIP

            # Step 3. Define frequently-used commands
            droidbotOut = self.processTarget.replace(".apk", "_droidbot")
            droidbotCmd = ["droidbot", "-d", adbID, "-a", self.processTarget, "-o", droidbotOut, "-timeout", str(self.processDuration), "-random", "-keep_env", "-grant_perm"]

            # Step 4. Test the APK using Droidbot (Assuming machine is already on)
            prettyPrint("Testing the APK \"%s\" using Droidbot" % apk.package)
            # 4.a. Start Droidbot and wait for it to finish
            subprocess.Popen(droidbotCmd, stderr=subprocess.STDOUT, stdout=subprocess.PIPE).communicate()

            # 4.b. Check for existence of output directory
            if not os.path.exists(droidbotOut):
                prettyPrint("No output folder found for \"%s\"" % self.processTarget, "warning")
                return False

            # 4.c. Filter the logcat dumped by droidbot (grep exits non-zero, i.e. raises, when nothing matches)
            with open("%s/logcat_filtered.log" % droidbotOut, "w") as logFile:
                output = subprocess.check_output(["grep", "-i", "droidmon-apimonitor-%s" % apk.package, "%s/logcat.txt" % droidbotOut], universal_newlines=True)
                logFile.write(output)

        except subprocess.CalledProcessError as cpe:
            prettyPrint("Unable to find the tag \"Droidmon-apimonitor-%s\" in the log file" % apk.package, "warning")
        except Exception as e:
            prettyPrintError(e)
            return False
        finally:
            # The watchdog is only needed while the test is running
            if t is not None:
                t.cancel()

        return True

    def stop(self):
        """
        Stops this analysis process after restoring the AVD snapshot (if one was given)
        """
        try:
            prettyPrint("Stopping the analysis process \"%s\" on \"%s\". Restoring snapshot \"%s\"" % (self.processName, self.processVM, self.processSnapshot), "warning")
            # Restore snapshot because that is probably not a good sign
            # This must happen before the signal: with the default SIGTERM disposition nothing after os.kill would run
            if self.processSnapshot != "":
                restoreVirtualBoxSnapshot(self.processVM, self.processSnapshot)
            os.kill(os.getpid(), signal.SIGTERM)

        except Exception as e:
            prettyPrintError(e)

        return True
def defineArguments():
    """
    Builds the command-line parser of the feature vector visualization tool
    :return: An argparse.ArgumentParser requiring --datasetpath and --datasettype, with optional --algorithm, --dimensionality and --figuresize
    """
    parser = argparse.ArgumentParser(prog="visualizeFeatureVectors.py", description="A tool to reduce the dimensionality of data points and visualize them in 2- or 3-D.")
    parser.add_argument("-p", "--datasetpath", help="The directory containing the feature vectors", required=True)
    parser.add_argument("-t", "--datasettype", help="The type of the feature vectors to load: indicates the type of experiment and the file extensions", required=True, choices=["static", "dynamic"])
    parser.add_argument("-a", "--algorithm", help="The dimensionality reduction algorithm to use", required=False, default="tsne", choices=["tsne", "pca"])
    parser.add_argument("-d", "--dimensionality", help="The target dimensionality to which the feature vectors are projected", required=False, default="2", choices=["2", "3"])
    parser.add_argument("-s", "--figuresize", help="The size of the Plotly figure", required=False, default="(1024, 1024)")
    return parser

def main():
    """
    Loads the feature vectors under [datasetpath]/malware and [datasetpath]/goodware, then projects and plots them
    :return: A bool depicting the success/failure of the operation
    """
    try:
        import ast # Local import: literal_eval parses the "(w, h)" argument without evaluating arbitrary code
        argumentParser = defineArguments()
        arguments = argumentParser.parse_args()
        prettyPrint("Welcome to the \"Aion\"'s experiment I")

        # Parse the numeric arguments once instead of before every plot
        dimensionality = int(arguments.dimensionality)
        figureSize = ast.literal_eval(arguments.figuresize.strip())

        # Check the existence of the dataset directories
        if not os.path.exists("%s/malware/" % arguments.datasetpath) or not os.path.exists("%s/goodware/" % arguments.datasetpath):
            prettyPrint("Could not find malware or goodware directories under \"%s\". Exiting" % arguments.datasetpath, "warning")
            return False

        # Retrieve the data
        fileExtension = "static" if arguments.datasettype == "static" else "num"
        allFiles = glob.glob("%s/malware/*.%s" % (arguments.datasetpath, fileExtension)) + glob.glob("%s/goodware/*.%s" % (arguments.datasetpath, fileExtension))
        if len(allFiles) < 1:
            prettyPrint("Could not retrieve any \".%s\" files from the dataset directory \"%s\". Exiting" % (fileExtension, arguments.datasetpath), "warning")
            return False

        prettyPrint("Successfully retrieved %s \".%s\" files from the dataset directory \"%s\"" % (len(allFiles), fileExtension, arguments.datasetpath))
        # Load the data
        X, y = [], []
        appNames = []
        hashesDB = pickledb.load(getHashesDBPath(), False) # Open the hashes key-value store
        prettyPrint("Attempting to load feature vectors")
        for f in allFiles:
            featureVector = loadNumericalFeatures(f)
            if len(featureVector) < 1:
                continue
            # Retrieve app name from path
            appKey = f[f.rfind('/')+1:].replace(".%s" % fileExtension, "").lower()
            appName = hashesDB.get(appKey)
            if appName is None:
                # Fall back to the SHA256 digest of the APK's file name (hashlib needs bytes)
                appKey = appKey + ".apk"
                rawKey = appKey if isinstance(appKey, bytes) else appKey.encode("utf-8")
                appName = hashesDB.get(hashlib.sha256(rawKey).hexdigest())
            if appName is None:
                # Last resort: use the file name without its extension
                appName = f[f.rfind("/")+1:f.rfind(".")]

            # The label is inferred from the path
            y.append(1 if f.lower().find("malware") != -1 else 0)
            X.append(featureVector)
            appNames.append(appName)
            if verboseON():
                prettyPrint("App \"%s\" matched to package name \"%s\"" % (f, appName), "debug")

        if len(X) < 1:
            prettyPrint("Could not load any numerical feature vectors. Exiting", "warning")
            return False

        prettyPrint("Successfully retrieved and parsed %s numerical feature vectors" % len(X))
        # Perform visualization
        if arguments.datasettype == "static":
            # Retrieve different types of features and visualize each of them
            views = [
                ("Combined static features in %sD", "Visualizing combined static features in %sD", X),
                ("Basic static features in %sD", "Visualizing basic features in %sD", [x[:6] for x in X]),
                ("Permission-based static features in %sD", "Visualizing permission-based features in %sD", [x[6:10] for x in X]),
                ("API static features in %sD", "Visualizing API call features in %sD", [x[10:] for x in X]),
            ]
            for titleTemplate, messageTemplate, data in views:
                figureTitle = titleTemplate % arguments.dimensionality
                prettyPrint(messageTemplate % arguments.dimensionality)
                reduceAndVisualize(data, y, dimensionality, arguments.algorithm, figureSize, figureTitle, appNames=appNames)

        else:
            figureTitle = "Dynamic Introspy features in %sD" % arguments.dimensionality
            # The option is called "figuresize"; reading "arguments.figsize" raised AttributeError
            reduceAndVisualize(X, y, dimensionality, arguments.algorithm, figureSize, figureTitle, appNames=appNames)

    except Exception as e:
        prettyPrintError(e)
        return False

    return True
class DroidutanAnalysis(Process):
    """
    Represents a Droidutan-driven test of an APK
    """
    def __init__(self, pID, pName, pVM, pTarget, pDuration=60, pLogName=""):
        """
        Initialize the test
        :param pID: Used to identify the process
        :type pID: int
        :param pName: A unique name given to a process
        :type pName: str
        :param pVM: The Genymotion AVD name to run the test on
        :type pVM: str
        :param pTarget: The path to the APK under test
        :type pTarget: str
        :param pDuration: The duration of the Droidutan test in seconds (default: 60s)
        :type pDuration: int
        :param pLogName: The path of the filtered log file (default: the APK path with "_filtered.log" instead of ".apk")
        :type pLogName: str
        """
        Process.__init__(self, name=pName)
        self.processID = pID
        self.processName = pName
        self.processVM = pVM
        self.processTarget = pTarget
        self.processDuration = pDuration
        self.processLogFile = pLogName
        self.success = True

    def run(self):
        """
        Runs the Droidutan test against the [processTarget] for [processDuration]
        :return: A bool depicting the success/failure of the test
        """
        t = None
        try:
            # A timer to guarantee the process exits
            if verboseON():
                prettyPrint("Setting timer for %s seconds" % str(float(self.processDuration)*5.0), "debug")
            t = threading.Timer(float(self.processDuration)*5.0, self.stop)
            t.start()
            # Step 1. Analyze APK
            if verboseON():
                prettyPrint("Analyzing APK: \"%s\"" % self.processTarget, "debug")
            apk, dx, vm = Droidutan.analyzeAPK(self.processTarget)
            if not apk:
                prettyPrint("Could not retrieve an APK to analyze. Skipping", "warning")
                return False
            # 1.a. Extract app components
            appComponents = Droidutan.extractAppComponents(apk)

            # Step 2. Get the Ip address assigned to the AVD
            getAVDIPCmd = ["VBoxManage", "guestproperty", "enumerate", self.processVM]
            result = subprocess.Popen(getAVDIPCmd, stderr=subprocess.STDOUT, stdout=subprocess.PIPE, universal_newlines=True).communicate()[0].replace(' ', '')
            if result.lower().find("error") != -1:
                prettyPrint("Unable to retrieve the IP address of the AVD", "error")
                print(result)
                return False
            marker = "androvm_ip_management,value:"
            start = result.find(marker)
            if start == -1:
                # Without the property there is no address to parse
                prettyPrint("Unable to retrieve the IP address of the AVD", "error")
                return False
            start += len(marker)
            end = result.find(',', start)
            avdIP = result[start:end] if end != -1 else result[start:]
            adbID = "%s:5555" % avdIP

            # Step 3. Define frequently-used commands
            adbPath = getADBPath()
            dumpLogcatCmd = [adbPath, "-s", adbID, "logcat", "-d"]
            clearLogcatCmd = [adbPath, "-s", adbID, "logcat", "-c"] # "-c" is an option of the logcat subcommand

            # Step 4. Test the APK using Droidutan (Assuming machine is already on)
            prettyPrint("Clearing device log before test")
            # Wait for the log to be cleared so that the test's entries are not wiped afterwards
            subprocess.Popen(clearLogcatCmd, stderr=subprocess.STDOUT, stdout=subprocess.PIPE).communicate()
            prettyPrint("Testing the APK \"%s\" using Droidutan" % appComponents["package_name"])
            # 4.a. Unleash Droidutan
            success = Droidutan.testApp(self.processTarget, avdSerialno=avdIP, testDuration=int(self.processDuration), preExtractedComponents=appComponents, allowCrashes=False)
            if not success:
                prettyPrint("An error occurred while testing the APK \"%s\". Skipping" % self.processTarget, "warning")
                return False

            # 5. Dump the system log to file
            logcatPath = self.processTarget.replace(".apk", ".log")
            with open(logcatPath, "w") as logcatFile:
                prettyPrint("Dumping logcat")
                subprocess.Popen(dumpLogcatCmd, stderr=subprocess.STDOUT, stdout=logcatFile).communicate()

            # 6. Filter droidmon entries related to the APK under test
            prettyPrint("Retrieving \"Droidmon-apimonitor-%s\" tags from log" % appComponents["package_name"])
            try:
                # grep exits non-zero, i.e. raises, when nothing matches
                output = subprocess.check_output(["grep", "-i", "droidmon-apimonitor-%s" % appComponents["package_name"], logcatPath], universal_newlines=True)
            except subprocess.CalledProcessError as cpe:
                prettyPrint("Could not find the tag \"droidmon-apimonitor-%s\" in the logs" % appComponents["package_name"], "warning")
                return True
            if self.processLogFile != "":
                filteredPath = self.processLogFile
            else:
                filteredPath = "%s_filtered.log" % self.processTarget.replace(".apk", "")
            with open(filteredPath, "w") as logFile:
                logFile.write(output)
            # The unfiltered dump is no longer needed
            os.remove(logcatPath)

        except Exception as e:
            prettyPrintError(e)
            self.success = False
            return False
        finally:
            # The watchdog is only needed while the test is running
            if t is not None:
                t.cancel()

        return True


    def stop(self):
        """
        Stops this analysis process
        """
        try:
            prettyPrint("Stopping the analysis process \"%s\" on \"%s\"" % (self.processName, self.processVM), "warning")
            os.kill(os.getpid(), signal.SIGTERM)

        except Exception as e:
            prettyPrintError(e)
            self.success = False
            return False

        return True
def _K(n, s, t, lambda_decay=0.5):
    """
    K_n(s,t) in the original article; recursive function
    :param n: length of subsequence
    :type n: int
    :param s: document #1
    :type s: str
    :param t: document #2
    :type t: str
    :param lambda_decay: decay factor penalizing gaps in subsequences
    :type lambda_decay: float
    :return: float value for similarity between s and t
    """
    if min(len(s), len(t)) < n:
        return 0
    part_sum = 0
    # Every position of t counts, including the first one (whose prefix t[:0] is empty)
    for j in range(len(t)):
        if t[j] == s[-1]:
            #not t[:j-1] as in the article but t[:j] because of Python slicing rules!!!
            part_sum += _K1(n - 1, s[:-1], t[:j], lambda_decay)
    return _K(n, s[:-1], t, lambda_decay) + lambda_decay ** 2 * part_sum

def _K1(n, s, t, lambda_decay=0.5):
    """
    K'_n(s,t) in the original article; auxiliary intermediate function; recursive function
    :param n: length of subsequence
    :type n: int
    :param s: document #1
    :type s: str
    :param t: document #2
    :type t: str
    :param lambda_decay: decay factor penalizing gaps in subsequences
    :type lambda_decay: float
    :return: intermediate float value
    """
    if n == 0:
        return 1
    if min(len(s), len(t)) < n:
        return 0
    part_sum = 0
    # Every position of t counts, including the first one (whose prefix t[:0] is empty)
    for j in range(len(t)):
        if t[j] == s[-1]:
            #not t[:j-1] as in the article but t[:j] because of Python slicing rules!!!
            part_sum += _K1(n - 1, s[:-1], t[:j], lambda_decay) * (lambda_decay ** (len(t) - (j + 1) + 2))
    return lambda_decay * _K1(n, s[:-1], t, lambda_decay) + part_sum

def _gram_matrix_element(s, t, sdkvalue1, sdkvalue2, subseq_length=3, lambda_decay=0.5):
    """
    Helper function
    :param s: document #1
    :type s: str
    :param t: document #2
    :type t: str
    :param sdkvalue1: K(s,s) from the article
    :type sdkvalue1: float
    :param sdkvalue2: K(t,t) from the article
    :type sdkvalue2: float
    :param subseq_length: length of the subsequences to compare
    :type subseq_length: int
    :param lambda_decay: decay factor penalizing gaps in subsequences
    :type lambda_decay: float
    :return: value for the (i, j) element from Gram matrix
    """
    if s == t:
        return 1
    try:
        return _K(subseq_length, s, t, lambda_decay) / \
               (sdkvalue1 * sdkvalue2) ** 0.5
    except ZeroDivisionError:
        # K(s,s) or K(t,t) is zero, i.e. a document has no subsequence of the requested length
        print("Subsequence length exceeds the length of at least one document. You should decrease it")
        sys.exit(2)

def string_kernel(X1, X2, subseq_length=3, lambda_decay=0.5):
    """
    String Kernel computation
    :param X1: list of documents (m rows, 1 column); each row is a single document (string)
    :type X1: list
    :param X2: list of documents (m rows, 1 column); each row is a single document (string)
    :type X2: list
    :param subseq_length: length of the subsequences to compare
    :type subseq_length: int
    :param lambda_decay: decay factor penalizing gaps in subsequences
    :type lambda_decay: float
    :return: Gram matrix for the given parameters
    """
    len_X1 = len(X1)
    len_X2 = len(X2)
    # numpy array of Gram matrix
    gram_matrix = np.zeros((len_X1, len_X2), dtype=np.float32)
    #when lists of documents are identical
    if X1 == X2:
        #store K(s,s) values to avoid recalculations
        self_sim = [_K(subseq_length, doc, doc, lambda_decay) for doc in X1]
        #calculate Gram matrix
        for i in range(len_X1):
            for j in range(i, len_X2):
                gram_matrix[i, j] = _gram_matrix_element(X1[i], X2[j], self_sim[i], self_sim[j], subseq_length, lambda_decay)
                #using symmetry, which only holds because both lists are the same
                gram_matrix[j, i] = gram_matrix[i, j]
    #when lists of documents differ, K(X1[i], X2[j]) != K(X1[j], X2[i]) in general, so every element is computed
    else:
        self_sim_1 = [_K(subseq_length, doc, doc, lambda_decay) for doc in X1]
        self_sim_2 = [_K(subseq_length, doc, doc, lambda_decay) for doc in X2]
        for i in range(len_X1):
            for j in range(len_X2):
                gram_matrix[i, j] = _gram_matrix_element(X1[i], X2[j], self_sim_1[i], self_sim_2[j], subseq_length, lambda_decay)
    return gram_matrix
def reduceAndVisualize(X, y, dim=2, reductionAlgorithm="tsne", figSize=(1024,1024), figTitle="Data visualization", appNames=None, saveProjectedData=False):
    """
    Generates a scatter plot using "plotly" after projecting the data points into [dim]-dimensionality using tSNE or PCA
    :param X: The matrix containing the feature vectors
    :type X: list
    :param y: The labels of the feature vectors (1 is plotted as malware, anything else as goodware)
    :type y: list
    :param dim: The target dimensionality to project the feature vectors to; either 2 or 3 (default=2)
    :type dim: int
    :param reductionAlgorithm: The algorithm to use for dimensionality reduction; either "tsne" or "pca"
    :type reductionAlgorithm: str
    :param figSize: The size of the figure
    :type figSize: tuple (of ints)
    :param figTitle: The title of the figure and the name of the resulting HTML file
    :type figTitle: str
    :param appNames: The names of apps to be used as tooltips for each data point. Assumed to match one-to-one with the feature vectors in X; points without a name get an empty tooltip
    :type appNames: list of str
    :param saveProjectedData: Whether to save the projected data in a CSV file (NOTE: currently not used by this function)
    :type saveProjectedData: bool
    :return: A bool depicting the success/failure of the operation
    """
    try:
        if dim not in (2, 3):
            prettyPrint("Unsupported target dimensionality \"%s\". Only 2 and 3 are supported" % dim, "error")
            return False
        appNames = [] if appNames is None else appNames
        # Prepare data
        X, y = np.array(X), np.array(y)
        # Build model
        if reductionAlgorithm == "tsne":
            reductionModel = TSNE(n_components=dim)
        elif reductionAlgorithm == "pca":
            reductionModel = PCA(n_components=dim)
        else:
            prettyPrint("Unsupported dimensionality reduction algorithm \"%s\"" % reductionAlgorithm, "error")
            return False
        # Apply transformation
        prettyPrint("Projecting %s feature vectors of dimensionality %s into %s-d" % (X.shape[0], X.shape[1], dim))
        X_new = reductionModel.fit_transform(X)
        # Generate a scatter plot
        prettyPrint("Populating the traces for malware and goodware")
        # One list of coordinates per axis and one list of tooltips for each class
        groups = {}
        for name in ("Malware", "Goodware"):
            groups[name] = {"coords": [[] for _ in range(dim)], "labels": []}
        for index in range(len(y)):
            group = groups["Malware"] if y[index] == 1 else groups["Goodware"]
            for d in range(dim):
                group["coords"][d].append(X_new[index][d])
            group["labels"].append(appNames[index] if index < len(appNames) else "")

        # Create traces for the scatter plot
        prettyPrint("Creating a scatter plot")
        traces = []
        for name, color in (("Malware", "red"), ("Goodware", "blue")):
            coords, labels = groups[name]["coords"], groups[name]["labels"]
            if dim == 2:
                marker = Marker(symbol='dot', size=6, color=color, opacity=0.75, line=Line(width=2.0))
                traces.append(Scatter(x=coords[0], y=coords[1], mode='markers', name=name, marker=marker, hoverinfo='text', text=labels))
            else:
                marker = Marker(symbol='dot', size=6, color=color, opacity=0.5, line=Line(width=1.0))
                traces.append(Scatter3d(x=coords[0], y=coords[1], z=coords[2], mode='markers', name=name, marker=marker, hoverinfo='text', text=labels))
        # Define the axis properties
        axis = dict(showbackground=False,
                    showline=False, # hide axis line, grid, ticklabels and title
                    zeroline=False,
                    showgrid=False,
                    showticklabels=False,
                    visible=False,
                    title=''
                    )
        # Define the figure's layout
        layout = Layout(title=figTitle,
                        width=figSize[0],
                        height=figSize[1],
                        font=Font(size=12),
                        showlegend=True,
                        scene=Scene(
                            xaxis=XAxis(axis),
                            yaxis=YAxis(axis),
                            zaxis=ZAxis(axis)
                        ),
                        margin=Margin(
                            t=100,
                        ),
                        hovermode='closest',
                        annotations=Annotations([
                            Annotation(
                                showarrow=False,
                                text=figTitle,
                                xref='paper',
                                yref='paper',
                                x=0,
                                y=0.1,
                                xanchor='left',
                                yanchor='bottom',
                                font=Font(
                                    size=14
                                )
                            )
                        ]),
                        )
        # Generate graph file
        data = Data(traces)
        fig = Figure(data=data, layout=layout)
        plot(fig, filename=figTitle.lower().replace(' ', '_'))

    except Exception as e:
        prettyPrintError(e)
        return False

    return True
class HiddenMarkovModel():
    # A simple structure to represent a hidden Markov model
    def __init__(self, A, B, Pi, observations):
        """
        Builds a discrete GHMM model from the given matrices
        :param A: The state transition matrix
        :type A: list (of lists)
        :param B: The emission matrix
        :type B: list (of lists)
        :param Pi: The initial state distribution (must have as many entries as A has rows)
        :type Pi: list
        :param observations: The symbols making up the model's alphabet
        :type observations: list
        """
        # Defined up front so that a failed initialization leaves a consistent, untrained object
        self.isTrained = False
        self.ghmmModel = None
        if len(A) == len(Pi):
            self.states = range(len(A))
            self.sigma = ghmm.Alphabet(observations) # The "alphabet" comprising action indices
            self.initA = A
            self.initB = B
            self.initPi = Pi
            self.ghmmModel = ghmm.HMMFromMatrices(self.sigma, ghmm.DiscreteDistribution(self.sigma), self.initA, self.initB, self.initPi)
        else:
            prettyPrint("Unable to initialize model. Unequal number of states", "error")

    def train(self, X):
        """Uses GHMM's implementation of Baum-Welch to train an HMM"""
        try:
            if len(X) < 1:
                prettyPrint("Empty training set provided", "warning")
                return False
            # Now use the Baum-Welch algorithm
            self.ghmmModel.baumWelch(ghmm.SequenceSet(self.ghmmModel.emissionDomain, X))
            self.isTrained = True
            if verboseON():
                print("Trained model: %s" % self.ghmmModel)

        except Exception as e:
            prettyPrintError(e)
            return False

        return True

def cross_val_predict(X, y, tracelength, threshold, kfold=10, trainwith="malware"):
    """
    Classifies out-of-sample sequences using the trained model and KFold CV
    :param X: The traces (sequences of observations) to classify
    :type X: list
    :param y: The labels of the traces (1 for malware, 0 for goodware)
    :type y: list
    :param tracelength: The maximum number of observations considered per test trace
    :type tracelength: int
    :param threshold: A trace whose log likelihood is <= -threshold is considered foreign to the training class
    :type threshold: int
    :param kfold: The number of folds (default: 10)
    :type kfold: int
    :param trainwith: The class used to train the models; either "malware" or "goodware"
    :type trainwith: str
    :return: A list of predicted labels (one per trace in X), or an empty list upon failure
    """
    try:
        if trainwith not in ("malware", "goodware"):
            prettyPrint("Unknown training class \"%s\"" % trainwith, "error")
            return []
        trainLabel, otherLabel = (1, 0) if trainwith == "malware" else (0, 1)

        # Positions (within X) of the training-class instances and of the instances never used in training
        inIndices = [index for index in range(len(X)) if y[index] == trainLabel]
        outIndices = [index for index in range(len(X)) if y[index] == otherLabel]
        outSet = set(outIndices)
        Xin = [X[index] for index in inIndices]
        Xout = [X[index] for index in outIndices]

        # A matrix to hold the predictions (len(X) x Kfold)
        P = numpy.zeros((len(X), kfold))

        # NOTE(review): relies on the legacy sklearn.cross_validation.KFold(n, n_folds) API imported by this file
        allFolds = KFold(len(Xin), kfold)
        for currentFold, (trainingIndices, testIndices) in enumerate(allFolds):
            # Train with part of the training class; test with its held-out part plus the whole other class
            Xtrain = [Xin[index] for index in trainingIndices]
            Xtest = [Xin[index] for index in testIndices] + Xout
            ytest = [trainLabel] * len(testIndices) + [otherLabel] * len(Xout)
            # Fold indices refer to Xin, so map them back to rows of X before writing into P
            Pindices = [inIndices[index] for index in testIndices] + outIndices

            # Get the observations from the current training and test datasets
            predicted = []
            allObservations = []
            for trace in Xtrain + Xtest:
                for call in trace:
                    if not call in allObservations:
                        allObservations.append(call)
            if verboseON():
                prettyPrint("Successfully retrieved %s observations from current traces" % len(allObservations), "debug")
            # Prepare HMM
            Pi = [1.0, 0.0]
            A = [[0.5, 0.5], [0.5, 0.5]]
            B = numpy.random.random((2, len(allObservations))).tolist()

            prettyPrint("Building the hidden Markov model")
            hmm = HiddenMarkovModel(A, B, Pi, allObservations)

            prettyPrint("Training the model")
            hmm.train(Xtrain)

            # Test model
            for index in range(len(Xtest)):
                # Retrieve and prepare trace
                currentTrace = Xtest[index]
                currentTrace = currentTrace[:int(tracelength)] if len(currentTrace) > int(tracelength) else currentTrace
                currentTrace = ghmm.EmissionSequence(hmm.sigma, currentTrace)
                # Calculate log likelihood
                logProbability = hmm.ghmmModel.loglikelihood(currentTrace)
                if verboseON():
                    prettyPrint("P(O|lambda)=%s" % logProbability, "debug")
                # Classify instance: unlikely traces belong to the class the model was NOT trained with
                if trainwith == "malware":
                    currentPredicted = 0 if logProbability <= -int(threshold) else 1
                else:
                    currentPredicted = 1 if logProbability <= -int(threshold) else 0

                # Append to predicted
                if verboseON():
                    prettyPrint("%s instance classified as %s" % (["Goodware", "Malware"][ytest[index]], ["Goodware", "Malware"][currentPredicted]), "debug")
                predicted.append(currentPredicted)

            # Populate the prediction matrix
            for index in range(len(predicted)):
                P[Pindices[index]][currentFold] = predicted[index]

        # For each instance, calculate the majority vote of predictions
        predicted = []
        for rIndex in range(P.shape[0]):
            votes = P[rIndex, :]
            if rIndex in outSet:
                # Tested in every fold: at least half of the votes are needed
                predicted.append(1 if sum(votes) >= kfold/2.0 else 0)
            else:
                # Training-class instances will only be used once as test instances
                predicted.append(1 if numpy.any(votes > 0) else 0)

    except Exception as e:
        prettyPrintError(e)
        return []

    return predicted
def averageList(inputList, roundDigits=2):
    """
    Calculates the rounded arithmetic mean of a list of numbers
    :param inputList: The numbers to average
    :type inputList: list
    :param roundDigits: The number of decimal digits to round the mean to
    :type roundDigits: int
    :return: A float depicting the mean (0.0 for an empty list)
    """
    if len(inputList) < 1:
        return 0.0 # Nothing to average; avoids a ZeroDivisionError
    return round(float(sum(inputList))/float(len(inputList)), roundDigits)

def checkRoot():
    """
    Checks whether the current process runs with the user ID 0
    :return: A bool depicting whether the user ID is 0
    """
    return os.getuid() == 0

def getRandomNumber(length=8):
    """
    Generates a random string of decimal digits
    :param length: The number of digits to generate
    :type length: int
    :return: A str of digits
    """
    return ''.join(random.choice(string.digits) for i in range(length))

def getRandomAlphaNumeric(length=8):
    """
    Generates a random string of ASCII letters and digits
    :param length: The number of characters to generate
    :type length: int
    :return: A str of letters and digits
    """
    return ''.join(random.choice(string.ascii_letters + string.digits) for i in range(length))

def getRandomString(length=8):
    """
    Generates a random string of lowercase ASCII letters
    :param length: The number of characters to generate
    :type length: int
    :return: A str of lowercase letters
    """
    # "string.ascii_lowercase" is locale-independent and exists on both Python 2 and 3
    return ''.join(random.choice(string.ascii_lowercase) for i in range(length))

def getTimestamp(includeDate=False):
    """
    Retrieves the current local time wrapped in square brackets
    :param includeDate: Whether to prefix the time with the date
    :type includeDate: bool
    :return: A str of the form "[date time]" or "[time]"
    """
    now = str(datetime.now())
    if includeDate:
        return "[%s]" % now
    return "[%s]" % now.split(" ")[1]
def checkAVDState(vmName, vmState="running"):
    """
    Checks the current VirtualBox state of an AVD (e.g., running, stopping, ...)
    :param vmName: The name of the AVD to check
    :type vmName: str
    :param vmState: The status to check (matched against the lowercased "vboxmanage showvminfo" output)
    :type vmState: str
    :return: A boolean depicting whether the AVD is in the given state and an str of its process ID ("" if it could not be found)
    """
    isStuck, pID = False, ""
    try:
        vBoxInfoCmd = ["vboxmanage", "showvminfo", vmName]
        # Decode the output as text, so that the str operations below work on Python 2 and 3
        status = subprocess.Popen(vBoxInfoCmd, stderr=subprocess.STDOUT, stdout=subprocess.PIPE, universal_newlines=True).communicate()[0]
        if status.lower().find(vmState) != -1:
            isStuck = True
            # a) Get UUID of the AVD
            uuid = ""
            for line in status.split('\n'):
                if line.find("UUID") != -1:
                    uuid = line[line.rfind(' ')+1:].strip()
                    break
            # b) Get the PID of the process
            # An empty UUID would match every process, so do not guess in that case
            if uuid != "":
                # communicate() drains the pipe; wait()-ing on a full pipe can deadlock
                processes = subprocess.Popen(["ps", "-eaf"], stdout=subprocess.PIPE, stderr=subprocess.STDOUT, universal_newlines=True).communicate()[0]
                needle = uuid.lower()
                for line in processes.split('\n'):
                    if line.lower().find(needle) == -1:
                        continue
                    # The second column of "ps -ef" output is the PID
                    fields = line.split()
                    if len(fields) > 1 and fields[1].isdigit():
                        pID = fields[1]
                        break

    except Exception as e:
        print("[*] Error encountered: %s" % e)
        return False, ""

    return isStuck, pID
def restoreVirtualBoxSnapshot(vmName, snapshotName, retrials=25, waitToBoot=30):
    """
    Attempts to restore the snapshot of a VirtualBox machine
    :param vmName: The name of the virtual machine
    :type vmName: str
    :param snapshotName: The name of the snapshot to restore
    :type snapshotName: str
    :param retrials: In case of failure, how many attempts to restore the snapshot are made
    :type retrials: int
    :param waitToBoot: The time (in seconds) to wait for a virtual machine to boot
    :type waitToBoot: int
    :return: A boolean depicting the success/failure of the operation
    """
    try:
        # Define frequently-used commands
        vBoxRestoreCmd = ["vboxmanage", "snapshot", vmName, "restore", snapshotName]
        genymotionStartCmd = [getGenymotionPlayer(), "--vm-name", vmName]
        genymotionPowerOffCmd = [getGenymotionPlayer(), "--vm-name", vmName, "--poweroff"]

        def restoreSnapshot():
            # Merge stderr into stdout, so that errors reported on either stream are seen,
            # and let communicate() drain the pipe instead of wait()-ing on it
            process = subprocess.Popen(vBoxRestoreCmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, universal_newlines=True)
            return process.communicate()[0]

        # Check whether the machine is stuck in the "Stopping" phase
        state, pID = checkAVDState(vmName, "stopping")
        if state:
            # Kill process
            print("[*] KILLING STOPPING \"%s\"" % vmName)
            if pID != "":
                subprocess.Popen(["kill", pID], stderr=subprocess.STDOUT, stdout=subprocess.PIPE).communicate()
        # Power off the genymotion AVD
        print("[*] POWERING OFF \"%s\"" % vmName)
        subprocess.Popen(genymotionPowerOffCmd).wait()
        # Make sure the AVD is dead
        # NOTE(review): this loop is unbounded and spins for as long as the AVD reports "running"
        state, pID = checkAVDState(vmName, "running")
        while state:
            print("[*] POWERING OFF \"%s\"" % vmName)
            subprocess.Popen(genymotionPowerOffCmd).wait()
            state, pID = checkAVDState(vmName, "running")
        # Attempt to restore the AVD's snapshot
        print("[*] RESTORING SNAPSHOT \"%s\" for \"%s\"" % (snapshotName, vmName))
        output = restoreSnapshot()
        counter = 0
        while output.lower().find("error") != -1:
            print("[*] RESTORING SNAPSHOT \"%s\" for \"%s\". Trial #%s" % (snapshotName, vmName, counter+1))
            if counter == retrials:
                return False
            counter += 1
            output = restoreSnapshot()
        # Power on the Genymotion AVD again (left running in the background)
        print("[*] POWERING ON \"%s\"" % vmName)
        subprocess.Popen(genymotionStartCmd)
        time.sleep(waitToBoot)

    except Exception as e:
        print(e)
        return False

    return True
def sendEmail(srcAddress, dstAddress, msgSubject, msgBody):
    """
    Sends a plain-text email via Gmail's SMTP server using the configured Google credentials
    :param srcAddress: The address of the sender
    :type srcAddress: str
    :param dstAddress: The address of the recipient
    :type dstAddress: str
    :param msgSubject: The subject of the message
    :type msgSubject: str
    :param msgBody: The body of the message
    :type msgBody: str
    :return: A bool depicting the success/failure of the operation
    """
    server = None
    try:
        # Connect to server and login
        server = smtplib.SMTP('smtp.gmail.com', 587)
        server.starttls()
        username, password = getGoogleCreds()
        server.login(username, password)
        # Prepare message
        msg = MIMEMultipart()
        msg['From'] = srcAddress
        msg['To'] = dstAddress
        msg['Subject'] = msgSubject
        msg.attach(MIMEText(msgBody, 'plain'))
        # Bombs away
        server.sendmail(srcAddress, dstAddress, msg.as_string())
        server.quit()
    except Exception as e:
        print(e)
        return False
    finally:
        # Release the socket even if logging in or sending failed
        if server is not None:
            server.close()

    return True


# Copied from the "googleplay_api" helpers.py
def sizeof_fmt(num):
    """
    Converts a size in bytes into a human-readable string
    :param num: The size in bytes
    :type num: int or float
    :return: A str such as "1.5KB"; sizes beyond the terabyte range are expressed in TB
    """
    units = ['bytes','KB','MB','GB','TB']
    for x in units[:-1]:
        if num < 1024.0:
            return "%3.1f%s" % (num, x)
        num /= 1024.0
    # The largest unit absorbs everything else instead of falling through to None
    return "%3.1f%s" % (num, units[-1])

def specificity_score(ground_truth, predicted, classes=(1, 0)):
    """
    Calculates the specificity (true negative rate) = TN / (TN + FP)
    :param ground_truth: The true labels
    :type ground_truth: list
    :param predicted: The predicted labels
    :type predicted: list
    :param classes: The labels of the (positive, negative) classes
    :type classes: tuple
    :return: A float depicting the score, or -1 if it cannot be calculated
    """
    try:
        if len(ground_truth) != len(predicted):
            return -1
        positive, negative = classes[0], classes[1]
        tn, fp = 0, 0
        # Only instances that are truly negative contribute to the score
        for truth, guess in zip(ground_truth, predicted):
            if truth != negative:
                continue
            if guess == negative:
                tn += 1
            elif guess == positive:
                fp += 1

        if tn + fp == 0:
            return -1 # Undefined without any negative instances

        score = float(tn)/(float(tn)+float(fp))

    except Exception as e:
        print(e)
        return -1

    return score
def defineArguments():
    """
    Defines the command-line arguments of the Play Store downloading tool
    :return: An argparse.ArgumentParser object
    """
    parser = argparse.ArgumentParser(prog="downloadAPKPlayStore.py", description="Uses \"Aion\"'s Play Store crawler to download APK's of benign applications.")
    parser.add_argument("-m", "--mode", help="The operation to carry out", required=True, choices=["download-all", "download-category", "download-subcategory", "update"])
    # Parse the number as an int, so that a supplied value has the same type as the default
    parser.add_argument("-n", "--num", help="The number of APK's to download", required=False, type=int, default=10)
    parser.add_argument("-c", "--category", help="The category of the APK's to download", required=False, default="")
    parser.add_argument("-s", "--subcategory", help="The sub-category of the APK's to download", required=False, default="")
    parser.add_argument("-f", "--freeapps", help="Whether to exclusively download free apps", required=False, choices=["yes", "no"], default="no")
    parser.add_argument("-o", "--outdir", help="The directory to save the downloaded APK's", required=False, default=".")
    parser.add_argument("-r", "--repo", help="The file containing the already downloaded APK's", required=False, default="repo.csv")
    parser.add_argument("-v", "--verbose", help="Display debug messages", default="no", choices=["yes", "no"])
    return parser
def main():
    """
    Crawls the Play Store and downloads the APK's that are not yet in the repository file
    :return: A bool depicting the success/failure of the operation
    """
    repoFile = None
    try:
        argumentParser = defineArguments()
        arguments = argumentParser.parse_args()
        prettyPrint("Welcome to the droid hunter")
        verbose = arguments.verbose == "yes"

        # Step 0 - Load the repo of downloaded APK's
        if not os.path.exists(arguments.repo):
            prettyPrint("Could not locate the repository of downloaded APK's. Creating a new one", "warning")
        # "a+" creates a missing file, never truncates an existing one, and allows reading
        repoFile = open(arguments.repo, "a+")
        repoFile.seek(0) # The initial position of a file opened for appending is platform-dependent
        downloadedApps = set(appID.strip() for appID in repoFile.read().split(",") if appID.strip() != "")
        repoFile.seek(0, 2) # Back to the end before any write
        if verbose:
            prettyPrint("Successfully retrieved %s apps from the repository \"%s\"" % (len(downloadedApps), arguments.repo), "debug")

        if arguments.mode == "download-all":
            # Step 1 - Retrieve all categories
            crawler = PlayStoreCrawler()
            if verbose:
                prettyPrint("Logging into the Play store", "debug")
            # (1.a) Log into the play store
            if not crawler.login():
                prettyPrint("Unable to login to the Google Play store. Check the credentials in the configuration files", "error")
                return False
            # (1.b) Retrieve app categories
            appCategories = crawler.getCategories()

            if verbose and len(appCategories) > 0:
                prettyPrint("Successfully retrieved %s categories from the Play Store" % len(appCategories), "debug")

            # (1.c) Iterate over each category, retrieving its sub-categories, and apps
            downloadQueue, queuedIDs = [], set() # Store the apps to be downloaded
            for category in appCategories:
                prettyPrint("Processing the category \"%s\"" % category)
                subCategories = crawler.getSubCategories(category)
                if verbose and len(subCategories) > 0:
                    prettyPrint("Successfully retrieved %s sub-categories from the Play Store" % len(subCategories), "debug")
                # (1.d) Iterate over each sub-category, retrieving the apps
                for subCategory in subCategories:
                    prettyPrint("Processing the sub-category \"%s\"" % subCategory)
                    apps = crawler.getApps(category, subCategory)
                    if verbose and len(apps) > 0:
                        prettyPrint("Successfully retrieved %s apps from the Play Store" % len(apps), "debug")
                    # (1.e) Add the apps to the downloading queue (if we do NOT already have them)
                    for app in apps:
                        # Paid apps are only skipped if free apps were exclusively requested
                        if arguments.freeapps == "yes" and app.appPrice.lower() != "free":
                            continue
                        # The repository stores app ID's, so compare ID's rather than objects
                        if app.appID in downloadedApps or app.appID in queuedIDs:
                            continue
                        queuedIDs.add(app.appID)
                        downloadQueue.append(app)

            # Calculate the sizes of the to-be-downloaded apps
            totalSize = sum(app.appSize for app in downloadQueue)
            # (1.f) Confirm downloading the queued apps
            prettyPrint("Successfully retrieved %s apps to download with total size of %s" % (len(downloadQueue), sizeof_fmt(totalSize)))
            try:
                ask = raw_input
            except NameError:
                ask = input # Python 3
            confirmDownload = ask("Download apps? [Y/n] ")
            if confirmDownload != "" and confirmDownload.lower() != "y":
                prettyPrint("As you wish")
                return True

            # Step 2 - Download the APK's
            for app in downloadQueue:
                prettyPrint("Downloading \"%s\"." % app.appID)
                crawler.downloadApp(app)
                # Step 3 - Copy the downloaded APK to the output directory
                for downloadedApp in glob.glob("./*.apk"):
                    if arguments.outdir.find(":") != -1:
                        # Consider this to be a remote directory, and use "scp" to copy the app
                        if verbose:
                            prettyPrint("Using \"scp\" to copy the APK's to remote site", "debug")
                        # NOTE(review): SSH_PASSWORD is not defined in the visible part of this tool; confirm where it comes from
                        scpArgs = ["sshpass", "-p", SSH_PASSWORD, "scp", downloadedApp, arguments.outdir]
                        subprocess.Popen(scpArgs, stderr=subprocess.STDOUT, stdout=subprocess.PIPE).communicate()
                        os.remove(downloadedApp)
                        # Add app name to repo
                        repoFile.write("%s," % app.appID)
                        repoFile.flush()
                    else:
                        if verbose:
                            prettyPrint("Copying %s to local directory %s" % (app.appID, arguments.outdir), "debug")
                        # Check whether the out directory exists and create it otherwise
                        if not os.path.exists(arguments.outdir):
                            os.mkdir(arguments.outdir)
                        # Move the APK's one-by-one, if it does not exist
                        targetPath = os.path.join(arguments.outdir, os.path.basename(downloadedApp))
                        print(targetPath)
                        if not os.path.exists(targetPath):
                            shutil.move(downloadedApp, arguments.outdir)
                            # Add app name to repo
                            repoFile.write("%s," % app.appID)
                            repoFile.flush() # Write app names right away
                        else:
                            prettyPrint("App \"%s\" already exists in the output directory \"%s\". Skipping" % (app.appID, arguments.outdir), "warning")
                            os.remove(downloadedApp)

    except Exception as e:
        prettyPrintError(e)
        return False

    finally:
        # Close the repository on every path, including the early returns above
        if repoFile is not None:
            repoFile.close()

    prettyPrint("Good day to you ^_^")
    return True
def defineArguments():
    """
    Builds the command-line argument parser of the static experiment tool
    :return: An argparse.ArgumentParser object
    """
    parser = argparse.ArgumentParser(
        prog="staticExperimentI.py",
        description="A tool to implement the stimulation-detection feedback loop using Garfield as stimulation engine."
    )
    # (flags, keyword arguments) of every supported option, in display order
    options = [
        (("-x", "--malwaredir"), {"help": "The directory containing the malicious APK's to analyze and use as training/validation dataset", "required": True}),
        (("-g", "--goodwaredir"), {"help": "The directory containing the benign APK's to analyze and use as training/validation dataset", "required": True}),
        (("-d", "--datasetname"), {"help": "A unique name to give to the dataset used in the experiment (for DB storage purposes)", "required": True}),
        (("-f", "--featurestype"), {"help": "The type of static features to load", "required": False, "default": "all", "choices": ["basic", "permission", "api", "all"]}),
        (("-r", "--runnumber"), {"help": "The number of the run", "required": True}),
    ]
    for flags, settings in options:
        parser.add_argument(*flags, **settings)
    return parser
def main():
    """
    Runs the static experiment: loads the static feature files of malicious and benign apps,
    holds out a third of each class for testing, and classifies the apps using K-NN,
    random forests, SVM, and a majority-vote ensemble
    :return: A bool depicting the success/failure of the experiment
    """
    import random # Needed to draw the test datasets; not imported explicitly at the top of this tool

    resultsFile = None
    try:
        argumentParser = defineArguments()
        arguments = argumentParser.parse_args()
        prettyPrint("Welcome to the \"Aion\"'s static experiment I")

        # 1. Load APK's and split into training and test datasets
        prettyPrint("Loading APK's from \"%s\" and \"%s\"" % (arguments.malwaredir, arguments.goodwaredir))
        # Retrieve malware APK's
        malFiles = glob.glob("%s/*.static" % arguments.malwaredir)
        if len(malFiles) < 1:
            prettyPrint("Could not find any malicious feature files" , "warning")
        else:
            prettyPrint("Successfully retrieved %s malicious feature files" % len(malFiles))
        # Retrieve goodware APK's
        goodFiles = glob.glob("%s/*.static" % arguments.goodwaredir)
        if len(goodFiles) < 1:
            prettyPrint("Could not find any benign feature files", "warning")
        else:
            prettyPrint("Successfully retrieved %s benign feature files" % len(goodFiles))

        def splitDataset(files):
            # Randomly moves a third of the files to a test dataset; the rest is used for training
            testSize = len(files) // 3 # Floor division keeps the size an int on Python 3 as well
            test = []
            while len(test) < testSize:
                test.append(files.pop(random.randint(0, len(files)-1)))
            return list(files), test

        def selectFeatures(v):
            # Keeps the slice of the feature vector that corresponds to the requested features type
            if arguments.featurestype == "all":
                return v
            elif arguments.featurestype == "basic":
                return v[:6]
            elif arguments.featurestype == "permission":
                return v[6:10]
            return v[10:]

        def loadDataset(malware, goodware):
            # Loads the non-empty feature vectors of the given files along with their labels
            X, y = [], []
            for label, files in ((1, malware), (0, goodware)):
                for x in files:
                    v = loadNumericalFeatures(x)
                    if len(v) > 0:
                        X.append(selectFeatures(v))
                        y.append(label)
            return X, y

        # Split the data into training and test datasets
        malTraining, malTest = splitDataset(malFiles)
        prettyPrint("[MALWARE] Training dataset size is %s, test dataset size is %s" % (len(malTraining), len(malTest)))
        goodTraining, goodTest = splitDataset(goodFiles)
        prettyPrint("[GOODWARE] Training dataset size is %s, test dataset size is %s" % (len(goodTraining), len(goodTest)))

        # 2. Load the feature vectors
        Xtr, ytr = loadDataset(malTraining, goodTraining)
        Xte, yte = loadDataset(malTest, goodTest)

        # 3. Perform the classification
        metricsDict, metricsDict_test = {}, {}
        resultsFile = open("results_static_%s_%s_run%s.txt" % (arguments.datasetname, arguments.featurestype, arguments.runnumber), "w")
        prettyPrint("Ensemble mode classification: K-NN, SVM, and Random Forests using %s features" % arguments.featurestype)
        # Classifying using K-nearest neighbors
        K = [10, 25, 50, 100, 250, 500]
        for k in K:
            prettyPrint("Classifying using K-nearest neighbors with K=%s" % k)
            clf, predicted, predicted_test = ScikitLearners.predictAndTestKNN(Xtr, ytr, Xte, yte, K=k)
            metricsDict["KNN%s" % k] = ScikitLearners.calculateMetrics(ytr, predicted)
            metricsDict_test["KNN%s" % k] = ScikitLearners.calculateMetrics(yte, predicted_test)

        # Classifying using Random Forests
        E = [10, 25, 50, 75, 100]
        for e in E:
            prettyPrint("Classifying using Random Forests with %s estimators" % e)
            clf, predicted, predicted_test = ScikitLearners.predictAndTestRandomForest(Xtr, ytr, Xte, yte, estimators=e)
            metricsDict["Trees%s" % e] = ScikitLearners.calculateMetrics(ytr, predicted)
            metricsDict_test["Trees%s" % e] = ScikitLearners.calculateMetrics(yte, predicted_test)

        # Classifying using SVM
        prettyPrint("Classifying using Support vector machines")
        clf, predicted, predicted_test = ScikitLearners.predictAndTestSVM(Xtr, ytr, Xte, yte)
        metricsDict["SVM"] = ScikitLearners.calculateMetrics(ytr, predicted)
        metricsDict_test["SVM"] = ScikitLearners.calculateMetrics(yte, predicted_test)

        # Now do the majority voting ensemble
        allCs = ["KNN-%s" % x for x in K] + ["FOREST-%s" % e for e in E] + ["SVM"]
        clf, predicted, predicted_test = ScikitLearners.predictAndTestEnsemble(Xtr, ytr, Xte, yte, classifiers=allCs)
        # Same (ground truth, predicted) argument order as every other classifier above
        metricsDict["Ensemble"] = ScikitLearners.calculateMetrics(ytr, predicted)
        metricsDict_test["Ensemble"] = ScikitLearners.calculateMetrics(yte, predicted_test)

        def report(tag, title, allMetrics):
            # Saves the metrics of every classifier to the results file and prints them
            for m in allMetrics:
                resultsFile.write("[%s] Results for %s:\n" % (tag, m))
                resultsFile.write("%s\n" % str(allMetrics[m]))
                prettyPrint(title % m, "output")
                prettyPrint("Accuracy: %s" % str(allMetrics[m]["accuracy"]), "output")
                prettyPrint("Recall: %s" % str(allMetrics[m]["recall"]), "output")
                prettyPrint("Specificity: %s" % str(allMetrics[m]["specificity"]), "output")
                prettyPrint("Precision: %s" % str(allMetrics[m]["precision"]), "output")
                prettyPrint("F1 Score: %s" % str(allMetrics[m]["f1score"]), "output")

        # Print and save results [FOR THE TRAINING DATASET]
        report("TRAINING", "Metrics using %s", metricsDict)
        # Print and save results [FOR THE TEST DATASET]
        report("TEST", "Metrics using cross validation and %s", metricsDict_test)

    except Exception as e:
        prettyPrintError(e)
        return False

    finally:
        # Make sure the results are flushed to disk on every path
        if resultsFile is not None:
            resultsFile.close()

    return True
class AionDB():
    """
    A class to handle access to the Aion SQLite database.
    Values are always passed to SQLite as bound parameters; table names, column names,
    and operators are interpolated into the query and must come from trusted code.
    """
    def __init__(self, runID, runDataset):
        """
        Initializes an object with metadata about the current run
        :param runID: A unique ID given to the experiment run
        :type runID: int
        :param runDataset: The dataset used during this run
        :type runDataset: str
        """
        self.conn = None # Stays None if the database could not be opened
        try:
            # Check for the existence of the Aion database
            dbPath = getAionDBPath()
            if not os.path.exists(dbPath):
                prettyPrint("Unable to locate the database \"%s\". A new database will be created" % dbPath, "warning")
                dbScriptPath = getAionDBRecovery()
                if not os.path.exists(dbScriptPath):
                    prettyPrint("Unable to locate the database script file under \"%s\". Exiting" % dbScriptPath, "error")
                    exit(1)
                # Connect to database
                self.conn = sqlite3.connect(dbPath) # Creates a DB if it does NOT exist
                with open(dbScriptPath) as dbScript:
                    self.conn.executescript(dbScript.read())
            else:
                self.conn = sqlite3.connect(dbPath)
            # Insert a record about the current run
            startTime = getTimestamp(includeDate=True)
            self.insert(table="run", columns=[], values=["%s" % runID, "%s" % runDataset, "%s" % startTime, "N/A", "0"])

        except Exception as e:
            prettyPrintError(e)

    def _asText(self, value):
        """
        Converts a value to the text that is bound to a query parameter
        :param value: The value to convert
        :return: The value itself if it is a string, its str representation otherwise
        """
        if isinstance(value, (str, type(u""))):
            return value
        return str(value)

    def _buildWhere(self, filters, cOperator, lOperator):
        """
        Builds the WHERE clause shared by the DELETE, SELECT, and UPDATE queries
        :param filters: A list of (column, value) conditions
        :type filters: list of tuples
        :param cOperator: The comparison operator used in every condition
        :type cOperator: str
        :param lOperator: The logic operator used to join the conditions
        :type lOperator: str
        :return: A tuple of the clause ("" without filters) and the list of values to bind to it
        """
        if len(filters) < 1:
            return "", []
        conditions = ["%s %s ?" % (f[0], cOperator) for f in filters]
        parameters = [self._asText(f[1]) for f in filters]
        return " WHERE " + (" %s " % lOperator).join(conditions), parameters

    def close(self):
        """
        Saves and closes the database
        :return: A bool depicting the success/failure of the operation
        """
        try:
            if not self.save():
                prettyPrint("Unable to save the current state of the database", "warning")
                return False
            self.conn.close()
        except Exception as e:
            prettyPrintError(e)
            return False

        return True

    def delete(self, table, filters, cOperator="=", lOperator="AND"):
        """
        Deletes records from a table
        :param table: The name of the table to be updated
        :type table: str
        :param filters: A list of conditions to use in the WHERE clause of the query
        :type filters: list of tuples
        :param cOperator: The comparison operator used in the WHERE clause (i.e. '=', '>', '<', 'LIKE', etc.)
        :type cOperator: str
        :param lOperator: The logic operator used to join the filters in the WHERE clause (i.e. 'AND' or 'OR')
        :type lOperator: str
        :return: A bool depicting the success/failure of the operation
        """
        try:
            # Build query
            where, parameters = self._buildWhere(filters, cOperator, lOperator)
            query = "DELETE FROM %s" % table + where
            # Execute query
            if verboseON():
                prettyPrint("Executing query: %s (parameters: %s)" % (query, parameters), "debug")
            self.conn.execute(query, parameters)

        except Exception as e:
            prettyPrintError(e)
            return False

        return True

    def execute(self, query):
        """
        Executes a SQL query passed as a string
        :param query: The SQL query to execute
        :type query: str
        :return: A cursor of the results set or None
        """
        try:
            if verboseON():
                prettyPrint("Executing query: %s" % query, "debug")
            results = self.conn.execute(query)
        except Exception as e:
            prettyPrintError(e)
            return None

        return results

    def insert(self, table, columns, values):
        """
        Inserts a new record into the database
        :param table: The table to insert the new values in
        :type table: str
        :param columns: The columns the values belong to ([] = all columns in their declared order)
        :type columns: list
        :param values: The new values to be inserted
        :type values: list
        :return: A bool depicting the success/failure of the INSERT operation
        """
        try:
            # Prepare values: bound as text parameters, so quotes in the data cannot break the query
            parameters = [self._asText(v) for v in values]
            placeholders = ",".join(["?"] * len(parameters))
            # Build query
            if len(columns) > 0:
                query = "INSERT INTO %s (%s) VALUES (%s)" % (table, ",".join(columns), placeholders)
            else:
                query = "INSERT INTO %s VALUES(%s)" % (table, placeholders)
            # Execute query
            if verboseON():
                prettyPrint("Executing query: %s (parameters: %s)" % (query, parameters), "debug")
            self.conn.execute(query, parameters)

        except Exception as e:
            prettyPrintError(e)
            return False

        return True

    def save(self):
        """
        Saves the current state of the database by committing the changes
        :return: A bool depicting the success/failure of the operation
        """
        try:
            self.conn.commit()
        except Exception as e:
            prettyPrintError(e)
            return False

        return True

    def select(self, columns, table, filters, cOperator="=", lOperator="AND"):
        """
        Retrieves records from the database
        :param columns: The columns to select from the table
        :type columns: list (Default: [] = *)
        :param table: The table whence the data is selected
        :type table: str
        :param filters: A list of conditions to use in the WHERE clause of the query
        :type filters: list of tuples
        :param cOperator: The comparison operator used in the WHERE clause (i.e. '=', '>', '<', 'LIKE', etc.)
        :type cOperator: str
        :param lOperator: The logic operator used to join the filters in the WHERE clause (i.e. 'AND' or 'OR')
        :type lOperator: str
        :return: sqlite3.Cursor of the returned rows or None upon failure
        """
        try:
            # Build query
            selected = "*" if len(columns) < 1 else ",".join(columns)
            where, parameters = self._buildWhere(filters, cOperator, lOperator)
            query = "SELECT %s FROM %s" % (selected, table) + where
            # Execute query
            if verboseON():
                prettyPrint("Executing query: %s (parameters: %s)" % (query, parameters), "debug")
            cursor = self.conn.execute(query, parameters)

        except Exception as e:
            prettyPrintError(e)
            return None

        return cursor

    def update(self, table, values, filters, cOperator="=", lOperator="AND"):
        """
        Updates records in the database
        :param table: The name of the table to be updated
        :type table: str
        :param values: The list of columns to be updated along with their new values
        :type values: list of tuples
        :param filters: A list of conditions to use in the WHERE clause of the query
        :type filters: list of tuples
        :param cOperator: The comparison operator used in the WHERE clause (i.e. '=', '>', '<', 'LIKE', etc.)
        :type cOperator: str
        :param lOperator: The logic operator used to join the filters in the WHERE clause (i.e. 'AND' or 'OR')
        :type lOperator: str
        :return: A bool depicting the success/failure of the operation
        """
        try:
            # Build query: the columns to be updated and their (bound) values
            assignments = ",".join(["%s=?" % v[0] for v in values])
            parameters = [self._asText(v[1]) for v in values]
            # Add WHERE clause, if applicable
            where, whereParameters = self._buildWhere(filters, cOperator, lOperator)
            query = "UPDATE %s SET %s" % (table, assignments) + where
            parameters += whereParameters
            # Execute query
            if verboseON():
                prettyPrint("Executing query: %s (parameters: %s)" % (query, parameters), "debug")
            self.conn.execute(query, parameters)

        except Exception as e:
            prettyPrintError(e)
            return False

        return True
"KEYCODE_ALT_LEFT", "KEYCODE_ALT_RIGHT", "KEYCODE_SHIFT_LEFT", "KEYCODE_SHIFT_RIGHT", "KEYCODE_TAB", "KEYCODE_SPACE", "KEYCODE_SYM", "KEYCODE_EXPLORER", "KEYCODE_ENVELOPE", "KEYCODE_ENTER", "KEYCODE_DEL", "KEYCODE_GRAVE", "KEYCODE_MINUS", "KEYCODE_EQUALS", "KEYCODE_LEFT_BRACKET", "KEYCODE_RIGHT_BRACKET", "KEYCODE_BACKSLASH", "KEYCODE_SEMICOLON", "KEYCODE_APOSTROPHE", "KEYCODE_SLASH", "KEYCODE_AT", "KEYCODE_NUM", "KEYCODE_HEADSETHOOK", "KEYCODE_FOCUS", "KEYCODE_PLUS", "KEYCODE_MENU", "KEYCODE_NOTIFICATION", "KEYCODE_SEARCH", "TAG_LAST_KEYCODE"] 4 | 5 | keyEventTypes = ["DOWN", "UP", "DOWN_AND_UP"] 6 | 7 | activityActions = ["touch", "type", "press", "drag"] 8 | 9 | sensitiveAPICalls = {"android.content.ContextWrapper": ["bindService", "deleteDatabase", "deleteFile", "deleteSharedPreferences", "getSystemService", "openFileInput", "startService", "stopService", "unbindService", "unregisterReceiver"], "android.accounts.AccountManager": ["clearPassword", "getAccounts", "getPassword", "peekAuthToken", "setAuthToken", "setPassword"], "android.app.Activity": ["startActivity", "setContentView", "setVisible", "takeKeyEvents"], "android.app.DownloadManager": ["addCompletedDownload", "enqueue", "getUriForDownloadedFile", "openDownloadedFile", "query"], "android.app.IntentService": ["onStartCommand"], "android.content.ContentResolver": ["insert", "openFileDescriptor", "query", "update"], "android.content.pm.PackageInstaller": ["uninstall"], "android.database.sqlite.SQLiteDatabase": ["execSQL", "insert", "insertOrThrow", "openDatabase", "query", "rawQuery", "replace", "update"], "android.hardware.Camera": ["open", "reconnect", "release", "startPreview", "stopPreview", "takePicture"], "android.hardware.display.DisplayManager": ["getDisplay", "getDisplays"], "android.location.Location": ["getLatitude", "getLongitude"], "android.media.AudioRecord": ["read", "startRecording", "stop"], "android.media.MediaRecorder": ["prepare", "setCamera", "start", "stop"], "android.net.Network": 
["bindSocket", "openConnection"], "android.net.NetworkInfo": ["isAvailable", "isConnected", "isRoaming"], "android.net.wifi.WifiInfo": ["getMacAddress", "getSSID"], "android.net.wifi.WifiManager": ["disconnect", "getScanResults", "getWifiState", "reconnect", "startScan"], "android.os.Process": ["killProcess"], "android.os.PowerManager": ["isInteractive", "isScreenOn", "reboot"], "android.telephony.SmsManager": ["sendDataMessage", "sendTextMessage"], "android.widget.Toast": ["makeText"], "dalvik.system.DexClassLoader": ["loadClass"], "dalvik.system.PathClassLoader": ["loadClass"], "java.lang.class": ["forName", "getClassLoader", "getClasses", "getField", "getFields", "getMethods", "getMethod", "getName"], "java.lang.reflect.Method": ["invoke"], "java.net.HttpCookie": ["getName", "getPath", "getSecure", "getValue", "parse", "setPath", "setSecure", "setValue"], "java.net.URL.openConnection": ["openConnection", "openStream"]} 10 | 11 | droidmonDefaultClasses = [u'android.telephony.TelephonyManager', u'android.net.wifi.WifiInfo', u'android.os.Debug', u'android.app.SharedPreferencesImpl$EditorImpl', u'android.content.ContentValues', u'java.net.URL', u'org.apache.http.impl.client.AbstractHttpClient', u'android.app.ContextImpl', u'android.app.ActivityThread', u'android.app.Activity', u'dalvik.system.BaseDexClassLoader', u'dalvik.system.DexFile', u'dalvik.system.DexClassLoader', u'dalvik.system.PathClassLoader', u'java.lang.reflect.Method', u'javax.crypto.spec.SecretKeySpec', u'javax.crypto.Cipher', u'javax.crypto.Mac', u'android.app.ApplicationPackageManager', u'android.app.NotificationManager', u'android.util.Base64', u'android.net.ConnectivityManager', u'android.content.BroadcastReceiver', u'android.telephony.SmsManager', u'java.lang.Runtime', u'java.lang.ProcessBuilder', u'java.io.FileOutputStream', u'java.io.FileInputStream', u'android.app.ActivityManager', u'android.os.Process', u'android.content.ContentResolver', u'android.accounts.AccountManager', 
u'android.location.Location', u'android.media.AudioRecord', u'android.media.MediaRecorder', u'android.os.SystemProperties', u'libcore.io.IoBridge'] 12 | 13 | droidmonDefaultMethods = [u'getDeviceId', u'getSubscriberId', u'getLine1Number', u'getNetworkOperator', u'getNetworkOperatorName', u'getSimOperatorName', u'getMacAddress', u'getSimCountryIso', u'getSimSerialNumber', u'getNetworkCountryIso', u'getDeviceSoftwareVersion', u'isDebuggerConnected', u'putString', u'putBoolean', u'putInt', u'putLong', u'putFloat', u'put', u'openConnection', u'execute', u'registerReceiver', u'handleReceiver', u'startActivity', u'findResource', u'findLibrary', u'loadDex',u'findResources', u'loadClass', u'invoke', u'doFinal', u'setComponentEnabledSetting', u'notify', u'decode', u'listen', u'encode', u'encodeToString', u'setMobileDataEnabled', u'abortBroadcast', u'sendTextMessage', u'sendMultipartTextMessage', u'exec', u'start', u'write', u'read', u'killBackgroundProcesses', u'killProcess', u'query', u'registerContentObserver', u'insert', u'getAccountsByType', u'getAccounts', u'getLatitude', u'getLongitude', u'delete', u'startRecording', u'get', u'getInstalledPackages', u'open'] 14 | 15 | droidmonDefaultAPIs = ['android.accounts.AccountManager.getAccounts', 'android.accounts.AccountManager.getAccountsByType', 'android.app.Activity.startActivity', 'android.app.ActivityManager.killBackgroundProcesses', 'android.app.ActivityThread.handleReceiver', 'android.app.ApplicationPackageManager.getInstalledPackages', 'android.app.ApplicationPackageManager.setComponentEnabledSetting', 'android.app.ContextImpl.registerReceiver', 'android.app.NotificationManager.notify', 'android.app.SharedPreferencesImpl$EditorImpl.putBoolean', 'android.app.SharedPreferencesImpl$EditorImpl.putFloat', 'android.app.SharedPreferencesImpl$EditorImpl.putInt', 'android.app.SharedPreferencesImpl$EditorImpl.putLong', 'android.app.SharedPreferencesImpl$EditorImpl.putString', 'android.content.BroadcastReceiver.abortBroadcast', 
'android.content.ContentResolver.delete', 'android.content.ContentResolver.insert', 'android.content.ContentResolver.query', 'android.content.ContentResolver.registerContentObserver', 'android.content.ContentValues.put', 'android.location.Location.getLatitude', 'android.location.Location.getLongitude', 'android.media.AudioRecord.startRecording', 'android.media.MediaRecorder.start', 'android.net.ConnectivityManager.setMobileDataEnabled', 'android.net.wifi.WifiInfo.getMacAddress', 'android.os.Debug.isDebuggerConnected', 'android.os.Process.killProcess', 'android.os.SystemProperties.get', 'android.telephony.SmsManager.sendMultipartTextMessage', 'android.telephony.SmsManager.sendTextMessage', 'android.telephony.TelephonyManager.getDeviceId', 'android.telephony.TelephonyManager.getDeviceSoftwareVersion', 'android.telephony.TelephonyManager.getLine1Number', 'android.telephony.TelephonyManager.getNetworkCountryIso', 'android.telephony.TelephonyManager.getNetworkOperator', 'android.telephony.TelephonyManager.getNetworkOperatorName', 'android.telephony.TelephonyManager.getSimCountryIso', 'android.telephony.TelephonyManager.getSimOperatorName', 'android.telephony.TelephonyManager.getSimSerialNumber', 'android.telephony.TelephonyManager.getSubscriberId', 'android.telephony.TelephonyManager.listen', 'android.util.Base64.decode', 'android.util.Base64.encode', 'android.util.Base64.encodeToString', 'dalvik.system.BaseDexClassLoader.findLibrary', 'dalvik.system.BaseDexClassLoader.findResource', 'dalvik.system.BaseDexClassLoader.findResources', 'dalvik.system.DexFile.loadClass', 'dalvik.system.DexFile.loadDex', 'java.io.FileInputStream.read', 'java.io.FileOutputStream.write', 'java.lang.ProcessBuilder.start', 'java.lang.Runtime.exec', 'java.lang.reflect.Method.invoke', 'java.net.URL.openConnection', 'javax.crypto.Cipher.doFinal', 'javax.crypto.Mac.doFinal', 'libcore.io.IoBridge.open', 'org.apache.http.impl.client.AbstractHttpClient.execute'] 16 | 17 | droidmonDefaultHooks = 
{u'android.accounts.AccountManager': [u'getAccountsByType', u'getAccounts'], u'android.app.Activity': [u'startActivity'], u'android.app.ActivityManager': [u'killBackgroundProcesses'], u'android.app.ActivityThread': [u'handleReceiver'], u'android.app.ApplicationPackageManager': [u'setComponentEnabledSetting', u'getInstalledPackages'], u'android.app.ContextImpl': [u'registerReceiver'], u'android.app.NotificationManager': [u'notify'], u'android.app.SharedPreferencesImpl$EditorImpl': [u'putString', u'putBoolean', u'putInt', u'putLong', u'putFloat'], u'android.content.BroadcastReceiver': [u'abortBroadcast'], u'android.content.ContentResolver': [u'query', u'registerContentObserver', u'insert', u'delete'], u'android.content.ContentValues': [u'put'], u'android.location.Location': [u'getLatitude', u'getLongitude'], u'android.media.AudioRecord': [u'startRecording'], u'android.media.MediaRecorder': [u'start'], u'android.net.ConnectivityManager': [u'setMobileDataEnabled'], u'android.net.wifi.WifiInfo': [u'getMacAddress'], u'android.os.Debug': [u'isDebuggerConnected'], u'android.os.Process': [u'killProcess'], u'android.os.SystemProperties': [u'get'], u'android.telephony.SmsManager': [u'sendTextMessage', u'sendMultipartTextMessage'], u'android.telephony.TelephonyManager': [u'getDeviceId', u'getSubscriberId', u'getLine1Number', u'getNetworkOperator', u'getNetworkOperatorName', u'getSimOperatorName', u'getSimCountryIso', u'getSimSerialNumber', u'getNetworkCountryIso', u'getDeviceSoftwareVersion', u'listen'], u'android.util.Base64': [u'decode', u'encode', u'encodeToString'], u'dalvik.system.BaseDexClassLoader': [u'findResource', u'findLibrary', u'findResources'], u'dalvik.system.DexClassLoader': [], u'dalvik.system.DexFile': [u'loadDex', u'loadClass'], u'dalvik.system.PathClassLoader': [], u'java.io.FileInputStream': [u'read'], u'java.io.FileOutputStream': [u'write'], u'java.lang.ProcessBuilder': [u'start'], u'java.lang.Runtime': [u'exec'], u'java.lang.reflect.Method': [u'invoke'], 
u'java.net.URL': [u'openConnection'], u'javax.crypto.Cipher': [u'doFinal'], u'javax.crypto.Mac': [u'doFinal'], u'javax.crypto.spec.SecretKeySpec': [], u'libcore.io.IoBridge': [u'open'], u'org.apache.http.impl.client.AbstractHttpClient': [u'execute']} 18 | 19 | 20 | -------------------------------------------------------------------------------- /docs/hooks.json: -------------------------------------------------------------------------------- 1 | { 2 | "hookConfigs": [ 3 | { 4 | "class_name": "android.telephony.TelephonyManager", 5 | "method": "getDeviceId", 6 | "thisObject": false, 7 | "type": "fingerprint" 8 | }, 9 | { 10 | "class_name": "android.telephony.TelephonyManager", 11 | "method": "getSubscriberId", 12 | "thisObject": false, 13 | "type": "fingerprint" 14 | }, 15 | { 16 | "class_name": "android.telephony.TelephonyManager", 17 | "method": "getLine1Number", 18 | "thisObject": false, 19 | "type": "fingerprint" 20 | }, 21 | { 22 | "class_name": "android.telephony.TelephonyManager", 23 | "method": "getNetworkOperator", 24 | "thisObject": false, 25 | "type": "fingerprint" 26 | }, 27 | { 28 | "class_name": "android.telephony.TelephonyManager", 29 | "method": "getNetworkOperatorName", 30 | "thisObject": false, 31 | "type": "fingerprint" 32 | }, 33 | { 34 | "class_name": "android.telephony.TelephonyManager", 35 | "method": "getSimOperatorName", 36 | "thisObject": false, 37 | "type": "fingerprint" 38 | }, 39 | { 40 | "class_name": "android.net.wifi.WifiInfo", 41 | "method": "getMacAddress", 42 | "thisObject": false, 43 | "type": "fingerprint" 44 | }, 45 | { 46 | "class_name": "android.telephony.TelephonyManager", 47 | "method": "getSimCountryIso", 48 | "thisObject": false, 49 | "type": "fingerprint" 50 | }, 51 | { 52 | "class_name": "android.telephony.TelephonyManager", 53 | "method": "getSimSerialNumber", 54 | "thisObject": false, 55 | "type": "fingerprint" 56 | }, 57 | { 58 | "class_name": "android.telephony.TelephonyManager", 59 | "method": 
"getNetworkCountryIso", 60 | "thisObject": false, 61 | "type": "fingerprint" 62 | }, 63 | { 64 | "class_name": "android.telephony.TelephonyManager", 65 | "method": "getDeviceSoftwareVersion", 66 | "thisObject": false, 67 | "type": "fingerprint" 68 | }, 69 | { 70 | "class_name": "android.os.Debug", 71 | "method": "isDebuggerConnected", 72 | "thisObject": false, 73 | "type": "fingerprint" 74 | }, 75 | { 76 | "class_name": "android.app.SharedPreferencesImpl$EditorImpl", 77 | "method": "putString", 78 | "thisObject": false, 79 | "type": "globals" 80 | }, 81 | { 82 | "class_name": "android.app.SharedPreferencesImpl$EditorImpl", 83 | "method": "putBoolean", 84 | "thisObject": false, 85 | "type": "globals" 86 | }, 87 | { 88 | "class_name": "android.app.SharedPreferencesImpl$EditorImpl", 89 | "method": "putInt", 90 | "thisObject": false, 91 | "type": "globals" 92 | }, 93 | { 94 | "class_name": "android.app.SharedPreferencesImpl$EditorImpl", 95 | "method": "putLong", 96 | "thisObject": false, 97 | "type": "globals" 98 | }, 99 | { 100 | "class_name": "android.app.SharedPreferencesImpl$EditorImpl", 101 | "method": "putFloat", 102 | "thisObject": false, 103 | "type": "globals" 104 | }, 105 | { 106 | "class_name": "android.content.ContentValues", 107 | "method": "put", 108 | "thisObject": false, 109 | "type": "globals" 110 | }, 111 | { 112 | "class_name": "java.net.URL", 113 | "method": "openConnection", 114 | "thisObject": true, 115 | "type": "network" 116 | }, 117 | { 118 | "class_name": "org.apache.http.impl.client.AbstractHttpClient", 119 | "method": "execute", 120 | "thisObject": false, 121 | "type": "network" 122 | }, 123 | { 124 | "class_name": "android.app.ContextImpl", 125 | "method": "registerReceiver", 126 | "thisObject": false, 127 | "type": "binder" 128 | }, 129 | { 130 | "class_name": "android.app.ActivityThread", 131 | "method": "handleReceiver", 132 | "thisObject": false, 133 | "type": "binder" 134 | }, 135 | { 136 | "class_name": "android.app.Activity", 137 | 
"method": "startActivity", 138 | "thisObject": false, 139 | "type": "binder" 140 | }, 141 | { 142 | "class_name": "dalvik.system.BaseDexClassLoader", 143 | "method": "findResource", 144 | "thisObject": false, 145 | "type": "dex" 146 | }, 147 | { 148 | "class_name": "dalvik.system.BaseDexClassLoader", 149 | "method": "findLibrary", 150 | "thisObject": false, 151 | "type": "dex" 152 | }, 153 | { 154 | "class_name": "dalvik.system.DexFile", 155 | "method": "loadDex", 156 | "thisObject": false, 157 | "type": "dex" 158 | }, 159 | { 160 | "class_name": "dalvik.system.DexClassLoader", 161 | "method": null, 162 | "thisObject": false, 163 | "type": "dex" 164 | }, 165 | { 166 | "class_name": "dalvik.system.BaseDexClassLoader", 167 | "method": "findResources", 168 | "thisObject": false, 169 | "type": "dex" 170 | }, 171 | { 172 | "class_name": "dalvik.system.DexFile", 173 | "method": "loadClass", 174 | "thisObject": false, 175 | "type": "dex" 176 | }, 177 | { 178 | "class_name": "dalvik.system.DexFile", 179 | "method": null, 180 | "thisObject": false, 181 | "type": "dex" 182 | }, 183 | { 184 | "class_name": "dalvik.system.PathClassLoader", 185 | "method": null, 186 | "thisObject": false, 187 | "type": "dex" 188 | }, 189 | { 190 | "class_name": "java.lang.reflect.Method", 191 | "method": "invoke", 192 | "thisObject": false, 193 | "type": "reflection" 194 | }, 195 | { 196 | "class_name": "javax.crypto.spec.SecretKeySpec", 197 | "method": null, 198 | "thisObject": false, 199 | "type": "crypto" 200 | }, 201 | { 202 | "class_name": "javax.crypto.Cipher", 203 | "method": "doFinal", 204 | "thisObject": true, 205 | "type": "crypto" 206 | }, 207 | { 208 | "class_name": "javax.crypto.Mac", 209 | "method": "doFinal", 210 | "thisObject": false, 211 | "type": "crypto" 212 | }, 213 | { 214 | "class_name": "android.app.ApplicationPackageManager", 215 | "method": "setComponentEnabledSetting", 216 | "thisObject": false, 217 | "type": "generic" 218 | }, 219 | { 220 | "class_name": 
"android.app.NotificationManager", 221 | "method": "notify", 222 | "thisObject": false, 223 | "type": "generic" 224 | }, 225 | { 226 | "class_name": "android.util.Base64", 227 | "method": "decode", 228 | "thisObject": false, 229 | "type": "generic" 230 | }, 231 | { 232 | "class_name": "android.telephony.TelephonyManager", 233 | "method": "listen", 234 | "thisObject": false, 235 | "type": "generic" 236 | }, 237 | { 238 | "class_name": "android.util.Base64", 239 | "method": "encode", 240 | "thisObject": false, 241 | "type": "generic" 242 | }, 243 | { 244 | "class_name": "android.util.Base64", 245 | "method": "encodeToString", 246 | "thisObject": false, 247 | "type": "generic" 248 | }, 249 | { 250 | "class_name": "android.net.ConnectivityManager", 251 | "method": "setMobileDataEnabled", 252 | "thisObject": false, 253 | "type": "generic" 254 | }, 255 | { 256 | "class_name": "android.content.BroadcastReceiver", 257 | "method": "abortBroadcast", 258 | "thisObject": false, 259 | "type": "generic" 260 | }, 261 | { 262 | "class_name": "android.telephony.SmsManager", 263 | "method": "sendTextMessage", 264 | "thisObject": false, 265 | "type": "sms" 266 | }, 267 | { 268 | "class_name": "android.telephony.SmsManager", 269 | "method": "sendMultipartTextMessage", 270 | "thisObject": false, 271 | "type": "sms" 272 | }, 273 | { 274 | "class_name": "java.lang.Runtime", 275 | "method": "exec", 276 | "thisObject": false, 277 | "type": "runtime" 278 | }, 279 | { 280 | "class_name": "java.lang.ProcessBuilder", 281 | "method": "start", 282 | "thisObject": true, 283 | "type": "runtime" 284 | }, 285 | { 286 | "class_name": "java.io.FileOutputStream", 287 | "method": "write", 288 | "thisObject": false, 289 | "type": "runtime" 290 | }, 291 | { 292 | "class_name": "java.io.FileInputStream", 293 | "method": "read", 294 | "thisObject": false, 295 | "type": "runtime" 296 | }, 297 | { 298 | "class_name": "android.app.ActivityManager", 299 | "method": "killBackgroundProcesses", 300 | "thisObject": 
false, 301 | "type": "runtime" 302 | }, 303 | { 304 | "class_name": "android.os.Process", 305 | "method": "killProcess", 306 | "thisObject": false, 307 | "type": "runtime" 308 | }, 309 | { 310 | "class_name": "android.content.ContentResolver", 311 | "method": "query", 312 | "thisObject": false, 313 | "type": "content" 314 | }, 315 | { 316 | "class_name": "android.content.ContentResolver", 317 | "method": "registerContentObserver", 318 | "thisObject": false, 319 | "type": "content" 320 | }, 321 | { 322 | "class_name": "android.content.ContentResolver", 323 | "method": "insert", 324 | "thisObject": false, 325 | "type": "content" 326 | }, 327 | { 328 | "class_name": "android.accounts.AccountManager", 329 | "method": "getAccountsByType", 330 | "thisObject": false, 331 | "type": "content" 332 | }, 333 | { 334 | "class_name": "android.accounts.AccountManager", 335 | "method": "getAccounts", 336 | "thisObject": false, 337 | "type": "content" 338 | }, 339 | { 340 | "class_name": "android.location.Location", 341 | "method": "getLatitude", 342 | "thisObject": false, 343 | "type": "content" 344 | }, 345 | { 346 | "class_name": "android.location.Location", 347 | "method": "getLongitude", 348 | "thisObject": false, 349 | "type": "content" 350 | }, 351 | { 352 | "class_name": "android.content.ContentResolver", 353 | "method": "delete", 354 | "thisObject": false, 355 | "type": "content" 356 | }, 357 | { 358 | "class_name": "android.media.AudioRecord", 359 | "method": "startRecording", 360 | "thisObject": false, 361 | "type": "content" 362 | }, 363 | { 364 | "class_name": "android.media.MediaRecorder", 365 | "method": "start", 366 | "thisObject": false, 367 | "type": "content" 368 | }, 369 | { 370 | "class_name": "android.os.SystemProperties", 371 | "method": "get", 372 | "thisObject": false, 373 | "type": "content" 374 | }, 375 | { 376 | "class_name": "android.app.ApplicationPackageManager", 377 | "method": "getInstalledPackages", 378 | "thisObject": false, 379 | "type": 
"content" 380 | }, 381 | { 382 | "class_name": "libcore.io.IoBridge", 383 | "method": "open", 384 | "thisObject": false, 385 | "type": "file" 386 | } 387 | ], 388 | "trace": false 389 | } -------------------------------------------------------------------------------- /data_inference/extraction/featureExtraction.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | from Aion.shared.constants import * 4 | from Aion.utils.data import * 5 | from Aion.utils.graphics import * 6 | from Aion.conf.config import * 7 | 8 | from androguard.session import Session 9 | import numpy 10 | 11 | import os, json, threading, re 12 | 13 | def returnEmptyFeatures(): 14 | """ 15 | A dummy function used by timers to return empty feature vectors (lists) 16 | """ 17 | prettyPrint("Analysis timeout. Returning empty feature vector", "warning") 18 | return [] 19 | 20 | def extractStaticFeatures(apkPath): 21 | """Extracts static numerical features from APK using Androguard""" 22 | try: 23 | features = [[], [], [], []] # Tuples are immutable 24 | if os.path.exists(apkPath.replace(".apk",".static")): 25 | prettyPrint("Found a pre-computed static features file") 26 | bFeatures, pFeatures, aFeatures, allFeatures = [], [], [], [] 27 | try: 28 | possibleExtensions = [".basic", ".perm", ".api", ".static"] 29 | for ext in possibleExtensions: 30 | if os.path.exists(apkPath.replace(".apk", ext)): 31 | content = open(apkPath.replace(".apk", ext)).read() 32 | if len(content) > 0: 33 | features[possibleExtensions.index(ext)] = [float(f) for f in content[1:-1].split(',') if len(f) > 0] 34 | 35 | return tuple(features) 36 | 37 | except Exception as e: 38 | prettyPrintError(e) 39 | prettyPrint("Could not extract features from \".static\" file. 
Continuing as usual", "warning") 40 | if verboseON(): 41 | prettyPrint("Starting analysis on \"%s\"" % apkPath, "debug") 42 | analysisSession = Session() 43 | if not os.path.exists(apkPath): 44 | prettyPrint("Could not find the APK file \"%s\"" % apkPath, "warning") 45 | return [], [], [], [] 46 | # 1. Analyze APK and retrieve its components 47 | #t = threading.Timer(300.0, returnEmptyFeatures) # Guarantees not being stuck on analyzing an APK 48 | #t.start() 49 | analysisSession.add(apkPath, open(apkPath).read()) 50 | if type(analysisSession.analyzed_apk.values()) == list: 51 | apk = analysisSession.analyzed_apk.values()[0][0] 52 | else: 53 | apk = analysisSession.analyzed_apk.values()[0] 54 | dex = analysisSession.analyzed_dex.values()[0][0] 55 | vm = analysisSession.analyzed_dex.values()[0][1] 56 | # 2. Add features to the features vector 57 | basicFeatures, permissionFeatures, apiCallFeatures, allFeatures = [], [], [], [] 58 | # 2.a. The APK-related features 59 | if verboseON(): 60 | prettyPrint("Extracting basic features", "debug") 61 | minSDKVersion = 0.0 if not apk.get_min_sdk_version() else float(apk.get_min_sdk_version()) 62 | maxSDKVersion = 0.0 if not apk.get_max_sdk_version() else float(apk.get_max_sdk_version()) 63 | basicFeatures.append(minSDKVersion) 64 | basicFeatures.append(maxSDKVersion) 65 | basicFeatures.append(float(len(apk.get_activities()))) # No. of activities 66 | basicFeatures.append(float(len(apk.get_services()))) # No. of services 67 | basicFeatures.append(float(len(apk.get_receivers()))) # No. of broadcast receivers 68 | basicFeatures.append(float(len(apk.get_providers()))) # No. of providers 69 | # 2.b. 
Harvest permission-related features 70 | if verboseON(): 71 | prettyPrint("Extracting permissions-related features", "debug") 72 | aospPermissions = float(len(apk.get_requested_aosp_permissions())) # Android permissions requested by the app 73 | declaredPermissions = float(len(apk.get_declared_permissions())) # Custom permissions declared by the app 74 | dangerousPermissions = float(len([p for p in apk.get_requested_aosp_permissions_details().values() if p["protectionLevel"] == "dangerous"])) 75 | totalPermissions = float(len(apk.get_permissions())) 76 | permissionFeatures.append(totalPermissions) # No. of permissions 77 | if totalPermissions > 0: 78 | permissionFeatures.append(aospPermissions/totalPermissions) # AOSP permissions : Total permissions 79 | permissionFeatures.append(declaredPermissions/totalPermissions) # Third-party permissions : Total permissions 80 | permissionFeatures.append(dangerousPermissions/totalPermissions) # Dangerous permissions : Total permissions 81 | else: 82 | permissionFeatures.append(0.0) 83 | permissionFeatures.append(0.0) 84 | permissionFeatures.append(0.0) 85 | # 2.c. 
The DEX-related features (API calls) 86 | if verboseON(): 87 | prettyPrint("Extracting API calls from dex code", "debug") 88 | apiCallFeatures.append(float(len(dex.get_classes()))) # Total number of classes 89 | apiCallFeatures.append(float(len(dex.get_strings()))) # Total number of strings 90 | apiCategories = sensitiveAPICalls.keys() 91 | apiCategoryCount = [0.0] * len(apiCategories) 92 | for c in dex.classes.get_names(): 93 | currentClass = dex.get_class(c) 94 | if not currentClass: 95 | continue 96 | code = currentClass.get_source() 97 | if len(code) < 1: 98 | continue 99 | for category in apiCategories: 100 | if code.find(category) != -1: 101 | for call in sensitiveAPICalls[category]: 102 | apiCategoryCount[apiCategories.index(category)] += float(len(re.findall(call, code))) 103 | 104 | apiCallFeatures += apiCategoryCount 105 | 106 | except Exception as e: 107 | prettyPrintError(e) 108 | return [], [], [], [] 109 | 110 | allFeatures = basicFeatures + permissionFeatures + apiCallFeatures 111 | 112 | return basicFeatures, permissionFeatures, apiCallFeatures, allFeatures 113 | 114 | 115 | def extractIntrospyFeatures(apkJSONPath): 116 | """Extracts dynamic features from a JSON-based trace generated by Introspy""" 117 | try: 118 | features = [] 119 | if not os.path.exists(apkJSONPath): 120 | prettyPrint("Could not find the JSON file \"%s\"" % apkJSONPath, "warning") 121 | else: 122 | apkJSON = json.loads(open(apkJSONPath).read()) 123 | cryptoCalls, sslCalls, hashCalls = 0.0, 0.0, 0.0 # Crypto group 124 | fsCalls, prefCalls, uriCalls = 0.0, 0.0, 0.0 # Storage group 125 | ipcCalls = 0.0 # Ipc group 126 | webviewCalls = 0.0 # Misc group 127 | accountManagerCalls, activityCalls, downloadManagerCalls = 0.0, 0.0, 0.0 128 | contentResolverCalls, contextWrapperCalls, packageInstallerCalls = 0.0, 0.0, 0.0 129 | sqliteCalls, cameraCalls, displayManagerCalls, locationCalls = 0.0, 0.0, 0.0, 0.0 130 | audioRecordCalls, mediaRecorderCalls, networkCalls, wifiManagerCalls = 0.0, 
0.0, 0.0, 0.0 131 | powerManagerCalls, smsManagerCalls, toastCalls, classCalls = 0.0, 0.0, 0.0, 0.0 132 | httpCookieCalls, urlCalls = 0.0, 0.0 133 | for call in apkJSON["calls"]: 134 | group, subgroup = call["group"], call["subgroup"] 135 | if group == "Crypto": 136 | cryptoCalls = cryptoCalls + 1 if subgroup == "General crypto" else cryptoCalls 137 | hashCalls = hashCalls + 1 if subgroup == "Hash" else hashCalls 138 | sslCalls = sslCalls + 1 if subgroup == "Ssl" else sslCalls 139 | elif group == "Storage": 140 | fsCalls = storageCalls + 1 if call["group"] == "Fs" else fsCalls 141 | prefCalls = prefCalls + 1 if call["group"] == "Pref" else prefCalls 142 | uriCalls = uriCalls + 1 if call["group"] == "Uri" else uriCalls 143 | elif group == "Ipc": 144 | ipcCalls = ipcCalls + 1 if call["group"] == "Ipc" else ipcCalls 145 | elif group == "Misc": 146 | webviewCalls = webviewCalls + 1 if call["group"] == "Webview" else webviewCalls 147 | elif group.lower().find("custom") != -1: 148 | # Handle custom hooks 149 | # android.accounts.AccountManager 150 | if call["clazz"] == "android.accounts.AccountManager": 151 | accountManagerCalls += 1 152 | # android.app.Activity 153 | elif call["clazz"] == "android.app.Activity": 154 | activityCalls += 1 155 | # android.app.DownloadManager 156 | elif call["clazz"] == "android.app.DownloadManager": 157 | downloadManagerCalls += 1 158 | # android.content.ContentResolver 159 | elif call["clazz"] == "android.content.ContentResolver": 160 | contentResolverCalls += 1 161 | # android.content.ContextWrapper 162 | elif call["clazz"] == "android.content.ContextWrapper": 163 | contextWrapperCalls += 1 164 | # android.content.pm.PackageInstaller 165 | elif call["clazz"] == "android.content.pm.PackageInstaller": 166 | packageInstallerCalls += 1 167 | # android.database.sqlite.SQLiteDatabase 168 | elif call["clazz"] == "android.database.sqlite.SQLiteDatabase": 169 | sqliteCalls += 1 170 | # android.hardware.Camera 171 | elif call["clazz"] == 
"android.hardware.Camera": 172 | cameraCalls += 1 173 | # android.hardware.display.DisplayManager 174 | elif call["clazz"] == "android.hardware.display.DisplayManager": 175 | displayManagerCalls += 1 176 | # android.location.Location 177 | elif call["clazz"] == "android.location.Location": 178 | locationCalls += 1 179 | # android.media.AudioRecord 180 | elif call["clazz"] == "android.media.AudioRecord": 181 | audioRecordCalls += 1 182 | # android.media.MediaRecorder 183 | elif call["clazz"] == "android.media.MediaRecorder": 184 | mediaRecorderCalls += 1 185 | # android.net.Network 186 | elif call["clazz"] == "android.net.Network": 187 | networkCalls += 1 188 | # android.net.wifi.WifiManager 189 | elif call["clazz"] == "android.net.wifi.WifiManager": 190 | wifiManagerCalls += 1 191 | # android.os.PowerManager 192 | elif call["clazz"] == "android.os.PowerManager": 193 | powerManagerCalls += 1 194 | # android.telephony.SmsManager 195 | elif call["clazz"] == "android.telephony.SmsManager": 196 | smsManagerCalls += 1 197 | # android.widget.Toast 198 | elif call["clazz"] == "android.widget.Toast": 199 | toastCalls += 1 200 | # java.lang.class 201 | elif call["clazz"] == "java.lang.class": 202 | classCalls += 1 203 | # java.net.HttpCookie 204 | elif call["clazz"] == "java.net.HttpCookie": 205 | httpCookieCalls += 1 206 | # java.net.URL 207 | elif call["clazz"] == "java.net.URL": 208 | urlCalls += 1 209 | 210 | features.append(cryptoCalls) 211 | features.append(sslCalls) 212 | features.append(hashCalls) 213 | features.append(fsCalls) 214 | features.append(prefCalls) 215 | features.append(uriCalls) 216 | features.append(ipcCalls) 217 | features.append(webviewCalls) 218 | features.append(accountManagerCalls) 219 | features.append(activityCalls) 220 | features.append(downloadManagerCalls) 221 | features.append(contentResolverCalls) 222 | features.append(contextWrapperCalls) 223 | features.append(packageInstallerCalls) 224 | features.append(sqliteCalls) 225 | 
features.append(cameraCalls) 226 | features.append(displayManagerCalls) 227 | features.append(locationCalls) 228 | features.append(audioRecordCalls) 229 | features.append(mediaRecorderCalls) 230 | features.append(networkCalls) 231 | features.append(wifiManagerCalls) 232 | features.append(powerManagerCalls) 233 | features.append(smsManagerCalls) 234 | features.append(toastCalls) 235 | features.append(classCalls) 236 | features.append(httpCookieCalls) 237 | features.append(urlCalls) 238 | 239 | except Exception as e: 240 | prettyPrintError(e) 241 | return [] 242 | 243 | return features 244 | 245 | def extractDroidmonFeatures(logPath, mode="classes"): 246 | """ 247 | Extracts numerical features from Droidmon-generated logs 248 | :param logPath: The path to the JSON-log generated by Droidmon 249 | :type logPath: str 250 | :param mode: The type of features to extract (i.e. classes, methods, both) 251 | :type mode: str 252 | :return: Two lists depicting the trace found in the log and counts of items it contains 253 | """ 254 | try: 255 | features = [] 256 | # Parse the droidmon log 257 | if not os.path.exists(logPath): 258 | prettyPrint("Unable to locate \"%s\"" % logPath, "warning") 259 | return [], [] 260 | lines = open(logPath).read().split('\n') 261 | if VERBOSE: 262 | prettyPrint("Successfully retrieved %s lines from log" % len(lines), "debug") 263 | droidmonLines = [l for l in lines if l.lower().find("droidmon-apimonitor-") != -1] 264 | # Generate trace from lines 265 | trace = [] 266 | for line in droidmonLines: 267 | tmp = line[line.find("{"):].replace('\n','').replace('\r','') 268 | # Extract class and method 269 | c, m = "", "" 270 | #if tmp[0] == '{' and tmp[-1] == '}': 271 | # d = eval(tmp) 272 | # c, m = d["class"], d["method"] 273 | #else: 274 | pattern = "class\":\"" 275 | index = tmp.find(pattern) 276 | c = tmp[index+len(pattern):tmp.find("\"", index+len(pattern))] 277 | pattern = "method\":\"" 278 | index = tmp.find(pattern) 279 | m = 
class Garfield():
    """
    Garfield is a lazy stimulation engine based on fuzzing via Monkey(runner) and Genymotion.

    Intended flow: build an instance from the path of an APK, call analyzeAPK() to
    collect the app's activities, services and broadcast receivers, then call
    generateRunnerScript() to write a monkeyrunner script that randomly exercises them.
    """

    def __init__(self, pathToAPK, APKType="goodware"):
        """
        :param pathToAPK: Path to the APK file to stimulate
        :type pathToAPK: str
        :param APKType: Label stored on the instance as "APKType"; this class does not interpret it
        :type APKType: str
        """
        # Assign every attribute before validating the path, so that an instance built
        # from a missing APK still carries well-defined (empty) state instead of
        # raising AttributeError on first use.
        self.APKPath = pathToAPK
        self.APK, self.DEX, self.VMAnalysis = None, None, None
        self.activitiesInfo, self.servicesInfo, self.receiversInfo = {}, {}, {}
        self.runnerScript = ""
        self.APKType = APKType
        if not os.path.exists(pathToAPK):
            prettyPrint("Could not find the APK \"%s\"" % pathToAPK, "warning")

    def analyzeAPK(self):
        """
        Uses androguard to retrieve metadata about the app e.g. activities, permissions, intent filters, etc.
        :return: True if every analysis step completed, False if any of them raised an exception
        """
        try:
            prettyPrint("Analyzing app")
            logEvent("Analyzing app: \"%s\"" % self.APKPath)
            # 1. Load the APK using androguard. The file is read as raw bytes and the
            #    handle is released as soon as the content is in memory.
            with open(self.APKPath, "rb") as apkFile:
                rawAPK = apkFile.read()
            analysisSession = Session()
            analysisSession.add(self.APKPath, rawAPK)
            # 2. Retrieve handles to APK and its dex code.
            #    list(...) keeps the indexing valid whether values() is a list or a view.
            self.APK = list(analysisSession.analyzed_apk.values())[0]
            firstDEXEntry = list(analysisSession.analyzed_dex.values())[0]
            self.DEX, self.VMAnalysis = firstDEXEntry[0], firstDEXEntry[1]
            # 3. Retrieve information for each activity
            prettyPrint("Analyzing activities")
            self.activitiesInfo = analyzeActivities(self.APK, self.DEX)
            # 4. Do the same for services and broadcast receivers
            prettyPrint("Analyzing services")
            self.servicesInfo = analyzeServices(self.APK, self.DEX)
            prettyPrint("Analyzing broadcast receivers")
            self.receiversInfo = analyzeReceivers(self.APK, self.DEX)

        except Exception as e:
            prettyPrintError(e)
            return False

        prettyPrint("Success")
        return True

    def generateRunnerScript(self, scriptPath="", runningTime=60):
        """
        Generates a python script to be run by Monkeyrunner
        :param scriptPath: Where to write the script; if empty, a random name under "<project dir>/files/scripts" is used
        :type scriptPath: str
        :param runningTime: Number of seconds the generated script keeps fuzzing the app
        :type runningTime: int
        :return: True if the script was written, False if the APK was not analyzed yet or an error occurred
        """
        # Imported here because the module's own import list (sys, os, shutil, glob, io)
        # does not bring "random" into scope explicitly.
        import random

        try:
            # Check whether the APK has been analyzed first
            if not self.APK:
                prettyPrint("APK needs to be analyzed first", "warning")
                return False

            if scriptPath == "":
                self.runnerScript = "%s/files/scripts/%s.py" % (getProjectDir(), getRandomAlphaNumeric())
            else:
                self.runnerScript = scriptPath
            prettyPrint("Writing runner script to \"%s\"" % self.runnerScript, "debug")

            # The "with" block guarantees the script is flushed and closed before it is handed to monkeyrunner
            with open(self.runnerScript, "w") as monkeyScript:
                emit = monkeyScript.write
                # Preparation
                emit("#!/usr/bin/python\n\n")
                emit("from com.android.monkeyrunner import MonkeyRunner, MonkeyDevice\n")
                emit("import time, os, random\n\n")
                emit("keyEvents = %s\n" % keyEvents)
                emit("keyEventTypes = [MonkeyDevice.UP, MonkeyDevice.DOWN, MonkeyDevice.DOWN_AND_UP]\n")
                emit("activityActions = %s\n" % activityActions)
                emit("activities = %s\n" % self.activitiesInfo)
                emit("services = %s\n" % self.servicesInfo)
                emit("receivers = %s\n\n" % self.receiversInfo)
                # Only component types the app actually declares may be picked at runtime;
                # random.randint(0, len(x)-1) on an empty dict would abort the generated script.
                emit("componentTypes = []\n")
                emit("if len(activities) > 0: componentTypes.append(\"activity\")\n")
                emit("if len(services) > 0: componentTypes.append(\"service\")\n")
                emit("if len(receivers) > 0: componentTypes.append(\"receiver\")\n\n")
                # Connect to the current device and install package
                emit("print \"[*] Connecting to device.\"\n")
                emit("device = MonkeyRunner.waitForConnection(\"[ANDROID_VIRTUAL_DEVICE_ID]\")\n")
                emit("package = '%s'\n" % self.APK.package)
                emit("print \"[*] Uninstalling package %s (if exists)\"\n" % self.APK.package)
                emit("device.removePackage(package)\n")
                emit("print \"[*] Installing package %s\"\n" % self.APK.package)
                emit("device.installPackage('%s')\n" % self.APKPath)
                # Configure introspy for hooking and monitoring
                emit("print \"[*] Configuring Introspy\"\n")
                emit("device.shell(\"echo 'GENERAL CRYPTO,KEY,HASH,FS,IPC,PREF,URI,WEBVIEW,SSL' > /data/data/%s/introspy.config\" % package)\n")
                emit("device.shell(\"chmod 664 /data/data/%s/introspy.config\" % package)\n")
                # Get a handle to a file to store the commands issued during runtime
                emit("commandsFile = open(\"%s/files/scripts/%s_%s.command\", \"w\")\n" % (getProjectDir(), self.APK.package.replace('.','_'), getRandomAlphaNumeric()))
                # Starting the fuzzing phase for [runningTime] seconds
                emit("endTime = time.time() + %s\n" % runningTime)
                emit("print \"[*] Fuzzing app for %s seconds\"\n" % runningTime)
                emit("while time.time() < endTime and len(componentTypes) > 0:\n")
                # 1. Choose a random component
                emit("\tcomponentType = componentTypes[random.randint(0, len(componentTypes)-1)]\n")
                # 2.a. Activities
                emit("\tif componentType == \"activity\":\n")
                emit("\t\tcurrentActivity = activities.keys()[random.randint(0,len(activities)-1)]\n")
                emit("\t\tprint \"[*] Starting activity: %s\" % currentActivity\n")
                emit("\t\tdevice.startActivity(component=package + '/' + currentActivity)\n")
                emit("\t\tcommandsFile.write(\"device.startActivity('%s/%s')\\n\" % (package, currentActivity))\n")
                # Choose an action
                emit("\t\tcurrentAction = activityActions[random.randint(0,len(activityActions)-1)]\n")
                emit("\t\tprint \"[*] Current action: %s\" % currentAction\n")
                # Touch in a random X,Y position on the screen
                emit("\t\tif currentAction == \"touch\":\n")
                emit("\t\t\twidth, height = int(device.getProperty(\"display.width\")), int(device.getProperty(\"display.height\"))\n")
                emit("\t\t\tX, Y = random.randint(0, width-1), random.randint(0, height-1)\n")
                emit("\t\t\tprint \"[*] Touching screen at (%s,%s)\" % (X,Y)\n")
                emit("\t\t\teventType = keyEventTypes[random.randint(0,2)]\n")
                emit("\t\t\tdevice.touch(X, Y, eventType)\n")
                emit("\t\t\tcommandsFile.write(\"device.touch(%s, %s, %s)\\n\" % (X, Y, eventType))\n")
                # Type something random.
                # NOTE(review): the text is drawn once, while generating the script, so every
                # "type" action of a given script types the same string. It is embedded in a
                # double-quoted literal - confirm getRandomString never yields quotes or backslashes.
                emit("\t\telif currentAction == \"type\":\n")
                emit("\t\t\ttext = \"%s\"\n" % getRandomString(random.randint(0,100)))
                emit("\t\t\tprint \"[*] Typing %s\" % text\n")
                emit("\t\t\tdevice.type(text)\n")
                emit("\t\t\tcommandsFile.write(\"device.type('%s')\\n\" % text)\n")
                # Press a random key up/down
                emit("\t\telif currentAction == \"press\":\n")
                emit("\t\t\taction = keyEvents[random.randint(0, len(keyEvents)-1)]\n")
                emit("\t\t\taType = keyEventTypes[random.randint(0,2)]\n")
                emit("\t\t\tprint \"[*] Pressing: %s as %s\" % (action, aType)\n")
                emit("\t\t\tdevice.press(action, aType)\n")
                emit("\t\t\tcommandsFile.write(\"device.press(%s, %s)\\n\" % (action, aType)) \n")
                # Randomly drag the screen
                emit("\t\telif currentAction == \"drag\":\n")
                emit("\t\t\twidth, height = int(device.getProperty(\"display.width\")), int(device.getProperty(\"display.height\"))\n")
                emit("\t\t\tstart = (random.randint(0, width-1), random.randint(0, height-1))\n")
                emit("\t\t\tend = (random.randint(0, width-1), random.randint(0, height-1))\n")
                emit("\t\t\tprint \"[*] Dragging screen from %s to %s\" % (start, end)\n")
                emit("\t\t\tdevice.drag(start, end)\n")
                emit("\t\t\tcommandsFile.write(\"device.drag(%s, %s)\\n\" % (start, end))\n")
                # 2.b. Services
                emit("\telif componentType == \"service\":\n")
                emit("\t\tcurrentService = services.keys()[random.randint(0, len(services)-1)]\n")
                emit("\t\tprint \"[*] Starting Service: %s\" % currentService\n")
                emit("\t\tif \"intent-filters\" in services[currentService].keys():\n")
                emit("\t\t\tif \"action\" in services[currentService][\"intent-filters\"].keys():\n")
                emit("\t\t\t\tintentAction = services[currentService][\"intent-filters\"][\"action\"][0]\n")
                emit("\t\t\t\tprint \"[*] Broadcasting intent: %s\" % intentAction\n")
                emit("\t\t\t\tdevice.broadcastIntent(currentService, intentAction)\n")
                emit("\t\t\t\tcommandsFile.write(\"device.broadcastIntent('%s', '%s')\\n\" % (currentService, intentAction)) \n")
                # 2.c. Broadcast receivers
                emit("\telif componentType == \"receiver\":\n")
                emit("\t\tcurrentReceiver = receivers.keys()[random.randint(0, len(receivers)-1)]\n")
                emit("\t\tprint \"[*] Starting Receiver: %s\" % currentReceiver\n")
                emit("\t\tif \"intent-filters\" in receivers[currentReceiver].keys():\n")
                emit("\t\t\tif \"action\" in receivers[currentReceiver][\"intent-filters\"].keys():\n")
                emit("\t\t\t\tintentAction = receivers[currentReceiver][\"intent-filters\"][\"action\"][0]\n")
                emit("\t\t\t\tprint \"[*] Broadcasting intent: %s\" % intentAction\n")
                emit("\t\t\t\tdevice.broadcastIntent(currentReceiver, intentAction)\n")
                emit("\t\t\t\tcommandsFile.write(\"device.broadcastIntent('%s', '%s')\\n\" % (currentReceiver, intentAction))\n")
                # Sleep for one second between two stimulation rounds
                emit("\ttime.sleep(1)\n")
                # The package is deliberately left installed: the introspy.db file still
                # has to be fetched from the app directory before uninstallation.
                emit("commandsFile.close()")

        except Exception as e:
            prettyPrintError(e)
            return False

        return True
Get all classes that inherit class "Activity" i.e. corresponding to an activity 181 | for c in allClasses: 182 | if c.get_superclassname().lower().find("activity") != -1: 183 | tempList.append(c) 184 | # 2.b. Get classes belonging to CURRENT activity 185 | info[activity]["classes"] = [] 186 | for c in tempList: 187 | if c.get_name()[1:-1].replace('/','.') == activity: 188 | info[activity]["classes"].append(c) 189 | if loggingON(): 190 | prettyPrint("Activity: %s, class: %s" % (activity, c), "debug") 191 | 192 | # 3. Get UI elements in every activity 193 | # 3.a. Identify the layout file's ID in the class' setContentView function call 194 | if len(info[activity]["classes"]) < 1: 195 | prettyPrint("Could not retrieve any Activity classes. Skipping", "warning") 196 | continue 197 | source = info[activity]["classes"][0].get_source() 198 | info[activity].pop("classes") # TODO: Do we really need a reference to the class object? 199 | index1 = source.find("void onCreate(") 200 | index2 = source.find("setContentView(", index1) + len("setContentView(") 201 | layoutID = "" 202 | while str.isdigit(source[index2]): 203 | layoutID += source[index2] 204 | index2 += 1 205 | # layoutID retrieved? 206 | if len(layoutID) < 1: 207 | prettyPrint("Could not retrieve layout ID from activity class. Skipping", "warning") 208 | continue 209 | # 3.b. Look for the corresponding layout name in the R$layout file 210 | layoutClass = DEX.get_class(str("L%s/R$layout;" % APK.package.replace('.','/'))) 211 | if layoutClass: 212 | layoutContent = layoutClass.get_source() 213 | eIndex = layoutContent.find(layoutID) 214 | sIndex = layoutContent.rfind("int", 0, eIndex) 215 | layoutName = layoutContent[sIndex+len("int"):eIndex].replace(' ','').replace('=','') 216 | else: 217 | # No layout class was found: Check the public.xml file 218 | prettyPrint("Could not find a \"R$layout\" class. 
def _parseActivityLayout(layoutFilePath, APK):
    """
    Parses an XML layout file of an activity and returns information about the found elements
    :param layoutFilePath: Path of the layout file inside the APK (e.g. "res/layout/main.xml")
    :type layoutFilePath: str
    :param APK: The analyzed APK object; only its get_file() method is used here
    :return: A dict mapping the "android:id" of every recognized element to a dict of its
             attributes (always holding "type", the element's node name). Elements without
             an "android:id" are skipped. An empty dict is returned if parsing fails.
    """
    # For every supported widget: (XML attribute to look for, key under which its value is stored)
    clickableAttributes = (
        ("android:onClick", "onclick"),
        ("android:visibility", "visibility"),
        ("android:clickable", "clickable"),
        ("android:longClickable", "longclickable"),
    )
    checkableAttributes = (
        ("android:onClick", "onclick"),
        ("android:visibility", "visibility"),
        ("android:checked", "checked"),
    )
    trackedAttributes = {
        "Button": clickableAttributes,
        "ImageButton": clickableAttributes,
        "RadioButton": clickableAttributes,
        "CheckBox": checkableAttributes,
        "CheckedTextView": checkableAttributes,
        # NOTE(review): "maxDate" is the only key that is not lower-cased; kept as-is
        # because consumers of this dict are not visible from here.
        "DatePicker": (("android:minDate", "mindate"), ("android:maxDate", "maxDate")),
        "EditText": (("android:editable", "editable"), ("android:inputType", "inputtype")),
    }

    def describe(node, wanted):
        """ Returns (element ID, attributes dict) for a node, or None if the node carries no "android:id" """
        nodeAttributes = node.attributes
        # Nodes without an attribute map (e.g. text between tags) or without an ID cannot be keyed
        if nodeAttributes is None or "android:id" not in nodeAttributes.keys():
            return None
        attr = {"type": node.nodeName}
        for xmlName, key in wanted:
            if xmlName in nodeAttributes.keys():
                # Always store the plain string value, never the attribute node itself:
                # the resulting dict is later written textually into the runner script.
                attr[key] = nodeAttributes[xmlName].value
        return nodeAttributes["android:id"].value, attr

    try:
        elements = {}
        # Read the contents of the layout file
        activityXML = AXMLPrinter(APK.get_file(layoutFilePath)).get_xml_obj()
        logEvent("Parsing the XML layout %s" % layoutFilePath)
        # Iterate over the direct children of the root layout and parse them
        for currentNode in activityXML.firstChild.childNodes:
            if currentNode.nodeName == "RadioGroup":
                # A radio group is described through the buttons it contains
                candidates = [(button, clickableAttributes) for button in currentNode.childNodes]
            elif currentNode.nodeName in trackedAttributes:
                candidates = [(currentNode, trackedAttributes[currentNode.nodeName])]
            else:
                # Unsupported widget (e.g. NumberPicker, Spinner)
                continue
            for node, wanted in candidates:
                described = describe(node, wanted)
                if described is not None:
                    elements[described[0]] = described[1]

    except Exception as e:
        prettyPrintError(e)
        return {}

    return elements
def defineArguments():
    """
    Builds the command-line interface of the second Aion experiment.

    Values given on the command line are kept as strings (no "type" conversion is
    applied), whereas untouched numeric options keep their integer defaults.
    :return: The configured argparse.ArgumentParser; call parse_args() on it to read the options
    """
    # "prog" has to name this script (runExperimentII.py) so usage/help output is not misleading
    parser = argparse.ArgumentParser(prog="runExperimentII.py", description="The second type of experiments of the Aion active learning framework.")
    # Mandatory options: dataset locations and database bookkeeping
    parser.add_argument("-x", "--malwaredir", help="The directory containing the malicious APK's to analyze and use as training/validation dataset", required=True)
    parser.add_argument("-g", "--goodwaredir", help="The directory containing the benign APK's to analyze and use as training/validation dataset", required=True)
    parser.add_argument("-d", "--datasetname", help="A unique name to give to the dataset used in the experiment (for DB storage purposes)", required=True)
    parser.add_argument("-r", "--runnumber", help="The number of the current run of the experiment (for DB storage purposes)", required=True)
    # Dynamic analysis options
    parser.add_argument("-f", "--analyzeapks", help="Whether to perform analysis on the retrieved APK's", default="no", choices=["yes", "no"])
    parser.add_argument("-t", "--analysistime", help="How long to run monkeyrunner (in seconds)", default=60)
    parser.add_argument("-u", "--analysisengine", help="The stimulation/analysis engine to use", choices=["droidbot", "droidutan"], default="droidutan")
    parser.add_argument("-v", "--vmnames", help="The name(s) of the Genymotion machine(s) to use for analysis (comma-separated)", default="")
    parser.add_argument("-z", "--vmsnapshots", help="The name(s) of the snapshot(s) to restore before analyzing an APK (comma-separated)", default="")
    # Learning options
    parser.add_argument("-a", "--algorithm", help="The algorithm used to classify apps", default="Ensemble", choices=["KNN10", "KNN25", "KNN50", "KNN100", "KNN250", "KNN500", "SVM", "Trees25", "Trees50", "Trees75", "Trees100", "Ensemble"])
    parser.add_argument("-s", "--selectkbest", help="Whether to select K best features from the ones extracted from the APK's", default=0)
    parser.add_argument("-e", "--featuretype", help="The type of features to consider during training", default="hybrid", choices=["static", "dynamic", "hybrid"])
    # Stopping criteria of the active learning loop
    parser.add_argument("-m", "--accuracymargin", help="The margin (in percentage) within which the training accuracy is allowed to dip", default=1)
    parser.add_argument("-i", "--maxiterations", help="The maximum number of iterations to allow", default=25)
    return parser
Exiting", "warning") 47 | return False 48 | 49 | iteration = 1 # Initial values 50 | reanalysis = False 51 | currentMetrics = {"accuracy": 0.0, "recall": 0.0, "specificity": 0.0, "precision": 0.0, "f1score": 0.0} 52 | previousMetrics = {"accuracy": -1.0, "recall": -1.0, "specificity": -1.0, "precision": -1.0, "f1score": -1.0} 53 | reanalyzeMalware, reanalyzeGoodware = [], [] # Use this as a cache until conversion 54 | allVMs = arguments.vmnames.split(',') 55 | allSnapshots = arguments.vmsnapshots.split(',') 56 | availableVMs = [] + allVMs # Initially 57 | 58 | # Initialize and populate database 59 | hashesDB = pickledb.load(getHashesDBPath(), True) 60 | aionDB = AionDB(int(arguments.runnumber), arguments.datasetname) 61 | 62 | # Load APK's and split into training and test datasets 63 | prettyPrint("Loading APK's from \"%s\" and \"%s\"" % (arguments.malwaredir, arguments.goodwaredir)) 64 | # Retrieve malware APK's 65 | malAPKs = glob.glob("%s/*.apk" % arguments.malwaredir) 66 | if len(malAPKs) < 1: 67 | prettyPrint("Could not find any malicious APK's" , "warning") 68 | else: 69 | prettyPrint("Successfully retrieved %s malicious instances" % len(malAPKs)) 70 | # Retrieve goodware APK's 71 | goodAPKs = glob.glob("%s/*.apk" % arguments.goodwaredir) 72 | if len(goodAPKs) < 1: 73 | prettyPrint("Could not find any benign APK's", "warning") 74 | else: 75 | prettyPrint("Successfully retrieved %s benign instances" % len(goodAPKs)) 76 | 77 | # Split the data into training and test datasets 78 | malTraining, malTest = [], [] 79 | goodTraining, goodTest = [], [] 80 | malTestSize, goodTestSize = len(malAPKs) / 3, len(goodAPKs) / 3 81 | # Start with the malicious APKs 82 | while len(malTest) < malTestSize: 83 | malTest.append(malAPKs.pop(random.randint(0, len(malAPKs)-1))) 84 | malTraining += malAPKs 85 | prettyPrint("[MALWARE] Training dataset size is %s, test dataset size is %s" % (len(malTraining), len(malTest))) 86 | # Same with benign APKs 87 | while len(goodTest) < 
goodTestSize: 88 | goodTest.append(goodAPKs.pop(random.randint(0, len(goodAPKs)-1))) 89 | goodTraining += goodAPKs 90 | prettyPrint("[GOODWARE] Training dataset size is %s, test dataset size is %s" % (len(goodTraining), len(goodTest))) 91 | 92 | while (round(currentMetrics["f1score"] - previousMetrics["f1score"], 2) >= -(float(arguments.accuracymargin)/100.0)) and (iteration <= int(arguments.maxiterations)): 93 | # Set/update the reanalysis flag 94 | reanalysis = True if iteration > 1 else False 95 | prettyPrint("Experiment I: iteration #%s" % iteration, "info2") 96 | # Update the iteration number 97 | aionDB.update("run", [("runIterations", str(iteration))], [("runID", arguments.runnumber), ("runDataset", arguments.datasetname)]) # UPDATE run SET runIterations=X WHERE runID=[runnumber] 98 | if arguments.analyzeapks == "yes": 99 | allAPKs = malTraining + goodTraining + malTest + goodTest if not reanalysis else reanalyzeMalware + reanalyzeGoodware + malTest + goodTest 100 | ######################## 101 | ## Main Analysis Loop ## 102 | ######################## 103 | currentProcesses = [] 104 | while len(allAPKs) > 0: 105 | prettyPrint("Starting analysis phase") 106 | # Step 1. Pop an APK from "allAPKs" (Defaut: last element) 107 | currentAPK = allAPKs.pop() 108 | # Step 2. Check availability of VMs for test 109 | while len(availableVMs) < 1: 110 | prettyPrint("No AVD's available for analysis. Sleeping for 10 seconds") 111 | print [p.name for p in currentProcesses] 112 | print [p.is_alive() for p in currentProcesses] 113 | # 2.a. Sleep for "analysisTime" 114 | time.sleep(10) 115 | # 2.b. Check for available machines 116 | for p in currentProcesses: 117 | if not p.is_alive(): 118 | if verboseON(): 119 | prettyPrint("Process \"%s\" is dead. A new AVD is available for analysis" % p.name, "debug") 120 | availableVMs.append(p.name) 121 | currentProcesses.remove(p) 122 | # Also restore clean state of machine 123 | if len(allAPKs) % 100 == 0: # How often to restore snapshot? 
124 | vm = p.name 125 | snapshot = allSnapshots[allVMs.index(vm)] 126 | prettyPrint("Restoring snapshot \"%s\" for AVD \"%s\"" % (snapshot, vm)) 127 | restoreVirtualBoxSnapshot(vm, snapshot) 128 | 129 | elif checkAVDState(p.name, "stopping")[0] or checkAVDState(p.name, "powered off")[0] or checkAVDState(p.name, "restoring snapshot")[0]: 130 | prettyPrint("AVD \"%s\" is stuck. Forcing a restoration" % p.name, "warning") 131 | vm = p.name 132 | snapshot = allSnapshots[allVMs.index(vm)] 133 | restoreVirtualBoxSnapshot(vm, snapshot) 134 | 135 | print [p.name for p in currentProcesses] 136 | print [p.is_alive() for p in currentProcesses] 137 | 138 | # Step 3. Pop one VM from "availableVMs" 139 | currentVM = availableVMs.pop() 140 | 141 | if verboseON(): 142 | prettyPrint("Running \"%s\" on AVD \"%s\"" % (currentAPK, currentVM)) 143 | 144 | # Step 4. Start the analysis thread 145 | pID = int(time.time()) 146 | if arguments.analysisengine == "droidutan": 147 | if currentAPK in malTest+goodTest: 148 | p = DroidutanAnalysis(pID, currentVM, currentVM, currentAPK, int(arguments.analysistime), currentAPK.replace(".apk", "_test_itn%s_filtered.log" % iteration)) 149 | else: 150 | p = DroidutanAnalysis(pID, currentVM, currentVM, currentAPK, int(arguments.analysistime)) 151 | elif arguments.analysisengine == "droidbot": 152 | p = DroidbotAnalysis(pID, currentVM, currentVM, currentAPK, allSnapshots[allVMs.index(currentVM)], int(arguments.analysistime)) 153 | p.daemon = True # Process will be killed if main thread exits 154 | p.start() 155 | currentProcesses.append(p) 156 | 157 | prettyPrint("%s APKs left to analyze" % len(allAPKs), "output") 158 | 159 | # Just make sure all VMs are done 160 | while len(availableVMs) < len(allVMs): 161 | prettyPrint("Waiting for AVD's to complete analysis") 162 | # 2.a. Sleep for "analysisTime" 163 | time.sleep(int(arguments.analysistime)) 164 | # 2.b. 
Check for available machines 165 | for p in currentProcesses: 166 | if not p.is_alive(): 167 | availableVMs.append(p.name) 168 | currentProcesses.remove(p) 169 | try: 170 | if not p.success: 171 | prettyPrint("Testing app \"%s\" failed. Re-analyzing later" % p.processTarget, "warning") 172 | allAPKs.append(p.processTarget) 173 | except exceptions.AttributeError as ae: 174 | prettyPrint("Oops!! No attribute called \"success\"", "warning") 175 | 176 | 177 | ####################################### 178 | ## Analyze log files after analysis ## 179 | ####################################### 180 | # Try to save some time by only analyzing apps that have been recently (re)analyzed 181 | allApps = malTraining + goodTraining + malTest + goodTest if not reanalysis else reanalyzeMalware + reanalyzeGoodware + malTest + goodTest 182 | for app in allApps: 183 | # 0. Retrieve the database file corresponding to the app 184 | if app in malTest+goodTest: 185 | inFile = app.replace(".apk", "_test_itn%s_filtered.log" % iteration) # if arguments.analysisengine == "droidutan" else TODO 186 | else: 187 | inFile = app.replace(".apk", "_filtered.log") if arguments.analysisengine == "droidutan" else app.replace(".apk", "_droidbot/logcat_filtered.log") 188 | 189 | # 1. Check its existence 190 | if not os.path.exists(inFile): 191 | prettyPrint("Unable to find filtered log file: \"%s\". Skipping" % inFile, "warning") 192 | continue 193 | 194 | # 2. 
Extract and save numerical features 195 | prettyPrint("Extracting %s features from APK \"%s\"" % (arguments.featuretype, inFile)) 196 | staticFeatures, dynamicFeatures = [], [] 197 | # Save time in case of dynamic features 198 | if arguments.featuretype == "static" or arguments.featuretype == "hybrid": 199 | sfBasic, sfPermissions, sfAPI, staticFeatures = extractStaticFeatures(app) 200 | prettyPrint("Successfully extracted %s static features" % len(staticFeatures)) 201 | if arguments.featuretype == "dynamic" or arguments.featuretype == "hybrid": 202 | trace, dynamicFeatures = extractDroidmonFeatures(inFile) 203 | prettyPrint("Successfully extracted %s dynamic features" % len(dynamicFeatures)) 204 | 205 | # 3. Store the features 206 | if arguments.featuretype == "static" and len(staticFeatures) > 0: 207 | features = staticFeatures 208 | elif arguments.featuretype == "dynamic" and len(dynamicFeatures) > 0: 209 | features = dynamicFeatures 210 | elif arguments.featuretype == "hybrid" and len(staticFeatures) > 0 and len(dynamicFeatures) > 0: 211 | features = staticFeatures + dynamicFeatures 212 | 213 | # 4. 
Write features to file 214 | if app in malTest+goodTest: 215 | featuresFile = open(app.replace(".apk", "_test_itn%s.%s" % (iteration, arguments.featuretype)), "w") 216 | else: 217 | featuresFile = open(app.replace(".apk", ".%s" % arguments.featuretype), "w") 218 | featuresFile.write("%s\n" % str(features)) 219 | featuresFile.close() 220 | prettyPrint("Done analyzing \"%s\"" % inFile) 221 | 222 | #################################################################### 223 | # Load the JSON and feature files as traces before classification # 224 | #################################################################### 225 | # Load numerical features 226 | allFeatureFiles = glob.glob("%s/*.%s" % (arguments.malwaredir, arguments.featuretype)) + glob.glob("%s/*.%s" % (arguments.goodwaredir, arguments.featuretype)) 227 | if len(allFeatureFiles) < 1: 228 | prettyPrint("Could not retrieve any feature files. Exiting", "error") 229 | return False 230 | 231 | prettyPrint("Retrieved %s feature files" % len(allFeatureFiles)) 232 | # Split the loaded feature files as training and test 233 | Xtr, ytr = [], [] 234 | for ff in allFeatureFiles: 235 | fileName = ff.replace(".%s" % arguments.featuretype, ".apk") 236 | x = Numerical.loadNumericalFeatures(ff) 237 | if len(x) < 1: 238 | prettyPrint("Empty feature vector returned. 
Skipping", "warning") 239 | continue 240 | if fileName in malTraining: 241 | Xtr.append(x) 242 | ytr.append(1) 243 | elif fileName in goodTraining: 244 | Xtr.append(x) 245 | ytr.append(0) 246 | 247 | 248 | metricsDict = {} 249 | ############ 250 | # Training # 251 | ############ 252 | # Classifying using [algorithm] 253 | prettyPrint("Classifying using %s" % arguments.algorithm) 254 | clfFile = "%s/db/%s_run%s_itn%s_%s.txt" % (getProjectDir(), arguments.algorithm, arguments.runnumber, iteration, arguments.featuretype) 255 | # Train and predict 256 | if arguments.algorithm.lower().find("trees") != -1: 257 | e = int(arguments.algorithm.replace("Trees", "")) 258 | clf, predicted, predicted_test = ScikitLearners.predictAndTestRandomForest(Xtr, ytr, estimators=e, selectKBest=int(arguments.selectkbest)) 259 | elif arguments.algorithm.lower().find("knn") != -1: 260 | k = int(arguments.algorithm.replace("KNN", "")) 261 | clf, predicted, predicted_test = ScikitLearners.predictAndTestKNN(Xtr, ytr, K=k, selectKBest=int(arguments.selectkbest)) 262 | elif arguments.algorithm.lower().find("svm") != -1: 263 | clf, predicted, predicted_test = ScikitLearners.predictAndTestSVM(Xtr, ytr, selectKBest=int(arguments.selectkbest)) 264 | else: 265 | K = [10, 25, 50, 100, 250, 500] 266 | E = [10, 25, 50, 75, 100] 267 | allCs = ["KNN-%s" % k for k in K] + ["FOREST-%s" % e for e in E] + ["SVM"] 268 | clf, predicted, predicted_test = ScikitLearners.predictAndTestEnsemble(Xtr, ytr, classifiers=allCs, selectKBest=int(arguments.selectkbest)) 269 | # Write to file 270 | open(clfFile, "w").write(pickle.dumps(clf)) 271 | metrics = ScikitLearners.calculateMetrics(ytr, predicted) 272 | metricsDict = metrics 273 | 274 | # Print and save results 275 | prettyPrint("Metrics using %s at iteration %s" % (arguments.algorithm, iteration), "output") 276 | prettyPrint("Accuracy: %s" % str(metricsDict["accuracy"]), "output") 277 | prettyPrint("Recall: %s" % str(metricsDict["recall"]), "output") 278 | 
prettyPrint("Specificity: %s" % str(metricsDict["specificity"]), "output") 279 | prettyPrint("Precision: %s" % str(metricsDict["precision"]), "output") 280 | prettyPrint("F1 Score: %s" % str(metricsDict["f1score"]), "output") 281 | # Insert datapoint into the database 282 | tstamp = getTimestamp(includeDate=True) 283 | learnerID = "%s_run%s_itn%s" % (arguments.algorithm, arguments.runnumber, iteration) 284 | aionDB.insert(table="learner", columns=["lrnID", "lrnParams"], values=[learnerID, clfFile]) 285 | aionDB.insert(table="datapoint", columns=["dpLearner", "dpIteration", "dpRun", "dpTimestamp", "dpFeature", "dpType", "dpAccuracy", "dpRecall", "dpSpecificity", "dpPrecision", "dpFscore"], values=[learnerID, str(iteration), arguments.runnumber, tstamp, arguments.featuretype, "TRAIN", str(metricsDict["accuracy"]), str(metricsDict["recall"]), str(metricsDict["specificity"]), str(metricsDict["precision"]), str(metricsDict["f1score"])]) 286 | 287 | # Save incorrectly-classified training instances for re-analysis 288 | reanalyzeMalware, reanalyzeGoodware = [], [] # Reset the lists to store new misclassified instances 289 | for index in range(len(ytr)): 290 | if predicted[index] != ytr[index]: 291 | if allFeatureFiles[index].find("test") != -1: 292 | prettyPrint("Skipping adding test file \"%s\" to the reanalysis lists" % allFeatureFiles[index]) 293 | else: 294 | # Add to reanalysis lists 295 | if allFeatureFiles[index].find("malware") != -1: 296 | reanalyzeMalware.append(allFeatureFiles[index].replace(arguments.featuretype, "apk")) 297 | else: 298 | reanalyzeGoodware.append(allFeatureFiles[index].replace(arguments.featuretype, "apk")) 299 | 300 | prettyPrint("Reanalyzing %s benign and %s malicious training apps" % (len(reanalyzeGoodware), len(reanalyzeMalware)), "debug") 301 | 302 | # Swapping metrics 303 | previousMetrics = currentMetrics 304 | currentMetrics = metricsDict 305 | 306 | # Commit results to the database 307 | aionDB.save() 308 | 309 | # Restore snapshots 
of all VMs 310 | vms, snaps = arguments.vmnames.split(','), arguments.vmsnapshots.split(',') 311 | if len(vms) > len(snaps): 312 | r = range(len(snaps)) 313 | else: 314 | r = range(len(vms)) # Or of snaps doesn't matter 315 | # Killall -9 VBoxHeadless 316 | #doomsdayCmd = ["killall", "-9", "VBoxHeadless"] 317 | doomsdayCmd = ["killall", "-9", "VBoxSVC"] 318 | subprocess.Popen(doomsdayCmd, stderr=subprocess.STDOUT, stdout=subprocess.PIPE) 319 | for i in r: 320 | prettyPrint("Restoring snapshot \"%s\" for AVD \"%s\"" % (snaps[i], vms[i])) 321 | if restoreVirtualBoxSnapshot(vms[i], snaps[i]): 322 | prettyPrint("Successfully restored AVD") 323 | else: 324 | prettyPrint("An error occurred while restoring the AVD") 325 | 326 | # Update the iteration number 327 | iteration += 1 328 | 329 | # Final Results 330 | prettyPrint("Training results after %s iterations" % str(iteration-1), "output") 331 | prettyPrint("Accuracy: %s" % currentMetrics["accuracy"], "output") 332 | prettyPrint("Recall: %s" % currentMetrics["recall"], "output") 333 | prettyPrint("Specificity: %s" % currentMetrics["specificity"], "output") 334 | prettyPrint("Precision: %s" % currentMetrics["precision"], "output") 335 | prettyPrint("F1 Score: %s" % currentMetrics["f1score"], "output") 336 | 337 | # Update the current run's end time 338 | aionDB.update("run", [("runEnd", getTimestamp(includeDate=True))], [("runID", arguments.runnumber)]) # UPDATE run SET runEnd=X WHERE runID=[runnumber] 339 | 340 | ####################################################### 341 | # Commence the test phase using the "best classifier" # 342 | ####################################################### 343 | # 1. Retrieve the best classifier and its iteration (X) 344 | results = aionDB.execute("SELECT * FROM datapoint WHERE dpRun='%s' AND dpFeature='%s' ORDER BY dpFScore DESC" % (arguments.runnumber, arguments.featuretype)) 345 | if not results: 346 | prettyPrint("Could not retrieve data about the training phase. 
Exiting", "error") 347 | aionDB.close() 348 | return False 349 | 350 | data = results.fetchall() 351 | if len(data) < 1: 352 | prettyPrint("Could not retrieve data about the training phase. Exiting", "error") 353 | aionDB.close() 354 | return False 355 | 356 | # 1.a. Best classifier should be the first entry 357 | bestClassifier, bestItn, bestF1score, bestSp = data[0][1], data[0][2], data[0][11], data[0][9] 358 | if verboseON(): 359 | prettyPrint("The best classifier is %s at iteration %s with F1score of %s and Specificity score of %s" % (bestClassifier, bestItn, bestF1score, bestSp), "debug") 360 | # 1.b. Load classifier from hyper parameters file 361 | results = aionDB.execute("SELECT * FROM learner WHERE lrnID='%s'" % bestClassifier) 362 | if not results: 363 | prettyPrint("Could not find the hyperparameters file for \"%s\". Exiting" % bestClassifier, "error") 364 | aionDB.close() 365 | return False 366 | 367 | data = results.fetchall() 368 | if len(data) < 1: 369 | prettyPrint("Could not find the hyperparameters file for \"%s\". Exiting" % bestClassifier, "error") 370 | aionDB.close() 371 | return False 372 | 373 | clfFile = data[0][1] 374 | if not os.path.exists(clfFile): 375 | prettyPrint("The file \"%s\" does not exist. Exiting" % clfFile, "error") 376 | aionDB.close() 377 | return False 378 | 379 | prettyPrint("Loading classifier \"%s\" from \"%s\"" % (bestClassifier, clfFile)) 380 | clf = pickle.loads(open(clfFile).read()) 381 | 382 | # 2. Classify feature vectors 383 | P, N = 0.0, 0.0 384 | TP_maj, TN_maj, FP_maj, FN_maj = 0.0, 0.0, 0.0, 0.0 # To keep track of majority vote classification 385 | TP_one, TN_one, FP_one, FN_one = 0.0, 0.0, 0.0, 0.0 # To keep track of one-instance classification 386 | for app in malTest + goodTest: 387 | prettyPrint("Processing test app \"%s\"" % app) 388 | # 2.a. 
Retrieve all feature vectors up to [iteration] 389 | appVectors = {} 390 | for i in range(1, bestItn+1): 391 | if os.path.exists(app.replace(".apk", "_test_itn%s.%s" % (i, arguments.featuretype))): 392 | v = Numerical.loadNumericalFeatures(app.replace(".apk", "_test_itn%s.%s" % (i, arguments.featuretype))) 393 | if len(v) > 1: 394 | appVectors["itn%s" % i] = v 395 | 396 | if len(appVectors) < 1: 397 | prettyPrint("Could not retrieve any feature vectors. Skipping", "warning") 398 | continue 399 | 400 | prettyPrint("Successfully retrieved %s feature vectors of type \"%s\"" % (len(appVectors), arguments.featuretype)) 401 | # 2.b. Classify each feature vector using the loaded classifier 402 | appLabel = 1 if app in malTest else 0 403 | if appLabel == 1: 404 | P += 1.0 405 | else: 406 | N += 1.0 407 | labels = ["Benign", "Malicious"] 408 | appMalicious, appBenign = 0.0, 0.0 409 | for v in appVectors: 410 | predictedLabel = clf.predict(appVectors[v]).tolist()[0] 411 | prettyPrint("\"%s\" app was classified as \"%s\" according to iteration %s" % (labels[appLabel], labels[predictedLabel], v.replace("itn", "")), "output") 412 | classifiedCorrectly = "YES" if labels[appLabel] == labels[predictedLabel] else "NO" 413 | aionDB.insert("testapp", ["taName", "taRun", "taIteration", "taType", "taClassified", "taLog"], [app, arguments.runnumber, v.replace("itn", ""), labels[appLabel], classifiedCorrectly, app.replace(".apk", "_test_%s_filtered.log" % v)]) 414 | if predictedLabel == 1: 415 | appMalicious += 1.0 416 | else: 417 | appBenign += 1.0 418 | 419 | # 2.c. Decide upon the app's label according to majority vote vs. 
one-instance 420 | majorityLabel = 1 if (appMalicious/float(len(appVectors))) >= 0.5 else 0 421 | oneLabel = 1 if appMalicious >= 1.0 else 0 422 | if appLabel == 1: 423 | # Malicious app 424 | if majorityLabel == 1: 425 | TP_maj += 1.0 426 | else: 427 | FN_maj += 1.0 428 | if oneLabel == 1: 429 | TP_one += 1.0 430 | else: 431 | FN_one += 1.0 432 | else: 433 | # Benign app 434 | if majorityLabel == 1: 435 | FP_maj += 1.0 436 | else: 437 | TN_maj += 1.0 438 | if oneLabel == 1: 439 | FP_one += 1.0 440 | else: 441 | TN_one += 1.0 442 | # 2.d. Declare the classification of the app in question 443 | prettyPrint("\"%s\" app has been declared as \"%s\" by majority vote and as \"%s\" by one-instance votes" % (labels[appLabel], labels[majorityLabel], labels[oneLabel]), "output") 444 | 445 | # 3. Calculate metrics 446 | accuracy_maj, accuracy_one = (TP_maj+TN_maj)/(P+N), (TP_one+TN_one)/(P+N) 447 | recall_maj, recall_one = TP_maj/P, TP_one/P 448 | specificity_maj, specificity_one = TN_maj/N, TN_one/N 449 | precision_maj, precision_one = TP_maj/(TP_maj+FP_maj), TP_one/(TP_one+FP_one) 450 | f1score_maj, f1score_one = 2 * (precision_maj*recall_maj) / (precision_maj+recall_maj), 2 * (precision_one*recall_one) / (precision_one+recall_one) 451 | 452 | # 4. 
Display and store metrics 453 | prettyPrint("Test metrics using %s at run %s" % (arguments.algorithm, arguments.runnumber), "output") 454 | prettyPrint("Accuracy (majority): %s versus accuracy (one-instance): %s" % (str(accuracy_maj), str(accuracy_one)), "output") 455 | prettyPrint("Recall (majority): %s versus recall (one-instance): %s" % (str(recall_maj), str(recall_one)), "output") 456 | prettyPrint("Specificity (majority): %s versus specificity (one-instance): %s" % (str(specificity_maj), str(specificity_one)), "output") 457 | prettyPrint("Precision (majority): %s versus precision (one-instance): %s" % (str(precision_maj), str(precision_one)), "output") 458 | prettyPrint("F1 Score (majority): %s versus F1 score (one-instance): %s" % (str(f1score_maj), str(f1score_one)), "output") 459 | 460 | # 4.b. Store in the database 461 | aionDB.insert(table="datapoint", columns=["dpLearner", "dpIteration", "dpRun", "dpTimestamp", "dpFeature", "dpType", "dpAccuracy", "dpRecall", "dpSpecificity", "dpPrecision", "dpFscore"], values=[bestClassifier, bestItn, arguments.runnumber, tstamp, arguments.featuretype, "TEST:Maj", accuracy_maj, recall_maj, specificity_maj, precision_maj, f1score_maj]) 462 | # Same for one-instance classification scheme 463 | aionDB.insert(table="datapoint", columns=["dpLearner", "dpIteration", "dpRun", "dpTimestamp", "dpFeature", "dpType", "dpAccuracy", "dpRecall", "dpSpecificity", "dpPrecision", "dpFscore"], values=[bestClassifier, bestItn, arguments.runnumber, tstamp, arguments.featuretype, "TEST:One", accuracy_one, recall_one, specificity_one, precision_one, f1score_one]) 464 | 465 | # Don't forget to save and close the Aion database 466 | aionDB.close() 467 | 468 | # Send notification email 469 | subject = "Run %s on %s Successful" % (arguments.runnumber, arguments.datasetname) 470 | msg = "Achieved results:\nTest F1 score (majority): %s versus F1 score (one-instance): %s\nTest Specificity (majority): %s versus specificity (one-instance): %s" % 
(str(f1score_maj), str(f1score_one), str(specificity_maj), str(specificity_one)) 471 | sendEmail("alu-precision", getAdminEmail(), subject, msg) 472 | 473 | except Exception as e: 474 | prettyPrintError(e) 475 | subject = "Run %s on %s failed!!" % (arguments.runnumber, arguments.datasetname) 476 | msg = "Error: %s" % e 477 | sendEmail("alu-precision", getAdminEmail(), subject, msg) 478 | 479 | return False 480 | 481 | prettyPrint("Good day to you ^_^") 482 | return True 483 | 484 | if __name__ == "__main__": 485 | main() 486 | --------------------------------------------------------------------------------