├── _config.yml
├── conf
    ├── __init__.py
    └── config.py
├── shared
    ├── __init__.py
    ├── App.py
    └── constants.py
├── utils
    ├── __init__.py
    ├── graphics.py
    ├── data.py
    ├── misc.py
    └── db.py
├── data_generation
    ├── collection
    │   ├── __init__.py
    │   └── playStoreCrawler.py
    ├── reconstruction
    │   ├── __init__.py
    │   ├── Numerical.py
    │   └── Trace.py
    ├── __init__.py
    └── stimulation
    │   ├── __init__.py
    │   ├── DroidbotTest.py
    │   ├── DroidutanTest.py
    │   └── Garfield.py
├── data_inference
    ├── extraction
    │   ├── __init__.py
    │   ├── StringKernelSVM.py
    │   └── featureExtraction.py
    ├── visualization
    │   ├── __init__.py
    │   └── visualizeData.py
    ├── learning
    │   ├── __init__.py
    │   └── HMM.py
    └── __init__.py
├── __init__.py
├── .gitignore
├── docs
    ├── dbrecovery2.sql
    ├── google_plugin.txt
    ├── dbrecovery.sql
    ├── custom_hooks_introspy.txt
    └── hooks.json
├── tools
    ├── extractStaticFeatures.py
    ├── staticResults.py
    ├── visualizeFeatureVectors.py
    ├── downloadAPKPlayStore.py
    ├── staticExperimentI.py
    └── runExperimentII.py
└── README.md


/_config.yml:
--------------------------------------------------------------------------------
1 | theme: jekyll-theme-hacker


--------------------------------------------------------------------------------
/conf/__init__.py:
--------------------------------------------------------------------------------
1 | __all__ = ["config"]
2 | 


--------------------------------------------------------------------------------
/shared/__init__.py:
--------------------------------------------------------------------------------
1 | __all__ = ["App", "constants"]
2 | 


--------------------------------------------------------------------------------
/utils/__init__.py:
--------------------------------------------------------------------------------
1 | __all__ = [ "db", "data", "graphics", "misc" ]
2 | 


--------------------------------------------------------------------------------
/data_generation/collection/__init__.py:
--------------------------------------------------------------------------------
1 | __all__ = ["playStoreCrawler"]
2 | 


--------------------------------------------------------------------------------
/data_inference/extraction/__init__.py:
--------------------------------------------------------------------------------
1 | __all__ = ["featureExtraction"]
2 | 


--------------------------------------------------------------------------------
/data_inference/visualization/__init__.py:
--------------------------------------------------------------------------------
1 | __all__ = ["visualizeData"]
2 | 


--------------------------------------------------------------------------------
/data_inference/learning/__init__.py:
--------------------------------------------------------------------------------
# Only submodules that exist in this package may be listed here: a missing
# name makes "from Aion.data_inference.learning import *" fail.
# NOTE(review): "ScikitLearners" was listed but no such module is present in
# the package directory -- restore the entry if the module is added back.
__all__ = ["HMM"]
2 | 


--------------------------------------------------------------------------------
/data_generation/reconstruction/__init__.py:
--------------------------------------------------------------------------------
1 | __all__ = ["Trace", "Numerical"]
2 | 


--------------------------------------------------------------------------------
/__init__.py:
--------------------------------------------------------------------------------
1 | __all__ = ["conf", "utils", "data_generation", "data_inference", "shared"]
2 | 


--------------------------------------------------------------------------------
/data_generation/__init__.py:
--------------------------------------------------------------------------------
1 | __all__ = [ "collection", "stimulation", "reconstruction" ]
2 | 


--------------------------------------------------------------------------------
/data_generation/stimulation/__init__.py:
--------------------------------------------------------------------------------
1 | __all__ = ["DroidbotTest", "DroidutanTest", "Garfield"]
2 | 


--------------------------------------------------------------------------------
/data_inference/__init__.py:
--------------------------------------------------------------------------------
# Only sub-packages that exist may be listed here: a missing name makes
# "from Aion.data_inference import *" fail.
# NOTE(review): "projection" was listed but no such sub-package is present --
# restore the entry if the package is added back.
__all__ = ["extraction", "learning", "visualization"]
2 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # ignore compiled python files
 2 | *.pyc
 3 | 
 4 | # ignore any APK's used for testing
 5 | *.apk
 6 | 
 7 | # ignore log file
 8 | *.log
 9 | 
10 | # ignore .swp files
11 | *.swp
12 | 
13 | # ignore configuration file
14 | conf/*
15 | 
16 | # ignore python files in scripts directory
17 | files/scripts/*
18 | 
19 | # Ignore "command" files
20 | *.command
21 | 
22 | # Ignore text files
23 | *.txt
24 | 
25 | # Ignore back up files
26 | *.py_bak
27 | 
28 | # Ignore database files
29 | *.db
30 | 


--------------------------------------------------------------------------------
/shared/App.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | 
 3 | 
 4 | class App:
 5 |     """ A representation of an Android app containing basic knowledge about the app """
 6 |     def __init__(self, appName, appID, appVersionCode, appOfferType, appRating, appPrice, appSize):
 7 |         self.appName = appName
 8 |         self.appID = appID
 9 |         self.appVersionCode = appVersionCode
10 |         self.appOfferType = appOfferType
11 |         self.appRating = appRating
12 |         self.appPrice = appPrice
13 |         self.appSize = appSize
14 | 
15 | 
16 | 


--------------------------------------------------------------------------------
/data_generation/reconstruction/Numerical.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | 
 3 | from Aion.utils.graphics import *
 4 | from Aion.utils.data import *
 5 | 
 6 | import numpy
 7 | import os
 8 | 
 9 | def loadNumericalFeatures(featuresFile, delimiter=","):
10 |     """Loads numerical features from a file and returns a list"""
11 |     try:
12 |         if not os.path.exists(featuresFile):
13 |             prettyPrint("Unable to find the features file \"%s\"" % featuresFile, "warning")
14 |             return []
15 |         content = open(featuresFile).read()
16 |         if content.lower().find("[") != -1 and content.lower().find("]") != -1:
17 |             features = eval(content)
18 |         else:
19 |             features = [float(f) for f in content.replace(' ','').split(delimiter)]
20 | 
21 |     except Exception as e:
22 |         prettyPrintError(e)
23 |         return []
24 | 
25 |     return features
26 | 


--------------------------------------------------------------------------------
/conf/config.py:
--------------------------------------------------------------------------------
 1 | # Google Play Store Crawler Configuration
 2 | LANG            	= # can be en_US, fr_FR, ...
 3 | ANDROID_ID      	= # '38c6523ac43ef9e1'
 4 | GOOGLE_LOGIN    	= # 'someone@gmail.com'
 5 | GOOGLE_PASSWORD 	= # 'yourpassword'
 6 | AUTH_TOKEN      	= None
 7 | SEPARATOR       	= '|'
 8 | 
 9 | # Plotly API Key
10 | PLOTLY_API		= # API Key or None
11 | 
12 | # Directories
13 | AION_DIR 		= # 'some directory"
14 | DOWNLOADS_DIR		= AION_DIR + "files/downloads"
15 | 
16 | # Logging and debug messages
17 | VERBOSE			= "ON"
18 | LOGGING			= "ON"
19 | LOG_FILE		= AION_DIR + "/aion.log"
20 | ADMIN_EMAIL		= # someone@somewhere.com
21 | 
22 | # Android SDK paths and constants
23 | ANDROID_SDK 		= # 'some directory'
24 | ANDROID_ADB 		= ANDROID_SDK + "/platform-tools/adb"
25 | 
26 | # Misc paths
27 | GENYMOTION_PLAYER 	= '/opt/genymobile/genymotion/player'
28 | 
29 | # DB-related information
30 | AION_DB			= AION_DIR + "/db/aion2.db"
31 | HASHES_DB		= AION_DIR + "/db/hashes.db"
32 | DB_RECOVERY		= AION_DIR + "/docs/dbrecovery2.sql"
33 | 


--------------------------------------------------------------------------------
/docs/dbrecovery2.sql:
--------------------------------------------------------------------------------
 1 | CREATE TABLE learner( 
 2 |     lrnID			TEXT PRIMARY KEY, 
 3 |     lrnParams		TEXT
 4 | );
 5 | 
 6 | CREATE TABLE run( 
 7 |     runID       	INTEGER, 
 8 |     runDataset  	TEXT,
 9 |     runStart  		TEXT,
10 |     runEnd			TEXT,
11 |     runIterations	INTEGER,
12 |     PRIMARY KEY (runID, runDataset)
13 | );
14 | 
15 | CREATE TABLE datapoint( 
16 |     dpID        	INTEGER PRIMARY KEY AUTOINCREMENT, 
17 |     dpLearner		TEXT,
18 |     dpIteration		INTEGER,
19 |     dpRun			INTEGER,
20 |     dpTimestamp 	TEXT,
21 |     dpFeature       TEXT,
22 |     dpType          TEXT,
23 |     dpAccuracy		REAL,
24 |     dpRecall		REAL,
25 |     dpSpecificity	REAL,
26 |     dpPrecision		REAL,
27 |     dpFscore		REAL,
28 |     FOREIGN KEY (dpLearner) REFERENCES parent(learnerID),
29 |     FOREIGN KEY (dpRun) REFERENCES parent(runID)
30 | );
31 | 
32 | CREATE TABLE testapp(
33 |     taName		TEXT,
34 |     taRun		INTEGER,
35 |     taIteration		INTEGER,
36 |     taType		TEXT,
37 |     taClassified	TEXT,
38 |     taLog		TEXT,
39 |     PRIMARY KEY (taName, taRun, taIteration),
40 |     FOREIGN KEY (taRun) REFERENCES parent(runID)
41 | );
42 | 
43 | 


--------------------------------------------------------------------------------
/docs/google_plugin.txt:
--------------------------------------------------------------------------------
 1 | ===================================
 2 | | Using Google Play API in Python |
 3 | ===================================
 4 | 
 5 | [*] from googleplay_api.googleplay import GooglePlayAPI
 6 | 
 7 | [*] Instantiate an object:	api = GooglePlayAPI()
 8 | [*] Login "GooglePlay":		api.login(GOOGLE_LOGIN, GOOGLE_PASSWORD, AUTH_TOKEN)
 9 | [*] Browse categories "api.browse()":
10 |     > returns categories e.g. "cats"
11 |     > "cats.category" --> a list of categories as "BrowseLink" objects
12 |     > "for x in cats" --> "x.name", "x.dataUrl" e.g. "browse?c=3&cat=ANDROID"
13 | 
14 | [*] Browse subcategories "api.list([category])":
15 |     > returns subcategories e.g. "sub"
16 |     > "sub.doc" --> a list of subcategories as "DocV2" objects
17 |     > "for x in sub.doc" --> "x.title" e.g. "TopApps", "x.docid" e.g. "apps_topselling_free"
18 | 
19 | [*] Browse apps in subcategory "api.list([category], [subcategory])":
20 |     > "apps = api.list("c", "s")" --> "apps.doc" holds one DocV2 element
21 |     > "for x in apps.doc[0].child" --> "x.title" = app name, "x.aggregateRating.starRating" = rating, "x.docid" = app id e.g. com.whatsapp, "x.details.appDetails.versionCode", "x.offer[0].offerType", "x.offer[0].formattedAmount" = price, "x.details.appDetails.installationSize".
22 |     > "api.download(x.docid, vc, ot)"
23 |     
24 | 


--------------------------------------------------------------------------------
/tools/extractStaticFeatures.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | 
 3 | 
 4 | import glob, sys, timeout_decorator
 5 | from Aion.data_inference.extraction.featureExtraction import *
 6 | 
 7 | @timeout_decorator.timeout(120) # Two minutes
 8 | def analyze(a):
 9 |     return extractStaticFeatures(a)
10 | 
11 | 
# Script body: extracts the static features of every APK found directly under
# the directory given on the command line and stores them next to each APK in
# a ".static" file. print() is called with a single pre-formatted string so the
# script behaves the same under Python 2 and Python 3.
if len(sys.argv) < 2:
    print("[Usage]: python extractStaticFeatures.py [app_dir]")
    sys.exit(0)

app_dir = sys.argv[1]

alldata = glob.glob("%s/*.apk" % app_dir)

if not alldata:
    print("[*] Unable to retrieve APK's from the directories \"%s\"" % (app_dir))
    sys.exit(0)

total = len(alldata)
print("[*] Successfully retrieved %s APK's from  the directories \"%s\"" % (total, app_dir))

# Commence analysis
for counter, a in enumerate(alldata, 1):
    try:
        print("Analyzing app #%s out of %s apps" % (counter, total))
        basic, permissions, apicalls, allfeatures = analyze(a)
        # Swap only the trailing extension: str.replace would also rewrite any
        # ".apk" that happens to occur earlier in the path.
        staticFile = a[:-len(".apk")] + ".static"
        print("[*] Saving all features to \"%s\"" % staticFile)
        # The context manager closes the file even if the write fails
        with open(staticFile, "w") as f:
            f.write(str(allfeatures))

    except Exception as e:
        # Best effort: report the failure (including analysis timeouts) and
        # carry on with the next app
        print("Error encountered: %s" % e)
        continue
43 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | ## Welcome to Aion
 2 | 
 3 | Aion is a framework (under construction) meant to apply the notion of active learning to the problem of stimulation, analysis, and detection of Android repackaged/piggybacked malware.
 4 | 
 5 | In a nutshell, the framework is developed as a set of tools and utilities categorized according to their objective. For example, [data_inference] contains different machine learning feature extraction, feature selection, and classification modules and methods. Those utilities are used as an API by tools residing under the [tools] directory.
 6 | 
 7 | We are still experimenting with the applicability of such an idea, hence the lack of proper documentation.
 8 | 
 9 | ### Requirements
10 | 
11 | Aion utilizes various tools including:
12 | 
13 | - [androguard](https://github.com/androguard/androguard): for static analysis of APK's and retrieval of components and other metadata
14 | - [Genymotion](https://www.genymotion.com/fun-zone/): we rely on Genymotion to run AVD on which apps are tested and monitored.
15 | - [Droidbot](http://honeynet.github.io/droidbot): used as an option for randomly interacting with an APK-under-test.
16 | - [droidmon](https://github.com/idanr1986/droidmon): keeps track of the app's runtime behavior in the form of API calls it issues.
17 | - [Droidutan](https://github.com/aleisalem/droidutan): a "homemade", less fancy equivalent to Droidbot.
18 | - [scikit-learn](https://scikit-learn.org): the main provider of machine learning algorithms.
19 | ### Support or Contact
20 | 
21 | Please feel free to pull/fork the repository. We kindly ask you to cite us, if anything useful comes out of your endeavors.
22 | 
23 | You can get in touch with the contributor of this repository via [salem@in.tum.de].
24 | 
25 | Happy hunting. :)
26 | 


--------------------------------------------------------------------------------
/utils/graphics.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | 
 3 | # Python modules
 4 | import time, sys, os
 5 | # Aion modules
 6 | from Aion.conf import config
 7 | from Aion.utils.data import *
 8 | from Aion.utils.misc import *
 9 | 
10 | # Gray, Red, Green, Yellow, Blue, Magenta, Cyan, White, Crimson
11 | colorIndex = [ "30", "31", "32", "33", "34", "35", "36", "37", "38" ]
12 | 
13 | 
14 | ####################
15 | # Defining Methods #
16 | #################### 
def prettyPrint(msg, mode="info"):
    """ Prints "msg" in bold, colored according to "mode", followed by a timestamp.
    Colors: "info" Green, "error" Red, "warning" Yellow, "info2" Blue, "output" Magenta, "debug" White.
    Any other mode is printed in Green. Messages of every mode except "info" are also logged when logging is on. """
    # ANSI foreground color codes keyed by mode
    palette = {
        "info": "32",     # Green
        "error": "31",    # Red
        "warning": "33",  # Yellow
        "info2": "34",    # Blue
        "output": "35",   # Magenta
        "debug": "37",    # White
    }
    color = palette.get(mode, "32")
    msg = "[*] %s. %s" % (msg, getTimestamp(includeDate=True))
    print("\033[1;%sm%s\033[1;m" % (color, msg))
    # Plain "info" messages are never written to the log
    shouldLog = loggingON() and mode != "info"
    if shouldLog:
        logEvent("%s: %s" % (getTimestamp(includeDate=True), msg))
39 | 
def prettyPrintError(ex):
    """ Pretty prints an error/exception message

    :param ex: The exception (or message) to report
    The file name and line number are taken from the traceback of the exception
    currently being handled. When there is none (e.g. the function is called
    outside an "except" block), the traceback attached to "ex" is used if
    available; otherwise the message is printed without a location.
    """
    exc_type, exc_obj, exc_tb = sys.exc_info()
    if exc_tb is None:
        # No exception is being handled right now: fall back to what "ex" carries
        exc_type = type(ex)
        exc_tb = getattr(ex, "__traceback__", None)
    if exc_tb is None:
        msg = "Error \"%s\" encountered: %s" % (exc_type, ex)
    else:
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        msg = "Error \"%s\" encountered in \"%s\" line %s: %s" % (exc_type, fname, exc_tb.tb_lineno, ex)
    prettyPrint(msg, "error")
46 | 
47 | 


--------------------------------------------------------------------------------
/docs/dbrecovery.sql:
--------------------------------------------------------------------------------
 1 | CREATE TABLE learner( 
 2 |     learnerID		INTEGER PRIMARY KEY AUTOINCREMENT, 
 3 |     learnerName 	TEXT
 4 | );
 5 | 
 6 | CREATE TABLE run( 
 7 |     runID       	INTEGER, 
 8 |     runDataset  	TEXT,
 9 |     runStart  		TEXT,
10 |     runEnd		TEXT,
11 |     runIterations	INTEGER,
12 |     PRIMARY KEY (runID, runDataset)
13 | );
14 | 
15 | CREATE TABLE app( 
16 |     appID       	INTEGER PRIMARY KEY AUTOINCREMENT, 
17 |     appName    		TEXT, 
18 |     appType 		TEXT,
19 |     appRunID  		INTEGER,
20 |     appRuns		INTEGER,
21 |     FOREIGN KEY (appRunID) REFERENCES parent(runID)
22 | );
23 | 
24 | CREATE TABLE datapoint ( 
25 |     dpID        	INTEGER PRIMARY KEY AUTOINCREMENT, 
26 |     dpLearner		INTEGER,
27 |     dpIteration		INTEGER,
28 |     dpRun		INTEGER,
29 |     dpTimestamp 	TEXT,
30 |     dpFeature           TEXT,
31 |     dpType          	TEXT,
32 |     dpAccuracy		REAL,
33 |     dpRecall		REAL,
34 |     dpSpecificity	REAL,
35 |     dpPrecision		REAL,
36 |     dpFscore		REAL,
37 |     FOREIGN KEY (dpLearner) REFERENCES parent(learnerID),
38 |     FOREIGN KEY (dpRun) REFERENCES parent(runID)
39 | );
40 | 
41 | INSERT INTO learner (learnerName) VALUES ("KNN10");
42 | INSERT INTO learner (learnerName) VALUES ("KNN25");
43 | INSERT INTO learner (learnerName) VALUES ("KNN50");
44 | INSERT INTO learner (learnerName) VALUES ("KNN100");
45 | INSERT INTO learner (learnerName) VALUES ("KNN250");
46 | INSERT INTO learner (learnerName) VALUES ("KNN500");
47 | INSERT INTO learner (learnerName) VALUES ("Trees10");
48 | INSERT INTO learner (learnerName) VALUES ("Trees25");
49 | INSERT INTO learner (learnerName) VALUES ("Trees50");
50 | INSERT INTO learner (learnerName) VALUES ("Trees75");
51 | INSERT INTO learner (learnerName) VALUES ("Trees100");
52 | INSERT INTO learner (learnerName) VALUES ("SVM");
53 | INSERT INTO learner (learnerName) VALUES ("Ensemble");
54 | 


--------------------------------------------------------------------------------
/utils/data.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | 
 3 | from Aion.conf import config
 4 | 
def getAdminEmail():
    """Returns the admin's email address for notifications (config.ADMIN_EMAIL)"""
    return config.ADMIN_EMAIL

def getAionDBPath():
    """Returns the path to the Aion DB (config.AION_DB)"""
    return config.AION_DB

def getAionDBRecovery():
    """Returns the path to the Aion DB recovery script (config.DB_RECOVERY)"""
    return config.DB_RECOVERY

def getGoogleCreds():
    """Returns the Google Account credentials as a (login, password) tuple"""
    return config.GOOGLE_LOGIN, config.GOOGLE_PASSWORD

def getHashesDBPath():
    """Returns the path to the hashes DB for quick lookup of app package names (config.HASHES_DB)"""
    return config.HASHES_DB

def getSDKDir():
    """Returns the Android SDK directory path (config.ANDROID_SDK)"""
    return config.ANDROID_SDK

def getADBPath():
    """Returns the path to the adb tool (config.ANDROID_ADB)"""
    return config.ANDROID_ADB

def getGenymotionPlayer():
    """Returns the path to the Genymotion VM player (config.GENYMOTION_PLAYER)"""
    return config.GENYMOTION_PLAYER

def getProjectDir():
    """Returns the project directory as configured in config.AION_DIR"""
    return config.AION_DIR
40 | 
def loggingON():
    """Tells whether logging is enabled, i.e. whether config.LOGGING equals "ON" """
    return config.LOGGING == "ON"
45 | 
def verboseON():
    """Tells whether verbose debug messages should be shown, i.e. whether config.VERBOSE equals "ON" """
    return config.VERBOSE == "ON"
50 | 
def loadDirs():
    """Loads the directories' paths from the config.py file

    :return: A dictionary with the keys "Aion_DIR", "DOWNLOADS_DIR", and "ANDROID_SDK"
    """
    # The setting is named AION_DIR in config.py; the dictionary key keeps its
    # mixed-case spelling so that existing lookups of "Aion_DIR" keep working.
    return {"Aion_DIR": config.AION_DIR, "DOWNLOADS_DIR": config.DOWNLOADS_DIR, "ANDROID_SDK": config.ANDROID_SDK}
54 | 
def loadPlayStoreConfig():
    """Collects the Play Store crawling settings from config.py into a dictionary keyed by setting name"""
    settings = ("LANG", "ANDROID_ID", "GOOGLE_LOGIN", "GOOGLE_PASSWORD", "AUTH_TOKEN")
    return {setting: getattr(config, setting) for setting in settings}
58 | 
def logEvent(msg):
    """Logs a message to the global log file as per config.py

    :param msg: The message to append (a newline is added)
    :return: Always True, whether or not logging is enabled
    """
    if config.LOGGING == "ON":
        # The context manager flushes and closes the log file after every event
        with open(config.LOG_FILE, "a") as logFile:
            logFile.write("%s\n" % msg)

    return True
65 | 


--------------------------------------------------------------------------------
/docs/custom_hooks_introspy.txt:
--------------------------------------------------------------------------------
 1 | --------------------------------------------------------
 2 | | Classes and methods to hook and their argument types |
 3 | --------------------------------------------------------
 4 | 
 5 | [*] android.app.Activity:
 6 |         > startActivity(Intent, Bundle)
 7 |         > setContentView(View [, ViewGroup.LayoutParams])
 8 |         > setVisible(boolean)
 9 |         > takeKeyEvents(boolean)
10 | 
11 | [*] android.app.AlertDialog:
12 |         > setView(View [,int, int, int, int])
13 | 
14 | [*] android.app.Dialog:
15 |         > setContentView(~)
16 |         > setView(~)
17 |         > show()
18 | 
19 | [*] android.app.DownloadManager:
20 |         > addCompletedDownload(String, String, boolean, String, String, long, boolean)
21 |         > enqueue(DownloadManager.Request)
22 |         > getUriForDownloadedFile(long)
23 |         > openDownloadedFile(long)
24 |         > query(DownloadManager.Query)
25 | 
26 | [*] android.app.IntentService:
27 |         > onStartCommand(Intent, int, int)
28 | 
29 | 
30 | [*] android.content.BroadcastReceiver:
31 |         > goAsync()
32 |         > onReceive(Context, Intent)
33 | 
34 | [*] android.content.ContentProvider:
35 |         > delete(Uri, String, String[])
36 |         > insert(Uri, ContentValues)
37 |         > openFile(Uri, String)
38 |         > query(Uri, String[], String, String[], String [, CancellationSignal])
39 |         > update(Uri, ContentValues, String, String[]) 
40 | 
41 | [*] android.content.ContextWrapper:
42 |         > bindService(Intent, ServiceConnection, int)
43 |         > deleteFile(String)
44 |         > moveDatabaseFrom(Context, String)
45 |         > openFileInput(String)
46 |         > openFileOutput(String)
47 |         > openOrCreateDatabase(String, int, SQLiteDatabase.CursorFactory [, DatabaseErrorHandler])
48 |         > registerReceiver(BroadcastReceiver, IntentFilter [, String, Handler])
49 |         > sendBroadcast(Intent [, String])
50 |         > startActivity(Intent [, Bundle])
51 |         > startService(Intent)
52 |         > stopService(Intent)
53 |         > unbindService(ServiceConnection)
54 |         > unregisterReceiver(BroadcastReceiver)
55 | 
56 | [*] android.hardware.Camera:
57 |         > open([int])
58 |         > reconnect()
59 |         > release()
60 |         > startPreview()
61 |         > stopPreview()
62 |         > takePicture(Camera.ShutterCallback, Camera.PictureCallback, Camera.PictureCallback [, Camera.PictureCallback])
63 |         
64 | [*] android.location.Location:
65 |         > getLatitude()
66 |         > getLongitude()
67 | 
68 | 
69 | [*] android.media.AudioRecord:
70 |         > read(short[], int, int)
71 |         > read(ByteBuffer, int)
72 |         > read(byte[], int, int)
73 |         > startRecording()
74 |         > stop()
75 | 
76 | [*] android.media.MediaRecorder:
77 |         > prepare()
78 |         > setCamera(Camera)
79 |         > start()
80 |         > stop()
81 | 
82 | [*] android.net.wifi.WifiManager:
83 |         > disconnect()
84 |         > getScanResults()
85 |         > getWifiState()
86 |         > reconnect()
87 |         > startScan()
88 | 
89 | [*] android.os.Process:
90 |         > killProcess(int)
91 |         
92 | 
93 | [*] android.telephony.SmsManager:
94 |         > sendDataMessage(String, String, short, byte[], PendingIntent, PendingIntent)
95 |         > sendTextMessage(String, String, String, PendingIntent, PendingIntent)
96 | 
97 | 


--------------------------------------------------------------------------------
/data_generation/collection/playStoreCrawler.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | 
 3 | # Python modules
 4 | import sys, os, shutil, glob, io
 5 | 
 6 | # Aion modules
 7 | from Aion.utils.graphics import *
 8 | from Aion.utils.data import *
 9 | from Aion.shared.App import App
10 | 
11 | # Third-party modules
12 | from googleplay_api.googleplay import GooglePlayAPI
13 | 
class PlayStoreCrawler:
    """ Wraps a GooglePlayAPI client to browse categories, list apps, and download them.
    Every method reports errors via prettyPrintError instead of raising them. """

    def __init__(self):
        """ Reads the Play Store settings and instantiates the API client.
        If anything fails, the error is printed and the instance may be left without some of its attributes. """
        try:
            creds = loadPlayStoreConfig()
            self.googleLogin = creds['GOOGLE_LOGIN']
            self.googlePassword = creds['GOOGLE_PASSWORD']
            self.androidID = creds['ANDROID_ID']
            self.authToken = creds['AUTH_TOKEN']
            # Only builds the client; authentication happens in login()
            self.api = GooglePlayAPI(self.androidID)
        except Exception as e:
            prettyPrintError(e)

    def login(self):
        """ Logs into the Google account using the received Google credentials.
        Returns True on success and False if an error occurred. """
        try:
            self.api.login(self.googleLogin, self.googlePassword, self.authToken)
        except Exception as e:
            prettyPrintError(e)
            return False

        return True

    def getCategories(self):
        """ Returns a list of app categories available on Google Play Store (empty on error) """
        try:
            cats = self.api.browse()
            # The category name is whatever follows the last '=' of the data URL
            categories = [c.dataUrl[c.dataUrl.rfind('=')+1:] for c in cats.category]
        except Exception as e:
            prettyPrintError(e)
            return []

        return categories

    def getSubCategories(self, category):
        """ Returns a list of app sub-categories available under "category" (empty on error) """
        try:
            sub = self.api.list(category)
            subcategories = [s.docid for s in sub.doc]
        except Exception as e:
            prettyPrintError(e)
            return []

        return subcategories

    def getApps(self, category, subcategory):
        """ Returns a list of "App" objects found under the given (sub)category (empty if none or on error) """
        try:
            apps = self.api.list(category, subcategory)
            if len(apps.doc) < 1:
                prettyPrint("Unable to find any apps under \"%s\" > \"%s\"" % (category, subcategory), "warning")
                return []
            # The apps are the children of the first returned document
            applications = [
                App(a.title, a.docid, a.details.appDetails.versionCode, a.offer[0].offerType,
                    a.aggregateRating.starRating, a.offer[0].formattedAmount,
                    a.details.appDetails.installationSize)
                for a in apps.doc[0].child
            ]

        except Exception as e:
            prettyPrintError(e)
            return []

        return applications

    def downloadApp(self, application):
        """ Downloads an app from the Google play store and moves it to the "downloads" directory.
        Returns True on success and False if an error occurred. """
        try:
            if application.appPrice != "Free":
                prettyPrint("Warning, downloading a non free application", "warning")
            # Download the app
            data = self.api.download(application.appID, application.appVersionCode, application.appOfferType)
            # Close the handle before moving the file so the whole payload is on disk
            with io.open("%s.apk" % application.appID, "wb") as apkFile:
                apkFile.write(data)
            # Moves every APK found in the working directory, not only the one written above
            downloadedApps = glob.glob("./*.apk")
            dstDir = loadDirs()["DOWNLOADS_DIR"]
            for da in downloadedApps:
                shutil.move(da, dstDir)

        except Exception as e:
            prettyPrintError(e)
            return False

        return True
94 | 
95 | 


--------------------------------------------------------------------------------
/data_generation/reconstruction/Trace.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | 
 3 | from Aion.utils.data import *
 4 | from Aion.utils.graphics import *
 5 | 
 6 | import glob, os, json
 7 | 
 8 | def loadJSONTraces(jsonFiles, filesType="malware"):
 9 |     """Loads and parses JSON files in a list and returns a list of comma-separated traces"""
10 |     try:
11 |         allTraces = []
12 |         for jsonFile in jsonFiles:
13 |             currentAppName = jsonFile[jsonFile.rfind("/")+1:].replace(".json", "")
14 |             # Check whether file exists
15 |             if not os.path.exists(jsonFile):
16 |                 prettyPrint("JSON file \"%s\" could not be found. Skipping" % jsonFile, "warning")
17 |             # Load JSON representation into python objects
18 |             else:
19 |                 # Convert the JSON trace to a comma-separated string
20 |                 currentTrace = introspyJSONToTrace(jsonFile)
21 |                 # Append trace to list
22 |                 if filesTypes == "malware":
23 |                     allTraces.append((currentTrace, 1, currentAppName))
24 |                 elif fileTypes == "goodware":
25 |                     allTraces.append((currentTrace, 0, currentAppName))
26 |                 else:
27 |                     allTraces.append((currentTrace, -1, currentAppName))
28 | 
29 |     except Exception as e:
30 |         prettyPrintError(e)
31 |     
32 |     return allTraces
33 | 
def introspyJSONToTrace(jsonTraceFile):
    """Converts an Introspy-generated JSON trace to a '|' separated trace of API calls
    :param jsonTraceFile: The file containing the JSON trace
    :type jsonTraceFile: str
    :return: A '|' separated string of "class.method(arguments)" entries, or an empty
             string if the file is missing, lacks the "calls" key, or cannot be parsed
    """
    try:
        if not os.path.exists(jsonTraceFile):
            prettyPrint("Could not find the file \"%s\"" % jsonTraceFile, "warning")
            return ""
        # Load the trace from the file; the handle is closed as soon as it is read
        with open(jsonTraceFile) as traceFile:
            jsonTrace = json.loads(traceFile.read())
        trace = []
        if "calls" not in jsonTrace:
            prettyPrint("Could not find the key \"calls\" in current trace. Returning empty string", "warning")
            return ""
        warningMarker = " - [WARNING"
        # Iterate over the calls and append them to "trace"
        for call in jsonTrace["calls"]:
            callClass = call["clazz"]  # A "typo" in introspy's DBAnalyzer
            callMethod = call["method"]
            # Strip the warning suffix only when the exact marker is present, so
            # that a method name is never truncated by a failed search
            markerPos = callMethod.find(warningMarker)
            if markerPos != -1:
                callMethod = callMethod[:markerPos]
            # Reset for every call: a call without arguments must not fail or
            # inherit the arguments of the previous call
            callArgs = ""
            if "arguments" in call["argsAndReturnValue"]:
                arguments = call["argsAndReturnValue"]["arguments"]
                arguments = _cleanUpArgs(arguments, callClass, callMethod)
                callArgs = ",".join(arguments) if arguments else ""
            # Append call to trace list
            trace.append(str("%s.%s(%s)" % (callClass, callMethod, callArgs)))

    except Exception as e:
        prettyPrintError(e)
        return ""

    return "|".join(trace)
67 | 
68 | def _cleanUpArgs(arguments, className="", methodName=""):
69 |     """Parses and cleans up a list of method arguments"""
70 |     try:
71 |         #print arguments
72 |         newArguments = []
73 |         # The default method of extracting arguments
74 |         for argKey in arguments:
75 |             newVal = arguments[argKey]
76 |             newKey = argKey.lower().replace(" ", "_")
77 |             if arguments[argKey].lower().find("intent") != -1:
78 |                 newVal = newVal[newVal.find("com."):newVal.rfind(" ")]
79 | 
80 |             newVal = newVal.replace("[","").replace("]","")
81 |             newArguments.append("%s=\"%s\"" % (newKey, newVal))
82 | 
83 |     except Exception as e:
84 |         prettyPrintError(e)
85 |         return arguments
86 | 
87 |     return newArguments
88 |     
89 | 


--------------------------------------------------------------------------------
/tools/staticResults.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | 
 3 | from Aion.utils.data import *     # Needed for accessing configuration files
 4 | from Aion.utils.graphics import * # Needed for pretty printing
 5 | from Aion.utils.misc import *
 6 | 
 7 | import os, sys, glob, shutil, argparse
 8 | 
 9 | 
10 | 
def defineArguments():
    """Builds the command-line parser of staticResults.py

    :return: A parser with three mandatory options (result directory, feature type, dataset name) and an optional experiment kind defaulting to "static"
    :rtype: argparse.ArgumentParser
    """
    # The mandatory options only differ in their flags and help text
    mandatoryOptions = (
        ("-d", "--resultdir", "The directory containing results text files"),
        ("-t", "--featuretype", "The type of the features used in classification"),
        ("-n", "--datasetname", "The name of the dataset to which the results belong"),
    )
    argumentParser = argparse.ArgumentParser(
        prog="staticResults.py",
        description="A tool to average the results from X runs of Aion's static experiment I",
    )
    for shortFlag, longFlag, helpText in mandatoryOptions:
        argumentParser.add_argument(shortFlag, longFlag, help=helpText, required=True)
    argumentParser.add_argument(
        "-e",
        "--experiment",
        help="Whether the experiment is static or dynamic",
        choices=["static", "dynamic"],
        default="static",
        required=False,
    )
    return argumentParser
18 | 
def _collectResults(resultLines, results):
    """Parses the lines of one results file and appends each complete record to [results]

    A record is complete once a mode (training/test), a classifier name, and a
    dictionary of metrics have all been seen; the three are then reset.

    :param resultLines: The lines of a results text file
    :type resultLines: list
    :param results: Maps "training"/"test" to {classifier: {metric: [values]}}; updated in place
    :type results: dict
    """
    metrics = ("accuracy", "recall", "specificity", "precision", "f1score")
    mode, classifier, result = "", "", {}
    for line in resultLines:
        lowered = line.lower()
        # 2.a. Get mode i.e. training/test ("test" wins if a line mentions both)
        if lowered.find("training") != -1:
            mode = "training"
        if lowered.find("test") != -1:
            mode = "test"
        # 2.b. Get the classifier's name: the last word of the line minus its final character
        if lowered.find("results") != -1:
            classifier = line[line.rfind(' ')+1:-1]
        # 2.c. Lastly, get the results
        if lowered.find("f1score") != -1:
            # NOTE(review): eval() executes whatever the results file contains;
            # only point this tool at result files you generated yourself
            result = eval(line)
        if mode != "" and classifier != "" and len(result) > 0:
            scores = results[mode].setdefault(classifier, dict((metric, []) for metric in metrics))
            for metric in metrics:
                scores[metric].append(result[metric])
            mode, classifier, result = "", "", {}


def _writeAverages(resultsFile, label, scores):
    """Writes the per-classifier mean of every metric in [scores] to [resultsFile]

    :param resultsFile: A file object opened for writing
    :param label: The section label i.e. "Training" or "Test"
    :type label: str
    :param scores: Maps classifier names to {metric: [values]}
    :type scores: dict
    """
    def mean(values):
        # Divide by the number of collected samples, so a classifier that is
        # absent from (or repeated in) some files is still averaged correctly
        return sum(values) / float(len(values))

    for learner in sorted(scores.keys()):
        accuracy = mean(scores[learner]["accuracy"])
        recall = mean(scores[learner]["recall"])
        specificity = mean(scores[learner]["specificity"])
        precision = mean(scores[learner]["precision"])
        f1score = mean(scores[learner]["f1score"])
        resultsFile.write("[%s: %s]\n" % (label, learner))
        resultsFile.write("Accuracy: %s, Recall: %s, Specificity: %s, Precision: %s, F1Score: %s\n\n" % (accuracy, recall, specificity, precision, f1score))


def main():
    """Averages the metrics found in the "*.txt" files of the result directory

    The averages are written to "avg_results_<dataset>_<featuretype>_<experiment>.txt"
    in the current working directory, training results first, then test results.

    :return: True upon success, False if no files were found or an error occurred
    :rtype: bool
    """
    try:
        argumentParser = defineArguments()
        arguments = argumentParser.parse_args()
        prettyPrint("Welcome to the \"Aion\"'s static experiment I printer")

        # 1. Retrieve files
        allFiles = glob.glob("%s/*.txt" % arguments.resultdir)
        if len(allFiles) < 1:
            prettyPrint("Unable to retrieve any results files. Exiting", "error")
            return False

        prettyPrint("Successfully retrieved %s result files" % len(allFiles))
        # 2. Parse files
        results = {"training": {}, "test": {}}
        for f in allFiles:
            prettyPrint("Processing \"%s\"" % f)
            with open(f) as resultFile:
                lines = resultFile.read().split('\n')
            _collectResults(lines, results)

        # 3. Average the results
        outputPath = "avg_results_%s_%s_%s.txt" % (arguments.datasetname, arguments.featuretype, arguments.experiment)
        with open(outputPath, "w") as resultsFile:
            _writeAverages(resultsFile, "Training", results["training"])
            _writeAverages(resultsFile, "Test", results["test"])

    except Exception as e:
        prettyPrintError(e)
        return False

    return True
91 | 
92 | if __name__ == "__main__":
93 |     main()
94 | 


--------------------------------------------------------------------------------
/data_generation/stimulation/DroidbotTest.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python
  2 | 
  3 | # Aion imports
  4 | from Aion.utils.data import *
  5 | from Aion.utils.graphics import *
  6 | from Aion.utils.misc import *
  7 | 
  8 | # Third-party software imports
  9 | from androguard.session import Session
 10 | 
 11 | # Python imports
 12 | import os, sys, shutil, subprocess, threading, signal
 13 | from multiprocessing import Process
 14 | 
 15 | class DroidbotAnalysis(Process):
 16 |     """
 17 |     Represents a Droidutan-driven test of an APK
 18 |     """
 19 |     def __init__(self, pID, pName, pVM, pTarget, pSt="", pDuration=60):
 20 |         """
 21 |         Initialize the test
 22 |         :param pID: Used to identify the process
 23 |         :type pID: int
 24 |         :param pName: A unique name given to a proces
 25 |         :type pName: str
 26 |         :param pVM: The Genymotion AVD name to run the test on
 27 |         :type pVM: str
 28 |         :param pTarget: The path to the APK under test
 29 |         :type pTarget: str
 30 |         :param pSt: The snapshot of the AVD in case restoring is needed
 31 |         :type pSt: str
 32 |         :param pDuration: The duration of the Droidutan test in seconds (default: 60s)
 33 |         :type pDuration: int
 34 |         """
 35 |         Process.__init__(self, name=pName) 
 36 |         self.processID = pID
 37 |         self.processName = pName
 38 |         self.processVM = pVM
 39 |         self.processTarget = pTarget
 40 |         self.processSnapshot = pSt
 41 |         self.processDuration = pDuration
 42 | 
 43 |     def run(self):
 44 |         """
 45 |         Runs the Droidutan test against the [processTarget] for [processDuration]
 46 |         """
 47 |         try:
 48 |             # A timer to guarante the process exits 
 49 |             if verboseON():
 50 |                 prettyPrint("Setting timer for %s seconds" % str(float(self.processDuration)*5.0), "debug")
 51 |             t = threading.Timer(float(self.processDuration)*5.0, self.stop)
 52 |             t.start()
 53 |             # Step 1. Analyze APK
 54 |             #APKType = "malware" if self.processTarget.find("malware") != -1 else "goodware"
 55 |             if verboseON():
 56 |                 prettyPrint("Analyzing APK: \"%s\"" % self.processTarget, "debug")
 57 |             s = Session()
 58 |             s.add(self.processTarget, open(self.processTarget).read())
 59 |             if len(s.analyzed_apk.values()) > 0:
 60 |                 apk = s.analyzed_apk.values()[0]
 61 |                 if type(apk) == list:
 62 |                     apk = s.analyzed_apk.values()[0][0]
 63 |             else:
 64 |                 prettyPrint("Could not retrieve an APK to analyze. Skipping", "warning")
 65 |                 return False
 66 | 
 67 |             # Step 2. Get the Ip address assigned to the AVD
 68 |             getAVDIPCmd = ["VBoxManage", "guestproperty", "enumerate", self.processVM]
 69 |             avdIP = ""
 70 |             result = subprocess.Popen(getAVDIPCmd, stderr=subprocess.STDOUT, stdout=subprocess.PIPE).communicate()[0].replace(' ', '')
 71 |             if result.lower().find("error") != -1:
 72 |                 prettyPrint("Unable to retrieve the IP address of the AVD", "error")
 73 |                 print result
 74 |                 return False
 75 |             index = result.find("androvm_ip_management,value:")+len("androvm_ip_management,value:")
 76 |             while result[index] != ',':
 77 |                 avdIP += result[index]
 78 |                 index += 1
 79 |             adbID = "%s:5555" % avdIP
 80 | 
 81 |             # Step 3. Define frequently-used commands
 82 |             droidbotOut = self.processTarget.replace(".apk", "_droidbot")
 83 |             droidbotCmd = ["droidbot", "-d", adbID, "-a", self.processTarget, "-o", droidbotOut, "-timeout", str(self.processDuration), "-random", "-keep_env", "-grant_perm"]
 84 | 
 85 |             # Step 4. Test the APK using Droidbot (Assuming machine is already on)
 86 |             prettyPrint("Testing the APK \"%s\" using Droidbot" % apk.package)
 87 |             # 4.a. Start Droidbot
 88 |             status = subprocess.Popen(droidbotCmd, stderr=subprocess.STDOUT, stdout=subprocess.PIPE).communicate()[0]
 89 | 
 90 |             # 4.b. Check for existence of output directory
 91 |             if not os.path.exists(droidbotOut):
 92 |                 prettyPrint("No output folder found for \"%s\"" % self.processTarget, "warning")
 93 |                 return False
 94 | 
 95 |             # 4.c. Filter the logcat dumped by droidbot
 96 |             logFile = open("%s/logcat_filtered.log" % droidbotOut, "w")
 97 |             catlog = subprocess.Popen(("cat", "%s/logcat.txt" % droidbotOut), stdout=subprocess.PIPE)
 98 |             output = subprocess.check_output(("grep", "-i", "droidmon-apimonitor-%s" % apk.package), stdin=catlog.stdout)
 99 |             logFile.write(output)
100 |             logFile.close()
101 |  
102 |         except subprocess.CalledProcessError as cpe:
103 |             prettyPrint("Unable to find the tag \"Droidmon-apimonitor-%s\" in the log file" % apk.package, "warning")
104 |         except Exception as e:
105 |             prettyPrintError(e)
106 |             return False
107 |         
108 |         return True
109 | 
110 |     def stop(self):
111 |         """
112 |         Stops this analysis process after uninstalling the app under test
113 |         """
114 |         try:
115 |             prettyPrint("Stopping the analysis process \"%s\" on \"%s\". Restoring snapshot \"%s\"" % (self.processName, self.processVM, self.processSnapshot), "warning")
116 |             os.kill(os.getpid(), signal.SIGTERM)
117 |             # Restore snapshot because that is probably not a good sign
118 |             if self.processSnapshot != "":
119 |                 restoreVirtualBoxSnapshot(self.processVM, self.processSnapshot)
120 | 
121 |         except Exception as e:
122 |             prettyPrintError(e)
123 | 
124 |         return True
125 | 
126 | 
127 | 


--------------------------------------------------------------------------------
/tools/visualizeFeatureVectors.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python
  2 | 
  3 | from Aion.data_generation.reconstruction.Numerical import *
  4 | from Aion.data_inference.visualization.visualizeData import *
  5 | from Aion.utils.graphics import *
  6 | from Aion.utils.data import *
  7 | 
  8 | import pickledb
  9 | 
 10 | import glob, sys, time, os, argparse, hashlib
 11 | 
 12 | def defineArguments():
 13 |     parser = argparse.ArgumentParser(prog="visualizeFeatureVectors.py", description="A tool to reduce the dimensionality of data points and visualize them in 2- or 3-D.")
 14 |     parser.add_argument("-p", "--datasetpath", help="The directory containing the feature vectors", required=True)
 15 |     parser.add_argument("-t", "--datasettype", help="The type of the feature vectors to load: indicates the type of experiment and the file extensions", required=True, choices=["static", "dynamic"])
 16 |     parser.add_argument("-a", "--algorithm", help="The dimensionality reduction algorithm to use", required=False, default="tsne", choices=["tsne", "pca"])
 17 |     parser.add_argument("-d", "--dimensionality", help="The target dimensionality to which the feature vectors are projected", required=False, default="2", choices=["2", "3"])
 18 |     parser.add_argument("-s", "--figuresize", help="The size of the Plotly figure", required=False, default="(1024, 1024)")
 19 |     return parser
 20 | 
 21 | def main():
 22 |     try:
 23 |         argumentParser = defineArguments()
 24 |         arguments = argumentParser.parse_args()
 25 |         prettyPrint("Welcome to the \"Aion\"'s experiment I")
 26 | 
 27 |         # Check the existence of the dataset directories
 28 |         if not os.path.exists("%s/malware/" % arguments.datasetpath) or not os.path.exists("%s/goodware/" % arguments.datasetpath):
 29 |             prettyPrint("Could not find malware or goodware directories under \"%s\". Exiting" % arguments.datasetpath, "warning")
 30 |             return False
 31 | 
 32 |         # Retrieve the data
 33 |         fileExtension = "static" if arguments.datasettype == "static" else "num"
 34 |         allFiles = glob.glob("%s/malware/*.%s" % (arguments.datasetpath, fileExtension)) + glob.glob("%s/goodware/*.%s" % (arguments.datasetpath, fileExtension))
 35 |         if len(allFiles) < 1:
 36 |             prettyPrint("Could not retrieve any \".%s\" files from the dataset directory \"%s\". Exiting" % (fileExtension, arguments.datasetpath), "warning")
 37 |             return False
 38 | 
 39 |         prettyPrint("Successfully retrieved %s \".%s\" files from the dataset directory \"%s\"" % (len(allFiles), fileExtension, arguments.datasetpath))
 40 |         # Load the data
 41 |         X, y = [], []
 42 |         appNames = []
 43 |         hashesDB = pickledb.load(getHashesDBPath(), False) # Open the hashes key-value store
 44 |         prettyPrint("Attempting to load feature vectors")
 45 |         for f in allFiles:
 46 |             featureVector = loadNumericalFeatures(f)
 47 |             if len(featureVector) < 1:
 48 |                 continue
 49 |             else:
 50 |                 # Retrieve app name from path
 51 |                 appKey = f[f.rfind('/')+1:].replace(".%s" % fileExtension, "").lower()
 52 |                 appName = hashesDB.get(appKey)
 53 |                 if appName == None:
 54 |                    appKey = appKey + ".apk"
 55 |                    appName = hashesDB.get(hashlib.sha256(appKey).hexdigest())
 56 |                    if appName == None:
 57 |                        appName = f[f.rfind("/")+1:f.rfind(".")]
 58 |                    
 59 |                 if f.lower().find("malware") != -1:
 60 |                     y.append(1)
 61 |                 else:
 62 |                     y.append(0)
 63 |             
 64 |                 X.append(featureVector)
 65 |                 appNames.append(appName)
 66 |                 if verboseON():
 67 |                     prettyPrint("App \"%s\" matched to package name \"%s\"" % (f, appName), "debug")
 68 | 
 69 |         if len(X) < 1:
 70 |             prettyPrint("Could not load any numerical feature vectors. Exiting", "warning")
 71 |             return False
 72 | 
 73 |         prettyPrint("Successfully retrieved and parsed %s numerical feature vectors" % len(X))
 74 |         # Perform visualization
 75 |         if arguments.datasettype == "static":
 76 |             # Retrieve different types of features
 77 |             X_basic = [x[:6] for x in X]   
 78 |             X_perm = [x[6:10] for x in X]
 79 |             X_api = [x[10:] for x in X]
 80 |            
 81 | 
 82 |             # Reduce and visualize features
 83 |             figureTitle = "Combined static features in %sD" % arguments.dimensionality
 84 |             prettyPrint("Visualizing combined static features in %sD" % arguments.dimensionality)
 85 |             reduceAndVisualize(X, y, int(arguments.dimensionality), arguments.algorithm, eval(arguments.figuresize), figureTitle, appNames=appNames)
 86 |             figureTitle = "Basic static features in %sD" % arguments.dimensionality
 87 |             prettyPrint("Visualizing basic features in %sD" % arguments.dimensionality)
 88 |             reduceAndVisualize(X_basic, y, int(arguments.dimensionality), arguments.algorithm, eval(arguments.figuresize), figureTitle, appNames=appNames)
 89 |             figureTitle = "Permission-based static features in %sD" % arguments.dimensionality
 90 |             prettyPrint("Visualizing permission-based features in %sD" % arguments.dimensionality)
 91 |             reduceAndVisualize(X_perm, y, int(arguments.dimensionality), arguments.algorithm, eval(arguments.figuresize), figureTitle, appNames=appNames)
 92 |             figureTitle = "API static features in %sD" % arguments.dimensionality
 93 |             prettyPrint("Visualizing API call features in %sD" % arguments.dimensionality)
 94 |             reduceAndVisualize(X_api, y, int(arguments.dimensionality), arguments.algorithm, eval(arguments.figuresize), figureTitle, appNames=appNames)
 95 |            
 96 |         else:
 97 |            figureTitle = "Dynamic Introspy features in %sD" % arguments.dimensionality
 98 |            reduceAndVisualize(X, y, int(arguments.dimensionality), arguments.algorithm, eval(arguments.figsize), figureTitle, appNames=appNames) 
 99 |     
100 |     except Exception as e:
101 |         prettyPrintError(e)
102 |         return False
103 | 
104 |     return True
105 | 
106 | if __name__ == "__main__":
107 |     main()
108 | 


--------------------------------------------------------------------------------
/data_generation/stimulation/DroidutanTest.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python
  2 | 
  3 | # Aion imports
  4 | from Aion.utils.data import *
  5 | from Aion.utils.graphics import *
  6 | from Aion.utils.misc import *
  7 | 
  8 | # Third-party imports
  9 | from droidutan import Droidutan
 10 | from androguard.session import Session
 11 | 
 12 | # Python imports
 13 | import os, sys, shutil, subprocess, threading, signal
 14 |             #APKType = "malware" if self.threadTarget.find("malware") != -1 else "goodware"
 15 | from multiprocessing import Process
 16 | 
 17 | class DroidutanAnalysis(Process):
 18 |     """
 19 |     Represents a Droidutan-driven test of an APK
 20 |     """
 21 |     def __init__(self, pID, pName, pVM, pTarget, pDuration=60, pLogName=""):
 22 |         """
 23 |         Initialize the test
 24 |         :param pID: Used to identify the process
 25 |         :type pID: int
 26 |         :param pName: A unique name given to a proces
 27 |         :type pName: str
 28 |         :param pVM: The Genymotion AVD name to run the test on
 29 |         :type pVM: str
 30 |         :param pTarget: The path to the APK under test
 31 |         :type pTarget: str
 32 |         :param pDuration: The duration of the Droidutan test in seconds (default: 60s)
 33 |         :type pDuration: int
 34 |         """
 35 |         Process.__init__(self, name=pName) 
 36 |         self.processID = pID
 37 |         self.processName = pName
 38 |         self.processVM = pVM
 39 |         self.processTarget = pTarget
 40 |         self.processDuration = pDuration
 41 |         self.processLogFile = pLogName
 42 |         self.success = True
 43 | 
 44 |     def run(self):
 45 |         """
 46 |         Runs the Droidutan test against the [processTarget] for [processDuration]
 47 |         """
 48 |         try:
 49 |             # A timer to guarante the process exits 
 50 |             if verboseON():
 51 |                 prettyPrint("Setting timer for %s seconds" % str(float(self.processDuration)*5.0), "debug")
 52 |             t = threading.Timer(float(self.processDuration)*5.0, self.stop)
 53 |             t.start()
 54 |             # Step 1. Analyze APK
 55 |             if verboseON():
 56 |                 prettyPrint("Analyzing APK: \"%s\"" % self.processTarget, "debug")
 57 |             apk, dx, vm = Droidutan.analyzeAPK(self.processTarget)
 58 |             if not apk:
 59 |                 prettyPrint("Could not retrieve an APK to analyze. Skipping", "warning")
 60 |                 return False
 61 |             # 1.a. Extract app components
 62 |             appComponents = Droidutan.extractAppComponents(apk)
 63 | 
 64 |             # Step 2. Get the Ip address assigned to the AVD
 65 |             getAVDIPCmd = ["VBoxManage", "guestproperty", "enumerate", self.processVM]
 66 |             avdIP = ""
 67 |             result = subprocess.Popen(getAVDIPCmd, stderr=subprocess.STDOUT, stdout=subprocess.PIPE).communicate()[0].replace(' ', '')
 68 |             if result.lower().find("error") != -1:
 69 |                 prettyPrint("Unable to retrieve the IP address of the AVD", "error")
 70 |                 print result
 71 |                 return False
 72 |             index = result.find("androvm_ip_management,value:")+len("androvm_ip_management,value:")
 73 |             while result[index] != ',':
 74 |                 avdIP += result[index]
 75 |                 index += 1
 76 |             adbID = "%s:5555" % avdIP
 77 | 
 78 |             # Step 3. Define frequently-used commands
 79 |             adbPath = getADBPath()
 80 |             dumpLogcatCmd = [adbPath, "-s", adbID, "logcat", "-d"]
 81 |             clearLogcatCmd = [adbPath, "-s", adbID, "-c"]
 82 | 
 83 |             # Step 4. Test the APK using Droidutan (Assuming machine is already on)
 84 |             prettyPrint("Clearing device log before test")
 85 |             subprocess.Popen(clearLogcatCmd, stderr=subprocess.STDOUT, stdout=subprocess.PIPE)
 86 |             prettyPrint("Testing the APK \"%s\" using Droidutan" % appComponents["package_name"])
 87 |             # 4.a. Unleash Droidutan
 88 |             success = Droidutan.testApp(self.processTarget, avdSerialno=avdIP, testDuration=int(self.processDuration), preExtractedComponents=appComponents, allowCrashes=False)
 89 |             if not success:
 90 |                 prettyPrint("An error occurred while testing the APK \"%s\". Skipping" % self.processTarget, "warning")
 91 |                 return False
 92 | 
 93 |             # 5. Dump the system log to file
 94 |             logcatFile = open(self.processTarget.replace(".apk", ".log"), "w")
 95 |             prettyPrint("Dumping logcat")
 96 |             subprocess.Popen(dumpLogcatCmd, stderr=subprocess.STDOUT, stdout=logcatFile).communicate()[0]
 97 |             logcatFile.close()
 98 | 
 99 |             # 6. Filter droidmon entries related to the APK under test
100 |             prettyPrint("Retrieving \"Droidmon-apimonitor-%s\" tags from log" % appComponents["package_name"])
101 |             catlog = subprocess.Popen(("cat", logcatFile.name), stdout=subprocess.PIPE)
102 |             try:
103 |                 output = subprocess.check_output(("grep", "-i", "droidmon-apimonitor-%s" % appComponents["package_name"]), stdin=catlog.stdout)
104 |             except subprocess.CalledProcessError as cpe:
105 |                 prettyPrint("Could not find the tag \"droidmon-apimonitor-%s in the logs" % appComponents["package_name"], "warning")
106 |                 return True
107 |             if self.processLogFile != "":
108 |                 logFile = open(self.processLogFile, "w")
109 |             else:
110 |                 logFile = open("%s_filtered.log" % self.processTarget.replace(".apk", ""), "w")
111 |             logFile.write(output)
112 |             logFile.close()
113 |             os.remove(logcatFile.name)           
114 |  
115 |         except Exception as e:
116 |             prettyPrintError(e)
117 |             self.success = False
118 |             return False
119 | 
120 |         return True
121 | 
122 | 
123 |     def stop(self):
124 |         """
125 |         Stops this analysis process
126 |         """
127 |         try:
128 |             prettyPrint("Stopping the analysis process \"%s\" on \"%s\"" % (self.processName, self.processVM), "warning")
129 |             os.kill(os.getpid(), signal.SIGTERM)
130 | 
131 |         except Exception as e:
132 |             prettyPrintError(e)
133 |             self.success = False
134 |             return False
135 | 
136 |         return True
137 | 
138 | 
139 | 


--------------------------------------------------------------------------------
/data_inference/extraction/StringKernelSVM.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python
  2 | 
  3 | ####################################################
  4 | # DISCLAIMER: This code is a slightly-edited copy  #
  5 | # of Tim Shenkao's "stringSVM.py" available on the #
  6 | # github repo "StringKernelSVM"                    #
  7 | # https://github.com/timshenkao/StringKernelSVM    #
  8 | ####################################################
  9 | 
 10 | import numpy as np
 11 | import sys
 12 | from time import time
 13 | 
 14 | def _K(n, s, t, lambda_decay=0.5):
 15 |     """
 16 |     K_n(s,t) in the original article; recursive function
 17 |     :param n: length of subsequence
 18 |     :type n: int
 19 |     :param s: document #1
 20 |     :type s: str
 21 |     :param t: document #2
 22 |     :type t: str
 23 |     :return: float value for similarity between s and t
 24 |     """
 25 |     if min(len(s), len(t)) < n:
 26 |         return 0
 27 |     else:
 28 |         part_sum = 0
 29 |         for j in range(1, len(t)):
 30 |             if t[j] == s[-1]:
 31 |                 #not t[:j-1] as in the article but t[:j] because of Python slicing rules!!!
 32 |                 part_sum += _K1(n - 1, s[:-1], t[:j])
 33 |         result = _K(n, s[:-1], t) + lambda_decay ** 2 * part_sum
 34 |         return result
 35 | 
 36 | def _K1(n, s, t, lambda_decay=0.5):
 37 |     """
 38 |     K'_n(s,t) in the original article; auxiliary intermediate function; recursive function
 39 |     :param n: length of subsequence
 40 |     :type n: int
 41 |     :param s: document #1
 42 |     :type s: str
 43 |     :param t: document #2
 44 |     :type t: str
 45 |     :return: intermediate float value
 46 |     """
 47 |     if n == 0:
 48 |         return 1
 49 |     elif min(len(s), len(t)) < n:
 50 |         return 0
 51 |     else:
 52 |         part_sum = 0
 53 |         for j in range(1, len(t)):
 54 |             if t[j] == s[-1]:
 55 |     #not t[:j-1] as in the article but t[:j] because of Python slicing rules!!!
 56 |                 part_sum += _K1(n - 1, s[:-1], t[:j]) * (lambda_decay ** (len(t) - (j + 1) + 2))
 57 |         result = lambda_decay * _K1(n, s[:-1], t) + part_sum
 58 |         return result
 59 | 
 60 | def _gram_matrix_element(s, t, sdkvalue1, sdkvalue2, subseq_length=3):
 61 |     """
 62 |     Helper function
 63 |     :param s: document #1
 64 |     :type s: str
 65 |     :param t: document #2
 66 |     :type t: str
 67 |     :param sdkvalue1: K(s,s) from the article
 68 |     :type sdkvalue1: float
 69 |     :param sdkvalue2: K(t,t) from the article
 70 |     :type sdkvalue2: float
 71 |     :return: value for the (i, j) element from Gram matrix
 72 |     """
 73 |     if s == t:
 74 |         return 1
 75 |     else:
 76 |         try:
 77 |             return _K(subseq_length, s, t) / \
 78 |                    (sdkvalue1 * sdkvalue2) ** 0.5
 79 |         except ZeroDivisionError:
 80 |             print("Maximal subsequence length is less or equal to documents' minimal length. You should decrease it")
 81 |             sys.exit(2)
 82 | 
 83 | def string_kernel(X1, X2, subseq_length=3, lambda_decay=0.5):
 84 |     """
 85 |     String Kernel computation
 86 |     :param X1: list of documents (m rows, 1 column); each row is a single document (string)
 87 |     :type X1: list
 88 |     :param X2: list of documents (m rows, 1 column); each row is a single document (string)
 89 |     :type X2: list
 90 |     :return: Gram matrix for the given parameters
 91 |     """
 92 |     len_X1 = len(X1)
 93 |     len_X2 = len(X2)
 94 |     # numpy array of Gram matrix
 95 |     gram_matrix = np.zeros((len_X1, len_X2), dtype=np.float32)
 96 |     sim_docs_kernel_value = {}
 97 |     #when lists of documents are identical
 98 |     if X1 == X2:
 99 |     #store K(s,s) values in dictionary to avoid recalculations
100 |         for i in range(len_X1):
101 |             sim_docs_kernel_value[i] = _K(subseq_length, X1[i], X1[i])
102 |     #calculate Gram matrix
103 |         for i in range(len_X1):
104 |             for j in range(i, len_X2):
105 |                 gram_matrix[i, j] = _gram_matrix_element(X1[i], X2[j], sim_docs_kernel_value[i], sim_docs_kernel_value[j])
106 |     #using symmetry
107 |                 gram_matrix[j, i] = gram_matrix[i, j]
108 |     #when lists of documents are not identical but of the same length
109 |     elif len_X1 == len_X2:
110 |         sim_docs_kernel_value[1] = {}
111 |         sim_docs_kernel_value[2] = {}
112 |     #store K(s,s) values in dictionary to avoid recalculations
113 |         for i in range(len_X1):
114 |             sim_docs_kernel_value[1][i] = _K(subseq_length, X1[i], X1[i])
115 |         for i in range(len_X2):
116 |             sim_docs_kernel_value[2][i] = _K(subseq_length, X2[i], X2[i])
117 |     #calculate Gram matrix
118 |         for i in range(len_X1):
119 |             for j in range(i, len_X2):
120 |                 gram_matrix[i, j] = _gram_matrix_element(X1[i], X2[j], sim_docs_kernel_value[1][i], sim_docs_kernel_value[2][j])
121 |     #using symmetry
122 |                 gram_matrix[j, i] = gram_matrix[i, j]
123 |     #when lists of documents are neither identical nor of the same length
124 |     else:
125 |         sim_docs_kernel_value[1] = {}
126 |         sim_docs_kernel_value[2] = {}
127 |         min_dimens = min(len_X1, len_X2)
128 |     #store K(s,s) values in dictionary to avoid recalculations
129 |         for i in range(len_X1):
130 |             sim_docs_kernel_value[1][i] = _K(subseq_length, X1[i], X1[i])
131 |         for i in range(len_X2):
132 |             sim_docs_kernel_value[2][i] = _K(subseq_length, X2[i], X2[i])
133 |     #calculate Gram matrix for square part of rectangle matrix
134 |         for i in range(min_dimens):
135 |             for j in range(i, min_dimens):
136 |                 gram_matrix[i, j] = _gram_matrix_element(X1[i], X2[j], sim_docs_kernel_value[1][i], sim_docs_kernel_value[2][j])
137 |                 #using symmetry
138 |                 gram_matrix[j, i] = gram_matrix[i, j]
139 | 
140 |     #if more rows than columns
141 |         if len_X1 > len_X2:
142 |             for i in range(min_dimens, len_X1):
143 |                 for j in range(len_X2):
144 |                     gram_matrix[i, j] = _gram_matrix_element(X1[i], X2[j], sim_docs_kernel_value[1][i], sim_docs_kernel_value[2][j])
145 |         #if more columns than rows
146 |         else:
147 |             for i in range(len_X1):
148 |                 for j in range(min_dimens, len_X2):
149 |                     gram_matrix[i, j] = _gram_matrix_element(X1[i], X2[j], sim_docs_kernel_value[1][i],
150 |                                                                      sim_docs_kernel_value[2][j])
151 |     print sim_docs_kernel_value
152 |     return gram_matrix
153 | 


--------------------------------------------------------------------------------
/data_inference/visualization/visualizeData.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python
  2 | 
  3 | from Aion.utils.data import *
  4 | from Aion.utils.graphics import *
  5 | from Aion.utils.misc import *
  6 | 
  7 | import numpy as np
  8 | from sklearn.manifold import TSNE
  9 | from sklearn.decomposition import PCA
 10 | 
 11 | import plotly.plotly as py
 12 | from plotly.offline import plot, iplot
 13 | from plotly.graph_objs import *
 14 | 
 15 | def reduceAndVisualize(X, y, dim=2, reductionAlgorithm="tnse", figSize=(1024,1024), figTitle="Data visualization", appNames=[], saveProjectedData=False):
 16 |     """
 17 |     Generates a scatter plot using "plotly" after projecting the data points into <dim>-dimensionality using tSNE or PCA
 18 |     :param X: The matrix containing the feature vectors
 19 |     :type X: list
 20 |     :param y: The labels of the feature vectors
 21 |     :type y: list
 22 |     :param dim: The target dimensionality to project the feature vectors to (default=2)
 23 |     :type dim: int
 24 |     :param reductionAlgorithm: The algorithm to use for dimensionality reduction
 25 |     :type reductionAlgorithm: str
 26 |     :param figSize: The size of the figure
 27 |     :type figSize: tuple (of ints)
 28 |     :param figTitle: The title of the figure and the name of the resulting HTML file
 29 |     :type figTitle: str
 30 |     :param appNames: The names of apps to be used as tooltips for each data point. Assumed to match one-to-one with the feature vectors in X
 31 |     :type appNames: list of str
 32 |     :param saveProjectedData: Whether to save the projected data in a CSV file
 33 |     :type saveProjectedData: bool
 34 |     :return: A bool depicting the success/failure of the operaiton
 35 |     """
 36 |     try:
 37 |         # Prepare data
 38 |         X, y = np.array(X), np.array(y)
 39 |         # Build model
 40 |         reductionModel = TSNE(n_components=dim) if reductionAlgorithm == "tsne" else None
 41 |         # Apply transformation
 42 |         prettyPrint("Projecting %s feature vectors of dimensionality %s into %s-d" % (X.shape[0], X.shape[1], dim))
 43 |         X_new = reductionModel.fit_transform(X)
 44 |         # Generate a scatter plot
 45 |         prettyPrint("Populating the traces for malware and goodware")
 46 |         x_mal, y_mal, x_good, y_good = [], [], [], []
 47 |         labels_mal, labels_good = [], []
 48 |         if dim == 3:
 49 |             z_mal, z_good = [], []
 50 |         for index in range(len(y)):
 51 |             if y[index] == 1:
 52 |                 x_mal.append(X_new[index][0])
 53 |                 y_mal.append(X_new[index][1])
 54 |                 if dim == 3:
 55 |                     z_mal.append(X_new[index][2])
 56 |                 labels_mal.append(appNames[index])
 57 |             else:
 58 |                 x_good.append(X_new[index][0])
 59 |                 y_good.append(X_new[index][1])
 60 |                 if dim == 3:
 61 |                     z_good.append(X_new[index][2])
 62 |                 labels_good.append(appNames[index])
 63 | 
 64 |         # Create traces for the scatter plot 
 65 |         prettyPrint("Creating a scatter plot")
 66 |         if dim == 2:
 67 |             # The trace for malware
 68 |             trace_malware = Scatter(x=x_mal,
 69 |                y=y_mal,
 70 |                mode='markers',
 71 |                name='Malware',
 72 |                marker=Marker(symbol='dot',
 73 |                              size=6,
 74 |                              color='red',
 75 |                              opacity=0.75,
 76 |                              line=Line(width=2.0)
 77 |                              ),
 78 |                hoverinfo='text',
 79 |                text=labels_mal
 80 |                )
 81 |             # The trace for goodware    
 82 |             trace_goodware = Scatter(x=x_good,
 83 |                 y=y_good,
 84 |                 mode='markers',
 85 |                 name='Goodware',
 86 |                 marker=Marker(symbol='dot',
 87 |                               size=6,
 88 |                               color='blue',
 89 |                               opacity=0.75,
 90 |                               line=Line(width=2.0)
 91 |                               ),
 92 |                 hoverinfo='text',
 93 |                 text=labels_good
 94 |                 )
 95 |         elif dim == 3:
 96 |             # The trace for malware
 97 |             trace_malware = Scatter3d(x=x_mal,
 98 |                 y=y_mal,
 99 |                 z=z_mal,
100 |                 mode='markers',
101 |                 name='Malware',
102 |                 marker=Marker(symbol='dot',
103 |                               size=6,
104 |                               color='red',
105 |                               opacity=0.5,
106 |                               line=Line(width=1.0)
107 |                               ),
108 |                 hoverinfo='text',
109 |                 text=labels_mal
110 |                 )
111 |             # The trace for goodware    
112 |             trace_goodware = Scatter3d(x=x_good,
113 |                 y=y_good,
114 |                 z=z_good,
115 |                 mode='markers',
116 |                 name='Goodware',
117 |                 marker=Marker(symbol='dot',
118 |                               size=6,
119 |                               color='blue',
120 |                               opacity=0.5,
121 |                               line=Line(width=1.0)
122 |                               ),
123 |                 hoverinfo='text',
124 |                 text=labels_good
125 |                 )
126 |         # Define the axis properties
127 |         axis=dict(showbackground=False,
128 |             showline=False, # hide axis line, grid, ticklabels and  title
129 |             zeroline=False,
130 |             showgrid=False,
131 |             showticklabels=False,
132 |             visible=False,
133 |             title=''
134 |             )
135 |         # Define the figure's layout
136 |         layout=Layout(title=figTitle,
137 |             width=figSize[0],
138 |             height=figSize[1],
139 |             font= Font(size=12),
140 |             showlegend=True,
141 |             scene=Scene(
142 |                 xaxis=XAxis(axis),
143 |                 yaxis=YAxis(axis),
144 |                 zaxis=ZAxis(axis)
145 |             ),
146 |             margin=Margin(
147 |                 t=100,
148 |             ),
149 |             hovermode='closest',
150 |             annotations=Annotations([
151 |                 Annotation(
152 |                 showarrow=False,
153 |                 text=figTitle,
154 |                 xref='paper',
155 |                 yref='paper',
156 |                 x=0,
157 |                 y=0.1,
158 |                 xanchor='left',
159 |                 yanchor='bottom',
160 |                 font=Font(
161 |                     size=14
162 |                     )
163 |                 )
164 |                 ]),
165 |             )
166 |         # Generate graph file
167 |         data=Data([trace_malware, trace_goodware])
168 |         fig=Figure(data=data, layout=layout)
169 |         plot(fig, filename=figTitle.lower().replace(' ', '_'))
170 | 
171 | 
172 |     except Exception as e:
173 |         prettyPrintError(e)
174 |         return False
175 | 
176 |     return True
177 | 
178 | 
179 | 
180 | 


--------------------------------------------------------------------------------
/data_inference/learning/HMM.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python
  2 | 
  3 | from Aion.utils.graphics import *
  4 | from Aion.utils.data import *
  5 | from Aion.utils.misc import *
  6 | 
  7 | import ghmm
  8 | from datetime import datetime
  9 | from sklearn.cross_validation import KFold
 10 | from sklearn.metrics import *
 11 | import numpy
 12 | 
 13 | import time, sys
 14 | 
 15 | class HiddenMarkovModel():
 16 |     # A simple structure to represent a hidden Markov model
 17 |     def __init__(self, A, B, Pi, observations):
 18 |         if len(A) == len(Pi):
 19 |             self.states = range(len(A))
 20 |             self.sigma = ghmm.Alphabet(observations) # The "alphabet" comprising action indices
 21 |             self.initA = A
 22 |             self.initB = B
 23 |             self.initPi = Pi
 24 |             self.ghmmModel = ghmm.HMMFromMatrices(self.sigma, ghmm.DiscreteDistribution(self.sigma), self.initA, self.initB, self.initPi)
 25 |             self.isTrained = False
 26 |         else:
 27 |             prettyPrint("Unable to initialize model. Unequal number of states", "error")
 28 |             return
 29 | 
 30 |     def train(self, X):
 31 |         """Uses GHMM's implementation of Baum-Welch to train an HMM"""
 32 |         try:
 33 |             if len(X) < 1:
 34 |                 prettyPrint("Empty training set provided", "warning")
 35 |                 return False
 36 |             # Now use the Baum-Welch algorithm
 37 |             self.ghmmModel.baumWelch(ghmm.SequenceSet(self.ghmmModel.emissionDomain, X))
 38 |             self.isTrained = True
 39 |             if verboseON():
 40 |                 print "Trained model: %s" % self.ghmmModel
 41 | 
 42 |         except Exception as e:
 43 |             prettyPrintError(e)
 44 |             return False
 45 | 
 46 |         return True
 47 | 
 48 | def cross_val_predict(X, y, tracelength, threshold, kfold=10, trainwith="malware"):
 49 |     """Classifies out-of-sample sequences using the trained model and KFold CV"""
 50 |     try:
 51 |         # Retrieve indices
 52 |         outIndices = [] # The ranges of the instances not to be used in training (Assumed trailing)
 53 |         for index in range(len(X)):
 54 |             if trainwith == "malware" and y[index] == 0:
 55 |                 outIndices.append(index)
 56 |             elif trainwith =="goodware" and y[index] == 1:
 57 |                outIndices.append(index)
 58 | 
 59 |         # A matrix to hold the predictions (len(X) x Kfold)
 60 |         P = numpy.zeros((len(X), kfold))
 61 | 
 62 |         Xmal, Xgood, ymal, ygood = [], [], [], []
 63 |         for index in range(len(X)):
 64 |             if y[index] == 1:
 65 |                 Xmal.append(X[index])
 66 |                 ymal.append(y[index])
 67 |             elif y[index] == 0:
 68 |                 Xgood.append(X[index])
 69 |                 ygood.append(y[index]) 
 70 |  
 71 |         allFolds = KFold(len(Xmal), kfold) if trainwith == "malware" else KFold(len(Xgood), kfold)
 72 |         currentFold = 1
 73 |         for trainingIndices, testIndices in allFolds:
 74 |             Xtrain, Xtest = [], []
 75 |             ytrain, ytest = [], []
 76 |             # Populate training traces
 77 |             for index in trainingIndices:
 78 |                 if trainwith == "malware":
 79 |                     Xtrain.append(Xmal[index])
 80 |                     ytrain.append(ymal[index])
 81 |                 else:
 82 |                     Xtrain.append(Xgood[index])
 83 |                     ytrain.append(ygood[index])
 84 |             # Populate test traces
 85 |             for index in testIndices:
 86 |                 if trainwith == "malware":
 87 |                     Xtest.append(Xmal[index])
 88 |                     ytest.append(ymal[index])
 89 |                 else:
 90 |                     Xtest.append(Xgood[index])
 91 |                     ytest.append(ygood[index])
 92 |         
 93 |             #print ytrain, ytest
 94 |             if trainwith == "malware":
 95 |                 Xtest = Xtest + Xgood
 96 |                 ytest = ytest + ygood
 97 |             else:
 98 |                 Xtest = Xtest + Xmal
 99 |                 ytest = ytest + ymal
100 |         
101 |             #print ytrain, ytest
102 |             Pindices = testIndices.tolist() + outIndices # TODO: Use this to populate "P"
103 |             #print Pindices
104 |             
105 |             # Get the observations from the current training and test datasets
106 |             predicted = []
107 |             allObservations = []
108 |             for trace in Xtrain + Xtest:
109 |                 for call in trace:
110 |                     if not call in allObservations:
111 |                         allObservations.append(call)
112 |             if verboseON():
113 |                 prettyPrint("Successfully retrieved %s observations from current traces" % len(allObservations), "debug")
114 |             # Prepare HMM
115 |             Pi = [1.0, 0.0]
116 |             A = [[0.5, 0.5], [0.5, 0.5]]
117 |             B = numpy.random.random((2, len(allObservations))).tolist()
118 | 
119 |             prettyPrint("Building the hidden Markov model")
120 |             hmm = HiddenMarkovModel(A, B, Pi, allObservations)
121 | 
122 |             prettyPrint("Training the model")
123 |             hmm.train(Xtrain)
124 | 
125 |             # Test model
126 |             for index in range(len(Xtest)):
127 |                 # Retrieve and prepare trace
128 |                 currentTrace, currentClass = Xtest[index], ytest[index]
129 |                 currentTrace = currentTrace[:int(tracelength)] if len(currentTrace) > int(tracelength) else currentTrace
130 |                 currentTrace = ghmm.EmissionSequence(hmm.sigma, currentTrace)
131 |                 # Calculate log likelihood 
132 |                 logProbability = hmm.ghmmModel.loglikelihood(currentTrace)
133 |                 if verboseON():
134 |                     prettyPrint("P(O|lambda)=%s" % logProbability, "debug")
135 |                 # Classify instance
136 |                 if trainwith == "malware":
137 |                     currentPredicted = 0 if logProbability <= -int(threshold) else 1
138 |                 else:
139 |                     currentPredicted = 1 if logProbability <= -int(threshold) else 0
140 | 
141 |                 # Append to predicted
142 |                 if verboseON():
143 |                     prettyPrint("%s instance classified as %s" % (["Goodware", "Malware"][ytest[index]], ["Goodware", "Malware"][currentPredicted]), "debug")
144 |                 predicted.append(currentPredicted)
145 | 
146 |             # Populate the prediction matrix
147 |             #print P.shape
148 |             #print Pindices, len(Pindices)
149 |             #print predicted, len(predicted)
150 |             #print currentFold
151 |             for index in range(len(predicted)):
152 |                 #print "P[%s][%s] = %s" % (Pindices[index], currentFold-1, predicted[index])
153 |                 P[Pindices[index]][currentFold-1] = predicted[index]
154 | 
155 |             currentFold += 1 # Increment the fold number
156 | 
157 |         # For each instance, calculate the majority vote of predictons
158 |         predicted = []
159 |         #print P
160 |         for rIndex in range(P.shape[0]):
161 |             if rIndex >= outIndices[0]:
162 |                 if sum(P[rIndex,:]) >= kfold/2:
163 |                     predicted.append(1)
164 |                 else:
165 |                     predicted.append(0)
166 |             else:
167 |                 # Malware instances will only be used once as test instances
168 |                 if sum(P[rIndex,:] > 0):
169 |                     predicted.append(1)
170 |                 else:
171 |                     predicted.append(0)
172 | 
173 |     except Exception as e:
174 |         prettyPrintError(e)
175 |         return [] 
176 | 
177 |     return predicted
178 | 
179 | 


--------------------------------------------------------------------------------
/utils/misc.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python
  2 | 
  3 | from Aion.utils.data import *
  4 | 
  5 | import random, string, os, glob, subprocess, time, re
  6 | from datetime import datetime
  7 | import smtplib
  8 | from email.MIMEMultipart import MIMEMultipart
  9 | from email.MIMEText import MIMEText
 10 | 
 11 | def averageList(inputList, roundDigits=2):
 12 |    return round(float(sum(inputList))/float(len(inputList)), roundDigits)
 13 | 
 14 | def checkRoot():
 15 |     if os.getuid() != 0:
 16 |         return False
 17 |     else:
 18 |         return True
 19 | 
 20 | def getRandomNumber(length=8):
 21 |     return ''.join(random.choice(string.digits) for i in range(length))
 22 | 
 23 | def getRandomAlphaNumeric(length=8):
 24 |     return ''.join(random.choice(string.ascii_letters + string.digits) for i in range(length))
 25 | 
 26 | def getRandomString(length=8):
 27 |     return ''.join(random.choice(string.lowercase) for i in range(length))
 28 | 
 29 | def getTimestamp(includeDate=False):
 30 |     if includeDate:
 31 |         return "[%s]"%str(datetime.now())
 32 |     else:
 33 |         return "[%s]"%str(datetime.now()).split(" ")[1]
 34 | 
 35 | def checkAVDState(vmName, vmState="running"):
 36 |     """
 37 |     Checks the current VirtualBox state of an AVD (e.g., running, stopping, ...)
 38 |     :param vmName: The name of the AVD to check
 39 |     :type vmName: str
 40 |     :param vmState: The status to check
 41 |     :type vmState: str
 42 |     :return: A boolean depicting whether the AVD is stuck and an str of its process ID
 43 |     """
 44 |     try:
 45 |         isStuck = False
 46 |         pID = ""
 47 |         vBoxInfoCmd = ["vboxmanage", "showvminfo", vmName]
 48 |         # Check whether the AVD is stuck in "Stopped" status
 49 |         status = subprocess.Popen(vBoxInfoCmd, stderr=subprocess.STDOUT, stdout=subprocess.PIPE).communicate()[0]
 50 |         if status.lower().find(vmState) != -1:
 51 |             isStuck = True
 52 |             # Kill the VirtualBox process
 53 |             # a) Get UUID of stuck AVD
 54 |             uuid = ""
 55 |             for line in status.split('\n'):
 56 |                 if line.find("UUID") != -1:
 57 |                     uuid = line[line.rfind(' ')+1:]
 58 |                     break 
 59 |             # b) Get the PID of the process
 60 |             ps = subprocess.Popen(["ps", "-eaf"], stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
 61 |             ps.wait()
 62 |             out = subprocess.Popen(["grep", "-i", uuid], stdin=ps.stdout, stdout=subprocess.PIPE).communicate()[0]
 63 |             numbers = re.findall("\d+", out)
 64 |             if len(numbers) > 0:
 65 |                 pID = str(numbers[0])
 66 | 
 67 |     except Exception as e:
 68 |         print "[*] Error encountered: %s" % e
 69 |         return False, ""
 70 |  
 71 |     return isStuck, pID
 72 | 
 73 | def restoreVirtualBoxSnapshot(vmName, snapshotName, retrials=25, waitToBoot=30):
 74 |     """
 75 |     Attempts to restore the snapshot of a VirtualBox machine
 76 |     :param vmName: The name of the virtual machine
 77 |     :type vmName: str
 78 |     :param snapshotName: The name of the snapshot to restore
 79 |     :type snapshotName: str
 80 |     :param retrials: In case of failure, how many attempts to restore the snapshot are made
 81 |     :type retrials: int
 82 |     :param waitToBoot:The time (in seconds) to wait for a virtual machine to boot
 83 |     :type waitToBoot: int
 84 |     :return: A boolean depicting the success/failure of the operation
 85 |     """
 86 |     try:
 87 |         # Define frequently-used commands
 88 |         vBoxRestoreCmd = ["vboxmanage", "snapshot", vmName, "restore", snapshotName]
 89 |         vBoxPowerOffCmd = ["vboxmanage", "controlvm", vmName, "poweroff"]
 90 |         genymotionStartCmd = [getGenymotionPlayer(), "--vm-name", vmName]
 91 |         genymotionPowerOffCmd = [getGenymotionPlayer(), "--vm-name", vmName, "--poweroff"]
 92 |         # Check whether the machine is stuck in the "Stopping" phase
 93 |         state, pID = checkAVDState(vmName, "stopping")
 94 |         if state:
 95 |             # Kill process
 96 |             print "[*] KILLING STOPPING \"%s\"" % vmName
 97 |             subprocess.Popen(["kill", pID], stderr=subprocess.STDOUT, stdout=subprocess.PIPE).communicate()[0]
 98 |         # Power off the genymotion AVD
 99 |         print "[*] POWERING OFF \"%s\"" % vmName
100 |         poweroff = subprocess.Popen(genymotionPowerOffCmd)
101 |         poweroff.wait()
102 |         # Make sure the AVD is dead
103 |         state, pID = checkAVDState(vmName, "running")
104 |         while state:
105 |             print "[*] POWERING OFF \"%s\"" % vmName
106 |             poweroff = subprocess.Popen(genymotionPowerOffCmd)
107 |             poweroff.wait()
108 |             state, pID = checkAVDState(vmName, "running")
109 |         # Attempt to restore the AVD's snapshot
110 |         print "[*] RESTORING SNAPSHOT \"%s\" for \"%s\"" % (snapshotName, vmName)
111 |         restore = subprocess.Popen(vBoxRestoreCmd, stdout=subprocess.PIPE)
112 |         restore.wait()
113 |         counter = 0
114 |         while restore.communicate()[0].lower().find("error") != -1:
115 |             print "[*] RESTORING SNAPSHOT \"%s\" for \"%s\". Trial #%s" % (snapshotName, vmName, counter+1)
116 |             if counter == retrials:
117 |                 return False
118 |             counter += 1
119 |             restore = subprocess.Popen(vBoxRestoreCmd, stdout=subprocess.PIPE)
120 |             restore.wait()
121 |         # Power on the Genymotion AVD again
122 |         print "[*] POWERING ON \"%s\"" % vmName
123 |         poweron = subprocess.Popen(genymotionStartCmd)
124 | 	time.sleep(waitToBoot)
125 |         state, pID = checkAVDState(vmName, "powered off")
126 |         #while state:
127 |         #    print "[*] POWERING ON \"%s\"" % vmName
128 |         #    poweron = subprocess.Popen(genymotionStartCmd)
129 | 	#    time.sleep(waitToBoot)
130 |         #    state, pID = checkAVDState(vmName, "powered off")
131 |  
132 | 
133 |     except Exception as e:
134 |         print e
135 |         return False
136 | 
137 |     return True
138 | 
def sendEmail(srcAddress, dstAddress, msgSubject, msgBody):
    """
    Sends a plain-text email via Gmail's SMTP server using the configured Google credentials
    :param srcAddress: The address of the sender
    :type srcAddress: str
    :param dstAddress: The address of the recipient
    :type dstAddress: str
    :param msgSubject: The subject of the message
    :type msgSubject: str
    :param msgBody: The (plain text) body of the message
    :type msgBody: str
    :return: A boolean depicting the success/failure of the operation
    """
    try:
        # Connect to server
        server = smtplib.SMTP('smtp.gmail.com', 587)
        try:
            # Login
            server.starttls()
            username, password = getGoogleCreds()
            server.login(username, password)
            # Prepare message
            msg = MIMEMultipart()
            msg['From'] = srcAddress
            msg['To'] = dstAddress
            msg['Subject'] = msgSubject
            msg.attach(MIMEText(msgBody, 'plain'))
            # Bombs away
            server.sendmail(srcAddress, dstAddress, msg.as_string())
        finally:
            # Always terminate the session, so that failed logins/deliveries do not leak the connection
            server.quit()
    except Exception as e:
        print(e)
        return False

    return True
160 |     
161 | 
162 | # Copied from the "googleplay_api" helpers.py
def sizeof_fmt(num):
    """
    Formats a size in bytes into a human-readable string e.g., "1.5MB"
    :param num: The size in bytes
    :type num: int or float
    :return: An str of the size with one decimal digit followed by its unit (bytes, KB, MB, GB, or TB)
    """
    for x in ['bytes','KB','MB','GB']:
        if num < 1024.0:
            return "%3.1f%s" % (num, x)
        num /= 1024.0
    # Anything that did not fit a smaller unit is expressed in terabytes, instead of
    # falling off the loop and returning None for sizes of 1024TB or more
    return "%3.1f%s" % (num, 'TB')
168 | 
def specificity_score(ground_truth, predicted, classes=(1, 0)):
    """
    Calculates the specificity (true negative rate) i.e., TN/(TN+FP)
    :param ground_truth: The true labels
    :type ground_truth: list
    :param predicted: The predicted labels
    :type predicted: list
    :param classes: The labels of the positive and the negative classes, in that order
    :type classes: tuple
    :return: A float depicting the specificity, or -1 if it cannot be calculated (e.g., lists of different lengths or no negative instances to consider)
    """
    try:
        if len(ground_truth) != len(predicted):
            return -1
        positive, negative = classes[0], classes[1]
        # Only instances whose true label is negative contribute to the score
        tn, fp = 0, 0
        for truth, guess in zip(ground_truth, predicted):
            if truth != negative:
                continue
            if guess == negative:
                tn += 1
            elif guess == positive:
                fp += 1

        # Undefined without true negatives and false positives
        if tn + fp == 0:
            return -1
        score = float(tn)/(float(tn)+float(fp))

    except Exception as e:
        print(e)
        return -1

    return score
192 | 


--------------------------------------------------------------------------------
/tools/downloadAPKPlayStore.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python
  2 | 
  3 | from Aion.data_generation.collection.playStoreCrawler import PlayStoreCrawler # The crawler
  4 | from Aion.utils.graphics import * # Needed for pretty printing
  5 | 
  6 | import os, sys, glob, shutil, argparse, subprocess
  7 | 
  8 | def defineArguments():
  9 |     parser = argparse.ArgumentParser(prog="downloadAPKPlayStore.py", description="Uses \"Aion\"'s Play Store crawler to download APK's of benign applications.")
 10 |     parser.add_argument("-m", "--mode", help="Help", required=True, choices=["download-all", "download-category", "download-subcategory", "update"])
 11 |     parser.add_argument("-n", "--num", help="The number of APK's to download", required=False, default=10)
 12 |     parser.add_argument("-c", "--category", help="The category of the APK's to download", required=False, default="")
 13 |     parser.add_argument("-s", "--subcategory", help="The sub-category of the APK's to download", required=False, default="")
 14 |     parser.add_argument("-f", "--freeapps", help="Whether to exclusively download free apps", required=False, choices=["yes", "no"], default="no")
 15 |     parser.add_argument("-o", "--outdir", help="The directory to save the downloaded APK's", required=False, default=".")
 16 |     parser.add_argument("-r", "--repo", help="The file containing the already downloaded APK's", required=False, default="repo.csv")
 17 |     parser.add_argument("-v", "--verbose", help="Display debug messages", default="no", choices=["yes", "no"])
 18 |     return parser
 19 | 
 20 | def main():
 21 |     try:
 22 |         argumentParser = defineArguments()
 23 |         arguments = argumentParser.parse_args()
 24 |         prettyPrint("Welcome to the droid hunter")
 25 |  
 26 |         # Step 0 - Load the repo of downloaded APK's
 27 |         if not os.path.exists(arguments.repo):
 28 |             prettyPrint("Could not locate the repository of downloaded APK's. Creating a new one", "warning")
 29 |             repoFile = open("repo.csv", "w")
 30 |         else:
 31 |             repoFile = open(arguments.repo, "a+")
 32 |             downloadedApps = repoFile.read().split(",")
 33 |             if arguments.verbose == "yes":
 34 |                 prettyPrint("Successfully retrieved %s apps from the repository \"%s\"" % (len(downloadedApps), arguments.repo), "debug")
 35 | 
 36 |         if arguments.mode == "download-all":
 37 |             # Step 1 - Retrieve all categories
 38 |             crawler = PlayStoreCrawler()
 39 |             if arguments.verbose == "yes":
 40 |                 prettyPrint("Logging into the Play store", "debug")
 41 |             # (1.a) Log into the play store
 42 |             if not crawler.login():
 43 |                 prettyPrint("Unable to login to the Google Play store. Check the credentials in the configuration files", "error")
 44 |                 return False
 45 |             # (1.b) Retrieve app categories
 46 |             appCategories = crawler.getCategories()
 47 | 
 48 |             if arguments.verbose == "yes" and len(appCategories) > 0:
 49 |                 prettyPrint("Successfully retrieved %s categories from the Play Store" % len(appCategories), "debug")
 50 | 
 51 |             # (1.c) Iterate over each category, retrieving its sub-categories, and apps
 52 |             downloadQueue = [] # Store the apps to be downloaded
 53 |             for category in appCategories:
 54 |                 prettyPrint("Processing the category \"%s\"" % category)
 55 |                 subCategories = crawler.getSubCategories(category)
 56 |                 if arguments.verbose == "yes" and len(subCategories) > 0:
 57 |                     prettyPrint("Successfully retrieved %s sub-categories from the Play Store" % len(subCategories), "debug")
 58 |                 # (1.d) Iterate over each sub-category, retrieving the apps, and downloading them
 59 |                 for subCategory in subCategories:
 60 |                     prettyPrint("Processing the sub-category \"%s\"" % subCategory)
 61 |                     apps = crawler.getApps(category, subCategory)
 62 |                     if arguments.verbose == "yes" and len(apps) > 0:
 63 |                         prettyPrint("Successfully retrieved %s apps from the Play Store" % len(apps), "debug")
 64 |                     # (1.e) Add the apps to the downloading queue (if we do NOT already have them)
 65 |                     for app in apps:
 66 |                         # A check about the app being "free" and whether to download it
 67 |                         if app.appPrice.lower() == "free" and arguments.freeapps == "yes":
 68 |                             if not app in downloadedApps and not app in downloadQueue:
 69 |                                 downloadQueue.append(app)
 70 |             
 71 |             # Calculate the sizes of the to-be-downloaded apps
 72 |             totalSize = sum(app.appSize for app in downloadQueue)
 73 |             #for app in downloadQueue:
 74 |             #    print "%s is of size %s, and costs %s" % (app.appName, sizeof_fmt(app.appSize), app.appPrice)
 75 |             # (1.f) Confirm downloading the queued apps
 76 |             prettyPrint("Successfully retrieved %s apps to download with total size of %s" % (len(downloadQueue), sizeof_fmt(totalSize)))
 77 |             confirmDownload = raw_input("Download apps? [Y/n] ")
 78 |             if confirmDownload == "" or confirmDownload.lower() == "y":
 79 |                 # Step 2 - Download the APK's
 80 |                 for app in downloadQueue:
 81 |                     prettyPrint("Downloading \"%s\"." % app.appID)
 82 |                     crawler.downloadApp(app)
 83 | 		    # Step 3 - Copy the downloaded APK to the output directory
 84 |                     for downloadedApp in glob.glob("./*.apk"):
 85 |                         # (3.a) Check whether the out directory exists and create it otherwise
 86 |                         if arguments.outdir.lower().find(":") != -1:
 87 |                             # Consider this to be a remote directory, and use "scp" to copy the app
 88 |                             if arguments.verbose == "yes":
 89 |                                 prettyPrint("Using \"scp\" to copy the APK's to remote site", "debug")
 90 |                                 scpArgs = ["sshpass", SSH_PASSWORD, "scp", downloadedApp, "%s@" % arguments.outdir]
 91 |                                 subprocess.Popen(scpArgs, stderr=subprocess.STDOUT, stdout=subprocess.PIPE).communicate()[0]
 92 |                                 os.remove(app)
 93 |                                 # Add app name to repo
 94 |                                 repoFile.write(app.appID)
 95 |                         else:
 96 |                             if arguments.verbose == "yes":
 97 |                                 prettyPrint("Copying %s to local directory %s" % (app.appID, arguments.outdir), "debug")
 98 |                             if not os.path.exists(arguments.outdir):
 99 |                                 os.mkdir(arguments.outdir)
100 |                             # Move the APK's one-by-one, if it does not exist
101 |                             print "%s/%s.apk" % (arguments.outdir, app.appID)
102 |                             if not os.path.exists("%s/%s.apk" % (arguments.outdir, app.appID)):
103 |                                 shutil.move(downloadedApp, arguments.outdir)
104 |                                 # Add app name to repo
105 |                                 repoFile.write("%s," % app.appID)
106 |                                 repoFile.flush() # Write app names right away
107 |                             else:
108 |                                 prettyPrint("App \"%s\" already exists in the output directory \"%s\". Skipping" % (app.appID, arguments.outdir), "warning")
109 |                                 os.remove("./%s.apk" % app.appID)
110 |                          
111 |             else:
112 |                 prettyPrint("As you wish")
113 |                 return True
114 |             
115 |         repoFile.close()
116 | 
117 |     except Exception as e:
118 |         prettyPrintError(e)
119 |         return False
120 |     
121 |     prettyPrint("Good day to you ^_^")
122 |     return True
123 | 
# Run the tool only when this file is executed as a script (not when it is imported)
if __name__ == "__main__":
    main() 
126 | 


--------------------------------------------------------------------------------
/tools/staticExperimentI.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python
  2 | 
  3 | from Aion.data_generation.reconstruction.Numerical import *
  4 | from Aion.data_inference.learning import ScikitLearners
  5 | from Aion.utils.db import *
  6 | from Aion.utils.graphics import *
  7 | 
  8 | import glob, sys,argparse
  9 | 
 10 | def defineArguments():
 11 |     parser = argparse.ArgumentParser(prog="staticExperimentI.py", description="A tool to implement the stimulation-detection feedback loop using Garfield as stimulation engine.")
 12 |     parser.add_argument("-x", "--malwaredir", help="The directory containing the malicious APK's to analyze and use as training/validation dataset", required=True)
 13 |     parser.add_argument("-g", "--goodwaredir", help="The directory containing the benign APK's to analyze and use as training/validation dataset", required=True)
 14 |     parser.add_argument("-d", "--datasetname", help="A unique name to give to the dataset used in the experiment (for DB storage purposes)", required=True)     
 15 |     parser.add_argument("-f", "--featurestype", help="The type of static features to load", required=False, default="all", choices=["basic", "permission", "api", "all"])
 16 |     parser.add_argument("-r", "--runnumber", help="The number of the run", required=True)
 17 |     return parser
 18 | 
 19 | def main():
 20 |     try:
 21 |         argumentParser = defineArguments()
 22 |         arguments = argumentParser.parse_args()
 23 |         prettyPrint("Welcome to the \"Aion\"'s static experiment I")
 24 | 
 25 |         # 1. Load APK's and split into training and test datasets
 26 |         prettyPrint("Loading APK's from \"%s\" and \"%s\"" % (arguments.malwaredir, arguments.goodwaredir))
 27 |         # Retrieve malware APK's
 28 |         malFiles = glob.glob("%s/*.static" % arguments.malwaredir)
 29 |         if len(malFiles) < 1:
 30 |             prettyPrint("Could not find any malicious feature files" , "warning")
 31 |         else:
 32 |             prettyPrint("Successfully retrieved %s malicious feature files" % len(malFiles))
 33 |         # Retrieve goodware APK's
 34 |         goodFiles = glob.glob("%s/*.static" % arguments.goodwaredir)
 35 |         if len(goodFiles) < 1:
 36 |             prettyPrint("Could not find any benign feature files", "warning")
 37 |         else:
 38 |             prettyPrint("Successfully retrieved %s benign feature files" % len(goodFiles))
 39 | 
 40 |         # Split the data into training and test datasets
 41 |         malTraining, malTest = [], []
 42 |         goodTraining, goodTest = [], []
 43 |         malTestSize, goodTestSize = len(malFiles) / 3, len(goodFiles) / 3
 44 |         # Start with the malicious APKs
 45 |         while len(malTest) < malTestSize:
 46 |             malTest.append(malFiles.pop(random.randint(0, len(malFiles)-1)))
 47 |         malTraining += malFiles
 48 |         prettyPrint("[MALWARE] Training dataset size is %s, test dataset size is %s" % (len(malTraining), len(malTest)))
 49 |         # Same with benign APKs
 50 |         while len(goodTest) < goodTestSize:
 51 |             goodTest.append(goodFiles.pop(random.randint(0, len(goodFiles)-1)))
 52 |         goodTraining += goodFiles
 53 |         prettyPrint("[GOODWARE] Training dataset size is %s, test dataset size is %s" % (len(goodTraining), len(goodTest)))
 54 | 
 55 | 
 56 |         # 2. Load the feature vectors (Training)
 57 |         Xtr, ytr, Xte, yte = [], [], [], []
 58 |         for x in malTraining + goodTraining:
 59 |             v = loadNumericalFeatures(x)
 60 |             if len(v) > 0:
 61 |                 # Vector
 62 |                 if arguments.featurestype == "all":
 63 |                     Xtr.append(v)
 64 |                 elif arguments.featurestype == "basic":
 65 |                     Xtr.append(v[:6])
 66 |                 elif arguments.featurestype == "permission":
 67 |                     Xtr.append(v[6:10])
 68 |                 else:
 69 |                     Xtr.append(v[10:])
 70 |                 # Label
 71 |                 if x in malTraining:
 72 |                     ytr.append(1)
 73 |                 else:
 74 |                     ytr.append(0)
 75 | 
 76 |         # Load the feature vectors (Test)
 77 |         for x in malTest + goodTest:
 78 |             v = loadNumericalFeatures(x)
 79 |             if len(v) > 0:
 80 |                 # Vector
 81 |                 if arguments.featurestype == "all":
 82 |                     Xte.append(v)
 83 |                 elif arguments.featurestype == "basic":
 84 |                     Xte.append(v[:6])
 85 |                 elif arguments.featurestype == "permission":
 86 |                     Xte.append(v[6:10])
 87 |                 else:
 88 |                     Xte.append(v[10:])
 89 |                 # Label
 90 |                 if x in malTest:
 91 |                     yte.append(1)
 92 |                 else:
 93 |                     yte.append(0)
 94 | 
 95 | 
 96 |         # 3. Perform the classification
 97 |         metricsDict, metricsDict_test = {}, {}
 98 |         resultsFile = open("results_static_%s_%s_run%s.txt" % (arguments.datasetname, arguments.featurestype, arguments.runnumber), "w")
 99 |         prettyPrint("Ensemble mode classification: K-NN, SVM, and Random Forests using %s features" % arguments.featurestype)
100 |         # Classifying using K-nearest neighbors
101 |         K = [10, 25, 50, 100, 250, 500]
102 |         for k in K:
103 |             prettyPrint("Classifying using K-nearest neighbors with K=%s" % k)
104 |             clf, predicted, predicted_test = ScikitLearners.predictAndTestKNN(Xtr, ytr, Xte, yte, K=k)
105 |             metrics = ScikitLearners.calculateMetrics(ytr, predicted)
106 |             metrics_test = ScikitLearners.calculateMetrics(yte, predicted_test)
107 |             metricsDict["KNN%s" % k] = metrics
108 |             metricsDict_test["KNN%s" % k] = metrics_test
109 | 
110 |         # Classifying using Random Forests
111 |         E = [10, 25, 50, 75, 100]
112 |         for e in E:
113 |             prettyPrint("Classifying using Random Forests with %s estimators" % e)
114 |             clf, predicted, predicted_test = ScikitLearners.predictAndTestRandomForest(Xtr, ytr, Xte, yte, estimators=e)
115 |             metrics = ScikitLearners.calculateMetrics(ytr, predicted)
116 |             metrics_test = ScikitLearners.calculateMetrics(yte, predicted_test)
117 |             metricsDict["Trees%s" % e] = metrics
118 |             metricsDict_test["Trees%s" % e] = metrics_test
119 | 
120 |         # Classifying using SVM
121 |         prettyPrint("Classifying using Support vector machines")
122 |         clf, predicted, predicted_test = ScikitLearners.predictAndTestSVM(Xtr, ytr, Xte, yte)
123 |         metrics = ScikitLearners.calculateMetrics(ytr, predicted)
124 |         metrics_test = ScikitLearners.calculateMetrics(yte, predicted_test)
125 |         metricsDict["SVM"] = metrics
126 |         metricsDict_test["SVM"] = metrics_test
127 |                 
128 |         # Now do the majority voting ensemble
129 |         allCs = ["KNN-%s" % x for x in K] + ["FOREST-%s" % e for e in E] + ["SVM"]
130 |         clf, predicted, predicted_test = ScikitLearners.predictAndTestEnsemble(Xtr, ytr, Xte, yte, classifiers=allCs)
131 |         metrics = ScikitLearners.calculateMetrics(predicted, ytr) # Used to decide upon whether to iterate more
132 |         metrics_test = ScikitLearners.calculateMetrics(predicted_test, yte)
133 |         metricsDict["Ensemble"] = metrics
134 |         metricsDict_test["Ensemble"] = metrics_test
135 |       
136 |         # Print and save results
137 |         for m in metricsDict:
138 |             # The average metrics for training dataset
139 |             resultsFile.write("[TRAINING] Results for %s:\n" % m)
140 |             resultsFile.write("%s\n" % str(metricsDict[m]))
141 |             prettyPrint("Metrics using %s" % m, "output")
142 |             prettyPrint("Accuracy: %s" % str(metricsDict[m]["accuracy"]), "output")
143 |             prettyPrint("Recall: %s" % str(metricsDict[m]["recall"]), "output")
144 |             prettyPrint("Specificity: %s" % str(metricsDict[m]["specificity"]), "output")
145 |             prettyPrint("Precision: %s" % str(metricsDict[m]["precision"]), "output")
146 |             prettyPrint("F1 Score: %s" %  str(metricsDict[m]["f1score"]), "output")
147 |         
148 |        
149 |         # Print and save results [FOR THE TEST DATASET]
150 |         for m in metricsDict_test:
151 |             resultsFile.write("[TEST] Results for %s:\n" % m)
152 |             resultsFile.write("%s\n" % str(metricsDict_test[m]))
153 |             # The average metrics for training dataset
154 |             prettyPrint("Metrics using cross validation and %s" % m, "output")
155 |             prettyPrint("Accuracy: %s" % str(metricsDict_test[m]["accuracy"]), "output")
156 |             prettyPrint("Recall: %s" % str(metricsDict_test[m]["recall"]), "output")
157 |             prettyPrint("Specificity: %s" % str(metricsDict_test[m]["specificity"]), "output")
158 |             prettyPrint("Precision: %s" % str(metricsDict_test[m]["precision"]), "output")
159 |             prettyPrint("F1 Score: %s" %  str(metricsDict_test[m]["f1score"]), "output")
160 |         
161 | 
162 |     except Exception as e:
163 |         prettyPrintError(e)
164 |         return False
165 | 
166 | 
167 |     return True
168 | 
# Run the experiment only when this file is executed as a script (not when it is imported)
if __name__ == "__main__":
    main()
171 | 
172 | 
173 | 
174 | 
175 | 
176 | 
177 | 
178 | 
179 | 
180 | 
181 | 
182 | 
183 | 


--------------------------------------------------------------------------------
/utils/db.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python
  2 | 
  3 | from Aion.utils.data import *
  4 | from Aion.utils.graphics import *
  5 | from Aion.utils.misc import *
  6 | 
  7 | import glob, sqlite3, datetime, os
  8 | from datetime import datetime
  9 | 
 10 | class AionDB():
 11 |     """
 12 |     A class to handle access to the Aion SQLite database
 13 |     """
 14 |     def __init__(self, runID, runDataset):
 15 |         """
 16 |         Initializes an object with metadata about the current run
 17 |         :param runID: A unique ID given to the experiment run
 18 |         :type runID: int
 19 |         :param runDataset: The dataset used during this run
 20 |         :type runDataset: str
 21 |         """
 22 |         try:
 23 |             # Check for the existence of the Aion database
 24 |             dbPath = getAionDBPath()
 25 |             if not os.path.exists(dbPath):
 26 |                 prettyPrint("Unable to locate the database \"%s\". A new database will be created" % dbPath, "warning")
 27 |                 dbScriptPath = getAionDBRecovery()
 28 |                 if not os.path.exists(dbScriptPath):
 29 |                     prettyPrint("Unable to locate the database script file under \"%s\". Exiting" % dbScriptPath, "error")
 30 |                     exit(1)
 31 |                 # Connect to database
 32 |                 self.conn = sqlite3.connect(dbPath) # Creates a DB if it does NOT exist
 33 |                 self.conn.executescript(open(dbScriptPath).read())
 34 |             else:
 35 |                 self.conn = sqlite3.connect(dbPath)
 36 |             # Insert a record about the current run
 37 |             startTime = getTimestamp(includeDate=True)
 38 |             self.insert(table="run", columns=[], values=["%s" % runID, "%s" % runDataset, "%s" % startTime, "N/A", "0"])
 39 | 
 40 |         except Exception as e:
 41 |             prettyPrintError(e)
 42 | 
 43 |     def close(self):
 44 |         """
 45 |         Saves and closes the database
 46 |         :return: A bool depicting the success/failure of the operation
 47 |         """
 48 |         try:
 49 |             if not self.save():
 50 |                 prettyPrint("Unable to save the current state of the database", "warning")
 51 |                 return False
 52 |         except Exception as e:
 53 |             prettyPrintError(e)
 54 |             return False
 55 | 
 56 |         return True
 57 |        
 58 |     def delete(self, table, filters, cOperator="=", lOperator="AND"):
 59 |         """
 60 |         Deletes records from a table
 61 |         :param table: The name of the table to be updated
 62 |         :type table: str
 63 |         :param filters: A list of conditions to use in the WHERE clause of the query
 64 |         :type filters: list of tuples
 65 |         :param cOperator: The comparison operator used in the WHERE clause (i.e. '=', '>', '<', 'LIKE', etc.)
 66 |         :type cOperator: str
 67 |         :param lOperator: The logic operator used to join the filters in the WHERE clause (i.e. 'AND' or 'OR')
 68 |         :type lOperator: str
 69 |         :return: A bool depicting the success/failure of the operation
 70 |         """
 71 |         try:
 72 |             # Build query
 73 |             query = "DELETE FROM %s" % table
 74 |             # Add WHERE clause, if applicable
 75 |             if len(filters) > 0:
 76 |                 query += " WHERE "
 77 |                 temp = ""
 78 |                 for i in range(len(filters)):
 79 |                     query = query + "%s %s '%s'" % (filters[i][0], cOperator, filters[i][1])
 80 |                     if i != len(filters) - 1:
 81 |                         query += " %s " % lOperator
 82 |             # Execute query
 83 |             if verboseON():
 84 |                 prettyPrint("Executing query: %s" % query, "debug")
 85 |             self.conn.execute(query)
 86 |     
 87 |         except Exception as e:
 88 |             prettyPrintError(e)
 89 |             return False
 90 | 
 91 |         return True
 92 |     
 93 |     def execute(self, query):
 94 |         """
 95 |         Executes a SQL query passed as a string
 96 |         :param query: The SQL query to execute
 97 |         :type query: str
 98 |         :return: A cursor of the results set or None
 99 |         """
100 |         try:
101 |             if verboseON():
102 |                 prettyPrint("Executing query: %s" % query, "debug")
103 |             results = self.conn.execute(query)
104 |         except Exception as e:
105 |             prettyPrintError(e)
106 |             return None
107 | 
108 |         return results
109 | 
110 |     def insert(self, table, columns, values):
111 |         """
112 |         Inserts a new record into the database
113 |         :param table: The table to insert the new values in
114 |         :type table: str
115 |         :param values: The new values to be inserted
116 |         :type values: list
117 |         :return: A bool depicting the success/failure of the INSERT operation
118 |         """
119 |         try:
120 |             # Prepare values
121 |             values = ["'%s'" % str(v) for v in values]
122 |             # Build query
123 |             if len(columns) > 0:
124 |                 query = "INSERT INTO %s (%s) VALUES (%s)" % (table, ",".join(columns), ",".join(values))
125 |             else:
126 |                 query = "INSERT INTO %s VALUES(%s)" % (table, ",".join(values))
127 |             # Execute query
128 |             if verboseON():
129 |                 prettyPrint("Executing query: %s" % query, "debug")
130 |             self.conn.execute(query)
131 |             
132 |         except Exception as e:
133 |             prettyPrintError(e)
134 |             return False
135 | 
136 |         return True
137 | 
138 |     def save(self):
139 |         """
140 |         Saves the current state of the database by committing the changes
141 |         :return: A bool depicting the success/failure of the operation
142 |         """
143 |         try:
144 |             self.conn.commit()
145 |         except Exception as e:
146 |             prettyPrintError(e)
147 |             return False
148 | 
149 |         return True
150 | 
151 |     def select(self, columns, table, filters, cOperator="=", lOperator="AND"):
152 |         """
153 |         Retrieves records from the the database
154 |         :param columns: The columns to select from the table
155 |         :type columns: list (Default: [] = *)
156 |         :param table: The table whence the data is selected
157 |         :type table: str
158 |         :param filters: A list of conditions to use in the WHERE clause of the query
159 |         :type filters: list of tuples
160 |         :param cOperator: The comparison operator used in the WHERE clause (i.e. '=', '>', '<', 'LIKE', etc.)
161 |         :type cOperator: str
162 |         :param lOperator: The logic operator used to join the filters in the WHERE clause (i.e. 'AND' or 'OR')
163 |         :type lOperator: str
164 |         :return: sqlite3.Cursor of the returned rows
165 |         """
166 |         try:
167 |             # Build query
168 |             query = "SELECT "
169 |             if len(columns) < 1:
170 |                 query += "*"
171 |             else:
172 |                 query += ",".join(columns)
173 |             # FROM [table]
174 |             query += " FROM %s" % table
175 |             # Add WHERE clause, if applicable
176 |             if len(filters) > 0:
177 |                 query += " WHERE "
178 |                 temp = ""
179 |                 for i in range(len(filters)):
180 |                     query = query + "%s %s '%s'" % (filters[i][0], cOperator, filters[i][1])
181 |                     if i != len(filters) - 1:
182 |                         query += " %s " % lOperator
183 |             # Execute query
184 |             if verboseON():
185 |                 prettyPrint("Executing query: %s" % query, "debug")
186 |             cursor = self.conn.execute(query)
187 |                         
188 |         except Exception as e:
189 |             prettyPrintError(e)
190 |             return None
191 | 
192 |         return cursor
193 | 
194 |     def update(self, table, values, filters, cOperator="=", lOperator="AND"):
195 |         """
196 |         Updates records in the database
197 |         :param table: The name of the table to be updated
198 |         :type table: str
199 |         :param values: The list of columns to be updated along with their new values
200 |         :type values: list of tuples
201 |         :param filters: A list of conditions to use in the WHERE clause of the query
202 |         :type filters: list of tuples
203 |         :param cOperator: The comparison operator used in the WHERE clause (i.e. '=', '>', '<', 'LIKE', etc.)
204 |         :type cOperator: str
205 |         :param lOperator: The logic operator used to join the filters in the WHERE clause (i.e. 'AND' or 'OR')
206 |         :type lOperator: str
207 |         :return: A bool depicting the success/failure of the operation
208 |         """
209 |         try:
210 |             # Build query
211 |             query = "UPDATE %s SET " % table
212 |             # Add the columns to be updated and their values
213 |             for v in values:
214 |                 query = query + "%s='%s'," % (v[0], v[1])
215 |             query = query[:-1] # Remove the trailing comma
216 |             # Add WHERE clause, if applicable
217 |             if len(filters) > 0:
218 |                 query += " WHERE "
219 |                 temp = ""
220 |                 for i in range(len(filters)):
221 |                     query = query + "%s %s '%s'" % (filters[i][0], cOperator, filters[i][1])
222 |                     if i != len(filters) - 1:
223 |                         query += " %s " % lOperator
224 |             # Execute query
225 |             if verboseON():
226 |                 prettyPrint("Executing query: %s" % query, "debug")
227 |             self.conn.execute(query)
228 |             
229 |         except Exception as e:
230 |             prettyPrintError(e)
231 |             return False
232 | 
233 |         return True
234 | 
235 | 


--------------------------------------------------------------------------------
/shared/constants.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | 
 3 | keyEvents = ["KEYCODE_UNKNOWN", "KEYCODE_MENU", "KEYCODE_SOFT_RIGHT", "KEYCODE_HOME", "KEYCODE_BACK", "KEYCODE_CALL", "KEYCODE_ENDCALL", "KEYCODE_0", "KEYCODE_1", "KEYCODE_2", "KEYCODE_3", "KEYCODE_4", "KEYCODE_5", "KEYCODE_6", "KEYCODE_7", "KEYCODE_8", "KEYCODE_9", "KEYCODE_STAR", "KEYCODE_POUND", "KEYCODE_DPAD_UP", "KEYCODE_DPAD_DOWN", "KEYCODE_DPAD_LEFT", "KEYCODE_DPAD_RIGHT", "KEYCODE_DPAD_CENTER", "KEYCODE_VOLUME_UP", "KEYCODE_VOLUME_DOWN", "KEYCODE_POWER", "KEYCODE_CAMERA", "KEYCODE_CLEAR", "KEYCODE_A", "KEYCODE_B", "KEYCODE_C", "KEYCODE_D", "KEYCODE_E", "KEYCODE_F", "KEYCODE_G", "KEYCODE_H", "KEYCODE_I", "KEYCODE_J", "KEYCODE_K", "KEYCODE_L", "KEYCODE_M", "KEYCODE_N", "KEYCODE_O", "KEYCODE_P", "KEYCODE_Q", "KEYCODE_R", "KEYCODE_S", "KEYCODE_T", "KEYCODE_U", "KEYCODE_V", "KEYCODE_W", "KEYCODE_X", "KEYCODE_Y", "KEYCODE_Z", "KEYCODE_COMMA", "KEYCODE_PERIOD", "KEYCODE_ALT_LEFT", "KEYCODE_ALT_RIGHT", "KEYCODE_SHIFT_LEFT", "KEYCODE_SHIFT_RIGHT", "KEYCODE_TAB", "KEYCODE_SPACE", "KEYCODE_SYM", "KEYCODE_EXPLORER", "KEYCODE_ENVELOPE", "KEYCODE_ENTER", "KEYCODE_DEL", "KEYCODE_GRAVE", "KEYCODE_MINUS", "KEYCODE_EQUALS", "KEYCODE_LEFT_BRACKET", "KEYCODE_RIGHT_BRACKET", "KEYCODE_BACKSLASH", "KEYCODE_SEMICOLON", "KEYCODE_APOSTROPHE", "KEYCODE_SLASH", "KEYCODE_AT", "KEYCODE_NUM", "KEYCODE_HEADSETHOOK", "KEYCODE_FOCUS", "KEYCODE_PLUS", "KEYCODE_MENU", "KEYCODE_NOTIFICATION", "KEYCODE_SEARCH", "TAG_LAST_KEYCODE"]
 4 | 
# The types of key events: pressing a key down, releasing it, or both
keyEventTypes = ["DOWN", "UP", "DOWN_AND_UP"]

# The names of the actions that can be carried out on an activity
# NOTE(review): presumably consumed by the stimulation engines - confirm against callers
activityActions = ["touch", "type", "press", "drag"]
 8 | 
 9 | sensitiveAPICalls = {"android.content.ContextWrapper": ["bindService", "deleteDatabase", "deleteFile", "deleteSharedPreferences", "getSystemService", "openFileInput", "startService", "stopService", "unbindService", "unregisterReceiver"], "android.accounts.AccountManager": ["clearPassword", "getAccounts", "getPassword", "peekAuthToken", "setAuthToken", "setPassword"], "android.app.Activity": ["startActivity", "setContentView", "setVisible", "takeKeyEvents"], "android.app.DownloadManager": ["addCompletedDownload", "enqueue", "getUriForDownloadedFile", "openDownloadedFile", "query"], "android.app.IntentService": ["onStartCommand"], "android.content.ContentResolver": ["insert", "openFileDescriptor", "query", "update"], "android.content.pm.PackageInstaller": ["uninstall"], "android.database.sqlite.SQLiteDatabase": ["execSQL", "insert", "insertOrThrow", "openDatabase", "query", "rawQuery", "replace", "update"], "android.hardware.Camera": ["open", "reconnect", "release", "startPreview", "stopPreview", "takePicture"], "android.hardware.display.DisplayManager": ["getDisplay", "getDisplays"], "android.location.Location": ["getLatitude", "getLongitude"], "android.media.AudioRecord": ["read", "startRecording", "stop"], "android.media.MediaRecorder": ["prepare", "setCamera", "start", "stop"], "android.net.Network": ["bindSocket", "openConnection"], "android.net.NetworkInfo": ["isAvailable", "isConnected", "isRoaming"], "android.net.wifi.WifiInfo": ["getMacAddress", "getSSID"], "android.net.wifi.WifiManager": ["disconnect", "getScanResults", "getWifiState", "reconnect", "startScan"], "android.os.Process": ["killProcess"], "android.os.PowerManager": ["isInteractive", "isScreenOn", "reboot"], "android.telephony.SmsManager": ["sendDataMessage", "sendTextMessage"], "android.widget.Toast": ["makeText"], "dalvik.system.DexClassLoader": ["loadClass"], "dalvik.system.PathClassLoader": ["loadClass"], "java.lang.class": ["forName", "getClassLoader", "getClasses", "getField", 
"getFields", "getMethods", "getMethod", "getName"], "java.lang.reflect.Method": ["invoke"], "java.net.HttpCookie": ["getName", "getPath", "getSecure", "getValue", "parse", "setPath", "setSecure", "setValue"], "java.net.URL.openConnection": ["openConnection", "openStream"]}
10 | 
# Fully-qualified names of classes, stored as unicode strings
# NOTE(review): presumably the classes hooked by droidmon by default - confirm against callers; the order may matter to them
droidmonDefaultClasses = [u'android.telephony.TelephonyManager', u'android.net.wifi.WifiInfo', u'android.os.Debug', u'android.app.SharedPreferencesImpl$EditorImpl', u'android.content.ContentValues', u'java.net.URL', u'org.apache.http.impl.client.AbstractHttpClient', u'android.app.ContextImpl', u'android.app.ActivityThread', u'android.app.Activity', u'dalvik.system.BaseDexClassLoader', u'dalvik.system.DexFile', u'dalvik.system.DexClassLoader', u'dalvik.system.PathClassLoader', u'java.lang.reflect.Method', u'javax.crypto.spec.SecretKeySpec', u'javax.crypto.Cipher', u'javax.crypto.Mac', u'android.app.ApplicationPackageManager', u'android.app.NotificationManager', u'android.util.Base64', u'android.net.ConnectivityManager', u'android.content.BroadcastReceiver', u'android.telephony.SmsManager', u'java.lang.Runtime', u'java.lang.ProcessBuilder', u'java.io.FileOutputStream', u'java.io.FileInputStream', u'android.app.ActivityManager', u'android.os.Process', u'android.content.ContentResolver', u'android.accounts.AccountManager', u'android.location.Location', u'android.media.AudioRecord', u'android.media.MediaRecorder', u'android.os.SystemProperties', u'libcore.io.IoBridge']
12 | 
# Names of methods (without their classes), stored as unicode strings
# NOTE(review): presumably the methods hooked by droidmon by default - confirm against callers; the order may matter to them
droidmonDefaultMethods = [u'getDeviceId', u'getSubscriberId', u'getLine1Number', u'getNetworkOperator', u'getNetworkOperatorName', u'getSimOperatorName', u'getMacAddress', u'getSimCountryIso', u'getSimSerialNumber', u'getNetworkCountryIso', u'getDeviceSoftwareVersion', u'isDebuggerConnected', u'putString', u'putBoolean', u'putInt', u'putLong', u'putFloat', u'put', u'openConnection', u'execute', u'registerReceiver', u'handleReceiver', u'startActivity', u'findResource', u'findLibrary', u'loadDex',u'findResources', u'loadClass', u'invoke', u'doFinal', u'setComponentEnabledSetting', u'notify', u'decode', u'listen', u'encode', u'encodeToString', u'setMobileDataEnabled', u'abortBroadcast', u'sendTextMessage', u'sendMultipartTextMessage', u'exec', u'start', u'write', u'read', u'killBackgroundProcesses', u'killProcess', u'query', u'registerContentObserver', u'insert', u'getAccountsByType', u'getAccounts', u'getLatitude', u'getLongitude', u'delete', u'startRecording', u'get', u'getInstalledPackages', u'open']
14 | 
# API calls in the fully-qualified "class.method" format
# NOTE(review): presumably the API calls hooked by droidmon by default - confirm against callers; the order may matter to them
droidmonDefaultAPIs = ['android.accounts.AccountManager.getAccounts', 'android.accounts.AccountManager.getAccountsByType', 'android.app.Activity.startActivity', 'android.app.ActivityManager.killBackgroundProcesses', 'android.app.ActivityThread.handleReceiver', 'android.app.ApplicationPackageManager.getInstalledPackages', 'android.app.ApplicationPackageManager.setComponentEnabledSetting', 'android.app.ContextImpl.registerReceiver', 'android.app.NotificationManager.notify', 'android.app.SharedPreferencesImpl$EditorImpl.putBoolean', 'android.app.SharedPreferencesImpl$EditorImpl.putFloat', 'android.app.SharedPreferencesImpl$EditorImpl.putInt', 'android.app.SharedPreferencesImpl$EditorImpl.putLong', 'android.app.SharedPreferencesImpl$EditorImpl.putString', 'android.content.BroadcastReceiver.abortBroadcast', 'android.content.ContentResolver.delete', 'android.content.ContentResolver.insert', 'android.content.ContentResolver.query', 'android.content.ContentResolver.registerContentObserver', 'android.content.ContentValues.put', 'android.location.Location.getLatitude', 'android.location.Location.getLongitude', 'android.media.AudioRecord.startRecording', 'android.media.MediaRecorder.start', 'android.net.ConnectivityManager.setMobileDataEnabled', 'android.net.wifi.WifiInfo.getMacAddress', 'android.os.Debug.isDebuggerConnected', 'android.os.Process.killProcess', 'android.os.SystemProperties.get', 'android.telephony.SmsManager.sendMultipartTextMessage', 'android.telephony.SmsManager.sendTextMessage', 'android.telephony.TelephonyManager.getDeviceId', 'android.telephony.TelephonyManager.getDeviceSoftwareVersion', 'android.telephony.TelephonyManager.getLine1Number', 'android.telephony.TelephonyManager.getNetworkCountryIso', 'android.telephony.TelephonyManager.getNetworkOperator', 'android.telephony.TelephonyManager.getNetworkOperatorName', 'android.telephony.TelephonyManager.getSimCountryIso', 'android.telephony.TelephonyManager.getSimOperatorName', 
'android.telephony.TelephonyManager.getSimSerialNumber', 'android.telephony.TelephonyManager.getSubscriberId', 'android.telephony.TelephonyManager.listen', 'android.util.Base64.decode', 'android.util.Base64.encode', 'android.util.Base64.encodeToString', 'dalvik.system.BaseDexClassLoader.findLibrary', 'dalvik.system.BaseDexClassLoader.findResource', 'dalvik.system.BaseDexClassLoader.findResources', 'dalvik.system.DexFile.loadClass', 'dalvik.system.DexFile.loadDex', 'java.io.FileInputStream.read', 'java.io.FileOutputStream.write', 'java.lang.ProcessBuilder.start', 'java.lang.Runtime.exec', 'java.lang.reflect.Method.invoke', 'java.net.URL.openConnection', 'javax.crypto.Cipher.doFinal', 'javax.crypto.Mac.doFinal', 'libcore.io.IoBridge.open', 'org.apache.http.impl.client.AbstractHttpClient.execute']
16 | 
# Maps the fully-qualified name of a class to the list of names of its methods (unicode strings).
# Some classes (e.g. dalvik.system.DexClassLoader) are mapped to empty lists.
# NOTE(review): presumably the default hooks of droidmon - confirm against callers
droidmonDefaultHooks = {u'android.accounts.AccountManager': [u'getAccountsByType', u'getAccounts'], u'android.app.Activity': [u'startActivity'], u'android.app.ActivityManager': [u'killBackgroundProcesses'], u'android.app.ActivityThread': [u'handleReceiver'], u'android.app.ApplicationPackageManager': [u'setComponentEnabledSetting',  u'getInstalledPackages'], u'android.app.ContextImpl': [u'registerReceiver'], u'android.app.NotificationManager': [u'notify'], u'android.app.SharedPreferencesImpl$EditorImpl': [u'putString', u'putBoolean', u'putInt', u'putLong', u'putFloat'], u'android.content.BroadcastReceiver': [u'abortBroadcast'], u'android.content.ContentResolver': [u'query', u'registerContentObserver', u'insert', u'delete'], u'android.content.ContentValues': [u'put'], u'android.location.Location': [u'getLatitude', u'getLongitude'], u'android.media.AudioRecord': [u'startRecording'], u'android.media.MediaRecorder': [u'start'], u'android.net.ConnectivityManager': [u'setMobileDataEnabled'], u'android.net.wifi.WifiInfo': [u'getMacAddress'], u'android.os.Debug': [u'isDebuggerConnected'], u'android.os.Process': [u'killProcess'], u'android.os.SystemProperties': [u'get'], u'android.telephony.SmsManager': [u'sendTextMessage', u'sendMultipartTextMessage'], u'android.telephony.TelephonyManager': [u'getDeviceId', u'getSubscriberId', u'getLine1Number', u'getNetworkOperator', u'getNetworkOperatorName', u'getSimOperatorName', u'getSimCountryIso', u'getSimSerialNumber', u'getNetworkCountryIso', u'getDeviceSoftwareVersion', u'listen'], u'android.util.Base64': [u'decode', u'encode', u'encodeToString'], u'dalvik.system.BaseDexClassLoader': [u'findResource', u'findLibrary', u'findResources'], u'dalvik.system.DexClassLoader': [], u'dalvik.system.DexFile': [u'loadDex', u'loadClass'], u'dalvik.system.PathClassLoader': [], u'java.io.FileInputStream': [u'read'], u'java.io.FileOutputStream': [u'write'], u'java.lang.ProcessBuilder': [u'start'], u'java.lang.Runtime': [u'exec'], 
u'java.lang.reflect.Method': [u'invoke'], u'java.net.URL': [u'openConnection'], u'javax.crypto.Cipher': [u'doFinal'], u'javax.crypto.Mac': [u'doFinal'], u'javax.crypto.spec.SecretKeySpec': [], u'libcore.io.IoBridge': [u'open'], u'org.apache.http.impl.client.AbstractHttpClient': [u'execute']}
18 | 
19 | 
20 | 


--------------------------------------------------------------------------------
/docs/hooks.json:
--------------------------------------------------------------------------------
  1 | {
  2 |     "hookConfigs": [
  3 |         {
  4 |             "class_name": "android.telephony.TelephonyManager", 
  5 |             "method": "getDeviceId", 
  6 |             "thisObject": false, 
  7 |             "type": "fingerprint"
  8 |         }, 
  9 |         {
 10 |             "class_name": "android.telephony.TelephonyManager", 
 11 |             "method": "getSubscriberId", 
 12 |             "thisObject": false, 
 13 |             "type": "fingerprint"
 14 |         }, 
 15 |         {
 16 |             "class_name": "android.telephony.TelephonyManager", 
 17 |             "method": "getLine1Number", 
 18 |             "thisObject": false, 
 19 |             "type": "fingerprint"
 20 |         }, 
 21 |         {
 22 |             "class_name": "android.telephony.TelephonyManager", 
 23 |             "method": "getNetworkOperator", 
 24 |             "thisObject": false, 
 25 |             "type": "fingerprint"
 26 |         }, 
 27 |         {
 28 |             "class_name": "android.telephony.TelephonyManager", 
 29 |             "method": "getNetworkOperatorName", 
 30 |             "thisObject": false, 
 31 |             "type": "fingerprint"
 32 |         }, 
 33 |         {
 34 |             "class_name": "android.telephony.TelephonyManager", 
 35 |             "method": "getSimOperatorName", 
 36 |             "thisObject": false, 
 37 |             "type": "fingerprint"
 38 |         }, 
 39 |         {
 40 |             "class_name": "android.net.wifi.WifiInfo", 
 41 |             "method": "getMacAddress", 
 42 |             "thisObject": false, 
 43 |             "type": "fingerprint"
 44 |         }, 
 45 |         {
 46 |             "class_name": "android.telephony.TelephonyManager", 
 47 |             "method": "getSimCountryIso", 
 48 |             "thisObject": false, 
 49 |             "type": "fingerprint"
 50 |         }, 
 51 |         {
 52 |             "class_name": "android.telephony.TelephonyManager", 
 53 |             "method": "getSimSerialNumber", 
 54 |             "thisObject": false, 
 55 |             "type": "fingerprint"
 56 |         }, 
 57 |         {
 58 |             "class_name": "android.telephony.TelephonyManager", 
 59 |             "method": "getNetworkCountryIso", 
 60 |             "thisObject": false, 
 61 |             "type": "fingerprint"
 62 |         }, 
 63 |         {
 64 |             "class_name": "android.telephony.TelephonyManager", 
 65 |             "method": "getDeviceSoftwareVersion", 
 66 |             "thisObject": false, 
 67 |             "type": "fingerprint"
 68 |         }, 
 69 |         {
 70 |             "class_name": "android.os.Debug", 
 71 |             "method": "isDebuggerConnected", 
 72 |             "thisObject": false, 
 73 |             "type": "fingerprint"
 74 |         }, 
 75 |         {
 76 |             "class_name": "android.app.SharedPreferencesImpl$EditorImpl", 
 77 |             "method": "putString", 
 78 |             "thisObject": false, 
 79 |             "type": "globals"
 80 |         }, 
 81 |         {
 82 |             "class_name": "android.app.SharedPreferencesImpl$EditorImpl", 
 83 |             "method": "putBoolean", 
 84 |             "thisObject": false, 
 85 |             "type": "globals"
 86 |         }, 
 87 |         {
 88 |             "class_name": "android.app.SharedPreferencesImpl$EditorImpl", 
 89 |             "method": "putInt", 
 90 |             "thisObject": false, 
 91 |             "type": "globals"
 92 |         }, 
 93 |         {
 94 |             "class_name": "android.app.SharedPreferencesImpl$EditorImpl", 
 95 |             "method": "putLong", 
 96 |             "thisObject": false, 
 97 |             "type": "globals"
 98 |         }, 
 99 |         {
100 |             "class_name": "android.app.SharedPreferencesImpl$EditorImpl", 
101 |             "method": "putFloat", 
102 |             "thisObject": false, 
103 |             "type": "globals"
104 |         }, 
105 |         {
106 |             "class_name": "android.content.ContentValues", 
107 |             "method": "put", 
108 |             "thisObject": false, 
109 |             "type": "globals"
110 |         }, 
111 |         {
112 |             "class_name": "java.net.URL", 
113 |             "method": "openConnection", 
114 |             "thisObject": true, 
115 |             "type": "network"
116 |         }, 
117 |         {
118 |             "class_name": "org.apache.http.impl.client.AbstractHttpClient", 
119 |             "method": "execute", 
120 |             "thisObject": false, 
121 |             "type": "network"
122 |         }, 
123 |         {
124 |             "class_name": "android.app.ContextImpl", 
125 |             "method": "registerReceiver", 
126 |             "thisObject": false, 
127 |             "type": "binder"
128 |         }, 
129 |         {
130 |             "class_name": "android.app.ActivityThread", 
131 |             "method": "handleReceiver", 
132 |             "thisObject": false, 
133 |             "type": "binder"
134 |         }, 
135 |         {
136 |             "class_name": "android.app.Activity", 
137 |             "method": "startActivity", 
138 |             "thisObject": false, 
139 |             "type": "binder"
140 |         }, 
141 |         {
142 |             "class_name": "dalvik.system.BaseDexClassLoader", 
143 |             "method": "findResource", 
144 |             "thisObject": false, 
145 |             "type": "dex"
146 |         }, 
147 |         {
148 |             "class_name": "dalvik.system.BaseDexClassLoader", 
149 |             "method": "findLibrary", 
150 |             "thisObject": false, 
151 |             "type": "dex"
152 |         }, 
153 |         {
154 |             "class_name": "dalvik.system.DexFile", 
155 |             "method": "loadDex", 
156 |             "thisObject": false, 
157 |             "type": "dex"
158 |         }, 
159 |         {
160 |             "class_name": "dalvik.system.DexClassLoader", 
161 |             "method": null, 
162 |             "thisObject": false, 
163 |             "type": "dex"
164 |         }, 
165 |         {
166 |             "class_name": "dalvik.system.BaseDexClassLoader", 
167 |             "method": "findResources", 
168 |             "thisObject": false, 
169 |             "type": "dex"
170 |         }, 
171 |         {
172 |             "class_name": "dalvik.system.DexFile", 
173 |             "method": "loadClass", 
174 |             "thisObject": false, 
175 |             "type": "dex"
176 |         }, 
177 |         {
178 |             "class_name": "dalvik.system.DexFile", 
179 |             "method": null, 
180 |             "thisObject": false, 
181 |             "type": "dex"
182 |         }, 
183 |         {
184 |             "class_name": "dalvik.system.PathClassLoader", 
185 |             "method": null, 
186 |             "thisObject": false, 
187 |             "type": "dex"
188 |         }, 
189 |         {
190 |             "class_name": "java.lang.reflect.Method", 
191 |             "method": "invoke", 
192 |             "thisObject": false, 
193 |             "type": "reflection"
194 |         }, 
195 |         {
196 |             "class_name": "javax.crypto.spec.SecretKeySpec", 
197 |             "method": null, 
198 |             "thisObject": false, 
199 |             "type": "crypto"
200 |         }, 
201 |         {
202 |             "class_name": "javax.crypto.Cipher", 
203 |             "method": "doFinal", 
204 |             "thisObject": true, 
205 |             "type": "crypto"
206 |         }, 
207 |         {
208 |             "class_name": "javax.crypto.Mac", 
209 |             "method": "doFinal", 
210 |             "thisObject": false, 
211 |             "type": "crypto"
212 |         }, 
213 |         {
214 |             "class_name": "android.app.ApplicationPackageManager", 
215 |             "method": "setComponentEnabledSetting", 
216 |             "thisObject": false, 
217 |             "type": "generic"
218 |         }, 
219 |         {
220 |             "class_name": "android.app.NotificationManager", 
221 |             "method": "notify", 
222 |             "thisObject": false, 
223 |             "type": "generic"
224 |         }, 
225 |         {
226 |             "class_name": "android.util.Base64", 
227 |             "method": "decode", 
228 |             "thisObject": false, 
229 |             "type": "generic"
230 |         }, 
231 |         {
232 |             "class_name": "android.telephony.TelephonyManager", 
233 |             "method": "listen", 
234 |             "thisObject": false, 
235 |             "type": "generic"
236 |         }, 
237 |         {
238 |             "class_name": "android.util.Base64", 
239 |             "method": "encode", 
240 |             "thisObject": false, 
241 |             "type": "generic"
242 |         }, 
243 |         {
244 |             "class_name": "android.util.Base64", 
245 |             "method": "encodeToString", 
246 |             "thisObject": false, 
247 |             "type": "generic"
248 |         }, 
249 |         {
250 |             "class_name": "android.net.ConnectivityManager", 
251 |             "method": "setMobileDataEnabled", 
252 |             "thisObject": false, 
253 |             "type": "generic"
254 |         }, 
255 |         {
256 |             "class_name": "android.content.BroadcastReceiver", 
257 |             "method": "abortBroadcast", 
258 |             "thisObject": false, 
259 |             "type": "generic"
260 |         }, 
261 |         {
262 |             "class_name": "android.telephony.SmsManager", 
263 |             "method": "sendTextMessage", 
264 |             "thisObject": false, 
265 |             "type": "sms"
266 |         }, 
267 |         {
268 |             "class_name": "android.telephony.SmsManager", 
269 |             "method": "sendMultipartTextMessage", 
270 |             "thisObject": false, 
271 |             "type": "sms"
272 |         }, 
273 |         {
274 |             "class_name": "java.lang.Runtime", 
275 |             "method": "exec", 
276 |             "thisObject": false, 
277 |             "type": "runtime"
278 |         }, 
279 |         {
280 |             "class_name": "java.lang.ProcessBuilder", 
281 |             "method": "start", 
282 |             "thisObject": true, 
283 |             "type": "runtime"
284 |         }, 
285 |         {
286 |             "class_name": "java.io.FileOutputStream", 
287 |             "method": "write", 
288 |             "thisObject": false, 
289 |             "type": "runtime"
290 |         }, 
291 |         {
292 |             "class_name": "java.io.FileInputStream", 
293 |             "method": "read", 
294 |             "thisObject": false, 
295 |             "type": "runtime"
296 |         }, 
297 |         {
298 |             "class_name": "android.app.ActivityManager", 
299 |             "method": "killBackgroundProcesses", 
300 |             "thisObject": false, 
301 |             "type": "runtime"
302 |         }, 
303 |         {
304 |             "class_name": "android.os.Process", 
305 |             "method": "killProcess", 
306 |             "thisObject": false, 
307 |             "type": "runtime"
308 |         }, 
309 |         {
310 |             "class_name": "android.content.ContentResolver", 
311 |             "method": "query", 
312 |             "thisObject": false, 
313 |             "type": "content"
314 |         }, 
315 |         {
316 |             "class_name": "android.content.ContentResolver", 
317 |             "method": "registerContentObserver", 
318 |             "thisObject": false, 
319 |             "type": "content"
320 |         }, 
321 |         {
322 |             "class_name": "android.content.ContentResolver", 
323 |             "method": "insert", 
324 |             "thisObject": false, 
325 |             "type": "content"
326 |         }, 
327 |         {
328 |             "class_name": "android.accounts.AccountManager", 
329 |             "method": "getAccountsByType", 
330 |             "thisObject": false, 
331 |             "type": "content"
332 |         }, 
333 |         {
334 |             "class_name": "android.accounts.AccountManager", 
335 |             "method": "getAccounts", 
336 |             "thisObject": false, 
337 |             "type": "content"
338 |         }, 
339 |         {
340 |             "class_name": "android.location.Location", 
341 |             "method": "getLatitude", 
342 |             "thisObject": false, 
343 |             "type": "content"
344 |         }, 
345 |         {
346 |             "class_name": "android.location.Location", 
347 |             "method": "getLongitude", 
348 |             "thisObject": false, 
349 |             "type": "content"
350 |         }, 
351 |         {
352 |             "class_name": "android.content.ContentResolver", 
353 |             "method": "delete", 
354 |             "thisObject": false, 
355 |             "type": "content"
356 |         }, 
357 |         {
358 |             "class_name": "android.media.AudioRecord", 
359 |             "method": "startRecording", 
360 |             "thisObject": false, 
361 |             "type": "content"
362 |         }, 
363 |         {
364 |             "class_name": "android.media.MediaRecorder", 
365 |             "method": "start", 
366 |             "thisObject": false, 
367 |             "type": "content"
368 |         }, 
369 |         {
370 |             "class_name": "android.os.SystemProperties", 
371 |             "method": "get", 
372 |             "thisObject": false, 
373 |             "type": "content"
374 |         }, 
375 |         {
376 |             "class_name": "android.app.ApplicationPackageManager", 
377 |             "method": "getInstalledPackages", 
378 |             "thisObject": false, 
379 |             "type": "content"
380 |         }, 
381 |         {
382 |             "class_name": "libcore.io.IoBridge", 
383 |             "method": "open", 
384 |             "thisObject": false, 
385 |             "type": "file"
386 |         }
387 |     ], 
388 |     "trace": false
389 | }


--------------------------------------------------------------------------------
/data_inference/extraction/featureExtraction.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python
  2 | 
  3 | from Aion.shared.constants import *
  4 | from Aion.utils.data import *
  5 | from Aion.utils.graphics import *
  6 | from Aion.conf.config import *
  7 | 
  8 | from androguard.session import Session
  9 | import numpy
 10 | 
 11 | import os, json, threading, re
 12 | 
 13 | def returnEmptyFeatures():
 14 |     """
 15 |     A dummy function used by timers to return empty feature vectors (lists)
 16 |     """
 17 |     prettyPrint("Analysis timeout. Returning empty feature vector", "warning")
 18 |     return []
 19 | 
 20 | def extractStaticFeatures(apkPath):
 21 |     """Extracts static numerical features from APK using Androguard"""
 22 |     try:
 23 |         features = [[], [], [], []] # Tuples are immutable
 24 |         if os.path.exists(apkPath.replace(".apk",".static")):
 25 |             prettyPrint("Found a pre-computed static features file")
 26 |             bFeatures, pFeatures, aFeatures, allFeatures = [], [], [], []
 27 |             try:
 28 |                 possibleExtensions = [".basic", ".perm", ".api", ".static"]
 29 |                 for ext in possibleExtensions:
 30 |                     if os.path.exists(apkPath.replace(".apk", ext)):
 31 |                         content = open(apkPath.replace(".apk", ext)).read()
 32 |                         if len(content) > 0:
 33 |                             features[possibleExtensions.index(ext)] = [float(f) for f in content[1:-1].split(',') if len(f) > 0]
 34 | 
 35 |                 return tuple(features)
 36 | 
 37 |             except Exception as e:
 38 |                 prettyPrintError(e)
 39 |                 prettyPrint("Could not extract features from \".static\" file. Continuing as usual", "warning")
 40 |         if verboseON():
 41 |             prettyPrint("Starting analysis on \"%s\"" % apkPath, "debug")
 42 |         analysisSession = Session()
 43 |         if not os.path.exists(apkPath):
 44 |             prettyPrint("Could not find the APK file \"%s\"" % apkPath, "warning")
 45 |             return [], [], [], []
 46 |         # 1. Analyze APK and retrieve its components
 47 |         #t = threading.Timer(300.0, returnEmptyFeatures) # Guarantees not being stuck on analyzing an APK
 48 |         #t.start()
 49 |         analysisSession.add(apkPath, open(apkPath).read())
 50 |         if type(analysisSession.analyzed_apk.values()) == list:
 51 |             apk = analysisSession.analyzed_apk.values()[0][0]
 52 |         else:
 53 |             apk = analysisSession.analyzed_apk.values()[0]
 54 |         dex = analysisSession.analyzed_dex.values()[0][0]
 55 |         vm = analysisSession.analyzed_dex.values()[0][1]
 56 |         # 2. Add features to the features vector
 57 |         basicFeatures, permissionFeatures, apiCallFeatures, allFeatures = [], [], [], []
 58 |         # 2.a. The APK-related features
 59 |         if verboseON():
 60 |             prettyPrint("Extracting basic features", "debug")
 61 |         minSDKVersion = 0.0 if not apk.get_min_sdk_version() else float(apk.get_min_sdk_version())
 62 |         maxSDKVersion = 0.0 if not apk.get_max_sdk_version() else float(apk.get_max_sdk_version())
 63 |         basicFeatures.append(minSDKVersion)
 64 |         basicFeatures.append(maxSDKVersion)
 65 |         basicFeatures.append(float(len(apk.get_activities()))) # No. of activities
 66 |         basicFeatures.append(float(len(apk.get_services()))) # No. of services
 67 |         basicFeatures.append(float(len(apk.get_receivers()))) # No. of broadcast receivers
 68 |         basicFeatures.append(float(len(apk.get_providers()))) # No. of providers
 69 |         # 2.b. Harvest permission-related features
 70 |         if verboseON():
 71 |             prettyPrint("Extracting permissions-related features", "debug")
 72 |         aospPermissions = float(len(apk.get_requested_aosp_permissions())) # Android permissions requested by the app
 73 |         declaredPermissions = float(len(apk.get_declared_permissions())) # Custom permissions declared by the app
 74 |         dangerousPermissions = float(len([p for p in apk.get_requested_aosp_permissions_details().values() if p["protectionLevel"] == "dangerous"]))
 75 |         totalPermissions = float(len(apk.get_permissions()))
 76 |         permissionFeatures.append(totalPermissions) # No. of permissions
 77 |         if totalPermissions > 0:
 78 |             permissionFeatures.append(aospPermissions/totalPermissions) # AOSP permissions : Total permissions
 79 |             permissionFeatures.append(declaredPermissions/totalPermissions) # Third-party permissions : Total permissions
 80 |             permissionFeatures.append(dangerousPermissions/totalPermissions) # Dangerous permissions : Total permissions
 81 |         else:
 82 |             permissionFeatures.append(0.0)
 83 |             permissionFeatures.append(0.0)
 84 |             permissionFeatures.append(0.0)
 85 |         # 2.c. The DEX-related features (API calls)
 86 |         if verboseON():
 87 |             prettyPrint("Extracting API calls from dex code", "debug")
 88 |         apiCallFeatures.append(float(len(dex.get_classes()))) # Total number of classes
 89 |         apiCallFeatures.append(float(len(dex.get_strings()))) # Total number of strings
 90 |         apiCategories = sensitiveAPICalls.keys()
 91 |         apiCategoryCount = [0.0] * len(apiCategories)
 92 |         for c in dex.classes.get_names():
 93 |             currentClass = dex.get_class(c)
 94 |             if not currentClass:
 95 |                 continue
 96 |             code = currentClass.get_source()
 97 |             if len(code) < 1:
 98 |                 continue
 99 |             for category in apiCategories:
100 |                 if code.find(category) != -1:
101 |                     for call in sensitiveAPICalls[category]:
102 |                         apiCategoryCount[apiCategories.index(category)] += float(len(re.findall(call, code)))
103 | 
104 |         apiCallFeatures += apiCategoryCount
105 | 
106 |     except Exception as e:
107 |         prettyPrintError(e)
108 |         return [], [], [], []
109 |     
110 |     allFeatures = basicFeatures + permissionFeatures + apiCallFeatures
111 | 
112 |     return basicFeatures, permissionFeatures, apiCallFeatures, allFeatures
113 | 
114 | 
def extractIntrospyFeatures(apkJSONPath):
    """
    Extracts dynamic features from a JSON-based trace generated by Introspy
    :param apkJSONPath: The path to the JSON trace. It must hold a "calls" list whose items carry
                        "group" and "subgroup" keys (plus "clazz" for calls made via custom hooks)
    :type apkJSONPath: str
    :return: A list of 28 call counts (floats) in the following order: crypto, ssl, hash, fs, pref, uri,
             ipc, webview, followed by one count per custom-hooked class. An empty list is returned if
             the file is missing or could not be processed
    """
    # The order of the following two lists defines the layout of the feature vector
    builtinCounters = ["crypto", "ssl", "hash", "fs", "pref", "uri", "ipc", "webview"]
    customClasses = ["android.accounts.AccountManager", "android.app.Activity", "android.app.DownloadManager",
                     "android.content.ContentResolver", "android.content.ContextWrapper",
                     "android.content.pm.PackageInstaller", "android.database.sqlite.SQLiteDatabase",
                     "android.hardware.Camera", "android.hardware.display.DisplayManager",
                     "android.location.Location", "android.media.AudioRecord", "android.media.MediaRecorder",
                     "android.net.Network", "android.net.wifi.WifiManager", "android.os.PowerManager",
                     "android.telephony.SmsManager", "android.widget.Toast", "java.lang.class",
                     "java.net.HttpCookie", "java.net.URL"]
    # Subgroup -> counter, per Introspy group
    cryptoSubgroups = {"General crypto": "crypto", "Hash": "hash", "Ssl": "ssl"}
    storageSubgroups = {"Fs": "fs", "Pref": "pref", "Uri": "uri"}
    # NOTE(review): the JVM names the class "java.lang.Class"; the lowercase spelling is kept for
    # backward compatibility and both spellings feed the same counter -- confirm against real traces
    classAliases = {"java.lang.Class": "java.lang.class"}
    try:
        features = []
        if not os.path.exists(apkJSONPath):
            prettyPrint("Could not find the JSON file \"%s\"" % apkJSONPath, "warning")
        else:
            with open(apkJSONPath) as jsonFile:
                apkJSON = json.loads(jsonFile.read())
            counts = dict.fromkeys(builtinCounters + customClasses, 0.0)
            for call in apkJSON["calls"]:
                group, subgroup = call["group"], call["subgroup"]
                counter = None
                if group == "Crypto":
                    counter = cryptoSubgroups.get(subgroup)
                elif group == "Storage":
                    # The subgroup (not the group) tells file system, preferences, and URI calls apart
                    counter = storageSubgroups.get(subgroup)
                elif group == "Ipc":
                    # Every call of the Ipc group is counted, regardless of its subgroup
                    counter = "ipc"
                elif group == "Misc":
                    counter = "webview" if subgroup == "Webview" else None
                elif group.lower().find("custom") != -1:
                    # Handle custom hooks: one counter per hooked class
                    clazz = classAliases.get(call["clazz"], call["clazz"])
                    counter = clazz if clazz in counts else None
                if counter is not None:
                    counts[counter] += 1

            features = [counts[name] for name in builtinCounters + customClasses]

    except Exception as e:
        prettyPrintError(e)
        return []

    return features
244 | 
def extractDroidmonFeatures(logPath, mode="classes"):
    """
    Extracts numerical features from Droidmon-generated logs
    :param logPath: The path to the JSON-log generated by Droidmon
    :type logPath: str
    :param mode: The type of features to extract (i.e. classes, methods, both)
    :type mode: str
    :return: Two lists depicting the trace found in the log and counts of items it contains.
             Two empty lists are returned if the log is missing or could not be processed
    """
    from collections import Counter # Local import keeps this function self-contained

    try:
        features = []
        # Parse the droidmon log
        if not os.path.exists(logPath):
            prettyPrint("Unable to locate \"%s\"" % logPath, "warning")
            return [], []
        with open(logPath) as logFile:
            lines = logFile.read().split('\n')
        if VERBOSE:
            prettyPrint("Successfully retrieved %s lines from log" % len(lines), "debug")
        droidmonLines = [l for l in lines if l.lower().find("droidmon-apimonitor-") != -1]
        # Generate trace from lines
        trace = []
        for line in droidmonLines:
            tmp = line[line.find("{"):].replace('\n','').replace('\r','')
            # Extract class and method (an empty string stands for a field missing from the entry)
            c = _extractDroidmonField(tmp, "class")
            m = _extractDroidmonField(tmp, "method")
            # Append to trace
            if mode == "classes":
                trace.append(c)
            elif mode == "methods":
                trace.append(m)
            elif mode == "both":
                trace.append("%s.%s" % (c, m))
        # Go over droidmon classes and count occurrences
        source = []
        if mode == "classes":
            source = droidmonDefaultClasses
        elif mode == "methods":
            source = droidmonDefaultMethods
        elif mode == "both":
            source = droidmonDefaultAPIs

        # Tally the trace once instead of re-scanning it for every item in the source
        occurrences = Counter(trace)
        features = [occurrences[i] for i in source]

    except Exception as e:
        prettyPrintError(e)
        return [], []

    return trace, features


def _extractDroidmonField(entry, field):
    """
    Retrieves the value of a string field e.g. "class":"..." from a raw Droidmon log entry
    :param entry: The raw (JSON-like) text of the log entry
    :type entry: str
    :param field: The name of the field to look for
    :type field: str
    :return: The value of the field, or an empty string if the entry does not contain it
    """
    pattern = "%s\":\"" % field
    start = entry.find(pattern)
    if start == -1:
        # Without this guard, find()'s -1 would yield an arbitrary slice of the entry
        return ""
    start += len(pattern)
    end = entry.find("\"", start)
    # An unterminated value extends to the end of the entry
    return entry[start:end] if end != -1 else entry[start:]


--------------------------------------------------------------------------------
/data_generation/stimulation/Garfield.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python
  2 | 
  3 | # Python modules
  4 | import sys, os, shutil, glob, io
  5 | 
  6 | # Aion modules
  7 | from Aion.utils.graphics import *
  8 | from Aion.utils.data import *
  9 | from Aion.shared.constants import *
 10 | 
 11 | 
 12 | # Third-party libraries
 13 | from androguard.session import Session
 14 | from androguard.misc import AXMLPrinter
 15 | 
 16 | class Garfield():
 17 |     """ Garfield is a lazy stimulation engine based on fuzzing via Monkey(runner) and Genymotion """
 18 |     
 19 |     def __init__(self, pathToAPK, APKType="goodware"):
 20 |         if not os.path.exists(pathToAPK):
 21 |             prettyPrint("Could not find the APK \"%s\"" % pathToAPK, "warning")
 22 |             return None
 23 |         self.APKPath = pathToAPK
 24 |         self.APK, self.DEX, self.VMAnalysis = None, None, None
 25 |         self.activitiesInfo, self.servicesInfo, self.receiversInfo = {}, {}, {}
 26 |         self.runnerScript = ""
 27 |         self.APKType = APKType
 28 |    
 29 |     def analyzeAPK(self):
 30 |         """ Uses androguard to retrieve metadata about the app e.g. activities, permissions, intent filters, etc. """
 31 |         try:
 32 |             prettyPrint("Analyzing app")
 33 |             logEvent("Analyzing app: \"%s\"" % self.APKPath)
 34 |             # 1. Load the APK using androguard
 35 |             analysisSession = Session()
 36 |             analysisSession.add(self.APKPath, open(self.APKPath).read())
 37 |             # 2. Retrieve handles to APK and its dex code
 38 |             self.APK = analysisSession.analyzed_apk.values()[0]
 39 |             self.DEX = analysisSession.analyzed_dex.values()[0][0]
 40 |             self.VMAnalysis = analysisSession.analyzed_dex.values()[0][1]
 41 |             # 3. Retrieve information for each activity
 42 |             prettyPrint("Analyzing activities")
 43 |             self.activitiesInfo = analyzeActivities(self.APK, self.DEX)
 44 |             # 4. Do the same for services and broadcast receivers
 45 |             prettyPrint("Analyzing services")
 46 |             self.servicesInfo = analyzeServices(self.APK, self.DEX)
 47 |             prettyPrint("Analyzing broadcast receivers")
 48 |             self.receiversInfo = analyzeReceivers(self.APK, self.DEX)
 49 |            
 50 |         except Exception as e:
 51 |             prettyPrintError(e)
 52 |             return False
 53 | 
 54 |         prettyPrint("Success")
 55 |         return True
 56 | 
 57 |     def generateRunnerScript(self, scriptPath="", runningTime=60):
 58 |         """Generates a python script to be run by Monkeyrunner"""
 59 |         try:
 60 |             # Check whether the APK has been analyzed first
 61 |             if not self.APK:
 62 |                 prettyPrint("APK needs to be analyzed first", "warning")
 63 |                 return False
 64 | 
 65 |             self.runnerScript = "%s/files/scripts/%s.py" % (getProjectDir(), getRandomAlphaNumeric()) if scriptPath == "" else scriptPath
 66 |             print self.runnerScript
 67 |             monkeyScript = open(self.runnerScript, "w")
 68 |             # Preparation
 69 |             monkeyScript.write("#!/usr/bin/python\n\n")
 70 |             monkeyScript.write("from com.android.monkeyrunner import MonkeyRunner, MonkeyDevice\n")
 71 |             monkeyScript.write("import time, os, random\n\n")
 72 |             monkeyScript.write("keyEvents = %s\n" % keyEvents)
 73 |             monkeyScript.write("keyEventTypes = [MonkeyDevice.UP, MonkeyDevice.DOWN, MonkeyDevice.DOWN_AND_UP]\n")
 74 |             monkeyScript.write("activityActions = %s\n" % activityActions)
 75 |             monkeyScript.write("activities = %s\n" % self.activitiesInfo)
 76 |             monkeyScript.write("services = %s\n" % self.servicesInfo)
 77 |             monkeyScript.write("receivers = %s\n\n" % self.receiversInfo)
 78 |             # Connect to the current device and install package
 79 |             monkeyScript.write("print \"[*] Connecting to device.\"\n")
 80 |             monkeyScript.write("device = MonkeyRunner.waitForConnection(\"[ANDROID_VIRTUAL_DEVICE_ID]\")\n")
 81 |             monkeyScript.write("package = '%s'\n" % self.APK.package)
 82 |             monkeyScript.write("print \"[*] Uninstalling package %s (if exists)\"\n" % self.APK.package)
 83 |             monkeyScript.write("device.removePackage(package)\n")
 84 |             monkeyScript.write("print \"[*] Installing package %s\"\n" % self.APK.package)
 85 |             monkeyScript.write("device.installPackage('%s')\n" % self.APKPath)
 86 |             # Configure introspy for hooking and monitoring
 87 |             monkeyScript.write("print \"[*] Configuring Introspy\"\n")
 88 |             monkeyScript.write("device.shell(\"echo 'GENERAL CRYPTO,KEY,HASH,FS,IPC,PREF,URI,WEBVIEW,SSL' > /data/data/%s/introspy.config\" % package)\n")
 89 |             monkeyScript.write("device.shell(\"chmod 664 /data/data/%s/introspy.config\" % package)\n")
 90 |             # Get a handle to a file to store the commands issued during runtime
 91 |             monkeyScript.write("commandsFile = open(\"%s/files/scripts/%s_%s.command\", \"w\")\n" % (getProjectDir(), self.APK.package.replace('.','_'), getRandomAlphaNumeric()))
 92 |             # Start app
 93 |             #monkeyScript.write("mainActivity = '%s'\n" % APK.APK.get_main_activity()) 
 94 |             #monkeyScript.write("device.startActivity(component=package + '/' + mainActivity)\n")
 95 |             # Starting the fuzzing phase for [runningTime] seconds<F12>
 96 |             monkeyScript.write("endTime = time.time() + %s\n" % runningTime)
 97 |             monkeyScript.write("print \"[*] Fuzzing app for %s seconds\"\n" % runningTime)
 98 |             monkeyScript.write("while time.time() < endTime:\n")
 99 |             # 1. Choose a random component
100 |             monkeyScript.write("\tcomponentType = [\"activity\", \"service\", \"receiver\"][random.randint(0,2)]\n")
101 |             # 2.a. Activities
102 |             monkeyScript.write("\tif componentType == \"activity\":\n")
103 |             monkeyScript.write("\t\tcurrentActivity = activities.keys()[random.randint(0,len(activities)-1)]\n")
104 |             monkeyScript.write("\t\tprint \"[*] Starting activity: %s\" % currentActivity\n")
105 |             monkeyScript.write("\t\tdevice.startActivity(component=package + '/' + currentActivity)\n")
106 |             monkeyScript.write("\t\tcommandsFile.write(\"device.startActivity('%s/%s')\\n\" % (package, currentActivity))\n")
107 |             # Choose an action 
108 |             monkeyScript.write("\t\tcurrentAction = activityActions[random.randint(0,len(activityActions)-1)]\n")
109 |             monkeyScript.write("\t\tprint \"[*] Current action: %s\" % currentAction\n")
110 |             # Touch in a random X,Y position on the screen
111 |             monkeyScript.write("\t\tif currentAction == \"touch\":\n")
112 |             monkeyScript.write("\t\t\twidth, height = int(device.getProperty(\"display.width\")), int(device.getProperty(\"display.height\"))\n")
113 |             monkeyScript.write("\t\t\tX, Y = random.randint(0, width-1), random.randint(0, height-1)\n")
114 |             monkeyScript.write("\t\t\tprint \"[*] Touching screen at (%s,%s)\" % (X,Y)\n")
115 |             monkeyScript.write("\t\t\teventType = keyEventTypes[random.randint(0,2)]\n")
116 |             monkeyScript.write("\t\t\tdevice.touch(X, Y, eventType)\n")
117 |             monkeyScript.write("\t\t\tcommandsFile.write(\"device.touch(%s, %s, %s)\\n\" % (X, Y, eventType))\n")
118 |             # Type something random
119 |             monkeyScript.write("\t\telif currentAction == \"type\":\n")
120 |             monkeyScript.write("\t\t\ttext = \"%s\"\n" % getRandomString(random.randint(0,100)))
121 |             monkeyScript.write("\t\t\tprint \"[*] Typing %s\" % text\n")
122 |             monkeyScript.write("\t\t\tdevice.type(text)\n")
123 |             monkeyScript.write("\t\t\tcommandsFile.write(\"device.type('%s')\\n\" % text)\n")
124 |             # Press a random key up/down
125 |             monkeyScript.write("\t\telif currentAction == \"press\":\n")
126 |             monkeyScript.write("\t\t\taction = keyEvents[random.randint(0, len(keyEvents)-1)]\n")
127 |             monkeyScript.write("\t\t\taType =  keyEventTypes[random.randint(0,2)]\n")
128 |             monkeyScript.write("\t\t\tprint \"[*] Pressing: %s as %s\" % (action, aType)\n")
129 |             monkeyScript.write("\t\t\tdevice.press(action, aType)\n")
130 |             monkeyScript.write("\t\t\tcommandsFile.write(\"device.press(%s, %s)\\n\" % (action, aType)) \n")
131 |             # Randomly drag the screen
132 |             monkeyScript.write("\t\telif currentAction == \"drag\":\n")
133 |             monkeyScript.write("\t\t\twidth, height = int(device.getProperty(\"display.width\")), int(device.getProperty(\"display.height\"))\n")
134 |             monkeyScript.write("\t\t\tstart = (random.randint(0, width-1), random.randint(0, height-1))\n")
135 |             monkeyScript.write("\t\t\tend = (random.randint(0, width-1), random.randint(0, height-1))\n")
136 |             monkeyScript.write("\t\t\tprint \"[*] Dragging screen from %s to %s\" % (start, end)\n")
137 |             monkeyScript.write("\t\t\tdevice.drag(start, end)\n")
138 |             monkeyScript.write("\t\t\tcommandsFile.write(\"device.drag(%s, %s)\\n\" % (start, end))\n")
139 |             # 2.b.Services
140 |             monkeyScript.write("\telif componentType == \"service\":\n")
141 |             monkeyScript.write("\t\tcurrentService = services.keys()[random.randint(0, len(services)-1)]\n")
142 |             monkeyScript.write("\t\tprint \"[*] Starting Service: %s\" % currentService\n")
143 |             monkeyScript.write("\t\tif \"intent-filters\" in services[currentService].keys():\n")
144 |             monkeyScript.write("\t\t\tif \"action\" in services[currentService][\"intent-filters\"].keys():\n")
145 |             monkeyScript.write("\t\t\t\tintentAction = services[currentService][\"intent-filters\"][\"action\"][0]\n")
146 |             monkeyScript.write("\t\t\t\tprint \"[*] Broadcasting intent: %s\" % intentAction\n")
147 |             monkeyScript.write("\t\t\t\tdevice.broadcastIntent(currentService, intentAction)\n")
148 |             monkeyScript.write("\t\t\t\tcommandsFile.write(\"device.broadcastIntent('%s', '%s')\\n\" % (currentService, intentAction)) \n")
149 |             # 2.c. Broadcast receivers
150 |             monkeyScript.write("\telif componentType == \"receiver\":\n")
151 |             monkeyScript.write("\t\tcurrentReceiver = receivers.keys()[random.randint(0, len(receivers)-1)]\n")
152 |             monkeyScript.write("\t\tprint \"[*] Starting Receiver: %s\" % currentReceiver\n")
153 |             monkeyScript.write("\t\tif \"intent-filters\" in receivers[currentReceiver].keys():\n")
154 |             monkeyScript.write("\t\t\tif \"action\" in receivers[currentReceiver][\"intent-filters\"].keys():\n")
155 |             monkeyScript.write("\t\t\t\tintentAction = receivers[currentReceiver][\"intent-filters\"][\"action\"][0]\n")
156 |             monkeyScript.write("\t\t\t\tprint \"[*] Broadcasting intent: %s\" % intentAction\n")
157 |             monkeyScript.write("\t\t\t\tdevice.broadcastIntent(currentReceiver, intentAction)\n")
158 |             monkeyScript.write("\t\t\t\tcommandsFile.write(\"device.broadcastIntent('%s', '%s')\\n\" % (currentReceiver, intentAction))\n")
159 |             # Sleep for 0.5 a second
160 |             monkeyScript.write("\ttime.sleep(1)\n")
161 |             # Uninstall package (Still need to fetch the introspy.db file from app directory before uninstallation)
162 |             #monkeyScript.write("device.removePackage(package)\n")
163 |             monkeyScript.write("commandsFile.close()")
164 |         
165 |         except Exception as e:
166 |             prettyPrintError(e)
167 |             return False
168 | 
169 |         return True    
def analyzeActivities(APK, DEX):
    """
    Analyzes the passed APK and DEX objects to retrieve the elements within every activity
    :param APK: The APK object retrieved from the androguard session
    :param DEX: The DEX object retrieved from the androguard session
    :return: A dict mapping every activity name to a dict holding its "intent-filters" and, whenever a layout file could be resolved, its UI "elements". An empty dict is returned upon errors
    """
    try:
        info = {}
        # The classes inheriting from an "Activity" class do not depend on the current activity: retrieve them once
        activityClasses = [c for c in DEX.get_classes() if c.get_superclassname().lower().find("activity") != -1]
        for activity in APK.get_activities():
            info[activity] = {}
            # 1. Add the intent filters
            info[activity]["intent-filters"] = APK.get_intent_filters("activity", activity)
            # 2. Get classes belonging to CURRENT activity
            currentClasses = []
            for c in activityClasses:
                if c.get_name()[1:-1].replace('/','.') == activity:
                    currentClasses.append(c)
                    if loggingON():
                        prettyPrint("Activity: %s, class: %s" % (activity, c), "debug")

            # 3. Get UI elements in every activity
            # 3.a. Identify the layout file's ID in the class' setContentView function call
            if len(currentClasses) < 1:
                # Backward compatibility: skipped activities have always carried an empty "classes" list
                info[activity]["classes"] = []
                prettyPrint("Could not retrieve any Activity classes. Skipping", "warning")
                continue
            layoutID = _extractLayoutID(currentClasses[0].get_source())
            # layoutID retrieved?
            if len(layoutID) < 1:
                prettyPrint("Could not retrieve layout ID from activity class. Skipping", "warning")
                continue
            # 3.b. Look for the corresponding layout name in the R$layout file
            # Reset the name for every activity, so that a failed lookup never reuses the previous activity's layout
            layoutName = ""
            layoutClass = DEX.get_class(str("L%s/R$layout;" % APK.package.replace('.','/')))
            if layoutClass:
                layoutContent = layoutClass.get_source()
                eIndex = layoutContent.find(layoutID)
                sIndex = layoutContent.rfind("int", 0, eIndex) if eIndex != -1 else -1
                # Only slice out a name if both the ID and its "int" declaration were found
                if sIndex != -1:
                    layoutName = layoutContent[sIndex+len("int"):eIndex].replace(' ','').replace('=','')
            else:
                # No layout class was found: Check the public.xml file
                prettyPrint("Could not find a \"R$layout\" class. Checking \"public.xml\"", "warning")
                apkResources = APK.get_android_resources()
                publicResources = apkResources.get_public_resources(APK.package).split('\n')
                layoutIDHex = hex(int(layoutID))
                for line in publicResources:
                    if line.find(layoutIDHex) != -1:
                        sIndex = line.find("name=\"") + len("name=\"")
                        eIndex = line.find("\"", sIndex)
                        layoutName = line[sIndex:eIndex]
            # 3.c. Retrieve layout file and get XML object
            if len(layoutName) < 1:
                prettyPrint("Could not retrieve a layout file for \"%s\". Skipping" % activity, "warning")
            else:
                if loggingON():
                    prettyPrint("Retrieving UI elements from %s.xml" % layoutName, "debug")
                info[activity]["elements"] = _parseActivityLayout("res/layout/%s.xml" % layoutName, APK)

    except Exception as e:
        prettyPrintError(e)
        return {}

    return info

def _extractLayoutID(source):
    """ Returns the digits passed to the first "setContentView(" call following "void onCreate(" in a class' source, or an empty string if there are none """
    marker = "setContentView("
    onCreateIndex = source.find("void onCreate(")
    if onCreateIndex == -1:
        return ""
    markerIndex = source.find(marker, onCreateIndex)
    if markerIndex == -1:
        return ""
    layoutID, index = "", markerIndex + len(marker)
    # Stop at the first non-digit character or at the end of the source, whichever comes first
    while index < len(source) and source[index].isdigit():
        layoutID += source[index]
        index += 1
    return layoutID
240 | 
def analyzeServices(APK, DEX):
    """ Maps every service declared by the passed APK to a dict holding its "intent-filters" (an empty dict is returned upon errors) """
    try:
        return {
            name: {"intent-filters": APK.get_intent_filters("service", name)}
            for name in APK.get_services()
        }
    except Exception as e:
        prettyPrintError(e)
        return {}
254 | 
def analyzeReceivers(APK, DEX):
    """ Maps every broadcast receiver declared by the passed APK to a dict holding its "intent-filters" (an empty dict is returned upon errors) """
    try:
        return {
            name: {"intent-filters": APK.get_intent_filters("receiver", name)}
            for name in APK.get_receivers()
        }
    except Exception as e:
        prettyPrintError(e)
        return {}
268 | 
def _parseActivityLayout(layoutFilePath, APK):
    """
    Parses an XML layout file of an activity and returns information about the found elements
    :param layoutFilePath: The path of the layout file inside the APK e.g. "res/layout/main.xml"
    :param APK: The APK object used to read the layout file
    :return: A dict mapping the "android:id" of every supported UI element to a dict of its attributes ("type" plus whichever optional attributes the element defines). An empty dict is returned upon errors
    """
    # Optional XML attributes to retrieve for every supported element type as (XML attribute, key in result)
    buttonAttributes = (("android:onClick", "onclick"), ("android:visibility", "visibility"), ("android:clickable", "clickable"), ("android:longClickable", "longclickable"))
    checkAttributes = (("android:onClick", "onclick"), ("android:visibility", "visibility"), ("android:checked", "checked"))
    attributesPerType = {
        "Button": buttonAttributes,
        "ImageButton": buttonAttributes,
        "RadioButton": buttonAttributes,
        "CheckBox": checkAttributes,
        "CheckedTextView": checkAttributes,
        "DatePicker": (("android:minDate", "mindate"), ("android:maxDate", "maxDate")),
        "EditText": (("android:editable", "editable"), ("android:inputType", "inputtype")),
    }
    try:
        elements = {}
        # Read the contents of the layout file
        activityXML = AXMLPrinter(APK.get_file(layoutFilePath)).get_xml_obj()
        logEvent("Parsing the XML layout %s" % layoutFilePath)
        # Iterate over the elements and parse them
        for currentNode in activityXML.firstChild.childNodes:
            if currentNode.nodeName == "RadioGroup":
                # A radio group is described by the buttons it contains
                candidates = [(button, buttonAttributes) for button in currentNode.childNodes]
            elif currentNode.nodeName in attributesPerType:
                candidates = [(currentNode, attributesPerType[currentNode.nodeName])]
            else:
                # Unsupported element e.g. NumberPicker, Spinner
                continue
            for node, optionalAttributes in candidates:
                described = _describeLayoutElement(node, optionalAttributes)
                if described is not None:
                    elements[described[0]] = described[1]

    except Exception as e:
        prettyPrintError(e)
        return {}

    return elements

def _describeLayoutElement(node, optionalAttributes):
    """ Returns the (ID, attributes) pair describing a UI element, or None if the node does not carry an "android:id" attribute """
    # Nodes without attributes (presumably text/whitespace nodes between elements) cannot be described
    nodeAttributes = getattr(node, "attributes", None)
    if nodeAttributes is None or "android:id" not in nodeAttributes.keys():
        return None
    attr = {"type": node.nodeName}
    for xmlName, key in optionalAttributes:
        if xmlName in nodeAttributes.keys():
            # Store the plain value rather than the attribute object
            attr[key] = nodeAttributes[xmlName].value
    return nodeAttributes["android:id"].value, attr
350 | 
351 | 


--------------------------------------------------------------------------------
/tools/runExperimentII.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python
  2 | 
  3 | from Aion.data_generation.reconstruction import *
  4 | from Aion.data_generation.stimulation import *
  5 | from Aion.data_inference.learning import ScikitLearners
  6 | from Aion.data_inference.extraction.featureExtraction import *
  7 | from Aion.utils.data import *     # Needed for accessing configuration files
  8 | from Aion.utils.graphics import * # Needed for pretty printing
  9 | from Aion.utils.misc import *
 10 | from Aion.utils.db import *
 11 | from Aion.shared.DroidutanTest import * # The Droidutan-driven test thread
 12 | from Aion.shared.DroidbotTest import * # The Droidbot-driven test thread
 13 | 
 14 | from sklearn.metrics import *
 15 | import hashlib, pickle
 16 | from droidutan import Droidutan
 17 | 
 18 | import os, sys, glob, shutil, argparse, subprocess, sqlite3
 19 | import time, threading, pickledb, random, exceptions
 20 | 
 21 | def defineArguments():
 22 |     parser = argparse.ArgumentParser(prog="runExperimentI.py", description="The second type of experiments of the Aion active learning framework.")
 23 |     parser.add_argument("-x", "--malwaredir", help="The directory containing the malicious APK's to analyze and use as training/validation dataset", required=True)
 24 |     parser.add_argument("-g", "--goodwaredir", help="The directory containing the benign APK's to analyze and use as training/validation dataset", required=True)
 25 |     parser.add_argument("-d", "--datasetname", help="A unique name to give to the dataset used in the experiment (for DB storage purposes)", required=True)
 26 |     parser.add_argument("-r", "--runnumber", help="The number of the current run of the experiment (for DB storage purposes)", required=True)
 27 |     parser.add_argument("-f", "--analyzeapks", help="Whether to perform analysis on the retrieved APK's", required=False, default="no", choices=["yes", "no"])
 28 |     parser.add_argument("-t", "--analysistime", help="How long to run monkeyrunner (in seconds)", required=False, default=60)
 29 |     parser.add_argument("-u", "--analysisengine", help="The stimulation/analysis engine to use", required=False, choices=["droidbot", "droidutan"], default="droidutan")
 30 |     parser.add_argument("-v", "--vmnames", help="The name(s) of the Genymotion machine(s) to use for analysis (comma-separated)", required=False, default="")
 31 |     parser.add_argument("-z", "--vmsnapshots", help="The name(s) of the snapshot(s) to restore before analyzing an APK (comma-separated)", required=False, default="")
 32 |     parser.add_argument("-a", "--algorithm", help="The algorithm used to classify apps", required=False, default="Ensemble", choices=["KNN10", "KNN25", "KNN50", "KNN100", "KNN250", "KNN500", "SVM", "Trees25", "Trees50", "Trees75", "Trees100", "Ensemble"])
 33 |     parser.add_argument("-s", "--selectkbest", help="Whether to select K best features from the ones extracted from the APK's", required=False, default=0)
 34 |     parser.add_argument("-e", "--featuretype", help="The type of features to consider during training", required=False, default="hybrid", choices=["static", "dynamic", "hybrid"])
 35 |     parser.add_argument("-m", "--accuracymargin", help="The margin (in percentage) within which the training accuracy is allowed to dip", required=False, default=1)
 36 |     parser.add_argument("-i", "--maxiterations", help="The maximum number of iterations to allow", required=False, default=25)
 37 |     return parser
 38 | 
 39 | def main():
 40 |     try:
 41 |         argumentParser = defineArguments()
 42 |         arguments = argumentParser.parse_args()
 43 |         prettyPrint("Welcome to the \"Aion\"'s dynamic experiment I")
 44 | 
 45 |         if arguments.vmnames == "":
 46 |             prettyPrint("No virtual machine names were supplied. Exiting", "warning")
 47 |             return False
 48 | 
 49 |         iteration = 1 # Initial values
 50 |         reanalysis = False
 51 |         currentMetrics = {"accuracy": 0.0, "recall": 0.0, "specificity": 0.0, "precision": 0.0, "f1score": 0.0}
 52 |         previousMetrics = {"accuracy": -1.0, "recall": -1.0, "specificity": -1.0, "precision": -1.0, "f1score": -1.0}
 53 |         reanalyzeMalware, reanalyzeGoodware = [], [] # Use this as a cache until conversion
 54 |         allVMs = arguments.vmnames.split(',')
 55 |         allSnapshots = arguments.vmsnapshots.split(',')
 56 |         availableVMs = [] + allVMs # Initially
 57 | 
 58 |         # Initialize and populate database
 59 |         hashesDB = pickledb.load(getHashesDBPath(), True)
 60 |         aionDB = AionDB(int(arguments.runnumber), arguments.datasetname)
 61 | 
 62 |         # Load APK's and split into training and test datasets
 63 |         prettyPrint("Loading APK's from \"%s\" and \"%s\"" % (arguments.malwaredir, arguments.goodwaredir))
 64 |         # Retrieve malware APK's
 65 |         malAPKs = glob.glob("%s/*.apk" % arguments.malwaredir)
 66 |         if len(malAPKs) < 1:
 67 |             prettyPrint("Could not find any malicious APK's" , "warning")
 68 |         else:
 69 |             prettyPrint("Successfully retrieved %s malicious instances" % len(malAPKs))
 70 |         # Retrieve goodware APK's
 71 |         goodAPKs = glob.glob("%s/*.apk" % arguments.goodwaredir)
 72 |         if len(goodAPKs) < 1:
 73 |             prettyPrint("Could not find any benign APK's", "warning")
 74 |         else:
 75 |             prettyPrint("Successfully retrieved %s benign instances" % len(goodAPKs))
 76 | 
 77 |         # Split the data into training and test datasets
 78 |         malTraining, malTest = [], []
 79 |         goodTraining, goodTest = [], []
 80 |         malTestSize, goodTestSize = len(malAPKs) / 3, len(goodAPKs) / 3
 81 |         # Start with the malicious APKs
 82 |         while len(malTest) < malTestSize:
 83 |             malTest.append(malAPKs.pop(random.randint(0, len(malAPKs)-1)))
 84 |         malTraining += malAPKs
 85 |         prettyPrint("[MALWARE] Training dataset size is %s, test dataset size is %s" % (len(malTraining), len(malTest)))
 86 |         # Same with benign APKs
 87 |         while len(goodTest) < goodTestSize:
 88 |             goodTest.append(goodAPKs.pop(random.randint(0, len(goodAPKs)-1)))
 89 |         goodTraining += goodAPKs
 90 |         prettyPrint("[GOODWARE] Training dataset size is %s, test dataset size is %s" % (len(goodTraining), len(goodTest)))
 91 | 
 92 |         while (round(currentMetrics["f1score"] - previousMetrics["f1score"], 2) >= -(float(arguments.accuracymargin)/100.0)) and (iteration <= int(arguments.maxiterations)):
 93 |             # Set/update the reanalysis flag
 94 |             reanalysis = True if iteration > 1 else False
 95 |             prettyPrint("Experiment I: iteration #%s" % iteration, "info2")
 96 |             # Update the iteration number
 97 |             aionDB.update("run", [("runIterations", str(iteration))], [("runID", arguments.runnumber), ("runDataset", arguments.datasetname)]) # UPDATE run SET runIterations=X WHERE runID=[runnumber]
 98 |             if arguments.analyzeapks == "yes":
 99 |                 allAPKs = malTraining + goodTraining + malTest + goodTest if not reanalysis else reanalyzeMalware + reanalyzeGoodware + malTest + goodTest
100 |                 ########################
101 |                 ## Main Analysis Loop ##
102 |                 ########################
103 |                 currentProcesses = []
104 |                 while len(allAPKs) > 0:
105 |                     prettyPrint("Starting analysis phase")
106 |                     # Step 1. Pop an APK from "allAPKs" (Defaut: last element)
107 |                     currentAPK = allAPKs.pop()
108 |                     # Step 2. Check availability of VMs for test
109 |                     while len(availableVMs) < 1:
110 |                         prettyPrint("No AVD's available for analysis. Sleeping for 10 seconds")
111 |                         print [p.name for p in currentProcesses]
112 |                         print [p.is_alive() for p in currentProcesses]
113 |                         # 2.a. Sleep for "analysisTime"
114 |                         time.sleep(10)
115 |                         # 2.b. Check for available machines
116 |                         for p in currentProcesses:
117 |                             if not p.is_alive():
118 |                                 if verboseON():
119 |                                      prettyPrint("Process \"%s\" is dead. A new AVD is available for analysis" % p.name, "debug")
120 |                                 availableVMs.append(p.name)
121 |                                 currentProcesses.remove(p)
122 |                                 # Also restore clean state of machine 
123 |                                 if len(allAPKs) % 100 == 0: # How often to restore snapshot?
124 |                                     vm = p.name
125 |                                     snapshot = allSnapshots[allVMs.index(vm)]
126 |                                     prettyPrint("Restoring snapshot \"%s\" for AVD \"%s\"" % (snapshot, vm))
127 |                                     restoreVirtualBoxSnapshot(vm, snapshot)
128 | 
129 |                             elif checkAVDState(p.name, "stopping")[0] or checkAVDState(p.name, "powered off")[0] or checkAVDState(p.name, "restoring snapshot")[0]:
130 |                                 prettyPrint("AVD \"%s\" is stuck. Forcing a restoration" % p.name, "warning")
131 |                                 vm = p.name
132 |                                 snapshot = allSnapshots[allVMs.index(vm)]
133 |                                 restoreVirtualBoxSnapshot(vm, snapshot)
134 |                                           
135 |                         print [p.name for p in currentProcesses]
136 |                         print [p.is_alive() for p in currentProcesses]
137 | 
138 |                     # Step 3. Pop one VM from "availableVMs"
139 |                     currentVM = availableVMs.pop()
140 | 
141 |                     if verboseON():
142 |                         prettyPrint("Running \"%s\" on AVD \"%s\"" % (currentAPK, currentVM))
143 | 
144 |                     # Step 4. Start the analysis thread
145 |                     pID = int(time.time())
146 |                     if arguments.analysisengine == "droidutan":
147 |                         if currentAPK in malTest+goodTest:
148 |                             p = DroidutanAnalysis(pID, currentVM, currentVM, currentAPK, int(arguments.analysistime), currentAPK.replace(".apk", "_test_itn%s_filtered.log" % iteration))
149 |                         else:
150 |                             p = DroidutanAnalysis(pID, currentVM, currentVM, currentAPK, int(arguments.analysistime))
151 |                     elif arguments.analysisengine == "droidbot":
152 |                         p = DroidbotAnalysis(pID, currentVM, currentVM, currentAPK, allSnapshots[allVMs.index(currentVM)], int(arguments.analysistime))
153 |                     p.daemon = True # Process will be killed if main thread exits
154 |                     p.start()
155 |                     currentProcesses.append(p)
156 |                       
157 |                     prettyPrint("%s APKs left to analyze" % len(allAPKs), "output")
158 |     
159 |                 # Just make sure all VMs are done
160 |                 while len(availableVMs) < len(allVMs):
161 |                     prettyPrint("Waiting for AVD's to complete analysis")
162 |                     # 2.a. Sleep for "analysisTime"
163 |                     time.sleep(int(arguments.analysistime))
164 |                     # 2.b. Check for available machines
165 |                     for p in currentProcesses:
166 |                         if not p.is_alive():
167 |                             availableVMs.append(p.name)
168 |                             currentProcesses.remove(p)
169 |                             try:
170 |                                 if not p.success:
171 |                                     prettyPrint("Testing app \"%s\" failed. Re-analyzing later" % p.processTarget, "warning")
172 | 				    allAPKs.append(p.processTarget)
173 |                             except exceptions.AttributeError as ae:
174 |                                 prettyPrint("Oops!! No attribute called \"success\"", "warning")
175 | 
176 |                 
177 |                 #######################################
178 |                 ## Analyze log files  after analysis ##
179 |                 #######################################
180 |                 # Try to save some time by only analyzing apps that have been recently (re)analyzed
181 |                 allApps = malTraining + goodTraining + malTest + goodTest if not reanalysis else reanalyzeMalware + reanalyzeGoodware + malTest + goodTest
182 |                 for app in allApps:
183 |                     # 0. Retrieve the database file corresponding to the app
184 |                     if app in malTest+goodTest:
185 |                         inFile = app.replace(".apk", "_test_itn%s_filtered.log" % iteration) # if arguments.analysisengine == "droidutan" else TODO
186 |                     else:
187 |                         inFile = app.replace(".apk", "_filtered.log") if arguments.analysisengine == "droidutan" else app.replace(".apk", "_droidbot/logcat_filtered.log")
188 | 
189 |                     # 1. Check its existence
190 |                     if not os.path.exists(inFile):
191 |                         prettyPrint("Unable to find filtered log file: \"%s\". Skipping" % inFile, "warning")
192 |                         continue
193 | 
194 |                     # 2. Extract and save numerical features
195 |                     prettyPrint("Extracting %s features from APK \"%s\"" % (arguments.featuretype, inFile))
196 |                     staticFeatures, dynamicFeatures = [], []
197 |                     # Save time in case of dynamic features
198 |                     if arguments.featuretype == "static" or arguments.featuretype == "hybrid":
199 |                         sfBasic, sfPermissions, sfAPI, staticFeatures = extractStaticFeatures(app)
200 |                         prettyPrint("Successfully extracted %s static features" % len(staticFeatures))
201 |                     if arguments.featuretype == "dynamic" or arguments.featuretype == "hybrid":
202 |                         trace, dynamicFeatures = extractDroidmonFeatures(inFile)
203 |                         prettyPrint("Successfully extracted %s dynamic features" % len(dynamicFeatures))
204 | 
205 |                     # 3. Store the features
206 |                     if arguments.featuretype == "static" and len(staticFeatures) > 0:
207 |                         features = staticFeatures
208 |                     elif arguments.featuretype == "dynamic" and len(dynamicFeatures) > 0:
209 |                         features = dynamicFeatures
210 |                     elif arguments.featuretype == "hybrid" and len(staticFeatures) > 0 and len(dynamicFeatures) > 0:
211 |                         features = staticFeatures + dynamicFeatures
212 |                            
213 |                     # 4. Write features to file
214 |                     if app in malTest+goodTest:
215 |                         featuresFile = open(app.replace(".apk", "_test_itn%s.%s" % (iteration, arguments.featuretype)), "w")
216 |                     else:
217 |                         featuresFile = open(app.replace(".apk", ".%s" % arguments.featuretype), "w")
218 |                     featuresFile.write("%s\n" % str(features))
219 |                     featuresFile.close()
220 |                     prettyPrint("Done analyzing \"%s\"" % inFile)
221 | 
222 |             ####################################################################
223 |             # Load the JSON  and feature files as traces before classification #
224 |             ####################################################################
225 |             # Load numerical features
226 |             allFeatureFiles = glob.glob("%s/*.%s" % (arguments.malwaredir, arguments.featuretype)) + glob.glob("%s/*.%s" % (arguments.goodwaredir, arguments.featuretype))
227 |             if len(allFeatureFiles) < 1:
228 |                 prettyPrint("Could not retrieve any feature files. Exiting", "error")
229 |                 return False
230 | 
231 |             prettyPrint("Retrieved %s feature files" % len(allFeatureFiles))
232 |             # Split the loaded feature files as training and test 
233 |             Xtr, ytr = [], []
234 |             for ff in allFeatureFiles:
235 |                 fileName = ff.replace(".%s" % arguments.featuretype, ".apk")
236 |                 x = Numerical.loadNumericalFeatures(ff)
237 |                 if len(x) < 1:
238 |                     prettyPrint("Empty feature vector returned. Skipping", "warning")
239 |                     continue
240 |                 if fileName in malTraining:
241 |                     Xtr.append(x)
242 |                     ytr.append(1) 
243 |                 elif fileName in goodTraining:
244 |                     Xtr.append(x)
245 |                     ytr.append(0)
246 | 
247 | 
248 |             metricsDict = {}
249 |             ############
250 |             # Training #
251 |             ############
252 |             # Classifying using [algorithm]
253 |             prettyPrint("Classifying using %s" % arguments.algorithm)
254 |             clfFile = "%s/db/%s_run%s_itn%s_%s.txt" % (getProjectDir(), arguments.algorithm, arguments.runnumber, iteration, arguments.featuretype)
255 |             # Train and predict
256 |             if arguments.algorithm.lower().find("trees") != -1:
257 |                 e = int(arguments.algorithm.replace("Trees", ""))
258 |                 clf, predicted, predicted_test = ScikitLearners.predictAndTestRandomForest(Xtr, ytr, estimators=e, selectKBest=int(arguments.selectkbest))
259 |             elif arguments.algorithm.lower().find("knn") != -1:
260 |                 k = int(arguments.algorithm.replace("KNN", ""))
261 |                 clf, predicted, predicted_test = ScikitLearners.predictAndTestKNN(Xtr, ytr, K=k, selectKBest=int(arguments.selectkbest))
262 |             elif arguments.algorithm.lower().find("svm") != -1:
263 |                 clf, predicted, predicted_test = ScikitLearners.predictAndTestSVM(Xtr, ytr, selectKBest=int(arguments.selectkbest))
264 |             else:
265 |                 K = [10, 25, 50, 100, 250, 500]
266 |                 E = [10, 25, 50, 75, 100]
267 |                 allCs = ["KNN-%s" % k for k in K] + ["FOREST-%s" % e for e in E] + ["SVM"]
268 |                 clf, predicted, predicted_test = ScikitLearners.predictAndTestEnsemble(Xtr, ytr, classifiers=allCs, selectKBest=int(arguments.selectkbest))
269 |             # Write to file
270 |             open(clfFile, "w").write(pickle.dumps(clf))
271 |             metrics = ScikitLearners.calculateMetrics(ytr, predicted)
272 |             metricsDict = metrics
273 | 
274 |             # Print and save results
275 |             prettyPrint("Metrics using %s at iteration %s" % (arguments.algorithm, iteration), "output")
276 |             prettyPrint("Accuracy: %s" % str(metricsDict["accuracy"]), "output")
277 |             prettyPrint("Recall: %s" % str(metricsDict["recall"]), "output")
278 |             prettyPrint("Specificity: %s" % str(metricsDict["specificity"]), "output")
279 |             prettyPrint("Precision: %s" % str(metricsDict["precision"]), "output")
280 |             prettyPrint("F1 Score: %s" %  str(metricsDict["f1score"]), "output")
281 |             # Insert datapoint into the database
282 |             tstamp = getTimestamp(includeDate=True)
283 |             learnerID = "%s_run%s_itn%s" % (arguments.algorithm, arguments.runnumber, iteration)
284 |             aionDB.insert(table="learner", columns=["lrnID", "lrnParams"], values=[learnerID, clfFile])
285 |             aionDB.insert(table="datapoint", columns=["dpLearner", "dpIteration", "dpRun", "dpTimestamp", "dpFeature", "dpType", "dpAccuracy", "dpRecall", "dpSpecificity", "dpPrecision", "dpFscore"], values=[learnerID, str(iteration), arguments.runnumber, tstamp, arguments.featuretype, "TRAIN", str(metricsDict["accuracy"]), str(metricsDict["recall"]), str(metricsDict["specificity"]), str(metricsDict["precision"]), str(metricsDict["f1score"])])
286 | 
287 |             # Save incorrectly-classified training instances for re-analysis
288 |             reanalyzeMalware, reanalyzeGoodware = [], [] # Reset the lists to store new misclassified instances
289 |             for index in range(len(ytr)):
290 |                 if predicted[index] != ytr[index]:
291 |                     if allFeatureFiles[index].find("test") != -1:
292 |                         prettyPrint("Skipping adding test file \"%s\" to the reanalysis lists" %  allFeatureFiles[index])
293 |                     else:
294 |                         # Add to reanalysis lists
295 |                         if allFeatureFiles[index].find("malware") != -1:
296 |                             reanalyzeMalware.append(allFeatureFiles[index].replace(arguments.featuretype, "apk"))
297 |                         else:
298 |                             reanalyzeGoodware.append(allFeatureFiles[index].replace(arguments.featuretype, "apk"))
299 | 
300 |             prettyPrint("Reanalyzing %s benign and %s malicious training apps" % (len(reanalyzeGoodware), len(reanalyzeMalware)), "debug")
301 | 
302 |             # Swapping metrics
303 |             previousMetrics = currentMetrics
304 |             currentMetrics = metricsDict
305 | 
306 |             # Commit results to the database
307 |             aionDB.save()
308 | 
309 |             # Restore snapshots of all VMs
310 |             vms, snaps = arguments.vmnames.split(','), arguments.vmsnapshots.split(',')
311 |             if len(vms) > len(snaps):
312 |                 r = range(len(snaps))
313 |             else:
314 |                 r = range(len(vms)) # Or of snaps doesn't matter
315 |             # Killall -9 VBoxHeadless
316 |             #doomsdayCmd = ["killall", "-9", "VBoxHeadless"]
317 |             doomsdayCmd = ["killall", "-9", "VBoxSVC"]
318 |             subprocess.Popen(doomsdayCmd, stderr=subprocess.STDOUT, stdout=subprocess.PIPE)
319 |             for i in r:
320 |                   prettyPrint("Restoring snapshot \"%s\" for AVD \"%s\"" % (snaps[i], vms[i]))
321 |                   if restoreVirtualBoxSnapshot(vms[i], snaps[i]):
322 |                       prettyPrint("Successfully restored AVD")
323 |                   else:
324 |                       prettyPrint("An error occurred while restoring the AVD")
325 | 
326 |             # Update the iteration number
327 |             iteration += 1
328 |             
329 |         # Final Results
330 |         prettyPrint("Training results after %s iterations" % str(iteration-1), "output")
331 |         prettyPrint("Accuracy: %s" % currentMetrics["accuracy"], "output")
332 |         prettyPrint("Recall: %s" % currentMetrics["recall"], "output")
333 |         prettyPrint("Specificity: %s" % currentMetrics["specificity"], "output")
334 |         prettyPrint("Precision: %s" % currentMetrics["precision"], "output")
335 |         prettyPrint("F1 Score: %s" % currentMetrics["f1score"], "output")
336 | 
337 |         # Update the current run's end time
338 |         aionDB.update("run", [("runEnd", getTimestamp(includeDate=True))], [("runID", arguments.runnumber)]) # UPDATE run SET runEnd=X WHERE runID=[runnumber]
339 | 
340 |         #######################################################
341 |         # Commence the test phase using the "best classifier" #
342 |         #######################################################
343 |         # 1. Retrieve the best classifier and its iteration (X)
344 |         results = aionDB.execute("SELECT * FROM datapoint WHERE dpRun='%s' AND dpFeature='%s' ORDER BY dpFScore DESC" % (arguments.runnumber, arguments.featuretype))
345 |         if not results:
346 |             prettyPrint("Could not retrieve data about the training phase. Exiting", "error")
347 |             aionDB.close()
348 |             return False
349 | 
350 |         data = results.fetchall()
351 |         if len(data) < 1:
352 |             prettyPrint("Could not retrieve data about the training phase. Exiting", "error")
353 |             aionDB.close()
354 |             return False
355 |         
356 |         # 1.a. Best classifier should be the first entry
357 |         bestClassifier, bestItn, bestF1score, bestSp = data[0][1], data[0][2], data[0][11], data[0][9]
358 |         if verboseON():
359 |             prettyPrint("The best classifier is %s at iteration %s with F1score of %s and Specificity score of %s" % (bestClassifier, bestItn, bestF1score, bestSp), "debug")
360 |         # 1.b. Load classifier from hyper parameters file
361 |         results = aionDB.execute("SELECT * FROM learner WHERE lrnID='%s'" % bestClassifier)
362 |         if not results:
363 |             prettyPrint("Could not find the hyperparameters file for \"%s\". Exiting" % bestClassifier, "error")
364 |             aionDB.close()
365 |             return False
366 | 
367 |         data = results.fetchall()
368 |         if len(data) < 1:
369 |             prettyPrint("Could not find the hyperparameters file for \"%s\". Exiting" % bestClassifier, "error")
370 |             aionDB.close()
371 |             return False
372 | 
373 |         clfFile = data[0][1]
374 |         if not os.path.exists(clfFile):
375 |             prettyPrint("The file \"%s\" does not exist. Exiting" % clfFile, "error")
376 |             aionDB.close()
377 |             return False
378 |  
379 |         prettyPrint("Loading classifier \"%s\" from \"%s\"" % (bestClassifier, clfFile))
380 |         clf = pickle.loads(open(clfFile).read())
381 | 
382 |         # 2. Classify feature vectors
383 |         P, N = 0.0, 0.0
384 |         TP_maj, TN_maj, FP_maj, FN_maj = 0.0, 0.0, 0.0, 0.0 # To keep track of majority vote classification
385 |         TP_one, TN_one, FP_one, FN_one = 0.0, 0.0, 0.0, 0.0 # To keep track of one-instance classification
386 |         for app in malTest + goodTest:
387 |             prettyPrint("Processing test app \"%s\"" % app)
388 |             # 2.a.  Retrieve all feature vectors up to [iteration]
389 |             appVectors = {}
390 |             for i in range(1, bestItn+1):
391 |                  if os.path.exists(app.replace(".apk", "_test_itn%s.%s" % (i, arguments.featuretype))):
392 |                      v = Numerical.loadNumericalFeatures(app.replace(".apk", "_test_itn%s.%s" % (i, arguments.featuretype)))
393 |                      if len(v) > 1:
394 |                          appVectors["itn%s" % i] = v
395 | 
396 |             if len(appVectors) < 1:
397 |                 prettyPrint("Could not retrieve any feature vectors. Skipping", "warning")
398 |                 continue
399 |                
400 |             prettyPrint("Successfully retrieved %s feature vectors of type \"%s\"" % (len(appVectors), arguments.featuretype))
401 |             # 2.b. Classify each feature vector using the loaded classifier
402 |             appLabel = 1 if app in malTest else 0
403 |             if appLabel == 1:
404 |                 P += 1.0
405 |             else:
406 |                 N += 1.0
407 |             labels = ["Benign", "Malicious"]
408 |             appMalicious, appBenign = 0.0, 0.0
409 |             for v in appVectors:
410 |                 predictedLabel = clf.predict(appVectors[v]).tolist()[0]
411 |                 prettyPrint("\"%s\" app was classified as \"%s\" according to iteration %s" % (labels[appLabel], labels[predictedLabel], v.replace("itn", "")), "output")
412 |                 classifiedCorrectly = "YES" if labels[appLabel] == labels[predictedLabel] else "NO"
413 |                 aionDB.insert("testapp", ["taName", "taRun", "taIteration", "taType", "taClassified", "taLog"], [app, arguments.runnumber, v.replace("itn", ""), labels[appLabel], classifiedCorrectly, app.replace(".apk", "_test_%s_filtered.log" % v)])
414 |                 if predictedLabel == 1:
415 |                     appMalicious += 1.0
416 |                 else:
417 |                     appBenign += 1.0
418 | 
419 |             # 2.c. Decide upon the app's label according to majority vote vs. one-instance
420 |             majorityLabel = 1 if (appMalicious/float(len(appVectors))) >= 0.5 else 0
421 |             oneLabel = 1 if appMalicious >= 1.0 else 0
422 |             if appLabel == 1:
423 |                 # Malicious app
424 |                 if majorityLabel == 1:
425 |                     TP_maj += 1.0
426 |                 else:
427 |                     FN_maj += 1.0
428 |                 if oneLabel == 1:
429 |                     TP_one += 1.0
430 |                 else:
431 |                     FN_one += 1.0
432 |             else:
433 |                 # Benign app
434 |                 if majorityLabel == 1:
435 |                     FP_maj += 1.0
436 |                 else:
437 |                     TN_maj += 1.0
438 |                 if oneLabel == 1:
439 |                     FP_one += 1.0
440 |                 else:
441 |                     TN_one += 1.0
442 |             # 2.d. Declare the classification of the app in question
443 |             prettyPrint("\"%s\" app has been declared as \"%s\" by majority vote and as \"%s\" by one-instance votes" % (labels[appLabel], labels[majorityLabel], labels[oneLabel]), "output")
444 | 
445 |         # 3. Calculate metrics
446 |         accuracy_maj, accuracy_one = (TP_maj+TN_maj)/(P+N), (TP_one+TN_one)/(P+N)
447 |         recall_maj, recall_one = TP_maj/P, TP_one/P
448 |         specificity_maj, specificity_one = TN_maj/N, TN_one/N
449 |         precision_maj, precision_one = TP_maj/(TP_maj+FP_maj), TP_one/(TP_one+FP_one)
450 |         f1score_maj, f1score_one = 2 * (precision_maj*recall_maj) / (precision_maj+recall_maj), 2 * (precision_one*recall_one) / (precision_one+recall_one)
451 | 
452 |         # 4. Display and store metrics
453 |         prettyPrint("Test metrics using %s at run %s" % (arguments.algorithm, arguments.runnumber), "output")
454 |         prettyPrint("Accuracy (majority): %s versus accuracy (one-instance): %s" % (str(accuracy_maj), str(accuracy_one)), "output")
455 |         prettyPrint("Recall (majority): %s versus recall (one-instance): %s" % (str(recall_maj), str(recall_one)), "output")
456 |         prettyPrint("Specificity (majority): %s versus specificity (one-instance): %s" % (str(specificity_maj), str(specificity_one)), "output")
457 |         prettyPrint("Precision (majority): %s versus precision (one-instance): %s" % (str(precision_maj), str(precision_one)), "output")
458 |         prettyPrint("F1 Score (majority): %s versus F1 score (one-instance): %s" % (str(f1score_maj), str(f1score_one)), "output")
459 |         
460 |         # 4.b. Store in the database
461 |         aionDB.insert(table="datapoint", columns=["dpLearner", "dpIteration", "dpRun", "dpTimestamp", "dpFeature", "dpType", "dpAccuracy", "dpRecall", "dpSpecificity", "dpPrecision", "dpFscore"], values=[bestClassifier, bestItn, arguments.runnumber, tstamp, arguments.featuretype, "TEST:Maj", accuracy_maj, recall_maj, specificity_maj, precision_maj, f1score_maj])
462 |         # Same for one-instance classification scheme
463 |         aionDB.insert(table="datapoint", columns=["dpLearner", "dpIteration", "dpRun", "dpTimestamp", "dpFeature", "dpType", "dpAccuracy", "dpRecall", "dpSpecificity", "dpPrecision", "dpFscore"], values=[bestClassifier, bestItn, arguments.runnumber, tstamp, arguments.featuretype, "TEST:One", accuracy_one, recall_one, specificity_one, precision_one, f1score_one])
464 | 
465 |         # Don't forget to save and close the Aion database
466 |         aionDB.close()
467 | 
468 |         # Send notification email
469 |         subject = "Run %s on %s Successful" % (arguments.runnumber, arguments.datasetname)
470 |         msg = "Achieved results:\nTest F1 score (majority): %s versus F1 score (one-instance): %s\nTest Specificity (majority): %s versus specificity (one-instance): %s" % (str(f1score_maj), str(f1score_one), str(specificity_maj), str(specificity_one))
471 |         sendEmail("alu-precision", getAdminEmail(), subject, msg)
472 | 
473 |     except Exception as e:
474 |         prettyPrintError(e)
475 |         subject = "Run %s on %s failed!!" % (arguments.runnumber, arguments.datasetname)
476 |         msg = "Error: %s" % e
477 |         sendEmail("alu-precision", getAdminEmail(), subject, msg)
478 | 
479 |         return False
480 |     
481 |     prettyPrint("Good day to you ^_^")
482 |     return True
483 | 
484 | if __name__ == "__main__":  # Script entry point: only runs when executed directly, not when imported
485 |     main() # NOTE: the True/False value returned by main() is discarded here
486 | 


--------------------------------------------------------------------------------