├── .gitignore ├── LICENSE ├── README.md ├── collectors └── __init__.py ├── maldrolyzer.py ├── plugins ├── __init__.py ├── androrat.py ├── droidian.py ├── marcher.py ├── sandrorat.py ├── thoughtcrime.py ├── xbot007.py └── z3core.py ├── processing ├── __init__.py └── hashes.py └── templates.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | .Python 10 | env/ 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | *.egg-info/ 23 | .installed.cfg 24 | *.egg 25 | 26 | # PyInstaller 27 | # Usually these files are written by a python script from a template 28 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 29 | *.manifest 30 | *.spec 31 | 32 | # Installer logs 33 | pip-log.txt 34 | pip-delete-this-directory.txt 35 | 36 | # Unit test / coverage reports 37 | htmlcov/ 38 | .tox/ 39 | .coverage 40 | .coverage.* 41 | .cache 42 | nosetests.xml 43 | coverage.xml 44 | *,cover 45 | 46 | # Translations 47 | *.mo 48 | *.pot 49 | 50 | # Django stuff: 51 | *.log 52 | 53 | # Sphinx documentation 54 | docs/_build/ 55 | 56 | # PyBuilder 57 | target/ 58 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # maldrolyzer 2 | Simple framework to extract "actionable" data from Android malware (C&Cs, phone numbers etc.) 3 | 4 | ### Installation 5 | You have to install the following packets before you start using this project: 6 | 7 | * Androguard (`git clone https://github.com/androguard/androguard; cd androguard; sudo python setup.py install`) 8 | * PyCrypto (`easy_install pycrypto`) 9 | * pyelftools (`easy_install pyelftools`) 10 | * yara (`easy_install yara`) 11 | 12 | ### Architecture 13 | Idea is really simple and modular. The project has couple of directories, which host a place for you static analysis or output processing: 14 | * `plugins` - this is were the code responsible for the malware identification and data extraction is. Every class has to inherit from `Plugin` class from `templates`. 15 | * Method `recon` idetifies the malware - put there all of the code you need to make sure you can extract the data. 16 | * Method `extract` does the usual extraction. There is no specific format for the extracted data, but it's good to keep it in Python dictionary, so that the ouput processors could read it in a uniform way. 17 | * `processing` - this is were you put classes that inherit from `OutputProcessor` class. They are invoked after the data extraction and get the extracted info. 18 | * `process` method takes the data and produces some kind of a result (i.e. adds a file or C&C to you database, checks if the C&C is live etc.) 19 | 20 | If you want to contribute, write a plugin that decodes some new malware family. It's easy, just look at the existing plugins. 21 | 22 | ### Usage 23 | So, you have an APK sample and you don't know what it is and where is the C&C? Type: 24 | 25 | ``` 26 | python maldrolyzer.py [sample_path] 27 | ``` 28 | 29 | If maldrolyzer knows the malware family it will display some useful information like: 30 | 31 | ``` 32 | {'c2': ['http://esaphapss.net/bn/save_message.php'], 33 | 'malware': 'xbot007', 34 | 'md5': 'ce17e4b04536deac4672b98fbee905e0', 35 | 'sha1': 'a48a2b8a5e1cae168ea42bd271f5b5a0c65f59a9', 36 | 'sha256': 'c3a24d1df11baf2614d7b934afba897ce282f961e2988ac7fa85e270e3b3ea7d', 37 | 'sha512': 'a47f3db765bff9a8d794031632a3cf98bffb3e833f90639b18be7e4642845da2ee106a8947338b9244f50b918a32f1a6a952bb18a1f86f8c176e81c2cb4862b9'} 38 | ``` 39 | And you can track the C&Cs from several malware families using http://amtrckr.info 40 | -------------------------------------------------------------------------------- /collectors/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | import glob 3 | 4 | modules = glob.glob(os.path.dirname(__file__)+"/*.py") 5 | __all__ = [ os.path.basename(f)[:-3] for f in modules] 6 | 7 | -------------------------------------------------------------------------------- /maldrolyzer.py: -------------------------------------------------------------------------------- 1 | from plugins import * 2 | from processing import * 3 | import argparse 4 | from templates import Plugin, OutputProcessor, get_plugin_prevalues 5 | from pprint import pprint 6 | 7 | def load_plugins(): 8 | result = [cls for cls in Plugin.__subclasses__()] 9 | return result 10 | 11 | def run_plugins(args, plugins, filename): 12 | anything = False 13 | processors = [cls(args) for cls in OutputProcessor.__subclasses__()] 14 | prevalues = get_plugin_prevalues(args, filename) 15 | for plugin in plugins: 16 | plugin = plugin(args, filename, prevalues) 17 | if plugin.recon(): 18 | anything = True 19 | data = plugin.extract() 20 | for processor in processors: 21 | processor.process(filename, plugin.NAME, data) 22 | if not anything: 23 | print 'Sorry, no plugin could handle the file' 24 | 25 | def main(): 26 | argparser = argparse.ArgumentParser() 27 | argparser.add_argument("file", type=str, 28 | help="file to analyze") 29 | args = argparser.parse_args() 30 | plugins = load_plugins() 31 | run_plugins(args, plugins, args.file) 32 | 33 | 34 | if __name__ == '__main__': 35 | main() 36 | -------------------------------------------------------------------------------- /plugins/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | import glob 3 | 4 | modules = glob.glob(os.path.dirname(__file__)+"/*.py") 5 | __all__ = [ os.path.basename(f)[:-3] for f in modules] 6 | 7 | -------------------------------------------------------------------------------- /plugins/androrat.py: -------------------------------------------------------------------------------- 1 | from templates import Plugin 2 | 3 | class Androrat(Plugin): 4 | NAME = 'Androrat' 5 | 6 | def recon(self): 7 | for cls in self.dvm.get_classes(): 8 | if 'Lmy/app/client/ProcessCommand;'.lower() in cls.get_name().lower(): 9 | self.process_class = cls 10 | return True 11 | return False 12 | 13 | def extract(self): 14 | c2Found = False 15 | portFound = False 16 | c2 = "" 17 | port = "" 18 | string = None 19 | for method in self.process_class.get_methods(): 20 | if method.name == 'loadPreferences': 21 | for inst in method.get_instructions(): 22 | if inst.get_name() == 'const-string': 23 | string = inst.get_output().split(',')[-1].strip(" '") 24 | if c2Found == True: 25 | c2 = string 26 | c2Found = False 27 | if string == 'ip': 28 | c2Found = True 29 | if string == 'port': 30 | portFound = True 31 | if inst.get_name() == 'const/16': 32 | if portFound == True: 33 | string = inst.get_output().split(',')[-1].strip(" '") 34 | port = string 35 | if c2 and port: 36 | break 37 | 38 | server = "" 39 | if port: 40 | server = "{0}:{1}".format(c2, str(port)) 41 | else: 42 | server = c2 43 | 44 | return {'c2': [server]} 45 | 46 | -------------------------------------------------------------------------------- /plugins/droidian.py: -------------------------------------------------------------------------------- 1 | from templates import Plugin 2 | import base64 3 | 4 | class Droidian(Plugin): 5 | 6 | NAME = 'droidian' 7 | 8 | def recon(self): 9 | for cls in self.dvm.get_classes(): 10 | for field in cls.get_fields(): 11 | if field.name in ['backupURL', 'encodedURL']: 12 | self.droidian_service = cls 13 | return True 14 | return False 15 | 16 | def extract(self): 17 | hosts = set() 18 | string = None 19 | for method in self.droidian_service.get_methods(): 20 | if method.name == '': 21 | for inst in method.get_instructions(): 22 | if inst.get_name() == 'const-string': 23 | string = inst.get_output().split(',')[-1].strip(" '") 24 | try: 25 | string = base64.b64decode(string) 26 | except: 27 | string = None 28 | elif string and inst.get_name() == 'iput-object' and inst.get_output().split('->')[-1].startswith('encodedURL') or inst.get_output().split('->')[-1].startswith('backupURL'): 29 | hosts.add(string) 30 | 31 | return {'c2': list(hosts)} 32 | -------------------------------------------------------------------------------- /plugins/marcher.py: -------------------------------------------------------------------------------- 1 | from templates import Plugin 2 | 3 | class Marcher(Plugin): 4 | 5 | NAME = 'Marcher' 6 | 7 | def recon(self): 8 | for s in self.dvm.get_strings(): 9 | if s.startswith('get.php'): 10 | self.gate = s 11 | return True 12 | 13 | def extract(self): 14 | tainted = self.dx.get_tainted_variables().get_string(self.gate) 15 | m_idx = tainted.get_paths()[0][1] 16 | url_cls = self.dvm.get_cm_method(m_idx)[0] 17 | for cls in self.dvm.get_classes(): 18 | if cls.name == url_cls: 19 | url_cls = cls.get_superclassname() 20 | break 21 | for cls in self.dvm.get_classes(): 22 | if cls.name == url_cls: 23 | for method in cls.get_methods(): 24 | if method.name == '': 25 | for inst in method.get_instructions(): 26 | if inst.get_name() == 'const-string': 27 | c2 = inst.get_output().split(',')[-1].strip("' ") 28 | return {'c2': [c2 + self.gate]} 29 | 30 | -------------------------------------------------------------------------------- /plugins/sandrorat.py: -------------------------------------------------------------------------------- 1 | from templates import Plugin 2 | 3 | class Sandrorat(Plugin): 4 | NAME = 'Sandrorat' 5 | 6 | def recon(self): 7 | d = self.dvm 8 | for s in d.get_strings(): 9 | if 'sandrorat' in s.lower() or 'droidjack' in s.lower(): 10 | return True 11 | return False 12 | 13 | def extract(self): 14 | d = self.dvm 15 | c2 = [] 16 | port = [] 17 | for cls in d.get_classes(): 18 | if len(cls.get_fields()) == 3 and\ 19 | set(['a', 'b', 'c']) == set(map(lambda x: x.name, cls.get_fields())) and\ 20 | len(cls.get_methods()) == 1 and\ 21 | cls.get_methods()[0].name.endswith(''): 22 | clinit = cls.get_methods()[0] 23 | cc = [] 24 | for inst in clinit.get_instructions(): 25 | if inst.get_name() == 'const-string': 26 | c2.append(inst.get_output().split(',')[-1].strip(" '")) 27 | elif inst.get_name() == 'const/16': 28 | port.append(int(inst.get_output().split(',')[-1].strip(" '"))) 29 | servers = [] 30 | for i, server in enumerate(c2): 31 | if len(port) > i: 32 | servers.append(server + ':' + str(port[i])) 33 | else: 34 | servers.append(server) 35 | return {'c2': servers} 36 | 37 | -------------------------------------------------------------------------------- /plugins/thoughtcrime.py: -------------------------------------------------------------------------------- 1 | from templates import Plugin 2 | from zipfile import ZipFile 3 | import base64 4 | from Crypto.Cipher import Blowfish 5 | import xml.etree.ElementTree as ET 6 | 7 | class Thoughtcrime(Plugin): 8 | NAME = "thoughtcrime" 9 | 10 | def recon(self): 11 | return 'res/raw/blfs.key' in self.zipfile.namelist() and \ 12 | 'res/raw/config.cfg' in self.zipfile.namelist() 13 | 14 | def extract(self): 15 | raw_resources = filter(lambda x: x.startswith('res/raw'), self.zipfile.namelist()) 16 | iv = "12345678" # this has to be done better 17 | key = self.zipfile.open('res/raw/blfs.key').read() 18 | key = ''.join(['%x' % ord(x) for x in key])[0:50] 19 | cipher = Blowfish.new(key, Blowfish.MODE_CBC, iv) 20 | decode = base64.b64decode(self.zipfile.open('res/raw/config.cfg').read()) 21 | config = cipher.decrypt(decode) 22 | config = config[:config.find('')+9] 23 | config = ET.fromstring(config) 24 | c2 = config.findall('.//data')[0].get('url_main').split(';') 25 | phone = config.findall('.//data')[0].get('phone_number') 26 | return {'c2': c2, 'phone': phone} 27 | 28 | -------------------------------------------------------------------------------- /plugins/xbot007.py: -------------------------------------------------------------------------------- 1 | from templates import Plugin 2 | 3 | class Xbot007(Plugin): 4 | NAME = 'xbot007' 5 | 6 | def recon(self): 7 | for s in self.dvm.get_strings(): 8 | if 'xbot007' in s.lower().translate(None, '#%'): 9 | return True 10 | return False 11 | 12 | def extract(self): 13 | php_end = None 14 | for string in self.dvm.get_strings(): 15 | if string.endswith('.php'): 16 | php_end = string 17 | host = [] 18 | hostname = self.apk.get_android_resources().get_string(self.apk.get_package(), 'domain') 19 | if hostname: 20 | host.append(hostname[1]) 21 | hostname = self.apk.get_android_resources().get_string(self.apk.get_package(), 'domain2') 22 | if hostname: 23 | host.append(hostname[1]) 24 | for cls in self.dvm.get_classes(): 25 | # There has to be a better method to do THIS 26 | if len(cls.get_methods()) == 1 and\ 27 | cls.get_methods()[0].name == '' and\ 28 | len(cls.get_fields()) >= 2 and\ 29 | len(cls.get_fields()) < 10: 30 | for inst in cls.get_methods()[0].get_instructions(): 31 | if inst.get_name() == 'const-string': 32 | host.append(inst.get_output().translate(None, '#%').split(',')[-1].strip("' ")) 33 | host = filter(lambda x : not x.endswith('.apk'), host) 34 | host = filter(lambda x : x, host) 35 | result = {'c2': map(lambda h: ('http://' + h + '/' + php_end), host)} 36 | return result 37 | -------------------------------------------------------------------------------- /plugins/z3core.py: -------------------------------------------------------------------------------- 1 | from templates import Plugin 2 | from zipfile import ZipFile 3 | from elftools.elf.elffile import ELFFile 4 | from cStringIO import StringIO 5 | import gzip, string 6 | import yara 7 | 8 | def get_strings(data): 9 | result = "" 10 | for c in data: 11 | if c in string.printable: 12 | result += c 13 | continue 14 | if len(result) >= 8: 15 | yield result 16 | result = "" 17 | 18 | class Z3Code(Plugin): 19 | 20 | NAME = 'Z3Core' 21 | 22 | WHITELISTED_DLL = ['System_Core_dll', 'NLua_Android_dll', 23 | 'KopiLua_Android_dll', 'Mono_Android_dll', 24 | 'Z_VFS_Android_dll', 'Xamarin_Mobile_dll', 25 | 'mscorlib_dll', 'System_dll', 'Mono_Android_Export_dll', 26 | 'System_Xml_dll' 27 | ] 28 | 29 | def recon(self): 30 | z = ZipFile(self.filename) 31 | bundle = False 32 | if 'lib/armeabi-v7a/libmonodroid.so' in z.namelist() and 'lib/armeabi-v7a/libmonodroid_bundle_app.so' in z.namelist(): 33 | bundle = 'lib/armeabi-v7a/libmonodroid_bundle_app.so' 34 | elif 'lib/armeabi/libmonodroid.so' in z.namelist() and 'lib/armeabi/libmonodroid_bundle_app.so' in z.namelist(): 35 | bundle = 'lib/armeabi/libmonodroid_bundle_app.so' 36 | if not bundle: 37 | return False 38 | self.bundle = bundle 39 | f = z.open(bundle) 40 | f = StringIO(f.read()) 41 | elffile = ELFFile(f) 42 | section = elffile.get_section_by_name('.dynsym') 43 | for symbol in section.iter_symbols(): 44 | if symbol['st_shndx'] != 'SHN_UNDEF' and symbol.name == 'mono_mkbundle_init': 45 | return True 46 | return False 47 | 48 | def extract(self): 49 | c2 = [] 50 | z = ZipFile(self.filename) 51 | data = z.open(self.bundle).read() 52 | f = StringIO(data) 53 | elffile = ELFFile(f) 54 | section = elffile.get_section_by_name('.dynsym') 55 | for symbol in section.iter_symbols(): 56 | if symbol['st_shndx'] != 'SHN_UNDEF' and symbol.name.startswith('assembly_data_'): 57 | if symbol.name[14:] in self.WHITELISTED_DLL: 58 | continue 59 | dll_data = data[symbol['st_value']:symbol['st_value']+symbol['st_size']] 60 | dll_data = gzip.GzipFile(fileobj=StringIO(dll_data)).read() 61 | regexp = """rule find_url { 62 | strings: 63 | $url = /http:\/\/[A-Za-z0-9\.\/$\-_+!\*'(),]*/ wide 64 | condition: 65 | $url}""" 66 | compiled = yara.compile(source = regexp) 67 | s = compiled.match(data = dll_data) 68 | for entry in s['main'][0]['strings']: 69 | cc = dll_data[entry['offset']:entry['offset']+len(entry['data'])].decode('utf-16') 70 | c2.append(cc) 71 | return {'c2': c2} 72 | 73 | -------------------------------------------------------------------------------- /processing/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | import glob 3 | 4 | modules = glob.glob(os.path.dirname(__file__)+"/*.py") 5 | __all__ = [ os.path.basename(f)[:-3] for f in modules] 6 | 7 | -------------------------------------------------------------------------------- /processing/hashes.py: -------------------------------------------------------------------------------- 1 | from templates import OutputProcessor 2 | import hashlib 3 | from pprint import pprint 4 | 5 | class Hashes(OutputProcessor): 6 | def process(self, filename, name, data): 7 | filedata = open(filename).read() 8 | result = {'malware': name} 9 | result['md5'] = hashlib.md5(filedata).hexdigest() 10 | result['sha256'] = hashlib.sha256(filedata).hexdigest() 11 | result['sha1'] = hashlib.sha1(filedata).hexdigest() 12 | result['sha512'] = hashlib.sha512(filedata).hexdigest() 13 | if data: 14 | result.update(data) 15 | pprint(result) 16 | -------------------------------------------------------------------------------- /templates.py: -------------------------------------------------------------------------------- 1 | from androguard.core.bytecodes import dvm, apk 2 | from androguard.core.analysis import analysis 3 | from zipfile import ZipFile 4 | 5 | def get_plugin_prevalues(args, filename): 6 | result = {} 7 | a = apk.APK(filename) 8 | result['apk'] = a 9 | result['dvm'] = dvm.DalvikVMFormat(a.get_dex()) 10 | result['dx'] = analysis.VMAnalysis(result['dvm']) 11 | result['zipfile'] = ZipFile(filename) 12 | return result 13 | 14 | 15 | class Plugin(object): 16 | NAMES = [] 17 | 18 | def __init__(self, args, filename, prevalues={}): 19 | self.filename = filename 20 | self.args = args 21 | self.dvm = self.apk = None 22 | for name, value in prevalues.iteritems(): 23 | setattr(self, name, value) 24 | 25 | def recon(self): 26 | return False 27 | 28 | def extract(self): 29 | pass 30 | 31 | class OutputProcessor(object): 32 | def __init__(self, args): 33 | self.args = args 34 | 35 | def process(self, filename, data): 36 | pass 37 | 38 | class Collector(object): 39 | def __init__(self, args): 40 | self.args = args 41 | 42 | def get_samples(self): 43 | pass 44 | --------------------------------------------------------------------------------