├── malware ├── __init__.py ├── pickle_tool.py ├── virustotal.py ├── decoder.py ├── sql_tool.py ├── sandbox.py ├── snort.py └── utils.py ├── clean.sh ├── .gitignore ├── pkt_reconstruct.py ├── sandbox_run.py ├── README.md ├── rule_engine.py ├── LICENSE └── ruleEngine.py /malware/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /clean.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | rm -rf ./rules/*.rules 3 | rm -rf ./*.rules 4 | rm -rf ./*.log 5 | find "$PWD/" -name '*.pyc' -exec rm {} \; 6 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | .Python 10 | env/ 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | lib/ 17 | lib64/ 18 | parts/ 19 | sdist/ 20 | var/ 21 | *.egg-info/ 22 | .installed.cfg 23 | *.egg 24 | 25 | # PyInstaller 26 | # Usually these files are written by a python script from a template 27 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 28 | *.manifest 29 | *.spec 30 | 31 | # Installer logs 32 | pip-log.txt 33 | pip-delete-this-directory.txt 34 | 35 | # Unit test / coverage reports 36 | htmlcov/ 37 | .tox/ 38 | .coverage 39 | .cache 40 | nosetests.xml 41 | coverage.xml 42 | 43 | # Translations 44 | *.mo 45 | *.pot 46 | 47 | # Django stuff: 48 | *.log 49 | 50 | # Sphinx documentation 51 | docs/_build/ 52 | 53 | # PyBuilder 54 | target/ 55 | -------------------------------------------------------------------------------- /pkt_reconstruct.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | from malware import utils 4 | 5 | parser = argparse.ArgumentParser(description='''This is a packet reconstruct 6 | tool to help reconstruct 7 | the packet payload.''') 8 | parser.add_argument("-d", "--directory", type=str, 9 | help="Specify a path which place pcap file") 10 | args = parser.parse_args() 11 | 12 | 13 | def get_pcap_list(path): 14 | pcap_list = [] 15 | dirs = os.listdir(path) 16 | dirs.sort() 17 | for item in dirs: 18 | # if item.split('.')[-1] == 'pcap': 19 | pcap_list.append(item) 20 | return pcap_list 21 | 22 | if __name__ == '__main__': 23 | pcap_list = get_pcap_list(args.directory) 24 | for pcap in pcap_list: 25 | save_path = './{log}/{path}/'.format(log=args.directory, 26 | path=pcap) 27 | pcap_path = './{log}/{path}/{path2}.pcap'.format(log=args.directory, 28 | path=pcap, path2=pcap) 29 | connection = utils.follow_tcp_stream(pcap_path) 30 | utils.dump_tcp_stream_content(connection, save_path, True) 31 | 32 | udp_connection = utils.follow_udp_stream(pcap_path) 33 | utils.dump_udp_stream_content(udp_connection, save_path, True) 34 | -------------------------------------------------------------------------------- /malware/pickle_tool.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cPickle as pickle 3 | import simplejson as json 4 | import sql_tool 5 | 6 | 7 | def update_pickle(new_cache): 8 | # print('Updated pickle') 9 | pickle.dump(new_cache, open('url_cache.pkl', 'wb'), 2) 10 | 11 | 12 | def check_pickle(): 13 | # print('Checking in pickle') 14 | if not os.path.isfile('url_cache.pkl'): 15 | malicious_url = {} 16 | pickle.dump(malicious_url, open('url_cache.pkl', 'wb'), 2) 17 | 18 | cache = pickle.load(open('url_cache.pkl', 'rb')) 19 | return cache 20 | 21 | 22 | def update_json(data): 23 | with open("url_cache.json", "wb") as fp: 24 | try: 25 | json.dump(data, fp) 26 | except: 27 | print "UnicodeDecodeError: 'utf8' codec can't decode byte 0xb8" 28 | 29 | 30 | def check_json(): 31 | if not os.path.isfile('url_cache.json'): 32 | init = {} 33 | with open("url_cache.json", "wb") as fp: 34 | json.dump(init, fp) 35 | 36 | with open("url_cache.json", "rb") as fp: 37 | # cache = json.load(fp) 38 | cache = json.load(fp, "ISO-8859-1") 39 | return cache 40 | 41 | 42 | def pickle2json(pkl): 43 | if os.path.isfile(pkl): 44 | cache = pickle.load(open(pkl, 'rb')) 45 | with open('pkl2json.json', 'wb') as fp: 46 | json.dump(cache, fp) 47 | else: 48 | print "No such file" 49 | 50 | 51 | def json2sql(): 52 | db = sql_tool.SQLiteTool() 53 | db.creat() 54 | 55 | with open('pkl2json.json', 'r') as fp: 56 | cache = json.load(fp) 57 | 58 | for key, value in cache.iteritems(): 59 | db.insert2(key, value[0], value[1]) 60 | db.show() 61 | -------------------------------------------------------------------------------- /sandbox_run.py: -------------------------------------------------------------------------------- 1 | import os 2 | import logging 3 | import argparse 4 | from malware.sandbox import Win32Sandbox 5 | 6 | logging.basicConfig(level=logging.INFO, format='[%(levelname)s] %(message)s',) 7 | 8 | 9 | parser = argparse.ArgumentParser(description='''This is a automatic script 10 | which record malware network behavior in 11 | the isolation environment.''') 12 | parser.add_argument("-n", "--name", type=str, 13 | help="Specify a VM name.") 14 | parser.add_argument("-t", "--time", type=int, 15 | help='''Specify a time in seconds 16 | for recording network traffic.''') 17 | parser.add_argument("-d", "--malware", type=str, 18 | help='''Specify a path which place 19 | malware will be copy into sandbox.''') 20 | args = parser.parse_args() 21 | 22 | 23 | def get_malware_table(path): 24 | malware_table = {} 25 | dirs = os.listdir(path) 26 | dirs.sort() 27 | uuid = 0 28 | for malware in dirs: 29 | uuid += 1 30 | malware_table[uuid] = malware 31 | return malware_table 32 | 33 | 34 | def show_times(times): 35 | logging.info("#" * 80) 36 | logging.info("#" + " " * 39 + str(times) + " " * 38 + "#") 37 | logging.info("#" * 80) 38 | 39 | if __name__ == "__main__": 40 | malware_table = get_malware_table(args.malware) 41 | 42 | for uuid in malware_table.keys(): 43 | malware_path = './{path}/{malware}'.format(path=args.malware, 44 | malware=malware_table[uuid]) 45 | show_times(uuid) 46 | winxp = Win32Sandbox(args.name) 47 | winxp.start() 48 | winxp.login('john', 'john') 49 | winxp.copy_malware_in(malware_path) 50 | winxp.active_capture_parallel(args.time) 51 | winxp.power_off() 52 | winxp.restore_snapshot('Origin') 53 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Dynamic Malware Analysis 2 | ======= 3 | 4 | This is a malware analysis project which expecte to generate snort rule via malicious network traffic 5 | 6 | 7 | ## Requirement package 8 | - [pyvbox](https://pypi.python.org/pypi/pyvbox): for creating an sandbox. 9 | - [dpkt](https://code.google.com/p/dpkt/): for parsering network packet. 10 | - [requests](http://docs.python-requests.org/en/latest/): for sending requests to [VirusTotal](https://www.virustotal.com/en/) 11 | 12 | 13 | ## Isolation environment in VirtualBox 14 | 15 | 1. Take the snapshot (manually) 16 | 2. Copy the malware into virtual machine 17 | 3. Active the malware 18 | 4. Record the network traffic (Save in the host) in the virtual machinei 19 | 5. Restore the snapshot (Which means goto step 0) 20 | 21 | 22 | ## Usage 23 | 24 | - Isolation environment usage 25 | 26 | Show help messages 27 | 28 | ``` 29 | $ python sandbox_run.py -h 30 | 31 | usage: sandbox_run.py [-h] [-n NAME] [-t TIME] [-m MALWARE] 32 | 33 | This is a automatic script which record malware network behavior in the 34 | isolation environment. 35 | 36 | optional arguments: 37 | -h, --help show this help message and exit 38 | -n NAME, --name NAME Specify a VM name. 39 | -t TIME, --time TIME Specify a time in seconds for recording network 40 | traffic. 41 | -m MALWARE, --malware MALWARE 42 | Specify a path which place malware will be copy into 43 | sandbox. 44 | ``` 45 | 46 | The following example is starting a virtual machine named WinXPSP3 and capturing network traffic for 60 seconds. 47 | 48 | ``` 49 | $ python sandbox_run.py -n WinXPSP3 -t 60 -m malware_bin 50 | ``` 51 | 52 | - Packet reconstruct Usage 53 | 54 | Show help message 55 | ``` 56 | $ python pkt_reconstruct.py -h 57 | 58 | usage: pkt_reconstruct.py [-h] [-d DIRECTORY] 59 | 60 | This is a packet reconstruct tool to help reconstruct the packet payload. 61 | 62 | optional arguments: 63 | -h, --help show this help message and exit 64 | -d DIRECTORY, --directory DIRECTORY 65 | Specify a path which place pcap file 66 | ``` 67 | 68 | ## Todo 69 | 70 | 1. Analysis malicious network traffic from packet reconstruct 71 | 2. Automatic generate the snort rule depending on analyse results 72 | 3. A Web GUI interface for user can upload the malware and generate snort rule 73 | -------------------------------------------------------------------------------- /malware/virustotal.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import apikey 3 | from pprint import pprint 4 | 5 | 6 | def domain_report(resource): 7 | parameters = {"domain": resource, "apikey": apikey.APIKEY_0} 8 | res = requests.get('https://www.virustotal.com/vtapi/v2/domain/report', 9 | params=parameters) 10 | response_json = res.json() 11 | # pprint(response_json) 12 | if response_json: 13 | positives = response_json.get('detected_urls')[0].get('positives') 14 | return positives 15 | else: 16 | return -1 17 | 18 | 19 | def ip_report(resource): 20 | parameters = {"ip": resource, "apikey": apikey.APIKEY_0} 21 | res = requests.get( 22 | 'https://www.virustotal.com/vtapi/v2/ip-address/report', 23 | params=parameters) 24 | response_json = res.json() 25 | # pprint(response_json) 26 | if response_json: 27 | positives = response_json.get('detected_urls')[0].get('positives') 28 | return positives 29 | else: 30 | return -1 31 | 32 | 33 | def url_report(resource): 34 | '''Sent a URLs sacn request to virustotal''' 35 | parameters = {"resource": resource, "apikey": apikey.APIKEY_0, "scan": "1"} 36 | res = requests.post('https://www.virustotal.com/vtapi/v2/url/report', 37 | params=parameters) 38 | response_json = res.json() 39 | # pprint(response_json) 40 | if response_json: 41 | response_code = response_json.get('response_code') 42 | 43 | if response_code != 1: 44 | print "Submitted Failed" 45 | with open('submitted_failed.log', 'a') as fp: 46 | fp.write('{u}\n'.format(u=resource)) 47 | else: 48 | positives = response_json.get('positives') 49 | return positives 50 | else: 51 | return -1 52 | 53 | 54 | def submit_url(url): 55 | parameters = {"apikey": apikey.APIKEY_0, 'url': url} 56 | res = requests.post('https://www.virustotal.com/vtapi/v2/url/scan', 57 | params=parameters) 58 | response_json = res.json() 59 | if response_json: 60 | response_code = response_json.get('response_code') 61 | # pprint(response_json) 62 | if response_code != 1: 63 | print "Submitted Failed" 64 | with open('submitted_failed.log', 'a') as fp: 65 | fp.write('{u}\n'.format(u=url)) 66 | 67 | 68 | if __name__ == '__main__': 69 | pass 70 | -------------------------------------------------------------------------------- /malware/decoder.py: -------------------------------------------------------------------------------- 1 | import dpkt 2 | import socket 3 | import binascii 4 | 5 | 6 | def hexify(x): 7 | h = binascii.hexlify(x) 8 | tohex = " ".join(h[i:i+2] for i in range(0, len(h), 2)) 9 | return tohex 10 | 11 | 12 | def truncate_dns(x): 13 | return x[36:-12] 14 | 15 | 16 | def _udp_iterator(pc): 17 | for ts, pkt in pc: 18 | try: 19 | eth = dpkt.ethernet.Ethernet(pkt) 20 | except dpkt.dpkt.NeedData: 21 | continue 22 | 23 | if eth.type == dpkt.ethernet.ETH_TYPE_IP: 24 | ip = eth.data 25 | if ip.p == dpkt.ip.IP_PROTO_UDP: 26 | udp = ip.data 27 | yield (ip.src, udp.sport, ip.dst, udp.dport, udp.data) 28 | else: 29 | pass 30 | # Not UDP packets 31 | else: 32 | pass 33 | # Not ether packets 34 | return 35 | 36 | 37 | def _tcp_iterator(pc): 38 | for ts, pkt in pc: 39 | try: 40 | eth = dpkt.ethernet.Ethernet(pkt) 41 | except dpkt.dpkt.NeedData: 42 | continue 43 | 44 | if eth.type == dpkt.ethernet.ETH_TYPE_IP: 45 | ip = eth.data 46 | if ip.p == dpkt.ip.IP_PROTO_TCP: 47 | tcp = ip.data 48 | yield (ip.src, tcp.sport, ip.dst, tcp.dport, tcp.data) 49 | else: 50 | pass 51 | # Not TCP packets 52 | else: 53 | pass 54 | # Not ether packets 55 | return 56 | 57 | 58 | def decode_dns_qd_name(pcap_path): 59 | qd_name_list = [] 60 | five_tuple = [] 61 | conn = {} 62 | fp = open(pcap_path) 63 | pc = dpkt.pcap.Reader(fp) 64 | unknown_opcode_counter = 0 65 | for (src, sport, dst, dport, data) in _udp_iterator(pc): 66 | if dport == 53: 67 | key = (src, sport, dst, dport) 68 | # UDP/53 is a DNS query 69 | try: 70 | dns = dpkt.dns.DNS(data) 71 | conn[key] = [dns.qd[0].name, truncate_dns(hexify(data))] 72 | except (dpkt.dpkt.UnpackError, IndexError): 73 | unknown_opcode_counter += 1 74 | # An unknown opcode maybe malicious traffic 75 | # print unknown_opcode_counter 76 | key = (src, sport, dst, dport, unknown_opcode_counter) 77 | # print 'UNKNOWN_DNS_DATA:', hexify(data) 78 | conn[key] = ['UNKNOWN_DNS', truncate_dns(hexify(data))] 79 | 80 | # qd_name_list.append(dns.qd[0].name) 81 | # five_tuple.append((src, sport, dst, dport)) 82 | # print truncate_dns(hexify(data)) 83 | # print "Query for", repr(dns.qd[0].name) 84 | fp.close() 85 | return conn 86 | 87 | 88 | def decode_http_req_header(pcap_path): 89 | host_list = [] 90 | uri_list = [] 91 | five_tuple = [] 92 | user_agent_list = [] 93 | fp = open(pcap_path) 94 | pc = dpkt.pcap.Reader(fp) 95 | for (src, sport, dst, dport, data) in _tcp_iterator(pc): 96 | if dport == 80 and len(data) > 0: 97 | key = (src, sport, dst, dport) 98 | http_req = dpkt.http.Request(data) 99 | # host_list.append(http_req.headers['host']) 100 | # uri_list.append(http_req.uri) 101 | # user_agent_list.append(http_req.headers['user-agent']) 102 | # five_tuple.append((src, sport, dst, dport)) 103 | 104 | conn[key] = [http_req.headers['host'], 105 | http_req.uri, 106 | http_req.headers['user-agent']] 107 | # print http_req.headers.keys() 108 | # print "URI is ", http_req.uri 109 | # for header in http_req.headers.keys() : 110 | # pass 111 | # print header, http_req.headers[header] 112 | # print "method is ", http_req.method 113 | # print "HTTP headers, packed ", http_req.pack() 114 | # print "HTTP version", http_req.version 115 | # print "HTTP data ", http_req.data 116 | fp.close() 117 | return conn 118 | 119 | 120 | if __name__ == '__main__': 121 | conn = decode_dns_qd_name('./2a.pcap') 122 | print len(conn) 123 | conn2 = decode_http_req_header('./2a.pcap') 124 | print conn2 125 | -------------------------------------------------------------------------------- /malware/sql_tool.py: -------------------------------------------------------------------------------- 1 | import sqlite3 2 | import logging 3 | 4 | logger = logging.getLogger(__name__) 5 | 6 | class SQLiteTool(object): 7 | def __init__(self): 8 | self.conn = sqlite3.connect('url_reports.db') 9 | 10 | def creat_url_report(self): 11 | try: 12 | self.conn.execute('''CREATE TABLE URLREPORTS 13 | (ID TEXT PRIMARY KEY NOT NULL, 14 | URL TEXT NOT NULL, 15 | POSITIVE INT);''') 16 | # print "Table created successfully" 17 | except sqlite3.OperationalError, e: 18 | pass 19 | # print e 20 | 21 | def creat_domain_report(self): 22 | try: 23 | self.conn.execute('''CREATE TABLE DOMAINREPORTS 24 | (ID TEXT PRIMARY KEY NOT NULL, 25 | DOMAIN TEXT NOT NULL, 26 | POSITIVE INT);''') 27 | # print "Table created successfully" 28 | except sqlite3.OperationalError, e: 29 | pass 30 | # print e 31 | 32 | def __update_pos(self, key, pos): 33 | self.conn.execute("UPDATE URLREPORTS set POSITIVE = %d where ID = '%s'" % (pos, key)) 34 | # print "Update Operation done successfully"; 35 | self.conn.commit() 36 | 37 | def __update_url(self, key, url): 38 | self.conn.execute("UPDATE URLREPORTS set URL = '%s' where ID = '%s'" % (url, key)) 39 | # print "Update Operation done successfully"; 40 | self.conn.commit() 41 | # TODO 42 | # Updated the url from Virustotal not from pcap 43 | # print "Total number of rows updated :", self.conn.total_changes 44 | 45 | def __insert(self, key, url, pos): 46 | try: 47 | self.conn.execute('''INSERT INTO URLREPORTS (ID,URL,POSITIVE) 48 | VALUES (?,?,?)''', (key, url, pos)) 49 | self.conn.commit() 50 | except sqlite3.IntegrityError, e: 51 | print '[ERROR]', e 52 | 53 | def insert2(self, key, url, pos): 54 | 55 | cursor = self.conn.execute("SELECT ID from URLREPORTS WHERE ID = '%s'" % key) 56 | 57 | try: 58 | key_in_table = cursor.next()[0] 59 | except StopIteration: 60 | key_in_table = None 61 | 62 | if key_in_table and key_in_table == key: 63 | logger.info("[SQL] Update it") 64 | # if the URL in the table update it 65 | self.__update_pos(key, pos) 66 | self.__update_url(key, url) 67 | else: 68 | logger.info("[SQL] Insert the table") 69 | # else insert the table 70 | self.__insert(key, url, pos) 71 | 72 | def show_positive(self, key): 73 | cursor = self.conn.execute("SELECT POSITIVE from URLREPORTS WHERE ID = '%s'" % key) 74 | try: 75 | pos = int(cursor.next()[0]) 76 | except StopIteration: 77 | pos = None 78 | 79 | return pos 80 | 81 | def show_url(self, key): 82 | cursor = self.conn.execute("SELECT URL from URLREPORTS WHERE ID = '%s'" % key) 83 | try: 84 | url = str(cursor.next()[0]) 85 | except StopIteration: 86 | url = None 87 | 88 | return url 89 | 90 | def is_key(self, key): 91 | cursor = self.conn.execute("SELECT URL from URLREPORTS WHERE ID = '%s'" % key) 92 | try: 93 | key_in_table = cursor.next()[0] 94 | key_in_table = True 95 | except StopIteration: 96 | key_in_table = False 97 | 98 | return key_in_table 99 | 100 | def show(self): 101 | cursor = self.conn.execute("SELECT ID, URL, POSITIVE from URLREPORTS") 102 | for row in cursor: 103 | print "ID = ", row[0] 104 | print "URL = ", row[1] 105 | print "POSITIVE = ", row[2] 106 | print "=== END ===" 107 | 108 | def __del__(self): 109 | self.conn.close() 110 | 111 | 112 | def test(): 113 | import hashlib 114 | url = 'www.google.com' 115 | key = hashlib.sha1(url).hexdigest() 116 | print "The key is: ", key 117 | 118 | sql = SQLiteTool() 119 | sql.creat() 120 | 121 | sql.insert2(key, url, 100) 122 | 123 | print "=== NOW TABLE ===" 124 | sql.show() 125 | print "=== SHOW POSITIVE ===" 126 | print sql.show_positive(key) 127 | print "=== SHOW URL ===" 128 | print sql.show_url(key) 129 | 130 | 131 | if __name__ == '__main__': 132 | import hashlib 133 | url = 'www.baidu.com/s?wd=m16M-MM-bM-9M-R' 134 | key = hashlib.sha1(url).hexdigest() 135 | print "The key is: ", key 136 | 137 | sql = SQLiteTool() 138 | # print "=== TABLE ===" 139 | # sql.show() 140 | print sql.show_url(key) 141 | -------------------------------------------------------------------------------- /malware/sandbox.py: -------------------------------------------------------------------------------- 1 | import virtualbox 2 | import os 3 | import time 4 | import threading 5 | import logging 6 | 7 | logging.basicConfig(level=logging.DEBUG, 8 | format='(%(threadName)-10s) %(message)s',) 9 | 10 | 11 | class Sandbox(object): 12 | def __init__(self, name): 13 | vbox = virtualbox.VirtualBox() 14 | self.vm = vbox.find_machine(name) 15 | self.session = None 16 | self.gs = None 17 | self.malware_name = None 18 | 19 | def start(self): 20 | logging.info("Starting the VM") 21 | session = virtualbox.Session() 22 | power_up_process = self.vm.launch_vm_process(session, 'gui', '') 23 | power_up_process.wait_for_completion() 24 | session.unlock_machine() 25 | 26 | def power_off(self): 27 | logging.info("Shutdown the VM") 28 | self.gs.close() 29 | power_down_process = self.session.console.power_down() 30 | power_down_process.wait_for_completion() 31 | time.sleep(5) 32 | 33 | def restore_snapshot(self, point): 34 | logging.info("Restored the VM") 35 | session = self.vm.create_session() 36 | origin_point = session.machine.find_snapshot(point) 37 | restore_process = session.console.restore_snapshot(origin_point) 38 | restore_process.wait_for_completion() 39 | session.unlock_machine() 40 | 41 | def take_snapshot(self, point): 42 | logging.info("Take the snapshoot on %s" % (point)) 43 | session = self.vm.create_session() 44 | localtime = time.asctime(time.localtime(time.time())) 45 | snapshot_process = self.session.console.take_snapshot(point, localtime) 46 | snapshot_process.wait_for_completion() 47 | session.unlock_machine() 48 | 49 | def delete_snapshot(self, point): 50 | logging.info("Delete the snapshoot %s" % (point)) 51 | session = self.vm.create_session() 52 | origin_point = session.machine.find_snapshot(point) 53 | delete_process = session.console.delete_snapshot(origin_point.id_p) 54 | delete_process.wait_for_completion() 55 | session.unlock_machine() 56 | 57 | def capture_traffic(self, wait_sec, interface=0): 58 | save_path = './PCAPLog/{d}'.format(d=self.malware_name) 59 | # save_file = './PCAPLog/{d}Log/{f}.pcap'.format(d=self.malware_name, 60 | # f=self.malware_name) 61 | if not os.path.isdir(save_path): 62 | os.makedirs(save_path) 63 | logging.debug('Capture the network traffic') 64 | session = self.vm.create_session() 65 | adapter = session.machine.get_network_adapter(interface) 66 | adapter.trace_file = os.path.abspath(save_path + '/%s.pcap' 67 | % (self.malware_name)) 68 | adapter.trace_enabled = True 69 | time.sleep(wait_sec) 70 | adapter.trace_enabled = False 71 | session.unlock_machine() 72 | logging.debug('Exiting') 73 | 74 | 75 | class Win32Sandbox(Sandbox): 76 | def __init__(self, name): 77 | super(Win32Sandbox, self).__init__(name) 78 | 79 | def __forward_timer(self, output, forward_hour): 80 | # output[0] == process; output[1] == stdout; output[2] == stderr 81 | current_time = output[1][5:-2] 82 | hour = int(current_time[0:2]) 83 | if hour == 12: 84 | hour = 1 85 | else: 86 | hour = hour + forward_hour 87 | new_time = '{hh}:{mm}'.format(hh=str(hour), mm=current_time[3:5]) 88 | logging.debug('Correct the time to: ' + new_time) 89 | return new_time 90 | 91 | def login(self, account, password): 92 | logging.info("Login to Windows XP") 93 | self.session = self.vm.create_session() 94 | self.gs = self.session.console.guest.create_session('john', 'john') 95 | time.sleep(10) 96 | try: 97 | self.gs.execute('C:\\Windows\\System32\\cmd.exe', 98 | ['/C', 'tasklist']) 99 | except: 100 | logging.info("Waiting for logins to complete") 101 | time.sleep(15) 102 | 103 | def copy_malware_in(self, path): 104 | self.malware_name = path.split('/')[-1] 105 | logging.info("Copy malware %s into sandbox" % (self.malware_name)) 106 | copy_process = self.gs.copy_to(path, 'C:\\%s.exe' % (self.malware_name)) 107 | copy_process.wait_for_completion() 108 | 109 | def active_malware(self, name, timeout=5000): 110 | logging.debug('Activate the malware') 111 | try: 112 | process, stdout, stderr = self.gs.execute('C:\\%s.exe' % (name), 113 | timeout_ms=timeout) 114 | except: 115 | f = open("./cannot_exec.out", "a") 116 | f.write(name + '\n') 117 | f.close() 118 | logging.info('Execute Error') 119 | logging.debug('Exiting') 120 | return 121 | 122 | output = self.gs.execute('C:\\Windows\\System32\\cmd.exe', 123 | ['/C', 'time', '/T'], timeout_ms=timeout) 124 | 125 | new_time = self.__forward_timer(output, 1) 126 | 127 | self.gs.execute('C:\\Windows\\System32\\cmd.exe', 128 | ['/C', 'time', new_time], timeout_ms=timeout) 129 | 130 | output = self.gs.execute('C:\\Windows\\System32\\cmd.exe', 131 | ['/C', 'time', '/T'], timeout_ms=timeout) 132 | 133 | logging.debug('Exiting') 134 | 135 | def capture_traffic(self, wait_sec, interface=0): 136 | super(Win32Sandbox, self).capture_traffic(wait_sec, interface) 137 | 138 | def active_capture_parallel(self, time): 139 | capture_worker = threading.Thread(name='Capture Traffic', 140 | target=self.capture_traffic, 141 | args=(time, )) 142 | active_worker = threading.Thread(name='Active Malware', 143 | target=self.active_malware, 144 | args=(self.malware_name, )) 145 | capture_worker.start() 146 | active_worker.start() 147 | capture_worker.join() 148 | active_worker.join(5) 149 | 150 | 151 | class LinuxSandbox(Sandbox): 152 | def __init__(self, name): 153 | super(LinuxSandbox, self).__init__(name) 154 | 155 | def bash_echo(self): 156 | # To be test on linux sandbox 157 | process, stdout, stderr = self.gs.execute('/bin/bash', 158 | ['-c', 'echo hello world']) 159 | if stdout: 160 | logging.info(stdout) 161 | if stderr: 162 | logging.info(stderr) 163 | 164 | def capture_traffic(self, wait_sec, interface=0): 165 | super(LinuxSandbox, self).capture_traffic(wait_sec, interface) 166 | 167 | 168 | if __name__ == "__main__": 169 | pass 170 | -------------------------------------------------------------------------------- /malware/snort.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | class SnortRuleBase(object): 4 | def __init__(self): 5 | # Rule Headers 6 | self.action = 'alert' 7 | self.protocol = 'tcp' 8 | self.src_ip = 'any' 9 | self.src_port = 'any' 10 | self.dst_ip = 'any' 11 | self.dst_port = 'any' 12 | 13 | # General Rule 14 | self.msg = None 15 | self.ref = None 16 | self.sid = None 17 | self.gid = None 18 | self.rev = None 19 | self.classtype = None 20 | self.priority = None 21 | self.metadata = None 22 | 23 | # Payload Detection Rule 24 | self.content = None 25 | self.uricontent = None 26 | # behavior keyword in payload detection 27 | # self.depth = None 28 | # self.offset = None 29 | # self.nocase = None 30 | 31 | 32 | class SnortRule(SnortRuleBase): 33 | def __init__(self): 34 | super(SnortRule, self).__init__() 35 | 36 | def __str__(self): 37 | '''Return complete snort rule in string''' 38 | header = self.__rule_header() 39 | options = self.__rule_options() 40 | return "{header}{options}".format(header=header, options=options) 41 | 42 | def __rule_header(self): 43 | '''Generate snort rule header. 44 | There are five information in the Rules Headers 45 | Rule Actions, Protocols, IP Addresses, Port Numbers, 46 | The Direction Operator''' 47 | header = [self.action, self.protocol, self.src_ip, 48 | self.src_port, '->', self.dst_ip, self.dst_port] 49 | header = [str(i) for i in header] 50 | return ' '.join(header) 51 | 52 | def __rule_options(self): 53 | '''Generate rule options. 54 | There are four major categories of Rules Options. 55 | general, payload, non-payload, post-detection''' 56 | general = self.__rule_general() 57 | payload = self.__rule_payload() 58 | return "({general}; {payload};)".format(general=general, 59 | payload=payload) 60 | 61 | def __rule_general(self): 62 | '''Generate snort general rule options. 63 | There are eight keyword in general rule. 64 | msg, reference, gid, sid, rev, classtype, priority, metadata''' 65 | references = self.__slice_multiple('reference', self.ref) 66 | 67 | rest_general = ['msg:{message}'.format(message=self.msg), 68 | 'sid:{sid}'.format(sid=self.sid), 69 | 'gid:{gid}'.format(gid=self.gid), 70 | 'rev:{rev}'.format(rev=self.rev), 71 | 'classtype:{clstype}'.format(clstype=self.classtype), 72 | 'priority:{priority}'.format(priority=self.priority), 73 | 'metadata:{metadata}'.format(metadata=self.metadata)] 74 | general = rest_general + references 75 | general = self.__clean_none(general) 76 | return '; '.join(general) 77 | 78 | def __rule_payload(self): 79 | '''Generate snort payload detection rule options''' 80 | contents = self.__slice_multiple('content', self.content) 81 | uricontent = self.__slice_multiple('uricontent', self.uricontent) 82 | 83 | payload = contents + uricontent 84 | payload = self.__clean_none(payload) 85 | return '; '.join(payload) 86 | 87 | def __rule_non_payload(self): 88 | '''Generate snort non-payload detection rule options''' 89 | pass 90 | 91 | def __clean_none(self, option_list): 92 | '''Clean the None in the list''' 93 | update = [] 94 | for option in option_list: 95 | if "None" not in option: 96 | update.append(option) 97 | return update 98 | 99 | def __slice_multiple(self, tag, data): 100 | '''Slice the multiple item''' 101 | behavior_keyword = ['nocase', 'depth', 'offset', 'rawbytes', 102 | 'distance', 'http_method'] 103 | if data is None: 104 | return [] 105 | else: 106 | assert isinstance(data, list) 107 | sliced = [] 108 | for item in data: 109 | if item in behavior_keyword: 110 | sliced.append('{keyword}'.format(keyword=item)) 111 | elif item.split(':')[0] in behavior_keyword: 112 | behavior = item.split(':')[0] 113 | size = item.split(':')[1] 114 | sliced.append('{behavior}:{size}'.format(behavior=behavior, 115 | size=size)) 116 | else: 117 | sliced.append('{tag}:{item}'.format(tag=tag, item=item)) 118 | return sliced 119 | 120 | def set_malicious_pattern(self, **kwargs): 121 | '''Get snort rule rule header, options.''' 122 | # Rule Headers 123 | self.action = kwargs.get('action', 'alert') 124 | self.protocol = kwargs.get('protocol', 'tcp') 125 | self.src_ip = kwargs.get('src_ip', 'any') 126 | self.src_port = kwargs.get('src_port', 'any') 127 | self.dst_ip = kwargs.get('dst_ip', 'any') 128 | self.dst_port = kwargs.get('dst_port', 'any') 129 | 130 | # General Rule 131 | self.msg = kwargs.get('msg') 132 | self.ref = kwargs.get('reference') 133 | self.sid = kwargs.get('sid') 134 | self.gid = kwargs.get('gid') 135 | self.rev = kwargs.get('rev') 136 | self.classtype = kwargs.get('classtype') 137 | self.priority = kwargs.get('priority') 138 | self.metadata = kwargs.get('metadata') 139 | 140 | # Payload Dection Rule 141 | self.content = kwargs.get('content') 142 | self.uricontent = kwargs.get('uricontent') 143 | 144 | # behavior keyword in payload detection 145 | # self.nocase = kwargs.get('nocase') 146 | # self.offset = kwargs.get('offset') 147 | # self.depth = kwargs.get('depth') 148 | 149 | def serialize(self): 150 | pass 151 | 152 | def parser(self, rule_str): 153 | header = rule_str.split('(')[0] 154 | # header 155 | self.action = header.split(' ')[0] 156 | self.protocol = header.split(' ')[1] 157 | self.src_ip = header.split(' ')[2] 158 | self.src_port = header.split(' ')[3] 159 | self.dst_ip = header.split(' ')[5] 160 | self.dst_port = header.split(' ')[6] 161 | 162 | d = {} 163 | # options 164 | options = rule_str.split('(')[1] 165 | options_list = options.split(';') 166 | 167 | # for item in options_list: 168 | # if item != 'nocase': 169 | # print item 170 | # d[item.split(':')[0]] = item.split(':')[1] 171 | 172 | # print d 173 | 174 | 175 | if __name__ == "__main__": 176 | 177 | pattern = { 178 | 'dst_port': 21, 179 | 'msg': '"Trojan.VB.Gen"', 180 | 'content': ['"|2f 70 6b 69 2f 63 72 6c 2f 70 72 6f|"', 'nocase', 181 | '"|2f 70 6b 69 2f 63 72 6c 2f 70 72 6f|"', 'nocase', 182 | 'depth:4', 'offset:10'], 183 | 'uricontent': ['"/DirectDownloaderInstaller.exe"', 'nocase'], 184 | 'reference': ['md5,c1920c396043d109af6d8315cc66ba44', 185 | 'email,rules@anti-botnet.edu.tw'], 186 | 'sid': 1000001} 187 | rule = SnortRule() 188 | # rule.set_malicious_pattern(**pattern) 189 | rule.dst_port = 21 190 | rule.msg = "Trojan.VB.Gen" 191 | rule.content = ['"|2f 70 6b 69 2f 63 72 6c 2f 70 72 6f|"', 'nocase'] 192 | rule.uricontent = ['"/DirectDownloaderInstaller.exe"', 'nocase'] 193 | rule.protocol = 'udp' 194 | # rule.generate() 195 | print rule 196 | 197 | rule.parser(str(rule)) 198 | -------------------------------------------------------------------------------- /rule_engine.py: -------------------------------------------------------------------------------- 1 | import re 2 | import sys 3 | import os 4 | import logging 5 | import time 6 | import hashlib 7 | from malware import utils 8 | from malware.snort import SnortRule 9 | from malware import pickle_tool 10 | from malware import apikey 11 | from virus_total_apis import PrivateApi as VirusTotal 12 | 13 | logger = logging.getLogger(__name__) 14 | 15 | 16 | class Validator(object): 17 | def is_valid_url(self, url): 18 | regex = re.compile( 19 | # r'^(?:[a-z0-9\.\-]*)://' # scheme is validated separately 20 | r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+(?:[A-Z]{2,6}\.?|[A-Z0-9-]{2,}(? 0: 68 | logger.info("Waiting %s seconds", (str(waiting))) 69 | time.sleep(waiting) 70 | self.vt_req_timer = time.time() 71 | 72 | def _make_rule(self, content, uricontent, dst_port, sid=0): 73 | rule = SnortRule() 74 | pattern = dict() 75 | pattern['msg'] = '"Trojan.Gen.uricontent"' 76 | pattern['content'] = ['"{host}"'.format(host=content), 'nocase'] 77 | pattern['uricontent'] = ['"{uri}"'.format(uri=uricontent), 'nocase'] 78 | # pattern['sid'] = sid 79 | pattern['dst_port'] = dst_port 80 | rule.set_malicious_pattern(**pattern) 81 | return rule 82 | 83 | def _get_url_positive(self, resource): 84 | m = hashlib.sha1(resource) 85 | urlkey = m.hexdigest() 86 | if urlkey in self.cache.keys(): 87 | # logger.info("%s in cache" % resource) 88 | positives = self.cache.get(urlkey)[1] 89 | return positives 90 | else: 91 | self.vt_req_counter += 1 92 | logger.info("Search on VirusTotal counter: %s", 93 | str(self.vt_req_counter)) 94 | 95 | response = self.vt.get_url_report(resource) 96 | 97 | if response.get('error') is not None: 98 | logger.info("Error: {e}".format(e=response.get('error'))) 99 | sys.exit(0) 100 | 101 | results = response.get('results') 102 | positives = results.get('positives') 103 | self._check_timer_counter() 104 | 105 | if positives >= 0: 106 | self.cache[urlkey] = [resource, positives] 107 | return positives 108 | elif positives is None: 109 | logger.info('''No report. 110 | Submmit the URL to VirusTotal countert: %s''', 111 | str(self.vt_req_counter)) 112 | self.vt.scan_url(resource) 113 | self._check_timer_counter() 114 | return None 115 | else: 116 | logger.debug("Get reports failed.") 117 | return None 118 | 119 | def gen_rule(self, pcap_path): 120 | self.cache = pickle_tool.check_json() 121 | for content, conn in self._iterpayload('%s' % (pcap_path)): 122 | # print content, utils.connection_key_2_str(conn) 123 | get_method = self.vd.is_get_method(content) 124 | host = self.vd.is_hsot(content) 125 | if host and get_method: 126 | if get_method.group(1) == '/': 127 | url = self.vd.is_valid_url(host.group(1).rstrip()) 128 | else: 129 | url = self.vd.is_valid_url(host.group(1).rstrip() + 130 | get_method.group(1)) 131 | 132 | if url is not None: 133 | # valid_utf8 = True 134 | try: 135 | url.group(0).decode('utf-8') 136 | except UnicodeDecodeError: 137 | with open('invalid_utf8', 'a') as fp: 138 | fp.write('{u}\n'.format(u=url.group(0))) 139 | url = None 140 | # valid_utf8 = False 141 | 142 | if url is not None: 143 | host_content = host.group(0).rstrip() 144 | uricontent = get_method.group(1) 145 | pos = self._get_url_positive(url.group(0)) 146 | 147 | if pos > 0: 148 | if uricontent == '/': 149 | uricontent = None 150 | #print host_content 151 | rule = self._make_rule(host_content, uricontent, 152 | conn[3], 0) 153 | 154 | with open('uricontent.rules', 'a') as fp: 155 | fp.write('{r}\n'.format(r=str(rule))) 156 | yield rule 157 | else: 158 | # positives == 0 or positives == None 159 | pass 160 | else: 161 | # invalid_url 162 | pass 163 | else: 164 | pass 165 | 166 | pickle_tool.update_json(self.cache) 167 | 168 | class PayloadIterator2(object): 169 | def __init__(self, path): 170 | self.index = 0 171 | self.path = path 172 | self.pcap_list = list() 173 | self.content = list() 174 | self.five_tuple = list() 175 | 176 | def __iter__(self): 177 | for dirPath, dirNames, fileNames in os.walk(self.path): 178 | for f in fileNames: 179 | if f.split('.')[1] == 'pcap': 180 | self.pcap_list.append(os.path.join(dirPath, f)) 181 | else: 182 | # Not a pcap file 183 | pass 184 | 185 | for p in self.pcap_list: 186 | connection = utils.follow_tcp_stream(p) 187 | for five_tuple, frame in connection.iteritems(): 188 | for seq, content in frame.iteritems(): 189 | if content: 190 | # Generate the content and 5-tuple 191 | self.content.append(content) 192 | self.five_tuple.append(five_tuple) 193 | else: 194 | # Some packets have no payload 195 | pass 196 | return self 197 | 198 | def next(self): 199 | try: 200 | five_tuple = self.five_tuple[self.index] 201 | content = self.content[self.index] 202 | except IndexError: 203 | raise StopIteration 204 | self.index += 1 205 | return content, five_tuple 206 | 207 | def main(): 208 | logging.basicConfig(level=logging.INFO, 209 | format='[%(levelname)s] %(message)s',) 210 | 211 | rule_engine = RuleEngine() 212 | # print dir(rule_engine) 213 | rule_instances = list() 214 | RULE = list() 215 | 216 | for pcap in rule_engine.iterpcap('./PCAPLog/'): 217 | # print pcap 218 | for rule in rule_engine.gen_rule(pcap): 219 | rule_instances.append(rule) 220 | 221 | for ruleobj in rule_instances: 222 | RULE.append(str(ruleobj)) 223 | 224 | RULE = list(set(RULE)) 225 | 226 | for r in RULE: 227 | print r 228 | 229 | 230 | if __name__ == "__main__": 231 | main() 232 | -------------------------------------------------------------------------------- /malware/utils.py: -------------------------------------------------------------------------------- 1 | import dpkt 2 | import socket 3 | # import logging 4 | # from pprint import pprint 5 | 6 | # logging.basicConfig(level=logging.INFO, 7 | # format='[%(levelname)s] %(message)s', 8 | # ) 9 | 10 | 11 | def connection_key_2_str(key): 12 | src_ip = socket.inet_ntoa(key[0]) 13 | dst_ip = socket.inet_ntoa(key[2]) 14 | return src_ip + ":" + str(key[1]) + " -> " + dst_ip + ":" + str(key[3]) 15 | 16 | 17 | def _follow_tcp_stream(pcapfile): 18 | connection = {} 19 | frame_counter = 0 20 | f = open(pcapfile) 21 | pcap = dpkt.pcap.Reader(f) 22 | for ts, buf in pcap: 23 | frame_counter += 1 24 | try: 25 | eth = dpkt.ethernet.Ethernet(buf) 26 | except: 27 | continue 28 | ip = eth.data 29 | if eth.type != dpkt.ethernet.ETH_TYPE_IP: 30 | continue 31 | if ip.p != dpkt.ip.IP_PROTO_TCP: 32 | continue 33 | 34 | tcp = ip.data 35 | 36 | connection_key = (ip.src, tcp.sport, ip.dst, tcp.dport) 37 | # logging.debug(connection_key_2_str(connection_key)) 38 | 39 | fin_flag = (tcp.flags & dpkt.tcp.TH_FIN) != 0 40 | syn_flag = (tcp.flags & dpkt.tcp.TH_SYN) != 0 41 | # rst_flag = (tcp.flags & dpkt.tcp.TH_RST) != 0 42 | # psh_flag = (tcp.flags & dpkt.tcp.TH_PUSH) != 0 43 | ack_flag = (tcp.flags & dpkt.tcp.TH_ACK) != 0 44 | # urg_flag = (tcp.flags & dpkt.tcp.TH_URG) != 0 45 | # ece_flag = (tcp.flags & dpkt.tcp.TH_ECE) != 0 46 | # cwr_flag = (tcp.flags & dpkt.tcp.TH_CWR) != 0 47 | 48 | if syn_flag and not ack_flag: 49 | connection[connection_key] = [] 50 | elif syn_flag and ack_flag: 51 | connection[connection_key] = [] 52 | elif not syn_flag and ack_flag: 53 | if connection_key not in connection.keys(): 54 | # logging.info(connection_key_2_str(connection_key) + 55 | # ' Not a complete connection.') 56 | # Not an complete connection 57 | connection[connection_key] = [] 58 | connection[connection_key].append(tcp.data) 59 | else: 60 | if tcp.data != '': 61 | connection[connection_key].append(tcp.data) 62 | # logging.debug("TCP seq: %d" % tcp.seq) 63 | else: 64 | pass 65 | # logging.debug("TCP seq %d is empty" % tcp.seq) 66 | elif fin_flag: 67 | pass 68 | else: 69 | pass 70 | f.close() 71 | return connection 72 | 73 | 74 | def follow_tcp_stream(pcapfile): 75 | connection2 = {} 76 | frame_counter = 0 77 | f = open(pcapfile) 78 | pcap = dpkt.pcap.Reader(f) 79 | for ts, buf in pcap: 80 | frame_counter += 1 81 | try: 82 | eth = dpkt.ethernet.Ethernet(buf) 83 | except: 84 | continue 85 | ip = eth.data 86 | if eth.type != dpkt.ethernet.ETH_TYPE_IP: 87 | continue 88 | if ip.p != dpkt.ip.IP_PROTO_TCP: 89 | continue 90 | 91 | tcp = ip.data 92 | 93 | connection_key = (ip.src, tcp.sport, ip.dst, tcp.dport) 94 | # logging.debug(connection_key_2_str(connection_key)) 95 | 96 | fin_flag = (tcp.flags & dpkt.tcp.TH_FIN) != 0 97 | syn_flag = (tcp.flags & dpkt.tcp.TH_SYN) != 0 98 | # rst_flag = (tcp.flags & dpkt.tcp.TH_RST) != 0 99 | # psh_flag = (tcp.flags & dpkt.tcp.TH_PUSH) != 0 100 | ack_flag = (tcp.flags & dpkt.tcp.TH_ACK) != 0 101 | # urg_flag = (tcp.flags & dpkt.tcp.TH_URG) != 0 102 | # ece_flag = (tcp.flags & dpkt.tcp.TH_ECE) != 0 103 | # cwr_flag = (tcp.flags & dpkt.tcp.TH_CWR) != 0 104 | 105 | if syn_flag and not ack_flag: 106 | connection2.setdefault(connection_key, {tcp.seq: {}}) 107 | elif syn_flag and ack_flag: 108 | connection2.setdefault(connection_key, {tcp.seq: {}}) 109 | elif not syn_flag and ack_flag: 110 | if connection_key not in connection2.keys(): 111 | # logging.info(connection_key_2_str(connection_key) + 112 | # ' Not a complete connection.') 113 | # Not an complete connection 114 | connection2.setdefault(connection_key, {tcp.seq: {}}) 115 | connection2[connection_key][tcp.seq] = tcp.data 116 | else: 117 | if tcp.data != '': 118 | # logging.debug("TCP seq: %d" % tcp.seq) 119 | connection2[connection_key][tcp.seq] = tcp.data 120 | else: 121 | pass 122 | # logging.debug("TCP seq %d is empty" % tcp.seq) 123 | elif fin_flag: 124 | pass 125 | else: 126 | pass 127 | f.close() 128 | return connection2 129 | 130 | 131 | def follow_udp_stream(pcapfile): 132 | connection = {} 133 | frame_counter = 0 134 | f = open(pcapfile) 135 | pcap = dpkt.pcap.Reader(f) 136 | for ts, buf in pcap: 137 | frame_counter += 1 138 | try: 139 | eth = dpkt.ethernet.Ethernet(buf) 140 | except: 141 | continue 142 | ip = eth.data 143 | if eth.type != dpkt.ethernet.ETH_TYPE_IP: 144 | continue 145 | if ip.p != dpkt.ip.IP_PROTO_UDP: 146 | continue 147 | 148 | udp = ip.data 149 | connection_key = (ip.src, udp.sport, ip.dst, udp.dport) 150 | 151 | if connection_key not in connection.keys(): 152 | connection[connection_key] = [] 153 | connection[connection_key].append(udp.data) 154 | else: 155 | if udp.data != '': 156 | connection[connection_key].append(udp.data) 157 | else: 158 | pass 159 | # logging.info(udp.data) 160 | f.close() 161 | return connection 162 | 163 | 164 | def tcp_stream(pcapfile): 165 | connection = {} 166 | frame_counter = 0 167 | for ts, buf in dpkt.pcap.Reader(open(pcapfile)): 168 | frame_counter += 1 169 | try: 170 | eth = dpkt.ethernet.Ethernet(buf) 171 | except: 172 | continue 173 | ip = eth.data 174 | if eth.type != dpkt.ethernet.ETH_TYPE_IP: 175 | continue 176 | if ip.p != dpkt.ip.IP_PROTO_TCP: 177 | continue 178 | tcp = ip.data 179 | if tcp.data == '': 180 | continue 181 | 182 | tuple_key = (ip.src, tcp.sport, ip.dst, tcp.dport) 183 | reverse_key = (ip.dst, tcp.dport, ip.src, tcp.sport) 184 | 185 | if (tuple_key and reverse_key) not in connection.keys(): 186 | connection.setdefault(tuple_key, {tcp.seq: tcp.data}) 187 | else: 188 | connection[reverse_key][tcp.seq] = tcp.data 189 | 190 | return connection 191 | # for key, value in connection.iteritems(): 192 | # for key2 in sorted(value.keys()): 193 | # print connection_key_2_str(key), key2 194 | 195 | 196 | def dump_udp_stream_content(connection, save_path, combine=False): 197 | # save_path = './payload_dump/' 198 | # if not os.path.isdir(save_path): 199 | # os.makedirs(save_path) 200 | 201 | if combine: 202 | combine_connection = {} 203 | for tuple_key, value in sorted(connection.iteritems()): 204 | reverse_key = (tuple_key[2], tuple_key[3], 205 | tuple_key[0], tuple_key[1]) 206 | if (tuple_key and reverse_key) not in combine_connection.keys(): 207 | combine_connection[tuple_key] = value 208 | else: 209 | combine_connection[reverse_key] = ( 210 | combine_connection[reverse_key] + value) 211 | connection = combine_connection 212 | 213 | for key, value in connection.iteritems(): 214 | filename = connection_key_2_str(key).replace(' -> ', '_') + '.out' 215 | filename = 'udp_' + filename 216 | w = open(save_path + filename, 'w') 217 | # value = filter(None, value) 218 | value = "".join(value) 219 | w.write(value) 220 | w.close() 221 | # logging.info('{conn}\t{cont} bytes\tFilename: {fn}'.format( 222 | # conn=connection_key_2_str(key), 223 | # cont=str(len(value)), 224 | # fn=filename)) 225 | 226 | 227 | def dump_tcp_stream_content(connection, save_path, combine=False): 228 | # Sort TCP sequence number in connection2 (Experimental) 229 | # connection2:{ 230 | # HostA:{ 231 | # Seq1:[payload] 232 | # Seq2:[payload] 233 | # } 234 | # HostB:{ 235 | # Seq1:[payload] 236 | # Seq2:[payload] 237 | # } 238 | # } 239 | 240 | # save_path = './payload_dump/' 241 | # if not os.path.isdir(save_path): 242 | # os.makedirs(save_path) 243 | 244 | if combine: 245 | combine_connection = {} 246 | for tuple_key, value in sorted(connection.iteritems()): 247 | reverse_key = (tuple_key[2], tuple_key[3], 248 | tuple_key[0], tuple_key[1]) 249 | if (tuple_key and reverse_key) not in combine_connection.keys(): 250 | combine_connection[tuple_key] = value 251 | else: 252 | combine_connection[reverse_key].update(value) 253 | connection = combine_connection 254 | 255 | for key, value in connection.iteritems(): 256 | # pprint(value) 257 | content_whole = [] 258 | for key2 in sorted(value.iterkeys()): 259 | content = "".join(value[key2]) 260 | content_whole.append(content) 261 | 262 | filename = connection_key_2_str(key).replace(' -> ', '_') + '.out' 263 | filename = 'tcp_' + filename 264 | w = open(save_path + filename, 'w') 265 | content_whole = "".join(content_whole) 266 | w.write(content_whole) 267 | w.close() 268 | # logging.info('{conn}\t{cont} bytes\tFilename: {fn}'.format( 269 | # conn=connection_key_2_str(key), 270 | # cont=str(len(content_whole)), 271 | # fn=filename)) 272 | 273 | # Unsort TCP sequence number in connection 274 | # connection:{ 275 | # HostA:[payload, payload, payload] 276 | # HostB:[payload, payload, payload] 277 | # } 278 | 279 | # for key, value in connection.iteritems(): 280 | # filename = connection_key_2_str(key).replace(' -> ', '_') 281 | # w = open(filename, 'w') 282 | # # value = filter(None, value) 283 | # content = "".join(value) 284 | # logging.info(connection_key_2_str(key) + '\t' + 285 | # str(len(content)) + '\t' + filename) 286 | # w.write(content) 287 | # w.close() 288 | 289 | 290 | if __name__ == '__main__': 291 | connection = follow_tcp_stream('./2a.pcap') 292 | udp_connection = follow_udp_stream('./2a.pcap') 293 | print('--- TCP ---') 294 | dump_tcp_stream_content(connection, combine=True) 295 | print ('--- UDP ---') 296 | dump_udp_stream_content(udp_connection, combine=True) 297 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | 203 | -------------------------------------------------------------------------------- /ruleEngine.py: -------------------------------------------------------------------------------- 1 | import re 2 | import sys 3 | import os 4 | import logging 5 | import time 6 | import hashlib 7 | from urlparse import urlparse 8 | from malware import utils 9 | from malware import decoder 10 | from malware.snort import SnortRule 11 | from malware.sql_tool import SQLiteTool 12 | from malware import apikey 13 | from virus_total_apis import PrivateApi as VirusTotal 14 | 15 | logger = logging.getLogger(__name__) 16 | 17 | REQUEST_RATE = 300 18 | APIKEY = apikey.APIKEY_0 19 | 20 | 21 | def clean_spaces(s): 22 | s = s.replace('\r', '') 23 | return s 24 | 25 | 26 | class RuleEngineBase(object): 27 | def __init__(self, path='../PCAPLog/'): 28 | self.rules = list() 29 | self._db = SQLiteTool() 30 | self._db.creat_url_report() 31 | self.tcp_paylpad_iter = PayloadIterator2(path, 'tcp') 32 | self.udp_paylpad_iter = PayloadIterator2(path, 'udp') 33 | self.vd = Validator() 34 | self.vt = VirusTotal(APIKEY) 35 | 36 | def _make_rule(self, **kwargs): 37 | rule = SnortRule() 38 | rule.msg = '"Trojan.Gen"' 39 | 40 | content = kwargs.get('content') 41 | uricontent = kwargs.get('uricontent') 42 | dst_port = kwargs.get('dst_port') 43 | ref = kwargs.get('ref') 44 | protocol = kwargs.get('protocol') 45 | dst_port = kwargs.get('dst_port') 46 | 47 | if protocol is not None: 48 | rule.protocol = protocol 49 | if dst_port is not None: 50 | rule.dst_port = dst_port 51 | if content is not None: 52 | rule.content = content 53 | if uricontent is not None and uricontent != '/': 54 | rule.uricontent = uricontent 55 | if ref is not None: 56 | rule.ref = ref 57 | # pattern['sid'] = sid 58 | 59 | self.rules.append(rule) 60 | self._log_rules(rule, ref[0].split(',')[-1]) 61 | 62 | def _get_url_positive(self, resource): 63 | urlkey = hashlib.sha1(resource).hexdigest() 64 | 65 | if self._db.is_key(urlkey): 66 | # print "In Table!!" 67 | return self._db.show_positive(urlkey) 68 | 69 | def _log_rules(self, data, filename): 70 | # print str(data) 71 | if not os.path.exists('./rules'): 72 | os.makedirs('./rules') 73 | 74 | with open('./rules/{m}_rule.rules'.format(m=filename), 'a') as fp: 75 | fp.write('{r}\n'.format(r=str(data))) 76 | 77 | 78 | class RuleEngineOnline(RuleEngineBase): 79 | def __init__(self, path='../PCAPLog/'): 80 | self.vt_req_counter = 0 81 | self.vt_req_timer = time.time() 82 | super(RuleEngineOnline, self).__init__(path) 83 | 84 | def _check_timer_counter(self): 85 | if self.vt_req_counter == REQUEST_RATE: 86 | self.vt_req_counter = 0 87 | period = time.time() - self.vt_req_timer 88 | waiting = 60 - period + 1 89 | if waiting > 0: 90 | logger.info("Waiting %s seconds", (str(waiting))) 91 | time.sleep(waiting) 92 | self.vt_req_timer = time.time() 93 | 94 | def _make_rule(self, **kwargs): 95 | super(RuleEngineOnline, self)._make_rule(**kwargs) 96 | 97 | def _get_url_positive(self, resource): 98 | urlkey = hashlib.sha1(resource).hexdigest() 99 | 100 | if self._db.is_key(urlkey): 101 | # print "In Table!!" 102 | update_database = False 103 | if update_database: 104 | # ============== Updated the Database URL column =============== 105 | self._check_timer_counter() 106 | self.vt_req_counter += 1 107 | response = self.vt.get_url_report(resource) 108 | if response.get('error') is not None: 109 | logger.info("Error: {e}".format(e=response.get('error'))) 110 | return None 111 | # sys.exit(0) 112 | 113 | results = response.get('results') 114 | positives = results.get('positives') 115 | url = results.get('url') 116 | 117 | if positives >= 0: 118 | self._db.insert2(urlkey, url, positives) 119 | # ============== Updated the Database URL column =============== 120 | return self._db.show_positive(urlkey) 121 | else: 122 | self._check_timer_counter() 123 | self.vt_req_counter += 1 124 | logger.info("Search on VirusTotal counter: %s", 125 | str(self.vt_req_counter)) 126 | logger.info(resource) 127 | response = self.vt.get_url_report(resource) 128 | 129 | if response.get('error') is not None: 130 | logger.info("Error: {e}".format(e=response.get('error'))) 131 | return None 132 | # sys.exit(0) 133 | 134 | results = response.get('results') 135 | positives = results.get('positives') 136 | url = results.get('url') 137 | 138 | if positives >= 0: 139 | self._db.insert2(urlkey, url, positives) 140 | # self._db.insert2(url_id, url, positives) 141 | return positives 142 | elif positives is None: 143 | self._check_timer_counter() 144 | self.vt_req_counter += 1 145 | logger.info('''No report. Submmit the URL to VirusTotal countert: %s''', 146 | str(self.vt_req_counter)) 147 | self.vt.scan_url(resource) 148 | return None 149 | else: 150 | logger.debug("Get reports failed.") 151 | return None 152 | 153 | def _get_domain_positive(self, resource): 154 | domainkey = hashlib.sha1(resource).hexdigest() 155 | if self._db.is_key(domainkey): 156 | pass 157 | # return self._db.show_positive(urlkey) 158 | else: 159 | pass 160 | 161 | def http_rule_generate(self): 162 | for content, conn, filename in self.tcp_paylpad_iter: 163 | try: 164 | get_obj = self.vd.is_get_method(content) 165 | host_obj = self.vd.is_hsot(content) 166 | if host_obj and get_obj: 167 | uri = get_obj.group(1) 168 | host_field = clean_spaces(host_obj.group(1)) 169 | o = urlparse('http://'+ host_field + uri) 170 | # domian = o.netloc 171 | # uri = o.path 172 | if o.path == '/': 173 | # Proberbly an malicious domain name 174 | domain_obj = self.vd.is_valid_url(host_field) 175 | if domain_obj is not None: 176 | domain_pos = self._get_url_positive(domain_obj.group(0)) 177 | if domain_pos > 0: 178 | self._make_rule(protocol='tcp', 179 | content=['"{h}"'.format(h=clean_spaces(host_obj.group(0))), 'nocase'], 180 | dst_port=conn[3], 181 | ref=['md5,{m}'.format(m=filename.split('.')[0])]) 182 | # md5=filename.split('.')[0]) 183 | else: 184 | # Is a invalid url 185 | pass 186 | 187 | else: 188 | # o.path != '/' 189 | # string = self.vd.is_valid_utf8(host_field + uri) 190 | # if string is not None: 191 | # Do search on VT 192 | 193 | url_obj = self.vd.is_valid_url(host_field + uri) 194 | if url_obj is not None: 195 | url_pos = self._get_url_positive(url_obj.group(0)) 196 | if url_pos > 0: 197 | self._make_rule(protocol='tcp', 198 | content=['"{h}"'.format(h=clean_spaces(host_obj.group(0))), 'nocase'], 199 | uricontent=['"{u}"'.format(u=o.path), 'nocase'], 200 | dst_port=conn[3], 201 | ref=['md5,{m}'.format(m=filename.split('.')[0])]) 202 | # md5=filename.split('.')[0]) 203 | else: 204 | # Is a invalid url 205 | pass 206 | else: 207 | pass 208 | except KeyboardInterrupt: 209 | logger.info("Quit") 210 | sys.exit() 211 | 212 | def dns_rule_generate(self): 213 | for content, conn, filename in self.udp_paylpad_iter: 214 | try: 215 | # print content, filename, conn[3] 216 | if content[0] == 'UNKNOWN_DNS': 217 | # Bad DNS query opcode != 0 218 | # print "Bad DNS query opcode != 0, %r" % content[1] 219 | self._make_rule(protocol='udp', 220 | dst_port=conn[3], 221 | content=['"|'+content[1]+'|"'], 222 | ref=['md5,{m}'.format(m=filename.split('.')[0])]) 223 | else: 224 | domain_obj = self.vd.is_valid_url(content[0]) 225 | if domain_obj is not None: 226 | domain_pos = self._get_url_positive(content[0]) 227 | if domain_pos > 0: 228 | self._make_rule(protocol='udp', 229 | dst_port=conn[3], 230 | content=['"|'+content[1]+'|"'], 231 | ref=['md5,{m}'.format(m=filename.split('.')[0])]) 232 | else: 233 | # Is a invalid domain name 234 | with open('invalid_domain_name.log', 'a') as fp: 235 | fp.write(filename+'\n') 236 | fp.write(content[0]+'\n') 237 | except KeyboardInterrupt: 238 | logger.info("Quit") 239 | sys.exit() 240 | 241 | def _log_rules(self, data, filename): 242 | super(RuleEngineOnline, self)._log_rules(data, filename) 243 | 244 | 245 | class Validator(object): 246 | def __init__(self): 247 | pass 248 | 249 | def is_valid_url(self, url): 250 | regex = re.compile( 251 | # r'^(?:[a-z0-9\.\-]*)://' # scheme is validated separately 252 | r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+(?:[A-Z]{2,6}\.?|[A-Z0-9-]{2,}(?