├── .gitignore ├── README.md ├── analyzer ├── __init__.py └── analyzer.py ├── disassembler ├── __init__.py └── disassembler.py ├── emulator ├── __init__.py └── emulator.py ├── main.py ├── networker ├── __init__.py └── networker.py └── utils ├── __init__.py └── utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.ipynb 3 | .vscode/* 4 | .ipynb_checkpoints/* 5 | .DS_STORE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Scanr 2 | Detect x86 shellcode in files and traffic. 3 | 4 | ## Usage 5 | ``` 6 | > python main.py --help 7 | Usage: main.py [-h] [-f OBJ_PATH] [-o START_OFFSET] [-l LIMIT] [-d] 8 | [-i INTERFACE] [-c CAPTURE] 9 | 10 | Windows shellcode emulation and detection tool 11 | 12 | optional arguments: 13 | -h, --help show this help message and exit 14 | -f OBJ_PATH input file 15 | -o START_OFFSET shellcode start offset 16 | -l LIMIT max instructions to analyze 17 | -d enable debug mode 18 | -i INTERFACE network interface 19 | -c CAPTURE network capture 20 | ``` 21 | Example: `python main.py -c test-http-get.pcap` 22 | 23 | ## Output 24 | ``` 25 | python main.py -d -f call4_dword_xor_shell 26 | [!] Starting analysis in file mode 27 | [*] Analyzing file 28 | [+] Found 1 potential offsets: 29 | 0x00000000 30 | 0x0 : xor ecx, ecx 31 | 0x2 : sub ecx, -0x54 32 | 0x5 : call 9 33 | 0xa : rcr byte ptr [esi - 0x7f], 0x76 34 | 0xe : push cs 35 | 0xf : js 0xfffffff5 36 | 0x11 : dec eax 37 | 0x12 : mov eax, dword ptr [0xe2fcee83] 38 | 0x17 : hlt 39 | 0x18 : test byte ptr [edx + ecx*8], cl 40 | 0x1b : mov eax, dword ptr [0x2828e478] 41 | 0x20 : popfd 42 | 0x21 : aad 0x88 43 | [!] 
Trying with offset number 0 at 0x00000000 44 | [*] Emulator processing shellcode 45 | 0x2000: xor ecx, ecx 46 | 0x2002: sub ecx, -0x54 47 | 0x2005: call 4 48 | 0x2009: inc eax 49 | 0x200b: pop esi 50 | mem READ: 0x4, data size = 4, data value = 0x0 51 | near deref: 52 | 0a 20 00 00 00 00 00 00 00 00 00 00 00 00 00 00 53 | 0x200c: xor dword ptr [esi + 0xe], 0xa148e478 54 | mem READ: 0x2018, data size = 4, data value = 0x0 55 | near deref: 56 | 84 0c ca a1 78 e4 28 28 9d d5 88 c5 f3 b4 78 2a 57 | [!] Self-modyfying code heuristic triggered! 58 | 0x2013: sub esi, -4 59 | 0x2016: loop 0xfffffff6 60 | 0x200c: xor dword ptr [esi + 0xe], 0xa148e478 61 | mem READ: 0x201c, data size = 4, data value = 0x0 62 | near deref: 63 | 78 e4 28 28 9d d5 88 c5 f3 b4 78 2a 2a e8 c3 f3 64 | # skipped.. 65 | [!] Self-modyfying code heuristic triggered! 66 | 0x2013: sub esi, -4 67 | 0x2016: loop 0xfffffff6 68 | 0x200c: xor dword ptr [esi + 0xe], 0xa148e478 69 | mem READ: 0x2164, data size = 4, data value = 0x0 70 | near deref: 71 | ad e4 48 a1 cc cc cc cc 00 00 00 00 00 00 00 00 72 | [!] Self-modyfying code heuristic triggered! 73 | 0x2013: sub esi, -4 74 | 0x2016: loop 0xfffffff6 75 | 0x2018: cld 76 | 0x2019: call 0x87 77 | 0x20a0: pop ebp 78 | [!] GetPC (callpop) heuristic triggered! 79 | mem READ: 0x4, data size = 4, data value = 0x0 80 | near deref: 81 | 1e 20 00 00 00 00 00 00 00 00 00 00 00 00 00 00 82 | 0x20a1: push 0x3233 83 | 0x20a6: push 0x5f327377 84 | 0x20ab: push esp 85 | 0x20ac: push 0x726774c 86 | 0x20b1: call ebp 87 | 0x201e: pushal 88 | 0x201f: mov ebp, esp 89 | 0x2021: xor eax, eax 90 | 0x2023: mov edx, dword ptr fs:[eax + 0x30] 91 | 92 | [!] PEB accessed! 93 | mem READ: 0x30, data size = 4, data value = 0x0 94 | near deref: 95 | 30 00 00 00 00 00 00 00 00 00 00 00 3c 00 00 00 96 | 0x2027: mov edx, dword ptr [edx + 0xc] 97 | 98 | [!] PEB_Ldr accessed! 99 | [!] HEUR level 3, shellcode detected. Exiting! 
class AnalyzerEngine(object):
    """Find candidate shellcode start offsets and run them through the emulator.

    The engine owns one ``DisassemblerEngine`` and one ``EmulatorEngine``
    (both built in ``__init__``) and exposes two operations:
    ``get_offsets`` to pick where emulation should start and ``analyze`` to
    emulate from one of those offsets.
    """

    # How many bytes before a signature hit emulation should start.
    BACKREF = 0x10

    # Byte signatures used to guess where shellcode may begin.
    # NOTE(review): these look like call/NOP-sled/fnstenv/short-jmp/fs:[0x30]
    # encodings -- confirm against an x86 opcode reference before extending.
    OFFS_DB = (
        # DOTALL is required here: without it '.' refuses to match 0x0A, so a
        # signature whose wildcard byte is 0x0A would never be found.
        re.compile(r"\xE8.?\x00\x00\x00", re.DOTALL),
        re.compile(r"\xE8\xFF\xFF\xFF"),
        re.compile(r"\x90\x90\x90\x90"),
        re.compile(r"\xD9\x74\x24[\x80-\xFF]"),
        re.compile(r"\xD9[\x70-\x7F]\x00\x00"),
        re.compile(r"\xEB\x5A"),
        re.compile(r"\xEB\x0C"),
        re.compile(r"\x64[\xA0-\xAF]\x30\x00\x00\x00"),
        re.compile(r"\xd9[\x70-\x7F][\x80-\xFF]"),
    )

    def __init__(self, mode=32, debug=False):
        """Create the disassembler and emulator used for the analysis.

        ``mode`` is forwarded to both engines and ``debug`` to the emulator.
        """
        self.mode = mode
        self.debug = debug
        self.disassembler = DisassemblerEngine(self.mode)
        self.emulator = EmulatorEngine(
            self.disassembler, self.mode, self.debug)

    def get_offsets(self, code):
        """Return the offsets in *code* at which emulation should be tried.

        Every signature in ``OFFS_DB`` is searched once (first hit only). A
        hit contributes its start position minus ``BACKREF``, clamped to 0.
        Offsets keep signature order and each value is reported only once,
        so the same position is never emulated twice. When no signature
        matches, ``[0]`` is returned so the caller still tries the start of
        the buffer.
        """
        offsets = []
        for pattern in self.OFFS_DB:
            match = pattern.search(code)
            if match is None:
                continue
            candidate = max(0, match.start() - self.BACKREF)
            if candidate not in offsets:
                offsets.append(candidate)

        # No heuristic matched: fall back to the beginning of the buffer.
        return offsets or [0]

    def analyze(self, code, offset=0):
        """Emulate *code* starting at *offset*; returns nothing."""
        self.emulator.emulate(code, offset=offset)
(address, size, mnemonic, op_str) in self.capmd.disasm_lite(data[offs:], self.BASE_ADDR): 27 | print(" 0x%-4x:\t%s\t%s" % (address, mnemonic, op_str)) 28 | break 29 | 30 | def get_disas_n(self, data, n, offs=0): 31 | return self.capmd.disasm_lite(data[offs:], self.BASE_ADDR, count=n) 32 | 33 | def disas_n(self, data, n, offs=0): 34 | for (address, size, mnemonic, op_str) in self.capmd.disasm_lite(data[offs:], self.BASE_ADDR, count=n): 35 | print(" 0x%-4x:\t%s\t%s" % (address, mnemonic, op_str)) 36 | break 37 | 38 | def disas_all(self, data, offs=0): 39 | for (address, size, mnemonic, op_str) in self.capmd.disasm_lite(data[offs:], self.BASE_ADDR): 40 | print(" 0x%-4x:\t%s\t%s" % (address, mnemonic, op_str)) 41 | 42 | def get_disas_all(self, data, offs=0): 43 | for (address, size, mnemonic, op_str) in self.capmd.disasm_lite(data[offs:], self.BASE_ADDR): 44 | yield (address, size, mnemonic, op_str) 45 | -------------------------------------------------------------------------------- /emulator/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/1Project/Scanr/332b7f51356919655a088f168bb7ba057e561309/emulator/__init__.py -------------------------------------------------------------------------------- /emulator/emulator.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | from __future__ import print_function 4 | # import emu engine and disassembler 5 | from unicorn import * 6 | from unicorn.x86_const import * 7 | from disassembler.disassembler import * 8 | # import tools 9 | from utils.utils import * 10 | # other imports 11 | import sys 12 | import binascii 13 | import struct 14 | 15 | # globals for the hooks 16 | write_bound_high = None 17 | write_bound_low = None 18 | 19 | 20 | def print_registers(uc): 21 | eip = uc.reg_read(UC_X86_REG_EIP) 22 | esp = uc.reg_read(UC_X86_REG_ESP) 23 | ebp = uc.reg_read(UC_X86_REG_EBP) 24 | 25 | 
def mem_reader(uc, addr, size):
    """Print *size* bytes read from *addr* as hex pairs on one line.

    The bytes come from ``uc.mem_read(addr, size)``. Output is two tabs
    followed by each byte as two hex digits and a space, with no newline.
    """
    data = uc.mem_read(addr, size)
    print('\t\t' + ''.join("%.2x " % byte for byte in data), end='')


def hook_intr(uc, intno, user_data):
    """Interrupt hook: return False for interrupt 3 and True otherwise."""
    return intno != 0x3


def _report_bad_access(uc, access, address, size, value):
    """Print a diagnostic for a failed memory access at the current EIP.

    Only ``UC_MEM_WRITE`` and ``UC_MEM_READ`` access codes produce output.
    NOTE(review): Unicorn may report failed accesses with the
    ``UC_MEM_*_UNMAPPED`` / ``UC_MEM_*_PROT`` codes instead, in which case
    neither branch fires -- confirm against how the hooks are registered.
    """
    eip = uc.reg_read(UC_X86_REG_EIP)

    if access == UC_MEM_WRITE:
        print("\tinvalid WRITE of 0x%x at 0x%X, data size = %u, data value = 0x%x" %
              (address, eip, size, value))
    elif access == UC_MEM_READ:
        print("\tinvalid READ of 0x%x at 0x%X, data size = %u" %
              (address, eip, size))


def hook_mem_invalid(uc, access, address, size, value, user_data):
    """Invalid-memory hook: report the access and return False.

    The False return presumably tells Unicorn not to resume -- TODO confirm.
    """
    _report_bad_access(uc, access, address, size, value)
    return False


def hook_mem_unmapped(uc, access, address, size, value, user_data):
    """Unmapped-memory hook: same report and False result as hook_mem_invalid."""
    _report_bad_access(uc, access, address, size, value)
    return False
91 | return self.__heur_level 92 | 93 | @heur_level.setter 94 | def heur_level(self, value): 95 | self.__heur_level = value 96 | if (self.__heur_level >= self.max_heur_level): 97 | if self.stop: 98 | self.emu_engine.emu_stop() 99 | return False 100 | 101 | print('[!] HEUR level %d, shellcode detected. Exiting!' % 102 | self.__heur_level) 103 | self.stop = True 104 | self.emu_engine.emu_stop() 105 | return False 106 | 107 | def hook_smc_check(self, uc, access, address, size, value, user_data): 108 | eip = uc.reg_read(UC_X86_REG_EIP) 109 | global write_bound_high, write_bound_low 110 | # Just check if the write target addr is near EIP 111 | if abs(eip - address - self.BASE_ADDR) < self.SMC_BOUND: 112 | print('[!] Self-modyfying code heuristic triggered!') 113 | self.heur_level += 0 114 | 115 | if write_bound_low is None: 116 | write_bound_low = address 117 | write_bound_high = address 118 | elif address < write_bound_low: 119 | write_bound_low = address 120 | elif address > write_bound_high: 121 | write_bound_high = address 122 | 123 | def hook_code(self, uc, addr, size, user_data): 124 | if self.stop: 125 | self.emu_engine.emu_stop() 126 | return False 127 | 128 | mem = uc.mem_read(addr, size) 129 | offs = 0 130 | (address, size, mnemonic, op_str) = self.disasm_engine.get_disas_single( 131 | str(mem), offs, addr=addr 132 | ) 133 | print(" 0x%x:\t%s\t%s" % (address, mnemonic, op_str)) 134 | if (self.previous_instruction == 'call' and mnemonic == 'pop'): 135 | print('[!] GetPC (callpop) heuristic triggered!') 136 | self.heur_level += 1 137 | if (mnemonic == 'fnstenv'): 138 | print('[!] GetPC (fnstenv) heuristic triggered!') 139 | self.heur_level += 1 140 | self.previous_instruction = mnemonic 141 | self.previous_instruction_addr = struct.pack(" MAX_OFFSETS: 21 | print('[!] Potential offsets (%d) is greater than the current MAX_OFFSETS (%d)!' 
def main(obj_path=None, capture=None, interface=None, start_offset=0, limit=0x1000, debug=False):
    """Run one analysis pass in file, capture or live-interface mode.

    The inputs are checked in this order and the first one set wins:

    * ``obj_path`` -- open file object; its whole content is analyzed.
    * ``capture`` -- open capture file passed to ``Networker.analyzeCapture``;
      the value it returns is analyzed.
    * ``interface`` -- live capture; not implemented, only a notice is shown.

    With none of them set an error is printed and the process exits with
    status 1. ``start_offset``, ``limit`` and ``debug`` are forwarded
    unchanged to ``analyzeObject``. The elapsed wall-clock time is printed
    at the end.
    """
    before = time.time()
    if obj_path:
        print('[!] Starting analysis in file mode')
        # Show the file name when there is one instead of the object's repr.
        print('[*] Analyzing file %s' % getattr(obj_path, 'name', obj_path))
        obj = obj_path.read()
        analyzeObject(obj, start_offset=start_offset,
                      limit=limit, debug=debug)
    elif capture:
        print('[!] Starting analysis in capture mode')
        print('[*] Analyzing capture %s' % getattr(capture, 'name', capture))
        net = Networker(debug=debug)
        obj = net.analyzeCapture(capture)
        analyzeObject(obj, start_offset=start_offset,
                      limit=limit, debug=debug)
    elif interface:
        print('[!] Starting analysis in live capture mode')
        print('[*] Analyzing capture from %s interface' % interface)
        # Nothing is captured yet; say so rather than finishing silently.
        print('[-] Live capture mode is not implemented yet')
    else:
        print('[-] Error: You have to specify file, capture or interface!')
        sys.exit(1)

    after = time.time()
    print('[+] Finished analysis, took %f seconds' % (after - before))


if __name__ == '__main__':
    # parse input argument
    parser = argparse.ArgumentParser(
        description='Windows shellcode emulation and detection tool')
    # argparse.FileType replaces the Python-2-only ``file`` builtin and opens
    # the inputs in binary mode, which is what raw shellcode and pcap data need.
    parser.add_argument('-f', dest='obj_path',
                        help='input file', type=argparse.FileType('rb'), required=False)
    parser.add_argument('-o', dest='start_offset',
                        help='shellcode start offset', required=False, default=0, type=int)
    # NOTE(review): the CLI default for -l is -1 while main() defaults to
    # 0x1000 -- confirm which one analyzeObject is meant to receive.
    parser.add_argument('-l', dest='limit', help='max instructions to analyze',
                        required=False, default=-1, type=int)
    parser.add_argument('-d', dest='debug', help='enable debug mode',
                        required=False, default=False, action='store_true')
    parser.add_argument('-i', dest='interface', help='network interface',
                        type=str, required=False)
    parser.add_argument('-c', dest='capture', help='network capture',
                        type=argparse.FileType('rb'), required=False)

    arguments = parser.parse_args()
    try:
        main(obj_path=arguments.obj_path, interface=arguments.interface, capture=arguments.capture, start_offset=arguments.start_offset,
             limit=arguments.limit, debug=arguments.debug)
    finally:
        # The handles were opened by argparse; release them even when main()
        # exits through sys.exit() or an exception.
        for handle in (arguments.obj_path, arguments.capture):
            if handle is not None:
                handle.close()
class Networker(object):
    """Pull HTTP messages out of packet captures so they can be analyzed."""

    def __init__(self, debug=False):
        """Initialise the source flags; ``debug`` is accepted but not used."""
        self.fromFile, self.fromInterface = False, False

    def analyzeInterface(self, interface):
        """Placeholder for live capture; does nothing and returns None."""
        pass

    def analyzeCapture(self, capture):
        """Reassemble TCP payloads from *capture* and parse them as HTTP.

        *capture* is an open pcap file handed to ``dpkt.pcap.Reader``.
        Payloads are grouped by ``(src, dst, sport, dport)`` in capture
        order. Each group is parsed as an HTTP response when it starts with
        ``HTTP`` and as an HTTP request otherwise; groups that fail to
        unpack are ignored.

        Returns ``str()`` of the last group that parsed, or an empty string
        when none did (previously that case raised because the result was
        never assigned). Non-IP and non-TCP packets are reported and skipped.
        """
        pcap_reader = dpkt.pcap.Reader(capture)
        conn = {}

        for ts, buf in pcap_reader:
            # Unpack the Ethernet frame (mac src/dst, ethertype)
            eth = dpkt.ethernet.Ethernet(buf)
            # Is this an IP packet?
            if not isinstance(eth.data, dpkt.ip.IP):
                print('[-] Error: Non IP Packet type, not supported %s\n' %
                      eth.data.__class__.__name__)
                continue

            ip = eth.data

            # Is this a TCP packet?
            if not isinstance(ip.data, dpkt.tcp.TCP):
                # Report the IP payload's type: eth.data is always IP here.
                print('[-] Error: Non TCP Packet type, not supported %s\n' %
                      ip.data.__class__.__name__)
                continue

            tcp = ip.data
            key = (ip.src, ip.dst, tcp.sport, tcp.dport)
            # Collect the chunks and join once per stream below instead of
            # re-copying the growing payload for every packet.
            conn.setdefault(key, []).append(tcp.data)

        http = None
        for key in conn:
            stream = b''.join(conn[key])
            # Try and parse what we have
            try:
                if stream[:4] == b'HTTP':
                    http = dpkt.http.Response(stream)
                else:
                    http = dpkt.http.Request(stream)
            except dpkt.UnpackError:
                pass

        if http is None:
            return ''
        return str(http)
-------------------------------------------------------------------------------- /utils/utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python2 2 | import struct 3 | 4 | p32 = lambda x: struct.pack('