├── README.md ├── decoder.py └── testcases ├── alpha_mixed_linux_1round.bin ├── call4_dword_xor_linux_1round.bin ├── fnstenv_mov_linux_1round.bin ├── shikata_ga_nai_linux_1round.bin └── xor64_linux_1round.bin /README.md: -------------------------------------------------------------------------------- 1 | # unicorn-decoder 2 | 3 | A simple shellcode decoder that uses the unicorn engine as the backend to emulate a shellcode file, find self-modifying code, and attempt to disassemble the resulting output of the decoder stub. 4 | 5 | ## Usage 6 | 7 | usage: decoder.py [-h] -f FILE [-m MODE] [-i MAX_INSTRUCTION] [-d] 8 | 9 | Decode supplied x86 / x64 shellcode automatically with the unicorn engine 10 | 11 | optional arguments: 12 | -h, --help show this help message and exit 13 | -f FILE file to shellcode binary file 14 | -m MODE mode of the emulator (16|32|64) 15 | -i MAX_INSTRUCTION max instructions to emulate 16 | -d Enable extra hooks for debugging of shellcode 17 | 18 | 19 | ## Example 20 | 21 | Here is the decoder walking through the shikata_ga_nai test case: 22 | 23 | % python decoder.py -f testcases/shikata_ga_nai_linux_1round.bin 24 | 25 | Shellcode address ranges: 26 | low: 0x19 27 | high: 0x68 28 | 29 | Decoded shellcode: 30 | 0x19: loop 0x10 31 | 0x1b: xor ebx, ebx 32 | 0x1d: mul ebx 33 | 0x1f: push ebx 34 | 0x20: inc ebx 35 | 0x21: push ebx 36 | 0x22: push 2 37 | 0x24: mov ecx, esp 38 | 0x26: mov al, 0x66 39 | 0x28: int 0x80 40 | 0x2a: pop ebx 41 | 0x2b: pop esi 42 | 0x2c: push edx 43 | 0x2d: push 0x5c110002 44 | 0x32: push 0x10 45 | 0x34: push ecx 46 | 0x35: push eax 47 | 0x36: mov ecx, esp 48 | 0x38: push 0x66 49 | 0x3a: pop eax 50 | 0x3b: int 0x80 51 | 0x3d: mov dword ptr [ecx + 4], eax 52 | 0x40: mov bl, 4 53 | 0x42: mov al, 0x66 54 | 0x44: int 0x80 55 | 0x46: inc ebx 56 | 0x47: mov al, 0x66 57 | 0x49: int 0x80 58 | 0x4b: xchg eax, ebx 59 | 0x4c: pop ecx 60 | 0x4d: push 0x3f 61 | 0x4f: pop eax 62 | 0x50: int 0x80 63 | 0x52: dec ecx 64 | 0x53: jns 0x4d 65 | 
#!/usr/bin/python
"""Unicorn-based decoder for self-modifying x86 shellcode.

The shellcode is emulated with the unicorn engine while every memory write
that lands close to the instruction pointer is recorded.  The written range
is treated as the decoded payload and disassembled with capstone.

Limitations:
  * Multiple rounds of any encoder require supplying a new ``-i`` count,
    which can cause deadlocks with some encoders.
  * Only x86 is supported.
  * The encoder has to modify itself in place; shellcode that is decoded
    to a new location in memory is not detected correctly.
"""
import sys
from unicorn import *
from unicorn.x86_const import *
from capstone import *
import argparse


class SimpleEngine:
    """Thin wrapper around a capstone x86 disassembler that prints listings."""

    def __init__(self, mode):
        """Create the disassembler.

        mode: '32' or '16' select that mode; any other value falls back to
        64-bit mode.
        """
        if mode == '32':
            cur_mode = CS_MODE_32
        elif mode == '16':
            cur_mode = CS_MODE_16
        else:
            cur_mode = CS_MODE_64

        self.capmd = Cs(CS_ARCH_X86, cur_mode)

    @staticmethod
    def _as_bytes(data):
        # uc.mem_read() hands back a bytearray; give capstone an immutable
        # bytes object so callers do not have to convert it themselves.
        if isinstance(data, bytearray):
            return bytes(data)
        return data

    @staticmethod
    def _show(insn):
        print(" 0x%x:\t%s\t%s" % (insn.address, insn.mnemonic, insn.op_str))

    def disas_single(self, data, addr):
        """Print only the first instruction decoded from data at addr."""
        for insn in self.capmd.disasm(self._as_bytes(data), addr):
            self._show(insn)
            break

    def disas_all(self, data, addr):
        """Print every instruction capstone can decode from data at addr."""
        for insn in self.capmd.disasm(self._as_bytes(data), addr):
            self._show(insn)


# globals for the hooks
# write_bounds[0] is the lowest self-modifying write address seen so far and
# write_bounds[1] is the end (exclusive) of the highest such write; both stay
# None until the first hit.
write_bounds = [None, None]


def mem_reader(uc, addr, size):
    """Print size bytes of emulator memory starting at addr on one line."""
    tmp = uc.mem_read(addr, size)

    # A trailing comma after print(...) only suppresses the newline under the
    # Python 2 print statement, so build the whole line explicitly instead.
    print("".join(" 0x%x" % b for b in tmp))
    print("")


# bail out on INT 0x3 (0xCC)
def hook_intr(uc, intno, user_data):
    """Interrupt hook: stop the emulation when the INT 3 sentinel is hit.

    Returns False for INT 3 and True for every other interrupt.
    """
    if intno == 0x3:
        # unicorn does not act on the return value of an interrupt hook, so
        # the emulator has to be stopped explicitly.
        uc.emu_stop()
        return False
    return True


def hook_mem_invalid(uc, access, address, size, value, user_data):
    """Report an invalid memory access and return False (do not recover)."""
    eip = uc.reg_read(UC_X86_REG_EIP)

    # Invalid-access hooks are called with the *_UNMAPPED / *_PROT access
    # codes; the plain READ/WRITE/FETCH codes are accepted as well so the
    # original comparisons keep matching.
    if access in (UC_MEM_WRITE, UC_MEM_WRITE_UNMAPPED, UC_MEM_WRITE_PROT):
        print("invalid WRITE of 0x%x at 0x%X, data size = %u, data value = 0x%x" % (address, eip, size, value))
    elif access in (UC_MEM_READ, UC_MEM_READ_UNMAPPED, UC_MEM_READ_PROT):
        print("invalid READ of 0x%x at 0x%X, data size = %u" % (address, eip, size))
    elif access in (UC_MEM_FETCH, UC_MEM_FETCH_UNMAPPED, UC_MEM_FETCH_PROT):
        print("invalid FETCH of 0x%x at 0x%X, data size = %u" % (address, eip, size))

    return False


def hook_smc_check(uc, access, address, size, value, user_data):
    """Memory-write hook: widen write_bounds for writes landing near EIP."""
    SMC_BOUND = 0x200
    eip = uc.reg_read(UC_X86_REG_EIP)

    # Just check if the write target addr is near EIP
    if abs(eip - address) >= SMC_BOUND:
        return

    # Track the end of the write rather than its start address; otherwise
    # the bytes of the highest write are cut off when the range is read back.
    end = address + size

    if write_bounds[0] is None:
        write_bounds[0] = address
        write_bounds[1] = end
        return

    if address < write_bounds[0]:
        write_bounds[0] = address
    if end > write_bounds[1]:
        write_bounds[1] = end


def hook_mem_read(uc, access, address, size, value, user_data):
    """Debug hook: log a memory read and dump the 32 bytes at its address."""
    print("mem READ: 0x%x, data size = %u, data value = 0x%x" % (address, size, value))
    print("Printing near deref:")
    mem_reader(uc, address, 32)

    return True


def hook_code(uc, addr, size, user_data):
    """Debug hook: disassemble and print the instruction about to execute."""
    mem = uc.mem_read(addr, size)
    # str() of a bytearray is its repr on Python 3, not the raw bytes.
    uc.disasm.disas_single(bytes(mem), addr)
    return True


# Using new JIT blocks as a heuristic could really add to the simple SMC system if implemented correctly.
# TODO: attempt to make a new-block based heuristic, I am thinking repeated addresses / size of blocks,
# maybe even disasm them and poke around.
def main(argv=None):
    """Emulate a shellcode file and print the self-modified region.

    argv: optional list of command-line arguments; when None, argparse reads
    sys.argv[1:] as before.

    Exits through argparse (status 2) when the shellcode file cannot be read,
    the -i value is not an integer, or the blob does not fit in the mapped
    memory region.
    """
    parser = argparse.ArgumentParser(description='Decode supplied x86 / x64 shellcode automatically with the unicorn engine')
    # The path is opened below instead of using the Python 2-only ``file``
    # builtin as the argparse type.
    parser.add_argument('-f', dest='file', help='file to shellcode binary file', required=True)
    parser.add_argument('-m', dest='mode', help='mode of the emulator (16|32|64)', required=False, default="32")
    parser.add_argument('-i', dest='max_instruction', help='max instructions to emulate', required=False)
    parser.add_argument('-d', dest='debug', help='Enable extra hooks for debugging of shellcode', required=False, default=False, action='store_true')

    args = parser.parse_args(argv)

    try:
        with open(args.file, 'rb') as handle:
            bin_code = handle.read()
    except (IOError, OSError) as err:
        parser.error("unable to read shellcode file: %s" % err)

    if args.max_instruction:
        # An explicit instruction budget replaces the end address as the
        # stop condition.
        try:
            max_insns = int(args.max_instruction)
        except ValueError:
            parser.error("-i expects an integer, got %r" % args.max_instruction)
        end_addr = -1
    else:
        max_insns = 0x1000
        end_addr = len(bin_code)

    PAGE_SIZE = 2 * 1024 * 1024
    START_RIP = 0x0
    INT3_SENTINEL = b"\xcc\xcc\xcc\xcc"

    # The blob and the sentinel placed after it both have to fit inside the
    # single mapped region.
    sentinel_addr = len(bin_code) + 0xff
    if sentinel_addr + len(INT3_SENTINEL) > PAGE_SIZE:
        parser.error("shellcode file is too large for the %d byte emulated region" % PAGE_SIZE)

    disas_engine = SimpleEngine(args.mode)
    # Anything other than "32" or "16" selects 64-bit mode.
    cur_mode = {"32": UC_MODE_32, "16": UC_MODE_16}.get(args.mode, UC_MODE_64)

    # setup engine and write the memory there.
    emu = Uc(UC_ARCH_X86, cur_mode)
    emu.disasm = disas_engine  # lets hook_code reach the disassembler through uc
    emu.mem_map(0, PAGE_SIZE)
    # write machine code to be emulated to memory
    emu.mem_write(START_RIP, bin_code)

    # write a INT 0x3 near the end of the code blob to make sure emulation ends
    emu.mem_write(sentinel_addr, INT3_SENTINEL)

    emu.hook_add(UC_HOOK_MEM_INVALID, hook_mem_invalid)
    emu.hook_add(UC_HOOK_MEM_WRITE, hook_smc_check)
    emu.hook_add(UC_HOOK_INTR, hook_intr)

    if args.debug:
        emu.hook_add(UC_HOOK_MEM_READ, hook_mem_read)
        emu.hook_add(UC_HOOK_CODE, hook_code)

    # arbitrary address for ESP.
    emu.reg_write(UC_X86_REG_ESP, 0x2000)

    try:
        emu.emu_start(START_RIP, end_addr, 0, max_insns)
    except UcError as e:
        # Emulation errors are reported but not fatal: whatever was decoded
        # before the failure is still printed below.
        print("ERROR: %s" % e)

    if write_bounds[0] is not None:
        low, high = write_bounds
        print("Shellcode address ranges:")
        print(" low: 0x%X" % low)
        print(" high: 0x%X" % high)
        print("")
        print("Decoded shellcode:")
        mem = emu.mem_read(low, high - low)
        # Pass real bytes; str() of a bytearray is its repr on Python 3.
        emu.disasm.disas_all(bytes(mem), low)
    else:
        print("No SMC hits, no encoder detected")


# Sample inputs live in testcases/: alpha_mixed, call4_dword_xor, fnstenv_mov,
# shikata_ga_nai and xor64 (each named <encoder>_linux_1round.bin).
if __name__ == '__main__':
    main()
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/mothran/unicorn-decoder/ec9d8473fd74f8f5ca203aba948f52da0f030f42/testcases/shikata_ga_nai_linux_1round.bin -------------------------------------------------------------------------------- /testcases/xor64_linux_1round.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mothran/unicorn-decoder/ec9d8473fd74f8f5ca203aba948f52da0f030f42/testcases/xor64_linux_1round.bin --------------------------------------------------------------------------------