├── README.md
├── decoder.py
└── testcases
    ├── alpha_mixed_linux_1round.bin
    ├── call4_dword_xor_linux_1round.bin
    ├── fnstenv_mov_linux_1round.bin
    ├── shikata_ga_nai_linux_1round.bin
    └── xor64_linux_1round.bin


/README.md:
--------------------------------------------------------------------------------
 1 | # unicorn-decoder
 2 | 
 3 | A simple shellcode decoder that uses the Unicorn engine as its backend: it emulates a shellcode file, detects self-modifying code, and attempts to disassemble the output produced by the decoder stub.
 4 | 
 5 | ## Usage
 6 | 
 7 | 	usage: decoder.py [-h] -f FILE [-m MODE] [-i MAX_INSTRUCTION] [-d]
 8 | 
 9 | 	Decode supplied x86 / x64 shellcode automatically with the unicorn engine
10 | 
11 | 	optional arguments:
12 | 	  -h, --help          show this help message and exit
13 | 	  -f FILE             file to shellcode binary file
14 | 	  -m MODE             mode of the emulator (16|32|64)
15 | 	  -i MAX_INSTRUCTION  max instructions to emulate
16 | 	  -d                  Enable extra hooks for debugging of shellcode
17 | 
18 | 
19 | ## Example
20 | 
21 | Here is the decoder walking through the shikata_ga_nai test case
22 | 
23 | 	% python decoder.py -f testcases/shikata_ga_nai_linux_1round.bin
24 | 
25 | 	Shellcode address ranges:
26 | 	   low:  0x19
27 | 	   high: 0x68
28 | 
29 | 	Decoded shellcode:
30 | 	  0x19:	loop	0x10
31 | 	  0x1b:	xor	ebx, ebx
32 | 	  0x1d:	mul	ebx
33 | 	  0x1f:	push	ebx
34 | 	  0x20:	inc	ebx
35 | 	  0x21:	push	ebx
36 | 	  0x22:	push	2
37 | 	  0x24:	mov	ecx, esp
38 | 	  0x26:	mov	al, 0x66
39 | 	  0x28:	int	0x80
40 | 	  0x2a:	pop	ebx
41 | 	  0x2b:	pop	esi
42 | 	  0x2c:	push	edx
43 | 	  0x2d:	push	0x5c110002
44 | 	  0x32:	push	0x10
45 | 	  0x34:	push	ecx
46 | 	  0x35:	push	eax
47 | 	  0x36:	mov	ecx, esp
48 | 	  0x38:	push	0x66
49 | 	  0x3a:	pop	eax
50 | 	  0x3b:	int	0x80
51 | 	  0x3d:	mov	dword ptr [ecx + 4], eax
52 | 	  0x40:	mov	bl, 4
53 | 	  0x42:	mov	al, 0x66
54 | 	  0x44:	int	0x80
55 | 	  0x46:	inc	ebx
56 | 	  0x47:	mov	al, 0x66
57 | 	  0x49:	int	0x80
58 | 	  0x4b:	xchg	eax, ebx
59 | 	  0x4c:	pop	ecx
60 | 	  0x4d:	push	0x3f
61 | 	  0x4f:	pop	eax
62 | 	  0x50:	int	0x80
63 | 	  0x52:	dec	ecx
64 | 	  0x53:	jns	0x4d
65 | 	  0x55:	push	0x68732f2f
66 | 	  0x5a:	push	0x6e69622f
67 | 	  0x5f:	mov	ebx, esp
68 | 	  0x61:	push	eax
69 | 	  0x62:	push	ebx
70 | 	  0x63:	mov	ecx, esp
71 | 	  0x65:	mov	al, 0xb
72 | 
73 | ## Limitations
74 | 
75 | Multiple rounds of any encoder require supplying a new -i count for each round; with some encoders this can cause deadlocks.
76 | 
77 | Only i386 right now, ARM and others will come later.
78 | 
79 | The encoder has to modify itself in place for the detection to work; this decoder cannot detect decoded shellcode that is written to a new location in memory.


--------------------------------------------------------------------------------
/decoder.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python
  2 | import sys
  3 | from unicorn import *
  4 | from unicorn.x86_const import *
  5 | from capstone import *
  6 | import argparse
  7 | 
  8 | class SimpleEngine:
  9 | 	def __init__(self, mode):
 10 | 		if mode == '32':
 11 | 			cur_mode = CS_MODE_32
 12 | 		elif mode == '16':
 13 | 			cur_mode = CS_MODE_16
 14 | 		else:
 15 | 			cur_mode = CS_MODE_64
 16 | 
 17 | 		self.capmd = Cs(CS_ARCH_X86, cur_mode)
 18 | 
 19 | 	def disas_single(self, data, addr):
 20 | 		for i in self.capmd.disasm(data, addr):
 21 | 			print("  0x%x:\t%s\t%s" % (i.address, i.mnemonic, i.op_str))
 22 | 			break
 23 | 
 24 | 	def disas_all(self, data, addr):
 25 | 		for i in self.capmd.disasm(data, addr):
 26 | 			print("  0x%x:\t%s\t%s" % (i.address, i.mnemonic, i.op_str))
 27 | 
# Globals for the hooks.
# write_bounds[0] / write_bounds[1] are the lowest / highest addresses recorded
# by hook_smc_check; both stay None until that hook records a write, and main()
# reads them after emulation to decide what to disassemble.
write_bounds = [None, None]
 30 | 
 31 | def mem_reader(uc, addr, size):
 32 | 	tmp = uc.mem_read(addr, size)
 33 | 
 34 | 	for i in tmp:
 35 | 		print("   0x%x" % i),
 36 | 	print("")
 37 | 
 38 | # bail out on INT 0x3 (0xCC)
 39 | def hook_intr(uc, intno, user_data):
 40 | 	if intno == 0x3:
 41 | 		return False;
 42 | 	else:
 43 | 		return True
 44 | 
 45 | def hook_mem_invalid(uc, access, address, size, value, user_data):
 46 | 	eip = uc.reg_read(UC_X86_REG_EIP)
 47 | 
 48 | 	if access == UC_MEM_WRITE:
 49 | 		print("invalid WRITE of 0x%x at 0x%X, data size = %u, data value = 0x%x" % (address, eip, size, value))
 50 | 	if access == UC_MEM_READ:
 51 | 		print("invalid READ of 0x%x at 0x%X, data size = %u" % (address, eip, size))
 52 | 
 53 | 	return False
 54 | 
 55 | def hook_smc_check(uc, access, address, size, value, user_data):
 56 | 	SMC_BOUND = 0x200
 57 | 	eip = uc.reg_read(UC_X86_REG_EIP)
 58 | 
 59 | 	# Just check if the write target addr is near EIP
 60 | 	if abs(eip - address) < SMC_BOUND:
 61 | 		if write_bounds[0] == None:
 62 | 			write_bounds[0] = address
 63 | 			write_bounds[1] = address
 64 | 		elif address < write_bounds[0]:
 65 | 			write_bounds[0] = address
 66 | 		elif address > write_bounds[1]:
 67 | 			write_bounds[1] = address
 68 | 
 69 | def hook_mem_read(uc, access, address, size, value, user_data):
 70 | 	print("mem READ:  0x%x, data size = %u, data value = 0x%x" % (address, size, value))
 71 | 	print("Printing near deref:")
 72 | 	mem_reader(uc, address, 32)
 73 | 
 74 | 	return True
 75 | 
 76 | def hook_code(uc, addr, size, user_data):
 77 | 	mem = uc.mem_read(addr, size)
 78 | 	uc.disasm.disas_single(str(mem), addr)
 79 | 	return True
 80 | 
 81 | # Using new JIT blocks as a heuristic could really add to the simple SMC system if implemented correctly.
 82 | # TODO: attempt to make a new-block based heuristic, I am thinking repeated addresses / size of blocks, 
 83 | # maybe even disasm them and poke around.
 84 | 
 85 | def main():
 86 | 	parser = argparse.ArgumentParser(description='Decode supplied x86 / x64 shellcode automatically with the unicorn engine')
 87 | 	parser.add_argument('-f', dest='file', help='file to shellcode binary file', required=True, type=file)
 88 | 	parser.add_argument('-m', dest='mode', help='mode of the emulator (16|32|64)', required=False, default="32")
 89 | 	parser.add_argument('-i', dest='max_instruction', help='max instructions to emulate', required=False)
 90 | 	parser.add_argument('-d', dest='debug', help='Enable extra hooks for debugging of shellcode', required=False, default=False, action='store_true')
 91 | 
 92 | 	args = parser.parse_args()
 93 | 
 94 | 	bin_code = args.file.read()
 95 | 	disas_engine = SimpleEngine(args.mode)
 96 | 
 97 | 	if args.mode == "32":
 98 | 		cur_mode = UC_MODE_32
 99 | 	elif args.mode == "16":
100 | 		cur_mode = UC_MODE_16
101 | 	else:
102 | 		cur_mode = UC_MODE_64
103 | 
104 | 	PAGE_SIZE = 2 * 1024 * 1024
105 | 	START_RIP = 0x0
106 | 
107 | 	# setup engine and write the memory there.
108 | 	emu = Uc(UC_ARCH_X86, cur_mode)
109 | 	emu.disasm = disas_engine # python is silly but it works.
110 | 	emu.mem_map(0, PAGE_SIZE)
111 | 	# write machine code to be emulated to memory
112 | 	emu.mem_write(START_RIP, bin_code)
113 | 
114 | 	# write a INT 0x3 near the end of the code blob to make sure emulation ends
115 | 	emu.mem_write(len(bin_code) + 0xff, "\xcc\xcc\xcc\xcc")
116 | 
117 | 	emu.hook_add(UC_HOOK_MEM_INVALID, hook_mem_invalid)
118 | 	emu.hook_add(UC_HOOK_MEM_WRITE, hook_smc_check)
119 | 	emu.hook_add(UC_HOOK_INTR, hook_intr)
120 | 	
121 | 	if args.debug:
122 | 		emu.hook_add(UC_HOOK_MEM_READ, hook_mem_read)
123 | 		emu.hook_add(UC_HOOK_CODE, hook_code)
124 | 
125 | 	# arbitrary address for ESP.
126 | 	emu.reg_write(UC_X86_REG_ESP, 0x2000)
127 | 
128 | 	if args.max_instruction:
129 | 		end_addr = -1
130 | 	else:
131 | 		args.max_instruction = 0x1000
132 | 		end_addr = len(bin_code)
133 | 
134 | 	try: 
135 | 		emu.emu_start(START_RIP, end_addr, 0, int(args.max_instruction))
136 | 	except UcError as e:
137 | 		print("ERROR: %s" % e)
138 | 
139 | 	if write_bounds[0] != None:
140 | 		print("Shellcode address ranges:")
141 | 		print("   low:  0x%X" % write_bounds[0])
142 | 		print("   high: 0x%X" % write_bounds[1])
143 | 		print("")
144 | 		print("Decoded shellcode:")
145 | 		mem = emu.mem_read(write_bounds[0], (write_bounds[1] - write_bounds[0]))
146 | 		emu.disasm.disas_all(str(mem), write_bounds[0])
147 | 
148 | 	else:
149 | 		print("No SMC hits, no encoder detected")
150 | 
151 | if __name__ == '__main__':
152 | 	main()
153 | 
154 | 


--------------------------------------------------------------------------------
/testcases/alpha_mixed_linux_1round.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mothran/unicorn-decoder/ec9d8473fd74f8f5ca203aba948f52da0f030f42/testcases/alpha_mixed_linux_1round.bin


--------------------------------------------------------------------------------
/testcases/call4_dword_xor_linux_1round.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mothran/unicorn-decoder/ec9d8473fd74f8f5ca203aba948f52da0f030f42/testcases/call4_dword_xor_linux_1round.bin


--------------------------------------------------------------------------------
/testcases/fnstenv_mov_linux_1round.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mothran/unicorn-decoder/ec9d8473fd74f8f5ca203aba948f52da0f030f42/testcases/fnstenv_mov_linux_1round.bin


--------------------------------------------------------------------------------
/testcases/shikata_ga_nai_linux_1round.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mothran/unicorn-decoder/ec9d8473fd74f8f5ca203aba948f52da0f030f42/testcases/shikata_ga_nai_linux_1round.bin


--------------------------------------------------------------------------------
/testcases/xor64_linux_1round.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mothran/unicorn-decoder/ec9d8473fd74f8f5ca203aba948f52da0f030f42/testcases/xor64_linux_1round.bin


--------------------------------------------------------------------------------