├── .gitignore
├── README.md
├── export_unicorn.py
├── import_unicorn.py
└── requirements.txt


/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache__
2 | *.sw*
3 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | Ghidra Unicorn
 2 | ------
 3 | This is a work in progress, possibly worse than Ghidra's built in emulator and certainly full of issues.
 4 | 
 5 | In the future, I'd like to get Unicorn and Capstone installed within Ghidra's jython environment, but for now this script
 6 | will export a pickle file from Ghidra and then a separate python3 script will import the data and run unicorn
 7 | 
 8 | ## Installation
 9 | Copy `export_unicorn.py` into your ghidra scripts directory. You should then see `UnicornExport` under the tools menu.
10 | Then install dependencies with `pip3 install -r requirements.txt`
11 | 
12 | ## Usage
13 | 1. Open an ARM program in Ghidra
14 | 2. Run autoanalysis
15 | 3. ~Export the binary with `Tools->UnicornExport`~ Open the script editor and run `export_unicorn.py`
16 | 4. Select a location to save the output pickle file to
17 | 5. Configure your START and END variables in `import_unicorn.py` to control what code you'll run
18 | 6. From an environment with a real Python3 shell, run `import_unicorn.py [path_to_your_generated_pickle_file]`
19 | 7. Modify `import_unicorn.py`'s `hook_code` function to do useful things
20 | 
21 | 
22 | 


--------------------------------------------------------------------------------
/export_unicorn.py:
--------------------------------------------------------------------------------
 1 | # Export a pickle that can be ingested by unicorn
 2 | 
 3 | #@author Andrew Fasano
 4 | #@category Emulation
 5 | #@keybinding 
 6 | #@menupath Tools.UnicornExport
 7 | #@toolbar 
 8 | 
 9 | import pickle
10 | import jarray
11 | 
# Container that gets pickled at the end of this script.
output = {
    "mem_regions": [],  # list of (start, size) tuples to map
    "data": {},  # block start address -> bytearray of the block's contents
}
14 | 
15 | # Chop program into 4K aligned memory regions for unicorn
def get_address_ranges(program, blocks):
    """Return aligned, non-overlapping address ranges covering all `ram` blocks.

    Every block whose start address is in the ``ram`` address space is widened
    to 0x4000-byte boundaries (a multiple of the 4K granularity used for the
    emulator's memory map), and widened ranges that overlap each other are
    merged into a single range.

    program -- unused; kept so existing callers keep working.
    blocks  -- iterable of memory blocks exposing getStart() / getEnd(), whose
               results carry ``offset`` (and ``addressSpace.name`` for the start).

    Returns a list of inclusive (start, end) integer tuples, sorted by start.
    Raises AssertionError if a block produces an empty or inverted range.
    """
    align_mask = 0x3FFF

    spans = []
    for block in blocks:
        if block.getStart().addressSpace.name != u'ram':
            continue
        # Round the start down and the end up. Clearing *all* low bits is what
        # rounds down; masking with ~0x4000 would only clear a single bit.
        s = int(block.getStart().offset) & ~align_mask
        e = int(block.getEnd().offset) | align_mask
        assert s < e, "Can't have start >= end"
        spans.append((s, e))

    # Sort by start, then sweep once: a span either overlaps the most recently
    # emitted range (extend it) or starts a new one. This also covers spans
    # that fully contain, or bridge between, earlier ranges.
    spans.sort()
    address_ranges = []
    for (s, e) in spans:
        if address_ranges and s <= address_ranges[-1][1]:
            prev_start, prev_end = address_ranges[-1]
            address_ranges[-1] = (prev_start, max(prev_end, e))
        else:
            address_ranges.append((s, e))

    return address_ranges
54 | 
import os

blocks = currentProgram.getMemory().getBlocks()
ranges = get_address_ranges(currentProgram, blocks)

# Map memory (TODO: also store memory permissions)
# Ranges are inclusive, so the size of each region is end - start + 1.
for (start, end) in ranges:
    assert end > start
    output["mem_regions"].append((start, end - start + 1))

# Set up data
for block in blocks:
    if block.getStart().addressSpace.name != u'ram':  # Only copy ram (TODO: should we copy other regions?)
        continue
    if not block.isInitialized():  # Only read initialized blocks
        continue

    block_start = int(block.start.offset)
    java_bytes = jarray.zeros(block.size, "b")
    len_bytes = block.getBytes(block.start, java_bytes)
    # Java bytes are signed (-128..127); mask them into the 0..255 range.
    block_bytes = bytearray([b & 0xff for b in java_bytes[:len_bytes]])

    # Sanity check: the block must start inside one of the exported regions.
    if not any(s <= block_start <= e for (s, e) in ranges):
        raise RuntimeError("Block at 0x{:x} is unmapped".format(block_start))

    output["data"][block_start] = block_bytes
    print("Wrote {} bytes at {:x}".format(len(block_bytes), block_start))


out = askDirectory("Save location", "Save")
out_path = out.toString()
# The chooser hands back a directory; opening a directory for writing fails,
# so place the pickle inside it under a default name. Any other path (e.g. a
# not-yet-existing name typed into the dialog) is used as given.
if os.path.isdir(out_path):
    out_path = os.path.join(out_path, "unicorn_export.pickle")
with open(out_path.encode("utf8"), "wb") as f:
    pickle.dump(output, f)
print("Saved export to {}".format(out_path))
91 | 


--------------------------------------------------------------------------------
/import_unicorn.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | # Emulate an arm system using unicorn with a binary produced by ghidra
 4 | # Given a pickle file as arg from Ghidra produced with `export_unicorn.py`,
 5 | 
 6 | # Usage: python3 import_unicorn.py pickle_file_path
 7 | 
 8 | # If you get an invalid memory write error:
 9 | #   - Did you set START and END to sane values?
10 | #   - Turn on debug, and identify the bad instruction. Maybe you need to map additional memory that we failed to export from Ghidra?
11 | 
12 | 
# Addresses handed to emu_start() as the begin / until bounds of the run.
START = 0x0
END = 0xFFFFFF
# When True, hook_code prints a trace line and disassembly per instruction.
DEBUG = False
15 | 
16 | import os
17 | import sys
18 | import pickle
19 | 
20 | from unicorn import *
21 | from unicorn.arm_const import *
22 | from capstone import *
23 | 
# Validate the command line explicitly; asserts are stripped under `python -O`.
if len(sys.argv) < 2:
    sys.exit(f"USAGE: {sys.argv[0]} pickle_path")
in_path = sys.argv[1]

if not os.path.isfile(in_path):
    sys.exit(f"File {in_path} not found")

# NOTE: unpickling can execute arbitrary code embedded in the file. Only load
# pickles you produced yourself with `export_unicorn.py`.
with open(in_path, "rb") as f:
    data = pickle.load(f, encoding='utf8')

BAD_ADDRS = []  # Put addresses in here that you want to skip emulation of
33 | 
def hook_code(emu, address, size, user_data):
    """Code hook invoked for each emulated instruction.

    When DEBUG is set, prints the instruction's address and size followed by
    its disassembly. When the address is listed in BAD_ADDRS, the instruction
    is skipped by rewriting the program counter.

    emu       -- the emulator instance the hook was registered on.
    address   -- address of the instruction about to run.
    size      -- size of that instruction in bytes.
    user_data -- unused.
    """
    if DEBUG:
        print('>>> Tracing instruction at 0x%x, instruction size = 0x%x' % (address, size))
        code = bytes(emu.mem_read(address, size))
        # The second argument to disasm() is the base address of `code`, so
        # pass the real address to get correct addresses in the output.
        for insn in disasm.disasm(code, address):
            print("\t0x{:x}\t{}\t{}".format(insn.address, insn.mnemonic, insn.op_str))

    if address in BAD_ADDRS:
        # Jump to the next instruction; the low bit is set as for the Thumb
        # entry point passed to emu_start().
        emu.reg_write(UC_ARM_REG_PC, (address + size) | 1)
45 | 
46 | 
# Initialize disassembler (used by hook_code when DEBUG is enabled)
disasm = Cs(CS_ARCH_ARM, CS_MODE_THUMB)
disasm.detail = True

# Initialize emulator and add hook callback
emu = Uc(UC_ARCH_ARM, UC_MODE_THUMB)
emu.hook_add(UC_HOOK_CODE, hook_code)

# Map memory (TODO: differentiate RWX permissions?)
for (start, sz) in data["mem_regions"]:
    #print(f"Mapping from 0x{start:x} to 0x{start+sz:x}")
    emu.mem_map(start, sz)

# Set up data
for offset, raw_data in data["data"].items():
    #print(f"Filling in {len(raw_data)} bytes at 0x{offset:x}")
    emu.mem_write(offset, bytes(raw_data))

# Set up stack higher than everything we've mapped in already.
# mem_regions holds (start, size) tuples, so the top of a region is start+size.
highest_mapped = max((start + sz for (start, sz) in data["mem_regions"]), default=0)
# Leave a gap above the image and round down to a 4K page boundary
# (clearing all twelve low bits, not just bit 12).
stack_start = (highest_mapped + 0x10000) & ~0xFFF
stack_size = 0x1000
emu.mem_map(stack_start, stack_size)

# The stack grows downward, so start SP near the top of the mapping: this
# leaves 0x100 bytes above SP for arguments and the rest below for pushes.
stack_pointer = stack_start + stack_size - 0x100

# Add a single argument to the stack and set up stack pointer
emu.mem_write(stack_pointer, bytes([0x12, 0x34, 0x56, 0x78]))
emu.reg_write(UC_ARM_REG_SP, stack_pointer)

# Run emulator in thumb mode
emu.emu_start(START | 1, END)
76 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | unicorn
2 | capstone
3 | 


--------------------------------------------------------------------------------