├── .gitignore ├── README.md ├── export_unicorn.py ├── import_unicorn.py └── requirements.txt /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | *.sw* 3 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Ghidra Unicorn 2 | ------ 3 | This is a work in progress, possibly worse than Ghidra's built in emulator and certainly full of issues. 4 | 5 | In the future, I'd like to get Unicorn and Capstone installed within Ghidra's jython environment, but for now this script 6 | will export a pickle file from Ghidra and then a seperate python3 script will import the data and run unicorn 7 | 8 | ## Installation 9 | Copy `export_unicorn.py` into your ghidra scripts directory. You should then see `UnicornExport` under the tools menu. 10 | Then install dependencies with `pip3 install -r requirements.txt` 11 | 12 | ## Usage 13 | 1. Open an ARM program in Ghidra 14 | 2. Run autoanalysis 15 | 3. ~Export the binary with `Tools->UnicornExport`~ Open the script editor and run `export_unicorn.py` 16 | 4. Select a location to the output pickle file to 17 | 5. Configure your START and END variables in `import_unicorn.py` to control what code you'll run 18 | 6. From an environment with a real Python3 shell, run `import_unicorn.py [path_to_your_generated_pickle_file]` 19 | 7. Modify `import_unicorn.py`'s `hook_code` function to do useful things 20 | 21 | 22 | -------------------------------------------------------------------------------- /export_unicorn.py: -------------------------------------------------------------------------------- 1 | # Export a pickle that can be injested by unicorn 2 | 3 | #@author Andrew Fasano 4 | #@category Emulation 5 | #@keybinding 6 | #@menupath Tools.UnicornExport 7 | #@toolbar 8 | 9 | import pickle 10 | import jarray 11 | 12 | output = {"mem_regions": [], # (start,size) tuples 13 | "data": {}} # address: data 14 | 15 | # Chop program into 4K aligned memory regions for unicorn 16 | def get_address_ranges(program, blocks): 17 | address_ranges = [] 18 | 19 | for block in blocks: 20 | if block.getStart().addressSpace .name != u'ram': 21 | continue 22 | (s,e) = (int(block.getStart().offset) & ~0x4000, int(block.getEnd().offset) | 0x3FFF) # round to 0x4k boundries 23 | #print(hex(s) + " - " + hex(e)) 24 | 25 | assert(s < e), "Can't have start < end" 26 | (start_overlaps, end_overlaps) = (None, None) 27 | for (start, end) in address_ranges: 28 | if s >= start and e <= end: 29 | break 30 | elif s >= start and s <= end: # Start overlaps with existing range - extend 31 | start_overlaps = (start, end) 32 | break 33 | elif e >= start and e <= end: # End overlaps with existing range - extend 34 | end_overlaps = (start, end) 35 | break 36 | else: # No overlaps or existing data - insert 37 | address_ranges.append((s, e)) 38 | continue 39 | 40 | assert(not(start_overlaps and end_overlaps)) 41 | if start_overlaps: # Expand existing range to have new end 42 | address_ranges[:] = [(x,y) for (x,y) in address_ranges if (x,y) != start_overlaps] 43 | address_ranges.append((start_overlaps[0], e)) 44 | 45 | elif end_overlaps: # Move start of existing range to be earlier 46 | address_ranges[:] = [(x,y) for (x,y) in address_ranges if (x,y) != end_overlaps] 47 | address_ranges.append((s, end_overlaps[1])) 48 | 49 | #print("ADDRESS RANGES:") 50 | #for (s, e) in address_ranges: 51 | # print(hex(s) + " - " + hex(e)) 52 | 53 | return address_ranges 54 | 55 | blocks = currentProgram.getMemory().getBlocks() 56 | ranges = get_address_ranges(currentProgram, blocks) 57 | 58 | # Map memory (TODO: also store memory permissions) 59 | for (start,end) in ranges: 60 | output["mem_regions"].append((start, end-start+1)) 61 | assert(end-start >0) 62 | 63 | # Set up data 64 | for block in blocks: 65 | if block.getStart().addressSpace .name != u'ram': # Only copy ram (TODO: should we copy other regions?) 66 | continue 67 | l = block.size 68 | if block.isInitialized(): # Only read initialized blocks 69 | java_bytes = jarray.zeros(l, "b") 70 | len_bytes = block.getBytes(block.start, java_bytes) 71 | block_bytes = bytearray([int((b+2**32)&0xff) for b in java_bytes[:len_bytes]]) 72 | 73 | mapped = False 74 | for (s,e) in ranges: 75 | if block.start.offset >= s and block.start.offset <= e: 76 | break 77 | else: 78 | raise RuntimeError("Block at 0x{:x} is unmapped".format(block.start.offset)) 79 | 80 | # Now write from start to end 81 | output["data"][int(block.start.offset)] = block_bytes 82 | print("Wrote {} bytes at {:x}".format(len(block_bytes), int(block.start.offset))) 83 | #else: 84 | # print("Ignore Uninitialized block at ", block.start) 85 | 86 | 87 | out = askDirectory("Save location", "Save") 88 | d = out.toString().encode("utf8") 89 | with open(d, "wb") as f: 90 | pickle.dump(output, f) 91 | -------------------------------------------------------------------------------- /import_unicorn.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # Emulate an arm system using uincorn with a binary produced by ghidra 4 | # Given a pickle file as arg from Ghidra produced with `export_unicorn.py`, 5 | 6 | # Usage: python3 import_unicorn.py pickle_file_path 7 | 8 | # If you get an invalid memory write error: 9 | # - Did you set START and END to sane values? 10 | # - Turn on debug, and identify the bad instruction. Maybe you need to map additional memory that we failed to export from Ghidra? 11 | 12 | 13 | (START,END) = (0x0, 0xFFFFFF) 14 | DEBUG = False 15 | 16 | import os 17 | import sys 18 | import pickle 19 | 20 | from unicorn import * 21 | from unicorn.arm_const import * 22 | from capstone import * 23 | 24 | assert(len(sys.argv) > 1), f"USAGE: {sys.argv[0]} pickle_path" 25 | in_path = sys.argv[1] 26 | 27 | assert(os.path.isfile(sys.argv[1])), f"File {sys.argv[1]} not found" 28 | 29 | with open(in_path, "rb") as f: 30 | data = pickle.load(f, encoding='utf8') 31 | 32 | BAD_ADDRS = [] # Put addresses in here that you want to skip emulation of 33 | 34 | def hook_code(emu, address, size, user_data): 35 | global last_ib 36 | global idx 37 | if DEBUG: 38 | print('>>> Tracing instruction at 0x%x, instruction size = 0x%x' %(address, size)) 39 | code = emu.mem_read(address, size) 40 | for i in disasm.disasm(code, size): 41 | print("\t0x{:x}\t{}\t{}".format(i.address, i.mnemonic, i.op_str)) 42 | 43 | if address in BAD_ADDRS: 44 | emu.reg_write(UC_ARM_REG_PC, (address+size)|1) 45 | 46 | 47 | # Initialize decompiler 48 | disasm = Cs(CS_ARCH_ARM, CS_MODE_THUMB) 49 | disasm.detail = True 50 | 51 | # Initialize emulator and add hook callback 52 | emu = Uc(UC_ARCH_ARM, UC_MODE_THUMB) 53 | emu.hook_add(UC_HOOK_CODE, hook_code) 54 | 55 | # Map memory (TODO: differentiate RWX permissions?) 56 | for (start,sz) in data["mem_regions"]: 57 | #print(f"Mapping from 0x{start:x} to 0x{start+sz:x}") 58 | emu.mem_map(start, sz) 59 | 60 | # Set up data 61 | for offset, raw_data in data["data"].items(): 62 | #print(f"Filling in {len(raw_data)} bytes at 0x{offset:x}") 63 | emu.mem_write(offset, bytes(raw_data)) 64 | 65 | # Set up stack higher than all the functions we've mapped in already 66 | stack_start = (max(e for (_, e) in data["mem_regions"])+0x10000) & ~0x1000 67 | stack_size = 0x1000 68 | emu.mem_map(stack_start, stack_size) 69 | 70 | # Add a single argument to the stack and set up stack pointer 71 | emu.mem_write(stack_start+0x100, bytes([0x12, 0x34, 0x56, 0x78])) 72 | emu.reg_write(UC_ARM_REG_SP, stack_start+0x100) 73 | 74 | # Run emulator in thumb mode 75 | emu.emu_start(START | 1, END) 76 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | unicorn 2 | capstone 3 | --------------------------------------------------------------------------------