├── .gitignore ├── images ├── new.png └── orig.png ├── test_assembler.py ├── patch_template.asm ├── asm_utils.py ├── README.md ├── template_utils.py ├── emulator_utils.py └── deoptimizer.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.class 2 | .*.swp 3 | -------------------------------------------------------------------------------- /images/new.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SamL98/GhidraStackStrings/HEAD/images/new.png -------------------------------------------------------------------------------- /images/orig.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SamL98/GhidraStackStrings/HEAD/images/orig.png -------------------------------------------------------------------------------- /test_assembler.py: -------------------------------------------------------------------------------- 1 | from ghidra.app.plugin.assembler import Assemblers 2 | from ghidra.program.flatapi import FlatProgramAPI 3 | 4 | cp = currentProgram 5 | fp = FlatProgramAPI(cp) 6 | assembler = Assemblers.getAssembler(cp) 7 | 8 | while True: 9 | asm = askString('', '', '') 10 | print(asm) 11 | print(assembler.assembleLine(fp.toAddr(0), asm)) 12 | -------------------------------------------------------------------------------- /patch_template.asm: -------------------------------------------------------------------------------- 1 | push rdi 2 | push rsi 3 | push rax 4 | 5 | call ${strlen_plus_5} 6 | ${str} 7 | pop rsi 8 | 9 | @if (off == 0) 10 | @if (reg.lower() != 'rax') 11 | push reg 12 | @endif 13 | pop rdi 14 | @else 15 | lea rdi, [${reg} + ${off}] 16 | @endif 17 | 18 | call ${strcpy} 19 | 20 | @if (off != 0 or reg.lower() != 'rax') 21 | pop rax 22 | @endif 23 | pop rsi 24 | pop rdi 25 | -------------------------------------------------------------------------------- /asm_utils.py: -------------------------------------------------------------------------------- 1 | from template_utils import expand_template 2 | 3 | import subprocess 4 | import binascii 5 | 6 | 7 | def assemble(insn, at=None): 8 | args = ['rasm2', '-a', 'x86', '-b', '64'] 9 | 10 | if at is not None: 11 | args.extend(['-@', hex(at)]) 12 | 13 | args.append(insn) 14 | out = subprocess.check_output(args).strip() 15 | 16 | return [ord(d) for d in binascii.unhexlify(out)] 17 | 18 | def generate_patch(pc, strval, reg, off, strcpy): 19 | patch = [] 20 | 21 | def _assemble(line): 22 | insnBytes = assemble(line) 23 | patch.extend(insnBytes) 24 | return len(insnBytes) 25 | 26 | expand_template(pc, 27 | strval, 28 | reg, 29 | off, 30 | strcpy, 31 | size_cb=_assemble) 32 | 33 | return patch 34 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Ghidra Stack Strings 2 | 3 | This project provides a first-pass hack for getting stack strings to display in Ghidra's decompiler window. 4 | 5 | Here's an example of the results: 6 | 7 | ![orig](/images/orig.png) 8 | 9 | to 10 | 11 | ![new](/images/new.png) 12 | 13 | The methodology is detailed in [here](https://saml98.github.io/jekyll/update/2020/05/03/ghidra-stack-strings.html) 14 | 15 | ## Dependencies 16 | 17 | This project has dependencies?! Yep. Ghidra's built-in assembler kept yelling at me so at 2am, I decided to give up and call `rasm2` as a subprocess. So yeah. You're gonna need radare2. Or some familiarity with the Ghidra API. 18 | 19 | ## Warnings 20 | 21 | Most of this code was hacked together at the wee hours of the night. Therefore, let's just say it's not production quality. There's also some hacks in the code that were specific to the binary I was looking at while developing this. I don't think it's gonna hurt on other binaries (y'never know) but it's unnecessary nonetheless. 22 | 23 | Yoour binary also needs `strcpy` for the script to work. 24 | 25 | ## Usage 26 | 27 | Copy all the scripts to `~/ghidra_scripts` or add this directory to the Ghidra script search paths. Then run `deoptimizer.py`. 28 | -------------------------------------------------------------------------------- /template_utils.py: -------------------------------------------------------------------------------- 1 | STATE_ASM = 0 2 | STATE_IF = 1 3 | STATE_SKIPPING_IF = 2 4 | STATE_SKIPPING_ELSE = 3 5 | 6 | def rindex(str, c): 7 | return len(str) - str[::-1].index(c) - 1 8 | 9 | class Parser(object): 10 | def __init__(self): 11 | self.state = STATE_ASM 12 | self.stack = [] 13 | 14 | self.stateTable = { 15 | STATE_ASM: self.asm_handleLine, 16 | STATE_IF: self.if_handleLine, 17 | STATE_SKIPPING_IF: self.skipping_if_handleLine, 18 | STATE_SKIPPING_ELSE: self.skipping_else_handleLine 19 | } 20 | 21 | def handle_if(self, line, strval, reg, off, strcpy): 22 | clause = line[line.index('(')+1:rindex(line, ')')] 23 | self.stack.append(self.state) 24 | 25 | if eval(clause): 26 | return STATE_IF, None 27 | else: 28 | return STATE_SKIPPING_IF, None 29 | 30 | def handle_endif(self, line, strval, reg, off, strcpy): 31 | prevState = self.stack.pop(-1) 32 | 33 | if len(self.stack) == 0: 34 | return STATE_ASM, None 35 | else: 36 | return prevState, None 37 | 38 | def asm_handleLine(self, line, strval, reg, off, strcpy): 39 | if line.startswith('@if'): 40 | return self.handle_if(line, strval, reg, off, strcpy) 41 | elif line.startswith('@endif'): 42 | return self.handle_endif(line, strval, reg, off, strcpy) 43 | else: 44 | return STATE_ASM, line 45 | 46 | def if_handleLine(self, line, strval, reg, off, strcpy): 47 | if line.startswith('@if'): 48 | return self.handle_if(line, strval, reg, off, strcpy) 49 | elif line.startswith('@else'): 50 | return STATE_SKIPPING_ELSE, None 51 | elif line.startswith('@endif'): 52 | return self.handle_endif(line, strval, reg, off, strcpy) 53 | else: 54 | return STATE_IF, line 55 | 56 | def skipping_if_handleLine(self, line, strval, reg, off, strcpy): 57 | if line.startswith('@if'): 58 | self.stack.append(self.state) 59 | return STATE_SKIPPING_IF, None 60 | elif line.startswith('@else'): 61 | return STATE_ASM, None 62 | elif line.startswith('@endif'): 63 | return self.handle_endif(line, strval, reg, off, strcpy) 64 | else: 65 | return STATE_SKIPPING_IF, None 66 | 67 | def skipping_else_handleLine(self, line, strval, reg, off, strcpy): 68 | if line.startswith('@if'): 69 | self.stack.append(self.state) 70 | return STATE_SKIPPING_ELSE, None 71 | elif line.startswith('@endif'): 72 | return self.handle_endif(line, strval, reg, off, strcpy) 73 | else: 74 | return STATE_SKIPPING_ELSE, None 75 | 76 | def handleLine(self, line, strval, reg, off, strcpy): 77 | if len(line) == 0: 78 | return None 79 | 80 | newState, line = self.stateTable[self.state](line, strval, reg, off, strcpy) 81 | self.state = newState 82 | return line 83 | 84 | 85 | def expand_template(pc, strval, reg, off, strcpy, size_cb): 86 | lines = [] 87 | parser = Parser() 88 | 89 | with open('patch_template.asm') as f: 90 | for line in f: 91 | line = line.strip() 92 | line = line.replace('${reg}', reg) 93 | line = line.replace('${off}', hex(off)) 94 | line = line.replace('${str}', '\n'.join(['.byte %s' % hex(ord(b)) for b in strval])) 95 | line = line.replace('${strcpy}', hex(strcpy - pc)) 96 | line = line.replace('${strlen_plus_5}', hex(len(strval) + 5)) 97 | 98 | line = parser.handleLine(line, strval, reg, off, strcpy) 99 | 100 | if line is not None: 101 | lines.append(line) 102 | pc += size_cb(line) 103 | 104 | return lines 105 | -------------------------------------------------------------------------------- /emulator_utils.py: -------------------------------------------------------------------------------- 1 | from ghidra.program.model.address import Address, AddressSet 2 | from ghidra.app.emulator import EmulatorHelper 3 | from ghidra.util.task import TaskMonitor 4 | from __main__ import * 5 | 6 | import struct as st 7 | 8 | 9 | class Emulator(object): 10 | def __init__(self): 11 | self.emu = EmulatorHelper(currentProgram) 12 | self.emu.enableMemoryWriteTracking(True) 13 | 14 | self.pc_reg = self.emu.getPCRegister() 15 | self.sp_reg = self.emu.getStackPointerRegister() 16 | 17 | self.watchpoints = {} 18 | self.writeSet = AddressSet() 19 | 20 | def get_pc(self): 21 | return self.emu.readRegister(self.pc_reg) 22 | 23 | def set_pc(self, val): 24 | if isinstance(val, Address): 25 | val = val.offset 26 | self.emu.writeRegister(self.pc_reg, val) 27 | 28 | def set_sp(self, val): 29 | if isinstance(val, Address): 30 | val = val.offset 31 | self.emu.writeRegister(self.sp_reg, val) 32 | 33 | def read(self, addr, size): 34 | return bytearray(self.emu.readMemory(addr, size)) 35 | 36 | def step(self, skip_calls): 37 | pc = toAddr(self.get_pc()) 38 | 39 | if skip_calls: 40 | insn = getInstructionAt(pc) 41 | 42 | if insn is None: 43 | print('Instruction is none @ %s' % pc) 44 | exit() 45 | 46 | if insn.flowType.isCall(): 47 | new_pc = pc.add(insn.length) 48 | self.set_pc(new_pc) 49 | return 50 | 51 | prev_pc = pc 52 | self.emu.step(TaskMonitor.DUMMY) 53 | 54 | # This isn't exactly correct since we can only call a watchpoint on an address once but it should work ok. 55 | fullWriteSet = self.emu.trackedMemoryWriteSet 56 | 57 | #for addrSet in fullWriteSet.subtract(self.writeSet): 58 | for addrSet in fullWriteSet: 59 | size = addrSet.maxAddress.subtract(addrSet.minAddress) 60 | 61 | if addrSet.minAddress.isMemoryAddress() and size > 0: 62 | calledHandles = [] 63 | 64 | for addr, handler in self.watchpoints.items(): 65 | if addr >= addrSet.minAddress and addr <= addrSet.maxAddress: 66 | val = self.read(addr, 1) 67 | handler(addr, 1, val, prev_pc.offset, emu=self) 68 | calledHandles.append(addr) 69 | 70 | for addr in calledHandles: 71 | del self.watchpoints[addr] 72 | 73 | self.writeSet = self.writeSet.union(fullWriteSet) 74 | 75 | def watch(self, addr, size, handler): 76 | for _ in range(size): 77 | self.watchpoints[addr] = handler 78 | addr = addr.add(1) 79 | 80 | def readVar(self, var): 81 | if var.isStackVariable(): 82 | stackOff = var.getStackOfset() 83 | return self.emu.readStackValue(stackOff, var.length, False) 84 | 85 | elif var.isRegisterVariable(): 86 | reg = var.getRegister() 87 | return self.emu.readRegister(reg) 88 | 89 | def writeVar(self, var, val): 90 | if var.isStackVariable(): 91 | stackOff = var.getStackOfset() 92 | return self.emu.writeStackValue(stackOff, var.length, val) 93 | 94 | elif var.isRegisterVariable(): 95 | reg = var.getRegister() 96 | return self.emu.writeRegister(reg, val) 97 | 98 | 99 | def call_hook(pc, handler, emu): 100 | args = [] 101 | func = getFunctionAt(pc) 102 | 103 | if func is not None: 104 | args = [emu.readVar(param) for param in func.parameters] 105 | 106 | retval = handler(*args, emu=emu) 107 | 108 | if retval is not None and func.getReturn() is not None: 109 | emu.writeVar(func.getReturn(), retval) 110 | 111 | def emulate(startAddr, endAddr, hooks=None, skip_calls=False): 112 | actual_hooks = {} 113 | 114 | for key, handler in hooks.items(): 115 | if isinstance(key, (str, unicode)): 116 | keys = [func.entryPoint for func in getGlobalFunctions(key)] 117 | elif isinstance(key, (int, long)): 118 | keys = [toAddr(key)] 119 | else: 120 | keys = [key] 121 | 122 | for key in keys: 123 | actual_hooks[key] = handler 124 | 125 | hooks = actual_hooks 126 | 127 | if getInstructionAt(endAddr) is None: 128 | endAddr = getInstructionBefore(endAddr).address 129 | 130 | emu = Emulator() 131 | emu.set_pc(startAddr) 132 | emu.set_sp(0xf0000000) 133 | 134 | prev_pc = None 135 | 136 | while emu.get_pc() != endAddr.offset and emu.get_pc() != prev_pc: 137 | pc = toAddr(emu.get_pc()) 138 | 139 | if pc in hooks: 140 | call_hook(pc, hooks[pc], emu) 141 | 142 | for ref in getReferencesFrom(pc): 143 | if ref.referenceType.isCall(): 144 | dest = ref.toAddress 145 | 146 | if dest in hooks: 147 | call_hook(dest, hooks[dest], emu) 148 | 149 | prev_pc = pc.offset 150 | emu.step(skip_calls) 151 | 152 | return emu 153 | 154 | def watch(addr, size, handler=None, emu=None): 155 | emu.watch(addr, size, handler) 156 | -------------------------------------------------------------------------------- /deoptimizer.py: -------------------------------------------------------------------------------- 1 | from ghidra.program.model.listing import ParameterImpl, FlowOverride 2 | from ghidra.program.model.listing.Function import FunctionUpdateType 3 | from ghidra.program.model.data import PointerDataType, CharDataType 4 | from ghidra.program.model.lang import PrototypeModel, Register 5 | from ghidra.program.model.block import BasicBlockModel 6 | from ghidra.program.model.address import AddressSet 7 | from ghidra.program.model.symbol import SourceType 8 | from ghidra.program.flatapi import FlatProgramAPI 9 | from ghidra.program.model.scalar import Scalar 10 | from ghidra.util.task import TaskMonitor 11 | 12 | import struct as st 13 | import atexit 14 | import string 15 | import math 16 | 17 | from emulator_utils import * 18 | from asm_utils import * 19 | 20 | 21 | def getBasicBlocks(func): 22 | bbm = BasicBlockModel(currentProgram) 23 | buf = bbm.getCodeBlocksContaining(func.entryPoint, TaskMonitor.DUMMY) 24 | visited = set() 25 | blocks = [] 26 | 27 | while len(buf) > 0: 28 | block = buf.pop(0) 29 | 30 | if block not in visited and getFunctionContaining(block.minAddress) == func: 31 | #print(block.minAddress) 32 | blocks.append(block) 33 | visited.add(block) 34 | 35 | iter = block.getDestinations(TaskMonitor.DUMMY) 36 | 37 | while iter.hasNext(): 38 | buf.append(iter.next().destinationBlock) 39 | 40 | return blocks 41 | 42 | class Buffer(object): 43 | def __init__(self, addr, size): 44 | self.start = addr 45 | self.end = addr.add(size) 46 | self.size = size 47 | self.reg = None 48 | self.off = 1000000 49 | self.write_start = 0xffffffffffffffff 50 | self.write_end = 0x0 51 | 52 | class StackString(object): 53 | def __init__(self, value, reg, off, write_start, write_end): 54 | self.value = value 55 | self.reg = reg 56 | self.off = off 57 | self.write_start = write_start 58 | self.write_end = write_end 59 | 60 | class Heap(object): 61 | def __init__(self, head): 62 | self.head = head 63 | 64 | def alloc(self, size): 65 | ptr = self.head 66 | self.head += size 67 | return ptr 68 | 69 | def getStackStrings(): 70 | func = getFunctionContaining(currentAddress) 71 | blocks = getBasicBlocks(func) 72 | 73 | heap = Heap(0x80000000) 74 | bufs = [] 75 | stackStrings = [] 76 | 77 | def handle_write(addr, size, val, pc, emu=None): 78 | #if sum([b in string.printable for b in val.decode('utf-8')]) / float(len(val)) < 0.5: 79 | # return 80 | 81 | for buf in bufs: 82 | if buf.start <= addr and addr <= buf.end: 83 | buf.write_start = min(buf.write_start, pc) 84 | buf.write_end = max(buf.write_end , pc) 85 | 86 | insn = getInstructionAt(toAddr(pc)) 87 | 88 | for i in range(insn.numOperands): 89 | if insn.getOperandRefType(i).isWrite(): 90 | objs = insn.getOpObjects(i) 91 | 92 | for obj in objs: 93 | if isinstance(obj, Register): 94 | buf.reg = obj 95 | elif isinstance(obj, Scalar): 96 | buf.off = min(buf.off, obj.unsignedValue) 97 | 98 | def malloc(size, *args, **kwargs): 99 | ptr = heap.alloc(size) 100 | 101 | if size > 0: 102 | addr = toAddr(ptr) 103 | bufs.append(Buffer(addr, size)) 104 | 105 | if 'emu' in kwargs: 106 | #print('Allocated 0x%x bytes to %s @ %s' % (size, addr, toAddr(kwargs['emu'].get_pc()))) 107 | watch(addr, size, handler=handle_write, emu=kwargs['emu']) 108 | 109 | return ptr 110 | 111 | for block in blocks: 112 | bufs = [] 113 | 114 | #print('Emulating block %s - %s' % (block.minAddress, block.maxAddress)) 115 | cpuState = emulate(block.minAddress, 116 | block.maxAddress, 117 | hooks={'operator.new': malloc}, 118 | skip_calls=True) 119 | 120 | for buf in bufs: 121 | try: 122 | contents = cpuState.read(buf.start, buf.size).decode('utf-8') 123 | except UnicodeDecodeError: 124 | continue 125 | 126 | for i, b in enumerate(contents): 127 | if b == u'\x00': 128 | value = contents[:i+1] 129 | 130 | if len(value) > 8: 131 | # Fixup the write start assuming the instructions are contiguous. 132 | buf.write_start = getInstructionBefore(toAddr(buf.write_start)).address.offset 133 | buf.write_end += getInstructionAt(toAddr(buf.write_end)).length 134 | 135 | print('Found stack string "%s" written to (%s, 0x%x) from %s - %s' % (value, buf.reg, buf.off, toAddr(buf.write_start), toAddr(buf.write_end))) 136 | stackString = StackString(value, buf.reg, buf.off, buf.write_start, buf.write_end) 137 | stackStrings.append(stackString) 138 | 139 | if b not in string.printable: 140 | break 141 | 142 | return stackStrings 143 | 144 | 145 | namespace_man = currentProgram.namespaceManager 146 | strcpy = None 147 | 148 | for namespace in namespace_man.getNamespacesOverlapping(AddressSet(currentProgram.minAddress, currentProgram.maxAddress)): 149 | if 'strcpy' in namespace.name: 150 | strcpy = getFunctionAt(namespace.body.minAddress) 151 | break 152 | 153 | if strcpy is None: 154 | print('Couldn\'t find strcpy') 155 | exit() 156 | 157 | char_dt = CharDataType() 158 | char_ptr_dt = PointerDataType(char_dt) 159 | 160 | cc = currentProgram.functionManager.defaultCallingConvention 161 | new_params = [] 162 | 163 | for dt, name in [(char_ptr_dt, 'dst'), (char_ptr_dt, 'src')]: 164 | arg_loc = cc.getNextArgLocation(new_params, dt, currentProgram) 165 | new_params.append(ParameterImpl(name, dt, arg_loc, currentProgram, SourceType.USER_DEFINED)) 166 | 167 | strcpy.replaceParameters(new_params, FunctionUpdateType.CUSTOM_STORAGE, True, SourceType.USER_DEFINED) 168 | strcpy.setReturnType(char_ptr_dt, SourceType.USER_DEFINED) 169 | 170 | for ss in getStackStrings(): 171 | patch = generate_patch(ss.write_start, 172 | ss.value, 173 | ss.reg.name, 174 | ss.off, 175 | strcpy.entryPoint.offset) 176 | 177 | freeSpace = ss.write_end - ss.write_start 178 | 179 | if len(patch) > freeSpace: 180 | print('Couldn\'t deoptimize %s, %d bytes short' % (ss.value[:-1], len(patch) - freeSpace)) 181 | continue 182 | 183 | # fill the rest with nops 184 | patch += [0x90] * (freeSpace - len(patch)) 185 | 186 | #print('Deoptimizing "%s"' % ss.value[:-1]) 187 | 188 | clearListing(toAddr(ss.write_start), toAddr(ss.write_end)) 189 | currentProgram.memory.setBytes(toAddr(ss.write_start), bytes(bytearray(patch))) 190 | disassemble(toAddr(ss.write_start)) 191 | 192 | # Do some final fixups for the decompiler. 193 | insn = getInstructionAt(toAddr(ss.write_start)) 194 | 195 | while insn is not None: 196 | if insn.flowType.isCall(): 197 | insn.setFlowOverride(FlowOverride.BRANCH) 198 | 199 | stringStart = insn.address.add(insn.length) 200 | clearListing(stringStart, stringStart.add(len(ss.value) - 1)) 201 | createAsciiString(stringStart) 202 | 203 | break 204 | 205 | insn = insn.next 206 | 207 | --------------------------------------------------------------------------------