├── __init__.py ├── .gitignore ├── test_prgm ├── Makefile └── test.c ├── arch ├── x64.py ├── ARM.py ├── x86.py ├── __init__.py └── arch_class.py ├── mem_markers.py ├── LICENSE ├── utils.py ├── README.md ├── pcode_inspector.py ├── notes └── found_arch_strings ├── state.py ├── pcode_interpreter.py ├── mem.py └── instr.py /__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.swp 2 | *.class 3 | *.*.out 4 | *.pyc 5 | References/* 6 | -------------------------------------------------------------------------------- /test_prgm/Makefile: -------------------------------------------------------------------------------- 1 | CC_OPTS := -Wall 2 | 3 | OUT_ARCHES := x86 aarch64 4 | 5 | TEST_OUTS := $(patsubst %, test.%.out, ${OUT_ARCHES}) 6 | 7 | all: ${TEST_OUTS} 8 | 9 | %.x86.out: CC=gcc 10 | %.aarch64.out: CC=aarch64-linux-gnu-gcc 11 | 12 | test.%.out: test.c 13 | ${CC} ${CC_OPTS} $< -o $@ 14 | -------------------------------------------------------------------------------- /arch/x64.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from .x86 import x86 4 | 5 | logger = logging.getLogger(__name__) 6 | 7 | class x64(x86): 8 | LANG_DESC = "x86/.*/64/.*" 9 | INIT_STACK_SIZE = 0x100000 10 | base_ptr = "RBP" 11 | 12 | # Must come after the functions are defined... 
class ARM(Architecture):
    """Architecture selected when Ghidra's language description matches
    the ``ARM/.*/.*/.*`` pattern.
    """
    LANG_DESC = "ARM/.*/.*/.*"
    link_reg = "lr"

    def _arch_fake_function_call(self, state, func_addr, return_addr):
        """Prepare the register state as if a call had just been made.

        :param state: The emulator state whose registers are updated
        :param func_addr: The value written to the program counter
        :param return_addr: The value written to the link register
        """
        # The return address goes in the link register...
        link_offset = self.lookup_reg_offset(self.link_reg)
        state.registers.store(link_offset, self.reg_len, return_addr)
        # ...and the program counter is pointed at the function itself
        state.registers.store(self.pc_offset, self.reg_len, func_addr)

    # Must come after the functions are defined...
    callother_dict = {
    }
class x86(Architecture):
    """Architecture selected when Ghidra's language description matches
    the ``x86/.*/32/.*`` pattern.
    """
    LANG_DESC = "x86/.*/32/.*"
    base_ptr = "EBP"

    def setup_stack(self, state):
        """Run the parent stack setup, then initialise the base pointer
        register to ``INIT_STACK_SIZE``.
        """
        super(x86, self).setup_stack(state)
        base_ptr_offset = self.lookup_reg_offset(self.base_ptr)
        state.registers.store(base_ptr_offset, self.reg_len,
                              self.INIT_STACK_SIZE)

    def _arch_fake_function_call(self, state, func_addr, return_addr):
        """Prepare the state as if a call had just been made.

        The return address is written to RAM at the location currently held
        in the stack pointer, and the program counter is set to func_addr.
        """
        stack_top = state.registers.load(self.stack_ptr_ofst,
                                         self.stack_ptr_size)
        state.ram.store(stack_top, self.reg_len, return_addr)
        state.registers.store(self.pc_offset, self.reg_len, func_addr)

    def co_swi(self, state, callother_index, param):
        """Callother handler registered for index 0xc.

        The operation is not really implemented: it only stores 0 in EAX
        rather than raising a "not implemented" error.
        """
        eax_offset = self.lookup_reg_offset("EAX")
        state.registers.store(eax_offset, self.reg_len, 0)

    # This definition must come after the functions are defined...
    callother_dict = {
        0xc: co_swi,
    }
def instantiate_architecture(api_base):
    """Instantiate the proper architecture for the current program

    Every subclass of Architecture is considered. A subclass matches when its
    LANG_DESC regular expression matches the string form of the current
    program's language description. Exactly one subclass must match.

    :param api_base: The object from which Ghidra has derived the flat api
    :type api_base: ghidra.python.PythonScript

    :return: An appropriate architecture for the current program, or None
    :rtype: Architecture or None
    """
    lang_desc = str(api_base.getCurrentProgram().getLanguage().
                    getLanguageDescription())

    matching_arches = []
    for arch in find_all_subclasses(Architecture):
        logging.debug("Arch lang desc {} prgm {}".format(
            arch.LANG_DESC, lang_desc)
        )
        # An architecture that leaves LANG_DESC as None can never match.
        # Test for that explicitly instead of catching the TypeError that
        # re.match would raise, so that a TypeError raised while constructing
        # a matching architecture is no longer silently discarded.
        if arch.LANG_DESC is None:
            continue
        if re.match(arch.LANG_DESC, lang_desc):
            matching_arches.append(arch(api_base))

    if len(matching_arches) != 1:
        logging.error("Found wrong number of architecture matches: {}"
                      "".format(matching_arches))
        return None
    return matching_arches[0]
def find_all_subclasses(parent):
    """Return all descendents of the parent class.  This requires all
    subclasses to be part of the calling scope.

    :param parent: The parent class
    :type parent: class

    :return: A set of all subclasses (the parent itself is not included)
    :rtype: set
    """
    # Worklist traversal: each class is expanded exactly once, and classes
    # reachable through several bases are only recorded the first time.
    found = set()
    pending = [parent]
    while pending:
        current = pending.pop()
        for child in current.__subclasses__():
            if child not in found:
                found.add(child)
                pending.append(child)
    return found


def get_func_extents(func):
    """Return the lowest and highest addresses of a function's body.

    :param func: An object whose getBody() returns an address set
    :return: A (min_address, max_address) tuple
    :rtype: tuple
    """
    body = func.getBody()
    return body.getMinAddress(), body.getMaxAddress()


def format_loc(api_base, addr_int):
    """Format an integer address as "function+0xoffset(0xaddress)".

    :param api_base: The flat API base used to resolve the address
    :param addr_int: The address to describe
    :type addr_int: int

    :return: A description relative to the containing function's start, or
        "<unknown>(0xaddress)" when no function contains the address
    :rtype: str
    """
    func = api_base.getFunctionContaining(api_base.toAddr(addr_int))
    if func is None:
        # The address is outside every known function, so there is no
        # function name or start address to report against.
        return "<unknown>(0x{:x})".format(addr_int)
    func_st_addr, _ = get_func_extents(func)
    loc_diff = addr_int - func_st_addr.offset
    return "{}+0x{:x}(0x{:x})".format(func.name, loc_diff, addr_int)
If you needed that I'm sorry, it's on the list of stuff to do. It needs a testing framework and documentation building. 6 | 7 | So, you know, I'm a pro. This bugs me. But the day of the talk is here and therefore the time to publish this code is now. 8 | 9 | # Installation 10 | From the source directory here... 11 | 12 | ``` 13 | mkdir "$HOME/ghidra_scripts" 14 | ln -s "$PWD" "$HOME/ghidra_scripts/ghidra_pcode_interpreter" 15 | ln -s "$PWD/pcode_interpreter.py" "$HOME/ghidra_scripts/pcode_interpreter.py" 16 | ln -s "$PWD/pcode_inspector.py" "$HOME/ghidra_scripts/pcode_inspector.py" 17 | ``` 18 | 19 | # Usage 20 | Refresh your script list in Ghidra. Scroll down to the PCode category. Select the function you want to execute in the decompiler or program listing window. Make sure you've committed your function prototype (right click in the decompiler and click "Commit Params/Return"). Then double click the `pcode_interpreter.py` script. 21 | 22 | Logging output currently goes both to your Ghidra console and to `/tmp/pcode_interpret.log`. If you're on a multiuser system please be aware of this temp logging location... Also, the temp log is a debug log, so it can grow quite large. It's overwritten each run. 23 | 24 | # More Info 25 | My Saintcon 2019 talk on this is at https://github.com/kc0bfv/Saintcon2019GhidraTalk 26 | -------------------------------------------------------------------------------- /pcode_inspector.py: -------------------------------------------------------------------------------- 1 | #Prints information about pcode. Click on a function in the decompiler window, run this. 
def print_pcode_info(func, state, stop_addr):
    """Inspect every location from the current program counter up to and
    including stop_addr, logging each location as it is visited.

    :param func: Not used by this function
    :param state: The emulator state to step through
    :param stop_addr: The last location (inclusive) to inspect
    :type stop_addr: int
    """
    position = state.get_pc()
    while position <= stop_addr:
        logging.info("Current location: 0x{:x}".format(position))
        try:
            position = state.inspect_cur_location()
        except InvalidAddrException:
            # Nothing to inspect here - move the program counter forward by
            # one and try again from there
            logging.info("No code at location")
            state.set_pc(state.get_pc() + 1)
            position = state.get_pc()
logger.info("Parameter Information") 66 | for param in params: 67 | logger.info("Paramter ordinal {} storage {} varnode {}".format( 68 | param.getOrdinal(), param.getVariableStorage(), 69 | param.getFirstStorageVarnode()) 70 | ) 71 | 72 | print_pcode_info(containing_func, state, func_end.offset) 73 | 74 | 75 | if __name__ == "__main__": 76 | main() 77 | -------------------------------------------------------------------------------- /notes/found_arch_strings: -------------------------------------------------------------------------------- 1 | id="PIC-18:LE:24:PIC-18"> 2 | id="PIC-16:LE:16:PIC-16"> 3 | id="PIC-16:LE:16:PIC-16F"> 4 | id="PIC-16:LE:16:PIC-16C5x"> 5 | id="PIC-12:LE:16:PIC-12C5xx"> 6 | id="PIC-17:LE:16:PIC-17C7xx"> 7 | id="PIC-24E:LE:24:default"> 8 | id="PIC-24F:LE:24:default"> 9 | id="PIC-24H:LE:24:default"> 10 | id="dsPIC30F:LE:24:default"> 11 | id="dsPIC33F:LE:24:default"> 12 | id="dsPIC33E:LE:24:default"> 13 | id="TI_MSP430:LE:16:default"> 14 | id="TI_MSP430X:LE:32:default"> 15 | id="8085:LE:16:default"> 16 | id="Toy:BE:32:default"> 17 | id="Toy:BE:32:posStack"> 18 | id="Toy:LE:32:default"> 19 | id="Toy:BE:32:wordSize2"> 20 | id="Toy:LE:32:wordSize2"> 21 | id="Toy:BE:64:default"> 22 | id="Toy:LE:64:default"> 23 | id="Toy:BE:32:builder"> 24 | id="Toy:LE:32:builder"> 25 | id="Toy:BE:32:builder.align2"> 26 | id="Toy:LE:32:builder.align2"> 27 | id="MIPS:BE:32:default"> 28 | id="MIPS:LE:32:default"> 29 | id="MIPS:BE:32:R6"> 30 | id="MIPS:LE:32:R6"> 31 | id="MIPS:BE:64:default"> 32 | id="MIPS:LE:64:default"> 33 | id="MIPS:BE:64:micro"> 34 | id="MIPS:LE:64:micro"> 35 | id="MIPS:BE:64:R6"> 36 | id="MIPS:LE:64:R6"> 37 | id="MIPS:BE:64:64-32addr"> 38 | id="MIPS:LE:64:64-32addr"> 39 | id="MIPS:LE:64:micro64-32addr"> 40 | id="MIPS:BE:64:micro64-32addr"> 41 | id="MIPS:BE:64:64-32R6addr"> 42 | id="MIPS:LE:64:64-32R6addr"> 43 | id="MIPS:BE:32:micro"> 44 | id="MIPS:LE:32:micro"> 45 | id="6805:BE:16:default"> 46 | id="DATA:LE:64:default"> 47 | 
id="DATA:BE:64:default"> 48 | id="avr8:LE:16:default"> 49 | id="avr8:LE:16:extended"> 50 | id="avr8:LE:16:atmega256"> 51 | id="avr32:BE:32:default"> 52 | id="z80:LE:16:default"> 53 | id="z8401x:LE:16:default"> 54 | id="z180:LE:16:default"> 55 | id="z182:LE:16:default"> 56 | id="68000:BE:32:default"> 57 | id="68000:BE:32:MC68030"> 58 | id="68000:BE:32:MC68020"> 59 | id="68000:BE:32:Coldfire"> 60 | id="JVM:BE:32:default"> 61 | id="ARM:LE:32:v8"> 62 | id="ARM:LEBE:32:v8LEInstruction"> 63 | id="ARM:BE:32:v8"> 64 | id="ARM:LE:32:v7"> 65 | id="ARM:LEBE:32:v7LEInstruction"> 66 | id="ARM:BE:32:v7"> 67 | id="ARM:LE:32:Cortex"> 68 | id="ARM:BE:32:Cortex"> 69 | id="ARM:LE:32:v6"> 70 | id="ARM:BE:32:v6"> 71 | id="ARM:LE:32:v5t"> 72 | id="ARM:BE:32:v5t"> 73 | id="ARM:LE:32:v5"> 74 | id="ARM:BE:32:v5"> 75 | id="ARM:LE:32:v4t"> 76 | id="ARM:BE:32:v4t"> 77 | id="ARM:LE:32:v4"> 78 | id="ARM:BE:32:v4"> 79 | id="6502:LE:16:default"> 80 | id="6502:BE:16:default"> 81 | id="PowerPC:BE:32:default"> 82 | id="PowerPC:LE:32:default"> 83 | id="PowerPC:BE:64:default"> 84 | id="PowerPC:BE:64:64-32addr"> 85 | id="PowerPC:LE:64:64-32addr"> 86 | id="PowerPC:LE:64:default"> 87 | id="PowerPC:BE:32:4xx"> 88 | id="PowerPC:LE:32:4xx"> 89 | id="PowerPC:BE:32:MPC8270"> 90 | id="PowerPC:BE:32:QUICC"> 91 | id="PowerPC:LE:32:QUICC"> 92 | id="PowerPC:BE:64:A2-32addr"> 93 | id="PowerPC:LE:64:A2-32addr"> 94 | id="PowerPC:BE:64:A2ALT-32addr"> 95 | id="PowerPC:LE:64:A2ALT-32addr"> 96 | id="PowerPC:BE:64:A2ALT"> 97 | id="PowerPC:LE:64:A2ALT"> 98 | id="PowerPC:BE:64:VLE-32addr"> 99 | id="PowerPC:BE:64:VLEALT-32addr"> 100 | id="x86:LE:32:default"> 101 | id="x86:LE:32:System Management Mode"> 102 | id="x86:LE:16:Real Mode"> 103 | id="x86:LE:64:default"> 104 | id="pa-risc:BE:32:default"> 105 | id="8051:BE:16:default"> 106 | id="80251:BE:24:default"> 107 | id="80390:BE:24:default"> 108 | id="8051:BE:24:mx51"> 109 | id="AARCH64:LE:64:v8A"> 110 | id="AARCH64:BE:64:v8A"> 111 | id="sparc:BE:32:default"> 112 | 
class State(object):
    """The emulator state: a register space, a RAM space, and a unique
    space that is recreated at the start of every stepped location.
    """

    def __init__(self, api_base):
        """Build the state for the current program.

        :param api_base: The object from which Ghidra has derived the flat api

        :raises RuntimeError: When no architecture could be instantiated
        """
        self.api_base = api_base
        self.arch = instantiate_architecture(self.api_base)
        if self.arch is None:
            raise RuntimeError("No supported architectures found")

        self.registers = Registers(self.api_base, self.arch)
        self.ram = Ram(self.api_base, self.arch)
        # Created fresh by _step_cur_loc each time a location is stepped
        self.uniques = None

    def execute_cur_location(self):
        """Execute the pcode at the program counter; return the new pc."""
        return self._step_cur_loc(True)

    def inspect_cur_location(self):
        """Log the pcode at the program counter without executing it;
        return the new pc.
        """
        return self._step_cur_loc(False)

    def _step_cur_loc(self, do_execute):
        """Advance past the instruction at the program counter, logging
        (and optionally executing) each of its pcode operations.

        :param do_execute: Execute the operations when True, only log them
            when False
        :type do_execute: bool

        :return: The program counter after the step
        """
        self.uniques = Uniques(self.api_base, self.arch)

        # Get the instructions and instruction size
        cur_loc = self.get_pc()
        instrs, instr_size = self.ram.get_code(cur_loc)

        # Step the prog counter before any branches might update it.  The
        # size from the decode above is reused, rather than calling
        # step_pc(), which would decode the same instruction a second time.
        self.set_pc(cur_loc + instr_size)

        # Execute each instruction
        run_type = "Executing" if do_execute else "Instruction"
        log_type = logger.debug if do_execute else logger.info
        for instr in instrs:
            log_type("{} {}".format(run_type, instr))
            if do_execute:
                instr.execute(self)
                logger.debug(self)

        return self.get_pc()

    def __str__(self):
        return "Registers {}\nRam {}\nUniques {}".format(self.registers,
                self.ram, self.uniques)

    def get_pc(self):
        """Return the value currently stored in the program counter."""
        return self.registers.load(self.arch.pc_offset, self.arch.reg_len)

    def set_pc(self, location):
        """Store location in the program counter."""
        self.registers.store(self.arch.pc_offset, self.arch.reg_len, location)

    def step_pc(self):
        """Advance the program counter by the size of the instruction it
        currently points at.
        """
        cur_loc = self.get_pc()
        _, instr_size = self.ram.get_code(cur_loc)
        self.set_pc(cur_loc + instr_size)

    def set_varnode(self, varnode, value):
        """Store value in the space the varnode refers to.

        :raises RuntimeError: When the varnode is not a register, unique,
            memory address or stack address
        """
        if varnode.isRegister():
            self.registers.store(varnode.offset, varnode.size, value)
        elif varnode.isUnique():
            self.uniques.store(varnode.offset, varnode.size, value)
        elif varnode.isAddress():
            self.ram.store(varnode.offset, varnode.size, value)
        elif varnode.getAddress().isStackAddress():
            # Stack varnodes carry an offset that the architecture turns
            # into a RAM address
            addr = self.arch.resolve_stack_address(self, varnode.offset)
            self.ram.store(addr, varnode.size, value)
        else:
            raise RuntimeError("Invalid varnode for setting: {}"
                    "".format(varnode))

    def read_varnode(self, varnode):
        """Return the value of the space the varnode refers to.  A constant
        varnode evaluates to its own offset.

        :raises RuntimeError: When the varnode type is not recognized
        """
        if varnode.isRegister():
            return self.registers.load(varnode.offset, varnode.size)
        elif varnode.isUnique():
            return self.uniques.load(varnode.offset, varnode.size)
        elif varnode.isAddress():
            return self.ram.load(varnode.offset, varnode.size)
        elif varnode.isConstant():
            return varnode.offset
        elif varnode.getAddress().isStackAddress():
            addr = self.arch.resolve_stack_address(self, varnode.offset)
            return self.ram.load(addr, varnode.size)
        else:
            raise RuntimeError("Unknown varnode type: {}".format(varnode))

    def setup_stack(self):
        """Have the architecture set up the stack in this state."""
        self.arch.setup_stack(self)

    def fake_function_call(self, func_addr):
        """Have the architecture set this state up as though func_addr had
        just been called.
        """
        self.arch.fake_function_call(self, func_addr)
def analyze_func_at(addr):
    """Emulate the function containing an address

    The user is prompted for each parameter value, the function is run until
    the program counter reaches the architecture's sentinel return address,
    and the return value is logged.

    :param addr: An address in the function of interest
    """
    # Setup necessary emulator state
    # TODO Move this somewhere better
    state = State(get_api_base(getInstructionAt))
    state.ram.store(0x08, 8, 0x0000000000414141)
    state.ram.store(0x10, 8, 0x0000000000333231)
    state.ram.store(0x18, 8, 0x0000000000000000)
    state.ram.store(0x20, 8, 0x0041414141414141)

    # Find the function surrounding addr
    containing_func = None
    try:
        containing_func = getFunctionContaining(addr)
    except:
        # Deliberately broad: any lookup failure is reported below as
        # "could not get containing function"
        pass
    if containing_func is None:
        logger.error("Could not get containing function for selection")
        return

    # Input and store the function parameters
    param_inputs = get_parameters(containing_func)
    for param, param_val in param_inputs:
        param_vn = param.getFirstStorageVarnode()
        state.set_varnode(param_vn, param_val)

    start_point, func_end = get_func_extents(containing_func)
    logger.debug("Func body {} - {}".format(start_point, func_end))

    # Emulate the conditions of a function call
    state.setup_stack()
    state.fake_function_call(start_point.offset)

    # Run the code in the function
    run_pcode(containing_func, state, state.arch.sentinel_return_addr)

    # Read the return value
    return_obj = containing_func.getReturn()
    return_varnode = return_obj.getFirstStorageVarnode()
    if return_varnode is None:
        # NOTE(review): presumably the case for functions with no return
        # storage (e.g. void) - confirm against Ghidra's behavior.  Without
        # a varnode there is nothing to read, so only the state is logged.
        logger.info("Final state: {}".format(state))
        logger.info("No return value storage for this function")
        return
    orig_outval = state.read_varnode(return_varnode)

    # Determine if output should be interpreted as signed
    interpret_as_signed = False
    try:
        interpret_as_signed = return_obj.getDataType().isSigned()
    except:
        # Deliberately broad: a data type without isSigned() is simply
        # treated as unsigned
        pass

    # Interpret outval as signed if necessary
    outval = orig_outval
    if interpret_as_signed:
        bit_count = return_varnode.size * state.arch.bits_per_byte
        sign = (outval >> (bit_count - 1)) & 1
        if sign == 1:
            # Two's complement over the width of the return value itself.
            # A fixed 64-bit mask would give the wrong magnitude for any
            # narrower return type (e.g. a 32-bit 0xffffffff must be -1).
            outval -= 1 << bit_count

    logger.info("Final state: {}".format(state))
    logger.info("Output value: {} or 0x{:x}".format(outval, orig_outval))
curr_addr = askAddress("Starting Address", "Provide starting address:") 129 | else: 130 | curr_addr = currentLocation.address 131 | 132 | analyze_func_at(curr_addr) 133 | 134 | if __name__ == "__main__": 135 | main() 136 | -------------------------------------------------------------------------------- /mem.py: -------------------------------------------------------------------------------- 1 | from collections import defaultdict 2 | import logging 3 | 4 | from ghidra.program.model.mem import MemoryAccessException 5 | 6 | from .instr import instruction_finder 7 | from .arch import LE, BE 8 | from .mem_markers import UniqueMarker 9 | 10 | logger = logging.getLogger(__name__) 11 | 12 | class InvalidAddrException(Exception): 13 | pass 14 | 15 | class NotCodeException(Exception): 16 | pass 17 | 18 | class InvalidRegException(Exception): 19 | pass 20 | 21 | class CodeWord(list): 22 | """ 23 | This class represents a memory location containing code. It will 24 | store a set of PCODE instruction classes. 25 | 26 | Note that this implementation leads to a serious deficiency - code and 27 | data are not interchangable here! For instance - if code is 28 | self-modifying then the modified result will not be interpreted as code 29 | by the emulator, and the emulator will die when it tries to execute 30 | that (see Ram's get_code for that...). Of course - in order to interpret 31 | self-modifying code, we also need a way to turn that code into PCODE, 32 | which is beyond the scope of this software, therefore, this serious 33 | deficiency is reasonable. 
class MemChunk(defaultdict):
    """
    This is intended to be an implementation of both reg and ram.

    Values are kept one byte per key; a key that has never been written
    reads as 0.
    """
    def __init__(self, api_base, arch):
        super(MemChunk, self).__init__(int)

        self.api_base = api_base
        self.arch = arch

    def _validate_code(self, addr):
        """Return True when Ghidra has an instruction at addr."""
        # The address is handed over as a hex string, minus the trailing
        # "L" that long values carry
        addr_text = hex(addr)
        if addr_text.endswith("L"):
            addr_text = addr_text[:-1]
        ghidra_addr = self.api_base.toAddr(addr_text)
        return self.api_base.getInstructionAt(ghidra_addr) is not None

    def _validate_writeable(self, addr):
        """Return True when addr may be written - anywhere that is not
        code.
        """
        # TODO: check for actual writeableness, maybe
        return not self._validate_code(addr)

    def store(self, address, length, value):
        """Store a value at the given address, of "length" bytes, with
        endianness matching the architecture

        :param address: The index at which to store value
        :type addres: int
        :param length: The number of bytes over which to store value
        :type length: int
        :param value: The value to store
        :type value: int

        :raises InvalidAddrException: When the address was not writeable
        """
        if not self._validate_writeable(address):
            raise InvalidAddrException("Tried write at non-writeable spot")

        address = long(address)
        remaining = long(value)

        # Python treats negative numbers in bitwise operations as 2s
        # complement with an infinite run of 1s above the most significant
        # bit, so masking and shifting below is already sign extended - the
        # same thing a processor would do.  Negative numbers just work.
        for ind in range(length):
            # The least significant byte goes first for little endian and
            # last for big endian
            if self.arch.endian is LE:
                byte_offset = ind
            else:
                byte_offset = (length - 1) - ind
            self[address + byte_offset] = (
                remaining & (2**self.arch.bits_per_byte - 1))
            remaining >>= self.arch.bits_per_byte

    def load_byte(self, address):
        """Load just one byte from address
        """
        return self[address]

    def load(self, address, length):
        """Load a value from the given address, of "length" bytes, with
        endianness matching the architecture.

        :param address: The index from which to load
        :type addres: int
        :param length: The number of bytes to load
        :type length: int

        :return: The value loaded
        :rtype: int
        """
        address = long(address)

        total = 0
        for ind in range(length):
            if self.arch.endian is LE:
                byte_offset = ind
            else:
                byte_offset = (length - 1) - ind
            one_byte = self.load_byte(address + byte_offset) % 256
            total += one_byte << (ind * self.arch.bits_per_byte)
        return long(total)

    def __str__(self):
        entries = []
        for key in sorted(self.keys()):
            entries.append("0x{:x}: {}".format(key, hex(self[key])))
        return ", ".join(entries)
131 | return True 132 | 133 | def __str__(self): 134 | def fmt_key(key): 135 | reg = None 136 | try: 137 | # Get the register object if possible 138 | reg = self.arch.lookup_reg_by_offset(key) 139 | except IndexError as e: 140 | logging.debug("Register not found {} {}".format(key, e)) 141 | 142 | reg_size = 1 143 | reg_name = key 144 | if reg is not None: 145 | reg_size = reg.getMinimumByteSize() 146 | reg_name = "{}({:x})".format(reg.name, key) 147 | out_txt = "{}: 0x{:x}".format(reg_name, self.load(key, reg_size)) 148 | used_keys = set(range(key, key + reg_size)) 149 | return out_txt, used_keys 150 | sorted_keys = sorted(self.keys()) 151 | vals = list() 152 | all_used_keys = set() 153 | for key in sorted_keys: 154 | if key in all_used_keys: 155 | continue 156 | out_txt, used_keys = fmt_key(key) 157 | all_used_keys = all_used_keys.union(used_keys) 158 | vals.append(out_txt) 159 | return ", ".join(vals) 160 | 161 | class Uniques(MemChunk): 162 | def _validate_writeable(self, addr): 163 | # This assumes that Ghidra will only try to write to writeable 164 | # uniques. 165 | return True 166 | 167 | def store(self, address, length, value): 168 | """Store a value just like for the parent class, however, if 169 | the value is a UniqueMarker instance, store it as a special case 170 | at only the address. 171 | """ 172 | if isinstance(value, UniqueMarker): 173 | self[address] = value 174 | else: 175 | super(Uniques, self).store(address, length, value) 176 | def load(self, address, length): 177 | """Load a value just like for the parent class, however, if 178 | the value is a UniqueMarker instance return only it. 
class Ram(MemChunk):
    """Memory space that falls back to the program's own bytes for any
    address the emulator has not written.
    """
    def load_byte(self, address):
        """Return the byte at address: the emulator's stored value when
        there is one, otherwise the byte Ghidra reports, or 0 when Ghidra
        cannot read that address.
        """
        if address not in self:
            try:
                # It handles 64 bit values better when they're hex strings
                # without an L at the end
                addr_text = hex(long(address))[:-1]
                ghidra_addr = self.api_base.toAddr(addr_text)
                return self.api_base.getByte(ghidra_addr)
            except MemoryAccessException:
                logger.debug("mem access except")
                return 0
        return self[address]

    def get_code(self, address):
        """Return the pcode instructions at address and the size of the
        instruction they came from.

        :raises InvalidAddrException: When there is no instruction at address

        :return: A (list of instructions, instruction size) tuple
        """
        if not self._validate_code(address):
            raise InvalidAddrException("No code at address")

        ghidra_inst = self.api_base.getInstructionAt(
            self.api_base.toAddr(address))
        size = ghidra_inst.length

        decoded = []
        for pcode in ghidra_inst.getPcode():
            decoded.append(instruction_finder(pcode, self.arch))

        return decoded, size
def __init__(self, api_base):
    """Read the architecture settings of the current program from Ghidra.

    :param api_base: Object providing ``getCurrentProgram()``; it is kept
        on the instance as ``self.api_base``
    """
    self.api_base = api_base
    # Cache used by lookup_reg_by_offset: offset -> register object
    self.reg_offset_lookedup = dict()

    # Determine settings of the current architecture
    self.cur_prog = self.api_base.getCurrentProgram()
    self.lang = self.cur_prog.getLanguage()
    self.lang_desc = self.lang.getLanguageDescription()

    # Processor properties
    self.proc = self.lang_desc.getProcessor()
    self.proc_size = self.lang_desc.getSize()

    # Properties of the program counter
    self.pc = self.lang.getProgramCounter()
    self.pc_offset = self.pc.getOffset()
    self.pc_byte_size = self.pc.getMinimumByteSize()
    self.pc_bit_count = self.pc.getBitLength()
    self.pc_bits_per_byte = self.pc_bit_count // self.pc_byte_size

    # Register properties. The general register length and the bits per
    # byte are both taken from the program counter register.
    self.endian = BE if self.lang.isBigEndian() else LE
    self.reg_len = self.pc_byte_size
    self.bits_per_byte = self.pc_bits_per_byte

    # This address stands in as a return address for the original function
    # call. Therefore - it should be a return address that will never
    # appear during legitimate program execution. That may be
    # architecture dependent...
    # NOTE(review): this is an integer product (0x7f8 when reg_len is 8),
    # not reg_len bytes of 0xff - confirm which value was intended.
    self.sentinel_return_addr = 0xff * self.reg_len

    # Determine how the stack is recorded
    self.varnode_context = VarnodeContext(
        self.cur_prog,
        self.cur_prog.getProgramContext(),
        self.cur_prog.getProgramContext())
    self.stack_varnode = self.varnode_context.getStackVarnode()
    self.stack_ptr_ofst = self.stack_varnode.offset
    self.stack_ptr_size = self.stack_varnode.size
def lookup_reg_by_offset(self, offset):
    """Lookup the register that starts at one of Ghidra's register offsets.

    Multiple registers might share an offset, e.g. EIP and RIP. The
    outermost parent register that still starts at ``offset`` is chosen,
    and the result is cached in ``self.reg_offset_lookedup``.

    :param offset: The register offset to lookup
    :type offset: int

    :raises IndexError: When no register starts at the offset

    :return: The register object (one of ``self.lang.getRegisters()`` or
        one of its parents) corresponding to offset. Callers read
        attributes such as its name from it; it is not a plain string.
    """
    if offset not in self.reg_offset_lookedup:
        # int() promotes to long automatically on Python 2, so it compares
        # large offsets correctly without needing the long builtin.
        wanted = int(offset)
        matching_regs = [reg for reg in self.lang.getRegisters()
                         if int(reg.getOffset()) == wanted]
        if not matching_regs:
            raise IndexError(
                "No register at offset {}".format(offset))

        # We want to return the highest parent reg that has same offset
        best_match = matching_regs[0]
        next_try = best_match.getParentRegister()
        while next_try is not None and int(next_try.getOffset()) == wanted:
            best_match = next_try
            next_try = best_match.getParentRegister()
        self.reg_offset_lookedup[offset] = best_match

    return self.reg_offset_lookedup[offset]
def resolve_callother(self, callother_index, param):
    """Return the callother function that corresponds to callother_index
    and, optionally, param. This version assumes that the architecture
    subclasses have a dictionary called callother_dict with indexes as
    keys and architecture class functions as values.

    :param callother_index: Ghidra's index of the callother
    :type callother_index: int
    :param param: The parameter Ghidra specifies for the callother
    :type param: int

    :raises RuntimeError: When there's no existing callother_index,
        including when the class never defined callother_dict

    :return: The function corresponding to callother_index and param,
        bound to this architecture instance. The function must take as
        arguments the program state, the callother_index, and parameter.
        It must return either None or the new program counter. If it
        returns None, the program counter will continue unchanged.
    :rtype: function(State, callother_index, param) ->
        None or new_program_counter
    """
    # The base class leaves callother_dict as None; treat that as "nothing
    # implemented" so the documented RuntimeError is raised rather than a
    # TypeError from testing membership in None.
    implementations = self.callother_dict or {}
    if callother_index not in implementations:
        raise RuntimeError("No callother implemented for {} {}".format(
            callother_index, param))

    # Entries are plain functions taken from the class body, so bind the
    # unbound version to this instance before returning it.
    unbound_version = implementations[callother_index]
    return types.MethodType(unbound_version, self)
just demonstrates a bunch of register updates, for sure 30 | #define UPDATE_MANY_REGS(retval) { \ 31 | long unsigned int temp_retval = 0; \ 32 | asm ( \ 33 | "mov %[inval], %%rax;" \ 34 | "mov %%rax, %%rbx;" \ 35 | "mov %%rbx, %%rcx;" \ 36 | "mov %%rcx, %%rdx;" \ 37 | "mov %%rdi, %%rsi;" \ 38 | "mov %%rsi, %%rdi;" \ 39 | "mov %%rdi, %[temp_retval];" \ 40 | : [temp_retval] "=r" (temp_retval) \ 41 | : [inval] "r" (inval) \ 42 | : "cc", "rax", "rbx", "rcx", "rdx", "rsi", "rdi" \ 43 | ); \ 44 | asm ( \ 45 | "mov %[temp_retval], %%r8;" \ 46 | "mov %%r8, %%r9;" \ 47 | "mov %%r9, %%r10;" \ 48 | "mov %%r10, %%r11;" \ 49 | "mov %%r11, %%r12;" \ 50 | "mov %%r12, %%r13;" \ 51 | "mov %%r13, %%r14;" \ 52 | "mov %%r14, %%r15;" \ 53 | "mov %%r15, %[retval];" \ 54 | : [retval] "=r" (retval) \ 55 | : [temp_retval] "r" (temp_retval) \ 56 | : "cc", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15" \ 57 | ); \ 58 | } 59 | 60 | #elif defined __aarch64__ 61 | // TODO: implement these in assembly. They're totally wrong here. 
/* Sleep for secs seconds plus nanosecs nanoseconds and return how many
 * nanosleep() calls were needed. Whenever nanosleep() returns non-zero, the
 * remaining time it reported is slept again.
 *
 * A request that nanosleep() would reject outright (negative seconds, or
 * nanoseconds outside 0..999999999) returns 0 without sleeping. Such a
 * request previously looped forever, because the call kept failing and the
 * remaining time was never updated.
 */
int try_sleep(time_t secs, long nanosecs) {
    struct timespec sleep_len = { 0 }, rem = { 0 };
    int retval = -1, tries = 0;

    if( secs < 0 || nanosecs < 0 || nanosecs > 999999999L ) {
        return 0;
    }

    sleep_len.tv_sec = secs;
    sleep_len.tv_nsec = nanosecs;
    while( retval != 0 ) {
        tries += 1;
        retval = nanosleep(&sleep_len, &rem);
        /* On an interrupted sleep, rem holds the time still owed */
        sleep_len.tv_sec = rem.tv_sec;
        sleep_len.tv_nsec = rem.tv_nsec;
    }
    return tries;
}
179 | // 180 | printf("Investigate add flags 0x%x 0x%lx\n", test_val, investigate_add_flags(test_val, test_val)); 181 | 182 | test_val = 0x0; 183 | int test_val_2 = 0x80000001; 184 | /* With subtracting: 185 | 0xfff - 0x1000 - carry bit, no overflow - 0xffffffff 186 | 0x0 - 0x1000 - carry bit, no overflow - 0xfffff000 187 | 0xffffffff - 0x1000 - no carry bit, no overflow 188 | 0x80001000 - 0x1000 - no carry bit, no overflow 189 | 0x80000fff - 0x1000 - no carry bit, overflow - 0x7fffffff 190 | 0x80000000 - 0x1000 - no carry bit, overflow - 0x7ffff000 191 | 0x80000000 - 0x7fffffff - no carry bit, overflow 192 | 0x80000000 - 0x80000000 - no carry bit, no overflow (zero) 193 | 0x80000000 - 0x80000001 - carry bit, no overflow 194 | 0x1000 - 0x80000000 - carry bit, overflow - 0x80001000 195 | 196 | pos minus neg - should be pos 197 | neg minus pos - should be neg 198 | pos minus pos - always gonna be pos or 0, no overflow 199 | neg minus neg - always gonna be neg or 0, no overflow 200 | */ 201 | // For sub, carry bit indicates unsigned overflow. It happens when a bigger number is subtracted from a smaller number. Ghidra uses INT_LESS to calculate CF in a sub. 202 | // For sub, overflow bit indicates signed overflow. It can only happen when the operand signs are different. If the result, in that case, doesn't have the same sign as the first operand, then it's an overflow. Ghidra uses INT_SBORROW to calculat OF in a sub. 
203 | printf("Investigate sub flags 0x%x 0x%x 0x%lx\n", test_val, test_val_2, investigate_sub_flags(test_val, test_val_2)); 204 | 205 | printf("Big sub 1 %i\n", big_sub(1)); 206 | printf("Big sub 0 %i\n", big_sub(0)); 207 | printf("Big sub -1 %i\n", big_sub(-1)); 208 | printf("Big sub -2 %i\n", big_sub(-2)); 209 | printf("Big sub 0x80000001 %i\n", big_sub(0x80000001)); 210 | printf("Big sub 0x80000000 %i\n", big_sub(0x80000000)); 211 | printf("Big sub 0x7fffffff %i\n", big_sub(0x7fffffff)); 212 | 213 | printf("Flags: C1Px AxZS TIDO\n"); 214 | // carry, parity, adjust, zero, sign, trap, interupt enable, direction overflow 215 | 216 | printf("Div result 10/2 %li\n", div(10, 2)); 217 | printf("Unsigned div result 10 / 2 %li\n", unsign_div(10, 2)); 218 | printf("Div result -10/2 %li\n", div(-10, 2)); 219 | printf("Unsigned div result -10 / 2 %li\n", unsign_div(-10, 2)); 220 | 221 | float f_a = 1.1, f_b = 2.2; 222 | printf("Float test %f + %f = %f\n", f_a, f_b, float_test(f_a, f_b)); 223 | 224 | double d_a = 1.1, d_b = 2.2; 225 | printf("Double test %f + %f = %f\n", d_a, d_b, double_test(d_a, d_b)); 226 | 227 | return 0; 228 | } 229 | -------------------------------------------------------------------------------- /instr.py: -------------------------------------------------------------------------------- 1 | """Implements the pcode instructions. 2 | 3 | Intended usage is to call "instruction_finder", providing a Ghidra pcode 4 | object. That will return an appropriate instance of an Instruction object 5 | corresponding to the pcode instruction. That instance can then have "execute" 6 | run on it. 7 | """ 8 | 9 | import logging 10 | import re 11 | 12 | from .utils import find_all_subclasses 13 | from .mem_markers import CallOtherMarker 14 | 15 | logger = logging.getLogger(__name__) 16 | 17 | class Instruction(object): 18 | """The base class for implementation of PCode instructions. 
def execute(self, state):
    """Run this pcode operation against the emulator state.

    The input varnodes are resolved, handed to the subclass handler, and
    whatever the handler returns is written to the output varnode. A
    handler that returns None writes nothing.
    """
    result = self._simple_exec_handler(state, self._resolve_inputs(state))
    if result is None:
        return
    self._store_in_output(state, result)
class Load(Instruction):
    """LOAD: read a value from RAM at the address held in input 1 and
    return it for storage in the output varnode.
    """
    opcode = 2
    def _simple_exec_handler(self, state, inputs):
        # Input 1 is the pointer, so its size is the width of the address,
        # not the amount of data wanted. The number of bytes to read is the
        # size of the output varnode.
        return state.ram.load(inputs[1], self.pcode.output.size)
class CallOther(Instruction):
    """CALLOTHER: a user defined operation, such as a software interrupt.

    Input 0 is the constant index that identifies the operation. Any
    further inputs are parameters for it. Ghidra itself treats these
    operations as black boxes with inputs and outputs.

    In this code, we have to look up the operation in an architecture
    specific way, then execute it. Our return value needs to be a pointer to
    the next location to execute, because it will get "CallInd" executed
    on it. Alternatively, CallInd/BranchInd will understand an instance of
    CallOtherMarker being returned, and will simply continue executing code
    without branching, when it is found.
    """
    opcode = 9
    def _simple_exec_handler(self, state, inputs):
        callother_index = inputs[0]
        # An operation may carry nothing but its index; pass None as the
        # parameter then, instead of failing with an IndexError.
        param = inputs[1] if len(inputs) > 1 else None

        other_op = state.arch.resolve_callother(callother_index, param)
        retval = other_op(state, callother_index, param)
        # None from the implementation means "keep going without branching"
        return CallOtherMarker() if retval is None else retval
class Int_Sext(Instruction):
    """INT_SEXT: sign extend input 0 from its own size to the output size.
    """
    opcode=18
    def _simple_exec_handler(self, state, inputs):
        value = inputs[0]
        src_size = self.pcode.inputs[0].size
        # A clear sign bit needs no extension
        if self._get_sign_bit(value, src_size) == 0:
            return value

        bits = self.arch.bits_per_byte
        wide_mask = (2 ** (self.pcode.output.size * bits)) - 1
        narrow_mask = (2 ** (src_size * bits)) - 1
        # Set every bit above the original width, up to the output width
        return (wide_mask ^ narrow_mask) | value
class Int_SCarry(Instruction):
    """INT_SCARRY: 1 when adding the two inputs overflows as a signed add.
    """
    # This becomes the overflow flag in add (matters with signed ints)
    opcode = 22
    def _simple_exec_handler(self, state, inputs):
        lhs_sign = self._get_sign_bit(inputs[0], self.pcode.inputs[0].size)
        rhs_sign = self._get_sign_bit(inputs[1], self.pcode.inputs[1].size)

        # Signed overflow needs both inputs to share a sign, and the sum to
        # come out with the other one.
        if lhs_sign == rhs_sign:
            sum_sign = self._get_sign_bit(inputs[0] + inputs[1],
                                          self.pcode.inputs[1].size)
            if sum_sign != lhs_sign:
                return 1
        return 0
class Int_SDiv(Instruction):
    """INT_SDIV: signed division of input 0 by input 1.

    Each operand is reduced to its magnitude, the magnitudes are divided,
    and the quotient is negated (as a twos complement value of input 0's
    size) when the operand signs differ. Dividing magnitudes makes the
    result truncate toward zero.
    """
    opcode = 34
    def _simple_exec_handler(self, state, inputs):
        size_0 = self.pcode.inputs[0].size
        size_1 = self.pcode.inputs[1].size
        neg_0 = self._get_sign_bit(inputs[0], size_0) == 1
        neg_1 = self._get_sign_bit(inputs[1], size_1) == 1

        mag_0 = self._get_2s_comp(inputs[0], size_0) if neg_0 else inputs[0]
        mag_1 = self._get_2s_comp(inputs[1], size_1) if neg_1 else inputs[1]

        # The divisor must be input 1's magnitude; dividing input 0 by
        # itself made every negative/negative division come out as 1.
        quotient = mag_0 // mag_1
        if neg_0 != neg_1:
            return self._get_2s_comp(quotient, size_0)
        return quotient

class Int_Rem(Instruction):
    """INT_REM: unsigned remainder of input 0 divided by input 1."""
    opcode = 35
    def _simple_exec_handler(self, state, inputs):
        return inputs[0] % inputs[1]

class Int_SRem(Instruction):
    """INT_SREM: signed remainder of input 0 divided by input 1.

    The remainder of the magnitudes is computed, then given the sign of
    the dividend (input 0), matching a division that truncates toward
    zero: -7 rem 2 is -1, 7 rem -2 is 1, and -7 rem -2 is -1.
    """
    opcode = 36
    def _simple_exec_handler(self, state, inputs):
        size_0 = self.pcode.inputs[0].size
        size_1 = self.pcode.inputs[1].size
        neg_0 = self._get_sign_bit(inputs[0], size_0) == 1
        neg_1 = self._get_sign_bit(inputs[1], size_1) == 1

        mag_0 = self._get_2s_comp(inputs[0], size_0) if neg_0 else inputs[0]
        mag_1 = self._get_2s_comp(inputs[1], size_1) if neg_1 else inputs[1]

        remainder = mag_0 % mag_1
        # Only the dividend's sign matters for the result; the divisor's
        # sign never flips the remainder.
        if neg_0:
            return self._get_2s_comp(remainder, size_0)
        return remainder
class Popcount(Instruction):
    """POPCOUNT: count the '1' digits in the binary form of input 0."""
    opcode = 72
    def _simple_exec_handler(self, state, inputs):
        bit_text = bin(inputs[0])
        return bit_text.count('1')
def instruction_finder(pcode, arch):
    """Build the Instruction object that implements a Ghidra pcode object.

    Pcode objects are returned by the "getPcode" function on instruction
    objects. Instruction objects are returned by the "getInstructionAt"
    function.

    :param pcode: The pcode object for which to find instructions
    :type pcode: ghidra.program.model.pcode.PcodeOp
    :param arch: The architecture for the instruction
    :type arch: Architecture

    :raises RuntimeError: Occurs when multiple implementations are found
        for one pcode. That indicates an implementation error.

    :return: An instance of the one Instruction subclass whose opcode
        matches the pcode. Returns "Fill_In_Inst" when no matching
        instruction is found.
    :rtype: Instruction
    """
    wanted = pcode.opcode
    candidates = []
    for cls in find_all_subclasses(Instruction):
        if cls.opcode == wanted:
            candidates.append(cls)

    if len(candidates) > 1:
        raise RuntimeError("Found multiple implementations for opcode {}"
                           "".format(wanted))
    if len(candidates) < 1:
        # An unimplemented opcode is not an error here: the placeholder
        # instruction only raises if it is actually executed.
        return Fill_In_Inst(pcode, arch)
    return candidates[0](pcode, arch)