├── __init__.py
├── .gitignore
├── test_prgm
    ├── Makefile
    └── test.c
├── arch
    ├── x64.py
    ├── ARM.py
    ├── x86.py
    ├── __init__.py
    └── arch_class.py
├── mem_markers.py
├── LICENSE
├── utils.py
├── README.md
├── pcode_inspector.py
├── notes
    └── found_arch_strings
├── state.py
├── pcode_interpreter.py
├── mem.py
└── instr.py


/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.swp
2 | *.class
3 | *.*.out
4 | *.pyc
5 | References/*
6 | 


--------------------------------------------------------------------------------
/test_prgm/Makefile:
--------------------------------------------------------------------------------
 1 | CC_OPTS := -Wall
 2 | 
 3 | OUT_ARCHES := x86 aarch64
 4 | 
 5 | TEST_OUTS := $(patsubst %, test.%.out, ${OUT_ARCHES})
 6 | 
 7 | all: ${TEST_OUTS}
 8 | 
 9 | %.x86.out: CC=gcc
10 | %.aarch64.out: CC=aarch64-linux-gnu-gcc
11 | 
12 | test.%.out: test.c
13 | 	${CC} ${CC_OPTS} $< -o $@
14 | 


--------------------------------------------------------------------------------
/arch/x64.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | 
 3 | from .x86 import x86
 4 | 
 5 | logger = logging.getLogger(__name__)
 6 | 
 7 | class x64(x86):
 8 |     LANG_DESC = "x86/.*/64/.*"
 9 |     INIT_STACK_SIZE = 0x100000
10 |     base_ptr = "RBP"
11 | 
12 |     # Must come after the functions are defined...
13 |     callother_dict = {
14 |             }
15 | 


--------------------------------------------------------------------------------
/arch/ARM.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | 
 3 | from .arch_class import Architecture
 4 | 
 5 | logger = logging.getLogger(__name__)
 6 | 
 7 | class ARM(Architecture):
 8 |     LANG_DESC = "ARM/.*/.*/.*"
 9 |     link_reg = "lr"
10 |     def _arch_fake_function_call(self, state, func_addr, return_addr):
11 |         # Setup LR and pcreg
12 |         state.registers.store(self.lookup_reg_offset(self.link_reg),
13 |                 self.reg_len, return_addr)
14 |         state.registers.store(self.pc_offset, self.reg_len, func_addr)
15 | 
16 |     # Must come after the functions are defined...
17 |     callother_dict = {
18 |             }
19 | 


--------------------------------------------------------------------------------
/mem_markers.py:
--------------------------------------------------------------------------------
 1 | class UniqueMarker(object):
 2 |     """Subclasses may be stored in "memory" of the Uniques.  These
 3 |     can then be used as markers for other parts of the emulator code.  Ghidra
 4 |     won't understand them, they're just for marking points in Unique memory
 5 |     between pcode operations in the same assembly instruction.
 6 |     """
 7 |     pass
 8 | 
 9 | class CallOtherMarker(UniqueMarker):
10 |     """This UniqueMarker indicates that a CallOther has occurred, and the
11 |     program should interpret the return value as a "just keep going" instead
12 |     of as a memory location of a function pointer.
13 |     """
14 |     pass
15 | 


--------------------------------------------------------------------------------
/arch/x86.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | 
 3 | from .arch_class import Architecture
 4 | 
 5 | logger = logging.getLogger(__name__)
 6 | 
 7 | class x86(Architecture):
 8 |     LANG_DESC = "x86/.*/32/.*"
 9 |     base_ptr = "EBP"
10 |     def setup_stack(self, state):
11 |         super(x86, self).setup_stack(state)
12 |         state.registers.store(self.lookup_reg_offset(self.base_ptr),
13 |                 self.reg_len, self.INIT_STACK_SIZE)
14 | 
15 |     def _arch_fake_function_call(self, state, func_addr, return_addr):
16 |         rsp_val = state.registers.load(self.stack_ptr_ofst, self.stack_ptr_size)
17 |         state.ram.store(rsp_val, self.reg_len, return_addr)
18 |         state.registers.store(self.pc_offset, self.reg_len, func_addr)
19 | 
20 |     def co_swi(self, state, callother_index, param):
21 |         state.registers.store(self.lookup_reg_offset("EAX"), self.reg_len, 0)
22 |         #raise RuntimeError("Called swi, not implemented!")
23 | 
24 |     # This definition must come after the functions are defined...
25 |     callother_dict = {
26 |             0xc: co_swi,
27 |             }
28 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Software developed by Karl Sickendick
 2 | 
 3 | *****
 4 | MIT License
 5 | 
 6 | Copyright 2019
 7 | 
 8 | Permission is hereby granted, free of charge, to any person obtaining a copy
 9 | of this software and associated documentation files (the "Software"), to deal
10 | in the Software without restriction, including without limitation the rights
11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 | copies of the Software, and to permit persons to whom the Software is
13 | furnished to do so, subject to the following conditions:
14 | 
15 | The above copyright notice and this permission notice shall be included in all
16 | copies or substantial portions of the Software.
17 | 
18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24 | SOFTWARE.
25 | 


--------------------------------------------------------------------------------
/arch/__init__.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | import re
 3 | import types
 4 | 
 5 | from ..utils import find_all_subclasses
 6 | from .arch_class import LE, BE, Architecture
 7 | 
 8 | from .x86 import x86
 9 | from .x64 import x64
10 | from .ARM import ARM
11 | 
def instantiate_architecture(api_base):
    """Instantiate the proper architecture for the current program

    Every subclass of Architecture has its LANG_DESC pattern matched against
    the current program's language description.  Exactly one subclass must
    match for the lookup to succeed.

    :param api_base: The object from which Ghidra has derived the flat api
    :type api_base: ghidra.python.PythonScript

    :return: An appropriate architecture for the current program, or None
        when zero or several architectures match
    :rtype: Architecture or None
    """
    lang_desc = str(api_base.getCurrentProgram().getLanguage().
            getLanguageDescription())

    matching_arches = []
    for arch in find_all_subclasses(Architecture):
        logging.debug("Arch lang desc {} prgm {}".format(
                arch.LANG_DESC, lang_desc)
            )
        try:
            is_match = re.match(arch.LANG_DESC, lang_desc) is not None
        except TypeError:
            # Occurs when an architecture has None as its LANG_DESC
            continue
        if is_match:
            # Instantiate outside the try block so that a TypeError raised
            # by the architecture's own constructor is reported instead of
            # being mistaken for a missing LANG_DESC.
            matching_arches.append(arch(api_base))

    if len(matching_arches) != 1:
        logging.error("Found wrong number of architecture matches: {}"
                "".format(matching_arches))
        return None
    return matching_arches[0]
41 | 


--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
 1 | import itertools as it
 2 | 
 3 | def get_api_base(api_func):
 4 |     """Return the API base for Ghidra's flat API
 5 | 
 6 |     :param api_func: Any function in Ghidra's flat API - eg. getInstructionAt
 7 |     :type: function
 8 | 
 9 |     :return: The api base
10 |     :rtype: ghidra.python.PythonScript
11 |     """
12 |     return api_func.__self__
13 | 
def find_all_subclasses(parent):
    """Collect every direct and indirect subclass of parent.  Only classes
    that have already been defined (imported) can be found.

    :param parent: The parent class
    :type parent: class

    :return: A set of all subclasses, not including parent itself
    :rtype: set
    """
    found = set()
    # Classes whose direct subclasses still need to be examined
    pending = [parent]
    while pending:
        current = pending.pop()
        for child in current.__subclasses__():
            if child not in found:
                found.add(child)
                pending.append(child)

    found.discard(parent)
    return found
34 | 
def get_func_extents(func):
    """Return the minimum and maximum addresses of a function's body

    :param func: An object whose getBody() returns an address set

    :return: The (min_addr, max_addr) pair of the body's address set
    :rtype: tuple
    """
    body = func.getBody()
    return body.getMinAddress(), body.getMaxAddress()
39 | 
def format_loc(api_base, addr_int):
    """Format an address as "function+0xoffset(0xaddress)"

    :param api_base: The object from which Ghidra has derived the flat api
    :param addr_int: The address to describe
    :type addr_int: int

    :return: A human readable location.  When the address is not inside any
        function the result is "<no function>(0xaddress)".
    :rtype: str
    """
    func = api_base.getFunctionContaining(api_base.toAddr(addr_int))
    if func is None:
        # No containing function was found for this address, so there is
        # no name or offset to report - fall back to the bare address.
        return "<no function>(0x{:x})".format(addr_int)

    func_st_addr, _ = get_func_extents(func)
    loc_diff = addr_int - func_st_addr.offset
    return "{}+0x{:x}(0x{:x})".format(func.name, loc_diff, addr_int)
46 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Description
 2 | This is a PCode emulator for Ghidra.
 3 | 
 4 | # Apologies
 5 | Listen - this is kinda rough.  It works though!  I'm a little embarrassed about the quality of documentation and completeness at the time of release.  This currently works best on x64, x86, and ARM architectures in Ghidra.  It's not tough to add other architectures; I need to implement the initial function call environment for each, though, and haven't done it.  There are some PCode opcodes not yet implemented - most notably the float operations.  If you needed that, I'm sorry - it's on the list of stuff to do.  It also still needs a testing framework and documentation building.
 6 | 
 7 | So, you know, I'm a pro.  This bugs me.  But the day of the talk is here and therefore the time to publish this code is now.
 8 | 
 9 | # Installation
10 | From the source directory here...
11 | 
12 | ```
13 | mkdir "$HOME/ghidra_scripts"
14 | ln -s "$PWD" "$HOME/ghidra_scripts/ghidra_pcode_interpreter"
15 | ln -s "$PWD/pcode_interpreter.py" "$HOME/ghidra_scripts/pcode_interpreter.py"
16 | ln -s "$PWD/pcode_inspector.py" "$HOME/ghidra_scripts/pcode_inspector.py"
17 | ```
18 | 
19 | # Usage
20 | Refresh your script list in Ghidra.  Scroll down to the PCode category.  Select the function you want to execute in the decompiler or program listing window.  Make sure you've committed your function prototype (right click in the decompiler and click "Commit Params/Return").  Then double click the `pcode_interpreter.py` script.
21 | 
22 | Logging output currently goes both to your Ghidra console and to `/tmp/pcode_interpret.log`.  If you're on a multiuser system, please be aware of this temp logging location...  Also, the temp log is a debug log, so it can grow quite large.  It's overwritten on each run.
23 | 
24 | # More Info
25 | My Saintcon 2019 talk on this is at https://github.com/kc0bfv/Saintcon2019GhidraTalk
26 | 


--------------------------------------------------------------------------------
/pcode_inspector.py:
--------------------------------------------------------------------------------
 1 | #Prints information about pcode.  Click on a function in the decompiler window, run this.
 2 | #@author Karl Sickendick kc0bfv@gmail.com
 3 | #@category PCode
 4 | #@keybinding 
 5 | #@menupath 
 6 | #@toolbar 
 7 | 
 8 | 
 9 | from __future__ import print_function
10 | 
11 | import logging
12 | 
13 | from ghidra_pcode_interpreter.mem import InvalidAddrException
14 | from ghidra_pcode_interpreter.state import State
15 | from ghidra_pcode_interpreter.utils import get_api_base, get_func_extents
16 | 
17 | logger = logging.getLogger(__name__)
18 | logging.basicConfig(level=logging.DEBUG)
19 | 
def print_pcode_info(func, state, stop_addr):
    """Log the pcode at every location from the current PC up to stop_addr

    :param func: The function being inspected (not used by this routine)
    :param state: The emulator state to step through
    :param stop_addr: The last address to inspect, inclusive
    :type stop_addr: int
    """
    position = state.get_pc()
    while True:
        if position > stop_addr:
            return
        logging.info("Current location: 0x{:x}".format(position))
        try:
            position = state.inspect_cur_location()
        except InvalidAddrException:
            # Nothing to inspect here - advance the PC by one and retry
            logging.info("No code at location")
            state.set_pc(state.get_pc() + 1)
            position = state.get_pc()
30 | 
def main():
    """Log pcode information for the function at the current location.

    The starting address comes from Ghidra's currentLocation, or from a
    prompt when no location is selected.
    """
    logging.basicConfig(level=logging.DEBUG)
    curr_addr = 0
    if currentLocation is None:
        curr_addr = askAddress("Starting Address", "Provide starting address:")
    else:
        curr_addr = currentLocation.address

    # Build the emulator state
    state = State(get_api_base(getInstructionAt))

    # Determine the function of concern
    containing_func = None
    try:
        containing_func = getFunctionContaining(curr_addr)
    except:
        # Any lookup failure is treated the same as "no function found".
        # NOTE(review): the bare except is kept as-is; narrowing it could
        # stop catching errors raised by the Ghidra API - confirm first.
        pass
    if containing_func is None:
        logger.error("Could not get containing function for selection")
        exit(1)

    # Print some function info
    start_point, func_end = get_func_extents(containing_func)
    logger.debug("Func body {} - {}".format(start_point, func_end))

    state.setup_stack()
    state.fake_function_call(start_point.offset)

    # Print state and architecture information
    logging.info("State info: {}".format(state))
    logging.info("Architecture info: {}".format(state.arch))

    # Print some parameter info
    params = containing_func.getParameters()
    logger.info("Parameter Information")
    for param in params:
        logger.info("Parameter ordinal {} storage {} varnode {}".format(
                param.getOrdinal(), param.getVariableStorage(),
                param.getFirstStorageVarnode())
            )

    print_pcode_info(containing_func, state, func_end.offset)
73 | 
74 | 
75 | if __name__ == "__main__":
76 |     main()
77 | 


--------------------------------------------------------------------------------
/notes/found_arch_strings:
--------------------------------------------------------------------------------
  1 | id="PIC-18:LE:24:PIC-18">
  2 | id="PIC-16:LE:16:PIC-16">
  3 | id="PIC-16:LE:16:PIC-16F">
  4 | id="PIC-16:LE:16:PIC-16C5x">
  5 | id="PIC-12:LE:16:PIC-12C5xx">
  6 | id="PIC-17:LE:16:PIC-17C7xx">
  7 | id="PIC-24E:LE:24:default">
  8 | id="PIC-24F:LE:24:default">
  9 | id="PIC-24H:LE:24:default">
 10 | id="dsPIC30F:LE:24:default">
 11 | id="dsPIC33F:LE:24:default">
 12 | id="dsPIC33E:LE:24:default">
 13 | id="TI_MSP430:LE:16:default">
 14 | id="TI_MSP430X:LE:32:default">
 15 | id="8085:LE:16:default">
 16 | id="Toy:BE:32:default">
 17 | id="Toy:BE:32:posStack">
 18 | id="Toy:LE:32:default">
 19 | id="Toy:BE:32:wordSize2">
 20 | id="Toy:LE:32:wordSize2">
 21 | id="Toy:BE:64:default">
 22 | id="Toy:LE:64:default">
 23 | id="Toy:BE:32:builder">
 24 | id="Toy:LE:32:builder">
 25 | id="Toy:BE:32:builder.align2">
 26 | id="Toy:LE:32:builder.align2">
 27 | id="MIPS:BE:32:default">
 28 | id="MIPS:LE:32:default">
 29 | id="MIPS:BE:32:R6">
 30 | id="MIPS:LE:32:R6">
 31 | id="MIPS:BE:64:default">
 32 | id="MIPS:LE:64:default">
 33 | id="MIPS:BE:64:micro">
 34 | id="MIPS:LE:64:micro">
 35 | id="MIPS:BE:64:R6">
 36 | id="MIPS:LE:64:R6">
 37 | id="MIPS:BE:64:64-32addr">
 38 | id="MIPS:LE:64:64-32addr">
 39 | id="MIPS:LE:64:micro64-32addr">
 40 | id="MIPS:BE:64:micro64-32addr">
 41 | id="MIPS:BE:64:64-32R6addr">
 42 | id="MIPS:LE:64:64-32R6addr">
 43 | id="MIPS:BE:32:micro">
 44 | id="MIPS:LE:32:micro">
 45 | id="6805:BE:16:default">
 46 | id="DATA:LE:64:default">
 47 | id="DATA:BE:64:default">
 48 | id="avr8:LE:16:default">
 49 | id="avr8:LE:16:extended">
 50 | id="avr8:LE:16:atmega256">
 51 | id="avr32:BE:32:default">
 52 | id="z80:LE:16:default">
 53 | id="z8401x:LE:16:default">
 54 | id="z180:LE:16:default">
 55 | id="z182:LE:16:default">
 56 | id="68000:BE:32:default">
 57 | id="68000:BE:32:MC68030">
 58 | id="68000:BE:32:MC68020">
 59 | id="68000:BE:32:Coldfire">
 60 | id="JVM:BE:32:default">
 61 | id="ARM:LE:32:v8">
 62 | id="ARM:LEBE:32:v8LEInstruction">
 63 | id="ARM:BE:32:v8">
 64 | id="ARM:LE:32:v7">
 65 | id="ARM:LEBE:32:v7LEInstruction">
 66 | id="ARM:BE:32:v7">
 67 | id="ARM:LE:32:Cortex">
 68 | id="ARM:BE:32:Cortex">
 69 | id="ARM:LE:32:v6">
 70 | id="ARM:BE:32:v6">
 71 | id="ARM:LE:32:v5t">
 72 | id="ARM:BE:32:v5t">
 73 | id="ARM:LE:32:v5">
 74 | id="ARM:BE:32:v5">
 75 | id="ARM:LE:32:v4t"> 
 76 | id="ARM:BE:32:v4t">
 77 | id="ARM:LE:32:v4">
 78 | id="ARM:BE:32:v4">
 79 | id="6502:LE:16:default">
 80 | id="6502:BE:16:default">
 81 | id="PowerPC:BE:32:default">
 82 | id="PowerPC:LE:32:default">
 83 | id="PowerPC:BE:64:default">
 84 | id="PowerPC:BE:64:64-32addr">
 85 | id="PowerPC:LE:64:64-32addr">
 86 | id="PowerPC:LE:64:default">
 87 | id="PowerPC:BE:32:4xx">
 88 | id="PowerPC:LE:32:4xx">
 89 | id="PowerPC:BE:32:MPC8270">
 90 | id="PowerPC:BE:32:QUICC">
 91 | id="PowerPC:LE:32:QUICC">
 92 | id="PowerPC:BE:64:A2-32addr">
 93 | id="PowerPC:LE:64:A2-32addr">
 94 | id="PowerPC:BE:64:A2ALT-32addr">
 95 | id="PowerPC:LE:64:A2ALT-32addr">
 96 | id="PowerPC:BE:64:A2ALT">
 97 | id="PowerPC:LE:64:A2ALT">
 98 | id="PowerPC:BE:64:VLE-32addr">
 99 | id="PowerPC:BE:64:VLEALT-32addr">
100 | id="x86:LE:32:default">
101 | id="x86:LE:32:System Management Mode">
102 | id="x86:LE:16:Real Mode">
103 | id="x86:LE:64:default">
104 | id="pa-risc:BE:32:default">
105 | id="8051:BE:16:default">
106 | id="80251:BE:24:default">
107 | id="80390:BE:24:default">
108 | id="8051:BE:24:mx51">
109 | id="AARCH64:LE:64:v8A">
110 | id="AARCH64:BE:64:v8A">
111 | id="sparc:BE:32:default">
112 | id="sparc:BE:64:default">
113 | id="CR16AB:LE:16:default">
114 | id="CR16C:LE:16:default">
115 | 


--------------------------------------------------------------------------------
/state.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | 
 3 | from .mem import Ram, Registers, Uniques
 4 | from .arch import instantiate_architecture
 5 | 
 6 | logger = logging.getLogger(__name__)
 7 | 
 8 | class State(object):
 9 |     def __init__(self, api_base):
10 |         self.api_base = api_base
11 |         self.arch = instantiate_architecture(self.api_base)
12 |         if self.arch is None:
13 |             raise RuntimeError("No supported architectures found")
14 | 
15 |         self.registers = Registers(self.api_base, self.arch)
16 |         self.ram = Ram(self.api_base, self.arch)
17 |         self.uniques = None
18 | 
19 |     def execute_cur_location(self):
20 |         return self._step_cur_loc(True)
21 | 
22 |     def inspect_cur_location(self):
23 |         return self._step_cur_loc(False)
24 | 
25 |     def _step_cur_loc(self, do_execute):
26 |         self.uniques = Uniques(self.api_base, self.arch)
27 | 
28 |         # Get the instructions and instruction size
29 |         instrs, instr_size = self.ram.get_code(self.get_pc())
30 | 
31 |         # Step the prog counter before any branches might update it
32 |         self.step_pc()
33 | 
34 |         # Execute each instruction
35 |         run_type = "Executing" if do_execute else "Instruction"
36 |         log_type = logger.debug if do_execute else logger.info
37 |         for instr in instrs:
38 |             log_type("{} {}".format(run_type, instr))
39 |             if do_execute:
40 |                 instr.execute(self)
41 |             logger.debug(self)
42 | 
43 |         return self.get_pc()
44 | 
45 |     def __str__(self):
46 |         return "Registers {}\nRam {}\nUniques {}".format(self.registers,
47 |                 self.ram, self.uniques)
48 | 
49 |     def get_pc(self):
50 |         return self.registers.load(self.arch.pc_offset, self.arch.reg_len)
51 | 
52 |     def set_pc(self, location):
53 |         self.registers.store(self.arch.pc_offset, self.arch.reg_len, location)
54 | 
55 |     def step_pc(self):
56 |         cur_loc = self.get_pc()
57 |         _, instr_size = self.ram.get_code(cur_loc)
58 |         self.set_pc(cur_loc + instr_size)
59 | 
60 |     def set_varnode(self, varnode, value):
61 |         if varnode.isRegister():
62 |             self.registers.store(varnode.offset, varnode.size, value)
63 |         elif varnode.isUnique():
64 |             self.uniques.store(varnode.offset, varnode.size, value)
65 |         elif varnode.isAddress():
66 |             self.ram.store(varnode.offset, varnode.size, value)
67 |         elif varnode.getAddress().isStackAddress():
68 |             addr = self.arch.resolve_stack_address(self, varnode.offset)
69 |             self.ram.store(addr, varnode.size, value)
70 |         else:
71 |             raise RuntimeError("Invalid varnode for setting: {}"
72 |                     "".format(varnode))
73 | 
74 |     def read_varnode(self, varnode):
75 |         if varnode.isRegister():
76 |             return self.registers.load(varnode.offset, varnode.size)
77 |         elif varnode.isUnique():
78 |             return self.uniques.load(varnode.offset, varnode.size)
79 |         elif varnode.isAddress():
80 |             return self.ram.load(varnode.offset, varnode.size)
81 |         elif varnode.isConstant():
82 |             return varnode.offset
83 |         elif varnode.getAddress().isStackAddress():
84 |             addr = self.arch.resolve_stack_address(self, varnode.offset)
85 |             return self.ram.load(addr, varnode.size)
86 |         else:
87 |             raise RuntimeError("Unknown varnode type: {}".format(varnode))
88 | 
89 |     def setup_stack(self):
90 |         self.arch.setup_stack(self)
91 | 
92 |     def fake_function_call(self, func_addr):
93 |         self.arch.fake_function_call(self, func_addr)
94 | 


--------------------------------------------------------------------------------
/pcode_interpreter.py:
--------------------------------------------------------------------------------
  1 | #Emulates pcode execution.  Click on a function in the decompiler window, provide the initial parameters, and go!
  2 | #@author Karl Sickendick kc0bfv@gmail.com
  3 | #@category PCode
  4 | #@keybinding 
  5 | #@menupath 
  6 | #@toolbar 
  7 | 
  8 | 
  9 | from __future__ import print_function
 10 | 
 11 | import code
 12 | import logging
 13 | 
 14 | from ghidra_pcode_interpreter.state import State
 15 | from ghidra_pcode_interpreter.utils import get_api_base, get_func_extents, \
 16 |         format_loc
 17 | 
 18 | logger = logging.getLogger(__name__)
 19 | 
 20 | PRINT_LOCATION_EVERY = 100
 21 | 
 22 | def get_parameters(func):
 23 |     def get_param_val(param):
 24 |         ret = None
 25 |         while ret is None:
 26 |             ret = askInt("Parameter Entry",
 27 |                     "Specify integer value for parameter "
 28 |                     "{} {} type {} ".format(
 29 |                         param.getOrdinal(), param.getName(),
 30 |                         param.getDataType()
 31 |                         )
 32 |                     )
 33 |         return ret
 34 | 
 35 |     params = func.getParameters()
 36 |     if len(params) == 0:
 37 |         logger.warn("No parameters - did you commit the locals/params for "
 38 |                 "the func?")
 39 | 
 40 |     return [(param, get_param_val(param)) for param in params]
 41 | 
 42 | def run_pcode(func, state, stop_addr):
 43 |     cur_loc = state.get_pc()
 44 |     api_base = get_api_base(getFunctionAt)
 45 |     index = 0
 46 |     while cur_loc != stop_addr:
 47 |         if index % PRINT_LOCATION_EVERY == 0:
 48 |             logging.info("Current location: {}".format(
 49 |                     format_loc(api_base, cur_loc))
 50 |                     )
 51 |         index += 1
 52 |         #setCurrentLocation(toAddr(cur_loc)) // Really slows things down
 53 |         cur_loc = state.execute_cur_location()
 54 |         logging.debug("State: {}".format(state))
 55 | 
 56 | def analyze_func_at(addr):
 57 |     """Emulate the function containing an address
 58 | 
 59 |     :param addr: An address in the function of interest
 60 |     :type addr: int
 61 |     """
 62 |     # Setup necessary emulator state 
 63 |     # TODO Move this somewhere better
 64 |     state = State(get_api_base(getInstructionAt))
 65 |     state.ram.store(0x08, 8, 0x0000000000414141)
 66 |     state.ram.store(0x10, 8, 0x0000000000333231)
 67 |     state.ram.store(0x18, 8, 0x0000000000000000)
 68 |     state.ram.store(0x20, 8, 0x0041414141414141)
 69 | 
 70 |     # Find the function surrounding addr
 71 |     containing_func = None
 72 |     try:
 73 |         containing_func = getFunctionContaining(addr)
 74 |     except:
 75 |         pass
 76 |     if containing_func is None:
 77 |         logger.error("Could not get containing function for selection")
 78 |         return
 79 | 
 80 |     # Input and store the function parameters
 81 |     param_inputs = get_parameters(containing_func)
 82 |     for param, param_val in param_inputs:
 83 |         param_vn = param.getFirstStorageVarnode()
 84 |         state.set_varnode(param_vn, param_val)
 85 | 
 86 |     start_point, func_end = get_func_extents(containing_func)
 87 |     logger.debug("Func body {} - {}".format(start_point, func_end))
 88 | 
 89 |     # Emulate the conditions of a function call
 90 |     state.setup_stack()
 91 |     state.fake_function_call(start_point.offset)
 92 | 
 93 |     # Run the code in the function
 94 |     run_pcode(containing_func, state, state.arch.sentinel_return_addr)
 95 | 
 96 |     # Read the return value
 97 |     return_obj = containing_func.getReturn()
 98 |     return_varnode = return_obj.getFirstStorageVarnode()
 99 |     orig_outval = state.read_varnode(return_varnode)
100 | 
101 |     # Determine if output should be interpreted as signed
102 |     interpret_as_signed = False
103 |     try:
104 |         interpret_as_signed = return_obj.getDataType().isSigned()
105 |     except:
106 |         pass
107 | 
108 |     # Interpret outval as signed if necessary
109 |     outval = orig_outval
110 |     if interpret_as_signed:
111 |         bit_count = return_varnode.size * state.arch.bits_per_byte
112 |         sign = (outval >> (bit_count - 1)) & 1
113 |         if sign == 1:
114 |             outval = -((~outval & (2**64 - 1)) + 1)
115 | 
116 |     logger.info("Final state: {}".format(state))
117 |     logger.info("Output value: {} or 0x{:x}".format(outval, orig_outval))
118 | 
def main():
    """Configure logging, pick the starting address, and run the emulator.

    Debug-level logging goes to /tmp/pcode_interpret.log (overwritten on
    each run); info-level and above is also sent to a stream handler.
    """
    logging.basicConfig(level=logging.DEBUG,
            filename="/tmp/pcode_interpret.log", filemode="w")
    console = logging.StreamHandler()
    console.setLevel(logging.INFO)
    logging.getLogger('').addHandler(console)

    # Use the selected location if there is one, otherwise ask for it
    if currentLocation is None:
        start = askAddress("Starting Address", "Provide starting address:")
    else:
        start = currentLocation.address

    analyze_func_at(start)
133 | 
134 | if __name__ == "__main__":
135 |     main()
136 | 


--------------------------------------------------------------------------------
/mem.py:
--------------------------------------------------------------------------------
  1 | from collections import defaultdict
  2 | import logging
  3 | 
  4 | from ghidra.program.model.mem import MemoryAccessException
  5 | 
  6 | from .instr import instruction_finder
  7 | from .arch import LE, BE
  8 | from .mem_markers import UniqueMarker
  9 | 
 10 | logger = logging.getLogger(__name__)
 11 | 
 12 | class InvalidAddrException(Exception):
 13 |     pass
 14 | 
 15 | class NotCodeException(Exception):
 16 |     pass
 17 | 
 18 | class InvalidRegException(Exception):
 19 |     pass
 20 | 
 21 | class CodeWord(list):
 22 |     """
 23 |     This class represents a memory location containing code.  It will
 24 |     store a set of PCODE instruction classes.
 25 | 
 26 |     Note that this implementation leads to a serious deficiency - code and
 27 |     data are not interchangable here!  For instance - if code is
 28 |     self-modifying then the modified result will not be interpreted as code
 29 |     by the emulator, and the emulator will die when it tries to execute
 30 |     that (see Ram's get_code for that...).  Of course - in order to interpret
 31 |     self-modifying code, we also need a way to turn that code into PCODE,
 32 |     which is beyond the scope of this software, therefore, this serious
 33 |     deficiency is reasonable.
 34 |     """
 35 |     pass
 36 | 
 37 | class MemChunk(defaultdict):
 38 |     """
 39 |     This is intended to be an implementation of both reg and ram
 40 |     """
 41 |     def __init__(self, api_base, arch):
 42 |         super(MemChunk, self).__init__(int)
 43 | 
 44 |         self.arch = arch
 45 |         self.api_base = api_base
 46 | 
 47 |     def _validate_code(self, addr):
 48 |         addr = hex(addr)
 49 |         if addr.endswith("L"):
 50 |             addr = addr[:-1]
 51 |         return self.api_base.getInstructionAt(
 52 |                 self.api_base.toAddr(addr)) is not None
 53 | 
 54 |     def _validate_writeable(self, addr):
 55 |         # TODO: check for actual writeableness, maybe
 56 |         return not self._validate_code(addr)
 57 | 
 58 |     def store(self, address, length, value):
 59 |         """Store a value at the given address, of "length" bytes, with
 60 |         endianness matching the architecture
 61 | 
 62 |         :param address: The index at which to store value
 63 |         :type addres: int
 64 |         :param length: The number of bytes over which to store value
 65 |         :type length: int
 66 |         :param value: The value to store
 67 |         :type value: int
 68 | 
 69 |         :raises InvalidAddrException: When the address was not writeable
 70 |         """
 71 |         if not self._validate_writeable(address):
 72 |             raise InvalidAddrException("Tried write at non-writeable spot")
 73 | 
 74 |         address = long(address)
 75 |         value = long(value)
 76 | 
 77 |         # Note that python handles bitwise operation on negative numbers as
 78 |         # 2s complement and like there are an infinite number of 1's in
 79 |         # front of the most significant bit.
 80 | 
 81 |         # This means that the below operations are already sign extended,
 82 |         # and this is what we'd expect a processor to do.
 83 | 
 84 |         # Thus - negative numbers just work.
 85 |         cur_val = value
 86 |         for ind in range(length):
 87 |             if self.arch.endian is LE:
 88 |                 st_loc = address + ind
 89 |             else:
 90 |                 st_loc = address + ((length - 1) - ind)
 91 |             self[st_loc] = cur_val & (2**self.arch.bits_per_byte - 1)
 92 |             cur_val >>= self.arch.bits_per_byte
 93 | 
 94 |     def load_byte(self, address):
 95 |         """Load just one byte from address
 96 |         """
 97 |         return self[address]
 98 | 
 99 |     def load(self, address, length):
100 |         """Load a value from the given address, of "length" bytes, with
101 |         endianness matching the architecture.
102 | 
103 |         :param address: The index from which to load
104 |         :type addres: int
105 |         :param length: The number of bytes to load
106 |         :type length: int
107 | 
108 |         :return: The value loaded
109 |         :rtype: int
110 |         """
111 |         address = long(address)
112 | 
113 |         cur_val = 0
114 |         for ind in range(length):
115 |             if self.arch.endian is LE:
116 |                 st_loc = address + ind
117 |             else:
118 |                 st_loc = address + ((length - 1) - ind)
119 |             one_byte = self.load_byte(st_loc) % 256
120 |             cur_val += one_byte << (ind * self.arch.bits_per_byte)
121 |         return long(cur_val)
122 | 
123 |     def __str__(self):
124 |         sorted_keys = sorted(self.keys())
125 |         return ", ".join("0x{:x}: {}".format(key, hex(self[key])) for key in sorted_keys)
126 | 
127 | class Registers(MemChunk):
128 |     def _validate_writeable(self, addr):
129 |         # This assumes that Ghidra will only try to write to writeable
130 |         # registers.
131 |         return True
132 | 
133 |     def __str__(self):
134 |         def fmt_key(key):
135 |             reg = None
136 |             try:
137 |                 # Get the register object if possible
138 |                 reg = self.arch.lookup_reg_by_offset(key)
139 |             except IndexError as e:
140 |                 logging.debug("Register not found {} {}".format(key, e))
141 | 
142 |             reg_size = 1
143 |             reg_name = key
144 |             if reg is not None:
145 |                 reg_size = reg.getMinimumByteSize()
146 |                 reg_name = "{}({:x})".format(reg.name, key)
147 |             out_txt = "{}: 0x{:x}".format(reg_name, self.load(key, reg_size))
148 |             used_keys = set(range(key, key + reg_size))
149 |             return out_txt, used_keys
150 |         sorted_keys = sorted(self.keys())
151 |         vals = list()
152 |         all_used_keys = set()
153 |         for key in sorted_keys:
154 |             if key in all_used_keys:
155 |                 continue
156 |             out_txt, used_keys = fmt_key(key)
157 |             all_used_keys = all_used_keys.union(used_keys)
158 |             vals.append(out_txt)
159 |         return ", ".join(vals)
160 | 
161 | class Uniques(MemChunk):
162 |     def _validate_writeable(self, addr):
163 |         # This assumes that Ghidra will only try to write to writeable
164 |         # uniques.
165 |         return True
166 | 
167 |     def store(self, address, length, value):
168 |         """Store a value just like for the parent class, however, if
169 |         the value is a UniqueMarker instance, store it as a special case 
170 |         at only the address.
171 |         """
172 |         if isinstance(value, UniqueMarker):
173 |             self[address] = value
174 |         else:
175 |             super(Uniques, self).store(address, length, value)
176 |     def load(self, address, length):
177 |         """Load a value just like for the parent class, however, if
178 |         the value is a UniqueMarker instance return only it.
179 |         """
180 |         if isinstance(self[address], UniqueMarker):
181 |             return self[address]
182 |         else:
183 |             return super(Uniques, self).load(address, length)
184 | 
185 | class Ram(MemChunk):
186 |     def load_byte(self, address):
187 |         if address in self:
188 |             return self[address]
189 |         else:
190 |             try:
191 |                 # It handles 64 bit values better when they're hex strings
192 |                 # without an L at the end
193 |                 addr = self.api_base.toAddr(hex(long(address))[:-1])
194 |                 return self.api_base.getByte(addr)
195 |             except MemoryAccessException as e:
196 |                 logger.debug("mem access except")
197 |                 return 0
198 | 
199 |     def get_code(self, address):
200 |         if not self._validate_code(address):
201 |             raise InvalidAddrException("No code at address")
202 | 
203 |         inst = self.api_base.getInstructionAt(self.api_base.toAddr(address))
204 |         inst_size = inst.length
205 |         pcodes = inst.getPcode()
206 |         
207 |         instrs = [instruction_finder(pcode, self.arch) for pcode in pcodes]
208 | 
209 |         return instrs, inst_size
210 | 


--------------------------------------------------------------------------------
/arch/arch_class.py:
--------------------------------------------------------------------------------
  1 | import logging
  2 | import re
  3 | import types
  4 | 
  5 | from ghidra.program.util import VarnodeContext
  6 | 
  7 | from ..utils import find_all_subclasses, get_api_base
  8 | 
  9 | logger = logging.getLogger(__name__)
 10 | 
 11 | LE = "LittleEndian"  # endianness tag; users compare against it with `is`
 12 | BE = "BigEndian"  # endianness tag chosen when the language is big endian
 13 | 
 14 | class Architecture(object):
 15 |     # Set LANG_DESC to the language description in Ghidra appropriate to
 16 |     # the architecture in question.
 17 |     LANG_DESC = None
 18 |     # Set callother_dict in subclasses to associate callother function
 19 |     # implementations with a callother index
 20 |     callother_dict = None
 21 | 
 22 |     INIT_STACK_SIZE = 0x100000
 23 | 
 24 |     def __init__(self, api_base):
 25 |         self.api_base = api_base
 26 |         self.reg_offset_lookedup = dict()
 27 | 
 28 |         # Determine settings of the current architecture
 29 |         self.cur_prog = self.api_base.getCurrentProgram()
 30 |         self.lang = self.cur_prog.getLanguage()
 31 |         self.lang_desc = self.lang.getLanguageDescription()
 32 | 
 33 |         # Processor properties
 34 |         self.proc = self.lang_desc.getProcessor()
 35 |         self.proc_size = self.lang_desc.getSize()
 36 | 
 37 |         # Properties of the program counter
 38 |         self.pc = self.lang.getProgramCounter()
 39 |         self.pc_offset = self.pc.getOffset()
 40 |         self.pc_byte_size = self.pc.getMinimumByteSize()
 41 |         self.pc_bit_count = self.pc.getBitLength()
 42 |         self.pc_bits_per_byte = self.pc_bit_count // self.pc_byte_size
 43 | 
 44 |         # Register properties
 45 |         self.endian = BE if self.lang.isBigEndian() else LE
 46 |         self.reg_len = self.pc_byte_size
 47 |         self.bits_per_byte = self.pc_bits_per_byte
 48 | 
 49 |         # This address stands in as a return address for the original function
 50 |         # call.  Therefore - it should be a return address that will never 
 51 |         # appear during legitimate program execution.  That may be
 52 |         # architecture dependent...
 53 |         self.sentinel_return_addr = 0xff * self.reg_len
 54 | 
 55 |         # Determine how the stack is recorded
 56 |         self.varnode_context = VarnodeContext(
 57 |                 self.cur_prog,
 58 |                 self.cur_prog.getProgramContext(),
 59 |                 self.cur_prog.getProgramContext())
 60 |         self.stack_varnode = self.varnode_context.getStackVarnode()
 61 |         self.stack_ptr_ofst = self.stack_varnode.offset
 62 |         self.stack_ptr_size = self.stack_varnode.size
 63 | 
 64 |     def lookup_reg_offset(self, reg_name):
 65 |         """Lookup Ghidra's offset for a register.
 66 | 
 67 |         :param reg_name: The register name to lookup
 68 |         :type reg_name: string
 69 | 
 70 |         :raises RuntimeError: When an invalid register name is provided
 71 |         
 72 |         :return: The offset of the register in the registers space
 73 |         :rtype: int
 74 |         """
 75 |         try:
 76 |             return self.lang.getRegister(reg_name).getOffset()
 77 |         except AttributeError as e:
 78 |             raise RuntimeError("Invalid register name {}".format(reg_name))
 79 | 
 80 |     def lookup_reg_by_offset(self, offset):
 81 |         """Lookup the matching register name based on Ghidra's offset
 82 | 
 83 |         :param offset: The register offset to lookup
 84 |         :type offset: int
 85 | 
 86 |         :raises IndexError: When the offset was not valid for registers
 87 | 
 88 |         :return: The register name corresponding to offset
 89 |         :rtype: string
 90 |         """
 91 |         if offset not in self.reg_offset_lookedup:
 92 |             # Multiple regs might have the same offset, e.g. EIP and RIP
 93 |             matching_regs = [reg for reg in self.lang.getRegisters()
 94 |                     if long(reg.getOffset()) == long(offset)]
 95 |             # We want to return the highest parent reg that has same offset
 96 |             best_match = matching_regs[0]
 97 |             next_try = best_match.getParentRegister()
 98 |             while next_try is not None and next_try.getOffset() == offset:
 99 |                 best_match = next_try
100 |                 next_try = best_match.getParentRegister()
101 |             self.reg_offset_lookedup[offset] = best_match
102 | 
103 |         return self.reg_offset_lookedup[offset]
104 | 
105 |     def return_callother_names(self):
106 |         """Return a list of callother operation names for the architecture.
107 |         This is more a way of learning about Ghidra, which has an internal
108 |         list of these, with each name corresponding to an index.  The indexes
109 |         of the names in the list this function returns will correspond with
110 |         the indexes in Ghidra.
111 | 
112 |         :return: List of callother names, ordered/indexed same as in Ghidra
113 |         :rtype: list
114 |         """
115 |         udopcnt = self.lang.getNumberOfUserDefinedOpNames()
116 |         return [self.lang.getUserDefinedOpName(ind) for ind in range(udopcnt)]
117 |         
118 |     def setup_stack(self, state):
119 |         """Setup the stack registers as necessary.
120 | 
121 |         The base class version only sets the stack pointer.
122 |         """
123 |         state.registers.store(self.stack_ptr_ofst,
124 |                 self.stack_ptr_size, self.INIT_STACK_SIZE)
125 | 
126 |     def fake_function_call(self, state, func_addr, return_addr = None):
127 |         """Setup the stack and registers to fake a function call
128 |         """
129 |         if return_addr is None:
130 |             return_addr = self.sentinel_return_addr
131 |         self._arch_fake_function_call(state, func_addr, return_addr)
132 | 
133 |     def _arch_fake_function_call(self, state, func_addr, return_addr):
134 |         raise RuntimeError("Called fake_function_call on base class")
135 | 
136 |     def resolve_stack_address(self, state, stack_offset):
137 |         """Resolve a stack offset into a RAM address.
138 |         """
139 |         base = state.registers.load(self.stack_ptr_ofst, self.stack_ptr_size)
140 |         return base + stack_offset
141 | 
142 |     def resolve_callother(self, callother_index, param):
143 |         """Return the callother function that corresponds to callother_index
144 |         and, optionally, param.  This version assumes that the architecture
145 |         subclasses have a dictionary/list called callother_dict with
146 |         indexes as keys and architecture class functions as values.
147 | 
148 |         :param callother_index: Ghidra's index of the callother
149 |         :type callother_index: int
150 |         :param param: The parameter Ghidra specifies for the callother
151 |         :type param: int
152 | 
153 |         :raises RuntimeError: When there's no existing callother_index
154 | 
155 |         :return: The function corresponding to callother_index and param.  The
156 |                 function must take as arguments the program state, the
157 |                 callother_index, and parameter.  It must return either None or
158 |                 the new program counter.  If it returns None, the program
159 |                 counter will continue unchanged.
160 |         :rtype: function(State, callother_index, param) -> 
161 |                 None or new_program_counter
162 |         """
163 |         if callother_index in self.callother_dict:
164 |             unbound_version = self.callother_dict[callother_index]
165 |             # Bind the unbound version and return
166 |             return types.MethodType(unbound_version, self)
167 |         else:
168 |             raise RuntimeError("No callother implemented for {} {}".format(
169 |                     callother_index, param))
170 | 
171 |     def __str__(self):
172 |         format_parts = [
173 |                 ("Architecture language description", self.lang_desc),
174 |                 ("Processor", self.proc),
175 |                 ("Prog ctr", self.pc),
176 |                 ("Endianness", self.endian),
177 |                 ("Register length", self.reg_len),
178 |                 ("Stack varnode", self.stack_varnode),
179 |                 ("Callothers Known", self.return_callother_names())
180 |             ]
181 |         return ", ".join("{}: {}".format(tup[0], tup[1])
182 |                 for tup in format_parts)
183 | 


--------------------------------------------------------------------------------
/test_prgm/test.c:
--------------------------------------------------------------------------------
  1 | #include <stdio.h>
  2 | #include <time.h>
  3 | 
  4 | #if defined __amd64__
  5 |     // These macros put in out_var the flags resulting from inval1 +/- inval2.  NOTE(review): the add/sub writes to an operand that is declared input-only ("r"), and no "cc" clobber is listed - confirm against GCC's extended asm rules.
  6 |     #define ADD_OUT_FLAGS(out_var, inval1, inval2) asm ( \
  7 |         "add %[invalc], %[invald];" \
  8 |         "pushfq;" \
  9 |         "pop %[outval];"\
 10 |         : [outval] "=r" (out_var) \
 11 |         : [invalc] "r" (inval1), [invald] "r" (inval2) \
 12 |     )
 13 | 
 14 |     #define SUB_OUT_FLAGS(out_var, inval1, inval2) asm ( \
 15 |         "sub %[invald], %[invalc];" \
 16 |         "pushfq;" \
 17 |         "pop %[outval];"\
 18 |         : [outval] "=r" (out_var) \
 19 |         : [invalc] "r" (inval1), [invald] "r" (inval2) \
 20 |     )
 21 | 
 22 |     // This one returns the value of the instruction pointer in retval.  The expansion already ends in ';'.
 23 |     #define READ_RIP(retval) \
 24 |         asm ( \
 25 |             "lea (%%rip), %[outval];" \
 26 |             : [outval] "=r" (retval) \
 27 |         );
 28 | 
 29 |     // This one just demonstrates a bunch of register updates, for sure.  It reads a variable named inval at the expansion site, and the argument must itself be named retval because the asm text refers to %[retval].  NOTE(review): "mov %%rdi, %%rsi" reads rdi before this asm has written it - possibly %%rdx was meant; confirm.
 30 |     #define UPDATE_MANY_REGS(retval) { \
 31 |         long unsigned int temp_retval = 0; \
 32 |         asm ( \
 33 |             "mov %[inval], %%rax;" \
 34 |             "mov %%rax, %%rbx;" \
 35 |             "mov %%rbx, %%rcx;" \
 36 |             "mov %%rcx, %%rdx;" \
 37 |             "mov %%rdi, %%rsi;" \
 38 |             "mov %%rsi, %%rdi;" \
 39 |             "mov %%rdi, %[temp_retval];" \
 40 |             : [temp_retval] "=r" (temp_retval) \
 41 |             : [inval] "r" (inval) \
 42 |             : "cc", "rax", "rbx", "rcx", "rdx", "rsi", "rdi" \
 43 |         ); \
 44 |         asm ( \
 45 |             "mov %[temp_retval], %%r8;" \
 46 |             "mov %%r8, %%r9;" \
 47 |             "mov %%r9, %%r10;" \
 48 |             "mov %%r10, %%r11;" \
 49 |             "mov %%r11, %%r12;" \
 50 |             "mov %%r12, %%r13;" \
 51 |             "mov %%r13, %%r14;" \
 52 |             "mov %%r14, %%r15;" \
 53 |             "mov %%r15, %[retval];" \
 54 |             : [retval] "=r" (retval) \
 55 |             : [temp_retval] "r" (temp_retval) \
 56 |             : "cc", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15" \
 57 |         ); \
 58 |     }
 59 | 
 60 | #elif defined __aarch64__
 61 |     // TODO: implement these in assembly.  They're totally wrong here: the arithmetic result or a constant is stored instead of flags, the instruction pointer, or register traffic.
 62 |     #define ADD_OUT_FLAGS(out_var, inval1, inval2) out_var = inval1 + inval2;
 63 |     #define SUB_OUT_FLAGS(out_var, inval1, inval2) out_var = inval1 - inval2;
 64 |     #define READ_RIP(retval) retval = 1
 65 |     #define UPDATE_MANY_REGS(retval) retval = 2
 66 | #endif
 67 | 
 68 | 
 69 | // Adds inval to itself in int arithmetic; the sum is then widened to long.
 70 | long int simple_adds(int inval) {
 71 |     long int doubled = inval + inval;
 72 |     return doubled;
 73 | }
 72 | 
 73 | // Subtracts 0x7fffffff from inval.
 74 | int big_sub(int inval) {
 75 |     const int subtrahend = 0x7fffffff;
 76 |     return inval - subtrahend;
 77 | }
 76 | 
 77 | // Returns inval1 minus inval2.
 78 | long int another_sub(long int inval1, long int inval2) {
 79 |     long int difference = inval1 - inval2;
 80 |     return difference;
 81 | }
 80 | 
 81 | // Returns whatever ADD_OUT_FLAGS leaves in its output for invala, invalb.
 82 | long int investigate_add_flags(int invala, int invalb) {
 83 |     long int flags = 0;
 84 |     ADD_OUT_FLAGS(flags, invala, invalb);
 85 |     return flags;
 86 | }
 86 | 
 87 | // Returns whatever SUB_OUT_FLAGS leaves in its output for invala, invalb.
 88 | long int investigate_sub_flags(int invala, int invalb) {
 89 |     long int flags = 0;
 90 |     SUB_OUT_FLAGS(flags, invala, invalb);
 91 |     return flags;
 92 | }
 92 | 
 93 | // Signed division of invala by invalb.
 94 | long int div(long int invala, long int invalb) {
 95 |     long int quotient = invala / invalb;
 96 |     return quotient;
 97 | }
 96 | 
 97 | // Unsigned division of invala by invalb.
 98 | unsigned long int unsign_div(unsigned long int invala,
 99 |         unsigned long int invalb) {
100 |     unsigned long int quotient = invala / invalb;
101 |     return quotient;
102 | }
101 | 
102 | // Returns 0 when inval - 1 is positive, 1 otherwise.
103 | long int sub_if(int inval) {
104 |     return ( inval - 1 > 0 ) ? 0 : 1;
105 | }
109 | 
110 | // Returns the value READ_RIP stores into its argument.
111 | long unsigned int read_rip() {
112 |     long unsigned int rip_value = 0;
113 |     READ_RIP(rip_value);
114 |     return rip_value;
115 | }
115 | 
116 | // Runs UPDATE_MANY_REGS and returns what it leaves in retval.  The names
117 | // inval and retval must not change: the macro reads inval directly, and
118 | // its asm text refers to an operand literally called retval.
119 | long unsigned int update_many_regs(long int inval) {
120 |     long unsigned int retval = 0;
121 |     UPDATE_MANY_REGS(retval);
122 |     return retval;
123 | }
121 | 
122 | // Prints val alongside simple_adds(val), both in hex.
123 | void try_print_simple_adds(int val) {
124 |     long int sum = simple_adds(val);
125 |     printf("Simple adds %x result %lx\n", val, sum);
126 | }
125 | 
126 | // Single precision addition of the two arguments.
127 | float float_test(float float1, float float2) {
128 |     float sum = float1 + float2;
129 |     return sum;
130 | }
129 | 
130 | // Double precision addition of the two arguments.
131 | double double_test(double double1, double double2) {
132 |     double sum = double1 + double2;
133 |     return sum;
134 | }
133 | 
134 | // Calls nanosleep until it returns 0, each time re-using the remaining
135 | // time it reported.  Returns the number of nanosleep calls made.
136 | int try_sleep(time_t secs, long nanosecs) {
137 |     struct timespec sleep_len = { 0 }, rem = { 0 };
138 |     int tries = 0;
139 |     int retval;
140 |     sleep_len.tv_sec = secs;
141 |     sleep_len.tv_nsec = nanosecs;
142 |     do {
143 |         tries += 1;
144 |         retval = nanosleep(&sleep_len, &rem);
145 |         sleep_len.tv_sec = rem.tv_sec;
146 |         sleep_len.tv_nsec = rem.tv_nsec;
147 |     } while( retval != 0 );
148 |     return tries;
149 | }
147 | 
148 | // Prints the program name, arguments and environment (pausing between
149 | // lines), then the result of each test function above.
150 | int main(int argc, char * argv[], char * envp[]) {
151 |     if( argc >= 1 ) {
152 |         printf("Prog name: %s\n", argv[0]);
153 |     }
154 |     char **cur_strp = argv;
155 |     while( *cur_strp != 0 ) {
156 |         printf("Arg: %s\n", *cur_strp);
157 |         cur_strp += 1;
158 |         try_sleep(0, 1e8);
159 |     }
160 |     cur_strp = envp;
161 |     while( *cur_strp != 0 ) {
162 |         printf("Env: %s\n", *cur_strp);
163 |         cur_strp += 1;
164 |         try_sleep(0, 1e8);
165 |     }
166 |     try_print_simple_adds(0x10001000);
167 |     try_print_simple_adds(0x20002000);
168 |     try_print_simple_adds(0x60006000);
169 |     try_print_simple_adds(0x80008000);
170 |     try_print_simple_adds(0xd000d000);
171 |     printf("Update many regs result 0x%lx\n", update_many_regs(argc));
172 |     printf("RIP value 0x%lx\n", read_rip());
173 |     
174 |     printf("Another sub 0x%lx\n", another_sub(0x10, 0x40));
175 | 
176 |     int test_val = 0x7fffffff;
177 |     // With adding - when overflowing signed you get "overflow" set.  When overflowing unsigned you get "carry" set.  Overflow matters with signed add, carry with unsigned.
178 |     // If you're adding signed 0xffffffff twice, you get carry set, but that doesn't matter because it's not helpful for signed add operations.  If it was an unsigned add instead, you'd still get carry set and it would matter, because then you added too large.  Overflow isn't set with this add.  Overflow matters for signed, carry for unsigned add.
179 |     // If you add 0x7fffffff twice you get overflow bit set and carry unset.  If it was a signed add, overflow matters.
180 |     // Ghidra uses INT_CARRY to determine CF in an add, and INT_SCARRY to determine OF in an add.
181 |     // 
182 |     printf("Investigate add flags 0x%x 0x%lx\n", test_val, investigate_add_flags(test_val, test_val));
183 | 
184 |     test_val = 0x0;
185 |     int test_val_2 = 0x80000001;
186 |     /* With subtracting:
187 |         0xfff - 0x1000 - carry bit, no overflow - 0xffffffff
188 |         0x0 - 0x1000 - carry bit, no overflow - 0xfffff000
189 |         0xffffffff - 0x1000 - no carry bit, no overflow 
190 |         0x80001000 - 0x1000 - no carry bit, no overflow
191 |         0x80000fff - 0x1000 - no carry bit, overflow - 0x7fffffff
192 |         0x80000000 - 0x1000 - no carry bit, overflow - 0x7ffff000
193 |         0x80000000 - 0x7fffffff - no carry bit, overflow
194 |         0x80000000 - 0x80000000 - no carry bit, no overflow (zero)
195 |         0x80000000 - 0x80000001 - carry bit, no overflow
196 |         0x1000 - 0x80000000 - carry bit, overflow - 0x80001000
197 | 
198 |         pos minus neg - should be pos
199 |         neg minus pos - should be neg
200 |         pos minus pos - always gonna be pos or 0, no overflow
201 |         neg minus neg - always gonna be neg or 0, no overflow
202 |      */
203 |     // For sub, carry bit indicates unsigned overflow.  It happens when a bigger number is subtracted from a smaller number.  Ghidra uses INT_LESS to calculate CF in a sub.
204 |     // For sub, overflow bit indicates signed overflow.  It can only happen when the operand signs are different.  If the result, in that case, doesn't have the same sign as the first operand, then it's an overflow.  Ghidra uses INT_SBORROW to calculate OF in a sub.
205 |     printf("Investigate sub flags 0x%x 0x%x 0x%lx\n", test_val, test_val_2, investigate_sub_flags(test_val, test_val_2));
206 | 
207 |     printf("Big sub 1 %i\n", big_sub(1));
208 |     printf("Big sub 0 %i\n", big_sub(0));
209 |     printf("Big sub -1 %i\n", big_sub(-1));
210 |     printf("Big sub -2 %i\n", big_sub(-2));
211 |     printf("Big sub 0x80000001 %i\n", big_sub(0x80000001));
212 |     printf("Big sub 0x80000000 %i\n", big_sub(0x80000000));
213 |     printf("Big sub 0x7fffffff %i\n", big_sub(0x7fffffff));
214 | 
215 |     printf("Flags: C1Px AxZS TIDO\n");
216 |     // carry, parity, adjust, zero, sign, trap, interrupt enable, direction, overflow
217 | 
218 |     // unsign_div returns unsigned long, so it is printed with %lu
219 |     printf("Div result 10/2 %li\n", div(10, 2));
220 |     printf("Unsigned div result 10 / 2 %lu\n", unsign_div(10, 2));
221 |     printf("Div result -10/2 %li\n", div(-10, 2));
222 |     printf("Unsigned div result -10 / 2 %lu\n", unsign_div(-10, 2));
223 | 
224 |     float f_a = 1.1, f_b = 2.2;
225 |     printf("Float test %f + %f = %f\n", f_a, f_b, float_test(f_a, f_b));
226 | 
227 |     double d_a = 1.1, d_b = 2.2;
228 |     printf("Double test %f + %f = %f\n", d_a, d_b, double_test(d_a, d_b));
229 | 
230 |     return 0;
231 | }
229 | 


--------------------------------------------------------------------------------
/instr.py:
--------------------------------------------------------------------------------
  1 | """Implements the pcode instructions.
  2 | 
  3 | Intended usage is to call "instruction_finder", providing a Ghidra pcode
  4 | object.  That will return an appropriate instance of an Instruction object
  5 | corresponding to the pcode instruction.  That instance can then have "execute"
  6 | run on it.
  7 | """
  8 | 
  9 | import logging
 10 | import re
 11 | 
 12 | from .utils import find_all_subclasses
 13 | from .mem_markers import CallOtherMarker
 14 | 
 15 | logger = logging.getLogger(__name__)
 16 | 
 17 | class Instruction(object):
 18 |     """The base class for implementation of PCode instructions.
 19 | 
 20 |     Instantiate the proper Instruction object, then run execute, to modify
 21 |     emulator state corresponding to instruction execution.
 22 | 
 23 |     :param pcode: The ghidra pcode object from which to pull parameter
 24 |         information.
 25 |     :type pcode: ghidra.program.model.pcode.PcodeOp
 26 |     :param arch: The architecture for the instruction
 27 |     :type arch: Architecture
 28 |     """
 29 |     
 30 |     opcode = None
 31 |     def __init__(self, pcode, arch):
 32 |         """Constructor
 33 |         """
 34 |         self.pcode = pcode
 35 |         self.arch = arch
 36 | 
 37 |     def execute(self, state):
 38 |         """Execute an opcode and set emulator state as necessary.
 39 |         """
 40 |         inputs = self._resolve_inputs(state)
 41 |         ret = self._simple_exec_handler(state, inputs)
 42 |         if ret is not None:
 43 |             self._store_in_output(state, ret)
 44 | 
 45 |     def _simple_exec_handler(self, state, inputs):
 46 |         """A simpler version of execution handler for implementation.
 47 | 
 48 |         :param inputs: A list of input values for the pcode
 49 |         :type inputs: list
 50 | 
 51 |         :raises RuntimeError: When called on an instance of the base class
 52 |             instead of a subclass
 53 | 
 54 |         :return: The output value from the pcode instruction, for storage
 55 |             in the output location
 56 |         :rtype: int
 57 |         """
 58 |         raise RuntimeError("Called _simple_exec_handler on top-most class")
 59 | 
 60 |     def __str__(self):
 61 |         return "{}: {}".format(self.__class__.__name__, self.pcode)
 62 | 
 63 |     def _resolve_inputs(self, state):
 64 |         return [state.read_varnode(in_op) for in_op in self.pcode.inputs]
 65 | 
 66 |     def _store_in_output(self, state, value):
 67 |         return state.set_varnode(self.pcode.output, value)
 68 | 
 69 |     def _get_sign_bit(self, val, size):
 70 |         """Return the sign bit for a given value, for a set size and emulator
 71 |         register type.
 72 | 
 73 |         :param val: The value to calculate twos complement for
 74 |         :type val: int
 75 |         :param size: The number of bytes in the result
 76 |         :type size: int
 77 |         """
 78 |         # TODO - better place to pull bit count from?
 79 |         end_bits = val >> ((size * self.arch.bits_per_byte) - 1)
 80 |         return end_bits & 0x1
 81 | 
 82 |     def _get_2s_comp(self, val, size):
 83 |         """Return the twos complement of a value, for a set size and emulator
 84 |         register type.
 85 | 
 86 |         :param val: The value to calculate twos complement for
 87 |         :type val: int
 88 |         :param size: The number of bytes in the result
 89 |         :type size: int
 90 |         """
 91 |         return ((~val) + 1) & (2 ** (size * self.arch.bits_per_byte) - 1)
 92 | 
 93 | 
 94 | class Fill_In_Inst(Instruction):
 95 |     opcode = -1
 96 |     def _simple_exec_handler(self, state, inputs):
 97 |         raise RuntimeError("Tried to run fill-in instruction for opcode {}"
 98 |                 "".format(self.pcode.opcode))
 99 | 
100 | class Copy(Instruction):
101 |     opcode = 1
102 |     def _simple_exec_handler(self, state, inputs):
103 |         return inputs[0]
104 | 
105 | class Load(Instruction):
106 |     opcode = 2
107 |     def _simple_exec_handler(self, state, inputs):
108 |         return state.ram.load(inputs[1], self.pcode.inputs[1].size)
109 | 
110 | class Store(Instruction):
111 |     opcode = 3
112 |     def _simple_exec_handler(self, state, inputs):
113 |         state.ram.store(inputs[1], self.pcode.inputs[2].size, inputs[2])
114 |         return None
115 | 
116 | class Branch(Instruction):
117 |     opcode = 4
118 |     def _simple_exec_handler(self, state, inputs):
119 |         state.registers.store(self.arch.pc_offset, self.arch.reg_len,
120 |                 self.pcode.inputs[0].offset)
121 |         return None
122 | 
123 | class CBranch(Branch):
124 |     opcode = 5
125 |     def _simple_exec_handler(self, state, inputs):
126 |         if inputs[1]:
127 |             return super(CBranch, self)._simple_exec_handler(state, inputs)
128 |         else:
129 |             return None
130 | 
131 | class BranchInd(Instruction):
132 |     """Indirect branch or call.  Dereference input 0, then jump to the
133 |     result.  Alternatively - if input 0 is an instance of CallOtherMarker,
134 |     that means the program should continue on without interruption.
135 |     """
136 |     opcode = 6
137 |     def _simple_exec_handler(self, state, inputs):
138 |         if not isinstance(inputs[0], CallOtherMarker):
139 |             next_loc = inputs[0]
140 |             state.registers.store(self.arch.pc_offset, self.arch.reg_len,
141 |                     next_loc)
142 |             return None
143 | 
144 | 
145 |             next_loc = state.ram.load(inputs[0], self.pcode.inputs[0].size)
146 |             logger.debug("in {} sz {} next {}".format(inputs[0], self.pcode.inputs[0].size, next_loc))
147 |             logger.debug("ins {}".format(self.pcode.inputs))
148 |             state.registers.store(self.arch.pc_offset, self.arch.reg_len,
149 |                     next_loc)
150 |         return None
151 | 
152 | class Call(Branch):
153 |     """Uses Branch's handler unchanged; only the opcode differs."""
154 |     opcode = 7
154 | 
155 | class CallInd(BranchInd):
156 |     """Uses BranchInd's handler unchanged; only the opcode differs."""
157 |     opcode = 8
157 | 
158 | class CallOther(Instruction):
159 |     """CallOther instructions are described in a couple places, and implement a number of operations like software interrupts...  "userop.hh" describes them as below:
160 |         "Within the raw p-code framework, the CALLOTHER opcode represents a user defined operation. At this level, the operation is just a placeholder for inputs and outputs to same black-box procedure. The first input parameter (index 0) must be a constant id associated with the particular procedure. Classes derived off of this base class provide a more specialized definition of an operation/procedure. The specialized classes are managed via UserOpManage and are associated with CALLOTHER ops via the constant id.
161 |         "The derived classes can in principle implement any functionality, tailored to the architecture or program. At this base level, the only commonality is a formal \b name of the operator and its CALLOTHER index.  A facility for reading in implementation details is provided via restoreXml()."
162 | 
163 |     "improvingDisassemblyAndDecompilation.tex" has this to say about them: "These operations show up as CALLOTHER Pcode ops in the Pcode field in the Listing.  They can have inputs and outputs, but otherwise are treated as black boxes by the decompiler."
164 | 
165 |     In this code, we have to look up the operation in an architecture
166 |     specific way, then execute it.  Our return value needs to be a pointer to
167 |     the next location to execute, because it will get "CallInd" executed
168 |     on it.  Alternatively, CallInd/BranchInd will understand an instance of
169 |     CallOtherMarker being returned, and will simply continue executing code
170 |     without branching, when it is found.
171 |     """
172 |     opcode = 9
173 |     def _simple_exec_handler(self, state, inputs):
174 |         other_op = state.arch.resolve_callother(inputs[0], inputs[1])
175 |         retval = other_op(state, inputs[0], inputs[1])
176 |         return CallOtherMarker() if retval is None else retval
177 | 
178 | 
179 | class Return(Instruction):
180 |     opcode = 10
181 |     def _simple_exec_handler(self, state, inputs):
182 |         return None
183 | 
184 | class Int_Equal(Instruction):
185 |     opcode = 11
186 |     def _simple_exec_handler(self, state, inputs):
187 |         return 1 if inputs[0] == inputs[1] else 0
188 | 
189 | class Int_NotEqual(Instruction):
190 |     opcode = 12
191 |     def _simple_exec_handler(self, state, inputs):
192 |         return 1 if inputs[0] != inputs[1] else 0
193 | 
194 | class Int_SLess(Instruction):
195 |     opcode = 13
196 |     def _simple_exec_handler(self, state, inputs):
197 |         in0_bit = self._get_sign_bit(inputs[0], self.pcode.inputs[0].size)
198 |         in1_bit = self._get_sign_bit(inputs[1], self.pcode.inputs[1].size)
199 |         # If one is neg and one is pos, then return 1 if in0 is neg
200 |         if in0_bit != in1_bit:
201 |             return 1 if in0_bit == 1 else 0
202 |         # Otherwise, regular inequality will work
203 |         return 1 if inputs[0] < inputs[1] else 0
204 | 
class Int_SLessEqual(Instruction):
    """INT_SLESSEQUAL (opcode 14): signed "less than or equal" comparison."""
    opcode = 14

    def _simple_exec_handler(self, state, inputs):
        """Return 1 when inputs[0] <= inputs[1] as signed values, else 0.

        The sign of each input is taken from the inherited _get_sign_bit
        helper, using the size of the matching pcode input varnode.
        """
        operands = self.pcode.inputs
        lhs_sign = self._get_sign_bit(inputs[0], operands[0].size)
        rhs_sign = self._get_sign_bit(inputs[1], operands[1].size)

        if lhs_sign == rhs_sign:
            # Same sign on both sides: comparing the raw values is enough.
            if inputs[0] <= inputs[1]:
                return 1
            return 0

        # Opposite signs: the left side is smaller exactly when it is the
        # negative one.
        if lhs_sign == 1:
            return 1
        return 0
215 | 
class Int_Less(Instruction):
    """INT_LESS (opcode 15): "less than" on the raw input values."""
    opcode = 15

    def _simple_exec_handler(self, state, inputs):
        """Return 1 when inputs[0] < inputs[1], otherwise 0.

        No sign handling is applied; the values are compared as given.
        """
        if inputs[0] < inputs[1]:
            return 1
        return 0
220 | 
class Int_LessEqual(Instruction):
    """INT_LESSEQUAL (opcode 16): "less than or equal" on the raw inputs."""
    opcode = 16

    def _simple_exec_handler(self, state, inputs):
        """Return 1 when inputs[0] <= inputs[1], otherwise 0.

        No sign handling is applied; the values are compared as given.
        """
        if inputs[0] <= inputs[1]:
            return 1
        return 0
225 | 
class Int_Zext(Instruction):
    """INT_ZEXT (opcode 17): zero extension."""
    opcode = 17

    def _simple_exec_handler(self, state, inputs):
        """Return inputs[0] unchanged."""
        # Values are already held zero extended, so there is nothing to do.
        value = inputs[0]
        return value
231 | 
class Int_Sext(Instruction):
    """INT_SEXT (opcode 18): sign extension to the output size."""
    opcode = 18

    def _simple_exec_handler(self, state, inputs):
        """Return inputs[0] widened from the input size to the output size.

        When the sign bit reported by _get_sign_bit is 0 the value is
        returned untouched.  Otherwise every bit between the input width and
        the output width is set.
        """
        value = inputs[0]
        src_size = self.pcode.inputs[0].size
        if self._get_sign_bit(value, src_size) == 0:
            return value

        bits_per_byte = self.arch.bits_per_byte
        dst_bits = self.pcode.output.size * bits_per_byte
        src_bits = src_size * bits_per_byte
        # All-ones at the output width with the low (input width) bits
        # cleared: exactly the bits that have to be filled in.
        fill = ((2 ** dst_bits) - 1) ^ ((2 ** src_bits) - 1)
        return fill | value
242 | 
class Int_Add(Instruction):
    """INT_ADD (opcode 19): integer addition."""
    opcode = 19

    def _simple_exec_handler(self, state, inputs):
        """Return the sum of inputs[0] and inputs[1]."""
        augend = inputs[0]
        addend = inputs[1]
        return augend + addend
247 | 
class Int_Sub(Instruction):
    """INT_SUB (opcode 20): integer subtraction."""
    opcode = 20

    def _simple_exec_handler(self, state, inputs):
        """Return inputs[0] minus inputs[1]."""
        minuend = inputs[0]
        subtrahend = inputs[1]
        return minuend - subtrahend
252 | 
class Int_Carry(Instruction):
    """INT_CARRY (opcode 21): carry out of an addition.

    This becomes the carry flag in add (matters with unsigned ints).
    """
    opcode = 21

    def _simple_exec_handler(self, state, inputs):
        """Return 1 when inputs[0] + inputs[1] needs more bits than fit in
        the first input's size, otherwise 0.
        """
        # TODO - better place to pull bit count from?
        width_bits = self.pcode.inputs[0].size * self.arch.bits_per_byte
        total = inputs[0] + inputs[1]

        # Anything left after shifting away the storable bits is the carry.
        if (total >> width_bits) > 0:
            return 1
        return 0
266 | 
class Int_SCarry(Instruction):
    """INT_SCARRY (opcode 22): signed overflow of an addition.

    This becomes the overflow flag in add (matters with signed ints).
    """
    opcode = 22

    def _simple_exec_handler(self, state, inputs):
        """Return 1 when adding the two inputs overflows as signed, else 0."""
        operands = self.pcode.inputs
        sign_a = self._get_sign_bit(inputs[0], operands[0].size)
        sign_b = self._get_sign_bit(inputs[1], operands[1].size)

        # Operands of different sign can never overflow when added.
        if sign_a != sign_b:
            return 0

        # With matching signs the sum has to keep that sign; if it does not,
        # the addition overflowed.
        sum_sign = self._get_sign_bit(inputs[0] + inputs[1],
                operands[1].size)
        if sum_sign == sign_a:
            return 0
        return 1
283 | 
class Int_SBorrow(Instruction):
    """INT_SBORROW (opcode 23): signed overflow of a subtraction.

    Becomes the overflow flag in sub (matters with signed ints).  SBorrow is
    used to determine the OF flag in x64 sub/cmp, i.e. an overflow in the
    signed result; Int_Less determines the CF flag, which is an overflow in
    the unsigned result.
    """
    opcode = 23

    def _simple_exec_handler(self, state, inputs):
        """Return 1 when inputs[0] - inputs[1] overflows as signed, else 0."""
        lhs_size = self.pcode.inputs[0].size
        lhs_sign = self._get_sign_bit(inputs[0], lhs_size)
        rhs_sign = self._get_sign_bit(inputs[1], self.pcode.inputs[1].size)

        # Subtracting operands of the same sign cannot overflow.
        if lhs_sign == rhs_sign:
            return 0

        # With differing signs, overflow shows up as the difference no longer
        # carrying the sign of the left operand.
        diff_sign = self._get_sign_bit(inputs[0] - inputs[1], lhs_size)
        if diff_sign != lhs_sign:
            return 1
        return 0
304 | 
class Int_2Comp(Instruction):
    """INT_2COMP (opcode 24): arithmetic negation of the input."""
    opcode = 24

    def _simple_exec_handler(self, state, inputs):
        """Return the arithmetic negation of inputs[0]."""
        # TODO: Is this correct?  Python's own unary minus is relied on to
        # do the negation here.
        value = inputs[0]
        return -value
311 | 
class Int_Negate(Instruction):
    """INT_NEGATE (opcode 25): bitwise complement of the input."""
    opcode = 25

    def _simple_exec_handler(self, state, inputs):
        """Return the bitwise inversion (Python ``~``) of inputs[0]."""
        value = inputs[0]
        return ~value
316 | 
class Int_Xor(Instruction):
    """INT_XOR (opcode 26): bitwise exclusive or."""
    opcode = 26

    def _simple_exec_handler(self, state, inputs):
        """Return inputs[0] XOR inputs[1]."""
        left = inputs[0]
        right = inputs[1]
        return left ^ right
321 | 
class Int_And(Instruction):
    """INT_AND (opcode 27): bitwise and."""
    opcode = 27

    def _simple_exec_handler(self, state, inputs):
        """Return inputs[0] AND inputs[1]."""
        left = inputs[0]
        right = inputs[1]
        return left & right
326 | 
class Int_Or(Instruction):
    """INT_OR (opcode 28): bitwise or."""
    opcode = 28

    def _simple_exec_handler(self, state, inputs):
        """Return inputs[0] OR inputs[1]."""
        left = inputs[0]
        right = inputs[1]
        return left | right
331 | 
class Int_Left(Instruction):
    """INT_LEFT (opcode 29): left shift."""
    opcode = 29

    def _simple_exec_handler(self, state, inputs):
        """Return inputs[0] shifted left by inputs[1] bits."""
        value = inputs[0]
        shift = inputs[1]
        return value << shift
336 | 
class Int_Right(Instruction):
    """INT_RIGHT (opcode 30): right shift of the unsigned form of the input."""
    opcode = 30

    def _simple_exec_handler(self, state, inputs):
        """Return inputs[0] shifted right by inputs[1] bits.

        A negative inputs[0] is first replaced by the result of
        _get_2s_comp on its absolute value (at the first input's size), so
        that the shift operates on a non-negative number.
        """
        value = inputs[0]
        if value < 0:
            value = self._get_2s_comp(abs(value), self.pcode.inputs[0].size)
        return value >> inputs[1]
345 | 
class Int_SRight(Instruction):
    """INT_SRIGHT (opcode 31): signed right shift."""
    opcode = 31

    def _simple_exec_handler(self, state, inputs):
        """Return inputs[0] shifted right by inputs[1] bits using ``>>``."""
        # NOTE(review): Python's >> only sign-fills when the value is a
        # negative int.  If inputs arrive in unsigned form with the sign bit
        # set, this behaves like a logical shift - confirm against callers.
        value = inputs[0]
        shift = inputs[1]
        return value >> shift
350 | 
class Int_Mult(Instruction):
    """INT_MULT (opcode 32): integer multiplication."""
    opcode = 32

    def _simple_exec_handler(self, state, inputs):
        """Return the product of inputs[0] and inputs[1]."""
        multiplicand = inputs[0]
        multiplier = inputs[1]
        return multiplicand * multiplier
355 | 
class Int_Div(Instruction):
    """INT_DIV (opcode 33): integer division on the raw input values."""
    opcode = 33

    def _simple_exec_handler(self, state, inputs):
        """Return inputs[0] floor-divided by inputs[1].

        No sign handling is applied.  A zero divisor raises Python's
        ZeroDivisionError.
        """
        dividend = inputs[0]
        divisor = inputs[1]
        return dividend // divisor
360 | 
class Int_SDiv(Instruction):
    """INT_SDIV (opcode 34): signed integer division.

    The division is carried out on the magnitudes of the operands and the
    sign is re-applied afterwards, so the quotient is truncated toward zero.
    """
    opcode = 34

    def _simple_exec_handler(self, state, inputs):
        """Return inputs[0] divided by inputs[1], both treated as signed.

        Negative operands (sign bit of 1 according to _get_sign_bit) are
        converted to their magnitude with _get_2s_comp before dividing.  The
        quotient is negated again, at the first input's size, when exactly
        one operand was negative.

        :raises ZeroDivisionError: When inputs[1] is zero.
        """
        in0_size = self.pcode.inputs[0].size
        in1_size = self.pcode.inputs[1].size
        in0_neg = self._get_sign_bit(inputs[0], in0_size) == 1
        in1_neg = self._get_sign_bit(inputs[1], in1_size) == 1

        in0_mag = inputs[0]
        if in0_neg:
            in0_mag = self._get_2s_comp(inputs[0], in0_size)
        in1_mag = inputs[1]
        if in1_neg:
            in1_mag = self._get_2s_comp(inputs[1], in1_size)

        # Both magnitudes are non-negative, so floor division truncates
        # toward zero here.  The dividend magnitude must be divided by the
        # *divisor* magnitude, including when both operands are negative.
        quotient = in0_mag // in1_mag

        # Same signs give a non-negative quotient.  Zero has no sign, so it
        # is returned directly instead of being sent through _get_2s_comp.
        if in0_neg == in1_neg or quotient == 0:
            return quotient
        return self._get_2s_comp(quotient, in0_size)
384 | 
class Int_Rem(Instruction):
    """INT_REM (opcode 35): remainder on the raw input values."""
    opcode = 35

    def _simple_exec_handler(self, state, inputs):
        """Return inputs[0] modulo inputs[1].

        No sign handling is applied.  A zero divisor raises Python's
        ZeroDivisionError.
        """
        dividend = inputs[0]
        divisor = inputs[1]
        return dividend % divisor
389 | 
class Int_SRem(Instruction):
    """INT_SREM (opcode 36): signed integer remainder.

    The remainder is computed on the magnitudes of the operands and takes
    the sign of the dividend, matching a division that truncates toward
    zero (so ``-7 s% 2 == -1`` and ``7 s% -2 == 1``).
    """
    opcode = 36

    def _simple_exec_handler(self, state, inputs):
        """Return the remainder of inputs[0] / inputs[1], both signed.

        Negative operands (sign bit of 1 according to _get_sign_bit) are
        converted to their magnitude with _get_2s_comp before the modulo.
        The result is negated, at the first input's size, only when the
        dividend was negative.

        :raises ZeroDivisionError: When inputs[1] is zero.
        """
        in0_size = self.pcode.inputs[0].size
        in1_size = self.pcode.inputs[1].size
        in0_neg = self._get_sign_bit(inputs[0], in0_size) == 1
        in1_neg = self._get_sign_bit(inputs[1], in1_size) == 1

        in0_mag = inputs[0]
        if in0_neg:
            in0_mag = self._get_2s_comp(inputs[0], in0_size)
        in1_mag = inputs[1]
        if in1_neg:
            in1_mag = self._get_2s_comp(inputs[1], in1_size)

        # The modulo must use the *divisor* magnitude; taking the dividend
        # modulo itself would always give zero.
        remainder = in0_mag % in1_mag

        # Only the dividend's sign matters for the sign of the remainder.
        # Zero has no sign, so it is returned directly instead of being sent
        # through _get_2s_comp.
        if not in0_neg or remainder == 0:
            return remainder
        return self._get_2s_comp(remainder, in0_size)
413 | 
class Bool_Negate(Instruction):
    """BOOL_NEGATE (opcode 37): logical not."""
    opcode = 37

    def _simple_exec_handler(self, state, inputs):
        """Return 1 when inputs[0] equals 0, otherwise 0."""
        if inputs[0] == 0:
            return 1
        return 0
418 | 
class Bool_Xor(Instruction):
    """BOOL_XOR (opcode 38): logical exclusive or."""
    opcode = 38

    def _simple_exec_handler(self, state, inputs):
        """Return 1 when the two inputs differ, otherwise 0.

        The inputs are compared by value, not by truthiness.
        """
        if inputs[0] != inputs[1]:
            return 1
        return 0
423 | 
class Bool_And(Instruction):
    """BOOL_AND (opcode 39): logical and."""
    opcode = 39

    def _simple_exec_handler(self, state, inputs):
        """Return 1 when both inputs are truthy, otherwise 0."""
        if not inputs[0]:
            return 0
        if not inputs[1]:
            return 0
        return 1
428 | 
class Bool_Or(Instruction):
    """BOOL_OR (opcode 40): logical or."""
    opcode = 40

    def _simple_exec_handler(self, state, inputs):
        """Return 1 when at least one input is truthy, otherwise 0."""
        if inputs[0]:
            return 1
        if inputs[1]:
            return 1
        return 0
433 | 
434 | 
class Subpiece(Instruction):
    """SUBPIECE (opcode 63): drop low-order bytes from the first input."""
    opcode = 63

    def _simple_exec_handler(self, state, inputs):
        """Return inputs[0] shifted right by inputs[1] bytes.

        The byte count is turned into a bit count with the architecture's
        bits_per_byte.
        """
        dropped_bits = inputs[1] * self.arch.bits_per_byte
        return inputs[0] >> dropped_bits
439 | 
class Popcount(Instruction):
    """POPCOUNT (opcode 72): number of set bits in the input."""
    opcode = 72

    def _simple_exec_handler(self, state, inputs):
        """Return how many '1' digits the binary form of inputs[0] has."""
        binary_text = bin(inputs[0])
        return binary_text.count('1')
444 | 
445 | """
446 | Opcode map
447 | (1, u'COPY')
448 | (2, u'LOAD')
449 | (3, u'STORE')
450 | (4, u'BRANCH')
451 | (5, u'CBRANCH')
452 | (6, u'BRANCHIND')
453 | (7, u'CALL')
454 | (8, u'CALLIND')
455 | (9, u'CALLOTHER')
456 | (10, u'RETURN')
457 | (11, u'INT_EQUAL')
458 | (12, u'INT_NOTEQUAL')
459 | (13, u'INT_SLESS')
460 | (14, u'INT_SLESSEQUAL')
461 | (15, u'INT_LESS')
462 | (16, u'INT_LESSEQUAL')
463 | (17, u'INT_ZEXT')
464 | (18, u'INT_SEXT')
465 | (19, u'INT_ADD')
466 | (20, u'INT_SUB')
467 | (21, u'INT_CARRY')
468 | (22, u'INT_SCARRY')
469 | (23, u'INT_SBORROW')
470 | (24, u'INT_2COMP')
471 | (25, u'INT_NEGATE')
472 | (26, u'INT_XOR')
473 | (27, u'INT_AND')
474 | (28, u'INT_OR')
475 | (29, u'INT_LEFT')
476 | (30, u'INT_RIGHT')
477 | (31, u'INT_SRIGHT')
478 | (32, u'INT_MULT')
479 | (33, u'INT_DIV')
480 | (34, u'INT_SDIV')
481 | (35, u'INT_REM')
482 | (36, u'INT_SREM')
483 | (37, u'BOOL_NEGATE')
484 | (38, u'BOOL_XOR')
485 | (39, u'BOOL_AND')
486 | (40, u'BOOL_OR')
487 | (41, u'FLOAT_EQUAL')
488 | (42, u'FLOAT_NOTEQUAL')
489 | (43, u'FLOAT_LESS')
490 | (44, u'FLOAT_LESSEQUAL')
491 | (45, u'INVALID_OP')
492 | (46, u'FLOAT_NAN')
493 | (47, u'FLOAT_ADD')
494 | (48, u'FLOAT_DIV')
495 | (49, u'FLOAT_MULT')
496 | (50, u'FLOAT_SUB')
497 | (51, u'FLOAT_NEG')
498 | (52, u'FLOAT_ABS')
499 | (53, u'FLOAT_SQRT')
500 | (54, u'INT2FLOAT')
501 | (55, u'FLOAT2FLOAT')
502 | (56, u'TRUNC')
503 | (57, u'CEIL')
504 | (58, u'FLOOR')
505 | (59, u'ROUND')
506 | (60, u'MULTIEQUAL')
507 | (61, u'INDIRECT')
508 | (62, u'PIECE')
509 | (63, u'SUBPIECE')
510 | (64, u'CAST')
511 | (65, u'PTRADD')
512 | (66, u'PTRSUB')
513 | (67, u'INVALID_OP')
514 | (68, u'CPOOLREF')
515 | (69, u'NEW')
516 | (70, u'INSERT')
517 | (71, u'EXTRACT')
518 | (72, u'POPCOUNT')
519 | """
520 | 
def instruction_finder(pcode, arch):
    """Build the instruction object that implements a Ghidra pcode op.

    Pcode objects come from the "getPcode" function on instruction objects,
    which in turn come from the "getInstructionAt" function.  Every subclass
    of Instruction is inspected and the one whose ``opcode`` attribute equals
    the opcode of the given pcode is instantiated.

    :param pcode: The pcode object for which to find instructions
    :type pcode: ghidra.program.model.pcode.PcodeOp
    :param arch: The architecture for the instruction
    :type arch: Architecture

    :raises RuntimeError: Occurs when multiple implementations are found
        for one pcode.  That indicates an implementation error.

    :return: An instance of the instruction class implementing the pcode
        input.  Returns a "Fill_In_Inst" instance when no matching
        instruction is found.
    :rtype: Instruction
    """
    wanted = pcode.opcode
    candidates = []
    for candidate in find_all_subclasses(Instruction):
        if candidate.opcode == wanted:
            candidates.append(candidate)

    if not candidates:
        # A missing implementation is deliberately not an error: hand back
        # a Fill_In_Inst instead of raising.
        return Fill_In_Inst(pcode, arch)

    if len(candidates) > 1:
        raise RuntimeError(
                "Found multiple implementations for opcode {}".format(wanted))

    return candidates[0](pcode, arch)
553 | 


--------------------------------------------------------------------------------