├── .gitignore ├── examples ├── Makefile └── simple.c ├── LICENSE ├── README.md └── plugin ├── Harness.py ├── go.py ├── Node.py ├── Struct.py └── PCodeInterpreter.py /.gitignore: -------------------------------------------------------------------------------- 1 | examples/simple 2 | -------------------------------------------------------------------------------- /examples/Makefile: -------------------------------------------------------------------------------- 1 | all: simple simple.so 2 | .PHONY: all 3 | 4 | simple: simple.c 5 | $(CC) -O0 -DINCLUDE_MAIN -o simple simple.c 6 | strip simple 7 | 8 | simple.so: simple.c 9 | $(CC) -shared -O0 -o simple.so simple.c 10 | strip simple.so 11 | -------------------------------------------------------------------------------- /examples/simple.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | typedef struct { 5 | int first; 6 | int second; 7 | } pair; 8 | 9 | typedef struct { 10 | int myint; 11 | char mychar; 12 | size_t mysize; 13 | pair mypair; 14 | pair *pairptr; 15 | } grabbag; 16 | 17 | void fill_pair(pair *pairptr) { 18 | pairptr->first = rand(); 19 | pairptr->second = 7; 20 | } 21 | 22 | int initgrabbag(grabbag *bag) { 23 | bag->pairptr = malloc(sizeof(bag->pairptr)); 24 | fill_pair(&bag->mypair); 25 | fill_pair(bag->pairptr); 26 | bag->myint = 2; 27 | bag->mychar = 7; 28 | bag->mysize = 8; 29 | return bag->myint; 30 | } 31 | 32 | #ifdef INCLUDE_MAIN 33 | int main() { 34 | grabbag bag; 35 | 36 | initgrabbag(&bag); 37 | } 38 | #endif 39 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Unless otherwise marked, this license applies to all code in this repository. 2 | 3 | University of Illinois/NCSA Open Source License (UIUC license) 4 | Copyright (c) 2020 Grimm. All rights reserved. 5 | 6 | 7 | Developed by: Software Security Group 8 | Grimm 9 | https://grimm-co.com 10 | 11 | Permission is hereby granted, free of charge, to any person obtaining a copy of 12 | this software and associated documentation files (the "Software"), to deal with 13 | the Software without restriction, including without limitation the rights to 14 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 15 | the Software, and to permit persons to whom the Software is furnished to do so, 16 | subject to the following conditions: 17 | 18 | - Redistributions of source code must retain the above copyright notice, this 19 | list of conditions and the following disclaimers. 20 | - Redistributions in binary form must reproduce the above copyright notice, 21 | this list of conditions and the following disclaimers in the documentation 22 | and/or other materials provided with the distribution. 23 | - Neither the names of Grimm, nor the names of its contributors may be used to 24 | endorse or promote products derived from this Software without specific prior 25 | written permission. 26 | 27 | 28 | SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 29 | INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 30 | PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE CONTRIBUTORS OR 31 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 32 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 33 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE SOFTWARE. 34 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # GEARSHIFT 2 | GEARSHIFT is a tool that performs structure recovery for a specified function 3 | within a stripped binary. It also generates a fuzz harness that can be used 4 | to call functions in a shared object (.so) or dynamically linked library (.dll) 5 | file. 6 | 7 | The name comes from it leveraging a mix of reverse and forward engineering. 8 | 9 | ## Installation 10 | 11 | To install the Ghidra script, copy the python files to one of your Ghidra 12 | script directories: 13 | 14 | 1. In Ghidra, open the Script Manager (Window > Script Manager) 15 | 2. Click the "Script Directories" button to view the list of directories 16 | 3. Note the name of a directory. If there isn't one you can edit, add a new directory. 17 | 4. Copy all the python files in `plugin/` to the chosen directory. 18 | 5. Click the "Refresh Script List" button. The scripts should appear in the GEARSHIFT folder in the Script Manager. 19 | 20 | ## Usage 21 | 22 | 1. Select a function whose arguments you want to analyze. 23 | 2. From the Script Manager, under GEARSHIFT, select go.py and click Run. 24 | 3. Any structs that are identified from the arguments of the function will be 25 | defined in Data Type Manager under $binary_name > struct. 26 | 4. The script will generate harness code and print out the names of the files 27 | it generated 28 | 5. Compile the harness (must be compiled with `-ldl` flag for shared objects) 29 | 6. Run the harness, passing it the file name of your input file as the only 30 | argument 31 | 32 | ## Example Programs 33 | 34 | The `example/` directory contains example programs that can be used to try out 35 | the tool. Compile the example programs as follows: 36 | ``` 37 | $ cd example 38 | $ make 39 | ``` 40 | 41 | ## Limitations 42 | The harnesses generated by GEARSHIFT currently depend on the `LoadLibrary` and 43 | `dlopen` functions, which are unable to load executable files. If your target 44 | is an executable rather than a shared library, you may need to write your own 45 | harness, but you can use the generated code to create the input datastructure. 46 | 47 | If your target is an ELF executable, you may be able to fool `dlopen` into 48 | loading your binary by removing the PIE flag. The LIEF Project (versions >= 0.11.0) 49 | can be used to do so [as described 50 | here](https://lief.quarkslab.com/doc/latest/tutorials/08_elf_bin2lib.html#warning-for-glic-2-29-users). 51 | However, this may completely break your binary, depending on what relocations 52 | and other loader features it uses. 53 | 54 | ## Leveraged technologies 55 | The current tool is implemented as a Ghidra script. It leverages Ghidra's 56 | intermediate language and data dependency analysis to discover struct fields, 57 | and outputs its results to the Ghidra Data Type Manager. See 58 | [the associated blog post](https://blog.grimm-co.com/2020/11/automated-struct-identification-with.html) 59 | for more information. 60 | 61 | ## References of interest: 62 | 63 | - http://conferences.sigcomm.org/sigcomm/2010/papers/apsys/p13.pdf 64 | - https://pdfs.semanticscholar.org/1600/f73baa952cdf433f0ed6333815d3668f8f24.pdf 65 | - https://research.cs.wisc.edu/wpis/papers/cc04.pdf 66 | 67 | -------------------------------------------------------------------------------- /plugin/Harness.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # @category: GEARSHIFT.internal 3 | 4 | linux_template = r"""#include 5 | #include 6 | #include 7 | #include 8 | 9 | {structs} 10 | 11 | typedef int(*func)(void* a, ...); 12 | 13 | int main(int argc, char** argv) {{ 14 | if (argc < 2) {{ 15 | if (argc < 1) {{ 16 | printf("Usage: ./gearshift_harness_linux input_file\n"); 17 | }} else {{ 18 | printf("Usage: %s input_file\n", argv[0]); 19 | }} 20 | printf("\n"); 21 | printf("\tinput_file - data to put into the arguments\n"); 22 | printf("\n"); 23 | return 1; 24 | }} 25 | void* handle = dlopen("{process_path}", RTLD_LAZY); 26 | if (handle == NULL) {{ 27 | printf("Unable to open {process_path}. Exiting.\n"); 28 | return 2; 29 | }} 30 | // In glibc, the handle points to the library base address 31 | char* base = *((char**)handle); 32 | func f = (func)(base + 0x{func_offset:x}); 33 | 34 | FILE* h = fopen(argv[1], "r"); 35 | if (h == NULL) {{ 36 | printf("Unable to open %s. Exiting.\n", argv[1]); 37 | return 3; 38 | }} 39 | 40 | {code} 41 | 42 | int res = f((void*){args}); 43 | 44 | {cleanup} 45 | 46 | printf("Result: %d\n", res); 47 | }} 48 | """ 49 | 50 | windows_template = r"""#include 51 | #include 52 | #include 53 | #include 54 | 55 | {structs} 56 | 57 | typedef int(*func)(void* a, ...); 58 | 59 | int main(int argc, char** argv) {{ 60 | if (argc < 2) {{ 61 | if (argc < 1) {{ 62 | printf("Usage: gearshift_harness_windows input_file\n"); 63 | }} else {{ 64 | printf("Usage: %s input_file\n", argv[0]); 65 | }} 66 | printf("\n"); 67 | printf("\tinput_file - data to put into the arguments\n"); 68 | printf("\n"); 69 | return 1; 70 | }} 71 | HMODULE lib = LoadLibraryA("{process_path}"); 72 | if (!lib) {{ 73 | printf("Load Library failed: %d\n", GetLastError()); 74 | exit(1); 75 | }} 76 | 77 | // On Windows, the handle is the library base address 78 | char* base = (char*)lib; 79 | func f = (func)(base + 0x{func_offset:x}); 80 | 81 | FILE* h; 82 | fopen_s(&h, argv[1], "r"); 83 | 84 | {code} 85 | 86 | int res = f((void*){args}); 87 | 88 | {cleanup} 89 | 90 | printf("Result: %d\n", res); 91 | }} 92 | """ 93 | 94 | def generate_linux_harness(struct_defs, ppath, func_off, code, cleanup, args): 95 | return linux_template.format(structs=struct_defs, process_path=ppath, func_offset=func_off, code="\t" + code.replace("\n", "\n\t"), cleanup="\t" + cleanup.replace("\n", "\n\t"), args=args) 96 | 97 | def generate_windows_harness(struct_defs, ppath, func_off, code, cleanup, args): 98 | return windows_template.format(structs=struct_defs, process_path=ppath, func_offset=func_off, code="\t" + code.replace("\n", "\n\t"), cleanup="\t" + cleanup.replace("\n", "\n\t"), args=args) 99 | -------------------------------------------------------------------------------- /plugin/go.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # GEARSHIFT struct identifier 3 | # @category: GEARSHIFT 4 | 5 | from __future__ import print_function 6 | import time 7 | import os.path 8 | 9 | from ghidra.app.decompiler import * 10 | from ghidra.program.model import address 11 | from ghidra.program.model.pcode import PcodeOp 12 | from ghidra.program.model.data import Undefined 13 | from ghidra.program.model.symbol import SourceType 14 | from ghidra.program.flatapi import FlatProgramAPI 15 | from ghidra.app.cmd.function import ApplyFunctionSignatureCmd 16 | from ghidra.program.model.pcode import HighFunctionDBUtil 17 | 18 | import PCodeInterpreter 19 | import Node 20 | import Struct 21 | from Harness import * 22 | 23 | # Global config 24 | ARCH_BITS = currentProgram.getDefaultPointerSize() * 8 25 | 26 | decompInterface = ghidra.app.decompiler.DecompInterface() 27 | decompInterface.openProgram(currentProgram) 28 | PCodeInterpreter.decompInterface = decompInterface 29 | PCodeInterpreter.monitor = monitor 30 | PCodeInterpreter.currentProgram = currentProgram 31 | PCodeInterpreter.ARCH_BITS = ARCH_BITS 32 | Node.ARCH_BITS = ARCH_BITS 33 | Struct.ARCH_BITS = ARCH_BITS 34 | Struct.struct_counter = 0 35 | Struct.currentProgram = currentProgram 36 | 37 | """ 38 | NOTES on interprocedural analysis 39 | There are two major types of analysis we want to do: FORWARD and BACKWARD 40 | - To identify the struct usage of a parameter to a function, we do FORWARD analysis on the passed parameter 41 | - To identify the types of the fields stored into a member of a parameter, we do BACKWARDs analysis on the value stored 42 | - To identify the types of the fields loaded from a member of a parameter struct, we do FORWARD analysis on the loaded value 43 | - When a stored value is derived from another function call, we must perform backwards analysis on all the return value of that function 44 | - When a loaded value is passed into another function call, we must perform forwards analysis on that parameter to determine its struct type 45 | Example backward analysis: https://www.riverloopsecurity.com/blog/2019/05/pcode/ 46 | 47 | NOTES on caching 48 | We should never run forward analysis on the same function twice. This is because we should already know the loads and stores performed on the argument after running it once. 49 | For backward analysis, we are able to cache the results of the first run by using placeholder parameter inputs, and the return types based on these placeholders. Therefore, the next run, we just DFS to replace all the placeholder inputs with our actual parameters and then we have obtained the return type. 50 | 51 | TODO: test recursive function analysis 52 | 53 | To identify arrays, we use the idea of loop variants. A loop variant is the output from a multiequal pcode op. When running analysis multiple times with different loop variant initial conditions, the loop variant changes each run. The loads or stores that change are likely array loads and stores. Using the differences in struct accesses, we can infer which ones are arrays, and the stride of the array. 54 | """ 55 | 56 | start = time.time() 57 | 58 | # get current function 59 | listing = currentProgram.getListing() 60 | currentFunction = listing.getFunctionContaining(currentAddress) 61 | entryPoint = currentFunction.getEntryPoint() 62 | base_address = currentProgram.getImageBase().getOffset() 63 | function_offset = entryPoint.getOffset() - currentProgram.getImageBase().getOffset() 64 | program_path = currentProgram.getExecutablePath() 65 | 66 | pci = PCodeInterpreter.PCodeInterpreter() 67 | pci.currentProgram = currentProgram 68 | argument_varnodes = PCodeInterpreter.analyzeFunctionForward(currentFunction, pci) 69 | 70 | important_stores = [] 71 | important_loads = [] 72 | argument_node_objs = [] 73 | for i in argument_varnodes: 74 | argument_node_objs += pci.lookup_node(i) 75 | argument_structs = [None] * len(argument_varnodes) 76 | 77 | for i in pci.stores: 78 | if i.contains(argument_node_objs): 79 | important_stores.append(i) 80 | for i in pci.loads: 81 | if i.contains(argument_node_objs): 82 | important_loads.append(i) 83 | 84 | print("Start creating struct") 85 | 86 | args = [] 87 | for i in range(len(argument_structs)): 88 | args.append(Struct.Struct(0)) 89 | 90 | used_hash = set() 91 | used_expressions = [] 92 | for i in (important_stores + important_loads): 93 | simplified = i.simplify() 94 | if hash(simplified) in used_hash: 95 | continue 96 | try: 97 | substruct, offset, grand = simplified.create_struct(args, simplified.byte_length) 98 | if i in pci.arrays and not grand[0].is_array: 99 | grand[0].make_array() 100 | used_expressions.append(simplified) 101 | used_hash.add(hash(simplified)) 102 | except ValueError as e: 103 | print(e) 104 | 105 | print("Done interpolating structs") 106 | 107 | # Apply data type to original function 108 | orig_params = currentFunction.getParameters() 109 | assert len(orig_params) == len(args) 110 | struct_code = [] 111 | for i in range(len(args)): 112 | code = args[i].pretty_print() 113 | struct_code.append(code) 114 | print(code) 115 | dt = args[i].get_dtype() 116 | print(dt) 117 | orig_params[i].setDataType(dt, SourceType.USER_DEFINED) 118 | 119 | # Apply data types to subcall functions 120 | for i in range(currentFunction.getParameterCount()): 121 | used_hash.add(hash("ARG{}".format(i))) 122 | for func in pci.subcall_parameter_cache: 123 | params = pci.subcall_parameter_cache[func] 124 | for param_idx in range(len(params)): 125 | seen = set() 126 | for j in params[param_idx]: 127 | simplified = j.simplify() 128 | h = hash(simplified) 129 | if h in used_hash and h not in seen: 130 | seen.add(h) 131 | arg_idx = simplified.find_base_idx2() 132 | t, off = simplified.traverse_struct(args[arg_idx]) 133 | if isinstance(t, Struct.Struct): 134 | print("Applying type {} to function {} parameter {}".format(t.name, func, param_idx)) 135 | func.getParameters()[param_idx].setDataType(t.get_dtype(), SourceType.USER_DEFINED) 136 | 137 | code, cleanup, arg_names = Struct.generate_struct_reader(args) 138 | struct_defs = "".join(struct_code) 139 | 140 | linux_harness = generate_linux_harness(struct_defs, program_path, function_offset, code, cleanup, arg_names) 141 | windows_harness = generate_windows_harness(struct_defs, program_path, function_offset, code, cleanup, arg_names) 142 | 143 | linux_filename = os.path.abspath('gearshift_harness_linux.c') 144 | windows_filename = os.path.abspath('gearshift_harness_windows.c') 145 | 146 | print("writing linux harness to", linux_filename) 147 | with open(linux_filename, 'w') as harness: 148 | harness.write(linux_harness) 149 | print("writing windows harness to", windows_filename) 150 | with open(windows_filename, 'w') as harness: 151 | harness.write(windows_harness) 152 | 153 | end = time.time() 154 | print("DONE - Took:", (end - start)) 155 | -------------------------------------------------------------------------------- /plugin/Node.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # @category: GEARSHIFT.internal 3 | 4 | from __future__ import print_function 5 | 6 | from Struct import Struct 7 | from ghidra.program.model.pcode import Varnode 8 | 9 | # Abstract binary operation tree that stores the symbolic expression 10 | class Node: 11 | def __init__(self, operation, left, right, byte_length): 12 | self.left = left 13 | self.right = right 14 | self.operation = operation 15 | self.byte_length = byte_length 16 | 17 | def traverse_struct(self, struct): 18 | if self.is_leaf() and str(self.operation).startswith("ARG"): 19 | return struct, 0 20 | elif self.operation == "+": 21 | assert isinstance(self.left, Node) 22 | res, off = self.left.traverse_struct(struct) 23 | return res, off + self.right.operation.getOffset() 24 | elif self.operation == "*()": 25 | assert isinstance(self.left, Node) 26 | res, off = self.left.traverse_struct(struct) 27 | return res.get2(off), 0 28 | elif self.operation == "RESIZE": 29 | return self.left.traverse_struct(struct) 30 | else: 31 | print("Not yet supported", self.operation) 32 | raise ValueError("Not yet supported") 33 | 34 | # (Object reference, reference offset, (Grandparent struct, grandparent offset)) 35 | # Creates the struct specified in arg_struct_list 36 | # The intuition is that when we encounter a pointer, we also hold a pointer to that pointer (grandparent). Therefore if a pointer is dereferenced and that pointer is not yet marked a struct, then we use grandparent to change it into a struct 37 | # Otherwise, we just keep track of the current offsets into the current struct and recursive base case is the argument struct. 38 | def create_struct(self, arg_struct_list, parent_byte_length): 39 | if self.is_leaf() and str(self.operation).startswith("ARG"): 40 | arg_idx = int(self.operation[3:]) 41 | return (arg_struct_list[arg_idx], 0, None) 42 | elif self.operation == "+": 43 | assert isinstance(self.left, Node) 44 | sub_struct, offset, grand = self.left.create_struct(arg_struct_list, self.byte_length) 45 | if isinstance(self.right, Node): 46 | if not isinstance(self.right.operation, Varnode) or not self.right.operation.isConstant(): 47 | raise ValueError("Complex expression, skipping") 48 | if self.right.operation.getOffset() & (1 << (self.right.operation.getSize() * 8 - 1)) != 0: 49 | raise ValueError("Negative constaints not supported yet") 50 | offset += self.right.operation.getOffset() 51 | else: 52 | if not self.right.isConstant(): 53 | print("Non constant indexed detected: Possible array?") 54 | else: 55 | raise Exception("Shouldn't happen") 56 | return (sub_struct, offset, grand) 57 | elif self.operation == "*()": 58 | assert isinstance(self.left, Node) 59 | sub_struct, offset, grand = self.left.create_struct(arg_struct_list, self.byte_length) 60 | if not isinstance(sub_struct, Struct): 61 | temp = Struct(offset + parent_byte_length) 62 | grand[0].insert(grand[1], (temp, ARCH_BITS / 8)) 63 | sub_struct, offset, grand = self.left.create_struct(arg_struct_list, self.byte_length) 64 | sub_struct = temp 65 | sub_struct.extend(offset + parent_byte_length) 66 | if sub_struct.get(offset)[1] == 1: 67 | sub_struct.insert(offset, (0, parent_byte_length)) 68 | return (sub_struct.get(offset)[0], 0, (sub_struct, offset)) 69 | elif self.operation == "RESIZE": 70 | return self.left.create_struct(arg_struct_list, self.byte_length) 71 | else: 72 | print("Not yet supported", self.operation) 73 | raise ValueError("Not yet supported") 74 | 75 | def __str__(self): 76 | if self.is_leaf(): 77 | return str(self.operation) 78 | elif self.operation == "*()": 79 | return "*({})".format(str(self.left)) 80 | elif self.operation == "RESIZE": 81 | return "(uint{}_t)({})".format(self.byte_length * 8, str(self.left)) 82 | elif self.operation == "~": 83 | return "~({})".format(str(self.left)) 84 | else: 85 | return str(self.left) + " " + self.operation + " " + str(self.right) 86 | 87 | def __repr__(self): 88 | return '"' + self.__str__() + '"' 89 | 90 | def __hash__(self): 91 | ret = hash(str(self)) 92 | return ret 93 | 94 | def relevant(self): 95 | good = self.operation in ("+", "*()", "RESIZE", "*") or (self.is_leaf() and str(self.operation).startswith("ARG")) or (isinstance(self.operation, Varnode) and self.operation.isConstant()) or self.is_varnode_constant() 96 | if isinstance(self.left, Node): 97 | good = good and self.left.relevant() 98 | if isinstance(self.right, Node): 99 | good = good and self.right.relevant() 100 | return good 101 | 102 | def contains(self, nodes): 103 | if self is None: 104 | return False 105 | return self in nodes or (isinstance(self.left, Node) and self.left.contains(nodes)) or (isinstance(self.right, Node) and self.right.contains(nodes)) 106 | 107 | def find_base_idx2(self): 108 | if self.is_leaf() and str(self.operation).startswith("ARG"): 109 | return int(str(self.operation).split("ARG")[1]) 110 | res = None 111 | if isinstance(self.left, Node) and res is None: 112 | res = self.left.find_base_idx2() 113 | if isinstance(self.right, Node) and res is None: 114 | res = self.right.find_base_idx2() 115 | return res 116 | 117 | def find_base_idx(self, old_params): 118 | if self in old_params: 119 | idx = old_params.index(self) 120 | return idx 121 | res = None 122 | if isinstance(self.left, Node) and res is None: 123 | res = self.left.find_base_idx(old_params) 124 | if isinstance(self.right, Node) and res is None: 125 | res = self.right.find_base_idx(old_params) 126 | return res 127 | 128 | #replaces instances of old_params in the binary tree with instance in new_params, and makes a copy of all nodes 129 | def replace_base_parameters(self, old_params, new_param): 130 | if self in old_params: 131 | return new_param 132 | ret = self.shallow_copy() 133 | if isinstance(ret.left, Node): 134 | ret.left = ret.left.replace_base_parameters(old_params, new_param) 135 | if isinstance(ret.right, Node): 136 | ret.right = ret.right.replace_base_parameters(old_params, new_param) 137 | return ret 138 | 139 | def is_varnode_constant(self): 140 | return isinstance(self.operation, Varnode) and self.operation.isConstant() 141 | 142 | def _simplify(self): 143 | # TODO: better simplification in the future 144 | changed = False 145 | ret = self.shallow_copy() 146 | if ret.left is not None: 147 | ret.left, c = ret.left._simplify() 148 | changed |= c 149 | if ret.right is not None: 150 | ret.right, c = ret.right._simplify() 151 | changed |= c 152 | if ret.operation == "*" and (self.left.is_varnode_constant()) and (self.right.is_varnode_constant()) and ret.left.operation.getSize() == ret.right.operation.getSize(): 153 | temp = ret.left.operation 154 | temp2 = ret.right.operation 155 | ret = Node(Varnode(temp.getAddress().getNewAddress(temp.getOffset() * temp2.getOffset()), temp.getSize()), None, None, temp.getSize()) 156 | return ret, True 157 | elif ret.operation == "+" and (self.left.is_varnode_constant()) and (self.right.is_varnode_constant()) and ret.left.operation.getSize() == ret.right.operation.getSize(): 158 | temp = ret.left.operation 159 | temp2 = ret.right.operation 160 | ret = Node(Varnode(temp.getAddress().getNewAddress(temp.getOffset() + temp2.getOffset()), temp.getSize()), None, None, temp.getSize()) 161 | return ret, True 162 | elif ret.operation == "RESIZE" and self.left.is_varnode_constant(): 163 | return Node(Varnode(ret.left.operation.getAddress(), ret.byte_length), ret.left.left, ret.left.right, ret.byte_length), True 164 | return ret, changed 165 | 166 | def simplify(self): 167 | s, c = self._simplify() 168 | while c: 169 | s, c = s._simplify() 170 | return s 171 | 172 | def shallow_copy(self): 173 | ret = Node(self.operation, self.left, self.right, self.byte_length) 174 | return ret 175 | 176 | def deep_copy(self): 177 | left = self.left 178 | right = self.right 179 | if isinstance(left, Node): 180 | return left.deep_copy() 181 | if isinstance(right, Node): 182 | return rigth.deep_copy() 183 | return Node(self.operation, left, right, self.byte_length) 184 | 185 | def is_leaf(self): 186 | return self.left is None and self.right is None 187 | 188 | def add(self, value): 189 | return Node("+", self, value, self.byte_length) 190 | 191 | def sub(self, value): 192 | return Node("-", self, value, self.byte_length) 193 | 194 | def mult(self, value): 195 | return Node("*", self, value, self.byte_length) 196 | 197 | def div(self, value): 198 | return Node("/", self, value, self.byte_length) 199 | 200 | def shl(self, value): 201 | return Node("<<", self, value, self.byte_length) 202 | 203 | def shr(self, value): 204 | return Node(">>", self, value, self.byte_length) 205 | 206 | def bitwise_xor(self, value): 207 | return Node("^", self, value, self.byte_length) 208 | 209 | def bitwise_or(self, value): 210 | return Node("|", self, value, self.byte_length) 211 | 212 | def bitwise_and(self, value): 213 | return Node("&", self, value, self.byte_length) 214 | 215 | def ptr_deref(self): 216 | return Node("*()", self, None, self.byte_length) 217 | 218 | def resize(self, new_length): 219 | return Node("RESIZE", self, None, new_length) 220 | 221 | def eq(self, other): 222 | return Node("==", self, other, self.byte_length) 223 | 224 | def neq(self, other): 225 | return Node("neq", self, other, self.byte_length) 226 | 227 | def lt(self, other): 228 | return Node("<", self, other, self.byte_length) 229 | 230 | def le(self, other): 231 | return Node("<=", self, other, self.byte_length) 232 | 233 | def slt(self, other): 234 | return Node("s<", self, other, self.byte_length) 235 | 236 | def sle(self, other): 237 | return Node("s<=", self, other, self.byte_length) 238 | 239 | def neg(self): 240 | return Node("~", self, None, self.byte_length) 241 | 242 | def sdiv(self, other): 243 | return Node("s/", self, other, self.byte_length) 244 | 245 | def smod(self, other): 246 | return Node("s%", self, other, self.byte_length) 247 | 248 | def mod(self, other): 249 | return Node("%", self, other, self.byte_length) 250 | 251 | def sshr(self, other): 252 | return Node("s>>", self, other, self.byte_length) 253 | -------------------------------------------------------------------------------- /plugin/Struct.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # @category: GEARSHIFT.internal 3 | 4 | from __future__ import print_function 5 | 6 | from ghidra.program.model.data import StructureDataType, CategoryPath, DataTypeConflictHandler, PointerDataType, BuiltInDataTypeManager, ArrayDataType 7 | 8 | class Struct(object): 9 | def __init__(self, size): 10 | self.size = size # Total size of the struct 11 | self.members = [(0, 1)] * size # Represents member (value, member_size) 12 | self.marked = [False] * size # Marked represents offsets in the struct that are accessed 13 | self.is_array = False 14 | global struct_counter 15 | self.name = "S{}".format(struct_counter) 16 | struct_counter += 1 17 | self.dtype = None 18 | self.pretty = None 19 | 20 | def get_dtype(self): 21 | if self.dtype is not None: 22 | return self.dtype 23 | dm = currentProgram.getDataTypeManager() 24 | bdm = BuiltInDataTypeManager.getDataTypeManager() 25 | new_struct = StructureDataType(CategoryPath("/struct"), self.name, self.size) 26 | size_lookup = {} 27 | size_lookup[1] = bdm.getDataType("/char") 28 | size_lookup[2] = bdm.getDataType("/short") 29 | size_lookup[4] = bdm.getDataType("/int") 30 | size_lookup[8] = bdm.getDataType("/longlong") 31 | off = 0 32 | for i in range(len(self.members)): 33 | t, size = self.members[i][0], self.members[i][1] 34 | comment = "" 35 | if len(self.members[i]) > 2 and self.members[i][2] == False: 36 | comment = "NOT ACCESSED" 37 | if isinstance(t, Struct): 38 | if not t.is_array: 39 | sub_struct_dtype = t.get_dtype() 40 | new_struct.replaceAtOffset(off, sub_struct_dtype, ARCH_BITS / 8, "entry_{}".format(i), comment) 41 | else: 42 | arr_dtype = bdm.getPointer(size_lookup[1], ARCH_BITS / 8) 43 | new_struct.replaceAtOffset(off, arr_dtype, ARCH_BITS / 8, "entry_{}".format(i), comment) 44 | else: 45 | if size not in size_lookup: 46 | arr_dtype = ArrayDataType(size_lookup[1], size, 1) 47 | new_struct.replaceAtOffset(off, arr_dtype, size, "entry_{}".format(i), comment) 48 | else: 49 | new_struct.replaceAtOffset(off, size_lookup[size], size, "entry_{}".format(i), comment) 50 | off += size 51 | print("DONE CREATING STRUCT", self.name) 52 | dm.addDataType(new_struct, DataTypeConflictHandler.REPLACE_HANDLER) 53 | self.dtype = dm.getPointer(new_struct, ARCH_BITS / 8) 54 | return self.dtype 55 | 56 | def __str__(self): 57 | return str(self.members) 58 | 59 | def __repr__(self): 60 | return self.__str__() 61 | 62 | def make_array(self): 63 | print("Making array") 64 | print(self.members) 65 | self.is_array = True 66 | stride = self.members[0][1] 67 | self.stride = stride 68 | 69 | # Consolidates struct members of size 1 into a char array 70 | def consolidate(self): 71 | new_members = [] 72 | consolidate_length = 0 73 | cur_offset = 0 74 | for i in self.members: 75 | if self.marked[cur_offset] is True: 76 | if consolidate_length != 0: 77 | new_members.append((0, consolidate_length, False)) 78 | consolidate_length = 0 79 | new_members.append(i) 80 | else: 81 | consolidate_length += 1 82 | cur_offset += i[1] 83 | if consolidate_length != 0: 84 | new_members.append((0, consolidate_length)) 85 | consolidate_length = 0 86 | self.members = new_members 87 | 88 | def mark(self, start, end): 89 | for i in range(start, end): 90 | self.marked[i] = True 91 | 92 | # Indicates that there is a struct member (value, member_size) at given offset 93 | def insert(self, offset, member): 94 | c = 0 95 | idx = 0 96 | # find member 97 | while c < offset: 98 | c += self.members[idx][1] 99 | idx += 1 100 | if c != offset: 101 | print("Misaligned buf") 102 | self.break_member(idx - 1) 103 | self.insert(offset, member) 104 | return 105 | 106 | # combine 107 | c = 0 108 | temp = idx 109 | while c < member[1]: 110 | c += self.members[idx][1] 111 | idx += 1 112 | if c != member[1]: 113 | # Misaligned struct and data size accesses - might be an array? 114 | print("Misaligned buf") 115 | self.break_member(idx - 1) 116 | self.insert(offset, member) 117 | return 118 | c = 0 119 | idx = temp 120 | while c < member[1]: 121 | c += self.members[idx][1] 122 | del self.members[idx] 123 | self.members.insert(idx, member) 124 | self.mark(offset, offset + member[1]) 125 | 126 | def merge_until(self, idx, until): 127 | total_length = 0 128 | while idx < len(self.members) and self.members[idx][0] != until: 129 | total_length += self.members[idx][1] 130 | del self.members[idx] 131 | self.members.insert(idx, (0, total_length)) 132 | 133 | # Breaks apart the member at index self.members[idx] 134 | def break_member(self, idx): 135 | assert not isinstance(self.members[idx][0], Struct) 136 | size = self.members[idx][1] 137 | del self.members[idx] 138 | for i in range(size): 139 | self.members.insert(idx, (0, 1)) 140 | 141 | # Fetches member at given offset, and breaks apart member if there is member alignment conflict 142 | def get(self, offset): 143 | c = 0 144 | idx = 0 145 | while c < offset: 146 | c += self.members[idx][1] 147 | idx += 1 148 | if c != offset: 149 | # Same issue as insert 150 | print(self.members[idx - 1][1]) 151 | print(c) 152 | print("Get issue", self.members[idx - 1]) 153 | self.break_member(idx - 1) 154 | ret = self.get(offset) 155 | return ret 156 | self.mark(offset, offset + self.members[idx][1]) 157 | return self.members[idx] 158 | 159 | # Only fetches member at given offset 160 | def get2(self, offset): 161 | c = 0 162 | idx = 0 163 | while c < offset: 164 | c += self.members[idx][1] 165 | idx += 1 166 | if c != offset: 167 | return -1 168 | return self.members[idx][0] 169 | 170 | # Extends the size of the struct 171 | def extend(self, length): 172 | while self.size < length: 173 | self.size += 1 174 | self.members.append((0, 1)) 175 | self.marked.append(False) 176 | 177 | def get_field(self, length, entry_num): 178 | if length <= 8 and length & 1 == 0: 179 | return "uint{}_t entry_{};".format(length * 8, entry_num) 180 | elif length == 1: 181 | return "char entry_{};".format(entry_num) 182 | else: 183 | return "char entry_{}[{}];".format(entry_num, length) 184 | 185 | def pretty_print(self): 186 | if self.pretty is not None: 187 | return self.pretty 188 | self.consolidate() 189 | 190 | # first, we detect if it's size 0, or only has one member 191 | if self.size == 0: 192 | return "" 193 | elif len(self.members) == 1: 194 | return "" 195 | 196 | res = "struct {} {{\n".format(self.name) 197 | 198 | c = -1 199 | length = 0 200 | entry_counter = -1 201 | while length < self.size: 202 | c += 1 203 | entry_counter += 1 204 | if isinstance(self.members[c][0], Struct): 205 | length += ARCH_BITS / 8 206 | if not self.members[c][0].is_array: 207 | res += "struct {}* entry_{};\n".format(self.members[c][0].name, entry_counter) 208 | res = self.members[c][0].pretty_print() + "\n" + res 209 | else: 210 | res += "uint{}_t* entry_{};\n".format(self.members[c][0].stride * 8, entry_counter) 211 | else: 212 | res += self.get_field(self.members[c][1], entry_counter) + "\n" 213 | if len(self.members[c]) > 2: 214 | res = res[:-1] + " //NOT ACCESSED\n" 215 | length += self.members[c][1] 216 | self.pretty = res + "};" 217 | return self.pretty 218 | 219 | class Generator(object): 220 | def __init__(self): 221 | self.allocation_counter = 0 222 | 223 | def _new_allocation(self): 224 | alloc = "allocation{}".format(self.allocation_counter) 225 | self.allocation_counter += 1 226 | return alloc 227 | 228 | def _do_read(self, struct, current_reference): 229 | ret = "" 230 | clean = "" 231 | 232 | if not struct.is_array: 233 | curoff = 0 234 | total_length = 0 235 | for i in range(len(struct.members)): 236 | total_length += struct.members[i][1] 237 | 238 | current_allocation = self._new_allocation() 239 | ret += "void* {} = malloc({});\n".format(current_allocation, total_length) 240 | ret += "{} = (struct {}*){};\n".format(current_reference, struct.name, current_allocation) 241 | for i in range(len(struct.members)): 242 | value = struct.members[i][0] 243 | length = struct.members[i][1] 244 | if type(value) is int and value & 0xff == 0x0: 245 | ret += "fread((void*)&{}->entry_{}, 1, {}, h);\n".format(current_reference, i, length) 246 | elif type(value) is int and value & 0xff == 0x1: 247 | entry_allocation = self._new_allocation() 248 | ret += "void* {} = malloc({});\n".format(entry_allocation, (value >> 8) + 1) 249 | ret += "{}->entry_{} = (char*){};\n".format(current_reference, i, entry_allocation); 250 | ret += "{}->entry_{}[{}] = 0;\n".format(current_reference, i, (value >> 8)); 251 | ret += "fread({}->entry_{}, 1, {}, h);\n" .format(current_reference, i, value >> 8) 252 | clean += "free({});\n".format(entry_allocation) 253 | else: 254 | r, c = self._do_read(value, current_reference + "->entry_{}".format(i)) 255 | ret += r 256 | clean += c 257 | curoff += length 258 | clean += "free({});\n".format(current_allocation) 259 | else: 260 | current_allocation = self._new_allocation() 261 | ret += "void* {} = malloc({});\n".format(current_allocation, 8 * struct.stride) 262 | ret += "{} = (char*){};\n".format(current_reference, current_allocation) 263 | ret += "fread((char*){}, 1, {}, h);\n".format(current_reference, 8 * struct.stride); 264 | clean += "free({});\n".format(current_allocation) 265 | return ret, clean 266 | 267 | def generate_struct_reader(self, args): 268 | code = "" 269 | cleanup = "" 270 | arg_names = [] 271 | for i in range(len(args)): 272 | arg_names.append("arg_{}".format(i)) 273 | if args[i].size == 0: 274 | # this is an int 275 | code += args[i].get_field(ARCH_BITS / 8, 0).replace("entry_0", "arg_{}".format(i)) + "\n" 276 | code += "fread(&arg_{}, 1, 8, h);\n".format(i) 277 | elif len(args[i].members) == 1: 278 | # this is a primitive pointer 279 | code += args[i].get_field(ARCH_BITS / 8, 0).replace("entry_0", "temp_arg_{}".format(i)) + "\n" 280 | code += args[i].get_field(ARCH_BITS / 8, 0).replace("entry_0", "*arg_{}".format(i))[:-1] + " = &temp_arg_{};\n".format(i) 281 | code += "fread(arg_{}, 1, 8, h);\n".format(i) 282 | else: 283 | cur = args[i] 284 | if isinstance(cur, Struct) and not cur.is_array: 285 | # struct 286 | code += "struct {}* arg_{};\n".format(cur.name, i) 287 | res, clean = self._do_read(cur, "arg_{}".format(i)) 288 | code += res 289 | cleanup += clean 290 | else: 291 | # array 292 | array_length = 8 293 | code += "char* {} = (char*)malloc({});\n".format(arg_names[-1], array_length + 1) 294 | code += "{}[{}] = 0;\n".format(arg_names[-1], array_length) 295 | code += "fread({}, 1, {}, h);\n".format(arg_names[-1], array_length) 296 | cleanup += "free({});\n".format(arg_names[-1]) 297 | return code, cleanup, ", ".join(arg_names) 298 | 299 | def generate_struct_reader(args): 300 | generator = Generator() 301 | return generator.generate_struct_reader(args) 302 | -------------------------------------------------------------------------------- /plugin/PCodeInterpreter.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # @category: GEARSHIFT.internal 3 | 4 | from __future__ import print_function 5 | 6 | from ghidra.program.model.pcode import PcodeOp 7 | from ghidra.program.model.pcode import Varnode 8 | from ghidra.program.flatapi import FlatProgramAPI 9 | from Node import Node 10 | from Struct import Struct 11 | from ghidra.program.model.pcode import HighFunctionDBUtil 12 | from ghidra.program.model.symbol import SourceType 13 | from ghidra.program.model.data import Undefined 14 | from ghidra.app.cmd.function import ApplyFunctionSignatureCmd 15 | from ghidra.program.model.listing import AutoParameterImpl 16 | 17 | NODE_LIMIT = 1 18 | log = False 19 | 20 | forward_cache = {} 21 | backward_cache = {} 22 | highfunction_cache = {} 23 | 24 | # dictionary storing func: list of symbolic parameters the func is called with for each parameter 25 | # this is used to apply retyping in the future 26 | 27 | cycle_node = Node("CYCLE", None, None, 0) 28 | 29 | class PCodeInterpreter: 30 | def __init__(self): 31 | self.nodes = {} 32 | self.stores = [] 33 | self.loads = [] 34 | self.instruction = None 35 | self.cycle_exec = {} 36 | self.loop_variants = set() 37 | self.arrays = [] 38 | self.subcall_parameter_cache = {} 39 | 40 | def process(self, instruction, depth): 41 | opcode = instruction.getOpcode() 42 | output = instruction.getOutput() 43 | inputs = instruction.getInputs() 44 | self.depth = depth 45 | 46 | saved_instruction = self.instruction 47 | self.instruction = instruction 48 | 49 | if opcode == PcodeOp.INT_ADD: 50 | self.int_add(inputs, output) 51 | elif opcode == PcodeOp.INT_SDIV: 52 | self.int_sdiv(inputs, output) 53 | elif opcode == PcodeOp.INT_DIV: 54 | self.int_div(inputs, output) 55 | elif opcode == PcodeOp.INT_SREM: 56 | self.int_srem(inputs, output) 57 | elif opcode == PcodeOp.INT_REM: 58 | self.int_rem(inputs, output) 59 | elif opcode == PcodeOp.INT_RIGHT: 60 | self.int_right(inputs, output) 61 | elif opcode == PcodeOp.INT_SRIGHT: 62 | self.int_sright(inputs, output) 63 | elif opcode == PcodeOp.INT_LEFT: 64 | self.int_left(inputs, output) 65 | elif opcode == PcodeOp.INT_AND: 66 | self.int_and(inputs, output) 67 | elif opcode == PcodeOp.INT_SUB: 68 | self.int_sub(inputs, output) 69 | elif opcode == PcodeOp.INT_OR: 70 | self.int_or(inputs, output) 71 | elif opcode == PcodeOp.INT_XOR: 72 | self.int_xor(inputs, output) 73 | elif opcode == PcodeOp.INT_NEGATE: 74 | self.int_negate(inputs, output) 75 | elif opcode == PcodeOp.INT_EQUAL: 76 | self.int_equal(inputs, output) 77 | elif opcode == PcodeOp.INT_NOTEQUAL: 78 | self.int_notequal(inputs, output) 79 | elif opcode == PcodeOp.INT_LESS: 80 | self.int_less(inputs, output) 81 | elif opcode == PcodeOp.INT_LESSEQUAL: 82 | self.int_lessequal(inputs, output) 83 | elif opcode == PcodeOp.INT_SLESS: 84 | self.int_sless(inputs, output) 85 | elif opcode == PcodeOp.INT_SLESSEQUAL: 86 | self.int_slessequal(inputs, output) 87 | elif opcode == PcodeOp.INT_2COMP: 88 | self.int_2comp(inputs, output) 89 | elif opcode == PcodeOp.PTRSUB: 90 | self.ptrsub(inputs, output) 91 | elif opcode == PcodeOp.STORE: 92 | self.store(inputs, output) 93 | elif opcode == PcodeOp.LOAD: 94 | self.load(inputs, output) 95 | elif opcode == PcodeOp.SUBPIECE: 96 | self.subpiece(inputs, output) 97 | elif opcode == PcodeOp.PIECE: 98 | self.piece(inputs, output) 99 | elif opcode == PcodeOp.CAST: 100 | self.cast(inputs, output) 101 | elif opcode == PcodeOp.MULTIEQUAL: 102 | self.multiequal(inputs, output) 103 | elif opcode == PcodeOp.INT_SEXT: 104 | self.int_sext(inputs, output) 105 | elif opcode == PcodeOp.INT_ZEXT: 106 | self.int_zext(inputs, output) 107 | elif opcode == PcodeOp.INT_MULT: 108 | self.int_mult(inputs, output) 109 | elif opcode == PcodeOp.PTRADD: 110 | self.ptradd(inputs, output) 111 | elif opcode == PcodeOp.CALL: 112 | self.call(inputs, output) 113 | elif opcode == PcodeOp.CALLIND: 114 | self.callind(inputs, output) 115 | elif opcode == PcodeOp.COPY: 116 | self.copy(inputs, output) 117 | elif opcode == PcodeOp.INDIRECT: 118 | self.indirect(inputs, output) 119 | elif opcode == PcodeOp.RETURN: 120 | if len(inputs) >= 2: 121 | print("RETURN") 122 | print(self.lookup_node(inputs[1])) 123 | elif opcode == PcodeOp.CBRANCH: 124 | pass 125 | else: 126 | print("Unsupported Opcode:", instruction.getMnemonic(), inputs[0].getPCAddress()) 127 | 128 | self.instruction = saved_instruction 129 | 130 | def int_sdiv(self, inputs, output): 131 | assert len(inputs) == 2 and output is not None 132 | a = inputs[0] 133 | b = inputs[1] 134 | for i in self.lookup_node(a): 135 | for j in self.lookup_node(b): 136 | self.store_node(output, i.sdiv(j)) 137 | 138 | def int_div(self, inputs, output): 139 | assert len(inputs) == 2 and output is not None 140 | a = inputs[0] 141 | b = inputs[1] 142 | for i in self.lookup_node(a): 143 | for j in self.lookup_node(b): 144 | self.store_node(output, i.div(j)) 145 | 146 | def int_srem(self, inputs, output): 147 | assert len(inputs) == 2 and output is not None 148 | a = inputs[0] 149 | b = inputs[1] 150 | for i in self.lookup_node(a): 151 | for j in self.lookup_node(b): 152 | self.store_node(output, i.smod(j)) 153 | 154 | def int_rem(self, inputs, output): 155 | assert len(inputs) == 2 and output is not None 156 | a = inputs[0] 157 | b = inputs[1] 158 | for i in self.lookup_node(a): 159 | for j in self.lookup_node(b): 160 | self.store_node(output, i.mod(j)) 161 | 162 | def int_add(self, inputs, output): 163 | assert len(inputs) == 2 and output is not None 164 | a = inputs[0] 165 | b = inputs[1] 166 | if (a.isConstant() and b.isConstant()) or a.isConstant(): 167 | raise Exception("INT_ADD error") 168 | for i in self.lookup_node(a): 169 | for j in self.lookup_node(b): 170 | self.store_node(output, i.add(j)) 171 | 172 | def int_right(self, inputs, output): 173 | assert len(inputs) == 2 and output is not None 174 | a = inputs[0] 175 | b = inputs[1] 176 | for i in self.lookup_node(a): 177 | for j in self.lookup_node(b): 178 | self.store_node(output, i.shr(j)) 179 | 180 | def int_sright(self, inputs, output): 181 | assert len(inputs) == 2 and output is not None 182 | a = inputs[0] 183 | b = inputs[1] 184 | for i in self.lookup_node(a): 185 | for j in self.lookup_node(b): 186 | self.store_node(output, i.sshr(j)) 187 | 188 | def int_left(self, inputs, output): 189 | assert len(inputs) == 2 and output is not None 190 | a = inputs[0] 191 | b = inputs[1] 192 | for i in self.lookup_node(a): 193 | for j in self.lookup_node(b): 194 | self.store_node(output, i.shl(j)) 195 | 196 | def int_and(self, inputs, output): 197 | assert len(inputs) == 2 and output is not None 198 | a = inputs[0] 199 | b = inputs[1] 200 | for i in self.lookup_node(a): 201 | for j in self.lookup_node(b): 202 | self.store_node(output, i.bitwise_and(j)) 203 | 204 | def int_sub(self, inputs, output): 205 | assert len(inputs) == 2 and output is not None 206 | a = inputs[0] 207 | b = inputs[1] 208 | for i in self.lookup_node(a): 209 | for j in self.lookup_node(b): 210 | self.store_node(output, i.sub(j)) 211 | 212 | def int_or(self, inputs, output): 213 | assert len(inputs) == 2 and output is not None 214 | a = inputs[0] 215 | b = inputs[1] 216 | for i in self.lookup_node(a): 217 | for j in self.lookup_node(b): 218 | self.store_node(output, i.bitwise_or(j)) 219 | 220 | def int_negate(self, inputs, output): 221 | assert len(inputs) == 1 and output is not None 222 | a = inputs[0] 223 | for i in self.lookup_node(a): 224 | self.store_node(output, i.neg()) 225 | 226 | def int_xor(self, inputs, output): 227 | assert len(inputs) == 2 and output is not None 228 | a = inputs[0] 229 | b = inputs[1] 230 | for i in self.lookup_node(a): 231 | for j in self.lookup_node(b): 232 | self.store_node(output, i.bitwise_xor(j)) 233 | 234 | def int_equal(self, inputs, output): 235 | assert output is not None 236 | a = inputs[0] 237 | b = inputs[1] 238 | for i in self.lookup_node(a): 239 | for j in self.lookup_node(b): 240 | res = i.eq(j) 241 | if res.byte_length != output.getSize(): 242 | res = res.resize(output.getSize()) 243 | self.store_node(output, res) 244 | 245 | def int_notequal(self, inputs, output): 246 | assert output is not None 247 | a = inputs[0] 248 | b = inputs[1] 249 | for i in self.lookup_node(a): 250 | for j in self.lookup_node(b): 251 | res = i.neq(j) 252 | if res.byte_length != output.getSize(): 253 | res = res.resize(output.getSize()) 254 | self.store_node(output, res) 255 | 256 | def int_less(self, inputs, output): 257 | assert output is not None 258 | a = inputs[0] 259 | b = inputs[1] 260 | for i in self.lookup_node(a): 261 | for j in self.lookup_node(b): 262 | res = i.lt(j) 263 | if res.byte_length != output.getSize(): 264 | res = res.resize(output.getSize()) 265 | self.store_node(output, res) 266 | 267 | def int_lessequal(self, inputs, output): 268 | assert output is not None 269 | a = inputs[0] 270 | b = inputs[1] 271 | for i in self.lookup_node(a): 272 | for j in self.lookup_node(b): 273 | res = i.le(j) 274 | if res.byte_length != output.getSize(): 275 | res = res.resize(output.getSize()) 276 | self.store_node(output, res) 277 | 278 | def int_sless(self, inputs, output): 279 | assert output is not None 280 | a = inputs[0] 281 | b = inputs[1] 282 | for i in self.lookup_node(a): 283 | for j in self.lookup_node(b): 284 | res = i.slt(j) 285 | if res.byte_length != output.getSize(): 286 | res = res.resize(output.getSize()) 287 | self.store_node(output, res) 288 | 289 | def int_slessequal(self, inputs, output): 290 | assert output is not None 291 | a = inputs[0] 292 | b = inputs[1] 293 | for i in self.lookup_node(a): 294 | for j in self.lookup_node(b): 295 | res = i.sle(j) 296 | if res.byte_length != output.getSize(): 297 | res = res.resize(output.getSize()) 298 | self.store_node(output, res) 299 | 300 | def int_2comp(self, inputs, output): 301 | assert len(inputs) == 1 and output is not None 302 | for i in self.lookup_node(inputs[0]): 303 | self.store_node(output, i.neg()) 304 | 305 | def ptrsub(self, inputs, output): 306 | assert len(inputs) == 2 307 | assert output is not None 308 | a = inputs[0] 309 | b = inputs[1] 310 | if not b.isConstant(): 311 | raise Exception("PTRSUB error") 312 | for i in self.lookup_node(a): 313 | for j in self.lookup_node(b): 314 | self.store_node(output, i.add(j)) 315 | 316 | def store(self, inputs, output): 317 | assert len(inputs) == 3 318 | for i in self.lookup_node(inputs[1]): 319 | for j in self.lookup_node(inputs[2]): 320 | temp = i.ptr_deref() 321 | if temp.byte_length != j.byte_length: 322 | temp = temp.resize(j.byte_length) 323 | self.stores.append(temp) 324 | if log: 325 | print("[*]", "STORE:", inputs[0].getPCAddress(), temp) 326 | print("VALUE", self.lookup_node(inputs[2])) 327 | print("") 328 | 329 | def load(self, inputs, output): 330 | assert len(inputs) == 2 and output is not None 331 | for i in self.lookup_node(inputs[1]): 332 | value = i.ptr_deref() 333 | if value.byte_length != output.getSize(): 334 | value = value.resize(output.getSize()) 335 | self.store_node(output, value) 336 | self.loads.append(value) 337 | 338 | def subpiece(self, inputs, output): 339 | assert len(inputs) == 2 and output is not None 340 | for i in self.lookup_node(inputs[0]): 341 | for j in self.lookup_node(inputs[1]): 342 | value = i.shr(j.mult(Node(currentProgram.getAddressFactory().getConstantAddress(8), None, None, i.byte_length))) 343 | if value.byte_length != output.getSize(): 344 | value = value.resize(output.getSize()) 345 | self.store_node(output, value) 346 | 347 | def piece(self, inputs, output): 348 | assert len(inputs) == 2 and output is not None 349 | for i in self.lookup_node(inputs[0]): 350 | for j in self.lookup_node(inputs[1]): 351 | value = i.shl(Node(currentProgram.getAddressFactory().getConstantAddress(j.byte_length), None, None, i.byte_length)).add(j) 352 | if value.byte_length != output.getSize(): 353 | value = value.resize(output.getSize()) 354 | self.store_node(output, value) 355 | 356 | def cast(self, inputs, output): 357 | assert len(inputs) == 1 and output is not None 358 | for value in self.lookup_node(inputs[0]): 359 | assert value.byte_length == output.getSize() 360 | self.store_node(output, value) 361 | 362 | def multiequal(self, inputs, output): 363 | assert output is not None and len(inputs) >= 2 364 | possibilities = [] 365 | count = 0 366 | for i in inputs: 367 | result = self.lookup_node(i) 368 | for j in result: 369 | possibilities.append(j) 370 | self.store_node(output, j) 371 | self.loop_variants.add(output) 372 | 373 | def int_sext(self, inputs, output): 374 | assert output is not None and len(inputs) == 1 375 | for i in self.lookup_node(inputs[0]): 376 | self.store_node(output, i.resize(output.getSize())) 377 | 378 | def int_zext(self, inputs, output): 379 | assert output is not None and len(inputs) == 1 380 | for i in self.lookup_node(inputs[0]): 381 | self.store_node(output, i.resize(output.getSize())) 382 | 383 | def int_mult(self, inputs, output): 384 | assert output is not None and len(inputs) == 2 385 | a = inputs[0] 386 | b = inputs[1] 387 | for i in self.lookup_node(a): 388 | for j in self.lookup_node(b): 389 | result = i.mult(j) 390 | self.store_node(output, result) 391 | 392 | def ptradd(self, inputs, output): 393 | assert output is not None and len(inputs) == 3 394 | assert inputs[2].isConstant() and not inputs[0].isConstant() 395 | for a in self.lookup_node(inputs[0]): 396 | for b in self.lookup_node(inputs[1]): 397 | for c in self.lookup_node(inputs[2]): 398 | temp = b.mult(c) 399 | result = a.add(temp) 400 | assert output.getSize() == result.byte_length 401 | self.store_node(output, result) 402 | 403 | def callind(self, inputs, output): 404 | assert len(inputs) >= 1 405 | print("Warning: indirect call - skipping and returning 0") 406 | if output is not None: 407 | self.store_node(output, Node(Varnode(output.getAddress(), output.getSize()), None, None, output.getSize())) 408 | 409 | def call(self, inputs, output): 410 | assert len(inputs) >= 1 411 | # First we have to analyze function forward with input arguments 412 | # If output exists, then we have to analyze backwards to obtain ret value types 413 | pc_varnode = inputs[0] 414 | assert pc_varnode.isAddress() 415 | pc_addr = pc_varnode.getAddress() 416 | temp = FlatProgramAPI(currentProgram) 417 | called_func = temp.getFunctionAt(pc_addr) 418 | print("call:", inputs[0].getPCAddress()) 419 | 420 | ##### START CALL RECURSIVE FORWARD ANALYSIS 421 | 422 | # Note: the function analysis parameter's varnodes are DIFFERENT that the varnodes from our current state. Thus we replace the varnode -> Node map in the function with the calling parameters 423 | checkFixParameters(called_func, inputs[1:]) 424 | if called_func not in forward_cache: 425 | global log 426 | pci_new = PCodeInterpreter() 427 | parameter_varnodes = analyzeFunctionForward(called_func, pci_new) 428 | parameter_nodes = [] 429 | for i in parameter_varnodes: 430 | parameter_nodes.append(pci_new.lookup_node(i)[0]) 431 | forward_cache[called_func] = (pci_new.stores, pci_new.loads, parameter_nodes, pci_new.arrays, pci_new.subcall_parameter_cache) 432 | log = False 433 | 434 | stores, loads, parameter_node_objects, arrs, nested_subcall_parameter_cache = forward_cache[called_func] 435 | input_node_objects = map(self.lookup_node, inputs[1:]) 436 | if called_func not in self.subcall_parameter_cache: 437 | param_list = [] 438 | for i in range(called_func.getParameterCount()): 439 | param_list.append([]) 440 | self.subcall_parameter_cache[called_func] = param_list 441 | 442 | node_objects = map(self.lookup_node, inputs[1:]) 443 | for i in range(len(self.subcall_parameter_cache[called_func])): 444 | self.subcall_parameter_cache[called_func][i] += node_objects[i] 445 | 446 | for i in stores: 447 | arg_idx = i.find_base_idx(parameter_node_objects) 448 | if arg_idx is not None: 449 | for j in node_objects[arg_idx]: 450 | self.stores.append(i.replace_base_parameters(parameter_node_objects, j)) 451 | if i in arrs: 452 | self.arrays.append(self.stores[-1]) 453 | for i in loads: 454 | arg_idx = i.find_base_idx(parameter_node_objects) 455 | if arg_idx is not None: 456 | for j in node_objects[arg_idx]: 457 | self.loads.append(i.replace_base_parameters(parameter_node_objects, j)) 458 | if i in arrs: 459 | self.arrays.append(self.loads[-1]) 460 | 461 | ##### END CALL RECURSIVE FORWARD ANALYSIS 462 | 463 | # replace args in parameter cache: 464 | for func_name in nested_subcall_parameter_cache: 465 | current_params = nested_subcall_parameter_cache[func_name] 466 | for param_idx in range(len(current_params)): 467 | for temp in current_params[param_idx]: 468 | arg_idx = temp.find_base_idx(parameter_node_objects) 469 | if arg_idx is not None: 470 | for j in node_objects[arg_idx]: 471 | replaced = temp.replace_base_parameters(parameter_node_objects, j) 472 | if func_name not in self.subcall_parameter_cache: 473 | param_list = [] 474 | for i in range(func_name.getParameterCount()): 475 | param_list.append([]) 476 | self.subcall_parameter_cache[func_name] = param_list 477 | if arg_idx < len(self.subcall_parameter_cache[func_name]): 478 | self.subcall_parameter_cache[func_name][arg_idx].append(replaced) 479 | 480 | if output is not None: 481 | if called_func not in backward_cache: # This means we want to backwards interpolate the return type 482 | ##### START CALL RECURSIVE BACKWARDS ANALYSIS 483 | 484 | checkFixReturn(called_func, output) 485 | pci_new = PCodeInterpreter() 486 | ret_type, subfunc_parameter_varnodes = analyzeFunctionBackward(called_func, pci_new) 487 | backward_cache[called_func] = (ret_type, map(pci_new.lookup_node, subfunc_parameter_varnodes)) 488 | 489 | ##### END CALL RECURSIVE BACKWARDS ANALYSIS 490 | 491 | ret_type, subfunc_parameter_node_objs = backward_cache[called_func] 492 | replaced_rets = [] 493 | for a in ret_type: 494 | for i in a: 495 | arg_idx = i.find_base_idx(subfunc_parameter_node_objs) 496 | if arg_idx is None: 497 | node_objects = [1] # Doesn't matter 498 | else: 499 | node_objects = self.lookup_node(inputs[1:][arg_idx]) 500 | for j in node_objects: 501 | replaced_rets.append(i.replace_base_parameters(subfunc_parameter_node_objs, j)) 502 | 503 | for i in range(len(replaced_rets)): 504 | self.store_node(output, replaced_rets[i]) 505 | 506 | def copy(self, inputs, output): 507 | assert len(inputs) == 1 and output is not None 508 | for result in self.lookup_node(inputs[0]): 509 | self.store_node(output, result) 510 | 511 | def indirect(self, inputs, output): 512 | for value in self.lookup_node(inputs[0]): 513 | assert value.byte_length == output.getSize() 514 | self.store_node(output, value) 515 | 516 | # maps a Ghidra Varnode object to a binary tree object that represents its expression 517 | def lookup_node(self, varnode): 518 | # Detect cycle 519 | if varnode in self.cycle_exec: 520 | self.cycle_exec[varnode] += 1 521 | if varnode in self.cycle_exec and self.cycle_exec[varnode] > 0: 522 | if varnode not in self.nodes: 523 | self.store_node(varnode, Node(("CYCLE", varnode), None, None, varnode.getSize())) 524 | return self.nodes[varnode] 525 | if varnode.isConstant(): 526 | # create constant node 527 | return [Node(varnode, None, None, varnode.getSize())] 528 | elif varnode.isAddress(): 529 | return [Node(varnode, None, None, varnode.getSize())] 530 | elif varnode not in self.nodes or varnode in self.cycle_exec: 531 | # We have to detect cycles here, by temporarily storing "CYCLE", and if the returned value is "CYCLE", we know there is cycle 532 | if varnode not in self.cycle_exec: 533 | self.cycle_exec[varnode] = 0 534 | 535 | self.get_node_definition(varnode) 536 | 537 | if self.cycle_exec[varnode] == 0: 538 | del self.cycle_exec[varnode] 539 | 540 | return self.lookup_node(varnode) 541 | 542 | # Prune 543 | if len(self.nodes[varnode]) > NODE_LIMIT: 544 | self.nodes[varnode] = self.nodes[varnode][:NODE_LIMIT] 545 | return self.nodes[varnode] 546 | 547 | # recursively backwards traces for node's definition 548 | def get_node_definition(self, varnode): 549 | defining_instruction = varnode.getDef() 550 | if defining_instruction is None: 551 | print("WARNING: Orphaned varnode? - assuming multiequal analyzation error and skipping") 552 | self.nodes[varnode] = [Node("ORPHANED", None, None, varnode.getSize())] 553 | return 554 | self.process(defining_instruction, -1) 555 | 556 | # stores mapping between Ghidra varnode and binary tree obj 557 | def store_node(self, varnode, nodeobj): 558 | if varnode not in self.nodes: 559 | self.nodes[varnode] = [] 560 | if hash(nodeobj) not in map(hash, self.nodes[varnode]): 561 | self.nodes[varnode].append(nodeobj) 562 | 563 | def get_highfunction(func): 564 | if func not in highfunction_cache: 565 | decompileResults = decompInterface.decompileFunction(func, 30, monitor) 566 | if decompileResults.decompileCompleted(): 567 | hf = decompileResults.getHighFunction() 568 | highfunction_cache[func] = hf 569 | return hf 570 | else: 571 | return highfunction_cache[func] 572 | 573 | def checkFixParameters(func, parameters): 574 | hf = get_highfunction(func) 575 | HighFunctionDBUtil.commitParamsToDatabase(hf, True, SourceType.DEFAULT) 576 | # reload cache 577 | del highfunction_cache[func] 578 | hf = get_highfunction(func) 579 | 580 | # Check arguments 581 | func_proto = hf.getLocalSymbolMap() 582 | if func_proto.getNumParams() != len(parameters) and not func.hasVarArgs(): 583 | print(func, "call signature wrong...") 584 | raise Exception("Function call signature different") 585 | 586 | argument_varnodes = [] 587 | for i in range(func_proto.getNumParams()): 588 | cur = func_proto.getParam(i).getRepresentative() 589 | if cur.getSize() != parameters[i].getSize(): 590 | print("i: %d, cur.getSize: %d, parameters[i].getSize(): %d" % (i, cur.getSize(), parameters[i].getSize())) 591 | raise Exception("Func parameter size mismatch") 592 | 593 | # Make sure func signature matches the call 594 | def checkFixReturn(func, ret_varnode): 595 | hf = get_highfunction(func) 596 | 597 | # Check return types 598 | for i in hf.getPcodeOps(): 599 | if i.getOpcode() == PcodeOp.RETURN: 600 | if len(i.getInputs()) < 2: 601 | print(func, "has no return value, fixing type...", i.getInputs()[0].getPCAddress()) 602 | sig = func.getSignature() 603 | sig.setReturnType(Undefined.getUndefinedDataType(ret_varnode.getSize())) 604 | ApplyFunctionSignatureCmd(func.getEntryPoint(), sig, SourceType.USER_DEFINED).applyTo(currentProgram) 605 | 606 | # This function performs backwards analysis on the function return type with base case of function parameters 607 | # init_param replaces the parameters of the current func to be analyzed in terms the passed parameter expressions 608 | def analyzeFunctionBackward(func, pci, init_param=None): 609 | print("Backwards analysis", func.getName()) 610 | 611 | hf = get_highfunction(func) 612 | HighFunctionDBUtil.commitParamsToDatabase(hf, True, SourceType.DEFAULT) 613 | 614 | func_proto = hf.getLocalSymbolMap() 615 | # Grab return varnodes 616 | return_varnodes = [] 617 | for i in hf.getPcodeOps(): 618 | if i.getOpcode() == PcodeOp.RETURN: 619 | if len(i.getInputs()) >= 2: 620 | return_varnodes.append(i.getInputs()[1]) 621 | 622 | # Grab argument varnodes as base case 623 | argument_varnodes = [] 624 | for i in range(func_proto.getNumParams()): 625 | argument_varnodes.append(func_proto.getParam(i).getRepresentative()) 626 | 627 | # Sets argument as base cases 628 | for arg in range(len(argument_varnodes)): 629 | if init_param is None: 630 | pci.store_node(argument_varnodes[arg], Node("ARG" + str(arg), None, None, argument_varnodes[arg].getSize())) 631 | else: 632 | pci.store_node(argument_varnodes[arg], init_param[arg]) 633 | 634 | return_types = [] 635 | for i in return_varnodes: 636 | result = pci.lookup_node(i) 637 | return_types.append(result) 638 | return return_types, argument_varnodes 639 | 640 | def traverseForward(cur, depth, pci, visited): 641 | if cur is None: 642 | return 643 | children = cur.getDescendants() 644 | for child in children: 645 | pci.process(child, depth) 646 | if child.getOutput() is not None and child.getOutput() not in visited: 647 | visited.add(child.getOutput()) 648 | traverseForward(child.getOutput(), depth + 1, pci, visited) 649 | 650 | # This function performs forward analysis on function parameters to determine its type (struct, array, or primitive) 651 | def analyzeFunctionForward(func, pci): 652 | print("Forwards analysis", func.getName()) 653 | hf = get_highfunction(func) 654 | HighFunctionDBUtil.commitParamsToDatabase(hf, True, SourceType.DEFAULT) 655 | print(func.getParameters()) 656 | 657 | # get the varnode of function parameters 658 | func_proto = hf.getLocalSymbolMap() 659 | argument_varnodes = [] 660 | argument_nodes = [] 661 | for i in range(func_proto.getNumParams()): 662 | argument_varnodes.append(func_proto.getParam(i).getRepresentative()) 663 | argument_nodes.append(Node("ARG" + str(i), None, None, argument_varnodes[i].getSize())) 664 | 665 | hash_list = set() 666 | 667 | for a in range(2): 668 | print("Loop variants", map(id, pci.loop_variants)) 669 | 670 | variant_vals = [] 671 | new_nodes = {} 672 | 673 | for i in pci.loop_variants: 674 | new_nodes[i] = pci.nodes[i] 675 | del pci.nodes[i] 676 | visited = set() 677 | 678 | pci.nodes = new_nodes 679 | 680 | for arg in range(len(argument_varnodes)): 681 | pci.store_node(argument_varnodes[arg], argument_nodes[arg]) 682 | 683 | # recursively traverse the varnode descendants to get reaching definitions 684 | for i in argument_varnodes: 685 | traverseForward(i, 0, pci, visited) 686 | 687 | if a == 0: 688 | for i in pci.stores + pci.loads: 689 | hash_list.add(hash(i)) 690 | continue 691 | 692 | temp = pci.stores + pci.loads 693 | 694 | for i in range(len(temp))[::-1]: 695 | if hash(temp[i]) not in hash_list: 696 | pci.arrays.append(temp[i]) 697 | print("FOUND ARRAY!") 698 | 699 | return argument_varnodes 700 | --------------------------------------------------------------------------------