├── .gitignore
├── examples
    ├── Makefile
    └── simple.c
├── LICENSE
├── README.md
└── plugin
    ├── Harness.py
    ├── go.py
    ├── Node.py
    ├── Struct.py
    └── PCodeInterpreter.py


/.gitignore:
--------------------------------------------------------------------------------
1 | examples/simple
2 | 


--------------------------------------------------------------------------------
/examples/Makefile:
--------------------------------------------------------------------------------
 1 | all: simple simple.so
 2 | .PHONY: all
 3 | 
 4 | simple: simple.c
 5 | 	$(CC) -O0 -DINCLUDE_MAIN -o simple simple.c
 6 | 	strip simple
 7 | 
 8 | simple.so: simple.c
 9 | 	$(CC) -shared -O0 -o simple.so simple.c
10 | 	strip simple.so
11 | 


--------------------------------------------------------------------------------
/examples/simple.c:
--------------------------------------------------------------------------------
 1 | #include <stdlib.h>
 2 | #include <sys/types.h>
 3 | 
/* Two-int record; both fields are written by fill_pair(). */
typedef struct {
  int first;
  int second;
} pair;
 8 | 
/*
 * Record mixing several field widths, an embedded struct and a pointer to a
 * struct; every field is written by initgrabbag().
 */
typedef struct {
  int myint;
  char mychar;
  size_t mysize;
  pair mypair;   /* embedded by value */
  pair *pairptr; /* set to a malloc()ed block in initgrabbag() */
} grabbag;
16 | 
/* Fill *pairptr: `first` receives a rand() value, `second` is set to 7. */
void fill_pair(pair *pairptr) {
  pairptr->first = rand();
  pairptr->second = 7;
}
21 | 
22 | int initgrabbag(grabbag *bag) {
23 |   bag->pairptr = malloc(sizeof(bag->pairptr));
24 |   fill_pair(&bag->mypair);
25 |   fill_pair(bag->pairptr);
26 |   bag->myint = 2;
27 |   bag->mychar = 7;
28 |   bag->mysize = 8;
29 |   return bag->myint;
30 | }
31 | 
32 | #ifdef INCLUDE_MAIN
/*
 * Entry point, compiled only when INCLUDE_MAIN is defined: runs
 * initgrabbag() once on a stack-allocated grabbag and ignores its result.
 */
int main() {
  grabbag bag;

  initgrabbag(&bag);
}
38 | #endif
39 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Unless otherwise marked, this license applies to all code in this repository.
 2 | 
 3 | University of Illinois/NCSA Open Source License (UIUC license)
 4 | Copyright (c) 2020 Grimm. All rights reserved.
 5 | 
 6 | 
 7 | Developed by: Software Security Group
 8 | Grimm
 9 | https://grimm-co.com
10 | 
11 | Permission is hereby granted, free of charge, to any person obtaining a copy of
12 | this software and associated documentation files (the "Software"), to deal with
13 | the Software without restriction, including without limitation the rights to
14 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
15 | the Software, and to permit persons to whom the Software is furnished to do so,
16 | subject to the following conditions:
17 | 
18 | - Redistributions of source code must retain the above copyright notice, this
19 |   list of conditions and the following disclaimers.
20 | - Redistributions in binary form must reproduce the above copyright notice,
21 |   this list of conditions and the following disclaimers in the documentation
22 |   and/or other materials provided with the distribution.
23 | - Neither the names of Grimm, nor the names of its contributors may be used to
24 |   endorse or promote products derived from this Software without specific prior
25 |   written permission.
26 | 
27 | 
28 | SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
29 | INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
30 | PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE CONTRIBUTORS OR
31 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
32 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
33 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE SOFTWARE.
34 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # GEARSHIFT
 2 | GEARSHIFT is a tool that performs structure recovery for a specified function
 3 | within a stripped binary.  It also generates a fuzz harness that can be used
 4 | to call functions in a shared object (.so) or dynamically linked library (.dll)
 5 | file.
 6 | 
 7 | The name comes from it leveraging a mix of reverse and forward engineering.
 8 | 
 9 | ## Installation
10 | 
11 | To install the Ghidra script, copy the python files to one of your Ghidra
12 | script directories:
13 | 
14 | 1. In Ghidra, open the Script Manager (Window > Script Manager)
15 | 2. Click the "Script Directories" button to view the list of directories
16 | 3. Note the name of a directory. If there isn't one you can edit, add a new directory.
17 | 4. Copy all the python files in `plugin/` to the chosen directory.
18 | 5. Click the "Refresh Script List" button. The scripts should appear in the GEARSHIFT folder in the Script Manager.
19 | 
20 | ## Usage
21 | 
22 | 1. Select a function whose arguments you want to analyze.
23 | 2. From the Script Manager, under GEARSHIFT, select go.py and click Run.
24 | 3. Any structs that are identified from the arguments of the function will be
25 |    defined in Data Type Manager under $binary_name > struct.
26 | 4. The script will generate harness code and print out the names of the files
27 |    it generated.
28 | 5. Compile the harness (must be compiled with `-ldl` flag for shared objects)
29 | 6. Run the harness, passing it the file name of your input file as the only
30 |    argument.
31 | 
32 | ## Example Programs
33 | 
34 | The `examples/` directory contains example programs that can be used to try out
35 | the tool. Compile the example programs as follows:
36 | ```
37 | $ cd examples
38 | $ make
39 | ```
40 | 
41 | ## Limitations
42 | The harnesses generated by GEARSHIFT currently depend on the `LoadLibrary` and
43 | `dlopen` functions, which are unable to load executable files. If your target
44 | is an executable rather than a shared library, you may need to write your own
45 | harness, but you can use the generated code to create the input datastructure.
46 | 
47 | If your target is an ELF executable, you may be able to fool `dlopen` into
48 | loading your binary by removing the PIE flag. The LIEF Project (versions >= 0.11.0)
49 | can be used to do so [as described
50 | here](https://lief.quarkslab.com/doc/latest/tutorials/08_elf_bin2lib.html#warning-for-glic-2-29-users).
51 | However, this may completely break your binary, depending on what relocations
52 | and other loader features it uses.
53 | 
54 | ## Leveraged technologies
55 | The current tool is implemented as a Ghidra script. It leverages Ghidra's
56 | intermediate language and data dependency analysis to discover struct fields,
57 | and outputs its results to the Ghidra Data Type Manager. See
58 | [the associated blog post](https://blog.grimm-co.com/2020/11/automated-struct-identification-with.html)
59 | for more information.
60 | 
61 | ## References of interest:
62 | 
63 | - http://conferences.sigcomm.org/sigcomm/2010/papers/apsys/p13.pdf
64 | - https://pdfs.semanticscholar.org/1600/f73baa952cdf433f0ed6333815d3668f8f24.pdf
65 | - https://research.cs.wisc.edu/wpis/papers/cc04.pdf
66 | 
67 | 


--------------------------------------------------------------------------------
/plugin/Harness.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # @category: GEARSHIFT.internal
 3 | 
 4 | linux_template = r"""#include <stdio.h>
 5 | #include <stdlib.h>
 6 | #include <dlfcn.h>
 7 | #include <stdint.h>
 8 | 
 9 | {structs}
10 | 
11 | typedef int(*func)(void* a, ...);
12 | 
13 | int main(int argc, char** argv) {{
14 |         if (argc < 2) {{
15 |                 if (argc < 1) {{
16 |                         printf("Usage: ./gearshift_harness_linux input_file\n");
17 |                 }} else {{
18 |                         printf("Usage: %s input_file\n", argv[0]);
19 |                 }}
20 |                 printf("\n");
21 |                 printf("\tinput_file - data to put into the arguments\n");
22 |                 printf("\n");
23 |                 return 1;
24 |         }}
25 | 	void* handle = dlopen("{process_path}", RTLD_LAZY);
26 |         if (handle == NULL) {{
27 |                 printf("Unable to open {process_path}. Exiting.\n");
28 |                 return 2;
29 |         }}
30 | 	// In glibc, the handle points to the library base address
31 | 	char* base = *((char**)handle);
32 | 	func f = (func)(base + 0x{func_offset:x});
33 | 
34 | 	FILE* h = fopen(argv[1], "r");
35 |         if (h == NULL) {{
36 |                 printf("Unable to open %s. Exiting.\n", argv[1]);
37 |                 return 3;
38 |         }}
39 | 
40 | {code}
41 | 
42 | 	int res = f((void*){args});
43 | 
44 | {cleanup}
45 | 
46 | 	printf("Result: %d\n", res);
47 | }}
48 | """
49 | 
50 | windows_template = r"""#include <stdio.h>
51 | #include <stdint.h>
52 | #include <stdlib.h>
53 | #include <windows.h>
54 | 
55 | {structs}
56 | 
57 | typedef int(*func)(void* a, ...);
58 | 
59 | int main(int argc, char** argv) {{
60 |         if (argc < 2) {{
61 |                 if (argc < 1) {{
62 |                         printf("Usage: gearshift_harness_windows input_file\n");
63 |                 }} else {{
64 |                         printf("Usage: %s input_file\n", argv[0]);
65 |                 }}
66 |                 printf("\n");
67 |                 printf("\tinput_file - data to put into the arguments\n");
68 |                 printf("\n");
69 |                 return 1;
70 |         }}
71 | 	HMODULE lib = LoadLibraryA("{process_path}");
72 | 	if (!lib) {{
73 | 		printf("Load Library failed: %d\n", GetLastError());
74 | 		exit(1);
75 | 	}}
76 | 
77 | 	// On Windows, the handle is the library base address
78 | 	char* base = (char*)lib;
79 | 	func f = (func)(base + 0x{func_offset:x});
80 | 
81 | 	FILE* h;
82 | 	fopen_s(&h, argv[1], "r");
83 | 
84 | {code}
85 | 
86 | 	int res = f((void*){args});
87 | 
88 | {cleanup}
89 | 
90 | 	printf("Result: %d\n", res);
91 | }}
92 | """
93 | 
94 | def generate_linux_harness(struct_defs, ppath, func_off, code, cleanup, args):
95 | 	return linux_template.format(structs=struct_defs, process_path=ppath, func_offset=func_off, code="\t" + code.replace("\n", "\n\t"), cleanup="\t" + cleanup.replace("\n", "\n\t"), args=args)
96 | 
97 | def generate_windows_harness(struct_defs, ppath, func_off, code, cleanup, args):
98 | 	return windows_template.format(structs=struct_defs, process_path=ppath, func_offset=func_off, code="\t" + code.replace("\n", "\n\t"), cleanup="\t" + cleanup.replace("\n", "\n\t"), args=args)
99 | 


--------------------------------------------------------------------------------
/plugin/go.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # GEARSHIFT struct identifier
  3 | # @category: GEARSHIFT
  4 | 
  5 | from __future__ import print_function
  6 | import time
  7 | import os.path
  8 | 
  9 | from ghidra.app.decompiler import *
 10 | from ghidra.program.model import address
 11 | from ghidra.program.model.pcode import PcodeOp
 12 | from ghidra.program.model.data import Undefined
 13 | from ghidra.program.model.symbol import SourceType
 14 | from ghidra.program.flatapi import FlatProgramAPI
 15 | from ghidra.app.cmd.function import ApplyFunctionSignatureCmd
 16 | from ghidra.program.model.pcode import HighFunctionDBUtil
 17 | 
 18 | import PCodeInterpreter
 19 | import Node
 20 | import Struct
 21 | from Harness import *
 22 | 
# Global config
# currentProgram and monitor are not defined in this file; presumably they are
# supplied by the Ghidra script environment - confirm.
# Pointer width of the analyzed program, in bits.
ARCH_BITS = currentProgram.getDefaultPointerSize() * 8

decompInterface = ghidra.app.decompiler.DecompInterface()
decompInterface.openProgram(currentProgram)
# The helper modules read these names as their own module-level globals, so
# they are assigned onto the modules here before any analysis runs.
PCodeInterpreter.decompInterface = decompInterface
PCodeInterpreter.monitor = monitor
PCodeInterpreter.currentProgram = currentProgram
PCodeInterpreter.ARCH_BITS = ARCH_BITS
Node.ARCH_BITS = ARCH_BITS
Struct.ARCH_BITS = ARCH_BITS
# Struct names are "S<counter>"; restart the numbering on every run.
Struct.struct_counter = 0
Struct.currentProgram = currentProgram
 36 | 
 37 | """
 38 | NOTES on interprocedural analysis
 39 | There are two major types of analysis we want to do: FORWARD and BACKWARD
 40 | - To identify the struct usage of a parameter to a function, we do FORWARD analysis on the passed parameter
 41 | - To identify the types of the fields stored into a member of a parameter, we do BACKWARDs analysis on the value stored
 42 | - To identify the types of the fields loaded from a member of a parameter struct, we do FORWARD analysis on the loaded value
 43 | - When a stored value is derived from another function call, we must perform backwards analysis on all the return value of that function
 44 | - When a loaded value is passed into another function call, we must perform forwards analysis on that parameter to determine its struct type
 45 | Example backward analysis: https://www.riverloopsecurity.com/blog/2019/05/pcode/
 46 | 
 47 | NOTES on caching
 48 | We should never run forward analysis on the same function twice. This is because we should already know the loads and stores performed on the argument after running it once.
 49 | For backward analysis, we are able to cache the results of the first run by using placeholder parameter inputs, and the return types based on these placeholders. Therefore, the next run, we just DFS to replace all the placeholder inputs with our actual parameters and then we have obtained the return type.
 50 | 
 51 | TODO: test recursive function analysis
 52 | 
 53 | To identify arrays, we use the idea of loop variants. A loop variant is the output from a multiequal pcode op. When running analysis multiple times with different loop variant initial conditions, the loop variant changes each run. The loads or stores that change are likely array loads and stores. Using the differences in struct accesses, we can infer which ones are arrays, and the stride of the array.
 54 | """
 55 | 
# Wall-clock start, reported at the end of the script.
start = time.time()

# get current function
# currentAddress is not defined in this file; presumably supplied by the
# Ghidra script environment - confirm.
listing = currentProgram.getListing()
currentFunction = listing.getFunctionContaining(currentAddress)
entryPoint = currentFunction.getEntryPoint()
base_address = currentProgram.getImageBase().getOffset()
# Offset of the function from the image base; the generated harness adds it
# to the base address of the loaded library.
function_offset = entryPoint.getOffset() - currentProgram.getImageBase().getOffset()
program_path = currentProgram.getExecutablePath()

# Run the forward analysis on the selected function; the interpreter object
# accumulates the stores, loads and arrays read below.
pci = PCodeInterpreter.PCodeInterpreter()
pci.currentProgram = currentProgram
argument_varnodes = PCodeInterpreter.analyzeFunctionForward(currentFunction, pci)

important_stores = []
important_loads = []
argument_node_objs = []
for i in argument_varnodes:
	argument_node_objs += pci.lookup_node(i)
argument_structs = [None] * len(argument_varnodes)

# Keep only the stores and loads whose expression tree contains one of the
# argument nodes.
for i in pci.stores:
	if i.contains(argument_node_objs):
		important_stores.append(i)
for i in pci.loads:
	if i.contains(argument_node_objs):
		important_loads.append(i)

print("Start creating struct")

# One initially empty Struct per function argument.
args = []
for i in range(len(argument_structs)):
	args.append(Struct.Struct(0))

# Hashes of the simplified expressions already applied, so each distinct
# access expression is processed once.
used_hash = set()
used_expressions = []
for i in (important_stores + important_loads):
	simplified = i.simplify()
	if hash(simplified) in used_hash:
		continue
	try:
		substruct, offset, grand = simplified.create_struct(args, simplified.byte_length)
		# grand is (containing struct, offset); flag the container as an array
		# when the interpreter recorded this access as an array access.
		if i in pci.arrays and not grand[0].is_array:
			grand[0].make_array()
		used_expressions.append(simplified)
		used_hash.add(hash(simplified))
	except ValueError as e:
		# create_struct raises ValueError for expressions it does not
		# support; report it and move on to the next access.
		print(e)

print("Done interpolating structs")
106 | 
# Apply data type to original function
orig_params = currentFunction.getParameters()
assert len(orig_params) == len(args)
# C text of each argument struct, later concatenated into the harness.
struct_code = []
for i in range(len(args)):
	code = args[i].pretty_print()
	struct_code.append(code)
	print(code)
	dt = args[i].get_dtype()
	print(dt)
	orig_params[i].setDataType(dt, SourceType.USER_DEFINED)

# Apply data types to subcall functions
# Node.__hash__ hashes the node's string form, so adding hash("ARG<i>") makes
# a bare argument passed straight through count as a known expression.
for i in range(currentFunction.getParameterCount()):
	used_hash.add(hash("ARG{}".format(i)))
for func in pci.subcall_parameter_cache:
	params = pci.subcall_parameter_cache[func]
	for param_idx in range(len(params)):
		# Hashes already handled for this parameter.
		seen = set()
		for j in params[param_idx]:
			simplified = j.simplify()
			h = hash(simplified)
			if h in used_hash and h not in seen:
				seen.add(h)
				# Resolve the expression against the struct of the argument
				# it is based on; only struct results are applied.
				arg_idx = simplified.find_base_idx2()
				t, off = simplified.traverse_struct(args[arg_idx])
				if isinstance(t, Struct.Struct):
					print("Applying type {} to function {} parameter {}".format(t.name, func, param_idx))
					func.getParameters()[param_idx].setDataType(t.get_dtype(), SourceType.USER_DEFINED)
136 | 
# Generate the C code that reads the argument structs, plus its cleanup code
# and the argument expression for the call.
code, cleanup, arg_names = Struct.generate_struct_reader(args)
struct_defs = "".join(struct_code)

# Both harness variants are always generated, whatever the target's platform.
linux_harness = generate_linux_harness(struct_defs, program_path, function_offset, code, cleanup, arg_names)
windows_harness = generate_windows_harness(struct_defs, program_path, function_offset, code, cleanup, arg_names)

# Relative names resolve against the current working directory of the process.
linux_filename = os.path.abspath('gearshift_harness_linux.c')
windows_filename = os.path.abspath('gearshift_harness_windows.c')

print("writing linux harness to", linux_filename)
with open(linux_filename, 'w') as harness:
    harness.write(linux_harness)
print("writing windows harness to", windows_filename)
with open(windows_filename, 'w') as harness:
    harness.write(windows_harness)

end = time.time()
print("DONE - Took:", (end - start))
155 | 


--------------------------------------------------------------------------------
/plugin/Node.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # @category: GEARSHIFT.internal
  3 | 
  4 | from __future__ import print_function
  5 | 
  6 | from Struct import Struct
  7 | from ghidra.program.model.pcode import Varnode
  8 | 
  9 | # Abstract binary operation tree that stores the symbolic expression
 10 | class Node:
 11 | 	def __init__(self, operation, left, right, byte_length):
 12 | 		self.left = left
 13 | 		self.right = right
 14 | 		self.operation = operation
 15 | 		self.byte_length = byte_length
 16 | 
 17 | 	def traverse_struct(self, struct):
 18 | 		if self.is_leaf() and str(self.operation).startswith("ARG"):
 19 | 			return struct, 0
 20 | 		elif self.operation == "+":
 21 | 			assert isinstance(self.left, Node)
 22 | 			res, off = self.left.traverse_struct(struct)
 23 | 			return res, off + self.right.operation.getOffset()
 24 | 		elif self.operation == "*()":
 25 | 			assert isinstance(self.left, Node)
 26 | 			res, off = self.left.traverse_struct(struct)
 27 | 			return res.get2(off), 0
 28 | 		elif self.operation == "RESIZE":
 29 | 			return self.left.traverse_struct(struct)
 30 | 		else:
 31 | 			print("Not yet supported", self.operation)
 32 | 			raise ValueError("Not yet supported")
 33 | 
 34 | 	# (Object reference, reference offset, (Grandparent struct, grandparent offset))
 35 | 	# Creates the struct specified in arg_struct_list
 36 | 	# The intuition is that when we encounter a pointer, we also hold a pointer to that pointer (grandparent). Therefore if a pointer is dereferenced and that pointer is not yet marked a struct, then we use grandparent to change it into a struct
 37 | 	# Otherwise, we just keep track of the current offsets into the current struct and recursive base case is the argument struct.
 38 | 	def create_struct(self, arg_struct_list, parent_byte_length):
 39 | 		if self.is_leaf() and str(self.operation).startswith("ARG"):
 40 | 			arg_idx = int(self.operation[3:])
 41 | 			return (arg_struct_list[arg_idx], 0, None)
 42 | 		elif self.operation == "+":
 43 | 			assert isinstance(self.left, Node)
 44 | 			sub_struct, offset, grand = self.left.create_struct(arg_struct_list, self.byte_length)
 45 | 			if isinstance(self.right, Node):
 46 | 				if not isinstance(self.right.operation, Varnode) or not self.right.operation.isConstant():
 47 | 					raise ValueError("Complex expression, skipping")
 48 | 				if self.right.operation.getOffset() & (1 << (self.right.operation.getSize() * 8 - 1)) != 0:
 49 | 					raise ValueError("Negative constaints not supported yet")
 50 | 				offset += self.right.operation.getOffset()
 51 | 			else:
 52 | 				if not self.right.isConstant():
 53 | 					print("Non constant indexed detected: Possible array?")
 54 | 				else:
 55 | 					raise Exception("Shouldn't happen")
 56 | 			return (sub_struct, offset, grand)
 57 | 		elif self.operation == "*()":
 58 | 			assert isinstance(self.left, Node)
 59 | 			sub_struct, offset, grand = self.left.create_struct(arg_struct_list, self.byte_length)
 60 | 			if not isinstance(sub_struct, Struct):
 61 | 				temp = Struct(offset + parent_byte_length)
 62 | 				grand[0].insert(grand[1], (temp, ARCH_BITS / 8))
 63 | 				sub_struct, offset, grand = self.left.create_struct(arg_struct_list, self.byte_length)
 64 | 				sub_struct = temp
 65 | 			sub_struct.extend(offset + parent_byte_length)
 66 | 			if sub_struct.get(offset)[1] == 1:
 67 | 				sub_struct.insert(offset, (0, parent_byte_length))
 68 | 			return (sub_struct.get(offset)[0], 0, (sub_struct, offset))
 69 | 		elif self.operation == "RESIZE":
 70 | 			return self.left.create_struct(arg_struct_list, self.byte_length)
 71 | 		else:
 72 | 			print("Not yet supported", self.operation)
 73 | 			raise ValueError("Not yet supported")
 74 | 
 75 | 	def __str__(self):
 76 | 		if self.is_leaf():
 77 | 			return str(self.operation)
 78 | 		elif self.operation == "*()":
 79 | 			return "*({})".format(str(self.left))
 80 | 		elif self.operation == "RESIZE":
 81 | 			return "(uint{}_t)({})".format(self.byte_length * 8, str(self.left))
 82 | 		elif self.operation == "~":
 83 | 			return "~({})".format(str(self.left))
 84 | 		else:
 85 | 			return str(self.left) + " " + self.operation + " " + str(self.right)
 86 | 
 87 | 	def __repr__(self):
 88 | 		return '"' + self.__str__() + '"'
 89 | 
 90 | 	def __hash__(self):
 91 | 		ret = hash(str(self))
 92 | 		return ret
 93 | 
 94 | 	def relevant(self):
 95 | 		good = self.operation in ("+", "*()", "RESIZE", "*") or (self.is_leaf() and str(self.operation).startswith("ARG")) or (isinstance(self.operation, Varnode) and self.operation.isConstant()) or self.is_varnode_constant()
 96 | 		if isinstance(self.left, Node):
 97 | 			good = good and self.left.relevant()
 98 | 		if isinstance(self.right, Node):
 99 | 			good = good and self.right.relevant()
100 | 		return good
101 | 
102 | 	def contains(self, nodes):
103 | 		if self is None:
104 | 			return False
105 | 		return self in nodes or (isinstance(self.left, Node) and self.left.contains(nodes)) or (isinstance(self.right, Node) and self.right.contains(nodes))
106 | 
107 | 	def find_base_idx2(self):
108 | 		if self.is_leaf() and str(self.operation).startswith("ARG"):
109 | 			return int(str(self.operation).split("ARG")[1])
110 | 		res = None
111 | 		if isinstance(self.left, Node) and res is None:
112 | 			res = self.left.find_base_idx2()
113 | 		if isinstance(self.right, Node) and res is None:
114 | 			res = self.right.find_base_idx2()
115 | 		return res
116 | 
117 | 	def find_base_idx(self, old_params):
118 | 		if self in old_params:
119 | 			idx = old_params.index(self)
120 | 			return idx
121 | 		res = None
122 | 		if isinstance(self.left, Node) and res is None:
123 | 			res = self.left.find_base_idx(old_params)
124 | 		if isinstance(self.right, Node) and res is None:
125 | 			res = self.right.find_base_idx(old_params)
126 | 		return res
127 | 
128 | 	#replaces instances of old_params in the binary tree with instance in new_params, and makes a copy of all nodes
129 | 	def replace_base_parameters(self, old_params, new_param):
130 | 		if self in old_params:
131 | 			return new_param
132 | 		ret = self.shallow_copy()
133 | 		if isinstance(ret.left, Node):
134 | 			ret.left = ret.left.replace_base_parameters(old_params, new_param)
135 | 		if isinstance(ret.right, Node):
136 | 			ret.right = ret.right.replace_base_parameters(old_params, new_param)
137 | 		return ret
138 | 
139 | 	def is_varnode_constant(self):
140 | 		return isinstance(self.operation, Varnode) and self.operation.isConstant()
141 | 
142 | 	def _simplify(self):
143 | 		# TODO: better simplification in the future
144 | 		changed = False
145 | 		ret = self.shallow_copy()
146 | 		if ret.left is not None:
147 | 			ret.left, c = ret.left._simplify()
148 | 			changed |= c
149 | 		if ret.right is not None:
150 | 			ret.right, c = ret.right._simplify()
151 | 			changed |= c
152 | 		if ret.operation == "*" and (self.left.is_varnode_constant()) and (self.right.is_varnode_constant()) and ret.left.operation.getSize() == ret.right.operation.getSize():
153 | 			temp = ret.left.operation
154 | 			temp2 = ret.right.operation
155 | 			ret = Node(Varnode(temp.getAddress().getNewAddress(temp.getOffset() * temp2.getOffset()), temp.getSize()), None, None, temp.getSize())
156 | 			return ret, True
157 | 		elif ret.operation == "+" and (self.left.is_varnode_constant()) and (self.right.is_varnode_constant()) and ret.left.operation.getSize() == ret.right.operation.getSize():
158 | 			temp = ret.left.operation
159 | 			temp2 = ret.right.operation
160 | 			ret = Node(Varnode(temp.getAddress().getNewAddress(temp.getOffset() + temp2.getOffset()), temp.getSize()), None, None, temp.getSize())
161 | 			return ret, True
162 | 		elif ret.operation == "RESIZE" and self.left.is_varnode_constant():
163 | 			return Node(Varnode(ret.left.operation.getAddress(), ret.byte_length), ret.left.left, ret.left.right, ret.byte_length), True
164 | 		return ret, changed
165 | 
166 | 	def simplify(self):
167 | 		s, c = self._simplify()
168 | 		while c:
169 | 			s, c = s._simplify()
170 | 		return s
171 | 
172 | 	def shallow_copy(self):
173 | 		ret = Node(self.operation, self.left, self.right, self.byte_length)
174 | 		return ret
175 | 
176 | 	def deep_copy(self):
177 | 		left = self.left
178 | 		right = self.right
179 | 		if isinstance(left, Node):
180 | 			return left.deep_copy()
181 | 		if isinstance(right, Node):
182 | 			return rigth.deep_copy()
183 | 		return Node(self.operation, left, right, self.byte_length)
184 | 
185 | 	def is_leaf(self):
186 | 		return self.left is None and self.right is None
187 | 
188 | 	def add(self, value):
189 | 		return Node("+", self, value, self.byte_length)
190 | 
191 | 	def sub(self, value):
192 | 		return Node("-", self, value, self.byte_length)
193 | 
194 | 	def mult(self, value):
195 | 		return Node("*", self, value, self.byte_length)
196 | 
197 | 	def div(self, value):
198 | 		return Node("/", self, value, self.byte_length)
199 | 
200 | 	def shl(self, value):
201 | 		return Node("<<", self, value, self.byte_length)
202 | 
203 | 	def shr(self, value):
204 | 		return Node(">>", self, value, self.byte_length)
205 | 
206 | 	def bitwise_xor(self, value):
207 | 		return Node("^", self, value, self.byte_length)
208 | 
209 | 	def bitwise_or(self, value):
210 | 		return Node("|", self, value, self.byte_length)
211 | 
212 | 	def bitwise_and(self, value):
213 | 		return Node("&", self, value, self.byte_length)
214 | 
215 | 	def ptr_deref(self):
216 | 		return Node("*()", self, None, self.byte_length)
217 | 
218 | 	def resize(self, new_length):
219 | 		return Node("RESIZE", self, None, new_length)
220 | 
221 | 	def eq(self, other):
222 | 		return Node("==", self, other, self.byte_length)
223 | 
224 | 	def neq(self, other):
225 | 		return Node("neq", self, other, self.byte_length)
226 | 
227 | 	def lt(self, other):
228 | 		return Node("<", self, other, self.byte_length)
229 | 
230 | 	def le(self, other):
231 | 		return Node("<=", self, other, self.byte_length)
232 | 
233 | 	def slt(self, other):
234 | 		return Node("s<", self, other, self.byte_length)
235 | 
236 | 	def sle(self, other):
237 | 		return Node("s<=", self, other, self.byte_length)
238 | 
239 | 	def neg(self):
240 | 		return Node("~", self, None, self.byte_length)
241 | 
242 | 	def sdiv(self, other):
243 | 		return Node("s/", self, other, self.byte_length)
244 | 
245 | 	def smod(self, other):
246 | 		return Node("s%", self, other, self.byte_length)
247 | 
248 | 	def mod(self, other):
249 | 		return Node("%", self, other, self.byte_length)
250 | 
251 | 	def sshr(self, other):
252 | 		return Node("s>>", self, other, self.byte_length)
253 | 


--------------------------------------------------------------------------------
/plugin/Struct.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # @category: GEARSHIFT.internal
  3 | 
  4 | from __future__ import print_function
  5 | 
  6 | from ghidra.program.model.data import StructureDataType, CategoryPath, DataTypeConflictHandler, PointerDataType, BuiltInDataTypeManager, ArrayDataType
  7 | 
  8 | class Struct(object):
  9 | 	def __init__(self, size):
 10 | 		self.size = size # Total size of the struct
 11 | 		self.members = [(0, 1)] * size # Represents member (value, member_size)
 12 | 		self.marked = [False] * size # Marked represents offsets in the struct that are accessed
 13 | 		self.is_array = False
 14 | 		global struct_counter
 15 | 		self.name = "S{}".format(struct_counter)
 16 | 		struct_counter += 1
 17 | 		self.dtype = None
 18 | 		self.pretty = None
 19 | 
	def get_dtype(self):
		"""Build (once) and return the data type for this struct.

		A StructureDataType named self.name is created under the "/struct"
		category, filled member by member, and added to the current program's
		data type manager. The value returned, and cached in self.dtype, is a
		pointer type to that struct.
		"""
		if self.dtype is not None:
			return self.dtype
		dm = currentProgram.getDataTypeManager()
		bdm = BuiltInDataTypeManager.getDataTypeManager()
		new_struct = StructureDataType(CategoryPath("/struct"), self.name, self.size)
		# Built-in type used for a plain member of each supported byte size.
		size_lookup = {}
		size_lookup[1] = bdm.getDataType("/char")
		size_lookup[2] = bdm.getDataType("/short")
		size_lookup[4] = bdm.getDataType("/int")
		size_lookup[8] = bdm.getDataType("/longlong")
		# Running byte offset of the current member.
		off = 0
		for i in range(len(self.members)):
			t, size = self.members[i][0], self.members[i][1]
			comment = ""
			# A third tuple element of False flags a member as never accessed.
			if len(self.members[i]) > 2 and self.members[i][2] == False:
				comment = "NOT ACCESSED"
			if isinstance(t, Struct):
				if not t.is_array:
					# Nested struct: stored as a pointer-sized entry of its own
					# (recursively built) type.
					sub_struct_dtype = t.get_dtype()
					new_struct.replaceAtOffset(off, sub_struct_dtype, ARCH_BITS / 8, "entry_{}".format(i), comment)
				else:
					# Array members are typed as a pointer to char.
					arr_dtype = bdm.getPointer(size_lookup[1], ARCH_BITS / 8)
					new_struct.replaceAtOffset(off, arr_dtype, ARCH_BITS / 8, "entry_{}".format(i), comment)
			else:
				if size not in size_lookup:
					# No built-in type of this size: use a char array of size bytes.
					arr_dtype = ArrayDataType(size_lookup[1], size, 1)
					new_struct.replaceAtOffset(off, arr_dtype, size, "entry_{}".format(i), comment)
				else:
					new_struct.replaceAtOffset(off, size_lookup[size], size, "entry_{}".format(i), comment)
			off += size
		print("DONE CREATING STRUCT", self.name)
		dm.addDataType(new_struct, DataTypeConflictHandler.REPLACE_HANDLER)
		self.dtype = dm.getPointer(new_struct, ARCH_BITS / 8)
		return self.dtype
 55 | 
 56 | 	def __str__(self):
 57 | 		return str(self.members)
 58 | 
 59 | 	def __repr__(self):
 60 | 		return self.__str__()
 61 | 
 62 | 	def make_array(self):
 63 | 		print("Making array")
 64 | 		print(self.members)
 65 | 		self.is_array = True
 66 | 		stride = self.members[0][1]
 67 | 		self.stride = stride
 68 | 
 69 | 	# Consolidates struct members of size 1 into a char array
 70 | 	def consolidate(self):
 71 | 		new_members = []
 72 | 		consolidate_length = 0
 73 | 		cur_offset = 0
 74 | 		for i in self.members:
 75 | 			if self.marked[cur_offset] is True:
 76 | 				if consolidate_length != 0:
 77 | 					new_members.append((0, consolidate_length, False))
 78 | 					consolidate_length = 0
 79 | 				new_members.append(i)
 80 | 			else:
 81 | 				consolidate_length += 1
 82 | 			cur_offset += i[1]
 83 | 		if consolidate_length != 0:
 84 | 			new_members.append((0, consolidate_length))
 85 | 			consolidate_length = 0
 86 | 		self.members = new_members
 87 | 
 88 | 	def mark(self, start, end):
 89 | 		for i in range(start, end):
 90 | 			self.marked[i] = True
 91 | 
 92 | 	# Indicates that there is a struct member (value, member_size) at given offset
 93 | 	def insert(self, offset, member):
 94 | 		c = 0
 95 | 		idx = 0
 96 | 		# find member
 97 | 		while c < offset:
 98 | 			c += self.members[idx][1]
 99 | 			idx += 1
100 | 		if c != offset:
101 | 			print("Misaligned buf")
102 | 			self.break_member(idx - 1)
103 | 			self.insert(offset, member)
104 | 			return
105 | 
106 | 		# combine
107 | 		c = 0
108 | 		temp = idx
109 | 		while c < member[1]:
110 | 			c += self.members[idx][1]
111 | 			idx += 1
112 | 		if c != member[1]:
113 | 			# Misaligned struct and data size accesses - might be an array?
114 | 			print("Misaligned buf")
115 | 			self.break_member(idx - 1)
116 | 			self.insert(offset, member)
117 | 			return
118 | 		c = 0
119 | 		idx = temp
120 | 		while c < member[1]:
121 | 			c += self.members[idx][1]
122 | 			del self.members[idx]
123 | 		self.members.insert(idx, member)
124 | 		self.mark(offset, offset + member[1])
125 | 
126 | 	def merge_until(self, idx, until):
127 | 		total_length = 0
128 | 		while idx < len(self.members) and self.members[idx][0] != until:
129 | 			total_length += self.members[idx][1]
130 | 			del self.members[idx]
131 | 		self.members.insert(idx, (0, total_length))
132 | 
133 | 	# Breaks apart the member at index self.members[idx]
134 | 	def break_member(self, idx):
135 | 		assert not isinstance(self.members[idx][0], Struct)
136 | 		size = self.members[idx][1]
137 | 		del self.members[idx]
138 | 		for i in range(size):
139 | 			self.members.insert(idx, (0, 1))
140 | 
141 | 	# Fetches member at given offset, and breaks apart member if there is member alignment conflict
142 | 	def get(self, offset):
143 | 		c = 0
144 | 		idx = 0
145 | 		while c < offset:
146 | 			c += self.members[idx][1]
147 | 			idx += 1
148 | 		if c != offset:
149 | 			# Same issue as insert
150 | 			print(self.members[idx - 1][1])
151 | 			print(c)
152 | 			print("Get issue", self.members[idx - 1])
153 | 			self.break_member(idx - 1)
154 | 			ret = self.get(offset)
155 | 			return ret
156 | 		self.mark(offset, offset + self.members[idx][1])
157 | 		return self.members[idx]
158 | 
159 | 	# Only fetches member at given offset
160 | 	def get2(self, offset):
161 | 		c = 0
162 | 		idx = 0
163 | 		while c < offset:
164 | 			c += self.members[idx][1]
165 | 			idx += 1
166 | 		if c != offset:
167 | 			return -1
168 | 		return self.members[idx][0]
169 | 
170 | 	# Extends the size of the struct
171 | 	def extend(self, length):
172 | 		while self.size < length:
173 | 			self.size += 1
174 | 			self.members.append((0, 1))
175 | 			self.marked.append(False)
176 | 
177 | 	def get_field(self, length, entry_num):
178 | 		if length <= 8 and length & 1 == 0:
179 | 			return "uint{}_t entry_{};".format(length * 8, entry_num)
180 | 		elif length == 1:
181 | 			return "char entry_{};".format(entry_num)
182 | 		else:
183 | 			return "char entry_{}[{}];".format(entry_num, length)
184 | 
185 | 	def pretty_print(self):
186 | 		if self.pretty is not None:
187 | 			return self.pretty
188 | 		self.consolidate()
189 | 
190 | 		# first, we detect if it's size 0, or only has one member
191 | 		if self.size == 0:
192 | 			return ""
193 | 		elif len(self.members) == 1:
194 | 			return ""
195 | 
196 | 		res = "struct {} {{\n".format(self.name)
197 | 
198 | 		c = -1
199 | 		length = 0
200 | 		entry_counter = -1
201 | 		while length < self.size:
202 | 			c += 1
203 | 			entry_counter += 1
204 | 			if isinstance(self.members[c][0], Struct):
205 | 				length += ARCH_BITS / 8
206 | 				if not self.members[c][0].is_array:
207 | 					res += "struct {}* entry_{};\n".format(self.members[c][0].name, entry_counter)
208 | 					res = self.members[c][0].pretty_print() + "\n" + res
209 | 				else:
210 | 					res += "uint{}_t* entry_{};\n".format(self.members[c][0].stride * 8, entry_counter)
211 | 			else:
212 | 				res += self.get_field(self.members[c][1], entry_counter) + "\n"
213 | 				if len(self.members[c]) > 2:
214 | 					res = res[:-1] + " //NOT ACCESSED\n"
215 | 				length += self.members[c][1]
216 | 		self.pretty = res + "};"
217 | 		return self.pretty
218 | 
class Generator(object):
	"""Builds C source text that fills function arguments via ``fread`` from ``h``.

	Every heap buffer in the generated code gets a unique ``allocation<N>``
	name, numbered by ``allocation_counter``.
	"""

	def __init__(self):
		self.allocation_counter = 0

	def _new_allocation(self):
		"""Return the next unused ``allocation<N>`` variable name."""
		index = self.allocation_counter
		self.allocation_counter = index + 1
		return "allocation{}".format(index)

	def _do_read(self, struct, current_reference):
		"""Emit C code that allocates and fills ``struct`` behind ``current_reference``.

		Returns a ``(read_code, cleanup_code)`` pair of strings; the cleanup
		code frees every buffer allocated by the read code.
		"""
		if struct.is_array:
			buffer_name = self._new_allocation()
			byte_count = 8 * struct.stride
			reads = [
				"void* {} = malloc({});\n".format(buffer_name, byte_count),
				"{} = (char*){};\n".format(current_reference, buffer_name),
				"fread((char*){}, 1, {}, h);\n".format(current_reference, byte_count),
			]
			return "".join(reads), "free({});\n".format(buffer_name)

		total_length = sum(member[1] for member in struct.members)
		struct_buffer = self._new_allocation()
		reads = [
			"void* {} = malloc({});\n".format(struct_buffer, total_length),
			"{} = (struct {}*){};\n".format(current_reference, struct.name, struct_buffer),
		]
		frees = []
		for index, member in enumerate(struct.members):
			value = member[0]
			length = member[1]
			# For int values the low byte selects how the member is read.
			kind = value & 0xff if type(value) is int else None
			if kind == 0x0:
				# Plain data: read the bytes straight into the field.
				reads.append("fread((void*)&{}->entry_{}, 1, {}, h);\n".format(current_reference, index, length))
			elif kind == 0x1:
				# Buffer of (value >> 8) bytes plus a terminating zero byte.
				text_length = value >> 8
				text_buffer = self._new_allocation()
				reads.append("void* {} = malloc({});\n".format(text_buffer, text_length + 1))
				reads.append("{}->entry_{} = (char*){};\n".format(current_reference, index, text_buffer))
				reads.append("{}->entry_{}[{}] = 0;\n".format(current_reference, index, text_length))
				reads.append("fread({}->entry_{}, 1, {}, h);\n".format(current_reference, index, text_length))
				frees.append("free({});\n".format(text_buffer))
			else:
				# Anything else is treated as a nested struct and read recursively.
				nested_reads, nested_frees = self._do_read(value, "{}->entry_{}".format(current_reference, index))
				reads.append(nested_reads)
				frees.append(nested_frees)
		frees.append("free({});\n".format(struct_buffer))
		return "".join(reads), "".join(frees)

	def generate_struct_reader(self, args):
		"""Emit C code declaring and filling ``arg_<i>`` for every entry of ``args``.

		Returns ``(code, cleanup, arg_list)`` where ``arg_list`` is the
		comma-separated argument names.
		"""
		code = ""
		cleanup = ""
		arg_names = []
		for i, arg in enumerate(args):
			name = "arg_{}".format(i)
			arg_names.append(name)
			if arg.size == 0:
				# this is an int
				code += arg.get_field(ARCH_BITS / 8, 0).replace("entry_0", name) + "\n"
				code += "fread(&arg_{}, 1, 8, h);\n".format(i)
			elif len(arg.members) == 1:
				# this is a primitive pointer: point it at a temporary and fill that
				declaration = arg.get_field(ARCH_BITS / 8, 0)
				code += declaration.replace("entry_0", "temp_arg_{}".format(i)) + "\n"
				code += declaration.replace("entry_0", "*arg_{}".format(i))[:-1] + " = &temp_arg_{};\n".format(i)
				code += "fread(arg_{}, 1, 8, h);\n".format(i)
			elif isinstance(arg, Struct) and not arg.is_array:
				# struct
				code += "struct {}* arg_{};\n".format(arg.name, i)
				res, clean = self._do_read(arg, name)
				code += res
				cleanup += clean
			else:
				# array
				array_length = 8
				code += "char* {} = (char*)malloc({});\n".format(name, array_length + 1)
				code += "{}[{}] = 0;\n".format(name, array_length)
				code += "fread({}, 1, {}, h);\n".format(name, array_length)
				cleanup += "free({});\n".format(name)
		return code, cleanup, ", ".join(arg_names)
298 | 
def generate_struct_reader(args):
	"""Generate reader code for ``args`` using a fresh Generator.

	Returns whatever ``Generator.generate_struct_reader`` returns.
	"""
	return Generator().generate_struct_reader(args)
302 | 


--------------------------------------------------------------------------------
/plugin/PCodeInterpreter.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # @category: GEARSHIFT.internal
  3 | 
  4 | from __future__ import print_function
  5 | 
  6 | from ghidra.program.model.pcode import PcodeOp
  7 | from ghidra.program.model.pcode import Varnode
  8 | from ghidra.program.flatapi import FlatProgramAPI
  9 | from Node import Node
 10 | from Struct import Struct
 11 | from ghidra.program.model.pcode import HighFunctionDBUtil
 12 | from ghidra.program.model.symbol import SourceType
 13 | from ghidra.program.model.data import Undefined
 14 | from ghidra.app.cmd.function import ApplyFunctionSignatureCmd
 15 | from ghidra.program.model.listing import AutoParameterImpl
 16 | 
# Maximum number of candidate Node expressions kept per varnode;
# PCodeInterpreter.lookup_node truncates longer lists to this length.
NODE_LIMIT = 1
# When true, PCodeInterpreter.store prints every STORE it records.
log = False

# Module-level caches keyed by the called function object.
# forward_cache values: (stores, loads, parameter nodes, arrays, subcall parameter cache),
# filled by PCodeInterpreter.call after the forward analysis of the callee.
forward_cache = {}
# backward_cache values: (return type, parameter node lists), filled by
# PCodeInterpreter.call when the call has an output.
backward_cache = {}
# Decompiled high function per function, filled by get_highfunction.
highfunction_cache = {}

# dictionary storing func: list of symbolic parameters the func is called with for each parameter
# this is used to apply retyping in the future
# NOTE(review): no module-level dictionary follows this comment; it appears to
# describe PCodeInterpreter.subcall_parameter_cache - confirm.

# NOTE(review): node labelled "CYCLE"; not referenced in the visible part of
# this file - confirm where it is used.
cycle_node = Node("CYCLE", None, None, 0)
 28 | 
 29 | class PCodeInterpreter:
 30 | 	def __init__(self):
 31 | 		self.nodes = {}
 32 | 		self.stores = []
 33 | 		self.loads = []
 34 | 		self.instruction = None
 35 | 		self.cycle_exec = {}
 36 | 		self.loop_variants = set()
 37 | 		self.arrays = []
 38 | 		self.subcall_parameter_cache = {}
 39 | 
 40 | 	def process(self, instruction, depth):
 41 | 		opcode = instruction.getOpcode()
 42 | 		output = instruction.getOutput()
 43 | 		inputs = instruction.getInputs()
 44 | 		self.depth = depth
 45 | 
 46 | 		saved_instruction = self.instruction
 47 | 		self.instruction = instruction
 48 | 
 49 | 		if opcode == PcodeOp.INT_ADD:
 50 | 			self.int_add(inputs, output)
 51 | 		elif opcode == PcodeOp.INT_SDIV:
 52 | 			self.int_sdiv(inputs, output)
 53 | 		elif opcode == PcodeOp.INT_DIV:
 54 | 			self.int_div(inputs, output)
 55 | 		elif opcode == PcodeOp.INT_SREM:
 56 | 			self.int_srem(inputs, output)
 57 | 		elif opcode == PcodeOp.INT_REM:
 58 | 			self.int_rem(inputs, output)
 59 | 		elif opcode == PcodeOp.INT_RIGHT:
 60 | 			self.int_right(inputs, output)
 61 | 		elif opcode == PcodeOp.INT_SRIGHT:
 62 | 			self.int_sright(inputs, output)
 63 | 		elif opcode == PcodeOp.INT_LEFT:
 64 | 			self.int_left(inputs, output)
 65 | 		elif opcode == PcodeOp.INT_AND:
 66 | 			self.int_and(inputs, output)
 67 | 		elif opcode == PcodeOp.INT_SUB:
 68 | 			self.int_sub(inputs, output)
 69 | 		elif opcode == PcodeOp.INT_OR:
 70 | 			self.int_or(inputs, output)
 71 | 		elif opcode == PcodeOp.INT_XOR:
 72 | 			self.int_xor(inputs, output)
 73 | 		elif opcode == PcodeOp.INT_NEGATE:
 74 | 			self.int_negate(inputs, output)
 75 | 		elif opcode == PcodeOp.INT_EQUAL:
 76 | 			self.int_equal(inputs, output)
 77 | 		elif opcode == PcodeOp.INT_NOTEQUAL:
 78 | 			self.int_notequal(inputs, output)
 79 | 		elif opcode == PcodeOp.INT_LESS:
 80 | 			self.int_less(inputs, output)
 81 | 		elif opcode == PcodeOp.INT_LESSEQUAL:
 82 | 			self.int_lessequal(inputs, output)
 83 | 		elif opcode == PcodeOp.INT_SLESS:
 84 | 			self.int_sless(inputs, output)
 85 | 		elif opcode == PcodeOp.INT_SLESSEQUAL:
 86 | 			self.int_slessequal(inputs, output)
 87 | 		elif opcode == PcodeOp.INT_2COMP:
 88 | 			self.int_2comp(inputs, output)
 89 | 		elif opcode == PcodeOp.PTRSUB:
 90 | 			self.ptrsub(inputs, output)
 91 | 		elif opcode == PcodeOp.STORE:
 92 | 			self.store(inputs, output)
 93 | 		elif opcode == PcodeOp.LOAD:
 94 | 			self.load(inputs, output)
 95 | 		elif opcode == PcodeOp.SUBPIECE:
 96 | 			self.subpiece(inputs, output)
 97 | 		elif opcode == PcodeOp.PIECE:
 98 | 			self.piece(inputs, output)
 99 | 		elif opcode == PcodeOp.CAST:
100 | 			self.cast(inputs, output)
101 | 		elif opcode == PcodeOp.MULTIEQUAL:
102 | 			self.multiequal(inputs, output)
103 | 		elif opcode == PcodeOp.INT_SEXT:
104 | 			self.int_sext(inputs, output)
105 | 		elif opcode == PcodeOp.INT_ZEXT:
106 | 			self.int_zext(inputs, output)
107 | 		elif opcode == PcodeOp.INT_MULT:
108 | 			self.int_mult(inputs, output)
109 | 		elif opcode == PcodeOp.PTRADD:
110 | 			self.ptradd(inputs, output)
111 | 		elif opcode == PcodeOp.CALL:
112 | 			self.call(inputs, output)
113 | 		elif opcode == PcodeOp.CALLIND:
114 | 			self.callind(inputs, output)
115 | 		elif opcode == PcodeOp.COPY:
116 | 			self.copy(inputs, output)
117 | 		elif opcode == PcodeOp.INDIRECT:
118 | 			self.indirect(inputs, output)
119 | 		elif opcode == PcodeOp.RETURN:
120 | 			if len(inputs) >= 2:
121 | 				print("RETURN")
122 | 				print(self.lookup_node(inputs[1]))
123 | 		elif opcode == PcodeOp.CBRANCH:
124 | 			pass
125 | 		else:
126 | 			print("Unsupported Opcode:", instruction.getMnemonic(), inputs[0].getPCAddress())
127 | 
128 | 		self.instruction = saved_instruction
129 | 
130 | 	def int_sdiv(self, inputs, output):
131 | 		assert len(inputs) == 2 and output is not None
132 | 		a = inputs[0]
133 | 		b = inputs[1]
134 | 		for i in self.lookup_node(a):
135 | 			for j in self.lookup_node(b):
136 | 				self.store_node(output, i.sdiv(j))
137 | 
138 | 	def int_div(self, inputs, output):
139 | 		assert len(inputs) == 2 and output is not None
140 | 		a = inputs[0]
141 | 		b = inputs[1]
142 | 		for i in self.lookup_node(a):
143 | 			for j in self.lookup_node(b):
144 | 				self.store_node(output, i.div(j))
145 | 
146 | 	def int_srem(self, inputs, output):
147 | 		assert len(inputs) == 2 and output is not None
148 | 		a = inputs[0]
149 | 		b = inputs[1]
150 | 		for i in self.lookup_node(a):
151 | 			for j in self.lookup_node(b):
152 | 				self.store_node(output, i.smod(j))
153 | 
154 | 	def int_rem(self, inputs, output):
155 | 		assert len(inputs) == 2 and output is not None
156 | 		a = inputs[0]
157 | 		b = inputs[1]
158 | 		for i in self.lookup_node(a):
159 | 			for j in self.lookup_node(b):
160 | 				self.store_node(output, i.mod(j))
161 | 
162 | 	def int_add(self, inputs, output):
163 | 		assert len(inputs) == 2 and output is not None
164 | 		a = inputs[0]
165 | 		b = inputs[1]
166 | 		if (a.isConstant() and b.isConstant()) or a.isConstant():
167 | 			raise Exception("INT_ADD error")
168 | 		for i in self.lookup_node(a):
169 | 			for j in self.lookup_node(b):
170 | 				self.store_node(output, i.add(j))
171 | 
172 | 	def int_right(self, inputs, output):
173 | 		assert len(inputs) == 2 and output is not None
174 | 		a = inputs[0]
175 | 		b = inputs[1]
176 | 		for i in self.lookup_node(a):
177 | 			for j in self.lookup_node(b):
178 | 				self.store_node(output, i.shr(j))
179 | 
180 | 	def int_sright(self, inputs, output):
181 | 		assert len(inputs) == 2 and output is not None
182 | 		a = inputs[0]
183 | 		b = inputs[1]
184 | 		for i in self.lookup_node(a):
185 | 			for j in self.lookup_node(b):
186 | 				self.store_node(output, i.sshr(j))
187 | 
188 | 	def int_left(self, inputs, output):
189 | 		assert len(inputs) == 2 and output is not None
190 | 		a = inputs[0]
191 | 		b = inputs[1]
192 | 		for i in self.lookup_node(a):
193 | 			for j in self.lookup_node(b):
194 | 				self.store_node(output, i.shl(j))
195 | 
196 | 	def int_and(self, inputs, output):
197 | 		assert len(inputs) == 2 and output is not None
198 | 		a = inputs[0]
199 | 		b = inputs[1]
200 | 		for i in self.lookup_node(a):
201 | 			for j in self.lookup_node(b):
202 | 				self.store_node(output, i.bitwise_and(j))
203 | 
204 | 	def int_sub(self, inputs, output):
205 | 		assert len(inputs) == 2 and output is not None
206 | 		a = inputs[0]
207 | 		b = inputs[1]
208 | 		for i in self.lookup_node(a):
209 | 			for j in self.lookup_node(b):
210 | 				self.store_node(output, i.sub(j))
211 | 
212 | 	def int_or(self, inputs, output):
213 | 		assert len(inputs) == 2 and output is not None
214 | 		a = inputs[0]
215 | 		b = inputs[1]
216 | 		for i in self.lookup_node(a):
217 | 			for j in self.lookup_node(b):
218 | 				self.store_node(output, i.bitwise_or(j))
219 | 
220 | 	def int_negate(self, inputs, output):
221 | 		assert len(inputs) == 1 and output is not None
222 | 		a = inputs[0]
223 | 		for i in self.lookup_node(a):
224 | 			self.store_node(output, i.neg())
225 | 
226 | 	def int_xor(self, inputs, output):
227 | 		assert len(inputs) == 2 and output is not None
228 | 		a = inputs[0]
229 | 		b = inputs[1]
230 | 		for i in self.lookup_node(a):
231 | 			for j in self.lookup_node(b):
232 | 				self.store_node(output, i.bitwise_xor(j))
233 | 
234 | 	def int_equal(self, inputs, output):
235 | 		assert output is not None
236 | 		a = inputs[0]
237 | 		b = inputs[1]
238 | 		for i in self.lookup_node(a):
239 | 			for j in self.lookup_node(b):
240 | 				res = i.eq(j)
241 | 				if res.byte_length != output.getSize():
242 | 					res = res.resize(output.getSize())
243 | 				self.store_node(output, res)
244 | 
245 | 	def int_notequal(self, inputs, output):
246 | 		assert output is not None
247 | 		a = inputs[0]
248 | 		b = inputs[1]
249 | 		for i in self.lookup_node(a):
250 | 			for j in self.lookup_node(b):
251 | 				res = i.neq(j)
252 | 				if res.byte_length != output.getSize():
253 | 					res = res.resize(output.getSize())
254 | 				self.store_node(output, res)
255 | 
256 | 	def int_less(self, inputs, output):
257 | 		assert output is not None
258 | 		a = inputs[0]
259 | 		b = inputs[1]
260 | 		for i in self.lookup_node(a):
261 | 			for j in self.lookup_node(b):
262 | 				res = i.lt(j)
263 | 				if res.byte_length != output.getSize():
264 | 					res = res.resize(output.getSize())
265 | 				self.store_node(output, res)
266 | 
267 | 	def int_lessequal(self, inputs, output):
268 | 		assert output is not None
269 | 		a = inputs[0]
270 | 		b = inputs[1]
271 | 		for i in self.lookup_node(a):
272 | 			for j in self.lookup_node(b):
273 | 				res = i.le(j)
274 | 				if res.byte_length != output.getSize():
275 | 					res = res.resize(output.getSize())
276 | 				self.store_node(output, res)
277 | 
278 | 	def int_sless(self, inputs, output):
279 | 		assert output is not None
280 | 		a = inputs[0]
281 | 		b = inputs[1]
282 | 		for i in self.lookup_node(a):
283 | 			for j in self.lookup_node(b):
284 | 				res = i.slt(j)
285 | 				if res.byte_length != output.getSize():
286 | 					res = res.resize(output.getSize())
287 | 				self.store_node(output, res)
288 | 
289 | 	def int_slessequal(self, inputs, output):
290 | 		assert output is not None
291 | 		a = inputs[0]
292 | 		b = inputs[1]
293 | 		for i in self.lookup_node(a):
294 | 			for j in self.lookup_node(b):
295 | 				res = i.sle(j)
296 | 				if res.byte_length != output.getSize():
297 | 					res = res.resize(output.getSize())
298 | 				self.store_node(output, res)
299 | 
300 | 	def int_2comp(self, inputs, output):
301 | 		assert len(inputs) == 1 and output is not None
302 | 		for i in self.lookup_node(inputs[0]):
303 | 			self.store_node(output, i.neg())
304 | 
305 | 	def ptrsub(self, inputs, output):
306 | 		assert len(inputs) == 2
307 | 		assert output is not None
308 | 		a = inputs[0]
309 | 		b = inputs[1]
310 | 		if not b.isConstant():
311 | 			raise Exception("PTRSUB error")
312 | 		for i in self.lookup_node(a):
313 | 			for j in self.lookup_node(b):
314 | 				self.store_node(output, i.add(j))
315 | 
316 | 	def store(self, inputs, output):
317 | 		assert len(inputs) == 3
318 | 		for i in self.lookup_node(inputs[1]):
319 | 			for j in self.lookup_node(inputs[2]):
320 | 				temp = i.ptr_deref()
321 | 				if temp.byte_length != j.byte_length:
322 | 					temp = temp.resize(j.byte_length)
323 | 				self.stores.append(temp)
324 | 				if log:
325 | 					print("[*]", "STORE:", inputs[0].getPCAddress(), temp)
326 | 					print("VALUE", self.lookup_node(inputs[2]))
327 | 					print("")
328 | 
329 | 	def load(self, inputs, output):
330 | 		assert len(inputs) == 2 and output is not None
331 | 		for i in self.lookup_node(inputs[1]):
332 | 			value = i.ptr_deref()
333 | 			if value.byte_length != output.getSize():
334 | 				value = value.resize(output.getSize())
335 | 			self.store_node(output, value)
336 | 			self.loads.append(value)
337 | 
338 | 	def subpiece(self, inputs, output):
339 | 		assert len(inputs) == 2 and output is not None
340 | 		for i in self.lookup_node(inputs[0]):
341 | 			for j in self.lookup_node(inputs[1]):
342 | 				value = i.shr(j.mult(Node(currentProgram.getAddressFactory().getConstantAddress(8), None, None, i.byte_length)))
343 | 				if value.byte_length != output.getSize():
344 | 					value = value.resize(output.getSize())
345 | 				self.store_node(output, value)
346 | 
347 | 	def piece(self, inputs, output):
348 | 		assert len(inputs) == 2 and output is not None
349 | 		for i in self.lookup_node(inputs[0]):
350 | 			for j in self.lookup_node(inputs[1]):
351 | 				value = i.shl(Node(currentProgram.getAddressFactory().getConstantAddress(j.byte_length), None, None, i.byte_length)).add(j)
352 | 				if value.byte_length != output.getSize():
353 | 					value = value.resize(output.getSize())
354 | 				self.store_node(output, value)
355 | 
356 | 	def cast(self, inputs, output):
357 | 		assert len(inputs) == 1 and output is not None
358 | 		for value in self.lookup_node(inputs[0]):
359 | 			assert value.byte_length == output.getSize()
360 | 			self.store_node(output, value)
361 | 
362 | 	def multiequal(self, inputs, output):
363 | 		assert output is not None and len(inputs) >= 2
364 | 		possibilities = []
365 | 		count = 0
366 | 		for i in inputs:
367 | 			result = self.lookup_node(i)
368 | 			for j in result:
369 | 				possibilities.append(j)
370 | 				self.store_node(output, j)
371 | 		self.loop_variants.add(output)
372 | 
373 | 	def int_sext(self, inputs, output):
374 | 		assert output is not None and len(inputs) == 1
375 | 		for i in self.lookup_node(inputs[0]):
376 | 			self.store_node(output, i.resize(output.getSize()))
377 | 
378 | 	def int_zext(self, inputs, output):
379 | 		assert output is not None and len(inputs) == 1
380 | 		for i in self.lookup_node(inputs[0]):
381 | 			self.store_node(output, i.resize(output.getSize()))
382 | 
383 | 	def int_mult(self, inputs, output):
384 | 		assert output is not None and len(inputs) == 2
385 | 		a = inputs[0]
386 | 		b = inputs[1]
387 | 		for i in self.lookup_node(a):
388 | 			for j in self.lookup_node(b):
389 | 				result = i.mult(j)
390 | 				self.store_node(output, result)
391 | 
392 | 	def ptradd(self, inputs, output):
393 | 		assert output is not None and len(inputs) == 3
394 | 		assert inputs[2].isConstant() and not inputs[0].isConstant()
395 | 		for a in self.lookup_node(inputs[0]):
396 | 			for b in self.lookup_node(inputs[1]):
397 | 				for c in self.lookup_node(inputs[2]):
398 | 					temp = b.mult(c)
399 | 					result = a.add(temp)
400 | 					assert output.getSize() == result.byte_length
401 | 					self.store_node(output, result)
402 | 
403 | 	def callind(self, inputs, output):
404 | 		assert len(inputs) >= 1
405 | 		print("Warning: indirect call - skipping and returning 0")
406 | 		if output is not None:
407 | 			self.store_node(output, Node(Varnode(output.getAddress(), output.getSize()), None, None, output.getSize()))
408 | 
409 | 	def call(self, inputs, output):
410 | 		assert len(inputs) >= 1
411 | 		# First we have to analyze function forward with input arguments
412 | 		# If output exists, then we have to analyze backwards to obtain ret value types
413 | 		pc_varnode = inputs[0]
414 | 		assert pc_varnode.isAddress()
415 | 		pc_addr = pc_varnode.getAddress()
416 | 		temp = FlatProgramAPI(currentProgram)
417 | 		called_func = temp.getFunctionAt(pc_addr)
418 | 		print("call:", inputs[0].getPCAddress())
419 | 
420 | 		##### START CALL RECURSIVE FORWARD ANALYSIS
421 | 
422 | 		# Note: the function analysis parameter's varnodes are DIFFERENT that the varnodes from our current state. Thus we replace the varnode -> Node map in the function with the calling parameters
423 | 		checkFixParameters(called_func, inputs[1:])
424 | 		if called_func not in forward_cache:
425 | 			global log
426 | 			pci_new = PCodeInterpreter()
427 | 			parameter_varnodes = analyzeFunctionForward(called_func, pci_new)
428 | 			parameter_nodes = []
429 | 			for i in parameter_varnodes:
430 | 				parameter_nodes.append(pci_new.lookup_node(i)[0])
431 | 			forward_cache[called_func] = (pci_new.stores, pci_new.loads, parameter_nodes, pci_new.arrays, pci_new.subcall_parameter_cache)
432 | 			log = False
433 | 
434 | 		stores, loads, parameter_node_objects, arrs, nested_subcall_parameter_cache = forward_cache[called_func]
435 | 		input_node_objects = map(self.lookup_node, inputs[1:])
436 | 		if called_func not in self.subcall_parameter_cache:
437 | 			param_list = []
438 | 			for i in range(called_func.getParameterCount()):
439 | 				param_list.append([])
440 | 			self.subcall_parameter_cache[called_func] = param_list
441 | 
442 | 		node_objects = map(self.lookup_node, inputs[1:])
443 | 		for i in range(len(self.subcall_parameter_cache[called_func])):
444 | 			self.subcall_parameter_cache[called_func][i] += node_objects[i]
445 | 
446 | 		for i in stores:
447 | 			arg_idx = i.find_base_idx(parameter_node_objects)
448 | 			if arg_idx is not None:
449 | 				for j in node_objects[arg_idx]:
450 | 					self.stores.append(i.replace_base_parameters(parameter_node_objects, j))
451 | 					if i in arrs:
452 | 						self.arrays.append(self.stores[-1])
453 | 		for i in loads:
454 | 			arg_idx = i.find_base_idx(parameter_node_objects)
455 | 			if arg_idx is not None:
456 | 				for j in node_objects[arg_idx]:
457 | 					self.loads.append(i.replace_base_parameters(parameter_node_objects, j))
458 | 					if i in arrs:
459 | 						self.arrays.append(self.loads[-1])
460 | 
461 | 		##### END CALL RECURSIVE FORWARD ANALYSIS
462 | 
463 | 		# replace args in parameter cache:
464 | 		for func_name in nested_subcall_parameter_cache:
465 | 			current_params = nested_subcall_parameter_cache[func_name]
466 | 			for param_idx in range(len(current_params)):
467 | 				for temp in current_params[param_idx]:
468 | 					arg_idx = temp.find_base_idx(parameter_node_objects)
469 | 					if arg_idx is not None:
470 | 						for j in node_objects[arg_idx]:
471 | 							replaced = temp.replace_base_parameters(parameter_node_objects, j)
472 | 							if func_name not in self.subcall_parameter_cache:
473 | 								param_list = []
474 | 								for i in range(func_name.getParameterCount()):
475 | 									param_list.append([])
476 | 								self.subcall_parameter_cache[func_name] = param_list
477 | 							if arg_idx < len(self.subcall_parameter_cache[func_name]):
478 | 								self.subcall_parameter_cache[func_name][arg_idx].append(replaced)
479 | 
480 | 		if output is not None:
481 | 			if called_func not in backward_cache: # This means we want to backwards interpolate the return type
482 | 				##### START CALL RECURSIVE BACKWARDS ANALYSIS
483 | 
484 | 				checkFixReturn(called_func, output)
485 | 				pci_new = PCodeInterpreter()
486 | 				ret_type, subfunc_parameter_varnodes = analyzeFunctionBackward(called_func, pci_new)
487 | 				backward_cache[called_func] = (ret_type, map(pci_new.lookup_node, subfunc_parameter_varnodes))
488 | 
489 | 				##### END CALL RECURSIVE BACKWARDS ANALYSIS
490 | 
491 | 			ret_type, subfunc_parameter_node_objs = backward_cache[called_func]
492 | 			replaced_rets = []
493 | 			for a in ret_type:
494 | 				for i in a:
495 | 					arg_idx = i.find_base_idx(subfunc_parameter_node_objs)
496 | 					if arg_idx is None:
497 | 						node_objects = [1] # Doesn't matter
498 | 					else:
499 | 						node_objects = self.lookup_node(inputs[1:][arg_idx])
500 | 					for j in node_objects:
501 | 						replaced_rets.append(i.replace_base_parameters(subfunc_parameter_node_objs, j))
502 | 
503 | 			for i in range(len(replaced_rets)):
504 | 				self.store_node(output, replaced_rets[i])
505 | 
506 | 	def copy(self, inputs, output):
507 | 		assert len(inputs) == 1 and output is not None
508 | 		for result in self.lookup_node(inputs[0]):
509 | 			self.store_node(output, result)
510 | 
511 | 	def indirect(self, inputs, output):
512 | 		for value in self.lookup_node(inputs[0]):
513 | 			assert value.byte_length == output.getSize()
514 | 			self.store_node(output, value)
515 | 
516 | 	# maps a Ghidra Varnode object to a binary tree object that represents its expression
517 | 	def lookup_node(self, varnode):
518 | 		# Detect cycle
519 | 		if varnode in self.cycle_exec:
520 | 			self.cycle_exec[varnode] += 1
521 | 		if varnode in self.cycle_exec and self.cycle_exec[varnode] > 0:
522 | 			if varnode not in self.nodes:
523 | 				self.store_node(varnode, Node(("CYCLE", varnode), None, None, varnode.getSize()))
524 | 			return self.nodes[varnode]
525 | 		if varnode.isConstant():
526 | 			# create constant node
527 | 			return [Node(varnode, None, None, varnode.getSize())]
528 | 		elif varnode.isAddress():
529 | 			return [Node(varnode, None, None, varnode.getSize())]
530 | 		elif varnode not in self.nodes or varnode in self.cycle_exec:
531 | 			# We have to detect cycles here, by temporarily storing "CYCLE", and if the returned value is "CYCLE", we know there is cycle
532 | 			if varnode not in self.cycle_exec:
533 | 				self.cycle_exec[varnode] = 0
534 | 			
535 | 			self.get_node_definition(varnode)
536 | 
537 | 			if self.cycle_exec[varnode] == 0:
538 | 				del self.cycle_exec[varnode]
539 | 
540 | 			return self.lookup_node(varnode)
541 | 
542 | 		# Prune
543 | 		if len(self.nodes[varnode]) > NODE_LIMIT:
544 | 			self.nodes[varnode] = self.nodes[varnode][:NODE_LIMIT]
545 | 		return self.nodes[varnode]
546 | 
547 | 	# recursively backwards traces for node's definition
548 | 	def get_node_definition(self, varnode):
549 | 		defining_instruction = varnode.getDef()
550 | 		if defining_instruction is None:
551 | 			print("WARNING: Orphaned varnode? - assuming multiequal analyzation error and skipping")
552 | 			self.nodes[varnode] = [Node("ORPHANED", None, None, varnode.getSize())]
553 | 			return
554 | 		self.process(defining_instruction, -1)
555 | 
556 | 	# stores mapping between Ghidra varnode and binary tree obj
557 | 	def store_node(self, varnode, nodeobj):
558 | 		if varnode not in self.nodes:
559 | 			self.nodes[varnode] = []
560 | 		if hash(nodeobj) not in map(hash, self.nodes[varnode]):
561 | 			self.nodes[varnode].append(nodeobj)
562 | 
def get_highfunction(func):
	"""Return the decompiled high function for `func`, using highfunction_cache.

	A cached entry is returned directly. Otherwise `func` is decompiled and
	the result is cached when decompilation completes. When it does not
	complete, None is returned and nothing is cached, so a later call
	decompiles again.
	"""
	if func in highfunction_cache:
		return highfunction_cache[func]

	# NOTE(review): 30 is presumably the decompiler timeout in seconds - confirm.
	results = decompInterface.decompileFunction(func, 30, monitor)
	if not results.decompileCompleted():
		return None

	hf = results.getHighFunction()
	highfunction_cache[func] = hf
	return hf
572 | 
def checkFixParameters(func, parameters):
	"""Commit the decompiler's parameters for `func` and check them against a call.

	The parameters of the current high function are committed to the
	database, the cached high function is dropped, and the function is
	decompiled again so the checks run on fresh results.

	func: the function being called.
	parameters: the call-site arguments; each must provide getSize().

	Raises Exception when the parameter count differs from len(parameters)
	(unless func.hasVarArgs()), or when any parameter's size differs from
	the size of the corresponding call-site argument.
	"""
	hf = get_highfunction(func)
	HighFunctionDBUtil.commitParamsToDatabase(hf, True, SourceType.DEFAULT)
	# reload cache: dropping the entry makes the next lookup decompile again
	del highfunction_cache[func]
	hf = get_highfunction(func)

	# Check arguments
	func_proto = hf.getLocalSymbolMap()
	num_params = func_proto.getNumParams()
	if num_params != len(parameters) and not func.hasVarArgs():
		print(func, "call signature wrong...")
		raise Exception("Function call signature different")

	# NOTE(review): for a varargs function with more declared parameters than
	# call-site arguments, parameters[i] below raises IndexError.
	for i in range(num_params):
		declared_size = func_proto.getParam(i).getRepresentative().getSize()
		passed_size = parameters[i].getSize()
		if declared_size != passed_size:
			print("i: %d, cur.getSize: %d, parameters[i].getSize(): %d" % (i, declared_size, passed_size))
			raise Exception("Func parameter size mismatch")
592 | 
593 | # Make sure func signature matches the call
def checkFixReturn(func, ret_varnode):
	"""Give `func` a return type when its RETURN ops carry no return value.

	For every RETURN p-code op with fewer than two inputs, the function
	signature's return type is set to an undefined data type of
	ret_varnode.getSize() bytes and applied to currentProgram.

	NOTE(review): the signature command is applied once per matching RETURN
	op, and the cached high function is not refreshed here.
	"""
	hf = get_highfunction(func)

	for op in hf.getPcodeOps():
		if op.getOpcode() != PcodeOp.RETURN:
			continue
		op_inputs = op.getInputs()
		if len(op_inputs) >= 2:
			# A second input is present, so this RETURN already carries a value.
			continue

		print(func, "has no return value, fixing type...", op_inputs[0].getPCAddress())
		sig = func.getSignature()
		sig.setReturnType(Undefined.getUndefinedDataType(ret_varnode.getSize()))
		fix_cmd = ApplyFunctionSignatureCmd(func.getEntryPoint(), sig, SourceType.USER_DEFINED)
		fix_cmd.applyTo(currentProgram)
605 | 
606 | # This function performs backwards analysis on the function return type with base case of function parameters
607 | # init_param replaces the parameters of the current func to be analyzed in terms the passed parameter expressions
def analyzeFunctionBackward(func, pci, init_param=None):
	"""Trace the return values of `func` backwards to expressions over its parameters.

	func: the function to analyze.
	pci: interpreter object providing store_node() and lookup_node().
	init_param: optional sequence indexed by parameter position; when given,
		init_param[k] is stored for parameter k instead of a fresh "ARG<k>" node.

	Returns a tuple (return_types, argument_varnodes). return_types holds one
	pci.lookup_node() result per RETURN op that has a return-value input;
	argument_varnodes holds the representative varnode of each parameter.
	"""
	print("Backwards analysis", func.getName())

	hf = get_highfunction(func)
	HighFunctionDBUtil.commitParamsToDatabase(hf, True, SourceType.DEFAULT)

	symbol_map = hf.getLocalSymbolMap()

	# Input 1 of a RETURN op is the returned value; ops without it are skipped.
	return_varnodes = [
		op.getInputs()[1]
		for op in hf.getPcodeOps()
		if op.getOpcode() == PcodeOp.RETURN and len(op.getInputs()) >= 2
	]

	# Parameter varnodes are the base case of the backward trace.
	argument_varnodes = [
		symbol_map.getParam(index).getRepresentative()
		for index in range(symbol_map.getNumParams())
	]

	for index, arg_varnode in enumerate(argument_varnodes):
		if init_param is not None:
			seed = init_param[index]
		else:
			seed = Node("ARG"  + str(index), None, None, arg_varnode.getSize())
		pci.store_node(arg_varnode, seed)

	return_types = [pci.lookup_node(ret_varnode) for ret_varnode in return_varnodes]
	return return_types, argument_varnodes
639 | 
def traverseForward(cur, depth, pci, visited):
	"""Depth-first walk over the p-code ops that consume `cur`.

	Every op returned by cur.getDescendants() is passed to pci.process with
	the current depth. The walk then recurses into the op's output varnode
	with depth + 1, unless the op has no output or the output is already in
	`visited`. `visited` is updated in place. Does nothing when `cur` is None.
	"""
	if cur is None:
		return

	for consumer in cur.getDescendants():
		pci.process(consumer, depth)
		produced = consumer.getOutput()
		if produced is None or produced in visited:
			continue
		visited.add(produced)
		traverseForward(produced, depth + 1, pci, visited)
649 | 
650 | # This function performs forward analysis on function parameters to determine its type (struct, array, or primitive)
def analyzeFunctionForward(func, pci):
	"""Run the forward analysis from the parameters of `func` twice and flag new accesses.

	Each pass resets pci.nodes to only the entries of pci.loop_variants,
	stores an "ARG<k>" node for every parameter, and walks forward from each
	parameter varnode with traverseForward. After the first pass the hashes
	of everything in pci.stores + pci.loads are recorded. After the second
	pass every store/load whose hash was not recorded is appended to
	pci.arrays (presumably accesses that change between iterations of a loop).

	func: the function to analyze.
	pci: interpreter object providing nodes, loop_variants, stores, loads,
		arrays and store_node().

	Returns the list of representative varnodes of the function parameters.
	"""
	print("Forwards analysis", func.getName())
	hf = get_highfunction(func)
	HighFunctionDBUtil.commitParamsToDatabase(hf, True, SourceType.DEFAULT)
	print(func.getParameters())

	# get the varnode of function parameters
	func_proto = hf.getLocalSymbolMap()
	argument_varnodes = []
	argument_nodes = []
	for i in range(func_proto.getNumParams()):
		representative = func_proto.getParam(i).getRepresentative()
		argument_varnodes.append(representative)
		argument_nodes.append(Node("ARG"  + str(i), None, None, representative.getSize()))

	# hashes of the stores/loads seen after the first pass
	first_pass_hashes = set()

	for pass_index in range(2):
		print("Loop variants", map(id, pci.loop_variants))

		# Start each pass from a node table holding only the loop-variant entries.
		carried_nodes = {}
		for variant in pci.loop_variants:
			carried_nodes[variant] = pci.nodes[variant]
			del pci.nodes[variant]
		visited = set()

		pci.nodes = carried_nodes

		for arg_varnode, arg_node in zip(argument_varnodes, argument_nodes):
			pci.store_node(arg_varnode, arg_node)

		# recursively traverse the varnode descendants to get reaching definitions
		for arg_varnode in argument_varnodes:
			traverseForward(arg_varnode, 0, pci, visited)

		if pass_index == 0:
			for access in pci.stores + pci.loads:
				first_pass_hashes.add(hash(access))
			continue

		# Second pass: walk the accesses last to first, keeping those not seen before.
		for access in reversed(pci.stores + pci.loads):
			if hash(access) not in first_pass_hashes:
				pci.arrays.append(access)
				print("FOUND ARRAY!")

	return argument_varnodes
700 | 


--------------------------------------------------------------------------------