├── LICENSE ├── plugin.json ├── README.md ├── __init__.py ├── utils.py └── finder.py /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2022 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 8 | -------------------------------------------------------------------------------- /plugin.json: -------------------------------------------------------------------------------- 1 | { 2 | "pluginmetadataversion": 2, 3 | "name": "glibc_fsop_code_path_finder", 4 | "type": [ 5 | "core", 6 | "ui", 7 | "architecture", 8 | "binaryview", 9 | "helper" 10 | ], 11 | "api": [ 12 | "python3" 13 | ], 14 | "description": "Find FSOP code path that can hijack control flow and stack pointer in GLIBC Libio", 15 | "longdescription": "", 16 | "license": { 17 | "name": "MIT", 18 | "text": "Copyright (c) 2022 \n\nPermission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:\n\nThe above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.\n\nTHE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.\n" 19 | }, 20 | "platforms": [ 21 | "Darwin", 22 | "Linux", 23 | "Windows" 24 | ], 25 | "installinstructions": { 26 | "Darwin": "", 27 | "Linux": "", 28 | "Windows": "" 29 | }, 30 | "dependencies": { 31 | }, 32 | "version": "1.0.0", 33 | "author": "xf1les", 34 | "minimumbinaryninjaversion": 3000 35 | } 36 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Glibc FSOP code path finder 2 | 3 | Find FSOP code path that can hijack control flow and stack pointer in GLIBC Libio. 4 | 5 | ## Description: 6 | 7 | 1. Install this plugin 8 | 2. Open `libc.so.6` in Binary ninja, then click `Tools -> Plugins -> Find FSOP code path` 9 | 3. You should see the result in `Log` windows 10 | 11 | ## Useful discoveries: 12 | 13 | ``` 14 | // GLIBC 2.34-0ubuntu3.2 15 | 3. _IO_wfile_underflow_mmap@0x86030 -> _IO_wdoallocbuf@0x83be0 16 | 0x8614d: call(0x83be0) 17 | RIP/RDI DATAFLOW: 18 | rbx = rdi -> rdi = rbx -> call(0x83be0) 19 | RBP DATAFLOW: 20 | rbp = [rdi + 0x98].q 21 | CODE PATH: 22 | eax = [rdi].d 23 | => [condition] (al & 4) == 0 24 | rax = [rdi + 0xa0].q 25 | rdx = [rax].q 26 | => [condition] rdx u>= [rax + 8].q 27 | rdx = [rdi + 8].q 28 | => [condition] rdx u< [rdi + 0x10].q 29 | rdi = [rax + 0x40].q 30 | => [condition] rdi == 0 31 | 0x83c0b: call([rax + 0x68].q) 32 | RIP/RDI DATAFLOW: 33 | rax = [rdi + 0xa0].q -> rax = [rax + 0xe0].q -> call([rax + 0x68].q) 34 | RBP DATAFLOW: 35 | (N/A) 36 | CODE PATH: 37 | rax = [rdi + 0xa0].q 38 | => [condition] [rax + 0x30].q == 0 39 | => [condition] ([rdi].b & 2) == 0 40 | ([0x216020] is the location of _IO_wfile_underflow_mmap in __libc_IO_vtables) 41 | ``` 42 | 43 | A FSOP code path which can be used to **perform stack migration** with a single FSOP attack, available on GLIBC 2.24+. It's also known as [house of apple2](https://bbs.pediy.com/thread-273832.htm) independently found by [roderick01](https://roderickchan.github.io/). 44 | 45 | I actually found this code path *few weeks ago* but was struggling to work on other stuffs these day, so I didn't notice someone has published this code path already X( 46 | 47 | ## License 48 | 49 | This plugin is released under an [MIT license](./LICENSE). -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- 1 | from binaryninja import AnalysisState 2 | from binaryninja import BackgroundTaskThread 3 | from binaryninja import PluginCommand 4 | from binaryninja import log_info, log_warn, log_error 5 | 6 | from .finder import LibIOVtableFuncCallGraph 7 | from .utils import get_glibc_version, get_vtable_range, get_vtable_check_func 8 | 9 | class FinderTask(BackgroundTaskThread): 10 | def __init__(self, bv): 11 | BackgroundTaskThread.__init__(self, "Looking for eligible FSOP code path...", True) 12 | self.bv = bv 13 | 14 | def run(self): 15 | bv = self.bv 16 | 17 | if bv.analysis_progress.state == AnalysisState.AnalyzeState: 18 | log_warn("[!] An active analysis is running now. Please wait until it is completed.") 19 | return 20 | if bv.arch.name not in ['x86', 'x86_64', 'thumb2', 'aarch64']: 21 | log_warn(f"[!] Unsupported or untest platform '{bv.arch.name}', result may be incorrect. We currently support i386/x64/thumb2/aarch64.") 22 | 23 | glibc_ver = get_glibc_version(bv) 24 | if glibc_ver != None: 25 | log_info(f"[*] glibc version: {glibc_ver}") 26 | else: 27 | log_info(f"[!] Failed to detect glibc version!") 28 | 29 | G = LibIOVtableFuncCallGraph(bv) 30 | 31 | log_info("[*] Looking for __libc_IO_vtables section") 32 | start, stop = get_vtable_range(bv) 33 | if None in [start, stop]: 34 | log_error("[-] Failed to detect vtable!") 35 | return 36 | G.set_vtable_range(start, stop) 37 | if glibc_ver == None or glibc_ver >= 2.24: 38 | log_info("[*] Looking for _IO_vtable_check function...") 39 | func = get_vtable_check_func(bv) 40 | if not func: 41 | log_error(f"[-] Can't find _IO_vtable_check!") 42 | return 43 | G.set_vtable_check_func(func) 44 | log_info(f"_IO_vtable_check@{hex(func)}") 45 | else: 46 | log_info("[*] _IO_vtable_check is not available in this version of glibc.") 47 | 48 | G.parse_vtable() 49 | G.build_graph() 50 | G.find_indirect_call() 51 | G.generate_call_chain() 52 | 53 | def main(bv): 54 | FinderTask(bv).start() 55 | 56 | PluginCommand.register( 57 | 'Find FSOP code path', 58 | 'Find code path that can hijack control flow and stack pointer in GLIBC libc.so.6', 59 | main 60 | ) 61 | -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | from binaryninja import MediumLevelILOperation 2 | from binaryninja import Endianness 3 | from binaryninja import log_warn, log_info 4 | 5 | import struct 6 | 7 | DEBUG = False 8 | 9 | def debug_print(msg): 10 | if DEBUG: 11 | log_info(f'[DBG] {msg}') 12 | 13 | def get_glibc_version(bv): 14 | f = bv.get_functions_by_name('gnu_get_libc_version')[0] 15 | for instr in f.medium_level_il.instructions: 16 | # 00020950 int64_t gnu_get_libc_version() 17 | # 0 @ 00020950 rax = "2.27" <---- 18 | # 1 @ 00020957 return "2.27" 19 | if instr.operation == MediumLevelILOperation.MLIL_SET_VAR and \ 20 | instr.instruction_operands[0].operation == MediumLevelILOperation.MLIL_CONST_PTR: 21 | str_addr = instr.instruction_operands[0].constant 22 | bv_str = bv.get_string_at(str_addr) 23 | if str_addr > bv_str.start: 24 | # "NPTL 2.27" -> "2.27" 25 | return float(bv_str.value[str_addr-bv_str.start:]) 26 | else: 27 | return float(bv_str.value) 28 | 29 | def _find_vtable_border(bv, addresses): 30 | last_nullptr = addresses[0] 31 | for addr in addresses: 32 | ptr = bv.read_pointer(addr) 33 | if ptr: 34 | f = bv.get_function_at(ptr) 35 | # Check if we encounter a libio function 36 | if f and f.name.startswith("_IO_"): 37 | # Skip _IO_obstack_jumps in 2.23 which is too far away from the border 38 | if get_glibc_version(bv) == 2.23 and f.name.startswith('_IO_obstack'): 39 | continue 40 | # Return the nearest address that contains a NULL pointer 41 | return last_nullptr 42 | else: 43 | last_nullptr = addr 44 | return None 45 | 46 | def get_vtable_range(bv): 47 | section = bv.get_section_by_name("__libc_IO_vtables") 48 | if section: 49 | vtable_start, vtable_end = section.start, section.end 50 | else: 51 | # Before GLIBC 2.24, there is no `__libc_IO_vtables` section. 52 | # Instead, we use `_find_vtable_border()` to find a memory area in `.data.rel.ro` section 53 | # as many IO vtables (`_IO_*_jumps`) as possible nested in. 54 | # This approach is very buggy, so it's recommended to avoid using `G.set_vtable_range()` 55 | # and add IO vtable functions one by one by directly calling `G.add_new_node()` if GLIBC version < 2.24. 56 | log_warn(f"[!] __libc_IO_vtables section is not available in this glibc.") 57 | log_info("[*] Searching vtable borders in .data.rel.ro section...") 58 | section = bv.get_section_by_name(".data.rel.ro") 59 | addresses = list(range(section.start, section.end, bv.arch.address_size)) 60 | vtable_start = _find_vtable_border(bv, addresses) 61 | vtable_end = _find_vtable_border(bv, addresses[::-1]) 62 | return vtable_start, vtable_end 63 | 64 | def get_vtable_check_func(bv): 65 | # Try _IO_vtable_check symbol 66 | funcs = bv.get_functions_by_name('_IO_vtable_check') 67 | if len(funcs) > 0: 68 | return funcs[0].start 69 | else: 70 | # _IO_vtable_check references this string 71 | s = "Fatal error: glibc detected an invalid stdio handle" 72 | for str_refs in bv.strings: 73 | # Find the corresponding StringReference object in binaryview 74 | if str_refs.length >= len(s) and s in str_refs.value: 75 | # Get code references to the string 76 | refs = list(bv.get_code_refs(str_refs.start)) 77 | if len(refs) > 0: 78 | return refs[0].function.start 79 | -------------------------------------------------------------------------------- /finder.py: -------------------------------------------------------------------------------- 1 | from binaryninja import LowLevelILOperation 2 | from binaryninja import MediumLevelILOperation 3 | from binaryninja import log_debug, log_info 4 | from binaryninja import function, variable, mediumlevelil, lowlevelil 5 | 6 | from collections import deque 7 | 8 | from .utils import debug_print, DEBUG 9 | 10 | # The following functions won't be added to the callgraph while building in `G.build_graph()` 11 | GRAPH_SKIP_FUNCTION = [ 12 | 'malloc', 'free', 'calloc', 'realloc', 'snprintf', 'sscanf', 13 | '__stack_chk_fail', '__stack_chk_fail_local', '__libc_fatal', '__assert_fail', '__chk_fail', 14 | '__libc_message', 'abort', 'raise', '_IO_vtable_check' 15 | ] 16 | GRAPH_SKIP_FUNCTION += list(map(lambda x : "__GI_" + x, GRAPH_SKIP_FUNCTION)) # alias name in debug symbol 17 | 18 | # This dict maps LowLevelILOperation type to the name of a method in `LowLevelILFunction` object 19 | # which can be used to create expression with the inverted operation 20 | # (See `_resolve_branch_condition()` method in `LibIOFuncTaintTracker` class) 21 | COND_INVERT_FUNC_MAP = { 22 | LowLevelILOperation.LLIL_CMP_E : 'compare_not_equal', 23 | LowLevelILOperation.LLIL_CMP_NE : 'compare_equal', 24 | LowLevelILOperation.LLIL_CMP_SGE : 'compare_signed_less_than', 25 | LowLevelILOperation.LLIL_CMP_SGT : 'compare_signed_less_equal', 26 | LowLevelILOperation.LLIL_CMP_SLE : 'compare_signed_greater_than', 27 | LowLevelILOperation.LLIL_CMP_SLT : 'compare_signed_greater_equal', 28 | LowLevelILOperation.LLIL_CMP_UGE : 'compare_unsigned_less_than', 29 | LowLevelILOperation.LLIL_CMP_UGT : 'compare_unsigned_less_equal', 30 | LowLevelILOperation.LLIL_CMP_ULE : 'compare_unsigned_greater_than', 31 | LowLevelILOperation.LLIL_CMP_ULT : 'compare_unsigned_greater_equal', 32 | LowLevelILOperation.LLIL_FCMP_E : 'float_compare_not_equal', 33 | LowLevelILOperation.LLIL_FCMP_NE : 'float_compare_equal', 34 | LowLevelILOperation.LLIL_FCMP_GE : 'float_compare_signed_less_than', 35 | LowLevelILOperation.LLIL_FCMP_GT : 'float_compare_signed_less_equal', 36 | LowLevelILOperation.LLIL_FCMP_LE : 'float_compare_signed_greater_than', 37 | LowLevelILOperation.LLIL_FCMP_LT : 'float_compare_signed_greater_equal', 38 | LowLevelILOperation.LLIL_FCMP_O : 'float_compare_unordered', 39 | LowLevelILOperation.LLIL_FCMP_UO : 'float_compare_ordered', 40 | } 41 | 42 | #### Global Options for `LibIOFuncTaintTracker` ### 43 | ## 1) TRACK_RBP: Enable RBP/EBP dataflow tracking (Default: True) 44 | ## Note that this functionlity is only available on i386/x64, 45 | ## so this option will be ignored on other architectures. 46 | TRACK_RBP = True 47 | ## 2) ALLOW_CALL_BLOCKS: Allow basic blocks with call/jmp instructions to be used in taint tracking 48 | ## (Default: False) 49 | ## Enable it if you are looking for FSOP code paths 50 | ## that can trigger some function calls like malloc()/free()/memcpy(). 51 | ALLOW_CALL_BLOCKS = False 52 | ## 3) DEFAULT_TAINT_ARG_IDX: The index of function arguments used as taint source (Default: [0]) 53 | ## In other words, these arguments are controllable during FSOP attack. 54 | ## If the pointer of an indirect call inside the function (or sink instruction) is 55 | ## tainted by one of taint source, the taint tracking process will be marked as succeed, 56 | ## which means we found a RIP-hijackable code path. 57 | ## Some examples (x64): 58 | ## [0] : Only first argument is controllable, 59 | ## which is `rdi` register and also `fp` argument in the most of libio functions. 60 | ## [0, 1, 3]: First, second and fourth argument are controllable (`rdi`, `rsi` and `rcx`). 61 | ## The indexes greater than the number of function arguments will be ignored. 62 | DEFAULT_TAINT_ARG_IDX = [0] 63 | ## 4) DEFAULT_SINK_CALLARG_IDX: The index of sink instruction arguments used as taint sink 64 | ## (Default: same as `DEFAULT_TAINT_ARG_IDX`) 65 | ## If call chain contains more than one functions, 66 | ## sink instruction may be the call to next hop function rather than an indirect call. 67 | ## In this case, we must make sure that some of sink call arguments are tainted 68 | ## so taint source can be passed to next hop. 69 | DEFAULT_SINK_CALLARG_IDX = DEFAULT_TAINT_ARG_IDX 70 | 71 | ### Global Options for `LibIOVtableFuncCallGraph` ### 72 | ## 1) MAX_CHAIN_LEN: The maxmiun length of call chain (Default: 10) 73 | ## It's also maxmiun DFS depth can be reached while generating call chains in `_visit_graph()`. 74 | MAX_CHAIN_LEN = 10 75 | 76 | class LibIOFunc: 77 | def __init__(self, bv, func: function.Function, taint_arg_idx=DEFAULT_TAINT_ARG_IDX): 78 | '''' `LibIOFunc` class represents a GLIBC LibIO function (`_IO_*`) 79 | 80 | bv : Binaryview 81 | func: The `function.Function` object of the function to be represented 82 | Everything we need to know will be learned via this object. 83 | taint_arg_idx: Used by `find_indirect_call` method to find hijackable indirect calls 84 | inside represeting function using taint analysis 85 | ''' 86 | 87 | self.bv = bv 88 | 89 | self.func = func 90 | self.addr = self.func.start 91 | self.name = f"{func.name}@{hex(self.addr)}" 92 | 93 | # A "root node" refers to a LibIO function in callgraph that belongs to 94 | # one of vtables inside the `__libc_IO_vtables` section. 95 | # For example: `_IO_str_overflow`, which is one of members in `_IO_str_jump` vtable. 96 | # (I'm not sure whether it's proper to use the term "root" in a graph, anyway XD) 97 | self.is_root_node = False 98 | 99 | # The edges in callgraph (`function.Function` => `LibIOFunc`) 100 | self.callers = {} 101 | self.callees = {} 102 | 103 | self.taint_arg_idx = taint_arg_idx 104 | self.taint_arg = [ 105 | self.func.parameter_vars[idx] 106 | for idx in filter(lambda x : x < len(self.func.parameter_vars), self.taint_arg_idx) 107 | ] 108 | 109 | if self.func.medium_level_il: 110 | self.mlil_func = self.func.medium_level_il.ssa_form 111 | else: 112 | # MIIL unavailable, possibly the function is too huge or broken. 113 | self.mlil_func = None 114 | 115 | # For root node, the list of pointers to represeting LibIO function 116 | # in `__libc_IO_vtables` section. 117 | self.vtable_locations = [] 118 | 119 | # This list stores `LibIOFuncTaintTracker` objects with succeed state 120 | # which contains the information (i.e. code path) of a hijackable indirect call. 121 | # Used by `_visited_var()` (call by `process()`) 122 | self.succeed_trackers = [] 123 | 124 | # The list of basic blocks which won't be visited during taint tracking 125 | # `find_protection()` will add protection-enabled blocks to this list. 126 | self.avoid_blocks = [] 127 | 128 | self._var_visited = [] 129 | 130 | def add_caller(self, caller): 131 | self.callers[caller.func] = caller 132 | 133 | def add_callee(self, callee): 134 | self.callees[callee.func] = callee 135 | 136 | def set_as_root(self): 137 | self.is_root_node = True 138 | 139 | def is_root(self): 140 | return self.is_root_node 141 | 142 | def has_indirect_calls(self): 143 | return len(self.succeed_trackers) > 0 144 | 145 | def find_protection(self, vtable_validate_func: function.Function = None): 146 | ''' Find basic blocks with protection (IO_validate_vtable and PointerEncryption) enabled, 147 | then add them to `self.avoid_blocks` list 148 | 149 | vtable_validate_func: A `function.Function` object of `_IO_vtable_check()`. 150 | If None, IO_validate_vtable blocks won't be detected. 151 | ''' 152 | bv = self.bv 153 | if len(self.avoid_blocks) > 0 or not self.mlil_func: 154 | return 155 | # 1. Find IO_validate_vtable blocks 156 | # IO_validate_vtable is the vtable pointer validation mostly used in LibIO, 157 | # works by checking `fp->vtable` whether it's within `__libc_IO_vtables` section 158 | # (See https://code.woboq.org/userspace/glibc/libio/libioP.h.html#IO_validate_vtable) 159 | if vtable_validate_func: 160 | for refs in bv.get_code_refs(vtable_validate_func.start): 161 | # Find the blocks that calls `_IO_vtable_check()` in current function 162 | if refs.function == self.func: 163 | call_check_func = self.func.get_low_level_il_at(refs.address).medium_level_il.ssa_form 164 | # Also add the block where vtable pointer check is performed 165 | check_vtable_range = call_check_func.il_basic_block.incoming_edges[0].source 166 | self.avoid_blocks.extend([check_vtable_range, call_check_func]) 167 | # 2. Find PointerEncryption blocks 168 | # PointerEncryption is another security feature used by few IO vtable functions such as `_IO_cookie_*`. 169 | # It works by XORing the function pointer with a secret key called `__pointer_chk_guard` 170 | # (See https://sourceware.org/glibc/wiki/PointerEncryption) 171 | for block in self.mlil_func: 172 | for instr in block: 173 | if instr.operation != MediumLevelILOperation.MLIL_SET_VAR_SSA: 174 | continue 175 | # non-i386/x64: XOR `__pointer_chk_guard` directly 176 | if bv.arch.name not in ['x86', 'x86_64']: 177 | # 00069514 int64_t _IO_cookie_close(void* arg1) 178 | # [...] 179 | # 1 @ 00069520 x1#1 = [__pointer_chk_guard].q @ mem#0 180 | # 2 @ 00069524 x3#1 = x1#1 ^ x2#1 181 | # [...] 182 | # 7 @ 00069534 x16#1 = x3#1 183 | # 8 @ 00069538 jump(x16#1) 184 | if instr.instruction_operands[0].operation == MediumLevelILOperation.MLIL_LOAD_SSA and \ 185 | any(['__pointer_chk_guard' in tkn.text for tkn in instr.instruction_operands[0].tokens]): 186 | self.avoid_blocks.append(block) 187 | break 188 | # i386/x64: ROR then XOR `__pointer_chk_guard` pointed by `fs/gs` register 189 | else: 190 | reg_offset = 0x30 if bv.arch.name == 'x86_64' else 0x18 191 | ror_bits = 0x11 if bv.arch.name == 'x86_64' else 0x9 192 | reg_name = 'fsbase' if bv.arch.name == 'x86_64' else 'gsbase' 193 | # 0007f8c0 int64_t _IO_cookie_close(void* arg1) 194 | # [...] 195 | # 1 @ 0007f8cb rax_1#2 = ror.q(rax#1, 0x11) 196 | # 2 @ 0007f8cf rax_2#3 = rax_1#2 ^ [fsbase#0 + 0x30].q @ mem#0 197 | # [...] 198 | # 7 @ 0007f8e4 jump(rax_2#3) 199 | if instr.instruction_operands[0].operation == MediumLevelILOperation.MLIL_ROR and \ 200 | instr.instruction_operands[0].instruction_operands[1].constant == ror_bits: 201 | next_instr = self.mlil_func[instr.instr_index + 1] 202 | if next_instr.operation == MediumLevelILOperation.MLIL_SET_VAR_SSA and \ 203 | next_instr.instruction_operands[0].operation == MediumLevelILOperation.MLIL_XOR and \ 204 | any([reg_name in tkn.text for tkn in next_instr.instruction_operands[0].tokens]): 205 | self.avoid_blocks.append(block) 206 | break 207 | 208 | def _get_var_uses(self, var): 209 | if isinstance(var, variable.Variable): 210 | return self.mlil_func.get_var_uses(var) 211 | elif isinstance(var, mediumlevelil.SSAVariable): 212 | return self.mlil_func.get_ssa_var_uses(var) 213 | return [] 214 | 215 | def _visit_var(self, var, Q = deque()): 216 | if var in self._var_visited: 217 | return 218 | self._var_visited.append(var) 219 | for instr in self._get_var_uses(var): 220 | if instr in Q or instr.il_basic_block in self.avoid_blocks: 221 | # Prevent cycle and skip instructions in avoid blocks 222 | continue 223 | # Check for variable-define instruction 224 | if instr.operation in [ 225 | MediumLevelILOperation.MLIL_VAR_PHI, 226 | MediumLevelILOperation.MLIL_SET_VAR_SSA 227 | ]: 228 | Q.append(instr) 229 | self._visit_var(instr.dest, Q) # Visit new variable 230 | Q.pop() 231 | # Check for call instruction 232 | elif instr.operation in [ 233 | MediumLevelILOperation.MLIL_CALL_SSA, 234 | MediumLevelILOperation.MLIL_CALL_UNTYPED_SSA 235 | ]: 236 | funcptr = instr.instruction_operands[0] 237 | # Is it an indirect call? 238 | is_indirect = False 239 | ## Type 1: 240 | ## 00086c00 uint64_t __libio_codecvt_out() 241 | ## 31 @ 00086c93 [...] = rbp#4([...]) @ mem#4 <--- 242 | if funcptr.operation == MediumLevelILOperation.MLIL_VAR_SSA and \ 243 | var == funcptr.operands[0]: 244 | is_indirect = True 245 | ## Type 2: 246 | ## 00083be0 void* _IO_wdoallocbuf() 247 | ## 10 @ 00083c0b rax#3, mem#1 = [rax_1#2 + 0x68].q @ mem#0([...]) <--- 248 | elif funcptr.operation == MediumLevelILOperation.MLIL_LOAD_SSA and \ 249 | var in funcptr.instruction_operands[0].vars_read: 250 | is_indirect = True 251 | if not is_indirect: 252 | continue 253 | # Run `LibIOFuncTaintTracker` to check if branch conditions are tainted 254 | t = LibIOFuncTaintTracker( 255 | self.bv, instr, avoid_blocks=self.avoid_blocks, 256 | taint_arg_idx=self.taint_arg_idx, 257 | sink_callarg_idx=[] # Disable callarg tracking 258 | ) 259 | t.process() 260 | if t.is_success(): 261 | # Sweet! We just found a hijackable indirect call. 262 | # Now save the tracker to `self.succeed_trackers` list for later use 263 | t.call_dataflow = list(Q) + [instr] 264 | self.succeed_trackers.append(t) 265 | 266 | def find_indirect_call(self): 267 | ''' Find hijackable indirect calls 268 | 269 | Note: `find_protection()` should be called before calling this 270 | 271 | ''' 272 | if len(self.succeed_trackers) > 0 or not self.mlil_func or len(self.taint_arg) == 0: 273 | return 274 | 275 | # Perform taint analysis on each taint arguments 276 | for var in self.taint_arg: 277 | self._visit_var(var) 278 | 279 | if len(self.succeed_trackers) > 1: 280 | # Sort by dataflow length 281 | self.succeed_trackers.sort(key = lambda x : len(x.call_dataflow)) 282 | 283 | class LibIOFuncTaintTracker: 284 | def __init__(self, bv, sink_instr, avoid_blocks=[], 285 | taint_arg_idx=DEFAULT_TAINT_ARG_IDX, 286 | sink_callarg_idx=DEFAULT_SINK_CALLARG_IDX, 287 | track_rbp=TRACK_RBP, 288 | allow_call_blocks=ALLOW_CALL_BLOCKS): 289 | 290 | ''' `LibIOFuncTaintTracker` class implements a simple taint tracker 291 | used to find function-scope code paths to specified call instruction (`sink_instr`), 292 | whose branch conditions are tainted by function arguments indicated by `taint_arg_idx`. 293 | `sink_callarg_idx` indicates the function arguments of `sink_instr` that also should be tainted. 294 | 295 | `sink_instr` should be an address or a BNIL instruction object. 296 | 297 | If an eligible code path is found, the tracker will return a succeed state 298 | (which can be checked with `is_success()` method) and yield results to the following members: 299 | * `code_path`: a list of MLIL instructions that are executed before `sink_instr` being reached. 300 | * `block_path`: a list of basic block that are passed by before `sink_instr` being reached, 301 | similar with `code_path`. 302 | * `branch_condition`: a dict that maps MLIL_IF instructions in `code_path` to a MLIL instruction 303 | representing the condition that must be satisfied to move on to next instruction in code path. 304 | * `call_dataflow`: the dataflow for function arguments indicated by `sink_callarg_idx` in code path. 305 | * `rbp_dataflow`: the dataflow for RBP/ESP register in code path, 306 | available only in i386/x64 and when `track_rbp` == True. 307 | ''' 308 | 309 | self.bv = bv 310 | 311 | # Tracker status 312 | # True - The process was completed successfully 313 | # False - The process was completed but failed 314 | # None - The process is not started yet 315 | self.status = None 316 | 317 | # Tracker options 318 | self.track_rbp = track_rbp 319 | self.avoid_blocks = avoid_blocks 320 | self.allow_call_blocks = allow_call_blocks 321 | 322 | # Tracker results 323 | self.code_path = [] 324 | self.block_path = [] 325 | self.branch_condition = {} 326 | self.call_dataflow = [] 327 | self.rbp_dataflow = [] 328 | 329 | if isinstance(sink_instr, int): 330 | f = bv.get_functions_containing(sink_instr)[0] 331 | self.sink_instr = f.get_low_level_il_at(sink_instr).medium_level_il.ssa_form 332 | elif hasattr(sink_instr, 'operation'): 333 | if isinstance(sink_instr.operation, MediumLevelILOperation): 334 | self.sink_instr = sink_instr.ssa_form 335 | else: 336 | self.sink_instr = sink_instr.medium_level_il.ssa_form 337 | else: 338 | debug_print(f"Unexpected sink_instr type {type(sink_instr)}") 339 | self._mark_failed() 340 | return 341 | self.sink_block = self.sink_instr.il_basic_block 342 | self.mlil_func = self.sink_instr.function 343 | 344 | f = self.mlil_func.source_function 345 | self.taint_arg = [ 346 | f.parameter_vars[idx] 347 | for idx in filter(lambda x : x < len(f.parameter_vars), taint_arg_idx) 348 | ] 349 | self.sink_callarg = [ 350 | self.sink_instr.params[idx] 351 | for idx in filter(lambda x : x < len(self.sink_instr.params), sink_callarg_idx) 352 | ] 353 | 354 | # For internal use 355 | self._var_visited = [] 356 | self._instr_visited = [] 357 | self._var_visited_saved = [] 358 | self._instr_visited_saved = [] 359 | 360 | def is_success(self): 361 | return self.status == True 362 | 363 | def is_failed(self): 364 | return self.status == False 365 | 366 | def is_done(self): 367 | return self.status != None 368 | 369 | def _mark_success(self): 370 | self.status = True 371 | 372 | def _mark_failed(self): 373 | self.status = False 374 | 375 | def _mark_unfinished(self): 376 | self.status = None 377 | 378 | def _save_visited_status(self): 379 | self._var_visited_saved = self._var_visited[:] 380 | self._instr_visited_saved = self._instr_visited[:] 381 | self._var_visited = [] 382 | self._instr_visited = [] 383 | 384 | def _restore_visited_status(self): 385 | self._var_visited = self._var_visited_saved[:] 386 | self._instr_visited = self._instr_visited_saved[:] 387 | self._var_visited_saved = self._instr_visited_saved = [] 388 | 389 | def _get_var_def(self, var): 390 | if isinstance(var, variable.Variable): 391 | instr = self.mlil_func.get_var_definitions(var) 392 | if instr: return instr[0].ssa_form 393 | elif isinstance(var, mediumlevelil.SSAVariable): 394 | return self.mlil_func.get_ssa_var_definition(var) 395 | return None 396 | 397 | def _get_var_name(self, var): 398 | if isinstance(var, variable.Variable): 399 | return var.name 400 | elif isinstance(var, mediumlevelil.SSAVariable): 401 | return f"{var.name}_{var.version}" 402 | else: 403 | return str(type(var)) 404 | 405 | def _get_blk_addr(self, block: mediumlevelil.MediumLevelILBasicBlock): 406 | return block[0].address 407 | 408 | def _resolve_branch_condition(self, next_block, cond_instr): 409 | if cond_instr.true == next_block.start: 410 | # Return non-ssa form condition ILInstruction 411 | return cond_instr.condition.low_level_il.non_ssa_form 412 | elif cond_instr.false == next_block.start: 413 | cond = cond_instr.condition.low_level_il.non_ssa_form 414 | # Create expression with inverted condition 415 | if cond.operation in [LowLevelILOperation.LLIL_FLAG, LowLevelILOperation.LLIL_NOT]: 416 | # For LLIL_FLAG and LLIL_NOT, simply create a NOT expression 417 | expr_idx = cond.function.not_expr(cond.size, cond.expr_index) 418 | else: 419 | func = getattr(cond.function, COND_INVERT_FUNC_MAP[cond.operation]) 420 | expr_idx = func(cond.size, cond.operands[0].expr_index, cond.operands[1].expr_index) 421 | # Return the corresponding ILInstruction with new expression 422 | return lowlevelil.LowLevelILInstruction(cond.function, expr_idx, cond, cond.function) 423 | else: 424 | return None 425 | 426 | def _check_tainted_arg(self, var): 427 | # If definition does't exist, check if the variable is belonging to one of taint source 428 | if isinstance(var, variable.Variable): 429 | src = var 430 | elif isinstance(var, mediumlevelil.SSAVariable): 431 | src = var.var 432 | else: 433 | # Unexpected variable type 434 | return False 435 | if src in self.taint_arg: 436 | debug_print(f" *Taint source reached*") 437 | return True 438 | else: 439 | debug_print(f" ERROR: unexpected taint source {src.name}") 440 | return False 441 | 442 | def _check_block(self, block: mediumlevelil.MediumLevelILBasicBlock): 443 | bv = self.bv 444 | if block in self.avoid_blocks: 445 | return False 446 | elif not self.allow_call_blocks: 447 | # Block shoud not contain a call/jmp instruction 448 | # preventing branch conditions depending on the result of these calls 449 | # (This check can be disabled by setting `allow_call_blocks=True`) 450 | for instr in block: 451 | if instr.operation in [ 452 | MediumLevelILOperation.MLIL_CALL_SSA, 453 | MediumLevelILOperation.MLIL_CALL_UNTYPED_SSA, 454 | MediumLevelILOperation.MLIL_JUMP 455 | ]: 456 | if bv.arch.name == 'x86': 457 | # i386: Allow harmless __x86.get_pc_thunk.* calls 458 | # 459 | # 00147b0d void* const __x86.get_pc_thunk.ax() 460 | # 0 @ 00147b0d eax#1 = __return_addr#0 461 | # 1 @ 00147b10 return eax#1 462 | target = instr.instruction_operands[0] 463 | if target.operation == MediumLevelILOperation.MLIL_CONST_PTR: 464 | f = bv.get_function_at(target.constant).mlil 465 | if len(f) == 2: # Contains only two instructions 466 | vaule = f[0].instruction_operands[0].value 467 | if isinstance(vaule, variable.ReturnAddressRegisterValue): 468 | continue 469 | return False 470 | return True 471 | 472 | def _visit_subpath(self, src, dst, subpaths, Q = deque()): 473 | if src in Q: 474 | # Prevent cycle 475 | return 476 | 477 | if DEBUG: 478 | deque_blk = ', '.join([hex(self._get_blk_addr(b)) for b in list(Q)[::-1]]) 479 | edge_blk = ', '.join([hex(self._get_blk_addr(e.source)) for e in src.incoming_edges]) 480 | debug_print(f" Visiting block at {hex(src.source_block.start)}...") 481 | debug_print(f" Current deque : [{deque_blk}]...") 482 | debug_print(f" Incoming edge : [{edge_blk}]...") 483 | 484 | # Note that incoming edges will be visited, not outgoing ones 485 | # for we are walking from bottom to top in callgraph 486 | Q.appendleft(src) 487 | for b in [edge.source for edge in src.incoming_edges]: 488 | if b == dst: 489 | debug_print(" *Target block reached*") 490 | subpaths.append(list(Q)[:-1]) 491 | elif self._check_block(b): 492 | self._visit_subpath(b, dst, subpaths, Q) 493 | Q.popleft() 494 | 495 | def _visit_var(self, var, idx, check_taint_src_func = _check_tainted_arg): 496 | if self.is_failed() or var in self._var_visited: 497 | return 498 | # Get variable definition 499 | def_instr = self._get_var_def(var) 500 | if not def_instr: 501 | if not check_taint_src_func(self, var): 502 | self._mark_failed() 503 | return 504 | self._var_visited.append(var) 505 | 506 | if DEBUG: 507 | vars_read = ', '.join([self._get_var_name(v) for v in def_instr.vars_read]) 508 | debug_print(f" Visiting {self._get_var_name(var)}...") 509 | debug_print(f" definition : '{str(def_instr)}' (@{hex(def_instr.address)})...") 510 | debug_print(f" vars_read : [{vars_read}]...") 511 | 512 | # Visit variables used in definition 513 | if def_instr.operation == MediumLevelILOperation.MLIL_VAR_PHI: 514 | # For PHI function, visit the *only one* variable used in previous blocks in block_path 515 | if any([pv in self._var_visited for pv in def_instr.vars_read]): 516 | return 517 | for pv in def_instr.vars_read: 518 | pv_def_instr = self.mlil_func.get_ssa_var_definition(pv) 519 | # Check if it was referenced in previous blocks 520 | if pv_def_instr and pv_def_instr.il_basic_block in self.block_path[:idx+1]: 521 | self._visit_var(pv, idx) 522 | break 523 | else: 524 | self._instr_visited.append(def_instr) 525 | for v in def_instr.vars_read: 526 | self._visit_var(v, idx) 527 | 528 | def process(self): 529 | ''' Perform taint analysis''' 530 | if DEBUG: 531 | debug_print( 532 | "Running LibIOFuncTaintTracker on " + 533 | f"func={self.mlil_func.source_function.name} " + 534 | f"sink_instr={hex(self.sink_instr.address)} " + 535 | f"taint_source={','.join([v.name for v in self.taint_arg])}" 536 | ) 537 | 538 | if self.is_done(): 539 | debug_print("*The process is already finished, exiting peacefully*") 540 | return 541 | if len(self.taint_arg) == 0: 542 | debug_print("*No taint source provided, abort*") 543 | self._mark_failed() 544 | return 545 | 546 | bv = self.bv 547 | 548 | dominators = self.sink_block.dominators 549 | # Rearrange dominator block list returned from Binary ninja if it has a wrong order 550 | if dominators[0] != self.mlil_func.basic_blocks[0]: # Entry block should be the first one 551 | debug_print("Warning : sink_block.dominators has wrong order") 552 | dominators.sort(key = lambda x : x.start) # Sort by starting address, rough but acceptable in most cases 553 | dominators = dominators[:dominators.index(self.sink_block)+1] 554 | if DEBUG: 555 | blocks = ', '.join([hex(self._get_blk_addr(b)) for b in dominators]) 556 | debug_print(f"Dominator blocks=[{blocks}]") 557 | 558 | # Check all dominator blocks if suitable for taint tracking 559 | if not all(self._check_block(b) for b in dominators[:-1]): 560 | debug_print("*Invaild dominator block detected, abort*") 561 | self._mark_failed() 562 | return 563 | 564 | # Find path between every dominator blocks 565 | debug_print("Step 1: Tracking tainted rip dataflow...") 566 | for idx, block in enumerate(dominators): 567 | self.block_path.append(block) 568 | if block == self.sink_block: 569 | debug_print(f"[BLK#{idx}] Sink block reached.") 570 | break 571 | if len(block.outgoing_edges) == 0 or idx + 1 >= len(dominators): 572 | debug_print(f"[BLK#{idx}] ERR: Broken CFG?") 573 | self._mark_failed() 574 | return 575 | # Check if one of outgoing edges can reach next block 576 | next_block = dominators[idx + 1] 577 | debug_print(f"[BLK#{idx}] Looking for outgoing edge to {hex(self._get_blk_addr(next_block))}...") 578 | for b in [edge.target for edge in block.outgoing_edges]: 579 | # Fast forward if block has only one outgoing edge 580 | debug_print(f" Edge block at {hex(b.source_block.start)}") 581 | subpath = [] 582 | while b != next_block and len(b.outgoing_edges) == 1: 583 | subpath.append(b) 584 | b = b.outgoing_edges[0].target 585 | debug_print(f" jump block skipped, next block is at {hex(self._get_blk_addr(b))}") 586 | if b == next_block: 587 | debug_print(f" *Next block reached*") 588 | self.block_path.extend(subpath) 589 | break 590 | else: 591 | debug_print(f" (unreachable)") 592 | continue 593 | else: 594 | # Do depth-first search to find subpaths from current block to next block 595 | debug_print(f"[BLK#{idx}] Searching subpath from {hex(self._get_blk_addr(block))} to {hex(self._get_blk_addr(next_block))}...") 596 | subpath_list = [] 597 | self._visit_subpath(next_block, block, subpath_list) 598 | debug_print(f"[BLK#{idx}] {len(subpath_list)} subpaths found") 599 | if len(subpath_list) == 0: 600 | debug_print(f"*No subpath found, abort*") 601 | self._mark_failed() 602 | return 603 | if len(subpath_list) > 1: 604 | # Sort by instruction count 605 | subpath_list.sort(key = lambda subpath : sum([b.instruction_count for b in subpath])) 606 | # Add shortest subpath to call_path 607 | subpath = subpath_list[0] 608 | self.block_path.extend(subpath) 609 | if DEBUG: 610 | path = ' -> '.join([hex(self._get_blk_addr(b)) for b in subpath]) 611 | debug_print(f"[BLK#{idx}] Selected subpath : [{path}]") 612 | debug_print("[*] Dataflow tracking done.") 613 | 614 | debug_print("Step 2: Tracking tainted branch condition...") 615 | # Process branch conditions in block_path 616 | for idx, block in enumerate(self.block_path[:-1]): 617 | if len(block.outgoing_edges) == 1: 618 | debug_print(f"[BLK#{idx}] Skipped") 619 | continue 620 | cond_instr = block[-1] 621 | debug_print(f"[BLK#{idx}] Condition instruction : '{str(cond_instr.low_level_il.non_ssa_form)}'") 622 | # Perform taint analysis on variables used in branch conditions 623 | self._instr_visited.append(cond_instr) 624 | for v in cond_instr.vars_read: 625 | self._visit_var(v, idx) 626 | if self.is_failed(): 627 | debug_print("*Branch condition tracking failed, abort*") 628 | return 629 | # Record branch dependency 630 | if idx + 1 < len(self.block_path): 631 | cond = self._resolve_branch_condition(self.block_path[idx + 1], cond_instr) 632 | if cond: 633 | debug_print(f"[BLK#{idx}] Resolved condition : '{str(cond)}'") 634 | else: 635 | debug_print(f"[BLK#{idx}] Unable to resolve condition!") 636 | self.branch_condition[cond_instr] = cond 637 | 638 | # (Optional) Trace rdi dataflow 639 | if len(self.sink_callarg) > 0: 640 | debug_print("Step 3.1: Tracking tainted rdi dataflow...") 641 | # Save state 642 | self._save_visited_status() 643 | # Perform taint analysis 644 | for v in self.sink_callarg: 645 | if isinstance(v, mediumlevelil.MediumLevelILVarSsa): 646 | v = v.operands[0] 647 | self._visit_var(v, len(self.block_path)) 648 | if self.is_failed(): 649 | debug_print("*rdi dataflow tracking failed, abort*") 650 | return 651 | # Save rdi dataflow 652 | for v in self._var_visited: 653 | self.call_dataflow.insert(0, self._get_var_def(v)) 654 | self.call_dataflow.append(self.sink_instr) 655 | # Reset state 656 | self._restore_visited_status() 657 | debug_print("[*] Call dataflow tracking done.") 658 | 659 | if self.track_rbp and bv.arch.name in ['x86', 'x86_64']: 660 | debug_print("Step 3.2: Tracking tainted rbp dataflow...") 661 | # Save state 662 | self._save_visited_status() 663 | # Find 664 | vars_list = list(filter(lambda v : 'rbp' in v.name or 'ebp' in v.name, self.mlil_func.ssa_vars)) 665 | vars_list.sort(key = lambda v : v.version, reverse = True) 666 | if DEBUG: 667 | tainted_vars = ', '.join([f'{v.name}_{v.version}' for v in vars_list]) 668 | debug_print(f"Possible tainted vars: {tainted_vars}") 669 | rbp_var = None 670 | for v in vars_list: 671 | def_instr = self._get_var_def(v) 672 | if not def_instr: 673 | continue 674 | block = def_instr.il_basic_block 675 | if block in self.block_path: 676 | if block == self.sink_block and def_instr.address > self.sink_instr.address: 677 | continue 678 | rbp_var = v 679 | break 680 | # Perform taint analysis 681 | if rbp_var: 682 | self._visit_var(v, len(self.block_path)) 683 | if self.is_failed(): 684 | self._mark_unfinished() 685 | else: 686 | for v in self._var_visited: 687 | self.rbp_dataflow.insert(0, self._get_var_def(v)) 688 | # Reset state 689 | self._restore_visited_status() 690 | debug_print("[*] rbp dataflow tracking done.") 691 | 692 | # Save code path 693 | for b in dominators: 694 | dataflow = list(filter(lambda x : x.il_basic_block == b, self._instr_visited)) 695 | dataflow.sort(key = lambda x : x.address) 696 | self.code_path.extend(dataflow) 697 | 698 | # Mark the process is successful 699 | self._mark_success() 700 | 701 | debug_print("*The process completed successfully*") 702 | 703 | def report_result(self, indent=4): 704 | if not self.is_success(): 705 | return 706 | 707 | bv = self.bv 708 | 709 | ljust = indent * ' ' 710 | 711 | # 1. Display header 712 | log_info(ljust + f"{hex(self.sink_instr.address)}: {str(self.sink_instr.low_level_il.non_ssa_form)}") 713 | # 2. Display rip/rdi dataflow 714 | log_info(ljust + ' RIP/RDI DATAFLOW:') 715 | callflow = [] 716 | for instr in self.call_dataflow: 717 | if instr.low_level_il: callflow.append(str(instr.low_level_il.non_ssa_form)) 718 | if len(callflow) > 0: 719 | log_info(ljust + ' ' + " -> ".join(callflow)) 720 | else: 721 | log_info(ljust + ' (N/A)') 722 | # 3. Display rbp dataflow 723 | if self.track_rbp and bv.arch.name in ['x86', 'x86_64']: 724 | log_info(ljust + ' RBP DATAFLOW:') 725 | rbpflow = [] 726 | for instr in self.rbp_dataflow: 727 | if instr.low_level_il: rbpflow.append(str(instr.low_level_il.non_ssa_form)) 728 | if len(rbpflow) > 0: 729 | log_info(ljust + ' ' + " -> ".join(rbpflow)) 730 | else: 731 | log_info(ljust + ' (N/A)') 732 | # 4. Display code path 733 | log_info(ljust + ' CODE PATH:') 734 | if len(self.code_path) > 0: 735 | for instr in self.code_path: 736 | if instr.operation == MediumLevelILOperation.MLIL_IF: 737 | log_info(ljust + f' => [condition] {str(self.branch_condition[instr])}') 738 | else: 739 | log_info(ljust + ' ' + str(instr.low_level_il.non_ssa_form)) 740 | else: 741 | log_info(ljust + ' (none)') 742 | 743 | class LibIOVtableFuncCallGraph: 744 | def __init__(self, bv): 745 | self.bv = bv 746 | 747 | self.roots = {} 748 | self.nodes = {} 749 | self.unsafe_call_nodes = [] 750 | self.unsafe_call_chains = {} 751 | 752 | self.vtable_start = None 753 | self.vtable_stop = None 754 | self.vtable_validate_func = None 755 | 756 | def set_vtable_range(self, vtable_start, vtable_stop): 757 | self.vtable_start = vtable_start 758 | self.vtable_stop = vtable_stop 759 | 760 | def set_vtable_check_func(self, vtable_validate_func): 761 | if isinstance(vtable_validate_func, int): 762 | self.vtable_validate_func = self.bv.get_function_at(vtable_validate_func) 763 | else: 764 | self.vtable_validate_func = vtable_validate_func 765 | 766 | def get_node(self, func): 767 | if func in self.nodes.keys(): 768 | return self.nodes[func] 769 | else: 770 | return None 771 | 772 | def add_new_node(self, func: function.Function, is_root=False): 773 | if func not in self.nodes: 774 | node = LibIOFunc(self.bv, func) 775 | self.nodes[func] = node 776 | if is_root: 777 | node.set_as_root() 778 | self.roots[func] = node 779 | debug_print(f"New root: {node.name}") 780 | else: 781 | debug_print(f"New node: {node.name}") 782 | return node 783 | 784 | def parse_vtable(self): 785 | bv = self.bv 786 | if None in [self.vtable_start, self.vtable_stop]: 787 | return 788 | log_info(f"[*] Parsing vtable from {hex(self.vtable_start)} to {hex(self.vtable_stop)}...") 789 | for addr in range(self.vtable_start, self.vtable_stop, bv.arch.address_size): 790 | faddr = bv.read_pointer(addr) 791 | if not faddr: 792 | continue 793 | # Do not use `bv.get_functions_at()` here 794 | # In ARM, pointers in `__libc_IO_vtables` do not point to the beginning of IO functions 795 | f = bv.get_functions_containing(faddr) 796 | if len(f) > 0: 797 | f = f[0] 798 | if f not in self.nodes: 799 | node = self.add_new_node(f, True) 800 | else: 801 | node = self.get_node(f) 802 | node.vtable_locations.append(addr) 803 | log_info(f"[*] Done. {len(self.roots)} unique root(s) processed.") 804 | 805 | def build_graph(self): 806 | log_info("[*] Building call graph...") 807 | Q = deque(self.roots.values()) 808 | cnt = 0 809 | while len(Q) > 0: 810 | root = Q.pop() 811 | for f in root.func.callees: 812 | if f == self.vtable_validate_func or f.name in GRAPH_SKIP_FUNCTION: 813 | continue 814 | if f in self.nodes: 815 | node = self.nodes[f] 816 | else: 817 | node = self.add_new_node(f) 818 | Q.append(node) 819 | node.add_caller(root) 820 | root.add_callee(node) 821 | cnt += 1 822 | log_info(f"[*] Done. {cnt} unique node(s) processed.") 823 | 824 | def find_indirect_call(self): 825 | log_info("[*] Searching indirect call in graph nodes...") 826 | cnt = 0 827 | for node in self.nodes.values(): 828 | node.find_protection(self.vtable_validate_func) 829 | node.find_indirect_call() 830 | if node.has_indirect_calls(): 831 | log_info(f"{str(cnt).rjust(2)}. {node.name}") 832 | for tracker in node.succeed_trackers: 833 | tracker.report_result() 834 | cnt += 1 835 | self.unsafe_call_nodes.append(node) 836 | log_info(f"[*] Done. {cnt} unprotected indirect call(s) found.") 837 | 838 | def _visit_graph(self, node, Q = deque()): 839 | # Stop if depth length exceeds MAX_CHAIN_LEN 840 | if len(Q) + 1 > MAX_CHAIN_LEN: 841 | return 842 | Q.appendleft(node) 843 | if not node.is_root(): 844 | # Visit node's callers 845 | for n in node.callers.values(): 846 | self._visit_graph(n, Q) 847 | else: 848 | call_chain = [] 849 | for idx, n in enumerate(Q): 850 | if idx + 1 == len(Q): 851 | call_chain.append(n.succeed_trackers[0]) 852 | break 853 | trackers = [] 854 | for ref in self.bv.get_code_refs(Q[idx + 1].addr): 855 | if ref.function == n.func: 856 | t = LibIOFuncTaintTracker(self.bv, ref.address) 857 | t.process() 858 | if t.is_success(): 859 | trackers.append(t) 860 | if len(trackers) == 0: 861 | Q.popleft() 862 | return 863 | if len(trackers) > 0: 864 | trackers.sort(key = lambda x : len(x.call_dataflow)) 865 | call_chain.append(trackers[0]) 866 | func = node.func 867 | if func not in self.unsafe_call_chains: 868 | self.unsafe_call_chains[func] = [] 869 | self.unsafe_call_chains[func].append(call_chain) 870 | Q.popleft() 871 | 872 | def generate_call_chain(self): 873 | log_info("[*] Generating call chain...") 874 | for node in self.unsafe_call_nodes: 875 | self._visit_graph(node) 876 | cnt = 0 877 | for func, chain_list in self.unsafe_call_chains.items(): 878 | node = self.get_node(func) 879 | for call_chain in chain_list: 880 | funcs = ' -> '.join(map(lambda x : self.get_node(x.mlil_func.source_function).name, call_chain)) 881 | log_info(f"{str(cnt).rjust(2)}. {funcs}") 882 | for tracker in call_chain: 883 | tracker.report_result() 884 | cnt += 1 885 | log_info(f" ([{', '.join(map(hex, node.vtable_locations))}] is the location of {func.name} in __libc_IO_vtables)") 886 | log_info(f"[*] Done. {cnt} exploitable call chain(s) found.") 887 | --------------------------------------------------------------------------------