├── ida_kernelcache.py ├── ida_kernelcache_reload.py ├── ida_kernelcache ├── internal.py ├── kernel.py ├── metaclass.py ├── tagged_pointers.py ├── __init__.py ├── build_struct.py ├── offset.py ├── classes.py ├── symbol.py ├── stub.py ├── segment.py ├── kplist.py ├── data_flow.py ├── collect_classes.py ├── vtable.py ├── ida_utilities.py └── class_struct.py ├── LICENSE ├── scripts ├── find_virtual_method_overrides.py ├── populate_struct.py └── process_external_methods.py └── README.md /ida_kernelcache.py: -------------------------------------------------------------------------------- 1 | # 2 | # ida_kernelcache.py 3 | # Brandon Azad 4 | # 5 | # A script to import the ida_kernelcache module into IDA. 6 | # 7 | 8 | import ida_kernelcache 9 | import ida_kernelcache as kc 10 | -------------------------------------------------------------------------------- /ida_kernelcache_reload.py: -------------------------------------------------------------------------------- 1 | # 2 | # ida_kernelcache.py 3 | # Brandon Azad 4 | # 5 | # A script to import the ida_kernelcache module into IDA, reloading all the necessary internal 6 | # modules. 7 | # 8 | 9 | import sys 10 | for mod in sys.modules.keys(): 11 | if 'ida_kernelcache' in mod: 12 | del sys.modules[mod] 13 | 14 | import ida_kernelcache 15 | import ida_kernelcache as kc 16 | -------------------------------------------------------------------------------- /ida_kernelcache/internal.py: -------------------------------------------------------------------------------- 1 | # 2 | # ida_kernelcache/internal.py 3 | # Brandon Azad 4 | # 5 | # Miscellaneous internal routines. 6 | # 7 | 8 | from collections import defaultdict 9 | 10 | import idc 11 | 12 | import ida_utilities as idau 13 | 14 | def make_name_generator(suffix, max_count=999999): 15 | """Create a unique name generator using the specified template factory.""" 16 | next_index_dict = defaultdict(lambda: 1) 17 | def get_next(name): 18 | assert name, 'Invalid symbol name passed to name generator' 19 | assert suffix not in name, 'Symbol name passed to name generator already contains suffix' 20 | template = name + suffix 21 | for index in xrange(next_index_dict[name], max_count): 22 | new_name = template + str(index) 23 | if idau.get_name_ea(new_name) == idc.BADADDR: 24 | next_index_dict[name] = index 25 | return new_name 26 | new_index_dict[name] = max_count 27 | return None 28 | return get_next 29 | 30 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2018 Brandon Azad 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in 11 | all copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | THE SOFTWARE. 20 | -------------------------------------------------------------------------------- /ida_kernelcache/kernel.py: -------------------------------------------------------------------------------- 1 | # 2 | # ida_kernelcache/kernel.py 3 | # Brandon Azad 4 | # 5 | # The kernel module holds functions and global variables pertaining to the kernel as a whole. No 6 | # prior initialization via ida_kernelcache is necessary. 7 | # 8 | 9 | import idc 10 | import idautils 11 | import idaapi 12 | 13 | import ida_utilities as idau 14 | import kplist 15 | 16 | _log = idau.make_log(0, __name__) 17 | 18 | def find_kernel_base(): 19 | """Find the kernel base.""" 20 | return idaapi.get_fileregion_ea(0) 21 | 22 | base = find_kernel_base() 23 | """The kernel base address (the address of the main kernel Mach-O header).""" 24 | 25 | def _find_prelink_info_segments(): 26 | """Find all candidate __PRELINK_INFO segments (or sections). 27 | 28 | We try to identify any IDA segments with __PRELINK_INFO in the name so that this function will 29 | work both before and after automatic rename. A more reliable method would be parsing the 30 | Mach-O. 31 | """ 32 | segments = [] 33 | # Gather a list of all the possible segments. 34 | for seg in idautils.Segments(): 35 | name = idc.SegName(seg) 36 | if '__PRELINK_INFO' in name or name == '__info': 37 | segments.append(seg) 38 | if len(segments) < 1: 39 | _log(0, 'Could not find any __PRELINK_INFO segment candidates') 40 | elif len(segments) > 1: 41 | _log(1, 'Multiple segment names contain __PRELINK_INFO: {}', 42 | [idc.SegName(seg) for seg in segments]) 43 | return segments 44 | 45 | def parse_prelink_info(): 46 | """Find and parse the kernel __PRELINK_INFO dictionary.""" 47 | segments = _find_prelink_info_segments() 48 | for segment in segments: 49 | prelink_info_string = idc.GetString(segment) 50 | prelink_info = kplist.kplist_parse(prelink_info_string) 51 | if prelink_info: 52 | return prelink_info 53 | _log(0, 'Could not find __PRELINK_INFO') 54 | return None 55 | 56 | prelink_info = parse_prelink_info() 57 | """The kernel __PRELINK_INFO dictionary.""" 58 | 59 | KC_11_NORMAL = '11-normal' 60 | KC_12_MERGED = '12-merged' 61 | 62 | def _get_kernelcache_format(): 63 | if '_PrelinkLinkKASLROffsets' in prelink_info: 64 | return KC_11_NORMAL 65 | return KC_12_MERGED 66 | 67 | kernelcache_format = _get_kernelcache_format() 68 | 69 | -------------------------------------------------------------------------------- /scripts/find_virtual_method_overrides.py: -------------------------------------------------------------------------------- 1 | # 2 | # scripts/find_virtual_method_overrides.py 3 | # Brandon Azad 4 | # 5 | # Use ida_kernelcache to find classes that override a virtual method. 6 | # 7 | 8 | def kernelcache_find_virtual_method_overrides(classname=None, method=None): 9 | import idc 10 | import idaapi 11 | import ida_kernelcache as kc 12 | 13 | # Define the form to ask for the arguments. 14 | class MyForm(idaapi.Form): 15 | def __init__(self): 16 | swidth = 40 17 | idaapi.Form.__init__(self, r"""STARTITEM 0 18 | Find virtual method overrides 19 | 20 | <#The class#Class :{classname}> 21 | <#The virtual method#Method:{method}>""", { 22 | 'classname': idaapi.Form.StringInput(tp=idaapi.Form.FT_IDENT, swidth=swidth), 23 | 'method': idaapi.Form.StringInput(tp=idaapi.Form.FT_IDENT, swidth=swidth), 24 | }) 25 | def OnFormChange(self, fid): 26 | return 1 27 | 28 | kc.collect_class_info() 29 | 30 | if any(arg is None for arg in (classname, method)): 31 | f = MyForm() 32 | f.Compile() 33 | f.classname.value = classname or '' 34 | f.method.value = method or '' 35 | ok = f.Execute() 36 | if ok != 1: 37 | print 'Cancelled' 38 | return False 39 | classname = f.classname.value 40 | method = f.method.value 41 | f.Free() 42 | 43 | if classname not in kc.class_info: 44 | print 'Not a valid class: {}'.format(classname) 45 | return False 46 | 47 | print 'Subclasses of {} that override {}:'.format(classname, method) 48 | baseinfo = kc.class_info[classname] 49 | found = False 50 | for classinfo in baseinfo.descendants(): 51 | for _, override, _ in kc.vtable.class_vtable_overrides(classinfo, superinfo=baseinfo, 52 | methods=True): 53 | name = idc.NameEx(idc.BADADDR, override) 54 | demangled = idc.Demangle(name, idc.GetLongPrm(idc.INF_SHORT_DN)) 55 | name = demangled if demangled else name 56 | if method in name: 57 | print '{:#x} {}'.format(override, classinfo.classname) 58 | found = True 59 | if not found: 60 | print 'No subclass of {} overrides {}'.format(classname, method) 61 | return found 62 | 63 | kernelcache_find_virtual_method_overrides() 64 | 65 | -------------------------------------------------------------------------------- /ida_kernelcache/metaclass.py: -------------------------------------------------------------------------------- 1 | # 2 | # ida_kernelcache/metaclass.py 3 | # Brandon Azad 4 | # 5 | # A module for working with OSMetaClass instances in the kernelcache. 6 | # 7 | 8 | import idc 9 | 10 | import ida_utilities as idau 11 | import classes 12 | import symbol 13 | 14 | _log = idau.make_log(0, __name__) 15 | 16 | def metaclass_name_for_class(classname): 17 | """Return the name of the C++ metaclass for the given class.""" 18 | if '::' in classname: 19 | return None 20 | return classname + '::MetaClass' 21 | 22 | def metaclass_instance_name_for_class(classname): 23 | """Return the name of the C++ metaclass instance for the given class.""" 24 | if '::' in classname: 25 | return None 26 | return classname + '::gMetaClass' 27 | 28 | def metaclass_symbol_for_class(classname): 29 | """Get the symbol name for the OSMetaClass instance for the given class name. 30 | 31 | Arguments: 32 | classname: The name of the C++ class. 33 | 34 | Returns: 35 | The symbol name, or None if the classname is invalid. 36 | """ 37 | metaclass_instance = metaclass_instance_name_for_class(classname) 38 | if not metaclass_instance: 39 | return None 40 | return symbol.global_name(metaclass_instance) 41 | 42 | def add_metaclass_symbol(metaclass, classname): 43 | """Add a symbol for the OSMetaClass instance at the specified address. 44 | 45 | Arguments: 46 | metaclass: The address of the OSMetaClass instance. 47 | classname: The name of the C++ class with this OSMetaClass instance. 48 | 49 | Returns: 50 | True if the OSMetaClass instance's symbol was created successfully. 51 | """ 52 | metaclass_symbol = metaclass_symbol_for_class(classname) 53 | if not idau.set_ea_name(metaclass, metaclass_symbol): 54 | _log(0, 'Address {:#x} already has name {} instead of OSMetaClass instance symbol {}' 55 | .format(metaclass, idau.get_ea_name(metaclass), metaclass_symbol)) 56 | return False 57 | return True 58 | 59 | def initialize_metaclass_symbols(): 60 | """Populate IDA with OSMetaClass instance symbols for an iOS kernelcache. 61 | 62 | Search through the kernelcache for OSMetaClass instances and add a symbol for each known 63 | instance. 64 | """ 65 | classes.collect_class_info() 66 | for classname, classinfo in classes.class_info.items(): 67 | if classinfo.metaclass: 68 | _log(1, 'Class {} has OSMetaClass instance at {:#x}', classname, classinfo.metaclass) 69 | if not add_metaclass_symbol(classinfo.metaclass, classname): 70 | _log(0, 'Could not add metaclass symbol for class {} at address {:#x}', classname, 71 | classinfo.metaclass) 72 | else: 73 | _log(1, 'Class {} has no known OSMetaClass instance', classname) 74 | 75 | -------------------------------------------------------------------------------- /ida_kernelcache/tagged_pointers.py: -------------------------------------------------------------------------------- 1 | # 2 | # ida_kernelcache/tagged_pointers.py 3 | # Brandon Azad 4 | # 5 | """ida_kernelcache.tagged_pointers 6 | 7 | This module is responsible for processing the tagged pointers in the new iOS 12 kernelcache and 8 | replacing them with their untagged equivalents. All found pointers are also converted into offsets. 9 | 10 | In an alternative implementation, we could just add cross-references in IDA. However, I think this 11 | approach is better because it is closer to what the kernelcache looks like at runtime. 12 | """ 13 | 14 | import idc 15 | import idautils 16 | 17 | import ida_utilities as idau 18 | import kernel 19 | 20 | _log = idau.make_log(1, __name__) 21 | 22 | def tagged_pointer_tag(tp): 23 | return (tp >> 48) & 0xffff 24 | 25 | def tagged_pointer_untag(tp): 26 | return tp | 0xffff000000000000 27 | 28 | def is_tagged_pointer_format(value): 29 | return tagged_pointer_tag(value) != 0xffff and \ 30 | (value & 0x0000ffff00000000) == 0x0000fff000000000 31 | 32 | def is_tagged_pointer(value): 33 | return is_tagged_pointer_format(value) and \ 34 | idau.is_mapped(tagged_pointer_untag(value), value=False) 35 | 36 | def tagged_pointer_link(tag): 37 | return (tag >> 1) & ~0x3 38 | 39 | def tagged_pointer_next(ea, tp, end=None): 40 | assert ea 41 | # First try to get the offset to the next link. 42 | if tp: 43 | link_offset = tagged_pointer_link(tagged_pointer_tag(tp)) 44 | if link_offset: 45 | return ea + link_offset 46 | # Skip the current tagged pointer in preparation for scanning. 47 | ea += idau.WORD_SIZE 48 | # We don't have a link. Do a forward scan until we find the next tagged pointer. 49 | _log(3, 'Scanning for next tagged pointer') 50 | if end is None: 51 | end = idc.SegEnd(ea) 52 | for value, value_ea in idau.ReadWords(ea, end, step=4, addresses=True): 53 | if is_tagged_pointer(value): 54 | return value_ea 55 | # If we didn't find any tagged pointers at all, return None. 56 | return None 57 | 58 | def untag_pointer(ea, tp): 59 | _log(4, 'Untagging pointer at {:x}', ea) 60 | idau.patch_word(ea, tagged_pointer_untag(tp)) 61 | idc.OpOff(ea, 0, 0) 62 | 63 | def untag_pointers_in_range(start, end): 64 | assert kernel.kernelcache_format == kernel.KC_12_MERGED, 'Wrong kernelcache format' 65 | ea, tp = start, None 66 | while True: 67 | ea = tagged_pointer_next(ea, tp, end) 68 | if ea is None or ea >= end: 69 | break 70 | tp = idau.read_word(ea) 71 | if not is_tagged_pointer(tp): 72 | _log(1, 'Tagged pointer traversal failed: ea={:x}, tp={:x}'.format(ea, tp)) 73 | break 74 | untag_pointer(ea, tp) 75 | 76 | def untag_pointers(): 77 | _log(2, 'Starting tagged pointer conversion') 78 | for seg in idautils.Segments(): 79 | untag_pointers_in_range(idc.SegStart(seg), idc.SegEnd(seg)) 80 | _log(2, 'Tagged pointer conversion complete') 81 | 82 | -------------------------------------------------------------------------------- /ida_kernelcache/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # ida_kernelcache/__init__.py 3 | # Brandon Azad 4 | # 5 | # The main ida_kernelcache module containing my iOS kernelcache utilities for IDA. 6 | # 7 | 8 | # This isn't kernelcache-specific, but it's useful to have access to in the interpreter and other 9 | # scripts. 10 | import ida_utilities 11 | 12 | import build_struct 13 | import class_struct 14 | import classes 15 | import kernel 16 | import kplist 17 | import metaclass 18 | import offset 19 | import segment 20 | import stub 21 | import tagged_pointers 22 | import vtable 23 | 24 | from classes import (ClassInfo, collect_class_info, class_info) 25 | from kplist import (kplist_parse) 26 | from segment import (kernelcache_kext) 27 | 28 | def kernelcache_process(untag_pointers=True): 29 | """Process the kernelcache in IDA for the first time. 30 | 31 | This function performs all the standard processing available in this module: 32 | * Convert iOS 12's new static tagged pointers into normal kernel pointers. 33 | * Parse the kernel's `__PRELINK_INFO.__info` section into a dictionary. 34 | * Renames segments in IDA according to the names from the __PRELINK_INFO dictionary (split 35 | kext format kernelcaches only). 36 | * Converts pointers in data segments into offsets. 37 | * Locates virtual method tables, converts them to offsets, and adds vtable symbols. 38 | * Locates OSMetaClass instances for top-level classes and adds OSMetaClass symbols. 39 | * Symbolicates offsets in `__got` sections and stub functions in `__stubs` sections. 40 | * Symbolicates methods in vtables based on the method names in superclasses. 41 | * Creates IDA structs representing the C++ classes in the kernel. 42 | """ 43 | import idaapi 44 | import idc 45 | def autoanalyze(): 46 | idc.Wait() 47 | autoanalyze() 48 | if (kernel.kernelcache_format == kernel.KC_12_MERGED 49 | and untag_pointers 50 | and idaapi.IDA_SDK_VERSION < 720): 51 | print 'Processing tagged kernelcache pointers' 52 | tagged_pointers.untag_pointers() 53 | autoanalyze() 54 | segment.initialize_segments() 55 | print 'Initializing data offsets' 56 | offset.initialize_data_offsets() 57 | autoanalyze() 58 | print 'Initializing vtables' 59 | vtable.initialize_vtables() 60 | autoanalyze() 61 | vtable.initialize_vtable_symbols() 62 | autoanalyze() 63 | metaclass.initialize_metaclass_symbols() 64 | if kernel.kernelcache_format == kernel.KC_11_NORMAL: 65 | print 'Creating offset and stub symbols' 66 | offset.initialize_offset_symbols() 67 | autoanalyze() 68 | stub.initialize_stub_symbols() 69 | autoanalyze() 70 | print 'Propagating vtable method symbols' 71 | vtable.initialize_vtable_method_symbols() 72 | print 'Initializing class structs' 73 | class_struct.initialize_vtable_structs() 74 | class_struct.initialize_class_structs() 75 | autoanalyze() 76 | print 'Done' 77 | 78 | -------------------------------------------------------------------------------- /ida_kernelcache/build_struct.py: -------------------------------------------------------------------------------- 1 | # 2 | # ida_kernelcache/build_struct.py 3 | # Brandon Azad 4 | # 5 | # A module to build an IDA structure automatically from code accesses. 6 | # 7 | 8 | import collections 9 | 10 | import idc 11 | import idautils 12 | import idaapi 13 | 14 | import ida_utilities as idau 15 | 16 | _log = idau.make_log(3, __name__) 17 | 18 | def field_name(offset): 19 | """Automatically generated IDA structs have their fields named by their absolute offset.""" 20 | return 'field_{:x}'.format(offset) 21 | 22 | def create_struct_fields(sid=None, name=None, accesses=None, create=False, base=0): 23 | """Create an IDA struct with fields corresponding to the specified access pattern. 24 | 25 | Given a sequence of (offset, size) tuples designating the valid access points to the struct, 26 | create fields in the struct at the corresponding positions. 27 | 28 | Options: 29 | sid: The struct id, if the struct already exists. 30 | name: The name of the struct to update or create. 31 | accesses: The set of (offset, size) tuples representing the valid access points in the 32 | struct. 33 | create: If True, then the struct will be created with the specified name if it does not 34 | already exist. Default is False. 35 | base: The base offset for the struct. Offsets smaller than this are ignored, otherwise the 36 | field is created at the offset minus the base. Default is 0. 37 | 38 | Either sid or name must be specified. 39 | """ 40 | # Get the struct id. 41 | if sid is None: 42 | sid = idau.struct_open(name, create=True) 43 | if sid is None: 44 | _log(0, 'Could not open struct {}', name) 45 | return False 46 | else: 47 | name = idc.GetStrucName(sid) 48 | if name is None: 49 | _log(0, 'Invalid struct id {}', sid) 50 | return False 51 | # Now, for each (offset, size) pair, create a struct member. Right now we completely ignore the 52 | # possibility that some members will overlap (for various reasons; it's actually more common 53 | # than I initially thought, though I haven't investigated why). 54 | # TODO: In the future we should address this by either automatically generating sub-unions or 55 | # choosing the most appropriate member when permissible (e.g. (0, 8), (0, 2), (4, 4) might 56 | # create (0, 2), (2, 2), (4, 4)). I think the most reasonable default policy is to create the 57 | # biggest members that satisfy all accesses. 58 | success = True 59 | for offset, size in accesses: 60 | if offset < base: 61 | continue 62 | member = field_name(offset) 63 | ret = idau.struct_add_word(sid, member, offset - base, size) 64 | if ret != 0: 65 | if ret == idc.STRUC_ERROR_MEMBER_OFFSET: 66 | _log(2, 'Could not add {}.{} for access ({}, {})', name, member, offset, size) 67 | else: 68 | success = False 69 | _log(1, 'Could not add {}.{} for access ({}, {}): {}', name, member, offset, size, 70 | ret) 71 | return success 72 | 73 | -------------------------------------------------------------------------------- /ida_kernelcache/offset.py: -------------------------------------------------------------------------------- 1 | # 2 | # ida_kernelcache/offset.py 3 | # Brandon Azad 4 | # 5 | # Functions for converting and symbolicating offsets. 6 | # 7 | 8 | import re 9 | 10 | import idc 11 | import idautils 12 | 13 | import ida_utilities as idau 14 | import internal 15 | import kernel 16 | import stub 17 | 18 | _log = idau.make_log(1, __name__) 19 | 20 | def initialize_data_offsets(): 21 | """Convert offsets in data segments into offsets in IDA. 22 | 23 | Segment names must be initialized with segments.initialize_segments() first. 24 | """ 25 | # Normally, for user-space programs, this operation would be dangerous because there's a good 26 | # chance that a valid userspace address would happen to show up in regular program data that is 27 | # not actually an address. However, since kernel addresses are numerically much larger, the 28 | # chance of this happening is much less. 29 | for seg in idautils.Segments(): 30 | name = idc.SegName(seg) 31 | if not (name.endswith('__DATA_CONST.__const') or name.endswith('__got') 32 | or name.endswith('__DATA.__data')): 33 | continue 34 | for word, ea in idau.ReadWords(seg, idc.SegEnd(seg), addresses=True): 35 | if idau.is_mapped(word, value=False): 36 | idc.OpOff(ea, 0, 0) 37 | 38 | kernelcache_offset_suffix = '___offset_' 39 | """The suffix that gets appended to a symbol to create the offset name, without the offset ID.""" 40 | 41 | _offset_regex = re.compile(r"^(\S+)" + kernelcache_offset_suffix + r"\d+$") 42 | """A regular expression to match and extract the target name from an offset symbol.""" 43 | 44 | def offset_name_target(offset_name): 45 | """Get the target to which an offset name refers. 46 | 47 | No checks are performed to ensure that the target actually exists. 48 | """ 49 | match = _offset_regex.match(offset_name) 50 | if not match: 51 | return None 52 | return match.group(1) 53 | 54 | def _process_offset(offset, ea, next_offset): 55 | """Process an offset in a __got section.""" 56 | # Convert the address containing the offset into an offset in IDA, but continue if it fails. 57 | if not idc.OpOff(ea, 0, 0): 58 | _log(1, 'Could not convert {:#x} into an offset', ea) 59 | # Get the name to which the offset refers. 60 | name = idau.get_ea_name(offset, user=True) 61 | if not name: 62 | _log(3, 'Offset at address {:#x} has target {:#x} without a name', ea, offset) 63 | return False 64 | # Make sure this isn't an offset to another stub or to a jump function to another stub. See the 65 | # comment in _symbolicate_stub. 66 | if stub.symbol_references_stub(name): 67 | _log(1, 'Offset at address {:#x} has target {:#x} (name {}) that references a stub', ea, 68 | offset, name) 69 | return False 70 | # Set the new name for the offset. 71 | symbol = next_offset(name) 72 | if symbol is None: 73 | _log(0, 'Could not generate offset symbol for {}: names exhausted', name) 74 | return False 75 | if not idau.set_ea_name(ea, symbol, auto=True): 76 | _log(2, 'Could not set name {} for offset at {:#x}', symbol, ea) 77 | return False 78 | return True 79 | 80 | def _process_offsets_section(segstart, next_offset): 81 | """Process all the offsets in a __got section.""" 82 | for offset, ea in idau.ReadWords(segstart, idc.SegEnd(segstart), addresses=True): 83 | if not offset_name_target(idau.get_ea_name(ea)): 84 | # This is not a previously named offset. 85 | if idau.is_mapped(offset, value=False): 86 | _process_offset(offset, ea, next_offset) 87 | else: 88 | _log(-1, 'Offset {:#x} at address {:#x} is unmapped', offset, ea) 89 | 90 | def initialize_offset_symbols(): 91 | """Populate IDA with information about the offsets in an iOS kernelcache. 92 | 93 | Search through the kernelcache for global offset tables (__got sections), convert each offset 94 | into an offset type in IDA, and rename each offset according to its target. 95 | 96 | This function does nothing in the newer 12-merged format kernelcache. 97 | """ 98 | next_offset = internal.make_name_generator(kernelcache_offset_suffix) 99 | for ea in idautils.Segments(): 100 | segname = idc.SegName(ea) 101 | if not segname.endswith('__got'): 102 | continue 103 | _log(2, 'Processing segment {}', segname) 104 | _process_offsets_section(ea, next_offset) 105 | 106 | -------------------------------------------------------------------------------- /scripts/populate_struct.py: -------------------------------------------------------------------------------- 1 | # 2 | # scripts/populate_struct.py 3 | # Brandon Azad 4 | # 5 | # Populate a class or struct using data flow analysis. 6 | # 7 | 8 | def kernelcache_populate_struct(struct=None, address=None, register=None, delta=None): 9 | import idc 10 | import idautils 11 | import idaapi 12 | import ida_kernelcache as kc 13 | import ida_kernelcache.ida_utilities as idau 14 | 15 | # Define the form to ask for the arguments. 16 | class MyForm(idaapi.Form): 17 | def __init__(self): 18 | swidth = 40 19 | idaapi.Form.__init__(self, r"""STARTITEM 0 20 | Automatically populate struct fields 21 | 22 | <#The name of the structure#Structure:{structure}> 23 | <#The address of the instruction at which the register points to the structure#Address :{address}> 24 | <#The register containing the pointer to the structure#Register :{register}> 25 | <#The offset of the pointer from the start of the structure#Delta :{delta}>""", { 26 | 'structure': idaapi.Form.StringInput( tp=idaapi.Form.FT_IDENT, swidth=swidth), 27 | 'address': idaapi.Form.NumericInput(tp=idaapi.Form.FT_ADDR, swidth=swidth, width=1000), 28 | 'register': idaapi.Form.StringInput( tp=idaapi.Form.FT_IDENT, swidth=swidth), 29 | 'delta': idaapi.Form.NumericInput(tp=idaapi.Form.FT_INT64, swidth=swidth), 30 | }) 31 | def OnFormChange(self, fid): 32 | return 1 33 | 34 | # If any argument is unspecified, get it using the form. 35 | if any(arg is None for arg in (struct, address, register, delta)): 36 | f = MyForm() 37 | f.Compile() 38 | f.structure.value = struct or '' 39 | f.address.value = address or idc.ScreenEA() 40 | f.register.value = register or 'X0' 41 | f.delta.value = delta or 0 42 | ok = f.Execute() 43 | if ok != 1: 44 | print 'Cancelled' 45 | return False 46 | struct = f.structure.value 47 | address = f.address.value 48 | register = f.register.value 49 | delta = f.delta.value 50 | f.Free() 51 | 52 | # Check whether this struct is a class. 53 | kc.collect_class_info() 54 | is_class = struct in kc.class_info 55 | 56 | # Open the structure. 57 | sid = idau.struct_open(struct, create=True) 58 | if sid is None: 59 | print 'Could not open struct {}'.format(struct) 60 | return False 61 | 62 | # Check that the address is in a function. 63 | if not idaapi.get_func(address): 64 | print 'Address {:#x} is not a function'.format(address) 65 | return False 66 | 67 | # Get the register id. 68 | register_id = None 69 | if type(register) is str: 70 | register_id = idaapi.str2reg(register) 71 | elif type(register) is int: 72 | register_id = register 73 | register = idaapi.get_reg_name(register_id, 8) 74 | if register_id is None or register_id < 0: 75 | print 'Invalid register {}'.format(register) 76 | return False 77 | 78 | # Validate delta. 79 | if delta < 0 or delta > 0x1000000: 80 | print 'Invalid delta {}'.format(delta) 81 | return False 82 | elif is_class and delta != 0: 83 | print 'Nonzero delta not yet supported' 84 | return False 85 | 86 | type_name = 'class' if is_class else 'struct' 87 | print '{} = {}, address = {:#x}, register = {}, delta = {:#x}'.format(type_name, struct, 88 | address, register, delta) 89 | 90 | if is_class: 91 | # Run the analysis. 92 | kc.class_struct.process_functions([(address, struct, register_id)]) 93 | else: 94 | # Run the data flow to collect the accesses and then add those fields to the struct. 95 | accesses = kc.data_flow.pointer_accesses(function=address, 96 | initialization={ address: { register_id: delta } }) 97 | kc.build_struct.create_struct_fields(sid, accesses=accesses) 98 | 99 | # Set the offsets to stroff. 100 | for addresses_and_deltas in accesses.values(): 101 | for ea, delta in addresses_and_deltas: 102 | insn = idautils.DecodeInstruction(ea) 103 | if insn: 104 | for op in insn.Operands: 105 | if op.type == idaapi.o_displ: 106 | idau.insn_op_stroff(insn, op.n, sid, delta) 107 | 108 | # All done! :) 109 | print 'Done' 110 | return True 111 | 112 | kernelcache_populate_struct() 113 | 114 | -------------------------------------------------------------------------------- /ida_kernelcache/classes.py: -------------------------------------------------------------------------------- 1 | # 2 | # ida_kernelcache/classes.py 3 | # Brandon Azad 4 | # 5 | # This module defines the ClassInfo class, which stores information about a C++ class in the 6 | # kernelcache. It also provides the function collect_class_info() to scan the kernelcache for 7 | # information about C++ classes and populate global variables with the result. 8 | # 9 | 10 | import collect_classes 11 | import ida_utilities as idau 12 | import vtable 13 | 14 | class_info = {} 15 | """A global map from class names to ClassInfo objects. See collect_class_info().""" 16 | 17 | vtables = {} 18 | """A global map from the address each virtual method tables in the kernelcache to its length.""" 19 | 20 | class ClassInfo(object): 21 | """Information about a C++ class in a kernelcache.""" 22 | 23 | def __init__(self, classname, metaclass, vtable, vtable_length, class_size, superclass_name, 24 | meta_superclass): 25 | self.superclass = None 26 | self.subclasses = set() 27 | self.classname = classname 28 | self.metaclass = metaclass 29 | self.vtable = vtable 30 | self.vtable_length = vtable_length 31 | self.class_size = class_size 32 | self.superclass_name = superclass_name 33 | self.meta_superclass = meta_superclass 34 | 35 | def __repr__(self): 36 | def hex(x): 37 | if x is None: 38 | return repr(None) 39 | return '{:#x}'.format(x) 40 | return 'ClassInfo({!r}, {}, {}, {}, {}, {!r}, {})'.format( 41 | self.classname, hex(self.metaclass), hex(self.vtable), 42 | self.vtable_length, self.class_size, self.superclass_name, 43 | hex(self.meta_superclass)) 44 | 45 | @property 46 | def vtable_methods(self): 47 | return self.vtable + vtable.VTABLE_OFFSET * idau.WORD_SIZE 48 | 49 | @property 50 | def vtable_nmethods(self): 51 | if not self.vtable_length or self.vtable_length < vtable.VTABLE_OFFSET: 52 | return 0 53 | return self.vtable_length - vtable.VTABLE_OFFSET 54 | 55 | def ancestors(self, inclusive=False): 56 | """A generator over all direct or indircet superclasses of this class. 57 | 58 | Ancestors are returned in order from root (most distance) to superclass (closest), and the 59 | class itself is not returned. 60 | 61 | Options: 62 | inclusive: If True, then this class is included in the iteration. Default is False. 63 | """ 64 | if self.superclass: 65 | for ancestor in self.superclass.ancestors(inclusive=True): 66 | yield ancestor 67 | if inclusive: 68 | yield self 69 | 70 | def descendants(self, inclusive=False): 71 | """A generator over all direct or indircet subclasses of this class. 72 | 73 | Descendants are returned in descending depth-first order: first a subclass will be 74 | returned, then all of its descendants, before going on to the next subclass of this class. 75 | 76 | Options: 77 | inclusive: If True, then this class is included in the iteration. Default is False. 78 | """ 79 | if inclusive: 80 | yield self 81 | for subclass in self.subclasses: 82 | for descendant in subclass.descendants(inclusive=True): 83 | yield descendant 84 | 85 | def collect_class_info(): 86 | """Collect information about C++ classes defined in a kernelcache. 87 | 88 | This function searches through an iOS kernelcache for information about the C++ classes defined 89 | in it. It populates the global class_info dictionary, which maps the C++ class names to a 90 | ClassInfo object containing metainformation about the class. 91 | 92 | To force re-evaluation of the class_info dictionary, call class_info.clear() and then re-run 93 | this function. 94 | 95 | This function also collects the set of all virtual method tables identified in the kernelcache, 96 | even if the corresponding class could not be identified. A mapping from each virtual method 97 | table to its length is stored in the global vtables variable. 98 | 99 | Only Arm64 is supported at this time. 100 | 101 | Only top-level classes are processed. Information about nested classes is not collected. 102 | """ 103 | global class_info, vtables 104 | if not class_info: 105 | vtables.clear() 106 | result = collect_classes.collect_class_info_internal() 107 | if result is not None: 108 | all_class_info, all_vtables = result 109 | class_info.update(all_class_info) 110 | vtables.update(all_vtables) 111 | -------------------------------------------------------------------------------- /scripts/process_external_methods.py: -------------------------------------------------------------------------------- 1 | # 2 | # scripts/process_external_methods.py 3 | # Brandon Azad 4 | # 5 | # Parse a list of IOExternalMethod or IOExternalMethodDispatch structs and print metainformation 6 | # about the selectors in the format: 7 | # { selector, input_scalars_count, input_structure_size, output_scalars_count, output_structure_size } 8 | # 9 | 10 | def kernelcache_process_external_methods(ea=None, struct_type=None, count=None): 11 | import idc 12 | import ida_kernelcache as kc 13 | import ida_kernelcache.ida_utilities as idau 14 | 15 | kIOUCVariableStructureSize = 0xffffffff 16 | 17 | kIOUCTypeMask = 0xf 18 | kIOUCScalarIScalarO = 0 19 | kIOUCScalarIStructO = 2 20 | kIOUCStructIStructO = 3 21 | kIOUCScalarIStructI = 4 22 | 23 | kIOUCFlags = 0xff 24 | 25 | IOExternalMethod_types = (kIOUCScalarIScalarO, kIOUCScalarIStructO, kIOUCStructIStructO, 26 | kIOUCScalarIStructI) 27 | 28 | IOExternalMethod_count0_scalar = (kIOUCScalarIScalarO, kIOUCScalarIStructO, 29 | kIOUCScalarIStructI) 30 | 31 | IOExternalMethod_count1_scalar = (kIOUCScalarIScalarO,) 32 | 33 | def check_scalar(scalar_count): 34 | return (0 <= scalar_count <= 400) 35 | 36 | def check_structure(structure_size): 37 | return (0 <= structure_size <= 0x100000 or structure_size == kIOUCVariableStructureSize) 38 | 39 | def is_IOExternalMethodDispatch(obj): 40 | return (idau.is_mapped(obj.function) 41 | and check_scalar(obj.checkScalarInputCount) 42 | and check_structure(obj.checkStructureInputSize) 43 | and check_scalar(obj.checkScalarOutputCount) 44 | and check_structure(obj.checkStructureOutputSize)) 45 | 46 | def process_IOExternalMethodDispatch(obj): 47 | return (obj.checkScalarInputCount, obj.checkStructureInputSize, 48 | obj.checkScalarOutputCount, obj.checkStructureOutputSize) 49 | 50 | def is_IOExternalMethod(obj): 51 | method_type = obj.flags & kIOUCTypeMask 52 | check_count0 = check_scalar if method_type in IOExternalMethod_count0_scalar else check_structure 53 | check_count1 = check_scalar if method_type in IOExternalMethod_count1_scalar else check_structure 54 | return ((obj.object == 0 or idau.is_mapped(obj.object)) 55 | and (obj.flags & kIOUCFlags == obj.flags) 56 | and idau.is_mapped(obj.func) 57 | and method_type in IOExternalMethod_types 58 | and check_count0(obj.count0) 59 | and check_count1(obj.count1)) 60 | 61 | def process_IOExternalMethod(obj): 62 | isc, iss, osc, oss = 0, 0, 0, 0 63 | method_type = obj.flags & kIOUCTypeMask 64 | if method_type == kIOUCScalarIScalarO: 65 | isc, osc = obj.count0, obj.count1 66 | elif method_type == kIOUCScalarIStructO: 67 | isc, oss = obj.count0, obj.count1 68 | elif method_type == kIOUCStructIStructO: 69 | iss, oss = obj.count0, obj.count1 70 | elif method_type == kIOUCScalarIStructI: 71 | isc, iss = obj.count0, obj.count1 72 | else: 73 | assert False 74 | return (isc, iss, osc, oss) 75 | 76 | TYPE_MAP = { 77 | 'IOExternalMethodDispatch': 78 | (is_IOExternalMethodDispatch, process_IOExternalMethodDispatch), 79 | 'IOExternalMethod': (is_IOExternalMethod, process_IOExternalMethod), 80 | } 81 | 82 | # Get the EA. 83 | if ea is None: 84 | ea = idc.ScreenEA() 85 | 86 | # Get the struct_type and the check and process functions. 87 | if struct_type is None: 88 | for stype in TYPE_MAP: 89 | struct_type = stype 90 | check, process = TYPE_MAP[struct_type] 91 | obj = idau.read_struct(ea, struct=struct_type, asobject=True) 92 | if check(obj): 93 | break 94 | else: 95 | print 'Address {:#x} does not look like any known external method struct'.format(ea) 96 | return False 97 | else: 98 | if struct_type not in TYPE_MAP: 99 | print 'Unknown external method struct type {}'.format(struct_type) 100 | return False 101 | check, process = TYPE_MAP[struct_type] 102 | obj = idau.read_struct(ea, struct=struct_type, asobject=True) 103 | if not check(obj): 104 | print 'Address {:#x} does not look like {}'.format(ea, struct_type) 105 | 106 | # Process the external methods. 107 | selector = 0; 108 | while (count is None and check(obj)) or (selector < count): 109 | isc, iss, osc, oss = process(obj) 110 | print '{{ {:3}, {:5}, {:#10x}, {:5}, {:#10x} }}'.format(selector, isc, iss, osc, oss) 111 | selector += 1 112 | ea += len(obj) 113 | obj = idau.read_struct(ea, struct=struct_type, asobject=True) 114 | 115 | return True 116 | 117 | kernelcache_process_external_methods() 118 | 119 | -------------------------------------------------------------------------------- /ida_kernelcache/symbol.py: -------------------------------------------------------------------------------- 1 | # 2 | # ida_kernelcache/symbol.py 3 | # Brandon Azad 4 | # 5 | """ida_kernelcache.class_struct 6 | 7 | This module deals with processing and transforming symbol strings. It does not modify IDA. 8 | 9 | TODO: A lot of functions in this module really have to do with processing type strings, not symbol 10 | strings. 11 | """ 12 | 13 | import re 14 | 15 | import idc 16 | import idaapi 17 | 18 | def method_name(symbol): 19 | """Get the name of the C++ method from its symbol. 20 | 21 | If the symbol demangles to 'Class::method(args)', this function returns 'method'. 22 | """ 23 | try: 24 | demangled = idc.Demangle(symbol, idc.GetLongPrm(idc.INF_SHORT_DN)) 25 | func = demangled.split('::', 1)[1] 26 | base = func.split('(', 1)[0] 27 | return base or None 28 | except: 29 | return None 30 | 31 | def method_arguments_string(symbol): 32 | """Get the arguments string of the C++ method from its symbol. 33 | 34 | If the symbol demangles to 'Class::method(arg1, arg2)', this function returns 'arg1, arg2'. 35 | """ 36 | try: 37 | demangled = idc.Demangle(symbol, idc.GetLongPrm(idc.INF_LONG_DN)) 38 | func = demangled.split('::', 1)[1] 39 | args = func.split('(', 1)[1] 40 | args = args.rsplit(')', 1)[0].strip() 41 | return args 42 | except: 43 | return None 44 | 45 | def method_arguments(symbol): 46 | """Get the arguments list of the C++ method from its symbol. 47 | 48 | If the symbol demangles to 'Class::method(arg1, arg2)', this function returns ['arg1', 'arg2']. 49 | """ 50 | try: 51 | arglist = [] 52 | args = method_arguments_string(symbol) 53 | if args is None: 54 | return None 55 | if not args or args == 'void': 56 | return arglist 57 | carg = '' 58 | parens = 0 59 | for c in args + ',': 60 | if c == ',' and parens == 0: 61 | carg = carg.strip() 62 | assert carg 63 | arglist.append(carg) 64 | carg = '' 65 | continue 66 | if c == '(': 67 | parens += 1 68 | elif c == ')': 69 | parens -= 1 70 | carg += c 71 | return arglist 72 | except: 73 | return None 74 | 75 | def method_argument_pointer_types(symbol): 76 | """Get the base types of pointer types used in the arguments to a C++ method.""" 77 | args = method_arguments_string(symbol) 78 | if args is None: 79 | return None 80 | if not args or args == 'void': 81 | return set() 82 | args = re.sub(r"[&]|\bconst\b", ' ', args) 83 | args = re.sub(r"\bunsigned\b", ' ', args) 84 | args = re.sub(r" +", ' ', args) 85 | argtypes = set(arg.strip() for arg in re.split(r"[,()]", args)) 86 | ptrtypes = set() 87 | for argtype in argtypes: 88 | if re.match(r"[^ ]+ [*][* ]*", argtype): 89 | ptrtypes.add(argtype.split(' ', 1)[0]) 90 | ptrtypes.difference_update(['void', 'bool', 'char', 'short', 'int', 'long', 'float', 'double', 91 | 'longlong', '__int64']) 92 | return ptrtypes 93 | 94 | def method_argument_types(symbol, sign=True): 95 | """Get the base types used in the arguments to a C++ method.""" 96 | try: 97 | args = method_arguments_string(symbol) 98 | if args is None: 99 | return None 100 | if not args or args == 'void': 101 | return set() 102 | args = re.sub(r"[*&]|\bconst\b", ' ', args) 103 | if not sign: 104 | args = re.sub(r"\bunsigned\b", ' ', args) 105 | args = re.sub(r" +", ' ', args) 106 | argtypes = set(arg.strip() for arg in re.split(r"[,()]", args)) 107 | argtypes.discard('') 108 | return argtypes 109 | except: 110 | return None 111 | 112 | def convert_function_type_to_function_pointer_type(typestr): 113 | """Convert a function type string into a function pointer type string. 114 | 115 | For example: 116 | __int64 __fastcall(arg1, arg2) => __int64 __fastcall (*)(arg1, arg2) 117 | """ 118 | try: 119 | return_part, args_part = typestr.split('(', 1) 120 | return return_part + ' (*)(' + args_part 121 | except: 122 | return None 123 | 124 | def make_ident(name): 125 | """Convert a name into a valid identifier, substituting any invalid characters.""" 126 | ident = '' 127 | for c in name: 128 | if idaapi.is_ident_char(ord(c)): 129 | ident += c 130 | else: 131 | ident += '_' 132 | return ident 133 | 134 | def _mangle_name(scopes): 135 | symbol = '' 136 | if len(scopes) > 1: 137 | symbol += 'N' 138 | for name in scopes: 139 | if len(name) == 0: 140 | return None 141 | symbol += '{}{}'.format(len(name), name) 142 | if len(scopes) > 1: 143 | symbol += 'E' 144 | return symbol 145 | 146 | def vtable_symbol_for_class(classname): 147 | """Get the mangled symbol name for the vtable for the given class name. 148 | 149 | Arguments: 150 | classname: The name of the C++ class. 151 | 152 | Returns: 153 | The symbol name, or None if the classname is invalid. 154 | """ 155 | name = _mangle_name(classname.split('::')) 156 | if not name: 157 | return None 158 | return '__ZTV' + name 159 | 160 | def vtable_symbol_get_class(symbol): 161 | """Get the class name for a vtable symbol.""" 162 | try: 163 | demangled = idc.Demangle(symbol, idc.GetLongPrm(idc.INF_SHORT_DN)) 164 | pre, post = demangled.split("`vtable for'", 1) 165 | assert pre == '' 166 | return post 167 | except: 168 | return None 169 | 170 | def global_name(name): 171 | """Get the mangled symbol name for the global name. 172 | 173 | Arguments: 174 | name: The name of the global object. 175 | 176 | Returns: 177 | The symbol name, or None if the name is invalid. 178 | """ 179 | mangled = _mangle_name(name.split('::')) 180 | if not mangled: 181 | return None 182 | return '__Z' + mangled 183 | 184 | -------------------------------------------------------------------------------- /ida_kernelcache/stub.py: -------------------------------------------------------------------------------- 1 | # 2 | # ida_kernelcache/stub.py 3 | # Brandon Azad 4 | # 5 | # Functions for analyzing stub functions in the kernelcache. 6 | # 7 | 8 | import re 9 | 10 | import idc 11 | import idautils 12 | import idaapi 13 | 14 | import ida_utilities as idau 15 | import internal 16 | 17 | _log = idau.make_log(1, __name__) 18 | 19 | kernelcache_stub_suffix = '___stub_' 20 | """The suffix that gets appended to a symbol to create the stub name, without the stub ID.""" 21 | 22 | _stub_regex = re.compile(r"^(\S+)" + kernelcache_stub_suffix + r"\d+$") 23 | """A regular expression to match and extract the target name from a stub symbol.""" 24 | 25 | def stub_name_target(stub_name): 26 | """Get the target to which a stub name refers. 27 | 28 | No checks are performed to ensure that the target actually exists. 29 | """ 30 | match = _stub_regex.match(stub_name) 31 | if not match: 32 | return None 33 | return match.group(1) 34 | 35 | def symbol_references_stub(symbol_name): 36 | """Check if the symbol name references a stub.""" 37 | return kernelcache_stub_suffix in symbol_name 38 | 39 | def _process_stub_template_1(stub): 40 | """A template to match the following stub pattern: 41 | 42 | ADRP X, #@PAGE 43 | LDR X, [X, #@PAGEOFF] 44 | BR X 45 | """ 46 | adrp, ldr, br = idau.Instructions(stub, count=3) 47 | if (adrp.itype == idaapi.ARM_adrp and adrp.Op1.type == idaapi.o_reg 48 | and adrp.Op2.type == idaapi.o_imm 49 | and ldr.itype == idaapi.ARM_ldr and ldr.Op1.type == idaapi.o_reg 50 | and ldr.Op2.type == idaapi.o_displ and ldr.auxpref == 0 51 | and br.itype == idaapi.ARM_br and br.Op1.type == idaapi.o_reg 52 | and adrp.Op1.reg == ldr.Op1.reg == ldr.Op2.reg == br.Op1.reg): 53 | offset = adrp.Op2.value + ldr.Op2.addr 54 | target = idau.read_word(offset) 55 | if target and idau.is_mapped(target): 56 | return target 57 | 58 | _stub_processors = ( 59 | _process_stub_template_1, 60 | ) 61 | 62 | def stub_target(stub_func): 63 | """Find the target function called by a stub. 64 | 65 | Arm64 only.""" 66 | # Each processing function in _stub_processors takes the address of a stub function and returns 67 | # the address of the target function. 68 | for process in _stub_processors: 69 | try: 70 | target = process(stub_func) 71 | if target: 72 | return target 73 | except: 74 | pass 75 | 76 | def _symbolicate_stub(stub, target, next_stub): 77 | """Set a symbol for a stub function.""" 78 | name = idau.get_ea_name(target, user=True) 79 | if not name: 80 | _log(3, 'Stub {:#x} has target {:#x} without a name', stub, target) 81 | return False 82 | # Sometimes the target of the stub is a thunk in another kext. This is sometimes OK, but makes 83 | # a right mess of things when that thunk is itself a jump function for another stub, and 84 | # especially when there are multiple such jump functions to that stub in that kext. 85 | # Autorenaming of thunks interacts poorly with autonaming of stubs (you get things like 86 | # 'j_TARGET___stub_2_0', which stub_name_target() no longer thinks of as a stub). Thus, if the 87 | # current thing has '__stub_' in it, don't rename. The reason we don't just extract the inner 88 | # stub reference is that these jump functions are really wrappers with different names and 89 | # semantics in the original code, so it's not appropriate for us to cover that up with a stub. 90 | if symbol_references_stub(name): 91 | _log(2, 'Stub {:#x} has target {:#x} (name {}) that references another stub', stub, target, 92 | name) 93 | return False 94 | symbol = next_stub(name) 95 | if symbol is None: 96 | _log(0, 'Could not generate stub symbol for {}: names exhausted', name) 97 | return False 98 | if not idau.set_ea_name(stub, symbol, auto=True): 99 | _log(2, 'Could not set name {} for stub at {:#x}', symbol, stub) 100 | return False 101 | return True 102 | 103 | def _process_possible_stub(stub, make_thunk, next_stub): 104 | """Try to process a stub function.""" 105 | # First, make sure this is a stub format we recognize. 106 | target = stub_target(stub) 107 | if not target: 108 | _log(0, 'Unrecognized stub format at {:#x}', stub) 109 | return False 110 | # Next, check if IDA sees this as a function chunk rather than a function, and correct it if 111 | # reasonable. 112 | if not idau.force_function(stub): 113 | _log(1, 'Could not convert stub to function at {:#x}', stub) 114 | return False 115 | # Next, set the appropriate flags on the stub. Make the stub a thunk if that was requested. 116 | flags = idc.GetFunctionFlags(stub) 117 | if flags == -1: 118 | _log(1, 'Could not get function flags for stub at {:#x}', stub) 119 | return False 120 | target_flags = idc.GetFunctionFlags(target) 121 | if target_flags != -1 and target_flags & idc.FUNC_NORET: 122 | flags |= idc.FUNC_NORET 123 | if make_thunk: 124 | flags |= idc.FUNC_THUNK 125 | if idc.SetFunctionFlags(stub, flags | idc.FUNC_THUNK) == 0: 126 | _log(1, 'Could not set function flags for stub at {:#x}', stub) 127 | return False 128 | # Next, ensure that IDA sees the target as a function, but continue anyway if that fails. 129 | if not idau.force_function(target): 130 | _log(1, 'Stub {:#x} has target {:#x} that is not a function', stub, target) 131 | # Finally symbolicate the stub. 132 | if not _symbolicate_stub(stub, target, next_stub): 133 | return False 134 | return True 135 | 136 | def _process_stubs_section(segstart, make_thunk, next_stub): 137 | """Process all the functions in a __stubs section.""" 138 | segend = idc.SegEnd(segstart) 139 | # We'll go through each address and check if it has a reference. If it does, it is likely a 140 | # stub. As long as the address doesn't already have a stub name, process it. 141 | for ea in idau.Addresses(segstart, segend, step=1): 142 | if idc.isRef(idc.GetFlags(ea)) and not stub_name_target(idau.get_ea_name(ea)): 143 | _process_possible_stub(ea, make_thunk, next_stub) 144 | 145 | def initialize_stub_symbols(make_thunk=True): 146 | """Populate IDA with information about the stubs in an iOS kernelcache. 147 | 148 | Search through the kernelcache for stubs (__stubs sections) and rename each stub function 149 | according to the target function it calls. 150 | 151 | Arm64 only. 152 | 153 | Options: 154 | make_thunk: Set the thunk attribute for each stub function. Default is True. 155 | """ 156 | next_stub = internal.make_name_generator(kernelcache_stub_suffix) 157 | for ea in idautils.Segments(): 158 | segname = idc.SegName(ea) 159 | if not segname.endswith('__stubs'): 160 | continue 161 | _log(3, 'Processing segment {}', segname) 162 | _process_stubs_section(ea, make_thunk, next_stub) 163 | 164 | -------------------------------------------------------------------------------- /ida_kernelcache/segment.py: -------------------------------------------------------------------------------- 1 | # 2 | # ida_kernelcache/segment.py 3 | # Brandon Azad 4 | # 5 | # Functions for interacting with the segments of the kernelcache in IDA. No prior initialization is 6 | # necessary. 7 | # 8 | 9 | import idc 10 | 11 | import ida_utilities as idau 12 | import kernel 13 | 14 | _log = idau.make_log(0, __name__) 15 | 16 | idc.Til2Idb(-1, 'mach_header_64') 17 | idc.Til2Idb(-1, 'load_command') 18 | idc.Til2Idb(-1, 'segment_command_64') 19 | idc.Til2Idb(-1, 'section_64') 20 | 21 | _LC_SEGMENT_64 = 0x19 22 | 23 | def _macho_segments_and_sections(ea): 24 | """A generator to iterate through a Mach-O file's segments and sections. 25 | 26 | Each iteration yields a tuple: 27 | (segname, segstart, segend, [(sectname, sectstart, sectend), ...]) 28 | """ 29 | hdr = idau.read_struct(ea, 'mach_header_64', asobject=True) 30 | nlc = hdr.ncmds 31 | lc = int(hdr) + len(hdr) 32 | lcend = lc + hdr.sizeofcmds 33 | while lc < lcend and nlc > 0: 34 | loadcmd = idau.read_struct(lc, 'load_command', asobject=True) 35 | if loadcmd.cmd == _LC_SEGMENT_64: 36 | segcmd = idau.read_struct(lc, 'segment_command_64', asobject=True) 37 | segname = idau.null_terminated(segcmd.segname) 38 | segstart = segcmd.vmaddr 39 | segend = segstart + segcmd.vmsize 40 | sects = [] 41 | sc = int(segcmd) + len(segcmd) 42 | for i in range(segcmd.nsects): 43 | sect = idau.read_struct(sc, 'section_64', asobject=True) 44 | sectname = idau.null_terminated(sect.sectname) 45 | sectstart = sect.addr 46 | sectend = sectstart + sect.size 47 | sects.append((sectname, sectstart, sectend)) 48 | sc += len(sect) 49 | yield (segname, segstart, segend, sects) 50 | lc += loadcmd.cmdsize 51 | nlc -= 1 52 | 53 | def _initialize_segments_in_kext(kext, mach_header, skip=[]): 54 | """Rename the segments in the specified kext.""" 55 | def log_seg(segname, segstart, segend): 56 | _log(3, '+ segment {: <20} {:x} - {:x} ({:x})', segname, segstart, segend, 57 | segend - segstart) 58 | def log_sect(sectname, sectstart, sectend): 59 | _log(3, ' section {: <20} {:x} - {:x} ({:x})', sectname, sectstart, sectend, 60 | sectend - sectstart) 61 | def log_gap(gapno, start, end, mapped): 62 | mapped = 'mapped' if mapped else 'unmapped' 63 | _log(3, ' gap {: <20} {:x} - {:x} ({:x}, {})', gapno, start, end, 64 | end - start, mapped) 65 | def process_region(segname, name, start, end): 66 | assert end >= start 67 | if segname in skip: 68 | _log(2, 'Skipping segment {}', segname) 69 | return 70 | newname = '{}.{}'.format(segname, name) 71 | if kext: 72 | newname = '{}:{}'.format(kext, newname) 73 | if start == end: 74 | _log(2, 'Skipping empty region {} at {:x}', newname, start) 75 | return 76 | ida_segstart = idc.SegStart(start) 77 | if ida_segstart == idc.BADADDR: 78 | _log(0, "IDA doesn't think this is a real segment: {:x} - {:x}", start, end) 79 | return 80 | ida_segend = idc.SegEnd(ida_segstart) 81 | if start != ida_segstart or end != ida_segend: 82 | _log(0, 'IDA thinks segment {} {:x} - {:x} should be {:x} - {:x}', newname, start, end, 83 | ida_segstart, ida_segend) 84 | return 85 | _log(2, 'Rename {:x} - {:x}: {} -> {}', start, end, idc.SegName(start), newname) 86 | idc.SegRename(start, newname) 87 | def process_gap(segname, gapno, start, end): 88 | mapped = idau.is_mapped(start) 89 | log_gap(gapno, start, end, mapped) 90 | if mapped: 91 | name = 'HEADER' if start == mach_header else '__gap_' + str(gapno) 92 | process_region(segname, name, start, end) 93 | for segname, segstart, segend, sects in _macho_segments_and_sections(mach_header): 94 | log_seg(segname, segstart, segend) 95 | lastend = segstart 96 | gapno = 0 97 | for sectname, sectstart, sectend in sects: 98 | if lastend < sectstart: 99 | process_gap(segname, gapno, lastend, sectstart) 100 | gapno += 1 101 | log_sect(sectname, sectstart, sectend) 102 | process_region(segname, sectname, sectstart, sectend) 103 | lastend = sectend 104 | if lastend < segend: 105 | process_gap(segname, gapno, lastend, segend) 106 | gapno += 1 107 | 108 | def initialize_segments(): 109 | """Rename the kernelcache segments in IDA according to the __PRELINK_INFO data. 110 | 111 | Rename the kernelcache segments based on the contents of the __PRELINK_INFO dictionary. 112 | Segments are renamed according to the scheme '[:].
', where '' is 113 | the bundle identifier if the segment is part of a kernel extension. The special region 114 | containing the Mach-O header is renamed '[:].HEADER'. 115 | """ 116 | # First rename the kernel segments. 117 | _log(1, 'Renaming kernel segments') 118 | kernel_skip = ['__PRELINK_TEXT', '__PLK_TEXT_EXEC', '__PRELINK_DATA', '__PLK_DATA_CONST'] 119 | _initialize_segments_in_kext(None, kernel.base, skip=kernel_skip) 120 | # Process each kext identified by the __PRELINK_INFO. In the new kernelcache format 12-merged, 121 | # the _PrelinkExecutableLoadAddr key is missing for all kexts, so no extra segment renaming 122 | # takes place. 123 | prelink_info_dicts = kernel.prelink_info['_PrelinkInfoDictionary'] 124 | for kext_prelink_info in prelink_info_dicts: 125 | kext = kext_prelink_info.get('CFBundleIdentifier', None) 126 | mach_header = kext_prelink_info.get('_PrelinkExecutableLoadAddr', None) 127 | if kext is not None and mach_header is not None: 128 | orig_kext = idc.SegName(mach_header).split(':', 1)[0] 129 | if '.kpi.' not in kext and orig_kext != kext: 130 | _log(0, 'Renaming kext {} -> {}', orig_kext, kext) 131 | _log(1, 'Renaming segments in {}', kext) 132 | _initialize_segments_in_kext(kext, mach_header) 133 | 134 | _kext_regions = [] 135 | 136 | def _initialize_kext_regions(): 137 | """Get region information for each kext based on iOS 12's __PRELINK_INFO.__kmod_start. 138 | 139 | NOTE: This only accounts for __TEXT_EXEC, not the other segments.""" 140 | kmod_start = idc.SegByBase(idc.SegByName('__PRELINK_INFO.__kmod_start')) 141 | if kmod_start == idc.BADADDR: 142 | return 143 | for kmod in idau.ReadWords(kmod_start, idc.SegEnd(kmod_start)): 144 | _log(1, 'Found kmod {:x}', kmod) 145 | segments = list(_macho_segments_and_sections(kmod)) 146 | if len(segments) != 1: 147 | _log(0, 'Skipping unrecognized kmod {:x}', kmod) 148 | continue 149 | segname, segstart, segend, sects = segments[0] 150 | if segname != '__TEXT_EXEC' or len(sects) != 1: 151 | _log(0, 'Skipping unrecognized kmod {:x}', kmod) 152 | continue 153 | kmod_name = 'kext.{:x}'.format(kmod) 154 | _log(1, 'Adding module: {:x} - {:x} {}', segstart, segend, kmod_name) 155 | _kext_regions.append((segstart, segend, kmod_name)) 156 | 157 | _initialize_kext_regions() 158 | 159 | def kernelcache_kext(ea): 160 | """Return the name of the kext to which the given linear address belongs. 161 | 162 | Only works if segments have been renamed using initialize_segments(). 163 | 164 | NOTE: Kexts are not well distinguished on the new iOS 12 merged kernelcache format. Do not rely 165 | on this function. 166 | """ 167 | # TODO: This doesn't work on 12-merged kernelcaches! 168 | name = idc.SegName(ea) or '' 169 | if ':' in name: 170 | return idc.SegName(ea).split(':', 1)[0] 171 | if _kext_regions: 172 | for start, end, kext in _kext_regions: 173 | if start <= ea < end: 174 | return kext 175 | return None 176 | 177 | -------------------------------------------------------------------------------- /ida_kernelcache/kplist.py: -------------------------------------------------------------------------------- 1 | # 2 | # ida_kernelcache/kplist.py 3 | # Brandon Azad 4 | # 5 | # Process kernel plists. This code is based on: 6 | # - https://github.com/python/cpython/blob/3.6/Lib/plistlib.py 7 | # 8 | 9 | import base64 10 | from xml.etree.ElementTree import XMLTreeBuilder 11 | 12 | class _KPlistBuilder(object): 13 | """A companion class for XMLTreeBuilder to parse a kernel-style property list.""" 14 | # IMPLEMENTATION IDEA: The XMLTreeBuilder calls us at four points: when there's a new start 15 | # tag, when there's a new end tag, when there's data from a tag, and when there's no more data. 16 | # We build objects incrementally out of these notifications. Each tag type can implement 17 | # handlers for the start and end tags. Exactly one of these handlers must return an object that 18 | # represents the parsed plist entry. Collection entries must return the object from the start 19 | # tag handler, while leaf entries must return the object from the end tag handler. Once a 20 | # handler has produced an object for the plist entry, that object gets added to the result 21 | # using add_object. Collections are maintained in a collection stack. When a start tag handler 22 | # returns an object, that object is pushed onto the top of the collection stack to indicate 23 | # that it is the current collection. When an end tag handler does not return a value, that 24 | # indicates that the current collection is done and the collection stack is popped. When the ID 25 | # attribute is encountered, the subsequent call to add_object associates the object with that 26 | # ID. When a corresponding IDREF attribute is encountered, the start and end tag handlers are 27 | # skipped. Instead, once the next end tag is received, the previous object is looked up by ID 28 | # and passed to add_object. 29 | 30 | def __init__(self): 31 | self.collection_stack = [] 32 | self.ids = {} 33 | self.current_data = [] 34 | self.current_id = None 35 | self.current_idref = None 36 | self.current_key = None 37 | self.root = None 38 | self.start_handler = { 39 | 'dict': self.start_dict, 40 | 'array': self.start_array, 41 | } 42 | self.end_handler = { 43 | 'dict': self.end_dict, 44 | 'key': self.end_key, 45 | 'true': self.end_true, 46 | 'false': self.end_false, 47 | 'integer': self.end_integer, 48 | 'string': self.end_string, 49 | 'data': self.end_data, 50 | } 51 | self.attributes = { 52 | 'integer': ('size',), 53 | } 54 | self.tags = set(self.start_handler.keys()).union(self.end_handler.keys()) 55 | 56 | # XMLTreeBuilder calls. 57 | 58 | def start(self, tag, attr): 59 | intervening_data = self.get_data().strip() 60 | assert not intervening_data and not self.current_id 61 | # Check that the attributes are allowed. 62 | for attrname in set(attr.keys()).difference(('ID', 'IDREF')): 63 | if attrname not in self.attributes[tag]: 64 | raise ValueError('illegal attribute "{}" for tag "{}"'.format(attrname, tag)) 65 | # Handle IDREF attribute. 66 | if self.current_idref is not None: 67 | raise ValueError('non-empty IDREF') 68 | self.current_idref = self.get_id_attr(attr, 'IDREF') 69 | if self.current_idref is not None: 70 | if self.current_idref not in self.ids: 71 | raise ValueError('tag has IDREF to non-existent ID') 72 | original_tag, _ = self.ids[self.current_idref] 73 | if tag != original_tag: 74 | raise ValueError('tag "{}" has IDREF to element with different tag "{}"' 75 | .format(tag, original_tag)) 76 | if len(attr) > 1: 77 | raise ValueError('tag has IDREF and another attribute') 78 | return 79 | # Handle ID attribute. 80 | self.current_id = self.get_id_attr(attr, 'ID') 81 | if self.current_id is not None and self.current_id in self.ids: 82 | raise ValueError('tag has previously used ID attribute') 83 | # Process the start tag if this is not an IDREF. 84 | handler = self.start_handler.get(tag, None) 85 | if handler: 86 | value = handler(attr) 87 | if value is not None: 88 | # This is a collection. Add the collection object then push a new context. 89 | self.add_object(tag, value) 90 | self.collection_stack.append(value) 91 | elif tag not in self.tags: 92 | raise ValueError('unrecognized tag "{}"'.format(tag)) 93 | 94 | def end(self, tag): 95 | assert not (self.current_data and self.current_idref is not None) 96 | # If we have an ID reference, then directly add the referenced value. 97 | if self.current_idref is not None: 98 | _, value = self.ids[self.current_idref] 99 | self.current_idref = None 100 | self.add_object(tag, value) 101 | return 102 | # Otherwise, perform the end tag handler. 103 | handler = self.end_handler.get(tag, None) 104 | value = None 105 | if handler: 106 | value = handler() 107 | if value is not None: 108 | self.add_object(tag, value) 109 | else: 110 | # This is a collection. We just finished, so pop the context stack. 111 | self.collection_stack.pop() 112 | 113 | def data(self, data): 114 | if self.current_idref is not None: 115 | raise ValueError('non-empty IDREF') 116 | self.current_data.append(data) 117 | 118 | def close(self): 119 | assert not self.current_data and not self.collection_stack 120 | return self.root 121 | 122 | # Internal functions. 123 | 124 | def get_id_attr(self, attr, name): 125 | id_attr = attr.get(name, None) 126 | if id_attr is not None: 127 | try: 128 | return int(id_attr, 0) 129 | except ValueError: 130 | raise ValueError('invalid {} attribute'.format(name)) 131 | return None 132 | 133 | def add_object(self, tag, value): 134 | if self.current_id is not None: 135 | assert self.current_id not in self.ids 136 | self.ids[self.current_id] = (tag, value) 137 | self.current_id = None 138 | if tag == 'key': 139 | # We are adding a key to a dictionary but don't yet have the value. 140 | if not self.collection_stack or type(self.collection_stack[-1]) != dict: 141 | raise ValueError('invalid key tag not in a dict') 142 | if self.current_key: 143 | raise ValueError('missing value for key in dict') 144 | self.current_key = value 145 | elif self.current_key is not None: 146 | # We are adding a key and value to a dictionary. 147 | assert type(self.collection_stack[-1]) == dict 148 | if self.current_key in self.collection_stack[-1]: 149 | raise ValueError('duplicate key "{}" in dict'.format(self.current_key)) 150 | self.collection_stack[-1][self.current_key] = value 151 | self.current_key = None 152 | elif self.root is None: 153 | # We are setting the root object. 154 | self.root = value 155 | elif self.collection_stack and type(self.collection_stack[-1]) == list: 156 | # We are adding an object to an array (or other container). 157 | self.collection_stack[-1].append(value) 158 | else: 159 | # We have two values in a row not in a container. 160 | raise ValueError('unexpected element not in a container') 161 | 162 | def get_data(self): 163 | data = ''.join(self.current_data) 164 | self.current_data = [] 165 | return data 166 | 167 | # Element tag handlers. 168 | 169 | def start_dict(self, attr): 170 | return {} 171 | 172 | def start_array(self, attr): 173 | return [] 174 | 175 | def end_dict(self): 176 | if self.current_key is not None: 177 | raise ValueError('missing value for key in dict') 178 | 179 | def end_key(self): 180 | assert self.current_key is None 181 | return self.get_data() 182 | 183 | def end_true(self): 184 | if self.get_data(): 185 | raise ValueError('true tag must be empty') 186 | return True 187 | 188 | def end_false(self): 189 | if self.get_data(): 190 | raise ValueError('false tag must be empty') 191 | return False 192 | 193 | def end_integer(self): 194 | # TODO: The size attribute is currently ignored. 195 | return int(self.get_data(), 0) 196 | 197 | def end_string(self): 198 | return self.get_data() 199 | 200 | def end_data(self): 201 | return base64.b64decode(self.get_data()) 202 | 203 | def kplist_parse(plist): 204 | """Parse a kernel-style property list.""" 205 | try: 206 | builder = _KPlistBuilder() 207 | parser = XMLTreeBuilder(target=builder) 208 | parser.feed(plist) 209 | return parser.close() 210 | except: 211 | return None 212 | 213 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ida_kernelcache: An IDA Toolkit for analyzing iOS kernelcaches 2 | 3 | 4 | 5 | ida_kernelcache is an IDAPython module for IDA Pro to make working with iOS kernelcaches easier. 6 | The module provides functions to: 7 | 8 | * Convert iOS 12's new static tagged pointers into normal kernel pointers. 9 | * Parse the kernel's `__PRELINK_INFO` segment into a Python dictionary. 10 | * Rename the segments in IDA according to the kernel extension name, Mach-O segment, and Mach-O 11 | section. 12 | * Convert identifiable pointers in some segments into IDA offsets. 13 | * Reconstruct the C++ class hierarchy based on OSMetaClass information. 14 | * Symbolicate C++ virtual method tables (both the vtable itself and its methods). 15 | * Symbolicate offsets in `__got` sections and stub functions in `__stubs` sections. 16 | * Autogenerate IDA structs representing the C++ virtual method tables. 17 | * Autogenerate IDA structs representing the C++ classes in the kernelcache based on observed access 18 | patterns. 19 | 20 | The main processing function is designed to be run before any manual analysis or reverse 21 | engineering. With the default settings, IDA tends to miss a lot of useful information in the 22 | kernelcache. These scripts help IDA along by leveraging the known structure of the kernelcache to 23 | automatically propagate useful information. 24 | 25 | In addition to the stock functionality in the module, ida_kernelcache contains several scripts to 26 | make analyzing the iOS kernelcache easier. For example, you can use the scripts to autogenerate C 27 | structs used by a function. 28 | 29 | Many of the techniques used in ida_kernelcache were developed for and borrowed directly from 30 | [memctl]. 31 | 32 | [memctl]: https://github.com/bazad/memctl 33 | 34 | ## Versions 35 | 36 | ida_kernelcache has been tested with IDA Pro 6.95 on kernelcaches for iOS versions 10.1.1, 11.0, 37 | 11.2, 11.3.1, and 12.0 beta. Currently only Arm64 kernelcaches from iOS 10 and later are supported. 38 | 39 | ## Getting started 40 | 41 | You need to already have a decompressed kernelcache file loaded into IDA. You can find the URL to 42 | download a particular IPSW from Apple online, and there are a number of public tools (including 43 | memctl) capable of decompressing the kernelcache. 44 | 45 | In IDA, select "File" -> "Script file..." from the menu bar, then choose the `ida_kernelcache.py` 46 | script in the main directory. This will load the ida_kernelcache module into the IDAPython 47 | interpreter under the names `ida_kernelcache` and `kc`. In the IDAPython prompt, type 48 | `kc.kernelcache_process()` and hit Enter to start analyzing the kernelcache. This function performs 49 | all the major analyses supported by ida_kernelcache. The function will run for several minutes as 50 | IDA identifies and analyzes new functions. 51 | 52 | ida_kernelcache will try not to overwrite user names for addresses. This means that if the 53 | kernelcache has been manually analyzed prior to initialization with `kernelcache_process`, the 54 | results may not be as thorough because user-specified names may block automatic name propagation. 55 | However, there's also no guarantee that ida_kernelcache won't mess up prior analysis, so if you do 56 | decide to run `kernelcache_process` on a kernelcache file which you've already analyzed, make a 57 | backup first. 58 | 59 | ## The ida_kernelcache module 60 | 61 | ida_kernelcache is meant to be loaded via `ida_kernelcache.py`; the submodules in the 62 | `ida_kernelcache` directory are not meant to be loaded directly. However, ida_kernelcache exposes 63 | the functionality of many of these submodules. Here is what each of them does: 64 | 65 | * **ida_utilities**: 66 | This module wraps some of IDA's functions to provide an easier-to-use API. Particularly useful are 67 | `is_mapped`, `read_word`, `read_struct`, `force_function`, and `ReadWords`. `is_mapped` checks 68 | whether an address is mapped, and optionally whether it contains a known value. `read_word` reads a 69 | variably-sized word from an address. `read_struct` reads a structure type into a Python dictionary 70 | or Python accessor object, which makes parsing data structures much easier. `force_function` tries 71 | several tricks to convert an address into the start of a function in IDA. `ReadWords` is a 72 | generator to iterate over data words and their addresses in a range. 73 | 74 | * **build_struct**: 75 | This internal module contains utilities to automatically populate an IDA struct based on a sequence 76 | of accesses to the struct. 77 | 78 | * **class_struct**: 79 | This module provides functions to generate IDA structs representing C++ virtual method tables and 80 | classes. `initialize_vtable_structs` scans the (symbolicated) virtual method tables and creates IDA 81 | structs to hold virtual method pointers. `initialize_class_structs` performs a data flow analysis 82 | on the virtual methods to identify accesses to the fields of each class, then builds IDA structs to 83 | represent the classes. Instructions that appear to reference a field are also converted into 84 | structure offset references. See the module docstring for more details. 85 | 86 | * **classes**: 87 | This module defines the `ClassInfo` type that holds information about C++ classes in the 88 | kernelcache and provides the function `collect_class_info` to scan the kernelcache for classes and 89 | populate the global `class_info` dictionary with a map from class names to `ClassInfo` objects. The 90 | `ClassInfo` type records the class name, the OSMetaClass instance, the virtual method table, and 91 | the superclass name for each C++ class. Additionally, each `ClassInfo` object stores references to 92 | the superclass's `ClassInfo` and the `ClassInfo` of all direct subclasses, making it easy to 93 | examine and traverse the class hierarchy. `collect_class_info` also stores the set of all virtual 94 | method tables in the global `vtables` set. 95 | 96 | * **data_flow**: 97 | This internal module contains data flow operations used by the rest of ida_kernelcache. 98 | 99 | * **kernel**: 100 | This module provides the `base` and `prelink_info` global variables. `base` is the base address of 101 | the kernel image (the start of the kernel's Mach-O header). `prelink_info` is the parsed 102 | `__PRELINK_INFO` dictionary. 103 | 104 | * **kplist**: 105 | This module provides the `kplist_parse` function to parse kernel-style plists. 106 | 107 | * **metaclass**: 108 | This module provides the function `initialize_metaclass_symbols` which adds a symbol for each 109 | known OSMetaClass instance. 110 | 111 | * **offset**: 112 | This module provides the functions `initialize_data_offsets` and `initialize_offset_symbols`. The 113 | former scans through the segments looking for pointers which can be converted into offsets. The 114 | latter symbolicates offsets in the `__got` section of each kext if the target of the offset has a 115 | symbol. 116 | 117 | * **segment**: 118 | This module provides the function `initialize_segments` to rename IDA's segments to be more useful. 119 | By default, IDA seems to create the segment names by combining a guess of the bundle identifier 120 | with the Mach-O section describing the region. `initialize_segments` extracts the true bundle 121 | identifier from the `__PRELINK_INFO` dictionary and renames each segment to include the bundle 122 | identifier, Mach-O segment, and Mach-O section. This makes it possible, for example, to distinguish 123 | between `__TEXT.__const` and `__DATA_CONST.__const`. This module also provides the function 124 | `kernelcache_kext` (re-exported at the top level) to determine the kext containing the specified 125 | address (only on the old iOS 11 split-kext kernelcache format). 126 | 127 | * **stub**: 128 | Many kexts in the kernelcache contain stub functions in a `__stubs` section that jump to functions 129 | in the kernel proper. Unfortunately, these stubs provide a barrier for propagating cross references 130 | and type information. This module doesn't solve these problems, but it does make looking at stubs a 131 | bit easier by automatically renaming stub functions so that the target function name is visible. 132 | Stubs and their targets are forcibly converted into functions in IDA, which helps make the 133 | functions in IDA line up with the functions in the original source code. 134 | 135 | * **tagged_pointers**: 136 | The new iOS 12 merged kernelcache format has the upper 2 bytes of each pointer tagged with an 137 | offset in order to chain the pointers together in a list. This module contains functions for 138 | processing and restoring those tagged pointers. 139 | 140 | * **vtable**: 141 | This module provides many useful functions for working with virtual method tables, including 142 | `vtable_length`, `convert_vtable_to_offsets`, `vtable_overrides`, `initialize_vtable_symbols`, and 143 | `initialize_vtable_method_symbols`. `vtable_length` checks whether the specified address could be a 144 | vtable and returns the vtable length. The generator `vtable_overrides` enumerates the virtual 145 | methods in a class which override virtual methods used by the superclass. The function 146 | `initialize_vtable_symbols` adds a symbol for the start of each identified vtable. 147 | `initialize_vtable_method_symbols` iterates through the overridden methods in each vtable and 148 | propagates symbols from the superclass to the subclass. This is possible because most of the base 149 | classes in IOKit are defined in XNU with relatively complete symbol information. Each method 150 | override in the vtable of a subclass must conform to the same interface as the method in the 151 | superclass, which means we can generate a symbol for the override by substituting the subclass's 152 | name for the superclass's name in the virtual method symbol in the superclass. For example, if we 153 | have no name for the virtual method at index 7 in the `AppleKeyStore` class, but we know that the 154 | virtual method at index 7 in its superclass `IOService` is called 155 | `__ZNK9IOService12getMetaClassEv`, then we can infer that index 7 should be called 156 | `__ZNK13AppleKeyStore12getMetaClassEv` in the subclass. This technique can be used to symbolicate 157 | most virtual methods in most classes. 158 | 159 | ## Other scripts 160 | 161 | The `ida_kernelcache_reload.py` script is identical to `ida_kernelcache.py`, except it forces the 162 | `ida_kernelcache` module and all submodules to be reloaded. It is mostly useful for development. 163 | 164 | The `scripts` directory contains scripts that use ida_kernelcache to perform some sort of analysis. 165 | These scripts are too specific to be part of the main ida_kernelcache module, but they are useful 166 | when reverse engineering the kernelcache. They include: 167 | 168 | * **find_virtual_method_overrides.py**: 169 | A script to find descendants of a class that override a virtual method containing the specified 170 | string. Matching overrides are printed to the console. 171 | 172 | * **populate_struct.py**: 173 | Populate fields for a C++ class or C struct by performing data flow analysis starting at the 174 | current address. 175 | 176 | * **process_external_methods.py**: 177 | Process an `IOExternalMethod` or `IOExternalMethodDispatch` array into a standard form for use by 178 | fuzzing tools. 179 | 180 | ## Class reconstruction 181 | 182 | If you are using the Hex-Rays decompiler, one of the more interesting features of ida_kernelcache 183 | is the automatic C++ class reconstruction, which will use the OSMetaClass information and data flow 184 | analysis to create IDA structs to represent the classes found in the kernelcache. These 185 | representations can dramatically improve the readability of the pseudocode representation. To learn 186 | more, see the post [Reconstructing C++ classes in the iOS kernelcache using IDA Pro]. 187 | 188 | [Reconstructing C++ classes in the iOS kernelcache using IDA Pro]: https://bazad.github.io/2018/03/ida-kernelcache-class-reconstruction/ 189 | 190 | ## The new iOS 12 kernelcache format 191 | 192 | With iOS 12, Apple introduced a new kernelcache format on some devices. Among the changes, this new 193 | kernelcache's kernel pointers are tagged to link them in a list, presumably to allow iBoot to slide 194 | the kernel without the `_PrelinkLinkKASLROffsets` data in the prelink dictionary. Trying to analyze 195 | a stock kernelcache using this format in IDA is difficult due to the missing cross-references. See 196 | the article [Analyzing the iOS 12 kernelcache's tagged pointers] for details. 197 | 198 | [Analyzing the iOS 12 kernelcache's tagged pointers]: https://bazad.github.io/2018/06/ios-12-kernelcache-tagged-pointers/ 199 | 200 | If you just want to untag the pointers in the kernelcache without performing any additional 201 | processing, run `kc.tagged_pointers.untag_pointers()`. 202 | 203 | ## A note on generalizing 204 | 205 | Some of this functionality likely applies more broadly than just to Apple kernelcaches (for 206 | example, vtable analysis and symbol propagation, or most of the functions in `ida_utilities.py`). 207 | Nonetheless, I've limited the import scope to just the `ida_kernelcache` module because I have not 208 | tested any of this on other types of binaries. 209 | 210 | ## License 211 | 212 | ida_kernelcache is released under the MIT license. 213 | 214 | Much of the functionality in ida_kernelcache is borrowed from [memctl], which is also released 215 | under the MIT license. Other sources are noted in the comments in the corresponding files. 216 | 217 | 218 | --------------------------------------------------------------------------------------------------- 219 | Brandon Azad 220 | -------------------------------------------------------------------------------- /ida_kernelcache/data_flow.py: -------------------------------------------------------------------------------- 1 | # 2 | # ida_kernelcache/data_flow.py 3 | # Brandon Azad 4 | # 5 | # A module for data flows. 6 | # 7 | """ida_kernelcache.data_flow 8 | 9 | This module contains functions that perform various types of data flow operations on functions or 10 | code ranges. Currently only Arm64 is supported. 11 | 12 | While it is possible to implement a very generic data flow framework, allowing custom data flows to 13 | be implemented entirely externally and with little or no knowledge of the underlying architecture, 14 | this module does not take that approach, for reasons of simplicity and efficiency. 15 | 16 | """ 17 | 18 | import collections 19 | 20 | import idc 21 | import idautils 22 | import idaapi 23 | 24 | import ida_utilities as idau 25 | 26 | _log = idau.make_log(2, __name__) 27 | 28 | _INSN_OP_CHG = [ 29 | idaapi.CF_CHG1, 30 | idaapi.CF_CHG2, 31 | idaapi.CF_CHG3, 32 | idaapi.CF_CHG4, 33 | idaapi.CF_CHG5, 34 | idaapi.CF_CHG6, 35 | ] 36 | 37 | _INSN_OP_DTYP_SZ = { 38 | idaapi.dt_byte: 1, 39 | idaapi.dt_word: 2, 40 | idaapi.dt_dword: 4, 41 | idaapi.dt_qword: 8, 42 | } 43 | 44 | _ARM64_WRITEBACK = 0x20 | 0x80 45 | 46 | def _create_flow(function, bounds): 47 | """Create a FlowChart.""" 48 | f, b = None, None 49 | if function is not None: 50 | f = idaapi.get_func(function) 51 | if f is None: 52 | _log(0, 'Bad func {:#x}', func) 53 | return None 54 | if bounds is not None: 55 | b = (start, end) 56 | return idaapi.FlowChart(f=f, bounds=b) 57 | 58 | def _add_blocks_to_queue(queue, flow, addresses): 59 | for ea in addresses: 60 | for bb in flow: 61 | if bb.startEA <= ea < bb.endEA: 62 | queue.append(bb) 63 | break 64 | else: 65 | _log(2, 'Address {:#x} not contained in any basic block', ea) 66 | 67 | def _pointer_accesses_process_block(start, end, fix, entry_regs, accesses): 68 | """Process a basic block for _pointer_accesses_data_flow. 69 | 70 | Arm64 only.""" 71 | # NOTE: Some object accesses (to large offsets) are encoded in the following style: 72 | # MOV W8, #0x9210 73 | # STR X0, [X19,X8] 74 | # We try to catch these by keeping track of local constants within a block. 75 | RegValue = collections.namedtuple('RegValue', ['type', 'value']) 76 | DELTA = 0 # Pointer delta from start of target memory region. 77 | CONST = 1 # Constant value 78 | def get_reg(reg, type): 79 | rv = regs.get(reg, None) 80 | if rv is None or rv.type != type: 81 | return None 82 | return rv.value 83 | 84 | # Initialize our registers and create accessor functions. 85 | regs = { reg: RegValue(DELTA, delta) for reg, delta in entry_regs.items() } 86 | 87 | # For each instruction in the basic block, see if any new register gets assigned. 88 | for insn in idau.Instructions(start, end): 89 | # First, if this instruction has a fixed state (i.e., a set mapping of registers to 90 | # deltas), set that state. This overwrites any previous values, so care must be taken by 91 | # the caller to ensure that this initialization is correct. 92 | fixed_regs_and_deltas = fix.get(insn.ea) 93 | if fixed_regs_and_deltas: 94 | for reg, delta in fixed_regs_and_deltas.items(): 95 | _log(6, '\t\t{:x} fix {}={}', insn.ea, reg, delta) 96 | regs[reg] = RegValue(DELTA, delta) 97 | # If this is an access instruction, record the access. See comment about auxpref below. 98 | if not (insn.auxpref & _ARM64_WRITEBACK): 99 | for op in insn.Operands: 100 | # We only consider o_displ and o_phrase. 101 | if op.type == idaapi.o_void: 102 | break 103 | elif op.type not in (idaapi.o_displ, idaapi.o_phrase): 104 | continue 105 | # Get the delta for the base register. 106 | delta = get_reg(op.reg, DELTA) 107 | if delta is None: 108 | continue 109 | # Get the instruction access size. 110 | size = _INSN_OP_DTYP_SZ.get(op.dtyp) 111 | if size is None: 112 | continue 113 | # Get the offset from the base register (which is additional to the base register's 114 | # delta). 115 | op_offset = None 116 | if op.type == idaapi.o_displ: 117 | op_offset = op.addr 118 | else: # op.type == idaapi.o_phrase 119 | op_offset_reg = op.specflag1 & 0xff 120 | op_offset = get_reg(op_offset_reg, CONST) 121 | if op_offset is None: 122 | continue 123 | # Record this access. 124 | offset = (delta + op_offset) & 0xffffffffffffffff 125 | _log(5, '\t\t{:x} access({}) {}, {}', insn.ea, op.reg, offset, size) 126 | accesses[(offset, size)].add((insn.ea, delta)) 127 | # Update the set of registers pointing to the struct, and the set of known constant 128 | # registers. 129 | if (insn.itype == idaapi.ARM_mov 130 | and insn.Op1.type == idaapi.o_reg 131 | and insn.Op2.type == idaapi.o_reg 132 | and insn.Op3.type == idaapi.o_void 133 | and insn.Op1.dtyp == idaapi.dt_qword 134 | and insn.Op2.dtyp == idaapi.dt_qword 135 | and insn.Op2.reg in regs): 136 | # MOV Xdst, Xsrc 137 | _log(6, '\t\t{:x} add {}={}', insn.ea, insn.Op1.reg, regs[insn.Op2.reg].value) 138 | regs[insn.Op1.reg] = regs[insn.Op2.reg] 139 | elif (insn.itype == idaapi.ARM_mov 140 | and insn.Op1.type == idaapi.o_reg 141 | and insn.Op2.type == idaapi.o_imm 142 | and insn.Op3.type == idaapi.o_void 143 | and insn.Op1.dtyp in (idaapi.dt_dword, idaapi.dt_qword)): 144 | # MOV Xdst, #imm 145 | _log(7, '\t\t{:x} const {}={}', insn.ea, insn.Op1.reg, insn.Op2.value) 146 | regs[insn.Op1.reg] = RegValue(CONST, insn.Op2.value) 147 | elif (insn.itype == idaapi.ARM_add 148 | and insn.Op1.type == idaapi.o_reg 149 | and insn.Op2.type == idaapi.o_reg 150 | and insn.Op3.type == idaapi.o_imm 151 | and insn.Op4.type == idaapi.o_void 152 | and insn.Op1.dtyp == idaapi.dt_qword 153 | and insn.Op2.dtyp == idaapi.dt_qword 154 | and insn.Op2.reg in regs): 155 | # ADD Xdst, Xsrc, #amt 156 | op2 = regs[insn.Op2.reg] 157 | _log(6, '\t\t{:x} add {}={}+{}', insn.ea, insn.Op1.reg, op2.value, insn.Op3.value) 158 | regs[insn.Op1.reg] = RegValue(op2.type, op2.value + insn.Op3.value) 159 | elif (insn.itype == idaapi.ARM_bl or insn.itype == idaapi.ARM_blr): 160 | # A function call (direct or indirect). Any correct compiler should generate code that 161 | # does not use the temporary registers after a call, but just to be safe, clear all the 162 | # temporary registers. 163 | _log(6, '\t\t{:x} clear temps', insn.ea) 164 | for r in xrange(0, 19): 165 | regs.pop(getattr(idautils.procregs, 'X{}'.format(r)).reg, None) 166 | else: 167 | # This is an unrecognized instruction. Clear all the registers it modifies. 168 | feature = insn.get_canon_feature() 169 | # On Arm64, LDR-type instructions store their writeback behavior in the instructions's 170 | # auxpref flags. As best I can tell, insn.get_canon_feature()'s CF_CHG* flags indicate 171 | # whether the operand will change, which is different than the register changing for 172 | # operands like o_displ that use a register to refer to a memory location. Thus, we 173 | # actually need to special case auxpref and clear those registers. Fortunately, 174 | # writeback behavior is only observed in o_displ operands, of which there should only 175 | # ever be one, so it doesn't matter that auxpref is stored on the instruction and not 176 | # the operand. 177 | for op in insn.Operands: 178 | if op.type == idaapi.o_void: 179 | break 180 | if ((feature & _INSN_OP_CHG[op.n] and op.type == idaapi.o_reg) 181 | or (insn.auxpref & _ARM64_WRITEBACK and op.type == idaapi.o_displ)): 182 | _log(6, '\t\t{:x} clear {}', insn.ea, op.reg) 183 | regs.pop(op.reg, None) 184 | return { reg: rv.value for reg, rv in regs.items() if rv.type == DELTA } 185 | 186 | def _pointer_accesses_data_flow(flow, initialization, accesses): 187 | """Run the data flow for pointer_accesses.""" 188 | # bb_regs maps each block id to another map from register ids to corresponding struct offsets 189 | # at the start of the block. We don't consider the case where a register could contain more 190 | # than one possible offset. 191 | bb_regs = { bb.id: {} for bb in flow } 192 | # We'll start by processing those blocks that have an initial value. 193 | queue = collections.deque() 194 | _add_blocks_to_queue(queue, flow, initialization) 195 | # Process each block, propagating its set of registers to its successors. This isn't quite a 196 | # true data flow: We should run it until there are no more changes, then check the accesses 197 | # conditions only once it's stabilized. The difference occurs when we've processed block A, 198 | # which had register R with offset O on entry, then later found a block B that jumps back to 199 | # block A with register R set to a different offset O'. Ideally we should invalidate the 200 | # register R at the start of A and undo any accesses it generated. However, in practice the 201 | # only way this will occur is in a loop, which is usually going to be a valid access to the 202 | # structure on the first iteration. The case we're worried about is when the A->B->A loop 203 | # cycles infinitely, giving us the (mistaken) impression that our structure is infinite. We can 204 | # eliminate this possibility and also get better results if we just decline to update register 205 | # R with offset O' after processing block A, effectively ignoring loops that increment an 206 | # offset register. 207 | while queue: 208 | bb = queue.popleft() 209 | entry_regs = bb_regs[bb.id] 210 | _log(3, 'Basic block {} {:x}-{:x}', bb.id, bb.startEA, bb.endEA) 211 | _log(4, '\tregs@entry = {}', entry_regs) 212 | exit_regs = _pointer_accesses_process_block(bb.startEA, bb.endEA, initialization, 213 | entry_regs, accesses) 214 | _log(4, '\tregs@exit = {}', exit_regs) 215 | _log(4, '\tsuccs = {}', [s.id for s in bb.succs()]) 216 | for succ in bb.succs(): 217 | # Add the registers at the end of the block to the registers at the start of its 218 | # successors' blocks. This is a union since we will track accesses to any register 219 | # that can point to the struct along any path. As discussed above, any register that 220 | # already had an offset for a successor is ignored. 221 | succ_regs = bb_regs[succ.id] 222 | update = False 223 | for reg in exit_regs: 224 | if reg not in succ_regs: 225 | update = True 226 | succ_regs[reg] = exit_regs[reg] 227 | # If we added a new register, then we'll process the successor block (again). 228 | if update: 229 | queue.append(succ) 230 | 231 | def pointer_accesses(function=None, bounds=None, initialization=None, accesses=None): 232 | """Collect the set of accesses to a pointer register. 233 | 234 | In the flow graph defined by the specified function or code region, find all accesses to the 235 | memory region pointed to initially by the given register. 236 | 237 | Options: 238 | function: The address of the function to analyze. Any address within the function may be 239 | specified. Default is None. 240 | bounds: A (start, end) tuple containing the start and end addresses of the code region to 241 | analyze. Default is None. 242 | initialization: A dictionary of dictionaries, specifying for each instruction start 243 | address, which registers have which offsets into the memory region of interest. More 244 | precisely: The keys of initialization are the linear addresses of those instructions 245 | for which we know that some register points into the memory region of interest. For 246 | each such instruction, initialization[address] is a map whose keys are the register 247 | numbers of the registers that point into the memory region. Finally, 248 | initialization[address][register] is the delta between the start of the memory region 249 | and where the register points (positive values indicate the register points to a higher 250 | address than the start). This option must be supplied. 251 | accesses: If not None, then the given dictionary will be populated with the accesses, 252 | rather than creating and returning a new dictionary. This dictionary must be of type 253 | collections.defaultdict(set). Default is None. 254 | 255 | Returns: 256 | If accesses is None (the default), returns a dictionary mapping each (offset, size) tuple 257 | to the set of (address, delta) tuples that performed that access. 258 | 259 | Notes: 260 | Either a function or a code region must be specified. You cannot supply both. 261 | 262 | A common use case is analyzing a function for which we know that one register on entry 263 | points to a structure. For example, say that the function at address 0x4000 takes as an 264 | argument in register 10 a pointer 144 bytes in to an unknown structure. The appropriate 265 | initialization dictionary would be: 266 | { 0x4000: { 10: 144 } } 267 | """ 268 | # Create the FlowChart. 269 | flow = _create_flow(function, bounds) 270 | if flow is None: 271 | return None 272 | # Get the set of (offset, size) accesses by running a data flow. 273 | create = accesses is None 274 | if create: 275 | accesses = collections.defaultdict(set) 276 | _pointer_accesses_data_flow(flow, initialization, accesses) 277 | if create: 278 | accesses = dict(accesses) 279 | return accesses 280 | 281 | -------------------------------------------------------------------------------- /ida_kernelcache/collect_classes.py: -------------------------------------------------------------------------------- 1 | # 2 | # ida_kernelcache/collect_classes.py 3 | # Brandon Azad 4 | # 5 | # Collects information about C++ classes in a kernelcache. 6 | # 7 | 8 | from collections import defaultdict 9 | 10 | import idc 11 | import idautils 12 | import idaapi 13 | 14 | import ida_utilities as idau 15 | import classes 16 | import segment 17 | import symbol 18 | import vtable 19 | 20 | _log = idau.make_log(1, __name__) 21 | 22 | # IDK where IDA defines these. 23 | _MEMOP_PREINDEX = 0x20 24 | _MEMOP_POSTINDEX = 0x80 25 | 26 | _MEMOP_WBINDEX = _MEMOP_PREINDEX | _MEMOP_POSTINDEX 27 | 28 | class _Regs(object): 29 | """A set of registers for _emulate_arm64.""" 30 | 31 | class _Unknown: 32 | """A wrapper class indicating that the value is unknown.""" 33 | def __add__(self, other): 34 | return _Regs.Unknown 35 | def __radd__(self, other): 36 | return _Regs.Unknown 37 | def __nonzero__(self): 38 | return False 39 | 40 | _reg_names = idautils.GetRegisterList() 41 | Unknown = _Unknown() 42 | 43 | def __init__(self): 44 | self.clearall() 45 | 46 | def clearall(self): 47 | self._regs = {} 48 | 49 | def clear(self, reg): 50 | try: 51 | del self._regs[self._reg(reg)] 52 | except KeyError: 53 | pass 54 | 55 | def _reg(self, reg): 56 | if isinstance(reg, (int, long)): 57 | reg = _Regs._reg_names[reg] 58 | return reg 59 | 60 | def __getitem__(self, reg): 61 | try: 62 | return self._regs[self._reg(reg)] 63 | except: 64 | return _Regs.Unknown 65 | 66 | def __setitem__(self, reg, value): 67 | if value is None or value is _Regs.Unknown: 68 | self.clear(self._reg(reg)) 69 | else: 70 | self._regs[self._reg(reg)] = value & 0xffffffffffffffff 71 | 72 | def _emulate_arm64(start, end, on_BL=None, on_RET=None): 73 | """A very basic partial Arm64 emulator that does just enough to find OSMetaClass 74 | information.""" 75 | # Super basic emulation. 76 | reg = _Regs() 77 | def load(addr, dtyp): 78 | if not addr: 79 | return None 80 | if dtyp == idaapi.dt_qword: 81 | size = 8 82 | elif dtyp == idaapi.dt_dword: 83 | size = 4 84 | else: 85 | return None 86 | return idau.read_word(addr, size) 87 | def cleartemps(): 88 | for t in ['X{}'.format(i) for i in range(0, 19)]: 89 | reg.clear(t) 90 | for insn in idau.Instructions(start, end): 91 | _log(11, 'Processing instruction {:#x}', insn.ea) 92 | mnem = insn.get_canon_mnem() 93 | if mnem == 'ADRP' or mnem == 'ADR': 94 | reg[insn.Op1.reg] = insn.Op2.value 95 | elif mnem == 'ADD' and insn.Op2.type == idc.o_reg and insn.Op3.type == idc.o_imm: 96 | reg[insn.Op1.reg] = reg[insn.Op2.reg] + insn.Op3.value 97 | elif mnem == 'NOP': 98 | pass 99 | elif mnem == 'MOV' and insn.Op2.type == idc.o_imm: 100 | reg[insn.Op1.reg] = insn.Op2.value 101 | elif mnem == 'MOV' and insn.Op2.type == idc.o_reg: 102 | reg[insn.Op1.reg] = reg[insn.Op2.reg] 103 | elif mnem == 'RET': 104 | if on_RET: 105 | on_RET(reg) 106 | break 107 | elif (mnem == 'STP' or mnem == 'LDP') and insn.Op3.type == idc.o_displ: 108 | if insn.auxpref & _MEMOP_WBINDEX: 109 | reg[insn.Op3.reg] = reg[insn.Op3.reg] + insn.Op3.addr 110 | if mnem == 'LDP': 111 | reg.clear(insn.Op1.reg) 112 | reg.clear(insn.Op2.reg) 113 | elif (mnem == 'STR' or mnem == 'LDR') and not insn.auxpref & _MEMOP_WBINDEX: 114 | if mnem == 'LDR': 115 | if insn.Op2.type == idc.o_displ: 116 | reg[insn.Op1.reg] = load(reg[insn.Op2.reg] + insn.Op2.addr, insn.Op1.dtyp) 117 | else: 118 | reg.clear(insn.Op1.reg) 119 | elif mnem == 'BL' and insn.Op1.type == idc.o_near: 120 | if on_BL: 121 | on_BL(insn.Op1.addr, reg) 122 | cleartemps() 123 | else: 124 | _log(10, 'Unrecognized instruction at address {:#x}', insn.ea) 125 | reg.clearall() 126 | 127 | class _OneToOneMapFactory(object): 128 | """A factory to extract the largest one-to-one submap.""" 129 | 130 | def __init__(self): 131 | self._as_to_bs = defaultdict(set) 132 | self._bs_to_as = defaultdict(set) 133 | 134 | def add_link(self, a, b): 135 | """Add a link between the two objects.""" 136 | self._as_to_bs[a].add(b) 137 | self._bs_to_as[b].add(a) 138 | 139 | def _make_unique_oneway(self, xs_to_ys, ys_to_xs, bad_x=None): 140 | """Internal helper to make one direction unique.""" 141 | for x, ys in xs_to_ys.items(): 142 | if len(ys) != 1: 143 | if bad_x: 144 | bad_x(x, ys) 145 | del xs_to_ys[x] 146 | for y in ys: 147 | del ys_to_xs[y] 148 | 149 | def _build_oneway(self, xs_to_ys): 150 | """Build a one-way mapping after pruning.""" 151 | x_to_y = dict() 152 | for x, ys in xs_to_ys.items(): 153 | x_to_y[x] = next(iter(ys)) 154 | return x_to_y 155 | 156 | def build(self, bad_a=None, bad_b=None): 157 | """Extract the smallest one-to-one submap.""" 158 | as_to_bs = dict(self._as_to_bs) 159 | bs_to_as = dict(self._bs_to_as) 160 | self._make_unique_oneway(as_to_bs, bs_to_as, bad_a) 161 | self._make_unique_oneway(bs_to_as, as_to_bs, bad_b) 162 | return self._build_oneway(as_to_bs) 163 | 164 | def _process_mod_init_func_for_metaclasses(func, found_metaclass): 165 | """Process a function from the __mod_init_func section for OSMetaClass information.""" 166 | _log(4, 'Processing function {}', idc.GetFunctionName(func)) 167 | def on_BL(addr, reg): 168 | X0, X1, X3 = reg['X0'], reg['X1'], reg['X3'] 169 | if not (X0 and X1 and X3): 170 | return 171 | _log(5, 'Have call to {:#x}({:#x}, {:#x}, ?, {:#x})', addr, X0, X1, X3) 172 | # OSMetaClass::OSMetaClass(this, className, superclass, classSize) 173 | if not idc.SegName(X1).endswith("__TEXT.__cstring") or not idc.SegName(X0): 174 | return 175 | found_metaclass(X0, idc.GetString(X1), X3, reg['X2'] or None) 176 | _emulate_arm64(func, idc.FindFuncEnd(func), on_BL=on_BL) 177 | 178 | def _process_mod_init_func_section_for_metaclasses(segstart, found_metaclass): 179 | """Process a __mod_init_func section for OSMetaClass information.""" 180 | segend = idc.SegEnd(segstart) 181 | for func in idau.ReadWords(segstart, segend): 182 | _process_mod_init_func_for_metaclasses(func, found_metaclass) 183 | 184 | def _should_process_segment(seg, segname): 185 | """Check if we should process the specified segment.""" 186 | return segname.endswith('__DATA_CONST.__mod_init_func') or \ 187 | segname == '__DATA.__kmod_init' 188 | 189 | def _collect_metaclasses(): 190 | """Collect OSMetaClass information from all kexts in the kernelcache.""" 191 | # Collect associations from class names to metaclass instances and vice versa. 192 | metaclass_to_classname_builder = _OneToOneMapFactory() 193 | metaclass_to_class_size = dict() 194 | metaclass_to_meta_superclass = dict() 195 | def found_metaclass(metaclass, classname, class_size, meta_superclass): 196 | metaclass_to_classname_builder.add_link(metaclass, classname) 197 | metaclass_to_class_size[metaclass] = class_size 198 | metaclass_to_meta_superclass[metaclass] = meta_superclass 199 | for ea in idautils.Segments(): 200 | segname = idc.SegName(ea) 201 | if not _should_process_segment(ea, segname): 202 | continue 203 | _log(2, 'Processing segment {}', segname) 204 | _process_mod_init_func_section_for_metaclasses(ea, found_metaclass) 205 | # Filter out any class name (and its associated metaclasses) that has multiple metaclasses. 206 | # This can happen when multiple kexts define a class but only one gets loaded. 207 | def bad_classname(classname, metaclasses): 208 | _log(0, 'Class {} has multiple metaclasses: {}', classname, 209 | ', '.join(['{:#x}'.format(mc) for mc in metaclasses])) 210 | # Filter out any metaclass (and its associated class names) that has multiple class names. I 211 | # have no idea why this would happen. 212 | def bad_metaclass(metaclass, classnames): 213 | _log(0, 'Metaclass {:#x} has multiple classes: {}', metaclass, 214 | ', '.join(classnames)) 215 | # Return the final dictionary of metaclass info. 216 | metaclass_to_classname = metaclass_to_classname_builder.build(bad_metaclass, bad_classname) 217 | metaclass_info = dict() 218 | for metaclass, classname in metaclass_to_classname.items(): 219 | meta_superclass = metaclass_to_meta_superclass[metaclass] 220 | superclass_name = metaclass_to_classname.get(meta_superclass, None) 221 | metaclass_info[metaclass] = classes.ClassInfo(classname, metaclass, None, None, 222 | metaclass_to_class_size[metaclass], superclass_name, meta_superclass) 223 | return metaclass_info 224 | 225 | _VTABLE_GETMETACLASS = vtable.VTABLE_OFFSET + 7 226 | _MAX_GETMETACLASS_INSNS = 3 227 | 228 | def _get_vtable_metaclass(vtable_addr, metaclass_info): 229 | """Simulate the getMetaClass method of the vtable and check if it returns an OSMetaClass.""" 230 | getMetaClass = idau.read_word(vtable_addr + _VTABLE_GETMETACLASS * idau.WORD_SIZE) 231 | def on_RET(reg): 232 | on_RET.ret = reg['X0'] 233 | on_RET.ret = None 234 | _emulate_arm64(getMetaClass, getMetaClass + idau.WORD_SIZE * _MAX_GETMETACLASS_INSNS, 235 | on_RET=on_RET) 236 | if on_RET.ret in metaclass_info: 237 | return on_RET.ret 238 | 239 | def _process_const_section_for_vtables(segstart, metaclass_info, found_vtable): 240 | """Process a __const section to search for virtual method tables.""" 241 | segend = idc.SegEnd(segstart) 242 | addr = segstart 243 | while addr < segend: 244 | possible, length = vtable.vtable_length(addr, segend, scan=True) 245 | if possible: 246 | metaclass = _get_vtable_metaclass(addr, metaclass_info) 247 | if metaclass: 248 | _log(4, 'Vtable at address {:#x} has metaclass {:#x}', addr, metaclass) 249 | found_vtable(metaclass, addr, length) 250 | addr += length * idau.WORD_SIZE 251 | 252 | def _collect_vtables(metaclass_info): 253 | """Use OSMetaClass information to search for virtual method tables.""" 254 | # Build a mapping from OSMetaClass instances to virtual method tables. 255 | metaclass_to_vtable_builder = _OneToOneMapFactory() 256 | vtable_lengths = {} 257 | # Define a callback for when we find a vtable. 258 | def found_vtable(metaclass, vtable, length): 259 | # Add our vtable length. 260 | vtable_lengths[vtable] = length 261 | # If our classname has a defined vtable symbol and that symbol's address isn't this vtable, 262 | # don't add the link. 263 | classname = metaclass_info[metaclass].classname 264 | proper_vtable_symbol = symbol.vtable_symbol_for_class(classname) 265 | proper_vtable_symbol_ea = idau.get_name_ea(proper_vtable_symbol) 266 | if proper_vtable_symbol_ea not in (idc.BADADDR, vtable): 267 | return 268 | # If our vtable has a symbol and it doesn't match the metaclass, skip adding a link. 269 | vtable_symbol = idau.get_ea_name(vtable, user=True) 270 | if vtable_symbol: 271 | vtable_classname = symbol.vtable_symbol_get_class(vtable_symbol) 272 | if vtable_classname != classname: 273 | _log(2, 'Declining association between metaclass {:x} ({}) and vtable {:x} ({})', 274 | metaclass, classname, vtable, vtable_classname) 275 | return 276 | # Add a link if they are in the same kext. 277 | if segment.kernelcache_kext(metaclass) == segment.kernelcache_kext(vtable): 278 | metaclass_to_vtable_builder.add_link(metaclass, vtable) 279 | # Process all the segments with found_vtable(). 280 | for ea in idautils.Segments(): 281 | segname = idc.SegName(ea) 282 | if not segname.endswith('__DATA_CONST.__const'): 283 | continue 284 | _log(2, 'Processing segment {}', segname) 285 | _process_const_section_for_vtables(ea, metaclass_info, found_vtable) 286 | # If a metaclass has multiple vtables, that's really weird, unless the metaclass is 287 | # OSMetaClass's metaclass. In that case all OSMetaClass subclasses will have their vtables 288 | # refer back to OSMetaClass's metaclass. 289 | def bad_metaclass(metaclass, vtables): 290 | metaclass_name = metaclass_info[metaclass].classname 291 | if metaclass_name != 'OSMetaClass': 292 | vtinfo = ['{:#x}'.format(vt) for vt in vtables] 293 | _log(0, 'Metaclass {:#x} ({}) has multiple vtables: {}', metaclass, 294 | metaclass_name, ', '.join(vtinfo)) 295 | # If a vtable has multiple metaclasses, that's really weird. 296 | def bad_vtable(vtable, metaclasses): 297 | mcinfo = ['{:#x} ({})'.format(mc, metaclass_info[mc].classname) for mc in metaclasses] 298 | _log(0, 'Vtable {:#x} has multiple metaclasses: {}', vtable, ', '.join(mcinfo)) 299 | metaclass_to_vtable = metaclass_to_vtable_builder.build(bad_metaclass, bad_vtable) 300 | # The resulting mapping may have fewer metaclasses than metaclass_info. 301 | class_info = dict() 302 | for metaclass, classinfo in metaclass_info.items(): 303 | # Add the vtable and its length, which we didn't have earlier. If the current class doesn't 304 | # have a vtable, take it from the superclass (recursing if necessary). 305 | metaclass_with_vtable = metaclass 306 | while metaclass_with_vtable: 307 | vtable = metaclass_to_vtable.get(metaclass_with_vtable, None) 308 | if vtable: 309 | classinfo.vtable = vtable 310 | classinfo.vtable_length = vtable_lengths[vtable] 311 | break 312 | classinfo_with_vtable = metaclass_info.get(metaclass_with_vtable, None) 313 | if not classinfo_with_vtable: 314 | break 315 | metaclass_with_vtable = classinfo_with_vtable.meta_superclass 316 | # Set the superclass field and add the current classinfo to the superclass's children. This 317 | # is safe since this is the last filtering operation. 318 | superclass = metaclass_info.get(classinfo.meta_superclass, None) 319 | if superclass: 320 | classinfo.superclass = metaclass_info[classinfo.meta_superclass] 321 | classinfo.superclass.subclasses.add(classinfo) 322 | # Add the classinfo to the final dictionary. 323 | class_info[classinfo.classname] = classinfo 324 | return class_info, vtable_lengths 325 | 326 | def _check_filetype(filetype): 327 | """Checks that the filetype is compatible before trying to process it.""" 328 | return 'Mach-O' in filetype and 'ARM64' in filetype 329 | 330 | def collect_class_info_internal(): 331 | """Collect information about C++ classes defined in a kernelcache. 332 | 333 | Arm64 only. 334 | """ 335 | filetype = idaapi.get_file_type_name() 336 | if not _check_filetype(filetype): 337 | _log(-1, 'Bad file type "{}"', filetype) 338 | return None 339 | _log(1, 'Collecting information about OSMetaClass instances') 340 | metaclass_info = _collect_metaclasses() 341 | if not metaclass_info: 342 | _log(-1, 'Could not collect OSMetaClass instances') 343 | return None 344 | _log(1, 'Searching for virtual method tables') 345 | class_info, all_vtables = _collect_vtables(metaclass_info) 346 | if not class_info: 347 | _log(-1, 'Could not collect virtual method tables') 348 | return None 349 | _log(1, 'Done') 350 | return class_info, all_vtables 351 | 352 | -------------------------------------------------------------------------------- /ida_kernelcache/vtable.py: -------------------------------------------------------------------------------- 1 | # 2 | # ida_kernelcache/vtable.py 3 | # Brandon Azad 4 | # 5 | # Functions for analyzing and symbolicating vtables in the kernelcache. 6 | # 7 | 8 | from itertools import islice, takewhile 9 | 10 | import idc 11 | import idautils 12 | 13 | from symbol import vtable_symbol_for_class 14 | import ida_utilities as idau 15 | import classes 16 | import stub 17 | 18 | _log = idau.make_log(0, __name__) 19 | 20 | VTABLE_OFFSET = 2 21 | """The first few entries of the virtual method tables in the kernelcache are empty.""" 22 | MIN_VTABLE_METHODS = 12 23 | """The minimum number of methods in a virtual method table.""" 24 | MIN_VTABLE_LENGTH = VTABLE_OFFSET + MIN_VTABLE_METHODS 25 | """The minimum length of a virtual method table in words, including the initial empty entries.""" 26 | 27 | def vtable_length(ea, end=None, scan=False): 28 | """Find the length of a virtual method table. 29 | 30 | This function checks whether the effective address could correspond to a virtual method table 31 | and calculates its length, including the initial empty entries. By default (when scan is 32 | False), this function returns the length of the vtable if the address could correspond to a 33 | vtable, or 0 if the address definitely could not be a vtable. 34 | 35 | Arguments: 36 | ea: The linear address of the start of the vtable. 37 | 38 | Options: 39 | end: The end address to search through. Defaults to the end of the section. 40 | scan: Set to True to indicate that this function is being called to scan memory for virtual 41 | method tables. Instead of returning the length of the vtable or 0, this function will 42 | return a tuple (possible, length). Additionally, as a slight optimization, this 43 | function will sometimes look ahead in order to increase the amount of data that can be 44 | skipped, reducing duplication of effort between subsequent calls. 45 | 46 | Returns: 47 | If scan is False (the default), then this function returns the length of the vtable in 48 | words, including the initial empty entries. 49 | 50 | Otherwise, this function returns a tuple (possible, length). If the address could 51 | correspond to the start of a vtable, then possible is True and length is the length of the 52 | vtable in words, including the initial empty entries. Otherwise, if the address is 53 | definitely not the start of a vtable, then possible is False and length is the number of 54 | words that can be skipped when searching for the next vtable. 55 | """ 56 | # TODO: This function should be reorganized. The better way of doing it is to count the number 57 | # of zero entries, then the number of nonzero entries, then decide based on that. Less 58 | # special-casing that way. 59 | # TODO: We should have a static=True/False flag to indicate whether we want to include the 60 | # empty entries. 61 | def return_value(possible, length): 62 | if scan: 63 | return possible, length 64 | return length if possible else 0 65 | # Initialize default values. 66 | if end is None: 67 | end = idc.SegEnd(ea) 68 | words = idau.ReadWords(ea, end) 69 | # Iterate through the first VTABLE_OFFSET words. If any of them are nonzero, then we can skip 70 | # past all the words we just saw. 71 | for idx, word in enumerate(islice(words, VTABLE_OFFSET)): 72 | if word != 0: 73 | return return_value(False, idx + 1) 74 | # Now this first word after the padding section is special. 75 | first = next(words, None) 76 | if first is None: 77 | # We have 2 zeros followed by the end of our range. 78 | return return_value(False, VTABLE_OFFSET) 79 | elif first == 0: 80 | # We have VTABLE_OFFSET + 1 zero entries. 81 | zeros = VTABLE_OFFSET + 1 82 | if scan: 83 | # To avoid re-reading the data we just read in the case of a zero-filled section, let's 84 | # look ahead a bit until we find the first non-zero value. 85 | for word in words: 86 | if word is None: 87 | return return_value(False, zeros) 88 | if word != 0: 89 | break 90 | zeros += 1 91 | else: 92 | # We found no nonzero words before the end. 93 | return return_value(False, zeros) 94 | # We can skip all but the last VTABLE_OFFSET zeros. 95 | return return_value(False, zeros - VTABLE_OFFSET) 96 | # TODO: We should verify that all vtable entries refer to code. 97 | # Now we know that we have at least one nonzero value, our job is easier. Get the full length 98 | # of the vtable, including the first VTABLE_OFFSET entries and the subsequent nonzero entries, 99 | # until either we find a zero word (not included) or run out of words in the stream. 100 | length = VTABLE_OFFSET + 1 + idau.iterlen(takewhile(lambda word: word != 0, words)) 101 | # Now it's simple: We are valid if the length is long enough, invalid if it's too short. 102 | return return_value(length >= MIN_VTABLE_LENGTH, length) 103 | 104 | def convert_vtable_to_offsets(vtable, length=None): 105 | """Convert a vtable into a sequence of offsets. 106 | 107 | Arguments: 108 | vtable: The address of the virtual method table. 109 | 110 | Options: 111 | length: The length of the vtable, if known. 112 | 113 | Returns: 114 | True if the data was successfully converted into offsets. 115 | """ 116 | if length is None: 117 | length = vtable_length(vtable) 118 | if not length: 119 | _log(0, 'Address {:#x} is not a vtable', vtable) 120 | return False 121 | successful = True 122 | for address in idau.Addresses(vtable, length=length, step=idau.WORD_SIZE): 123 | if not idc.OpOff(address, 0, 0): 124 | _log(0, 'Could not change address {:#x} into an offset', address) 125 | successful = False 126 | return successful 127 | 128 | def _convert_vtable_methods_to_functions(vtable, length): 129 | """Convert each virtual method in the vtable into an IDA function.""" 130 | for vmethod in vtable_methods(vtable, length=length): 131 | if not idau.force_function(vmethod): 132 | _log(0, 'Could not convert virtual method {:#x} into a function', vmethod) 133 | 134 | def initialize_vtables(): 135 | """Convert vtables into offsets and ensure that virtual methods are IDA functions.""" 136 | classes.collect_class_info() 137 | for vtable, length in classes.vtables.items(): 138 | if not convert_vtable_to_offsets(vtable, length): 139 | _log(0, 'Could not convert vtable at address {:x} into offsets', vtable) 140 | _convert_vtable_methods_to_functions(vtable, length) 141 | 142 | def add_vtable_symbol(vtable, classname): 143 | """Add a symbol for the virtual method table at the specified address. 144 | 145 | Arguments: 146 | vtable: The address of the virtual method table. 147 | classname: The name of the C++ class with this virtual method table. 148 | 149 | Returns: 150 | True if the data was successfully converted into a vtable and the symbol was added. 151 | """ 152 | vtable_symbol = vtable_symbol_for_class(classname) 153 | if not idau.set_ea_name(vtable, vtable_symbol): 154 | _log(0, 'Address {:#x} already has name {} instead of vtable symbol {}' 155 | .format(vtable, idau.get_ea_name(vtable), vtable_symbol)) 156 | return False 157 | return True 158 | 159 | def initialize_vtable_symbols(): 160 | """Populate IDA with virtual method table symbols for an iOS kernelcache.""" 161 | classes.collect_class_info() 162 | for classname, classinfo in classes.class_info.items(): 163 | if classinfo.vtable: 164 | _log(3, 'Class {} has vtable at {:#x}', classname, classinfo.vtable) 165 | if not add_vtable_symbol(classinfo.vtable, classname): 166 | _log(0, 'Could not add vtable symbol for class {} at address {:#x}', classname, 167 | classinfo.vtable) 168 | else: 169 | _log(0, 'Class {} has no known vtable', classname) 170 | 171 | def class_vtable_method(classinfo, index): 172 | """Get the virtual method for a class by index. 173 | 174 | Arguments: 175 | classinfo: The class information of the class. 176 | index: The index of the virtual method, skipping the empty entries (that is, the first 177 | virtual method is at index 0). 178 | """ 179 | # Get the vtable for the class. 180 | methods = classinfo.vtable_methods 181 | count = classinfo.vtable_nmethods 182 | if index >= count: 183 | return None 184 | return idau.read_word(methods + index * idau.WORD_SIZE) 185 | 186 | def vtable_methods(vtable, start=VTABLE_OFFSET, length=None, nmethods=None): 187 | """Get the methods in a virtual method table. 188 | 189 | A generator that returns each method in the virtual method table. The initial empty entries are 190 | skipped. 191 | 192 | Arguments: 193 | vtable: The address of the virtual method table. (This includes the initial empty entries.) 194 | 195 | Options: 196 | start: The index at which to start returning values. All prior indexes 197 | are skipped. Default is VTABLE_OFFSET, meaning the initial empty 198 | entries will be skipped. 199 | length: The length of the vtable, including the initial empty entries. Specify this value 200 | to read the entire vtable if the length is already known. 201 | nmethods: The number of methods to read, excluding the initial empty entries. If None, the 202 | whole vtable will be read. Default is None. 203 | """ 204 | assert vtable 205 | # Get the length of the vtable. 206 | if nmethods is not None: 207 | length = nmethods + VTABLE_OFFSET 208 | elif length is None: 209 | length = vtable_length(vtable) 210 | # Read the methods. 211 | for i in xrange(start, length): 212 | yield idau.read_word(vtable + i * idau.WORD_SIZE) 213 | 214 | def class_vtable_methods(classinfo, nmethods=None, new=False): 215 | """Get the methods in a virtual method table for a class. 216 | 217 | A generator that returns each method in the virtual method table. The initial empty entries are 218 | skipped. 219 | 220 | Arguments: 221 | classinfo: The ClassInfo object describing the class. 222 | 223 | Options: 224 | nmethods: The number of methods to read, excluding the initial empty entries. If None, the 225 | whole vtable will be read. Default is None. 226 | new: If True, only return methods not defined in the superclass. Default is False. 227 | """ 228 | if not classinfo.vtable: 229 | return [] 230 | if new and classinfo.superclass: 231 | start = classinfo.superclass.vtable_length 232 | else: 233 | start = VTABLE_OFFSET 234 | return vtable_methods(classinfo.vtable, start=start, length=classinfo.vtable_length, 235 | nmethods=nmethods) 236 | 237 | def vtable_overrides(class_vtable, super_vtable, class_vlength=None, super_vlength=None, 238 | new=False, methods=False): 239 | """Get the overrides of a virtual method table. 240 | 241 | A generator that returns the index of each override in the virtual method table. The initial 242 | empty entries are skipped, so the first virtual method is at index 0. 243 | 244 | Arguments: 245 | class_vtable: The vtable of the class. 246 | super_vtable: The vtable of the ancestor to compare against for overrides. 247 | 248 | Options: 249 | class_vlength: The length of class_vtable. If None, it will be calculated. 250 | super_vlength: The length of super_vtable. If None, it will be calculated. 251 | new: If True, include new virtual methods not present in the superclass. Default is False. 252 | methods: If True, then the generator will produce a tuple containing the index, the 253 | overridden method in the subclass, and the original method in the superclas, rather 254 | than just the index. Default is False. 255 | """ 256 | assert class_vtable 257 | # Get the vtable lengths. 258 | if class_vlength is None: 259 | class_vlength = vtable_length(class_vtable) 260 | if super_vlength is None: 261 | super_vlength = vtable_length(super_vtable) 262 | assert class_vlength >= super_vlength >= 0 263 | # Skip the first VTABLE_OFFSET entries. 264 | class_vtable += VTABLE_OFFSET * idau.WORD_SIZE 265 | super_vtable += VTABLE_OFFSET * idau.WORD_SIZE 266 | class_vlength -= VTABLE_OFFSET 267 | super_vlength -= VTABLE_OFFSET 268 | # How many methods are we iterating over? 269 | if new: 270 | nmethods = class_vlength 271 | else: 272 | nmethods = super_vlength 273 | # Iterate through the methods. 274 | for i in xrange(nmethods): 275 | # Read the old method. 276 | super_method = None 277 | if i < super_vlength: 278 | super_method = idau.read_word(super_vtable + i * idau.WORD_SIZE) 279 | # Read the new method. (It's always in range.) 280 | class_method = idau.read_word(class_vtable + i * idau.WORD_SIZE) 281 | # If they're different, yield. 282 | if class_method != super_method: 283 | if methods: 284 | yield i, class_method, super_method 285 | else: 286 | yield i 287 | 288 | def class_vtable_overrides(classinfo, superinfo=None, new=False, methods=False): 289 | """Get the overrides of a virtual method table for a class. 290 | 291 | A generator that returns the index of each override in the virtual method table. The initial 292 | empty entries are skipped, so the first virtual method is at index 0. 293 | 294 | Arguments: 295 | classinfo: The ClassInfo of the class to inspect. 296 | 297 | Options: 298 | superinfo: The ClassInfo of the ancestor to compare against for overrides. If None, then 299 | the ClassInfo of the direct superclass will be used. Default is None. 300 | new: If True, include new virtual methods not present in the superclass. Default is False. 301 | methods: If True, then the generator will produce a tuple containing the index, the 302 | overridden method in the subclass, and the original method in the superclas, rather 303 | than just the index. Default is False. 304 | """ 305 | if not classinfo.vtable: 306 | return 307 | # Get the correct superinfo. 308 | if superinfo is None: 309 | # Default to the superclass, but if there isn't one, there's nothing to do. 310 | superinfo = classinfo.superclass 311 | if not superinfo and not new: 312 | return 313 | else: 314 | if superinfo not in classinfo.ancestors(): 315 | raise ValueError('Invalid arguments: classinfo={}, superinfo={}'.format(classinfo, 316 | superinfo)) 317 | # Get the vtable for the class. 318 | class_vtable = classinfo.vtable 319 | class_vlength = classinfo.vtable_length 320 | # Get the vtable for the superclass. 321 | if superinfo: 322 | super_vtable = superinfo.vtable 323 | super_vlength = superinfo.vtable_length 324 | assert class_vlength >= super_vlength 325 | else: 326 | super_vtable = 0 327 | super_vlength = 0 328 | # Run the generator. 329 | for x in vtable_overrides(class_vtable, super_vtable, class_vlength=class_vlength, 330 | super_vlength=super_vlength, new=new, methods=methods): 331 | yield x 332 | 333 | def class_from_vtable_method_symbol(method_symbol): 334 | """Get the base class in a vtable method symbol. 335 | 336 | Extract the name of the base class from a canonical method symbol. 337 | """ 338 | demangled = idc.Demangle(method_symbol, idc.GetLongPrm(idc.INF_SHORT_DN)) 339 | if not demangled: 340 | return None 341 | classname = demangled.split('::', 1)[0] 342 | if classname == demangled: 343 | return None 344 | return classname 345 | 346 | def _vtable_method_symbol_substitute_class(method_symbol, new_class, old_class=None): 347 | """Create a new method symbol by substituting the class to which the method belongs.""" 348 | # TODO: This is wrong when the class name is repeated! 349 | if not old_class: 350 | old_class = class_from_vtable_method_symbol(method_symbol) 351 | if not old_class: 352 | return None 353 | old_class_part = '{}{}'.format(len(old_class), old_class) 354 | new_class_part = '{}{}'.format(len(new_class), new_class) 355 | if old_class_part not in method_symbol: 356 | return None 357 | return method_symbol.replace(old_class_part, new_class_part, 1) 358 | 359 | _ignore_vtable_methods = ( 360 | '___cxa_pure_virtual' 361 | ) 362 | 363 | def _ok_to_rename_method(override, name): 364 | """Some method names are ok to rename.""" 365 | return (name.startswith('j_') and idau.iterlen(idautils.XrefsTo(override)) == 1) 366 | 367 | def _bad_name_dont_use_as_override(name): 368 | """Some names shouldn't propagate into vtable symbols.""" 369 | # Ignore jumps and stubs and fixed known special values. 370 | return (name.startswith('j_') or stub.symbol_references_stub(name) 371 | or name in _ignore_vtable_methods) 372 | 373 | def _symbolicate_overrides_for_classinfo(classinfo, processed): 374 | """A recursive function to symbolicate vtable overrides for a class and its superclasses.""" 375 | # If we've already been processed, stop. 376 | if classinfo in processed: 377 | return 378 | # First propagate symbol information to our superclass. 379 | if classinfo.superclass: 380 | _symbolicate_overrides_for_classinfo(classinfo.superclass, processed) 381 | # Now symbolicate the superclass. 382 | for _, override, original in class_vtable_overrides(classinfo, methods=True): 383 | # Skip this method if the override already has a name and we can't rename it. 384 | override_name = idau.get_ea_name(override, user=True) 385 | if override_name and not _ok_to_rename_method(override, override_name): 386 | continue 387 | # Skip this method if the original does not have a name or if it's a bad name. 388 | original_name = idau.get_ea_name(original, user=True) 389 | if not original_name or _bad_name_dont_use_as_override(original_name): 390 | continue 391 | # Get the new override name if we substitute for the override class's name. 392 | new_name = _vtable_method_symbol_substitute_class(original_name, classinfo.classname) 393 | if not new_name: 394 | _log(0, 'Could not substitute class {} into method symbol {} for override {:#x}', 395 | classinfo.classname, original_name, override) 396 | continue 397 | # Now that we have the new name, set it. 398 | if override_name: 399 | _log(2, 'Renaming {} -> {}', override_name, new_name) 400 | if not idau.set_ea_name(override, new_name, rename=True): 401 | _log(0, 'Could not set name {} for method {:#x}', new_name, override) 402 | # We're done. 403 | processed.add(classinfo) 404 | 405 | def initialize_vtable_method_symbols(): 406 | """Symbolicate overridden methods in a virtual method table. 407 | 408 | Propagate symbol names from the virtual method tables of the base classes. 409 | """ 410 | processed = set() 411 | classes.collect_class_info() 412 | for classinfo in classes.class_info.values(): 413 | _symbolicate_overrides_for_classinfo(classinfo, processed) 414 | 415 | -------------------------------------------------------------------------------- /ida_kernelcache/ida_utilities.py: -------------------------------------------------------------------------------- 1 | # 2 | # ida_kernelcache/ida_utilities.py 3 | # Brandon Azad 4 | # 5 | # Some utility functions to make working with IDA easier. 6 | # 7 | 8 | from collections import deque 9 | 10 | import idc 11 | import idautils 12 | import idaapi 13 | 14 | def make_log(log_level, module): 15 | """Create a logging function.""" 16 | def log(level, *args): 17 | if len(args) == 0: 18 | return level <= log.level 19 | if level <= log.level: 20 | print module + ': ' + args[0].format(*args[1:]) 21 | log.level = log_level 22 | return log 23 | 24 | _log = make_log(1, __name__) 25 | 26 | WORD_SIZE = 0 27 | """The size of a word on the current platform.""" 28 | 29 | BIG_ENDIAN = False 30 | """Whether the current platform is big endian.""" 31 | 32 | LITTLE_ENDIAN = True 33 | """Whether the current platform is little-endian. Always the opposite of BIG_ENDIAN.""" 34 | 35 | def _initialize(): 36 | # https://reverseengineering.stackexchange.com/questions/11396/how-to-get-the-cpu-architecture-via-idapython 37 | global WORD_SIZE, LITTLE_ENDIAN, BIG_ENDIAN 38 | info = idaapi.get_inf_structure() 39 | if info.is_64bit(): 40 | WORD_SIZE = 8 41 | elif info.is_32bit(): 42 | WORD_SIZE = 4 43 | else: 44 | WORD_SIZE = 2 45 | try: 46 | BIG_ENDIAN = info.is_be() 47 | except: 48 | BIG_ENDIAN = info.mf 49 | LITTLE_ENDIAN = not BIG_ENDIAN 50 | 51 | _initialize() 52 | 53 | def iterlen(iterator): 54 | """Consume an iterator and return its length.""" 55 | return sum(1 for _ in iterator) 56 | 57 | class AlignmentError(Exception): 58 | """An exception that is thrown if an address with improper alignment is encountered.""" 59 | def __init__(self, address): 60 | self.address = address 61 | def __str__(self): 62 | return repr(self.address) 63 | 64 | def is_mapped(ea, size=1, value=True): 65 | """Check if the given address is mapped. 66 | 67 | Specify a size greater than 1 to check if an address range is mapped. 68 | 69 | Arguments: 70 | ea: The linear address to check. 71 | 72 | Options: 73 | size: The number of bytes at ea to check. Default is 1. 74 | value: Only consider an address mapped if it has a value. For example, the contents of a 75 | bss section exist but don't have a static value. If value is False, consider such 76 | addresses as mapped. Default is True. 77 | 78 | Notes: 79 | This function is currently a hack: It only checks the first and last byte. 80 | """ 81 | if size < 1: 82 | raise ValueError('Invalid argument: size={}'.format(size)) 83 | # HACK: We only check the first and last byte, not all the bytes in between. 84 | if value: 85 | return idc.isLoaded(ea) and (size == 1 or idc.isLoaded(ea + size - 1)) 86 | else: 87 | return idaapi.getseg(ea) and (size == 1 or idaapi.getseg(ea + size - 1)) 88 | 89 | def get_name_ea(name, fromaddr=idc.BADADDR): 90 | """Get the address of a name. 91 | 92 | This function returns the linear address associated with the given name. 93 | 94 | Arguments: 95 | name: The name to look up. 96 | 97 | Options: 98 | fromaddr: The referring address. Default is BADADDR. Some addresses have a 99 | location-specific name (for example, labels within a function). If fromaddr is not 100 | BADADDR, then this function will try to retrieve the address of the name from 101 | fromaddr's perspective. If name is not a local name, its address as a global name will 102 | be returned. 103 | 104 | Returns: 105 | The address of the name or BADADDR. 106 | """ 107 | return idc.LocByNameEx(fromaddr, name) 108 | 109 | def get_ea_name(ea, fromaddr=idc.BADADDR, true=False, user=False): 110 | """Get the name of an address. 111 | 112 | This function returns the name associated with the byte at the specified address. 113 | 114 | Arguments: 115 | ea: The linear address whose name to find. 116 | 117 | Options: 118 | fromaddr: The referring address. Default is BADADDR. Some addresses have a 119 | location-specific name (for example, labels within a function). If fromaddr is not 120 | BADADDR, then this function will try to retrieve the name of ea from fromaddr's 121 | perspective. The global name will be returned if no location-specific name is found. 122 | true: Retrieve the true name rather than the display name. Default is False. 123 | user: Return "" if the name is not a user name. 124 | 125 | Returns: 126 | The name of the address or "". 127 | """ 128 | if user and not idc.hasUserName(idc.GetFlags(ea)): 129 | return "" 130 | if true: 131 | return idc.GetTrueNameEx(fromaddr, ea) 132 | else: 133 | return idc.NameEx(fromaddr, ea) 134 | 135 | def set_ea_name(ea, name, rename=False, auto=False): 136 | """Set the name of an address. 137 | 138 | Arguments: 139 | ea: The address to name. 140 | name: The new name of the address. 141 | 142 | Options: 143 | rename: If rename is False, and if the address already has a name, and if that name differs 144 | from the new name, then this function will fail. Set rename to True to rename the 145 | address even if it already has a custom name. Default is False. 146 | auto: If auto is True, then mark the new name as autogenerated. Default is False. 147 | 148 | Returns: 149 | True if the address was successfully named (or renamed). 150 | """ 151 | if not rename and idc.hasUserName(idc.GetFlags(ea)): 152 | return get_ea_name(ea) == name 153 | flags = idc.SN_CHECK 154 | if auto: 155 | flags |= idc.SN_AUTO 156 | return bool(idc.MakeNameEx(ea, name, flags)) 157 | 158 | def _insn_op_stroff_700(insn, n, sid, delta): 159 | """A wrapper of idc.OpStroffEx for IDA 7.""" 160 | return idc.OpStroffEx(insn, n, sid, delta) 161 | 162 | def _insn_op_stroff_695(insn, n, sid, delta): 163 | """A wrapper of idc.OpStroffEx for IDA 6.95.""" 164 | return idc.OpStroffEx(insn.ea, n, sid, delta) 165 | 166 | if idaapi.IDA_SDK_VERSION < 700: 167 | insn_op_stroff = _insn_op_stroff_695 168 | else: 169 | insn_op_stroff = _insn_op_stroff_700 170 | 171 | def _addresses(start, end, step, partial, aligned): 172 | """A generator to iterate over the addresses in an address range.""" 173 | addr = start 174 | end_full = end - step + 1 175 | while addr < end_full: 176 | yield addr 177 | addr += step 178 | if addr != end: 179 | if aligned: 180 | raise AlignmentError(end) 181 | if addr < end and partial: 182 | yield addr 183 | 184 | def _mapped_addresses(addresses, step, partial, allow_unmapped): 185 | """Wrap an _addresses generator with a filter that checks whether the addresses are mapped.""" 186 | for addr in addresses: 187 | start_is_mapped = is_mapped(addr) 188 | end_is_mapped = is_mapped(addr + step - 1) 189 | fully_mapped = start_is_mapped and end_is_mapped 190 | allowed_partial = partial and (start_is_mapped or end_is_mapped) 191 | # Yield the value if it's sufficiently mapped. Otherwise, break if we stop at an 192 | # unmapped address. 193 | if fully_mapped or allowed_partial: 194 | yield addr 195 | elif not allow_unmapped: 196 | break 197 | 198 | def Addresses(start, end=None, step=1, length=None, partial=False, aligned=False, 199 | unmapped=True, allow_unmapped=False): 200 | """A generator to iterate over the addresses in an address range. 201 | 202 | Arguments: 203 | start: The start of the address range to iterate over. 204 | 205 | Options: 206 | end: The end of the address range to iterate over. 207 | step: The amount to step the address by each iteration. Default is 1. 208 | length: The number of elements of size step to iterate over. 209 | partial: If only part of the element is in the address range, or if only part of the 210 | element is mapped, return it anyway. Default is False. This option is only meaningful 211 | if aligned is False or if some address in the range is partially unmapped. 212 | aligned: If the end address is not aligned with an iteration boundary, throw an 213 | AlignmentError. 214 | unmapped: Don't check whether an address is mapped or not before returning it. This option 215 | always implies allow_unmapped. Default is True. 216 | allow_unmapped: Don't stop iteration if an unmapped address is encountered (but the address 217 | won't be returned unless unmapped is also True). Default is False. If partial is also 218 | True, then a partially mapped address will be returned and then iteration will stop. 219 | """ 220 | # HACK: We only check the first and last byte, not all the bytes in between. 221 | # Validate step. 222 | if step < 1: 223 | raise ValueError('Invalid arguments: step={}'.format(step)) 224 | # Set the end address. 225 | if length is not None: 226 | end_addr = start + length * step 227 | if end is not None and end != end_addr: 228 | raise ValueError('Invalid arguments: start={}, end={}, step={}, length={}' 229 | .format(start, end, step, length)) 230 | end = end_addr 231 | if end is None: 232 | raise ValueError('Invalid arguments: end={}, length={}'.format(end, length)) 233 | addresses = _addresses(start, end, step, partial, aligned) 234 | # If unmapped is True, iterate over all the addresses. Otherwise, we will check that addresses 235 | # are properly mapped with a wrapper. 236 | if unmapped: 237 | return addresses 238 | else: 239 | return _mapped_addresses(addresses, step, partial, allow_unmapped) 240 | 241 | def _instructions_by_range(start, end): 242 | """A generator to iterate over instructions in a range.""" 243 | pc = start 244 | while pc < end: 245 | insn = idautils.DecodeInstruction(pc) 246 | if insn is None: 247 | break 248 | next_pc = pc + insn.size 249 | if next_pc > end: 250 | raise AlignmentError(end) 251 | yield insn 252 | pc = next_pc 253 | 254 | def _instructions_by_count(pc, count): 255 | """A generator to iterate over a specified number of instructions.""" 256 | for i in xrange(count): 257 | insn = idautils.DecodeInstruction(pc) 258 | if insn is None: 259 | break 260 | yield insn 261 | pc += insn.size 262 | 263 | def Instructions(start, end=None, count=None): 264 | """A generator to iterate over instructions. 265 | 266 | Instructions are decoded using IDA's DecodeInstruction(). If an address range is specified and 267 | the end of the address range does not fall on an instruction boundary, raises an 268 | AlignmentError. 269 | 270 | Arguments: 271 | start: The linear address from which to start decoding instructions. 272 | 273 | Options: 274 | end: The linear address at which to stop, exclusive. 275 | count: The number of instructions to decode. 276 | 277 | Notes: 278 | Exactly one of end and count must be specified. 279 | """ 280 | if (end is not None and count is not None) or (end is None and count is None): 281 | raise ValueError('Invalid arguments: end={}, count={}'.format(end, count)) 282 | if end is not None: 283 | return _instructions_by_range(start, end) 284 | else: 285 | return _instructions_by_count(start, count) 286 | 287 | _FF_FLAG_FOR_SIZE = { 288 | 1: idc.FF_BYTE, 289 | 2: idc.FF_WORD, 290 | 4: idc.FF_DWRD, 291 | 8: idc.FF_QWRD, 292 | 16: idc.FF_OWRD, 293 | } 294 | 295 | def word_flag(wordsize=WORD_SIZE): 296 | """Get the FF_xxxx flag for the given word size.""" 297 | return _FF_FLAG_FOR_SIZE.get(wordsize, 0) 298 | 299 | def read_word(ea, wordsize=WORD_SIZE): 300 | """Get the word at the given address. 301 | 302 | Words are read using Byte(), Word(), Dword(), or Qword(), as appropriate. Addresses are checked 303 | using is_mapped(). If the address isn't mapped, then None is returned. 304 | """ 305 | if not is_mapped(ea, wordsize): 306 | return None 307 | if wordsize == 1: 308 | return idc.Byte(ea) 309 | if wordsize == 2: 310 | return idc.Word(ea) 311 | if wordsize == 4: 312 | return idc.Dword(ea) 313 | if wordsize == 8: 314 | return idc.Qword(ea) 315 | raise ValueError('Invalid argument: wordsize={}'.format(wordsize)) 316 | 317 | def patch_word(ea, value, wordsize=WORD_SIZE): 318 | """Patch the word at the given address. 319 | 320 | Words are patched using PatchByte(), PatchWord(), PatchDword(), or PatchQword(), as 321 | appropriate. 322 | """ 323 | if wordsize == 1: 324 | idc.PatchByte(ea, value) 325 | elif wordsize == 2: 326 | idc.PatchWord(ea, value) 327 | elif wordsize == 4: 328 | idc.PatchDword(ea, value) 329 | elif wordsize == 8: 330 | idc.PatchQword(ea, value) 331 | else: 332 | raise ValueError('Invalid argument: wordsize={}'.format(wordsize)) 333 | 334 | class objectview(object): 335 | """A class to present an object-like view of a struct.""" 336 | # https://goodcode.io/articles/python-dict-object/ 337 | def __init__(self, fields, addr, size): 338 | self.__dict__ = fields 339 | self.__addr = addr 340 | self.__size = size 341 | def __int__(self): 342 | return self.__addr 343 | def __len__(self): 344 | return self.__size 345 | 346 | def _read_struct_member_once(ea, flags, size, member_sid, member_size, asobject): 347 | """Read part of a struct member for _read_struct_member.""" 348 | if idc.isByte(flags): 349 | return read_word(ea, 1), 1 350 | elif idc.isWord(flags): 351 | return read_word(ea, 2), 2 352 | elif idc.isDwrd(flags): 353 | return read_word(ea, 4), 4 354 | elif idc.isQwrd(flags): 355 | return read_word(ea, 8), 8 356 | elif idc.isOwrd(flags): 357 | return read_word(ea, 16), 16 358 | elif idc.isASCII(flags): 359 | return idc.GetManyBytes(ea, size), size 360 | elif idc.isFloat(flags): 361 | return idc.Float(ea), 4 362 | elif idc.isDouble(flags): 363 | return idc.Double(ea), 8 364 | elif idc.isStruct(flags): 365 | value = read_struct(ea, sid=member_sid, asobject=asobject) 366 | return value, member_size 367 | return None, size 368 | 369 | def _read_struct_member(struct, sid, union, ea, offset, name, size, asobject): 370 | """Read a member into a struct for read_struct.""" 371 | flags = idc.GetMemberFlag(sid, offset) 372 | assert flags != -1 373 | # Extra information for parsing a struct. 374 | member_sid, member_ssize = None, None 375 | if idc.isStruct(flags): 376 | member_sid = idc.GetMemberStrId(sid, offset) 377 | member_ssize = idc.GetStrucSize(member_sid) 378 | # Get the address of the start of the member. 379 | member = ea 380 | if not union: 381 | member += offset 382 | # Now parse out the value. 383 | array = [] 384 | processed = 0 385 | while processed < size: 386 | value, read = _read_struct_member_once(member + processed, flags, size, member_sid, 387 | member_ssize, asobject) 388 | assert size % read == 0 389 | array.append(value) 390 | processed += read 391 | if len(array) == 1: 392 | value = array[0] 393 | else: 394 | value = array 395 | struct[name] = value 396 | 397 | def read_struct(ea, struct=None, sid=None, members=None, asobject=False): 398 | """Read a structure from the given address. 399 | 400 | This function reads the structure at the given address and converts it into a dictionary or 401 | accessor object. 402 | 403 | Arguments: 404 | ea: The linear address of the start of the structure. 405 | 406 | Options: 407 | sid: The structure ID of the structure type to read. 408 | struct: The name of the structure type to read. 409 | members: A list of the names of the member fields to read. If members is None, then all 410 | members are read. Default is None. 411 | asobject: If True, then the struct is returned as a Python object rather than a dict. 412 | 413 | One of sid and struct must be specified. 414 | """ 415 | # Handle sid/struct. 416 | if struct is not None: 417 | sid2 = idc.GetStrucIdByName(struct) 418 | if sid2 == idc.BADADDR: 419 | raise ValueError('Invalid struc name {}'.format(struct)) 420 | if sid is not None and sid2 != sid: 421 | raise ValueError('Invalid arguments: sid={}, struct={}'.format(sid, struct)) 422 | sid = sid2 423 | else: 424 | if sid is None: 425 | raise ValueError('Invalid arguments: sid={}, struct={}'.format(sid, struct)) 426 | if idc.GetStrucName(sid) is None: 427 | raise ValueError('Invalid struc id {}'.format(sid)) 428 | # Iterate through the members and add them to the struct. 429 | union = idc.IsUnion(sid) 430 | struct = {} 431 | for offset, name, size in idautils.StructMembers(sid): 432 | if members is not None and name not in members: 433 | continue 434 | _read_struct_member(struct, sid, union, ea, offset, name, size, asobject) 435 | if asobject: 436 | struct = objectview(struct, ea, idc.GetStrucSize(sid)) 437 | return struct 438 | 439 | def null_terminated(string): 440 | """Extract the NULL-terminated C string from the given array of bytes.""" 441 | return string.split('\0', 1)[0] 442 | 443 | def _convert_address_to_function(func): 444 | """Convert an address that IDA has classified incorrectly into a proper function.""" 445 | # If everything goes wrong, we'll try to restore this function. 446 | orig = idc.FirstFuncFchunk(func) 447 | # If the address is not code, let's undefine whatever it is. 448 | if not idc.isCode(idc.GetFlags(func)): 449 | if not is_mapped(func): 450 | # Well, that's awkward. 451 | return False 452 | item = idc.ItemHead(func) 453 | itemend = idc.ItemEnd(func) 454 | if item != idc.BADADDR: 455 | _log(1, 'Undefining item {:#x} - {:#x}', item, itemend) 456 | idc.MakeUnkn(item, idc.DOUNK_EXPAND) 457 | idc.MakeCode(func) 458 | # Give IDA a chance to analyze the new code or else we won't be able to create a 459 | # function. 460 | idc.Wait() 461 | idc.AnalyseArea(item, itemend) 462 | else: 463 | # Just try removing the chunk from its current function. IDA can add it to another function 464 | # automatically, so make sure it's removed from all functions by doing it in loop until it 465 | # fails. 466 | for i in range(1024): 467 | if not idc.RemoveFchunk(func, func): 468 | break 469 | # Now try making a function. 470 | if idc.MakeFunction(func) != 0: 471 | return True 472 | # This is a stubborn chunk. Try recording the list of chunks, deleting the original function, 473 | # creating the new function, then re-creating the original function. 474 | if orig != idc.BADADDR: 475 | chunks = list(idautils.Chunks(orig)) 476 | if idc.DelFunction(orig) != 0: 477 | # Ok, now let's create the new function, and recreate the original. 478 | if idc.MakeFunction(func) != 0: 479 | if idc.MakeFunction(orig) != 0: 480 | # Ok, so we created the functions! Now, if any of the original chunks are not 481 | # contained in a function, we'll abort and undo. 482 | if all(idaapi.get_func(start) for start, end in chunks): 483 | return True 484 | # Try to undo the damage. 485 | for start, _ in chunks: 486 | idc.DelFunction(start) 487 | # Everything we've tried so far has failed. If there was originally a function, try to restore 488 | # it. 489 | if orig != idc.BADADDR: 490 | _log(0, 'Trying to restore original function {:#x}', orig) 491 | idc.MakeFunction(orig) 492 | return False 493 | 494 | def is_function_start(ea): 495 | """Return True if the address is the start of a function.""" 496 | return idc.GetFunctionAttr(ea, idc.FUNCATTR_START) == ea 497 | 498 | def force_function(addr): 499 | """Ensure that the given address is a function type, converting it if necessary.""" 500 | if is_function_start(addr): 501 | return True 502 | return _convert_address_to_function(addr) 503 | 504 | def ReadWords(start, end, step=WORD_SIZE, wordsize=WORD_SIZE, addresses=False): 505 | """A generator to iterate over the data words in the given address range. 506 | 507 | The iterator returns a stream of words or tuples for each mapped word in the address range. 508 | Words are read using read_word(). Iteration stops at the first unmapped word. 509 | 510 | Arguments: 511 | start: The start address. 512 | end: The end address. 513 | 514 | Options: 515 | step: The number of bytes to advance per iteration. Default is WORD_SIZE. 516 | wordsize: The word size to read, in bytes. Default is WORD_SIZE. 517 | addresses: If true, then the iterator will return a stream of tuples (word, ea) for each 518 | mapped word in the address range. Otherwise, just the word itself will be returned. 519 | Default is False. 520 | """ 521 | for addr in Addresses(start, end, step=step, unmapped=True): 522 | word = read_word(addr, wordsize) 523 | if word is None: 524 | break 525 | value = (word, addr) if addresses else word 526 | yield value 527 | 528 | def WindowWords(start, end, window_size, wordsize=WORD_SIZE): 529 | """A generator to iterate over a sliding window of data words in the given address range. 530 | 531 | The iterator returns a stream of tuples (window, ea) for each word in the address range. The 532 | window is a deque of the window_size words at address ea. The deque is owned by the generator 533 | and its contents will change between iterations. 534 | """ 535 | words = ReadWords(start, end, wordsize=wordsize) 536 | window = deque([next(words) for _ in range(window_size)], maxlen=window_size) 537 | addr = start 538 | yield window, addr 539 | for word in words: 540 | window.append(word) 541 | addr += wordsize 542 | yield window, addr 543 | 544 | def struct_create(name, union=False): 545 | """Create an IDA struct with the given name, returning the SID.""" 546 | # AddStrucEx is documented as returning -1 on failure, but in practice it seems to return 547 | # BADADDR. 548 | union = 1 if union else 0 549 | sid = idc.AddStrucEx(-1, name, union) 550 | if sid in (-1, idc.BADADDR): 551 | return None 552 | return sid 553 | 554 | def struct_open(name, create=False, union=None): 555 | """Get the SID of the IDA struct with the given name, optionally creating it.""" 556 | sid = idc.GetStrucIdByName(name) 557 | if sid == idc.BADADDR: 558 | if not create: 559 | return None 560 | sid = struct_create(name, union=bool(union)) 561 | elif union is not None: 562 | is_union = bool(idc.IsUnion(sid)) 563 | if union != is_union: 564 | return None 565 | return sid 566 | 567 | def struct_member_offset(sid, name): 568 | """A version of IDA's GetMemberOffset() that also works with unions.""" 569 | struct = idaapi.get_struc(sid) 570 | if not struct: 571 | return None 572 | member = idaapi.get_member_by_name(struct, name) 573 | if not member: 574 | return None 575 | return member.soff 576 | 577 | def struct_add_word(sid, name, offset, size, count=1): 578 | """Add a word (integer) to a structure. 579 | 580 | If sid is a union, offset must be -1. 581 | """ 582 | return idc.AddStrucMember(sid, name, offset, idc.FF_DATA | word_flag(size), -1, size * count) 583 | 584 | def struct_add_ptr(sid, name, offset, count=1, type=None): 585 | """Add a pointer to a structure. 586 | 587 | If sid is a union, offset must be -1. 588 | """ 589 | ptr_flag = idc.FF_DATA | word_flag(WORD_SIZE) | idaapi.offflag() 590 | ret = idc.AddStrucMember(sid, name, offset, ptr_flag, 0, WORD_SIZE) 591 | if ret == 0 and type is not None: 592 | if offset == -1: 593 | offset = struct_member_offset(sid, name) 594 | assert offset is not None 595 | mid = idc.GetMemberId(sid, offset) 596 | idc.SetType(mid, type) 597 | return ret 598 | 599 | def struct_add_struct(sid, name, offset, msid, count=1): 600 | """Add a structure member to a structure. 601 | 602 | If sid is a union, offset must be -1. 603 | """ 604 | size = idc.GetStrucSize(msid) 605 | return idc.AddStrucMember(sid, name, offset, idc.FF_DATA | idc.FF_STRU, msid, size * count) 606 | 607 | -------------------------------------------------------------------------------- /ida_kernelcache/class_struct.py: -------------------------------------------------------------------------------- 1 | # 2 | # ida_kernelcache/class_struct.py 3 | # Brandon Azad 4 | # 5 | # A module to build structs representing the C++ classes in the kernelcache. 6 | # 7 | """ida_kernelcache.class_struct 8 | 9 | This module is responsible for creating the IDA structs representing the various C++ classes found 10 | in the kernelcache, including the structs for the vtables. 11 | 12 | Organization: 13 | 14 | Each class Class gets four structs: Class, Class::vtable, Class::vmethods and Class::fields. 15 | Class::vmethods is a struct containing the virtual methods for Class that are not present in its 16 | direct superclass. Class::vtable is a struct representing the virtual method table for Class, laid 17 | out as follows: 18 | 19 | struct Class::vtable { 20 | struct SuperClass1::vmethods SuperClass1; 21 | struct SuperClass2::vmethods SuperClass2; 22 | /* ... */ 23 | struct SuperClassN::vmethods SuperClassN; 24 | struct Class::vmethods Class; 25 | }; 26 | 27 | Here SuperClass1, ..., SuperClassN are the chain of superclasses of Class starting from the root. 28 | (Remember, XNU's C++ does not have multiple inheritance, which means we only have one ancestor 29 | chain. This makes everything much easier!) 30 | 31 | There are two styles for how Class is represented: struct slices and unions. 32 | 33 | In the struct slices representation, Class::fields is a struct containing those fields in Class not 34 | present in its superclass, shifted to start at offset 0. Class is a struct organized as follows: 35 | 36 | struct Class { 37 | struct Class::vtable* vtable; 38 | struct SuperClass1::fields SuperClass1; 39 | struct SuperClass2::fields SuperClass2; 40 | /* ... */ 41 | struct SuperClassN::fields SuperClassN; 42 | struct Class::fields Class; 43 | }; 44 | 45 | In the unions representation, Class::fields is also a struct containing the fields in Class not 46 | present in its superclass, however this time it is not shifted, so that the fields occur at the 47 | same offset in Class::fields as they do in the original Class class in the kernel. Class is a 48 | union organized as follows: 49 | 50 | union Class { 51 | struct Class::vtable* vtable; 52 | struct SuperClass1::fields SuperClass1; 53 | struct SuperClass2::fields SuperClass2; 54 | /* ... */ 55 | struct SuperClassN::fields SuperClassN; 56 | struct Class::fields Class; 57 | }; 58 | 59 | There are advantages and disadvantages to each representation. The unions representation can be 60 | more flexible if the automated analysis messes up, but so far I have not found a good way to set 61 | the operands of instructions referring to these structures. 62 | 63 | TODO: I know it's probably possible with ida_bytes.op_stroff(). 64 | 65 | We divide the processing into two parts: vtable generation and class generation. 66 | 67 | For vtable generation, we initially ignore the problem of setting types for each virtual method 68 | entry in the struct. The primary reason for this is that the method symbols in the kernelcache 69 | don't include return type information, so we can't be sure what the correct return type for each 70 | method is. In the future, another module will be able to populate the vtable structs with proper 71 | type information. 72 | 73 | Class generation is more complicated: We first need to collect the set of accesses to each class 74 | struct, then use that information to reconstruct the class fields. Most of the work is done by the 75 | data_flow module, which collects (offset, size) pairs for each virtual method in the class. We 76 | partiton those accesses to their respective classes by class size. (This is not perfect since the 77 | class size reported in the kernel may actually be rounded up. However, for the most part it works 78 | quite well.) Once we know which (offset, size) pairs correspond to which class, we use the 79 | build_struct module to create the appropriate fields in the struct for those accesses. 80 | 81 | Rationale: 82 | 83 | IDA structs don't have any form of inheritance, which leaves us two options: We can either create a 84 | single struct for each class and then figure out some way of synchronizing changes along the 85 | inheritance chain, or we can rely on some form of struct inclusion to ensure that the members of 86 | each class are defined only in one place, and all subclasses re-use those members by including them 87 | as a substruct. 88 | 89 | While creating one struct for each class with all members for the class and its superclasses is 90 | simple and presents most similarly to the original code, synchronizing this representation across 91 | struct changes is complex, and not possible in general. Consider: If a change is made to a member 92 | of the root class in a leaf class, we would need to propagate that change back to the root and then 93 | down to every subclass of the root class. And if along the way we found another change that was 94 | incompatible, there would be no way to automatically discover the right way to resolve the 95 | conflict. Perhaps this solution would work if we could ensure that the propagation code was run 96 | after every single structure change, so that there was no opportunity to develop conflicts, but at 97 | that point the solution is quite complex and requires direct support from IDA. 98 | 99 | Instead, I elected for a representation that forces each field of each class to be defined in only 100 | one place. This means the structures look less like the original C++, which is unfortunate and 101 | complicates adding or looking up members by offset from the start of the class. However, I still 102 | believe it's better to avoid the whole synchronization issue. 103 | 104 | Even so, there are still several possible ways of representing the classes, each with their own 105 | advantages and disadvantages. I ended up allowing the user to select their desired representation. 106 | 107 | For now, we sidestep the problem of setting type information for the function pointers in the 108 | ::vmethods structs. The reason for this, as mentioned above, is that the method symbols don't tell 109 | us what the true return type is, so at best we can guess. It is easy enough to scan through the 110 | vtables after the ::vmethods structs have been generated and add type information then, so I'll 111 | avoid over-complicating this module by trying to do that here. Instead, I imagine another module 112 | (called, for example, types) that provides two functions: 113 | - initialize_method_types: For each C++ method symbol, sets the method type by effectively 114 | doing SetType(GuessType(method)) for every method with a good symbol. 115 | - update_vtable_struct_types: For each field in each ::vmethods struct, look at the type of the 116 | corresponding method, and set the type of the field accordingly. 117 | """ 118 | 119 | import collections 120 | 121 | import idc 122 | import idautils 123 | import idaapi 124 | 125 | import ida_utilities as idau 126 | import build_struct 127 | import classes 128 | import data_flow 129 | import symbol 130 | import vtable 131 | 132 | _log = idau.make_log(2, __name__) 133 | 134 | #### Vtable generation ############################################################################ 135 | 136 | def _populate_vmethods_struct(sid, classinfo): 137 | """Populate the ::vmethods struct.""" 138 | # Loop over the new vtable methods. 139 | super_nmethods = 0 140 | if classinfo.superclass: 141 | super_nmethods = classinfo.superclass.vtable_nmethods 142 | members = set() 143 | for index, vmethod in enumerate(vtable.class_vtable_methods(classinfo)): 144 | # Skip entries in the superclass's vtable. 145 | if index < super_nmethods: 146 | continue 147 | # Get the base name of the method (i.e., for Class::method(args), extract method). 148 | sym = idau.get_ea_name(vmethod, user=True) 149 | base = symbol.method_name(sym) 150 | if not base: 151 | base = 'method_{}'.format(index) 152 | base = symbol.make_ident(base) 153 | # We'll try to use the base as our method name, but if it already exists, try appending 154 | # "_1", "_2", etc. 155 | name = base 156 | suffix = 0 157 | while name in members: 158 | suffix += 1 159 | name = '{}_{}'.format(base, suffix) 160 | members.add(name) 161 | # Create the member. 162 | offset = (index - super_nmethods) * idau.WORD_SIZE 163 | ret = idau.struct_add_ptr(sid, name, offset, type='void *') 164 | if ret != 0: 165 | _log(0, 'Could not create {}::vmethods.{}: {}', classinfo.classname, name, ret) 166 | return False 167 | return True 168 | 169 | def _populate_vtable_struct(sid, classinfo): 170 | """Populate the ::vtable struct.""" 171 | # For each ancestor from root down to us (inclusive), add our ::vmethods struct. 172 | for ci in classinfo.ancestors(inclusive=True): 173 | # Get the offset at which the ::vmethods for ci will be. 174 | offset = 0 175 | if ci.superclass: 176 | offset = ci.superclass.vtable_nmethods * idau.WORD_SIZE 177 | # The size is ci's vtable length minus the offset. 178 | vmethods_size = ci.vtable_nmethods * idau.WORD_SIZE - offset 179 | # If the vmethods_size is 0, skip this entry. Otherwise we get weird 180 | # "struct->til conversion failed" errors. 181 | if vmethods_size == 0: 182 | continue 183 | # Get the sid for ci's ::vmethods. 184 | vmethods_sid = idau.struct_open(ci.classname + '::vmethods') 185 | if vmethods_sid is None: 186 | _log(0, 'Could not find {}::vmethods', ci.classname) 187 | return False 188 | # Add this ::vmethods slice to the ::vtable struct. 189 | ret = idau.struct_add_struct(sid, ci.classname, offset, vmethods_sid) 190 | if ret != 0: 191 | _log(0, 'Could not add {}::vmethods to {}::vtable', ci.classname, classinfo.classname) 192 | return False 193 | return True 194 | 195 | def _create_vmethods_struct(classinfo): 196 | """Create the ::vmethods struct for a C++ class.""" 197 | sid = idau.struct_create(classinfo.classname + '::vmethods') 198 | if sid is None: 199 | _log(0, 'Could not create {}::vmethods', classinfo.classname) 200 | return False 201 | return _populate_vmethods_struct(sid, classinfo) 202 | 203 | def _create_vtable_struct(classinfo): 204 | """Create the ::vtable struct for a C++ class.""" 205 | sid = idau.struct_create(classinfo.classname + '::vtable') 206 | if sid is None: 207 | _log(0, 'Could not create {}::vtable', classinfo.classname) 208 | return False 209 | return _populate_vtable_struct(sid, classinfo) 210 | 211 | def initialize_vtable_structs(): 212 | """Create IDA structs representing the C++ virtual method tables in the kernel.""" 213 | classes.collect_class_info() 214 | for classinfo in classes.class_info.values(): 215 | _create_vmethods_struct(classinfo) 216 | for classinfo in classes.class_info.values(): 217 | _create_vtable_struct(classinfo) 218 | 219 | #### Classes based on struct slices ############################################################### 220 | 221 | def _create_class_structs__slices(classinfo, endmarkers=True): 222 | """Create the IDA structs for a C++ class.""" 223 | classname = classinfo.classname 224 | # Open or create the structs. 225 | sidf = idau.struct_open(classname + '::fields', create=True) 226 | sid = idau.struct_open(classname, create=True) 227 | if sid is None or sidf is None: 228 | _log(0, 'Could not create class structs for {}', classname) 229 | return None 230 | assert all(not idc.IsUnion(s) for s in (sidf, sid)) 231 | # Calculate the size of the ::fields struct. 232 | if classinfo.superclass: 233 | # If we have a superclass, our fields start after our superclass's fields end. 234 | fields_start = classinfo.superclass.class_size 235 | else: 236 | # If we don't have a superclass, our fields start after our vtable. 237 | fields_start = idau.WORD_SIZE 238 | fields_size = classinfo.class_size - fields_start 239 | # Add an ::end member to the fields struct if requested. 240 | if endmarkers: 241 | ret = idc.AddStrucMember(sidf, classname + '::end', fields_size, idc.FF_UNK, -1, 0) 242 | if ret not in (0, idc.STRUC_ERROR_MEMBER_NAME, idc.STRUC_ERROR_MEMBER_OFFSET): 243 | # If that didn't work that's too bad, but continue anyway. 244 | _log(0, 'Could not create {}::end', classname) 245 | return sid, sidf, fields_start 246 | 247 | def _populate_fields_struct__slices(sid, classinfo, fields_start, accesses): 248 | """Fill in the members of the ::fields struct based on the accesses.""" 249 | # Sanity check. 250 | for offset, size in accesses: 251 | assert fields_start <= offset <= offset + size <= classinfo.class_size 252 | # For each (offset, size) access, add a member to the struct. 253 | build_struct.create_struct_fields(sid, accesses=accesses, base=fields_start) 254 | 255 | def _populate_wrapper_struct__slices(sid, classinfo): 256 | """Fill in the members of the wrapper struct.""" 257 | # First add the vtable pointer. 258 | offset = 0 259 | vtable_ptr_type = '{}::vtable *'.format(classinfo.classname) 260 | ret = idau.struct_add_ptr(sid, 'vtable', offset, type=vtable_ptr_type) 261 | if ret not in (0, idc.STRUC_ERROR_MEMBER_OFFSET): 262 | _log(0, 'Could not create {}.vtable: {}', classinfo.classname, ret) 263 | return False 264 | # Now add all the ::fields structs. 265 | offset += idau.WORD_SIZE 266 | for ci in classinfo.ancestors(inclusive=True): 267 | # Get the sid of the ::fields struct. 268 | fields_sid = idau.struct_open(ci.classname + '::fields') 269 | if fields_sid is None: 270 | _log(0, 'Could not find {}::fields', ci.classname) 271 | return False 272 | # If this is a 0-length struct (no fields), skip it. 273 | size = idc.GetStrucSize(fields_sid) 274 | if size == 0: 275 | continue 276 | # If this is already in the wrapper struct, skip it. This avoids weird 277 | # STRUC_ERROR_MEMBER_VARLAST errors. 278 | if idc.GetMemberOffset(sid, ci.classname) != -1: 279 | continue 280 | # Add the ::fields struct to the wrapper. 281 | ret = idau.struct_add_struct(sid, ci.classname, offset, fields_sid) 282 | if ret != 0: 283 | _log(0, 'Could not create {}.{}: {}', classinfo.classname, ci.classname, ret) 284 | return False 285 | offset += size 286 | return True 287 | 288 | def _populate_class_structs__slices(classinfo, class_accesses, sid, sidf, fields_start): 289 | """Populate the IDA structs for a C++ class.""" 290 | _populate_fields_struct__slices(sidf, classinfo, fields_start, 291 | class_accesses[classinfo.classname]) 292 | _populate_wrapper_struct__slices(sid, classinfo) 293 | 294 | #### Classes based on unions ###################################################################### 295 | 296 | def _create_class_structs__unions(classinfo): 297 | """Create the IDA structs for a C++ class.""" 298 | classname = classinfo.classname 299 | sidf = idau.struct_open(classname + '::fields', create=True) 300 | sid = idau.struct_open(classname, union=True, create=True) 301 | if sid is None or sidf is None: 302 | _log(0, 'Could not create class structs for {}', classname) 303 | return None 304 | return sid, sidf 305 | 306 | def _populate_fields_struct__unions(sid, classinfo, accesses): 307 | """Fill in the members of the ::fields struct based on the accesses.""" 308 | # Sanity check. 309 | for offset, size in accesses: 310 | assert 0 <= offset <= offset + size <= classinfo.class_size 311 | # For each (offset, size) access, add a member to the struct. 312 | build_struct.create_struct_fields(sid, accesses=accesses) 313 | 314 | def _populate_wrapper_struct__unions(sid, classinfo): 315 | """Fill in the members of the wrapper struct.""" 316 | # First add the vtable pointer. 317 | vtable_ptr_type = '{}::vtable *'.format(classinfo.classname) 318 | ret = idau.struct_add_ptr(sid, 'vtable', -1, type=vtable_ptr_type) 319 | if ret not in (0, idc.STRUC_ERROR_MEMBER_NAME): 320 | _log(0, 'Could not create {}.vtable: {}', classinfo.classname, ret) 321 | return False 322 | # Now add all the ::fields structs. 323 | for ci in classinfo.ancestors(inclusive=True): 324 | # Get the sid of the ::fields struct. 325 | fields_sid = idau.struct_open(ci.classname + '::fields') 326 | if fields_sid is None: 327 | _log(0, 'Could not find {}::fields', ci.classname) 328 | return False 329 | # Add the ::fields struct to the wrapper. Ignore STRUC_ERROR_MEMBER_UNIVAR if the ::fields 330 | # struct has length 0. 331 | ret = idau.struct_add_struct(sid, ci.classname, -1, fields_sid) 332 | if ret not in (0, idc.STRUC_ERROR_MEMBER_NAME, idc.STRUC_ERROR_MEMBER_UNIVAR): 333 | _log(0, 'Could not create {}.{}: {}', classinfo.classname, ci.classname, ret) 334 | return False 335 | return True 336 | 337 | def _populate_class_structs__unions(classinfo, class_accesses, sid, sidf): 338 | """Populate the IDA structs for a C++ class.""" 339 | _populate_fields_struct__unions(sidf, classinfo, class_accesses[classinfo.classname]) 340 | _populate_wrapper_struct__unions(sid, classinfo) 341 | 342 | #### Class generation ############################################################################# 343 | 344 | CLASS_SLICES = 'slices' 345 | CLASS_UNIONS = 'unions' 346 | 347 | DEFAULT_STYLE = CLASS_SLICES 348 | 349 | def initialize_class_structs(style=DEFAULT_STYLE): 350 | """Create IDA structs representing the C++ classes in the kernel. 351 | 352 | Depends on initialize_vtable_structs. 353 | """ 354 | # A generator that will yield (virtual_method, classname, X0). 355 | def virtual_methods(): 356 | for classinfo in classes.class_info.values(): 357 | for _, vmethod, _ in vtable.class_vtable_overrides(classinfo, new=True, methods=True): 358 | if not idau.is_function_start(vmethod): 359 | _log(3, 'Non-function virtual method {:#x} in class {}', vmethod, 360 | classinfo.classname) 361 | continue 362 | yield vmethod, classinfo.classname, idautils.procregs.X0.reg 363 | # Do the standard processing. 364 | process_functions(virtual_methods(), style=style) 365 | 366 | def _collect_all_class_accesses(functions): 367 | """Collect all accesses to each class by examining the functions. 368 | 369 | Arm64 only. 370 | """ 371 | all_accesses = collections.defaultdict(lambda: collections.defaultdict(set)) 372 | for function, classname, register in functions: 373 | data_flow.pointer_accesses(function=function, initialization={ function: { register: 0 } }, 374 | accesses=all_accesses[classname]) 375 | return all_accesses 376 | 377 | def _classify_class_accesses(all_accesses, style): 378 | """Categorize each access by specific class and build a list of operands to convert. 379 | 380 | Arm64 only. 381 | """ 382 | all_classes = set() 383 | class_accesses = collections.defaultdict(collections.Counter) 384 | class_operands = collections.defaultdict(set) 385 | # Helper for logging. 386 | def log_addrs(addresses_and_deltas): 387 | return ', '.join('{:#x}'.format(ea) for ea, dt in addresses_and_deltas) 388 | # For each class, look at the accesses associated with that class. 389 | for classname, accesses in all_accesses.items(): 390 | classinfo = classes.class_info.get(classname) 391 | if not classinfo: 392 | _log(-1, 'Skipping non-existent class {}', classname) 393 | continue 394 | # Put each (offset, size) pair in the appropriate dictionary. We'll traverse our ancestors 395 | # from root to leaf, which means the first time this offset/size combination fits in a 396 | # class, that's the class it goes with. 397 | ancestors = list(classinfo.ancestors(inclusive=True)) 398 | all_classes.update(ancestors) 399 | for offset_and_size, addresses_and_deltas in accesses.items(): 400 | offset, size = offset_and_size 401 | # Accesses to offsets 0-8 are actually not considered part of the ::fields struct since 402 | # they technically access the vtable. Skip it. 403 | if offset + size <= idau.WORD_SIZE: 404 | continue 405 | for ci in ancestors: 406 | if offset + size <= ci.class_size: 407 | # This is the smallest class that contains all the bytes of the access. If the 408 | # start of the access is in a smaller class, then this access spans a class 409 | # boundary. There are two possible causes: either there's a bug in the 410 | # analyzer, or the superclass's size was rounded up in the initialization 411 | # function, meaning this is actually a completely valid access in the current 412 | # class. Unfortunately there's no good way to detect this. The CLASS_UNIONS 413 | # model can deal with this OK, but the CLASS_SLICES model has problems. Skip 414 | # this access if we're not in the CLASS_UNIONS model. 415 | superclass_size = idau.WORD_SIZE 416 | if ci.superclass: 417 | superclass_size = ci.superclass.class_size 418 | if offset < superclass_size: 419 | _log(-1, 'Class {} has spanning access ({}, {}) from addresses {}', 420 | classname, offset, size, log_addrs(addresses_and_deltas)) 421 | if style != CLASS_UNIONS: 422 | break 423 | # If the access is unaligned with respect to the size, it's more likely to be 424 | # incorrect. Log it, but continue. 425 | if offset % size != 0: 426 | _log(2, 'Class {} has unaligned access ({}, {}) from addresses {}', 427 | classname, offset, size, log_addrs(addresses_and_deltas)) 428 | # Looks good, add it to the collection. 429 | class_accesses[ci.classname][offset_and_size] += len(addresses_and_deltas) 430 | class_operands[classname].update(addresses_and_deltas) 431 | break 432 | else: 433 | # Almost certainly this is caused when the same register is used for two different 434 | # classes, but the path that gets this class to this access is impossible to satisfy. 435 | _log(-1, 'Class {} has out-of-bounds access ({}, {}) from addresses {}', 436 | classname, offset, size, log_addrs(addresses_and_deltas)) 437 | return all_classes, class_accesses, class_operands 438 | 439 | def _convert_operands_to_struct_offsets(access_addresses): 440 | """Convert the operands that generated struct accesses into struct offsets.""" 441 | for classname, addresses_and_deltas in access_addresses.items(): 442 | sid = idau.struct_open(classname) 443 | if sid is not None: 444 | for ea, delta in addresses_and_deltas: 445 | insn = idautils.DecodeInstruction(ea) 446 | if insn: 447 | for op in insn.Operands: 448 | if op.type == idaapi.o_displ: 449 | if not idau.insn_op_stroff(insn, op.n, sid, delta): 450 | _log(1, 'Could not convert {:#x} to struct offset for class {} ' 451 | 'delta {}', ea, classname, delta) 452 | 453 | def _set_class_style(style): 454 | """Set the global class style.""" 455 | global _style_was_set, _create_class_structs, _populate_class_structs 456 | assert style in (CLASS_SLICES, CLASS_UNIONS) 457 | # Check the current style based on OSObject, a class that should always exist. 458 | sid = idau.struct_open('OSObject') 459 | want_union = style == CLASS_UNIONS 460 | if sid is None: 461 | # No global style has been set. 462 | idau.struct_create('OSObject', union=want_union) 463 | else: 464 | # A style already exists. Check that the requested style matches. 465 | is_union = bool(idc.IsUnion(sid)) 466 | if is_union != want_union: 467 | raise ValueError('Incompatible style {}', style) 468 | # Set the appropriate functions based on the style. 469 | if style == CLASS_SLICES: 470 | _create_class_structs = _create_class_structs__slices 471 | _populate_class_structs = _populate_class_structs__slices 472 | else: 473 | _create_class_structs = _create_class_structs__unions 474 | _populate_class_structs = _populate_class_structs__unions 475 | 476 | def process_functions(functions, style=DEFAULT_STYLE): 477 | """Process additional functions. 478 | 479 | Arguments: 480 | functions: An iterator returning (function, classname, register) tuples. 481 | 482 | Depends on initialize_class_structs. 483 | """ 484 | classes.collect_class_info() 485 | _set_class_style(style) 486 | # First, for each class, collect all the (offset, size) pairs and their associated (address, 487 | # delta) pairs. 488 | all_accesses = _collect_all_class_accesses(functions) 489 | # Now, classify the accesses. class_accesses is a map from each class name to a counter of how 490 | # many times we've seen each (offset, size) access pair that falls within the class's own 491 | # fields. class_operands is a map from each class name to the set of (address, delta) pairs 492 | # that access that class. 493 | all_classes, class_accesses, class_operands = _classify_class_accesses(all_accesses, style) 494 | # Next, for each class, create dummy versions of the class's structs, but don't populate them. 495 | # We do this first so that we'll have all the types we need available when populating the 496 | # structs below. 497 | class_structs = {} 498 | for classinfo in all_classes: 499 | data = _create_class_structs(classinfo) 500 | if data is not None: 501 | class_structs[classinfo] = data 502 | # Populate the class's structs using the access tuples. 503 | for classinfo, data in class_structs.items(): 504 | _populate_class_structs(classinfo, class_accesses, *data) 505 | # Finally, convert each operand that generated an access into an appropriately typed struct 506 | # offset reference. 507 | _convert_operands_to_struct_offsets(class_operands) 508 | 509 | #### Vtable type propagation ###################################################################### 510 | 511 | def _propagate_virtual_method_type_for_method(classinfo, class_vindex, vmethod): 512 | """Propagate the type of a class's virtual method to the vtable struct.""" 513 | if not idau.is_function_start(vmethod): 514 | _log(2, 'Not a function start: {:x}', vmethod) 515 | return False 516 | vmethod_type = idc.GuessType(vmethod) 517 | if not vmethod_type: 518 | _log(2, 'No guessed type: {:x}', vmethod) 519 | return False 520 | vmethod_ptr_type = symbol.convert_function_type_to_function_pointer_type(vmethod_type) 521 | if not vmethod_ptr_type: 522 | _log(2, 'Could not convert to function pointer type: {:x}', vmethod) 523 | return False 524 | vmethods_sid = idau.struct_open(classinfo.classname + '::vmethods') 525 | vmethod_offset = class_vindex * idau.WORD_SIZE 526 | vmethod_mid = idc.GetMemberId(vmethods_sid, vmethod_offset) 527 | if not bool(idc.SetType(vmethod_mid, vmethod_ptr_type)): 528 | _log(2, 'Could not set vmethod field type: {:x}, {}, {}', vmethod, classinfo.classname, 529 | class_vindex) 530 | return False 531 | return True 532 | 533 | def _propagate_virtual_method_types_for_class(classinfo): 534 | """Propagate the types of a class's virtual methods to the vtable struct.""" 535 | for relative_index, vmethod in enumerate(vtable.class_vtable_methods(classinfo, new=True)): 536 | _propagate_virtual_method_type_for_method(classinfo, relative_index, vmethod) 537 | 538 | def propagate_virtual_method_types_to_vtable_structs(): 539 | """Propagate the types of virtual methods to the corresponding entries in the vtables. 540 | 541 | This helps speed decompilation using Hex-Rays, but is not particularly accurate. 542 | 543 | By default, IDA will guess a type with an empty argument list for any function whose symbol 544 | includes an unknown struct type, which inhibits proper type inference. 545 | """ 546 | for classinfo in classes.class_info.values(): 547 | _propagate_virtual_method_types_for_class(classinfo) 548 | 549 | --------------------------------------------------------------------------------