├── ida_kernelcache.py
├── ida_kernelcache_reload.py
├── ida_kernelcache
    ├── internal.py
    ├── kernel.py
    ├── metaclass.py
    ├── tagged_pointers.py
    ├── __init__.py
    ├── build_struct.py
    ├── offset.py
    ├── classes.py
    ├── symbol.py
    ├── stub.py
    ├── segment.py
    ├── kplist.py
    ├── data_flow.py
    ├── collect_classes.py
    ├── vtable.py
    ├── ida_utilities.py
    └── class_struct.py
├── LICENSE
├── scripts
    ├── find_virtual_method_overrides.py
    ├── populate_struct.py
    └── process_external_methods.py
└── README.md


/ida_kernelcache.py:
--------------------------------------------------------------------------------
 1 | #
 2 | # ida_kernelcache.py
 3 | # Brandon Azad
 4 | #
 5 | # A script to import the ida_kernelcache module into IDA.
 6 | #
 7 | 
 8 | import ida_kernelcache
 9 | import ida_kernelcache as kc
10 | 


--------------------------------------------------------------------------------
/ida_kernelcache_reload.py:
--------------------------------------------------------------------------------
 1 | #
 2 | # ida_kernelcache_reload.py
 3 | # Brandon Azad
 4 | #
 5 | # A script to import the ida_kernelcache module into IDA, reloading all the necessary internal
 6 | # modules.
 7 | #
 8 | 
 9 | import sys
# Forget every already-imported module whose name mentions ida_kernelcache so that the imports
# below load fresh copies. Iterate over a snapshot of the names: deleting from sys.modules while
# iterating a live keys() view raises RuntimeError on Python 3 (on Python 2 keys() is a list).
for mod in list(sys.modules):
    if 'ida_kernelcache' in mod:
        del sys.modules[mod]
13 | 
14 | import ida_kernelcache
15 | import ida_kernelcache as kc
16 | 


--------------------------------------------------------------------------------
/ida_kernelcache/internal.py:
--------------------------------------------------------------------------------
 1 | #
 2 | # ida_kernelcache/internal.py
 3 | # Brandon Azad
 4 | #
 5 | # Miscellaneous internal routines.
 6 | #
 7 | 
 8 | from collections import defaultdict
 9 | 
10 | import idc
11 | 
12 | import ida_utilities as idau
13 | 
def make_name_generator(suffix, max_count=999999):
    """Create a function that hands out unused symbol names of the form name + suffix + index.

    For each base name the returned function remembers the last index it handed out (starting
    at 1) and resumes the search from there, so repeated calls do not rescan indexes that were
    already found to be taken.

    Arguments:
        suffix: The string placed between the base name and the numeric index. Base names
            passed to the generator must not already contain it.
        max_count: Exclusive upper bound on the index. Default is 999999.

    Returns:
        A function get_next(name) returning the first candidate name for which
        idau.get_name_ea() reports idc.BADADDR, or None once every index below max_count is
        in use.
    """
    next_index_dict = defaultdict(lambda: 1)
    def get_next(name):
        assert name, 'Invalid symbol name passed to name generator'
        assert suffix not in name, 'Symbol name passed to name generator already contains suffix'
        template = name + suffix
        for index in xrange(next_index_dict[name], max_count):
            new_name = template + str(index)
            if idau.get_name_ea(new_name) == idc.BADADDR:
                next_index_dict[name] = index
                return new_name
        # Every index is taken. Record that so later calls for this name return immediately.
        # This must write to next_index_dict; assigning to any other (undefined) name here
        # raises NameError instead of reporting exhaustion through the None return value.
        next_index_dict[name] = max_count
        return None
    return get_next
29 | 
30 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2018 Brandon Azad
 2 | 
 3 | Permission is hereby granted, free of charge, to any person obtaining a copy
 4 | of this software and associated documentation files (the "Software"), to deal
 5 | in the Software without restriction, including without limitation the rights
 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 7 | copies of the Software, and to permit persons to whom the Software is
 8 | furnished to do so, subject to the following conditions:
 9 | 
10 | The above copyright notice and this permission notice shall be included in
11 | all copies or substantial portions of the Software.
12 | 
13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 | THE SOFTWARE.
20 | 


--------------------------------------------------------------------------------
/ida_kernelcache/kernel.py:
--------------------------------------------------------------------------------
 1 | #
 2 | # ida_kernelcache/kernel.py
 3 | # Brandon Azad
 4 | #
 5 | # The kernel module holds functions and global variables pertaining to the kernel as a whole. No
 6 | # prior initialization via ida_kernelcache is necessary.
 7 | #
 8 | 
 9 | import idc
10 | import idautils
11 | import idaapi
12 | 
13 | import ida_utilities as idau
14 | import kplist
15 | 
# Logging helper for this module, built by idau.make_log(). The calls below pass a numeric
# level followed by a format string and its arguments. NOTE(review): the first argument to
# make_log is presumably a verbosity threshold -- confirm in ida_utilities.
_log = idau.make_log(0, __name__)
17 | 
def find_kernel_base():
    """Return the address that IDA maps to file offset 0 of the input file."""
    kernel_base = idaapi.get_fileregion_ea(0)
    return kernel_base

# Computed once, when this module is imported.
base = find_kernel_base()
"""The kernel base address (the address of the main kernel Mach-O header)."""
24 | 
def _find_prelink_info_segments():
    """Collect every IDA segment that may hold the __PRELINK_INFO data.

    A segment qualifies when its name contains '__PRELINK_INFO' or is exactly '__info', so the
    search works both before and after the segments are automatically renamed. Parsing the
    Mach-O would be a more reliable way to find it.

    Returns:
        The list of matching segments, as yielded by idautils.Segments() (possibly empty).
    """
    def is_candidate(segname):
        return '__PRELINK_INFO' in segname or segname == '__info'

    candidates = [seg for seg in idautils.Segments() if is_candidate(idc.SegName(seg))]
    count = len(candidates)
    if count < 1:
        _log(0, 'Could not find any __PRELINK_INFO segment candidates')
    elif count > 1:
        # Not fatal: the caller tries each candidate in turn.
        _log(1, 'Multiple segment names contain __PRELINK_INFO: {}',
                [idc.SegName(seg) for seg in candidates])
    return candidates
44 | 
def parse_prelink_info():
    """Locate the kernel's __PRELINK_INFO data and parse it.

    The string at the start of each candidate segment is handed to kplist.kplist_parse(); the
    first truthy parse result is returned.

    Returns:
        The parsed __PRELINK_INFO value, or None if no candidate could be parsed.
    """
    for candidate in _find_prelink_info_segments():
        parsed = kplist.kplist_parse(idc.GetString(candidate))
        if parsed:
            return parsed
    _log(0, 'Could not find __PRELINK_INFO')
    return None

# Parsed once, when this module is imported; None if parsing failed.
prelink_info = parse_prelink_info()
"""The kernel __PRELINK_INFO dictionary."""
58 | 
# Identifiers for the kernelcache layouts distinguished by this module.
KC_11_NORMAL = '11-normal'
KC_12_MERGED = '12-merged'

def _get_kernelcache_format():
    """Classify the kernelcache layout from the parsed __PRELINK_INFO dictionary.

    Returns:
        KC_11_NORMAL if the dictionary has a '_PrelinkLinkKASLROffsets' key, KC_12_MERGED if
        it does not, or None if __PRELINK_INFO could not be parsed (prelink_info is None).
    """
    if prelink_info is None:
        # parse_prelink_info() has already logged the failure. Report "unknown" rather than
        # letting the membership test below raise TypeError while this module is imported.
        return None
    if '_PrelinkLinkKASLROffsets' in prelink_info:
        return KC_11_NORMAL
    return KC_12_MERGED

# Determined once, when this module is imported. None means the format is unknown.
kernelcache_format = _get_kernelcache_format()
68 | 
69 | 


--------------------------------------------------------------------------------
/scripts/find_virtual_method_overrides.py:
--------------------------------------------------------------------------------
 1 | #
 2 | # scripts/find_virtual_method_overrides.py
 3 | # Brandon Azad
 4 | #
 5 | # Use ida_kernelcache to find classes that override a virtual method.
 6 | #
 7 | 
 8 | def kernelcache_find_virtual_method_overrides(classname=None, method=None):
 9 |     import idc
10 |     import idaapi
11 |     import ida_kernelcache as kc
12 | 
13 |     # Define the form to ask for the arguments.
14 |     class MyForm(idaapi.Form):
15 |         def __init__(self):
16 |             swidth = 40
17 |             idaapi.Form.__init__(self, r"""STARTITEM 0
18 | Find virtual method overrides
19 | 
20 | <#The class#Class :{classname}>
21 | <#The virtual method#Method:{method}>""", {
22 |                 'classname': idaapi.Form.StringInput(tp=idaapi.Form.FT_IDENT, swidth=swidth),
23 |                 'method':    idaapi.Form.StringInput(tp=idaapi.Form.FT_IDENT, swidth=swidth),
24 |             })
25 |         def OnFormChange(self, fid):
26 |             return 1
27 | 
28 |     kc.collect_class_info()
29 | 
30 |     if any(arg is None for arg in (classname, method)):
31 |         f = MyForm()
32 |         f.Compile()
33 |         f.classname.value = classname or ''
34 |         f.method.value    = method    or ''
35 |         ok = f.Execute()
36 |         if ok != 1:
37 |             print 'Cancelled'
38 |             return False
39 |         classname = f.classname.value
40 |         method    = f.method.value
41 |         f.Free()
42 | 
43 |     if classname not in kc.class_info:
44 |         print 'Not a valid class: {}'.format(classname)
45 |         return False
46 | 
47 |     print 'Subclasses of {} that override {}:'.format(classname, method)
48 |     baseinfo = kc.class_info[classname]
49 |     found = False
50 |     for classinfo in baseinfo.descendants():
51 |         for _, override, _ in kc.vtable.class_vtable_overrides(classinfo, superinfo=baseinfo,
52 |                 methods=True):
53 |             name = idc.NameEx(idc.BADADDR, override)
54 |             demangled = idc.Demangle(name, idc.GetLongPrm(idc.INF_SHORT_DN))
55 |             name = demangled if demangled else name
56 |             if method in name:
57 |                 print '{:#x}  {}'.format(override, classinfo.classname)
58 |                 found = True
59 |     if not found:
60 |         print 'No subclass of {} overrides {}'.format(classname, method)
61 |     return found
62 | 
63 | kernelcache_find_virtual_method_overrides()
64 | 
65 | 


--------------------------------------------------------------------------------
/ida_kernelcache/metaclass.py:
--------------------------------------------------------------------------------
 1 | #
 2 | # ida_kernelcache/metaclass.py
 3 | # Brandon Azad
 4 | #
 5 | # A module for working with OSMetaClass instances in the kernelcache.
 6 | #
 7 | 
 8 | import idc
 9 | 
10 | import ida_utilities as idau
11 | import classes
12 | import symbol
13 | 
# Logging helper for this module, built by idau.make_log(). The calls below pass a numeric
# level followed by a format string and its arguments. NOTE(review): the first argument to
# make_log is presumably a verbosity threshold -- confirm in ida_utilities.
_log = idau.make_log(0, __name__)
15 | 
def metaclass_name_for_class(classname):
    """Build the C++ name of the metaclass type that belongs to a class.

    Returns classname + '::MetaClass', or None if classname already contains '::'.
    """
    return None if '::' in classname else classname + '::MetaClass'
21 | 
def metaclass_instance_name_for_class(classname):
    """Build the C++ name of the metaclass instance that belongs to a class.

    Returns classname + '::gMetaClass', or None if classname already contains '::'.
    """
    return None if '::' in classname else classname + '::gMetaClass'
27 | 
def metaclass_symbol_for_class(classname):
    """Return the symbol name of the OSMetaClass instance for a class.

    Arguments:
        classname: The name of the C++ class.

    Returns:
        The result of symbol.global_name() for the class's metaclass instance name, or None
        if no instance name can be formed for classname.
    """
    instance_name = metaclass_instance_name_for_class(classname)
    return symbol.global_name(instance_name) if instance_name else None
41 | 
def add_metaclass_symbol(metaclass, classname):
    """Name the OSMetaClass instance at the given address after its class.

    Arguments:
        metaclass: The address of the OSMetaClass instance.
        classname: The name of the C++ class with this OSMetaClass instance.

    Returns:
        True if the symbol was set; False if no symbol could be generated for classname or
        the name could not be applied at the address.
    """
    metaclass_symbol = metaclass_symbol_for_class(classname)
    if not metaclass_symbol:
        # metaclass_symbol_for_class() yields None for class names it cannot handle; do not
        # hand that to set_ea_name().
        _log(0, 'Could not generate OSMetaClass instance symbol for class {}', classname)
        return False
    if not idau.set_ea_name(metaclass, metaclass_symbol):
        # Pass the values as log arguments, like every other _log() call in this module,
        # instead of formatting the message up front.
        _log(0, 'Address {:#x} already has name {} instead of OSMetaClass instance symbol {}',
                metaclass, idau.get_ea_name(metaclass), metaclass_symbol)
        return False
    return True
58 | 
def initialize_metaclass_symbols():
    """Add an OSMetaClass instance symbol for every class with a known instance.

    Class information is collected first via classes.collect_class_info(); then each entry of
    classes.class_info with a metaclass address gets a symbol at that address. Classes without
    one, and failures to add a symbol, are only logged.
    """
    classes.collect_class_info()
    for classname, classinfo in classes.class_info.items():
        metaclass = classinfo.metaclass
        if not metaclass:
            _log(1, 'Class {} has no known OSMetaClass instance', classname)
            continue
        _log(1, 'Class {} has OSMetaClass instance at {:#x}', classname, metaclass)
        if add_metaclass_symbol(metaclass, classname):
            continue
        _log(0, 'Could not add metaclass symbol for class {} at address {:#x}', classname,
                metaclass)
74 | 
75 | 


--------------------------------------------------------------------------------
/ida_kernelcache/tagged_pointers.py:
--------------------------------------------------------------------------------
 1 | #
 2 | # ida_kernelcache/tagged_pointers.py
 3 | # Brandon Azad
 4 | #
 5 | """ida_kernelcache.tagged_pointers
 6 | 
 7 | This module is responsible for processing the tagged pointers in the new iOS 12 kernelcache and
 8 | replacing them with their untagged equivalents. All found pointers are also converted into offsets.
 9 | 
10 | In an alternative implementation, we could just add cross-references in IDA. However, I think this
11 | approach is better because it is closer to what the kernelcache looks like at runtime.
12 | """
13 | 
14 | import idc
15 | import idautils
16 | 
17 | import ida_utilities as idau
18 | import kernel
19 | 
# Logging helper for this module, built by idau.make_log(). The calls below pass a numeric
# level followed by a format string and its arguments. NOTE(review): the first argument to
# make_log is presumably a verbosity threshold -- confirm in ida_utilities.
_log = idau.make_log(1, __name__)
21 | 
def tagged_pointer_tag(tp):
    """Extract the 16-bit tag held in bits 48-63 of a tagged pointer value."""
    tag_shift = 48
    tag_mask = 0xffff
    return (tp >> tag_shift) & tag_mask
24 | 
def tagged_pointer_untag(tp):
    """Return tp with its top 16 bits all set, which overwrites the tag."""
    all_ones_tag = 0xffff << 48
    return tp | all_ones_tag
27 | 
def is_tagged_pointer_format(value):
    """Check whether a 64-bit value has the bit pattern of a tagged pointer.

    The tag (bits 48-63) must not be 0xffff, and bits 32-47 must equal 0xfff0.
    """
    if tagged_pointer_tag(value) == 0xffff:
        # An all-ones tag is not a tagged pointer.
        return False
    return (value & 0x0000ffff00000000) == 0x0000fff000000000
31 | 
def is_tagged_pointer(value):
    """Check whether value is a tagged pointer whose untagged form is a mapped address.

    The value must have the tagged pointer bit pattern, and the address obtained by untagging
    it must pass idau.is_mapped(..., value=False).
    """
    if not is_tagged_pointer_format(value):
        return False
    untagged = tagged_pointer_untag(value)
    return idau.is_mapped(untagged, value=False)
35 | 
def tagged_pointer_link(tag):
    """Compute the link offset encoded in a tag.

    The result is the tag shifted right by one bit with its two lowest bits cleared;
    tagged_pointer_next() adds it to the current address to reach the next tagged pointer.
    """
    halved = tag >> 1
    return halved & ~0x3
38 | 
def tagged_pointer_next(ea, tp, end=None):
    """Find the address of the next tagged pointer.

    Arguments:
        ea: The address to continue from (must be truthy).
        tp: The tagged pointer value stored at ea, or a falsy value if ea does not hold one.
        end: Where to stop scanning. Defaults to idc.SegEnd(ea).

    Returns:
        ea plus the link offset encoded in tp's tag when that offset is nonzero. Otherwise the
        address of the first tagged pointer found by a forward scan in 4-byte steps, or None
        if the scan finds none.
    """
    assert ea
    if tp:
        # Prefer the link stored in the tag of the current pointer.
        link_offset = tagged_pointer_link(tagged_pointer_tag(tp))
        if link_offset:
            return ea + link_offset
        # No link: start the scan just past the current tagged pointer.
        ea += idau.WORD_SIZE
    _log(3, 'Scanning for next tagged pointer')
    scan_end = idc.SegEnd(ea) if end is None else end
    words = idau.ReadWords(ea, scan_end, step=4, addresses=True)
    return next((word_ea for word, word_ea in words if is_tagged_pointer(word)), None)
57 | 
def untag_pointer(ea, tp):
    """Overwrite the tagged pointer tp at ea with its untagged value and make it an offset."""
    _log(4, 'Untagging pointer at {:x}', ea)
    untagged = tagged_pointer_untag(tp)
    idau.patch_word(ea, untagged)
    idc.OpOff(ea, 0, 0)
62 | 
def untag_pointers_in_range(start, end):
    """Untag every tagged pointer reachable from start that lies before end.

    Pointers are visited in the order produced by tagged_pointer_next(). The walk stops when
    no further pointer is found, when the next address is at or past end, or (with a log
    message) when the word at the next address is not a tagged pointer.

    Only valid for the 12-merged kernelcache format.
    """
    assert kernel.kernelcache_format == kernel.KC_12_MERGED, 'Wrong kernelcache format'
    cursor = tagged_pointer_next(start, None, end)
    while cursor is not None and cursor < end:
        word = idau.read_word(cursor)
        if not is_tagged_pointer(word):
            _log(1, 'Tagged pointer traversal failed: ea={:x}, tp={:x}'.format(cursor, word))
            return
        untag_pointer(cursor, word)
        cursor = tagged_pointer_next(cursor, word, end)
75 | 
def untag_pointers():
    """Untag the tagged pointers in every segment of the database."""
    _log(2, 'Starting tagged pointer conversion')
    for seg in idautils.Segments():
        seg_start = idc.SegStart(seg)
        seg_end = idc.SegEnd(seg)
        untag_pointers_in_range(seg_start, seg_end)
    _log(2, 'Tagged pointer conversion complete')
81 | 
82 | 


--------------------------------------------------------------------------------
/ida_kernelcache/__init__.py:
--------------------------------------------------------------------------------
 1 | #
 2 | # ida_kernelcache/__init__.py
 3 | # Brandon Azad
 4 | #
 5 | # The main ida_kernelcache module containing my iOS kernelcache utilities for IDA.
 6 | #
 7 | 
 8 | # This isn't kernelcache-specific, but it's useful to have access to in the interpreter and other
 9 | # scripts.
10 | import ida_utilities
11 | 
12 | import build_struct
13 | import class_struct
14 | import classes
15 | import kernel
16 | import kplist
17 | import metaclass
18 | import offset
19 | import segment
20 | import stub
21 | import tagged_pointers
22 | import vtable
23 | 
24 | from classes import (ClassInfo, collect_class_info, class_info)
25 | from kplist  import (kplist_parse)
26 | from segment import (kernelcache_kext)
27 | 
def kernelcache_process(untag_pointers=True):
    """Run the complete first-time processing of a kernelcache in IDA.

    The steps below run in order, waiting for IDA's auto-analysis between most of them:
        * Replace iOS 12 static tagged pointers with normal kernel pointers (12-merged format
          only, when untag_pointers is true and the IDA SDK version is below 720).
        * Initialize segments (segment.initialize_segments()).
        * Convert pointers in data segments into offsets.
        * Locate virtual method tables and add vtable symbols.
        * Add OSMetaClass instance symbols.
        * Symbolicate `__got` offsets and stub functions (11-normal format only).
        * Propagate method symbols through vtables.
        * Create IDA structs for the vtables and C++ classes.

    Arguments:
        untag_pointers: Set to False to skip the tagged pointer conversion. Default is True.
    """
    import idaapi
    import idc

    def wait_for_analysis():
        idc.Wait()

    wait_for_analysis()
    merged_format = kernel.kernelcache_format == kernel.KC_12_MERGED
    if merged_format and untag_pointers and idaapi.IDA_SDK_VERSION < 720:
        print('Processing tagged kernelcache pointers')
        tagged_pointers.untag_pointers()
        wait_for_analysis()
    segment.initialize_segments()

    print('Initializing data offsets')
    offset.initialize_data_offsets()
    wait_for_analysis()

    print('Initializing vtables')
    vtable.initialize_vtables()
    wait_for_analysis()
    vtable.initialize_vtable_symbols()
    wait_for_analysis()

    metaclass.initialize_metaclass_symbols()
    if kernel.kernelcache_format == kernel.KC_11_NORMAL:
        print('Creating offset and stub symbols')
        offset.initialize_offset_symbols()
        wait_for_analysis()
        stub.initialize_stub_symbols()
        wait_for_analysis()

    print('Propagating vtable method symbols')
    vtable.initialize_vtable_method_symbols()

    print('Initializing class structs')
    class_struct.initialize_vtable_structs()
    class_struct.initialize_class_structs()
    wait_for_analysis()
    print('Done')
77 | 
78 | 


--------------------------------------------------------------------------------
/ida_kernelcache/build_struct.py:
--------------------------------------------------------------------------------
 1 | #
 2 | # ida_kernelcache/build_struct.py
 3 | # Brandon Azad
 4 | #
 5 | # A module to build an IDA structure automatically from code accesses.
 6 | #
 7 | 
 8 | import collections
 9 | 
10 | import idc
11 | import idautils
12 | import idaapi
13 | 
14 | import ida_utilities as idau
15 | 
# Logging helper for this module, built by idau.make_log(). The calls below pass a numeric
# level followed by a format string and its arguments. NOTE(review): the first argument to
# make_log is presumably a verbosity threshold -- confirm in ida_utilities.
_log = idau.make_log(3, __name__)
17 | 
def field_name(offset):
    """Name a generated struct field after its absolute offset: 'field_' + lowercase hex."""
    return 'field_' + format(offset, 'x')
21 | 
def create_struct_fields(sid=None, name=None, accesses=None, create=False, base=0):
    """Create fields in an IDA struct for each access in the specified access pattern.

    Given a sequence of (offset, size) tuples designating the valid access points to the struct,
    create fields in the struct at the corresponding positions.

    Options:
        sid: The struct id, if the struct already exists.
        name: The name of the struct to update or create. Ignored when sid is given.
        accesses: An iterable of (offset, size) tuples representing the valid access points in
            the struct. None is treated as no accesses.
        create: If True, then the struct will be created with the specified name if it does not
            already exist. Default is False.
        base: The base offset for the struct. Offsets smaller than this are ignored, otherwise the
            field is created at the offset minus the base. Default is 0.

    Either sid or name must be specified.

    Returns:
        False if the struct could not be opened or a field could not be added for a reason
        other than idc.STRUC_ERROR_MEMBER_OFFSET; True otherwise.
    """
    # Get the struct id.
    if sid is None:
        if name is None:
            _log(0, 'Either a struct id or a struct name must be specified')
            return False
        # Honor the caller's create flag instead of unconditionally creating the struct.
        sid = idau.struct_open(name, create=create)
        if sid is None:
            _log(0, 'Could not open struct {}', name)
            return False
    else:
        name = idc.GetStrucName(sid)
        if name is None:
            _log(0, 'Invalid struct id {}', sid)
            return False
    # Now, for each (offset, size) pair, create a struct member. Right now we completely ignore the
    # possibility that some members will overlap (for various reasons; it's actually more common
    # than I initially thought, though I haven't investigated why).
    # TODO: In the future we should address this by either automatically generating sub-unions or
    # choosing the most appropriate member when permissible (e.g. (0, 8), (0, 2), (4, 4) might
    # create (0, 2), (2, 2), (4, 4)). I think the most reasonable default policy is to create the
    # biggest members that satisfy all accesses.
    success = True
    for offset, size in (accesses or ()):
        if offset < base:
            continue
        member = field_name(offset)
        ret = idau.struct_add_word(sid, member, offset - base, size)
        if ret == 0:
            continue
        if ret == idc.STRUC_ERROR_MEMBER_OFFSET:
            # An offset conflict is logged but not treated as a failure (see the note above
            # about overlapping members).
            _log(2, 'Could not add {}.{} for access ({}, {})', name, member, offset, size)
        else:
            success = False
            _log(1, 'Could not add {}.{} for access ({}, {}): {}', name, member, offset, size,
                    ret)
    return success
72 | 
73 | 


--------------------------------------------------------------------------------
/ida_kernelcache/offset.py:
--------------------------------------------------------------------------------
  1 | #
  2 | # ida_kernelcache/offset.py
  3 | # Brandon Azad
  4 | #
  5 | # Functions for converting and symbolicating offsets.
  6 | #
  7 | 
  8 | import re
  9 | 
 10 | import idc
 11 | import idautils
 12 | 
 13 | import ida_utilities as idau
 14 | import internal
 15 | import kernel
 16 | import stub
 17 | 
# Logging helper for this module, built by idau.make_log(). The calls below pass a numeric
# level followed by a format string and its arguments. NOTE(review): the first argument to
# make_log is presumably a verbosity threshold -- confirm in ida_utilities.
_log = idau.make_log(1, __name__)
 19 | 
 20 | def initialize_data_offsets():
 21 |     """Convert offsets in data segments into offsets in IDA.
 22 | 
 23 |     Segment names must be initialized with segments.initialize_segments() first.
 24 |     """
 25 |     # Normally, for user-space programs, this operation would be dangerous because there's a good
 26 |     # chance that a valid userspace address would happen to show up in regular program data that is
 27 |     # not actually an address. However, since kernel addresses are numerically much larger, the
 28 |     # chance of this happening is much less.
 29 |     for seg in idautils.Segments():
 30 |         name = idc.SegName(seg)
 31 |         if not (name.endswith('__DATA_CONST.__const') or name.endswith('__got')
 32 |                 or name.endswith('__DATA.__data')):
 33 |             continue
 34 |         for word, ea in idau.ReadWords(seg, idc.SegEnd(seg), addresses=True):
 35 |             if idau.is_mapped(word, value=False):
 36 |                 idc.OpOff(ea, 0, 0)
 37 | 
 38 | kernelcache_offset_suffix = '___offset_'
 39 | """The suffix that gets appended to a symbol to create the offset name, without the offset ID."""
 40 | 
 41 | _offset_regex = re.compile(r"^(\S+)" + kernelcache_offset_suffix + r"\d+$")
 42 | """A regular expression to match and extract the target name from an offset symbol."""
 43 | 
 44 | def offset_name_target(offset_name):
 45 |     """Get the target to which an offset name refers.
 46 | 
 47 |     No checks are performed to ensure that the target actually exists.
 48 |     """
 49 |     match = _offset_regex.match(offset_name)
 50 |     if not match:
 51 |         return None
 52 |     return match.group(1)
 53 | 
 54 | def _process_offset(offset, ea, next_offset):
 55 |     """Process an offset in a __got section."""
 56 |     # Convert the address containing the offset into an offset in IDA, but continue if it fails.
 57 |     if not idc.OpOff(ea, 0, 0):
 58 |         _log(1, 'Could not convert {:#x} into an offset', ea)
 59 |     # Get the name to which the offset refers.
 60 |     name = idau.get_ea_name(offset, user=True)
 61 |     if not name:
 62 |         _log(3, 'Offset at address {:#x} has target {:#x} without a name', ea, offset)
 63 |         return False
 64 |     # Make sure this isn't an offset to another stub or to a jump function to another stub. See the
 65 |     # comment in _symbolicate_stub.
 66 |     if stub.symbol_references_stub(name):
 67 |         _log(1, 'Offset at address {:#x} has target {:#x} (name {}) that references a stub', ea,
 68 |                 offset, name)
 69 |         return False
 70 |     # Set the new name for the offset.
 71 |     symbol = next_offset(name)
 72 |     if symbol is None:
 73 |         _log(0, 'Could not generate offset symbol for {}: names exhausted', name)
 74 |         return False
 75 |     if not idau.set_ea_name(ea, symbol, auto=True):
 76 |         _log(2, 'Could not set name {} for offset at {:#x}', symbol, ea)
 77 |         return False
 78 |     return True
 79 | 
 80 | def _process_offsets_section(segstart, next_offset):
 81 |     """Process all the offsets in a __got section."""
 82 |     for offset, ea in idau.ReadWords(segstart, idc.SegEnd(segstart), addresses=True):
 83 |         if not offset_name_target(idau.get_ea_name(ea)):
 84 |             # This is not a previously named offset.
 85 |             if idau.is_mapped(offset, value=False):
 86 |                 _process_offset(offset, ea, next_offset)
 87 |             else:
 88 |                 _log(-1, 'Offset {:#x} at address {:#x} is unmapped', offset, ea)
 89 | 
 90 | def initialize_offset_symbols():
 91 |     """Populate IDA with information about the offsets in an iOS kernelcache.
 92 | 
 93 |     Search through the kernelcache for global offset tables (__got sections), convert each offset
 94 |     into an offset type in IDA, and rename each offset according to its target.
 95 | 
 96 |     This function does nothing in the newer 12-merged format kernelcache.
 97 |     """
 98 |     next_offset = internal.make_name_generator(kernelcache_offset_suffix)
 99 |     for ea in idautils.Segments():
100 |         segname = idc.SegName(ea)
101 |         if not segname.endswith('__got'):
102 |             continue
103 |         _log(2, 'Processing segment {}', segname)
104 |         _process_offsets_section(ea, next_offset)
105 | 
106 | 


--------------------------------------------------------------------------------
/scripts/populate_struct.py:
--------------------------------------------------------------------------------
  1 | #
  2 | # scripts/populate_struct.py
  3 | # Brandon Azad
  4 | #
  5 | # Populate a class or struct using data flow analysis.
  6 | #
  7 | 
  8 | def kernelcache_populate_struct(struct=None, address=None, register=None, delta=None):
  9 |     import idc
 10 |     import idautils
 11 |     import idaapi
 12 |     import ida_kernelcache as kc
 13 |     import ida_kernelcache.ida_utilities as idau
 14 | 
 15 |     # Define the form to ask for the arguments.
 16 |     class MyForm(idaapi.Form):
 17 |         def __init__(self):
 18 |             swidth = 40
 19 |             idaapi.Form.__init__(self, r"""STARTITEM 0
 20 | Automatically populate struct fields
 21 | 
 22 | <#The name of the structure#Structure:{structure}>
 23 | <#The address of the instruction at which the register points to the structure#Address  :{address}>
 24 | <#The register containing the pointer to the structure#Register :{register}>
 25 | <#The offset of the pointer from the start of the structure#Delta    :{delta}>""", {
 26 |                 'structure': idaapi.Form.StringInput( tp=idaapi.Form.FT_IDENT, swidth=swidth),
 27 |                 'address':   idaapi.Form.NumericInput(tp=idaapi.Form.FT_ADDR,  swidth=swidth, width=1000),
 28 |                 'register':  idaapi.Form.StringInput( tp=idaapi.Form.FT_IDENT, swidth=swidth),
 29 |                 'delta':     idaapi.Form.NumericInput(tp=idaapi.Form.FT_INT64, swidth=swidth),
 30 |             })
 31 |         def OnFormChange(self, fid):
 32 |             return 1
 33 | 
 34 |     # If any argument is unspecified, get it using the form.
 35 |     if any(arg is None for arg in (struct, address, register, delta)):
 36 |         f = MyForm()
 37 |         f.Compile()
 38 |         f.structure.value = struct or ''
 39 |         f.address.value   = address or idc.ScreenEA()
 40 |         f.register.value  = register or 'X0'
 41 |         f.delta.value     = delta or 0
 42 |         ok = f.Execute()
 43 |         if ok != 1:
 44 |             print 'Cancelled'
 45 |             return False
 46 |         struct   = f.structure.value
 47 |         address  = f.address.value
 48 |         register = f.register.value
 49 |         delta    = f.delta.value
 50 |         f.Free()
 51 | 
 52 |     # Check whether this struct is a class.
 53 |     kc.collect_class_info()
 54 |     is_class = struct in kc.class_info
 55 | 
 56 |     # Open the structure.
 57 |     sid = idau.struct_open(struct, create=True)
 58 |     if sid is None:
 59 |         print 'Could not open struct {}'.format(struct)
 60 |         return False
 61 | 
 62 |     # Check that the address is in a function.
 63 |     if not idaapi.get_func(address):
 64 |         print 'Address {:#x} is not a function'.format(address)
 65 |         return False
 66 | 
 67 |     # Get the register id.
 68 |     register_id = None
 69 |     if type(register) is str:
 70 |         register_id = idaapi.str2reg(register)
 71 |     elif type(register) is int:
 72 |         register_id = register
 73 |         register    = idaapi.get_reg_name(register_id, 8)
 74 |     if register_id is None or register_id < 0:
 75 |         print 'Invalid register {}'.format(register)
 76 |         return False
 77 | 
 78 |     # Validate delta.
 79 |     if delta < 0 or delta > 0x1000000:
 80 |         print 'Invalid delta {}'.format(delta)
 81 |         return False
 82 |     elif is_class and delta != 0:
 83 |         print 'Nonzero delta not yet supported'
 84 |         return False
 85 | 
 86 |     type_name = 'class' if is_class else 'struct'
 87 |     print '{} = {}, address = {:#x}, register = {}, delta = {:#x}'.format(type_name, struct,
 88 |             address, register, delta)
 89 | 
 90 |     if is_class:
 91 |         # Run the analysis.
 92 |         kc.class_struct.process_functions([(address, struct, register_id)])
 93 |     else:
 94 |         # Run the data flow to collect the accesses and then add those fields to the struct.
 95 |         accesses = kc.data_flow.pointer_accesses(function=address,
 96 |                 initialization={ address: { register_id: delta } })
 97 |         kc.build_struct.create_struct_fields(sid, accesses=accesses)
 98 | 
 99 |         # Set the offsets to stroff.
100 |         for addresses_and_deltas in accesses.values():
101 |             for ea, delta in addresses_and_deltas:
102 |                 insn = idautils.DecodeInstruction(ea)
103 |                 if insn:
104 |                     for op in insn.Operands:
105 |                         if op.type == idaapi.o_displ:
106 |                             idau.insn_op_stroff(insn, op.n, sid, delta)
107 | 
108 |     # All done! :)
109 |     print 'Done'
110 |     return True
111 | 
112 | kernelcache_populate_struct()
113 | 
114 | 


--------------------------------------------------------------------------------
/ida_kernelcache/classes.py:
--------------------------------------------------------------------------------
  1 | #
  2 | # ida_kernelcache/classes.py
  3 | # Brandon Azad
  4 | #
  5 | # This module defines the ClassInfo class, which stores information about a C++ class in the
  6 | # kernelcache. It also provides the function collect_class_info() to scan the kernelcache for
  7 | # information about C++ classes and populate global variables with the result.
  8 | #
  9 | 
 10 | import collect_classes
 11 | import ida_utilities as idau
 12 | import vtable
 13 | 
 14 | class_info = {}
 15 | """A global map from class names to ClassInfo objects. See collect_class_info()."""
 16 | 
 17 | vtables = {}
 18 | """A global map from the address of each virtual method table in the kernelcache to its length."""
 19 | 
 20 | class ClassInfo(object):
 21 |     """Information about a C++ class in a kernelcache."""
 22 | 
 23 |     def __init__(self, classname, metaclass, vtable, vtable_length, class_size, superclass_name,
 24 |             meta_superclass):
 25 |         self.superclass      = None
 26 |         self.subclasses      = set()
 27 |         self.classname       = classname
 28 |         self.metaclass       = metaclass
 29 |         self.vtable          = vtable
 30 |         self.vtable_length   = vtable_length
 31 |         self.class_size      = class_size
 32 |         self.superclass_name = superclass_name
 33 |         self.meta_superclass = meta_superclass
 34 | 
 35 |     def __repr__(self):
 36 |         def hex(x):
 37 |             if x is None:
 38 |                 return repr(None)
 39 |             return '{:#x}'.format(x)
 40 |         return 'ClassInfo({!r}, {}, {}, {}, {}, {!r}, {})'.format(
 41 |                 self.classname, hex(self.metaclass), hex(self.vtable),
 42 |                 self.vtable_length, self.class_size, self.superclass_name,
 43 |                 hex(self.meta_superclass))
 44 | 
 45 |     @property
 46 |     def vtable_methods(self):
 47 |         return self.vtable + vtable.VTABLE_OFFSET * idau.WORD_SIZE
 48 | 
 49 |     @property
 50 |     def vtable_nmethods(self):
 51 |         if not self.vtable_length or self.vtable_length < vtable.VTABLE_OFFSET:
 52 |             return 0
 53 |         return self.vtable_length - vtable.VTABLE_OFFSET
 54 | 
 55 |     def ancestors(self, inclusive=False):
 56 |         """A generator over all direct or indircet superclasses of this class.
 57 | 
 58 |         Ancestors are returned in order from root (most distance) to superclass (closest), and the
 59 |         class itself is not returned.
 60 | 
 61 |         Options:
 62 |             inclusive: If True, then this class is included in the iteration. Default is False.
 63 |         """
 64 |         if self.superclass:
 65 |             for ancestor in self.superclass.ancestors(inclusive=True):
 66 |                 yield ancestor
 67 |         if inclusive:
 68 |             yield self
 69 | 
 70 |     def descendants(self, inclusive=False):
 71 |         """A generator over all direct or indircet subclasses of this class.
 72 | 
 73 |         Descendants are returned in descending depth-first order: first a subclass will be
 74 |         returned, then all of its descendants, before going on to the next subclass of this class.
 75 | 
 76 |         Options:
 77 |             inclusive: If True, then this class is included in the iteration. Default is False.
 78 |         """
 79 |         if inclusive:
 80 |             yield self
 81 |         for subclass in self.subclasses:
 82 |             for descendant in subclass.descendants(inclusive=True):
 83 |                 yield descendant
 84 | 
 85 | def collect_class_info():
 86 |     """Collect information about C++ classes defined in a kernelcache.
 87 | 
 88 |     This function searches through an iOS kernelcache for information about the C++ classes defined
 89 |     in it. It populates the global class_info dictionary, which maps the C++ class names to a
 90 |     ClassInfo object containing metainformation about the class.
 91 | 
 92 |     To force re-evaluation of the class_info dictionary, call class_info.clear() and then re-run
 93 |     this function.
 94 | 
 95 |     This function also collects the set of all virtual method tables identified in the kernelcache,
 96 |     even if the corresponding class could not be identified. A mapping from each virtual method
 97 |     table to its length is stored in the global vtables variable.
 98 | 
 99 |     Only Arm64 is supported at this time.
100 | 
101 |     Only top-level classes are processed. Information about nested classes is not collected.
102 |     """
103 |     global class_info, vtables
104 |     if not class_info:
105 |         vtables.clear()
106 |         result = collect_classes.collect_class_info_internal()
107 |         if result is not None:
108 |             all_class_info, all_vtables = result
109 |             class_info.update(all_class_info)
110 |             vtables.update(all_vtables)
111 | 


--------------------------------------------------------------------------------
/scripts/process_external_methods.py:
--------------------------------------------------------------------------------
  1 | #
  2 | # scripts/process_external_methods.py
  3 | # Brandon Azad
  4 | #
  5 | # Parse a list of IOExternalMethod or IOExternalMethodDispatch structs and print metainformation
  6 | # about the selectors in the format:
  7 | #   { selector, input_scalars_count, input_structure_size, output_scalars_count, output_structure_size }
  8 | #
  9 | 
 10 | def kernelcache_process_external_methods(ea=None, struct_type=None, count=None):
 11 |     import idc
 12 |     import ida_kernelcache as kc
 13 |     import ida_kernelcache.ida_utilities as idau
 14 | 
 15 |     kIOUCVariableStructureSize = 0xffffffff
 16 | 
 17 |     kIOUCTypeMask = 0xf
 18 |     kIOUCScalarIScalarO = 0
 19 |     kIOUCScalarIStructO = 2
 20 |     kIOUCStructIStructO = 3
 21 |     kIOUCScalarIStructI = 4
 22 | 
 23 |     kIOUCFlags = 0xff
 24 | 
 25 |     IOExternalMethod_types = (kIOUCScalarIScalarO, kIOUCScalarIStructO, kIOUCStructIStructO,
 26 |             kIOUCScalarIStructI)
 27 | 
 28 |     IOExternalMethod_count0_scalar = (kIOUCScalarIScalarO, kIOUCScalarIStructO,
 29 |             kIOUCScalarIStructI)
 30 | 
 31 |     IOExternalMethod_count1_scalar = (kIOUCScalarIScalarO,)
 32 | 
 33 |     def check_scalar(scalar_count):
 34 |         return (0 <= scalar_count <= 400)
 35 | 
 36 |     def check_structure(structure_size):
 37 |         return (0 <= structure_size <= 0x100000 or structure_size == kIOUCVariableStructureSize)
 38 | 
 39 |     def is_IOExternalMethodDispatch(obj):
 40 |         return (idau.is_mapped(obj.function)
 41 |                 and check_scalar(obj.checkScalarInputCount)
 42 |                 and check_structure(obj.checkStructureInputSize)
 43 |                 and check_scalar(obj.checkScalarOutputCount)
 44 |                 and check_structure(obj.checkStructureOutputSize))
 45 | 
 46 |     def process_IOExternalMethodDispatch(obj):
 47 |         return (obj.checkScalarInputCount, obj.checkStructureInputSize,
 48 |                 obj.checkScalarOutputCount, obj.checkStructureOutputSize)
 49 | 
 50 |     def is_IOExternalMethod(obj):
 51 |         method_type = obj.flags & kIOUCTypeMask
 52 |         check_count0 = check_scalar if method_type in IOExternalMethod_count0_scalar else check_structure
 53 |         check_count1 = check_scalar if method_type in IOExternalMethod_count1_scalar else check_structure
 54 |         return ((obj.object == 0 or idau.is_mapped(obj.object))
 55 |                 and (obj.flags & kIOUCFlags == obj.flags)
 56 |                 and idau.is_mapped(obj.func)
 57 |                 and method_type in IOExternalMethod_types
 58 |                 and check_count0(obj.count0)
 59 |                 and check_count1(obj.count1))
 60 | 
 61 |     def process_IOExternalMethod(obj):
 62 |         isc, iss, osc, oss = 0, 0, 0, 0
 63 |         method_type = obj.flags & kIOUCTypeMask
 64 |         if method_type == kIOUCScalarIScalarO:
 65 |             isc, osc = obj.count0, obj.count1
 66 |         elif method_type == kIOUCScalarIStructO:
 67 |             isc, oss = obj.count0, obj.count1
 68 |         elif method_type == kIOUCStructIStructO:
 69 |             iss, oss = obj.count0, obj.count1
 70 |         elif method_type == kIOUCScalarIStructI:
 71 |             isc, iss = obj.count0, obj.count1
 72 |         else:
 73 |             assert False
 74 |         return (isc, iss, osc, oss)
 75 | 
 76 |     TYPE_MAP = {
 77 |             'IOExternalMethodDispatch':
 78 |                 (is_IOExternalMethodDispatch, process_IOExternalMethodDispatch),
 79 |             'IOExternalMethod': (is_IOExternalMethod, process_IOExternalMethod),
 80 |     }
 81 | 
 82 |     # Get the EA.
 83 |     if ea is None:
 84 |         ea = idc.ScreenEA()
 85 | 
 86 |     # Get the struct_type and the check and process functions.
 87 |     if struct_type is None:
 88 |         for stype in TYPE_MAP:
 89 |             struct_type = stype
 90 |             check, process = TYPE_MAP[struct_type]
 91 |             obj = idau.read_struct(ea, struct=struct_type, asobject=True)
 92 |             if check(obj):
 93 |                 break
 94 |         else:
 95 |             print 'Address {:#x} does not look like any known external method struct'.format(ea)
 96 |             return False
 97 |     else:
 98 |         if struct_type not in TYPE_MAP:
 99 |             print 'Unknown external method struct type {}'.format(struct_type)
100 |             return False
101 |         check, process = TYPE_MAP[struct_type]
102 |         obj = idau.read_struct(ea, struct=struct_type, asobject=True)
103 |         if not check(obj):
104 |             print 'Address {:#x} does not look like {}'.format(ea, struct_type)
105 | 
106 |     # Process the external methods.
107 |     selector = 0;
108 |     while (count is None and check(obj)) or (selector < count):
109 |         isc, iss, osc, oss = process(obj)
110 |         print '{{ {:3}, {:5}, {:#10x}, {:5}, {:#10x} }}'.format(selector, isc, iss, osc, oss)
111 |         selector += 1
112 |         ea += len(obj)
113 |         obj = idau.read_struct(ea, struct=struct_type, asobject=True)
114 | 
115 |     return True
116 | 
117 | kernelcache_process_external_methods()
118 | 
119 | 


--------------------------------------------------------------------------------
/ida_kernelcache/symbol.py:
--------------------------------------------------------------------------------
  1 | #
  2 | # ida_kernelcache/symbol.py
  3 | # Brandon Azad
  4 | #
  5 | """ida_kernelcache.symbol
  6 | 
  7 | This module deals with processing and transforming symbol strings. It does not modify IDA.
  8 | 
  9 | TODO: A lot of functions in this module really have to do with processing type strings, not symbol
 10 | strings.
 11 | """
 12 | 
 13 | import re
 14 | 
 15 | import idc
 16 | import idaapi
 17 | 
 18 | def method_name(symbol):
 19 |     """Get the name of the C++ method from its symbol.
 20 | 
 21 |     If the symbol demangles to 'Class::method(args)', this function returns 'method'.
 22 |     """
 23 |     try:
 24 |         demangled  = idc.Demangle(symbol, idc.GetLongPrm(idc.INF_SHORT_DN))
 25 |         func       = demangled.split('::', 1)[1]
 26 |         base       = func.split('(', 1)[0]
 27 |         return base or None
 28 |     except:
 29 |         return None
 30 | 
 31 | def method_arguments_string(symbol):
 32 |     """Get the arguments string of the C++ method from its symbol.
 33 | 
 34 |     If the symbol demangles to 'Class::method(arg1, arg2)', this function returns 'arg1, arg2'.
 35 |     """
 36 |     try:
 37 |         demangled  = idc.Demangle(symbol, idc.GetLongPrm(idc.INF_LONG_DN))
 38 |         func       = demangled.split('::', 1)[1]
 39 |         args       = func.split('(', 1)[1]
 40 |         args       = args.rsplit(')', 1)[0].strip()
 41 |         return args
 42 |     except:
 43 |         return None
 44 | 
 45 | def method_arguments(symbol):
 46 |     """Get the arguments list of the C++ method from its symbol.
 47 | 
 48 |     If the symbol demangles to 'Class::method(arg1, arg2)', this function returns ['arg1', 'arg2'].
 49 |     """
 50 |     try:
 51 |         arglist = []
 52 |         args = method_arguments_string(symbol)
 53 |         if args is None:
 54 |             return None
 55 |         if not args or args == 'void':
 56 |             return arglist
 57 |         carg = ''
 58 |         parens = 0
 59 |         for c in args + ',':
 60 |             if c == ',' and parens == 0:
 61 |                 carg = carg.strip()
 62 |                 assert carg
 63 |                 arglist.append(carg)
 64 |                 carg = ''
 65 |                 continue
 66 |             if c == '(':
 67 |                 parens += 1
 68 |             elif c == ')':
 69 |                 parens -= 1
 70 |             carg += c
 71 |         return arglist
 72 |     except:
 73 |         return None
 74 | 
 75 | def method_argument_pointer_types(symbol):
 76 |     """Get the base types of pointer types used in the arguments to a C++ method."""
 77 |     args = method_arguments_string(symbol)
 78 |     if args is None:
 79 |         return None
 80 |     if not args or args == 'void':
 81 |         return set()
 82 |     args = re.sub(r"[&]|\bconst\b", ' ', args)
 83 |     args = re.sub(r"\bunsigned\b", ' ', args)
 84 |     args = re.sub(r" +", ' ', args)
 85 |     argtypes = set(arg.strip() for arg in re.split(r"[,()]", args))
 86 |     ptrtypes = set()
 87 |     for argtype in argtypes:
 88 |         if re.match(r"[^ ]+ [*][* ]*", argtype):
 89 |             ptrtypes.add(argtype.split(' ', 1)[0])
 90 |     ptrtypes.difference_update(['void', 'bool', 'char', 'short', 'int', 'long', 'float', 'double',
 91 |         'longlong', '__int64'])
 92 |     return ptrtypes
 93 | 
 94 | def method_argument_types(symbol, sign=True):
 95 |     """Get the base types used in the arguments to a C++ method."""
 96 |     try:
 97 |         args = method_arguments_string(symbol)
 98 |         if args is None:
 99 |             return None
100 |         if not args or args == 'void':
101 |             return set()
102 |         args = re.sub(r"[*&]|\bconst\b", ' ', args)
103 |         if not sign:
104 |             args = re.sub(r"\bunsigned\b", ' ', args)
105 |         args = re.sub(r" +", ' ', args)
106 |         argtypes = set(arg.strip() for arg in re.split(r"[,()]", args))
107 |         argtypes.discard('')
108 |         return argtypes
109 |     except:
110 |         return None
111 | 
def convert_function_type_to_function_pointer_type(typestr):
    """Convert a function type string into a function pointer type string.

    For example:
        __int64 __fastcall(arg1, arg2) => __int64 __fastcall (*)(arg1, arg2)

    The ' (*)' marker is inserted before the first '(' in the string.

    Returns:
        The function pointer type string, or None if typestr is not a string or contains no '('.
    """
    try:
        return_part, paren, args_part = typestr.partition('(')
    except (AttributeError, TypeError):
        # Not a string (for example None).
        return None
    if not paren:
        # No argument list, so this is not a function type.
        return None
    return return_part + ' (*)(' + args_part
123 | 
def make_ident(name):
    """Convert a name into a valid identifier, substituting any invalid characters.

    Each character that idaapi.is_ident_char() rejects is replaced with an underscore; all other
    characters are kept unchanged.
    """
    return ''.join(c if idaapi.is_ident_char(ord(c)) else '_' for c in name)
133 | 
def _mangle_name(scopes):
    """Build the length-prefixed mangled form of a possibly nested name.

    Each component is encoded as '<length><component>'. If there is more than one component, the
    result is wrapped in 'N' ... 'E'. Returns None if any component is empty.
    """
    encoded = []
    for component in scopes:
        if len(component) == 0:
            return None
        encoded.append('{}{}'.format(len(component), component))
    mangled = ''.join(encoded)
    if len(scopes) > 1:
        # Nested names are bracketed by the 'N' and 'E' markers.
        return 'N' + mangled + 'E'
    return mangled
145 | 
def vtable_symbol_for_class(classname):
    """Get the mangled symbol name for the vtable for the given class name.

    The class name is split on '::' into its scopes, which are mangled and prefixed with
    '__ZTV'.

    Arguments:
        classname: The name of the C++ class.

    Returns:
        The symbol name, or None if the classname is invalid.
    """
    mangled = _mangle_name(classname.split('::'))
    return ('__ZTV' + mangled) if mangled else None
159 | 
def vtable_symbol_get_class(symbol):
    """Get the class name for a vtable symbol.

    The symbol is demangled with IDA's short demangled-name settings. The demangled name must
    start with "`vtable for'"; the text following that marker is returned.

    Returns:
        The class name, or None if the symbol cannot be demangled or is not a vtable symbol.
    """
    try:
        demangled = idc.Demangle(symbol, idc.GetLongPrm(idc.INF_SHORT_DN))
        prefix, classname = demangled.split("`vtable for'", 1)
    except Exception:
        # Demangling failed or the marker is missing. Unlike a bare except, this does not swallow
        # KeyboardInterrupt or SystemExit.
        return None
    # The marker must be at the very start. This is an explicit check rather than an assert so
    # that it still applies when Python runs with -O.
    if prefix != '':
        return None
    return classname
169 | 
def global_name(name):
    """Get the mangled symbol name for the global name.

    The name is split on '::' into its scopes, which are mangled and prefixed with '__Z'.

    Arguments:
        name: The name of the global object.

    Returns:
        The symbol name, or None if the name is invalid.
    """
    encoded = _mangle_name(name.split('::'))
    return ('__Z' + encoded) if encoded else None
183 | 
184 | 


--------------------------------------------------------------------------------
/ida_kernelcache/stub.py:
--------------------------------------------------------------------------------
  1 | #
  2 | # ida_kernelcache/stub.py
  3 | # Brandon Azad
  4 | #
  5 | # Functions for analyzing stub functions in the kernelcache.
  6 | #
  7 | 
  8 | import re
  9 | 
 10 | import idc
 11 | import idautils
 12 | import idaapi
 13 | 
 14 | import ida_utilities as idau
 15 | import internal
 16 | 
 17 | _log = idau.make_log(1, __name__)
 18 | 
 19 | kernelcache_stub_suffix = '___stub_'
 20 | """The suffix that gets appended to a symbol to create the stub name, without the stub ID."""
 21 | 
 22 | _stub_regex = re.compile(r"^(\S+)" + kernelcache_stub_suffix + r"\d+$")
 23 | """A regular expression to match and extract the target name from a stub symbol."""
 24 | 
 25 | def stub_name_target(stub_name):
 26 |     """Get the target to which a stub name refers.
 27 | 
 28 |     No checks are performed to ensure that the target actually exists.
 29 |     """
 30 |     match = _stub_regex.match(stub_name)
 31 |     if not match:
 32 |         return None
 33 |     return match.group(1)
 34 | 
 35 | def symbol_references_stub(symbol_name):
 36 |     """Check if the symbol name references a stub."""
 37 |     return kernelcache_stub_suffix in symbol_name
 38 | 
 39 | def _process_stub_template_1(stub):
 40 |     """A template to match the following stub pattern:
 41 | 
 42 |     ADRP X<reg>, #<offset>@PAGE
 43 |     LDR  X<reg>, [X<reg>, #<offset>@PAGEOFF]
 44 |     BR   X<reg>
 45 |     """
 46 |     adrp, ldr, br = idau.Instructions(stub, count=3)
 47 |     if (adrp.itype == idaapi.ARM_adrp and adrp.Op1.type == idaapi.o_reg
 48 |             and adrp.Op2.type == idaapi.o_imm
 49 |             and ldr.itype == idaapi.ARM_ldr and ldr.Op1.type == idaapi.o_reg
 50 |             and ldr.Op2.type == idaapi.o_displ and ldr.auxpref == 0
 51 |             and br.itype == idaapi.ARM_br and br.Op1.type == idaapi.o_reg
 52 |             and adrp.Op1.reg == ldr.Op1.reg == ldr.Op2.reg == br.Op1.reg):
 53 |         offset = adrp.Op2.value + ldr.Op2.addr
 54 |         target = idau.read_word(offset)
 55 |         if target and idau.is_mapped(target):
 56 |             return target
 57 | 
 58 | _stub_processors = (
 59 |     _process_stub_template_1,
 60 | )
 61 | 
 62 | def stub_target(stub_func):
 63 |     """Find the target function called by a stub.
 64 | 
 65 |     Arm64 only."""
 66 |     # Each processing function in _stub_processors takes the address of a stub function and returns
 67 |     # the address of the target function.
 68 |     for process in _stub_processors:
 69 |         try:
 70 |             target = process(stub_func)
 71 |             if target:
 72 |                 return target
 73 |         except:
 74 |             pass
 75 | 
 76 | def _symbolicate_stub(stub, target, next_stub):
 77 |     """Set a symbol for a stub function."""
 78 |     name = idau.get_ea_name(target, user=True)
 79 |     if not name:
 80 |         _log(3, 'Stub {:#x} has target {:#x} without a name', stub, target)
 81 |         return False
 82 |     # Sometimes the target of the stub is a thunk in another kext. This is sometimes OK, but makes
 83 |     # a right mess of things when that thunk is itself a jump function for another stub, and
 84 |     # especially when there are multiple such jump functions to that stub in that kext.
 85 |     # Autorenaming of thunks interacts poorly with autonaming of stubs (you get things like
 86 |     # 'j_TARGET___stub_2_0', which stub_name_target() no longer thinks of as a stub). Thus, if the
 87 |     # current thing has '__stub_' in it, don't rename. The reason we don't just extract the inner
 88 |     # stub reference is that these jump functions are really wrappers with different names and
 89 |     # semantics in the original code, so it's not appropriate for us to cover that up with a stub.
 90 |     if symbol_references_stub(name):
 91 |         _log(2, 'Stub {:#x} has target {:#x} (name {}) that references another stub', stub, target,
 92 |                 name)
 93 |         return False
 94 |     symbol = next_stub(name)
 95 |     if symbol is None:
 96 |         _log(0, 'Could not generate stub symbol for {}: names exhausted', name)
 97 |         return False
 98 |     if not idau.set_ea_name(stub, symbol, auto=True):
 99 |         _log(2, 'Could not set name {} for stub at {:#x}', symbol, stub)
100 |         return False
101 |     return True
102 | 
def _process_possible_stub(stub, make_thunk, next_stub):
    """Try to process a stub function.

    Arguments:
        stub: The address of the possible stub.
        make_thunk: If True, set the thunk flag on the stub function.
        next_stub: The stub name generator passed through to _symbolicate_stub().

    Returns:
        True if the stub was recognized, flagged, and renamed; False otherwise.
    """
    # First, make sure this is a stub format we recognize.
    target = stub_target(stub)
    if not target:
        _log(0, 'Unrecognized stub format at {:#x}', stub)
        return False
    # Next, check if IDA sees this as a function chunk rather than a function, and correct it if
    # reasonable.
    if not idau.force_function(stub):
        _log(1, 'Could not convert stub to function at {:#x}', stub)
        return False
    # Next, set the appropriate flags on the stub. Make the stub a thunk if that was requested.
    flags = idc.GetFunctionFlags(stub)
    if flags == -1:
        _log(1, 'Could not get function flags for stub at {:#x}', stub)
        return False
    # Propagate the no-return attribute from the target to the stub.
    target_flags = idc.GetFunctionFlags(target)
    if target_flags != -1 and target_flags & idc.FUNC_NORET:
        flags |= idc.FUNC_NORET
    if make_thunk:
        flags |= idc.FUNC_THUNK
    # Only the flags computed above are applied, so the thunk flag is added solely when
    # make_thunk is True.
    if idc.SetFunctionFlags(stub, flags) == 0:
        _log(1, 'Could not set function flags for stub at {:#x}', stub)
        return False
    # Next, ensure that IDA sees the target as a function, but continue anyway if that fails.
    if not idau.force_function(target):
        _log(1, 'Stub {:#x} has target {:#x} that is not a function', stub, target)
    # Finally symbolicate the stub.
    if not _symbolicate_stub(stub, target, next_stub):
        return False
    return True
135 | 
def _process_stubs_section(segstart, make_thunk, next_stub):
    """Process all the functions in a __stubs section.

    Every address in the segment starting at segstart is examined. An address that has a
    reference is likely a stub, so it is processed unless it already carries a stub name.
    """
    section_end = idc.SegEnd(segstart)
    for address in idau.Addresses(segstart, section_end, step=1):
        # Only referenced addresses are stub candidates.
        if not idc.isRef(idc.GetFlags(address)):
            continue
        # Skip anything that has already been given a stub name.
        if stub_name_target(idau.get_ea_name(address)):
            continue
        _process_possible_stub(address, make_thunk, next_stub)
144 | 
def initialize_stub_symbols(make_thunk=True):
    """Populate IDA with information about the stubs in an iOS kernelcache.

    Every segment whose name ends with '__stubs' is scanned, and each stub function found there
    is renamed according to the target function it calls.

    Arm64 only.

    Options:
        make_thunk: Set the thunk attribute for each stub function. Default is True.
    """
    # A single generator is shared across all segments so that stub names stay unique.
    name_generator = internal.make_name_generator(kernelcache_stub_suffix)
    for segment_ea in idautils.Segments():
        segment_name = idc.SegName(segment_ea)
        if segment_name.endswith('__stubs'):
            _log(3, 'Processing segment {}', segment_name)
            _process_stubs_section(segment_ea, make_thunk, name_generator)
163 | 
164 | 


--------------------------------------------------------------------------------
/ida_kernelcache/segment.py:
--------------------------------------------------------------------------------
  1 | #
  2 | # ida_kernelcache/segment.py
  3 | # Brandon Azad
  4 | #
  5 | # Functions for interacting with the segments of the kernelcache in IDA. No prior initialization is
  6 | # necessary.
  7 | #
  8 | 
  9 | import idc
 10 | 
 11 | import ida_utilities as idau
 12 | import kernel
 13 | 
 14 | _log = idau.make_log(0, __name__)
 15 | 
 16 | idc.Til2Idb(-1, 'mach_header_64')
 17 | idc.Til2Idb(-1, 'load_command')
 18 | idc.Til2Idb(-1, 'segment_command_64')
 19 | idc.Til2Idb(-1, 'section_64')
 20 | 
 21 | _LC_SEGMENT_64 = 0x19
 22 | 
 23 | def _macho_segments_and_sections(ea):
 24 |     """A generator to iterate through a Mach-O file's segments and sections.
 25 | 
 26 |     Each iteration yields a tuple:
 27 |         (segname, segstart, segend, [(sectname, sectstart, sectend), ...])
 28 |     """
 29 |     hdr   = idau.read_struct(ea, 'mach_header_64', asobject=True)
 30 |     nlc   = hdr.ncmds
 31 |     lc    = int(hdr) + len(hdr)
 32 |     lcend = lc + hdr.sizeofcmds
 33 |     while lc < lcend and nlc > 0:
 34 |         loadcmd = idau.read_struct(lc, 'load_command', asobject=True)
 35 |         if loadcmd.cmd == _LC_SEGMENT_64:
 36 |             segcmd = idau.read_struct(lc, 'segment_command_64', asobject=True)
 37 |             segname  = idau.null_terminated(segcmd.segname)
 38 |             segstart = segcmd.vmaddr
 39 |             segend   = segstart + segcmd.vmsize
 40 |             sects    = []
 41 |             sc  = int(segcmd) + len(segcmd)
 42 |             for i in range(segcmd.nsects):
 43 |                 sect = idau.read_struct(sc, 'section_64', asobject=True)
 44 |                 sectname  = idau.null_terminated(sect.sectname)
 45 |                 sectstart = sect.addr
 46 |                 sectend   = sectstart + sect.size
 47 |                 sects.append((sectname, sectstart, sectend))
 48 |                 sc += len(sect)
 49 |             yield (segname, segstart, segend, sects)
 50 |         lc  += loadcmd.cmdsize
 51 |         nlc -= 1
 52 | 
 53 | def _initialize_segments_in_kext(kext, mach_header, skip=[]):
 54 |     """Rename the segments in the specified kext."""
 55 |     def log_seg(segname, segstart, segend):
 56 |         _log(3, '+ segment {: <20} {:x} - {:x}  ({:x})', segname, segstart, segend,
 57 |             segend - segstart)
 58 |     def log_sect(sectname, sectstart, sectend):
 59 |         _log(3, '  section {: <20} {:x} - {:x}  ({:x})', sectname, sectstart, sectend,
 60 |                 sectend - sectstart)
 61 |     def log_gap(gapno, start, end, mapped):
 62 |         mapped = 'mapped' if mapped else 'unmapped'
 63 |         _log(3, '  gap     {: <20} {:x} - {:x}  ({:x}, {})', gapno, start, end,
 64 |             end - start, mapped)
 65 |     def process_region(segname, name, start, end):
 66 |         assert end >= start
 67 |         if segname in skip:
 68 |             _log(2, 'Skipping segment {}', segname)
 69 |             return
 70 |         newname = '{}.{}'.format(segname, name)
 71 |         if kext:
 72 |             newname = '{}:{}'.format(kext, newname)
 73 |         if start == end:
 74 |             _log(2, 'Skipping empty region {} at {:x}', newname, start)
 75 |             return
 76 |         ida_segstart = idc.SegStart(start)
 77 |         if ida_segstart == idc.BADADDR:
 78 |             _log(0, "IDA doesn't think this is a real segment: {:x} - {:x}", start, end)
 79 |             return
 80 |         ida_segend = idc.SegEnd(ida_segstart)
 81 |         if start != ida_segstart or end != ida_segend:
 82 |             _log(0, 'IDA thinks segment {} {:x} - {:x} should be {:x} - {:x}', newname, start, end,
 83 |                     ida_segstart, ida_segend)
 84 |             return
 85 |         _log(2, 'Rename {:x} - {:x}: {} -> {}', start, end, idc.SegName(start), newname)
 86 |         idc.SegRename(start, newname)
 87 |     def process_gap(segname, gapno, start, end):
 88 |         mapped = idau.is_mapped(start)
 89 |         log_gap(gapno, start, end, mapped)
 90 |         if mapped:
 91 |             name = 'HEADER' if start == mach_header else '__gap_' + str(gapno)
 92 |             process_region(segname, name, start, end)
 93 |     for segname, segstart, segend, sects in _macho_segments_and_sections(mach_header):
 94 |         log_seg(segname, segstart, segend)
 95 |         lastend = segstart
 96 |         gapno   = 0
 97 |         for sectname, sectstart, sectend in sects:
 98 |             if lastend < sectstart:
 99 |                 process_gap(segname, gapno, lastend, sectstart)
100 |                 gapno += 1
101 |             log_sect(sectname, sectstart, sectend)
102 |             process_region(segname, sectname, sectstart, sectend)
103 |             lastend = sectend
104 |         if lastend < segend:
105 |             process_gap(segname, gapno, lastend, segend)
106 |             gapno += 1
107 | 
def initialize_segments():
    """Rename the kernelcache segments in IDA according to the __PRELINK_INFO data.

    Segments are renamed according to the scheme '[<kext>:]<segment>.<section>', where '<kext>' is
    the bundle identifier if the segment is part of a kernel extension. The special region
    containing the Mach-O header is renamed '[<kext>:]<segment>.HEADER'.

    The kernel's own segments are renamed first, then the segments of every kext listed in the
    __PRELINK_INFO dictionary that has both a bundle identifier and a load address.
    """
    # Start with the kernel itself, leaving the prelink segments alone.
    _log(1, 'Renaming kernel segments')
    _initialize_segments_in_kext(None, kernel.base,
            skip=['__PRELINK_TEXT', '__PLK_TEXT_EXEC', '__PRELINK_DATA', '__PLK_DATA_CONST'])
    # Then handle each kext described by the __PRELINK_INFO. In the new kernelcache format
    # 12-merged, the _PrelinkExecutableLoadAddr key is missing for all kexts, so no extra segment
    # renaming takes place.
    for info in kernel.prelink_info['_PrelinkInfoDictionary']:
        bundle_id = info.get('CFBundleIdentifier', None)
        load_addr = info.get('_PrelinkExecutableLoadAddr', None)
        if bundle_id is None or load_addr is None:
            continue
        # The current IDA segment name may already carry a '<kext>:' prefix.
        previous = idc.SegName(load_addr).split(':', 1)[0]
        if '.kpi.' not in bundle_id and previous != bundle_id:
            _log(0, 'Renaming kext {} -> {}', previous, bundle_id)
        _log(1, 'Renaming segments in {}', bundle_id)
        _initialize_segments_in_kext(bundle_id, load_addr)
133 | 
# Regions found by _initialize_kext_regions(), stored as (start, end, name) tuples.
_kext_regions = []

def _initialize_kext_regions():
    """Collect per-kext regions from iOS 12's __PRELINK_INFO.__kmod_start segment.

    Every word read from the segment is treated as the address of a kmod Mach-O. A kmod is
    recorded only if it has exactly one segment, named __TEXT_EXEC, with exactly one section; the
    region is then appended to _kext_regions under the name 'kext.<address in hex>'.

    NOTE: This only accounts for __TEXT_EXEC, not the other segments.
    """
    kmod_start = idc.SegByBase(idc.SegByName('__PRELINK_INFO.__kmod_start'))
    if kmod_start == idc.BADADDR:
        # No such segment in this database, so there is nothing to collect.
        return
    kmod_end = idc.SegEnd(kmod_start)
    for kmod in idau.ReadWords(kmod_start, kmod_end):
        _log(1, 'Found kmod {:x}', kmod)
        segments = list(_macho_segments_and_sections(kmod))
        # Only the single-segment, single-section __TEXT_EXEC layout is understood.
        recognized = False
        if len(segments) == 1:
            segname, segstart, segend, sects = segments[0]
            recognized = (segname == '__TEXT_EXEC' and len(sects) == 1)
        if not recognized:
            _log(0, 'Skipping unrecognized kmod {:x}', kmod)
            continue
        kmod_name = 'kext.{:x}'.format(kmod)
        _log(1, 'Adding module:  {:x} - {:x}  {}', segstart, segend, kmod_name)
        _kext_regions.append((segstart, segend, kmod_name))

# Populate _kext_regions as soon as the module is loaded.
_initialize_kext_regions()
158 | 
def kernelcache_kext(ea):
    """Return the name of the kext containing the linear address ea, or None.

    The kext name is taken from the part of the IDA segment name before the first ':', which is
    only meaningful once the segments have been renamed with initialize_segments(). If the
    segment name has no such prefix, the regions gathered in _kext_regions are searched instead.

    NOTE: Kexts are not well distinguished on the new iOS 12 merged kernelcache format. Do not rely
    on this function.
    """
    # TODO: This doesn't work on 12-merged kernelcaches!
    segment_name = idc.SegName(ea) or ''
    kext, colon, _ = segment_name.partition(':')
    if colon:
        return kext
    # Fall back to the regions collected at load time.
    for region_start, region_end, region_name in _kext_regions:
        if region_start <= ea < region_end:
            return region_name
    return None
176 | 
177 | 


--------------------------------------------------------------------------------
/ida_kernelcache/kplist.py:
--------------------------------------------------------------------------------
  1 | #
  2 | # ida_kernelcache/kplist.py
  3 | # Brandon Azad
  4 | #
  5 | # Process kernel plists. This code is based on:
  6 | #   - https://github.com/python/cpython/blob/3.6/Lib/plistlib.py
  7 | #
  8 | 
  9 | import base64
 10 | from xml.etree.ElementTree import XMLTreeBuilder
 11 | 
 12 | class _KPlistBuilder(object):
 13 |     """A companion class for XMLTreeBuilder to parse a kernel-style property list."""
 14 |     # IMPLEMENTATION IDEA: The XMLTreeBuilder calls us at four points: when there's a new start
 15 |     # tag, when there's a new end tag, when there's data from a tag, and when there's no more data.
 16 |     # We build objects incrementally out of these notifications. Each tag type can implement
 17 |     # handlers for the start and end tags. Exactly one of these handlers must return an object that
 18 |     # represents the parsed plist entry. Collection entries must return the object from the start
 19 |     # tag handler, while leaf entries must return the object from the end tag handler. Once a
 20 |     # handler has produced an object for the plist entry, that object gets added to the result
 21 |     # using add_object. Collections are maintained in a collection stack. When a start tag handler
 22 |     # returns an object, that object is pushed onto the top of the collection stack to indicate
 23 |     # that it is the current collection. When an end tag handler does not return a value, that
 24 |     # indicates that the current collection is done and the collection stack is popped. When the ID
 25 |     # attribute is encountered, the subsequent call to add_object associates the object with that
 26 |     # ID. When a corresponding IDREF attribute is encountered, the start and end tag handlers are
 27 |     # skipped. Instead, once the next end tag is received, the previous object is looked up by ID
 28 |     # and passed to add_object.
 29 | 
 30 |     def __init__(self):
 31 |         self.collection_stack = []
 32 |         self.ids              = {}
 33 |         self.current_data     = []
 34 |         self.current_id       = None
 35 |         self.current_idref    = None
 36 |         self.current_key      = None
 37 |         self.root             = None
 38 |         self.start_handler    = {
 39 |                 'dict':       self.start_dict,
 40 |                 'array':      self.start_array,
 41 |         }
 42 |         self.end_handler      = {
 43 |                 'dict':       self.end_dict,
 44 |                 'key':        self.end_key,
 45 |                 'true':       self.end_true,
 46 |                 'false':      self.end_false,
 47 |                 'integer':    self.end_integer,
 48 |                 'string':     self.end_string,
 49 |                 'data':       self.end_data,
 50 |         }
 51 |         self.attributes       = {
 52 |                 'integer':    ('size',),
 53 |         }
 54 |         self.tags = set(self.start_handler.keys()).union(self.end_handler.keys())
 55 | 
 56 |     # XMLTreeBuilder calls.
 57 | 
 58 |     def start(self, tag, attr):
 59 |         intervening_data = self.get_data().strip()
 60 |         assert not intervening_data and not self.current_id
 61 |         # Check that the attributes are allowed.
 62 |         for attrname in set(attr.keys()).difference(('ID', 'IDREF')):
 63 |             if attrname not in self.attributes[tag]:
 64 |                 raise ValueError('illegal attribute "{}" for tag "{}"'.format(attrname, tag))
 65 |         # Handle IDREF attribute.
 66 |         if self.current_idref is not None:
 67 |             raise ValueError('non-empty IDREF')
 68 |         self.current_idref = self.get_id_attr(attr, 'IDREF')
 69 |         if self.current_idref is not None:
 70 |             if self.current_idref not in self.ids:
 71 |                 raise ValueError('tag has IDREF to non-existent ID')
 72 |             original_tag, _ = self.ids[self.current_idref]
 73 |             if tag != original_tag:
 74 |                 raise ValueError('tag "{}" has IDREF to element with different tag "{}"'
 75 |                         .format(tag, original_tag))
 76 |             if len(attr) > 1:
 77 |                 raise ValueError('tag has IDREF and another attribute')
 78 |             return
 79 |         # Handle ID attribute.
 80 |         self.current_id = self.get_id_attr(attr, 'ID')
 81 |         if self.current_id is not None and self.current_id in self.ids:
 82 |             raise ValueError('tag has previously used ID attribute')
 83 |         # Process the start tag if this is not an IDREF.
 84 |         handler = self.start_handler.get(tag, None)
 85 |         if handler:
 86 |             value = handler(attr)
 87 |             if value is not None:
 88 |                 # This is a collection. Add the collection object then push a new context.
 89 |                 self.add_object(tag, value)
 90 |                 self.collection_stack.append(value)
 91 |         elif tag not in self.tags:
 92 |             raise ValueError('unrecognized tag "{}"'.format(tag))
 93 | 
 94 |     def end(self, tag):
 95 |         assert not (self.current_data and self.current_idref is not None)
 96 |         # If we have an ID reference, then directly add the referenced value.
 97 |         if self.current_idref is not None:
 98 |             _, value = self.ids[self.current_idref]
 99 |             self.current_idref = None
100 |             self.add_object(tag, value)
101 |             return
102 |         # Otherwise, perform the end tag handler.
103 |         handler = self.end_handler.get(tag, None)
104 |         value = None
105 |         if handler:
106 |             value = handler()
107 |         if value is not None:
108 |             self.add_object(tag, value)
109 |         else:
110 |             # This is a collection. We just finished, so pop the context stack.
111 |             self.collection_stack.pop()
112 | 
113 |     def data(self, data):
114 |         if self.current_idref is not None:
115 |             raise ValueError('non-empty IDREF')
116 |         self.current_data.append(data)
117 | 
118 |     def close(self):
119 |         assert not self.current_data and not self.collection_stack
120 |         return self.root
121 | 
122 |     # Internal functions.
123 | 
124 |     def get_id_attr(self, attr, name):
125 |         id_attr = attr.get(name, None)
126 |         if id_attr is not None:
127 |             try:
128 |                 return int(id_attr, 0)
129 |             except ValueError:
130 |                 raise ValueError('invalid {} attribute'.format(name))
131 |         return None
132 | 
133 |     def add_object(self, tag, value):
134 |         if self.current_id is not None:
135 |             assert self.current_id not in self.ids
136 |             self.ids[self.current_id] = (tag, value)
137 |             self.current_id = None
138 |         if tag == 'key':
139 |             # We are adding a key to a dictionary but don't yet have the value.
140 |             if not self.collection_stack or type(self.collection_stack[-1]) != dict:
141 |                 raise ValueError('invalid key tag not in a dict')
142 |             if self.current_key:
143 |                 raise ValueError('missing value for key in dict')
144 |             self.current_key = value
145 |         elif self.current_key is not None:
146 |             # We are adding a key and value to a dictionary.
147 |             assert type(self.collection_stack[-1]) == dict
148 |             if self.current_key in self.collection_stack[-1]:
149 |                 raise ValueError('duplicate key "{}" in dict'.format(self.current_key))
150 |             self.collection_stack[-1][self.current_key] = value
151 |             self.current_key = None
152 |         elif self.root is None:
153 |             # We are setting the root object.
154 |             self.root = value
155 |         elif self.collection_stack and type(self.collection_stack[-1]) == list:
156 |             # We are adding an object to an array (or other container).
157 |             self.collection_stack[-1].append(value)
158 |         else:
159 |             # We have two values in a row not in a container.
160 |             raise ValueError('unexpected element not in a container')
161 | 
162 |     def get_data(self):
163 |         data = ''.join(self.current_data)
164 |         self.current_data = []
165 |         return data
166 | 
167 |     # Element tag handlers.
168 | 
169 |     def start_dict(self, attr):
170 |         return {}
171 | 
172 |     def start_array(self, attr):
173 |         return []
174 | 
175 |     def end_dict(self):
176 |         if self.current_key is not None:
177 |             raise ValueError('missing value for key in dict')
178 | 
179 |     def end_key(self):
180 |         assert self.current_key is None
181 |         return self.get_data()
182 | 
183 |     def end_true(self):
184 |         if self.get_data():
185 |             raise ValueError('true tag must be empty')
186 |         return True
187 | 
188 |     def end_false(self):
189 |         if self.get_data():
190 |             raise ValueError('false tag must be empty')
191 |         return False
192 | 
193 |     def end_integer(self):
194 |         # TODO: The size attribute is currently ignored.
195 |         return int(self.get_data(), 0)
196 | 
197 |     def end_string(self):
198 |         return self.get_data()
199 | 
200 |     def end_data(self):
201 |         return base64.b64decode(self.get_data())
202 | 
def kplist_parse(plist):
    """Parse a kernel-style property list.

    Args:
        plist: The XML text of the property list, as accepted by the XML parser's feed().

    Returns:
        The object produced for the plist's root element, or None if the plist could not be
        parsed.
    """
    try:
        builder = _KPlistBuilder()
        parser  = XMLTreeBuilder(target=builder)
        parser.feed(plist)
        return parser.close()
    except Exception:
        # Parsing is best-effort: any parse or validation error yields None. Exception (rather than
        # a bare except) lets KeyboardInterrupt and SystemExit propagate.
        return None
212 | 
213 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # ida_kernelcache: An IDA Toolkit for analyzing iOS kernelcaches
  2 | 
  3 | <!-- Brandon Azad -->
  4 | 
  5 | ida_kernelcache is an IDAPython module for IDA Pro to make working with iOS kernelcaches easier.
  6 | The module provides functions to:
  7 | 
  8 | * Convert iOS 12's new static tagged pointers into normal kernel pointers.
  9 | * Parse the kernel's `__PRELINK_INFO` segment into a Python dictionary.
 10 | * Rename the segments in IDA according to the kernel extension name, Mach-O segment, and Mach-O
 11 |   section.
 12 | * Convert identifiable pointers in some segments into IDA offsets.
 13 | * Reconstruct the C++ class hierarchy based on OSMetaClass information.
 14 | * Symbolicate C++ virtual method tables (both the vtable itself and its methods).
 15 | * Symbolicate offsets in `__got` sections and stub functions in `__stubs` sections.
 16 | * Autogenerate IDA structs representing the C++ virtual method tables.
 17 | * Autogenerate IDA structs representing the C++ classes in the kernelcache based on observed access
 18 |   patterns.
 19 | 
 20 | The main processing function is designed to be run before any manual analysis or reverse
 21 | engineering. With the default settings, IDA tends to miss a lot of useful information in the
 22 | kernelcache. These scripts help IDA along by leveraging the known structure of the kernelcache to
 23 | automatically propagate useful information.
 24 | 
 25 | In addition to the stock functionality in the module, ida_kernelcache contains several scripts to
 26 | make analyzing the iOS kernelcache easier. For example, you can use the scripts to autogenerate C
 27 | structs used by a function.
 28 | 
 29 | Many of the techniques used in ida_kernelcache were developed for and borrowed directly from
 30 | [memctl].
 31 | 
 32 | [memctl]: https://github.com/bazad/memctl
 33 | 
 34 | ## Versions
 35 | 
 36 | ida_kernelcache has been tested with IDA Pro 6.95 on kernelcaches for iOS versions 10.1.1, 11.0,
 37 | 11.2, 11.3.1, and 12.0 beta. Currently only Arm64 kernelcaches from iOS 10 and later are supported.
 38 | 
 39 | ## Getting started
 40 | 
 41 | You need to already have a decompressed kernelcache file loaded into IDA. You can find the URL to
 42 | download a particular IPSW from Apple online, and there are a number of public tools (including
 43 | memctl) capable of decompressing the kernelcache.
 44 | 
 45 | In IDA, select "File" -> "Script file..." from the menu bar, then choose the `ida_kernelcache.py`
 46 | script in the main directory. This will load the ida_kernelcache module into the IDAPython
 47 | interpreter under the names `ida_kernelcache` and `kc`. In the IDAPython prompt, type
 48 | `kc.kernelcache_process()` and hit Enter to start analyzing the kernelcache. This function performs
 49 | all the major analyses supported by ida_kernelcache. The function will run for several minutes as
 50 | IDA identifies and analyzes new functions.
 51 | 
 52 | ida_kernelcache will try not to overwrite user names for addresses. This means that if the
 53 | kernelcache has been manually analyzed prior to initialization with `kernelcache_process`, the
 54 | results may not be as thorough because user-specified names may block automatic name propagation.
 55 | However, there's also no guarantee that ida_kernelcache won't mess up prior analysis, so if you do
 56 | decide to run `kernelcache_process` on a kernelcache file which you've already analyzed, make a
 57 | backup first.
 58 | 
 59 | ## The ida_kernelcache module
 60 | 
 61 | ida_kernelcache is meant to be loaded via `ida_kernelcache.py`; the submodules in the
 62 | `ida_kernelcache` directory are not meant to be loaded directly. However, ida_kernelcache exposes
 63 | the functionality of many of these submodules. Here is what each of them does:
 64 | 
 65 | * **ida_utilities**:
 66 | This module wraps some of IDA's functions to provide an easier-to-use API. Particularly useful are
 67 | `is_mapped`, `read_word`, `read_struct`, `force_function`, and `ReadWords`. `is_mapped` checks
 68 | whether an address is mapped, and optionally whether it contains a known value. `read_word` reads a
 69 | variably-sized word from an address. `read_struct` reads a structure type into a Python dictionary
 70 | or Python accessor object, which makes parsing data structures much easier. `force_function` tries
 71 | several tricks to convert an address into the start of a function in IDA. `ReadWords` is a
 72 | generator to iterate over data words and their addresses in a range.
 73 | 
 74 | * **build_struct**:
 75 | This internal module contains utilities to automatically populate an IDA struct based on a sequence
 76 | of accesses to the struct.
 77 | 
 78 | * **class_struct**:
 79 | This module provides functions to generate IDA structs representing C++ virtual method tables and
 80 | classes. `initialize_vtable_structs` scans the (symbolicated) virtual method tables and creates IDA
 81 | structs to hold virtual method pointers. `initialize_class_structs` performs a data flow analysis
 82 | on the virtual methods to identify accesses to the fields of each class, then builds IDA structs to
 83 | represent the classes. Instructions that appear to reference a field are also converted into
 84 | structure offset references. See the module docstring for more details.
 85 | 
 86 | * **classes**:
 87 | This module defines the `ClassInfo` type that holds information about C++ classes in the
 88 | kernelcache and provides the function `collect_class_info` to scan the kernelcache for classes and
 89 | populate the global `class_info` dictionary with a map from class names to `ClassInfo` objects. The
 90 | `ClassInfo` type records the class name, the OSMetaClass instance, the virtual method table, and
 91 | the superclass name for each C++ class. Additionally, each `ClassInfo` object stores references to
 92 | the superclass's `ClassInfo` and the `ClassInfo` of all direct subclasses, making it easy to
 93 | examine and traverse the class hierarchy. `collect_class_info` also stores the set of all virtual
 94 | method tables in the global `vtables` set.
 95 | 
 96 | * **data_flow**:
 97 | This internal module contains data flow operations used by the rest of ida_kernelcache.
 98 | 
 99 | * **kernel**:
100 | This module provides the `base` and `prelink_info` global variables. `base` is the base address of
101 | the kernel image (the start of the kernel's Mach-O header). `prelink_info` is the parsed
102 | `__PRELINK_INFO` dictionary.
103 | 
104 | * **kplist**:
105 | This module provides the `kplist_parse` function to parse kernel-style plists.
106 | 
107 | * **metaclass**:
108 | This module provides the function `initialize_metaclass_symbols` which adds a symbol for each
109 | known OSMetaClass instance.
110 | 
111 | * **offset**:
112 | This module provides the functions `initialize_data_offsets` and `initialize_offset_symbols`. The
113 | former scans through the segments looking for pointers which can be converted into offsets. The
114 | latter symbolicates offsets in the `__got` section of each kext if the target of the offset has a
115 | symbol.
116 | 
117 | * **segment**:
118 | This module provides the function `initialize_segments` to rename IDA's segments to be more useful.
119 | By default, IDA seems to create the segment names by combining a guess of the bundle identifier
120 | with the Mach-O section describing the region. `initialize_segments` extracts the true bundle
121 | identifier from the `__PRELINK_INFO` dictionary and renames each segment to include the bundle
122 | identifier, Mach-O segment, and Mach-O section. This makes it possible, for example, to distinguish
123 | between `__TEXT.__const` and `__DATA_CONST.__const`. This module also provides the function
124 | `kernelcache_kext` (re-exported at the top level) to determine the kext containing the specified
125 | address (only on the old iOS 11 split-kext kernelcache format).
126 | 
127 | * **stub**:
128 | Many kexts in the kernelcache contain stub functions in a `__stubs` section that jump to functions
129 | in the kernel proper. Unfortunately, these stubs provide a barrier for propagating cross references
130 | and type information. This module doesn't solve these problems, but it does make looking at stubs a
131 | bit easier by automatically renaming stub functions so that the target function name is visible.
132 | Stubs and their targets are forcibly converted into functions in IDA, which helps make the
133 | functions in IDA line up with the functions in the original source code.
134 | 
135 | * **tagged_pointers**:
136 | The new iOS 12 merged kernelcache format has the upper 2 bytes of each pointer tagged with an
137 | offset in order to chain the pointers together in a list. This module contains functions for
138 | processing and restoring those tagged pointers.
139 | 
140 | * **vtable**:
141 | This module provides many useful functions for working with virtual method tables, including
142 | `vtable_length`, `convert_vtable_to_offsets`, `vtable_overrides`, `initialize_vtable_symbols`, and
143 | `initialize_vtable_method_symbols`. `vtable_length` checks whether the specified address could be a
144 | vtable and returns the vtable length. The generator `vtable_overrides` enumerates the virtual
145 | methods in a class which override virtual methods used by the superclass. The function
146 | `initialize_vtable_symbols` adds a symbol for the start of each identified vtable.
147 | `initialize_vtable_method_symbols` iterates through the overridden methods in each vtable and
148 | propagates symbols from the superclass to the subclass. This is possible because most of the base
149 | classes in IOKit are defined in XNU with relatively complete symbol information. Each method
150 | override in the vtable of a subclass must conform to the same interface as the method in the
151 | superclass, which means we can generate a symbol for the override by substituting the subclass's
152 | name for the superclass's name in the virtual method symbol in the superclass. For example, if we
153 | have no name for the virtual method at index 7 in the `AppleKeyStore` class, but we know that the
154 | virtual method at index 7 in its superclass `IOService` is called
155 | `__ZNK9IOService12getMetaClassEv`, then we can infer that index 7 should be called
156 | `__ZNK13AppleKeyStore12getMetaClassEv` in the subclass. This technique can be used to symbolicate
157 | most virtual methods in most classes.
158 | 
159 | ## Other scripts
160 | 
161 | The `ida_kernelcache_reload.py` script is identical to `ida_kernelcache.py`, except it forces the
162 | `ida_kernelcache` module and all submodules to be reloaded. It is mostly useful for development.
163 | 
164 | The `scripts` directory contains scripts that use ida_kernelcache to perform some sort of analysis.
165 | These scripts are too specific to be part of the main ida_kernelcache module, but they are useful
166 | when reverse engineering the kernelcache. They include:
167 | 
168 | * **find_virtual_method_overrides.py**:
169 | A script to find descendants of a class that override a virtual method containing the specified
170 | string. Matching overrides are printed to the console.
171 | 
172 | * **populate_struct.py**:
173 | Populate fields for a C++ class or C struct by performing data flow analysis starting at the
174 | current address.
175 | 
176 | * **process_external_methods.py**:
177 | Process an `IOExternalMethod` or `IOExternalMethodDispatch` array into a standard form for use by
178 | fuzzing tools.
179 | 
180 | ## Class reconstruction
181 | 
182 | If you are using the Hex-Rays decompiler, one of the more interesting features of ida_kernelcache
183 | is the automatic C++ class reconstruction, which will use the OSMetaClass information and data flow
184 | analysis to create IDA structs to represent the classes found in the kernelcache. These
185 | representations can dramatically improve the readability of the pseudocode representation. To learn
186 | more, see the post [Reconstructing C++ classes in the iOS kernelcache using IDA Pro].
187 | 
188 | [Reconstructing C++ classes in the iOS kernelcache using IDA Pro]: https://bazad.github.io/2018/03/ida-kernelcache-class-reconstruction/
189 | 
190 | ## The new iOS 12 kernelcache format
191 | 
192 | With iOS 12, Apple introduced a new kernelcache format on some devices. Among the changes, this new
193 | kernelcache's kernel pointers are tagged to link them in a list, presumably to allow iBoot to slide
194 | the kernel without the `_PrelinkLinkKASLROffsets` data in the prelink dictionary. Trying to analyze
195 | a stock kernelcache using this format in IDA is difficult due to the missing cross-references. See
196 | the article [Analyzing the iOS 12 kernelcache's tagged pointers] for details.
197 | 
198 | [Analyzing the iOS 12 kernelcache's tagged pointers]: https://bazad.github.io/2018/06/ios-12-kernelcache-tagged-pointers/
199 | 
200 | If you just want to untag the pointers in the kernelcache without performing any additional
201 | processing, run `kc.tagged_pointers.untag_pointers()`.
202 | 
203 | ## A note on generalizing
204 | 
205 | Some of this functionality likely applies more broadly than just to Apple kernelcaches (for
206 | example, vtable analysis and symbol propagation, or most of the functions in `ida_utilities.py`).
207 | Nonetheless, I've limited the import scope to just the `ida_kernelcache` module because I have not
208 | tested any of this on other types of binaries.
209 | 
210 | ## License
211 | 
212 | ida_kernelcache is released under the MIT license.
213 | 
214 | Much of the functionality in ida_kernelcache is borrowed from [memctl], which is also released
215 | under the MIT license. Other sources are noted in the comments in the corresponding files.
216 | 
217 | 
218 | ---------------------------------------------------------------------------------------------------
219 | Brandon Azad
220 | 


--------------------------------------------------------------------------------
/ida_kernelcache/data_flow.py:
--------------------------------------------------------------------------------
  1 | #
  2 | # ida_kernelcache/data_flow.py
  3 | # Brandon Azad
  4 | #
  5 | # A module for data flows.
  6 | #
  7 | """ida_kernelcache.data_flow
  8 | 
  9 | This module contains functions that perform various types of data flow operations on functions or
 10 | code ranges. Currently only Arm64 is supported.
 11 | 
 12 | While it is possible to implement a very generic data flow framework, allowing custom data flows to
 13 | be implemented entirely externally and with little or no knowledge of the underlying architecture,
 14 | this module does not take that approach, for reasons of simplicity and efficiency.
 15 | 
 16 | """
 17 | 
 18 | import collections
 19 | 
 20 | import idc
 21 | import idautils
 22 | import idaapi
 23 | 
 24 | import ida_utilities as idau
 25 | 
# Logging helper for this module, created by ida_utilities.make_log.
_log = idau.make_log(2, __name__)

# IDA's CF_CHG1..CF_CHG6 instruction feature flags, ordered by operand so that entry i is the
# flag for operand i + 1.
# NOTE(review): presumably used to test whether an instruction modifies a given operand; the use
# site is not visible here -- confirm.
_INSN_OP_CHG = [
    idaapi.CF_CHG1,
    idaapi.CF_CHG2,
    idaapi.CF_CHG3,
    idaapi.CF_CHG4,
    idaapi.CF_CHG5,
    idaapi.CF_CHG6,
]

# Map from an operand's dtyp to the access size in bytes. Operands whose dtyp is missing from this
# table are ignored when recording accesses.
_INSN_OP_DTYP_SZ = {
    idaapi.dt_byte:  1,
    idaapi.dt_word:  2,
    idaapi.dt_dword: 4,
    idaapi.dt_qword: 8,
}

# Mask tested against an instruction's auxpref; an instruction with any of these bits set is not
# recorded as a memory access.
# NOTE(review): the name suggests these bits mark Arm64 writeback addressing forms; the meaning of
# the individual values 0x20 and 0x80 is not shown here -- confirm against the IDA ARM module.
_ARM64_WRITEBACK = 0x20 | 0x80
 45 | 
 46 | def _create_flow(function, bounds):
 47 |     """Create a FlowChart."""
 48 |     f, b = None, None
 49 |     if function is not None:
 50 |         f = idaapi.get_func(function)
 51 |         if f is None:
 52 |             _log(0, 'Bad func {:#x}', func)
 53 |             return None
 54 |     if bounds is not None:
 55 |         b = (start, end)
 56 |     return idaapi.FlowChart(f=f, bounds=b)
 57 | 
 58 | def _add_blocks_to_queue(queue, flow, addresses):
 59 |     for ea in addresses:
 60 |         for bb in flow:
 61 |             if bb.startEA <= ea < bb.endEA:
 62 |                 queue.append(bb)
 63 |                 break
 64 |         else:
 65 |             _log(2, 'Address {:#x} not contained in any basic block', ea)
 66 | 
 67 | def _pointer_accesses_process_block(start, end, fix, entry_regs, accesses):
 68 |     """Process a basic block for _pointer_accesses_data_flow.
 69 | 
 70 |     Arm64 only."""
 71 |     # NOTE: Some object accesses (to large offsets) are encoded in the following style:
 72 |     #   MOV             W8, #0x9210
 73 |     #   STR             X0, [X19,X8]
 74 |     # We try to catch these by keeping track of local constants within a block.
 75 |     RegValue = collections.namedtuple('RegValue', ['type', 'value'])
 76 |     DELTA = 0   # Pointer delta from start of target memory region.
 77 |     CONST = 1   # Constant value
 78 |     def get_reg(reg, type):
 79 |         rv = regs.get(reg, None)
 80 |         if rv is None or rv.type != type:
 81 |             return None
 82 |         return rv.value
 83 | 
 84 |     # Initialize our registers and create accessor functions.
 85 |     regs = { reg: RegValue(DELTA, delta) for reg, delta in entry_regs.items() }
 86 | 
 87 |     # For each instruction in the basic block, see if any new register gets assigned.
 88 |     for insn in idau.Instructions(start, end):
 89 |         # First, if this instruction has a fixed state (i.e., a set mapping of registers to
 90 |         # deltas), set that state. This overwrites any previous values, so care must be taken by
 91 |         # the caller to ensure that this initialization is correct.
 92 |         fixed_regs_and_deltas = fix.get(insn.ea)
 93 |         if fixed_regs_and_deltas:
 94 |             for reg, delta in fixed_regs_and_deltas.items():
 95 |                 _log(6, '\t\t{:x}  fix {}={}', insn.ea, reg, delta)
 96 |                 regs[reg] = RegValue(DELTA, delta)
 97 |         # If this is an access instruction, record the access. See comment about auxpref below.
 98 |         if not (insn.auxpref & _ARM64_WRITEBACK):
 99 |             for op in insn.Operands:
100 |                 # We only consider o_displ and o_phrase.
101 |                 if op.type == idaapi.o_void:
102 |                     break
103 |                 elif op.type not in (idaapi.o_displ, idaapi.o_phrase):
104 |                     continue
105 |                 # Get the delta for the base register.
106 |                 delta = get_reg(op.reg, DELTA)
107 |                 if delta is None:
108 |                     continue
109 |                 # Get the instruction access size.
110 |                 size = _INSN_OP_DTYP_SZ.get(op.dtyp)
111 |                 if size is None:
112 |                     continue
113 |                 # Get the offset from the base register (which is additional to the base register's
114 |                 # delta).
115 |                 op_offset = None
116 |                 if op.type == idaapi.o_displ:
117 |                     op_offset = op.addr
118 |                 else: # op.type == idaapi.o_phrase
119 |                     op_offset_reg = op.specflag1 & 0xff
120 |                     op_offset = get_reg(op_offset_reg, CONST)
121 |                 if op_offset is None:
122 |                     continue
123 |                 # Record this access.
124 |                 offset = (delta + op_offset) & 0xffffffffffffffff
125 |                 _log(5, '\t\t{:x}  access({})  {}, {}', insn.ea, op.reg, offset, size)
126 |                 accesses[(offset, size)].add((insn.ea, delta))
127 |         # Update the set of registers pointing to the struct, and the set of known constant
128 |         # registers.
129 |         if (insn.itype == idaapi.ARM_mov
130 |                 and insn.Op1.type == idaapi.o_reg
131 |                 and insn.Op2.type == idaapi.o_reg
132 |                 and insn.Op3.type == idaapi.o_void
133 |                 and insn.Op1.dtyp == idaapi.dt_qword
134 |                 and insn.Op2.dtyp == idaapi.dt_qword
135 |                 and insn.Op2.reg in regs):
136 |             # MOV Xdst, Xsrc
137 |             _log(6, '\t\t{:x}  add {}={}', insn.ea, insn.Op1.reg, regs[insn.Op2.reg].value)
138 |             regs[insn.Op1.reg] = regs[insn.Op2.reg]
139 |         elif (insn.itype == idaapi.ARM_mov
140 |                 and insn.Op1.type == idaapi.o_reg
141 |                 and insn.Op2.type == idaapi.o_imm
142 |                 and insn.Op3.type == idaapi.o_void
143 |                 and insn.Op1.dtyp in (idaapi.dt_dword, idaapi.dt_qword)):
144 |             # MOV Xdst, #imm
145 |             _log(7, '\t\t{:x}  const {}={}', insn.ea, insn.Op1.reg, insn.Op2.value)
146 |             regs[insn.Op1.reg] = RegValue(CONST, insn.Op2.value)
147 |         elif (insn.itype == idaapi.ARM_add
148 |                 and insn.Op1.type == idaapi.o_reg
149 |                 and insn.Op2.type == idaapi.o_reg
150 |                 and insn.Op3.type == idaapi.o_imm
151 |                 and insn.Op4.type == idaapi.o_void
152 |                 and insn.Op1.dtyp == idaapi.dt_qword
153 |                 and insn.Op2.dtyp == idaapi.dt_qword
154 |                 and insn.Op2.reg in regs):
155 |             # ADD Xdst, Xsrc, #amt
156 |             op2 = regs[insn.Op2.reg]
157 |             _log(6, '\t\t{:x}  add {}={}+{}', insn.ea, insn.Op1.reg, op2.value, insn.Op3.value)
158 |             regs[insn.Op1.reg] = RegValue(op2.type, op2.value + insn.Op3.value)
159 |         elif (insn.itype == idaapi.ARM_bl or insn.itype == idaapi.ARM_blr):
160 |             # A function call (direct or indirect). Any correct compiler should generate code that
161 |             # does not use the temporary registers after a call, but just to be safe, clear all the
162 |             # temporary registers.
163 |             _log(6, '\t\t{:x}  clear temps', insn.ea)
164 |             for r in xrange(0, 19):
165 |                 regs.pop(getattr(idautils.procregs, 'X{}'.format(r)).reg, None)
166 |         else:
167 |             # This is an unrecognized instruction. Clear all the registers it modifies.
168 |             feature = insn.get_canon_feature()
169 |             # On Arm64, LDR-type instructions store their writeback behavior in the instructions's
170 |             # auxpref flags. As best I can tell, insn.get_canon_feature()'s CF_CHG* flags indicate
171 |             # whether the operand will change, which is different than the register changing for
172 |             # operands like o_displ that use a register to refer to a memory location. Thus, we
173 |             # actually need to special case auxpref and clear those registers. Fortunately,
174 |             # writeback behavior is only observed in o_displ operands, of which there should only
175 |             # ever be one, so it doesn't matter that auxpref is stored on the instruction and not
176 |             # the operand.
177 |             for op in insn.Operands:
178 |                 if op.type == idaapi.o_void:
179 |                     break
180 |                 if ((feature & _INSN_OP_CHG[op.n] and op.type == idaapi.o_reg)
181 |                         or (insn.auxpref & _ARM64_WRITEBACK and op.type == idaapi.o_displ)):
182 |                     _log(6, '\t\t{:x}  clear {}', insn.ea, op.reg)
183 |                     regs.pop(op.reg, None)
184 |     return { reg: rv.value for reg, rv in regs.items() if rv.type == DELTA }
185 | 
def _pointer_accesses_data_flow(flow, initialization, accesses):
    """Propagate the tracked registers through the flow graph for pointer_accesses.

    Arguments:
        flow: An iterable of basic blocks. Each block is used through its id, startEA and endEA
            attributes and its succs() method.
        initialization: The initialization dictionary, forwarded unchanged to
            _add_blocks_to_queue and _pointer_accesses_process_block.
        accesses: The accesses collection, forwarded to _pointer_accesses_process_block.
    """
    # For each block id, the registers (with their struct offsets) known on entry to that block.
    # A register is only ever given a single offset per block.
    regs_at_entry = dict((block.id, {}) for block in flow)
    # Seed the work list with the blocks that have an initial value.
    worklist = collections.deque()
    _add_blocks_to_queue(worklist, flow, initialization)
    # Handle one block at a time, pushing the registers live at its exit into its successors.
    # Strictly speaking this is not a full data flow analysis: that would iterate to a fixed
    # point and only then evaluate the accesses. The two differ when a block A has already been
    # processed with register R at offset O and a later block B branches back to A with R at a
    # different offset O'. A full analysis would invalidate R at the start of A and retract the
    # accesses it produced. In practice this only happens in loops, where the first iteration
    # is usually a valid access to the structure. What we must avoid is an A->B->A cycle that
    # keeps advancing the offset forever and makes the structure look infinite. Never
    # overwriting an offset that a block already has for R prevents that, and also gives better
    # results: loops that increment an offset register are effectively ignored.
    while worklist:
        block = worklist.popleft()
        regs_in = regs_at_entry[block.id]
        _log(3, 'Basic block {}  {:x}-{:x}', block.id, block.startEA, block.endEA)
        _log(4, '\tregs@entry = {}', regs_in)
        regs_out = _pointer_accesses_process_block(block.startEA, block.endEA, initialization,
                regs_in, accesses)
        _log(4, '\tregs@exit = {}', regs_out)
        _log(4, '\tsuccs = {}', [s.id for s in block.succs()])
        for successor in block.succs():
            # Union the exit registers into the successor's entry registers, since any register
            # that can point into the struct along any path is tracked. Registers the successor
            # already knows about keep their existing offset (see above).
            known = regs_at_entry[successor.id]
            fresh = [reg for reg in regs_out if reg not in known]
            for reg in fresh:
                known[reg] = regs_out[reg]
            # Only a successor that learned about a new register is processed (again).
            if fresh:
                worklist.append(successor)
230 | 
def pointer_accesses(function=None, bounds=None, initialization=None, accesses=None):
    """Find the accesses made through registers that point into a memory region.

    The flow graph of the given function or code region is walked, starting from the registers
    named in the initialization, and every access to the pointed-to memory region is recorded.

    Options:
        function: An address anywhere inside the function to analyze. Default is None.
        bounds: A (start, end) tuple giving the start and end addresses of the code region to
            analyze. Default is None.
        initialization: A dictionary of dictionaries. Its keys are the linear addresses of the
            instructions at which some register is known to point into the memory region of
            interest. initialization[address] maps the register numbers of those registers to
            their deltas: initialization[address][register] is how far past the start of the
            memory region the register points (a positive delta means the register points to a
            higher address than the start). This option must be supplied.
        accesses: An existing collections.defaultdict(set) to populate in place. When given, no
            new dictionary is created or returned. Default is None.

    Returns:
        When accesses is None (the default), a dictionary mapping each (offset, size) tuple to
        the set of (address, delta) tuples that performed that access. When accesses is
        supplied, or when no flow chart could be created, None.

    Notes:
        Specify either a function or a code region, never both.

        The typical use is a function for which one register is known to point to a structure
        on entry. For instance, if the function at address 0x4000 receives in register 10 a
        pointer 144 bytes in to an unknown structure, the initialization dictionary is:
            { 0x4000: { 10: 144 } }
    """
    flow = _create_flow(function, bounds)
    if flow is None:
        return None
    # The caller supplied the collection: fill it in place and return nothing.
    if accesses is not None:
        _pointer_accesses_data_flow(flow, initialization, accesses)
        return None
    # Otherwise collect into a fresh defaultdict and hand back a plain dictionary.
    collected = collections.defaultdict(set)
    _pointer_accesses_data_flow(flow, initialization, collected)
    return dict(collected)
280 | 
281 | 


--------------------------------------------------------------------------------
/ida_kernelcache/collect_classes.py:
--------------------------------------------------------------------------------
  1 | #
  2 | # ida_kernelcache/collect_classes.py
  3 | # Brandon Azad
  4 | #
  5 | # Collects information about C++ classes in a kernelcache.
  6 | #
  7 | 
  8 | from collections import defaultdict
  9 | 
 10 | import idc
 11 | import idautils
 12 | import idaapi
 13 | 
 14 | import ida_utilities as idau
 15 | import classes
 16 | import segment
 17 | import symbol
 18 | import vtable
 19 | 
# Module logger built by idau.make_log; used throughout this file as
# _log(level, format_string, *args).
# NOTE(review): the first argument (1) is presumably this module's verbosity threshold; confirm
# against ida_utilities.make_log.
_log = idau.make_log(1, __name__)

# Bits tested against insn.auxpref in _emulate_arm64 to detect load/store instructions that
# write the updated address back to their base register.
# IDK where IDA defines these.
_MEMOP_PREINDEX  = 0x20
_MEMOP_POSTINDEX = 0x80

# Either form of writeback (pre-index or post-index).
_MEMOP_WBINDEX   = _MEMOP_PREINDEX | _MEMOP_POSTINDEX
 27 | 
 28 | class _Regs(object):
 29 |     """A set of registers for _emulate_arm64."""
 30 | 
 31 |     class _Unknown:
 32 |         """A wrapper class indicating that the value is unknown."""
 33 |         def __add__(self, other):
 34 |             return _Regs.Unknown
 35 |         def __radd__(self, other):
 36 |             return _Regs.Unknown
 37 |         def __nonzero__(self):
 38 |             return False
 39 | 
 40 |     _reg_names = idautils.GetRegisterList()
 41 |     Unknown = _Unknown()
 42 | 
 43 |     def __init__(self):
 44 |         self.clearall()
 45 | 
 46 |     def clearall(self):
 47 |         self._regs = {}
 48 | 
 49 |     def clear(self, reg):
 50 |         try:
 51 |             del self._regs[self._reg(reg)]
 52 |         except KeyError:
 53 |             pass
 54 | 
 55 |     def _reg(self, reg):
 56 |         if isinstance(reg, (int, long)):
 57 |             reg = _Regs._reg_names[reg]
 58 |         return reg
 59 | 
 60 |     def __getitem__(self, reg):
 61 |         try:
 62 |             return self._regs[self._reg(reg)]
 63 |         except:
 64 |             return _Regs.Unknown
 65 | 
 66 |     def __setitem__(self, reg, value):
 67 |         if value is None or value is _Regs.Unknown:
 68 |             self.clear(self._reg(reg))
 69 |         else:
 70 |             self._regs[self._reg(reg)] = value & 0xffffffffffffffff
 71 | 
 72 | def _emulate_arm64(start, end, on_BL=None, on_RET=None):
 73 |     """A very basic partial Arm64 emulator that does just enough to find OSMetaClass
 74 |     information."""
 75 |     # Super basic emulation.
 76 |     reg = _Regs()
 77 |     def load(addr, dtyp):
 78 |         if not addr:
 79 |             return None
 80 |         if dtyp == idaapi.dt_qword:
 81 |             size = 8
 82 |         elif dtyp == idaapi.dt_dword:
 83 |             size = 4
 84 |         else:
 85 |             return None
 86 |         return idau.read_word(addr, size)
 87 |     def cleartemps():
 88 |         for t in ['X{}'.format(i) for i in range(0, 19)]:
 89 |             reg.clear(t)
 90 |     for insn in idau.Instructions(start, end):
 91 |         _log(11, 'Processing instruction {:#x}', insn.ea)
 92 |         mnem = insn.get_canon_mnem()
 93 |         if mnem == 'ADRP' or mnem == 'ADR':
 94 |             reg[insn.Op1.reg] = insn.Op2.value
 95 |         elif mnem == 'ADD' and insn.Op2.type == idc.o_reg and insn.Op3.type == idc.o_imm:
 96 |             reg[insn.Op1.reg] = reg[insn.Op2.reg] + insn.Op3.value
 97 |         elif mnem == 'NOP':
 98 |             pass
 99 |         elif mnem == 'MOV' and insn.Op2.type == idc.o_imm:
100 |             reg[insn.Op1.reg] = insn.Op2.value
101 |         elif mnem == 'MOV' and insn.Op2.type == idc.o_reg:
102 |             reg[insn.Op1.reg] = reg[insn.Op2.reg]
103 |         elif mnem == 'RET':
104 |             if on_RET:
105 |                 on_RET(reg)
106 |             break
107 |         elif (mnem == 'STP' or mnem == 'LDP') and insn.Op3.type == idc.o_displ:
108 |             if insn.auxpref & _MEMOP_WBINDEX:
109 |                 reg[insn.Op3.reg] = reg[insn.Op3.reg] + insn.Op3.addr
110 |             if mnem == 'LDP':
111 |                 reg.clear(insn.Op1.reg)
112 |                 reg.clear(insn.Op2.reg)
113 |         elif (mnem == 'STR' or mnem == 'LDR') and not insn.auxpref & _MEMOP_WBINDEX:
114 |             if mnem == 'LDR':
115 |                 if insn.Op2.type == idc.o_displ:
116 |                     reg[insn.Op1.reg] = load(reg[insn.Op2.reg] + insn.Op2.addr, insn.Op1.dtyp)
117 |                 else:
118 |                     reg.clear(insn.Op1.reg)
119 |         elif mnem == 'BL' and insn.Op1.type == idc.o_near:
120 |             if on_BL:
121 |                 on_BL(insn.Op1.addr, reg)
122 |             cleartemps()
123 |         else:
124 |             _log(10, 'Unrecognized instruction at address {:#x}', insn.ea)
125 |             reg.clearall()
126 | 
127 | class _OneToOneMapFactory(object):
128 |     """A factory to extract the largest one-to-one submap."""
129 | 
130 |     def __init__(self):
131 |         self._as_to_bs = defaultdict(set)
132 |         self._bs_to_as = defaultdict(set)
133 | 
134 |     def add_link(self, a, b):
135 |         """Add a link between the two objects."""
136 |         self._as_to_bs[a].add(b)
137 |         self._bs_to_as[b].add(a)
138 | 
139 |     def _make_unique_oneway(self, xs_to_ys, ys_to_xs, bad_x=None):
140 |         """Internal helper to make one direction unique."""
141 |         for x, ys in xs_to_ys.items():
142 |             if len(ys) != 1:
143 |                 if bad_x:
144 |                     bad_x(x, ys)
145 |                 del xs_to_ys[x]
146 |                 for y in ys:
147 |                     del ys_to_xs[y]
148 | 
149 |     def _build_oneway(self, xs_to_ys):
150 |         """Build a one-way mapping after pruning."""
151 |         x_to_y = dict()
152 |         for x, ys in xs_to_ys.items():
153 |             x_to_y[x] = next(iter(ys))
154 |         return x_to_y
155 | 
156 |     def build(self, bad_a=None, bad_b=None):
157 |         """Extract the smallest one-to-one submap."""
158 |         as_to_bs = dict(self._as_to_bs)
159 |         bs_to_as = dict(self._bs_to_as)
160 |         self._make_unique_oneway(as_to_bs, bs_to_as, bad_a)
161 |         self._make_unique_oneway(bs_to_as, as_to_bs, bad_b)
162 |         return self._build_oneway(as_to_bs)
163 | 
def _process_mod_init_func_for_metaclasses(func, found_metaclass):
    """Emulate one __mod_init_func function, reporting each OSMetaClass constructor call.

    Arguments:
        func: The address of the initializer function.
        found_metaclass: Called as found_metaclass(metaclass, classname, class_size,
            meta_superclass) for every call that looks like an OSMetaClass construction.
    """
    _log(4, 'Processing function {}', idc.GetFunctionName(func))

    def handle_call(target, regs):
        this_ptr = regs['X0']
        name_ptr = regs['X1']
        class_size = regs['X3']
        # All three of these arguments must be known and nonzero.
        if not this_ptr or not name_ptr or not class_size:
            return
        _log(5, 'Have call to {:#x}({:#x}, {:#x}, ?, {:#x})', target, this_ptr, name_ptr,
                class_size)
        # OSMetaClass::OSMetaClass(this, className, superclass, classSize)
        # The class name must be a C string and the metaclass must live in some segment.
        name_is_cstring = idc.SegName(name_ptr).endswith("__TEXT.__cstring")
        if not (name_is_cstring and idc.SegName(this_ptr)):
            return
        # The superclass (X2) is optional: an unknown or zero value is reported as None.
        found_metaclass(this_ptr, idc.GetString(name_ptr), class_size, regs['X2'] or None)

    _emulate_arm64(func, idc.FindFuncEnd(func), on_BL=handle_call)
177 | 
def _process_mod_init_func_section_for_metaclasses(segstart, found_metaclass):
    """Scan every function pointer in a __mod_init_func section for OSMetaClass information.

    Arguments:
        segstart: The start address of the section.
        found_metaclass: The callback forwarded to _process_mod_init_func_for_metaclasses.
    """
    section_end = idc.SegEnd(segstart)
    # Each word of the section is treated as the address of an initializer function.
    initializers = idau.ReadWords(segstart, section_end)
    for initializer in initializers:
        _process_mod_init_func_for_metaclasses(initializer, found_metaclass)
183 | 
184 | def _should_process_segment(seg, segname):
185 |     """Check if we should process the specified segment."""
186 |     return segname.endswith('__DATA_CONST.__mod_init_func') or \
187 |             segname == '__DATA.__kmod_init'
188 | 
def _collect_metaclasses():
    """Gather OSMetaClass information from every kext in the kernelcache.

    Returns:
        A dictionary mapping each metaclass address to a classes.ClassInfo describing the class.
        Only metaclasses with exactly one class name, and class names with exactly one
        metaclass, are included.
    """
    # Accumulate the metaclass <-> class name associations, plus per-metaclass details.
    name_links = _OneToOneMapFactory()
    class_sizes = {}
    meta_superclasses = {}

    def record_metaclass(metaclass, classname, class_size, meta_superclass):
        name_links.add_link(metaclass, classname)
        class_sizes[metaclass] = class_size
        meta_superclasses[metaclass] = meta_superclass

    for segstart in idautils.Segments():
        segname = idc.SegName(segstart)
        if _should_process_segment(segstart, segname):
            _log(2, 'Processing segment {}', segname)
            _process_mod_init_func_section_for_metaclasses(segstart, record_metaclass)

    # A class name with several metaclasses is dropped along with those metaclasses. This can
    # happen when multiple kexts define a class but only one gets loaded.
    def report_ambiguous_classname(classname, metaclasses):
        addresses = ['{:#x}'.format(mc) for mc in metaclasses]
        _log(0, 'Class {} has multiple metaclasses: {}', classname, ', '.join(addresses))

    # A metaclass with several class names is dropped along with those names. It is not known
    # why this would happen.
    def report_ambiguous_metaclass(metaclass, classnames):
        _log(0, 'Metaclass {:#x} has multiple classes: {}', metaclass, ', '.join(classnames))

    # Assemble the final dictionary of metaclass info from the one-to-one associations.
    classname_of = name_links.build(report_ambiguous_metaclass, report_ambiguous_classname)
    metaclass_info = {}
    for metaclass, classname in classname_of.items():
        parent = meta_superclasses[metaclass]
        parent_name = classname_of.get(parent, None)
        metaclass_info[metaclass] = classes.ClassInfo(classname, metaclass, None, None,
                class_sizes[metaclass], parent_name, parent)
    return metaclass_info
224 | 
# Index, in words from the vtable address, of the getMetaClass entry read by
# _get_vtable_metaclass.
_VTABLE_GETMETACLASS    = vtable.VTABLE_OFFSET + 7
# Bounds how far past the start of getMetaClass _get_vtable_metaclass emulates: the end address
# is idau.WORD_SIZE times this value beyond the start.
_MAX_GETMETACLASS_INSNS = 3
227 | 
def _get_vtable_metaclass(vtable_addr, metaclass_info):
    """Emulate the vtable's getMetaClass method and return the known metaclass it yields.

    Arguments:
        vtable_addr: The address of the candidate vtable.
        metaclass_info: The dictionary of known metaclasses, keyed by metaclass address.

    Returns:
        The value of X0 at the method's RET if that value is a key of metaclass_info, otherwise
        None.
    """
    slot = vtable_addr + _VTABLE_GETMETACLASS * idau.WORD_SIZE
    getMetaClass = idau.read_word(slot)
    # A one-element list lets the callback hand the return value back to this scope.
    returned = [None]

    def capture_return(regs):
        returned[0] = regs['X0']

    limit = getMetaClass + idau.WORD_SIZE * _MAX_GETMETACLASS_INSNS
    _emulate_arm64(getMetaClass, limit, on_RET=capture_return)
    candidate = returned[0]
    if candidate in metaclass_info:
        return candidate
    return None
238 | 
def _process_const_section_for_vtables(segstart, metaclass_info, found_vtable):
    """Scan a __const section for virtual method tables that belong to known metaclasses.

    Arguments:
        segstart: The start address of the section.
        metaclass_info: The dictionary of known metaclasses, keyed by metaclass address.
        found_vtable: Called as found_vtable(metaclass, vtable_address, length) for each vtable
            whose getMetaClass method returns a known metaclass.
    """
    segend = idc.SegEnd(segstart)
    cursor = segstart
    while cursor < segend:
        possible, length = vtable.vtable_length(cursor, segend, scan=True)
        # Only plausible vtables are worth emulating.
        metaclass = _get_vtable_metaclass(cursor, metaclass_info) if possible else None
        if metaclass:
            _log(4, 'Vtable at address {:#x} has metaclass {:#x}', cursor, metaclass)
            found_vtable(metaclass, cursor, length)
        # In either case, length says how many words can be skipped.
        cursor += length * idau.WORD_SIZE
251 | 
def _collect_vtables(metaclass_info):
    """Find the virtual method tables belonging to the known OSMetaClass instances.

    Arguments:
        metaclass_info: The dictionary of classes.ClassInfo objects keyed by metaclass address.
            The ClassInfo objects are updated in place with vtable and superclass information.

    Returns:
        A tuple (class_info, vtable_lengths). class_info maps each class name to its ClassInfo;
        vtable_lengths maps the address of every vtable found to its length.
    """
    # The metaclass <-> vtable associations, and the length of every vtable seen.
    links = _OneToOneMapFactory()
    vtable_lengths = {}

    # Callback invoked for each vtable discovered during the scan.
    def found_vtable(metaclass, vtable_ea, length):
        # The length is recorded even if the association below is declined.
        vtable_lengths[vtable_ea] = length
        # If the class already has a vtable symbol located somewhere else, this is not its
        # vtable.
        classname = metaclass_info[metaclass].classname
        expected_symbol = symbol.vtable_symbol_for_class(classname)
        expected_ea = idau.get_name_ea(expected_symbol)
        if expected_ea != idc.BADADDR and expected_ea != vtable_ea:
            return
        # If this vtable already carries a symbol naming a different class, decline as well.
        existing_symbol = idau.get_ea_name(vtable_ea, user=True)
        if existing_symbol:
            symbol_classname = symbol.vtable_symbol_get_class(existing_symbol)
            if symbol_classname != classname:
                _log(2, 'Declining association between metaclass {:x} ({}) and vtable {:x} ({})',
                        metaclass, classname, vtable_ea, symbol_classname)
                return
        # The metaclass and the vtable must come from the same kext.
        if segment.kernelcache_kext(metaclass) != segment.kernelcache_kext(vtable_ea):
            return
        links.add_link(metaclass, vtable_ea)

    # Scan every matching segment, feeding the results to found_vtable().
    for segstart in idautils.Segments():
        segname = idc.SegName(segstart)
        if segname.endswith('__DATA_CONST.__const'):
            _log(2, 'Processing segment {}', segname)
            _process_const_section_for_vtables(segstart, metaclass_info, found_vtable)

    # Several vtables for one metaclass is really weird, except for OSMetaClass's own
    # metaclass: the vtables of all OSMetaClass subclasses refer back to it.
    def report_metaclass_conflict(metaclass, vtables):
        metaclass_name = metaclass_info[metaclass].classname
        if metaclass_name == 'OSMetaClass':
            return
        listing = ', '.join('{:#x}'.format(vt) for vt in vtables)
        _log(0, 'Metaclass {:#x} ({}) has multiple vtables: {}', metaclass, metaclass_name,
                listing)

    # Several metaclasses for one vtable is really weird.
    def report_vtable_conflict(vtable_ea, metaclasses):
        listing = ', '.join('{:#x} ({})'.format(mc, metaclass_info[mc].classname)
                for mc in metaclasses)
        _log(0, 'Vtable {:#x} has multiple metaclasses: {}', vtable_ea, listing)

    metaclass_to_vtable = links.build(report_metaclass_conflict, report_vtable_conflict)
    # Not every metaclass in metaclass_info necessarily ends up with a vtable of its own.
    class_info = {}
    for metaclass, classinfo in metaclass_info.items():
        # Fill in the vtable and its length. A class without a vtable of its own takes the one
        # of its nearest ancestor that has one.
        ancestor = metaclass
        while ancestor:
            table = metaclass_to_vtable.get(ancestor, None)
            if table:
                classinfo.vtable        = table
                classinfo.vtable_length = vtable_lengths[table]
                break
            ancestor_info = metaclass_info.get(ancestor, None)
            if not ancestor_info:
                break
            ancestor = ancestor_info.meta_superclass
        # Link the class to its superclass in both directions. This is safe because no further
        # filtering happens after this point.
        superclass = metaclass_info.get(classinfo.meta_superclass, None)
        if superclass:
            classinfo.superclass = superclass
            classinfo.superclass.subclasses.add(classinfo)
        # The final dictionary is keyed by class name.
        class_info[classinfo.classname] = classinfo
    return class_info, vtable_lengths
325 | 
326 | def _check_filetype(filetype):
327 |     """Checks that the filetype is compatible before trying to process it."""
328 |     return 'Mach-O' in filetype and 'ARM64' in filetype
329 | 
def collect_class_info_internal():
    """Gather information about the C++ classes defined in a kernelcache.

    Arm64 only.

    Returns:
        A tuple (class_info, all_vtables) as produced by _collect_vtables, or None if the file
        type is unsupported or either collection step found nothing.
    """
    kind = idaapi.get_file_type_name()
    if not _check_filetype(kind):
        _log(-1, 'Bad file type "{}"', kind)
        return None

    _log(1, 'Collecting information about OSMetaClass instances')
    metaclasses = _collect_metaclasses()
    if not metaclasses:
        _log(-1, 'Could not collect OSMetaClass instances')
        return None

    _log(1, 'Searching for virtual method tables')
    found = _collect_vtables(metaclasses)
    class_info, all_vtables = found
    if not class_info:
        _log(-1, 'Could not collect virtual method tables')
        return None

    _log(1, 'Done')
    return class_info, all_vtables
351 | 
352 | 


--------------------------------------------------------------------------------
/ida_kernelcache/vtable.py:
--------------------------------------------------------------------------------
  1 | #
  2 | # ida_kernelcache/vtable.py
  3 | # Brandon Azad
  4 | #
  5 | # Functions for analyzing and symbolicating vtables in the kernelcache.
  6 | #
  7 | 
  8 | from itertools import islice, takewhile
  9 | 
 10 | import idc
 11 | import idautils
 12 | 
 13 | from symbol import vtable_symbol_for_class
 14 | import ida_utilities as idau
 15 | import classes
 16 | import stub
 17 | 
# Module logger built by idau.make_log; used throughout this file as
# _log(level, format_string, *args).
# NOTE(review): the first argument (0) is presumably this module's verbosity threshold; confirm
# against ida_utilities.make_log.
_log = idau.make_log(0, __name__)

# Vtable layout constants, all measured in words. vtable_length() expects VTABLE_OFFSET zero
# words at the start of a vtable and requires at least MIN_VTABLE_LENGTH words in total.
VTABLE_OFFSET      =  2
"""The first few entries of the virtual method tables in the kernelcache are empty."""
MIN_VTABLE_METHODS = 12
"""The minimum number of methods in a virtual method table."""
MIN_VTABLE_LENGTH  = VTABLE_OFFSET + MIN_VTABLE_METHODS
"""The minimum length of a virtual method table in words, including the initial empty entries."""
 26 | 
 27 | def vtable_length(ea, end=None, scan=False):
 28 |     """Find the length of a virtual method table.
 29 | 
 30 |     This function checks whether the effective address could correspond to a virtual method table
 31 |     and calculates its length, including the initial empty entries. By default (when scan is
 32 |     False), this function returns the length of the vtable if the address could correspond to a
 33 |     vtable, or 0 if the address definitely could not be a vtable.
 34 | 
 35 |     Arguments:
 36 |         ea: The linear address of the start of the vtable.
 37 | 
 38 |     Options:
 39 |         end: The end address to search through. Defaults to the end of the section.
 40 |         scan: Set to True to indicate that this function is being called to scan memory for virtual
 41 |             method tables. Instead of returning the length of the vtable or 0, this function will
 42 |             return a tuple (possible, length). Additionally, as a slight optimization, this
 43 |             function will sometimes look ahead in order to increase the amount of data that can be
 44 |             skipped, reducing duplication of effort between subsequent calls.
 45 | 
 46 |     Returns:
 47 |         If scan is False (the default), then this function returns the length of the vtable in
 48 |         words, including the initial empty entries.
 49 | 
 50 |         Otherwise, this function returns a tuple (possible, length). If the address could
 51 |         correspond to the start of a vtable, then possible is True and length is the length of the
 52 |         vtable in words, including the initial empty entries. Otherwise, if the address is
 53 |         definitely not the start of a vtable, then possible is False and length is the number of
 54 |         words that can be skipped when searching for the next vtable.
 55 |     """
 56 |     # TODO: This function should be reorganized. The better way of doing it is to count the number
 57 |     # of zero entries, then the number of nonzero entries, then decide based on that. Less
 58 |     # special-casing that way.
 59 |     # TODO: We should have a static=True/False flag to indicate whether we want to include the
 60 |     # empty entries.
 61 |     def return_value(possible, length):
 62 |         if scan:
 63 |             return possible, length
 64 |         return length if possible else 0
 65 |     # Initialize default values.
 66 |     if end is None:
 67 |         end = idc.SegEnd(ea)
 68 |     words = idau.ReadWords(ea, end)
 69 |     # Iterate through the first VTABLE_OFFSET words. If any of them are nonzero, then we can skip
 70 |     # past all the words we just saw.
 71 |     for idx, word in enumerate(islice(words, VTABLE_OFFSET)):
 72 |         if word != 0:
 73 |             return return_value(False, idx + 1)
 74 |     # Now this first word after the padding section is special.
 75 |     first = next(words, None)
 76 |     if first is None:
 77 |         # We have 2 zeros followed by the end of our range.
 78 |         return return_value(False, VTABLE_OFFSET)
 79 |     elif first == 0:
 80 |         # We have VTABLE_OFFSET + 1 zero entries.
 81 |         zeros = VTABLE_OFFSET + 1
 82 |         if scan:
 83 |             # To avoid re-reading the data we just read in the case of a zero-filled section, let's
 84 |             # look ahead a bit until we find the first non-zero value.
 85 |             for word in words:
 86 |                 if word is None:
 87 |                     return return_value(False, zeros)
 88 |                 if word != 0:
 89 |                     break
 90 |                 zeros += 1
 91 |             else:
 92 |                 # We found no nonzero words before the end.
 93 |                 return return_value(False, zeros)
 94 |         # We can skip all but the last VTABLE_OFFSET zeros.
 95 |         return return_value(False, zeros - VTABLE_OFFSET)
 96 |     # TODO: We should verify that all vtable entries refer to code.
 97 |     # Now we know that we have at least one nonzero value, our job is easier. Get the full length
 98 |     # of the vtable, including the first VTABLE_OFFSET entries and the subsequent nonzero entries,
 99 |     # until either we find a zero word (not included) or run out of words in the stream.
100 |     length = VTABLE_OFFSET + 1 + idau.iterlen(takewhile(lambda word: word != 0, words))
101 |     # Now it's simple: We are valid if the length is long enough, invalid if it's too short.
102 |     return return_value(length >= MIN_VTABLE_LENGTH, length)
103 | 
def convert_vtable_to_offsets(vtable, length=None):
    """Mark every word of a virtual method table as an offset in IDA.

    Arguments:
        vtable: The address of the virtual method table.

    Options:
        length: The number of words in the vtable. If None, it is computed with vtable_length().

    Returns:
        True if every word was converted into an offset. False if the length is zero (the address
        is not a vtable) or if at least one word could not be converted.
    """
    nwords = vtable_length(vtable) if length is None else length
    if not nwords:
        _log(0, 'Address {:#x} is not a vtable', vtable)
        return False
    # Keep going after a failure so that every entry that cannot be converted gets reported.
    failures = 0
    for entry in idau.Addresses(vtable, length=nwords, step=idau.WORD_SIZE):
        if idc.OpOff(entry, 0, 0):
            continue
        _log(0, 'Could not change address {:#x} into an offset', entry)
        failures += 1
    return failures == 0
127 | 
def _convert_vtable_methods_to_functions(vtable, length):
    """Make sure each virtual method listed in the vtable is an IDA function.

    Methods that cannot be converted are logged; nothing is returned.
    """
    for method_ea in vtable_methods(vtable, length=length):
        if idau.force_function(method_ea):
            continue
        _log(0, 'Could not convert virtual method {:#x} into a function', method_ea)
133 | 
def initialize_vtables():
    """Convert vtables into offsets and ensure that virtual methods are IDA functions.

    Class information is collected first; then every entry of classes.vtables (vtable address
    mapped to vtable length) is processed.
    """
    classes.collect_class_info()
    for vtable_ea, nwords in classes.vtables.items():
        converted = convert_vtable_to_offsets(vtable_ea, nwords)
        if not converted:
            _log(0, 'Could not convert vtable at address {:x} into offsets', vtable_ea)
        # The methods are processed even when the offset conversion reported a failure.
        _convert_vtable_methods_to_functions(vtable_ea, nwords)
141 | 
def add_vtable_symbol(vtable, classname):
    """Add a symbol for the virtual method table at the specified address.

    Arguments:
        vtable: The address of the virtual method table.
        classname: The name of the C++ class with this virtual method table.

    Returns:
        True if idau.set_ea_name() accepted the vtable symbol for the address, False otherwise
        (in which case the name currently at the address is logged).
    """
    vtable_symbol = vtable_symbol_for_class(classname)
    if not idau.set_ea_name(vtable, vtable_symbol):
        # Hand the values to _log as separate arguments, like every other _log call in this file.
        # Pre-formatting the message here would push the finished text through str.format() a
        # second time inside the logger, which fails if an existing name contains '{' or '}'.
        _log(0, 'Address {:#x} already has name {} instead of vtable symbol {}',
                vtable, idau.get_ea_name(vtable), vtable_symbol)
        return False
    return True
158 | 
def initialize_vtable_symbols():
    """Populate IDA with virtual method table symbols for an iOS kernelcache.

    Every class in classes.class_info that has a vtable gets a vtable symbol; classes without a
    known vtable are logged and skipped.
    """
    classes.collect_class_info()
    for classname, classinfo in classes.class_info.items():
        vtable = classinfo.vtable
        if not vtable:
            _log(0, 'Class {} has no known vtable', classname)
            continue
        _log(3, 'Class {} has vtable at {:#x}', classname, vtable)
        if not add_vtable_symbol(vtable, classname):
            _log(0, 'Could not add vtable symbol for class {} at address {:#x}', classname,
                    vtable)
170 | 
def class_vtable_method(classinfo, index):
    """Get the virtual method for a class by index.

    Arguments:
        classinfo: The class information of the class.
        index: The index of the virtual method, skipping the empty entries (that is, the first
            virtual method is at index 0).

    Returns:
        The word stored in the method slot, or None if index is not below
        classinfo.vtable_nmethods.
    """
    # classinfo.vtable_methods is the address of the first method slot.
    first_method = classinfo.vtable_methods
    if index >= classinfo.vtable_nmethods:
        return None
    slot = first_method + index * idau.WORD_SIZE
    return idau.read_word(slot)
185 | 
def vtable_methods(vtable, start=VTABLE_OFFSET, length=None, nmethods=None):
    """Get the methods in a virtual method table.

    A generator that returns each method in the virtual method table. The initial empty entries are
    skipped.

    Arguments:
        vtable: The address of the virtual method table. (This includes the initial empty entries.)

    Options:
        start: The index at which to start returning values. All prior indexes
            are skipped. Default is VTABLE_OFFSET, meaning the initial empty
            entries will be skipped.
        length: The length of the vtable, including the initial empty entries. Specify this value
            to read the entire vtable if the length is already known. Ignored when nmethods is
            given.
        nmethods: The number of methods to read, excluding the initial empty entries. If None, the
            whole vtable will be read. Default is None.
    """
    assert vtable
    # Get the length of the vtable. An explicit method count takes precedence over length.
    if nmethods is not None:
        length = nmethods + VTABLE_OFFSET
    elif length is None:
        length = vtable_length(vtable)
    # Read the methods. A plain counter replaces xrange(), which exists only in Python 2, so the
    # generator runs unchanged (and stays lazy) under either major version.
    index = start
    while index < length:
        yield idau.read_word(vtable + index * idau.WORD_SIZE)
        index += 1
213 | 
def class_vtable_methods(classinfo, nmethods=None, new=False):
    """Get the methods in a virtual method table for a class.

    The initial empty entries are skipped.

    Arguments:
        classinfo: The ClassInfo object describing the class.

    Options:
        nmethods: The number of methods to read, excluding the initial empty entries. If None, the
            whole vtable will be read. Default is None.
        new: If True, only return methods not defined in the superclass. Default is False.

    Returns:
        An empty list if the class has no vtable; otherwise the generator created by
        vtable_methods().
    """
    vtable = classinfo.vtable
    if not vtable:
        return []
    # With new=True and a superclass, begin just past the entries the superclass vtable covers.
    superclass = classinfo.superclass if new else None
    first = superclass.vtable_length if superclass else VTABLE_OFFSET
    return vtable_methods(vtable, start=first, length=classinfo.vtable_length,
            nmethods=nmethods)
236 | 
def vtable_overrides(class_vtable, super_vtable, class_vlength=None, super_vlength=None,
        new=False, methods=False):
    """Get the overrides of a virtual method table.

    A generator that returns the index of each override in the virtual method table. The initial
    empty entries are skipped, so the first virtual method is at index 0.

    Arguments:
        class_vtable: The vtable of the class.
        super_vtable: The vtable of the ancestor to compare against for overrides.

    Options:
        class_vlength: The length of class_vtable. If None, it will be calculated.
        super_vlength: The length of super_vtable. If None, it will be calculated.
        new: If True, include new virtual methods not present in the superclass. Default is False.
        methods: If True, then the generator will produce a tuple containing the index, the
            overridden method in the subclass, and the original method in the superclass, rather
            than just the index. For a new method the original is None. Default is False.
    """
    assert class_vtable
    # Get the vtable lengths.
    if class_vlength is None:
        class_vlength = vtable_length(class_vtable)
    if super_vlength is None:
        super_vlength = vtable_length(super_vtable)
    assert class_vlength >= super_vlength >= 0
    # Skip the first VTABLE_OFFSET entries.
    class_vtable  += VTABLE_OFFSET * idau.WORD_SIZE
    super_vtable  += VTABLE_OFFSET * idau.WORD_SIZE
    class_vlength -= VTABLE_OFFSET
    super_vlength -= VTABLE_OFFSET
    # How many methods are we iterating over?
    nmethods = class_vlength if new else super_vlength
    # Iterate through the methods. A plain counter replaces xrange(), which exists only in
    # Python 2, so the generator runs unchanged under either major version.
    index = 0
    while index < nmethods:
        # Read the old method; it stays None for entries past the end of the super vtable.
        super_method = None
        if index < super_vlength:
            super_method = idau.read_word(super_vtable + index * idau.WORD_SIZE)
        # Read the new method. (It's always in range.)
        class_method = idau.read_word(class_vtable + index * idau.WORD_SIZE)
        # If they're different, yield.
        if class_method != super_method:
            if methods:
                yield index, class_method, super_method
            else:
                yield index
        index += 1
287 | 
def class_vtable_overrides(classinfo, superinfo=None, new=False, methods=False):
    """Get the overrides of a virtual method table for a class.

    A generator that returns the index of each override in the virtual method table. The initial
    empty entries are skipped, so the first virtual method is at index 0. Nothing is produced if
    the class has no vtable, or if it has no superclass and new is False.

    Arguments:
        classinfo: The ClassInfo of the class to inspect.

    Options:
        superinfo: The ClassInfo of the ancestor to compare against for overrides. If None, then
            the ClassInfo of the direct superclass will be used. Default is None.
        new: If True, include new virtual methods not present in the superclass. Default is False.
        methods: If True, then the generator will produce a tuple containing the index, the
            overridden method in the subclass, and the original method in the superclass, rather
            than just the index. Default is False.

    Raises:
        ValueError: If superinfo is given but is not among classinfo.ancestors().
    """
    class_vtable = classinfo.vtable
    if not class_vtable:
        return
    # Settle which ancestor we compare against.
    if superinfo is not None:
        if superinfo not in classinfo.ancestors():
            raise ValueError('Invalid arguments: classinfo={}, superinfo={}'.format(classinfo,
                superinfo))
    else:
        # Default to the superclass, but if there isn't one, only new methods can be reported.
        superinfo = classinfo.superclass
        if not (superinfo or new):
            return
    class_vlength = classinfo.vtable_length
    # Without an ancestor, compare against an empty vtable.
    if superinfo:
        super_vtable, super_vlength = superinfo.vtable, superinfo.vtable_length
        assert class_vlength >= super_vlength
    else:
        super_vtable, super_vlength = 0, 0
    # Forward everything the lower-level generator produces.
    overrides = vtable_overrides(class_vtable, super_vtable, class_vlength=class_vlength,
            super_vlength=super_vlength, new=new, methods=methods)
    for item in overrides:
        yield item
332 | 
def class_from_vtable_method_symbol(method_symbol):
    """Get the base class in a vtable method symbol.

    The symbol is demangled and everything before the first '::' is returned.

    Returns:
        The class name, or None if the symbol cannot be demangled or the demangled name contains
        no '::'.
    """
    demangled = idc.Demangle(method_symbol, idc.GetLongPrm(idc.INF_SHORT_DN))
    if not demangled:
        return None
    classname, separator, _ = demangled.partition('::')
    return classname if separator else None
345 | 
def _vtable_method_symbol_substitute_class(method_symbol, new_class, old_class=None):
    """Create a new method symbol by substituting the class to which the method belongs.

    The first occurrence of '<len(old_class)><old_class>' in method_symbol is replaced by
    '<len(new_class)><new_class>'. If old_class is not given, it is extracted from the symbol.

    Returns:
        The new symbol, or None if the old class is unknown or its encoded form does not occur in
        method_symbol.
    """
    # TODO: This is wrong when the class name is repeated!
    if not old_class:
        old_class = class_from_vtable_method_symbol(method_symbol)
    if not old_class:
        return None
    # Each class name appears in the symbol prefixed with its own length.
    needle      = '{}{}'.format(len(old_class), old_class)
    replacement = '{}{}'.format(len(new_class), new_class)
    head, found, tail = method_symbol.partition(needle)
    if not found:
        return None
    return head + replacement + tail
358 | 
# Names that must never be propagated to overrides. The trailing comma is essential: without it
# the parentheses only group, the value is a plain string, and `name in _ignore_vtable_methods`
# silently becomes a substring test instead of a membership test.
_ignore_vtable_methods = (
    '___cxa_pure_virtual',
)
362 | 
def _ok_to_rename_method(override, name):
    """Decide whether an override that already has a name may be renamed.

    Only names starting with 'j_' qualify, and only if exactly one cross-reference targets the
    override.
    """
    if not name.startswith('j_'):
        return False
    xref_count = idau.iterlen(idautils.XrefsTo(override))
    return xref_count == 1
366 | 
def _bad_name_dont_use_as_override(name):
    """Tell whether a name should be kept from propagating into vtable symbols.

    A name is rejected if it starts with 'j_', if stub.symbol_references_stub() accepts it, or if
    it is found in _ignore_vtable_methods.
    """
    # Jumps are rejected outright.
    if name.startswith('j_'):
        return True
    # Then stubs, then the fixed known special values.
    references_stub = stub.symbol_references_stub(name)
    return references_stub or name in _ignore_vtable_methods
372 | 
def _symbolicate_overrides_for_classinfo(classinfo, processed):
    """Recursively name the vtable overrides of a class, handling its superclasses first.

    Arguments:
        classinfo: The class whose overrides should be named.
        processed: The set of classes already handled; classinfo is added to it when done.
    """
    # Each class is handled once.
    if classinfo in processed:
        return
    # Ancestors go first, so that names assigned there can be copied down to this class.
    superclass = classinfo.superclass
    if superclass:
        _symbolicate_overrides_for_classinfo(superclass, processed)
    # Now name this class's overrides after the methods they replace.
    for _, override, original in class_vtable_overrides(classinfo, methods=True):
        # An override that already has a user name is left alone unless renaming is permitted.
        current_name = idau.get_ea_name(override, user=True)
        if current_name and not _ok_to_rename_method(override, current_name):
            continue
        # The overridden method must have a usable user name to derive the new one from.
        inherited_name = idau.get_ea_name(original, user=True)
        if not inherited_name or _bad_name_dont_use_as_override(inherited_name):
            continue
        # Build the override's name by swapping this class's name into the inherited symbol.
        new_name = _vtable_method_symbol_substitute_class(inherited_name, classinfo.classname)
        if not new_name:
            _log(0, 'Could not substitute class {} into method symbol {} for override {:#x}',
                    classinfo.classname, inherited_name, override)
            continue
        if current_name:
            _log(2, 'Renaming {} -> {}', current_name, new_name)
        if not idau.set_ea_name(override, new_name, rename=True):
            _log(0, 'Could not set name {} for method {:#x}', new_name, override)
    processed.add(classinfo)
404 | 
def initialize_vtable_method_symbols():
    """Symbolicate overridden methods in a virtual method table.

    Propagate symbol names from the virtual method tables of the base classes. One shared set of
    finished classes is used so that no class is handled twice.
    """
    classes.collect_class_info()
    finished = set()
    for info in classes.class_info.values():
        _symbolicate_overrides_for_classinfo(info, finished)
414 | 
415 | 


--------------------------------------------------------------------------------
/ida_kernelcache/ida_utilities.py:
--------------------------------------------------------------------------------
  1 | #
  2 | # ida_kernelcache/ida_utilities.py
  3 | # Brandon Azad
  4 | #
  5 | # Some utility functions to make working with IDA easier.
  6 | #
  7 | 
  8 | from collections import deque
  9 | 
 10 | import idc
 11 | import idautils
 12 | import idaapi
 13 | 
 14 | def make_log(log_level, module):
 15 |     """Create a logging function."""
 16 |     def log(level, *args):
 17 |         if len(args) == 0:
 18 |             return level <= log.level
 19 |         if level <= log.level:
 20 |             print module + ': ' + args[0].format(*args[1:])
 21 |     log.level = log_level
 22 |     return log
 23 | 
# Logger for this module; a message is printed when its level is at most 1.
_log = make_log(1, __name__)

# The values assigned to the three constants below are placeholders only: _initialize() replaces
# them with values taken from the IDA database info structure while this module is being loaded.
WORD_SIZE = 0
"""The size of a word on the current platform."""

BIG_ENDIAN = False
"""Whether the current platform is big endian."""

LITTLE_ENDIAN = True
"""Whether the current platform is little-endian. Always the opposite of BIG_ENDIAN."""
 34 | 
 35 | def _initialize():
 36 |     # https://reverseengineering.stackexchange.com/questions/11396/how-to-get-the-cpu-architecture-via-idapython
 37 |     global WORD_SIZE, LITTLE_ENDIAN, BIG_ENDIAN
 38 |     info = idaapi.get_inf_structure()
 39 |     if info.is_64bit():
 40 |         WORD_SIZE = 8
 41 |     elif info.is_32bit():
 42 |         WORD_SIZE = 4
 43 |     else:
 44 |         WORD_SIZE = 2
 45 |     try:
 46 |         BIG_ENDIAN = info.is_be()
 47 |     except:
 48 |         BIG_ENDIAN = info.mf
 49 |     LITTLE_ENDIAN = not BIG_ENDIAN
 50 | 
 51 | _initialize()
 52 | 
 53 | def iterlen(iterator):
 54 |     """Consume an iterator and return its length."""
 55 |     return sum(1 for _ in iterator)
 56 | 
 57 | class AlignmentError(Exception):
 58 |     """An exception that is thrown if an address with improper alignment is encountered."""
 59 |     def __init__(self, address):
 60 |         self.address = address
 61 |     def __str__(self):
 62 |         return repr(self.address)
 63 | 
 64 | def is_mapped(ea, size=1, value=True):
 65 |     """Check if the given address is mapped.
 66 | 
 67 |     Specify a size greater than 1 to check if an address range is mapped.
 68 | 
 69 |     Arguments:
 70 |         ea: The linear address to check.
 71 | 
 72 |     Options:
 73 |         size: The number of bytes at ea to check. Default is 1.
 74 |         value: Only consider an address mapped if it has a value. For example, the contents of a
 75 |             bss section exist but don't have a static value. If value is False, consider such
 76 |             addresses as mapped. Default is True.
 77 | 
 78 |     Notes:
 79 |         This function is currently a hack: It only checks the first and last byte.
 80 |     """
 81 |     if size < 1:
 82 |         raise ValueError('Invalid argument: size={}'.format(size))
 83 |     # HACK: We only check the first and last byte, not all the bytes in between.
 84 |     if value:
 85 |         return idc.isLoaded(ea) and (size == 1 or idc.isLoaded(ea + size - 1))
 86 |     else:
 87 |         return idaapi.getseg(ea) and (size == 1 or idaapi.getseg(ea + size - 1))
 88 | 
 89 | def get_name_ea(name, fromaddr=idc.BADADDR):
 90 |     """Get the address of a name.
 91 | 
 92 |     This function returns the linear address associated with the given name.
 93 | 
 94 |     Arguments:
 95 |         name: The name to look up.
 96 | 
 97 |     Options:
 98 |         fromaddr: The referring address. Default is BADADDR. Some addresses have a
 99 |             location-specific name (for example, labels within a function). If fromaddr is not
100 |             BADADDR, then this function will try to retrieve the address of the name from
101 |             fromaddr's perspective. If name is not a local name, its address as a global name will
102 |             be returned.
103 | 
104 |     Returns:
105 |         The address of the name or BADADDR.
106 |     """
107 |     return idc.LocByNameEx(fromaddr, name)
108 | 
def get_ea_name(ea, fromaddr=idc.BADADDR, true=False, user=False):
    """Look up the name associated with the byte at an address.

    Arguments:
        ea: The linear address whose name to find.

    Options:
        fromaddr: The referring address. Default is BADADDR. Some addresses have a
            location-specific name (for example, labels within a function). If fromaddr is not
            BADADDR, then this function will try to retrieve the name of ea from fromaddr's
            perspective. The global name will be returned if no location-specific name is found.
        true: Retrieve the true name rather than the display name. Default is False.
        user: Return "" if the name is not a user name. Default is False.

    Returns:
        The name of the address or "".
    """
    if user and not idc.hasUserName(idc.GetFlags(ea)):
        return ""
    # Pick the lookup routine once, then query it.
    lookup = idc.GetTrueNameEx if true else idc.NameEx
    return lookup(fromaddr, ea)
134 | 
def set_ea_name(ea, name, rename=False, auto=False):
    """Give an address a name.

    Arguments:
        ea: The address to name.
        name: The new name of the address.

    Options:
        rename: If rename is False and the address already has a user name, nothing is changed
            and the call succeeds only if that existing name equals the new name. Set rename to
            True to rename the address even if it already has a custom name. Default is False.
        auto: If auto is True, then mark the new name as autogenerated. Default is False.

    Returns:
        True if the address was successfully named (or renamed).
    """
    keep_existing = not rename and idc.hasUserName(idc.GetFlags(ea))
    if keep_existing:
        return get_ea_name(ea) == name
    flags = (idc.SN_CHECK | idc.SN_AUTO) if auto else idc.SN_CHECK
    named = idc.MakeNameEx(ea, name, flags)
    return bool(named)
157 | 
def _insn_op_stroff_700(insn, n, sid, delta):
    """Call idc.OpStroffEx the IDA 7 way, passing the instruction object itself."""
    return idc.OpStroffEx(insn, n, sid, delta)

def _insn_op_stroff_695(insn, n, sid, delta):
    """Call idc.OpStroffEx the IDA 6.95 way, passing the instruction's address."""
    return idc.OpStroffEx(insn.ea, n, sid, delta)

# insn_op_stroff is the wrapper that matches the SDK version of the running IDA.
insn_op_stroff = (_insn_op_stroff_695 if idaapi.IDA_SDK_VERSION < 700
        else _insn_op_stroff_700)
170 | 
def _addresses(start, end, step, partial, aligned):
    """A generator over the addresses start, start + step, ... below end.

    Every element that fits completely before end is produced. If the last element would cross
    end, AlignmentError(end) is raised when aligned is set; otherwise that element's address is
    produced only when partial is set.
    """
    current = start
    # Whole elements first.
    while current <= end - step:
        yield current
        current += step
    # Landing exactly on end means there is no leftover to deal with.
    if current == end:
        return
    if aligned:
        raise AlignmentError(end)
    if partial and current < end:
        yield current
183 | 
def _mapped_addresses(addresses, step, partial, allow_unmapped):
    """Filter an _addresses generator down to the addresses that are mapped.

    Only the first and last byte of each element are checked. An element is produced if both are
    mapped, or if partial is set and at least one is. Any other element ends the iteration unless
    allow_unmapped is set, in which case it is silently skipped.
    """
    for addr in addresses:
        ends_mapped = (is_mapped(addr), is_mapped(addr + step - 1))
        if all(ends_mapped) or (partial and any(ends_mapped)):
            yield addr
            continue
        if not allow_unmapped:
            break
197 | 
def Addresses(start, end=None, step=1, length=None, partial=False, aligned=False,
        unmapped=True, allow_unmapped=False):
    """A generator to iterate over the addresses in an address range.

    Arguments:
        start: The start of the address range to iterate over.

    Options:
        end: The end of the address range to iterate over.
        step: The amount to step the address by each iteration. Default is 1.
        length: The number of elements of size step to iterate over. At least one of end and
            length is required; if both are given they must describe the same range.
        partial: If only part of the element is in the address range, or if only part of the
            element is mapped, return it anyway. Default is False.
        aligned: If the end address is not aligned with an iteration boundary, throw an
            AlignmentError.
        unmapped: Don't check whether an address is mapped or not before returning it. Default is
            True.
        allow_unmapped: Only used when unmapped is False. Don't stop iteration if an unmapped
            address is encountered; the address is skipped instead. Default is False.

    Raises:
        ValueError: If step is less than 1, if neither end nor length is given, or if end and
            length disagree.
    """
    # HACK: We only check the first and last byte, not all the bytes in between.
    if step < 1:
        raise ValueError('Invalid arguments: step={}'.format(step))
    # Work out the end address.
    if length is None:
        if end is None:
            raise ValueError('Invalid arguments: end={}, length={}'.format(end, length))
    else:
        computed_end = start + length * step
        if end is not None and end != computed_end:
            raise ValueError('Invalid arguments: start={}, end={}, step={}, length={}'
                    .format(start, end, step, length))
        end = computed_end
    candidates = _addresses(start, end, step, partial, aligned)
    # Unless every address is wanted, wrap the generator with the mapping filter.
    if not unmapped:
        candidates = _mapped_addresses(candidates, step, partial, allow_unmapped)
    return candidates
240 | 
def _instructions_by_range(start, end):
    """A generator over the instructions decoded between start and end (exclusive).

    Iteration stops early at an address that cannot be decoded. AlignmentError(end) is raised if
    an instruction extends past end.
    """
    pc = start
    while pc < end:
        insn = idautils.DecodeInstruction(pc)
        if insn is None:
            return
        # Advance first so the bounds check covers the whole instruction before it is produced.
        pc += insn.size
        if pc > end:
            raise AlignmentError(end)
        yield insn
253 | 
def _instructions_by_count(pc, count):
    """A generator to iterate over a specified number of instructions.

    Decoding starts at pc and continues with the instruction that follows. Fewer than count
    instructions are produced if an address cannot be decoded.
    """
    # A plain counter replaces xrange(), which exists only in Python 2, so the generator runs
    # unchanged under either major version.
    decoded = 0
    while decoded < count:
        insn = idautils.DecodeInstruction(pc)
        if insn is None:
            break
        yield insn
        pc += insn.size
        decoded += 1
262 | 
def Instructions(start, end=None, count=None):
    """A generator to iterate over instructions.

    Instructions are decoded using IDA's DecodeInstruction(). If an address range is specified and
    the end of the address range does not fall on an instruction boundary, raises an
    AlignmentError.

    Arguments:
        start: The linear address from which to start decoding instructions.

    Options:
        end: The linear address at which to stop, exclusive.
        count: The number of instructions to decode.

    Raises:
        ValueError: If end and count are both given or both omitted.

    Notes:
        Exactly one of end and count must be specified.
    """
    # Exactly one of the two limits may be present.
    if (end is None) == (count is None):
        raise ValueError('Invalid arguments: end={}, count={}'.format(end, count))
    if end is None:
        return _instructions_by_count(start, count)
    return _instructions_by_range(start, end)
286 | 
# Maps an element size in bytes to the matching idc.FF_* data flag; consulted by word_flag().
_FF_FLAG_FOR_SIZE = {
    1:  idc.FF_BYTE,
    2:  idc.FF_WORD,
    4:  idc.FF_DWRD,
    8:  idc.FF_QWRD,
    16: idc.FF_OWRD,
}
294 | 
def word_flag(wordsize=WORD_SIZE):
    """Get the FF_xxxx flag for the given word size.

    Returns 0 for a size that has no entry in _FF_FLAG_FOR_SIZE. The default size is the value
    WORD_SIZE had when this function was defined.
    """
    try:
        return _FF_FLAG_FOR_SIZE[wordsize]
    except KeyError:
        return 0
298 | 
def read_word(ea, wordsize=WORD_SIZE):
    """Get the word at the given address.

    Words are read using Byte(), Word(), Dword(), or Qword(), as appropriate. Addresses are checked
    using is_mapped(). If the address isn't mapped, then None is returned.

    Raises:
        ValueError: If the address is mapped but wordsize is not 1, 2, 4 or 8.
    """
    # The mapping check comes first, so an unmapped address yields None for any word size.
    if not is_mapped(ea, wordsize):
        return None
    if wordsize == 1:
        reader = idc.Byte
    elif wordsize == 2:
        reader = idc.Word
    elif wordsize == 4:
        reader = idc.Dword
    elif wordsize == 8:
        reader = idc.Qword
    else:
        raise ValueError('Invalid argument: wordsize={}'.format(wordsize))
    return reader(ea)
316 | 
def patch_word(ea, value, wordsize=WORD_SIZE):
    """Patch the word at the given address.

    Words are patched using PatchByte(), PatchWord(), PatchDword(), or PatchQword(), as
    appropriate.

    Raises:
        ValueError: If wordsize is not 1, 2, 4 or 8.
    """
    if wordsize == 1:
        patcher = idc.PatchByte
    elif wordsize == 2:
        patcher = idc.PatchWord
    elif wordsize == 4:
        patcher = idc.PatchDword
    elif wordsize == 8:
        patcher = idc.PatchQword
    else:
        raise ValueError('Invalid argument: wordsize={}'.format(wordsize))
    patcher(ea, value)
333 | 
class objectview(object):
    """A class to present an object-like view of a struct.

    The dictionary of fields becomes the instance's attribute dictionary, so every key can be
    read as an attribute. int() of the view gives the address it was created with and len() gives
    the size it was created with.
    """
    # https://goodcode.io/articles/python-dict-object/
    def __init__(self, fields, addr, size):
        # The dict is adopted, not copied, and it must be installed first: the two name-mangled
        # private attributes assigned below are then stored in that same dictionary.
        self.__dict__ = fields
        self.__addr   = addr
        self.__size   = size
    def __int__(self):
        return self.__addr
    def __len__(self):
        return self.__size
345 | 
def _read_oword(ea):
    """Read a 16-byte integer as two qwords, honoring BIG_ENDIAN. Returns None if unmapped."""
    low_ea, high_ea = (ea + 8, ea) if BIG_ENDIAN else (ea, ea + 8)
    low = read_word(low_ea, 8)
    high = read_word(high_ea, 8)
    if low is None or high is None:
        return None
    return (high << 64) | low

def _read_struct_member_once(ea, flags, size, member_sid, member_size, asobject):
    """Read part of a struct member for _read_struct_member.

    Arguments:
        ea: The address to read from.
        flags: The member flags, which select how the data is interpreted.
        size: The total size of the member.
        member_sid: The structure ID of the member's type, used if the member is a struct.
        member_size: The size of the member's struct type, used if the member is a struct.
        asobject: Passed on to read_struct() for struct members.

    Returns:
        A tuple (value, number of bytes consumed). For flags that match none of the known kinds
        the value is None and the whole member is consumed.
    """
    if idc.isByte(flags):
        return read_word(ea, 1), 1
    elif idc.isWord(flags):
        return read_word(ea, 2), 2
    elif idc.isDwrd(flags):
        return read_word(ea, 4), 4
    elif idc.isQwrd(flags):
        return read_word(ea, 8), 8
    elif idc.isOwrd(flags):
        # read_word() handles sizes 1, 2, 4 and 8 only and raises ValueError for 16 whenever the
        # address is mapped, so a 16-byte value is assembled from its two halves instead.
        return _read_oword(ea), 16
    elif idc.isASCII(flags):
        return idc.GetManyBytes(ea, size), size
    elif idc.isFloat(flags):
        # NOTE(review): confirm that the targeted IDAPython provides idc.Float and idc.Double.
        return idc.Float(ea), 4
    elif idc.isDouble(flags):
        return idc.Double(ea), 8
    elif idc.isStruct(flags):
        value = read_struct(ea, sid=member_sid, asobject=asobject)
        return value, member_size
    return None, size
368 | 
def _read_struct_member(struct, sid, union, ea, offset, name, size, asobject):
    """Read one member and store it in the struct dictionary for read_struct.

    The member is read piece by piece until size bytes are covered. A single piece is stored
    directly under name; anything else is stored as a list of the pieces.
    """
    flags = idc.GetMemberFlag(sid, offset)
    assert flags != -1
    # A struct-typed member needs its own structure ID and size to be parsed.
    if idc.isStruct(flags):
        member_sid = idc.GetMemberStrId(sid, offset)
        member_ssize = idc.GetStrucSize(member_sid)
    else:
        member_sid = member_ssize = None
    # Every member of a union starts at the union's own address.
    base = ea if union else ea + offset
    # Now parse out the value.
    pieces = []
    consumed = 0
    while consumed < size:
        piece, nbytes = _read_struct_member_once(base + consumed, flags, size, member_sid,
                member_ssize, asobject)
        assert size % nbytes == 0
        pieces.append(piece)
        consumed += nbytes
    struct[name] = pieces[0] if len(pieces) == 1 else pieces
396 | 
def read_struct(ea, struct=None, sid=None, members=None, asobject=False):
    """Read a structure from the given address.

    The structure at the given address is parsed member by member and returned either as a
    dictionary keyed by member name or as an accessor object.

    Arguments:
        ea: The linear address of the start of the structure.

    Options:
        sid: The structure ID of the structure type to read.
        struct: The name of the structure type to read.
        members: A list of the names of the member fields to read. If members is None, then all
            members are read. Default is None.
        asobject: If True, then the struct is returned as a Python object rather than a dict.

    One of sid and struct must be specified. A ValueError is raised if neither is given, if the
    given one is invalid, or if both are given but disagree.
    """
    # Resolve and validate the struct id.
    if struct is None:
        if sid is None:
            raise ValueError('Invalid arguments: sid={}, struct={}'.format(sid, struct))
        if idc.GetStrucName(sid) is None:
            raise ValueError('Invalid struc id {}'.format(sid))
    else:
        named_sid = idc.GetStrucIdByName(struct)
        if named_sid == idc.BADADDR:
            raise ValueError('Invalid struc name {}'.format(struct))
        if sid is not None and named_sid != sid:
            raise ValueError('Invalid arguments: sid={}, struct={}'.format(sid, struct))
        sid = named_sid
    # Read every requested member into a fresh dictionary.
    is_union = idc.IsUnion(sid)
    fields = {}
    for offset, name, size in idautils.StructMembers(sid):
        if members is None or name in members:
            _read_struct_member(fields, sid, is_union, ea, offset, name, size, asobject)
    if not asobject:
        return fields
    return objectview(fields, ea, idc.GetStrucSize(sid))
438 | 
def null_terminated(string):
    """Extract the NULL-terminated C string from the given array of bytes.

    Arguments:
        string: A text string, bytes object or bytearray that may contain a NUL character.

    Returns everything before the first NUL, with the same type as the input. If the input
    contains no NUL, all of its contents are returned.
    """
    # The separator must have the same kind as the input: searching a bytes object for a text
    # separator raises TypeError on Python 3.
    terminator = b'\0' if isinstance(string, (bytes, bytearray)) else '\0'
    return string.partition(terminator)[0]
442 | 
def _convert_address_to_function(func):
    """Convert an address that IDA has classified incorrectly into a proper function.

    Arguments:
        func: The address at which a function should be created.

    Returns True if a function was successfully created at func and False otherwise. When all
    attempts fail and func originally belonged to a function (as reported by
    idc.FirstFuncFchunk), an attempt is made to re-create that original function before
    returning False.
    """
    # If everything goes wrong, we'll try to restore this function.
    orig = idc.FirstFuncFchunk(func)
    # If the address is not code, let's undefine whatever it is.
    if not idc.isCode(idc.GetFlags(func)):
        if not is_mapped(func):
            # Well, that's awkward.
            return False
        # Find the bounds of the item that currently covers func.
        item    = idc.ItemHead(func)
        itemend = idc.ItemEnd(func)
        if item != idc.BADADDR:
            _log(1, 'Undefining item {:#x} - {:#x}', item, itemend)
            idc.MakeUnkn(item, idc.DOUNK_EXPAND)
            idc.MakeCode(func)
            # Give IDA a chance to analyze the new code or else we won't be able to create a
            # function.
            idc.Wait()
            idc.AnalyseArea(item, itemend)
    else:
        # Just try removing the chunk from its current function. IDA can add it to another function
        # automatically, so make sure it's removed from all functions by doing it in loop until it
        # fails.
        # NOTE(review): the 1024 iteration cap presumably guards against looping forever if the
        # removal never reports failure.
        for i in range(1024):
            if not idc.RemoveFchunk(func, func):
                break
    # Now try making a function.
    if idc.MakeFunction(func) != 0:
        return True
    # This is a stubborn chunk. Try recording the list of chunks, deleting the original function,
    # creating the new function, then re-creating the original function.
    if orig != idc.BADADDR:
        chunks = list(idautils.Chunks(orig))
        if idc.DelFunction(orig) != 0:
            # Ok, now let's create the new function, and recreate the original.
            if idc.MakeFunction(func) != 0:
                if idc.MakeFunction(orig) != 0:
                    # Ok, so we created the functions! Now, if any of the original chunks are not
                    # contained in a function, we'll abort and undo.
                    if all(idaapi.get_func(start) for start, end in chunks):
                        return True
            # Try to undo the damage.
            # Delete whatever function now starts at each of the recorded chunk starts; the
            # original is re-created below.
            for start, _ in chunks:
                idc.DelFunction(start)
    # Everything we've tried so far has failed. If there was originally a function, try to restore
    # it.
    if orig != idc.BADADDR:
        _log(0, 'Trying to restore original function {:#x}', orig)
        idc.MakeFunction(orig)
    return False
493 | 
def is_function_start(ea):
    """Check whether ea is the first address of a function."""
    function_start = idc.GetFunctionAttr(ea, idc.FUNCATTR_START)
    return function_start == ea
497 | 
def force_function(addr):
    """Make sure a function starts at addr, converting the address if it does not already.

    Returns True if addr already is a function start; otherwise returns the result of the
    conversion attempt.
    """
    return is_function_start(addr) or _convert_address_to_function(addr)
503 | 
def ReadWords(start, end, step=WORD_SIZE, wordsize=WORD_SIZE, addresses=False):
    """A generator over the data words in the given address range.

    Each word is read with read_word(). The generator produces one item per word and finishes as
    soon as a word cannot be read (that is, at the first unmapped word).

    Arguments:
        start: The start address.
        end: The end address.

    Options:
        step: The number of bytes to advance per iteration. Default is WORD_SIZE.
        wordsize: The word size to read, in bytes. Default is WORD_SIZE.
        addresses: If true, then each item is a tuple (word, ea) rather than just the word.
            Default is False.
    """
    for ea in Addresses(start, end, step=step, unmapped=True):
        data = read_word(ea, wordsize)
        if data is None:
            # Could not read this word: end the iteration here.
            return
        if addresses:
            yield data, ea
        else:
            yield data
527 | 
def WindowWords(start, end, window_size, wordsize=WORD_SIZE):
    """A generator to iterate over a sliding window of data words in the given address range.

    The iterator returns a stream of tuples (window, ea) for each word in the address range. The
    window is a deque of the window_size words at address ea. The deque is owned by the generator
    and its contents will change between iterations.

    Arguments:
        start: The start address.
        end: The end address.
        window_size: The number of words in each window.

    Options:
        wordsize: The size of each word, in bytes. Consecutive words are read wordsize bytes
            apart. Default is WORD_SIZE.

    If the range contains fewer than window_size readable words, nothing is yielded.
    """
    # Imported locally so that this function does not rely on the module's import list.
    from itertools import islice
    # Advance by wordsize between reads so that the addresses reported below match the addresses
    # the words were actually read from, even when wordsize differs from WORD_SIZE.
    words = ReadWords(start, end, step=wordsize, wordsize=wordsize)
    # Fill the first window. Using islice() rather than repeated next() calls means that running
    # out of words cannot leak a StopIteration out of the generator body, which Python 3.7+
    # (PEP 479) turns into a RuntimeError.
    window = deque(islice(words, window_size), maxlen=window_size)
    if len(window) < window_size:
        return
    addr = start
    yield window, addr
    # Slide the window forward one word at a time.
    for word in words:
        window.append(word)
        addr += wordsize
        yield window, addr
543 | 
def struct_create(name, union=False):
    """Create a new IDA struct (or union) called name.

    Returns the SID of the new struct, or None if it could not be created.
    """
    sid = idc.AddStrucEx(-1, name, int(bool(union)))
    # AddStrucEx is documented as returning -1 on failure, but in practice it seems to return
    # BADADDR, so treat both as failure.
    return None if sid in (-1, idc.BADADDR) else sid
553 | 
def struct_open(name, create=False, union=None):
    """Look up the SID of the IDA struct called name, optionally creating it.

    Options:
        create: If True and no struct with that name exists, create it. Default is False.
        union: If not None, an existing struct must be a union exactly when union is true;
            otherwise None is returned. A newly created struct is a union if union is true.

    Returns the SID, or None if the struct is missing (and not created) or fails the union check.
    """
    sid = idc.GetStrucIdByName(name)
    if sid != idc.BADADDR:
        # The struct exists: check that it is of the requested kind, if one was requested.
        if union is not None and union != bool(idc.IsUnion(sid)):
            return None
        return sid
    if create:
        return struct_create(name, union=bool(union))
    return None
566 | 
def struct_member_offset(sid, name):
    """A version of IDA's GetMemberOffset() that also works with unions.

    Returns the soff of the member called name, or None if the struct or member is not found.
    """
    struct = idaapi.get_struc(sid)
    member = idaapi.get_member_by_name(struct, name) if struct else None
    return member.soff if member else None
576 | 
def struct_add_word(sid, name, offset, size, count=1):
    """Add an integer member (or an array of count integers) of the given size to a structure.

    If sid is a union, offset must be -1.

    Returns the result of idc.AddStrucMember().
    """
    flags = idc.FF_DATA | word_flag(size)
    total_size = size * count
    return idc.AddStrucMember(sid, name, offset, flags, -1, total_size)
583 | 
def struct_add_ptr(sid, name, offset, count=1, type=None):
    """Add a pointer to a structure.

    If sid is a union, offset must be -1.

    Arguments:
        sid: The struct ID of the structure to modify.
        name: The name of the new member.
        offset: The offset of the new member, or -1.

    Options:
        count: The number of pointers in the member. Default is 1.
        type: If not None, a type string to set on the new member with idc.SetType(). The string
            is applied as given. NOTE(review): for count > 1 the caller presumably needs to
            supply an array type here; confirm how IDA treats a scalar type on an array member.

    Returns the result of idc.AddStrucMember(), which is 0 on success.
    """
    ptr_flag = idc.FF_DATA | word_flag(WORD_SIZE) | idaapi.offflag()
    # The member size accounts for count, consistent with struct_add_word and struct_add_struct.
    ret = idc.AddStrucMember(sid, name, offset, ptr_flag, 0, WORD_SIZE * count)
    if ret == 0 and type is not None:
        if offset == -1:
            # The member was placed automatically, so look up where it ended up.
            offset = struct_member_offset(sid, name)
            assert offset is not None
        mid = idc.GetMemberId(sid, offset)
        idc.SetType(mid, type)
    return ret
598 | 
def struct_add_struct(sid, name, offset, msid, count=1):
    """Add a member of struct type msid (or an array of count of them) to a structure.

    If sid is a union, offset must be -1.

    Returns the result of idc.AddStrucMember().
    """
    flags = idc.FF_DATA | idc.FF_STRU
    total_size = idc.GetStrucSize(msid) * count
    return idc.AddStrucMember(sid, name, offset, flags, msid, total_size)
606 | 
607 | 


--------------------------------------------------------------------------------
/ida_kernelcache/class_struct.py:
--------------------------------------------------------------------------------
  1 | #
  2 | # ida_kernelcache/class_struct.py
  3 | # Brandon Azad
  4 | #
  5 | # A module to build structs representing the C++ classes in the kernelcache.
  6 | #
  7 | """ida_kernelcache.class_struct
  8 | 
  9 | This module is responsible for creating the IDA structs representing the various C++ classes found
 10 | in the kernelcache, including the structs for the vtables.
 11 | 
 12 | Organization:
 13 | 
 14 | Each class Class gets four structs: Class, Class::vtable, Class::vmethods and Class::fields.
 15 | Class::vmethods is a struct containing the virtual methods for Class that are not present in its
 16 | direct superclass. Class::vtable is a struct representing the virtual method table for Class, laid
 17 | out as follows:
 18 | 
 19 |     struct Class::vtable {
 20 |         struct SuperClass1::vmethods SuperClass1;
 21 |         struct SuperClass2::vmethods SuperClass2;
 22 |         /* ... */
 23 |         struct SuperClassN::vmethods SuperClassN;
 24 |         struct Class::vmethods       Class;
 25 |     };
 26 | 
 27 | Here SuperClass1, ..., SuperClassN are the chain of superclasses of Class starting from the root.
 28 | (Remember, XNU's C++ does not have multiple inheritance, which means we only have one ancestor
 29 | chain. This makes everything much easier!)
 30 | 
 31 | There are two styles for how Class is represented: struct slices and unions.
 32 | 
 33 | In the struct slices representation, Class::fields is a struct containing those fields in Class not
 34 | present in its superclass, shifted to start at offset 0. Class is a struct organized as follows:
 35 | 
 36 |     struct Class {
 37 |         struct Class::vtable*      vtable;
 38 |         struct SuperClass1::fields SuperClass1;
 39 |         struct SuperClass2::fields SuperClass2;
 40 |         /* ... */
 41 |         struct SuperClassN::fields SuperClassN;
 42 |         struct Class::fields       Class;
 43 |     };
 44 | 
 45 | In the unions representation, Class::fields is also a struct containing the fields in Class not
 46 | present in its superclass, however this time it is not shifted, so that the fields occur at the
 47 | same offset in Class::fields as they do in the original Class class in the kernel. Class is a
 48 | union organized as follows:
 49 | 
 50 |     union Class {
 51 |         struct Class::vtable*      vtable;
 52 |         struct SuperClass1::fields SuperClass1;
 53 |         struct SuperClass2::fields SuperClass2;
 54 |         /* ... */
 55 |         struct SuperClassN::fields SuperClassN;
 56 |         struct Class::fields       Class;
 57 |     };
 58 | 
 59 | There are advantages and disadvantages to each representation. The unions representation can be
 60 | more flexible if the automated analysis messes up, but so far I have not found a good way to set
 61 | the operands of instructions referring to these structures.
 62 | 
 63 | TODO: I know it's probably possible with ida_bytes.op_stroff().
 64 | 
 65 | We divide the processing into two parts: vtable generation and class generation.
 66 | 
 67 | For vtable generation, we initially ignore the problem of setting types for each virtual method
 68 | entry in the struct. The primary reason for this is that the method symbols in the kernelcache
 69 | don't include return type information, so we can't be sure what the correct return type for each
 70 | method is. In the future, another module will be able to populate the vtable structs with proper
 71 | type information.
 72 | 
 73 | Class generation is more complicated: We first need to collect the set of accesses to each class
 74 | struct, then use that information to reconstruct the class fields. Most of the work is done by the
 75 | data_flow module, which collects (offset, size) pairs for each virtual method in the class. We
partition those accesses to their respective classes by class size. (This is not perfect since the
 77 | class size reported in the kernel may actually be rounded up. However, for the most part it works
 78 | quite well.) Once we know which (offset, size) pairs correspond to which class, we use the
 79 | build_struct module to create the appropriate fields in the struct for those accesses.
 80 | 
 81 | Rationale:
 82 | 
 83 | IDA structs don't have any form of inheritance, which leaves us two options: We can either create a
 84 | single struct for each class and then figure out some way of synchronizing changes along the
 85 | inheritance chain, or we can rely on some form of struct inclusion to ensure that the members of
 86 | each class are defined only in one place, and all subclasses re-use those members by including them
 87 | as a substruct.
 88 | 
 89 | While creating one struct for each class with all members for the class and its superclasses is
 90 | simple and presents most similarly to the original code, synchronizing this representation across
 91 | struct changes is complex, and not possible in general. Consider: If a change is made to a member
 92 | of the root class in a leaf class, we would need to propagate that change back to the root and then
 93 | down to every subclass of the root class. And if along the way we found another change that was
 94 | incompatible, there would be no way to automatically discover the right way to resolve the
 95 | conflict. Perhaps this solution would work if we could ensure that the propagation code was run
 96 | after every single structure change, so that there was no opportunity to develop conflicts, but at
 97 | that point the solution is quite complex and requires direct support from IDA.
 98 | 
 99 | Instead, I elected for a representation that forces each field of each class to be defined in only
100 | one place. This means the structures look less like the original C++, which is unfortunate and
101 | complicates adding or looking up members by offset from the start of the class. However, I still
102 | believe it's better to avoid the whole synchronization issue.
103 | 
104 | Even so, there are still several possible ways of representing the classes, each with their own
105 | advantages and disadvantages. I ended up allowing the user to select their desired representation.
106 | 
107 | For now, we sidestep the problem of setting type information for the function pointers in the
108 | ::vmethods structs. The reason for this, as mentioned above, is that the method symbols don't tell
109 | us what the true return type is, so at best we can guess. It is easy enough to scan through the
110 | vtables after the ::vmethods structs have been generated and add type information then, so I'll
111 | avoid over-complicating this module by trying to do that here. Instead, I imagine another module
112 | (called, for example, types) that provides two functions:
113 |     - initialize_method_types: For each C++ method symbol, sets the method type by effectively
114 |       doing SetType(GuessType(method)) for every method with a good symbol.
115 |     - update_vtable_struct_types: For each field in each ::vmethods struct, look at the type of the
116 |       corresponding method, and set the type of the field accordingly.
117 | """
118 | 
119 | import collections
120 | 
121 | import idc
122 | import idautils
123 | import idaapi
124 | 
125 | import ida_utilities as idau
126 | import build_struct
127 | import classes
128 | import data_flow
129 | import symbol
130 | import vtable
131 | 
132 | _log = idau.make_log(2, __name__)
133 | 
134 | #### Vtable generation ############################################################################
135 | 
def _populate_vmethods_struct(sid, classinfo):
    """Populate the ::vmethods struct with a pointer for each vtable entry past the superclass's.

    Returns True on success and False if a member could not be created.
    """
    parent = classinfo.superclass
    inherited = parent.vtable_nmethods if parent else 0
    used_names = set()
    for index, vmethod in enumerate(vtable.class_vtable_methods(classinfo)):
        # Entries that are already part of the superclass's vtable do not belong here.
        if index < inherited:
            continue
        # Derive the member name from the method's base name (i.e., for Class::method(args), use
        # method), falling back to a name built from the vtable index.
        base = symbol.method_name(idau.get_ea_name(vmethod, user=True))
        base = symbol.make_ident(base if base else 'method_{}'.format(index))
        # Make the name unique within this struct by appending "_1", "_2", etc. as needed.
        name = base
        attempt = 0
        while name in used_names:
            attempt += 1
            name = '{}_{}'.format(base, attempt)
        used_names.add(name)
        # Create the member at its slot relative to the start of this class's new methods.
        slot = index - inherited
        ret = idau.struct_add_ptr(sid, name, slot * idau.WORD_SIZE, type='void *')
        if ret != 0:
            _log(0, 'Could not create {}::vmethods.{}: {}', classinfo.classname, name, ret)
            return False
    return True
168 | 
def _populate_vtable_struct(sid, classinfo):
    """Populate the ::vtable struct with the ::vmethods struct of every ancestor and of the class.

    Returns True on success and False if a ::vmethods struct is missing or cannot be added.
    """
    for ci in classinfo.ancestors(inclusive=True):
        # ci's ::vmethods slice begins where its superclass's vtable ends.
        parent = ci.superclass
        offset = parent.vtable_nmethods * idau.WORD_SIZE if parent else 0
        # An empty slice (ci adds no methods) is skipped. Otherwise we get weird
        # "struct->til conversion failed" errors.
        if ci.vtable_nmethods * idau.WORD_SIZE == offset:
            continue
        vmethods_sid = idau.struct_open(ci.classname + '::vmethods')
        if vmethods_sid is None:
            _log(0, 'Could not find {}::vmethods', ci.classname)
            return False
        # Add the slice as a member named after the class it comes from.
        if idau.struct_add_struct(sid, ci.classname, offset, vmethods_sid) != 0:
            _log(0, 'Could not add {}::vmethods to {}::vtable', ci.classname, classinfo.classname)
            return False
    return True
194 | 
def _create_vmethods_struct(classinfo):
    """Create and populate the ::vmethods struct for a C++ class.

    Returns False if the struct could not be created, otherwise the result of populating it.
    """
    sid = idau.struct_create(classinfo.classname + '::vmethods')
    if sid is not None:
        return _populate_vmethods_struct(sid, classinfo)
    _log(0, 'Could not create {}::vmethods', classinfo.classname)
    return False
202 | 
def _create_vtable_struct(classinfo):
    """Create and populate the ::vtable struct for a C++ class.

    Returns False if the struct could not be created, otherwise the result of populating it.
    """
    sid = idau.struct_create(classinfo.classname + '::vtable')
    if sid is not None:
        return _populate_vtable_struct(sid, classinfo)
    _log(0, 'Could not create {}::vtable', classinfo.classname)
    return False
210 | 
def initialize_vtable_structs():
    """Create IDA structs representing the C++ virtual method tables in the kernel.

    All ::vmethods structs are created first, in a separate pass, since each ::vtable struct is
    built out of the ::vmethods structs of the class and its ancestors.
    """
    classes.collect_class_info()
    for create_struct in (_create_vmethods_struct, _create_vtable_struct):
        for classinfo in classes.class_info.values():
            create_struct(classinfo)
218 | 
219 | #### Classes based on struct slices ###############################################################
220 | 
def _create_class_structs__slices(classinfo, endmarkers=True):
    """Open or create the wrapper and ::fields structs for a C++ class (struct slices style).

    Returns a tuple (sid, sidf, fields_start) of the wrapper struct ID, the ::fields struct ID and
    the offset within the class at which the class's own fields start, or None if either struct
    could not be opened or created.
    """
    classname = classinfo.classname
    sidf = idau.struct_open(classname + '::fields', create=True)
    sid  = idau.struct_open(classname, create=True)
    if sidf is None or sid is None:
        _log(0, 'Could not create class structs for {}', classname)
        return None
    assert not any(idc.IsUnion(s) for s in (sidf, sid))
    # Our fields start after our superclass's fields end or, if there is no superclass, after the
    # vtable pointer.
    parent = classinfo.superclass
    fields_start = parent.class_size if parent else idau.WORD_SIZE
    fields_size = classinfo.class_size - fields_start
    if endmarkers:
        # Mark the end of the ::fields struct with a zero-sized ::end member.
        tolerated = (0, idc.STRUC_ERROR_MEMBER_NAME, idc.STRUC_ERROR_MEMBER_OFFSET)
        ret = idc.AddStrucMember(sidf, classname + '::end', fields_size, idc.FF_UNK, -1, 0)
        if ret not in tolerated:
            # If that didn't work that's too bad, but continue anyway.
            _log(0, 'Could not create {}::end', classname)
    return sid, sidf, fields_start
246 | 
def _populate_fields_struct__slices(sid, classinfo, fields_start, accesses):
    """Create the members of the ::fields struct from the (offset, size) accesses.

    Offsets are relative to the start of the class; fields_start is passed as the base so that
    the members are created relative to the start of the ::fields struct.
    """
    # Sanity check: every access must lie within this class's own slice.
    assert all(fields_start <= offset <= offset + size <= classinfo.class_size
            for offset, size in accesses)
    build_struct.create_struct_fields(sid, accesses=accesses, base=fields_start)
254 | 
def _populate_wrapper_struct__slices(sid, classinfo):
    """Fill in the members of the wrapper struct.

    The wrapper struct consists of the vtable pointer followed by the ::fields struct of each
    ancestor, from the root class down to classinfo itself.

    Arguments:
        sid: The struct ID of the wrapper struct.
        classinfo: The class whose wrapper struct is being populated.

    Returns True on success and False if a member could not be found or added.
    """
    # First add the vtable pointer.
    offset = 0
    vtable_ptr_type = '{}::vtable *'.format(classinfo.classname)
    ret = idau.struct_add_ptr(sid, 'vtable', offset, type=vtable_ptr_type)
    if ret not in (0, idc.STRUC_ERROR_MEMBER_OFFSET):
        _log(0, 'Could not create {}.vtable: {}', classinfo.classname, ret)
        return False
    # Now add all the ::fields structs.
    offset += idau.WORD_SIZE
    for ci in classinfo.ancestors(inclusive=True):
        # Get the sid of the ::fields struct.
        fields_sid = idau.struct_open(ci.classname + '::fields')
        if fields_sid is None:
            _log(0, 'Could not find {}::fields', ci.classname)
            return False
        # If this is a 0-length struct (no fields), skip it.
        size = idc.GetStrucSize(fields_sid)
        if size == 0:
            continue
        # If this is already in the wrapper struct, skip it. This avoids weird
        # STRUC_ERROR_MEMBER_VARLAST errors.
        if idc.GetMemberOffset(sid, ci.classname) != -1:
            # The existing slice still occupies its space in the wrapper, so the slices that
            # follow must be placed after it.
            offset += size
            continue
        # Add the ::fields struct to the wrapper.
        ret = idau.struct_add_struct(sid, ci.classname, offset, fields_sid)
        if ret != 0:
            _log(0, 'Could not create {}.{}: {}', classinfo.classname, ci.classname, ret)
            return False
        offset += size
    return True
287 | 
def _populate_class_structs__slices(classinfo, class_accesses, sid, sidf, fields_start):
    """Populate the ::fields struct (sidf) and then the wrapper struct (sid) for a C++ class."""
    accesses = class_accesses[classinfo.classname]
    _populate_fields_struct__slices(sidf, classinfo, fields_start, accesses)
    _populate_wrapper_struct__slices(sid, classinfo)
293 | 
294 | #### Classes based on unions ######################################################################
295 | 
def _create_class_structs__unions(classinfo):
    """Open or create the wrapper union and ::fields struct for a C++ class (unions style).

    Returns a tuple (sid, sidf) of the wrapper union ID and the ::fields struct ID, or None if
    either could not be opened or created.
    """
    classname = classinfo.classname
    sidf = idau.struct_open(classname + '::fields', create=True)
    sid  = idau.struct_open(classname, union=True, create=True)
    if sidf is not None and sid is not None:
        return sid, sidf
    _log(0, 'Could not create class structs for {}', classname)
    return None
305 | 
def _populate_fields_struct__unions(sid, classinfo, accesses):
    """Create the members of the ::fields struct from the (offset, size) accesses.

    Offsets are relative to the start of the class and are used unshifted.
    """
    # Sanity check: every access must lie within the class.
    assert all(0 <= offset <= offset + size <= classinfo.class_size
            for offset, size in accesses)
    build_struct.create_struct_fields(sid, accesses=accesses)
313 | 
def _populate_wrapper_struct__unions(sid, classinfo):
    """Fill in the members of the wrapper union: the vtable pointer and each ::fields struct.

    Returns True on success and False if a member could not be found or added.
    """
    classname = classinfo.classname
    # The vtable pointer comes first. A name clash is tolerated rather than treated as failure.
    ret = idau.struct_add_ptr(sid, 'vtable', -1, type='{}::vtable *'.format(classname))
    if ret not in (0, idc.STRUC_ERROR_MEMBER_NAME):
        _log(0, 'Could not create {}.vtable: {}', classname, ret)
        return False
    # Then one member per ancestor's ::fields struct. STRUC_ERROR_MEMBER_UNIVAR is ignored since
    # it is reported if the ::fields struct has length 0.
    tolerated = (0, idc.STRUC_ERROR_MEMBER_NAME, idc.STRUC_ERROR_MEMBER_UNIVAR)
    for ci in classinfo.ancestors(inclusive=True):
        fields_sid = idau.struct_open(ci.classname + '::fields')
        if fields_sid is None:
            _log(0, 'Could not find {}::fields', ci.classname)
            return False
        ret = idau.struct_add_struct(sid, ci.classname, -1, fields_sid)
        if ret in tolerated:
            continue
        _log(0, 'Could not create {}.{}: {}', classname, ci.classname, ret)
        return False
    return True
336 | 
def _populate_class_structs__unions(classinfo, class_accesses, sid, sidf):
    """Populate the ::fields struct (sidf) and then the wrapper union (sid) for a C++ class."""
    accesses = class_accesses[classinfo.classname]
    _populate_fields_struct__unions(sidf, classinfo, accesses)
    _populate_wrapper_struct__unions(sid, classinfo)
341 | 
342 | #### Class generation #############################################################################
343 | 
# The two supported representations ("styles") of class structs, as described in the module
# docstring: struct slices and unions.
CLASS_SLICES = 'slices'
CLASS_UNIONS = 'unions'

# The style used by initialize_class_structs() when the caller does not specify one.
DEFAULT_STYLE = CLASS_SLICES
348 | 
def initialize_class_structs(style=DEFAULT_STYLE):
    """Create IDA structs representing the C++ classes in the kernel.

    The virtual methods of every known class are handed to process_functions(), with register X0
    designated for each method.

    Depends on initialize_vtable_structs.
    """
    def virtual_methods():
        """Yield (virtual_method, classname, X0) for each usable virtual method."""
        for classinfo in classes.class_info.values():
            overrides = vtable.class_vtable_overrides(classinfo, new=True, methods=True)
            for _, vmethod, _ in overrides:
                if idau.is_function_start(vmethod):
                    yield vmethod, classinfo.classname, idautils.procregs.X0.reg
                else:
                    # Only addresses that start a function are processed.
                    _log(3, 'Non-function virtual method {:#x} in class {}', vmethod,
                            classinfo.classname)
    # Do the standard processing.
    process_functions(virtual_methods(), style=style)
365 | 
def _collect_all_class_accesses(functions):
    """Collect all accesses to each class by examining the functions.

    functions is an iterable of (function, classname, register) tuples. Each function is passed to
    data_flow.pointer_accesses() with the register initialized to 0, and the accesses are
    accumulated per class name.

    Returns a two-level defaultdict: class name, then access key, then a set.

    Arm64 only.
    """
    def new_access_map():
        return collections.defaultdict(set)
    all_accesses = collections.defaultdict(new_access_map)
    for function, classname, register in functions:
        initial_state = { function: { register: 0 } }
        data_flow.pointer_accesses(function=function, initialization=initial_state,
                accesses=all_accesses[classname])
    return all_accesses
376 | 
377 | def _classify_class_accesses(all_accesses, style):
378 |     """Categorize each access by specific class and build a list of operands to convert.
379 | 
380 |     Arm64 only.
381 |     """
382 |     all_classes    = set()
383 |     class_accesses = collections.defaultdict(collections.Counter)
384 |     class_operands = collections.defaultdict(set)
385 |     # Helper for logging.
386 |     def log_addrs(addresses_and_deltas):
387 |         return ', '.join('{:#x}'.format(ea) for ea, dt in addresses_and_deltas)
388 |     # For each class, look at the accesses associated with that class.
389 |     for classname, accesses in all_accesses.items():
390 |         classinfo = classes.class_info.get(classname)
391 |         if not classinfo:
392 |             _log(-1, 'Skipping non-existent class {}', classname)
393 |             continue
394 |         # Put each (offset, size) pair in the appropriate dictionary. We'll traverse our ancestors
395 |         # from root to leaf, which means the first time this offset/size combination fits in a
396 |         # class, that's the class it goes with.
397 |         ancestors = list(classinfo.ancestors(inclusive=True))
398 |         all_classes.update(ancestors)
399 |         for offset_and_size, addresses_and_deltas in accesses.items():
400 |             offset, size = offset_and_size
401 |             # Accesses to offsets 0-8 are actually not considered part of the ::fields struct since
402 |             # they technically access the vtable. Skip it.
403 |             if offset + size <= idau.WORD_SIZE:
404 |                 continue
405 |             for ci in ancestors:
406 |                 if offset + size <= ci.class_size:
407 |                     # This is the smallest class that contains all the bytes of the access. If the
408 |                     # start of the access is in a smaller class, then this access spans a class
409 |                     # boundary. There are two possible causes: either there's a bug in the
410 |                     # analyzer, or the superclass's size was rounded up in the initialization
411 |                     # function, meaning this is actually a completely valid access in the current
412 |                     # class. Unfortunately there's no good way to detect this. The CLASS_UNIONS
413 |                     # model can deal with this OK, but the CLASS_SLICES model has problems. Skip
414 |                     # this access if we're not in the CLASS_UNIONS model.
415 |                     superclass_size = idau.WORD_SIZE
416 |                     if ci.superclass:
417 |                         superclass_size = ci.superclass.class_size
418 |                     if offset < superclass_size:
419 |                         _log(-1, 'Class {} has spanning access ({}, {}) from addresses {}',
420 |                                 classname, offset, size, log_addrs(addresses_and_deltas))
421 |                         if style != CLASS_UNIONS:
422 |                             break
423 |                     # If the access is unaligned with respect to the size, it's more likely to be
424 |                     # incorrect. Log it, but continue.
425 |                     if offset % size != 0:
426 |                         _log(2, 'Class {} has unaligned access ({}, {}) from addresses {}',
427 |                                 classname, offset, size, log_addrs(addresses_and_deltas))
428 |                     # Looks good, add it to the collection.
429 |                     class_accesses[ci.classname][offset_and_size] += len(addresses_and_deltas)
430 |                     class_operands[classname].update(addresses_and_deltas)
431 |                     break
432 |             else:
433 |                 # Almost certainly this is caused when the same register is used for two different
434 |                 # classes, but the path that gets this class to this access is impossible to satisfy.
435 |                 _log(-1, 'Class {} has out-of-bounds access ({}, {}) from addresses {}',
436 |                         classname, offset, size, log_addrs(addresses_and_deltas))
437 |     return all_classes, class_accesses, class_operands
438 | 
def _convert_operands_to_struct_offsets(access_addresses):
    """Retype the displacement operands behind each recorded access as struct offsets.

    Arguments:
        access_addresses: A map from each class name to a collection of (address, delta) pairs.

    Classes for which idau.struct_open returns None are skipped, as are addresses that do not
    decode to an instruction. Operands that cannot be converted are logged.
    """
    for classname, operand_sites in access_addresses.items():
        struct_id = idau.struct_open(classname)
        if struct_id is None:
            # There is no struct to reference for this class.
            continue
        for ea, delta in operand_sites:
            insn = idautils.DecodeInstruction(ea)
            if not insn:
                continue
            # Only displacement operands are candidates for a struct offset. The filter is lazy,
            # so operands are still examined one at a time, interleaved with the conversions.
            displ_ops = (op for op in insn.Operands if op.type == idaapi.o_displ)
            for op in displ_ops:
                if idau.insn_op_stroff(insn, op.n, struct_id, delta):
                    continue
                _log(1, 'Could not convert {:#x} to struct offset for class {} '
                        'delta {}', ea, classname, delta)
452 | 
def _set_class_style(style):
    """Set the global class style and select the matching struct-building functions.

    Arguments:
        style: Either CLASS_SLICES or CLASS_UNIONS.

    The style already in use is inferred from whether the OSObject struct, if it exists, is a
    union. If OSObject does not exist yet, it is created in the requested style.

    Raises:
        ValueError: If OSObject already exists in the other style.
    """
    global _style_was_set, _create_class_structs, _populate_class_structs
    assert style in (CLASS_SLICES, CLASS_UNIONS)
    # Check the current style based on OSObject, a class that should always exist.
    sid = idau.struct_open('OSObject')
    want_union = style == CLASS_UNIONS
    if sid is None:
        # No global style has been set.
        idau.struct_create('OSObject', union=want_union)
    else:
        # A style already exists. Check that the requested style matches.
        is_union = bool(idc.IsUnion(sid))
        if is_union != want_union:
            # Format the message explicitly: ValueError does not interpolate extra arguments.
            raise ValueError('Incompatible style {}'.format(style))
    # Set the appropriate functions based on the style.
    if style == CLASS_SLICES:
        _create_class_structs   = _create_class_structs__slices
        _populate_class_structs = _populate_class_structs__slices
    else:
        _create_class_structs   = _create_class_structs__unions
        _populate_class_structs = _populate_class_structs__unions
475 | 
def process_functions(functions, style=DEFAULT_STYLE):
    """Process additional functions.

    Arguments:
        functions: An iterator returning (function, classname, register) tuples.

    Options:
        style: The class style to use, either CLASS_SLICES or CLASS_UNIONS.

    Depends on initialize_class_structs.
    """
    classes.collect_class_info()
    _set_class_style(style)
    # Step 1: gather, per class, every (offset, size) access together with the (address, delta)
    # pairs that produced it.
    accesses_by_class = _collect_all_class_accesses(functions)
    # Step 2: classify those accesses. field_counts maps each class name to a counter of the
    # (offset, size) pairs that land in that class's own fields; operand_sites maps each class
    # name to the set of (address, delta) pairs that access that class.
    touched_classes, field_counts, operand_sites = _classify_class_accesses(
            accesses_by_class, style)
    # Step 3: create empty versions of every class's structs before populating any of them, so
    # that all the types needed during population already exist. Classes for which creation
    # yields None are left out.
    created = ((ci, _create_class_structs(ci)) for ci in touched_classes)
    class_structs = {ci: data for ci, data in created if data is not None}
    # Step 4: fill in the structs from the access counts.
    for ci, data in class_structs.items():
        _populate_class_structs(ci, field_counts, *data)
    # Step 5: turn each operand that generated an access into a typed struct offset reference.
    _convert_operands_to_struct_offsets(operand_sites)
508 | 
509 | #### Vtable type propagation ######################################################################
510 | 
def _propagate_virtual_method_type_for_method(classinfo, class_vindex, vmethod):
    """Propagate the type of a class's virtual method to the vtable struct.

    Arguments:
        classinfo: The class whose '<classname>::vmethods' struct should be updated.
        class_vindex: The index of the method within that struct, in units of words.
        vmethod: The address of the virtual method implementation.

    Returns:
        True if the member's type was set, False if any step failed. Failures are logged.
    """
    if not idau.is_function_start(vmethod):
        _log(2, 'Not a function start: {:x}', vmethod)
        return False
    vmethod_type = idc.GuessType(vmethod)
    if not vmethod_type:
        _log(2, 'No guessed type: {:x}', vmethod)
        return False
    vmethod_ptr_type = symbol.convert_function_type_to_function_pointer_type(vmethod_type)
    if not vmethod_ptr_type:
        _log(2, 'Could not convert to function pointer type: {:x}', vmethod)
        return False
    vmethods_name = classinfo.classname + '::vmethods'
    vmethods_sid = idau.struct_open(vmethods_name)
    if vmethods_sid is None:
        # The struct does not exist, so there is no member to retype. Bail out here rather than
        # handing None to idc.GetMemberId.
        _log(2, 'No vmethods struct: {}', vmethods_name)
        return False
    vmethod_offset = class_vindex * idau.WORD_SIZE
    vmethod_mid = idc.GetMemberId(vmethods_sid, vmethod_offset)
    if not bool(idc.SetType(vmethod_mid, vmethod_ptr_type)):
        _log(2, 'Could not set vmethod field type: {:x}, {}, {}', vmethod, classinfo.classname,
                class_vindex)
        return False
    return True
532 | 
def _propagate_virtual_method_types_for_class(classinfo):
    """Push the type of each of a class's virtual methods into its vtable struct.

    The methods come from vtable.class_vtable_methods(classinfo, new=True); each one is propagated
    using its position in that sequence as the index.
    """
    new_vmethods = vtable.class_vtable_methods(classinfo, new=True)
    for index, method_ea in enumerate(new_vmethods):
        _propagate_virtual_method_type_for_method(classinfo, index, method_ea)
537 | 
def propagate_virtual_method_types_to_vtable_structs():
    """Copy virtual method types onto the matching vtable struct entries for every known class.

    The result speeds up Hex-Rays decompilation, although it is not particularly accurate.

    For any function whose symbol includes an unknown struct type, IDA by default guesses a type
    with an empty argument list, and that inhibits proper type inference.
    """
    known_classes = classes.class_info.values()
    for info in known_classes:
        _propagate_virtual_method_types_for_class(info)
548 | 
549 | 


--------------------------------------------------------------------------------