├── mypy.ini ├── .gitignore ├── arch.py ├── setup.py ├── requirements.txt ├── NOTICE ├── setup.cfg ├── LICENSE ├── controlstateplugin.py ├── ignore_funcs_pure.txt ├── kcore.py ├── prmsg.py ├── syscall_failure_ebpf.c ├── addr2line.py ├── CONTRIBUTING_DCO.md ├── CODE_OF_CONDUCT.md ├── abstractarch.py ├── syscall.py ├── recorder.py ├── README.md ├── simprocedures.py ├── kprobesreporter.py ├── syscall-failure-analyzer.py ├── intelptrecorder.py ├── kallsyms.py ├── reporter.py ├── x86arch.py ├── intelptreporter.py └── kprobesrecorder.py /mypy.ini: -------------------------------------------------------------------------------- 1 | # Copyright 2023 VMware, Inc. 2 | # SPDX-License-Identifier: BSD-2-Clause 3 | [mypy] 4 | ignore_missing_imports = True 5 | incremental = True 6 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Ignore everything in this directory 2 | * 3 | # Except this file 4 | !.gitignore 5 | !*.py 6 | !setup.cfg 7 | !*.c 8 | !mypy.ini 9 | !*.txt 10 | -------------------------------------------------------------------------------- /arch.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 VMware, Inc. 2 | # SPDX-License-Identifier: BSD-2-Clause 3 | from abstractarch import Arch 4 | from x86arch import ArchX86 5 | 6 | arch: Arch = ArchX86() -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/bin/python3 2 | # Copyright 2023 VMware, Inc. 
3 | # SPDX-License-Identifier: BSD-2-Clause 4 | 5 | from setuptools import setup 6 | if __name__ == '__main__': 7 | setup() -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # Copyright 2023 VMware, Inc. 2 | # SPDX-License-Identifier: BSD-2-Clause 3 | angr==9.2.25 4 | ansicolors==1.1.8 5 | capstone==4.0.2 6 | claripy==9.2.25 7 | cle==9.2.25 8 | numpy==1.23.5 9 | lz4==4.3.2 10 | pyelftools==0.29 11 | psutil==5.9.4 12 | pyseccomp==0.1.2 13 | pytest==7.2.2 14 | python_ptrace==0.9.8 15 | tqdm==4.64.1 16 | -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | Copyright 2023 VMware, Inc. 2 | 3 | This product is licensed to you under the BSD 2 clause (the "License"). You may not use this product except in compliance with the License. 4 | 5 | This product may include a number of subcomponents with separate copyright notices and license terms. Your use of these subcomponents is subject to the terms and conditions of the subcomponent's license, as noted in the LICENSE file. 
-------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | name = errexp 3 | version = 0.1 4 | author = Nadav Amit 5 | description = Linux syscall kernel error analyzer 6 | long_description = file: README.rst, CHANGELOG.rst, LICENSE.rst 7 | keywords = kernel, syscall, error 8 | license = BSD 2-Clause License 9 | classifiers = 10 | Programming Language :: Python :: 3 11 | Programming Language :: Python :: 3.8 12 | Programming Language :: Python :: 3.9 13 | Programming Language :: Python :: 3.10 14 | 15 | [options] 16 | zip_safe = False 17 | include_package_data = True 18 | packages = find: 19 | python_requires = >=3.8, <4 20 | install_requires = 21 | angr==9.2.25 22 | ansicolors==1.1.8 23 | bcc==0.1.10 24 | capstone==4.0.2 25 | claripy==9.2.25 26 | cle==9.2.25 27 | numpy==1.23.5 28 | lz4==4.3.2 29 | ptrace==1.0.1 30 | pyelftools==0.29 31 | psutil==5.9.4 32 | pyseccomp==0.1.2 33 | pytest==7.2.2 34 | python_ptrace==0.9.8 35 | tqdm==4.64.1 36 | 37 | [options.package_data] 38 | * = *.txt, *.rst, tests/run_tests.sh, tests/Makefile, tests/src/* 39 | 40 | [options.entry_points] 41 | console_scripts = 42 | deeperr = my_package.deeperr:main 43 | 44 | [options.packages.find] 45 | exclude = 46 | examples* 47 | tools* 48 | docs* 49 | my_package.tests* 50 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Redistribution and use in source and binary forms, with or without 2 | modification, are permitted provided that the following conditions are 3 | met: 4 | 5 | 1. Redistributions of source code must retain the above copyright 6 | notice, this list of conditions and the following disclaimer. 7 | 8 | 2. 
Redistributions in binary form must reproduce the above 9 | copyright notice, this list of conditions and the following 10 | disclaimer in the documentation and/or other materials provided 11 | with the distribution. 12 | 13 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 14 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 15 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 16 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 17 | HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 18 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 19 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 20 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 21 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 23 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- /controlstateplugin.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 VMware, Inc. 
class ControlStatePlugin(angr.SimStatePlugin):
    """State plugin that tracks progress through a recorded branch trace.

    ``branches`` holds the trace entries that have not been consumed yet
    (its head is the current branch) and ``done_branches`` counts the
    consumed ones.  The remaining attributes are flags and limits that are
    only initialised here; this class itself does not interpret them.
    """

    STEP_TIMEOUT: int = 10

    def __init__(self, angr_mgr, detailed_trace:bool, branches:List[Dict], done_branches:int):
        super().__init__()
        self.angr_mgr = angr_mgr

        # Trace position.
        self.branches:List[Dict] = branches
        self.done_branches = done_branches
        # Whether the trace is detailed and includes REP instructions and
        # predicated moves.
        self.detailed_trace = detailed_trace

        # Depth bookkeeping.
        self.max_depth = 0x10000    # Just if something goes wrong
        self.stop_depth = 0
        self.last_depth = None

        # Replay flags.
        self.backtracking = False
        self.diverged = False
        self.in_simulated = True
        self.no_callees = False
        self.only_symbols = None
        self.expected_ip:Optional[int] = None

        # Instruction found by the most recent update() call.
        self.__last_insn = None

        self.arch = arch.controlStatePluginArch()

    @angr.SimStatePlugin.memo
    def copy(self, memo) -> 'ControlStatePlugin':
        """Return a shallow copy of the plugin that owns its own ``arch`` copy."""
        duplicate = copy.copy(self)
        duplicate.arch = copy.copy(self.arch)
        return duplicate

    @property
    def current_branch(self) -> Optional[Dict[str, Any]]:
        """Head of the pending branch list, or ``None`` when it is empty.

        Must not be read while ``backtracking`` is set (asserted).
        """
        assert not self.backtracking
        if len(self.branches) == 0:
            return None
        return self.branches[0]

    def match_src(self) -> bool:
        """Tell whether the last instruction is the current branch's source."""
        branch = self.current_branch
        if branch is None:
            return False
        insn = self.last_insn
        if insn is None:
            return False
        return branch['from_ip'] == insn.address

    def update(self, s:angr.SimState):
        """Remember the instruction located at the IP of state ``s``."""
        addr = self.angr_mgr.state_ip(s)
        if addr is None:
            self.__last_insn = None
        else:
            self.__last_insn = self.angr_mgr.get_insn(addr)

    @property
    def last_insn(self) -> Optional[capstone.CsInsn]:
        """Instruction stored by the last ``update()``; ``None`` if there is none."""
        return self.__last_insn

    def trace_finished(self) -> bool:
        """Return True once every recorded branch has been consumed."""
        return len(self.branches) == 0

    def next_branch(self) -> bool:
        """Consume the current branch.

        Returns False when the trace was already finished or becomes
        finished by this call, True when more branches remain.
        """
        if self.trace_finished():
            return False
        # Rebind instead of popping in place: copies of this plugin share
        # the same list object.
        self.branches = self.branches[1:]
        self.done_branches += 1
        return not self.trace_finished()
31 | up_read 32 | up_write 33 | down_read 34 | down_write 35 | down_write_killable 36 | _raw_spin_lock_irqsave 37 | _raw_spin_unlock_irqrestore 38 | # Memory allocation is just overhead 39 | kmem_cache_alloc 40 | kmem_cache_free 41 | trampoline_handler # Special we still want the callees 42 | __kmalloc 43 | kfree 44 | ptrace_stop 45 | #__kmalloc_track_caller 46 | mutex_lock 47 | mutex_unlock 48 | ptrace_notify 49 | irq_enter_rcu 50 | call_rcu 51 | syscall_exit_work # After the syscall was already executed 52 | scheduler_tick 53 | update_process_times 54 | tick_sched_handle 55 | tick_periodic 56 | ### CHECK - we ignore them to make kprobe more robust and avoid kernel crashes 57 | hrtick_update 58 | rcu_core 59 | rcu_note_context_switch 60 | rcu_core_si 61 | run_rebalance_domains 62 | profile_tick 63 | idle_cpu 64 | __do_softirq 65 | __kmem_cache_free 66 | vprintk 67 | invoke_rcu_core 68 | module_put 69 | putname 70 | nohz_balance_exit_idle 71 | update_cfg_group 72 | load_balance 73 | check_cfs_rq_runtime 74 | update_blocked_averages 75 | rebalance_domains 76 | _printk 77 | hrtimer_interrupt 78 | __hrtimer_run_queues 79 | tick_sched_timer 80 | tick_sched_do_timer 81 | irq_exit_rcu 82 | lapic_next_deadline 83 | sysvec_apic_timer_interrupt 84 | cgroup_rstat_updated 85 | __destroy_inode 86 | truncate_inode_pages_range 87 | __const_udelay 88 | delay_tsc 89 | destroy_inode 90 | _raw_spin_unlock_irq 91 | truncate_inode_pages_final 92 | __inode_wait_for_writeback 93 | evict 94 | __inode_wait_for_writeback 95 | iput 96 | perf_trace_buf_alloc 97 | perf_trace_sys_exit 98 | perf_trace_run_bpf_submit 99 | trace_call_bpf 100 | migrate_disable 101 | memcg_account_kmem 102 | perf_iterate_ctx 103 | perf_event_switch_output 104 | __wake_up 105 | local_clock 106 | native_sched_clock 107 | perf_iterate_sb 108 | syscall_enter_from_user_mode 109 | mntput 110 | dput 111 | mntput_no_expire 112 | rb_erase 113 | wakeup_source_unregister 114 | release_sock 115 | __check_object_size 116 | 
class Kcore:
    """Singleton reader for kernel memory exposed through ``/proc/kcore``.

    On first construction the kcore ELF image is opened, the headers of its
    ``PT_LOAD`` segments are cached in ``phdr`` and ``/proc/modules`` is
    parsed into ``modules``.  Later constructions return the same instance
    without reopening anything.
    """

    # One /proc/iomem line: "<start>-<end> : <type>" (hex bounds).
    iomem_regex = re.compile(r'\s*(?P<start>[0-9a-f]+)\-(?P<end>[0-9a-f]+)\s+:\s+(?P<type>[^\n]+)')
    path = "/proc/kcore"

    # Singleton instance variable
    _instance:Optional['Kcore'] = None

    def __new__(cls):
        if cls._instance is None:
            cls._instance = super().__new__(cls)
        return cls._instance

    def __init__(self):
        # __init__ runs on every Kcore() call; only open the file once.
        if not hasattr(self, 'f_kcore'):
            self.open()

    def __del__(self):
        if hasattr(self, 'f_kcore'):
            self.f_kcore.close()

    @classmethod
    def _system_ram_ranges(cls, lines) -> List[tuple]:
        """Extract the ``(start, end)`` pairs of the 'System RAM' entries."""
        ranges = []
        for line in lines:
            m = cls.iomem_regex.match(line)
            if m is None or m.group('type') != 'System RAM':
                continue
            ranges.append((int(m.group('start'), 16), int(m.group('end'), 16)))
        return ranges

    def open_iomem(self) -> List[tuple]:
        """Read ``/proc/iomem`` and return the System RAM address ranges.

        The ranges used to be computed and then dropped; they are now
        returned so the call is actually useful.
        """
        with open("/proc/iomem") as f:
            return self._system_ram_ranges(f)

    def open(self) -> bool:
        """Open kcore and cache its PT_LOAD headers and the module list.

        Raises:
            Exception: when the process is not allowed to read kcore.
        """
        try:
            self.f_kcore = open(self.path, mode='rb')
        except PermissionError as exc:
            raise Exception("no access to kcore") from exc

        elf = ELFFile(self.f_kcore)
        self.phdr = [seg.header for seg in elf.iter_segments('PT_LOAD')]
        self.modules = self.parse_proc_modules()
        return True

    def _find_segment(self, addr:int) -> Any:
        """Return the cached segment header whose range contains ``addr``.

        Raises:
            ValueError: when no PT_LOAD segment covers the address.
        """
        for seg in self.phdr:
            if seg.p_vaddr <= addr < seg.p_vaddr + seg.p_filesz:
                return seg
        raise ValueError("Address not found")

    def get_offset(self, addr:int) -> int:
        """Translate a virtual address into a file offset inside kcore.

        Raises:
            ValueError: when the address is not backed by any segment.
        """
        seg = self._find_segment(addr)
        return seg.p_offset + (addr - seg.p_vaddr)

    def read(self, addr:int, sz:int) -> bytes:
        """Read ``sz`` bytes at virtual address ``addr``.

        A failing read is logged and yields empty bytes (best effort).

        Raises:
            ValueError: when the address is not backed by any segment.
        """
        self.f_kcore.seek(self.get_offset(addr))
        try:
            return self.f_kcore.read(sz)
        except Exception:
            logging.info(f'failed to read kcore at {hex(addr)}')
            return bytes()

    @staticmethod
    def _parse_module_line(line:str) -> Dict[str, Any]:
        """Parse one ``/proc/modules`` line.

        Columns: name, size, reference count ('-' if unavailable),
        comma-terminated dependency list ('-' if none), state, address.
        """
        parts = line.strip().split(' ')
        return {
            'name': parts[0],
            'size': int(parts[1]),
            'ref_count': None if parts[2] == '-' else int(parts[2]),
            # Column 3 holds the dependencies; it ends with a comma, so
            # drop the empty trailing item as well as the '-' placeholder.
            'dependencies': [dep for dep in parts[3].split(',') if dep and dep != '-'],
            'state': parts[4],
            'address': int(parts[5], 16),
        }

    def parse_proc_modules(self) -> List[Dict[str, Any]]:
        """Return one info dict per loaded module listed in ``/proc/modules``."""
        with open('/proc/modules', 'r') as f:
            return [self._parse_module_line(line) for line in f]
# Per-level behaviour of pr_msg().  Each value is a tuple of:
#   (logging function or None, terminal colour or None,
#    flush the log handlers after logging, print to stderr rather than
#    to output_file)
# NOTE(review): 'DEBUG' is logged through logging.warning, and that logging
# happens even when debug_mode is off -- confirm this is intended.
level_to_logging = {
    'OP':       (logging.info,      None,       False,  True),
    'INFO':     (logging.info,      'green',    False,  True),
    'FATAL':    (logging.fatal,     'red',      True,   True),
    'ERROR':    (logging.error,     'red',      True,   True),
    'TITLE':    (None,              'blue',     False,  True),
    'DATA':     (None,              None,       False,  False),
    'WARN':     (logging.warning,   'yellow',   False,  True),
    'DEBUG':    (logging.warning,   'yellow',   False,  True),
}

startup_time = time()

def uptime() -> float:
    """Return the number of seconds elapsed since this module was imported."""
    return time() - startup_time

output_file:TextIO = sys.stdout
quiet:bool = False
debug_mode:bool = False

def change_output(f_name:str):
    """Redirect 'DATA' level output to the file ``f_name`` (truncated).

    Raises:
        ValueError: when the file cannot be opened; the original error is
            kept as the exception's cause.
    """
    global output_file

    try:
        output_file = open(f_name, 'tw+')
    except Exception as exc:
        raise ValueError(f'error opening output file {f_name}: {str(exc)}') from exc

def pr_msg(msg: str, level:str='INFO', new_line_before:bool=False, new_line_after:bool=False):
    """Log ``msg`` and print it according to ``level``.

    The message is always handed to the level's logging function (if any).
    Printing is skipped when ``quiet`` is set, and for 'DEBUG' messages
    unless ``debug_mode`` is set.  While a progress bar is active, messages
    sent to the terminal are surrounded by newlines so they do not overwrite
    the bar.

    Raises:
        KeyError: when ``level`` is not a key of ``level_to_logging``.
    """
    log_func, color, flush_log, to_stderr = level_to_logging[level]
    if log_func is not None:
        log_func(msg)
    if flush_log:
        # Flush every root handler; indexing handlers[0] raised IndexError
        # when logging had not been configured yet.
        for handler in logging.getLogger().handlers:
            handler.flush()

    o_file = sys.stderr if to_stderr else output_file
    std_outputs = o_file in {sys.stderr, sys.stdout}

    if quiet:
        return

    if level == 'DEBUG' and not debug_mode:
        return

    pbar_on_terminal = Pbar.in_pbar != 0 and std_outputs
    if new_line_before or pbar_on_terminal:
        msg = '\n' + msg
    if new_line_after or pbar_on_terminal:
        msg += '\n'
    if std_outputs and color is not None:
        msg = colors.color(msg, fg=color)
    print(msg, file=o_file)

class Pbar(tqdm.tqdm):
    """tqdm progress bar that also keeps count of the bars being displayed.

    ``Pbar.in_pbar`` is the number of enabled bars that have not been closed
    yet; pr_msg() consults it to decide whether to pad messages.
    """
    in_pbar = 0

    def __init__(self, message:str, items:Optional[Union[Sized, Iterable]]=None,
                 total:Optional[int]=None, unit:str='it', ignore_zero:bool=True,
                 disable:bool=False):
        """Create the bar.

        Args:
            message: description shown next to the bar; also logged.
            items: iterable to wrap; must be sized when ``total`` is omitted.
            total: number of expected iterations.
            unit: unit name passed to tqdm.
            ignore_zero: do not display the bar when the total is zero.
            disable: do not display the bar at all.
        """
        assert total is not None or isinstance(items, Sized)

        if total is None and isinstance(items, Sized):
            total = len(items)

        if quiet or (ignore_zero and total == 0):
            disable = True

        logging.info(message)
        super().__init__(iterable=items, total=total, unit=unit, colour="green",
                         bar_format='{desc:<30.30}{percentage:3.0f}%|{bar:20}{r_bar}',
                         disable=disable)
        super().set_description(message)
        if not disable:
            Pbar.in_pbar += 1
        self.pbar_disabled = disable

    def update_to(self, n:int):
        """Advance the bar to the absolute position ``n``."""
        super().update(n - self.n)

    def __disable(self):
        """Remove this bar from the active-bar count (at most once)."""
        # getattr: __del__ may run on an instance whose __init__ failed
        # before pbar_disabled was assigned.
        if not getattr(self, 'pbar_disabled', True):
            Pbar.in_pbar -= 1
            self.pbar_disabled = True

    def __del__(self):
        self.__disable()
        self.update(self.total - self.n)
        super().__del__()

    def __exit__(self, exc_type, exc_value, traceback):
        self.__disable()
        # Only complete the bar when the block finished without an exception.
        if exc_type is None:
            self.update_to(self.total)
        super().__exit__(exc_type, exc_value, traceback)

    def close(self):
        self.__disable()
        super().close()

warned_once:Set[str] = set()

def warn_once(msg: str):
    """Log ``msg`` as a warning the first time it is seen; ignore repeats."""
    # The membership test used to be inverted, so nothing was ever logged
    # and nothing was ever recorded.
    if msg in warned_once:
        return
    logging.warning(msg)
    warned_once.add(msg)
2 | // SPDX-License-Identifier: BSD-2-Clause 3 | #include 4 | #include 5 | //#include 6 | #include 7 | 8 | #define KEY_SYSCALL_NR 1 9 | #define KEY_ERROR_CODE 2 10 | #define KEY_PARENT_PID 3 11 | #define KEY_OCCUR_TIMES 4 12 | #define KEY_FLAGS 5 13 | 14 | #define STOP_ON_ERROR (1ull << 0) 15 | 16 | #define MIN_ERROR ((unsigned long)(-1024)) 17 | 18 | #if 0 19 | // Just as a record to the filter format 20 | struct syscall_filter_t { 21 | u64 syscall_nr; 22 | u64 error_code; 23 | u64 parent_pid; 24 | }; 25 | #endif 26 | 27 | struct syscall_event_t { 28 | u64 pid; 29 | u64 syscall_nr; 30 | u64 syscall_ret; 31 | u64 ts; 32 | }; 33 | 34 | BPF_PERF_OUTPUT(syscall_events); 35 | BPF_HASH(config_map, u64, u64); 36 | 37 | struct loop_ctx { 38 | struct task_struct *task; 39 | u64 parent_pid; 40 | u32 is_parent; 41 | }; 42 | 43 | static inline u64 check_parent(u32 loop_idx, struct loop_ctx *loop_ctx) { 44 | struct task_struct *task = loop_ctx->task; 45 | 46 | if (task == NULL) 47 | return 1; 48 | 49 | if (task->tgid == loop_ctx->parent_pid) { 50 | loop_ctx->is_parent = 1; 51 | return 1; 52 | } 53 | 54 | if (task->pid == 1) 55 | return 0; 56 | 57 | task = (struct task_struct *)task->real_parent; 58 | loop_ctx->task = task; 59 | return 0; 60 | } 61 | 62 | static inline int is_descendant(u64 pid, u64 parent_pid) { 63 | struct loop_ctx loop_ctx; 64 | int i; 65 | 66 | loop_ctx.task = (struct task_struct *)bpf_get_current_task(); 67 | loop_ctx.parent_pid = parent_pid; 68 | loop_ctx.is_parent = 0; 69 | 70 | //result = bpf_loop(1ul << 29, check_parent, (void *)(long)&loop_ctx, 0); 71 | for (i = 0; i < 64; i++) { 72 | check_parent(i, &loop_ctx); 73 | } 74 | 75 | return loop_ctx.is_parent; 76 | } 77 | 78 | int trace_syscalls(struct tracepoint__raw_syscalls__sys_exit *args) { 79 | struct syscall_event_t event = {}; 80 | u64 syscall_nr_req, error_code_req, parent_pid, occur_times, flags; 81 | u64 pid = bpf_get_current_pid_tgid() >> 32; 82 | 83 | u64 key_syscall_nr = KEY_SYSCALL_NR; 84 | 
u64 key_error_code = KEY_ERROR_CODE; 85 | u64 key_parent_pid = KEY_PARENT_PID; 86 | u64 key_occur_times = KEY_OCCUR_TIMES; 87 | u64 key_flags = KEY_FLAGS; 88 | 89 | u64 *syscall_nr_ptr = config_map.lookup(&key_syscall_nr); 90 | u64 *error_code_ptr = config_map.lookup(&key_error_code); 91 | u64 *parent_pid_ptr = config_map.lookup(&key_parent_pid); 92 | u64 *occur_times_ptr = config_map.lookup(&key_occur_times); 93 | u64 *flags_ptr = config_map.lookup(&key_flags); 94 | 95 | u64 syscall_nr = args->id; 96 | u64 syscall_ret = args->ret; 97 | 98 | if (!syscall_nr_ptr || !error_code_ptr || !parent_pid_ptr || !occur_times_ptr || !flags_ptr) 99 | return 0; 100 | 101 | syscall_nr_req = *syscall_nr_ptr; 102 | error_code_req = *error_code_ptr; 103 | parent_pid = *parent_pid_ptr; 104 | 105 | if (syscall_nr != syscall_nr_req && syscall_nr_req != -1ull) 106 | return 0; 107 | 108 | if (syscall_ret < MIN_ERROR) 109 | return 0; 110 | 111 | if (syscall_ret != error_code_req && error_code_req != -1ull) 112 | return 0; 113 | 114 | if (parent_pid != -1ull && !is_descendant(pid, parent_pid)) 115 | return 0; 116 | 117 | occur_times = *occur_times_ptr; 118 | if (occur_times != -1ull) { 119 | if (occur_times == 0) 120 | return 0; 121 | 122 | occur_times--; 123 | config_map.update(&key_occur_times, &occur_times); 124 | 125 | if (occur_times != 0) 126 | return 0; 127 | } 128 | 129 | event.pid = pid; 130 | event.syscall_nr = syscall_nr; 131 | event.syscall_ret = syscall_ret; 132 | event.ts = bpf_ktime_get_ns(); 133 | syscall_events.perf_submit(args, &event, sizeof(event)); 134 | 135 | flags = *flags_ptr; 136 | if (flags & STOP_ON_ERROR) 137 | bpf_send_signal(SIGSTOP); 138 | 139 | return 0; 140 | } 141 | -------------------------------------------------------------------------------- /addr2line.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 VMware, Inc. 
class Addr2Line:
    """Singleton that resolves code addresses to source locations.

    ``llvm-symbolizer`` is tried first because it also reports the column;
    ``addr2line`` is the fallback.  Obtain the instance with
    ``Addr2Line.get_instance()``.
    """

    # "<file>:<line> <trailing text>" as printed by addr2line.
    # NOTE(review): the group names were lost in extraction; 'file', 'line'
    # and 'col' are the ones run() reads, 'rest' is a placeholder for the
    # unused trailing group -- confirm against the upstream file.
    addr2line_loc_re = re.compile(r'(?P<file>[^\:]+):(?P<line>\d+)\s*(?P<rest>.*)')
    # "<file>:<line>:<col> <trailing text>" as printed by llvm-symbolizer.
    llvm_symbolizer_loc_re = re.compile(r'(?P<file>[^\:]+):(?P<line>\d+):(?P<col>\d+)\s*(?P<rest>.*)')

    __instance: Optional['Addr2Line'] = None
    __llvm_symbolizer = 'llvm-symbolizer'
    __addr2line = 'addr2line'

    @property
    def llvm_symbolizer(self) -> str:
        """Name or path of the llvm-symbolizer executable."""
        return self.__llvm_symbolizer

    @llvm_symbolizer.setter
    def llvm_symbolizer(self, llvm_symbolizer:str):
        self.__llvm_symbolizer = llvm_symbolizer

    @property
    def addr2line(self) -> str:
        """Name or path of the addr2line executable."""
        return self.__addr2line

    @addr2line.setter
    def addr2line(self, addr2line:str):
        self.__addr2line = addr2line

    @staticmethod
    def get_instance():
        """ Static access method. """
        if Addr2Line.__instance is None:
            Addr2Line()
        return Addr2Line.__instance

    def __init__(self):
        """ Virtually private constructor. """
        if Addr2Line.__instance is not None:
            raise Exception("This class is a singleton!")
        Addr2Line.__instance = self

    @staticmethod
    def _run_tool(args:List[str]) -> str:
        """Run a symbolizer command line and return its combined output."""
        logging.info("running: {0}".format(' '.join(args)))
        return subprocess.check_output(
                    args, stderr=subprocess.STDOUT, timeout=20,
                    universal_newlines=True)

    def _symbolize(self, obj:str, addr_args:List[str]) -> Tuple[str, 're.Pattern']:
        """Symbolize ``addr_args`` in ``obj``.

        Returns the tool output together with the regex that parses its
        location lines.

        Raises:
            SystemError: when both tools fail.
        """
        # Try llvm-symbolizer first since it gives the column
        args = [self.llvm_symbolizer, f'--obj={str(obj)}', "--basenames",
                '--relativenames', '--print-address', *addr_args]
        try:
            return self._run_tool(args), self.llvm_symbolizer_loc_re
        except Exception as exc:
            # Best effort: fall through to addr2line.
            logging.info(f'{self.llvm_symbolizer} failed on {obj}: {exc}')

        args = [self.addr2line, '-a', '-f', '-i', '-e', str(obj), *addr_args]
        try:
            return self._run_tool(args), self.addr2line_loc_re
        except Exception as exc:
            raise SystemError(f'Failed to run {self.addr2line} and {self.llvm_symbolizer} on {obj}') from exc

    @staticmethod
    def _parse_output(obj:str, output:str, line_re:'re.Pattern',
                      result:Dict[Tuple[str, int], List[Dict]]) -> None:
        """Add the locations found in a symbolizer output to ``result``.

        The output is read as: an address line ("0x..."), followed by
        (function line, location line) pairs -- several pairs when inlined
        frames are reported.
        """
        func = None
        addr = None
        # Nothing can be recorded before the first address line; it is also
        # set when an address was already resolved earlier.
        skip = True

        for l in output.splitlines():
            if l == "":
                continue
            elif l.startswith("0x"):
                addr = int(l, 16)
                func = None
                skip = (obj, addr) in result
                if not skip:
                    result[(obj, addr)] = list()
            elif func is None:
                func = l
            elif not skip:
                m = line_re.match(l)
                if m is None:
                    # e.g. "file:?" -- drop this frame instead of crashing
                    logging.info(f'unexpected location line for {func}: {l}')
                else:
                    d = m.groupdict()
                    col = int(d['col']) if 'col' in d else None
                    loc = {'func':func, 'file':d['file'], 'line':int(d['line']), 'col':col}
                    result[obj, addr].append(loc)
                func = None

    def run(self, obj_addrs:List[Tuple[str, int]]) -> Dict[Tuple[str, int], List[Dict]]:
        """Resolve ``(object file, address)`` pairs to source locations.

        Returns a dict keyed by ``(object file, address)`` whose values are
        lists of ``{'func', 'file', 'line', 'col'}`` dicts, one per reported
        frame; ``col`` is None when addr2line produced the output.

        Raises:
            SystemError: when neither tool can be run on an object file.
        """
        # Split the addresses according to the file (the first in the tuple)
        addr_dict:Dict[str, List[int]] = defaultdict(list)
        for obj, addr in obj_addrs:
            addr_dict[obj].append(addr)

        result:Dict[Tuple[str, int], List[Dict]] = {}
        for obj, addrs in addr_dict.items():
            output, line_re = self._symbolize(obj, [hex(a) for a in addrs])
            self._parse_output(obj, output, line_re, result)

        return result
**Navigate to the Project Directory:** 34 | 35 | ```bash 36 | cd syscall-failure-analyzer 37 | ``` 38 | 39 | 3. **Install Required Packages:** Use `pip` to install the required Python packages: 40 | 41 | ```bash 42 | pip install -r requirements.txt 43 | ``` 44 | 45 | ### Ensuring Code Quality 46 | 47 | Before submitting a pull request, make sure that your code adheres to the following guidelines: 48 | 49 | - **No MyPy Warnings:** Your code should not produce any MyPy warnings. Run the following command to check: 50 | 51 | ```bash 52 | mypy . 53 | ``` 54 | 55 | If you see any warnings, correct the type annotations to resolve them before submitting your pull request. 56 | 57 | ### Submitting a Pull Request 58 | 59 | 1. **Create a New Branch:** 60 | 61 | ```bash 62 | git checkout -b your-feature-branch 63 | ``` 64 | 65 | 2. **Add and Commit Your Changes:** 66 | 67 | ```bash 68 | git add . 69 | git commit --signoff -m "Your commit message" 70 | ``` 71 | 72 | 3. **Push the Changes:** 73 | 74 | ```bash 75 | git push origin your-feature-branch 76 | ``` 77 | 78 | 4. Navigate to the original repository and create a new pull request. Compare the original `main` or `master` branch with `your-feature-branch`. 79 | 80 | 5. After submitting the pull request, maintainers will review your changes. Upon approval, your code will be merged into the main codebase. 81 | 82 | ### Common Issues 83 | 84 | Currently, there are no common issues to be aware of. As the project evolves, this section will be updated accordingly. 85 | 86 | ### Testing 87 | 88 | As of now, the project does not have automated tests. Please disregard this section until tests are added to the repository. 
89 | 90 | ## Contribution Flow 91 | 92 | This is a rough outline of what a contributor's workflow looks like: 93 | 94 | * Make a fork of the repository within your GitHub account 95 | * Create a topic branch in your fork from where you want to base your work 96 | * Make commits of logical units 97 | * Make sure your commit messages have the proper format, quality and descriptiveness (see below) 98 | * Push your changes to the topic branch in your fork 99 | * Create a pull request containing that commit 100 | 101 | We follow the GitHub workflow and you can find more details on the [GitHub flow documentation](https://docs.github.com/en/get-started/quickstart/github-flow). 102 | 103 | ### Pull Request Checklist 104 | 105 | Before submitting your pull request, we advise you to use the following: 106 | 107 | 1. Check if your code changes will pass both code linting checks and unit tests. 108 | 2. Ensure your commit messages are descriptive. We follow the conventions on [How to Write a Git Commit Message](http://chris.beams.io/posts/git-commit/). Be sure to include any related GitHub issue references in the commit message. See [GFM syntax](https://guides.github.com/features/mastering-markdown/#GitHub-flavored-markdown) for referencing issues and commits. 109 | 3. Check the commits and commit messages and ensure they are free from typos. 110 | 111 | ## Reporting Bugs and Creating Issues 112 | 113 | For specifics on what to include in your report, please follow the guidelines in the issue and pull request templates when available. 
114 | 115 | 116 | ## Ask for Help 117 | 118 | The best way to reach us with a question when contributing is to ask on: 119 | 120 | * The original GitHub issue 121 | 122 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | We as members, contributors, and leaders pledge to make participation in syscall-failure-analyzer project and our 6 | community a harassment-free experience for everyone, regardless of age, body 7 | size, visible or invisible disability, ethnicity, sex characteristics, gender 8 | identity and expression, level of experience, education, socio-economic status, 9 | nationality, personal appearance, race, religion, or sexual identity 10 | and orientation. 11 | 12 | We pledge to act and interact in ways that contribute to an open, welcoming, 13 | diverse, inclusive, and healthy community. 
14 | 15 | ## Our Standards 16 | 17 | Examples of behavior that contributes to a positive environment for our 18 | community include: 19 | 20 | * Demonstrating empathy and kindness toward other people 21 | * Being respectful of differing opinions, viewpoints, and experiences 22 | * Giving and gracefully accepting constructive feedback 23 | * Accepting responsibility and apologizing to those affected by our mistakes, 24 | and learning from the experience 25 | * Focusing on what is best not just for us as individuals, but for the 26 | overall community 27 | 28 | Examples of unacceptable behavior include: 29 | 30 | * The use of sexualized language or imagery, and sexual attention or 31 | advances of any kind 32 | * Trolling, insulting or derogatory comments, and personal or political attacks 33 | * Public or private harassment 34 | * Publishing others' private information, such as a physical or email 35 | address, without their explicit permission 36 | * Other conduct which could reasonably be considered inappropriate in a 37 | professional setting 38 | 39 | ## Enforcement Responsibilities 40 | 41 | Community leaders are responsible for clarifying and enforcing our standards of 42 | acceptable behavior and will take appropriate and fair corrective action in 43 | response to any behavior that they deem inappropriate, threatening, offensive, 44 | or harmful. 45 | 46 | Community leaders have the right and responsibility to remove, edit, or reject 47 | comments, commits, code, wiki edits, issues, and other contributions that are 48 | not aligned to this Code of Conduct, and will communicate reasons for moderation 49 | decisions when appropriate. 50 | 51 | ## Scope 52 | 53 | This Code of Conduct applies within all community spaces, and also applies when 54 | an individual is officially representing the community in public spaces. 
55 | Examples of representing our community include using an official e-mail address, 56 | posting via an official social media account, or acting as an appointed 57 | representative at an online or offline event. 58 | 59 | ## Enforcement 60 | 61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 62 | reported to the community leaders responsible for enforcement at oss-coc@vmware.com. 63 | All complaints will be reviewed and investigated promptly and fairly. 64 | 65 | All community leaders are obligated to respect the privacy and security of the 66 | reporter of any incident. 67 | 68 | ## Enforcement Guidelines 69 | 70 | Community leaders will follow these Community Impact Guidelines in determining 71 | the consequences for any action they deem in violation of this Code of Conduct: 72 | 73 | ### 1. Correction 74 | 75 | **Community Impact**: Use of inappropriate language or other behavior deemed 76 | unprofessional or unwelcome in the community. 77 | 78 | **Consequence**: A private, written warning from community leaders, providing 79 | clarity around the nature of the violation and an explanation of why the 80 | behavior was inappropriate. A public apology may be requested. 81 | 82 | ### 2. Warning 83 | 84 | **Community Impact**: A violation through a single incident or series 85 | of actions. 86 | 87 | **Consequence**: A warning with consequences for continued behavior. No 88 | interaction with the people involved, including unsolicited interaction with 89 | those enforcing the Code of Conduct, for a specified period of time. This 90 | includes avoiding interactions in community spaces as well as external channels 91 | like social media. Violating these terms may lead to a temporary or 92 | permanent ban. 93 | 94 | ### 3. Temporary Ban 95 | 96 | **Community Impact**: A serious violation of community standards, including 97 | sustained inappropriate behavior. 
98 | 99 | **Consequence**: A temporary ban from any sort of interaction or public 100 | communication with the community for a specified period of time. No public or 101 | private interaction with the people involved, including unsolicited interaction 102 | with those enforcing the Code of Conduct, is allowed during this period. 103 | Violating these terms may lead to a permanent ban. 104 | 105 | ### 4. Permanent Ban 106 | 107 | **Community Impact**: Demonstrating a pattern of violation of community 108 | standards, including sustained inappropriate behavior, harassment of an 109 | individual, or aggression toward or disparagement of classes of individuals. 110 | 111 | **Consequence**: A permanent ban from any sort of public interaction within 112 | the community. 113 | 114 | ## Attribution 115 | 116 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], 117 | version 2.0, available at 118 | https://www.contributor-covenant.org/version/2/0/code_of_conduct.html. 119 | 120 | Community Impact Guidelines were inspired by [Mozilla's code of conduct 121 | enforcement ladder](https://github.com/mozilla/diversity). 122 | 123 | [homepage]: https://www.contributor-covenant.org 124 | 125 | For answers to common questions about this code of conduct, see the FAQ at 126 | https://www.contributor-covenant.org/faq. Translations are available at 127 | https://www.contributor-covenant.org/translations. 128 | -------------------------------------------------------------------------------- /abstractarch.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 VMware, Inc. 
class ControlStatePluginArch(ABC):
    """Abstract base for the architecture-specific part of the control state.

    Concrete subclasses only have to provide ``copy()``.
    """

    def __init__(self):
        """Nothing to initialise at this level."""

    @abstractmethod
    def copy(self) -> 'ControlStatePluginArch':
        """Return a ``ControlStatePluginArch`` copy of this object."""
| 87 | @abstractmethod 88 | def is_indirect_jmp_insn(self, insn) -> bool: 89 | pass 90 | 91 | def is_direct_jmp_insn(self, insn) -> bool: 92 | return self.is_jmp_insn(insn) and not self.is_indirect_jmp_insn(insn) 93 | 94 | @abstractmethod 95 | def is_iret_insn(self, insn:capstone.CsInsn) -> bool: 96 | pass 97 | 98 | @abstractmethod 99 | def is_sysexit_sysret_insn(self, insn:capstone.CsInsn) -> bool: 100 | pass 101 | 102 | @abstractmethod 103 | def is_fixed_rep_insn(self, insn:capstone.CsInsn) -> bool: 104 | pass 105 | 106 | @property 107 | @abstractmethod 108 | def ftrace_state_str(self) -> str: 109 | pass 110 | 111 | @abstractmethod 112 | def ftrace_state_dict(self, d:Dict[str, Any]) -> Dict[str, Any]: 113 | pass 114 | 115 | @property 116 | @abstractmethod 117 | def stack_end(self) -> int: 118 | pass 119 | 120 | @property 121 | @abstractmethod 122 | def per_cpu_reg(self) -> str: 123 | pass 124 | 125 | @property 126 | @abstractmethod 127 | def per_cpu_offset(self) -> int: 128 | pass 129 | 130 | @property 131 | @abstractmethod 132 | def stack_reg(self) -> str: 133 | pass 134 | 135 | @property 136 | @abstractmethod 137 | def ret_reg_name(self) -> str: 138 | pass 139 | 140 | @property 141 | @abstractmethod 142 | def stack_related_reg_names(self) -> List[str]: 143 | pass 144 | 145 | @property 146 | @abstractmethod 147 | def ip_reg_name(self) -> str: 148 | pass 149 | 150 | @abstractmethod 151 | def is_cond_branch_insn(self, insn:capstone.CsInsn) -> bool: 152 | pass 153 | 154 | @abstractmethod 155 | def is_direct_branch_insn(self, insn:capstone.CsInsn) -> bool: 156 | pass 157 | 158 | @abstractmethod 159 | def is_indirect_branch_target(self, insn:capstone.CsInsn) -> bool: 160 | pass 161 | 162 | @abstractmethod 163 | def is_cond_jmp_insn(self, insn:capstone.CsInsn) -> bool: 164 | pass 165 | 166 | @abstractmethod 167 | def is_cond_jmp_taken(self, insn:capstone.CsInsn, state:Dict[str, Any]) -> bool: 168 | pass 169 | 170 | @abstractmethod 171 | def is_loop_insn(self, 
def str_to_int(s) -> Optional[int]:
    """
    Convert a string to an integer. Supports base 10 and hexadecimal numbers.

    Args:
        s (str): The input string. A leading "0x" selects base 16.

    Returns:
        Optional[int]: The integer value of the string, or None if the input
        is not a string or cannot be parsed.
    """
    if not isinstance(s, str):
        return None

    base = 16 if s.startswith("0x") else 10
    try:
        # Both bases are parsed inside the try block so that a malformed
        # hexadecimal literal (e.g. "0xzz") yields None instead of raising.
        return int(s, base)
    except ValueError:
        return None
86 | """ 87 | if syscall is None: 88 | return None 89 | 90 | if syscall.isnumeric(): 91 | return int(syscall) 92 | 93 | if len(SyscallInfo.syscall_numbers) == 0: 94 | SyscallInfo.syscall_numbers = defaultdict(list) 95 | for number, name in SYSCALL_NAMES.items(): 96 | SyscallInfo.syscall_numbers[name.lower()].append(number) 97 | 98 | syscalls = SyscallInfo.syscall_numbers[syscall.lower()] 99 | if len(syscalls) > 1: 100 | pr_msg(f'Found multiple syscalls for {syscall}: {syscalls}; using {syscalls[0]}', level='WARN') 101 | elif len(syscalls) == 0: 102 | raise ValueError(f'Could not find syscall {syscall}') 103 | 104 | return syscalls[0] 105 | 106 | class ErrorcodeInfo: 107 | error_numbers:Optional[DefaultDict[str,List]] = None 108 | 109 | extra_error_codes = { 110 | 512: 'ERESTARTSYS', 111 | 513: 'ERESTARTNOINTR', 112 | 514: 'ERESTARTNOHAND', 113 | 515: 'ENOIOCTLCMD', 114 | 516: 'ERESTART_RESTARTBLOCK', 115 | 517: 'EPROBE_DEFER', 116 | 518: 'EOPENSTALE', 117 | 519: 'ENOPARAM', 118 | 521: 'EBADHANDLE', 119 | 522: 'ENOTSYNC', 120 | 523: 'EBADCOOKIE', 121 | 524: 'ENOTSUPP', 122 | 525: 'ETOOSMALL', 123 | 526: 'ESERVERFAULT', 124 | 527: 'EBADTYPE', 125 | 528: 'EJUKEBOX', 126 | 529: 'EIOCBQUEUED', 127 | 530: 'ERECALLCONFLICT', 128 | 531: 'ENOGRACE' 129 | } 130 | 131 | @staticmethod 132 | def get_name(n:int) -> str: 133 | """ 134 | Get the error string associated with an error code. 135 | 136 | Args: 137 | n (int): The error code. 138 | 139 | Returns: 140 | str: The error string. 141 | """ 142 | if n is None: 143 | return None 144 | if n < 0: 145 | n = -n 146 | if n in errno.errorcode: 147 | return errno.errorcode[n] 148 | if n in ErrorcodeInfo.extra_error_codes: 149 | return ErrorcodeInfo.extra_error_codes[n] 150 | return str(n) 151 | 152 | @staticmethod 153 | def get_errno(err:str) -> Optional[int]: 154 | """ 155 | Get the error code associated with an error string. 156 | 157 | Args: 158 | err (str): The error string. 
159 | 160 | Returns: 161 | Optional[int]: The error code, or None if the error is not found. 162 | """ 163 | if err is None or len(err) == 0: 164 | return None 165 | 166 | if err[0] == '-': 167 | err = err[1:] 168 | 169 | if err.isnumeric(): 170 | return int(err) 171 | 172 | if err.startswith('0x'): 173 | return (1 << 64) - int(err, 16) 174 | 175 | # string 176 | if ErrorcodeInfo.error_numbers is None: 177 | ErrorcodeInfo.error_numbers = defaultdict(list) 178 | items = errno.errorcode.items() | ErrorcodeInfo.extra_error_codes.items() 179 | for number, name in items: 180 | ErrorcodeInfo.error_numbers[name.lower()].append(number) 181 | 182 | errnos = ErrorcodeInfo.error_numbers[err.lower()] 183 | if len(errnos) == 0: 184 | pr_msg(f'Could not find error {err}', level='ERROR') 185 | return None 186 | 187 | return errnos[0] 188 | 189 | @staticmethod 190 | def is_error_code(v: int, errcode: int) -> bool: 191 | """ 192 | Check if a value matches an error code. 193 | 194 | Args: 195 | v (int): The value to check. 196 | errcode 197 | (int): The error code to compare. 198 | """ 199 | if v < 0: 200 | v += 1 << 64 201 | 202 | mask32 = (1 << 32) - 1 203 | v_low = v & mask32 204 | v_high = (v >> 32) & mask32 205 | return v_low == ((1 << 32) - errcode) and (v_high == mask32 or v_high == 0) -------------------------------------------------------------------------------- /recorder.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 VMware, Inc. 
class Recorder:
    """
    Shared state and helpers for recording failing syscalls of a traced child.

    Keeps the ptrace debugger, the user-supplied filters, and the failures and
    traces collected so far, and knows how to write them to the result file.
    """

    def __init__(
        self,
        perf: str,
        output: str,
        kcore: 'Kcore',
        objs: List[io.BufferedReader],
        snapshot_size: int,
        syscall_filter: Optional[int],
        errcode_filter: Optional[int],
        occurrences_filter: Optional[Set[int]],
        debug: bool,
        save_kcore: bool,
        early_stop: bool,
    ):
        """
        Store the configuration and, when a kcore is given, set up symbols.

        Args:
            perf: Stored as-is in ``self.perf``.
            output: Path of the result file. An existing file at that path is
                renamed to ``<output>.old``.
            kcore: When not None, a Kallsyms and an Angr manager are created.
            objs: Passed to Kallsyms as ``objs``.
            snapshot_size: Requested snapshot size; raised to at least
                128 * 1024.
            syscall_filter: Syscall number to record, or None for any.
            errcode_filter: Error code to record, or None for any failure.
            occurrences_filter: Stored as-is in ``self.occurrences_filter``.
            debug: Stored as-is in ``self.debug``.
            save_kcore: Whether save_failures() also stores kcore/kallsyms.
            early_stop: Stored as-is in ``self.early_stop``.
        """
        self.output = output
        self.failures: List[Dict] = []
        self.snapshot_size = max(snapshot_size, 128 * 1024)
        self.dbg = ptrace.debugger.debugger.PtraceDebugger()
        self.perf = perf
        self.syscall_filter = syscall_filter
        self.errcode_filter = errcode_filter
        self.occurrences_filter = occurrences_filter
        self.occurrences = 0
        self.debug = debug
        self.traces: List[Union[List[Dict[str, Union[int, str, float]]], str]] = []
        self.save_kcore = save_kcore
        self.early_stop = early_stop

        pr_msg('init kallsyms...', level='OP')
        self.rename_old_res_file(self.output)

        self.angr_mgr: Optional[Angr] = None
        self.kallsyms: Optional[Kallsyms] = None

        if kcore is not None:
            self.kallsyms = Kallsyms(objs = objs)
            self.angr_mgr = Angr(kallsyms = self.kallsyms,
                                 kcore = kcore,
                                 saved_segs = None)

        # Need to massage some syscall names to match those in ftrace
        self.syscall_special_event : Dict[str, str] = {
            'sendfile': 'sendfile64',
        }

    def detach_all_processes(self):
        """Detach every process known to the debugger (no-op without one)."""
        if self.dbg is None:
            return
        for p in self.dbg.list:
            p.detach()

    def save_failures(self, type_str:str):
        """
        Pickle the collected failures and traces into the result file.

        Nothing is written when no failure was recorded. With ``save_kcore``
        set, the saved kcore segments and kallsyms are stored as well.

        Args:
            type_str: Stored under the 'type' key of the result.
        """
        if len(self.failures) == 0:
            return

        pr_msg(f'saving {len(self.failures)} failures...', level='INFO')

        data:Dict[str, Any] = {
            'type': type_str,
            'failures': self.failures,
            'traces': self.traces,
        }

        if self.save_kcore:
            assert isinstance(self.angr_mgr, Angr)
            data.update({
                'kcore': self.angr_mgr.save(),
                'kallsyms': self.kallsyms,
            })

        try:
            with lz4.frame.open(self.output, 'wb') as f:
                pickle.dump(data, f)
        except IOError:
            # Best effort: report the problem but do not abort the recording.
            pr_msg("error writing to result file", level="ERROR")

    def set_sysexit_filter(self, ftrace_instance:'Ftrace', snapshot:bool):
        """
        Configure the syscall-exit event with the failure filter.

        Args:
            ftrace_instance: Ftrace instance whose event is configured.
            snapshot: When True, also install a snapshot trigger that uses the
                same filter expression.

        Returns:
            The configured event object.
        """
        e_class, e_subclass, filter_str = self.get_filter_string(exit=True)
        syscall_event = ftrace_instance.get_event(f'{e_class}/{e_subclass}')
        syscall_event.filter = filter_str
        if snapshot:
            syscall_event.trigger = f'snapshot if {filter_str}'
        return syscall_event

    def restart_syscall(self, process:'ptrace.debugger.process.PtraceProcess', syscall:'PtraceSyscall'):
        """
        Rewind the process to the syscall instruction.

        The instruction pointer is moved back by ``arch.syscall_insn_len`` and
        the register named ``arch.ret_reg_name`` is reloaded with the syscall
        number.
        """
        rip = process.getInstrPointer()
        process.setInstrPointer(rip - arch.syscall_insn_len)
        process.setreg(arch.ret_reg_name, syscall.syscall)

    def print_syscall_info(self, syscall:'PtraceSyscall'):
        """Print the name, number, error and arguments of a failing syscall."""
        msg = f'syscall "{syscall.name}" ({syscall.syscall}) failed with error [{syscall.result_text}]'

        pr_msg(msg, level="INFO", new_line_before=True)
        syscall_args = [hex(arg.value) for arg in syscall.arguments]
        msg = 'failing syscall args: {0}'.format(', '.join(syscall_args))
        pr_msg(msg, level="INFO", new_line_after=True)

    def set_func_tracing(self, syms: 'Iterable[Symbol]') -> bool:
        """
        Restrict the function tracer to the given symbols and enable it.

        Args:
            syms: Symbols whose names are written to the function filter.

        Returns:
            bool: True on success, False if the filter or tracer could not be
            set (the error is reported, not raised).
        """
        tracer = Ftrace.main_instance()

        # We cannot set function filters on cold symbols, and anyhow it is
        # meaningless, so ignore it silently.
        filter_sym_names = {sym.name for sym in syms if not sym.name.endswith('.cold')}
        pr_msg(f'setting function filters ({len(filter_sym_names)} functions)...',
               level="OP")
        try:
            tracer.func_filter = list(filter_sym_names)
            tracer.current_tracer = 'function'
        except Exception as e:
            # OSError and every other failure were handled identically, so a
            # single handler is enough.
            pr_msg(f'cannot set function filter: {e}', level="ERROR", new_line_before=True)
            return False

        return True

    def rename_old_res_file(self, output:str):
        """
        Move an existing result file out of the way.

        Args:
            output: Path of the result file; if it exists it is renamed to
                ``<output>.old``.

        Raises:
            Exception: Whatever the rename raised, after reporting it.
        """
        res_file_path = pathlib.Path(output)
        if res_file_path.exists():
            try:
                res_file_path.rename(str(res_file_path)+".old")
            except Exception:
                pr_msg(f'error renaming result file {str(res_file_path)}',
                       level="FATAL")
                # Bare raise keeps the original traceback intact.
                raise

    def init_process(self, args:'list[str]'):
        """
        Start the traced child process and register it with the debugger.

        Args:
            args: Command line of the child. ``args[0]`` is replaced in place
                by the located program path.

        Raises:
            FileNotFoundError: The program is not a regular file.
            PermissionError: The program is not executable.
        """
        args[0] = ptrace.tools.locateProgram(args[0])
        if not os.path.isfile(args[0]):
            raise FileNotFoundError(f"Error: file {args[0]} does not exist")
        if not os.access(args[0], os.X_OK):
            raise PermissionError(f'Error: file {args[0]} not executable')

        pid = ptrace.debugger.child.createChild(args, False, env=os.environ.copy())
        self.dbg.traceExec()
        self.dbg.traceClone()
        self.dbg.traceFork()
        self.dbg.addProcess(pid, is_attached=True)
        self.monitored_pid = pid

    def get_filter_string(self, exit:bool) -> Tuple[str, str, Optional[str]]:
        """
        Build the event class, event name and filter for the syscall events.

        Args:
            exit: True for the syscall-exit event, False for syscall-enter.

        Returns:
            Tuple[str, str, Optional[str]]: ``(event class, event name,
            filter)``. The filter is None when no condition applies.
        """
        conditions: List[str] = []
        if exit:
            conditions.append('ret<0' if self.errcode_filter is None
                              else f'ret=={-self.errcode_filter}')

        enter_or_exit = 'exit' if exit else 'enter'

        e_class, e_subclass = 'raw_syscalls', f'sys_{enter_or_exit}'
        if self.syscall_filter is not None:
            syscall_name = SYSCALL_NAMES.get(self.syscall_filter, None)
            if syscall_name is not None:
                syscall_name = self.syscall_special_event.get(syscall_name, syscall_name)
                e_class, e_subclass = 'syscalls', f'sys_{enter_or_exit}_{syscall_name}'
            else:
                # No per-syscall event is known for this number: stay on the
                # raw event and match the syscall id instead.
                conditions.append(f'id=={self.syscall_filter}')

        # Joining the parts avoids a dangling leading '&&' when the id
        # condition is the only one (the syscall-enter case).
        filter_str = '&&'.join(conditions)
        return e_class, e_subclass, filter_str if filter_str != '' else None
30 | ```bash 31 | sudo apt install binutils libcapstone3 bpfcc-tools python3-bpfcc 32 | 33 | # perf is only needed for recording traces using Intel PT. If you are using a 34 | # custom kernel, do not install linux-tools-`uname -r` since it would fail. 35 | sudo apt install linux-tools-common linux-tools-generic linux-tools-`uname -r` 36 | ``` 37 | 38 | 2. Install `libcapstone4` or `libcapstone3`. 39 | ```bash 40 | sudo apt install libcapstone4 || sudo apt install libcapstone3 41 | ``` 42 | 43 | 3. Install the kernel debug symbols 44 | ```bash 45 | codename=$(lsb_release -c | awk '{print $2}') 46 | sudo tee /etc/apt/sources.list.d/ddebs.list << EOF 47 | deb http://ddebs.ubuntu.com/ ${codename} main restricted universe multiverse 48 | deb http://ddebs.ubuntu.com/ ${codename}-security main restricted universe multiverse 49 | deb http://ddebs.ubuntu.com/ ${codename}-updates main restricted universe multiverse 50 | deb http://ddebs.ubuntu.com/ ${codename}-proposed main restricted universe multiverse 51 | EOF 52 | 53 | wget -O - http://ddebs.ubuntu.com/dbgsym-release-key.asc | sudo apt-key add - 54 | 55 | sudo apt update 56 | sudo apt install linux-image-`uname -r`-dbgsym 57 | ``` 58 | 59 | 4. *Recommended:* Installing the Linux source code is essential for any 60 | meaningful analysis using syscall-failure-analyzer output. If you have access to the source code 61 | through a custom kernel or other means, this step can be skipped. Please note 62 | that while the source code is not required for syscall-failure-analyzer execution, it is necessary 63 | for debugging based on syscall-failure-analyzer output. 64 | 65 | ```bash 66 | sudo apt-get install linux-source 67 | tar xvf /usr/src/linux-source-$(uname -r).tar.bz2 68 | cd linux-source-$(uname -r) 69 | ``` 70 | 71 | - Custom Kernels: 72 | If you choose to build your own custom kernel, syscall-failure-analyzer will require access to 73 | the debug information.
You can edit your `.config` file and confirm that 74 | `CONFIG_DEBUG_INFO=y` is set. If it is not set, please update the settings and 75 | rebuild your kernel. 76 | 77 | ### Run 78 | 79 | #### Identifying the Failing Syscall 80 | 81 | The syscall-failure-analyzer tool requires the name or number of the failing 82 | syscall as an argument. Typically, you would identify this failing syscall 83 | during the development process or through debugging tools like strace. For 84 | instance, running strace alongside your application could show system calls 85 | that return an error. Once you identify the failing syscall, you can provide 86 | its name or number as an argument when running syscall-failure-analyzer. This 87 | enables the tool to specifically target and analyze that particular syscall for 88 | failures. 89 | 90 | #### Sudoer Requirement 91 | 92 | This tool requires sudo permissions to access specific system features like 93 | kcore and kallsyms. Therefore, you should run pip requirements as well as the 94 | tool itself with sudo permissions. 95 | 96 | #### Virtual Environment Setup 97 | 98 | To set up and run the project, it's advisable to use a Python virtual 99 | environment. 100 | 101 | 1. **Install python3-venv package** 102 | ```bash 103 | sudo apt install python3-venv 104 | ``` 105 | 106 | 2. **Navigate to the Project Directory** 107 | ```bash 108 | cd /path/to/syscall-failure-analyzer 109 | ``` 110 | 111 | 3. **Create a Virtual Environment** 112 | ```bash 113 | python3 -m venv myvenv 114 | ``` 115 | 116 | 4. **Activate the Virtual Environment** 117 | ```bash 118 | source myvenv/bin/activate 119 | ``` 120 | 121 | 5. **Install Required Packages** 122 | ```bash 123 | pip install -r requirements.txt 124 | ``` 125 | 126 | 6. 
**Create a Symbolic Link for BCC** 127 | 128 | ```bash 129 | ln -s /usr/lib/python3/dist-packages/bcc myvenv/lib/$(python3 -c "import sys; print('python{}.{}'.format(sys.version_info.major, sys.version_info.minor))")/site-packages/bcc 130 | ``` 131 | 132 | #### Recording Syscall Failure 133 | 134 | Before deploying or running the project, ensure the virtual environment is activated. If it's not, activate it using: 135 | 136 | ```bash 137 | source myvenv/bin/activate 138 | ``` 139 | 140 | To record syscall failures, use the following command. This example targets the 141 | first failure of `setregid` syscall when running Linux Test Project's `setregid03` 142 | test: 143 | 144 | ```bash 145 | sudo python3 ./syscall-failure-analyzer.py --kprobes --syscall=setregid -n 1 record /opt/ltp/testcases/bin/setregid03 146 | ``` 147 | 148 | > Note: Use the `--kprobes` flag for recording with kprobe points. If Intel PT is supported and you prefer to use it, omit the `--kprobes` flag. 149 | 150 | #### Reporting Syscall Failures 151 | 152 | After recording, generate a report using the following command: 153 | 154 | ```bash 155 | sudo python3 ./syscall-failure-analyzer.py --syscall=setregid report 156 | ``` 157 | 158 | #### Command-line Arguments 159 | 160 | The tool provides a variety of command-line options to customize its behavior: 161 | 162 | - **Basic Options** 163 | - `-h, --help`: Show help message and exit 164 | - `--verbose, -v`: Enable verbose analysis info 165 | - `--quiet, -q`: Enable quiet mode 166 | - `--syscall SYSCALL, -s SYSCALL`: Specify the failing syscall number to track 167 | - `--occurrences OCCURRENCES, -n OCCURRENCES`: Specify occurrences to record 168 | 169 | - **Advanced Options** 170 | - `--vmlinux OBJS [OBJS ...], -l OBJS [OBJS ...]`: Specify the location of the vmlinux file or other modules 171 | - `--path SRC_PATH, -p SRC_PATH`: Specify the path to source code 172 | - `--perf FileType('x'), -f FileType('x')`: Specify the location of perf 173 | - 
`--debug, -d`: Enable debug mode verbosity 174 | 175 | For a complete list of command-line options, you can run the tool with `-h` or `--help`: 176 | 177 | ```bash 178 | python3 ./syscall-failure-analyzer.py -h 179 | ``` 180 | 181 | ## Documentation 182 | 183 | As of now, the project is in active development, and comprehensive 184 | documentation is in the works. For the time being, you can find the most 185 | relevant information about how to use and contribute to the project in this 186 | [README.md](README.md) and in the [CONTRIBUTING_DCO.md](CONTRIBUTING_DCO.md) files. 187 | 188 | If you have specific questions or encounter issues, feel free to open an issue 189 | on GitHub, and we'll do our best to assist you. 190 | 191 | We also welcome contributions to improve documentation. If you would like to 192 | contribute, please see the "Contributing" section for guidelines. 193 | 194 | ## Contributing 195 | 196 | The syscall-failure-analyzer project team welcomes contributions from the community. Before you start working with syscall-failure-analyzer, please 197 | read our [Developer Certificate of Origin](https://cla.vmware.com/dco). All contributions to this repository must be 198 | signed as described on that page. Your signature certifies that you wrote the patch or have the right to pass it on 199 | as an open-source patch. For more detailed information, refer to [CONTRIBUTING_DCO.md](CONTRIBUTING_DCO.md). 200 | 201 | ## License 202 | 203 | This project is licensed under the BSD-2-Clause License. For more details, please see the [LICENSE.md](LICENSE) file in the root directory of this source tree. 204 | -------------------------------------------------------------------------------- /simprocedures.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 VMware, Inc. 
def track_out_of_syms(proc: angr.SimProcedure, sym_names:Set[str]):
    """
    Skip recorded branches whose 'from_ip' is a member of sym_names.

    Does nothing while the control plugin is backtracking. If the recorded
    trace runs out, the state is marked as diverged and the expected
    instruction pointer is cleared.

    Args:
        proc: The running SimProcedure; its state carries the control plugin.
        sym_names: Values compared against each branch's 'from_ip'.
    """
    state = proc.state
    tracker = state.control
    assert isinstance(tracker, ControlStatePlugin)

    if tracker.backtracking:
        return

    # The current instruction pointer must be concrete at this point.
    current_ip = state_ip(state)
    assert(current_ip is not None)

    # NOTE(review): 'from_ip' is compared with the state's instruction pointer
    # elsewhere in this file (RetpolineProcedure), which suggests it is an
    # address, while sym_names holds strings - confirm this membership test
    # can ever be true.
    while True:
        branch = tracker.current_branch
        if branch is None:
            # Ran out of recorded branches.
            tracker.diverged = True
            tracker.expected_ip = None
            return
        if branch['from_ip'] not in sym_names:
            return
        tracker.next_branch()
class ProcedureWrapper(angr.SimProcedure):
    """
    Run another SimProcedure inline on register-passed arguments.

    The wrapped procedure's arguments are read from the calling convention's
    argument registers, optionally constrained to per-argument (min, max)
    limits, and its result is sign-extended to the architecture's address
    width.
    """

    def __init__(self, proc_class:Type[angr.SimProcedure], limits:Optional[Tuple[Tuple[Optional[int], Optional[int]], ...]]=None):
        """
        Args:
            proc_class: The SimProcedure class to wrap. The number of
                arguments is taken from the signature of its ``run`` method
                (excluding ``self``).
            limits: Optional (min, max) pair per argument, in argument order.
                Either bound may be None to leave that side unconstrained.
        """
        super(ProcedureWrapper, self).__init__()
        self.proc_class = proc_class
        sig = inspect.signature(proc_class.run)
        self.n_parameters = len(sig.parameters) - 1
        # Materialise the (index, (min, max)) pairs. A bare enumerate() object
        # is a one-shot iterator, so the limits would only be applied on the
        # first run() and silently skipped on every later one.
        self.limits = tuple(enumerate(limits)) if limits else None

    def run(self):
        """Invoke the wrapped procedure and return its address-width result."""
        # Collect arguments from the state registers according to the calling convention
        track_to_ret(self)

        cc = self.state.project.factory.cc()
        args = cc.ARG_REGS

        # Fetch arguments from the registers
        arg_values = [self.state.registers.load(reg) for reg in args][:self.n_parameters]

        if self.limits:
            for i, (min_val, max_val) in self.limits:
                if min_val is None and max_val is None:
                    continue

                val = arg_values[i]
                if max_val is not None:
                    self.state.add_constraints(val <= max_val)
                if min_val is not None:
                    self.state.add_constraints(val >= min_val)

        # call the procedure with the fetched arguments
        result = self.inline_call(self.proc_class, *arg_values).ret_expr
        if result.length == arch.address_width:
            return result

        return result.sign_extend(arch.address_width - result.length)
self.jump(reg) 192 | 193 | trace_from_ip = control.current_branch['from_ip'] 194 | trace_to_ip = control.current_branch['to_ip'] 195 | control.expected_ip = trace_to_ip 196 | angr_mgr = control.angr_mgr 197 | 198 | current_state_ip = state_ip(state) 199 | prev_state_ip = state.history and state.history.parent and state.history.parent.addr 200 | 201 | def in_retpoline(ip:int) -> bool: 202 | sym_name = angr_mgr.get_sym_name(ip) 203 | return (sym_name.startswith('__x86_indirect_thunk') or 204 | sym_name in {'__x86_return_thunk', 'zen_untrain_ret'}) 205 | 206 | # When using kprobes we skip the retpolines, but when using hardware tracer 207 | # we keep them. 208 | if (current_state_ip == trace_from_ip or 209 | (not in_retpoline(trace_from_ip) and prev_state_ip == trace_from_ip)): 210 | # TODO: Handle the case in which the trace ends with a retpoline 211 | while in_retpoline(trace_to_ip): 212 | control.next_branch() 213 | trace_to_ip = control.current_branch['to_ip'] 214 | trace_from_ip = control.current_branch['from_ip'] 215 | if not in_retpoline(trace_from_ip): 216 | control.diverged = True 217 | break 218 | control.expected_ip = trace_to_ip 219 | else: 220 | control.diverged = True 221 | 222 | if not control.diverged: 223 | state.add_constraints(reg == trace_to_ip) 224 | control.next_branch() 225 | return self.jump(trace_to_ip) 226 | 227 | return self.jump(reg) -------------------------------------------------------------------------------- /kprobesreporter.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 VMware, Inc. 
# SPDX-License-Identifier: BSD-2-Clause
from typing import Dict, Iterable, List, Optional, Set, Any

from cle.backends import Symbol
from prmsg import Pbar, warn_once, pr_msg
from arch import arch
from reporter import Reporter


class KprobesReporter(Reporter):
    """Reporter that rebuilds a branch trace from kprobes/ftrace records."""

    def report(self):
        """Convert each recorded failure's ftrace into branches and report it."""
        for failure in self.failures:
            trace = self.traces[failure['trace_id']]
            sim_syms = [self.angr_mgr.get_sym(s) for s in failure['sim_syms']]
            branches = self.ftrace_to_branch(trace = trace,
                                             filter_pid = failure['pid'],
                                             sim_syms = sim_syms)
            super().report_one(branches = branches,
                               errcode = failure['errcode'],
                               sim_syms = sim_syms)

    # Converting ftrace to branches format that is common to processor trace
    # and ftrace.
    def ftrace_to_branch(self, trace:List[Dict[str, Any]], filter_pid:int, sim_syms:Set[Symbol]) -> List[Dict]:
        """Walk the disassembly alongside the ftrace entries and emit branch records.

        Each emitted branch is a dict with 'from_ip' and 'to_ip' and, when
        available, 'callstack' and 'ret'. Entries of other pids and entries
        without a 'type' are skipped; processing stops at a 'sysexit' entry.

        Raises:
            ValueError: on an unknown entry type.
            NotImplementedError: when an indirect jump is reached.
        """
        branches = []
        first = True
        insn = None
        pending_rep_insn, pending_rep_iterations = None, None
        unemulated_call_entry = None

        #sim_syms = [s for s in sim_syms if s.name != '__check_object_size']
        pbar = Pbar("processing ftrace", items=trace, unit="lines")
        for l in pbar:
            if filter_pid != l['pid']:
                # It should not happen, as we already configured ftrace to
                # filter the pid of the failure during the recording.
                warn_once(f"skipping pid {l['pid']}")
                continue

            if 'type' not in l:
                warn_once("ftrace snapshot includes unknown entries")
                continue

            next_ip, state = None, None

            ty = l['type']

            # If we reached the syscall tracing at the end, stop
            if ty == 'sysexit':
                break
            elif ty == 'sysenter':
                continue
            elif ty == 'probe':
                state = arch.ftrace_state_dict(l)
                next_ip = l['addr']
            elif ty == 'ret':
                next_ip = None
            elif ty == 'func':
                try:
                    next_ip = self.angr_mgr.get_prev_insn(l['from_ip']).address
                except ValueError:
                    next_ip = None
            else:
                raise ValueError(f"unknown ftrace type entry: {ty}")

            if first:
                insn = self.angr_mgr.get_prev_insn(l['from_ip'])

            first = False

            # Adding fake branches for rep instructions to reflect the number
            # of iterations that were executed.
            if pending_rep_insn is not None:
                for _ in range(0, pending_rep_iterations -
                               arch.rep_iterations(pending_rep_insn, state)):
                    branches.append(
                        {'from_ip': pending_rep_insn.address,
                         'to_ip': pending_rep_insn.address}
                    )
                pending_rep_insn = None

            # Follow the statically-known control flow until the instruction
            # this trace entry refers to (or a branch that needs the entry).
            while insn and insn.address != next_ip:
                unemulated_call_entry = None

                pr_msg(str(insn), level="DEBUG")
                if not arch.is_branch_insn(insn):
                    insn = self.angr_mgr.next_insn(insn)
                    continue

                # if ((not arch.is_direct_branch_insn(insn)) or
                #     arch.is_cond_branch_insn(insn)):
                #     break

                try:
                    target_insn = self.angr_mgr.get_branch_target_insn(insn)
                    target_sym = target_insn and self.angr_mgr.get_sym(target_insn)
                except Exception:
                    # Fixed: was a bare `except:`, which also swallowed
                    # KeyboardInterrupt/SystemExit inside this long loop.
                    target_insn = None
                    target_sym = None

                if arch.is_direct_jmp_insn(insn):
                    assert(target_insn is not None)
                    branches.append({'from_ip': insn.address, 'to_ip': target_insn.address})
                    insn = target_insn
                elif ((arch.is_direct_call_insn(insn) and target_sym not in sim_syms) or
                      (arch.is_indirect_call_insn(insn) and self.angr_mgr.next_insn_addr(insn) == next_ip)):
                    # Call that is not followed: record a branch out with an
                    # unknown target and a branch back to the next instruction.
                    branches.append({'from_ip': insn.address, 'to_ip': None})
                    insn = self.angr_mgr.next_insn(insn)
                    unemulated_call_entry = {'from_ip': None, 'to_ip': insn.address}
                    branches.append(unemulated_call_entry)
                else:
                    break

            match_ip = insn and insn.address == next_ip

            if ty == 'func' and not match_ip:
                assert(0 == 1)
                continue

            target_insn = None

            if ty == 'probe' and match_ip and unemulated_call_entry is not None:
                # The probe right after the skipped call carries its return value.
                unemulated_call_entry['ret'] = l['ax']
                unemulated_call_entry = None

            if arch.is_indirect_jmp_insn(insn):
                raise NotImplementedError("indirect jump")
            elif arch.is_call_insn(insn):
                if ty != 'func':
                    #target_insn = self.angr_mgr.get_insn(l['addr'])
                    # We are just going to skip endbr-like probes
                    #if arch.is_indirect_branch_target(target_insn):
                    #    continue
                    pass
                elif not match_ip or ty != 'func':
                    # TODO: Cleaner error
                    assert(0 == 1)
                else:
                    # ty == 'func'
                    to_ip = self.angr_mgr.get_sym_addr(l['to_ip'])
                    target_insn = self.angr_mgr.get_insn(to_ip)
            elif arch.is_ret_insn(insn):
                assert ty == 'ret'
                from_sym = self.angr_mgr.get_sym(insn)
                assert (from_sym is not None and from_sym.name == l['from_func'])

                target_insn = self.angr_mgr.get_insn(l['to_ip'])
            elif arch.is_cond_jmp_insn(insn):
                assert ty == 'probe'
                assert match_ip
                assert state is not None
                if arch.is_cond_jmp_taken(insn, state):
                    target_insn = self.angr_mgr.get_branch_target_insn(insn)
                    assert(target_insn is not None)
                    pr_msg(f"taken branch: {insn} -> {target_insn}", level="DEBUG")
                else:
                    insn = self.angr_mgr.next_insn(insn)
                    pr_msg(f"not taken branch: {insn}", level="DEBUG")
            elif arch.is_loop_insn(insn):
                assert state is not None

                if arch.is_loop_taken(insn, state):
                    target_insn = self.angr_mgr.get_branch_target_insn(insn)
                else:
                    insn = self.angr_mgr.next_insn(insn)
            elif arch.is_rep_insn(insn):
                assert ty == 'probe'
                assert state is not None
                # Remember the iteration count; the fake self-branches are
                # emitted when the next trace entry is processed.
                pending_rep_iterations = arch.rep_iterations(insn, state)
                if pending_rep_iterations > 0:
                    pending_rep_insn = insn
                insn = self.angr_mgr.next_insn(insn)
            elif arch.is_predicated_mov(insn):
                # Create psuedo entry to know that the cmov was taken
                assert state is not None

                if not arch.is_cond_jmp_taken(insn, state):
                    target_insn = self.angr_mgr.next_insn(insn)

            if target_insn is not None:
                assert insn is not None
                branch = {'from_ip':insn.address, 'to_ip':target_insn.address}
                if 'callstack' in l:
                    branch['callstack'] = l['callstack']
                if ty == 'ret':
                    branch['ret'] = l['ret']
                branches.append(branch)
                insn = target_insn

        return branches

    @property
    def detailed_trace(self) -> bool:
        return True

# --------------------------------------------------------------------------------
# /syscall-failure-analyzer.py:
# --------------------------------------------------------------------------------
# #!/usr/bin/python3
# Copyright 2023 VMware, Inc.
# SPDX-License-Identifier: BSD-2-Clause

import argparse
import glob
import logging
import os
import pickle
import sys
import io
import lz4.frame
from typing import Optional, Set, List, BinaryIO

from angrmgr import Angr
from addr2line import Addr2Line
from claripy.backends.backend_smtlib_solvers import *
from intelptrecorder import IntelPTRecorder
from intelptreporter import IntelPTReporter
from kallsyms import Kallsyms, get_vmlinux
from kprobesrecorder import KProbesRecorder
from kprobesreporter import KprobesReporter
from reporter import Reporter
from prmsg import pr_msg, quiet, warn_once, change_output
from ptrace.debugger.child import createChild
from ptrace.tools import locateProgram
from syscall import ErrorcodeInfo, SyscallInfo
from kcore import Kcore
from ftrace import Ftrace

DEFAULT_DATA_FILENAME = 'deeperr.data'


def get_occurrences(s:str) -> Optional[Set[int]]:
    """Parse a comma-separated list of integers into a set.

    Returns None when `s` is None or when any element is not an integer
    (an error message is printed in the latter case).
    """
    if s is None:
        return None
    try:
        # A single number is simply a one-element list.
        return {int(v.strip()) for v in s.split(',')}
    except ValueError:
        pr_msg('Could not parse occurances list, skipping input', level='ERROR')
        return None


def report(inputs: str,
           src_path: Optional[str],
           output: Optional[str],
           print_stats: bool,
           objs: List[io.BufferedReader],
           syscall_filter: Optional[int],
           errcode_filter: Optional[int],
           occurances_filter: Optional[Set[int]],
           **kwargs):
    """Analyze every recording matching the glob `inputs` and print a report.

    Recordings that cannot be read are reported and skipped.
    """
    if output is not None:
        try:
            change_output(output)
        except Exception as e:
            pr_msg(f'{e}', level='FATAL')
            return

    res_files = glob.glob(inputs)
    if len(res_files) == 0:
        pr_msg('found no result files', level="ERROR")
        return

    for f_name in res_files:
        try:
            with lz4.frame.open(f_name, 'rb') as failure_file:
                # Load the data from the file
                # NOTE: unpickling can execute arbitrary code; only open
                # recordings that come from a trusted source.
                data = pickle.load(failure_file)
        except FileNotFoundError:
            pr_msg(f'error reading result file {f_name}: file not found', level='ERROR')
            continue
        except EOFError:
            pr_msg(f'error reading result file {f_name}: file is empty', level='ERROR')
            continue
        except lz4.frame.LZ4FrameError:
            pr_msg(f'error reading result file {f_name}: file is corrupted', level='ERROR')
            continue

        # Fixed: `data.get('kallsyms', Kallsyms(objs))` constructed a new
        # Kallsyms even when the recording already carried one.
        kallsyms = data['kallsyms'] if 'kallsyms' in data else Kallsyms(objs)
        saved_segs = data.get('kcore')
        kcore = Kcore() if saved_segs is None else None

        if saved_segs is None:
            pr_msg(f'kcore was not saved, reading from /proc/kcore', level='INFO')

        # We need to init ftrace before angr to clear all probe points that
        # might have been left. Otherwise, disassembly will fail.
        ftrace = Ftrace()
        ftrace.kprobe_event_disable_all()

        angr_mgr = Angr(kallsyms,
                        kcore = kcore,
                        saved_segs = saved_segs)

        reporter_cls = IntelPTReporter if data['type'] == 'intel-pt' else KprobesReporter
        report_kwargs = {
            'objs': objs,
            'errcode_filter': errcode_filter,
            'syscall_filter': syscall_filter,
            'print_stats': print_stats,
            # Filtering based on occurances is done during reporting only for Intel PT,
            # since we cannot reliably filter it out during recording
            'occurances_filter': occurances_filter,
            'angr_mgr': angr_mgr,
            'traces': data['traces'],
            'failures': data['failures'],
            'src_path': src_path,
        }

        reporter:Reporter = reporter_cls(**report_kwargs)
        reporter.report()


def valid_path(path):
    """argparse `type=` helper: return `path` if it exists, else reject the argument."""
    if os.path.exists(path):
        return path
    else:
        raise argparse.ArgumentTypeError(f"Path '{path}' does not exist.")


def main():
    """Command-line entry point: parse arguments and run `record` or `report`."""
    # NOTE(review): `quiet` is imported by value from prmsg, so rebinding it
    # here presumably does not change prmsg's own setting - confirm.
    global quiet, debug

    def arg_error(parser: argparse.ArgumentParser):
        # add suffix to the usage string
        parser.print_help()
        sys.exit()

    parser = argparse.ArgumentParser("deeperr", epilog="application")
    parser.add_argument('--verbose', '-v', action='store_true', dest='verbose', help='prints verbose analysis info')
    parser.add_argument('--vmlinux', '-l', action='store', dest='objs', help='location of vmlinux file or other modules', type=argparse.FileType('rb'), nargs='+', default=[])
    # NOTE(review): a FileType object is passed as `metavar`; `type=` may have been intended - confirm.
    parser.add_argument('--perf', '-f', default='perf', metavar=argparse.FileType('x'), help='location of perf')
    parser.add_argument('--debug', '-d', action='store_true', dest='debug', help='debug mode verbosity')
    parser.add_argument('--llvm-symbolizer', '-y', action='store', dest='llvm_symbolizer', default='llvm-symbolizer', help='path to llvm-symbolizer')
    parser.add_argument('--snapshot-size', '-z', action='store', dest='snapshot_size', type=int, default=262144, help='perf snapshot size')
    parser.add_argument('--tmp', '-t', action='store', dest='tmp_path', default='/tmp', type=valid_path, help='tmp path')
    parser.add_argument('--syscall', '-s', action='store', dest='syscall', help='failing syscall number to track')
    parser.add_argument('--quiet', '-q', action='store_true', dest='quiet', help='quiet mode')
    parser.add_argument('--errcode', '-r', action='store', dest='errcode', help='error number')
    parser.add_argument('--output', '-o', action='store', dest='output', help='output file', default=None, metavar='PATH')
    parser.add_argument('--input', '-i', action='store', dest='input', help='input file', default=DEFAULT_DATA_FILENAME, metavar='FILES')
    parser.add_argument('--kprobes', '-k', action='store_true', dest='kprobes', help='use kprobes')
    parser.add_argument('--occurrences', '-n', action='store', dest='occurrences', help='occurrences to record')
    parser.add_argument('--extra-info', '-x', action='store_true', dest='print_stats', help='detailed output with analysis statistics')
    parser.add_argument('--path', '-p', action='store', dest='src_path', default=None, type=valid_path, help='path to source code')
    parser.add_argument('--nokcore', '-w', action='store_true', dest='nokcore', help='do not save kcore')
    parser.add_argument('--early-stop', '-e', action='store_true', dest='early_stop', help='stop execution after first failure')
    parser.add_argument('command', choices=['record', 'report'], help='command to run: record or report')

    # Drop the leading "usage: " and advertise the "-- [args]" suffix.
    parser.usage = parser.format_usage()[7:].rstrip('\n ') + ' -- [args]\n'

    try:
        args, remaining_argv = parser.parse_known_args()
    except (SystemExit, Exception):
        # argparse reports problems (and --help) by raising SystemExit.
        # Exit with error
        sys.exit(1)

    if os.geteuid() != 0:
        pr_msg(f'{sys.executable} must be run as root', level='FATAL')
        sys.exit(1)

    if remaining_argv and remaining_argv[0] == '--':
        remaining_argv = remaining_argv[1:]

    sys.setrecursionlimit(10 ** 5)

    loglevel = 'ERROR'
    if args.debug:
        loglevel = 'DEBUG'
    elif args.verbose:
        loglevel = 'INFO'

    quiet = args.quiet
    debug = args.debug

    logging.basicConfig(filename='deeperr.log', level=loglevel, force=True)
    logging.getLogger().setLevel(loglevel)
    for l in ['angr', 'cle', 'pyvex', 'claripy']:
        logging.getLogger(l).setLevel('ERROR')

    objs = get_vmlinux(args.objs)

    syscall_filter = None
    if args.syscall is not None:
        try:
            syscall_filter = SyscallInfo.get_syscall_nr(args.syscall)
        except ValueError as e:
            pr_msg(f'{e}', level="ERROR")
            pr_msg('recording all syscall', level="WARN")

    # Fixed: the lookup above used to be repeated here, outside the try,
    # which re-raised the ValueError that had just been handled.
    errcode_filter = ErrorcodeInfo.get_errno(args.errcode)
    occurrences_filter = get_occurrences(args.occurrences)

    a2l = Addr2Line.get_instance()
    a2l.llvm_symbolizer = args.llvm_symbolizer

    if args.command == 'record' and len(remaining_argv) < 1:
        arg_error(parser)

    if args.command == 'record':
        kprobes = args.kprobes
        # Result unused here; kept because the constructor may have side effects.
        kcore = Kcore()

        if not kprobes and not IntelPTRecorder.cpu_supports_pt():
            pr_msg("CPU does not support Intel PT", level="ERROR")

        recorder_cls = KProbesRecorder if kprobes else IntelPTRecorder
        a = recorder_cls(
            perf=args.perf,
            objs=objs,
            snapshot_size=args.snapshot_size,
            errcode_filter=errcode_filter,
            syscall_filter=syscall_filter,
            occurrences_filter=occurrences_filter,
            output=args.output or DEFAULT_DATA_FILENAME,
            tmp_path=args.tmp_path,
            debug=args.debug,
            save_kcore=not args.nokcore,
            early_stop=args.early_stop,
        )
        try:
            a.record(args=remaining_argv)
        except OSError as e:
            pr_msg(f'error recording: {e}', level='FATAL')
    else:
        report(inputs=args.input,
               output=args.output,
               print_stats=args.print_stats,
               objs=objs,
               errcode_filter=errcode_filter,
               syscall_filter=syscall_filter,
               occurances_filter=occurrences_filter,
               src_path=args.src_path)


if __name__ == "__main__":
    main()

# --------------------------------------------------------------------------------
# /intelptrecorder.py:
# --------------------------------------------------------------------------------
# Copyright 2023 VMware, Inc.
# SPDX-License-Identifier: BSD-2-Clause
import pathlib
import errno
import subprocess
import signal
import os
import re
import time
import ctypes
import shutil
import psutil
from typing import Optional, Set, List
from recorder import Recorder
from prmsg import pr_msg
from syscall import ret_to_err, SyscallInfo, ErrorcodeInfo
from ptrace.syscall.ptrace_syscall import SYSCALL_NAMES
from bcc import BPF, DEBUG_SOURCE
from kcore import Kcore


class IntelPTRecorder(Recorder):
    """Recorder that captures a failing syscall with `perf record` and Intel PT.

    An eBPF program reports failing syscalls; each report triggers a perf
    snapshot (SIGUSR2). Only one failure is recorded.
    """

    def __init__(
        self,
        tmp_path: str,
        **kwargs
    ):
        occurrences_filter = kwargs.get('occurrences_filter')
        if occurrences_filter is None or len(occurrences_filter) > 1:
            pr_msg('Using Intel PT only one failure can be recorded', level="WARN")
            kwargs['occurrences_filter'] = {1}

        kwargs['kcore'] = Kcore() if kwargs.get('save_kcore') else None

        super().__init__(**kwargs)
        self.record_proc:Optional[subprocess.Popen[bytes]] = None
        self.record_proc_terminated = False
        self.tmp_path = pathlib.Path(tmp_path)
        # Set by init_tmp_path(); initialised so cleanup() is always safe.
        self.my_tmp_path:Optional[pathlib.Path] = None
        # Filled by close_perf(); initialised so record() can always iterate it.
        self.dump_filenames:List[str] = []
        self.sorted_occurrence_filter = sorted(self.occurrences_filter) if self.occurrences_filter else None

        # Fixed: the pattern was `(?P.*)`, which is not a valid group and made
        # re.compile() raise. The group name was lost from the source text;
        # `message` is a stand-in (findall() returns the group text either way).
        # MULTILINE is now a compile flag so `^`/`$` match on every output line.
        error_pattern = r'^ERROR: (?P<message>.*)$'
        self.error_regex = re.compile(error_pattern, re.MULTILINE)

        dump_pattern = r'\[ perf record: Dump (.*?) \]'
        self.dump_regex = re.compile(dump_pattern)

    def init_tmp_path(self):
        """Create the private `errexp` directory under the tmp path; return success."""
        if not self.tmp_path.exists() or not self.tmp_path.is_dir():
            pr_msg(f'error: tmp path [{self.tmp_path}] is not a valid tmp directory', level="FATAL")
            return False

        self.my_tmp_path = self.tmp_path.joinpath(pathlib.Path("errexp"))
        if not self.my_tmp_path.exists():
            try:
                self.my_tmp_path.mkdir()
            except OSError:
                pr_msg(f"error creating tmp path [{self.my_tmp_path}]", level="FATAL")
                return False

        return True

    def handle_event(self, cpu, data, size):
        """perf-buffer callback: snapshot the trace and remember the first failure."""
        event = self.bpf['syscall_events'].event(data)
        pid = event.pid
        syscall = event.syscall_nr
        err = ret_to_err(event.syscall_ret)

        e = {'err': err, 'syscall_nr': syscall, 'pid': pid, 'ts': event.ts/1e9}

        try:
            # SIGUSR2 asks perf (started with --switch-output) to dump a snapshot.
            self.record_proc.send_signal(signal.SIGUSR2)
        except ProcessLookupError:
            pr_msg("perf process already terminated", level='WARN')
            self.dump_filenames = []
            return

        # Snapshots do not work well with Intel PT, and since the parent might already have
        # many children, it is problematic to attach perf only to these processes again.
        # Wake the thread that reported the error, since the eBPF paused it to allow
        # tracing to be more successful, but let's give one second before we do so.
        if not self.early_stop:
            try:
                os.kill(pid, signal.SIGCONT)
            except ProcessLookupError:
                pass

        # For the same reason we only track one failure
        if len(self.failures) == 0:
            self.failures.append(e)

    def run_perf_record(self, pid: int):
        """Start `perf record` on `pid` and store the process in `self.record_proc`.

        Raises:
            SystemError: if perf exits within the start-up grace period.
        """
        e_entry_class, e_entry_subclass, entry_filter = self.get_filter_string(exit=False)
        e_exit_class, e_exit_subclass, exit_filter = self.get_filter_string(exit=True)

        # We need to save kcore since without it, retpolines are not resolved
        record_args_raw = [self.perf, "record",
                           '-e', 'intel_pt/noretcomp=1/k',
                           # '-e', f'{e_entry_class}:{e_entry_subclass}',
                           # entry_filter and f'--filter={entry_filter}',
                           # '-e', f'{e_exit_class}:{e_exit_subclass}',
                           # exit_filter and f'--filter={exit_filter}',
                           '--kcore',
                           '--timestamp',
                           '-p', f'{pid}',
                           '--switch-output',
                           f'--snapshot=e{self.snapshot_size}',
                           f'-m,{(self.snapshot_size >> 12)}']

        record_args_raw.append(f'-o{self.my_tmp_path.joinpath("perf.data")}')

        record_args = [arg for arg in record_args_raw if arg is not None]

        pr_msg(f"running: {' '.join(record_args)}", level="INFO")

        # Popen raises on failure to start, so no None check is needed.
        record_proc = subprocess.Popen(record_args, stdout=subprocess.PIPE,
                                       stderr=subprocess.STDOUT)

        # Give perf time to start (or fail) before checking on it.
        time.sleep(2)

        # Check that perf is running
        if record_proc.poll() is not None:
            record_proc.wait()
            assert record_proc.stdout is not None
            perf_output = record_proc.stdout.read()
            perf_output_str = perf_output.decode('utf-8')
            pr_msg(f"perf failed: {perf_output_str}", level="FATAL")
            pr_msg(f"hint: check that perf that is compatiable with the current kernel was provided", level="WARN")
            raise SystemError("error running perf record")

        # Save the process for handle_event to see it
        self.record_proc = record_proc

    def run_perf_script(self, file: str) -> str:
        """Decode the branch trace in `file` with `perf script` and return its text.

        Raises:
            SystemError: if perf fails, times out or cannot be executed.
        """
        args = [self.perf, "script", "--itrace=b", "-i", file]
        pr_msg(f"running: {' '.join(args)}", level='INFO')

        try:
            output = subprocess.check_output(
                args,
                stderr=subprocess.STDOUT,
                timeout=60,
                universal_newlines=True
            )
        except (subprocess.CalledProcessError, PermissionError, subprocess.TimeoutExpired) as exc:
            raise SystemError(f"error starting perf itrace: {exc}")

        return output

    def cleanup(self):
        """Remove the private tmp directory, if it was created."""
        if self.my_tmp_path and self.my_tmp_path.exists():
            shutil.rmtree(self.my_tmp_path)
        self.my_tmp_path = None

    def prepare_bpf(self):
        """Load the eBPF program, write its configuration and attach it."""
        # There is a bug in bcc that causes a warning to be printed to stderr
        syscall_name = SYSCALL_NAMES.get(self.syscall_filter, None)

        b = BPF(src_file="syscall_failure_ebpf.c",
                cflags=["-w", "-Wno-error", "-Wno-warning"],
                debug=DEBUG_SOURCE if self.debug else 0)

        # The per-syscall tracepoint is deliberately disabled ("if False").
        tp = (f"syscalls:sys_exit_{syscall_name}" if False and syscall_name is not None else
              "raw_syscalls:sys_exit")

        def create_ulonglong(value):
            # An all-ones value is written when the setting is absent (None).
            return ctypes.c_ulonglong(value) if value is not None else ctypes.c_ulonglong(0xffffffffffffffff)

        # Config keys
        SYSCALL_FILTER_KEY = 1
        ERRCODE_FILTER_KEY = 2
        MONITORED_PID_KEY = 3
        SORTED_OCCURRENCE_FILTER_KEY = 4
        FLAGS_KEY = 5

        # Create a dictionary to store config
        config_map = {
            SYSCALL_FILTER_KEY: self.syscall_filter,
            ERRCODE_FILTER_KEY: -self.errcode_filter if self.errcode_filter is not None else None,
            MONITORED_PID_KEY: self.monitored_pid,
            SORTED_OCCURRENCE_FILTER_KEY: self.sorted_occurrence_filter[0] if self.sorted_occurrence_filter else None,
            FLAGS_KEY: 1 if self.early_stop else 0
        }

        for key, value in config_map.items():
            b["config_map"][ctypes.c_ulonglong(key)] = create_ulonglong(value)

        b.attach_tracepoint(tp=tp, fn_name="trace_syscalls")
        b["syscall_events"].open_perf_buffer(self.handle_event)
        self.bpf = b

    def close_perf(self):
        """Stop perf, parse its output and set `self.dump_filenames`.

        Raises:
            Exception: if perf printed an "ERROR:" line or no dump file name.
        """
        if self.record_proc is None:
            return

        # Fixed: the original tested `poll()` for truthiness, so a perf process
        # that was still running (poll() is None) was never interrupted.
        if self.record_proc.poll() is None:
            try:
                self.record_proc.send_signal(signal.SIGINT)
            except ProcessLookupError:
                pr_msg("perf process already terminated", level='WARN')
                self.dump_filenames = []
                return

        assert self.record_proc.stdout is not None
        # communicate() drains the pipe while waiting; wait() followed by
        # read() can deadlock once perf fills the pipe buffer.
        perf_output, _ = self.record_proc.communicate()
        err = self.record_proc.returncode
        perf_output_str = perf_output.decode('utf-8')
        pr_msg(f'record proc output: {perf_output_str}', level='DEBUG')
        if err not in {0, None, -errno.ENOENT}:
            pr_msg(f'error closing perf: {err}', level="WARN")

        # Fixed: re.MULTILINE used to be passed as findall()'s `pos` argument.
        matches = self.error_regex.findall(perf_output_str)
        if matches:
            pr_msg(matches[0], level='ERROR')
            raise Exception(matches[0])

        matches = self.dump_regex.findall(perf_output_str)
        if not matches:
            pr_msg(f'perf output: {perf_output_str}', level='ERROR')
            raise Exception('failed to find perf dump file')

        # Snapshots are broken with Intel-PT. Take only the first one.
        self.dump_filenames = matches[:1]
        self.record_proc = None

    def record(self, args:'list[str]') -> int:
        """Run `args` under tracing until it exits (or the first failure with
        early stop), then decode and save the recording.

        Returns the `collected` counter, which is never incremented here (0).
        """
        if not self.init_tmp_path():
            return 0

        collected = 0

        try:
            self.init_process(args)
        except (FileNotFoundError, PermissionError) as e:
            pr_msg(f"error starting process: {e}", level="FATAL")
            return 0

        self.prepare_bpf()
        self.run_perf_record(self.monitored_pid)

        assert self.record_proc is not None

        self.detach_all_processes()

        try:
            while not self.early_stop or len(self.failures) == 0:
                self.bpf.perf_buffer_poll(1)
                time.sleep(0.001)
                try:
                    terminated_pid, _ = os.waitpid(self.monitored_pid, os.WNOHANG)
                    if terminated_pid == self.monitored_pid:
                        pr_msg(f'Child process {terminated_pid} terminated', level='INFO')
                        break
                except ChildProcessError:
                    pass
        except KeyboardInterrupt:
            pr_msg("Interrupted - stop recording", level='INFO')

        if psutil.pid_exists(self.monitored_pid):
            try:
                os.kill(self.monitored_pid, signal.SIGINT)
            except ProcessLookupError:
                pr_msg("monitored process already terminated", level='WARN')

        self.close_perf()

        for filename in self.dump_filenames:
            trace = self.run_perf_script(filename)
            self.traces.append(trace)

        for f in self.failures:
            syscall = SyscallInfo.get_name(f['syscall_nr'])
            err_msg = ErrorcodeInfo.get_name(f['err'])
            pid = f['pid']
            pr_msg(f'[{pid}] syscall {syscall} failed with {err_msg} [{f["err"]}]', level='INFO')

        self.save_failures("intel-pt")
        self.cleanup()

        return collected

    @staticmethod
    def cpu_supports_pt() -> bool:
        """Return True if the first `flags` line of /proc/cpuinfo lists `intel_pt`."""
        # Simple test, perf will deal with more complicated situations later
        try:
            with open('/proc/cpuinfo') as f:
                for l in f:
                    if l.startswith('flags'):
                        # Fixed: split(' ') left the trailing newline glued to
                        # the last flag, so a final `intel_pt` was missed.
                        return 'intel_pt' in l.split()
        except Exception:
            pass

        return False

# NOTE: the rest of this physical line of the dump is the beginning of the
# next file (kallsyms.py); it continues on the following line and is kept verbatim.
# -------------------------------------------------------------------------------- /kallsyms.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 VMware, Inc. 2 | # SPDX-License-Identifier: BSD-2-Clause 3 | from typing import Any, Dict, Tuple, List, Optional, Set, Iterable, Callable 4 | import logging 5 | import pathlib 6 | import subprocess 7 | import io 8 | import abc 9 | import struct 10 | import os 11 | from enum import Enum 12 | from prmsg import pr_msg 13 | from collections import defaultdict 14 | from typing import BinaryIO 15 | 16 | from elftools.elf.elffile import ELFFile 17 | from elftools.elf.sections import NoteSection 18 | 19 | import cle.backends 20 | import angr 21 | from arch import arch 22 | 23 | NT_GNU_BUILD_ID = 3 24 | 25 | def get_vmlinux(user_option:Optional[List[BinaryIO]]) -> List[BinaryIO]: 26 | if user_option is None: 27 | user_option = [] 28 | 29 | # Check if any of the filenames includes 'vmlinux' 30 | if any('vmlinux' in pathlib.Path(f.name).stem for f in user_option): 31 | return user_option 32 | 33 | vmlinux_search = [ 34 | f'/usr/lib/debug/boot/vmlinux-{os.uname().release}', 35 | 'vmlinux' 36 | ] 37 | for vmlinux in vmlinux_search: 38 | try: 39 | f = open(vmlinux, 'rb') 40 | pr_msg(f'Using vmlinux file {vmlinux}', level='INFO') 41 | user_option.append(f) 42 | return user_option 43 | except FileNotFoundError: 44 | pass 45 | except PermissionError: 46 | pr_msg(f'Could not open vmlinux file {vmlinux}', level='ERROR') 47 | 48 | pr_msg('Could not find vmlinux file, trying to continue without one', level='ERROR') 49 | pr_msg('''Consider installing symbols using: 50 | sudo apt install linux-image-$(uname -r)-dbgsym [deb/ubuntu] 51 | sudo dnf debuginfo-install kernel [fedora] 52 | sudo pacman -S linux-headers [arch] 53 | sudo emerge -av sys-kernel/linux-headers [gentoo]
54 | ''', level='WARN') 55 | return user_option 56 | 57 | def find_module_dbg(module_name:str): 58 | pathes = [f'/usr/lib/debug/lib/modules/{os.uname().release}'] 59 | for path in pathes: 60 | if not os.path.exists(path) or not os.path.isdir(path): 61 | continue 62 | for root, dirs, files in os.walk(path): 63 | for file in files: 64 | if file == f'{module_name}.ko' or file == f'{module_name}.ko.debug': 65 | return os.path.join(root, file) 66 | return None 67 | 68 | class Kallsyms: 69 | def __init__(self, objs:List[io.BufferedReader]): 70 | parsed_modules = self.parse_proc_modules() 71 | self.__find_modules(parsed_modules) 72 | 73 | self.keep_sym_types: Set[str] = {'t', 'T', 'w', 'W', 'r', 'R'} 74 | self.type_map:Dict[str, angr.cle.backends.SymbolType] = { 75 | 'a':angr.cle.backends.SymbolType.TYPE_OTHER, 76 | 'A':angr.cle.backends.SymbolType.TYPE_OTHER, 77 | 'd':angr.cle.backends.SymbolType.TYPE_OBJECT, 78 | 'D':angr.cle.backends.SymbolType.TYPE_OBJECT, 79 | 'b':angr.cle.backends.SymbolType.TYPE_OBJECT, 80 | 'B':angr.cle.backends.SymbolType.TYPE_OBJECT, 81 | 'r':angr.cle.backends.SymbolType.TYPE_OBJECT, 82 | 'R':angr.cle.backends.SymbolType.TYPE_OBJECT, 83 | 'v':angr.cle.backends.SymbolType.TYPE_OTHER, 84 | 'V':angr.cle.backends.SymbolType.TYPE_OTHER, 85 | 't':angr.cle.backends.SymbolType.TYPE_FUNCTION, 86 | 'T':angr.cle.backends.SymbolType.TYPE_FUNCTION, 87 | 'w':angr.cle.backends.SymbolType.TYPE_OTHER, 88 | 'W':angr.cle.backends.SymbolType.TYPE_OTHER, 89 | } 90 | 91 | all_syms = self.__read_symbols() 92 | all_segments = self.__analyze_sections(all_syms) 93 | self.exes = dict() 94 | 95 | obj_basenames = {self.__get_basename(pathlib.Path(f.name).stem):f for f in objs} 96 | 97 | def get_obj_base_sz(obj_name:str, syms) -> Tuple[int, int]: 98 | if obj_name == 'vmlinux': 99 | min_addr = next(s[1] for s in syms if s[0] == '_stext') 100 | max_addr = next(s[1] for s in syms if s[0] == '_end') 101 | sz = max_addr - min_addr 102 | elif obj_name in parsed_modules: 103 | 
class Kallsyms:
    """Symbol table and memory layout of the running kernel and its modules.

    The data comes from ``/proc/kallsyms`` and ``/proc/modules``.  When an
    object file with a matching build ID is available, the symbols (and
    their sizes) reported by ``nm`` for that file are used instead of the
    sizes guessed from kallsyms.

    After construction ``self.exes`` maps every object name ('vmlinux', a
    module name, or a '<name>:<index>' pseudo-object for builtin/bpf
    sections) to a dict with the keys 'mapped_addr', 'base_addr', 'size',
    'symbols', 'path' and 'segments'.
    """

    def __init__(self, objs: List[io.BufferedReader]):
        """Collect symbols for the kernel and every loaded module.

        Args:
            objs: Open object files supplied by the user; only their
                ``name`` attribute is used to locate the file on disk.
        """
        parsed_modules = self.parse_proc_modules()
        self.__find_modules(parsed_modules)

        # Symbol types whose address ranges are kept as segments.
        self.keep_sym_types: Set[str] = {'t', 'T', 'w', 'W', 'r', 'R'}
        # Translation from nm/kallsyms type letters to cle symbol types.
        self.type_map: Dict[str, angr.cle.backends.SymbolType] = {
            'a': angr.cle.backends.SymbolType.TYPE_OTHER,
            'A': angr.cle.backends.SymbolType.TYPE_OTHER,
            'd': angr.cle.backends.SymbolType.TYPE_OBJECT,
            'D': angr.cle.backends.SymbolType.TYPE_OBJECT,
            'b': angr.cle.backends.SymbolType.TYPE_OBJECT,
            'B': angr.cle.backends.SymbolType.TYPE_OBJECT,
            'r': angr.cle.backends.SymbolType.TYPE_OBJECT,
            'R': angr.cle.backends.SymbolType.TYPE_OBJECT,
            'v': angr.cle.backends.SymbolType.TYPE_OTHER,
            'V': angr.cle.backends.SymbolType.TYPE_OTHER,
            't': angr.cle.backends.SymbolType.TYPE_FUNCTION,
            'T': angr.cle.backends.SymbolType.TYPE_FUNCTION,
            'w': angr.cle.backends.SymbolType.TYPE_OTHER,
            'W': angr.cle.backends.SymbolType.TYPE_OTHER,
        }

        all_syms = self.__read_symbols()
        all_segments = self.__analyze_sections(all_syms)
        self.exes = dict()

        obj_basenames = {self.__get_basename(pathlib.Path(f.name).stem): f for f in objs}

        def get_obj_base_sz(obj_name: str, syms) -> Tuple[int, int]:
            """Return (lowest address, size) of an object given its symbols."""
            if obj_name == 'vmlinux':
                min_addr = next(s[1] for s in syms if s[0] == '_stext')
                max_addr = next(s[1] for s in syms if s[0] == '_end')
                sz = max_addr - min_addr
            elif obj_name in parsed_modules:
                min_addr = parsed_modules[obj_name]['address']
                sz = int(parsed_modules[obj_name]['size'])
            else:
                min_addr = self.__get_min_addr(syms)
                max_addr = self.__get_max_addr(syms)
                sz = max_addr - min_addr

            return min_addr, sz

        for obj_name, syms in all_syms.items():
            mapped_addr, sz = get_obj_base_sz(obj_name, syms)

            path = None
            if obj_name in obj_basenames:
                path = obj_basenames[obj_name].name
            elif obj_name in parsed_modules:
                path = parsed_modules[obj_name].get('path')

            # Only trust an object file that matches the running kernel.
            if path is not None:
                with open(path, 'rb') as f:
                    if not self.check_build_id(f):
                        pr_msg(f'Build ID mismatch for {obj_name}', level='WARN')
                        path = None

            self.exes[obj_name] = {
                'mapped_addr': mapped_addr,
                'base_addr': arch.default_text_base if obj_name == 'vmlinux' else 0,
                'size': sz,
                'symbols': [],
                'path': path,
                'segments': all_segments[obj_name],
            }

            if path is None:
                # No usable file: fall back to the kallsyms symbols.
                self.exes[obj_name]['symbols'] = self.__relative_symbol_tuples(syms, mapped_addr, sz)
                continue

            try:
                with open(path, 'rb') as f:
                    base_syms = self.__read_sizes(f)
            except FileNotFoundError as e:
                # Report the path; the previous handle is closed at this point.
                pr_msg(f'Could not find file {path}: {e}', level='WARN')
                continue

            # NOTE(review): for modules this returns the live load address,
            # not an address derived from base_syms - confirm this is intended.
            base_addr, _ = get_obj_base_sz(obj_name, base_syms)
            rebased_syms = self.__relative_symbol_tuples(base_syms, base_addr, sz)

            # update() instead of item assignment keeps mypy happy.
            self.exes[obj_name].update({
                'base_addr': base_addr,
                'symbols': rebased_syms,
            })

    def __find_modules(self, parsed_modules):
        """Record in parsed_modules[name]['path'] the debug file of each module."""
        pathes = [f'/usr/lib/debug/lib/modules/{os.uname().release}']

        for path in pathes:
            if not os.path.exists(path) or not os.path.isdir(path):
                continue

            for root, dirs, files in os.walk(path):
                for file in files:
                    if not file.endswith('.ko.debug') and not file.endswith('.ko'):
                        continue

                    # In kallsyms modules show with underscores instead of dashes
                    basename = pathlib.Path(file).stem.split('.')[0]
                    basename_underscored = basename.replace('-', '_')

                    for obj_name in [basename, basename_underscored]:
                        if obj_name in parsed_modules:
                            parsed_modules[obj_name]['path'] = os.path.join(root, file)
                            break

    def __relative_symbol_tuples(self, syms: List[Tuple[str, int, str, Optional[int]]], min_addr: int, sz: int) -> List[Tuple[str, int, str, Optional[int]]]:
        """Return the symbols inside [min_addr, min_addr + sz), rebased to min_addr."""
        max_addr = min_addr + sz

        return [(s[0], s[1] - min_addr, s[2], s[3]) for s in syms if min_addr <= s[1] < max_addr]

    def __get_min_addr(self, syms: List[Tuple[str, int, str, Optional[int]]]) -> int:
        """Return the lowest address of a text/rodata symbol (ValueError if none)."""
        return min(s[1] for s in syms if s[2] in {'t', 'T', 'r', 'R'})

    def __get_max_addr(self, syms: List[Tuple[str, int, str, Optional[int]]]) -> int:
        """Return the highest end address of a sized text/rodata symbol."""
        return max(s[1] + s[3] for s in syms if s[2] in {'t', 'T', 'r', 'R'} and s[3] is not None)

    def __read_symbols(self) -> Dict[str, List[Tuple[str, int, str, Optional[int]]]]:
        """Parse /proc/kallsyms into per-object (name, addr, type, size) lists.

        kallsyms carries no sizes, so each size is guessed as the distance to
        the next symbol in address order; the last symbol extends to the end
        of its page.

        Raises:
            Exception: if no symbol could be read.
        """
        builtin_index: defaultdict = defaultdict(int)

        logging.info("reading symbols")
        with open("/proc/kallsyms", "rb") as f:
            data = f.read().decode("ascii")

        raw = []
        for l in data.splitlines():
            fields = l.split()
            if len(fields) < 3:
                continue
            addr = int(fields[0], 16)
            sym_type = fields[1]
            name = fields[2]
            # The optional 4th column is "[module]"; strip the brackets.
            module_name = 'vmlinux' if len(fields) < 4 else fields[3][1:-1]

            # Builtin sections can overlap each other, which angr doesn't like. So
            # we are not going to merge them. And instead we are creating each one a
            # unique name with a different suffix.
            if module_name.startswith('__builtin') or module_name in {'bpf'}:
                suffix = builtin_index[module_name]
                builtin_index[module_name] += 1
                module_name = f'{module_name}:{suffix}'

            raw.append((name, addr, sym_type, module_name))

        raw.sort(key=lambda x: x[1])
        if len(raw) == 0:
            pr_msg("cannot read symbol addresses from kallsyms", level="ERROR")
            raise Exception("cannot read symbol addresses from kallsyms")

        syms = defaultdict(list)

        # Guess the sizes
        prev = raw[0]
        for sa in raw[1:]:
            syms[prev[3]].append((prev[0], prev[1], prev[2], sa[1] - prev[1]))
            prev = sa

        remaining_in_page = arch.page_size - prev[1] % arch.page_size
        syms[prev[3]].append((prev[0], prev[1], prev[2], remaining_in_page))
        return syms  # type: ignore

    def __analyze_sections(self, syms: Dict[str, List[Tuple[str, int, str, Optional[int]]]]) -> Dict[str, List[Tuple[int, int]]]:
        """Compute, per object, the address ranges covered by kept symbol types.

        For vmlinux the ranges are additionally merged with the rodata and
        text ranges and with one page starting at ``idt_table``.
        """
        segments_dict = dict()
        vmlinux = syms['vmlinux']

        for k, v in syms.items():
            sections: List[Tuple[int, int]] = []
            cur_section_start = None
            cur_section_end = None

            for sa in v:
                if sa[3] is None:
                    continue
                if sa[2] in self.keep_sym_types:
                    if cur_section_start is None:
                        cur_section_start = sa[1]
                    cur_section_end = sa[1] + sa[3]
                elif sa[2] not in self.keep_sym_types and cur_section_start is not None:
                    # A symbol of another type closes the running section.
                    cur_section_end = sa[1]
                    if cur_section_start != cur_section_end:
                        sections.append((cur_section_start, sa[1]))
                    cur_section_start = None

            if cur_section_start is not None:
                assert cur_section_end is not None
                sections.append((cur_section_start, cur_section_end))

            segments_dict[k] = sections

        include_ranges_syms = [
            ('__start_rodata', '__end_rodata'),
            ('_stext', '_etext'),
        ]
        # find the symbols from include_ranges_syms in vmlinux
        include_ranges = []
        for start, end in include_ranges_syms:
            start_addr = next(s[1] for s in vmlinux if s[0] == start)
            end_addr = next(s[1] for s in vmlinux if s[0] == end)
            include_ranges.append((start_addr, end_addr))

        # TODO: Move to arch
        start_addr = next(s[1] for s in vmlinux if s[0] == 'idt_table')
        end_addr = start_addr + 4096
        include_ranges.append((start_addr, end_addr))

        combined_ranges = segments_dict['vmlinux'] + include_ranges
        combined_ranges.sort(key=lambda x: x[0])

        # Initialize the merged ranges list with the first range
        merged_ranges = [combined_ranges[0]]

        for current_start, current_end in combined_ranges[1:]:
            last_range_start, last_range_end = merged_ranges[-1]

            # Check if the current range overlaps or is adjacent to the last range in the merged list
            if current_start <= last_range_end + 1:
                # Update the end value of the last range to the maximum of the current and last end values
                merged_ranges[-1] = (last_range_start, max(current_end, last_range_end))
            else:
                # If the current range doesn't overlap or is not adjacent, append it to the merged list
                merged_ranges.append((current_start, current_end))

        segments_dict['vmlinux'] = merged_ranges

        return segments_dict

    @staticmethod
    def __get_basename(filename: str) -> str:
        """Map a file name to the object name used in kallsyms."""
        if filename.startswith('vmlinux'):
            return 'vmlinux'

        stem = filename.split('.')[0]
        return stem.replace('-', '_')

    @staticmethod
    def extract_build_id(data) -> Optional[str]:
        """Return the GNU build ID found in raw ELF note data as a hex string.

        Returns None when no NT_GNU_BUILD_ID note is present.  When several
        are present the last one wins.
        """
        header = struct.Struct('III')  # namesz, descsz, type
        build_id = None
        offset = 0
        # Stop on a truncated header instead of raising struct.error.
        while offset + header.size <= len(data):
            namesz, descsz, note_type = header.unpack_from(data, offset)
            name_end = offset + header.size + namesz

            # Name and descriptor are both padded to 4-byte boundaries.
            desc_start = (name_end + 3) & ~3
            desc_end = desc_start + descsz

            # Get it from the last note if there are multiple ones
            if note_type == NT_GNU_BUILD_ID:
                build_id = data[desc_start:desc_end]

            offset = (desc_end + 3) & ~3

        if build_id is None:
            return None

        return bytes(build_id).hex()

    @staticmethod
    def get_module_build_id(module_name) -> Optional[str]:
        """Return the build ID of a loaded module from sysfs.

        Raises:
            Exception: if the module's notes file does not exist.
        """
        build_id_path = pathlib.Path(f"/sys/module/{module_name}/notes/.note.gnu.build-id")

        if not build_id_path.exists():
            raise Exception(f"{build_id_path} not found. Ensure the module is loaded and you have the required permissions.")

        data = build_id_path.read_bytes()
        return Kallsyms.extract_build_id(data)

    @staticmethod
    def get_build_id_from_vmlinux(vmlinux_file: io.BufferedReader) -> Optional[str]:
        """Return the build ID note description of an open ELF file, if any."""
        r = None
        elf = ELFFile(vmlinux_file)
        for section in elf.iter_sections():
            if isinstance(section, NoteSection):
                for note in section.iter_notes():
                    if note.n_type == 'NT_GNU_BUILD_ID':
                        r = note.n_desc
        return r

    @staticmethod
    def get_build_id_from_kernel_notes(kernel_notes_file: pathlib.Path):
        """Return the build ID stored in a raw kernel notes file."""
        data = kernel_notes_file.read_bytes()
        return Kallsyms.extract_build_id(data)

    @staticmethod
    def check_build_id(obj_file: io.BufferedReader) -> bool:
        """Tell whether obj_file has the same build ID as the live kernel/module."""
        file_build_id = Kallsyms.get_build_id_from_vmlinux(obj_file)

        path = pathlib.Path(obj_file.name)
        basename = Kallsyms.__get_basename(path.name)

        if basename == 'vmlinux':
            live_build_id = Kallsyms.get_build_id_from_kernel_notes(pathlib.Path("/sys/kernel/notes"))
        else:
            live_build_id = Kallsyms.get_module_build_id(basename)

        if file_build_id is None:
            logging.info(f"no build ID found in {obj_file}")
            return False

        if live_build_id is None:
            logging.info(f"no build ID found in kernel")
            return False

        if file_build_id != live_build_id:
            logging.info(f"build ID mismatch: {file_build_id} != {live_build_id}")
            return False

        return True

    def __read_sizes(self, file: io.BufferedReader) -> List[Tuple[str, int, str, Optional[int]]]:
        """Read (name, addr, type, size) tuples of an object file using nm.

        Raises:
            subprocess.CalledProcessError: if nm fails.
        """
        filename = pathlib.Path(file.name)
        logging.info(f"reading symbol sizes: {filename}")

        # Reading the ELF using elftools is incredibly slow. Use nm instead.
        args = ['nm', '-n', '--print-size', str(filename)]
        logging.debug("running: {0}".format(' '.join(args)))
        try:
            output = subprocess.check_output(
                args, stderr=subprocess.STDOUT, timeout=20,
                universal_newlines=True)
        except subprocess.CalledProcessError as e:
            pr_msg(f"failed reading symbol file: {e}", level="ERROR")
            raise e

        syms: List[Tuple[str, int, str, Optional[int]]] = []
        for l in output.splitlines():
            # Column layout: 16 hex digits of address, then either
            # "<size> <type> <name>" or "<type> <name>".
            addr_field = l[:16]
            rest = l[17:].split()
            if addr_field == ' ' * 16 or len(rest) not in (2, 3):
                continue
            try:
                addr = int(addr_field, 16)
                size = int(rest[0], 16) if len(rest) == 3 else None
            except ValueError:
                # stderr is merged into the output; skip nm diagnostics.
                continue
            syms.append((rest[-1], addr, rest[-2], size))

        return syms

    def parse_proc_modules(self, path: str = '/proc/modules') -> Dict[str, Dict[str, Any]]:
        """Parse the loaded-module list.

        Each line reads: name size refcount dependencies state address.

        Args:
            path: File to parse; defaults to the live ``/proc/modules``.

        Returns:
            Module name -> dict with the keys 'size', 'ref_count' (None when
            shown as '-'), 'dependencies', 'state' and 'address'.
        """
        modules = dict()

        with open(path, 'r') as f:
            for line in f:
                parts = line.split()
                if len(parts) < 6:
                    continue
                # The dependency column is "-" or a comma-terminated list.
                dependencies = [dep for dep in parts[3].split(',') if dep and dep != '-']

                modules[parts[0]] = {
                    'size': int(parts[1]),
                    'ref_count': None if parts[2] == '-' else int(parts[2]),
                    'dependencies': dependencies,
                    'state': parts[4],
                    'address': int(parts[5], 16),
                }

        return modules

    def get_symbols(self, backend: "cle.Backend", name: str) -> List["cle.Symbol"]:
        """Return the symbols of object ``name`` as cle symbols owned by backend."""
        syms = self.exes[name]['symbols']
        assert isinstance(syms, list)

        syms = [cle.Symbol(owner=backend, name=s[0],
                           relative_addr=s[1],
                           sym_type=self.type_map[s[2]],
                           size=s[3]) for s in syms]

        return syms
| syms = self.exes[name]['symbols'] 438 | assert isinstance(syms, list) 439 | 440 | syms = [cle.Symbol(owner = backend, name = s[0], 441 | relative_addr = s[1], 442 | sym_type = self.type_map[s[2]], 443 | size = s[3]) for s in syms] 444 | 445 | return syms -------------------------------------------------------------------------------- /reporter.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 VMware, Inc. 2 | # SPDX-License-Identifier: BSD-2-Clause 3 | import logging 4 | import abc 5 | import io 6 | import re 7 | from typing import Any, Dict, Iterable, List, Optional, Set, Tuple, Union 8 | import colors 9 | import pathlib 10 | import copy 11 | 12 | from syscall import SyscallInfo, ErrorcodeInfo 13 | from ftrace import Ftrace 14 | from angrmgr import Angr 15 | from angrsim import AngrSim 16 | from arch import arch 17 | from cle.backends import Symbol 18 | from prmsg import pr_msg, uptime 19 | from addr2line import Addr2Line 20 | 21 | class Reporter(metaclass=abc.ABCMeta): 22 | def __init__(self, 23 | objs: List[io.BufferedReader], 24 | syscall_filter: Optional[int], 25 | errcode_filter: Optional[int], 26 | occurances_filter: Optional[Set[int]], 27 | angr_mgr: Angr, 28 | print_stats: bool, 29 | failures: List[Dict[str, Any]], 30 | traces: List[Union[List[Dict[str, Union[int, str, float]]], str]], 31 | src_path: Optional[str] = None, 32 | ): 33 | self.objs = objs 34 | self.syscall_filter = syscall_filter 35 | self.errcode_filter = errcode_filter 36 | self.occurances_filter = occurances_filter 37 | self.failures = failures 38 | self.kallsyms = None 39 | self.angr_mgr = angr_mgr 40 | self.print_stats = print_stats 41 | self.traces = traces 42 | self.src_path = src_path and pathlib.Path(src_path) 43 | 44 | @abc.abstractmethod 45 | def report(self): 46 | pass 47 | 48 | @property 49 | @abc.abstractmethod 50 | def detailed_trace(self): 51 | pass 52 | 53 | def do_print_stats(self, errcode:int, sim_attempts:int, 
class Reporter(metaclass=abc.ABCMeta):
    """Base class that turns recorded syscall failures into readable reports.

    Subclasses provide ``report()`` and the ``detailed_trace`` property; this
    class implements the shared analysis: choosing which trace slices to
    simulate, building call stacks, mapping them to source locations and
    printing the result.
    """

    def __init__(self,
                 objs: List[io.BufferedReader],
                 syscall_filter: Optional[int],
                 errcode_filter: Optional[int],
                 occurances_filter: Optional[Set[int]],
                 angr_mgr: 'Angr',
                 print_stats: bool,
                 failures: List[Dict[str, Any]],
                 traces: List[Union[List[Dict[str, Union[int, str, float]]], str]],
                 src_path: Optional[str] = None,
                 ):
        self.objs = objs
        self.syscall_filter = syscall_filter
        self.errcode_filter = errcode_filter
        self.occurances_filter = occurances_filter
        self.failures = failures
        self.kallsyms = None
        self.angr_mgr = angr_mgr
        self.print_stats = print_stats
        self.traces = traces
        # Root of the kernel source tree, or the falsy value passed in.
        self.src_path = src_path and pathlib.Path(src_path)

    @abc.abstractmethod
    def report(self):
        pass

    @property
    @abc.abstractmethod
    def detailed_trace(self):
        pass

    def do_print_stats(self, errcode: int, sim_attempts: int, branches: List, sim_stats: Dict[str, Union[List, int]]):
        """Print the statistics of one analysis at DATA level."""
        pr_msg("---", new_line_after = True, level='DATA')
        pr_msg(f"errorcode: {errcode} [{ErrorcodeInfo.get_name(errcode)}]", level='DATA')
        pr_msg(f"divergence: {sim_stats['simulation diverged']}", level='DATA')
        pr_msg(f"functions: {sim_attempts}", level='DATA')
        pr_msg(f"branches: {len(branches)}", level='DATA')
        pr_msg(f"failure returning symbol index: {sim_stats['failure returning symbol index']}", level='DATA')
        # The label below is emitted as-is; its spelling is part of the output.
        pr_msg(f"failure reutrning function index: {sim_stats['failure returning function index']}", level='DATA')
        pr_msg(f"callstack function depth: {sim_stats['callstack function depth']}", level='DATA')
        pr_msg(f"callstack: {sim_stats['depth']}", level='DATA')
        pr_msg(f"analysis time: {int(uptime())}", level='DATA')
        pr_msg(f"recording time: {sim_stats.get('simulation time', 'N/A')}", level='DATA')
        if 'backtrack' in sim_stats:
            pr_msg(f'candidates: {sim_stats["divergence points"]}', level='DATA')
            pr_msg(f'backtracking: {sim_stats["backtrack"]}', level='DATA')
        pr_msg('', level='DATA', new_line_after = True)

    def get_unsimulated_callstack(self, branches: List[Dict[str, Any]], end: int) -> List[int]:
        """Rebuild the callers' stack from the returns recorded after ``end``."""
        callstack = []
        first = True

        # Go from the end of the trace to the return point of the function we
        # care about, and build the callstack
        for branch in reversed(branches[end + 1:]):
            from_ip = branch['from_ip']
            to_ip = branch['to_ip']
            if from_ip is None:
                continue
            insn = self.angr_mgr.get_insn(from_ip)

            if arch.is_ret_insn(insn) and to_ip is not None:
                if first:
                    callstack.append(to_ip)
                callstack.append(from_ip)
            elif arch.is_call_insn(insn) and len(callstack) > 0:
                callstack.pop()
            first = False

        callstack.reverse()
        return callstack

    def get_entry_callstack(self, branch: Dict[str, Any]) -> Optional[List[int]]:
        """Return the call addresses recorded with ``branch``, or None if absent."""
        if 'callstack' not in branch:
            return None
        # Skip the caller and callee on top of the callstack.
        # TODO: for consistency it would be best to ensure the callstack is using
        # the call addresses instead of the return addresses, and then remove this
        # manipulation.
        prev_ips = [self.angr_mgr.prev_insn_addr(ip) for ip in branch['callstack'][2:]]
        return [ip for ip in prev_ips if ip is not None]

    def report_one_fallback(self,
                            branches: List[Dict[str, Union[int, Dict[str, int], None, List[int]]]],
                            errcode: int,
                            order: List[Tuple[int, int]],
                            ) -> bool:
        '''Report a failure using the fallback method, which is to just print the
        callstack of the outermost function that returned the error.

        Returns False when there is nothing to report.'''
        if not order:
            # No candidate slice survived filtering; nothing to fall back on.
            return False

        start, end = order[-1]
        ret = branches[end - 1].get('ret', None)
        if not isinstance(ret, int) or not ErrorcodeInfo.is_error_code(ret, errcode):
            return False

        callstack = (self.get_entry_callstack(branches[start]) or
                     self.get_unsimulated_callstack(branches, end))

        assert isinstance(callstack, list)
        caller_address = branches[start]['from_ip']
        callee_address = branches[start]['to_ip']
        assert isinstance(caller_address, int)
        assert isinstance(callee_address, int)
        callstack = [callee_address, caller_address] + callstack
        res = {
            'callstack': callstack,
            'failure returning symbol index': 0
        }
        self.show_results(res)
        return True

    def report_one(self,
                   branches: List[Dict[str, Union[int, Dict[str, int], None, List[int]]]],
                   errcode: int,
                   sim_syms: Optional[Set['Symbol']] = None,
                   simulate_all: bool = False,
                   ):
        """Analyze one failing syscall trace and print the findings.

        Candidate functions are simulated one at a time until one simulation
        yields a failure stack; otherwise the fallback report is tried.
        """
        if self.errcode_filter and errcode != self.errcode_filter:
            return

        if simulate_all:
            order = [(0, len(branches))]
        else:
            order = self.get_analysis_order(branches, errcode)

        # TODO: get rid off. Instead, get_sym() or something should make this cleanup
        if sim_syms is not None:
            self.angr_mgr.remove_unsupported_pyvex_insn(sim_syms)

        avoid_repeated_syms = True
        tried_syms = set()
        success = False
        sim_attempts = 0
        for start, end in order:
            sim_attempts += 1
            ip = branches[start]['to_ip']
            if ip is None:
                continue
            sym = self.angr_mgr.get_sym(ip)
            if sym is None:
                continue
            if avoid_repeated_syms and sym in tried_syms:
                continue
            if (self.angr_mgr.is_skipped_sym(ip) or
                self.angr_mgr.is_fastpath_to_ret(ip) or
                self.angr_mgr.is_fastpath_to_out(ip)):
                continue
            pr_msg(f"trying {sym.name}()...", level="INFO")
            tried_syms.add(sym)

            sim = AngrSim(
                angr_mgr = self.angr_mgr,
                branches = branches[start:end],
                errcode = errcode,
                has_calls = False,
                sim_syms = sim_syms,
                detailed_trace = self.detailed_trace
            )

            try:
                res = sim.simulate()
            except SystemError as e:
                pr_msg(f'retrying: {e}', level='WARN')
                continue

            if 'failure_stack' not in res:
                continue

            simulation_callstack = res['failure_stack']
            assert isinstance(simulation_callstack, list)

            unsimulated_callstack = (self.get_entry_callstack(branches[start]) or
                                     self.get_unsimulated_callstack(branches, end))

            assert isinstance(unsimulated_callstack, list)
            callstack = simulation_callstack + unsimulated_callstack

            errorcode_return_depth = res['errorcode return depth']
            assert isinstance(errorcode_return_depth, int)

            res['callstack'] = callstack
            res['failure returning symbol index'] = max(len(callstack) - len(unsimulated_callstack) - errorcode_return_depth - 1, 0)

            self.show_results(res)

            if self.print_stats:
                res['depth'] = len(callstack)
                self.do_print_stats(errcode, sim_attempts, branches, res)
            success = True
            break

        # The very least look at the most external function return value
        if not success:
            success = self.report_one_fallback(branches, errcode, order)

        if not success:
            pr_msg("analysis failed", level="ERROR")

    def change_to_relative_path(self, path: str) -> str:
        """Strip everything up to and including a 'linux-X.Y.Z/' directory.

        Relative paths and paths without such a component are returned as-is.
        """
        if len(path) == 0 or path[0] != '/':
            return path
        match = re.search(r'linux-\d+\.\d+\.\d+/(.*)', path)
        if match:
            return match.group(1)
        return path

    def get_callstack_locations(self, callstack: List[int]) -> List[Dict[str, Any]]:
        """Resolve each address to its symbol, offset and source locations."""
        addr2line = Addr2Line.get_instance()

        addr_to_base = {a: self.angr_mgr.base_addr(a) for a in callstack}
        base_lines_dict = addr2line.run(addr_to_base.values())

        # change absolute paths to relative paths
        for base_locs in base_lines_dict.values():
            for loc in (base_locs or []):
                loc['file'] = self.change_to_relative_path(loc['file'])

        # map addresses to locations
        locs = {a: base_lines_dict[addr_to_base[a]] for a in callstack}

        callstack_locations: List[Dict] = []
        for addr in callstack:
            try:
                sym = self.angr_mgr.get_sym(addr)
            except ValueError:
                sym = None

            callstack_locations.append({
                'addr': addr,
                'sym': sym,
                'offset': sym and addr - sym.rebased_addr,
                'locs': locs.get(addr),
            })

        return callstack_locations

    def analyze_source_callstack(self, res: Dict):
        """Expand res['callstack'] into a per-function 'source callstack'.

        One frame can map to several source functions (one per entry in its
        'locs' list).  Adds 'failure returning function index', 'callstack
        function depth' and 'source callstack' to ``res``.
        """
        callstack = res['callstack']
        failure_returning_symbol_index = res['failure returning symbol index']
        callstack_locations = self.get_callstack_locations(callstack)
        failure_returning_function_index = 0
        callstack_function_depth = 1

        source_callstack: List[Dict] = []

        for i, callstack_location in enumerate(callstack_locations):
            locs = callstack_location['locs']

            # A frame without source info still occupies one entry.
            n_funcs = len(locs) if locs else 1
            if failure_returning_symbol_index is not None and failure_returning_symbol_index > i:
                failure_returning_function_index += n_funcs

            callstack_function_depth += n_funcs

            if not locs:
                source_callstack.append(callstack_location)
                continue

            for loc in locs:
                entry = copy.copy(callstack_location)
                del entry['locs']
                entry.update({
                    'file': loc['file'],
                    'line': loc['line'],
                    'col': loc.get('col'),
                    'func': loc['func'],
                })
                source_callstack.append(entry)

        res.update({
            'failure returning function index': failure_returning_function_index,
            'callstack function depth': callstack_function_depth,
            'source callstack': source_callstack
        })

    def read_surrounding_code(self, res: Dict):
        """Attach a 'code' excerpt to the root-cause and failure-returning frames."""
        source_callstack = res['source callstack']
        if len(source_callstack) == 0:
            return

        to_extract_indexes = {0, res['failure returning function index']}

        for idx in sorted(to_extract_indexes):
            if not 0 <= idx < len(source_callstack):
                continue
            entry = source_callstack[idx]
            if entry.get('file') is None:
                continue
            try:
                entry['code'] = self.extract_surrounding_code(line=entry['line'],
                                                              col=entry.get('col'),
                                                              file_name=entry['file'])
            except FileNotFoundError as exc:
                pr_msg(str(exc), level='WARN', new_line_before=True)

    def print_surrounding_code(self, res: Dict):
        """Print the first available excerpt: root cause, else failure-returning."""
        source_callstack = res['source callstack']
        index_message = [(0, 'root-cause')]

        # 'source callstack' is indexed per function, so use the function
        # index (the one read_surrounding_code() attached code to).
        failure_idx = res['failure returning function index']
        if failure_idx != 0:
            index_message.append((failure_idx, 'failure-returning'))

        for idx, msg in index_message:
            if not 0 <= idx < len(source_callstack):
                continue
            callstack_entry = source_callstack[idx]
            if callstack_entry.get('code'):
                pr_msg(f'code around {msg}, {callstack_entry["func"]}():', level='TITLE', new_line_before=True)
                pr_msg(callstack_entry['code'], level='DATA', new_line_after=True)
                break

    def show_results(self, res: Dict):
        """Resolve, annotate and print the call stack stored in ``res``."""
        self.analyze_source_callstack(res)
        self.read_surrounding_code(res)
        self.print_callstack(res)
        self.print_surrounding_code(res)

    def print_callstack(self, res: Dict):
        """Print one line per source-level frame, marking the failing one."""
        failure_returning_function_index = res['failure returning function index']
        pr_msg("callstack (decoding):", level="TITLE", new_line_before=True)

        for i, e in enumerate(res['source callstack']):
            addr = e['addr']
            sym = e['sym']
            bin_loc = hex(addr) if sym is None else f'{sym.name}+{e["offset"]}'

            if 'file' not in e:
                fileline = '?:?'
            else:
                col_str = f':{e["col"]}' if e['col'] is not None else ''
                fileline = f'{e["file"]}:{e["line"]}{col_str}'

            failure_pointer = ' <--' if failure_returning_function_index == i else ''

            # Frames without source information carry no 'func' key.
            pr_msg("{0: <40} {1: <40} {2}() {3}".format(
                bin_loc, fileline, e.get('func', '?'), failure_pointer), level='DATA')

    def get_analysis_order(self,
                           branches: List[Dict],
                           errcode: Optional[int]) -> List[Tuple[int, int]]:
        """Return (start, end) slices of ``branches`` in the order to simulate.

        A call tree is rebuilt from the call/ret instructions in the trace and
        emitted starting from the rightmost leaf.  Slices whose last branch
        has no source address, or whose recorded return value does not match
        ``errcode``, are dropped.
        """
        tree: Dict[str, Union[List, int, bool]] = {'children': [], 'start': 0, 'end': len(branches), 'root': True}
        n: Dict[str, Any]
        cur = tree
        stack: List[Dict[str, Any]] = []

        # We are going to process the entries in reverse, since we know we have
        # the end of the trace, but the beginning might be missing.
        for i in range(len(branches) - 1, -1, -1):
            ip = branches[i]['from_ip']
            insn = ip and self.angr_mgr.get_insn(ip)
            if insn and arch.is_call_insn(insn) and len(stack) != 0:
                cur['start'] = i
                cur = stack.pop()
            elif not insn or arch.is_ret_insn(insn):
                # As we do not know where the call is, mark it as the beginning
                # of the trace, for cases where we have a ret without a call.
                n = {'children': [], 'start': 0, 'end': i + 1}

                assert isinstance(cur['children'], list)
                cur['children'].insert(0, n)

                stack.append(cur)
                cur = n

        # Scan from the rightmost leaf and add to results
        stack = [tree]
        results = list()
        while True:
            n = stack[-1]
            if len(n['children']) != 0:
                stack.append(n['children'][-1])
                continue

            if 'root' in n:
                break

            parent = stack[-2]
            parent['children'].pop()
            results.append((n['start'], n['end']))
            stack.pop()

        results = [r for r in results
                   if (branches[r[1] - 1]['from_ip'] is not None and
                       (errcode is None or 'ret' not in branches[r[1] - 1] or
                        ErrorcodeInfo.is_error_code(branches[r[1] - 1]['ret'], errcode)))]

        return results

    def parse_trace_entry(self, line: str) -> Optional[Dict]:
        """
        Parse a single entry of the trace file.

        Returns None when the line is not a syscall enter/exit record.
        """
        m = Ftrace.entry_exit_regex.match(line)
        if m is None:
            return None

        raw = m.groupdict()
        d: Dict[str, Any] = dict()

        d['time'] = float(raw['time'])
        d['cpu'] = int(raw['cpu'])
        d['pid'] = int(raw['pid'])

        if raw['syscall_enter_name'] is not None:
            args = []
            for arg in raw['syscall_args'].split(','):
                k, v = arg.split(':')
                args.append((k, int(v, 16)))
            d['syscall_args'] = args
            d['type'] = 'syscall_enter'
            d['syscall'] = SyscallInfo.get_syscall_nr(raw['syscall_enter_name'])

        elif raw['syscall_exit_name'] is not None:
            d['type'] = 'syscall_exit'
            d['syscall'] = SyscallInfo.get_syscall_nr(raw['syscall_exit_name'])
            d['syscall_ret'] = int(raw['err2'], 16)

        elif raw['syscall_exit_nr'] is not None:
            d['type'] = 'syscall_exit'
            # The number comes from the regex groups, not from the result dict.
            d['syscall'] = int(raw['syscall_exit_nr'])
            d['syscall_ret'] = int(raw['err'], 16)

        return d

    @staticmethod
    def tokenize_c_code(code):
        """Split a line of C code into (start offset, token) pairs."""
        # Regular expression pattern to match common C tokens
        pattern = r'\b[_a-zA-Z][_a-zA-Z0-9]*\b|[-+*/%=<>!&|^~]?=|[-+*/%<>!&|^~]|\d+\.\d+|\d+|".*?"|\'.*?\'|[(){}[\],.;]'
        return [(match.start(), match.group()) for match in re.finditer(pattern, code)]

    @staticmethod
    def get_tokens_around_column(code, column):
        """Split ``code`` into (text before, token at ``column``, text after).

        When no token covers ``column`` the whole line is returned as the
        text before and the other two parts are empty.
        """
        for start, token in Reporter.tokenize_c_code(code):
            if start <= column < start + len(token):
                return code[:start], token, code[start + len(token):]

        # Keep the line intact instead of dropping it.
        return code, '', ''

    def extract_surrounding_code(self, line: int, col: Optional[int], file_name: str) -> Optional[str]:
        """Return a numbered excerpt of up to 40 lines around ``line``.

        The target line is marked with '<<<'.  When ``col`` is given and
        non-zero the token at that 1-based column is colored, otherwise the
        whole line is.  Returns None when no source path is configured.

        Raises:
            FileNotFoundError: if the source file does not exist.
        """
        if self.src_path is None:
            return None

        assert isinstance(self.src_path, pathlib.Path)
        file = self.src_path / file_name
        try:
            lines = file.read_text().splitlines()
        except FileNotFoundError as err:
            raise FileNotFoundError(f'Could not find file {file}') from err

        start_line = max(0, line - 20)
        end_line = start_line + 40
        code = lines[start_line:end_line]
        line_offset = line - start_line - 1
        # Only highlight when the reported line really is in the excerpt.
        if 0 <= line_offset < len(code):
            if not col:
                code[line_offset] = colors.color(f'{code[line_offset]} <<<' , fg='red')
            else:
                before_token, failure_token, after_token = self.get_tokens_around_column(code[line_offset], col - 1)
                code[line_offset] = (before_token +
                                    colors.color(f'{failure_token}', fg='red') +
                                    after_token +
                                    colors.color(f' <<<' , fg='red'))
        enumerated = enumerate(code, start_line)
        return '\n'.join(f'{i+1:4} {l}' for i, l in enumerated)
tokenize_c_code(code): 447 | # Regular expression pattern to match common C tokens 448 | pattern = r'\b[_a-zA-Z][_a-zA-Z0-9]*\b|[-+*/%=<>!&|^~]?=|[-+*/%<>!&|^~]|\d+\.\d+|\d+|".*?"|\'.*?\'|[(){}[\],.;]' 449 | return [(match.start(), match.group()) for match in re.finditer(pattern, code)] 450 | 451 | @staticmethod 452 | def get_tokens_around_column(code, column): 453 | tokens = Reporter.tokenize_c_code(code) 454 | before_token = '' 455 | current_token = '' 456 | after_token = '' 457 | 458 | for i, (start, token) in enumerate(tokens): 459 | if start <= column < start + len(token): 460 | current_token = token 461 | before_token = code[:start] 462 | after_token = code[start + len(token):] 463 | break 464 | 465 | return before_token, current_token, after_token 466 | 467 | def extract_surrounding_code(self, line:int, col:int, file_name:str) -> Optional[str]: 468 | if self.src_path is None: 469 | return None 470 | 471 | assert isinstance(self.src_path, pathlib.Path) 472 | file = self.src_path / file_name 473 | try: 474 | lines = file.read_text().splitlines() 475 | except FileNotFoundError: 476 | raise FileNotFoundError(f'Could not find file {file}') 477 | 478 | start_line = max(0, line - 20) 479 | end_line = start_line + 40 480 | code = lines[start_line:end_line] 481 | line_offset = line - start_line - 1 482 | if col == 0: 483 | code[line_offset] = colors.color(f'{code[line_offset]} <<<' , fg='red') 484 | else: 485 | before_token, failure_token, after_token = self.get_tokens_around_column(code[line_offset], col - 1) 486 | code[line_offset] = (before_token + 487 | colors.color(f'{failure_token}', fg='red') + 488 | after_token + 489 | colors.color(f' <<<' , fg='red')) 490 | enumerated = enumerate(code, start_line) 491 | return '\n'.join(f'{i+1:4} {l}' for i, l in enumerated) -------------------------------------------------------------------------------- /x86arch.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 VMware, 
Inc. 2 | # SPDX-License-Identifier: BSD-2-Clause 3 | import capstone 4 | from typing import Any, Dict, Tuple, List, Optional, Set, Iterable, Callable, Union 5 | import angr 6 | import claripy 7 | import copy 8 | import struct 9 | 10 | from cle.backends import Symbol 11 | from abc import ABC, abstractmethod 12 | from abstractarch import Arch, ControlStatePluginArch 13 | 14 | 15 | class ControlStatePluginX86(ControlStatePluginArch): 16 | def __init__(self): 17 | super().__init__() 18 | self.eflags_if = True 19 | 20 | def copy(self) -> 'ControlStatePluginX86': 21 | return copy.copy(self) 22 | 23 | class ArchX86(Arch): 24 | X86_EFLAGS_CF = 0x0001 25 | X86_EFLAGS_PF = 0x0004 26 | X86_EFLAGS_AF = 0x0010 27 | X86_EFLAGS_ZF = 0x0040 28 | X86_EFLAGS_SF = 0x0080 29 | X86_EFLAGS_OF = 0x0800 30 | X86_EFLAGS_IF = 0x0200 31 | 32 | STACK_SIZE = 8 33 | STACK_END = 0xffffeb0000000000 34 | SYSCALL_INSN_LEN = 2 35 | 36 | @property 37 | def stack_end(self) -> int: 38 | return self.STACK_END 39 | 40 | @property 41 | def syscall_insn_len(self) -> int: 42 | return self.SYSCALL_INSN_LEN 43 | 44 | retpoline_thunk_regs = { 'rax', 'rcx', 'rdx', 'rbx', 'rsp', 'rbp', 'rsi', 'rdi', 45 | 'r8', 'r9', 'r10', 'r11', 'r12', 'r13', 'r14', 'r15' } 46 | 47 | __irq_exit_sym_names = {'native_irq_return_iret', 'asm_exc_nmi', 'nmi_restore'} 48 | 49 | flags_cond_map = { 50 | # Checked flags, Invert 51 | capstone.x86.X86_INS_JAE: (X86_EFLAGS_CF, True), 52 | capstone.x86.X86_INS_JA: (X86_EFLAGS_CF|X86_EFLAGS_ZF, True), 53 | capstone.x86.X86_INS_JBE: (X86_EFLAGS_CF|X86_EFLAGS_ZF, False), 54 | capstone.x86.X86_INS_JB: (X86_EFLAGS_CF, False), 55 | capstone.x86.X86_INS_JE: (X86_EFLAGS_ZF, False), 56 | capstone.x86.X86_INS_JNE: (X86_EFLAGS_ZF, True), 57 | capstone.x86.X86_INS_JNO: (X86_EFLAGS_OF, True), 58 | capstone.x86.X86_INS_JNP: (X86_EFLAGS_PF, True), 59 | capstone.x86.X86_INS_JNS: (X86_EFLAGS_SF, True), 60 | capstone.x86.X86_INS_JO: (X86_EFLAGS_OF, False), 61 | capstone.x86.X86_INS_JP: (X86_EFLAGS_PF, 
# Maps every predicated instruction (SETcc, CMOVcc, and SBB which consumes
# CF) to the conditional jump that tests the same EFLAGS condition, so the
# jump-condition logic can be reused for them.
#
# NOTE: the "positive" forms SETO/SETP/SETS and CMOVO/CMOVP/CMOVS must map
# to JO/JP/JS. Mapping them to the negated jumps (JNO/JNP/JNS) inverts the
# evaluated condition.
predicated_map = {
    capstone.x86.X86_INS_SETAE: capstone.x86.X86_INS_JAE,
    capstone.x86.X86_INS_SETA: capstone.x86.X86_INS_JA,
    capstone.x86.X86_INS_SETBE: capstone.x86.X86_INS_JBE,
    capstone.x86.X86_INS_SETB: capstone.x86.X86_INS_JB,
    capstone.x86.X86_INS_SETE: capstone.x86.X86_INS_JE,
    capstone.x86.X86_INS_SETGE: capstone.x86.X86_INS_JGE,
    capstone.x86.X86_INS_SETG: capstone.x86.X86_INS_JG,
    capstone.x86.X86_INS_SETLE: capstone.x86.X86_INS_JLE,
    capstone.x86.X86_INS_SETL: capstone.x86.X86_INS_JL,
    capstone.x86.X86_INS_SETNE: capstone.x86.X86_INS_JNE,
    capstone.x86.X86_INS_SETNO: capstone.x86.X86_INS_JNO,
    capstone.x86.X86_INS_SETNP: capstone.x86.X86_INS_JNP,
    capstone.x86.X86_INS_SETNS: capstone.x86.X86_INS_JNS,
    capstone.x86.X86_INS_SETO: capstone.x86.X86_INS_JO,
    capstone.x86.X86_INS_SETP: capstone.x86.X86_INS_JP,
    capstone.x86.X86_INS_SETS: capstone.x86.X86_INS_JS,
    capstone.x86.X86_INS_CMOVAE: capstone.x86.X86_INS_JAE,
    capstone.x86.X86_INS_CMOVA: capstone.x86.X86_INS_JA,
    capstone.x86.X86_INS_CMOVBE: capstone.x86.X86_INS_JBE,
    capstone.x86.X86_INS_CMOVB: capstone.x86.X86_INS_JB,
    capstone.x86.X86_INS_CMOVE: capstone.x86.X86_INS_JE,
    capstone.x86.X86_INS_CMOVGE: capstone.x86.X86_INS_JGE,
    capstone.x86.X86_INS_CMOVG: capstone.x86.X86_INS_JG,
    capstone.x86.X86_INS_CMOVLE: capstone.x86.X86_INS_JLE,
    capstone.x86.X86_INS_CMOVL: capstone.x86.X86_INS_JL,
    capstone.x86.X86_INS_CMOVNE: capstone.x86.X86_INS_JNE,
    capstone.x86.X86_INS_CMOVNO: capstone.x86.X86_INS_JNO,
    capstone.x86.X86_INS_CMOVNP: capstone.x86.X86_INS_JNP,
    capstone.x86.X86_INS_CMOVNS: capstone.x86.X86_INS_JNS,
    capstone.x86.X86_INS_CMOVO: capstone.x86.X86_INS_JO,
    capstone.x86.X86_INS_CMOVP: capstone.x86.X86_INS_JP,
    capstone.x86.X86_INS_CMOVS: capstone.x86.X86_INS_JS,
    capstone.x86.X86_INS_SBB: capstone.x86.X86_INS_JB,
}
def predicated_mov_constraint(self, state:angr.SimState, cond_true:bool, insn:capstone.CsInsn) -> List[angr.SimState]:
    """Fork `state` on the EFLAGS condition of a predicated instruction.

    Two copies of `state` are returned, in this order: the copy constrained
    so that the instruction's condition holds, then the copy constrained so
    that it does not hold. On each copy `control.diverged` is set when the
    simulated outcome differs from `cond_true` (the outcome that actually
    took place), and `control.expected_ip` is set to the address of `state`.

    Args:
        state: state positioned at the predicated instruction.
        cond_true: whether the condition was true in the recorded execution.
        insn: the predicated instruction; its id must be a key of
            `predicated_map`.

    Raises:
        KeyError: if `insn.id` is not in `predicated_map`.
        Exception: if the mapped condition is not handled here.
    """

    def ffs(x:int) -> int:
        """Returns the index, counting from 0, of the
        least significant set bit in `x`.
        """
        return (x&-x).bit_length()-1

    def flags_equal(flags, flag_a:int, flag_b:int):
        # Symbolic comparison of the two single EFLAGS bits selected by the
        # (single-bit) masks `flag_a` and `flag_b`.
        offset_a, offset_b = ffs(flag_a), ffs(flag_b)
        return flags[offset_a] == flags[offset_b]

    # Creating a list of taken, not-taken
    successors = list()

    flags = state.regs.eflags
    cond_id = self.predicated_map[insn.id]
    simple_mask, simple_mask_clear, single_bit_cond = None, False, False
    if cond_id in self.flags_cond_map:
        mask, invert = self.flags_cond_map[cond_id]
        constraint = (flags & mask) != 0
        if invert:
            constraint = claripy.Not(constraint)
        single_bit_cond = (mask & (mask - 1)) == 0
        simple_mask, simple_mask_clear = mask, invert
    elif cond_id == capstone.x86.X86_INS_JGE:
        # GE: SF == OF
        constraint = flags_equal(flags, self.X86_EFLAGS_SF, self.X86_EFLAGS_OF)
    elif cond_id == capstone.x86.X86_INS_JG:
        # G: ZF == 0 and SF == OF
        constraint = claripy.And((flags & self.X86_EFLAGS_ZF) == 0,
                                 flags_equal(flags, self.X86_EFLAGS_SF, self.X86_EFLAGS_OF))
    elif cond_id == capstone.x86.X86_INS_JLE:
        # LE: ZF == 1 or SF != OF
        constraint = claripy.Or((flags & self.X86_EFLAGS_ZF) != 0,
                                claripy.Not(flags_equal(flags, self.X86_EFLAGS_SF, self.X86_EFLAGS_OF)))
    elif cond_id == capstone.x86.X86_INS_JL:
        # L: SF != OF. This must be the negation of the GE condition, the
        # same way is_cond_jmp_taken() evaluates JL.
        constraint = claripy.Not(flags_equal(flags, self.X86_EFLAGS_SF, self.X86_EFLAGS_OF))
    else:
        raise Exception("Unhandled condition")

    for sim_cond_true in [True, False]:
        n = state.copy()
        n.add_constraints(constraint if sim_cond_true else claripy.Not(constraint))

        # Try to set the flags to simplify execution if we can figure out the flags
        if simple_mask is not None:
            # On this path every bit of the mask is known to be cleared
            if sim_cond_true == simple_mask_clear:
                n.regs.flags = flags & ~simple_mask
            # Otherwise at least one masked bit is set; with a single-bit
            # mask we know exactly which one.
            elif single_bit_cond:
                n.regs.flags = flags | simple_mask

        n.control.diverged = cond_true != sim_cond_true
        n.control.expected_ip = state.solver.eval_one(state.addr)

        successors.append(n)

    return successors
def is_loop_taken(self, insn:capstone.CsInsn, state:Dict[str, Any]) -> bool:
    """Tell whether a LOOP/LOOPE/LOOPNE instruction branches.

    `state` supplies the recorded 'flags' and 'cx' values. The counter is
    truncated to the byte size reported for the first operand of `insn`.
    """
    flags = state['flags']
    counter = state['cx']

    counter_mask = (1 << (insn.operands[0].size * 8)) - 1
    if not (counter & counter_mask):
        # A zero counter never branches, whatever ZF says.
        return False

    loop_kind = insn.id
    if loop_kind == capstone.x86.X86_INS_LOOPNE:
        return flags & self.X86_EFLAGS_ZF == 0
    if loop_kind == capstone.x86.X86_INS_LOOPE:
        return flags & self.X86_EFLAGS_ZF != 0

    # Only the unconditional LOOP form is left.
    assert(loop_kind == capstone.x86.X86_INS_LOOP)
    return True
def is_ret_insn(self, insn:capstone.CsInsn) -> bool:
    """Tell whether `insn` returns from a function.

    Besides real return instructions, a direct jump whose target equals
    `self.return_thunk_addr` is treated as a return (a "retthunk").
    """
    if not self.__is_ret_insn(insn):
        # Not a genuine ret: the only other accepted form is a direct
        # jump into the return thunk.
        if not self.is_direct_jmp_insn(insn):
            return False
        jump_target = self.get_direct_branch_target(insn)
        return jump_target == self.return_thunk_addr

    return True
@staticmethod
def __pushf_hook(state:angr.SimState, reg:str):
    """Emulate PUSHF/PUSHFD/PUSHFQ: push the flags register `reg`.

    The interrupt flag (IF) is tracked separately in the control-state
    plugin (`eflags_if`), so the pushed value takes its IF bit from there.
    The stack pointer is lowered by `STACK_SIZE` and 8 bytes are stored
    little-endian at the new top of stack.
    """
    archX86 = ArchX86.get_control_state_arch(state)
    rsp = state.registers.load('rsp')
    rsp -= ArchX86.STACK_SIZE
    v = state.registers.load(reg)
    if archX86.eflags_if:
        v |= ArchX86.X86_EFLAGS_IF
    else:
        # The register may still carry an IF bit written by an earlier
        # POPF; after a CLI that bit is stale and must not be pushed.
        v &= ~ArchX86.X86_EFLAGS_IF

    state.memory.store(rsp, v, size=8, endness='Iend_LE')
    state.registers.store('rsp', rsp)
def pyvex_workaround(self, insn:capstone.CsInsn) -> Tuple[Union[Callable, None], bool]:
    """Pick the hook that should replace `insn`, if any.

    Returns a `(hook, flag)` pair. `skip_mask_hook` is returned with the
    flag set to True; the STI/CLI/PUSHF*/POPF* hooks are returned with the
    flag set to False; any other instruction yields `(None, False)`.
    NOTE(review): the meaning of the flag is defined by the caller, which
    is not visible here - confirm before relying on it.
    """
    skipped_ids = {
        capstone.x86.X86_INS_WRFSBASE,
        capstone.x86.X86_INS_WRGSBASE,
        capstone.x86.X86_INS_STAC,
        capstone.x86.X86_INS_CLAC,
        capstone.x86.X86_INS_INVLPG,
        capstone.x86.X86_INS_INVLPGA,
        capstone.x86.X86_INS_INVPCID,
        capstone.x86.X86_INS_INVEPT,
        capstone.x86.X86_INS_SGDT,
        capstone.x86.X86_INS_LGDT,
        capstone.x86.X86_INS_IDIV,
        capstone.x86.X86_INS_UD0,
        capstone.x86.X86_INS_UD2B,
        capstone.x86.X86_INS_SWAPGS,
        capstone.x86.X86_INS_WRMSR,
        capstone.x86.X86_INS_RDMSR,
        capstone.x86.X86_INS_VERW,
    }

    # Checked in order: MOV x, SREG (opcode 0x8e), RDPKRU (0f 01 ee),
    # then the instruction ids listed above.
    use_skip_mask = (insn.bytes[0] == 0x8e or
                     insn.bytes[0:3] == b'\x0f\x01\xee' or
                     insn.id in skipped_ids)
    if use_skip_mask:
        return self.skip_mask_hook, True

    flag_hooks = {
        capstone.x86.X86_INS_STI: self.sti_hook,
        capstone.x86.X86_INS_CLI: self.cli_hook,
        capstone.x86.X86_INS_PUSHF: self.pushf_hook,
        capstone.x86.X86_INS_PUSHFD: self.pushfd_hook,
        capstone.x86.X86_INS_PUSHFQ: self.pushfq_hook,
        capstone.x86.X86_INS_POPF: self.popf_hook,
        capstone.x86.X86_INS_POPFD: self.popfd_hook,
        capstone.x86.X86_INS_POPFQ: self.popfq_hook,
    }

    # A missing id falls out as (None, False).
    return flag_hooks.get(insn.id), False
struct.unpack(' None: 590 | # get the symbol for __x86_return_thunk 591 | try: 592 | return_thunk_sym = proj.loader.find_symbol('__x86_return_thunk') 593 | except KeyError: 594 | return_thunk_sym = None 595 | 596 | self.return_thunk_addr = return_thunk_sym and return_thunk_sym.rebased_addr 597 | 598 | def is_exception_vector(self, vector:int) -> bool: 599 | return vector < 32 600 | 601 | @property 602 | def irq_exit_sym_names(self) -> Set[str]: 603 | return self.__irq_exit_sym_names 604 | 605 | @property 606 | def address_width(self) -> int: 607 | return 64 608 | 609 | arch = ArchX86() 610 | -------------------------------------------------------------------------------- /intelptreporter.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 VMware, Inc. 2 | # SPDX-License-Identifier: BSD-2-Clause 3 | from typing import Optional, Set, List, Dict, Tuple, Any, Union, List 4 | import re 5 | from concurrent.futures import ProcessPoolExecutor, as_completed 6 | from collections import defaultdict 7 | 8 | import syscall 9 | import itertools 10 | from angrmgr import Angr 11 | from arch import arch 12 | from cle.backends import Symbol 13 | from ftrace import Ftrace 14 | from prmsg import pr_msg, Pbar 15 | from reporter import Reporter 16 | from syscall import SyscallInfo, ErrorcodeInfo 17 | 18 | class IntelPTReporter(Reporter): 19 | branch_regex = re.compile( 20 | Ftrace.common_trace_pattern + 21 | r'(?P\d+)\s+' + 22 | r'(?P[^\:]+):(?P[ku]):\s+' + 23 | r'(?P[0-9a-f]+)\s+' + 24 | r'(?P[^\+]+)' + 25 | r'(\+0x(?P[0-9a-f]+))? ' + 26 | r'\(' + 27 | r'(\[(?P[^\]]+)\])?' + 28 | r'(?P[^\)]*)' + 29 | r'\)' + 30 | r' =>\s+' + 31 | r'(?P[0-9a-f]+)\s+' + 32 | r'(?P[^\+]+)' + 33 | r'(\+0x(?P[0-9a-f]+))? ' + 34 | r'\(' + 35 | r'(\[(?P[^\]]+)\])?' 
+ 36 | r'(?P[^\)]*)' + 37 | r'\)') 38 | 39 | @staticmethod 40 | def parse_entries_batch(strings, start_line): 41 | results = [] 42 | bpf_perf_event_output_indices = [] 43 | exit_event_indices = [] 44 | for i, string in enumerate(strings): 45 | match = IntelPTReporter.branch_regex.match(string) 46 | if match: 47 | d = match.groupdict() 48 | 49 | # Ignore PID -1 events. For some reason perf might emit them, but we 50 | # cannot associate them with a process, which makes their processing 51 | # non-trivial. 52 | if d['pid'] == '-1': 53 | results.append(None) 54 | continue 55 | 56 | d['time'] = float(d['time']) 57 | for field in ['to_sym', 'from_sym']: 58 | if d[field] == '[unknown]': 59 | d[field] = None 60 | for field in ['to_obj', 'from_obj']: 61 | if d[field] == 'unknown': 62 | d[field] = None 63 | for field in ['to_ip', 'from_ip', 'from_off', 'to_off']: 64 | d[field] = d[field] and int(d[field], 16) 65 | for field in ['pid', 'id', 'cpu']: 66 | d[field] = int(d[field]) 67 | results.append(d) 68 | 69 | # Check the conditions for bpf_perf_event_output branches 70 | if ((d['from_sym'] or '').startswith('bpf_prog_') and 71 | d.get('to_sym') == 'bpf_perf_event_output_tp'): 72 | bpf_perf_event_output_indices.append(start_line + i) 73 | continue 74 | 75 | match = Ftrace.err_exit_regex.match(string) 76 | if False and match: 77 | d = match.groupdict() 78 | d['time'] = float(d['time']) 79 | d['pid'] = int(d['pid']) 80 | d['errcode'] = int(d['err'], 16) if d['err'] else int(d['err2'], 16) 81 | d['syscall_exit_nr'] = (int(d['syscall_exit_nr']) if d['syscall_exit_nr'] 82 | else SyscallInfo.get_syscall_nr(d['syscall_exit_name'])) 83 | for k in ['err', 'err2']: 84 | del d[k] 85 | results.append(d) 86 | exit_event_indices.append(start_line + i) 87 | continue 88 | 89 | match = Ftrace.entry_exit_regex.match(string) 90 | if match: 91 | d = match.groupdict() 92 | syscall_entry = (d['syscall_enter_name'] is not None or 93 | d['syscall_enter_nr'] is not None) 94 | r = { 95 | 'time': 
float(d['time']), 96 | 'pid': int(d['pid']), 97 | 'proc': d['proc'], 98 | 'cpu': int(d['cpu']), 99 | 'type': 'syscall' if syscall_entry else 'syscall_exit', 100 | } 101 | if syscall_entry: 102 | if d['syscall_args1'] is not None: 103 | matches = re.findall(r'(\w+):\s*(0x[\da-fA-F]+)', d['syscall_args1']) 104 | args = {k: int(v, 16) for k, v in matches} 105 | else: 106 | argument_values = d['syscall_args2'].split(', ') 107 | args = {f'arg{i+1}': int(x, 16) for i, x in enumerate(argument_values)} 108 | 109 | r.update({ 110 | 'syscall_nr': (int(d['syscall_enter_nr']) if d['syscall_enter_nr'] 111 | else SyscallInfo.get_syscall_nr(d['syscall_enter_name'])), 112 | 'syscall_args': args 113 | }) 114 | else: 115 | r.update({ 116 | 'errcode': int(d['err'], 16) if d['err'] else int(d['err2'], 16), 117 | 'syscall_nr': (int(d['syscall_exit_nr']) if d['syscall_exit_nr'] 118 | else SyscallInfo.get_syscall_nr(d['syscall_exit_name'])) 119 | }) 120 | results.append(r) 121 | continue 122 | 123 | results.append(None) 124 | return results, bpf_perf_event_output_indices, exit_event_indices 125 | 126 | @staticmethod 127 | def entries_chunk_list(input_list, chunk_size): 128 | return [input_list[i:i + chunk_size] for i in range(0, len(input_list), chunk_size)] 129 | 130 | @staticmethod 131 | def parse_entries_batch_wrapper(args): 132 | return IntelPTReporter.parse_entries_batch(*args) 133 | 134 | def parse_trace(self, trace: List[str], errcode:Optional[int]=None) -> Tuple[List[Dict[str, Union[int, str, float]]], List[Dict]]: 135 | batch_size = 1000 # Set this to an appropriate value based on your dataset and hardware capabilities 136 | 137 | input_batches = self.entries_chunk_list(trace, batch_size) 138 | input_batches_with_start_line = [(batch, i * batch_size) for i, batch in enumerate(input_batches)] 139 | 140 | with ProcessPoolExecutor(max_workers=10) as executor: 141 | with Pbar(message="process trace", items=input_batches_with_start_line) as pbar: 142 | batch_results = 
def is_syscall_entry(self, entry:Dict[str, Any]) -> bool:
    """Tell whether the branch `entry` lands on a syscall entry symbol.

    Only the 'to_sym' field is inspected; records without it never match.
    """
    # TODO: move to arch-specific code
    syscall_entry_syms = {
        '__entry_text_start',
        'entry_SYSCALL_64',
        'syscall_enter_from_user_mode',
    }
    target_sym = entry.get('to_sym')
    return target_sym in syscall_entry_syms
def report(self) -> bool: 192 | n_reported = 0 193 | n_traces = len(self.traces) 194 | n_failures = len(self.failures) 195 | 196 | # TODO: coorelate the trace with the failure 197 | for failure in self.failures: 198 | for i_trace, trace in enumerate(self.traces): 199 | # Although we have a timestamp on the failure that we collected using eBPF, 200 | # it is using a different time source than perf, so we have no reasonable way 201 | # to correlate the two. Instead, we just look for the error code in the trace 202 | # and then look for the syscall entry/exit points around it. 203 | pr_msg(f"processing trace {i_trace+1}/{n_traces}", level='INFO') 204 | 205 | if not isinstance(trace, str): 206 | raise SystemError('Intel-PT trace is not a string') 207 | 208 | trace_entries = trace.splitlines() 209 | 210 | parsed, trace_failures = self.parse_trace(trace_entries) 211 | 212 | #failures = self.get_errors(trace_entries) 213 | 214 | if len(trace_failures) == 0: 215 | pr_msg('found no failures in trace', level='INFO') 216 | continue 217 | 218 | for trace_failure in trace_failures: 219 | failure_entries = parsed[:trace_failure['index']] 220 | failure_errcode = failure['err'] 221 | failure_syscall = failure['syscall_nr'] 222 | 223 | failure_entries = [e for e in failure_entries if e is not None and e['pid'] == failure['pid']] 224 | 225 | # Remove any entries in which the from_sym or to_sym is None 226 | failure_entries = [e for e in failure_entries 227 | if e.get('from_sym', '') is not None and e.get('to_sym', '') is not None] 228 | 229 | # TODO: Fix the filters based on the failure entries 230 | if ((self.syscall_filter and self.syscall_filter != failure_syscall) or 231 | (self.errcode_filter and self.errcode_filter != failure_errcode)): 232 | continue 233 | 234 | branches = self.skip_intr_entries(failure_entries) 235 | 236 | # TODO: extract all syscalls, not just the last one 237 | extracted = self.extract_last_syscall(branches) 238 | if extracted is None: 239 | continue 240 | 
241 | branches = extracted 242 | branches = self.skip_fentry_entries(branches) 243 | 244 | super().report_one( 245 | branches = branches, 246 | errcode = -failure_errcode, 247 | simulate_all = True 248 | ) 249 | n_reported += 1 250 | 251 | if n_reported == n_failures: 252 | return True 253 | 254 | return True 255 | 256 | def find_time(self, trace:List[str], time:float, before:bool) -> Optional[int]: 257 | """Find the index of the first entry with the given time""" 258 | # Bisect to find the time, but gracefully handle entries with no time 259 | s = 0 260 | e = len(trace) 261 | found = None 262 | while s < e: 263 | mid = (s + e) // 2 264 | # Only consider branch entries since their time is in sync with 265 | # the time we look for. If we do not have such entry, go forward 266 | # and then backward until we find one. 267 | for i in itertools.chain(range(mid, len(trace)), range(mid, -1, -1)): 268 | m = self.branch_regex.match(trace[i]) 269 | if m is not None: 270 | break 271 | if m is None: 272 | return None 273 | d = m.groupdict() 274 | e_time = float(d['time']) 275 | if e_time < time: 276 | if before: 277 | found = max(mid, found or mid) 278 | s = mid + 1 279 | elif e_time == time: 280 | if before: 281 | e = mid - 1 282 | found = min(mid - 1, found or mid - 1) 283 | else: 284 | s = mid + 1 285 | found = max(mid + 1, found or mid + 1) 286 | else: # e_time > time 287 | if not before: 288 | found = min(mid, found or mid) 289 | e = mid 290 | 291 | # assert found is not None 292 | # for i in range(found, 0, -1): 293 | # if Ftrace.err_exit_regex.match(trace[i]) is not None: 294 | # return i 295 | 296 | return found 297 | 298 | @staticmethod 299 | def search_in_chunk(args): 300 | chunk, regex_pattern, start_line = args 301 | matches = [] 302 | 303 | for i, line in enumerate(chunk, start_line): 304 | match = regex_pattern.match(line) 305 | if match: 306 | matches.append(i) 307 | 308 | return matches 309 | 310 | @staticmethod 311 | def chunk_lines(lines, chunk_size): 312 | 
@staticmethod
def search_regex_multiprocess(lines: List[str], compiled_regex, max_workers=10, chunk_size=100):
    """Find the indices of all lines matching `compiled_regex`, in parallel.

    `lines` is split into chunks of `chunk_size` lines which are scanned by
    up to `max_workers` worker processes. The returned indices refer to
    positions in `lines` and are in ascending order.
    """
    if not lines:
        # Nothing to scan; do not spin up a process pool for it.
        return []

    tasks = [(chunk, compiled_regex, start_line)
             for chunk, start_line in IntelPTReporter.chunk_lines(lines, chunk_size)]

    with ProcessPoolExecutor(max_workers=max_workers) as executor:
        # map() yields results in submission order, so the matches come out
        # sorted by line number instead of in worker-completion order.
        per_chunk_matches = executor.map(IntelPTReporter.search_in_chunk, tasks)
        return [line_no for matches in per_chunk_matches for line_no in matches]
347 | # TODO: consider handling nested 348 | if not in_untracked: 349 | if in_fentry: 350 | try: 351 | insn = self.angr_mgr.get_insn(from_ip) 352 | except: 353 | continue 354 | if from_sym == '__fentry__' and arch.is_ret_insn(insn): 355 | in_fentry = False 356 | continue 357 | elif to_sym == '__fentry__': 358 | in_fentry = True 359 | continue 360 | 361 | is_untracked_target = (to_sym in {'__fentry__', 'zen_untrain_ret', '__x86_return_thunk'} or 362 | to_sym.startswith('__x86_indirect_thunk')) 363 | 364 | if in_untracked: 365 | if not is_untracked_target: 366 | if len(result) > 0: 367 | for k in ['to_sym', 'to_off', 'to_sec', 'to_ip']: 368 | result[-1][k] = entry[k] 369 | in_untracked = False 370 | else: 371 | # Add in both cases; we will fix the to_* fields later 372 | if is_untracked_target: 373 | in_untracked = True 374 | result.append(entry.copy()) 375 | else: 376 | result.append(entry) 377 | 378 | return result 379 | 380 | def skip_intr_entries(self, trace:List[Dict]) -> List[Dict]: 381 | result:List[Dict] = [] 382 | enumerated = [e for e in enumerate(trace)] 383 | irq_entries = [e[0] for e in enumerated if self.is_intr_entry(e[1])] 384 | irq_exits = [e[0] for e in enumerated if self.is_intr_exit(e[1])] 385 | in_irq = 0 386 | irq_entries_i = 0 387 | irq_exits_i = 0 388 | start_idx = 0 389 | # Indexes to trace that reflects the last non-nested IRQ/exception entries/exits 390 | trace_irq_entry_i = None 391 | trace_irq_exit_i = None 392 | 393 | while irq_entries_i < len(irq_entries) or irq_exits_i < len(irq_exits): 394 | if (irq_entries_i < len(irq_entries) and (irq_exits_i >= len(irq_exits) or 395 | irq_entries[irq_entries_i] < irq_exits[irq_exits_i])): 396 | # IRQ entry 397 | if in_irq == 0: 398 | trace_irq_entry_i = irq_entries[irq_entries_i] 399 | result.extend(trace[start_idx:trace_irq_entry_i]) 400 | start_idx = trace_irq_entry_i + 1 401 | in_irq += 1 402 | irq_entries_i += 1 403 | else: 404 | # IRQ exit 405 | if in_irq == 0: 406 | # We are not in an IRQ, 
but something went wrong. We will just clean the result and 407 | # hope for the best. 408 | pr_msg(f'IRQ exit without entry: {trace[irq_exits[irq_exits_i]]}', level = 'DEBUG') 409 | result = [] 410 | elif in_irq > 0: 411 | in_irq -= 1 412 | if in_irq == 0: 413 | trace_irq_exit_i = irq_exits[irq_exits_i] 414 | 415 | # Special handling for exception tables. If the return address 416 | # does not match the exception address, we are going to add the 417 | # entry and exit entries to the trace. 418 | if (trace_irq_entry_i is not None and 419 | trace[trace_irq_entry_i].get('from_ip') != trace[trace_irq_exit_i].get('to_ip')): 420 | for i in [trace_irq_entry_i, trace_irq_exit_i]: 421 | e = trace[i].copy() 422 | e['exception'] = True 423 | result.append(e) 424 | 425 | start_idx = trace_irq_exit_i + 1 426 | 427 | irq_exits_i += 1 428 | 429 | if not in_irq: 430 | result.extend(trace[start_idx:]) 431 | 432 | return result 433 | 434 | def extract_last_syscall(self, trace:List[Dict]) -> Optional[List[Dict]]: 435 | enumerated = [e for e in enumerate(trace)] 436 | exit_entry_idxs = [i for i, e in enumerated if self.is_syscall_exit(e)] 437 | 438 | # Find the entry before the last exit 439 | if len(exit_entry_idxs) == 0: 440 | return None 441 | 442 | #exit_entry_idx = exit_entry_idxs[-1] 443 | exit_entry_idx = len(trace) - 1 444 | entries = enumerated[:exit_entry_idx+1] 445 | 446 | enter_entry_idxs = [i for i, e in enumerated if self.is_syscall_entry(e)] 447 | if len(enter_entry_idxs) == 0: 448 | return None 449 | enter_entry_idx = enter_entry_idxs[-1] 450 | 451 | # We still need to get rid of all unemulated code at the beginning of the trace. 452 | # As a hueristic, which might only fit x86-64, we will look for a call from 453 | # the entry point. 
454 | for i in range(enter_entry_idx, exit_entry_idx): 455 | insn = self.angr_mgr.get_insn(entries[i][1]['from_ip']) 456 | if insn is None or not arch.is_call_insn(insn): 457 | continue 458 | if entries[i][1]['from_sym'] not in arch.syscall_entry_points: 459 | continue 460 | break 461 | 462 | enter_entry_idx = i 463 | if enter_entry_idx == exit_entry_idx: 464 | return None 465 | 466 | # Cut the end of the trace to the return address of the first call. 467 | # This is a heuristic that might not work for all architectures. 468 | expected_ret_addr = self.angr_mgr.next_insn_addr(insn) 469 | for i in range(exit_entry_idx, enter_entry_idx, -1): 470 | if entries[i][1]['to_ip'] == expected_ret_addr: 471 | break 472 | 473 | exit_entry_idx = i 474 | if enter_entry_idx == exit_entry_idx: 475 | return None 476 | 477 | return trace[enter_entry_idx:exit_entry_idx+1] 478 | 479 | def get_errors(self, trace:List[str]) -> List[Dict]: 480 | # The failures that were recorded had the wrong time source, so we need 481 | # to find the time of the failure in the trace. However, the location of 482 | # the failure in the trace, as indicated by the syscall entry/exit point 483 | # if not in sync with the branch trace. So we find the time of the 484 | # failure and would later find the branches in between. 
485 | err_list = [] 486 | unmatched_exits = 0 487 | matched_syscalls = [] 488 | enter_pid_dict = {} 489 | 490 | pr_msg("finding failures in trace...", level = "INFO") 491 | 492 | line_nums = self.search_regex_multiprocess(trace, Ftrace.complete_exit_regex) 493 | 494 | parsed = [(n, self.parse_trace_entry(trace[n])) for n in line_nums] 495 | 496 | for line_num, syscall_info in parsed: 497 | assert syscall_info is not None 498 | 499 | syscall_type = syscall_info["type"] 500 | pid = syscall_info["pid"] 501 | 502 | if syscall_type == "syscall_enter": 503 | enter_pid_dict[pid] = syscall_info 504 | 505 | elif syscall_type == "syscall_exit": 506 | if pid in enter_pid_dict: 507 | matched_syscalls.append((enter_pid_dict[pid], syscall_info)) 508 | del enter_pid_dict[pid] 509 | else: 510 | #matched_syscalls.append((None, line_num)) 511 | unmatched_exits += 1 512 | 513 | if unmatched_exits > 0: 514 | pr_msg(f"encountered {unmatched_exits} with incomplete trace", level = "INFO") 515 | 516 | for (entry, exit) in matched_syscalls: 517 | errcode = syscall.ret_to_err(exit['syscall_ret']) 518 | if errcode is None: 519 | continue 520 | 521 | f = {'start_time': entry['time'], 522 | 'end_time': exit['time'], 523 | 'errcode': -errcode, 524 | 'syscall_nr': exit['syscall'], 525 | 'pid': exit['pid'], 526 | 'args': entry['syscall_args']} 527 | err_list.append(f) 528 | 529 | return err_list 530 | 531 | # TODO: Combine with kprobes function of remove_untracked_from_snapshot() 532 | def remove_untracked_branches(self, branches: List[Dict]) -> List[Dict]: 533 | # Various kernel code (e.g., context switch) performs complicated call/ret 534 | # interactions. So, we track nesting level based on addresses and not call 535 | # and rets. 
536 | tracked_branches = list() 537 | nesting_level = 0 538 | callee_address, callee_sym, ret_to_ip = None, None, None 539 | for b in Pbar("clean trace", branches): 540 | from_ip, to_ip = b['from_ip'], b['to_ip'] 541 | to_sym = to_ip and self.angr_mgr.get_sym(to_ip) 542 | from_insn = self.angr_mgr.get_insn(from_ip) 543 | 544 | if nesting_level == 0: 545 | if not arch.is_call_insn(from_insn): 546 | tracked_branches.append(b) 547 | continue 548 | 549 | # TODO: Do we want to check if the entire symbol is hooked? 550 | if not to_sym or self.angr_mgr.is_ignored_sym(to_sym) or self.angr_mgr.proj.is_hooked(to_ip): 551 | callee_address = to_ip 552 | ret_to_ip = self.angr_mgr.next_insn_addr(from_ip) 553 | nesting_level = 1 554 | tracked_branches.append({'from_ip': from_ip, 'to_ip': None}) 555 | tracked_branches.append({'from_ip': None, 'to_ip': ret_to_ip}) 556 | else: 557 | tracked_branches.append(b) 558 | elif to_ip == ret_to_ip and (from_insn is None or arch.is_ret_insn(from_insn)): 559 | nesting_level -= 1 560 | elif ((not callee_address or from_ip == callee_address) 561 | and (from_insn is not None and arch.is_call_insn(from_insn))): 562 | nesting_level += 1 563 | 564 | return tracked_branches 565 | 566 | @property 567 | def detailed_trace(self) -> bool: 568 | return False -------------------------------------------------------------------------------- /kprobesrecorder.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 VMware, Inc. 
2 | # SPDX-License-Identifier: BSD-2-Clause 3 | import logging 4 | from typing import Optional, Set, List, Dict, Tuple, Iterable, Any, Union 5 | from collections import deque, defaultdict 6 | 7 | import ptrace 8 | from ptrace.debugger.process import PtraceProcess 9 | from ptrace.syscall.ptrace_syscall import PtraceSyscall 10 | 11 | from arch import arch 12 | from cle.backends import Symbol 13 | from capstone import CsInsn 14 | 15 | from kcore import Kcore 16 | from ftrace import Ftrace 17 | from recorder import Recorder 18 | from prmsg import pr_msg, Pbar, warn_once 19 | 20 | class KProbesRecorder(Recorder): 21 | SKIP_TRACE_EVENTS: List[str] = [ 22 | 'irq/irq_handler_entry', 23 | 'irq_vectors/call_function_entry', 24 | 'irq_vectors/call_function_single_entry', 25 | 'irq_vectors/error_apic_entry', 26 | 'irq_vectors/local_timer_entry', 27 | 'irq_vectors/reschedule_entry', 28 | 'irq_vectors/spurious_apic_entry', 29 | 'irq_vectors/thermal_apic_entry', 30 | 'irq_vectors/threshold_apic_entry', 31 | ] 32 | 33 | RESUME_TRACE_EVENTS: List[str] = [ 34 | 'irq/irq_handler_exit', 35 | 'irq_vectors/call_function_exit', 36 | 'irq_vectors/call_function_single_exit', 37 | 'irq_vectors/error_apic_exit', 38 | 'irq_vectors/local_timer_exit', 39 | 'irq_vectors/reschedule_exit', 40 | 'irq_vectors/spurious_apic_exit', 41 | 'irq_vectors/thermal_apic_exit', 42 | 'irq_vectors/threshold_apic_exit', 43 | ] 44 | 45 | NORETURN_FUNCS = { 46 | '__stack_chk_fail', 47 | 'fortify_panic', 48 | } 49 | 50 | def __init__(self, **kwargs): 51 | self.pending_signals = defaultdict(deque) 52 | self.kprobes = dict() 53 | 54 | kwargs.pop('tmp_path', None) 55 | kwargs['kcore'] = Kcore() 56 | super().__init__(**kwargs) 57 | self.ftrace = Ftrace.main_instance(self.angr_mgr) 58 | 59 | def set_probes(self, addrs:Iterable[int]) -> List[int]: 60 | probes = list() 61 | 62 | for addr in Pbar("setting probes", items=addrs, unit="kprobe"): 63 | if self.ftrace.is_kprobe_blacklisted(addr): 64 | raise ValueError(f'kprobe on 
{hex(addr)} is blacklisted') 65 | probe = self.get_kprobe(addr = addr, extra = arch.ftrace_state_str) 66 | if probe is None: 67 | logging.error(f'could not set probe on {hex(addr)}') 68 | else: 69 | probes.append(probe) 70 | 71 | probes.sort(key=lambda x: x.addr) 72 | for probe in probes: 73 | probe.enable = True 74 | 75 | return probes 76 | 77 | def get_kprobe(self, 78 | addr: int, 79 | ret: bool = False, 80 | extra: str = ''): 81 | key = (addr, ret) 82 | assert key is not None 83 | 84 | if not self.ftrace.is_valid_kprobe(addr): 85 | return None 86 | 87 | prefix = 'r' if ret else 'p' 88 | ename = f'{prefix}_{hex(addr)}' 89 | 90 | # We always use _stext as the target function, since there might be multiple 91 | # symbols with the same name. 92 | assert self.angr_mgr is not None 93 | target_sym = self.angr_mgr.get_sym('_stext') 94 | offset = addr - target_sym.rebased_addr 95 | assert offset >= 0 96 | 97 | kprobe = self.ftrace.KprobeEvent( 98 | ftrace = self.ftrace, 99 | probe_type = prefix, 100 | event_name = ename, 101 | module_name = '', 102 | target_function = target_sym, 103 | probe_offset = offset, 104 | extra = extra) 105 | 106 | # self.kprobes[key] = kprobe 107 | return kprobe 108 | 109 | def set_ret_probes(self, syms:Set[Symbol]) -> List: 110 | events = list() 111 | for sym in Pbar("setting ret probes", items=syms, unit="symbol"): 112 | e = self.get_kprobe(addr = sym.rebased_addr, ret=True, extra='ret=$retval') 113 | if e is not None: 114 | events.append(e) 115 | 116 | for e in events: 117 | e.enable = True 118 | return events 119 | 120 | def record(self, args:List[str]): 121 | """ 122 | Record function to trace kernel failures using kprobes 123 | 124 | :param args: command line arguments 125 | """ 126 | assert self.angr_mgr is not None 127 | 128 | ftrace = Ftrace.main_instance(self.angr_mgr) 129 | ftrace.tracing_on = False 130 | 131 | stext_addr = self.angr_mgr.get_sym_addr("_stext") 132 | assert stext_addr is not None 133 | 
ftrace.kprobe_event_disable_all() 134 | 135 | ftrace.init_kprobe_base("_stext", self.angr_mgr.get_sym_addr) 136 | pr_msg("starting the process...", level='TITLE', new_line_before=True) 137 | 138 | try: 139 | self.init_process(args) 140 | except (FileNotFoundError, PermissionError) as e: 141 | pr_msg(f"error starting process: {e}", level="FATAL") 142 | return 0 143 | 144 | ftrace.buffer_size_kb = self.snapshot_size 145 | ftrace.irq_info = False 146 | ftrace.event_fork = False 147 | ftrace.function_fork = False 148 | sys_exit_event = self.set_sysexit_filter(ftrace, True) 149 | ftrace.stacktrace = False 150 | ftrace.func_stack_trace = True 151 | 152 | trace_events = [ 153 | ftrace.get_event(ev) 154 | for ev in ['raw_syscalls/sys_enter'] + self.SKIP_TRACE_EVENTS + self.RESUME_TRACE_EVENTS 155 | ] + [sys_exit_event] 156 | 157 | while True: 158 | # Cleanup if we did not finish nicely the last error 159 | ftrace.remove_all_probes() 160 | ftrace.current_tracer = 'nop' 161 | ftrace.func_filter = [] 162 | ftrace.sym_addr = True 163 | for ev in trace_events: 164 | ev.enable = False 165 | 166 | pr_msg("waiting for failure...", level='TITLE', new_line_before=True) 167 | syscall = self.wait_for_syscall(None) 168 | if syscall is None: 169 | pr_msg("execution ended", level="INFO") 170 | break 171 | 172 | process = syscall.process 173 | ftrace.pid = process.pid 174 | ftrace.event_pid = process.pid 175 | 176 | self.print_syscall_info(syscall) 177 | 178 | pr_msg('stage 1: producing call graph', level='TITLE', new_line_before=True) 179 | ftrace.current_tracer = 'function' 180 | 181 | for ev in trace_events: 182 | ev.enable = True 183 | 184 | try: 185 | snapshot = self.rerun_get_snapshot(process, syscall) 186 | except Exception as e: 187 | pr_msg(f'error: {e}', level="ERROR") 188 | continue 189 | 190 | snapshot = self.cleanup_callstack(snapshot) 191 | snapshot = self.remove_snapshot_irqs(snapshot) 192 | trace_syms = self.get_ftrace_snapshot_syms(snapshot) 193 | 194 | ftrace.tracing_on 
= False 195 | 196 | pr_msg(f'stage 2: obtaining return values ({len(trace_syms)} functions)', 197 | level='TITLE', new_line_before=True) 198 | 199 | ret_probes = self.set_ret_probes(trace_syms) 200 | trace_syms.intersection_update([self.angr_mgr.get_sym(probe.addr) for probe in ret_probes]) 201 | 202 | if not self.set_func_tracing(trace_syms): 203 | exit(1) 204 | 205 | try: 206 | snapshot = self.rerun_get_snapshot(process, syscall) 207 | except Exception as e: 208 | pr_msg(f'error: {e}', level="ERROR") 209 | continue 210 | 211 | ftrace.remove_all_probes() 212 | snapshot = self.cleanup_callstack(snapshot) 213 | snapshot = self.remove_snapshot_irqs(snapshot) 214 | snapshot = self.remove_untracked_from_snapshot(snapshot) 215 | trace_syms = self.get_ftrace_snapshot_syms(snapshot) 216 | 217 | pr_msg("stage 3: creating trace", level='TITLE', new_line_before=True) 218 | 219 | reachable_syms = self.angr_mgr.process_reachable_syms(trace_syms) 220 | probe_addrs, probe_syms = self.tracking_probe_addrs(reachable_syms) 221 | self.set_ret_probes(probe_syms) 222 | 223 | if not self.set_func_tracing(probe_syms): 224 | exit() 225 | self.set_probes(probe_addrs) 226 | snapshot = self.rerun_get_snapshot(process, syscall) 227 | ftrace.remove_all_probes() 228 | 229 | snapshot = self.cleanup_callstack(snapshot) 230 | snapshot = self.remove_snapshot_irqs(snapshot) 231 | # TODO: Save the reachable syms 232 | snapshot = self.remove_untracked_from_snapshot(snapshot, probe_syms) 233 | 234 | # Save regardless to live analysis 235 | self.log_kprobes_failure(syscall=syscall, 236 | trace=snapshot, 237 | pid=process.pid, 238 | probe_addrs=probe_addrs, 239 | sim_syms=reachable_syms) 240 | 241 | if self.early_stop: 242 | for p in self.dbg.list: 243 | p.kill() 244 | break 245 | 246 | self.save_failures("kprobes") 247 | 248 | # turn everything off again 249 | for ev in trace_events: 250 | ev.enable = False 251 | 252 | ftrace.current_tracer = 'nop' 253 | ftrace.tracing_on = False 254 | 
ftrace.func_stack_trace = False 255 | ftrace.pid = [] 256 | ftrace.event_pid = [] 257 | sys_exit_event.trigger = None 258 | 259 | def get_ftrace_snapshot_syms(self, snapshot:List[Dict[str,Any]]) -> Set[Symbol]: 260 | assert self.angr_mgr is not None 261 | 262 | syms = {entry['callstack_syms'][0] for entry in snapshot 263 | if entry['type'] == 'func' and 'callstack_syms' in entry} 264 | 265 | syms = {sym for sym in syms if sym and not self.is_invalid_func_probe(sym) and 266 | not self.angr_mgr.is_noprobe_sym(sym)} 267 | 268 | # Ensure we can disasm each symbol 269 | syms = {sym for sym in syms if self.angr_mgr.disasm_sym(sym)} 270 | 271 | return syms 272 | 273 | 274 | def remove_untracked_from_snapshot(self, snapshot:List[Dict], syms:Optional[Set[Symbol]]=None) -> List[Dict]: 275 | assert self.angr_mgr is not None 276 | 277 | entry_syms = {self.angr_mgr.get_sym(s) for s in arch.syscall_entry_points} 278 | found_entry_point = False 279 | cleaned = list() 280 | untracked = 0 281 | ignored_caller_syms = {self.angr_mgr.get_sym(s) for s in arch.syscall_entry_points} 282 | 283 | for l in Pbar("cleaning ftrace", items=snapshot, unit="line"): 284 | to_sym = l['callstack_syms'][0] if len(l.get('callstack_syms', [])) > 0 else None 285 | from_sym = l['callstack_syms'][1] if len(l.get('callstack_syms', [])) > 1 else None 286 | 287 | if not found_entry_point: 288 | if l['type'] != 'func' or from_sym not in entry_syms: 289 | continue 290 | found_entry_point = True 291 | 292 | if l['type'] == 'func': 293 | if untracked > 0: 294 | untracked += 1 295 | continue 296 | 297 | for callstack_sym in l['callstack_syms']: 298 | if callstack_sym and callstack_sym.name in arch.syscall_entry_points: 299 | break 300 | 301 | if (callstack_sym is None or 302 | self.angr_mgr.is_noprobe_sym(callstack_sym) or 303 | (syms is not None and callstack_sym not in syms|entry_syms|ignored_caller_syms)): 304 | untracked = 1 305 | break 306 | 307 | if untracked > 0: 308 | continue 309 | 310 | # Ignore 
interrupts, exceptions 311 | prev_insn = self.angr_mgr.get_prev_insn(l['from_ip']) 312 | if prev_insn is None or not arch.is_branch_insn(prev_insn): 313 | pr_msg(f'failed insn {prev_insn} to {hex(l["to_ip"])}', level="ERROR") 314 | assert(0 == 1) 315 | continue 316 | 317 | if to_sym is None or (syms is not None and self.is_invalid_func_probe(to_sym)): 318 | untracked = 1 319 | continue 320 | 321 | elif l['type'] == 'ret': 322 | if untracked > 0: 323 | untracked -= 1 324 | continue 325 | 326 | if untracked == 0: 327 | cleaned.append(l) 328 | 329 | return cleaned 330 | 331 | def log_kprobes_failure(self, 332 | syscall: PtraceSyscall, 333 | trace: List[Dict[str, Union[int, str, float, List]]], 334 | pid:int, 335 | probe_addrs:Iterable[int], 336 | sim_syms:Iterable[Symbol]): 337 | failure = { 338 | 'syscall': syscall.syscall, 339 | 'errcode': -syscall.result, 340 | 'trace_id': len(self.traces), 341 | 'pid': pid, 342 | 'probe_addrs': probe_addrs, 343 | 'sim_syms': [s.rebased_addr for s in sim_syms], 344 | } 345 | for trace_entry in trace: 346 | trace_entry.pop('callstack_syms', None) 347 | 348 | # TODO: delete some more useless stuff 349 | 350 | self.traces.append(trace) # type: ignore 351 | self.failures.append(failure) 352 | 353 | def rerun_get_snapshot(self, process:PtraceProcess, failing_syscall:PtraceSyscall) -> List[Dict[str, Any]]: 354 | ftrace = Ftrace.main_instance() 355 | ftrace.clear_snapshot() 356 | ftrace.tracing_on = True 357 | self.restart_syscall(process, failing_syscall) 358 | syscall = self.wait_for_syscall(process) 359 | ftrace.tracing_on = False 360 | 361 | if syscall is None or syscall.result != failing_syscall.result: 362 | raise ValueError("reproduction error") 363 | 364 | assert syscall.process == process 365 | assert syscall.instr_pointer == failing_syscall.instr_pointer 366 | 367 | s = ftrace.get_snapshot(self.SKIP_TRACE_EVENTS, self.RESUME_TRACE_EVENTS) 368 | return s 369 | 370 | def cleanup_callstack(self, trace:List[Dict[str, Any]]) -> 
List[Dict[str, Any]]: 371 | addr_to_sym:Dict[int, Symbol] = dict() 372 | 373 | def get_sym(addr): 374 | if addr in addr_to_sym: 375 | return addr_to_sym[addr] 376 | 377 | try: 378 | sym = self.angr_mgr.get_sym(addr) 379 | except ValueError: 380 | sym = None 381 | addr_to_sym[addr] = sym 382 | return sym 383 | 384 | last_callstack:List[int] = list() 385 | last_callstack_syms:List[Symbol] = list() 386 | 387 | for l in Pbar("finding symbols", items=trace): 388 | callstack_syms = [] 389 | if 'to_ip' not in l or l.get('type') != 'func': 390 | continue 391 | to_sym = get_sym(l['to_ip']) 392 | from_sym = get_sym(l['from_ip']) 393 | 394 | if to_sym is not None and from_sym is not None: 395 | callstack = [l['to_ip'], l['from_ip']] 396 | callstack_syms = [to_sym, from_sym] 397 | 398 | # TODO: Consider whether we actually save the callstack on return 399 | if 'callstack' not in l or l['callstack'] is None: 400 | continue 401 | 402 | # The callstack is really dirty: There is some junk of ftrace on top 403 | # of to_sym in callstack_sym. Get rid of it. Then the from entry is 404 | # not always there, so we need to check whether to skip it. 
405 | skip:Optional[str] = "to" 406 | for callstack_entry in l['callstack']: 407 | sym = callstack_entry and get_sym(callstack_entry) 408 | 409 | if skip == "to": 410 | if sym == to_sym: 411 | skip = "from" 412 | continue 413 | if skip == "from": 414 | skip = None 415 | if sym == from_sym: 416 | continue 417 | 418 | callstack.append(callstack_entry) 419 | callstack_syms.append(sym) 420 | 421 | # Guess the symbols and the addresses we did not figure out from the last stack 422 | for i, v in enumerate(reversed(callstack)): 423 | if v is None and i < len(last_callstack): 424 | callstack[-i-1] = last_callstack[-i-1] 425 | callstack_syms[-i-1] = last_callstack_syms[-i-1] 426 | 427 | last_callstack_syms = callstack_syms 428 | last_callstack = callstack 429 | 430 | l['callstack_syms'] = callstack_syms 431 | l['callstack'] = callstack 432 | return trace 433 | 434 | def wait_for_syscall(self, process:Optional[PtraceProcess]) -> Optional[PtraceSyscall]: 435 | while len(self.dbg.list) != 0: 436 | process_filter = [process] if process is not None else self.dbg.list 437 | stopped = filter(lambda p: p.is_stopped, process_filter) 438 | for p in stopped: 439 | signum = 0 440 | if len(self.pending_signals[p.pid]) != 0: 441 | signum = self.pending_signals[p.pid].popleft() 442 | try: 443 | p.syscall(signum) 444 | except (ptrace.debugger.ProcessExit, ptrace.PtraceError) as exc: 445 | pr_msg(f"error waiting for syscall failure {exc}", level="WARN") 446 | 447 | signum = 0 448 | is_syscall = False 449 | 450 | trapped_process: PtraceProcess 451 | 452 | try: 453 | e = self.dbg.waitSyscall() 454 | is_syscall = True 455 | trapped_process = e.process 456 | except ptrace.debugger.ProcessExit as e: 457 | e.process.processExited(e) 458 | trapped_process = e.process 459 | except ptrace.debugger.ProcessSignal as e: 460 | self.pending_signals[e.process.pid].append(e.signum) 461 | trapped_process = e.process 462 | except ptrace.debugger.NewProcessEvent as e: 463 | e.process.parent.is_stopped = True 
464 | trapped_process = e.process 465 | except ptrace.debugger.ProcessExecution as e: 466 | # It should have been marked as stopped, but it is not 467 | e.process.is_stopped = True 468 | trapped_process = e.process 469 | 470 | if not is_syscall: 471 | continue 472 | 473 | if process_filter and trapped_process not in process_filter: 474 | # TODO: queue the process to be resumed or analyzed later, since 475 | # otherwise we might miss failures 476 | continue 477 | 478 | try: 479 | syscall = trapped_process.syscall_state.event(ptrace.func_call.FunctionCallOptions()) 480 | except (ptrace.debugger.ProcessExit, ptrace.PtraceError) as exc: 481 | pr_msg(f'error getting syscall info: {exc}', level='WARN') 482 | continue 483 | 484 | # For syscall entry, the result is None 485 | if syscall.result is None: 486 | continue 487 | 488 | # On reproduction, process is not None and we do not care about the 489 | # result and the syscall. (There might be some strange scenario that 490 | # we do if some signal is involved, but ignore it.) 491 | if process is None: 492 | if self.syscall_filter is not None and self.syscall_filter != syscall.syscall: 493 | continue 494 | 495 | if (syscall.result >= 0 or 496 | (self.errcode_filter and self.errcode_filter != -syscall.result)): 497 | continue 498 | 499 | self.occurrences += 1 500 | if self.occurrences_filter is not None and self.occurrences not in self.occurrences_filter: 501 | continue 502 | 503 | return syscall 504 | 505 | return None 506 | 507 | def remove_snapshot_irqs(self, snapshot:List[Dict]) -> List[Dict]: 508 | """ 509 | Removes all IRQ-related events from a given snapshot, including all 510 | events between an irqenter event and its corresponding irqexit event. 511 | 512 | :param snapshot: A list of dictionaries representing events in the snapshot. 513 | :return: The input snapshot with all IRQ-related events removed. 
514 | """ 515 | irq_depth = 0 516 | filtered_snapshot = [] 517 | for event in Pbar("remove irqs", snapshot): 518 | if event['type'] == 'irqenter': 519 | irq_depth += 1 520 | elif event['type'] == 'irqexit': 521 | irq_depth -= 1 522 | elif irq_depth == 0: 523 | filtered_snapshot.append(event) 524 | return filtered_snapshot 525 | 526 | def analyze_probe_insns(self, sym:Symbol) -> Set[CsInsn]: 527 | assert self.angr_mgr is not None 528 | 529 | def collect(sym: Symbol, insn:CsInsn, **kwargs): 530 | assert self.angr_mgr is not None 531 | 532 | # Do not put probes on the first instruction of a function, as we 533 | # have already set a probe on the function. 534 | insns = kwargs['insns'] 535 | if (arch.is_predicated_mov(insn) or arch.is_cond_branch_insn(insn) or 536 | arch.is_rep_insn(insn)): 537 | insns.add(insn) 538 | 539 | if arch.is_rep_insn(insn): 540 | # For rep-prefix, we need to trace the counter on the following 541 | # instruction to figure out how many iterations were executed. 542 | insns.add(self.angr_mgr.next_insn(insn)) 543 | if arch.is_indirect_branch_target(insn): 544 | # We cannot put a probe point on the ENDBRxx instructions. Instead 545 | # put on the next one. Anyhow, we do not care about the first instruction 546 | # in a symbol. 547 | if (insn.address != self.angr_mgr.get_sym_addr(sym) or 548 | not Ftrace.is_available_filter_function(sym)): 549 | insns.add(self.angr_mgr.next_insn(insn)) 550 | elif arch.is_direct_call_insn(insn): 551 | # On calls to functions that cannot be probed, keep the return 552 | # value. We will create an artifical fork based on the return 553 | # value if the return value is the error code. 
554 | tgt = arch.get_direct_branch_target(insn) 555 | try: 556 | tgt_sym = self.angr_mgr.get_sym(tgt) 557 | except: 558 | tgt_sym = None 559 | 560 | if (tgt_sym is None or 561 | (tgt_sym.name not in self.NORETURN_FUNCS and 562 | not Ftrace.is_available_filter_function(tgt_sym))): 563 | try: 564 | insns.add(self.angr_mgr.next_insn(insn)) 565 | except: 566 | pass 567 | elif arch.is_indirect_call_insn(insn): 568 | # We might not have the callee as instrumentable. We would add the next 569 | # instruction to the probe list. It would have been better to figure out 570 | # from the trace whether we can actually trace without this probe point. 571 | insns.add(self.angr_mgr.next_insn(insn)) 572 | 573 | insns:Set[CsInsn] = set() 574 | self.angr_mgr.for_each_insn_in_sym(sym, collect, insns=insns) 575 | return insns 576 | 577 | # Returns addresses of probes, set of symbols to trace entry, set of symbols 578 | # to simulate. 579 | def tracking_probe_addrs(self, syms:Set[Symbol]) -> Tuple[Set[int], Set[Symbol]]: 580 | probe_syms:Set[Symbol] = set() 581 | probe_insns:Set[CsInsn] = set() 582 | 583 | for sym in Pbar("find probe points", syms, unit="symbol"): 584 | if self.is_invalid_func_probe(sym): 585 | pr_msg(f"cannot set func probe on {sym.name}", level="DEBUG") 586 | continue 587 | 588 | insns = self.analyze_probe_insns(sym) 589 | cannot_probe = {insn.address for insn in insns if self.is_invalid_probe(insn)} 590 | if len(cannot_probe) == 0: 591 | probe_insns |= insns 592 | probe_syms.add(sym) 593 | else: 594 | cannot_probe_first = next(iter(cannot_probe)) 595 | cannot_probe_addr = (cannot_probe_first if isinstance(cannot_probe_first, int) 596 | else cannot_probe_first.address) 597 | pr_msg(f"cannot set probe on {sym.name} (e.g., {hex(cannot_probe_addr)})", level="DEBUG") 598 | 599 | probe_addrs = {insn.address for insn in probe_insns} - {sym.rebased_addr for sym in probe_syms} 600 | return (probe_addrs, probe_syms) 601 | 602 | def invalid_func_probe_cause(self, sym: Symbol) 
-> Optional[str]: 603 | assert self.angr_mgr is not None 604 | 605 | ftrace = Ftrace.main_instance() 606 | 607 | if sym is None: 608 | return 'none' 609 | if not ftrace.is_available_filter_function(sym): 610 | return 'func blacklisted' 611 | if self.angr_mgr.is_noprobe_sym(sym): 612 | return 'discarded' 613 | return None 614 | 615 | def is_invalid_func_probe(self, sym: Symbol) -> bool: 616 | return self.invalid_func_probe_cause(sym) is not None 617 | 618 | def is_invalid_probe(self, insn: CsInsn) -> Optional[str]: 619 | addr = insn.address 620 | 621 | # Detect UD2: cannot set kprobes 622 | if insn.bytes == b'\x0f\x0b': 623 | return 'bug' 624 | 625 | # Indirect jumps cannot be patched (possibly due to spectre) 626 | if arch.is_indirect_jmp_insn(insn): 627 | return "indirect-jmp" 628 | 629 | # Check if the address is blacklisted in ftrace 630 | ftrace = Ftrace.main_instance() 631 | if ftrace.main_instance().is_kprobe_blacklisted(addr): 632 | return 'blacklisted' 633 | 634 | # Check if the address is invalid for kprobe (e.g., static key/call) 635 | if ftrace.is_invalid_kprobe_addr(addr): 636 | return 'invalid' 637 | 638 | # If none of the conditions above are met, the probe is valid 639 | return None --------------------------------------------------------------------------------