├── mypy.ini
├── .gitignore
├── arch.py
├── setup.py
├── requirements.txt
├── NOTICE
├── setup.cfg
├── LICENSE
├── controlstateplugin.py
├── ignore_funcs_pure.txt
├── kcore.py
├── prmsg.py
├── syscall_failure_ebpf.c
├── addr2line.py
├── CONTRIBUTING_DCO.md
├── CODE_OF_CONDUCT.md
├── abstractarch.py
├── syscall.py
├── recorder.py
├── README.md
├── simprocedures.py
├── kprobesreporter.py
├── syscall-failure-analyzer.py
├── intelptrecorder.py
├── kallsyms.py
├── reporter.py
├── x86arch.py
├── intelptreporter.py
└── kprobesrecorder.py


/mypy.ini:
--------------------------------------------------------------------------------
1 | # Copyright 2023 VMware, Inc.
2 | # SPDX-License-Identifier: BSD-2-Clause
3 | [mypy]
4 | ignore_missing_imports = True
5 | incremental = True
6 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Ignore everything in this directory
 2 | *
 3 | # Except this file
 4 | !.gitignore
 5 | !*.py
 6 | !setup.cfg
 7 | !*.c
 8 | !mypy.ini
 9 | !*.txt
10 | 


--------------------------------------------------------------------------------
/arch.py:
--------------------------------------------------------------------------------
1 | # Copyright 2023 VMware, Inc.
2 | # SPDX-License-Identifier: BSD-2-Clause
3 | from abstractarch import Arch
4 | from x86arch import ArchX86
5 | 
# Module-level architecture object that other modules obtain via
# `from arch import arch`; the x86 implementation is the one instantiated here.
arch: Arch = ArchX86()


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | #!/bin/python3
2 | # Copyright 2023 VMware, Inc.
3 | # SPDX-License-Identifier: BSD-2-Clause
4 | 
5 | from setuptools import setup
# Invoke setuptools only when this file is executed as a script. setup() is
# called without arguments here.
if __name__ == '__main__':
    setup()


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | # Copyright 2023 VMware, Inc.
 2 | # SPDX-License-Identifier: BSD-2-Clause
 3 | angr==9.2.25
 4 | ansicolors==1.1.8
 5 | capstone==4.0.2
 6 | claripy==9.2.25
 7 | cle==9.2.25
 8 | numpy==1.23.5
 9 | lz4==4.3.2
10 | pyelftools==0.29
11 | psutil==5.9.4
12 | pyseccomp==0.1.2
13 | pytest==7.2.2
14 | python_ptrace==0.9.8
15 | tqdm==4.64.1
16 | 


--------------------------------------------------------------------------------
/NOTICE:
--------------------------------------------------------------------------------
1 | Copyright 2023 VMware, Inc.
2 | 
3 | This product is licensed to you under the BSD 2 clause (the "License"). You may not use this product except in compliance with the License.
4 | 
5 | This product may include a number of subcomponents with separate copyright notices and license terms. Your use of these subcomponents is subject to the terms and conditions of the subcomponent's license, as noted in the LICENSE file.


--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
 1 | [metadata]
 2 | name = errexp
 3 | version = 0.1
 4 | author = Nadav Amit
 5 | description = Linux syscall kernel error analyzer
 6 | long_description = file: README.rst, CHANGELOG.rst, LICENSE.rst
 7 | keywords = kernel, syscall, error
 8 | license = BSD 2-Clause License
 9 | classifiers =
10 |     Programming Language :: Python :: 3
11 |     Programming Language :: Python :: 3.8
12 |     Programming Language :: Python :: 3.9
13 |     Programming Language :: Python :: 3.10
14 | 
15 | [options]
16 | zip_safe = False
17 | include_package_data = True
18 | packages = find:
19 | python_requires = >=3.8, <4
20 | install_requires =
21 |     angr==9.2.25
22 |     ansicolors==1.1.8
23 |     bcc==0.1.10
24 |     capstone==4.0.2
25 |     claripy==9.2.25
26 |     cle==9.2.25
27 |     numpy==1.23.5
28 |     lz4==4.3.2
29 |     ptrace==1.0.1
30 |     pyelftools==0.29
31 |     psutil==5.9.4
32 |     pyseccomp==0.1.2
33 |     pytest==7.2.2
34 |     python_ptrace==0.9.8
35 |     tqdm==4.64.1
36 | 
37 | [options.package_data]
38 | * = *.txt, *.rst, tests/run_tests.sh, tests/Makefile, tests/src/*
39 | 
40 | [options.entry_points]
41 | console_scripts =
42 |     deeperr = my_package.deeperr:main
43 | 
44 | [options.packages.find]
45 | exclude =
46 |     examples*
47 |     tools*
48 |     docs*
49 |     my_package.tests*
50 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Redistribution and use in source and binary forms, with or without
 2 | modification, are permitted provided that the following conditions are
 3 | met:
 4 | 
 5 |    1. Redistributions of source code must retain the above copyright
 6 |       notice, this list of conditions and the following disclaimer.
 7 | 
 8 |    2. Redistributions in binary form must reproduce the above
 9 |       copyright notice, this list of conditions and the following
10 |       disclaimer in the documentation and/or other materials provided
11 |       with the distribution.
12 | 
13 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
14 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
15 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
16 | A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
17 | HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
18 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
19 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


--------------------------------------------------------------------------------
/controlstateplugin.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2023 VMware, Inc.
 2 | # SPDX-License-Identifier: BSD-2-Clause
 3 | import angr
 4 | import capstone
 5 | import copy
 6 | from typing import Any, Dict, List, Optional
 7 | 
 8 | from arch import arch
 9 | 
class ControlStatePlugin(angr.SimStatePlugin):
    """State plugin holding the bookkeeping for replaying a recorded branch trace.

    The plugin stores the list of branches that still have to be consumed
    (``branches``), how many were already consumed (``done_branches``) and a
    handful of flags describing the replay.  ``last_insn`` is refreshed from
    a state through ``update()``.
    """
    STEP_TIMEOUT: int = 10

    def __init__(self, angr_mgr, detailed_trace:bool, branches:List[Dict], done_branches:int):
        super().__init__()
        self.angr_mgr = angr_mgr

        # Position in the recorded trace
        self.branches:List[Dict] = branches
        self.done_branches = done_branches
        self.expected_ip:Optional[int] = None
        self.__last_insn = None

        # Depth tracking
        self.max_depth = 0x10000    # Just if something goes wrong
        self.stop_depth = 0
        self.last_depth = None

        # Save whether the trace is detailed and includes REP instructions and predicated moves
        self.detailed_trace = detailed_trace

        # Replay flags
        self.backtracking = False
        self.diverged = False
        self.in_simulated = True
        self.no_callees = False
        self.only_symbols = None

        self.arch = arch.controlStatePluginArch()

    @angr.SimStatePlugin.memo
    def copy(self, memo) -> 'ControlStatePlugin':
        """Return a shallow copy of the plugin whose ``arch`` is itself a shallow copy."""
        duplicate = copy.copy(self)
        duplicate.arch = copy.copy(self.arch)
        return duplicate

    @property
    def current_branch(self) -> Optional[Dict[str, Any]]:
        """First pending branch, or None when no branch is left.

        Must not be read while ``backtracking`` is set (asserted).
        """
        assert not self.backtracking
        if self.branches:
            return self.branches[0]
        return None

    def match_src(self) -> bool:
        """Tell whether the last instruction is the source of the current branch."""
        branch = self.current_branch
        insn = self.last_insn
        if branch is None or insn is None:
            return False
        return branch['from_ip'] == insn.address

    def update(self, s:angr.SimState):
        """Record the instruction at the IP of ``s`` (None when the state has no IP)."""
        ip = self.angr_mgr.state_ip(s)
        if ip is None:
            self.__last_insn = None
        else:
            self.__last_insn = self.angr_mgr.get_insn(ip)

    @property
    def last_insn(self) -> capstone.CsInsn:
        """Instruction stored by the latest ``update()``; None until one was stored."""
        return self.__last_insn

    def trace_finished(self) -> bool:
        """Return True once every recorded branch has been consumed."""
        return not self.branches

    def next_branch(self) -> bool:
        """Consume the current branch.

        Returns False when the trace was already finished or becomes
        finished by this step, True when more branches remain.
        """
        if self.trace_finished():
            return False
        # Rebind to a new list instead of popping in place: copy() is shallow,
        # so other plugin copies may still reference the old list object.
        self.branches = self.branches[1:]
        self.done_branches += 1
        return not self.trace_finished()
64 | 
65 | 


--------------------------------------------------------------------------------
/ignore_funcs_pure.txt:
--------------------------------------------------------------------------------
  1 | # Copyright 2023 VMware, Inc.
  2 | # SPDX-License-Identifier: BSD-2-Clause
  3 | #
  4 | # Some of those are not really pure but we want to ignore their
  5 | # side-effects.
  6 | _raw_spin_lock
  7 | _raw_spin_unlock
  8 | trace_event_raw_event_sys_enter
  9 | schedule
 10 | __schedule
 11 | __cond_resched
 12 | perf_prepare_sample
 13 | __perf_event_header__init_id
 14 | # __rcu_read[un]lock are used by ftrace which means we might lose calls to
 15 | # them.
 16 | __rcu_read_lock
 17 | __rcu_read_unlock
 18 | sched_clock
 19 | sched_clock_cpu
 20 | perf_output_copy
 21 | perf_output_sample
 22 | # Tracing ptrace and exit/entry is wasteful
 23 | ptrace_do_notify
 24 | ptrace_notify
 25 | syscall_trace_enter
 26 | __traceiter_sys_exit
 27 | syscall_exit_work
 28 | syscall_exit_to_user_mode
 29 | # Tracing locks is mostly unnecessary (it might in some corner cases cause
 30 | # simulation to fail though.)
 31 | up_read
 32 | up_write
 33 | down_read
 34 | down_write
 35 | down_write_killable
 36 | _raw_spin_lock_irqsave
 37 | _raw_spin_unlock_irqrestore
 38 | # Memory allocation is just overhead
 39 | kmem_cache_alloc
 40 | kmem_cache_free
 41 | trampoline_handler # Special we still want the callees
 42 | __kmalloc
 43 | kfree
 44 | ptrace_stop
 45 | #__kmalloc_track_caller 
 46 | mutex_lock
 47 | mutex_unlock
 48 | ptrace_notify
 49 | irq_enter_rcu
 50 | call_rcu
 51 | syscall_exit_work # After the syscall was already executed
 52 | scheduler_tick
 53 | update_process_times
 54 | tick_sched_handle
 55 | tick_periodic
 56 | ### CHECK - we ignore them to make kprobe more robust and avoid kernel crashes
 57 | hrtick_update
 58 | rcu_core
 59 | rcu_note_context_switch
 60 | rcu_core_si
 61 | run_rebalance_domains
 62 | profile_tick
 63 | idle_cpu
 64 | __do_softirq
 65 | __kmem_cache_free
 66 | vprintk
 67 | invoke_rcu_core
 68 | module_put
 69 | putname
 70 | nohz_balance_exit_idle
 71 | update_cfg_group
 72 | load_balance
 73 | check_cfs_rq_runtime
 74 | update_blocked_averages
 75 | rebalance_domains
 76 | _printk
 77 | hrtimer_interrupt
 78 | __hrtimer_run_queues
 79 | tick_sched_timer
 80 | tick_sched_do_timer
 81 | irq_exit_rcu
 82 | lapic_next_deadline
 83 | sysvec_apic_timer_interrupt
 84 | cgroup_rstat_updated
 85 | __destroy_inode
 86 | truncate_inode_pages_range
 87 | __const_udelay
 88 | delay_tsc
 89 | destroy_inode
 90 | _raw_spin_unlock_irq
 91 | truncate_inode_pages_final
 92 | __inode_wait_for_writeback
 93 | evict
 94 | __inode_wait_for_writeback
 95 | iput
 96 | perf_trace_buf_alloc
 97 | perf_trace_sys_exit
 98 | perf_trace_run_bpf_submit
 99 | trace_call_bpf
100 | migrate_disable
101 | memcg_account_kmem
102 | perf_iterate_ctx
103 | perf_event_switch_output
104 | __wake_up
105 | local_clock
106 | native_sched_clock
107 | perf_iterate_sb
108 | syscall_enter_from_user_mode
109 | mntput
110 | dput
111 | mntput_no_expire
112 | rb_erase
113 | wakeup_source_unregister
114 | release_sock
115 | __check_object_size
116 | __check_object_size.part.0
117 | current_time
118 | ktime_get_coarse_real_ts64
119 | ext4_inode_csum
120 | __srcu_read_lock
121 | __srcu_read_unlock
122 | fsnotify_destroy_marks
123 | stop_this_handle
124 | mnt_drop_write
125 | dentry_unlink_inode
126 | # Some should be non-pure
127 | crypto_shash_update
128 | chacha_permute
129 | ep_remove
130 | ext4_fc_stop_update
131 | # Need to figure out why the following is not figured out automatically
132 | rcu_read_unlock_strict
133 | __wait_for_common
134 | percpu_down_write


--------------------------------------------------------------------------------
/kcore.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2023 VMware, Inc.
  2 | # SPDX-License-Identifier: BSD-2-Clause
  3 | import re
  4 | import logging
  5 | from typing import List, Dict, Optional, Any
  6 | from elftools.elf.elffile import ELFFile
  7 | 
  8 | class Kcore:
  9 |     iomem_regex = re.compile(r'\s*(?P<start>[0-9a-f]+)\-(?P<end>[0-9a-f]+)\s+:\s+(?P<type>[^\n]+)')
 10 |     path = "/proc/kcore"
 11 | 
 12 |     # Singleton instance variable
 13 |     _instance:Optional['Kcore'] = None
 14 |     
 15 |     def __new__(cls):
 16 |         if cls._instance is None:
 17 |             cls._instance = super().__new__(cls)
 18 |         return cls._instance
 19 | 
 20 |     def __init__(self):
 21 |         if not hasattr(self, 'f_kcore'):
 22 |             self.open()
 23 | 
 24 |     def __del__(self):
 25 |         if hasattr(self, 'f_kcore'):
 26 |             self.f_kcore.close()
 27 | 
 28 |     def open_iomem(self):
 29 |         ranges = list()
 30 |         with open("/proc/iomem") as f:
 31 |             for l in f:
 32 |                 m = self.iomem_regex.match(l)
 33 |                 if m is None:
 34 |                     continue
 35 |                 d = m.groupdict()
 36 |                 if d['type'] != 'System RAM':
 37 |                     continue
 38 |                 ranges.append((int(d['start'], 16), int(d['end'], 16)))
 39 | 
 40 |     def open(self) -> bool:
 41 |         try:
 42 |             self.f_kcore = open(self.path, mode='rb')
 43 |         except PermissionError:
 44 |             raise Exception("no access to kcore")
 45 | 
 46 |         elf = ELFFile(self.f_kcore)
 47 | 
 48 |         self.phdr = list()
 49 |         for seg in iter(elf.iter_segments('PT_LOAD')):
 50 |             self.phdr.append(seg.header)
 51 | 
 52 |         self.modules = self.parse_proc_modules()
 53 | 
 54 |         return True
 55 |     
 56 |     def get_offset(self, addr: int) -> int:
 57 |         for s in self.phdr:
 58 |             if s.p_vaddr <= addr and addr < s.p_vaddr + s.p_filesz:
 59 |                 break
 60 | 
 61 |         if s is None:
 62 |             raise ValueError("Address not found")
 63 | 
 64 |         offset = addr - s.p_vaddr
 65 |         return s.p_offset + offset
 66 | 
 67 |     def read(self, addr:int, sz:int) -> bytes:
 68 |         found = None
 69 |         for s in self.phdr:
 70 |             if s.p_vaddr <= addr and addr < s.p_vaddr + s.p_filesz:
 71 |                 found = s
 72 |                 break
 73 | 
 74 |         if found is None:
 75 |             raise ValueError("Address not found")
 76 | 
 77 |         offset = addr - found.p_vaddr
 78 |         self.f_kcore.seek(s.p_offset + offset)
 79 |         try:
 80 |             b = self.f_kcore.read(sz)
 81 |         except:
 82 |             logging.info(f'failed to read kcore at {hex(addr)}')
 83 |             b = bytes()
 84 |         return b
 85 |     
 86 |     def parse_proc_modules(self) -> List[Dict[str, Any]]:
 87 |         modules = []
 88 | 
 89 |         with open('/proc/modules', 'r') as f:
 90 |             for line in f:
 91 |                 parts = line.strip().split(' ')
 92 |                 module_name = parts[0]
 93 |                 module_size = int(parts[1])
 94 |                 module_ref_count = None if parts[2] == '-' else int(parts[2])
 95 |                 module_dependencies = [dep for dep in parts[4].split(',') if dep != '-']
 96 |                 module_state = parts[4]
 97 |                 module_address = int(parts[5], 16)
 98 | 
 99 |                 module_info = {
100 |                     'name': module_name,
101 |                     'size': module_size,
102 |                     'ref_count': module_ref_count,
103 |                     'dependencies': module_dependencies,
104 |                     'state': module_state,
105 |                     'address': module_address
106 |                 }
107 |                 modules.append(module_info)
108 | 
109 |         return modules


--------------------------------------------------------------------------------
/prmsg.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2023 VMware, Inc.
  2 | # SPDX-License-Identifier: BSD-2-Clause
  3 | import sys
  4 | import logging
  5 | import tqdm
  6 | import colors
  7 | from time import time
  8 | from typing import Any, Dict, Tuple, List, Optional, Set, Iterable, TextIO, Sized, Iterable, Union
  9 | 
# Maps a pr_msg() level name to the way a message of that level is handled.
# Each value is a 4-tuple:
#   [0] logging function the message is passed to (None: not logged)
#   [1] foreground color used when printing to stdout/stderr (None: no color)
#   [2] whether the first root-logger handler is flushed after logging
#   [3] True to print to sys.stderr, False to print to output_file
# NOTE(review): 'DEBUG' is routed through logging.warning, so debug messages
# are logged at WARNING level - confirm this is intended.
level_to_logging = {
    #           logging-level,  color,      flush log,  stderr,
    'OP':       (logging.info,  None,       False,      True),
    'INFO':     (logging.info,  'green',    False,      True),
    'FATAL':    (logging.fatal, 'red',      True,       True),
    'ERROR':    (logging.error, 'red',      True,       True),
    'TITLE':    (None,          'blue',     False,      True),
    'DATA':     (None,          None,       False,      False),
    'WARN':     (logging.warning,'yellow',  False,      True),
    'DEBUG':    (logging.warning,'yellow',  False,      True),
}
 21 | 
 22 | startup_time = time()
 23 | 
 24 | def uptime() -> float:
 25 |     return time() - startup_time
 26 | 
 27 | output_file:TextIO = sys.stdout
 28 | quiet:bool = False
 29 | debug_mode:bool = False
 30 | 
 31 | def change_output(f_name:str):
 32 |     global output_file
 33 | 
 34 |     try:
 35 |         output_file = open(f_name, 'tw+')
 36 |     except Exception as exc:
 37 |         raise ValueError(f'error opening output file {f_name}: {str(exc)}')
 38 | 
 39 | def pr_msg(msg: str, level:str='INFO', new_line_before:bool=False, new_line_after:bool=False):
 40 |     global output_file
 41 | 
 42 |     l = level_to_logging[level]
 43 |     if l[0] is not None:
 44 |         l[0](msg)
 45 |         if l[2]:
 46 |             logging.getLogger().handlers[0].flush()
 47 |     
 48 |     o_file = sys.stderr if l[3] else output_file
 49 |     std_outputs = o_file in {sys.stderr, sys.stdout}
 50 | 
 51 |     if quiet:
 52 |         return
 53 | 
 54 |     if level == 'DEBUG' and not debug_mode:
 55 |         return
 56 | 
 57 |     if new_line_before or (Pbar.in_pbar != 0 and std_outputs):
 58 |         msg = '\n' + msg
 59 |     if new_line_after or (Pbar.in_pbar != 0 and std_outputs):
 60 |         msg += '\n'
 61 |     if std_outputs and l[1] is not None:
 62 |         msg = colors.color(msg, fg=l[1])
 63 |     print(msg, file=o_file)
 64 | 
 65 | class Pbar(tqdm.tqdm):
 66 |     in_pbar = 0
 67 | 
 68 |     def __init__(self, message:str, items:Optional[Union[Sized, Iterable]]=None, 
 69 |                 total:Optional[int]=None, unit:str='it', ignore_zero:bool=True,
 70 |                 disable:bool=False):
 71 |         assert total is not None or isinstance(items, Sized)
 72 | 
 73 |         if total is None and isinstance(items, Sized):
 74 |             total = len(items)
 75 |     
 76 |         if quiet or (ignore_zero and total == 0):
 77 |             disable = True
 78 | 
 79 |         logging.info(message)
 80 |         super().__init__(iterable=items, total=total, unit=unit, colour="green",
 81 |                         bar_format='{desc:<30.30}{percentage:3.0f}%|{bar:20}{r_bar}',
 82 |                         disable=disable)
 83 |         super().set_description(message)
 84 |         if not disable:
 85 |             Pbar.in_pbar += 1
 86 |         self.pbar_disabled = disable
 87 | 
 88 |     def update_to(self, n:int):
 89 |         super().update(n - self.n)
 90 | 
 91 |     def __disable(self):
 92 |         if not self.pbar_disabled:
 93 |             Pbar.in_pbar -= 1
 94 |         self.pbar_disabled = True
 95 | 
 96 |     def __del__(self):
 97 |         self.__disable()
 98 |         self.update(self.total - self.n)
 99 |         super().__del__()
100 |     
101 |     def __exit__(self, exc_type, exc_value, traceback):
102 |         self.__disable()
103 |         if exc_type == None:
104 |             self.update_to(self.total)
105 |         super().__exit__(exc_type, exc_value, traceback)
106 | 
107 |     def close(self):
108 |         self.__disable()
109 |         super().close()
110 | 
# Messages that warn_once() has already emitted
warned_once:Set[str] = set()

def warn_once(msg: str):
    """Log ``msg`` as a warning the first time it is seen; ignore repeats."""
    if msg in warned_once:
        return
    warned_once.add(msg)
    logging.warning(msg)


--------------------------------------------------------------------------------
/syscall_failure_ebpf.c:
--------------------------------------------------------------------------------
  1 | // Copyright 2023 VMware, Inc.
  2 | // SPDX-License-Identifier: BSD-2-Clause
  3 | #include <uapi/linux/ptrace.h>
  4 | #include <linux/sched.h>
  5 | //#include <bpf/bpf_helpers.h>
  6 | #include <bcc/proto.h>
  7 | 
// Keys of the config_map entries that trace_syscalls() looks up.
#define KEY_SYSCALL_NR 1
#define KEY_ERROR_CODE 2
#define KEY_PARENT_PID 3
#define KEY_OCCUR_TIMES 4
#define KEY_FLAGS 5

// Bit of the KEY_FLAGS value: send SIGSTOP after an event was reported.
#define STOP_ON_ERROR	(1ull << 0)

// trace_syscalls() drops return values that compare (unsigned) below this.
#define MIN_ERROR	((unsigned long)(-1024))
 17 | 
 18 | #if 0
 19 | // Just as a record to the filter format
 20 | struct syscall_filter_t {
 21 |     u64 syscall_nr;
 22 |     u64 error_code;
 23 |     u64 parent_pid;
 24 | };
 25 | #endif
 26 | 
// Record submitted on the syscall_events perf buffer by trace_syscalls().
struct syscall_event_t {
    u64 pid;            // upper 32 bits of bpf_get_current_pid_tgid()
    u64 syscall_nr;     // args->id of the sys_exit tracepoint
    u64 syscall_ret;    // args->ret of the sys_exit tracepoint
    u64 ts;             // bpf_ktime_get_ns() at submission
};

// Perf output channel for the events above.
BPF_PERF_OUTPUT(syscall_events);
// Filter configuration, indexed by the KEY_* constants.
BPF_HASH(config_map, u64, u64);

// State threaded through successive check_parent() calls.
struct loop_ctx {
    struct task_struct *task;   // task examined by the next call
    u64 parent_pid;             // tgid being searched for
    u32 is_parent;              // set to 1 once a task with that tgid is seen
};
 42 | 
// Examine loop_ctx->task and step to its real_parent.
//
// Returns 1 when the task pointer is NULL or when the task's tgid equals
// loop_ctx->parent_pid (is_parent is then set to 1); returns 0 otherwise.
// When the task has pid 1 the context is left unchanged, so later calls
// examine the same task again. loop_idx is not used.
// NOTE(review): the signature and 0/1 return look like a bpf_loop()
// callback (cf. the commented-out call in is_descendant) - confirm.
static inline u64 check_parent(u32 loop_idx, struct loop_ctx *loop_ctx) {
    struct task_struct *task = loop_ctx->task;

    if (task == NULL)
        return 1;

    if (task->tgid == loop_ctx->parent_pid) {
        loop_ctx->is_parent = 1;
        return 1;
    }

    // Do not walk past the task with pid 1
    if (task->pid == 1)
        return 0;

    task = (struct task_struct *)task->real_parent;
    loop_ctx->task = task;
    return 0;
}
 61 | 
// Tell whether the current task, or one of its ancestors reachable within 64
// real_parent steps, has tgid == parent_pid. Returns 1 if so, 0 otherwise.
//
// NOTE(review): the pid argument is not used; the walk always starts from
// bpf_get_current_task().
// NOTE(review): the return value of check_parent() is ignored, so all 64
// iterations run even after a match or after the walk has stopped.
static inline int is_descendant(u64 pid, u64 parent_pid) {
    struct loop_ctx loop_ctx;
    int i;

    loop_ctx.task = (struct task_struct *)bpf_get_current_task();
    loop_ctx.parent_pid = parent_pid;
    loop_ctx.is_parent = 0;

    //result = bpf_loop(1ul << 29, check_parent, (void *)(long)&loop_ctx, 0);
    // Fixed iteration count instead of the bpf_loop() call above
    for (i = 0; i < 64; i++) {
        check_parent(i, &loop_ctx);
    }

    return loop_ctx.is_parent;
}
 77 | 
// Handler for the raw_syscalls:sys_exit tracepoint arguments: reports a
// syscall whose return value passes the filters stored in config_map.
//
// Nothing is reported unless all five KEY_* entries exist. For the syscall
// number, error code, parent pid and occurrence count, the value -1ull
// disables that filter. Always returns 0.
int trace_syscalls(struct tracepoint__raw_syscalls__sys_exit *args) {
    struct syscall_event_t event = {};
    u64 syscall_nr_req, error_code_req, parent_pid, occur_times, flags;
    u64 pid = bpf_get_current_pid_tgid() >> 32;

    u64 key_syscall_nr = KEY_SYSCALL_NR;
    u64 key_error_code = KEY_ERROR_CODE;
    u64 key_parent_pid = KEY_PARENT_PID;
    u64 key_occur_times = KEY_OCCUR_TIMES;
    u64 key_flags = KEY_FLAGS;

    u64 *syscall_nr_ptr = config_map.lookup(&key_syscall_nr);
    u64 *error_code_ptr = config_map.lookup(&key_error_code);
    u64 *parent_pid_ptr = config_map.lookup(&key_parent_pid);
    u64 *occur_times_ptr = config_map.lookup(&key_occur_times);
    u64 *flags_ptr = config_map.lookup(&key_flags);

    u64 syscall_nr = args->id;
    u64 syscall_ret = args->ret;

    // Configuration incomplete: report nothing
    if (!syscall_nr_ptr || !error_code_ptr || !parent_pid_ptr || !occur_times_ptr || !flags_ptr)
        return 0;
    
    syscall_nr_req = *syscall_nr_ptr;
    error_code_req = *error_code_ptr;
    parent_pid = *parent_pid_ptr;

    // Filter on the syscall number
    if (syscall_nr != syscall_nr_req && syscall_nr_req != -1ull)
        return 0;

    // Unsigned compare: only return values in the top 1024 values of the
    // u64 range (i.e. -1024..-1 when read as signed) pass.
    if (syscall_ret < MIN_ERROR)
    	return 0;

    // Filter on the exact return value
    if (syscall_ret != error_code_req && error_code_req != -1ull)
        return 0;

    // Filter on the process ancestry
    if (parent_pid != -1ull && !is_descendant(pid, parent_pid))
        return 0;

    // Occurrence countdown: each matching failure decrements the stored
    // counter; only the one that brings it to 0 is reported, and once it is
    // 0 nothing is reported anymore.
    // NOTE(review): the read/decrement/update sequence is not atomic -
    // confirm concurrent matches on several CPUs are acceptable.
    occur_times = *occur_times_ptr;
    if (occur_times != -1ull) {
        if (occur_times == 0)
            return 0;

        occur_times--;
        config_map.update(&key_occur_times, &occur_times);

        if (occur_times != 0)
            return 0;
    }

    event.pid = pid;
    event.syscall_nr = syscall_nr;
    event.syscall_ret = syscall_ret;
    event.ts = bpf_ktime_get_ns();
    syscall_events.perf_submit(args, &event, sizeof(event));

    // Optionally stop the task that issued the failing syscall
    flags = *flags_ptr;
    if (flags & STOP_ON_ERROR)
        bpf_send_signal(SIGSTOP);

    return 0;
}
141 | 


--------------------------------------------------------------------------------
/addr2line.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2023 VMware, Inc.
  2 | # SPDX-License-Identifier: BSD-2-Clause
  3 | import re
  4 | import logging
  5 | import subprocess
  6 | from collections import defaultdict
  7 | from typing import List, Dict, Optional, Tuple
  8 | 
  9 | class Addr2Line:
 10 |     addr2line_loc_re = re.compile(r'(?P<file>[^\:]+):(?P<line>\d+)\s*(?P<disc>.*)')
 11 |     llvm_symbolizer_loc_re = re.compile(r'(?P<file>[^\:]+):(?P<line>\d+):(?P<col>\d+)\s*(?P<disc>.*)')
 12 | 
 13 |     __instance: Optional['Addr2Line'] = None
 14 |     __llvm_symbolizer = 'llvm-symbolizer'
 15 |     __addr2line = 'addr2line'
 16 | 
 17 |     @property
 18 |     def llvm_symbolizer(self) -> str:
 19 |         return self.__llvm_symbolizer
 20 | 
 21 |     @llvm_symbolizer.setter
 22 |     def llvm_symbolizer(self, llvm_symbolizer:str):
 23 |         self.__llvm_symbolizer = llvm_symbolizer
 24 | 
 25 |     @property
 26 |     def addr2line(self) -> str:
 27 |         return self.__addr2line
 28 | 
 29 |     @addr2line.setter
 30 |     def addr2line(self, addr2line:str):
 31 |         self.__addr2line = addr2line
 32 | 
 33 |     @staticmethod
 34 |     def get_instance():
 35 |         """ Static access method. """
 36 |         if Addr2Line.__instance == None:
 37 |             Addr2Line()
 38 |         return Addr2Line.__instance
 39 | 
 40 |     def __init__(self):
 41 |         """ Virtually private constructor. """
 42 |         if Addr2Line.__instance != None:
 43 |             raise Exception("This class is a singleton!")
 44 |         else:
 45 |             Addr2Line.__instance = self
 46 | 
 47 |     def run(self, obj_addrs:List[Tuple[str, int]]) -> Dict[Tuple[str, int], List[Dict]]:
 48 |         # Split the addresses according to the file (the first in the tuple)
 49 |         addr_dict:defaultdict[str, List[int]] = defaultdict(list)
 50 | 
 51 |         for obj, addr in obj_addrs:
 52 |             addr_dict[obj].append(addr)
 53 | 
 54 |         result:Dict[Tuple[str, int], List[Dict]] = {}
 55 |         for obj, addrs in addr_dict.items():
 56 |             addr_args = [hex(a) for a in addrs]
 57 | 
 58 |             # Try llvm-symbolizer first since it gives the column
 59 |             output = None
 60 |             args = [self.llvm_symbolizer, f'--obj={str(obj)}', "--basenames",
 61 |                     '--relativenames', '--print-address', *addr_args]
 62 |             logging.info("running: {0}".format(' '.join(args)))
 63 | 
 64 |             try:
 65 |                 output = subprocess.check_output(
 66 |                     args, stderr=subprocess.STDOUT, timeout=20,
 67 |                     universal_newlines=True)
 68 |             except:
 69 |                 pass
 70 | 
 71 |             line_re = self.llvm_symbolizer_loc_re
 72 | 
 73 |             if output is None:
 74 |                 args = [self.addr2line, '-a', '-f', '-i', '-e', str(obj)]
 75 |                 args.extend(addr_args)
 76 |                 logging.info("running: {0}".format(' '.join(args)))
 77 |                 try:
 78 |                     output = subprocess.check_output(
 79 |                         args, stderr=subprocess.STDOUT, timeout=20,
 80 |                         universal_newlines=True)
 81 |                 except:
 82 |                     raise SystemError(f'Failed to run {self.addr2line} and {self.llvm_symbolizer} on {obj}')
 83 | 
 84 |                 line_re = self.addr2line_loc_re
 85 | 
 86 |             func = None
 87 | 
 88 |             for l in output.splitlines():
 89 |                 if l == "":
 90 |                     continue
 91 |                 elif l.startswith("0x"):
 92 |                     addr = int(l, 16)
 93 |                     func = None
 94 |                     skip = (obj, addr) in result
 95 |                     if not skip:
 96 |                         result[(obj, addr)] = list()
 97 |                 elif func is None:
 98 |                     func = l
 99 |                 elif not skip:
100 |                     m = line_re.match(l)
101 |                     d = m.groupdict()
102 |                     col = int(d['col']) if 'col' in d else None
103 |                     loc = {'func':func, 'file':d['file'], 'line':int(d['line']), 'col':col}
104 |                     result[obj, addr].append(loc)
105 |                     func = None
106 |             
107 |         return result


--------------------------------------------------------------------------------
/CONTRIBUTING_DCO.md:
--------------------------------------------------------------------------------
  1 | # Contributing to syscall-failure-analyzer
  2 | 
  3 | We welcome contributions from the community and first want to thank you for taking the time to contribute!
  4 | 
  5 | Please familiarize yourself with the [Code of Conduct](https://github.com/vmware/.github/blob/main/CODE_OF_CONDUCT.md) before contributing.
  6 | 
  7 | Before you start working with syscall-failure-analyzer, please read our [Developer Certificate of Origin](https://cla.vmware.com/dco). All contributions to this repository must be signed as described on that page. Your signature certifies that you wrote the patch or have the right to pass it on as an open-source patch.
  8 | 
  9 | ## Ways to contribute
 10 | 
 11 | We welcome many different types of contributions and not all of them need a Pull request. Contributions may include:
 12 | 
 13 | * New features and proposals
 14 | * Documentation
 15 | * Bug fixes
 16 | * Issue Triage
 17 | * Answering questions and giving feedback
 18 | * Helping to onboard new contributors
 19 | * Other related activities
 20 | 
 21 | ## Getting started
 22 | 
 23 | This section provides a comprehensive guide on how to contribute to the project by setting up your development environment, and ensuring code quality before submitting a pull request. Though the project is in Python, which simplifies the build process, it's crucial to follow these guidelines for a smooth collaboration.
 24 | 
 25 | ### Development Environment Setup
 26 | 
 27 | 1. **Clone the Repository:** Clone the repository to your local machine using the following command in your terminal:
 28 | 
 29 |     ```bash
 30 |     git clone https://github.com/vmware-labs/syscall-failure-analyzer
 31 |     ```
 32 | 
 33 | 2. **Navigate to the Project Directory:**
 34 | 
 35 |     ```bash
 36 |     cd syscall-failure-analyzer
 37 |     ```
 38 | 
 39 | 3. **Install Required Packages:** Use `pip` to install the required Python packages:
 40 | 
 41 |     ```bash
 42 |     pip install -r requirements.txt
 43 |     ```
 44 | 
 45 | ### Ensuring Code Quality
 46 | 
 47 | Before submitting a pull request, make sure that your code adheres to the following guidelines:
 48 | 
 49 | - **No MyPy Warnings:** Your code should not produce any MyPy warnings. Run the following command to check:
 50 | 
 51 |     ```bash
 52 |     mypy .
 53 |     ```
 54 | 
 55 |     If you see any warnings, correct the type annotations to resolve them before submitting your pull request.
 56 | 
 57 | ### Submitting a Pull Request
 58 | 
 59 | 1. **Create a New Branch:**
 60 | 
 61 |     ```bash
 62 |     git checkout -b your-feature-branch
 63 |     ```
 64 | 
 65 | 2. **Add and Commit Your Changes:**
 66 | 
 67 |     ```bash
 68 |     git add .
 69 |     git commit --signoff -m "Your commit message"
 70 |     ```
 71 | 
 72 | 3. **Push the Changes:**
 73 | 
 74 |     ```bash
 75 |     git push origin your-feature-branch
 76 |     ```
 77 | 
 78 | 4. Navigate to the original repository and create a new pull request. Compare the original `main` or `master` branch with your `your-feature-branch`.
 79 | 
 80 | 5. After submitting the pull request, maintainers will review your changes. Upon approval, your code will be merged into the main codebase.
 81 | 
 82 | ### Common Issues
 83 | 
 84 | Currently, there are no common issues to be aware of. As the project evolves, this section will be updated accordingly.
 85 | 
 86 | ### Testing
 87 | 
 88 | As of now, the project does not have automated tests. Please disregard this section until tests are added to the repository.
 89 | 
 90 | ## Contribution Flow
 91 | 
 92 | This is a rough outline of what a contributor's workflow looks like:
 93 | 
 94 | * Make a fork of the repository within your GitHub account
 95 | * Create a topic branch in your fork from where you want to base your work
 96 | * Make commits of logical units
 97 | * Make sure your commit messages have the proper format, quality and descriptiveness (see below)
 98 | * Push your changes to the topic branch in your fork
 99 | * Create a pull request containing that commit
100 | 
101 | We follow the GitHub workflow and you can find more details on the [GitHub flow documentation](https://docs.github.com/en/get-started/quickstart/github-flow).
102 | 
103 | ### Pull Request Checklist
104 | 
105 | Before submitting your pull request, we advise you to use the following:
106 | 
107 | 1. Check if your code changes will pass both code linting checks and unit tests.
108 | 2. Ensure your commit messages are descriptive. We follow the conventions on [How to Write a Git Commit Message](http://chris.beams.io/posts/git-commit/). Be sure to include any related GitHub issue references in the commit message. See [GFM syntax](https://guides.github.com/features/mastering-markdown/#GitHub-flavored-markdown) for referencing issues and commits.
109 | 3. Check the commits and commit messages and ensure they are free from typos.
110 | 
111 | ## Reporting Bugs and Creating Issues
112 | 
113 | For specifics on what to include in your report, please follow the guidelines in the issue and pull request templates when available.
114 | 
115 | 
116 | ## Ask for Help
117 | 
118 | The best way to reach us with a question when contributing is to ask on:
119 | 
120 | * The original GitHub issue
121 | 
122 | 


--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
  1 | # Contributor Covenant Code of Conduct
  2 | 
  3 | ## Our Pledge
  4 | 
  5 | We as members, contributors, and leaders pledge to make participation in the syscall-failure-analyzer project and our
  6 | community a harassment-free experience for everyone, regardless of age, body
  7 | size, visible or invisible disability, ethnicity, sex characteristics, gender
  8 | identity and expression, level of experience, education, socio-economic status,
  9 | nationality, personal appearance, race, religion, or sexual identity
 10 | and orientation.
 11 | 
 12 | We pledge to act and interact in ways that contribute to an open, welcoming,
 13 | diverse, inclusive, and healthy community.
 14 | 
 15 | ## Our Standards
 16 | 
 17 | Examples of behavior that contributes to a positive environment for our
 18 | community include:
 19 | 
 20 | * Demonstrating empathy and kindness toward other people
 21 | * Being respectful of differing opinions, viewpoints, and experiences
 22 | * Giving and gracefully accepting constructive feedback
 23 | * Accepting responsibility and apologizing to those affected by our mistakes,
 24 |   and learning from the experience
 25 | * Focusing on what is best not just for us as individuals, but for the
 26 |   overall community
 27 | 
 28 | Examples of unacceptable behavior include:
 29 | 
 30 | * The use of sexualized language or imagery, and sexual attention or
 31 |   advances of any kind
 32 | * Trolling, insulting or derogatory comments, and personal or political attacks
 33 | * Public or private harassment
 34 | * Publishing others' private information, such as a physical or email
 35 |   address, without their explicit permission
 36 | * Other conduct which could reasonably be considered inappropriate in a
 37 |   professional setting
 38 | 
 39 | ## Enforcement Responsibilities
 40 | 
 41 | Community leaders are responsible for clarifying and enforcing our standards of
 42 | acceptable behavior and will take appropriate and fair corrective action in
 43 | response to any behavior that they deem inappropriate, threatening, offensive,
 44 | or harmful.
 45 | 
 46 | Community leaders have the right and responsibility to remove, edit, or reject
 47 | comments, commits, code, wiki edits, issues, and other contributions that are
 48 | not aligned to this Code of Conduct, and will communicate reasons for moderation
 49 | decisions when appropriate.
 50 | 
 51 | ## Scope
 52 | 
 53 | This Code of Conduct applies within all community spaces, and also applies when
 54 | an individual is officially representing the community in public spaces.
 55 | Examples of representing our community include using an official e-mail address,
 56 | posting via an official social media account, or acting as an appointed
 57 | representative at an online or offline event.
 58 | 
 59 | ## Enforcement
 60 | 
 61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be
 62 | reported to the community leaders responsible for enforcement at oss-coc@vmware.com.
 63 | All complaints will be reviewed and investigated promptly and fairly.
 64 | 
 65 | All community leaders are obligated to respect the privacy and security of the
 66 | reporter of any incident.
 67 | 
 68 | ## Enforcement Guidelines
 69 | 
 70 | Community leaders will follow these Community Impact Guidelines in determining
 71 | the consequences for any action they deem in violation of this Code of Conduct:
 72 | 
 73 | ### 1. Correction
 74 | 
 75 | **Community Impact**: Use of inappropriate language or other behavior deemed
 76 | unprofessional or unwelcome in the community.
 77 | 
 78 | **Consequence**: A private, written warning from community leaders, providing
 79 | clarity around the nature of the violation and an explanation of why the
 80 | behavior was inappropriate. A public apology may be requested.
 81 | 
 82 | ### 2. Warning
 83 | 
 84 | **Community Impact**: A violation through a single incident or series
 85 | of actions.
 86 | 
 87 | **Consequence**: A warning with consequences for continued behavior. No
 88 | interaction with the people involved, including unsolicited interaction with
 89 | those enforcing the Code of Conduct, for a specified period of time. This
 90 | includes avoiding interactions in community spaces as well as external channels
 91 | like social media. Violating these terms may lead to a temporary or
 92 | permanent ban.
 93 | 
 94 | ### 3. Temporary Ban
 95 | 
 96 | **Community Impact**: A serious violation of community standards, including
 97 | sustained inappropriate behavior.
 98 | 
 99 | **Consequence**: A temporary ban from any sort of interaction or public
100 | communication with the community for a specified period of time. No public or
101 | private interaction with the people involved, including unsolicited interaction
102 | with those enforcing the Code of Conduct, is allowed during this period.
103 | Violating these terms may lead to a permanent ban.
104 | 
105 | ### 4. Permanent Ban
106 | 
107 | **Community Impact**: Demonstrating a pattern of violation of community
108 | standards, including sustained inappropriate behavior, harassment of an
109 | individual, or aggression toward or disparagement of classes of individuals.
110 | 
111 | **Consequence**: A permanent ban from any sort of public interaction within
112 | the community.
113 | 
114 | ## Attribution
115 | 
116 | This Code of Conduct is adapted from the [Contributor Covenant][homepage],
117 | version 2.0, available at
118 | https://www.contributor-covenant.org/version/2/0/code_of_conduct.html.
119 | 
120 | Community Impact Guidelines were inspired by [Mozilla's code of conduct
121 | enforcement ladder](https://github.com/mozilla/diversity).
122 | 
123 | [homepage]: https://www.contributor-covenant.org
124 | 
125 | For answers to common questions about this code of conduct, see the FAQ at
126 | https://www.contributor-covenant.org/faq. Translations are available at
127 | https://www.contributor-covenant.org/translations.
128 | 


--------------------------------------------------------------------------------
/abstractarch.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2023 VMware, Inc.
  2 | # SPDX-License-Identifier: BSD-2-Clause
  3 | from typing import Tuple, Union, Callable, Set, List, Dict, Any, Optional, Iterable
  4 | from abc import ABC, abstractmethod
  5 | 
  6 | import angr
  7 | import capstone
  8 | 
  9 | class ControlStatePluginArch(ABC):
 10 |     def __init__(self):
 11 |         pass
 12 | 
 13 |     @abstractmethod
 14 |     def copy(self) -> 'ControlStatePluginArch':
 15 |         pass
 16 | 
 17 | class Arch(ABC):
 18 |     def __init__(self):
 19 |         pass
 20 |     
 21 |     @abstractmethod
 22 |     def init_capstone(self) -> capstone.Cs:
 23 |         pass
 24 | 
 25 |     @property
 26 |     @abstractmethod
 27 |     def default_text_base(self) -> int:
 28 |         pass
 29 | 
 30 |     @abstractmethod
 31 |     def is_call_insn(self, insn: capstone.CsInsn) -> bool:
 32 |         pass
 33 | 
 34 |     @abstractmethod
 35 |     def is_ret_insn(self, insn: capstone.CsInsn) -> bool:
 36 |         pass
 37 | 
 38 |     @abstractmethod
 39 |     def is_branch_insn(self, insn: capstone.CsInsn) -> bool:
 40 |         pass
 41 | 
 42 |     @abstractmethod
 43 |     def is_indirect_branch_insn(self, insn: capstone.CsInsn) -> bool:
 44 |         pass
 45 | 
 46 |     @abstractmethod
 47 |     def is_direct_call_insn(self, insn: capstone.CsInsn) -> bool:
 48 |         pass
 49 | 
 50 |     def is_indirect_call_insn(self, insn:capstone.CsInsn) -> bool:
 51 |         return self.is_call_insn(insn) and not self.is_direct_call_insn(insn)
 52 | 
 53 |     @abstractmethod
 54 |     def is_rep_insn(self, insn) -> bool:
 55 |         pass
 56 | 
 57 |     @property
 58 |     @abstractmethod
 59 |     def arch_name(self) -> str:
 60 |         pass
 61 | 
 62 |     @abstractmethod
 63 |     def pyvex_workaround(self, insn:capstone.CsInsn) -> Tuple[Union[Callable, None],  bool]:
 64 |         pass
 65 | 
 66 |     @abstractmethod
 67 |     def nop_insn(self, size:int) -> bytes:
 68 |         pass
 69 | 
 70 |     @abstractmethod
 71 |     def is_predicated_mov(self, insn) -> bool:
 72 |         pass
 73 | 
 74 |     @property
 75 |     @abstractmethod
 76 |     def syscall_entry_points(self) -> Set[str]:
 77 |         pass
 78 | 
 79 |     @abstractmethod
 80 |     def get_direct_branch_target(self, insn:capstone.CsInsn) -> int:
 81 |         pass
 82 | 
 83 |     @abstractmethod
 84 |     def is_jmp_insn(self, insn) -> bool:
 85 |         pass
 86 |  
 87 |     @abstractmethod
 88 |     def is_indirect_jmp_insn(self, insn) -> bool:
 89 |         pass
 90 |  
 91 |     def is_direct_jmp_insn(self, insn) -> bool:
 92 |         return self.is_jmp_insn(insn) and not self.is_indirect_jmp_insn(insn)
 93 | 
 94 |     @abstractmethod 
 95 |     def is_iret_insn(self, insn:capstone.CsInsn) -> bool:
 96 |         pass
 97 |     
 98 |     @abstractmethod
 99 |     def is_sysexit_sysret_insn(self, insn:capstone.CsInsn) -> bool:
100 |         pass
101 | 
102 |     @abstractmethod
103 |     def is_fixed_rep_insn(self, insn:capstone.CsInsn) -> bool:
104 |         pass
105 | 
106 |     @property
107 |     @abstractmethod
108 |     def ftrace_state_str(self) -> str:
109 |         pass
110 |     
111 |     @abstractmethod
112 |     def ftrace_state_dict(self, d:Dict[str, Any]) -> Dict[str, Any]:
113 |         pass
114 | 
115 |     @property
116 |     @abstractmethod
117 |     def stack_end(self) -> int:
118 |         pass
119 |     
120 |     @property
121 |     @abstractmethod
122 |     def per_cpu_reg(self) -> str:
123 |         pass
124 | 
125 |     @property
126 |     @abstractmethod
127 |     def per_cpu_offset(self) -> int:
128 |         pass
129 | 
130 |     @property
131 |     @abstractmethod
132 |     def stack_reg(self) -> str:
133 |         pass
134 | 
135 |     @property
136 |     @abstractmethod
137 |     def ret_reg_name(self) -> str:
138 |         pass
139 | 
140 |     @property
141 |     @abstractmethod
142 |     def stack_related_reg_names(self) -> List[str]:
143 |         pass
144 | 
145 |     @property
146 |     @abstractmethod
147 |     def ip_reg_name(self) -> str:
148 |         pass
149 | 
150 |     @abstractmethod
151 |     def is_cond_branch_insn(self, insn:capstone.CsInsn) -> bool:
152 |         pass
153 | 
154 |     @abstractmethod
155 |     def is_direct_branch_insn(self, insn:capstone.CsInsn) -> bool:
156 |         pass
157 | 
158 |     @abstractmethod
159 |     def is_indirect_branch_target(self, insn:capstone.CsInsn) -> bool:
160 |         pass
161 | 
162 |     @abstractmethod
163 |     def is_cond_jmp_insn(self, insn:capstone.CsInsn) -> bool:
164 |         pass
165 | 
166 |     @abstractmethod
167 |     def is_cond_jmp_taken(self, insn:capstone.CsInsn, state:Dict[str, Any]) -> bool:
168 |         pass
169 |     
170 |     @abstractmethod
171 |     def is_loop_insn(self, insn:capstone.CsInsn) -> bool:
172 |         pass
173 |     
174 |     @abstractmethod
175 |     def is_loop_taken(self, insn:capstone.CsInsn, state:Dict[str, Any]) -> bool:
176 |         pass
177 | 
178 |     @abstractmethod
179 |     def rep_iterations(self, insn:capstone.CsInsn, state:Dict) -> int:
180 |         pass
181 | 
182 |     @property
183 |     @abstractmethod
184 |     def syscall_insn_len(self) -> int:
185 |         pass
186 |   
187 |     @abstractmethod
188 |     def controlStatePluginArch(self) -> ControlStatePluginArch:
189 |         pass
190 | 
191 |     @property
192 |     @abstractmethod
193 |     def page_size(self) -> int:
194 |         pass
195 | 
196 |     @abstractmethod
197 |     def parse_interrupt_table(self, proj:angr.Project) -> Dict[int, int]:
198 |         pass
199 | 
200 |     @abstractmethod
201 |     def init_symbols(self, proj:angr.Project) -> None:
202 |         pass
203 | 
204 |     @abstractmethod
205 |     def is_exception_vector(self, vector:int) -> bool:
206 |         pass
207 | 
208 |     @property
209 |     @abstractmethod
210 |     def irq_exit_sym_names(self) -> Set[str]:
211 |         pass
212 | 
213 |     @property
214 |     @abstractmethod
215 |     def address_width(self) -> int:
216 |         pass


--------------------------------------------------------------------------------
/syscall.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2023 VMware, Inc.
  2 | # SPDX-License-Identifier: BSD-2-Clause
  3 | import errno
  4 | from collections import defaultdict
  5 | from typing import Any, List, Optional, Union, DefaultDict
  6 | 
  7 | from ptrace.syscall.ptrace_syscall import SYSCALL_NAMES
  8 | from prmsg import pr_msg
  9 | 
 10 | def str_to_int(s) -> Optional[int]:
 11 |     """
 12 |     Convert a string to an integer. Supports base 10 and hexadecimal numbers.
 13 | 
 14 |     Args:
 15 |         s (str): The input string.
 16 | 
 17 |     Returns:
 18 |         Optional[int]: The integer value of the string, or None if conversion fails.
 19 |     """
 20 |     
 21 |     if not isinstance(s, str):
 22 |         return None
 23 |     if s.startswith("0x"):
 24 |         return int(s, 16)
 25 |     try:
 26 |         return int(s)
 27 |     except:
 28 |         return None
 29 | 
 30 | def ret_to_err(ret: Union[str,int]) -> Optional[int]:
 31 |     """
 32 |     Convert a return value to an error code.
 33 | 
 34 |     Args:
 35 |         ret (any): The input return value.
 36 | 
 37 |     Returns:
 38 |         Optional[int]: The error code, or None if the conversion fails.
 39 |     """
 40 |     v:Optional[int] = None
 41 | 
 42 |     if isinstance(ret, int):
 43 |         v = ret
 44 |     else:
 45 |         v = str_to_int(ret)
 46 |         if v is None:
 47 |             return None
 48 | 
 49 |     assert(v is not None)
 50 | 
 51 |     if v < 0:
 52 |         return v
 53 |     if v > (1 << 64) - 1024:
 54 |         return -((1 << 64) - v)
 55 |     return None
 56 | 
 57 | 
 58 | class SyscallInfo:
 59 |     syscall_numbers:DefaultDict[str,List[int]] = defaultdict(list)
 60 | 
 61 |     @staticmethod
 62 |     def get_name(n:int) -> str:
 63 |         """
 64 |         Get the syscall name associated with a syscall number.
 65 | 
 66 |         Args:
 67 |             n (int): The syscall number.
 68 | 
 69 |         Returns:
 70 |             str: The syscall name.
 71 |         """
 72 |         if n is None:
 73 |             return None
 74 |         return SYSCALL_NAMES.get(n, str(n))
 75 | 
 76 |     @staticmethod
 77 |     def get_syscall_nr(syscall:str) -> int:
 78 |         """
 79 |         Get the syscall number associated with a syscall name or number string.
 80 | 
 81 |         Args:
 82 |             syscall (str): The syscall name or number string.
 83 | 
 84 |         Returns:
 85 |             Optional[int]: The syscall number, or None if the syscall is not found.
 86 |         """
 87 |         if syscall is None:
 88 |             return None
 89 | 
 90 |         if syscall.isnumeric():
 91 |             return int(syscall)
 92 | 
 93 |         if len(SyscallInfo.syscall_numbers) == 0:
 94 |             SyscallInfo.syscall_numbers = defaultdict(list)
 95 |             for number, name in SYSCALL_NAMES.items():
 96 |                 SyscallInfo.syscall_numbers[name.lower()].append(number)
 97 | 
 98 |         syscalls = SyscallInfo.syscall_numbers[syscall.lower()]
 99 |         if len(syscalls) > 1:
100 |             pr_msg(f'Found multiple syscalls for {syscall}: {syscalls}; using {syscalls[0]}', level='WARN')
101 |         elif len(syscalls) == 0:
102 |             raise ValueError(f'Could not find syscall {syscall}')
103 | 
104 |         return syscalls[0]
105 | 
class ErrorcodeInfo:
    """Translate between error numbers and error names.

    Names come from the standard ``errno`` module plus the additional codes
    listed in ``extra_error_codes``.
    """

    # Lower-cased error name -> error numbers; built on the first name lookup
    # in get_errno().
    error_numbers:Optional[DefaultDict[str,List]] = None

    # Error codes 512-531 that are looked up in addition to errno.errorcode.
    extra_error_codes = {
        512: 'ERESTARTSYS',
        513: 'ERESTARTNOINTR',
        514: 'ERESTARTNOHAND',
        515: 'ENOIOCTLCMD',
        516: 'ERESTART_RESTARTBLOCK',
        517: 'EPROBE_DEFER',
        518: 'EOPENSTALE',
        519: 'ENOPARAM',
        521: 'EBADHANDLE',
        522: 'ENOTSYNC',
        523: 'EBADCOOKIE',
        524: 'ENOTSUPP',
        525: 'ETOOSMALL',
        526: 'ESERVERFAULT',
        527: 'EBADTYPE',
        528: 'EJUKEBOX',
        529: 'EIOCBQUEUED',
        530: 'ERECALLCONFLICT',
        531: 'ENOGRACE'
    }

    @staticmethod
    def get_name(n:int) -> str:
        """
        Get the error string associated with an error code.

        Args:
            n (int): The error code; the sign is ignored.

        Returns:
            str: The error name, or the absolute value as a string when the
            code is unknown. A ``None`` argument is passed through as ``None``.
        """
        if n is None:
            return None
        if n < 0:
            n = -n
        if n in errno.errorcode:
            return errno.errorcode[n]
        if n in ErrorcodeInfo.extra_error_codes:
            return ErrorcodeInfo.extra_error_codes[n]
        return str(n)

    @staticmethod
    def get_errno(err:str) -> Optional[int]:
        """
        Get the error code associated with an error string.

        Args:
            err (str): A decimal number, a "0x"-prefixed 64-bit hexadecimal
                value, or an error name (case-insensitive). One leading '-'
                is ignored.

        Returns:
            Optional[int]: The (positive) error code, or None if the input is
            empty or the error is not found.
        """
        if err is None or len(err) == 0:
            return None

        if err[0] == '-':
            err = err[1:]

        # isdecimal() only accepts what int() can parse; isnumeric() also
        # accepts characters such as '²' that make int() raise.
        if err.isdecimal():
            return int(err)

        if err.startswith('0x'):
            try:
                # The hex form is the unsigned 64-bit view of a negative
                # value; convert it back to the positive error number.
                return (1 << 64) - int(err, 16)
            except ValueError:
                # Malformed hex: fall through so it is reported as unknown.
                pass

        # string
        if ErrorcodeInfo.error_numbers is None:
            index: DefaultDict[str, List] = defaultdict(list)
            # Walk the two tables in a fixed order so that the index is built
            # deterministically.
            for table in (errno.errorcode, ErrorcodeInfo.extra_error_codes):
                for number, name in table.items():
                    index[name.lower()].append(number)
            ErrorcodeInfo.error_numbers = index

        # .get() keeps failed lookups from inserting empty entries into the
        # shared defaultdict.
        errnos = ErrorcodeInfo.error_numbers.get(err.lower(), [])
        if len(errnos) == 0:
            pr_msg(f'Could not find error {err}', level='ERROR')
            return None

        return errnos[0]

    @staticmethod
    def is_error_code(v: int, errcode: int) -> bool:
        """
        Check if a value matches an error code.

        Args:
            v (int): The value to check; negative values are first converted
                to their unsigned 64-bit form.
            errcode (int): The (positive) error code to compare.

        Returns:
            bool: True when the low 32 bits of ``v`` equal ``2**32 - errcode``
            and the high 32 bits are either all ones or all zeros.
        """
        if v < 0:
            v += 1 << 64

        mask32 = (1 << 32) - 1
        v_low = v & mask32
        v_high = (v >> 32) & mask32
        return v_low == ((1 << 32) - errcode) and (v_high == mask32 or v_high == 0)


--------------------------------------------------------------------------------
/recorder.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2023 VMware, Inc.
  2 | # SPDX-License-Identifier: BSD-2-Clause
  3 | import logging
  4 | from typing import Optional, List, Dict, Any, Set, Iterable, Tuple, Union
  5 | import os
  6 | import pathlib
  7 | import pickle
  8 | import gzip
  9 | import io
 10 | import lz4.frame
 11 | 
 12 | from collections import defaultdict
 13 | import ptrace
 14 | import ptrace.debugger.child
 15 | import ptrace.debugger.process
 16 | import ptrace.syscall.ptrace_syscall
 17 | import ptrace.tools
 18 | 
 19 | from arch import arch
 20 | from angrmgr import Angr
 21 | from cle.backends import Symbol
 22 | from ftrace import Ftrace
 23 | from kallsyms import Kallsyms
 24 | from kcore import Kcore
 25 | from prmsg import pr_msg
 26 | from ptrace.syscall.ptrace_syscall import PtraceSyscall, SYSCALL_NAMES
 27 | 
 28 | class Recorder:
 29 |     def __init__(
 30 |         self,
 31 |         perf: str,
 32 |         output: str,
 33 |         kcore: 'Kcore',
 34 |         objs: List[io.BufferedReader],
 35 |         snapshot_size: int,
 36 |         syscall_filter: Optional[int],
 37 |         errcode_filter: Optional[int],
 38 |         occurrences_filter: Optional[Set[int]],
 39 |         debug: bool,
 40 |         save_kcore: bool,
 41 |         early_stop: bool,
 42 |     ):
 43 |         self.output = output
 44 |         self.failures: List[Dict] = []
 45 |         self.snapshot_size = max(snapshot_size, 128 * 1024)
 46 |         self.dbg = ptrace.debugger.debugger.PtraceDebugger()
 47 |         self.perf = perf
 48 |         self.syscall_filter = syscall_filter
 49 |         self.errcode_filter = errcode_filter
 50 |         self.occurrences_filter = occurrences_filter
 51 |         self.occurrences = 0
 52 |         self.debug = debug
 53 |         self.traces: List[Union[List[Dict[str, Union[int, str, float]]], str]] = []
 54 |         self.save_kcore = save_kcore
 55 |         self.early_stop = early_stop
 56 | 
 57 |         pr_msg('init kallsyms...', level='OP')
 58 |         self.rename_old_res_file(self.output)
 59 |         
 60 |         self.angr_mgr: Optional[Angr] = None
 61 |         self.kallsyms: Optional[Kallsyms] = None
 62 | 
 63 |         if kcore is not None:
 64 |             self.kallsyms = Kallsyms(objs = objs)
 65 |             self.angr_mgr = Angr(kallsyms = self.kallsyms,
 66 |                                 kcore = kcore,
 67 |                                 saved_segs = None)
 68 | 
 69 |         # Need to massage some syscall names to match those in ftrace
 70 |         self.syscall_special_event : Dict[str, str] = {
 71 |             'sendfile': 'sendfile64',
 72 |         }
 73 | 
 74 |     def detach_all_processes(self):
 75 |         if self.dbg is None:
 76 |             return
 77 |         for p in self.dbg.list:
 78 |             p.detach()
 79 | 
 80 |     def save_failures(self, type_str:str):
 81 |         if len(self.failures) == 0:
 82 |             return
 83 | 
 84 |         pr_msg(f'saving {len(self.failures)} failures...', level='INFO')
 85 | 
 86 |         data:Dict[str, Any] = {
 87 |             'type': type_str,
 88 |             'failures': self.failures,
 89 |             'traces': self.traces,
 90 |         }
 91 | 
 92 |         if self.save_kcore:
 93 |             assert isinstance(self.angr_mgr, Angr)
 94 |             data.update({
 95 |                 'kcore': self.angr_mgr.save(),
 96 |                 'kallsyms': self.kallsyms,
 97 |             })
 98 | 
 99 |         try:
100 |             with lz4.frame.open(self.output, 'wb') as f:
101 |                 pickle.dump(data, f)
102 |         except IOError:
103 |             pr_msg("error writing to result file", level="ERROR")
104 | 
105 |     def set_sysexit_filter(self, ftrace_instance:Ftrace, snapshot:bool):
106 |         e_class, e_subclass, filter = self.get_filter_string(exit=True)
107 |         syscall_event = ftrace_instance.get_event(f'{e_class}/{e_subclass}')
108 |         syscall_event.filter = filter
109 |         if snapshot:
110 |             syscall_event.trigger = f'snapshot if {filter}'
111 |         return syscall_event
112 | 
113 |     def restart_syscall(self, process:ptrace.debugger.process.PtraceProcess, syscall:PtraceSyscall):
114 |         rip = process.getInstrPointer()
115 |         process.setInstrPointer(rip - arch.syscall_insn_len)
116 |         process.setreg(arch.ret_reg_name, syscall.syscall)
117 | 
118 |     def print_syscall_info(self, syscall:PtraceSyscall):
119 |         msg = f'syscall "{syscall.name}" ({syscall.syscall}) failed with error [{syscall.result_text}]'
120 | 
121 |         pr_msg(msg, level="INFO", new_line_before=True)
122 |         syscall_args = [hex(arg.value) for arg in syscall.arguments]
123 |         msg = 'failing syscall args: {0}'.format(', '.join(syscall_args))
124 |         pr_msg(msg, level="INFO", new_line_after=True)
125 | 
126 |     def set_func_tracing(self, syms: Iterable[Symbol]) -> bool:
127 |         ftrace = Ftrace.main_instance()
128 | 
129 |         # We cannot set function filters on cold symbols, and anyhow it is
130 |         # meaningless, so ignore it silently.
131 |         filter_sym_names = {sym.name for sym in syms if not sym.name.endswith('.cold')}
132 |         success = True
133 |         pr_msg(f'setting function filters ({len(filter_sym_names)} functions)...',
134 |                 level="OP")
135 |         try:
136 |             s = list(filter_sym_names) 
137 |             ftrace.func_filter = s
138 |             ftrace.current_tracer = 'function'
139 |         except OSError as e:
140 |             success = False
141 |             pr_msg(f'cannot set function filter: {e}', level="ERROR", new_line_before=True)
142 |         except Exception as e:
143 |             success = False
144 |             pr_msg(f'cannot set function filter: {e}', level="ERROR", new_line_before=True)
145 | 
146 |         return success
147 | 
148 |     def rename_old_res_file(self, output:str):
149 |         res_file_path = pathlib.Path(output)
150 |         if res_file_path.exists():
151 |             try:
152 |                 res_file_path.rename(str(res_file_path)+".old")
153 |             except Exception as e:
154 |                 pr_msg(f'error renaming result file {str(res_file_path)}',
155 |                         level="FATAL")
156 |                 raise e
157 | 
158 | 
159 |     def init_process(self, args:'list[str]'):
160 |         args[0] = ptrace.tools.locateProgram(args[0])
161 |         if not os.path.isfile(args[0]):
162 |             raise FileNotFoundError(f"Error: file {args[0]} does not exist")
163 |         if not os.access(args[0], os.X_OK):
164 |             raise PermissionError(f'Error: file {args[0]} not executable')
165 | 
166 |         pid = ptrace.debugger.child.createChild(args, False, env=os.environ.copy())
167 |         self.dbg.traceExec()
168 |         self.dbg.traceClone()
169 |         self.dbg.traceFork()
170 |         self.dbg.addProcess(pid, is_attached=True)
171 |         self.monitored_pid = pid
172 | 
173 |     def get_filter_string(self, exit:bool) -> Tuple[str, str, Optional[str]]:
174 |         if exit:
175 |             filter = 'ret<0' if self.errcode_filter is None else f'ret=={-self.errcode_filter}'
176 |         else:
177 |             filter = ''
178 | 
179 |         enter_or_exit = 'enter' if not exit else 'exit'
180 | 
181 |         e_class, e_subclass = 'raw_syscalls', f'sys_{enter_or_exit}'
182 |         if self.syscall_filter is not None:
183 |             syscall_name = SYSCALL_NAMES.get(self.syscall_filter, None)
184 |             if syscall_name is not None:
185 |                 if syscall_name in self.syscall_special_event:
186 |                     syscall_name = self.syscall_special_event[syscall_name]
187 | 
188 |                 e_class, e_subclass = 'syscalls', f'sys_{enter_or_exit}_{syscall_name}'
189 |             else:
190 |                 filter += f'&&id=={self.syscall_filter}'
191 | 
192 |         return e_class, e_subclass, filter if filter != '' else None


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # Syscall failure analyzer
  2 | 
  3 | ## Overview
  4 | 
  5 | Syscall failure analyzer is a tool for root-cause analysis of syscall failures.
  6 | The tool generates a callstack of the condition that triggered the syscall
  7 | failure.
  8 | 
  9 | The analysis is performed by tracking the branches that were taken during the
 10 | invocation of the syscall and replaying the syscall.
 11 | 
 12 | ## Try it out
 13 | 
 14 | ### Prerequisites
 15 | 
 16 | The installation steps and prerequisites provided in this document have been
 17 | primarily tested on Ubuntu. While Ubuntu itself is based on Debian, and
 18 | therefore the instructions are expected to work on Debian-based distributions,
 19 | there might be subtle differences.
 20 | 
 21 | If you are using another Linux distribution or a different package manager, the
 22 | package names and installation steps may vary. In such cases, we encourage you
 23 | to contribute by documenting the steps for your specific distribution in the
 24 | [CONTRIBUTING_DCO.md](CONTRIBUTING_DCO.md) file.
 25 | 
 26 | Feel free to integrate this snippet into your documentation where it fits best.
 27 | 
 28 | 1. Install `binutils` and `bcc`, which are required for
 29 |    tracing and analysis.
 30 |    ```bash
 31 |    sudo apt install binutils libcapstone3 bpfcc-tools python3-bpfcc
 32 | 
 33 |    # perf is only needed for recording traces using Intel PT. If you are using a
 34 |    # custom kernel, do not install linux-tools-`uname -r` since it would fail.
 35 |    sudo apt install linux-tools-common linux-tools-generic linux-tools-`uname -r`
 36 |    ```
 37 | 
 38 | 2. Install `libcapstone4` or `libcapstone3`.
 39 |    ```bash
 40 |    sudo apt install libcapstone4 || sudo apt install libcapstone3
 41 |    ```
 42 | 
 43 | 3. Install the kernel debug symbols
 44 |    ```bash
 45 |    codename=$(lsb_release -c | awk  '{print $2}')
 46 |    sudo tee /etc/apt/sources.list.d/ddebs.list << EOF
 47 |    deb http://ddebs.ubuntu.com/ ${codename}      main restricted universe multiverse
 48 |    deb http://ddebs.ubuntu.com/ ${codename}-security main restricted universe multiverse
 49 |    deb http://ddebs.ubuntu.com/ ${codename}-updates  main restricted universe multiverse
 50 |    deb http://ddebs.ubuntu.com/ ${codename}-proposed main restricted universe multiverse
 51 |    EOF
 52 | 
 53 |    wget -O - http://ddebs.ubuntu.com/dbgsym-release-key.asc | sudo apt-key add -
 54 | 
 55 |    sudo apt update
 56 |    sudo apt install linux-image-`uname -r`-dbgsym
 57 |    ```
 58 | 
 59 | 4. *Recommended:* Installing the Linux source code is essential for any
 60 |    meaningful analysis using syscall-failure-analyzer output. If you have access to the source code
 61 |    through a custom kernel or other means, this step can be skipped. Please note
 62 |    that while the source code is not required for syscall-failure-analyzer execution, it is necessary
 63 |    for debugging based on syscall-failure-analyzer output.
 64 | 
 65 |    ```bash
 66 |    sudo apt-get install linux-source
 67 |    tar xvf /usr/src/linux-source-$(uname -r).tar.bz2
 68 |    cd linux-source-$(uname -r)
 69 |    ```
 70 | 
 71 | - <u>Custom Kernels</u>:
 72 |  If you choose to build your own custom kernel, syscall-failure-analyzer will require access to
 73 |  the debug information. You can edit your `.config` file and confirm that
 74 |  `CONFIG_DEBUG_INFO=y` is set. If it is not set, please update the settings and
 75 |  rebuild your kernel.
 76 | 
 77 | ### Run
 78 | 
 79 | #### Identifying the Failing Syscall
 80 | 
 81 | The syscall-failure-analyzer tool requires the name or number of the failing
 82 | syscall as an argument. Typically, you would identify this failing syscall
 83 | during the development process or through debugging tools like strace. For
 84 | instance, running strace alongside your application could show system calls
 85 | that return an error. Once you identify the failing syscall, you can provide
 86 | its name or number as an argument when running syscall-failure-analyzer. This
 87 | enables the tool to specifically target and analyze that particular syscall for
 88 | failures.
 89 | 
 90 | #### Sudoer Requirement
 91 | 
 92 | This tool requires sudo permissions to access specific system features like
 93 | kcore and kallsyms. Therefore, you should install the pip requirements and run the
 94 | tool itself with sudo permissions.
 95 | 
 96 | #### Virtual Environment Setup
 97 | 
 98 | To set up and run the project, it's advisable to use a Python virtual
 99 | environment.
100 | 
101 | 1. **Install python3-venv package**
102 |    ```bash
103 |    sudo apt install python3-venv
104 |    ```
105 | 
106 | 2. **Navigate to the Project Directory**
107 |     ```bash
108 |     cd /path/to/syscall-failure-analyzer
109 |     ```
110 | 
111 | 3. **Create a Virtual Environment**
112 |     ```bash
113 |     python3 -m venv myvenv
114 |     ```
115 | 
116 | 4. **Activate the Virtual Environment**
117 |     ```bash
118 |     source myvenv/bin/activate
119 |     ```
120 | 
121 | 5. **Install Required Packages**
122 |     ```bash
123 |     pip install -r requirements.txt
124 |     ```
125 | 
126 | 6. **Create a Symbolic Link for BCC**
127 | 
128 |     ```bash
129 |     ln -s /usr/lib/python3/dist-packages/bcc myvenv/lib/$(python3 -c "import sys; print('python{}.{}'.format(sys.version_info.major, sys.version_info.minor))")/site-packages/bcc
130 |     ```
131 | 
132 | #### Recording Syscall Failure
133 | 
134 | Before deploying or running the project, ensure the virtual environment is activated. If it's not, activate it using:
135 | 
136 | ```bash
137 | source myvenv/bin/activate
138 | ```
139 | 
140 | To record syscall failures, use the following command. This example targets the
141 | first failure of `setregid` syscall when running Linux Test Project's `setregid03`
142 | test:
143 | 
144 | ```bash
145 | sudo python3 ./syscall-failure-analyzer.py --kprobes --syscall=setregid -n 1 record /opt/ltp/testcases/bin/setregid03
146 | ```
147 | 
148 | > Note: Use the `--kprobes` flag for recording with kprobe points. If Intel PT is supported and you prefer to use it, omit the `--kprobes` flag.
149 | 
150 | #### Reporting Syscall Failures
151 | 
152 | After recording, generate a report using the following command:
153 | 
154 | ```bash
155 | sudo python3 ./syscall-failure-analyzer.py --syscall=setregid report
156 | ```
157 | 
158 | #### Command-line Arguments
159 | 
160 | The tool provides a variety of command-line options to customize its behavior:
161 | 
162 | - **Basic Options**
163 |     - `-h, --help`: Show help message and exit
164 |     - `--verbose, -v`: Enable verbose analysis info
165 |     - `--quiet, -q`: Enable quiet mode
166 |     - `--syscall SYSCALL, -s SYSCALL`: Specify the failing syscall (name or number) to track
167 |     - `--occurrences OCCURRENCES, -n OCCURRENCES`: Specify occurrences to record
168 | 
169 | - **Advanced Options**
170 |     - `--vmlinux OBJS [OBJS ...], -l OBJS [OBJS ...]`: Specify the location of the vmlinux file or other modules
171 |     - `--path SRC_PATH, -p SRC_PATH`: Specify the path to source code
172 |     - `--perf FileType('x'), -f FileType('x')`: Specify the location of perf
173 |     - `--debug, -d`: Enable debug mode verbosity
174 | 
175 | For a complete list of command-line options, you can run the tool with `-h` or `--help`:
176 | 
177 | ```bash
178 | python3 ./syscall-failure-analyzer.py -h
179 | ```
180 | 
181 | ## Documentation
182 | 
183 | As of now, the project is in active development, and comprehensive
184 | documentation is in the works. For the time being, you can find the most
185 | relevant information about how to use and contribute to the project in this
186 | [README.md](README.md) and in the [CONTRIBUTING_DCO.md](CONTRIBUTING_DCO.md) files.
187 | 
188 | If you have specific questions or encounter issues, feel free to open an issue
189 | on GitHub, and we'll do our best to assist you.
190 | 
191 | We also welcome contributions to improve documentation. If you would like to
192 | contribute, please see the "Contributing" section for guidelines.
193 | 
194 | ## Contributing
195 | 
196 | The syscall-failure-analyzer project team welcomes contributions from the community. Before you start working with syscall-failure-analyzer, please
197 | read our [Developer Certificate of Origin](https://cla.vmware.com/dco). All contributions to this repository must be
198 | signed as described on that page. Your signature certifies that you wrote the patch or have the right to pass it on
199 | as an open-source patch. For more detailed information, refer to [CONTRIBUTING_DCO.md](CONTRIBUTING_DCO.md).
200 | 
201 | ## License
202 | 
203 | This project is licensed under the BSD-2-Clause License. For more details, please see the [LICENSE](LICENSE) file in the root directory of this source tree.
204 | 


--------------------------------------------------------------------------------
/simprocedures.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2023 VMware, Inc.
  2 | # SPDX-License-Identifier: BSD-2-Clause
  3 | import inspect
  4 | from typing import Optional, Set, Type, Tuple
  5 | import angr
  6 | from controlstateplugin import ControlStatePlugin
  7 | from arch import arch
  8 | import capstone
  9 | 
 10 | def state_ip(s:angr.SimState) -> Optional[int]:
 11 |     v = s.registers.load(arch.ip_reg_name)
 12 |     try:
 13 |         return s.solver.eval_one(v)
 14 |     except angr.SimValueError:
 15 |         return None
 16 | 
 17 | def track_to_ret(proc: angr.SimProcedure):
 18 |     state = proc.state
 19 |     control = state.control
 20 |     assert isinstance(control, ControlStatePlugin)
 21 | 
 22 |     if control.backtracking:
 23 |         return
 24 | 
 25 |     ip = state_ip(state)
 26 |     assert(ip is not None)
 27 |     # TODO: Check if we need better way
 28 |     ret_ip = state.callstack.ret_addr
 29 |     assert(ret_ip is not None and ret_ip != 0)
 30 |     # TODO: let the arch give the address width
 31 |     if ret_ip < 0:
 32 |         ret_ip += 1 << arch.address_width
 33 | 
 34 |     br = control.current_branch
 35 |     while br is not None and br['to_ip'] != ret_ip:
 36 |         control.next_branch()
 37 |         br = control.current_branch
 38 | 
 39 |     if br is None:
 40 |         # We would not be able to return to the correct address
 41 |         control.diverged = True
 42 |         control.expected_ip = None
 43 |     else:
 44 |         br.update({
 45 |             'from_ip': None,
 46 |             'from_sym': None,
 47 |             'from_offset': None
 48 |         })
 49 | 
 50 | def track_out_of_syms(proc: angr.SimProcedure, sym_names:Set[str]):
 51 |     state = proc.state
 52 |     control = state.control
 53 |     assert isinstance(control, ControlStatePlugin)
 54 | 
 55 |     if control.backtracking:
 56 |         return
 57 | 
 58 |     ip = state_ip(state)
 59 |     assert(ip is not None)
 60 | 
 61 |     br = control.current_branch
 62 |     while br is not None and br['from_ip'] in sym_names:
 63 |         control.next_branch()
 64 |         br = control.current_branch
 65 | 
 66 |     if br is None:
 67 |         control.diverged = True
 68 |         control.expected_ip = None
 69 | 
 70 | class CopyProcedure(angr.SimProcedure):
 71 |     #pylint:disable=arguments-differ
 72 | 
 73 |     def run(self, dst_addr, src_addr, limit):
 74 |         track_to_ret(self)
 75 |         copied = self.state.solver.BVS('copied', 64)
 76 |         self.state.add_constraints(copied >= 0)
 77 | 
 78 |         if False and 'unconstrained' in str(limit):
 79 |             old_limit = limit
 80 |             limit = self.state.solver.BVS('limit', arch.address_width)
 81 |             self.state.add_constraints(old_limit == limit)
 82 | 
 83 |         self.state.add_constraints(limit <= self.state.libc.max_memcpy_size)
 84 |         #self.state.add_constraints(copied <= self.state.libc.max_memcpy_size)
 85 |         self.state.add_constraints(copied <= limit)
 86 | 
 87 |         if not self.state.solver.is_true(copied == 0):
 88 |             src_mem = self.state.memory.load(src_addr, copied, endness='Iend_LE')
 89 |             self.state.memory.store(dst_addr, src_mem, size=copied, endness='Iend_LE')
 90 | 
 91 |         return self.ret(limit - copied)
 92 | 
 93 |     def __rept__(self) -> str:
 94 |         return 'CopyProcedure'
 95 | 
 96 | class ReturnProcedure(angr.SimProcedure):
 97 |     def __init__(self):
 98 |         super(ReturnProcedure, self).__init__()
 99 | 
100 |     def run(self):
101 |         control = self.state.control
102 |         assert isinstance(control, ControlStatePlugin)
103 | 
104 |         if control.backtracking:
105 |             self.ret()
106 |         
107 |         track_out_of_syms(self, {'zen_untrain_ret', '__x86_return_thunk'})
108 |         if control.diverged:
109 |             return None
110 |         
111 |         # Force the correct return address
112 |         self.ret_to = control.current_branch['to_ip']
113 |         r = self.ret()
114 |         self.ret_to = None
115 |         control.next_branch()
116 |         return r
117 | 
class ProcedureWrapper(angr.SimProcedure):
    """Run another SimProcedure on arguments read from the argument registers.

    Args:
        proc_class: The SimProcedure class to invoke inline.
        limits: Optional per-argument bounds. It is iterated as a sequence of
            ``(min, max)`` pairs indexed by argument position; either bound
            may be None. NOTE(review): the annotation describes a single
            pair, which does not match how the value is iterated - confirm
            against the callers.
    """

    def __init__(self, proc_class:Type[angr.SimProcedure], limits:Optional[Tuple[Optional[int], Optional[int]]]=None):
        super(ProcedureWrapper, self).__init__()
        self.proc_class = proc_class
        sig = inspect.signature(proc_class.run)
        # ``run`` is inspected unbound, so drop ``self`` from the count.
        self.n_parameters = len(sig.parameters) - 1
        # Materialize the pairs: a bare enumerate() object is a one-shot
        # iterator and would yield nothing after the first pass in run().
        self.limits = list(enumerate(limits)) if limits else None

    def run(self):
        """Constrain the arguments, call the wrapped procedure, return its result."""
        # Collect arguments from the state registers according to the calling convention
        track_to_ret(self)

        cc = self.state.project.factory.cc()
        args = cc.ARG_REGS

        # Fetch arguments from the registers
        arg_values = [self.state.registers.load(reg) for reg in args][:self.n_parameters]

        if self.limits:
            for i, (min_val, max_val) in self.limits:
                if min_val is None and max_val is None:
                    continue

                val = arg_values[i]
                if max_val is not None:
                    self.state.add_constraints(val <= max_val)
                if min_val is not None:
                    self.state.add_constraints(val >= min_val)

        # call the procedure with the fetched arguments
        result = self.inline_call(self.proc_class, *arg_values).ret_expr
        if result.length == arch.address_width:
            return result

        # Widen narrower results to the architecture's address width.
        return result.sign_extend(arch.address_width - result.length)
153 | 
class RepHook(angr.exploration_techniques.tracer.RepHook):
    """Tracer ``RepHook`` that also consumes the trace's self-branches."""

    def __init__(self, mnemonic):
        # Only the second space-separated word of the mnemonic is passed on.
        super().__init__(mnemonic.split(" ")[1])

    def trace_to_next(self, state):
        """Skip trace branches that go from the current address to itself."""
        c = state.control
        assert isinstance(c, ControlStatePlugin)
        if c.backtracking:
            return

        here = state.addr
        while True:
            branch = c.current_branch
            if branch is None or branch['from_ip'] != here or branch['to_ip'] != here:
                return
            c.next_branch()

    def run(self, state, procedure=None, *arguments, **kwargs):
        """Sync the trace, optionally run *procedure* inline, then run the parent hook."""
        self.trace_to_next(state)

        if procedure is not None:
            outcome = self._inline_call(state, procedure, *arguments, **kwargs)
            print(f'Result of inline call: {outcome}')

        # Invoke the run() method from the parent class
        super().run(state)
178 | 
179 | # TODO: Move to AngrSim
class RetpolineProcedure(angr.SimProcedure):
    """Replace an indirect-branch thunk by a jump to the traced target.

    Args:
        reg: Name of the register (looked up on ``state.regs``) that holds
            the branch target.
    """

    def __init__(self, reg: str):
        super(RetpolineProcedure, self).__init__()
        self.reg = reg

    def run(self):
        """Jump to the traced target, or to the register value when untracked."""
        state = self.state
        target_reg = getattr(state.regs, self.reg)
        control = state.control

        if control.backtracking:
            return self.jump(target_reg)

        trace_from_ip = control.current_branch['from_ip']
        trace_to_ip = control.current_branch['to_ip']
        control.expected_ip = trace_to_ip
        angr_mgr = control.angr_mgr

        current_state_ip = state_ip(state)
        prev_state_ip = state.history and state.history.parent and state.history.parent.addr

        thunk_names = {'__x86_return_thunk', 'zen_untrain_ret'}

        def in_retpoline(ip:int) -> bool:
            name = angr_mgr.get_sym_name(ip)
            if name.startswith('__x86_indirect_thunk'):
                return True
            return name in thunk_names

        # When using kprobes we skip the retpolines, but when using hardware tracer
        # we keep them.
        at_branch_source = current_state_ip == trace_from_ip
        if not at_branch_source:
            at_branch_source = (not in_retpoline(trace_from_ip)
                                and prev_state_ip == trace_from_ip)

        if not at_branch_source:
            control.diverged = True
        else:
            # TODO: Handle the case in which the trace ends with a retpoline
            while in_retpoline(trace_to_ip):
                control.next_branch()
                trace_to_ip = control.current_branch['to_ip']
                trace_from_ip = control.current_branch['from_ip']
                if not in_retpoline(trace_from_ip):
                    control.diverged = True
                    break
            control.expected_ip = trace_to_ip

        if control.diverged:
            return self.jump(target_reg)

        # Pin the register to the traced target and follow it.
        state.add_constraints(target_reg == trace_to_ip)
        control.next_branch()
        return self.jump(trace_to_ip)


--------------------------------------------------------------------------------
/kprobesreporter.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2023 VMware, Inc.
  2 | # SPDX-License-Identifier: BSD-2-Clause
  3 | from typing import Dict, Iterable, List, Optional, Set, Any
  4 | 
  5 | from cle.backends import Symbol
  6 | from prmsg import Pbar, warn_once, pr_msg
  7 | from arch import arch
  8 | from reporter import Reporter
  9 | 
 10 | class KprobesReporter(Reporter):
 11 |     def report(self):
 12 |         for failure in self.failures:
 13 |             trace = self.traces[failure['trace_id']]
 14 |             sim_syms = [self.angr_mgr.get_sym(s) for s in failure['sim_syms']]
 15 |             branches = self.ftrace_to_branch(trace = trace,
 16 |                                              filter_pid = failure['pid'],
 17 |                                              sim_syms = sim_syms)
 18 |             super().report_one(branches = branches,
 19 |                                errcode = failure['errcode'],
 20 |                                sim_syms = sim_syms)
 21 | 
 22 |     # Converting ftrace to branches format that is common to processor trace
 23 |     # and ftrace.
 24 |     def ftrace_to_branch(self, trace:List[Dict[str, Any]], filter_pid:int, sim_syms:Set[Symbol]) -> List[Dict]:
 25 |         branches = []
 26 |         first = True
 27 |         insn = None
 28 |         pending_rep_insn, pending_rep_iterations = None, None
 29 |         unemulated_call_entry = None
 30 | 
 31 |         #sim_syms = [s for s in sim_syms if s.name != '__check_object_size']
 32 |         pbar = Pbar("processing ftrace", items=trace, unit="lines")
 33 |         for l in pbar:
 34 |             if filter_pid != l['pid']:
 35 |                 # It should not happen, as we already configured ftrace to
 36 |                 # filter the pid of the failure during the recording.
 37 |                 warn_once(f"skipping pid {l['pid']}")
 38 |                 continue
 39 | 
 40 |             if 'type' not in l:
 41 |                 warn_once("ftrace snapshot includes unknown entries")
 42 |                 continue
 43 |             
 44 |             next_ip, state = None, None
 45 | 
 46 |             ty = l['type']
 47 | 
 48 |             # If we reached the syscall tracing at the end, stop
 49 |             if ty == 'sysexit':
 50 |                 break
 51 |             elif ty == 'sysenter':
 52 |                 continue
 53 |             elif ty == 'probe':
 54 |                 state = arch.ftrace_state_dict(l)
 55 |                 next_ip = l['addr']
 56 |             elif ty == 'ret':
 57 |                 next_ip = None
 58 |             elif ty == 'func':
 59 |                 try:
 60 |                     next_ip = self.angr_mgr.get_prev_insn(l['from_ip']).address
 61 |                 except ValueError:
 62 |                     next_ip = None
 63 |             else:
 64 |                 raise ValueError(f"unknown ftrace type entry: {ty}")
 65 |                 
 66 |             if first:
 67 |                 insn = self.angr_mgr.get_prev_insn(l['from_ip'])
 68 |             
 69 |             first = False
 70 | 
 71 |             # Adding fake branches for rep instructions to reflect the number
 72 |             # of iterations that were executed.
 73 |             if pending_rep_insn is not None:
 74 |                 for _ in range(0, pending_rep_iterations -
 75 |                                   arch.rep_iterations(pending_rep_insn, state)):
 76 |                     branches.append(
 77 |                         {'from_ip': pending_rep_insn.address,
 78 |                          'to_ip': pending_rep_insn.address}
 79 |                     )
 80 |                 pending_rep_insn = None
 81 |                 
 82 |             while insn and insn.address != next_ip:
 83 |                 unemulated_call_entry = None
 84 | 
 85 |                 pr_msg(str(insn), level="DEBUG")
 86 |                 if not arch.is_branch_insn(insn):
 87 |                     insn = self.angr_mgr.next_insn(insn)
 88 |                     continue
 89 | 
 90 | #                if ((not arch.is_direct_branch_insn(insn)) or
 91 | #                    arch.is_cond_branch_insn(insn)):
 92 | #                    break
 93 | 
 94 |                 try:
 95 |                     target_insn = self.angr_mgr.get_branch_target_insn(insn)
 96 |                     target_sym = target_insn and self.angr_mgr.get_sym(target_insn)
 97 |                 except:
 98 |                     target_insn = None
 99 |                     target_sym = None
100 | 
101 |                 if arch.is_direct_jmp_insn(insn):
102 |                     assert(target_insn is not None)
103 |                     branches.append({'from_ip': insn.address, 'to_ip': target_insn.address})
104 |                     insn = target_insn
105 |                 elif ((arch.is_direct_call_insn(insn) and target_sym not in sim_syms) or
106 |                       (arch.is_indirect_call_insn(insn) and self.angr_mgr.next_insn_addr(insn) == next_ip)):
107 |                     branches.append({'from_ip': insn.address, 'to_ip': None})
108 |                     insn = self.angr_mgr.next_insn(insn)
109 |                     unemulated_call_entry = {'from_ip': None, 'to_ip': insn.address}
110 |                     branches.append(unemulated_call_entry)
111 |                 else:
112 |                     break
113 | 
114 |             match_ip = insn and insn.address == next_ip
115 | 
116 |             if ty == 'func' and not match_ip:
117 |                 assert(0 == 1)
118 |                 continue
119 | 
120 |             target_insn = None
121 |             
122 |             if ty == 'probe' and match_ip and unemulated_call_entry is not None:
123 |                 unemulated_call_entry['ret'] = l['ax']
124 |                 unemulated_call_entry = None
125 | 
126 |             if arch.is_indirect_jmp_insn(insn):
127 |                 raise NotImplementedError("indirect jump")
128 |             elif arch.is_call_insn(insn):
129 |                 if ty != 'func':
130 |                     #target_insn = self.angr_mgr.get_insn(l['addr'])
131 |                     # We are just going to skip endbr-like probes
132 |                     #if arch.is_indirect_branch_target(target_insn):
133 |                     #    continue
134 |                     pass
135 |                 elif not match_ip or ty != 'func':
136 |                     # TODO: Cleaner error
137 |                     assert(0 == 1)
138 |                 else:
139 |                     # ty == 'func'
140 |                     to_ip = self.angr_mgr.get_sym_addr(l['to_ip'])
141 |                     target_insn = self.angr_mgr.get_insn(to_ip)
142 |             elif arch.is_ret_insn(insn):
143 |                 assert ty == 'ret'
144 |                 from_sym = self.angr_mgr.get_sym(insn)
145 |                 assert (from_sym is not None and from_sym.name == l['from_func'])
146 | 
147 |                 target_insn = self.angr_mgr.get_insn(l['to_ip'])
148 |             elif arch.is_cond_jmp_insn(insn):
149 |                 assert ty == 'probe'
150 |                 assert match_ip
151 |                 assert state is not None
152 |                 if arch.is_cond_jmp_taken(insn, state):
153 |                     target_insn = self.angr_mgr.get_branch_target_insn(insn)
154 |                     assert(target_insn is not None)
155 |                     pr_msg(f"taken branch: {insn} -> {target_insn}", level="DEBUG")
156 |                 else:
157 |                     insn = self.angr_mgr.next_insn(insn)
158 |                     pr_msg(f"not taken branch: {insn}", level="DEBUG")
159 |             elif arch.is_loop_insn(insn):
160 |                 assert state is not None
161 | 
162 |                 if arch.is_loop_taken(insn, state):
163 |                     target_insn = self.angr_mgr.get_branch_target_insn(insn)
164 |                 else:
165 |                     insn = self.angr_mgr.next_insn(insn)
166 |             elif arch.is_rep_insn(insn):
167 |                 assert ty == 'probe'
168 |                 assert state is not None
169 |                 pending_rep_iterations = arch.rep_iterations(insn, state)
170 |                 if pending_rep_iterations > 0:
171 |                     pending_rep_insn = insn
172 |                 insn = self.angr_mgr.next_insn(insn)
173 |             elif arch.is_predicated_mov(insn):
174 |                 # Create psuedo entry to know that the cmov was taken
175 |                 assert state is not None
176 | 
177 |                 if not arch.is_cond_jmp_taken(insn, state):
178 |                     target_insn = self.angr_mgr.next_insn(insn)
179 | 
180 |             if target_insn is not None:
181 |                 assert insn is not None
182 |                 branch = {'from_ip':insn.address, 'to_ip':target_insn.address}
183 |                 if 'callstack' in l:
184 |                     branch['callstack'] = l['callstack']
185 |                 if ty == 'ret':
186 |                     branch['ret'] = l['ret']
187 |                 branches.append(branch)
188 |                 insn = target_insn
189 | 
190 |         return branches
191 |     
192 |     @property
193 |     def detailed_trace(self) -> bool:
194 |         return True
195 | 


--------------------------------------------------------------------------------
/syscall-failure-analyzer.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python3
  2 | # Copyright 2023 VMware, Inc.
  3 | # SPDX-License-Identifier: BSD-2-Clause
  4 | 
  5 | import argparse
  6 | import glob
  7 | import logging
  8 | import os
  9 | import pickle
 10 | import sys
 11 | import io
 12 | import lz4.frame
 13 | from typing import Optional, Set, List, BinaryIO
 14 | 
 15 | from angrmgr import Angr
 16 | from addr2line import Addr2Line
 17 | from claripy.backends.backend_smtlib_solvers import *
 18 | from intelptrecorder import IntelPTRecorder
 19 | from intelptreporter import IntelPTReporter
 20 | from kallsyms import Kallsyms, get_vmlinux
 21 | from kprobesrecorder import KProbesRecorder
 22 | from kprobesreporter import KprobesReporter
 23 | from reporter import Reporter
 24 | from prmsg import pr_msg, quiet, warn_once, change_output
 25 | from ptrace.debugger.child import createChild
 26 | from ptrace.tools import locateProgram
 27 | from syscall import ErrorcodeInfo, SyscallInfo
 28 | from kcore import Kcore
 29 | from ftrace import Ftrace
 30 | 
 31 | DEFAULT_DATA_FILENAME = 'deeperr.data'
 32 | 
 33 | 
 34 | def get_occurrences(s:str) -> Optional[Set[int]]:
 35 |     if s is None:
 36 |         return None
 37 |     if s.isnumeric():
 38 |         return {int(s)}
 39 |     try:
 40 |         r = {int(v.strip()) for v in s.split(',')}
 41 |     except:
 42 |         pr_msg('Could not parse occurances list, skipping input', level='ERROR')
 43 |         r = None
 44 | 
 45 |     return r
 46 | 
 47 | def report(inputs: str,
 48 |            src_path: Optional[str],
 49 |            output: Optional[str],
 50 |            print_stats: bool,
 51 |            objs: List[io.BufferedReader],
 52 |            syscall_filter: Optional[int],
 53 |            errcode_filter: Optional[int],
 54 |            occurances_filter: Optional[Set[int]],
 55 |            **kwargs):
 56 |     if output is not None:
 57 |         try:
 58 |             change_output(output)
 59 |         except Exception as e:
 60 |             pr_msg(f'{e}', level='FATAL')
 61 |             return
 62 | 
 63 |     res_files = glob.glob(inputs)
 64 |     if len(res_files) == 0:
 65 |         pr_msg('found no result files', level="ERROR")
 66 |         return
 67 | 
 68 |     for f_name in res_files:
 69 |         try:
 70 |             with lz4.frame.open(f_name, 'rb') as failure_file:
 71 |                 # Load the data from the file
 72 |                 data = pickle.load(failure_file)
 73 |         except FileNotFoundError:
 74 |             pr_msg(f'error reading result file {f_name}: file not found', level='ERROR')
 75 |             continue
 76 |         except EOFError:
 77 |             pr_msg(f'error reading result file {f_name}: file is empty', level='ERROR')
 78 |             continue
 79 |         except lz4.frame.LZ4FrameError:
 80 |             pr_msg(f'error reading result file {f_name}: file is corrupted', level='ERROR')
 81 |             continue
 82 |         
 83 |         kallsyms = data.get('kallsyms', Kallsyms(objs))
 84 |         saved_segs = data.get('kcore')
 85 |         kcore = Kcore() if saved_segs is None else None
 86 | 
 87 |         if saved_segs is None:
 88 |             pr_msg(f'kcore was not saved, reading from /proc/kcore', level='INFO')
 89 | 
 90 |         # We need to init ftrace before angr to clear all probe points that
 91 |         # might have been left. Otherwise, disassembly will fail.
 92 |         ftrace = Ftrace()
 93 |         ftrace.kprobe_event_disable_all()
 94 | 
 95 |         angr_mgr = Angr(kallsyms, 
 96 |                         kcore = kcore,
 97 |                         saved_segs = saved_segs)
 98 | 
 99 |         reporter_cls = IntelPTReporter if data['type'] == 'intel-pt' else KprobesReporter
100 |         report_kwargs = {
101 |             'objs': objs,
102 |             'errcode_filter': errcode_filter,
103 |             'syscall_filter': syscall_filter,
104 |             'print_stats': print_stats,
105 |             # Filtering based on occurances is done during reporting only for Intel PT,
106 |             # since we cannot reliably filter it out during recording
107 |             'occurances_filter': occurances_filter,
108 |             'angr_mgr': angr_mgr,
109 |             'traces': data['traces'],
110 |             'failures': data['failures'],
111 |             'src_path': src_path,
112 |         }
113 | 
114 |         reporter:Reporter = reporter_cls(**report_kwargs)
115 |         reporter.report()
116 | 
117 | 
def valid_path(path):
    """argparse ``type=`` callback: return *path* if it exists.

    Raises:
        argparse.ArgumentTypeError: If *path* does not exist.
    """
    if not os.path.exists(path):
        raise argparse.ArgumentTypeError(f"Path '{path}' does not exist.")
    return path
123 | 
124 | def main():
125 |     global quiet, debug
126 | 
127 |     def arg_error(parser: argparse.ArgumentParser):
128 |         # add suffix to the usage string
129 |         parser.print_help()
130 |         exit()
131 | 
132 |     parser = argparse.ArgumentParser("deeperr", epilog="application")
133 |     parser.add_argument('--verbose', '-v', action='store_true', dest='verbose', help='prints verbose analysis info')
134 |     parser.add_argument('--vmlinux', '-l', action='store', dest='objs', help='location of vmlinux file or other modules', type=argparse.FileType('rb'), nargs='+', default=[])
135 |     parser.add_argument('--perf', '-f', default='perf', metavar=argparse.FileType('x'), help='location of perf')
136 |     parser.add_argument('--debug', '-d', action='store_true', dest='debug', help='debug mode verbosity')
137 |     parser.add_argument('--llvm-symbolizer', '-y', action='store', dest='llvm_symbolizer', default='llvm-symbolizer', help='path to llvm-symbolizer')
138 |     parser.add_argument('--snapshot-size', '-z', action='store', dest='snapshot_size', type=int, default=262144, help='perf snapshot size')
139 |     parser.add_argument('--tmp', '-t', action='store', dest='tmp_path', default='/tmp', type=valid_path, help='tmp path')
140 |     parser.add_argument('--syscall', '-s', action='store', dest='syscall', help='failing syscall number to track')
141 |     parser.add_argument('--quiet', '-q', action='store_true', dest='quiet', help='quiet mode')
142 |     parser.add_argument('--errcode', '-r', action='store', dest='errcode', help='error number')
143 |     parser.add_argument('--output', '-o', action='store', dest='output', help='output file', default=None, metavar='PATH')
144 |     parser.add_argument('--input', '-i', action='store', dest='input', help='input file', default=DEFAULT_DATA_FILENAME, metavar='FILES')
145 |     parser.add_argument('--kprobes', '-k', action='store_true', dest='kprobes', help='use kprobes')
146 |     parser.add_argument('--occurrences', '-n', action='store', dest='occurrences', help='occurrences to record')
147 |     parser.add_argument('--extra-info', '-x', action='store_true', dest='print_stats', help='detailed output with analysis statistics')
148 |     parser.add_argument('--path', '-p', action='store', dest='src_path', default=None, type=valid_path, help='path to source code')
149 |     parser.add_argument('--nokcore', '-w', action='store_true', dest='nokcore', help='do not save kcore')
150 |     parser.add_argument('--early-stop', '-e', action='store_true', dest='early_stop', help='stop execution after first failure')
151 |     parser.add_argument('command', choices=['record', 'report'], help='command to run: record or report')
152 | 
153 |     parser.usage = parser.format_usage()[7:].rstrip('\n ') + ' -- <command> [args]\n'
154 | 
155 |     try:
156 |         args, remaining_argv = parser.parse_known_args()
157 |     except:
158 |         # Exit with error
159 |         exit(1)
160 | 
161 |     if os.geteuid() != 0:
162 |         pr_msg(f'{sys.executable} must be run as root', level='FATAL')
163 |         exit(1)
164 | 
165 |     if remaining_argv and remaining_argv[0] == '--':
166 |         remaining_argv = remaining_argv[1:]
167 |     
168 |     sys.setrecursionlimit(10 ** 5)
169 | 
170 |     loglevel = 'ERROR'
171 |     if args.debug:
172 |         loglevel = 'DEBUG'
173 |     elif args.verbose:
174 |         loglevel = 'INFO'
175 | 
176 |     quiet = args.quiet
177 |     debug = args.debug
178 | 
179 |     logging.basicConfig(filename='deeperr.log', level=loglevel, force=True)
180 |     logging.getLogger().setLevel(loglevel)
181 |     for l in ['angr', 'cle', 'pyvex', 'claripy']:
182 |         logging.getLogger(l).setLevel('ERROR')
183 | 
184 |     objs = get_vmlinux(args.objs)
185 | 
186 |     syscall_filter = None
187 |     if args.syscall is not None:
188 |         try:
189 |             syscall_filter = SyscallInfo.get_syscall_nr(args.syscall)
190 |         except ValueError as e:
191 |             pr_msg(e, level="ERROR")
192 |             pr_msg('recording all syscall', level="WARN")
193 | 
194 |     syscall_filter = SyscallInfo.get_syscall_nr(args.syscall)
195 |     errcode_filter = ErrorcodeInfo.get_errno(args.errcode)
196 |     occurrences_filter = get_occurrences(args.occurrences)
197 | 
198 |     a2l = Addr2Line.get_instance()
199 |     a2l.llvm_symbolizer = args.llvm_symbolizer
200 | 
201 |     if args.command == 'record' and len(remaining_argv) < 1:
202 |         arg_error(parser)
203 | 
204 |     if args.command == 'record':
205 |         kprobes = args.kprobes
206 |         kcore = Kcore()
207 | 
208 |         if not kprobes and not IntelPTRecorder.cpu_supports_pt():
209 |             pr_msg("CPU does not support Intel PT", level="ERROR")
210 | 
211 |         recorder_cls = KProbesRecorder if kprobes else IntelPTRecorder
212 |         a = recorder_cls(
213 |             perf=args.perf,
214 |             objs=objs,
215 |             snapshot_size=args.snapshot_size,
216 |             errcode_filter=errcode_filter,
217 |             syscall_filter=syscall_filter,
218 |             occurrences_filter=occurrences_filter,
219 |             output=args.output or 'deeperr.data',
220 |             tmp_path=args.tmp_path,
221 |             debug=args.debug,
222 |             save_kcore=not args.nokcore,
223 |             early_stop=args.early_stop,
224 |         )
225 |         try:
226 |             a.record(args=remaining_argv)
227 |         except OSError as e:
228 |             pr_msg(f'error recording: {e}', level='FATAL')
229 |     else:
230 |         report(inputs=args.input,
231 |                output=args.output,
232 |                print_stats=args.print_stats,
233 |                objs=objs,
234 |                errcode_filter=errcode_filter,
235 |                syscall_filter=syscall_filter,
236 |                occurances_filter=occurrences_filter,
237 |                src_path=args.src_path)
238 | 
 239 | # Run the command-line interface only when this file is executed as a script
 240 | if __name__ == "__main__":
 241 |     main()


--------------------------------------------------------------------------------
/intelptrecorder.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2023 VMware, Inc.
  2 | # SPDX-License-Identifier: BSD-2-Clause
  3 | import pathlib
  4 | import errno
  5 | import subprocess
  6 | import signal
  7 | import os
  8 | import re
  9 | import time
 10 | import ctypes
 11 | import shutil
 12 | import psutil
 13 | from typing import Optional, Set, List
 14 | from recorder import Recorder
 15 | from prmsg import pr_msg
 16 | from syscall import ret_to_err, SyscallInfo, ErrorcodeInfo
 17 | from ptrace.syscall.ptrace_syscall import SYSCALL_NAMES
 18 | from bcc import BPF, DEBUG_SOURCE
 19 | from kcore import Kcore
 20 | 
 21 | class IntelPTRecorder(Recorder):
 22 |     def __init__(
 23 |         self,
 24 |         tmp_path: str,
 25 |         **kwargs
 26 |     ):
 27 |         occurrences_filter = kwargs.get('occurrences_filter')
 28 |         if occurrences_filter is None or len(occurrences_filter) > 1:
 29 |             pr_msg('Using Intel PT only one failure can be recorded', level="WARN")
 30 |             kwargs['occurrences_filter'] = {1}
 31 | 
 32 |         kwargs['kcore'] = Kcore() if kwargs.get('save_kcore') else None
 33 | 
 34 |         super().__init__(**kwargs)
 35 |         self.record_proc:Optional[subprocess.Popen[bytes]] = None
 36 |         self.record_proc_terminated = False
 37 |         self.tmp_path = pathlib.Path(tmp_path)
 38 |         self.sorted_occurrence_filter = sorted(self.occurrences_filter) if self.occurrences_filter else None
 39 | 
 40 |         error_pattern = r'^ERROR: (?P<error>.*)$'
 41 |         self.error_regex = re.compile(error_pattern)
 42 | 
 43 |         dump_pattern = r'\[ perf record: Dump (.*?) \]'
 44 |         self.dump_regex = re.compile(dump_pattern)
 45 | 
 46 | 
 47 |     def init_tmp_path(self):
 48 |         if not self.tmp_path.exists() or not self.tmp_path.is_dir():
 49 |             pr_msg(f'error: tmp path [{self.tmp_path}] is not a valid tmp directory', level="FATAL")
 50 |             return False
 51 | 
 52 |         self.my_tmp_path = self.tmp_path.joinpath(pathlib.Path("errexp"))
 53 |         if not self.my_tmp_path.exists():
 54 |             try:
 55 |                 self.my_tmp_path.mkdir()
 56 |             except:
 57 |                 pr_msg(f"error creating tmp path [{self.my_tmp_path}]", level="FATAL")
 58 |                 return False
 59 |         
 60 |         return True
 61 |     
 62 |     def handle_event(self, cpu, data, size):
 63 |         event = self.bpf['syscall_events'].event(data)
 64 |         pid = event.pid
 65 |         syscall = event.syscall_nr
 66 |         err = ret_to_err(event.syscall_ret)
 67 | 
 68 |         e = {'err': err, 'syscall_nr': syscall, 'pid': pid, 'ts': event.ts/1e9}
 69 | 
 70 |         try:
 71 |             self.record_proc.send_signal(signal.SIGUSR2)
 72 |         except ProcessLookupError:
 73 |             pr_msg("perf process already terminated", level='WARN')
 74 |             self.dump_filenames = []
 75 |             return
 76 | 
 77 |         # Snapshots do not work well with Intel PT, and since the parent might already have
 78 |         # many children, it is problematic to attach perf only to these processes again.        
 79 |         # Wake the thread that reported the error, since the eBPF paused it to allow
 80 |         # tracing to be more successful, but let's give one second before we do so.
 81 |         if not self.early_stop:
 82 |             try:
 83 |                 os.kill(pid, signal.SIGCONT)
 84 |             except ProcessLookupError:
 85 |                 pass
 86 |  
 87 |         # For the same reason we only track one failure
 88 |         if len(self.failures) == 0:
 89 |             self.failures.append(e)
 90 | 
 91 |     def run_perf_record(self, pid: int):
 92 |         e_entry_class, e_entry_subclass, entry_filter = self.get_filter_string(exit=False)
 93 |         e_exit_class, e_exit_subclass, exit_filter = self.get_filter_string(exit=True)
 94 | 
 95 |         # We need to save kcore since without it, retpolines are not resolved
 96 |         record_args_raw = [self.perf, "record",
 97 |                     '-e', 'intel_pt/noretcomp=1/k',
 98 | #                    '-e', f'{e_entry_class}:{e_entry_subclass}',
 99 | #                    entry_filter and f'--filter={entry_filter}',
100 | #                    '-e', f'{e_exit_class}:{e_exit_subclass}',
101 | #                    exit_filter and f'--filter={exit_filter}',
102 |                     '--kcore',
103 |                     '--timestamp',
104 |                     '-p', f'{pid}',
105 |                     '--switch-output',
106 |                     f'--snapshot=e{self.snapshot_size}',
107 |                     f'-m,{(self.snapshot_size >> 12)}']
108 | 
109 |         record_args_raw.append(f'-o{self.my_tmp_path.joinpath("perf.data")}')
110 |         
111 |         record_args = [arg for arg in record_args_raw if arg is not None]
112 | 
113 |         pr_msg(f"running: {' '.join(record_args)}", level="INFO")
114 | 
115 |         record_proc = subprocess.Popen(record_args, stdout=subprocess.PIPE,
116 |                                        stderr=subprocess.STDOUT)
117 | 
118 |         if record_proc is None:
119 |             raise SystemError("error starting perf record")
120 |         
121 |         # Save the process for handle_event to see it
122 | 
123 |         time.sleep(2)
124 | 
125 |         # Check that perf is running
126 |         if record_proc.poll() is not None:
127 |             record_proc.wait()
128 |             assert record_proc.stdout is not None
129 |             perf_output = record_proc.stdout.read()
130 |             perf_output_str = perf_output.decode('utf-8')
131 |             pr_msg(f"perf failed: {perf_output_str}", level="FATAL")
132 |             pr_msg(f"hint: check that perf that is compatiable with the current kernel was provided", level="WARN")
133 |             raise SystemError("error running perf record")
134 |         
135 |         self.record_proc = record_proc
136 | 
137 |     def run_perf_script(self, file: str) -> str:
138 |         args = [self.perf, "script", "--itrace=b", "-i", file]
139 |         pr_msg(f"running: {' '.join(args)}", level='INFO')
140 | 
141 |         try:
142 |             output = subprocess.check_output(
143 |                 args,
144 |                 stderr=subprocess.STDOUT,
145 |                 timeout=60,
146 |                 universal_newlines=True
147 |             )
148 |         except (subprocess.CalledProcessError, PermissionError, subprocess.TimeoutExpired) as exc:
149 |             raise SystemError(f"error starting perf itrace: {exc}")
150 | 
151 |         return output
152 | 
153 |     def cleanup(self):
154 |         if self.my_tmp_path and self.my_tmp_path.exists():
155 |             shutil.rmtree(self.my_tmp_path)
156 |         self.my_tmp_path = None
157 | 
158 |     def prepare_bpf(self):
159 |         # There is a bug in bcc that causes a warning to be printed to stderr
160 |         syscall_name = SYSCALL_NAMES.get(self.syscall_filter, None)
161 | 
162 |         b = BPF(src_file="syscall_failure_ebpf.c",
163 |                 cflags=["-w", "-Wno-error", "-Wno-warning"],
164 |                 debug=DEBUG_SOURCE if self.debug else 0)
165 | 
166 |         tp = (f"syscalls:sys_exit_{syscall_name}" if False and syscall_name is not None else
167 |                "raw_syscalls:sys_exit")
168 |         
169 |         def create_ulonglong(value):
170 |             return ctypes.c_ulonglong(value) if value is not None else ctypes.c_ulonglong(0xffffffffffffffff)
171 | 
172 |             # Config keys
173 |         SYSCALL_FILTER_KEY = 1
174 |         ERRCODE_FILTER_KEY = 2
175 |         MONITORED_PID_KEY = 3
176 |         SORTED_OCCURRENCE_FILTER_KEY = 4
177 |         FLAGS_KEY = 5
178 | 
179 |         # Create a dictionary to store config
180 |         config_map = {
181 |             SYSCALL_FILTER_KEY: self.syscall_filter,
182 |             ERRCODE_FILTER_KEY: -self.errcode_filter if self.errcode_filter is not None else None,
183 |             MONITORED_PID_KEY: self.monitored_pid,
184 |             SORTED_OCCURRENCE_FILTER_KEY: self.sorted_occurrence_filter[0] if self.sorted_occurrence_filter else None,
185 |             FLAGS_KEY: 1 if self.early_stop else 0
186 |         }
187 | 
188 |         for key, value in config_map.items():
189 |             b["config_map"][ctypes.c_ulonglong(key)] = create_ulonglong(value)
190 | 
191 |         b.attach_tracepoint(tp=tp, fn_name="trace_syscalls")
192 |         b["syscall_events"].open_perf_buffer(self.handle_event)
193 |         self.bpf = b
194 | 
195 |     def close_perf(self):
196 |         if self.record_proc is None:
197 |             return
198 | 
199 |         if self.record_proc.poll():
200 |             try:
201 |                 self.record_proc.send_signal(signal.SIGINT)
202 |             except ProcessLookupError:
203 |                 pr_msg("perf process already terminated", level='WARN')
204 |                 self.dump_filenames = []
205 |                 return
206 | 
207 |         err = self.record_proc.wait()
208 |         assert self.record_proc.stdout is not None
209 |         perf_output = self.record_proc.stdout.read()
210 |         perf_output_str = perf_output.decode('utf-8')
211 |         pr_msg(f'record proc output: {perf_output_str}', level='DEBUG')
212 |         if err not in {0, None, -errno.ENOENT}:
213 |             pr_msg(f'error closing perf: {err}', level="WARN")
214 | 
215 |         matches = self.error_regex.findall(perf_output_str, re.MULTILINE)
216 |         if matches:
217 |             pr_msg(matches[0], level='ERROR')
218 |             raise Exception(matches[0])
219 | 
220 |         matches = self.dump_regex.findall(perf_output_str)
221 |         if not matches:
222 |             pr_msg(f'perf output: {perf_output_str}', level='ERROR')
223 |             raise Exception('failed to find perf dump file')
224 |         
225 |         # Snapshots are broken with Intel-PT. Take only the first one.
226 |         self.dump_filenames = [match for match in matches][:1]
227 |         self.record_proc = None
228 |  
229 |     def record(self, args:'list[str]') -> int:
230 |         if not self.init_tmp_path():
231 |             return 0
232 | 
233 |         collected = 0
234 | 
235 |         try:
236 |             self.init_process(args)
237 |         except (FileNotFoundError, PermissionError) as e:
238 |             pr_msg(f"error starting process: {e}", level="FATAL")
239 |             return 0
240 | 
241 |         self.prepare_bpf()
242 |         self.run_perf_record(self.monitored_pid)
243 | 
244 |         assert self.record_proc is not None
245 | 
246 |         self.detach_all_processes() 
247 | 
248 |         try:
249 |             while not self.early_stop or len(self.failures) == 0:
250 |                 self.bpf.perf_buffer_poll(1)
251 |                 time.sleep(0.001)
252 |                 try:
253 |                     terminated_pid, _ = os.waitpid(self.monitored_pid, os.WNOHANG)
254 |                     if terminated_pid == self.monitored_pid:
255 |                         pr_msg(f'Child process {terminated_pid} terminated', level='INFO')
256 |                         break
257 |                 except ChildProcessError:
258 |                     pass
259 |         except KeyboardInterrupt:
260 |             pr_msg("Interrupted - stop recording", level='INFO')
261 | 
262 |         if psutil.pid_exists(self.monitored_pid):
263 |             try:
264 |                 os.kill(self.monitored_pid, signal.SIGINT)
265 |             except ProcessLookupError:
266 |                 pr_msg("monitored process already terminated", level='WARN') 
267 | 
268 |         self.close_perf()
269 | 
270 |         for filename in self.dump_filenames:
271 |             trace = self.run_perf_script(filename)
272 |             self.traces.append(trace)
273 | 
274 |         for f in self.failures:
275 |             syscall = SyscallInfo.get_name(f['syscall_nr'])
276 |             err_msg = ErrorcodeInfo.get_name(f['err'])
277 |             pid = f['pid']
278 |             pr_msg(f'[{pid}] syscall {syscall} failed with {err_msg} [{f["err"]}]', level='INFO')
279 | 
280 |         self.save_failures("intel-pt")
281 |         self.cleanup()
282 | 
283 |         return collected
284 | 
285 |     @staticmethod
286 |     def cpu_supports_pt() -> bool:
287 |         # Simple test, perf will deal with more complicated situations later
288 |         try:
289 |             with open('/proc/cpuinfo') as f:
290 |                 for l in f:
291 |                     if l.startswith('flags'):
292 |                         return 'intel_pt' in l.split(' ')
293 |         except Exception:
294 |             pass
295 |         
296 |         return False


--------------------------------------------------------------------------------
/kallsyms.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2023 VMware, Inc.
  2 | # SPDX-License-Identifier: BSD-2-Clause
  3 | from typing import Any, Dict, Tuple, List, Optional, Set, Iterable, Callable
  4 | import logging
  5 | import pathlib
  6 | import subprocess
  7 | import io
  8 | import abc
  9 | import struct
 10 | import os
 11 | from enum import Enum
 12 | from prmsg import pr_msg
 13 | from collections import defaultdict
 14 | from typing import BinaryIO
 15 | 
 16 | from elftools.elf.elffile import ELFFile
 17 | from elftools.elf.sections import NoteSection
 18 | 
 19 | import cle.backends
 20 | import angr
 21 | from arch import arch
 22 | 
  23 | # ELF note type whose descriptor is taken as the build id by extract_build_id()
  24 | NT_GNU_BUILD_ID = 3
 24 | 
 25 | def get_vmlinux(user_option:Optional[List[BinaryIO]]) -> List[BinaryIO]:
 26 |     if user_option is None:
 27 |         user_option = []
 28 | 
 29 |     # Check if any of the filenames includes 'vmlinux'
 30 |     if any('vmlinux' in pathlib.Path(f.name).stem for f in user_option):
 31 |         return user_option
 32 |     
 33 |     vmlinux_search = [
 34 |         f'/usr/lib/debug/boot/vmlinux-{os.uname().release}',
 35 |         'vmlinux'
 36 |     ]
 37 |     for vmlinux in vmlinux_search:
 38 |         try:
 39 |             f = open(vmlinux, 'rb')
 40 |             pr_msg(f'Using vmlinux file {vmlinux}', level='INFO')
 41 |             user_option.append(f)
 42 |             return user_option
 43 |         except FileNotFoundError:
 44 |             pass
 45 |         except PermissionError:
 46 |             pr_msg(f'Could not open vmlinux file {vmlinux}', level='ERROR')
 47 | 
 48 |     pr_msg('Could not find vmlinux file, trying to continue without one', level='ERROR')
 49 |     pr_msg('''Consider installing symbols using:
 50 |                 sudo apt install linux-image-$(uname -r)-dbgsym [deb/ubuntu]
 51 |                 sudo dnf debuginfo-install kernel [fedora]
 52 |                 sudo pacman -S linux-headers [arch]
 53 |                 sudo emerge -av sys-kernel/linux-headers [gentoo]
 54 |             ''', level='WARN')
 55 |     return user_option
 56 | 
 57 | def find_module_dbg(module_name:str):
 58 |     pathes = [f'/usr/lib/debug/lib/modules/{os.uname().release}']
 59 |     for path in pathes:
 60 |         if not os.path.exists(path) or not os.path.isdir(path):
 61 |             continue
 62 |         for root, dirs, files in os.walk(path):
 63 |             for file in files:
 64 |                 if file == f'{module_name}.ko' or file == f'{module_name}.ko.debug':
 65 |                     return os.path.join(root, file)
 66 |     return None
 67 | 
 68 | class Kallsyms:
 69 |     def __init__(self, objs:List[io.BufferedReader]):
 70 |         parsed_modules = self.parse_proc_modules()
 71 |         self.__find_modules(parsed_modules)
 72 | 
 73 |         self.keep_sym_types: Set[str] = {'t', 'T', 'w', 'W', 'r', 'R'}
 74 |         self.type_map:Dict[str, angr.cle.backends.SymbolType] = {
 75 |                     'a':angr.cle.backends.SymbolType.TYPE_OTHER,
 76 |                     'A':angr.cle.backends.SymbolType.TYPE_OTHER,
 77 |                     'd':angr.cle.backends.SymbolType.TYPE_OBJECT,
 78 |                     'D':angr.cle.backends.SymbolType.TYPE_OBJECT,
 79 |                     'b':angr.cle.backends.SymbolType.TYPE_OBJECT,
 80 |                     'B':angr.cle.backends.SymbolType.TYPE_OBJECT,
 81 |                     'r':angr.cle.backends.SymbolType.TYPE_OBJECT,
 82 |                     'R':angr.cle.backends.SymbolType.TYPE_OBJECT,
 83 |                     'v':angr.cle.backends.SymbolType.TYPE_OTHER,
 84 |                     'V':angr.cle.backends.SymbolType.TYPE_OTHER,
 85 |                     't':angr.cle.backends.SymbolType.TYPE_FUNCTION,
 86 |                     'T':angr.cle.backends.SymbolType.TYPE_FUNCTION,
 87 |                     'w':angr.cle.backends.SymbolType.TYPE_OTHER,
 88 |                     'W':angr.cle.backends.SymbolType.TYPE_OTHER,
 89 |         }
 90 | 
 91 |         all_syms = self.__read_symbols()
 92 |         all_segments = self.__analyze_sections(all_syms)
 93 |         self.exes = dict()
 94 | 
 95 |         obj_basenames = {self.__get_basename(pathlib.Path(f.name).stem):f for f in objs}
 96 | 
 97 |         def get_obj_base_sz(obj_name:str, syms) -> Tuple[int, int]:
 98 |             if obj_name == 'vmlinux':
 99 |                 min_addr = next(s[1] for s in syms if s[0] == '_stext')
100 |                 max_addr = next(s[1] for s in syms if s[0] == '_end')
101 |                 sz = max_addr - min_addr
102 |             elif obj_name in parsed_modules:
103 |                 min_addr = parsed_modules[obj_name]['address']
104 |                 sz = int(parsed_modules[obj_name]['size'])
105 |             else:
106 |                 min_addr = self.__get_min_addr(syms)
107 |                 max_addr = self.__get_max_addr(syms)
108 |                 sz = max_addr - min_addr
109 | 
110 |             return min_addr, sz
111 | 
112 | 
113 |         for obj_name, syms in all_syms.items():
114 |             mapped_addr, sz = get_obj_base_sz(obj_name, syms)
115 | 
116 |             path = None
117 |             if obj_name in obj_basenames:
118 |                 path = obj_basenames[obj_name].name
119 |             elif obj_name in parsed_modules:
120 |                 path = parsed_modules[obj_name].get('path')
121 | 
122 |             if path is not None:
123 |                 with open(path, 'rb') as f:
124 |                     if not self.check_build_id(f):
125 |                         pr_msg(f'Build ID mismatch for {obj_name}', level='WARN')
126 |                         path = None
127 |  
128 |             self.exes[obj_name] = {
129 |                 'mapped_addr': mapped_addr,
130 |                 'base_addr': arch.default_text_base if obj_name == 'vmlinux' else 0,
131 |                 'size': sz,
132 |                 'symbols': [],
133 |                 'path': path,
134 |                 'segments': all_segments[obj_name],
135 |             }
136 |             
137 |             if path is None:
138 |                 self.exes[obj_name]['symbols'] = self.__relative_symbol_tuples(syms, mapped_addr, sz)
139 |                 continue
140 |            
141 |             try:
142 |                 with open(path, 'rb') as f:
143 |                     base_syms = self.__read_sizes(f)
144 |             except FileNotFoundError as e:
145 |                 pr_msg(f'Could not find file {f}: {e}', level='WARN')
146 |                 continue
147 | 
148 |             base_addr, _ = get_obj_base_sz(obj_name, base_syms)
149 |             rebased_syms = self.__relative_symbol_tuples(base_syms, base_addr, sz)
150 |            
151 |             # Complicated since mypy doesn't like direct assignment
152 |             self.exes[obj_name].update({
153 |                 'base_addr': base_addr,
154 |                 'symbols': rebased_syms,
155 |             })
156 | 
157 |     def __find_modules(self, parsed_modules):
158 |         pathes = [f'/usr/lib/debug/lib/modules/{os.uname().release}']
159 | 
160 |         for path in pathes:
161 |             if not os.path.exists(path) or not os.path.isdir(path):
162 |                 continue
163 | 
164 |             for root, dirs, files in os.walk(path):
165 |                 for file in files:
166 |                     if not file.endswith('.ko.debug') and not file.endswith('.ko'):
167 |                         continue
168 | 
169 |                     # In kallsyms modules show with underscores instead of dashes
170 |                     basename = pathlib.Path(file).stem.split('.')[0]
171 |                     basename_underscored = basename.replace('-', '_')
172 | 
173 |                     for obj_name in [basename, basename_underscored]:
174 |                         if obj_name in parsed_modules:
175 |                             parsed_modules[obj_name]['path'] = os.path.join(root, file)
176 |             break
177 | 
178 |     def __relative_symbol_tuples(self, syms:List[Tuple[str, int, str, Optional[int]]], min_addr:int, sz:int) -> List[Tuple[str, int, str, Optional[int]]]:
179 |             max_addr = min_addr + sz
180 | 
181 |             return [(s[0], s[1] - min_addr, s[2], s[3]) for s in syms if s[1] >= min_addr and s[1] < max_addr]
182 | 
183 |     def __get_min_addr(self, syms:List[Tuple[str, int, str, Optional[int]]]) -> int:
184 |         return min([s[1] for s in syms if s[2] in {'t', 'T', 'r', 'R'}])
185 | 
186 |     def __get_max_addr(self, syms:List[Tuple[str, int, str, Optional[int]]]) -> int:
187 |         return max([s[1] + s[3] for s in syms if s[2] in {'t', 'T', 'r', 'R'} and s[3] is not None])
188 | 
189 | 
190 |     def __read_symbols(self) -> Dict[str, List[Tuple[str, int, str, Optional[int]]]]:
191 |         builtin_index:defaultdict[str, int] = defaultdict(int)
192 |         global arch
193 | 
194 |         f = open("/proc/kallsyms", "rb")
195 |         logging.info("reading symbols")
196 |         f.seek(0)
197 | 
198 |         data = f.read().decode("ascii")
199 | 
200 |         raw = []
201 |         for l in data.splitlines():
202 |             name = l.split()[2]
203 |             addr = int(l.split()[0], 16)
204 |             sym_type = l.split()[1]
205 |             module_name = 'vmlinux' if len(l.split()) < 4 else l.split()[3][1:-1]
206 | 
207 |             # Builtin sections can overlap each other, which angr doesn't like. So
208 |             # we are not going to merge them. And instead we are creating each one a
209 |             # unique name with a different suffix.
210 |             if module_name.startswith('__builtin') or module_name in {'bpf'}:
211 |                 suffix = builtin_index[module_name]
212 |                 builtin_index[module_name] += 1
213 |                 module_name = f'{module_name}:{suffix}'
214 | 
215 |             raw.append((name, addr, sym_type, module_name))
216 | 
217 |         list.sort(raw, key=lambda x:x[1])
218 |         if len(raw) == 0:
219 |             pr_msg("cannot read symbol addresses from kallsyms", level="ERROR")
220 |             raise Exception()
221 | 
222 |         syms = defaultdict(list)
223 | 
224 |         # Guess the sizes
225 |         prev = raw[0]
226 |         for sa in raw[1:]:
227 |             syms[prev[3]].append((prev[0], prev[1], prev[2], sa[1] - prev[1])) 
228 |             prev = sa
229 | 
230 |         remaining_in_page = arch.page_size - prev[1] % arch.page_size
231 |         syms[prev[3]].append((prev[0], prev[1], prev[2], remaining_in_page))
232 |         return syms # type: ignore
233 |     
234 |     def __analyze_sections(self, syms:Dict[str, List[Tuple[str, int, str, Optional[int]]]]) -> Dict[str, List[Tuple[int, int]]]:
235 |         segments_dict = dict()
236 |         vmlinux = syms['vmlinux']
237 | 
238 |         for k, v in syms.items():
239 |             sections:List[Tuple[int, int]] = []
240 |             cur_section_start = None
241 |             cur_section_end = None
242 | 
243 |             for sa in v:
244 |                 if sa[3] is None:
245 |                     continue
246 |                 if sa[2] in self.keep_sym_types:
247 |                     if cur_section_start is None:
248 |                         cur_section_start = sa[1]
249 |                     cur_section_end = sa[1] + sa[3]
250 |                 elif sa[2] not in self.keep_sym_types and cur_section_start is not None:
251 |                     cur_section_end = sa[1]
252 |                     if cur_section_start != cur_section_end:
253 |                         sections.append((cur_section_start, sa[1]))
254 |                     cur_section_start = None
255 | 
256 |             if cur_section_start is not None:
257 |                 assert cur_section_end is not None
258 |                 sections.append((cur_section_start, cur_section_end))
259 | 
260 |             segments_dict[k] = sections
261 | 
262 |         include_ranges_syms = [
263 |             ('__start_rodata', '__end_rodata'),
264 |             ('_stext', '_etext'),
265 |         ]
266 |         # find the symbols from include_ranges_syms in vmlinux
267 |         include_ranges = []
268 |         for start, end in include_ranges_syms:
269 |             start_addr = next(s[1] for s in vmlinux if s[0] == start)
270 |             end_addr = next(s[1] for s in vmlinux if s[0] == end)
271 |             include_ranges.append((start_addr, end_addr))
272 | 
273 |         # TODO: Move to arch
274 |         start_addr = next(s[1] for s in vmlinux if s[0] == 'idt_table')
275 |         end_addr =  start_addr + 4096
276 |         include_ranges.append((start_addr, end_addr))
277 | 
278 |         combined_ranges = segments_dict['vmlinux'] + include_ranges
279 |         combined_ranges.sort(key=lambda x: x[0])
280 | 
281 |         # Initialize the merged ranges list with the first range
282 |         merged_ranges = [combined_ranges[0]]
283 | 
284 |         for current_start, current_end in combined_ranges[1:]:
285 |             last_range_start, last_range_end = merged_ranges[-1]
286 | 
287 |             # Check if the current range overlaps or is adjacent to the last range in the merged list
288 |             if current_start <= last_range_end + 1:
289 |                 # Update the end value of the last range to the maximum of the current and last end values
290 |                 merged_ranges[-1] = (last_range_start, max(current_end, last_range_end))
291 |             else:
292 |                 # If the current range doesn't overlap or is not adjacent, append it to the merged list
293 |                 merged_ranges.append((current_start, current_end))
294 | 
295 |         segments_dict['vmlinux'] = merged_ranges
296 | 
297 |         return segments_dict
298 | 
299 |     @staticmethod
300 |     def __get_basename(filename: str) -> str:
301 |         if filename.startswith('vmlinux'):
302 |             return 'vmlinux'
303 |         
304 |         stem = filename.split('.')[0]
305 |         return stem.replace('-', '_')
306 | 
307 |     @staticmethod
308 |     def extract_build_id(data) -> Optional[str]:
309 |         build_id = None
310 |         offset = 0
311 |         while offset < len(data):
312 |             namesz, descsz, note_type = struct.unpack_from('III', data, offset)
313 |             offset += 12
314 | 
315 |             name_start = offset
316 |             name_end = name_start + namesz
317 | 
318 |             desc_start = (name_end + 3) & ~3
319 |             desc_end = desc_start + descsz
320 | 
321 |             # Get it from the last note if there are multiple ones
322 |             if note_type == NT_GNU_BUILD_ID:
323 |                 build_id = data[desc_start:desc_end]
324 | 
325 |             offset = (desc_end + 3) & ~3
326 |         
327 |         if build_id is None:
328 |             return None
329 |         
330 |         build_id_hex = ''.join([format(byte, '02x') for byte in build_id])
331 |         return build_id_hex
332 | 
333 |     @staticmethod
334 |     def get_module_build_id(module_name) -> Optional[str]:
335 |         build_id_path = pathlib.Path(f"/sys/module/{module_name}/notes/.note.gnu.build-id")
336 |         
337 |         if not build_id_path.exists():
338 |             raise Exception(f"{build_id_path} not found. Ensure the module is loaded and you have the required permissions.")
339 | 
340 |         data = build_id_path.read_bytes()
341 |         return Kallsyms.extract_build_id(data)
342 | 
343 |     @staticmethod
344 |     def get_build_id_from_vmlinux(vmlinux_file:io.BufferedReader) -> Optional[str]:
345 |         r = None
346 |         #with open(vmlinux_file, 'rb') as f:
347 |         elf = ELFFile(vmlinux_file)
348 |         for section in elf.iter_sections():
349 |             if isinstance(section, NoteSection):
350 |                 for note in section.iter_notes():
351 |                     if note.n_type == 'NT_GNU_BUILD_ID':
352 |                         r = note.n_desc
353 |         return r
354 | 
355 |     @staticmethod
356 |     def get_build_id_from_kernel_notes(kernel_notes_file:pathlib.Path):
357 |         data = kernel_notes_file.read_bytes()
358 |         return Kallsyms.extract_build_id(data)
359 | 
360 |     @staticmethod
361 |     def check_build_id(obj_file:io.BufferedReader) -> bool:
362 |         file_build_id = Kallsyms.get_build_id_from_vmlinux(obj_file)
363 | 
364 |         path = pathlib.Path(obj_file.name)
365 |         basename = Kallsyms.__get_basename(path.name)
366 | 
367 |         if basename == 'vmlinux':
368 |             live_build_id = Kallsyms.get_build_id_from_kernel_notes(pathlib.Path("/sys/kernel/notes"))
369 |         else:
370 |             live_build_id = Kallsyms.get_module_build_id(basename)
371 |         
372 |         if file_build_id is None:
373 |             logging.info(f"no build ID found in {obj_file}")
374 |             return False
375 |         
376 |         if live_build_id is None:
377 |             logging.info(f"no build ID found in kernel")
378 |             return False
379 |         
380 |         if file_build_id != live_build_id:
381 |             logging.info(f"build ID mismatch: {file_build_id} != {live_build_id}")
382 |             return False
383 |         
384 |         return True
385 | 
386 | 
387 |     def __read_sizes(self, file:io.BufferedReader) -> List[Tuple[str, int, str, Optional[int]]]:
388 |         filename = pathlib.Path(file.name)
389 |         logging.info(f"reading symbol sizes: {filename}")
390 | 
391 |         # Reading the ELF using elftools is incredibly slow. Use nm instead.
392 |         args = ['nm', '-n', '--print-size', str(filename)]
393 |         logging.debug("running: {0}".format(' '.join(args)))
394 |         try:
395 |             output = subprocess.check_output(
396 |                 args, stderr=subprocess.STDOUT, timeout=20,
397 |                 universal_newlines=True)
398 |         except subprocess.CalledProcessError as e:
399 |             pr_msg(f"failed reading symbol file: {e}", level="ERROR")
400 |             raise e
401 | 
402 |         lns = [[l[:16]] + l[17:].split() for l in output.splitlines()]
403 | 
404 |         syms = [(l[3 if len(l) == 4 else 2],                # name
405 |             int(l[0], 16),                                  # addr
406 |             l[2 if len(l) == 4 else 1],                     # type
407 |             (int(l[1], 16)) if len(l) == 4 else None)       # size
408 |             for l in lns if len(l) <= 4 and l[0] != ' ' * 16]
409 | 
410 |         return syms
411 | 
412 |     def parse_proc_modules(self) -> Dict[str, Dict[str, Any]]:
413 |         modules = dict()
414 | 
415 |         with open('/proc/modules', 'r') as f:
416 |             for line in f:
417 |                 parts = line.strip().split()
418 |                 module_name = parts[0]
419 |                 module_size = int(parts[1])
420 |                 module_ref_count = None if parts[2] == '-' else int(parts[2])
421 |                 module_dependencies = [dep for dep in parts[4].split(',') if dep != '-']
422 |                 module_state = parts[4]
423 |                 module_address = int(parts[5], 16)
424 | 
425 |                 module_info = {
426 |                     'size': module_size,
427 |                     'ref_count': module_ref_count,
428 |                     'dependencies': module_dependencies,
429 |                     'state': module_state,
430 |                     'address': module_address
431 |                 }
432 |                 modules[module_name] = module_info
433 | 
434 |         return modules
435 | 
436 |     def get_symbols(self, backend:cle.Backend, name:str) -> List[cle.Symbol]:
437 |         syms = self.exes[name]['symbols']
438 |         assert isinstance(syms, list)
439 | 
440 |         syms = [cle.Symbol(owner = backend, name = s[0],
441 |                 relative_addr = s[1],
442 |                 sym_type = self.type_map[s[2]],
443 |                 size = s[3]) for s in syms]
444 | 
445 |         return syms


--------------------------------------------------------------------------------
/reporter.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2023 VMware, Inc.
  2 | # SPDX-License-Identifier: BSD-2-Clause
  3 | import logging
  4 | import abc
  5 | import io
  6 | import re
  7 | from typing import Any, Dict, Iterable, List, Optional, Set, Tuple, Union
  8 | import colors
  9 | import pathlib
 10 | import copy
 11 | 
 12 | from syscall import SyscallInfo, ErrorcodeInfo
 13 | from ftrace import Ftrace
 14 | from angrmgr import Angr
 15 | from angrsim import AngrSim
 16 | from arch import arch
 17 | from cle.backends import Symbol
 18 | from prmsg import pr_msg, uptime
 19 | from addr2line import Addr2Line
 20 | 
 21 | class Reporter(metaclass=abc.ABCMeta):
 22 |     def __init__(self,
 23 |                  objs: List[io.BufferedReader],
 24 |                  syscall_filter: Optional[int],
 25 |                  errcode_filter: Optional[int],
 26 |                  occurances_filter: Optional[Set[int]],
 27 |                  angr_mgr: Angr,
 28 |                  print_stats: bool,
 29 |                  failures: List[Dict[str, Any]],
 30 |                  traces: List[Union[List[Dict[str, Union[int, str, float]]], str]],
 31 |                  src_path: Optional[str] = None,
 32 |     ):
 33 |         self.objs = objs
 34 |         self.syscall_filter = syscall_filter
 35 |         self.errcode_filter = errcode_filter
 36 |         self.occurances_filter = occurances_filter
 37 |         self.failures = failures
 38 |         self.kallsyms = None
 39 |         self.angr_mgr = angr_mgr
 40 |         self.print_stats = print_stats
 41 |         self.traces = traces
 42 |         self.src_path = src_path and pathlib.Path(src_path)
 43 | 
 44 |     @abc.abstractmethod
 45 |     def report(self):
 46 |         pass
 47 | 
 48 |     @property
 49 |     @abc.abstractmethod
 50 |     def detailed_trace(self):
 51 |         pass
 52 | 
 53 |     def do_print_stats(self, errcode:int, sim_attempts:int, branches:List, sim_stats:Dict[str, Union[List, int]]):
 54 |         pr_msg("---", new_line_after = True, level='DATA')
 55 |         pr_msg(f"errorcode: {errcode} [{ErrorcodeInfo.get_name(errcode)}]", level='DATA')
 56 |         pr_msg(f"divergence: {sim_stats['simulation diverged']}", level='DATA')
 57 |         pr_msg(f"functions: {sim_attempts}", level='DATA')
 58 |         pr_msg(f"branches: {len(branches)}", level='DATA')
 59 |         pr_msg(f"failure returning symbol index: {sim_stats['failure returning symbol index']}", level='DATA')
 60 |         pr_msg(f"failure reutrning function index: {sim_stats['failure returning function index']}", level='DATA')
 61 |         pr_msg(f"callstack function depth: {sim_stats['callstack function depth']}", level='DATA')
 62 |         pr_msg(f"callstack: {sim_stats['depth']}", level='DATA')
 63 |         pr_msg(f"analysis time: {int(uptime())}", level='DATA')
 64 |         pr_msg(f"recording time: {sim_stats.get('simulation time', 'N/A')}", level='DATA')
 65 |         if 'backtrack' in sim_stats:
 66 |             pr_msg(f'candidates: {sim_stats["divergence points"]}', level='DATA')
 67 |             pr_msg(f'backtracking: {sim_stats["backtrack"]}', level='DATA')
 68 |         pr_msg('', level='DATA', new_line_after = True)
 69 | 
 70 |     def get_unsimulated_callstack(self, branches: List[Dict[str, Any]], end: int) -> List[int]:
 71 |         callstack = []
 72 |         first = True
 73 | 
 74 |         # Go from the end of the trace to the return point of the function we
 75 |         # care about, and build the callstack
 76 |         for branch in reversed(branches[end + 1:]):
 77 |             from_ip = branch['from_ip']
 78 |             to_ip = branch['to_ip']
 79 |             if from_ip is None:
 80 |                 continue
 81 |             insn = from_ip and self.angr_mgr.get_insn(from_ip)
 82 | 
 83 |             if arch.is_ret_insn(insn) and to_ip is not None:
 84 |                 if first:
 85 |                     callstack.append(to_ip)
 86 |                 callstack.append(from_ip)
 87 |             elif arch.is_call_insn(insn) and len(callstack) > 0:
 88 |                 callstack.pop()
 89 |             first = False
 90 | 
 91 |         callstack.reverse()
 92 |         return callstack
 93 | 
 94 |     def get_entry_callstack(self, branch: Dict[str, Any]) -> Optional[List[int]]:
 95 |         if 'callstack' not in branch:
 96 |             return None
 97 |         # Skip the caller and callee on top of the callstack.
 98 |         # TODO: for consistency it would be best to ensure the callstack is using
 99 |         # the call addresses instead of the return addresses, and then remove this
100 |         # manipulation.
101 |         prev_ips = [self.angr_mgr.prev_insn_addr(ip) for ip in branch['callstack'][2:]]
102 |         return [ip for ip in prev_ips if ip is not None]
103 | 
104 |     def report_one_fallback(self,
105 |                             branches: List[Dict[str, Union[int, Dict[str, int], None, List[int]]]],
106 |                             errcode: int,
107 |                             order: List[Tuple[int, int]],
108 |         ) -> bool:
109 |         '''Report a failure using the fallback method, which is to just print the
110 |         callstack of the function that outermost function that returned the error'''
111 |         start, end = order[-1]
112 |         ret = branches[end - 1].get('ret', None)
113 |         if not isinstance(ret, int) or not ErrorcodeInfo.is_error_code(ret, errcode):
114 |             return False
115 | 
116 |         callstack = (self.get_entry_callstack(branches[start]) or
117 |                      self.get_unsimulated_callstack(branches, end))
118 | 
119 |         assert isinstance(callstack, list)
120 |         caller_address = branches[start]['from_ip']
121 |         callee_address = branches[start]['to_ip']
122 |         assert isinstance(caller_address, int)
123 |         assert isinstance(callee_address, int)
124 |         callstack = [callee_address, caller_address] + callstack
125 |         res = {
126 |             'callstack': callstack,
127 |             'failure returning symbol index': 0
128 |         }
129 |         self.show_results(res)
130 |         return True
131 | 
132 |     def report_one(self,
133 |                    branches: List[Dict[str, Union[int, Dict[str, int], None, List[int]]]],
134 |                    errcode: int,
135 |                    sim_syms: Optional[Set[Symbol]] = None,
136 |                    simulate_all: bool = False,
137 |     ):
138 |         if self.errcode_filter and errcode != self.errcode_filter:
139 |             return
140 | 
141 |         if simulate_all:
142 |             order = [(0, len(branches))]
143 |         else:
144 |             order = self.get_analysis_order(branches, errcode)
145 | 
146 |         # TODO: get rid off. Instead, get_sym() or something should make this cleanup
147 |         if sim_syms is not None:
148 |             self.angr_mgr.remove_unsupported_pyvex_insn(sim_syms)
149 | 
150 |         avoid_repeated_syms = True
151 |         tried_syms = set()
152 |         success = False
153 |         sim_attempts = 0
154 |         for start, end in order:
155 |             sim_attempts += 1
156 |             ip = branches[start]['to_ip']
157 |             if ip is None:
158 |                 continue
159 |             sym = self.angr_mgr.get_sym(ip)
160 |             if sym is None:
161 |                 continue
162 |             if avoid_repeated_syms and sym in tried_syms:
163 |                 continue
164 |             if (self.angr_mgr.is_skipped_sym(ip) or
165 |                 self.angr_mgr.is_fastpath_to_ret(ip) or
166 |                 self.angr_mgr.is_fastpath_to_out(ip)):
167 |                 continue
168 |             pr_msg(f"trying {sym.name}()...", level="INFO")
169 |             tried_syms.add(sym)
170 | 
171 |             sim = AngrSim(
172 |                 angr_mgr = self.angr_mgr,
173 |                 branches = branches[start:end],
174 |                 errcode = errcode,
175 |                 has_calls = False,
176 |                 sim_syms = sim_syms,
177 |                 detailed_trace = self.detailed_trace
178 |             )
179 | 
180 |             try:
181 |                 res = sim.simulate()
182 |             except SystemError as e:
183 |                 pr_msg(f'retrying: {e}', level='WARN')
184 |                 continue
185 | 
186 |             if 'failure_stack' not in res:
187 |                 continue
188 | 
189 |             simulation_callstack = res['failure_stack']
190 |             assert isinstance(simulation_callstack, list)
191 |            
192 |             unsimulated_callstack = (self.get_entry_callstack(branches[start]) or
193 |                                     self.get_unsimulated_callstack(branches, end))
194 | 
195 |             assert isinstance(unsimulated_callstack, list)
196 |             callstack = simulation_callstack + unsimulated_callstack
197 | 
198 |             errorcode_return_depth = res['errorcode return depth']
199 |             assert isinstance(errorcode_return_depth, int)
200 | 
201 |             res['callstack'] = callstack
202 |             res['failure returning symbol index'] = max(len(callstack) - len(unsimulated_callstack) - errorcode_return_depth - 1, 0)
203 | 
204 |             self.show_results(res)
205 | 
206 |             if self.print_stats:
207 |                 res['depth'] = len(callstack)
208 |                 self.do_print_stats(errcode, sim_attempts, branches, res)
209 |             success = True
210 |             break
211 | 
212 |         # The very least look at the most external function return value
213 |         if not success:
214 |             success = self.report_one_fallback(branches, errcode, order)
215 | 
216 |         if not success:
217 |             pr_msg("analysis failed", level="ERROR")
218 | 
219 |     def change_to_relative_path(self, path: str) -> str:
220 |         if len(path) == 0 or path[0] != '/':
221 |             return path
222 |         match = re.search(r'linux-\d+\.\d+\.\d+/(.*)', path)
223 |         if match:
224 |             return match.group(1)
225 |         return path
226 | 
227 |     def get_callstack_locations(self, callstack: List[int]) -> List[Dict[str, Any]]:
228 |         addr2line = Addr2Line.get_instance()
229 | 
230 |         addr_to_base = {a: self.angr_mgr.base_addr(a) for a in callstack}
231 |         base_lines_dict = addr2line.run(addr_to_base.values())
232 | 
233 |         # change absolute paths to relative paths
234 |         for locs in base_lines_dict.values():
235 |             for loc in (locs or []):
236 |                 loc['file'] = self.change_to_relative_path(loc['file'])
237 | 
238 |         # map addresses to locations
239 |         locs = {a: base_lines_dict[addr_to_base[a]] for a in callstack}
240 | 
241 |         callstack_locations: List[Dict] = []
242 |         for addr in callstack:
243 |             try:
244 |                 sym = self.angr_mgr.get_sym(addr)
245 |             except ValueError:
246 |                 sym = None
247 | 
248 |             callstack_locations.append({
249 |                 'addr': addr,
250 |                 'sym': sym,
251 |                 'offset': sym and addr - sym.rebased_addr,
252 |                 'locs': locs.get(addr),
253 |             })
254 | 
255 |         return callstack_locations
256 | 
257 |     def analyze_source_callstack(self, res:Dict):
258 |         callstack = res['callstack']
259 |         failure_returning_symbol_index = res['failure returning symbol index']
260 |         callstack_locations = self.get_callstack_locations(callstack)
261 |         failure_returning_function_index = 0
262 |         callstack_function_depth = 1
263 | 
264 |         source_callstack:List[Dict] = []
265 | 
266 |         for i, callstack_location in enumerate(callstack_locations):
267 |             locs = callstack_location['locs']
268 | 
269 |             n_funcs = max(len(locs), 1)
270 |             if failure_returning_symbol_index is not None and failure_returning_symbol_index > i:
271 |                 failure_returning_function_index += n_funcs
272 | 
273 |             callstack_function_depth += n_funcs
274 | 
275 |             if locs is None:
276 |                 source_callstack.append(callstack_location)
277 |                 continue
278 | 
279 |             for loc in locs:
280 |                 entry = copy.copy(callstack_location)
281 |                 del entry['locs']
282 |                 entry.update({
283 |                     'file': loc['file'],
284 |                     'line': loc['line'],
285 |                     'col': loc.get('col'),
286 |                     'func': loc['func'],
287 |                 })
288 |                 source_callstack.append(entry)
289 |     
290 |         res.update({
291 |             'failure returning function index': failure_returning_function_index,
292 |             'callstack function depth': callstack_function_depth,
293 |             'source callstack': source_callstack
294 |         })
295 | 
296 |     def read_surrounding_code(self, res:Dict):
297 |         source_callstack = res['source callstack']
298 |         if len(source_callstack) == 0:
299 |             return
300 | 
301 |         to_extract_indexes = {0}
302 |         to_extract_indexes.add(res['failure returning function index'])
303 | 
304 |         for idx in to_extract_indexes:
305 |             e = source_callstack[idx]
306 |             if e.get('file') is None:
307 |                 continue
308 |             try:
309 |                 code = self.extract_surrounding_code(line=e['line'],
310 |                                                      col=e.get('col', 1),
311 |                                                      file_name=e['file'])
312 |                 e['code'] = code
313 |             except FileNotFoundError as e:
314 |                 pr_msg(str(e), level='WARN', new_line_before=True)
315 | 
316 |     def print_surrounding_code(self, res:Dict):
317 |         index_message = [(0, 'root-cause')]
318 | 
319 |         if res['failure returning symbol index'] != 0:
320 |             index_message.append((res['failure returning symbol index'], 'failure-returning'))
321 | 
322 |         for idx, msg in index_message:
323 |             callstack_entry = res['source callstack'][idx]
324 |             if callstack_entry.get('code'):
325 |                 pr_msg(f'code around {msg}, {callstack_entry["func"]}():', level='TITLE', new_line_before=True)
326 |                 pr_msg(callstack_entry['code'], level='DATA', new_line_after=True)
327 |                 break
328 | 
329 |     def show_results(self, res:Dict):
330 |         self.analyze_source_callstack(res)
331 |         self.read_surrounding_code(res)
332 |         self.print_callstack(res)
333 |         self.print_surrounding_code(res)
334 | 
335 |     def print_callstack(self, res:Dict):
336 |         failure_returning_function_index = res['failure returning function index']
337 |         pr_msg("callstack (decoding):", level="TITLE", new_line_before=True)
338 | 
339 |         for i, e in enumerate(res['source callstack']):
340 |             addr = e['addr']
341 |             sym = e['sym']
342 |             bin_loc = hex(addr) if sym is None else f'{sym.name}+{e["offset"]}'
343 | 
344 |             if 'file' not in e:
345 |                 fileline = '?:?'
346 |             else:
347 |                 col_str = f':{e["col"]}' if e['col'] is not None else '' 
348 |                 fileline = f'{e["file"]}:{e["line"]}{col_str}'
349 | 
350 |                 failure_pointer = ' <--' if failure_returning_function_index == i else ''
351 | 
352 |                 pr_msg("{0: <40}  {1: <40}  {2}() {3}".format(
353 |                     bin_loc, fileline, e['func'], failure_pointer), level='DATA')
354 | 
    def get_analysis_order(self,
                           branches: List[Dict],
                           errcode: Optional[int]) -> List[Tuple[int,int]]:
        """Compute the ``(start, end)`` slices of ``branches`` to analyse, in order.

        The trace is scanned backwards to build a tree of nested spans: a
        return branch (or a branch whose instruction is unavailable) opens a
        span ending just after it, and a call branch closes the current span
        by setting its start. The tree is then emptied by repeatedly taking
        the rightmost leaf, so the innermost, latest span comes first.

        A span is kept only if the last branch of the slice has a ``from_ip``
        and, when ``errcode`` is given and that branch has a ``'ret'`` value,
        ``ErrorcodeInfo.is_error_code`` accepts that value for ``errcode``.
        """
        # Root node spanning the whole trace; it is never emitted as a result.
        tree:Dict[str, Union[List, int, bool]] = {'children': [], 'start': 0, 'end': len(branches), 'root': True}
        n:Dict[str, Any]
        cur = tree
        i = len(branches) - 1
        # Ancestors of `cur` while walking the trace.
        stack:List[Dict[str, Any]] = []

        # We are going to process the entries in reverse, since we know we have
        # the end of the trace, but the beginning might be missing.
        while i >= 0:
            b = branches[i]
            ip = b['from_ip']
            insn = ip and self.angr_mgr.get_insn(ip)
            if insn and arch.is_call_insn(insn) and len(stack) != 0:
                # Found the call matching the current span: record where it
                # starts and go back to the enclosing span.
                cur['start'] = i
                cur = stack.pop()
            elif not insn or arch.is_ret_insn(insn):
                # As we do not know where the call is, mark it as the beginning
                # of the trace, for cases where we have a ret without a call.
                n = {'children': [], 'start': 0, 'end': i + 1}

                assert isinstance(cur['children'], list)
                # Inserted at the front because the trace is walked backwards;
                # this keeps children in trace order.
                cur['children'].insert(0, n)

                stack.append(cur)
                cur = n

            i -= 1

        # Scan from the rightmost leaf and add to results
        stack = [tree]
        results = list()
        while True:
            n = stack[-1]
            if len(n['children']) != 0:
                # Descend into the last remaining child.
                stack.append(n['children'][-1])
                continue

            # Only the root is left once all its children have been consumed.
            if 'root' in n:
                break

            # Emit the leaf and detach it from its parent.
            parent = stack[-2]
            parent['children'].pop()
            results.append((n['start'], n['end']))
            stack.pop()
        
        # r[1] - 1 is the index of the last branch inside the slice.
        results = [r for r in results
                    if (branches[r[1] - 1]['from_ip'] is not None and
                    (errcode is None or 'ret' not in branches[r[1] - 1] or
                    ErrorcodeInfo.is_error_code(branches[r[1] - 1]['ret'], errcode)))]

        return results
409 |     
410 |     def parse_trace_entry(self, line:str) -> Optional[Dict]:
411 |         """
412 |         Parse a single entry of the trace file.
413 |         """
414 |         m = Ftrace.entry_exit_regex.match(line)
415 |         if m is None:
416 |             return None
417 |         
418 |         raw = m.groupdict()
419 |         d:Dict[str, Any] = dict()
420 | 
421 |         d['time'] = float(raw['time'])
422 |         d['cpu'] = int(raw['cpu'])
423 |         d['pid'] = int(raw['pid'])
424 | 
425 |         if raw['syscall_enter_name'] is not None:
426 |             args = []
427 |             for arg in raw['syscall_args'].split(','):
428 |                 k, v = arg.split(':')
429 |                 args.append((k, int(v, 16)))
430 |             d['syscall_args'] = args
431 |             d['type'] = 'syscall_enter'
432 |             d['syscall'] = SyscallInfo.get_syscall_nr(raw['syscall_enter_name'])
433 | 
434 |         elif raw['syscall_exit_name'] is not None:
435 |             d['type'] = 'syscall_exit'
436 |             d['syscall'] = SyscallInfo.get_syscall_nr(raw['syscall_exit_name'])
437 |             d['syscall_ret'] = int(raw['err2'], 16)
438 |         
439 |         elif raw['syscall_exit_nr'] is not None:
440 |             d['type'] = 'syscall_exit'
441 |             d['syscall'] = int(d['syscall_exit_nr'])
442 |             d['syscall_ret'] = int(raw['err'], 16)
443 |         
444 |         return d
445 | 
446 |     def tokenize_c_code(code):
447 |         # Regular expression pattern to match common C tokens
448 |         pattern = r'\b[_a-zA-Z][_a-zA-Z0-9]*\b|[-+*/%=<>!&|^~]?=|[-+*/%<>!&|^~]|\d+\.\d+|\d+|".*?"|\'.*?\'|[(){}[\],.;]'
449 |         return [(match.start(), match.group()) for match in re.finditer(pattern, code)]
450 | 
451 |     @staticmethod
452 |     def get_tokens_around_column(code, column):
453 |         tokens = Reporter.tokenize_c_code(code)
454 |         before_token = ''
455 |         current_token = ''
456 |         after_token = ''
457 | 
458 |         for i, (start, token) in enumerate(tokens):
459 |             if start <= column < start + len(token):
460 |                 current_token = token
461 |                 before_token = code[:start]
462 |                 after_token = code[start + len(token):]
463 |                 break
464 | 
465 |         return before_token, current_token, after_token
466 | 
467 |     def extract_surrounding_code(self, line:int, col:int, file_name:str) -> Optional[str]:
468 |         if self.src_path is None:
469 |             return None
470 |         
471 |         assert isinstance(self.src_path, pathlib.Path)
472 |         file = self.src_path / file_name
473 |         try:
474 |             lines = file.read_text().splitlines()
475 |         except FileNotFoundError:
476 |             raise FileNotFoundError(f'Could not find file {file}')
477 | 
478 |         start_line = max(0, line - 20)
479 |         end_line = start_line + 40
480 |         code = lines[start_line:end_line]
481 |         line_offset = line - start_line - 1
482 |         if col == 0:
483 |             code[line_offset] = colors.color(f'{code[line_offset]}     <<<' , fg='red')
484 |         else:
485 |             before_token, failure_token, after_token = self.get_tokens_around_column(code[line_offset], col - 1)
486 |             code[line_offset] = (before_token + 
487 |                                  colors.color(f'{failure_token}', fg='red') +
488 |                                  after_token +
489 |                                  colors.color(f'    <<<' , fg='red'))
490 |         enumerated = enumerate(code, start_line)
491 |         return '\n'.join(f'{i+1:4} {l}' for i, l in enumerated)


--------------------------------------------------------------------------------
/x86arch.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2023 VMware, Inc.
  2 | # SPDX-License-Identifier: BSD-2-Clause
  3 | import capstone
  4 | from typing import Any, Dict, Tuple, List, Optional, Set, Iterable, Callable, Union
  5 | import angr
  6 | import claripy
  7 | import copy
  8 | import struct
  9 | 
 10 | from cle.backends import Symbol
 11 | from abc import ABC, abstractmethod
 12 | from abstractarch import Arch, ControlStatePluginArch
 13 | 
 14 | 
 15 | class ControlStatePluginX86(ControlStatePluginArch):
 16 |     def __init__(self):
 17 |         super().__init__()
 18 |         self.eflags_if = True
 19 | 
 20 |     def copy(self) -> 'ControlStatePluginX86':
 21 |         return copy.copy(self)
 22 | 
 23 | class ArchX86(Arch):
    # Bit masks of individual flags in the x86 EFLAGS register.
    X86_EFLAGS_CF = 0x0001  # carry
    X86_EFLAGS_PF = 0x0004  # parity
    X86_EFLAGS_AF = 0x0010  # auxiliary carry
    X86_EFLAGS_ZF = 0x0040  # zero
    X86_EFLAGS_SF = 0x0080  # sign
    X86_EFLAGS_OF = 0x0800  # overflow
    X86_EFLAGS_IF = 0x0200  # interrupt enable

    # NOTE(review): presumably the size in bytes of one stack slot -- confirm against users.
    STACK_SIZE = 8
    # Returned by the stack_end property.
    STACK_END = 0xffffeb0000000000
    # Returned by the syscall_insn_len property.
    SYSCALL_INSN_LEN = 2
 35 | 
 36 |     @property
 37 |     def stack_end(self) -> int:
 38 |         return self.STACK_END
 39 | 
 40 |     @property
 41 |     def syscall_insn_len(self) -> int:
 42 |         return self.SYSCALL_INSN_LEN
 43 | 
    # The sixteen 64-bit general-purpose register names.
    # NOTE(review): presumably used to recognise per-register retpoline thunk
    # symbols -- confirm against the code that reads this set.
    retpoline_thunk_regs = { 'rax', 'rcx', 'rdx', 'rbx', 'rsp', 'rbp', 'rsi', 'rdi',
                        'r8', 'r9', 'r10', 'r11', 'r12', 'r13', 'r14', 'r15' }

    # Symbol names kept private to this class (name-mangled).
    # NOTE(review): looks like the symbols through which interrupt/NMI handling
    # returns -- confirm against the code that reads this set.
    __irq_exit_sym_names = {'native_irq_return_iret', 'asm_exc_nmi', 'nmi_restore'}
 48 | 
    # Conditional jumps whose condition depends on a single group of EFLAGS
    # bits. Each capstone instruction id maps to (mask of the flags checked,
    # invert).
    # NOTE(review): the entries are consistent with "taken when any checked
    # flag is set, negated when invert is True" (e.g. JA = not (CF or ZF)) --
    # confirm against the code that evaluates this table.
    flags_cond_map = {
        # Checked flags, Invert
        capstone.x86.X86_INS_JAE: (X86_EFLAGS_CF, True),
        capstone.x86.X86_INS_JA: (X86_EFLAGS_CF|X86_EFLAGS_ZF, True),
        capstone.x86.X86_INS_JBE: (X86_EFLAGS_CF|X86_EFLAGS_ZF, False),
        capstone.x86.X86_INS_JB: (X86_EFLAGS_CF, False),
        capstone.x86.X86_INS_JE: (X86_EFLAGS_ZF, False),
        capstone.x86.X86_INS_JNE: (X86_EFLAGS_ZF, True),
        capstone.x86.X86_INS_JNO: (X86_EFLAGS_OF, True),
        capstone.x86.X86_INS_JNP: (X86_EFLAGS_PF, True),
        capstone.x86.X86_INS_JNS: (X86_EFLAGS_SF, True),
        capstone.x86.X86_INS_JO: (X86_EFLAGS_OF, False),
        capstone.x86.X86_INS_JP: (X86_EFLAGS_PF, False),
        capstone.x86.X86_INS_JS: (X86_EFLAGS_SF, False),
    }
 64 | 
 65 |     predicated_map = {
 66 |         capstone.x86.X86_INS_SETAE: capstone.x86.X86_INS_JAE,
 67 |         capstone.x86.X86_INS_SETA: capstone.x86.X86_INS_JA,
 68 |         capstone.x86.X86_INS_SETBE: capstone.x86.X86_INS_JBE,
 69 |         capstone.x86.X86_INS_SETB: capstone.x86.X86_INS_JB,
 70 |         capstone.x86.X86_INS_SETE: capstone.x86.X86_INS_JE,
 71 |         capstone.x86.X86_INS_SETGE: capstone.x86.X86_INS_JGE,
 72 |         capstone.x86.X86_INS_SETG: capstone.x86.X86_INS_JG,
 73 |         capstone.x86.X86_INS_SETLE: capstone.x86.X86_INS_JLE,
 74 |         capstone.x86.X86_INS_SETL: capstone.x86.X86_INS_JL,
 75 |         capstone.x86.X86_INS_SETNE: capstone.x86.X86_INS_JNE,
 76 |         capstone.x86.X86_INS_SETNO: capstone.x86.X86_INS_JNO,
 77 |         capstone.x86.X86_INS_SETNP: capstone.x86.X86_INS_JNP,
 78 |         capstone.x86.X86_INS_SETNS: capstone.x86.X86_INS_JNS,
 79 |         capstone.x86.X86_INS_SETO: capstone.x86.X86_INS_JNO,
 80 |         capstone.x86.X86_INS_SETP: capstone.x86.X86_INS_JNP,
 81 |         capstone.x86.X86_INS_SETS: capstone.x86.X86_INS_JNS,
 82 |         capstone.x86.X86_INS_CMOVAE: capstone.x86.X86_INS_JAE,
 83 |         capstone.x86.X86_INS_CMOVA: capstone.x86.X86_INS_JA,
 84 |         capstone.x86.X86_INS_CMOVBE: capstone.x86.X86_INS_JBE,
 85 |         capstone.x86.X86_INS_CMOVB: capstone.x86.X86_INS_JB,
 86 |         capstone.x86.X86_INS_CMOVE: capstone.x86.X86_INS_JE,
 87 |         capstone.x86.X86_INS_CMOVGE: capstone.x86.X86_INS_JGE,
 88 |         capstone.x86.X86_INS_CMOVG: capstone.x86.X86_INS_JG,
 89 |         capstone.x86.X86_INS_CMOVLE: capstone.x86.X86_INS_JLE,
 90 |         capstone.x86.X86_INS_CMOVL: capstone.x86.X86_INS_JL,
 91 |         capstone.x86.X86_INS_CMOVNE: capstone.x86.X86_INS_JNE,
 92 |         capstone.x86.X86_INS_CMOVNO: capstone.x86.X86_INS_JNO,
 93 |         capstone.x86.X86_INS_CMOVNP: capstone.x86.X86_INS_JNP,
 94 |         capstone.x86.X86_INS_CMOVNS: capstone.x86.X86_INS_JNS,
 95 |         capstone.x86.X86_INS_CMOVO: capstone.x86.X86_INS_JNO,
 96 |         capstone.x86.X86_INS_CMOVP: capstone.x86.X86_INS_JNP,
 97 |         capstone.x86.X86_INS_CMOVS: capstone.x86.X86_INS_JNS,
 98 |         capstone.x86.X86_INS_SBB: capstone.x86.X86_INS_JB,
 99 |     }
100 | 
    # JCXZ-family instruction id -> mask selecting the bits of the 'cx' value
    # that the instruction examines (16, 32 or 64 bits).
    cx_cond_map = {
        capstone.x86.X86_INS_JCXZ: 0xffff,
        capstone.x86.X86_INS_JECXZ: 0xffffffff,
        capstone.x86.X86_INS_JRCXZ: 0xffffffffffffffff,
    }
106 | 
107 |     cs_to_pyvex_reg_map = {
108 |         capstone.x86.X86_REG_AH: 'ah',
109 |         capstone.x86.X86_REG_RAX: 'rax',
110 |         capstone.x86.X86_REG_RDX: 'rdx',
111 |         capstone.x86.X86_REG_EFLAGS: 'eflags',
112 |         capstone.x86.X86_REG_AL : 'al',
113 |         capstone.x86.X86_REG_AX : 'ax',
114 |         capstone.x86.X86_REG_BH : 'bh',
115 |         capstone.x86.X86_REG_BL : 'bl',
116 |         capstone.x86.X86_REG_BP : 'bp',
117 |         capstone.x86.X86_REG_BPL : 'bpl',
118 |         capstone.x86.X86_REG_AX: 'ax',
119 |         capstone.x86.X86_REG_BX : 'bx',
120 |         capstone.x86.X86_REG_CH : 'ch',
121 |         capstone.x86.X86_REG_CL : 'cl',
122 |         capstone.x86.X86_REG_CS : 'cs',
123 |         capstone.x86.X86_REG_CX : 'cx',
124 |         capstone.x86.X86_REG_DH : 'dh',
125 |         capstone.x86.X86_REG_DI : 'di',
126 |         capstone.x86.X86_REG_DIL : 'dil',
127 |         capstone.x86.X86_REG_DL : 'dl',
128 |         capstone.x86.X86_REG_DS : 'ds',
129 |         capstone.x86.X86_REG_DX : 'dx',
130 |         capstone.x86.X86_REG_EAX : 'eax',
131 |         capstone.x86.X86_REG_EBP : 'ebp',
132 |         capstone.x86.X86_REG_EBX : 'ebx',
133 |         capstone.x86.X86_REG_ECX : 'ecx',
134 |         capstone.x86.X86_REG_EDI : 'edi',
135 |         capstone.x86.X86_REG_EDX : 'edx',
136 |         capstone.x86.X86_REG_EFLAGS : 'eflags',
137 |         capstone.x86.X86_REG_EIP : 'eip',
138 |         capstone.x86.X86_REG_EIZ : 'eiz',
139 |         capstone.x86.X86_REG_ES : 'es',
140 |         capstone.x86.X86_REG_ESI : 'esi',
141 |         capstone.x86.X86_REG_ESP : 'esp',
142 |         capstone.x86.X86_REG_FS : 'fs',
143 |         capstone.x86.X86_REG_GS : 'gs',
144 |         capstone.x86.X86_REG_IP : 'ip',
145 |         capstone.x86.X86_REG_RAX : 'rax',
146 |         capstone.x86.X86_REG_RBP : 'rbp',
147 |         capstone.x86.X86_REG_RBX : 'rbx',
148 |         capstone.x86.X86_REG_RCX : 'rcx',
149 |         capstone.x86.X86_REG_RDI : 'rdi',
150 |         capstone.x86.X86_REG_RDX : 'rdx',
151 |         capstone.x86.X86_REG_RIP : 'rip',
152 |         capstone.x86.X86_REG_RIZ : 'riz',
153 |         capstone.x86.X86_REG_RSI : 'rsi',
154 |         capstone.x86.X86_REG_RSP : 'rsp',
155 |         capstone.x86.X86_REG_SI : 'si',
156 |         capstone.x86.X86_REG_SIL : 'sil',
157 |         capstone.x86.X86_REG_SP : 'sp',
158 |         capstone.x86.X86_REG_SPL : 'spl',
159 |         capstone.x86.X86_REG_SS : 'ss',
160 |     }
161 | 
    def cs_to_pyvex_reg(self, reg:int) -> str:
        """Translate a capstone register id into its register name.

        Raises KeyError when `reg` has no entry in `cs_to_pyvex_reg_map`.
        """
        return self.cs_to_pyvex_reg_map[reg]
164 | 
    @property
    def pointer_size(self) -> int:
        """Pointer size (presumably in bytes, given `address_width` is 64 -- confirm)."""
        return 8
168 | 
    @property
    def arch_name(self) -> str:
        """Name of this architecture: "amd64"."""
        return "amd64"
172 | 
    @property
    def default_text_base(self) -> int:
        """Default text base address (presumably of the kernel image -- confirm with callers)."""
        return 0xffffffff81000000
176 | 
    @property
    def syscall_entry_points(self) -> Set[str]:
        """Symbol names treated as syscall entry points."""
        # Earlier choice, kept for reference:
        #return {'entry_SYSCALL_64', 'entry_SYSCALL_64_after_hwframe'}
        return {'do_syscall_64'}
181 |     
    def controlStatePluginArch(self) -> ControlStatePluginX86:
        """Create a new `ControlStatePluginX86` instance."""
        return ControlStatePluginX86()
184 | 
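    # Returns two states that follow a predicated (SETcc/CMOVcc) instruction.
    # The first is the state in which the condition holds and the second is the
    # one in which it does not; `control.diverged` marks the state that
    # disagrees with the outcome that was actually observed.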
187 |     def predicated_mov_constraint(self, state:angr.SimState, cond_true:bool, insn:capstone.CsInsn) -> List[angr.SimState]:
188 | 
189 |         def ffs(x:int) -> int:
190 |             """Returns the index, counting from 0, of the
191 |             least significant set bit in `x`.
192 |             """
193 |             return (x&-x).bit_length()-1
194 | 
195 |         def flags_equal(flags, flag_a:int, flag_b:int) -> bool:
196 |             offset_a, offset_b = ffs(flag_a), ffs(flag_b)
197 |             return flags[offset_a] == flags[offset_b]
198 | 
199 |         # Creating a list of taken, not-taken
200 |         successors = list()
201 | 
202 |         flags = state.regs.eflags
203 |         id = self.predicated_map[insn.id]
204 |         simple_mask, simple_mask_clear, single_bit_cond = None, False, False
205 |         if id in self.flags_cond_map:
206 |             mask, invert = self.flags_cond_map[id]
207 |             constraint = (flags & mask) != 0
208 |             if invert:
209 |                 constraint = claripy.Not(constraint)#) if cond[1] else flags & cond[0]
210 |             single_bit_cond = (mask & (mask - 1)) == 0
211 |             simple_mask, simple_mask_clear = mask, invert
212 |         elif id == capstone.x86.X86_INS_JGE:
213 |             constraint = flags_equal(flags, self.X86_EFLAGS_SF, self.X86_EFLAGS_OF)
214 |         elif id == capstone.x86.X86_INS_JG:
215 |                 constraint = claripy.And((flags & self.X86_EFLAGS_ZF) == 0,
216 |                         flags_equal(flags, self.X86_EFLAGS_SF, self.X86_EFLAGS_OF))
217 |         elif id == capstone.x86.X86_INS_JLE:
218 |             constraint = claripy.Or((flags & self.X86_EFLAGS_ZF) != 0,
219 |                     claripy.Not(flags_equal(flags, self.X86_EFLAGS_SF, self.X86_EFLAGS_OF)))
220 |         elif id == capstone.x86.X86_INS_JL:
221 |             constraint = flags_equal(flags, self.X86_EFLAGS_SF, self.X86_EFLAGS_OF)
222 |         else:
223 |             raise Exception("Unhandled condition")
224 | 
225 |         for sim_cond_true in [True, False]:
226 |             n = state.copy()
227 |             n.add_constraints(constraint if sim_cond_true else claripy.Not(constraint))
228 | 
229 |             # Try to set the flags to simplify execution if we can figure out the flags
230 |             if simple_mask is not None:
231 |                 # if they are not equal, the bit is cleared
232 |                 if sim_cond_true == simple_mask_clear:
233 |                     n.regs.flags = flags & ~simple_mask
234 |                 elif single_bit_cond:
235 |                     n.regs.flags = flags | simple_mask
236 | 
237 |             n.control.diverged = cond_true != sim_cond_true
238 |             n.control.expected_ip = state.solver.eval_one(state.addr)
239 | 
240 |             successors.append(n)
241 | 
242 |         return successors
243 | 
244 |     def is_cond_jmp_taken(self, insn:capstone.CsInsn, state:Dict[str, Any]) -> bool:
245 |         def flags_equal(flags:int, flag_a:int, flag_b:int) -> bool:
246 |             return ((flags & flag_a) != 0) == ((flags & flag_b) != 0)
247 | 
248 |         flags = state['flags']
249 |         id = self.predicated_map.get(insn.id, insn.id)
250 | 
251 |         if id in self.flags_cond_map:
252 |             cond = self.flags_cond_map[id]
253 |             r = flags & cond[0] == 0
254 |             return r if cond[1] else not r
255 |         if id in self.cx_cond_map:
256 |             # TODO: It just never happended and should be checked once
257 |             assert 0 == 1
258 |             return state['cx'] & self.cx_cond_map[id] != 0
259 |         if id == capstone.x86.X86_INS_JGE:
260 |             return flags_equal(flags, self.X86_EFLAGS_SF, self.X86_EFLAGS_OF)
261 |         if id == capstone.x86.X86_INS_JG:
262 |             return ((flags & self.X86_EFLAGS_ZF) == 0 and
263 |                     flags_equal(flags, self.X86_EFLAGS_SF, self.X86_EFLAGS_OF))
264 |         if id == capstone.x86.X86_INS_JLE:
265 |             return ((flags & self.X86_EFLAGS_ZF) != 0 or
266 |                     not flags_equal(flags, self.X86_EFLAGS_SF, self.X86_EFLAGS_OF))
267 |         if id == capstone.x86.X86_INS_JL:
268 |             return not flags_equal(flags, self.X86_EFLAGS_SF, self.X86_EFLAGS_OF)
269 | 
270 |         raise Exception('Unhandled condition')
271 | 
272 |     def rep_iterations(self, insn:capstone.CsInsn, state:Dict) -> int:
273 |         return state['cx'] & ((1 << (insn.operands[0].size * 8)) - 1)
274 | 
275 |     def is_rep_taken(self, insn:capstone.CsInsn, state:Dict) -> bool:
276 |         # We would assume only one rep prefix as proper code
277 |         rep_prefix = [prefix for prefix in insn.prefix if prefix in {
278 |             capstone.x86.X86_PREFIX_REPE,
279 |             capstone.x86.X86_PREFIX_REPNE,
280 |             capstone.x86.X86_PREFIX_REP,
281 |         }][0]
282 | 
283 |         if self.rep_iterations(insn, state) == 0:
284 |             return False
285 | 
286 |         if rep_prefix == capstone.x86.X86_PREFIX_REPNE:
287 |             return state['flags'] & self.X86_EFLAGS_ZF == 0
288 |         if rep_prefix == capstone.x86.X86_PREFIX_REPE:
289 |             return state['flags'] & self.X86_EFLAGS_ZF != 0
290 | 
291 |         assert(rep_prefix == capstone.x86.X86_PREFIX_REP)
292 |         return True
293 | 
    @property
    def ftrace_state_str(self) -> str:
        """Register fetch specification: flags, cx and ax.

        NOTE(review): presumably appended to a kprobe/ftrace event definition
        so these registers get recorded -- confirm with the recorder code.
        """
        return 'flags=%flags cx=%cx ax=%ax'
297 |     
298 |     def ftrace_state_dict(self, d:Dict[str, Any]) -> Dict[str, Any]:
299 |         return {
300 |             'flags': d['flags'],
301 |             'cx': d['cx'],
302 |         }
303 | 
304 |     def is_loop_taken(self, insn:capstone.CsInsn, state:Dict[str, Any]) -> bool:
305 |         flags, rcx = state['flags'], state['cx']
306 | 
307 |         if (rcx & (1 << (insn.operands[0].size * 8)) - 1) == 0:
308 |             return False
309 | 
310 |         if insn.id == capstone.x86.X86_INS_LOOPNE:
311 |             return flags & self.X86_EFLAGS_ZF == 0
312 |         if insn.id == capstone.x86.X86_INS_LOOPE:
313 |             return flags & self.X86_EFLAGS_ZF != 0
314 | 
315 |         assert(insn.id == capstone.x86.X86_INS_LOOP)
316 |         return True
317 | 
    def is_predicated_mov(self, insn) -> bool:
        """Return True when `insn.id` is a key of `predicated_map`."""
        # cannot just check the group, since SETxx does not have a group
        return insn.id in self.predicated_map
321 | 
322 |     def is_rep_insn(self, insn) -> bool:
323 |         return (not {capstone.x86.X86_PREFIX_REP, capstone.x86.X86_PREFIX_REPE, 
324 |                  capstone.x86.X86_PREFIX_REPNE}.isdisjoint(insn.prefix))
325 | 
326 |     def is_fixed_rep_insn(self, insn) -> bool:
327 |         return (insn.mnemonic.startswith("rep m") or
328 |                insn.mnemonic.startswith("rep s"))
329 | 
330 |     def is_branch_insn(self, insn) -> bool:
331 |             return ((not {capstone.CS_GRP_CALL, capstone.CS_GRP_RET,
332 |                  capstone.CS_GRP_JUMP}.isdisjoint(insn.groups)) or
333 |                  self.is_rep_insn(insn) or self.is_loop_insn(insn))
334 | 
    def is_jmp_insn(self, insn) -> bool:
        """Return True when `insn` belongs to capstone's x86 jump group."""
        return capstone.x86.X86_GRP_JUMP in insn.groups
337 | 
338 |     def is_indirect_jmp_insn(self, insn) -> bool:
339 |         return (self.is_jmp_insn(insn) and
340 |                 insn.id in {capstone.x86.X86_INS_LJMP,
341 |                             capstone.x86.X86_INS_JMP} and
342 |                 insn.operands[0].type != capstone.x86.X86_OP_IMM)
343 | 
344 |     def is_indirect_branch_target(self, insn) -> bool:
345 |         return insn.id in {capstone.x86.X86_INS_ENDBR32,
346 |                            capstone.x86.X86_INS_ENDBR64}
347 | 
    def is_indirect_branch_insn(self, insn) -> bool:
        """Return True when `insn` is an indirect jump or an indirect call."""
        # NOTE(review): is_indirect_call_insn is not defined in the visible part
        # of this class; presumably inherited from the base class -- confirm.
        return (self.is_indirect_jmp_insn(insn) or
                self.is_indirect_call_insn(insn))
351 | 
    def __is_ret_insn(self, insn:capstone.CsInsn) -> bool:
        """Return True when `insn` belongs to capstone's x86 return group."""
        return capstone.x86.X86_GRP_RET in insn.groups
354 | 
355 |     def is_ret_insn(self, insn:capstone.CsInsn) -> bool:
356 |         if self.__is_ret_insn(insn):
357 |             return True
358 | 
359 |         # Detect retthunks as effectively ret instructions
360 |         if self.is_direct_jmp_insn(insn):
361 |             target = self.get_direct_branch_target(insn)
362 |             return target == self.return_thunk_addr
363 | 
364 |         return False
365 | 
    def is_call_insn(self, insn:capstone.CsInsn) -> bool:
        """Return True when `insn` belongs to capstone's x86 call group."""
        return capstone.x86.X86_GRP_CALL in insn.groups
368 |     
369 |     def is_cond_jmp_insn(self, insn:capstone.CsInsn) -> bool:
370 |         return (capstone.x86.X86_GRP_JUMP in insn.groups and
371 |                 insn.id not in {capstone.x86.X86_INS_LJMP,
372 |                             capstone.x86.X86_INS_JMP})
373 | 
    def is_loop_insn(self, insn:capstone.CsInsn) -> bool:
        """Return True when `insn` is LOOP, LOOPNE or LOOPE."""
        return insn.id in (capstone.x86.X86_INS_LOOP,
                            capstone.x86.X86_INS_LOOPNE,
                            capstone.x86.X86_INS_LOOPE)
378 | 
    def is_cond_branch_insn(self, insn:capstone.CsInsn) -> bool:
        """Return True for conditional jumps, rep-prefixed and loop instructions."""
        return (self.is_cond_jmp_insn(insn) or self.is_rep_insn(insn) or
                self.is_loop_insn(insn))
382 | 
383 |     def is_direct_call_insn(self, insn:capstone.CsInsn) -> bool:
384 |         return (self.is_call_insn(insn) and
385 |                 insn.operands[0].type == capstone.x86.X86_OP_IMM)
386 |     
    def is_direct_branch_insn(self, insn:capstone.CsInsn) -> bool:
        """Return True when `insn` is a direct jump or a direct call."""
        # NOTE(review): is_direct_jmp_insn is not defined in the visible part of
        # this class; presumably inherited from the base class -- confirm.
        return self.is_direct_jmp_insn(insn) or self.is_direct_call_insn(insn)
389 | 
390 |     def get_direct_branch_target(self, insn:capstone.CsInsn) -> int:
391 |         if self.is_rep_insn(insn):
392 |             return insn.address
393 |         return int(insn.op_str, 16)
394 | 
    @staticmethod
    def get_control_state_arch(state:angr.SimState) -> 'ControlStatePluginX86':
        """Return the architecture-specific control plugin, `state.control.arch`."""
        # To avoid circular import, we could have used lazy import
        return state.control.arch # type: ignore
399 | 
400 |     @staticmethod
401 |     def sti_hook(state:angr.SimState):
402 |         archX86 = ArchX86.get_control_state_arch(state)
403 |         archX86.eflags_if = True
404 | 
405 |     @staticmethod
406 |     def cli_hook(state:angr.SimState):
407 |         archX86 = ArchX86.get_control_state_arch(state)
408 |         archX86.eflags_if = False
409 | 
410 |     @staticmethod
411 |     def __popf_hook(state:angr.SimState, reg:str):
412 |         archX86 = ArchX86.get_control_state_arch(state)
413 |         rsp = state.registers.load('rsp')
414 |         v = state.memory.load(rsp, size=8, endness='Iend_LE')
415 |         state.registers.store(reg, v)
416 |         archX86.eflags_if = (v & arch.X86_EFLAGS_IF) != 0
417 |         rsp += ArchX86.STACK_SIZE
418 |         state.registers.store('rsp', rsp)
419 | 
    @staticmethod
    def popf_hook(state:angr.SimState):
        """Hook for POPF: pop into the register named "flags"."""
        ArchX86.__popf_hook(state, "flags")
423 |     
    @staticmethod
    def popfd_hook(state:angr.SimState):
        """Hook for POPFD: pop into the register named "eflags"."""
        ArchX86.__popf_hook(state, "eflags")
427 |          
    @staticmethod
    def popfq_hook(state:angr.SimState):
        """Hook for POPFQ: pop into the register named "rflags"."""
        ArchX86.__popf_hook(state, "rflags")
431 | 
432 |     @staticmethod
433 |     def __pushf_hook(state:angr.SimState, reg:str):
434 |         archX86 = ArchX86.get_control_state_arch(state)
435 |         rsp = state.registers.load('rsp')
436 |         rsp -= ArchX86.STACK_SIZE
437 |         v = state.registers.load(reg)
438 |         if archX86.eflags_if:
439 |             v |= arch.X86_EFLAGS_IF
440 | 
441 |         state.memory.store(rsp, v, size=8, endness='Iend_LE')
442 |         state.registers.store('rsp', rsp)
443 | 
    @staticmethod
    def pushf_hook(state:angr.SimState):
        """Hook for PUSHF: push the register named "flags"."""
        ArchX86.__pushf_hook(state, "flags")
447 |     
    @staticmethod
    def pushfd_hook(state:angr.SimState):
        """Hook for PUSHFD: push the register named "eflags"."""
        ArchX86.__pushf_hook(state, "eflags")
451 |          
    @staticmethod
    def pushfq_hook(state:angr.SimState):
        """Hook for PUSHFQ: push the register named "rflags"."""
        ArchX86.__pushf_hook(state, "rflags")
455 | 
456 |     @staticmethod
457 |     def skip_mask_hook(state:angr.SimState):
458 |         #insn = angr_mgr.state_insn(state)
459 |         insn = state.control.angr_mgr.state_insn(state) # type: ignore
460 | 
461 |         for reg in insn.regs_write:
462 |             reg_name = arch.cs_to_pyvex_reg(reg)
463 |             val = state.registers.load(reg_name)
464 |             v = state.solver.Unconstrained("unconstrained_val", val.length)
465 |             # TODO: find width and create correct value
466 |             state.registers.store(reg_name, v)
467 | 
    @property
    def per_cpu_reg(self) -> str:
        """Name of the register used for per-CPU accesses: 'gs'."""
        return 'gs'
471 | 
    @property
    def per_cpu_offset(self) -> int:
        """Per-CPU offset constant.

        NOTE(review): the origin of 0x833e8000 is not visible here; presumably
        a per-CPU base used during simulation -- confirm.
        """
        return 0x833e8000
475 |     
    @property
    def stack_reg(self) -> str:
        """Name of the stack-pointer register: 'rsp'."""
        return 'rsp'
479 |          
480 |     def pyvex_workaround(self, insn:capstone.CsInsn) -> Tuple[Union[Callable, None],  bool]:
481 |         # MOV x, SREG
482 |         if insn.bytes[0] == 0x8e:
483 |             return self.skip_mask_hook, True
484 | 
485 |         # RDPKRU
486 |         if insn.bytes[0:3] == b'\x0f\x01\xee':
487 |             return self.skip_mask_hook, True
488 |             
489 |         if insn.id in {capstone.x86.X86_INS_WRFSBASE,
490 |                     capstone.x86.X86_INS_WRGSBASE,
491 |                     capstone.x86.X86_INS_STAC,
492 |                     capstone.x86.X86_INS_CLAC,
493 |                     capstone.x86.X86_INS_INVLPG,
494 |                     capstone.x86.X86_INS_INVLPGA,
495 |                     capstone.x86.X86_INS_INVPCID,
496 |                     capstone.x86.X86_INS_INVEPT,
497 |                     capstone.x86.X86_INS_SGDT,
498 |                     capstone.x86.X86_INS_LGDT,
499 |                     capstone.x86.X86_INS_IDIV,
500 |                     capstone.x86.X86_INS_UD0,
501 |                     capstone.x86.X86_INS_UD2B,
502 |                     capstone.x86.X86_INS_SWAPGS,
503 |                     capstone.x86.X86_INS_WRMSR,
504 |                     capstone.x86.X86_INS_RDMSR,
505 |                     capstone.x86.X86_INS_VERW,
506 |                     }:
507 |             return self.skip_mask_hook, True
508 | 
509 |         hooks = {capstone.x86.X86_INS_STI: self.sti_hook,
510 |                  capstone.x86.X86_INS_CLI: self.cli_hook,
511 |                  capstone.x86.X86_INS_PUSHF: self.pushf_hook,
512 |                  capstone.x86.X86_INS_PUSHFD: self.pushfd_hook,
513 |                  capstone.x86.X86_INS_PUSHFQ: self.pushfq_hook,
514 |                  capstone.x86.X86_INS_POPF: self.popf_hook,
515 |                  capstone.x86.X86_INS_POPFD: self.popfd_hook,
516 |                  capstone.x86.X86_INS_POPFQ: self.popfq_hook,
517 |                  }
518 | 
519 |         if insn.id in hooks:
520 |             return hooks[insn.id], False
521 | 
522 |         return None, False
523 | 
    def nop_insn(self, size:int) -> bytes:
        """Return `size` bytes of single-byte NOP (0x90) instructions."""
        return b'\x90' * size
526 | 
    def init_capstone(self) -> capstone.Cs:
        """Create a capstone disassembler for x86 in 64-bit mode."""
        return capstone.Cs(capstone.CS_ARCH_X86, capstone.CS_MODE_64)
529 | 
    @property
    def ret_reg_name(self) -> str:
        """Name of the return-value register: 'rax'."""
        return 'rax'
533 | 
    @property 
    def stack_related_reg_names(self) -> List[str]:
        """Names of the stack-related registers: 'rsp' and 'rbp'."""
        return ['rsp', 'rbp']
537 | 
    @property
    def ip_reg_name(self) -> str:
        """Name of the instruction-pointer register: 'rip'."""
        return 'rip'
541 | 
542 |     def is_iret_insn(self, insn:capstone.CsInsn) -> bool:
543 |         return (insn.id == capstone.x86.X86_INS_IRET or
544 |                 insn.id == capstone.x86.X86_INS_IRETD or
545 |                 insn.id == capstone.x86.X86_INS_IRETQ)
546 |     
547 |     def is_sysexit_sysret_insn(self, insn:capstone.CsInsn) -> bool:
548 |         return (insn.id == capstone.x86.X86_INS_SYSEXIT or
549 |                 insn.id == capstone.x86.X86_INS_SYSRET)
550 |     
    @property
    def page_size(self) -> int:
        """Page size: 4096 (presumably in bytes)."""
        return 4096
554 |     
555 |     def parse_interrupt_table(self, proj:angr.Project) -> Dict[int, int]:
556 |         idt_handlers = {}
557 | 
558 |         idt_table_symbol = proj.loader.find_symbol('idt_table')
559 |         # Assuming all entries are present
560 |         num_entries = 256
561 |         entry_size = 8 if proj.arch.bits == 32 else 16
562 | 
563 |         if idt_table_symbol is None:
564 |             raise ValueError("idt_table symbol not found")
565 | 
566 |         # Get the IDT base address
567 |         idt_size = num_entries * entry_size
568 |         assert isinstance(proj.loader.memory, angr.cle.Clemory)
569 |         idt_data = proj.loader.memory.load(idt_table_symbol.rebased_addr, idt_size)
570 | 
571 |         for i in range(num_entries):
572 |             entry_data = idt_data[i * entry_size : (i + 1) * entry_size]
573 | 
574 |             if proj.arch.bits == 32:
575 |                 # 32-bit IDT entry format: https://wiki.osdev.org/Interrupt_Descriptor_Table#Structure_IA-32
576 |                 offset_low, selector, _zero, access, offset_high = struct.unpack('<HHBHB', entry_data)
577 |                 handler_addr = (offset_high << 16) | offset_low
578 |             else:
579 |                 # 64-bit IDT entry format: https://wiki.osdev.org/Interrupt_Descriptor_Table#Structure_AMD64
580 |                 offset_low, selector, ist, access, offset_middle, offset_high = struct.unpack('<HHBBHI', entry_data[0:12])
581 |                 handler_addr = (offset_high << 32) | (offset_middle << 16) | offset_low
582 | 
583 |             # Check if the entry is present (access & 0x80)
584 |             if access & 0x80:
585 |                 idt_handlers[i] = handler_addr
586 | 
587 |         return idt_handlers
588 | 
589 |     def init_symbols(self, proj:angr.Project) -> None:
590 |         # get the symbol for __x86_return_thunk
591 |         try:
592 |             return_thunk_sym = proj.loader.find_symbol('__x86_return_thunk')
593 |         except KeyError:
594 |             return_thunk_sym = None
595 | 
596 |         self.return_thunk_addr = return_thunk_sym and return_thunk_sym.rebased_addr
597 | 
    def is_exception_vector(self, vector:int) -> bool:
        """Return True when `vector` is below 32."""
        return vector < 32
600 |     
    @property
    def irq_exit_sym_names(self) -> Set[str]:
        """Return the class-level set of interrupt-exit symbol names."""
        return self.__irq_exit_sym_names
604 |     
    @property
    def address_width(self) -> int:
        """Address width: 64 (presumably in bits, given `pointer_size` is 8)."""
        return 64
608 | 
# Module-level instance; the static hooks in ArchX86 read constants and the
# register-name mapping through it.
arch = ArchX86()
610 | 


--------------------------------------------------------------------------------
/intelptreporter.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2023 VMware, Inc.
  2 | # SPDX-License-Identifier: BSD-2-Clause
  3 | from typing import Optional, Set, List, Dict, Tuple, Any, Union, List
  4 | import re
  5 | from concurrent.futures import ProcessPoolExecutor, as_completed
  6 | from collections import defaultdict
  7 | 
  8 | import syscall
  9 | import itertools
 10 | from angrmgr import Angr
 11 | from arch import arch
 12 | from cle.backends import Symbol
 13 | from ftrace import Ftrace
 14 | from prmsg import pr_msg, Pbar
 15 | from reporter import Reporter
 16 | from syscall import SyscallInfo, ErrorcodeInfo
 17 | 
 18 | class IntelPTReporter(Reporter):
    # Matches one branch record of the trace. After the common prefix
    # (`Ftrace.common_trace_pattern`) it expects:
    #   <id> <event>:<ctx>: <from_ip> <from_sym>[+0x<from_off>] ([<from_obj>]<from_sec>)
    #        => <to_ip> <to_sym>[+0x<to_off>] ([<to_obj>]<to_sec>)
    # where <ctx> is `k` or `u` and the ip/offset fields are lowercase hex.
    branch_regex = re.compile(
            Ftrace.common_trace_pattern +
            r'(?P<id>\d+)\s+' +
            r'(?P<event>[^\:]+):(?P<ctx>[ku]):\s+' +
            r'(?P<from_ip>[0-9a-f]+)\s+' + 
            r'(?P<from_sym>[^\+]+)' +
            r'(\+0x(?P<from_off>[0-9a-f]+))? ' +
            r'\(' +
            r'(\[(?P<from_obj>[^\]]+)\])?' +
            r'(?P<from_sec>[^\)]*)' +
            r'\)' +
            r' =>\s+' +
            r'(?P<to_ip>[0-9a-f]+)\s+' +
            r'(?P<to_sym>[^\+]+)' +
            r'(\+0x(?P<to_off>[0-9a-f]+))? ' +
            r'\(' +
            r'(\[(?P<to_obj>[^\]]+)\])?' +
            r'(?P<to_sec>[^\)]*)' +
            r'\)')
 38 | 
    @staticmethod
    def parse_entries_batch(strings, start_line):
        """Parse one batch of trace lines.

        `strings` holds the lines of the batch and `start_line` is the index
        of its first line within the whole trace.

        Returns `(results, bpf_perf_event_output_indices, exit_event_indices)`:
        * `results` has exactly one item per input line: a dict for a branch
          record or a syscall entry/exit record, and None for anything else
          (including branch records with pid -1);
        * `bpf_perf_event_output_indices` lists the trace-wide indices of
          branches from a `bpf_prog_*` symbol to `bpf_perf_event_output_tp`;
        * `exit_event_indices` is currently always empty, because the branch
          that fills it is disabled.
        """
        results = []
        bpf_perf_event_output_indices = []
        exit_event_indices = []
        for i, string in enumerate(strings):
            # 1) Branch records
            match = IntelPTReporter.branch_regex.match(string)
            if match:
                d = match.groupdict()

                # Ignore PID -1 events. For some reason perf might emit them, but we
                # cannot associate them with a process, which makes their processing
                # non-trivial.
                if d['pid'] == '-1':
                    results.append(None)
                    continue

                # Normalize the captured strings into numbers / None
                d['time'] = float(d['time'])
                for field in ['to_sym', 'from_sym']:
                    if d[field] == '[unknown]':
                        d[field] = None
                for field in ['to_obj', 'from_obj']:
                    if d[field] == 'unknown':
                        d[field] = None
                for field in ['to_ip', 'from_ip', 'from_off', 'to_off']:
                    # Optional groups (the offsets) stay None when absent
                    d[field] = d[field] and int(d[field], 16)
                for field in ['pid', 'id', 'cpu']:
                    d[field] = int(d[field])
                results.append(d)

                # Check the conditions for bpf_perf_event_output branches
                if ((d['from_sym'] or '').startswith('bpf_prog_') and
                    d.get('to_sym') == 'bpf_perf_event_output_tp'):
                    bpf_perf_event_output_indices.append(start_line + i)
                continue

            # 2) Error-exit records. Deliberately disabled by the `False and`
            # guard, so this body never runs.
            match = Ftrace.err_exit_regex.match(string)
            if False and match:
                d = match.groupdict()
                d['time'] = float(d['time'])
                d['pid'] = int(d['pid'])
                d['errcode'] = int(d['err'], 16) if d['err'] else int(d['err2'], 16)
                d['syscall_exit_nr'] = (int(d['syscall_exit_nr']) if d['syscall_exit_nr']
                                        else SyscallInfo.get_syscall_nr(d['syscall_exit_name']))
                for k in ['err', 'err2']:
                    del d[k]
                results.append(d)
                exit_event_indices.append(start_line + i)
                continue

            # 3) Syscall entry / exit records
            match = Ftrace.entry_exit_regex.match(string)
            if match:
                d = match.groupdict()
                # It is an entry when either entry group matched, otherwise an exit
                syscall_entry = (d['syscall_enter_name'] is not None or
                                 d['syscall_enter_nr'] is not None)
                r = {
                    'time': float(d['time']),
                    'pid': int(d['pid']),
                    'proc': d['proc'],
                    'cpu': int(d['cpu']),
                    'type': 'syscall' if syscall_entry else 'syscall_exit',
                }
                if syscall_entry:
                    if d['syscall_args1'] is not None:
                        # Named arguments: "name: 0x..." pairs
                        matches = re.findall(r'(\w+):\s*(0x[\da-fA-F]+)', d['syscall_args1'])
                        args = {k: int(v, 16) for k, v in matches}
                    else:
                        # Positional arguments: named arg1, arg2, ...
                        argument_values = d['syscall_args2'].split(', ')
                        args = {f'arg{i+1}': int(x, 16) for i, x in enumerate(argument_values)}

                    r.update({
                        'syscall_nr': (int(d['syscall_enter_nr']) if d['syscall_enter_nr']
                                        else SyscallInfo.get_syscall_nr(d['syscall_enter_name'])),
                        'syscall_args': args
                    })
                else:
                    r.update({
                        'errcode': int(d['err'], 16) if d['err'] else int(d['err2'], 16),
                        'syscall_nr': (int(d['syscall_exit_nr']) if d['syscall_exit_nr']
                                        else SyscallInfo.get_syscall_nr(d['syscall_exit_name']))
                    })
                results.append(r)
                continue

            # Unrecognized line: keep a placeholder so that indices stay
            # aligned with the input lines.
            results.append(None)
        return results, bpf_perf_event_output_indices, exit_event_indices
125 | 
126 |     @staticmethod    
127 |     def entries_chunk_list(input_list, chunk_size):
128 |         return [input_list[i:i + chunk_size] for i in range(0, len(input_list), chunk_size)]
129 | 
    @staticmethod
    def parse_entries_batch_wrapper(args):
        """Call `parse_entries_batch` with the items of `args` as positional
        arguments, so a single-argument `map` can drive it."""
        return IntelPTReporter.parse_entries_batch(*args)
133 | 
    def parse_trace(self, trace: List[str], errcode:Optional[int]=None) -> Tuple[List[Dict[str, Union[int, str, float]]], List[Dict]]:
        """Parse a whole trace and locate the recorded failures.

        The lines of `trace` are parsed in batches by a process pool.

        Returns `(results, failures)`. `results` holds one item per trace line
        (a dict, or None for lines that were not recognized). `failures` holds
        one dict per `bpf_perf_event_output` branch with its line 'index' and
        'pid', plus 'errcode' (and 'syscall' when a matching exit event was
        found). Without a matching exit event, 'errcode' is the `errcode`
        argument.
        """
        batch_size = 1000  # Set this to an appropriate value based on your dataset and hardware capabilities

        input_batches = self.entries_chunk_list(trace, batch_size)
        # Pair every batch with the trace-wide index of its first line
        input_batches_with_start_line = [(batch, i * batch_size) for i, batch in enumerate(input_batches)]

        with ProcessPoolExecutor(max_workers=10) as executor:
            with Pbar(message="process trace", items=input_batches_with_start_line) as pbar:
                batch_results = list(executor.map(self.parse_entries_batch_wrapper, pbar))

        # Flatten the list of results and bpf_perf_event_output_indices
        results = [result for batch in batch_results for result in batch[0]]
        bpf_perf_event_output_indices = [index for batch in batch_results for index in batch[1]]
        exit_event_indices = [index for batch in batch_results for index in batch[2]]

        failures = []
        for index in bpf_perf_event_output_indices:
            pid = results[index]['pid']
            # Look for the first exit event after the branch that has the same pid
            match = None
            for exit_index in exit_event_indices:
                if exit_index > index and results[exit_index].get('pid') == pid:
                    match = exit_index
                    break

            failure = {'index': index, 'pid': pid}
            if match is None:
                # TODO: reenable
                # (disabled by the `False and` guard: the failure is kept even
                # when no error code is known)
                if False and errcode is None:
                    pr_msg('found a failure, but no data on the error code', level='ERROR')
                    continue
                failure['errcode'] = errcode
            else:
                failure['errcode'] = results[match]['errcode']
                failure['syscall'] = results[match]['syscall_exit_nr']
            failures.append(failure)

        return results, failures
171 | 
172 |     def is_intr_entry(self, entry:Dict[str, Any]) -> bool:
173 |         return (entry.get('to_off') == 0 and
174 |                 entry.get('from_sym') != entry['to_sym'] and
175 |                 self.angr_mgr.is_interrupt_handler_addr(entry['to_ip']))
176 |     
177 |     def is_intr_exit(self, entry:Dict[str, Any]) -> bool:
178 |         if entry.get('from_sym') not in arch.irq_exit_sym_names:
179 |             return False
180 |         insn = self.angr_mgr.get_insn(entry['from_ip'])
181 |         return insn and arch.is_iret_insn(insn)
182 | 
183 |     def is_syscall_entry(self, entry:Dict[str, Any]) -> bool:
184 |         # TODO: move to arch-specific code
185 |         return entry.get('to_sym') in {'__entry_text_start', 'entry_SYSCALL_64', 'syscall_enter_from_user_mode'}
186 | 
187 |     def is_syscall_exit(self, entry:Dict[str, Any]) -> bool:
188 |         return (entry.get('to_sym') == 'syscall_exit_to_user_mode' and
189 |                 entry['to_off'] == 0)
190 | 
191 |     def report(self) -> bool:
192 |         n_reported = 0
193 |         n_traces = len(self.traces)
194 |         n_failures = len(self.failures)
195 | 
196 |         # TODO: coorelate the trace with the failure
197 |         for failure in self.failures:
198 |             for i_trace, trace in enumerate(self.traces):
199 |                 # Although we have a timestamp on the failure that we collected using eBPF,
200 |                 # it is using a different time source than perf, so we have no reasonable way
201 |                 # to correlate the two. Instead, we just look for the error code in the trace
202 |                 # and then look for the syscall entry/exit points around it.
203 |                 pr_msg(f"processing trace {i_trace+1}/{n_traces}", level='INFO')
204 |                 
205 |                 if not isinstance(trace, str):
206 |                     raise SystemError('Intel-PT trace is not a string')
207 | 
208 |                 trace_entries = trace.splitlines()
209 | 
210 |                 parsed, trace_failures = self.parse_trace(trace_entries)
211 | 
212 |                 #failures = self.get_errors(trace_entries)
213 | 
214 |                 if len(trace_failures) == 0:
215 |                     pr_msg('found no failures in trace', level='INFO')
216 |                     continue
217 | 
218 |                 for trace_failure in trace_failures:
219 |                     failure_entries = parsed[:trace_failure['index']]
220 |                     failure_errcode = failure['err']
221 |                     failure_syscall = failure['syscall_nr']
222 | 
223 |                     failure_entries = [e for e in failure_entries if e is not None and e['pid'] == failure['pid']]
224 | 
225 |                     # Remove any entries in which the from_sym or to_sym is None
226 |                     failure_entries = [e for e in failure_entries
227 |                                     if e.get('from_sym', '') is not None and e.get('to_sym', '') is not None]
228 | 
229 |                     # TODO: Fix the filters based on the failure entries
230 |                     if ((self.syscall_filter and self.syscall_filter != failure_syscall) or
231 |                         (self.errcode_filter and self.errcode_filter != failure_errcode)):
232 |                         continue
233 | 
234 |                     branches = self.skip_intr_entries(failure_entries)
235 | 
236 |                     # TODO: extract all syscalls, not just the last one
237 |                     extracted = self.extract_last_syscall(branches)
238 |                     if extracted is None:
239 |                         continue
240 | 
241 |                     branches = extracted
242 |                     branches = self.skip_fentry_entries(branches)
243 | 
244 |                     super().report_one(
245 |                         branches = branches,
246 |                         errcode = -failure_errcode,
247 |                         simulate_all = True
248 |                     )
249 |                     n_reported += 1
250 | 
251 |                     if n_reported == n_failures:
252 |                         return True
253 | 
254 |         return True
255 | 
    def find_time(self, trace:List[str], time:float, before:bool) -> Optional[int]:
        """Bisect the trace lines for an index next to the given time.

        Only lines matching ``self.branch_regex`` carry a usable time (its
        ``time`` group). With ``before`` set, the candidate follows the
        midpoints whose time is lower than ``time`` (or the slot right before
        a midpoint whose time is equal); otherwise it follows the midpoints
        whose time is higher (or the slot right after an equal one).

        :param trace: raw trace lines
        :param time: time to look for
        :param before: selects on which side of ``time`` the index is searched
        :return: the candidate index, or ``None`` if no candidate was recorded
                 or no line matches ``self.branch_regex``
        """
        # Bisect to find the time, but gracefully handle entries with no time
        s = 0
        e = len(trace)
        found = None
        while s < e:
            mid = (s + e) // 2
            # Only consider branch entries since their time is in sync with
            # the time we look for. If we do not have such entry, go forward
            # and then backward until we find one.
            # Note that the bookkeeping below keeps using ``mid`` even when
            # the matching line was found at a different index ``i``.
            for i in itertools.chain(range(mid, len(trace)), range(mid, -1, -1)):
                m = self.branch_regex.match(trace[i])
                if m is not None:
                    break
            if m is None:
                # No branch entry anywhere in the trace
                return None
            d = m.groupdict()
            e_time = float(d['time'])
            # NOTE(review): ``found or X`` treats a candidate index of 0 like
            # "no candidate yet" - confirm that this is harmless.
            if e_time < time:
                if before:
                    found = max(mid, found or mid)
                s = mid + 1
            elif e_time == time:
                if before:
                    # NOTE(review): the upper bound is exclusive elsewhere
                    # (``e = mid``), so ``mid - 1`` here skips one index and
                    # yields -1 for mid == 0 - looks like an off-by-one, confirm.
                    e = mid - 1
                    found = min(mid - 1, found or mid - 1)
                else:
                    s = mid + 1
                    found = max(mid + 1, found or mid + 1)
            else: # e_time > time
                if not before:
                    found = min(mid, found or mid)
                e = mid

#        assert found is not None
#        for i in range(found, 0, -1):
#            if Ftrace.err_exit_regex.match(trace[i]) is not None:
#                return i
        
        return found
297 | 
298 |     @staticmethod
299 |     def search_in_chunk(args):
300 |         chunk, regex_pattern, start_line = args
301 |         matches = []
302 | 
303 |         for i, line in enumerate(chunk, start_line):
304 |             match = regex_pattern.match(line)
305 |             if match:
306 |                 matches.append(i)
307 | 
308 |         return matches
309 | 
310 |     @staticmethod
311 |     def chunk_lines(lines, chunk_size):
312 |         return [(lines[i:i + chunk_size], i) for i in range(0, len(lines), chunk_size)]
313 | 
314 |     @staticmethod
315 |     def search_regex_multiprocess(lines: List[str], compiled_regex, max_workers=10, chunk_size=100):
316 |         all_matches = []
317 | 
318 |         chunks = IntelPTReporter.chunk_lines(lines, chunk_size)
319 | 
320 |         with ProcessPoolExecutor(max_workers=max_workers) as executor:
321 |             futures = [executor.submit(IntelPTReporter.search_in_chunk, (chunk, compiled_regex, start_line)) for chunk, start_line in chunks]
322 | 
323 |             for future in as_completed(futures):
324 |                 result = future.result()
325 |                 if result:
326 |                     all_matches.extend(result)
327 | 
328 |         return all_matches
329 |     
330 |     def skip_fentry_entries(self, trace:List[Dict]) -> List[Dict]:
331 |         """Skip all fentry entries in the trace"""
332 |         result:List[Dict] = []
333 | 
334 |         def is_untracked_sym(sym:str) -> bool:
335 |             return sym in {'__fentry__', 'zen_untrain_ret', '__x86_return_thunk'} or sym.startswith('__x86_indirect_thunk')
336 |         
337 |         in_untracked = False
338 |         in_fentry = False
339 |         for entry in trace:
340 |             from_sym = entry.get('from_sym', '')
341 |             to_sym = entry.get('to_sym', '')
342 |             from_ip = entry.get('from_ip', 0)
343 |             to_ip = entry.get('to_ip', 0)
344 |             is_untracked_target = is_untracked_sym(to_sym)
345 | 
346 |             # Skip all fentry until return.
347 |             # TODO: consider handling nested
348 |             if not in_untracked:
349 |                 if in_fentry:
350 |                     try:
351 |                         insn = self.angr_mgr.get_insn(from_ip)
352 |                     except:
353 |                         continue
354 |                     if from_sym == '__fentry__' and arch.is_ret_insn(insn):
355 |                         in_fentry = False
356 |                     continue
357 |                 elif to_sym == '__fentry__':
358 |                     in_fentry = True
359 |                     continue
360 |             
361 |             is_untracked_target = (to_sym in {'__fentry__', 'zen_untrain_ret', '__x86_return_thunk'} or
362 |                                     to_sym.startswith('__x86_indirect_thunk'))
363 |  
364 |             if in_untracked:
365 |                 if not is_untracked_target:
366 |                     if len(result) > 0:
367 |                         for k in ['to_sym', 'to_off', 'to_sec', 'to_ip']:
368 |                             result[-1][k] = entry[k]
369 |                     in_untracked = False
370 |             else:
371 |                 # Add in both cases; we will fix the to_* fields later
372 |                 if is_untracked_target:
373 |                     in_untracked = True
374 |                     result.append(entry.copy())
375 |                 else:
376 |                     result.append(entry)
377 | 
378 |         return result
379 | 
    def skip_intr_entries(self, trace:List[Dict]) -> List[Dict]:
        """Remove the entries that were recorded inside interrupts/exceptions.

        The indices of the interrupt entries (``is_intr_entry``) and exits
        (``is_intr_exit``) are walked in trace order while a nesting counter
        is maintained. Everything between an outermost entry and its exit is
        dropped. If the exit does not return to the address the entry came
        from, copies of both entries, tagged with ``'exception': True``, are
        kept in their place. Entries that follow an interrupt entry which is
        never exited are dropped as well.

        :param trace: parsed branch entries
        :return: the entries outside of interrupts (plus the tagged copies)
        """
        result:List[Dict] = []
        enumerated = [e for e in enumerate(trace)]
        irq_entries = [e[0] for e in enumerated if self.is_intr_entry(e[1])]
        irq_exits = [e[0] for e in enumerated if self.is_intr_exit(e[1])]
        in_irq = 0              # current nesting depth
        irq_entries_i = 0       # cursor into irq_entries
        irq_exits_i = 0         # cursor into irq_exits
        start_idx = 0           # first trace index not yet copied to result
        # Indexes to trace that reflects the last non-nested IRQ/exception entries/exits
        trace_irq_entry_i = None
        trace_irq_exit_i = None

        # Merge the two sorted index lists; the smaller trace index is handled
        # first, and on a tie the exit is handled first.
        while irq_entries_i < len(irq_entries) or irq_exits_i < len(irq_exits):
            if (irq_entries_i < len(irq_entries) and (irq_exits_i >= len(irq_exits) or
                irq_entries[irq_entries_i] < irq_exits[irq_exits_i])):
                # IRQ entry
                if in_irq == 0:
                    # Outermost entry: keep everything up to (excluding) it
                    trace_irq_entry_i = irq_entries[irq_entries_i]
                    result.extend(trace[start_idx:trace_irq_entry_i])
                    start_idx = trace_irq_entry_i + 1
                in_irq += 1
                irq_entries_i += 1
            else:
                # IRQ exit
                if in_irq == 0:
                    # We are not in an IRQ, but something went wrong. We will just clean the result and
                    # hope for the best.
                    # NOTE(review): start_idx is left as is, so the entries
                    # before this exit are copied again later on - confirm
                    # whether start_idx should move past the exit.
                    pr_msg(f'IRQ exit without entry: {trace[irq_exits[irq_exits_i]]}', level = 'DEBUG')
                    result = []
                elif in_irq > 0:
                    in_irq -= 1
                    if in_irq == 0:
                        trace_irq_exit_i = irq_exits[irq_exits_i]

                        # Special handling for exception tables. If the return address
                        # does not match the exception address, we are going to add the
                        # entry and exit entries to the trace.
                        if (trace_irq_entry_i is not None and
                            trace[trace_irq_entry_i].get('from_ip') != trace[trace_irq_exit_i].get('to_ip')):
                            for i in [trace_irq_entry_i, trace_irq_exit_i]:
                                # Tag copies so that the input stays untouched
                                e = trace[i].copy()
                                e['exception'] = True
                                result.append(e)

                        start_idx = trace_irq_exit_i + 1
                         
                irq_exits_i += 1

        # The tail is kept only if the trace does not end inside an interrupt
        if not in_irq:
            result.extend(trace[start_idx:])

        return result
433 | 
434 |     def extract_last_syscall(self, trace:List[Dict]) -> Optional[List[Dict]]:
435 |         enumerated = [e for e in enumerate(trace)]
436 |         exit_entry_idxs = [i for i, e in enumerated if self.is_syscall_exit(e)]
437 | 
438 |         # Find the entry before the last exit
439 |         if len(exit_entry_idxs) == 0:
440 |             return None
441 |         
442 |         #exit_entry_idx = exit_entry_idxs[-1]
443 |         exit_entry_idx = len(trace) - 1
444 |         entries = enumerated[:exit_entry_idx+1]
445 |       
446 |         enter_entry_idxs = [i for i, e in enumerated if self.is_syscall_entry(e)]
447 |         if len(enter_entry_idxs) == 0:
448 |             return None
449 |         enter_entry_idx = enter_entry_idxs[-1]
450 |        
451 |         # We still need to get rid of all unemulated code at the beginning of the trace.
452 |         # As a hueristic, which might only fit x86-64, we will look for a call from
453 |         # the entry point.
454 |         for i in range(enter_entry_idx, exit_entry_idx):
455 |             insn = self.angr_mgr.get_insn(entries[i][1]['from_ip'])
456 |             if insn is None or not arch.is_call_insn(insn):
457 |                 continue
458 |             if entries[i][1]['from_sym'] not in arch.syscall_entry_points:
459 |                 continue
460 |             break
461 |         
462 |         enter_entry_idx = i
463 |         if enter_entry_idx == exit_entry_idx:
464 |             return None
465 |         
466 |         # Cut the end of the trace to the return address of the first call.
467 |         # This is a heuristic that might not work for all architectures.
468 |         expected_ret_addr = self.angr_mgr.next_insn_addr(insn)
469 |         for i in range(exit_entry_idx, enter_entry_idx, -1):
470 |             if entries[i][1]['to_ip'] == expected_ret_addr:
471 |                 break
472 | 
473 |         exit_entry_idx = i
474 |         if enter_entry_idx == exit_entry_idx:
475 |             return None
476 | 
477 |         return trace[enter_entry_idx:exit_entry_idx+1]
478 | 
479 |     def get_errors(self, trace:List[str]) -> List[Dict]:
480 |         # The failures that were recorded had the wrong time source, so we need
481 |         # to find the time of the failure in the trace. However, the location of
482 |         # the failure in the trace, as indicated by the syscall entry/exit point
483 |         # if not in sync with the branch trace. So we find the time of the
484 |         # failure and would later find the branches in between. 
485 |         err_list = []
486 |         unmatched_exits = 0
487 |         matched_syscalls = []
488 |         enter_pid_dict = {}
489 | 
490 |         pr_msg("finding failures in trace...", level = "INFO")
491 | 
492 |         line_nums = self.search_regex_multiprocess(trace, Ftrace.complete_exit_regex)
493 | 
494 |         parsed = [(n, self.parse_trace_entry(trace[n])) for n in line_nums]
495 | 
496 |         for line_num, syscall_info in parsed:
497 |             assert syscall_info is not None
498 | 
499 |             syscall_type = syscall_info["type"]
500 |             pid = syscall_info["pid"]
501 | 
502 |             if syscall_type == "syscall_enter":
503 |                 enter_pid_dict[pid] = syscall_info
504 | 
505 |             elif syscall_type == "syscall_exit":
506 |                 if pid in enter_pid_dict:
507 |                     matched_syscalls.append((enter_pid_dict[pid], syscall_info))
508 |                     del enter_pid_dict[pid]
509 |                 else:
510 |                     #matched_syscalls.append((None, line_num))
511 |                     unmatched_exits += 1
512 | 
513 |         if unmatched_exits > 0:
514 |             pr_msg(f"encountered {unmatched_exits} with incomplete trace", level = "INFO")
515 |         
516 |         for (entry, exit) in matched_syscalls:
517 |             errcode = syscall.ret_to_err(exit['syscall_ret'])
518 |             if errcode is None:
519 |                 continue
520 | 
521 |             f = {'start_time': entry['time'],
522 |                 'end_time': exit['time'],
523 |                 'errcode': -errcode,
524 |                 'syscall_nr': exit['syscall'],
525 |                 'pid': exit['pid'],
526 |                 'args': entry['syscall_args']}
527 |             err_list.append(f)
528 | 
529 |         return err_list
530 | 
531 |     # TODO: Combine with kprobes function of remove_untracked_from_snapshot()
    def remove_untracked_branches(self, branches: List[Dict]) -> List[Dict]:
        """Collapse the calls into untracked code to a call/return pair.

        A call whose target has no symbol, has an ignored symbol or is hooked
        in the angr project is replaced by two synthetic branches: one from
        the call site to ``None`` and one from ``None`` to the instruction
        after the call. The branches inside the callee are dropped until the
        nesting level is back to zero.

        :param branches: branch dicts with ``from_ip`` and ``to_ip``
        :return: the branches that remain tracked
        """
        # Various kernel code (e.g., context switch) performs complicated call/ret
        # interactions. So, we track nesting level based on addresses and not call
        # and rets.
        tracked_branches = list()
        nesting_level = 0
        callee_address, callee_sym, ret_to_ip = None, None, None
        for b in Pbar("clean trace", branches):
            from_ip, to_ip = b['from_ip'], b['to_ip']
            to_sym = to_ip and self.angr_mgr.get_sym(to_ip)
            from_insn = self.angr_mgr.get_insn(from_ip)

            if nesting_level == 0:
                # NOTE(review): from_insn is not checked for None here, unlike
                # in the branches below - confirm is_call_insn tolerates None.
                if not arch.is_call_insn(from_insn):
                    tracked_branches.append(b)
                    continue
                
                # TODO: Do we want to check if the entire symbol is hooked?
                if not to_sym or self.angr_mgr.is_ignored_sym(to_sym) or self.angr_mgr.proj.is_hooked(to_ip):
                    callee_address = to_ip
                    # NOTE(review): next_insn_addr receives an address here but
                    # an instruction in extract_last_syscall - confirm that
                    # both are accepted.
                    ret_to_ip = self.angr_mgr.next_insn_addr(from_ip)
                    nesting_level = 1
                    # Stand-ins for the untracked call and for its return
                    tracked_branches.append({'from_ip': from_ip, 'to_ip': None})
                    tracked_branches.append({'from_ip': None, 'to_ip': ret_to_ip})
                else:
                    tracked_branches.append(b)
            elif to_ip == ret_to_ip and (from_insn is None or arch.is_ret_insn(from_insn)):
                # A branch to the recorded return address unwinds one level
                nesting_level -= 1
            elif ((not callee_address or from_ip == callee_address) 
                  and (from_insn is not None and arch.is_call_insn(from_insn))):
                nesting_level += 1

        return tracked_branches
565 |     
    @property
    def detailed_trace(self) -> bool:
        """Always ``False`` for this reporter.

        NOTE(review): presumably consulted by the base reporter to choose the
        level of trace detail - confirm against the base class.
        """
        return False


--------------------------------------------------------------------------------
/kprobesrecorder.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2023 VMware, Inc.
  2 | # SPDX-License-Identifier: BSD-2-Clause
  3 | import logging
  4 | from typing import Optional, Set, List, Dict, Tuple, Iterable, Any, Union
  5 | from collections import deque, defaultdict
  6 | 
  7 | import ptrace
  8 | from ptrace.debugger.process import PtraceProcess
  9 | from ptrace.syscall.ptrace_syscall import PtraceSyscall
 10 | 
 11 | from arch import arch
 12 | from cle.backends import Symbol
 13 | from capstone import CsInsn
 14 | 
 15 | from kcore import Kcore
 16 | from ftrace import Ftrace
 17 | from recorder import Recorder
 18 | from prmsg import pr_msg, Pbar, warn_once
 19 | 
 20 | class KProbesRecorder(Recorder):
    # Trace events named after the entry into IRQ handling. ``record``
    # enables them together with RESUME_TRACE_EVENTS while snapshots are taken.
    # NOTE(review): presumably they mark where snapshot lines should start
    # being skipped - confirm against remove_snapshot_irqs().
    SKIP_TRACE_EVENTS: List[str] = [
           'irq/irq_handler_entry',
           'irq_vectors/call_function_entry',
           'irq_vectors/call_function_single_entry',
           'irq_vectors/error_apic_entry',
           'irq_vectors/local_timer_entry',
           'irq_vectors/reschedule_entry',
           'irq_vectors/spurious_apic_entry',
           'irq_vectors/thermal_apic_entry',
           'irq_vectors/threshold_apic_entry',
    ]

    # The matching exit events, listed in the same order as the entry events
    # in SKIP_TRACE_EVENTS.
    RESUME_TRACE_EVENTS: List[str] = [
           'irq/irq_handler_exit',
           'irq_vectors/call_function_exit',
           'irq_vectors/call_function_single_exit',
           'irq_vectors/error_apic_exit',
           'irq_vectors/local_timer_exit',
           'irq_vectors/reschedule_exit',
           'irq_vectors/spurious_apic_exit',
           'irq_vectors/thermal_apic_exit',
           'irq_vectors/threshold_apic_exit',
    ]

    # Names of kernel functions that do not return to their caller.
    # NOTE(review): not referenced in the visible part of the class - confirm
    # where it is consumed.
    NORETURN_FUNCS = {
        '__stack_chk_fail',
        'fortify_panic',
    }
 49 | 
 50 |     def __init__(self, **kwargs):
 51 |         self.pending_signals = defaultdict(deque)
 52 |         self.kprobes = dict()
 53 | 
 54 |         kwargs.pop('tmp_path', None)
 55 |         kwargs['kcore'] = Kcore()
 56 |         super().__init__(**kwargs)
 57 |         self.ftrace = Ftrace.main_instance(self.angr_mgr)
 58 | 
 59 |     def set_probes(self, addrs:Iterable[int]) -> List[int]:
 60 |         probes = list()
 61 | 
 62 |         for addr in Pbar("setting probes", items=addrs, unit="kprobe"):
 63 |             if self.ftrace.is_kprobe_blacklisted(addr):
 64 |                 raise ValueError(f'kprobe on {hex(addr)} is blacklisted')
 65 |             probe = self.get_kprobe(addr = addr, extra = arch.ftrace_state_str)
 66 |             if probe is None:
 67 |                 logging.error(f'could not set probe on {hex(addr)}')
 68 |             else:
 69 |                 probes.append(probe)
 70 | 
 71 |         probes.sort(key=lambda x: x.addr)
 72 |         for probe in probes:
 73 |             probe.enable = True
 74 | 
 75 |         return probes
 76 | 
 77 |     def get_kprobe(self,
 78 |                    addr: int,
 79 |                    ret: bool = False,
 80 |                    extra: str = ''):
 81 |         key = (addr, ret)
 82 |         assert key is not None
 83 | 
 84 |         if not self.ftrace.is_valid_kprobe(addr):
 85 |             return None
 86 | 
 87 |         prefix = 'r' if ret else 'p'
 88 |         ename = f'{prefix}_{hex(addr)}'
 89 | 
 90 |         # We always use _stext as the target function, since there might be multiple
 91 |         # symbols with the same name.
 92 |         assert self.angr_mgr is not None
 93 |         target_sym = self.angr_mgr.get_sym('_stext')
 94 |         offset = addr - target_sym.rebased_addr
 95 |         assert offset >= 0
 96 | 
 97 |         kprobe = self.ftrace.KprobeEvent(
 98 |                      ftrace = self.ftrace,
 99 |                      probe_type = prefix,
100 |                      event_name = ename,
101 |                      module_name = '',
102 |                      target_function = target_sym,
103 |                      probe_offset = offset,
104 |                      extra = extra)
105 | 
106 | #        self.kprobes[key] = kprobe
107 |         return kprobe
108 | 
109 |     def set_ret_probes(self, syms:Set[Symbol]) -> List:
110 |         events = list()
111 |         for sym in Pbar("setting ret probes", items=syms, unit="symbol"):
112 |             e = self.get_kprobe(addr = sym.rebased_addr, ret=True, extra='ret=$retval')
113 |             if e is not None:
114 |                 events.append(e)
115 | 
116 |         for e in events:
117 |             e.enable = True
118 |         return events
119 | 
120 |     def record(self, args:List[str]):
121 |         """
122 |         Record function to trace kernel failures using kprobes
123 | 
124 |         :param args: command line arguments
125 |         """
126 |         assert self.angr_mgr is not None
127 | 
128 |         ftrace = Ftrace.main_instance(self.angr_mgr)
129 |         ftrace.tracing_on = False
130 | 
131 |         stext_addr = self.angr_mgr.get_sym_addr("_stext")
132 |         assert stext_addr is not None
133 |         ftrace.kprobe_event_disable_all()
134 | 
135 |         ftrace.init_kprobe_base("_stext", self.angr_mgr.get_sym_addr)
136 |         pr_msg("starting the process...", level='TITLE', new_line_before=True)
137 | 
138 |         try:
139 |             self.init_process(args)
140 |         except (FileNotFoundError, PermissionError) as e:
141 |             pr_msg(f"error starting process: {e}", level="FATAL")
142 |             return 0
143 | 
144 |         ftrace.buffer_size_kb = self.snapshot_size
145 |         ftrace.irq_info = False
146 |         ftrace.event_fork = False
147 |         ftrace.function_fork = False
148 |         sys_exit_event = self.set_sysexit_filter(ftrace, True)
149 |         ftrace.stacktrace = False
150 |         ftrace.func_stack_trace = True
151 | 
152 |         trace_events = [
153 |             ftrace.get_event(ev)
154 |             for ev in ['raw_syscalls/sys_enter'] + self.SKIP_TRACE_EVENTS + self.RESUME_TRACE_EVENTS
155 |         ] + [sys_exit_event] 
156 |        
157 |         while True:
158 |             # Cleanup if we did not finish nicely the last error
159 |             ftrace.remove_all_probes()
160 |             ftrace.current_tracer = 'nop'
161 |             ftrace.func_filter = []
162 |             ftrace.sym_addr = True           
163 |             for ev in trace_events:
164 |                 ev.enable = False
165 | 
166 |             pr_msg("waiting for failure...", level='TITLE', new_line_before=True)
167 |             syscall = self.wait_for_syscall(None)
168 |             if syscall is None:
169 |                 pr_msg("execution ended", level="INFO")
170 |                 break
171 | 
172 |             process = syscall.process
173 |             ftrace.pid = process.pid
174 |             ftrace.event_pid = process.pid
175 | 
176 |             self.print_syscall_info(syscall)
177 | 
178 |             pr_msg('stage 1: producing call graph', level='TITLE', new_line_before=True)
179 |             ftrace.current_tracer = 'function'
180 | 
181 |             for ev in trace_events:
182 |                 ev.enable = True
183 | 
184 |             try:
185 |                 snapshot = self.rerun_get_snapshot(process, syscall)
186 |             except Exception as e:
187 |                 pr_msg(f'error: {e}', level="ERROR")
188 |                 continue
189 | 
190 |             snapshot = self.cleanup_callstack(snapshot)
191 |             snapshot = self.remove_snapshot_irqs(snapshot)
192 |             trace_syms = self.get_ftrace_snapshot_syms(snapshot)
193 | 
194 |             ftrace.tracing_on = False
195 | 
196 |             pr_msg(f'stage 2: obtaining return values ({len(trace_syms)} functions)',
197 |                    level='TITLE', new_line_before=True)
198 | 
199 |             ret_probes = self.set_ret_probes(trace_syms)
200 |             trace_syms.intersection_update([self.angr_mgr.get_sym(probe.addr) for probe in ret_probes])
201 | 
202 |             if not self.set_func_tracing(trace_syms):
203 |                 exit(1)
204 |             
205 |             try: 
206 |                 snapshot = self.rerun_get_snapshot(process, syscall)
207 |             except Exception as e:
208 |                 pr_msg(f'error: {e}', level="ERROR")
209 |                 continue
210 |             
211 |             ftrace.remove_all_probes()
212 |             snapshot = self.cleanup_callstack(snapshot)
213 |             snapshot = self.remove_snapshot_irqs(snapshot)
214 |             snapshot = self.remove_untracked_from_snapshot(snapshot)
215 |             trace_syms = self.get_ftrace_snapshot_syms(snapshot)
216 | 
217 |             pr_msg("stage 3: creating trace", level='TITLE', new_line_before=True)
218 | 
219 |             reachable_syms = self.angr_mgr.process_reachable_syms(trace_syms)
220 |             probe_addrs, probe_syms = self.tracking_probe_addrs(reachable_syms)
221 |             self.set_ret_probes(probe_syms)
222 | 
223 |             if not self.set_func_tracing(probe_syms):
224 |                 exit()
225 |             self.set_probes(probe_addrs)
226 |             snapshot = self.rerun_get_snapshot(process, syscall)
227 |             ftrace.remove_all_probes()
228 |            
229 |             snapshot = self.cleanup_callstack(snapshot)
230 |             snapshot = self.remove_snapshot_irqs(snapshot)
231 |             # TODO: Save the reachable syms
232 |             snapshot = self.remove_untracked_from_snapshot(snapshot, probe_syms)
233 | 
234 |             # Save regardless to live analysis
235 |             self.log_kprobes_failure(syscall=syscall,
236 |                                      trace=snapshot,
237 |                                      pid=process.pid,
238 |                                      probe_addrs=probe_addrs,
239 |                                      sim_syms=reachable_syms)
240 |             
241 |             if self.early_stop:
242 |                 for p in self.dbg.list:
243 |                     p.kill()
244 |                 break
245 |                 
246 |         self.save_failures("kprobes")
247 | 
248 |         # turn everything off again
249 |         for ev in trace_events:
250 |             ev.enable = False
251 | 
252 |         ftrace.current_tracer = 'nop'
253 |         ftrace.tracing_on = False
254 |         ftrace.func_stack_trace = False
255 |         ftrace.pid = []
256 |         ftrace.event_pid = []
257 |         sys_exit_event.trigger = None
258 | 
259 |     def get_ftrace_snapshot_syms(self, snapshot:List[Dict[str,Any]]) -> Set[Symbol]:
260 |         assert self.angr_mgr is not None
261 | 
262 |         syms = {entry['callstack_syms'][0] for entry in snapshot
263 |                       if entry['type'] == 'func' and 'callstack_syms' in entry}
264 | 
265 |         syms = {sym for sym in syms if sym and not self.is_invalid_func_probe(sym) and 
266 |                                     not self.angr_mgr.is_noprobe_sym(sym)}
267 | 
268 |         # Ensure we can disasm each symbol
269 |         syms = {sym for sym in syms if self.angr_mgr.disasm_sym(sym)}
270 | 
271 |         return syms
272 |             
273 | 
274 |     def remove_untracked_from_snapshot(self, snapshot:List[Dict], syms:Optional[Set[Symbol]]=None) -> List[Dict]:
275 |         assert self.angr_mgr is not None
276 | 
277 |         entry_syms = {self.angr_mgr.get_sym(s) for s in arch.syscall_entry_points}
278 |         found_entry_point = False
279 |         cleaned = list()
280 |         untracked = 0
281 |         ignored_caller_syms = {self.angr_mgr.get_sym(s) for s in arch.syscall_entry_points}
282 |         
283 |         for l in Pbar("cleaning ftrace", items=snapshot, unit="line"):
284 |             to_sym = l['callstack_syms'][0] if len(l.get('callstack_syms', [])) > 0 else None
285 |             from_sym = l['callstack_syms'][1] if len(l.get('callstack_syms', [])) > 1 else None
286 | 
287 |             if not found_entry_point:
288 |                 if l['type'] != 'func' or from_sym not in entry_syms:
289 |                     continue
290 |                 found_entry_point = True
291 | 
292 |             if l['type'] == 'func':
293 |                 if untracked > 0:
294 |                     untracked += 1
295 |                     continue
296 | 
297 |                 for callstack_sym in l['callstack_syms']:
298 |                     if callstack_sym and callstack_sym.name in arch.syscall_entry_points:
299 |                         break
300 | 
301 |                     if (callstack_sym is None or
302 |                         self.angr_mgr.is_noprobe_sym(callstack_sym) or
303 |                         (syms is not None and callstack_sym not in syms|entry_syms|ignored_caller_syms)):
304 |                         untracked = 1
305 |                         break
306 | 
307 |                 if untracked > 0:
308 |                     continue
309 | 
310 |                 # Ignore interrupts, exceptions
311 |                 prev_insn = self.angr_mgr.get_prev_insn(l['from_ip'])
312 |                 if prev_insn is None or not arch.is_branch_insn(prev_insn):
313 |                     pr_msg(f'failed insn {prev_insn} to {hex(l["to_ip"])}', level="ERROR")
314 |                     assert(0 == 1)
315 |                     continue
316 | 
317 |                 if to_sym is None or (syms is not None and self.is_invalid_func_probe(to_sym)):
318 |                     untracked = 1
319 |                     continue
320 |                 
321 |             elif l['type'] == 'ret':
322 |                 if untracked > 0:
323 |                     untracked -= 1
324 |                     continue
325 | 
326 |             if untracked == 0:
327 |                 cleaned.append(l)
328 | 
329 |         return cleaned
330 | 
331 |     def log_kprobes_failure(self,
332 |                             syscall: PtraceSyscall,
333 |                             trace: List[Dict[str, Union[int, str, float, List]]],
334 |                             pid:int,
335 |                             probe_addrs:Iterable[int],
336 |                             sim_syms:Iterable[Symbol]):
337 |         failure = {
338 |             'syscall': syscall.syscall,
339 |             'errcode': -syscall.result,
340 |             'trace_id': len(self.traces),
341 |             'pid': pid,
342 |             'probe_addrs': probe_addrs,
343 |             'sim_syms': [s.rebased_addr for s in sim_syms],
344 |         }
345 |         for trace_entry in trace:
346 |             trace_entry.pop('callstack_syms', None)
347 | 
348 |             # TODO: delete some more useless stuff
349 | 
350 |         self.traces.append(trace) # type: ignore
351 |         self.failures.append(failure)
352 | 
353 |     def rerun_get_snapshot(self, process:PtraceProcess, failing_syscall:PtraceSyscall) -> List[Dict[str, Any]]:
354 |         ftrace = Ftrace.main_instance()
355 |         ftrace.clear_snapshot()
356 |         ftrace.tracing_on = True
357 |         self.restart_syscall(process, failing_syscall)
358 |         syscall = self.wait_for_syscall(process)
359 |         ftrace.tracing_on = False
360 | 
361 |         if syscall is None or syscall.result != failing_syscall.result:
362 |             raise ValueError("reproduction error")
363 |         
364 |         assert syscall.process == process
365 |         assert syscall.instr_pointer == failing_syscall.instr_pointer
366 | 
367 |         s = ftrace.get_snapshot(self.SKIP_TRACE_EVENTS, self.RESUME_TRACE_EVENTS)
368 |         return s
369 |     
370 |     def cleanup_callstack(self, trace:List[Dict[str, Any]]) -> List[Dict[str, Any]]:
371 |         addr_to_sym:Dict[int, Symbol] = dict()
372 | 
373 |         def get_sym(addr):
374 |             if addr in addr_to_sym:
375 |                 return addr_to_sym[addr]
376 |             
377 |             try:
378 |                 sym = self.angr_mgr.get_sym(addr)
379 |             except ValueError:
380 |                 sym = None
381 |             addr_to_sym[addr] = sym
382 |             return sym
383 | 
384 |         last_callstack:List[int] = list()
385 |         last_callstack_syms:List[Symbol] = list()
386 | 
387 |         for l in Pbar("finding symbols", items=trace):
388 |             callstack_syms = []
389 |             if 'to_ip' not in l or l.get('type') != 'func':
390 |                 continue
391 |             to_sym = get_sym(l['to_ip'])
392 |             from_sym = get_sym(l['from_ip'])
393 | 
394 |             if to_sym is not None and from_sym is not None:
395 |                 callstack = [l['to_ip'], l['from_ip']]
396 |                 callstack_syms = [to_sym, from_sym]
397 | 
398 |             # TODO: Consider whether we actually save the callstack on return
399 |             if 'callstack' not in l or l['callstack'] is None:
400 |                 continue
401 | 
402 |             # The callstack is really dirty: There is some junk of ftrace on top
403 |             # of to_sym in callstack_sym. Get rid of it.  Then the from entry is
404 |             # not always there, so we need to check whether to skip it.
405 |             skip:Optional[str] = "to"
406 |             for callstack_entry in l['callstack']:
407 |                 sym = callstack_entry and get_sym(callstack_entry)
408 | 
409 |                 if skip == "to":
410 |                     if sym == to_sym:
411 |                         skip = "from"
412 |                     continue
413 |                 if skip == "from":
414 |                     skip = None
415 |                     if sym == from_sym:
416 |                       continue
417 | 
418 |                 callstack.append(callstack_entry)
419 |                 callstack_syms.append(sym)
420 | 
421 |             # Guess the symbols and the addresses we did not figure out from the last stack
422 |             for i, v in enumerate(reversed(callstack)):
423 |                 if v is None and i < len(last_callstack):
424 |                     callstack[-i-1] = last_callstack[-i-1]
425 |                     callstack_syms[-i-1] = last_callstack_syms[-i-1]
426 | 
427 |             last_callstack_syms = callstack_syms
428 |             last_callstack = callstack
429 | 
430 |             l['callstack_syms'] = callstack_syms
431 |             l['callstack'] = callstack
432 |         return trace
433 | 
434 |     def wait_for_syscall(self, process:Optional[PtraceProcess]) -> Optional[PtraceSyscall]:
435 |         while len(self.dbg.list) != 0:
436 |             process_filter = [process] if process is not None else self.dbg.list
437 |             stopped = filter(lambda p: p.is_stopped, process_filter)
438 |             for p in stopped:
439 |                 signum = 0
440 |                 if len(self.pending_signals[p.pid]) != 0:
441 |                     signum = self.pending_signals[p.pid].popleft()
442 |                 try:
443 |                     p.syscall(signum)
444 |                 except (ptrace.debugger.ProcessExit, ptrace.PtraceError) as exc:
445 |                     pr_msg(f"error waiting for syscall failure {exc}", level="WARN")
446 | 
447 |             signum = 0
448 |             is_syscall = False
449 | 
450 |             trapped_process: PtraceProcess
451 | 
452 |             try:
453 |                 e = self.dbg.waitSyscall()
454 |                 is_syscall = True
455 |                 trapped_process = e.process
456 |             except ptrace.debugger.ProcessExit as e:
457 |                 e.process.processExited(e)
458 |                 trapped_process = e.process
459 |             except ptrace.debugger.ProcessSignal as e:
460 |                 self.pending_signals[e.process.pid].append(e.signum)
461 |                 trapped_process = e.process
462 |             except ptrace.debugger.NewProcessEvent as e:
463 |                 e.process.parent.is_stopped = True
464 |                 trapped_process = e.process
465 |             except ptrace.debugger.ProcessExecution as e:
466 |                 # It should have been marked as stopped, but it is not
467 |                 e.process.is_stopped = True
468 |                 trapped_process = e.process
469 | 
470 |             if not is_syscall:
471 |                 continue
472 | 
473 |             if process_filter and trapped_process not in process_filter:
474 |                 # TODO: queue the process to be resumed or analyzed later, since
475 |                 # otherwise we might miss failures
476 |                 continue
477 |             
478 |             try:
479 |                 syscall = trapped_process.syscall_state.event(ptrace.func_call.FunctionCallOptions())
480 |             except (ptrace.debugger.ProcessExit, ptrace.PtraceError) as exc:
481 |                 pr_msg(f'error getting syscall info: {exc}', level='WARN')
482 |                 continue
483 | 
484 |             # For syscall entry, the result is None
485 |             if syscall.result is None:
486 |                 continue
487 | 
488 |             # On reproduction, process is not None and we do not care about the
489 |             # result and the syscall. (There might be some strange scenario that
490 |             # we do if some signal is involved, but ignore it.)
491 |             if process is None:
492 |                 if self.syscall_filter is not None and self.syscall_filter != syscall.syscall:
493 |                     continue
494 | 
495 |                 if (syscall.result >= 0 or
496 |                     (self.errcode_filter and self.errcode_filter != -syscall.result)):
497 |                     continue
498 | 
499 |                 self.occurrences += 1
500 |                 if self.occurrences_filter is not None and self.occurrences not in self.occurrences_filter:
501 |                     continue
502 | 
503 |             return syscall
504 | 
505 |         return None
506 | 
507 |     def remove_snapshot_irqs(self, snapshot:List[Dict]) -> List[Dict]:
508 |         """
509 |         Removes all IRQ-related events from a given snapshot, including all
510 |         events between an irqenter event and its corresponding irqexit event.
511 |         
512 |         :param snapshot: A list of dictionaries representing events in the snapshot.
513 |         :return: The input snapshot with all IRQ-related events removed.
514 |         """
515 |         irq_depth = 0
516 |         filtered_snapshot = []
517 |         for event in Pbar("remove irqs", snapshot):
518 |             if event['type'] == 'irqenter':
519 |                 irq_depth += 1
520 |             elif event['type'] == 'irqexit':
521 |                 irq_depth -= 1
522 |             elif irq_depth == 0:
523 |                 filtered_snapshot.append(event)
524 |         return filtered_snapshot
525 | 
526 |     def analyze_probe_insns(self, sym:Symbol) -> Set[CsInsn]:
527 |         assert self.angr_mgr is not None
528 | 
529 |         def collect(sym: Symbol, insn:CsInsn, **kwargs):
530 |             assert self.angr_mgr is not None
531 | 
532 |             # Do not put probes on the first instruction of a function, as we
533 |             # have already set a probe on the function.
534 |             insns = kwargs['insns']
535 |             if (arch.is_predicated_mov(insn) or arch.is_cond_branch_insn(insn) or
536 |                 arch.is_rep_insn(insn)):
537 |                 insns.add(insn)
538 | 
539 |             if arch.is_rep_insn(insn):
540 |                 # For rep-prefix, we need to trace the counter on the following
541 |                 # instruction to figure out how many iterations were executed.
542 |                 insns.add(self.angr_mgr.next_insn(insn))
543 |             if arch.is_indirect_branch_target(insn):
544 |                 # We cannot put a probe point on the ENDBRxx instructions. Instead
545 |                 # put on the next one. Anyhow, we do not care about the first instruction
546 |                 # in a symbol.
547 |                 if (insn.address != self.angr_mgr.get_sym_addr(sym) or
548 |                     not Ftrace.is_available_filter_function(sym)):
549 |                     insns.add(self.angr_mgr.next_insn(insn))
550 |             elif arch.is_direct_call_insn(insn):
551 |                 # On calls to functions that cannot be probed, keep the return
552 |                 # value. We will create an artifical fork based on the return
553 |                 # value if the return value is the error code.
554 |                 tgt = arch.get_direct_branch_target(insn)
555 |                 try:
556 |                     tgt_sym = self.angr_mgr.get_sym(tgt)
557 |                 except:
558 |                     tgt_sym = None
559 | 
560 |                 if (tgt_sym is None or
561 |                     (tgt_sym.name not in self.NORETURN_FUNCS and
562 |                     not Ftrace.is_available_filter_function(tgt_sym))):
563 |                     try:
564 |                         insns.add(self.angr_mgr.next_insn(insn))
565 |                     except:
566 |                         pass
567 |             elif arch.is_indirect_call_insn(insn):
568 |                 # We might not have the callee as instrumentable. We would add the next
569 |                 # instruction to the probe list. It would have been better to figure out
570 |                 # from the trace whether we can actually trace without this probe point.
571 |                 insns.add(self.angr_mgr.next_insn(insn))
572 |         
573 |         insns:Set[CsInsn] = set()
574 |         self.angr_mgr.for_each_insn_in_sym(sym, collect, insns=insns)
575 |         return insns
576 | 
    # Returns the addresses of the instruction probes and the set of symbols
    # whose entry is probed.
579 |     def tracking_probe_addrs(self, syms:Set[Symbol]) -> Tuple[Set[int], Set[Symbol]]:
580 |         probe_syms:Set[Symbol] = set()
581 |         probe_insns:Set[CsInsn] = set()
582 | 
583 |         for sym in Pbar("find probe points", syms, unit="symbol"):
584 |             if self.is_invalid_func_probe(sym):
585 |                 pr_msg(f"cannot set func probe on {sym.name}", level="DEBUG")
586 |                 continue
587 | 
588 |             insns = self.analyze_probe_insns(sym)
589 |             cannot_probe = {insn.address for insn in insns if self.is_invalid_probe(insn)}
590 |             if len(cannot_probe) == 0:
591 |                 probe_insns |= insns
592 |                 probe_syms.add(sym)
593 |             else:
594 |                 cannot_probe_first = next(iter(cannot_probe))
595 |                 cannot_probe_addr = (cannot_probe_first if isinstance(cannot_probe_first, int)
596 |                                     else cannot_probe_first.address)
597 |                 pr_msg(f"cannot set probe on {sym.name} (e.g., {hex(cannot_probe_addr)})", level="DEBUG")
598 | 
599 |         probe_addrs = {insn.address for insn in probe_insns} - {sym.rebased_addr for sym in probe_syms}
600 |         return (probe_addrs, probe_syms)
601 | 
602 |     def invalid_func_probe_cause(self, sym: Symbol) -> Optional[str]:
603 |         assert self.angr_mgr is not None
604 | 
605 |         ftrace = Ftrace.main_instance()
606 | 
607 |         if sym is None:
608 |             return 'none'
609 |         if not ftrace.is_available_filter_function(sym):
610 |             return 'func blacklisted'
611 |         if self.angr_mgr.is_noprobe_sym(sym):
612 |             return 'discarded'
613 |         return None
614 |     
615 |     def is_invalid_func_probe(self, sym: Symbol) -> bool:
616 |         return self.invalid_func_probe_cause(sym) is not None
617 | 
618 |     def is_invalid_probe(self, insn: CsInsn) -> Optional[str]:
619 |         addr = insn.address
620 | 
621 |         # Detect UD2: cannot set kprobes
622 |         if insn.bytes == b'\x0f\x0b':
623 |             return 'bug'
624 | 
625 |         # Indirect jumps cannot be patched (possibly due to spectre)
626 |         if arch.is_indirect_jmp_insn(insn):
627 |             return "indirect-jmp"
628 | 
629 |         # Check if the address is blacklisted in ftrace
630 |         ftrace = Ftrace.main_instance()
631 |         if ftrace.main_instance().is_kprobe_blacklisted(addr):
632 |             return 'blacklisted'
633 | 
634 |         # Check if the address is invalid for kprobe (e.g., static key/call)
635 |         if ftrace.is_invalid_kprobe_addr(addr):
636 |             return 'invalid'
637 | 
638 |         # If none of the conditions above are met, the probe is valid
639 |         return None


--------------------------------------------------------------------------------