├── .editorconfig ├── .gitattributes ├── .github └── workflows │ ├── publish.yml │ └── test.yml ├── .gitignore ├── CHANGELOG.md ├── LICENSE ├── README.md ├── assets ├── github-social-card.xcf ├── logo.png └── logo.xcf ├── pyproject.toml ├── src └── systrack │ ├── __init__.py │ ├── __main__.py │ ├── arch │ ├── __init__.py │ ├── arch_base.py │ ├── arm.py │ ├── arm64.py │ ├── mips.py │ ├── powerpc.py │ ├── riscv.py │ └── x86.py │ ├── elf.py │ ├── kconfig.py │ ├── kconfig_options.py │ ├── kernel.py │ ├── location.py │ ├── output.py │ ├── signature.py │ ├── syscall.py │ ├── templates │ ├── syscall_table.css │ ├── syscall_table.html │ └── syscall_table.js │ ├── type_hints.py │ ├── utils.py │ └── version.py └── tests ├── __init__.py ├── data ├── .gitignore ├── Makefile └── x86_no_table_syscall_handlers.s ├── test_mips.py ├── test_powerpc.py ├── test_x86.py └── utils.py /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*] 4 | charset = utf-8 5 | indent_style = tab 6 | indent_size = 4 7 | end_of_line = lf 8 | insert_final_newline = true 9 | trim_trailing_whitespace = true 10 | 11 | [*.md] 12 | indent_style = unset 13 | 14 | [*.yml] 15 | indent_style = space 16 | indent_size = 2 17 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # Exclude assembly from linguist code stats (prevents GitHub from marking the 2 | # repository as >50% assembly). 
3 | *.s linguist-vendored 4 | -------------------------------------------------------------------------------- /.github/workflows/publish.yml: -------------------------------------------------------------------------------- 1 | name: Publish to PyPI 2 | 3 | on: 4 | release: 5 | types: 6 | - published 7 | 8 | # Allow only one concurrent job 9 | concurrency: 10 | group: publish 11 | cancel-in-progress: false 12 | 13 | jobs: 14 | test-before-publish: 15 | uses: ./.github/workflows/test.yml 16 | publish: 17 | needs: [test-before-publish] 18 | runs-on: ubuntu-latest 19 | environment: 20 | name: hatch 21 | steps: 22 | - name: Checkout 23 | uses: actions/checkout@v4 24 | - name: Ensure matching version and release tag 25 | run: test v"$(python3 src/systrack/version.py)" = "${{github.ref_name}}" 26 | - name: Install build dependencies 27 | run: python3 -m pip install --upgrade build hatch 28 | - name: Build wheel and sdist 29 | run: hatch build 30 | - name: Publish to PyPI 31 | run: hatch publish --no-prompt 32 | env: 33 | HATCH_INDEX_USER: __token__ 34 | HATCH_INDEX_AUTH: ${{secrets.HATCH_INDEX_AUTH}} 35 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Test 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | - dev 8 | workflow_call: 9 | 10 | jobs: 11 | test: 12 | runs-on: ubuntu-22.04 13 | strategy: 14 | matrix: 15 | python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] 16 | steps: 17 | - name: Checkout 18 | uses: actions/checkout@v4 19 | - name: Setup Python 20 | uses: actions/setup-python@v5 21 | with: 22 | python-version: ${{ matrix.python-version }} 23 | - name: Install test dependencies 24 | run: python3 -m pip install --upgrade build hatch pytest 25 | - name: Run tests 26 | run: hatch test 27 | -------------------------------------------------------------------------------- /.gitignore: 
-------------------------------------------------------------------------------- 1 | dist 2 | systrack.egg-info 3 | __pycache__ 4 | .pytest_cache 5 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | Systrack changelog 2 | ================== 3 | 4 | 5 | v0.7 6 | ---- 7 | 8 | New arch support: RISC-V 32-bit and 64-bit, tested on v4.15+ kernels (i.e., 9 | since the first Linux version supporting RISC-V). 10 | 11 | **Improvements**: 12 | 13 | - Improve dummy syscall implementation detection: try to first match known 14 | "ni_syscall" code. 15 | - Improve error messages and debug/info logs, pretty printing command-line 16 | arguments and executed commands instead of dumping their tuple/list 17 | representation. 18 | - mips: implement simple arch-specific dummy syscall detection. 19 | - arm64: remove "arm64_" arch-specific prefix from syscall names. 20 | 21 | **Bug fixes**: 22 | 23 | - mips: new dummy syscall detection now correctly identifies some dummy syscalls 24 | that were previously missed (notably `cachestat`). 25 | 26 | **Internal changes**: 27 | 28 | - Archs can now specify multiple kernel Makefile config targets to run one after 29 | the other as a "base" config. 30 | 31 | 32 | v0.6 33 | ---- 34 | 35 | **Improvements**: 36 | 37 | - More robust and comprehensive syscall definition location search. 38 | 39 | **Bug fixes**: 40 | 41 | - Fix broken syscall definition location search and subsequent signature 42 | extraction. Some syscalls were incorrectly reported as defined in place of 43 | others, also causing the wrong signature to be extracted. Do not fully trust 44 | the output of `addr2line` and perform full syscall name matching to fix this. 45 | PowerPC was notably affected the most by this issue. 
46 | 47 | 48 | v0.5.1 49 | ------ 50 | 51 | **Improvements**: 52 | 53 | - x86: improve x86 syscall extraction code fixing undetected CALL targets. 54 | 55 | **Internal changes**: 56 | 57 | - x86: add some tests for syscall extraction based on v6.11 kernel build. 58 | 59 | 60 | v0.5 61 | ---- 62 | 63 | We tried so hard, and got so far, but in the end, we need a disassembler! x86 64 | mitigations have defeated us, we no longer have syscall tables to rely on. 65 | Kernel developers were kind enough to write very simple ABI-specific 66 | switch-based handlers to dispatch syscalls, so analysis is still possible... just 67 | significantly more complicated. 68 | 69 | **Breaking changes**: 70 | 71 | - Drop support for Python 3.6 and 3.7. Systrack now requires Python 3.8+. This 72 | is because of the new dependency on 73 | [`iced-x86`](https://pypi.org/project/iced-x86/). 74 | 75 | **Improvements**: 76 | 77 | - x86: support new kernels (6.9+) with no syscall tables. 78 | - Remove unnecessary spaces between asterisks for double pointers in function 79 | signatures. 80 | - Avoid KCFI `__{cfi,pfx}_` symbols when looking for `ni_syscall` symbols. 81 | 82 | **Internal changes**: 83 | 84 | - Depend on [`iced-x86`](https://pypi.org/project/iced-x86/) for disassembling 85 | x86 instructions and on [`jinja2`](https://pypi.org/project/jinja2/) for HTML 86 | output directly. Remove optional dependencies and only build one package. 87 | - Rename `test` folder to `tests` to use the `hatch test` as test command 88 | - Improve logging reproducibility by sorting more debugging log output. 89 | - Improve broken Python package metadata (Python packaging moment). 90 | 91 | 92 | v0.4 93 | ---- 94 | 95 | New arch support: PowerPC 32-bit, tested on v5.0+ kernels. 96 | 97 | **Improvements**: 98 | 99 | - Improve kconfig dependency checking logic for better warning/error messages. 100 | - PowerPC PPC64: improve esoteric fast switch_endian syscall detection. 
101 | - Better (narrower) emoji spacing in HTML output. 102 | 103 | **Bug fixes**: 104 | 105 | - Correctly report `delete_module` depending on `CONFIG_MODULE_UNLOAD=y`. 106 | - Fix incorrectly handled shared syscall table in x86-64 x32 ABI resulting in 107 | duplicated and unwanted entries in the output for kernels older than v5.4. 108 | - Fix chance of building kernels without `memfd_create`, `memfd_secret`, 109 | `delete_module` (and possibly others) by always enabling `MEMFD_CREATE`, 110 | `MODULE_UNLOAD`, `NET` and `SECRETMEM` when available. 111 | - Fix wrong handling of relative `--kdir` path (e.g., `.`) in some cases. 112 | - Fix missed detection of non-implemented syscalls pointing to `kernel/sys_ni.c` 113 | when DWARF debug info contains relative paths. 114 | - x86 x32: fix some x64 syscalls reported twice because both the x64 number and 115 | the historically misnumbered x32 numbers (512-547) were being considered 116 | valid. 117 | 118 | **Internal changes**: 119 | 120 | - Ignore `sound/` and `user/` dirs to speed up grepping syscall definitions. 121 | - Implement some basic unit tests for powerpc dummy/esoteric syscall detection. 122 | 123 | 124 | v0.3.3 125 | ------ 126 | 127 | **Improvements**: 128 | 129 | - Correctly report `lsm_{list_modules,get_self_attr,set_self_attr}` depending on 130 | `CONFIG_SECURITY=y`. 131 | 132 | 133 | v0.3.2 134 | ------ 135 | 136 | **Improvements**: 137 | 138 | - Correctly report `futex_{wait,wake,requeue}` depending on `CONFIG_FUTEX=y`. 139 | - Use unicorn emoji (cuter) instead of test tube for esoteric syscalls in HTML 140 | output. 141 | 142 | 143 | v0.3.1 144 | ------ 145 | 146 | **Improvements**: 147 | 148 | - x86: Add build support for `map_shadow_stack`. 149 | - Prefer `compat_sys_` over `__se_compat_sys_` and other longer symbol synonyms; 150 | same for `.compat_sys_` on PowerPC. 151 | 152 | **Bug fixes**: 153 | 154 | - Fix broken naive grepping of syscall definitions when no ripgrep is available. 
155 | - Correctly report `cachestat` depending on `CACHESTAT_SYSCALL=y`. 156 | 157 | **Internal changes**: 158 | 159 | - Sort stderr logs for reproducible output and easier diffing. 160 | - Skip `lib/` directory in kernel sources to improve grepping performance. 161 | 162 | 163 | v0.3 164 | ---- 165 | 166 | New arch support: PowerPC 64-bit, all ABIs, tested on v5.0+ kernels. 167 | 168 | **Improvements:** 169 | 170 | - Add ABI `bits` (integer) and `compat` (boolean) fields to JSON output. 171 | - Support ELF symbols with weird names (special chars in the name). 172 | - Support function descriptors for syscall table entries (useful for PowerPC64 173 | and Itanium 64). 174 | - Support weird arch-specific `SYSCALL_DEFINEn` macros. 175 | - Building kernels now generates relative paths in DWARF debug symbols through 176 | `-fdebug-prefix-map`. 177 | - Improve stdout output and add a table header. 178 | - Use `null` instead of `??`/`?` for unknown file/line info in JSON output. 179 | - x86: improve dummy syscall implementation detection (handling endbr64/32 180 | instructions). 181 | - ARM OABI: output syscall number location for the calling convention 182 | (`swi `). 183 | 184 | **Bug fixes**: 185 | 186 | - Correctly report `socketcall` depending on `CONFIG_NET=y`. 187 | - Correctly strip more syscall symbol prefixes for more accurate syscall names. 188 | - Fix bad symbol prefix detection in some weird edge cases, leading to wrong 189 | syscall names. 190 | - x86: fix wrong register names for x86-64 compat 32-bit ABI (IA-32). 191 | 192 | **Internal changes**: 193 | 194 | - Reorganize arch-specific code. 195 | - Handle SIGINT for more graceful termination. 196 | - Auto-remap definition locations relative to KDIR for ease of use. 197 | 198 | 199 | v0.2.1 200 | ------ 201 | 202 | **Improvements**: 203 | 204 | - Make syscall symbol preference more consistent (in particular, stop mixing 205 | `__se_sys_xxx` and `sys_xxx` when possible). 
206 | - Achieve W3C compliance for HTML output format. 207 | 208 | **Bug fixes**: 209 | 210 | - x86: correct wrong syscall numbers for x32 ABI, they should all be ORed with 211 | `0x40000000` (`__X32_SYSCALL_BIT`). 212 | 213 | 214 | v0.2 215 | ---- 216 | 217 | **Improvements**: 218 | 219 | - Improve existing MIPS build and analysis support: use `ip27_defconfig` for 220 | 64-bit for NUMA support and strip more symbol prefixes. 221 | - Improve dummy syscall implementation detection (x86-64, ARM). 222 | 223 | **Bug fixes**: 224 | 225 | - Fix help text for `--arch`: building with `--arch arm` creates an 226 | EABI-only kernel. 227 | - Fix a logging bug that caused not logging syscalls' `.origname` for not-found 228 | locations after grepping. 229 | - x86: use the right Kconfig option for vm86 and vm86old 230 | 231 | 232 | v0.1 233 | ---- 234 | 235 | First release. 236 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Systrack 2 | ======== 3 | 4 | [![License][license-badge]](./LICENSE) 5 | [![GitHub actions workflow status][actions-badge]][actions-link] 6 | [![PyPI version][pypi-badge]][pypi-systrack] 7 | [![PyPI downloads][pypi-badge2]][pypistats-systrack] 8 | 9 | Systrack logo 10 | 11 | **See [mebeim/linux-syscalls](https://github.com/mebeim/linux-syscalls) for live syscall tables powered by Systrack**. 12 | 13 | Systrack is a tool to analyze Linux kernel images (`vmlinux`) and extract 14 | information about implemented syscalls. Given a `vmlinux` image, Systrack can 15 | extract syscall numbers, names, symbol names, definition locations within kernel 16 | sources, function signatures, and more. 17 | 18 | Systrack can configure and build kernels for all its 19 | [supported architectures](#supported-architectures-and-abis), and works best at 20 | analyzing kernels that it has configured and built by itself. 
21 | 22 | 23 | Installation 24 | ------------ 25 | 26 | Systrack is [available on PyPI][pypi-systrack], it requires Python 3.8+ and is 27 | installable through Pip: 28 | 29 | ```bash 30 | pip install systrack 31 | ``` 32 | 33 | Building and installing from source requires [`hatch`][pypi-hatch]: 34 | 35 | ```bash 36 | hatch build 37 | pip install dist/systrack-XXX.whl 38 | ``` 39 | 40 | Usage 41 | ----- 42 | 43 | Systrack can mainly be used for two purposes: analyzing or building Linux 44 | kernels. See also [Command line help](#command-line-help) (`systrack --help`) 45 | and [Supported architectures and ABIs](#supported-architectures-and-abis) 46 | (`systrack --arch help`) below. 47 | 48 | - **Analyzing** a kernel image can be done given a `vmlinux` ELF with symbols, 49 | and optionally also a kernel source directory (`--kdir`). Systrack will 50 | extract information about implemented syscalls from the symbol table present 51 | in the given `vmlinux` ELF, and if debugging information is present, it will 52 | also extract file and line number information for syscall definitions. 53 | Supplying a `--kdir` pointing Systrack to the checked-out sources for the 54 | right kernel version (the same as the one to analyze) will help refine and/or 55 | correct the location of the definitions. 56 | 57 | Systrack can guess the architecture and ABI to analyze, but if the given 58 | kernel was built with support for multiple ABIs, the right one can be selected 59 | through `--arch`. 60 | 61 | ```none 62 | systrack path/to/vmlinux 63 | systrack --format json path/to/vmlinux 64 | systrack --format html path/to/vmlinux 65 | systrack --kdir path/to/linux_git_repo path/to/vmlinux 66 | systrack --kdir path/to/linux_git_repo --arch x86-64-ia32 path/to/vmlinux 67 | ``` 68 | 69 | - **Building** can be done through the `--build` option. You will need to 70 | provide a kernel source directory (`--kdir`) and an architecture/ABI 71 | combination to build for (`--arch`). 
72 | 73 | ```none 74 | systrack --build --kdir path/to/linux_source_dir --arch x86-64 75 | ``` 76 | 77 | When building, kernel sources are configured to enable all syscalls available 78 | for the selected architecture/ABI as to produce a `vmlinux` with a "complete" 79 | syscall table. 80 | 81 | Cross-compilation with GCC is possible specifying the correct toolchain prefix 82 | with the `--cross` option, which will set the `CROSS_COMPILE` variable for the 83 | kernel's `Makefile`. Other environment variables can also be used as usual and 84 | are passed as is to `make`, so LLVM [cross]-compilation and custom toolchain 85 | usage is also possible. 86 | 87 | ```none 88 | systrack --build --kdir path/to/linux_source --arch arm64 --cross aarch64-linux-gnu- 89 | ``` 90 | 91 | 92 | Supported architectures and ABIs 93 | -------------------------------- 94 | 95 | Here's a list of supported arch/ABI combinations accepted via `--arch` (values 96 | are case-insensitive). This information is also available running 97 | `systrack --arch help`. 
98 | 99 | | Value | Aliases | Arch | Kernel | Syscall ABI | Build based on | Notes | 100 | |:----------------|:-------------------|:--------|:-------|:---------------|:------------------------------|:--------| 101 | | `arm` | `arm-eabi`, `eabi` | ARM | 32-bit | 32-bit EABI | `multi_v7_defconfig` | *[2]* | 102 | | `arm-oabi` | `oabi` | ARM | 32-bit | 32-bit OABI | `multi_v7_defconfig` | *[2,4]* | 103 | | `arm64` | `aarch64` | ARM | 64-bit | 64-bit AArch64 | `defconfig` | | 104 | | `arm64-aarch32` | `aarch32` | ARM | 64-bit | 32-bit AArch32 | `defconfig` | *[1]* | 105 | | `mips` | `mips32`, `o32` | MIPS | 32-bit | 32-bit O32 | `defconfig` | | 106 | | `mips64` | `n64` | MIPS | 64-bit | 64-bit N64 | `ip27_defconfig` | *[1]* | 107 | | `mips64-n32` | `n32` | MIPS | 64-bit | 64-bit N32 | `ip27_defconfig` | *[1]* | 108 | | `mips64-o32` | `o32-64` | MIPS | 64-bit | 32-bit O32 | `ip27_defconfig` | *[1]* | 109 | | `powerpc` | `ppc`, `ppc32` | PowerPC | 32-bit | 32-bit PPC32 | `ppc64_defconfig` | | 110 | | `powerpc64` | `ppc64` | PowerPC | 64-bit | 64-bit PPC64 | `ppc64_defconfig` | *[1]* | 111 | | `powerpc64-32` | `ppc64-32` | PowerPC | 64-bit | 32-bit PPC32 | `ppc64_defconfig` | *[1]* | 112 | | `powerpc64-spu` | `ppc64-spu`, `spu` | PowerPC | 64-bit | 64-bit "SPU" | `ppc64_defconfig` | *[1,5]* | 113 | | `riscv` | `riscv32`, `rv32` | RISC-V | 32-bit | 32-bit "RV32" | `defconfig` + `32-bit.config` | *[3,6]* | 114 | | `riscv64` | `rv64` | RISC-V | 64-bit | 64-bit "RV64" | `defconfig` | *[1,6]* | 115 | | `riscv64-32` | `rv64-32` | RISC-V | 64-bit | 32-bit "RV32" | `defconfig` | *[1,6]* | 116 | | `x86` | `i386`, `ia32` | x86 | 32-bit | 32-bit IA32 | `i386_defconfig` | | 117 | | `x86-64` | `x64` | x86 | 64-bit | 64-bit x86-64 | `x86_64_defconfig` | *[1]* | 118 | | `x86-64-x32` | `x32` | x86 | 64-bit | 64-bit x32 | `x86_64_defconfig` | *[1]* | 119 | | `x86-64-ia32` | `ia32-64` | x86 | 64-bit | 32-bit IA32 | `x86_64_defconfig` | *[1]* | 120 | 121 | Notes: 122 | 123 | 1. 
Building creates a kernel supporting all ABIs for this architecture. 124 | 2. Build based on `defconfig` for Linux <= v3.7. 125 | 3. Build based on `rv32_defconfig` for Linux <= v6.7 and `defconfig` for 126 | Linux <= v5.0. 127 | 4. Building creates an EABI kernel with compat OABI support. Building an 128 | OABI-only kernel is NOT supported. The seccomp filter system will be missing. 129 | 5. "SPU" is not a real ABI. It indicates a Cell processor SPU (Synergistic 130 | Processing Unit). The ABI is really PPC64, but SPUs can only use a subset of 131 | syscalls. 132 | 6. "RV32" and "RV64" are not real ABIs, but rather ISAs. The RISC-V syscall 133 | ABI is the same for 32-bit and 64-bit (only register size differs). These 134 | names are only used for clarity. 135 | 136 | Runtime dependencies 137 | -------------------- 138 | 139 | External (non-Python) runtime dependencies are: 140 | 141 | - **Required**: `readelf` (from GNU binutils) is used to parse and extract ELF 142 | metadata such as symbols and sections. This is currently the only *compulsory* 143 | external dependency of Systrack. 144 | - Optional: `addr2line` (from GNU binutils) is used to extract location 145 | information from DWARF debug info. Without this program, Systrack will not 146 | output any information about syscall definition locations. 147 | - Optional: `rg` ([ripgrep][ripgrep]) is used for much faster recursive 148 | grepping of syscall definition locations within kernel sources when needed. 149 | Otherwise, a slower pure-Python implementation is used. 150 | - Optional: a working compiler toolchain and 151 | [kernel build dependencies](https://www.kernel.org/doc/html/latest/process/changes.html) 152 | are obviously needed if you want Systrack to *build* kernels from source. 153 | 154 | 155 | Limitations 156 | ----------- 157 | 158 | - Supported kernel images: Systrack works with regular *uncompressed* `vmlinux` 159 | ELF images and *needs* ELF symbols. 
Compressed and stripped kernel images are 160 | not supported. Tools such as 161 | [`vmlinux-to-elf`](https://github.com/marin-m/vmlinux-to-elf) can be used to 162 | uncompress and unstrip kernel images, after which Systrack will be able to 163 | analyze them. 164 | - Old kernel versions: Systrack was mainly designed for and tested on modern 165 | kernels (>= v4.0) and has not been tested on older kernels. It should still 166 | *somewhat* work on older kernels, but without the same level of guarantee on 167 | the correctness of the output. Support for old kernels may come gradually in 168 | the future. 169 | - Relocatable kernels: Systrack does not currently parse and apply ELF 170 | relocations. This means that Systrack does not support kernels using 171 | relocation entries for the syscall table. On some architectures (notably MIPS) 172 | if the kernel is relocatable the syscall table is relocated at startup and 173 | does not contain valid virtual addresses: Systrack will currently fail to 174 | analyze such kernels. 175 | 176 | 177 | Command line help 178 | ----------------- 179 | 180 | ```none 181 | $ systrack --help 182 | 183 | usage: systrack [OPTIONS...] 
[VMLINUX] 184 | 185 | Analyze a Linux kernel image and extract information about implemented syscalls 186 | 187 | positional arguments: 188 | VMLINUX path to vmlinux, if not inside KDIR or no KDIR supplied 189 | 190 | options: 191 | -h, --help show this help message and exit 192 | -k KDIR, --kdir KDIR kernel source directory 193 | -a ARCH, --arch ARCH kernel architecture/ABI combination; pass "help" for a list 194 | (default: autodetect) 195 | -b, --build configure and build kernel and exit 196 | -c, --config configure kernel and exit 197 | -C, --clean clean kernel sources (make distclean) and exit 198 | -x PREFIX, --cross PREFIX 199 | toolchain prefix for cross-compilation; use with -b/-c/-C 200 | -o OUTDIR, --out OUTDIR 201 | output directory for out-of-tree kernel build (make O=...); only 202 | meaningful with -b/-c/-C 203 | -f FMT, --format FMT output format: text, json or html (default: text) 204 | --absolute-paths output absolute paths instead of paths relative to KDIR 205 | --remap ORIG_KDIR replace ORIG_KDIR with the KDIR provided with -k/--kdir for paths 206 | obtained from ELF debug information; needed if the kernel was 207 | built with ORIG_KDIR as source directory instead of KDIR, and 208 | debug info contains absolute paths to ORIG_KDIR 209 | --checkout REF git checkout to REF inside KDIR before doing anything; the 210 | special value "auto" can be used to checkout to the tag 211 | corresponding to the detected kernel version from VMLINUX 212 | --disable-opt try building kernel with reduced/disabled optimizations for more 213 | reliable location results; only meaningful with -b 214 | -q, --quiet quietness level: 215 | -q = no info, -qq = no warnings, -qqq = no errors 216 | -qqqq = no standard error output whatsoever 217 | -v, --verbose verbosity level: 218 | -v = info, -vv = debug, -vvv = more debug 219 | -V, --version show version information and exit 220 | ``` 221 | 222 | --- 223 | 224 | *Copyright © 2023-2025 Marco Bonelli. 
Licensed under the GNU General Public License v3.0.* 225 | 226 | [license-badge]: https://img.shields.io/github/license/mebeim/systrack?color=blue 227 | [actions-badge]: https://img.shields.io/github/actions/workflow/status/mebeim/systrack/publish.yml?event=release&label=publish 228 | [actions-link]: https://github.com/mebeim/systrack/actions/workflows/publish.yml 229 | [pypi-badge]: https://img.shields.io/pypi/v/systrack 230 | [pypi-badge2]: https://img.shields.io/pypi/dm/systrack 231 | [pypi-systrack]: https://pypi.org/project/systrack/ 232 | [pypistats-systrack]: https://pypistats.org/packages/systrack 233 | [pypi-hatch]: https://pypi.org/project/hatch 234 | [ripgrep]: https://github.com/BurntSushi/ripgrep 235 | -------------------------------------------------------------------------------- /assets/github-social-card.xcf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mebeim/systrack/e45f94d06d39f162b0939f1f45b4913d37609dc6/assets/github-social-card.xcf -------------------------------------------------------------------------------- /assets/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mebeim/systrack/e45f94d06d39f162b0939f1f45b4913d37609dc6/assets/logo.png -------------------------------------------------------------------------------- /assets/logo.xcf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mebeim/systrack/e45f94d06d39f162b0939f1f45b4913d37609dc6/assets/logo.xcf -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = 'systrack' 3 | description = 'Linux kernel syscall implementation tracker' 4 | authors = [{name = 'Marco Bonelli'}, {name = 'Marco Bonelli', email = 'marco@mebeim.net'}] 5 | 
maintainers = [{name = 'Marco Bonelli'}, {name = 'Marco Bonelli', email = 'marco@mebeim.net'}] 6 | license = {text = 'GNU General Public License v3 (GPLv3)'} 7 | readme = 'README.md' 8 | platforms = 'any' 9 | requires-python = '>=3.8' 10 | dynamic = ['version'] 11 | keywords = ['systrack', 'linux', 'kernel', 'syscall', 'kconfig', 'elf', 'abi'] 12 | classifiers = [ 13 | 'Development Status :: 4 - Beta', 14 | 'Environment :: Console', 15 | 'Intended Audience :: Developers', 16 | 'Intended Audience :: Science/Research', 17 | 'Intended Audience :: System Administrators', 18 | 'License :: OSI Approved :: GNU General Public License v3 (GPLv3)', 19 | 'Natural Language :: English', 20 | 'Operating System :: OS Independent', 21 | 'Programming Language :: Python :: 3', 22 | 'Topic :: Security', 23 | 'Topic :: Software Development :: Embedded Systems', 24 | 'Topic :: Software Development :: Testing', 25 | 'Topic :: System :: Operating System Kernels :: Linux', 26 | 'Topic :: Utilities', 27 | ] 28 | dependencies = [ 29 | 'iced-x86~=1.21.0', 30 | 'jinja2~=3.1.2' 31 | ] 32 | 33 | [project.urls] 34 | Homepage = 'https://github.com/mebeim/systrack' 35 | Repository = 'https://github.com/mebeim/systrack.git' 36 | Changelog = 'https://github.com/mebeim/systrack/blob/master/CHANGELOG.md' 37 | 38 | [project.scripts] 39 | systrack = 'systrack.__main__:main' 40 | 41 | [build-system] 42 | requires = ['hatchling'] 43 | build-backend = 'hatchling.build' 44 | 45 | [tool.hatch.version] 46 | path = 'src/systrack/version.py' 47 | 48 | [tool.hatch.build] 49 | ignore-vcs = true 50 | include = ['src/systrack/templates/*'] 51 | 52 | [tool.hatch.build.targets.wheel] 53 | packages = ['src/systrack'] 54 | 55 | [tool.hatch.build.targets.sdist] 56 | include = ['src', 'CHANGELOG.md'] 57 | 58 | [tool.hatch.envs.default] 59 | python = '3' 60 | 61 | [tool.hatch.envs.test] 62 | dependencies = ['pytest'] 63 | 64 | [tool.ruff.lint] 65 | # Don't warn for multi-line statements 66 | ignore = ['E701'] 67 | 68 | 
[tool.ruff.lint.per-file-ignores] 69 | # Don't warn for star imports in these files 70 | 'arch/__init__.py' = ['F403', 'F405'] 71 | 'tests/*' = ['F403', 'F405'] 72 | -------------------------------------------------------------------------------- /src/systrack/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mebeim/systrack/e45f94d06d39f162b0939f1f45b4913d37609dc6/src/systrack/__init__.py -------------------------------------------------------------------------------- /src/systrack/__main__.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import logging 3 | import os 4 | import signal 5 | import sys 6 | 7 | from pathlib import Path 8 | from textwrap import TextWrapper 9 | 10 | from .arch import SUPPORTED_ARCHS, SUPPORTED_ARCHS_HELP 11 | from .kernel import Kernel, KernelVersionError, KernelArchError 12 | from .kernel import KernelWithoutSymbolsError, KernelMultiABIError 13 | from .output import output_syscalls 14 | from .utils import command_argv_to_string, command_available 15 | from .utils import eprint, enable_high_verbosity, enable_silent 16 | from .utils import gcc_version, git_checkout, maybe_rel, format_duration 17 | from .version import VERSION, VERSION_HELP 18 | 19 | def sigint_handler(_, __): 20 | sys.stderr.write('Caught SIGINT, stopping\n') 21 | sys.exit(1) 22 | 23 | def wrap_help(body: str) -> str: 24 | '''Wrap a string to 65 columns without breaking words for a nice --help 25 | output of the tool. 26 | ''' 27 | tx = TextWrapper(65, break_long_words=False, replace_whitespace=False) 28 | return '\n'.join(tx.fill(line) for line in body.splitlines() if line.strip()) 29 | 30 | def parse_args() -> argparse.Namespace: 31 | '''Parse and partially validate command line arguments through argparse. 32 | ''' 33 | ap = argparse.ArgumentParser( 34 | prog='systrack', 35 | usage='systrack [OPTIONS...] 
[VMLINUX]', 36 | description='Analyze a Linux kernel image and extract information about implemented syscalls', 37 | formatter_class=argparse.RawTextHelpFormatter 38 | ) 39 | 40 | ap.add_argument('vmlinux', metavar='VMLINUX', nargs='?', 41 | help=wrap_help('path to vmlinux, if not inside KDIR or no KDIR supplied')) 42 | ap.add_argument('-k', '--kdir', metavar='KDIR', 43 | help=wrap_help('kernel source directory')) 44 | ap.add_argument('-a', '--arch', metavar='ARCH', 45 | help=wrap_help('kernel architecture/ABI combination; pass "help" for a ' 46 | 'list (default: autodetect)')) 47 | ap.add_argument('-b', '--build', action='store_true', 48 | help=wrap_help('configure and build kernel and exit')) 49 | ap.add_argument('-c', '--config', action='store_true', 50 | help=wrap_help('configure kernel and exit')) 51 | ap.add_argument('-C', '--clean', action='store_true', 52 | help=wrap_help('clean kernel sources (make distclean) and exit')) 53 | ap.add_argument('-x', '--cross', metavar='PREFIX', 54 | help=wrap_help('toolchain prefix for cross-compilation; use with -b/-c/-C')) 55 | ap.add_argument('-o', '--out', metavar='OUTDIR', 56 | help=wrap_help('output directory for out-of-tree kernel build (make ' 57 | 'O=...); only meaningful with -b/-c/-C')) 58 | ap.add_argument('-f', '--format', metavar='FMT', 59 | choices=('text', 'json', 'html'), default='text', 60 | help=wrap_help('output format: text, json or html (default: text)')) 61 | ap.add_argument('--absolute-paths', action='store_true', 62 | help=wrap_help('output absolute paths instead of paths relative to KDIR')) 63 | ap.add_argument('--remap', metavar='ORIG_KDIR', 64 | help=wrap_help('replace ORIG_KDIR with the KDIR provided with ' 65 | '-k/--kdir for paths obtained from ELF debug information; needed ' 66 | 'if the kernel was built with ORIG_KDIR as source directory ' 67 | 'instead of KDIR, and debug info contains absolute paths to ' 68 | 'ORIG_KDIR')) 69 | ap.add_argument('--checkout', metavar='REF', 70 | 
def setup_logging(quietness: int, verbosity: int, colors: bool = True):
	'''Configure the root logger from the number of -q and -v options given on
	the command line.

	Any verbosity resets quietness to zero, and verbosity >= 3 additionally
	calls enable_high_verbosity(). The first level of quietness calls
	enable_silent() and is then consumed, so it does not raise the logging
	threshold by itself. The threshold passed to logging.basicConfig() is 30
	plus 10 per remaining quietness level, minus 10 per verbosity level, and
	never below 0.

	A log record factory wrapping the one currently installed is then set: it
	shortens level names to their first letter ("FATAL" for CRITICAL) and, if
	colors=True, also attaches a .color attribute (ANSI escape code) that the
	log format uses.
	'''
	previous_factory = logging.getLogRecordFactory()

	if verbosity > 0:
		# Being verbose and quiet at the same time makes no sense: -v wins
		quietness = 0
		if verbosity >= 3:
			enable_high_verbosity()

	if quietness >= 1:
		# First -q is handled by enable_silent(), only the others affect the
		# logging level computed below
		quietness -= 1
		enable_silent()

	if colors:
		fmt = '%(color)s[%(levelname)s] %(message)s\x1b[0m'
		palette = {
			logging.CRITICAL: '\x1b[1;31m',
			logging.ERROR   : '\x1b[31m',
			logging.WARNING : '\x1b[33m',
			logging.INFO    : '\x1b[32m',
			logging.DEBUG   : '\x1b[34m',
		}
	else:
		fmt = '[%(levelname)s] %(message)s'
		palette = None

	def record_factory(*args, **kwargs):
		record = previous_factory(*args, **kwargs)
		level = record.levelno

		if palette is not None:
			# Levels without a dedicated color get no escape code at all
			record.color = palette.get(level, '')

		if level == logging.CRITICAL:
			record.levelname = 'FATAL'
		else:
			record.levelname = record.levelname[0]

		return record

	threshold = max(30 + 10 * (quietness - verbosity), 0)
	logging.basicConfig(level=threshold, format=fmt)
	logging.setLogRecordFactory(record_factory)
def instantiate_kernel(*a, **kwa) -> Kernel:
	'''Instantiate the Kernel class with the given parameters, handling and
	printing possible errors.

	All arguments are forwarded untouched to Kernel(). On any of the known
	Kernel* errors a human-friendly explanation is printed through eprint()
	and the program exits with status 1.
	'''
	try:
		return Kernel(*a, **kwa)
	except KernelVersionError:
		eprint('Unable to determine kernel version!')
		eprint('Did you specify a valid kernel source directory (--kdir) or vmlinux path?')
		sys.exit(1)
	except KernelArchError as e:
		eprint(str(e))
		eprint(f"See '{sys.argv[0]} --arch help' for more information")
		sys.exit(1)
	except KernelWithoutSymbolsError:
		eprint('The provided kernel image has no symbols, which are necessary for Systrack to work.')
		eprint('You can try unstripping the image with tools such as "vmlinux-to-elf".')
		sys.exit(1)
	except KernelMultiABIError as e:
		# First exception argument is skipped, the other two are the detected
		# Arch subclass and the list of detected ABI names
		arch_class, abis = e.args[1:]
		eprint(f'Detected architecture: {arch_class.name}')
		eprint(f'Detected ABIs: {", ".join(abis)}')
		eprint('This kernel was built with support for multiple syscall ABIs.')
		eprint('Select one using --arch NAME (see --arch HELP for more info).')
		sys.exit(1)

def main() -> int:
	'''Command line entry point.

	Parses arguments, validates their combination, optionally checks out,
	cleans, configures or builds the kernel sources, and otherwise analyzes
	the vmlinux image and outputs the syscalls found in the requested format.

	Returns the process exit status: 0 on success, 1 on usage or analysis
	errors, 127 if a required external command (gcc, readelf) is unavailable.
	'''
	signal.signal(signal.SIGINT, sigint_handler)

	args = parse_args()
	setup_logging(args.quiet, args.verbose, os.isatty(sys.stderr.fileno()))

	logging.debug('Systrack v%s', VERSION)
	logging.debug('Command line: systrack %s', command_argv_to_string(sys.argv[1:]))

	arch_name = args.arch

	if arch_name is not None:
		arch_name = arch_name.lower()

		if arch_name not in SUPPORTED_ARCHS:
			if arch_name not in ('help', '?'):
				eprint(f'Unsupported architecture/ABI combination: {arch_name}')
				eprint('See --arch HELP for a list')
				return 1

			eprint(SUPPORTED_ARCHS_HELP)
			return 0

	if not args.kdir and not args.vmlinux:
		eprint('Need to specify a kernel source direcory and/or path to vmlinux')
		eprint('See --help for more information')
		return 1

	# Every action that operates on the kernel sources needs KDIR. This
	# includes --clean: without this check, "--clean VMLINUX" would go on to
	# instantiate a Kernel with neither sources nor image.
	if not args.kdir and (args.checkout or args.config or args.build or args.clean):
		eprint('Need to specify a kernel source direcory (--kdir)')
		return 1

	if not arch_name and (args.config or args.build):
		eprint('Need to specify an architecture/ABI combination (--arch)')
		eprint('See --arch HELP for a list')
		return 1

	cross   = args.cross or ''
	vmlinux = Path(args.vmlinux) if args.vmlinux else None
	kdir    = Path(args.kdir) if args.kdir else None
	outdir  = Path(args.out) if args.out else None
	rdir    = Path(args.remap) if args.remap else None

	# Checkout before building only if not set to auto
	if args.checkout and args.checkout != 'auto':
		eprint('Checking out to', args.checkout)
		git_checkout(kdir, args.checkout)

	if args.clean or args.config or args.build:
		if args.out:
			out = Path(args.out)

			try:
				if out.exists() and not out.is_dir():
					eprint(f'Output directory "{args.out}" already exists and is not a directory')
					return 1

				out.mkdir(exist_ok=True)
			except Exception as e:
				eprint(f'Failed to create output directory "{args.out}": {str(e)}')
				return 1

		# Check that GCC is available and log its version for our own sanity to
		# avoid mixing up toolchains
		gcc_cmd = cross + 'gcc'

		if not command_available(gcc_cmd):
			eprint(f'Command "{gcc_cmd}" not found')
			eprint('Make sure your cross-compilation toolchain is in $PATH')
			return 127

		if args.config or args.build:
			eprint('Compiler:', gcc_version(gcc_cmd))

		kernel = instantiate_kernel(arch_name, kdir=kdir, outdir=outdir, toolchain_prefix=cross)

		if args.build:
			eprint('Cleaning kernel sources')
			kernel.clean()
			eprint('Detected kernel version:', kernel.version_str)
			eprint('Configuring kernel')
			kernel.configure()
			eprint('Building kernel (might take a while)')
			elapsed = kernel.build(args.disable_opt)
			eprint('Build took', format_duration(elapsed))
		elif args.config:
			eprint('Cleaning kernel sources')
			kernel.clean()
			eprint('Detected kernel version:', kernel.version_str)
			eprint('Configuring kernel')
			kernel.configure()
			eprint('Done')
		elif args.clean:
			eprint('Cleaning kernel sources')
			kernel.clean()
			eprint('Done')

		return 0

	# Auto-checkout to the correct tag is only possible if we already have a
	# vmlinux to extract the version from
	if args.checkout == 'auto' and not vmlinux:
		eprint('Cannot perform auto-checkout without a vmlinux image!')
		return 1

	if not vmlinux:
		vmlinux = kdir / 'vmlinux'

	if not vmlinux.is_file():
		eprint(f'Unable to find vmlinux at "{vmlinux}".')
		eprint('Build the kernel or provide a valid path.')
		return 1

	if not command_available('readelf'):
		eprint('Command "readelf" unavailable, can\'t do much without it!')
		return 127

	kernel = instantiate_kernel(arch_name, vmlinux, kdir, outdir, rdir)
	eprint('Detected kernel version:', kernel.version_str)

	if args.checkout == 'auto':
		assert kernel.version_source == 'vmlinux'
		eprint('Checking out to', kernel.version_tag)
		git_checkout(kdir, kernel.version_tag)

	if not kernel.syscalls:
		return 1

	# Apply a couple of transformations that are independent of the chosen
	# output format, and also check how many syscalls do not have location or
	# signature information.

	syscalls  = kernel.syscalls
	kdir      = kernel.kdir
	abs_paths = args.absolute_paths
	n_no_loc  = 0
	n_no_sig  = 0
	n_grepped = 0

	for sc in kernel.syscalls:
		if sc.file is None:
			n_no_loc += 1
		else:
			if kdir and not abs_paths:
				sc.file = maybe_rel(sc.file, kdir)

		# A missing signature is only counted when a KDIR is available
		if kdir and sc.signature is None:
			n_no_sig += 1

		if sc.grepped_location:
			n_grepped += 1

	eprint('Found', len(syscalls), 'implemented syscalls')

	if n_grepped:
		eprint('Found', n_grepped, 'definition location' + ('s' if n_grepped > 1 else ''), 'through grepping')
	if n_no_loc:
		eprint('Could not find definition location for', n_no_loc, 'syscall' + ('s' if n_no_loc > 1 else ''))
	if n_no_sig:
		eprint('Could not extract signature for', n_no_sig, 'syscall' + ('s' if n_no_sig > 1 else ''))

	eprint()
	output_syscalls(kernel, args.format)
	return 0
326 | if __name__ == '__main__': 327 | sys.exit(main()) 328 | -------------------------------------------------------------------------------- /src/systrack/arch/__init__.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from typing import Optional, Type, Tuple, List 3 | 4 | from ..elf import ELF 5 | from ..type_hints import KernelVersion 6 | 7 | from .arch_base import Arch 8 | from .arm import ArchArm 9 | from .arm64 import ArchArm64 10 | from .mips import ArchMips 11 | from .powerpc import ArchPowerPC 12 | from .riscv import ArchRiscV 13 | from .x86 import ArchX86 14 | 15 | ARCH_CLASSES = ( 16 | ArchArm, 17 | ArchArm64, 18 | ArchMips, 19 | ArchPowerPC, 20 | ArchRiscV, 21 | ArchX86, 22 | ) 23 | 24 | # NOTE: For the sake of mental sanity, try keeping abi= the same name as the one 25 | # in the *.tbl files in the kernel sources. 26 | SUPPORTED_ARCHS = { 27 | 'x86' : lambda v: ArchX86(v, abi='ia32', bits32=True), # "i386" ABI 28 | 'x86-64' : lambda v: ArchX86(v, abi='x64'), # "64" ABI 29 | 'x86-64-x32' : lambda v: ArchX86(v, abi='x32'), 30 | 'x86-64-ia32' : lambda v: ArchX86(v, abi='ia32'), 31 | 'arm' : lambda v: ArchArm(v, abi='eabi'), 32 | 'arm-oabi' : lambda v: ArchArm(v, abi='oabi'), 33 | 'arm64' : lambda v: ArchArm64(v, abi='aarch64'), 34 | 'arm64-aarch32': lambda v: ArchArm64(v, abi='aarch32'), 35 | 'mips' : lambda v: ArchMips(v, abi='o32', bits32=True), 36 | 'mips64' : lambda v: ArchMips(v, abi='n64'), 37 | 'mips64-n32' : lambda v: ArchMips(v, abi='n32'), 38 | 'mips64-o32' : lambda v: ArchMips(v, abi='o32'), 39 | 'powerpc' : lambda v: ArchPowerPC(v, abi='ppc32', bits32=True), # "32" ABI 40 | 'powerpc64' : lambda v: ArchPowerPC(v, abi='ppc64'), # "64" ABI 41 | 'powerpc64-32' : lambda v: ArchPowerPC(v, abi='ppc32'), # "32" ABI 42 | 'powerpc64-spu': lambda v: ArchPowerPC(v, abi='spu'), 43 | 'riscv' : lambda v: ArchRiscV(v, abi='rv32', bits32=True), 44 | 'riscv64' : lambda v: ArchRiscV(v, abi='rv64'), 45 | 
'riscv64-32' : lambda v: ArchRiscV(v, abi='rv32'), 46 | } 47 | 48 | ARCH_ALIASES = ( 49 | # name alias 50 | ('x86' , 'i386' ), 51 | ('x86' , 'ia32' ), 52 | ('x86-64' , 'x64' ), 53 | ('x86-64-x32' , 'x32' ), 54 | ('x86-64-ia32' , 'ia32-64' ), 55 | ('arm' , 'arm-eabi' ), 56 | ('arm' , 'eabi' ), 57 | ('arm-oabi' , 'oabi' ), 58 | ('arm64' , 'aarch64' ), 59 | ('arm64-aarch32', 'aarch32' ), 60 | ('mips' , 'mips32' ), 61 | ('mips' , 'o32' ), 62 | ('mips64' , 'n64' ), 63 | ('mips64-n32' , 'n32' ), 64 | ('mips64-o32' , 'o32-64' ), 65 | ('powerpc' , 'ppc' ), 66 | ('powerpc' , 'ppc32' ), 67 | ('powerpc64' , 'ppc64' ), 68 | ('powerpc64-32' , 'ppc64-32' ), 69 | ('powerpc64-spu', 'ppc64-spu' ), 70 | ('powerpc64-spu', 'spu' ), 71 | ('riscv' , 'riscv32' ), 72 | ('riscv' , 'rv32' ), 73 | ('riscv64' , 'rv64' ), 74 | ('riscv64-32' , 'rv64-32' ), 75 | ) 76 | 77 | SUPPORTED_ARCHS.update({alias: SUPPORTED_ARCHS[arch] for arch, alias in ARCH_ALIASES}) 78 | 79 | SUPPORTED_ARCHS_HELP = '''\ 80 | Supported architectures and ABIs (values are case-insensitive): 81 | 82 | Value Aliases Arch Kernel Syscall ABI Build based on Notes 83 | ------------------------------------------------------------------------------------------------ 84 | arm arm-eabi, eabi ARM 32-bit 32-bit EABI multi_v7_defconfig [2] 85 | arm-oabi oabi ARM 32-bit 32-bit OABI multi_v7_defconfig [2,4] 86 | ------------------------------------------------------------------------------------------------ 87 | arm64 aarch64 ARM 64-bit 64-bit AArch64 defconfig 88 | arm64-aarch32 aarch32 ARM 64-bit 32-bit AArch32 defconfig [1] 89 | ------------------------------------------------------------------------------------------------ 90 | mips mips32, o32 MIPS 32-bit 32-bit O32 defconfig 91 | mips64 n64 MIPS 64-bit 64-bit N64 ip27_defconfig [1] 92 | mips64-n32 n32 MIPS 64-bit 64-bit N32 ip27_defconfig [1] 93 | mips64-o32 o32-64 MIPS 64-bit 32-bit O32 ip27_defconfig [1] 94 | 
------------------------------------------------------------------------------------------------ 95 | powerpc ppc, ppc32 PowerPC 32-bit 32-bit PPC32 ppc64_defconfig 96 | powerpc64 ppc64 PowerPC 64-bit 64-bit PPC64 ppc64_defconfig [1] 97 | powerpc64-32 ppc64-32 PowerPC 64-bit 32-bit PPC32 ppc64_defconfig [1] 98 | powerpc64-spu ppc64-spu, spu PowerPC 64-bit 64-bit "SPU" ppc64_defconfig [1,5] 99 | ------------------------------------------------------------------------------------------------ 100 | riscv riscv32, rv32 RISC-V 32-bit 32-bit "RV32" defconfig + 32-bit.config [3,6] 101 | riscv64 rv64 RISC-V 64-bit 64-bit "RV64" defconfig [1,6] 102 | riscv64-32 rv64-32 RISC-V 64-bit 32-bit "RV32" defconfig [1,6] 103 | ------------------------------------------------------------------------------------------------ 104 | x86 i386, ia32 x86 32-bit 32-bit IA32 i386_defconfig 105 | x86-64 x64 x86 64-bit 64-bit x86-64 x86_64_defconfig [1] 106 | x86-64-x32 x32 x86 64-bit 64-bit x32 x86_64_defconfig [1] 107 | x86-64-ia32 ia32-64 x86 64-bit 32-bit IA32 x86_64_defconfig [1] 108 | 109 | [1] Building creates a kernel supporting all ABIs for this architecture. 110 | [2] Build based on "defconfig" for Linux <= v3.7. 111 | [3] Build based on "rv32_defconfig" for Linux <= v6.7 and "defconfig" for Linux <= v5.0. 112 | [4] Building creates an EABI kernel with compat OABI support. Building an OABI-only kernel is 113 | NOT supported. The seccomp filter system will be missing. 114 | [5] "SPU" is not a real ABI. It indicates a Cell processor SPU (Synergistic Processing Unit). 115 | The ABI is really PPC64, but SPUs can only use a subset of syscalls. 116 | [6] "RV32" and "RV64" are not real ABIs, but rather ISAs. The RISC-V syscall ABI is the same 117 | for 32-bit and 64-bit (only register size differs). These names are only used for clarity. 
def arch_from_name(name: str, kernel_version: KernelVersion) -> Arch:
	'''Build the Arch subclass instance registered in SUPPORTED_ARCHS under
	the given human-friendly name (--arch) for the given kernel version. The
	name is looked up as is, so it should be already validated.
	'''
	factory = SUPPORTED_ARCHS[name]
	return factory(kernel_version)

def arch_from_vmlinux(vmlinux: ELF) -> Optional[Tuple[Type[Arch],bool,List[str]]]:
	'''Detect architecture and supported ABIs of a vmlinux ELF by asking each
	class in ARCH_CLASSES, in order, whether it matches. Returns the first
	matching Arch subclass followed by whatever its .match() returned (bitness
	and list of detected ABIs), or None (after logging a fatal error) if no
	class matches.
	'''
	for arch_class in ARCH_CLASSES:
		detected = arch_class.match(vmlinux)
		if not detected:
			continue

		return (arch_class, *detected)

	logging.fatal('Unknown or unsupported architecture: e_machine = %d, '
		'e_flags = 0x%x', vmlinux.e_machine, vmlinux.e_flags)
	return None
class Arch(ABC):
	'''Abstract description of a kernel architecture/ABI combination.

	Class attributes hold per-arch defaults that subclasses override, and
	the methods are hooks used to recognize a vmlinux, pick and name syscall
	symbols, and detect syscalls that are not really implemented.
	'''

	# Directory name for this arch in the kernel source, under arch/
	name: Optional[str] = None

	# Whether this arch is 32-bits or not
	bits32: bool = False

	# Selected ABI to inspect/build for
	abi: Optional[str] = None

	# Whether the selected ABI is 32-bits or not
	abi_bits32: bool = False

	# Whether this architecture makes use of function descriptors for function
	# pointers or not
	uses_function_descriptors: bool = False

	# Are we looking for compat syscalls (COMPAT_SYSCALL_DEFINEn)? Or, in other
	# words, is this not the "main" ABI of the kernel we're analyzing?
	compat: bool = False

	# Kernel version that we are interested in analyzing
	kernel_version: Optional[KernelVersion] = None

	# Make targets to run (one by one in the specified order) to obtain the base
	# config to build the kernel with
	config_targets: Tuple[str,...] = ('defconfig',)

	# Name of the syscall table symbol to look for
	syscall_table_name: Optional[str] = 'sys_call_table'

	# Base syscall number (actual syscall number is base + idx in syscall table)
	# NOTE: easiest way to check this is to just compile a binary that makes a
	# raw syscall for the right arch/ABI. The arch_syscall_addr() kernel
	# function can also be useful to inspect.
	syscall_num_base: int = 0

	# Syscall number destination (register name, None if no register is used,
	# e.g. arm/OABI where the number is part of the swi instruction itself).
	# Subclasses must override this.
	syscall_num_reg: Optional[str] = None

	# Registers for syscall arguments. Subclasses must override this.
	syscall_arg_regs: Optional[Tuple[str, ...]] = None

	# Additional kconfig options to set
	kconfig: VersionedDict = VersionedDict()

	# Arch-specific syscall kconfig options dependency (supersedes global
	# arch-agnostic KCONFIG_SYSCALL_DEPS (see the comment in kconfig_options.py
	# to know how to fill this)
	kconfig_syscall_deps: VersionedDict = VersionedDict()

	def __init__(self, kernel_version: KernelVersion, abi: str, bits32: bool):
		'''Store the kernel version, selected ABI and arch bitness on the
		instance, shadowing the class-level defaults.
		'''
		self.kernel_version = kernel_version
		self.bits32 = bits32
		self.abi = abi

	def __repr__(s):
		return (f'{s.__class__.__name__}(name={s.name!r}, '
			f'bits32={s.bits32}, abi={s.abi!r}, compat={s.compat!r}, ...)')

	@staticmethod
	@abstractmethod
	def match(vmlinux: ELF) -> Optional[Tuple[bool,List[str]]]:
		'''Determine if the given vmlinux ELF was built for this architecture,
		and if so return the bitness as boolean (True if 32-bit) and a list of
		detected ABIs. This is useful to determine which Arch subclass to
		instantiate (if any).
		'''
		pass

	@abstractmethod
	def matches(self, vmlinux: ELF) -> bool:
		'''Determine whether this architecture matches the one of the provided
		vmlinux (machine and bits). This is useful as a sanity check, e.g. if
		a subclass is instantiated and then we want to use it on an unknown
		vmlinux (or multiple ones).
		'''
		pass

	def adjust_abi(self, vmlinux: ELF):
		'''Adjust internal ABI-specific attributes that can be ambiguous for a
		certain ABI selection (e.g. syscall_table_name) to the correct value
		based on the provided vmlinux.
		'''
		pass

	def prefer_compat(self, a: Symbol, b: Symbol) -> Optional[Symbol]:
		'''If only one of the two symbols is "compat", pick the correct one
		according to self.compat. Return None if both or neither of them are
		"compat", meaning there is no preference.
		'''
		ca = 'compat' in a.name
		cb = 'compat' in b.name

		if ca ^ cb:
			if self.compat:
				return a if ca else b
			return b if ca else a
		return None

	def preferred_symbol(self, a: Symbol, b: Symbol) -> Symbol:
		'''Decide which symbol should be preferred when multiple syscall symbols
		point to the same virtual address. By default, just prefer symbols with
		the classic "sys_" or "compat_sys_" prefix over anything else.
		Subclasses can override this to implement their own preferences.

		For example, on x86-64 with IA32 emulation support, __x64_sys_getpid and
		__ia32_sys_getpid point to the same vaddr. We prefer __x64_sys_getpid if
		we are inspecting the 64-bit syscall table, and __ia32_sys_getpid if we
		are inspecting the 32-bit one.

		This does not have any meaningful effect on the correctness of the
		output, since at the end of the day if multiple symbols point to the
		same vaddr, they are in fact the same function, and the location
		information will also be correct regardless of which one is picked.
		'''
		if a.name.startswith('sys_'): return a
		if b.name.startswith('sys_'): return b
		return a if a.name.startswith('compat_sys_') else b

	def symbol_is_ni_syscall(self, sym: Symbol) -> bool:
		'''Determine whether the symbol name identifies the special
		"not implemented" syscall a.k.a. ni_syscall.

		There can be multiple ni_syscall implementations with different
		prefixes and at different vaddrs (go figure). Make sure to get all of
		them (readelf -s vmlinux | grep ni_syscall).

		For example on x86 v5.0+:

			sys_ni_syscall
			__x64_sys_ni_syscall
			__ia32_sys_ni_syscall

		By default, also avoid ftrace-related _eil_addr_XXX symbols generated
		with CONFIG_FTRACE_SYSCALLS=y.
		'''
		# This generic approach should be good enough
		return (
			sym.type == 'FUNC'
			and anysuffix(sym.name, 'sys_ni_syscall', 'compat_ni_syscall')
			# Avoid ftrace-related symbols
			and not sym.name.startswith('_eil_addr_')
			# Avoid KCFI-related symbols
			and not sym.name.startswith('__cfi_')
			and not sym.name.startswith('__pfx_')
		)

	def skip_syscall(self, sc: Syscall) -> bool:
		'''Determine whether to skip this syscall.

		Kernels compiled with support for multiple ABIs might share the same
		syscall table between two or more ABIs, and in such case we want to
		filter out syscalls that aren't for the ABI we are currently inspecting.

		E.G. on x86-64 the 64 and x32 ABI share the same syscall table
		(sys_call_table) before v5.4, which also holds some x32 compat syscalls
		that are only available for applications using the x32 ABI.
		'''
		return False

	def translate_syscall_symbol_name(self, sym_name: str) -> str:
		'''Translate symbol name into syscall name, potentially stripping or
		replacing arch-specific suffixes/prefixes from the symbol name, in order
		to be able to correctly identify a syscall. Overriding this shouldn't be
		needed in most cases.

		This default implementation just removes prefixes/suffixes that are not
		common enough to be identified as common prefixes and stripped
		automatically.
		'''
		return noprefix(sym_name, 'ptregs_sys_', 'ptregs_compat_sys_',
			'__se_compat_sys_', '__se_sys_', '__sys_', 'compat_sys_')

	def _normalize_syscall_name(self, name: str) -> str:
		'''Normalize a syscall name possibly stripping unneeded arch-specific
		prefixes/suffixes (e.g., "ia32_", "aarch32_", "oabi_", "ppc_" etc.).
		These are prefixes/suffixes that are ACTUALLY PRESENT IN THE SOURCE,
		and not just in the symbol name.
		'''
		return name

	def normalize_syscall_name(self, name: str) -> str:
		'''Normalize a syscall name removing unneeded prefixes and suffixes.
		These are prefixes/suffixes that are ACTUALLY PRESENT IN THE SOURCE,
		and not just in the symbol name.
		'''
		# NOTE: subclasses should NOT override this method and override
		# ._normalize_syscall_name() above instead, so that common prefixes are
		# always stripped first.

		# In theory we could also remove the trailing "16" from 16-bit UID
		# syscalls (setuid16, chown16, etc.) since it's not the real syscall
		# name, but that'd make the output a bit confusing because we'd have
		# both 16-bit and 32-bit UID syscalls with the same names, so let's
		# avoid it.
		#name = nosuffix(name, '16')

		# Y2038 patches rename syscalls that deal with time adding a "_time64"
		# or "_time32" suffix to distinguish whether they use 64-bit time
		# structs (e.g. `struct __kernel_timespec`) or 32-bit time structs (e.g.
		# `struct old_timespec32`). The suffix is shortened to just "64" or "32"
		# if the syscall name already ends in "time". This suffix is independent
		# of the arch, so strip it regardless.
		#
		# In v5.1 a bunch of 64-bit time syscalls were added to 32-bit archs
		# with some exceptions (notably riscv).
		#
		#     SYSCALL_DEFINE5(recvmmsg_time32, ...) -> recvmmsg
		#     SYSCALL_DEFINE2(clock_adjtime32, ...) -> clock_adjtime
		#
		name = nosuffix(name, '_time32', '_time64')
		if name.endswith('time32') or name.endswith('time64'):
			name = name[:-2]

		# Some architectures have a "sys32_" or "32_" prefix for... whatever
		# annoying reason (e.g. v5.1 MIPS 64bit o32). Stripping it regardless of
		# arch seems fine, so do it.
		#
		#     asmlinkage long sys32_sync_file_range(...) -> sync_file_range
		#     SYSCALL_DEFINE4(32_truncate64, ...)        -> truncate64
		#
		name = noprefix(name, '32_', 'sys32_')

		# Some architectures have an "old_" prefix for old syscalls which have
		# been superseded by new ones. There is also stuff like "oldumount"
		# (v5.18 ARM), but that's actually a different syscall and the kernel
		# also has "umount" under a different number, so leave it be.
		#
		#     SYSCALL_DEFINE2(old_getrlimit, ...) -> getrlimit
		#     SYSCALL_DEFINE1(oldumount, ...)     -> oldumount (leave it be)
		#
		name = noprefix(name, 'old_')
		return self._normalize_syscall_name(name)

	def _dummy_syscall_code(self, sc: Syscall, vmlinux: ELF) -> Optional[bytes]:
		'''Determine whether a syscall has a dummy implementation (e.g. one that
		only does `return -ENOSYS/-EINVAL`). If this is the case, return the
		machine code of the syscall, otherwise None.
		'''
		return None

	def is_dummy_syscall(self, sc: Syscall, vmlinux: ELF,
			ni_sym: Optional[Symbol]=None, ni_code: Optional[bytes]=None) -> bool:
		'''Determine whether a syscall has a dummy implementation (e.g. one that
		only does `return -ENOSYS/-EINVAL`). Try matching the vaddr or code of a
		known ni_syscall symbol first, otherwise fall back to arch-specific
		logic.

		NOTE: this is just a wrapper around ._dummy_syscall_code() that also
		logs some useful info in case a dummy syscall is detected. Subclasses
		should only override ._dummy_syscall_code().
		'''
		if ni_sym is not None:
			if sc.symbol.real_vaddr == ni_sym.real_vaddr:
				logging.info('Syscall %s (%s) is not really implemented: '
					'vaddr matches %s', sc.name, sc.symbol.name,
					ni_sym.name)
				return True

			# Cache ni_syscall code for speed as this function will definitely
			# be called multiple times for the same ni_syscall.
			# NOTE(review): ni_code is only meaningful together with ni_sym,
			# whose name is logged below.
			if ni_code is not None:
				code = vmlinux.vaddr_read(sc.symbol.real_vaddr, len(ni_code))
				if code == ni_code:
					logging.info('Syscall %s (%s) is not really implemented: '
						'code matches %s', sc.name, sc.symbol.name,
						ni_sym.name)
					return True

		code = self._dummy_syscall_code(sc, vmlinux)
		if code is None:
			return False

		logging.info('Syscall %s (%s) is not really implemented: dummy '
			'implementation: %s', sc.name, sc.symbol.name, code.hex())
		return True

	def adjust_syscall_number(self, number: int) -> int:
		'''Adjust the number for the given syscall according to any
		arch-specific quirk there might be (e.g. PowerPC with its interleaved
		syscall numbers).
		'''
		return number

	def extract_syscall_vaddrs(self, vmlinux: ELF) -> Dict[int,int]:
		'''Extract virtual addresses of syscall functions. Implemented in case
		this isn't just as simple as looking at the addresses in the syscall
		table (e.g., there might not be one to begin with).
		'''
		logging.error("Sorry, don't know how to extract syscall vaddrs for this arch!")
		return {}

	def extract_esoteric_syscalls(self, vmlinux: ELF) -> EsotericSyscall:
		'''Extract weird arch-specific syscalls not in the syscall table: there
		isn't much else to do except either manually list these (if they are
		always present) or perform static binary analysis.

		The returned value is a list of tuples of the form: (number, name,
		symbol_name, signature, kconfig_opts).

		NOTE: the symbol_name that is returned needs to exist in the given
		vmlinux.
		'''
		return []

	def syscall_def_regexp(self, syscall_name: Optional[str]=None) -> Optional[str]:
		'''Return a regexp capable of matching syscall definitions using
		arch-specific SYSCALL_DEFINEx macros with weird names or arch-specific
		asmlinkage function name prefixes. If syscall_name is given, return a
		regexp to match this syscall definition exactly, otherwise just a
		generic one.

		With syscall_name: the returned regexp should match a macro call up to
		and **including** the syscall name plus a word boundary or any useful
		delimiter after the name to match it completely.
		E.g.: r'SYSCALL_DEFINE\\d\\(name\\b' or r'asmlinkage long sys_name\\('.

		Without syscall_name: the returned regexp should match the macro call up
		to and **including** the first open parenthesis.
		E.g.: r'SYSCALL_DEFINE\\d\\(' or r'asmlinkage long sys_\\w+\\('.
		'''
		# Dev note: the \\ above are because that's a docstring (lol), you
		# obviously only need one in the regexp itself with the r'' syntax.
		return None
336 | return None 337 | -------------------------------------------------------------------------------- /src/systrack/arch/arm.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple, List, Optional 2 | 3 | from ..elf import Symbol, ELF, E_MACHINE, E_FLAGS 4 | from ..kconfig_options import VERSION_INF 5 | from ..syscall import Syscall 6 | from ..type_hints import KernelVersion, EsotericSyscall 7 | from ..utils import VersionedDict, noprefix, nosuffix 8 | 9 | from .arch_base import Arch 10 | 11 | class ArchArm(Arch): 12 | name = 'arm' 13 | bits32 = True 14 | abi_bits32 = True 15 | syscall_arg_regs = ('r0', 'r1', 'r2', 'r3', 'r4', 'r5', 'r6') 16 | 17 | kconfig = VersionedDict(( 18 | # kexec_load 19 | ((2,6,21), VERSION_INF, 'KEXEC=y' , ['PM_SLEEP_SMP=y', 'MMU=y']), 20 | # seccomp 21 | ((2,6,37), (5,10) , 'SECCOMP=y', []), 22 | # No NUMA support => no mbind, migrate_pages, {get,set}_mempolicy 23 | )) 24 | 25 | def __init__(self, kernel_version: KernelVersion, abi: str, bits32: bool = True): 26 | assert bits32, f'{self.__class__.__name__} is 32-bit only' 27 | super().__init__(kernel_version, abi, True) 28 | assert self.bits32 and self.abi_bits32 29 | assert self.abi in ('eabi', 'oabi') 30 | 31 | if self.kernel_version >= (3,7): 32 | # We want a modern-enough processor for which SMP=y by default 33 | self.config_targets = ('multi_v7_defconfig',) 34 | else: 35 | # TODO: not sure which config is best for < 3.7, but defconfig 36 | # definitely isn't that good, we might be missing some syscalls e.g. 37 | # kexec if SMP=n, so warn about it. This is something to think about 38 | # when we get around supporting such kernel versions. 39 | self.config_targets = ('defconfig',) 40 | 41 | if self.abi == 'eabi': 42 | # Apparently OABI_COMPAT is on by default on old kernels (e.g. 4.0), 43 | # so disable it if not needed, or we're gonna build a kernel with 44 | # no seccomp. 
45 | self.kconfig.add((2,6,16), VERSION_INF, 'OABI_COMPAT=n', []) 46 | self.syscall_num_reg = 'r7' 47 | elif self.abi == 'oabi': 48 | self.syscall_num_base = 0x900000 49 | # No register, number passed as immediate to the SWI instruction 50 | self.syscall_num_reg = 'swi ' 51 | 52 | # Building an old OABI-only kernel is annoying. Assume EABI + compat 53 | # OABI (OABI_COMPAT=y) and just build with support for both ABIs. 54 | # FIXME: this will disable the seccomp syscall. Configure for an 55 | # OABI-only kernel here in the future... 56 | self.kconfig.add((2,6,16), VERSION_INF, 'OABI_COMPAT=y', ['AEABI=y', 'THUMB2_KERNEL=n']) 57 | 58 | @staticmethod 59 | def match(vmlinux: ELF) -> Optional[Tuple[bool,List[str]]]: 60 | if vmlinux.e_machine != E_MACHINE.EM_ARM: 61 | return None 62 | 63 | assert vmlinux.bits32, 'EM_ARM 64-bit? WAT' 64 | 65 | if 'sys_oabi_call_table' in vmlinux.symbols: 66 | abis = ['eabi', 'oabi'] 67 | else: 68 | # For EABI, e_flags in the ELF header should tell us the EABI 69 | # version (assuming it is set). 70 | if (vmlinux.e_flags & E_FLAGS.EF_ARM_EABI_MASK) != 0: 71 | abis = ['eabi'] 72 | abis = ['oabi'] 73 | 74 | return True, abis 75 | 76 | def matches(self, vmlinux: ELF) -> bool: 77 | return vmlinux.bits32 and vmlinux.e_machine == E_MACHINE.EM_ARM 78 | 79 | def adjust_abi(self, vmlinux: ELF): 80 | # We could be dealing with an EABI + compat OABI kernel or an 81 | # EABI/OABI-only kernel. In the former case, we'll need to select the 82 | # compat syscall table. 
def _dummy_syscall_code(self, sc: Syscall, vmlinux: ELF) -> Optional[bytes]:
	# Recognize a "not implemented" stub handler. Only a function that is
	# exactly 6 bytes long and consists of these two instructions qualifies:
	#
	#     f06f 00XX    mvn.w r0, #XX
	#     4770         bx lr
	#
	# where XX is either 0x15 (#21, i.e. EINVAL - 1) or 0x25 (#37, i.e.
	# ENOSYS - 1). Taken from sys_fork on v5.0 multi_v7_defconfig with MMU=n.
	#
	# Returns the bytes of the handler on match, None otherwise.
	#
	symbol = sc.symbol
	if symbol.size == 6:
		body = vmlinux.read_symbol(symbol)
		# Same halfwords as in the listing above, each one byte-swapped
		if body == b'\x6f\xf0\x15\x00\x70\x47' or body == b'\x6f\xf0\x25\x00\x70\x47':
			return body
	return None
def syscall_def_regexp(self, syscall_name: Optional[str]=None) -> Optional[str]:
	# Build a regular expression matching old-style OABI syscall definitions
	# of the form `asmlinkage [unsigned] <type> sys_oabi_<name>(`.
	#
	# Only the 'oabi' ABI has such definitions: None is returned for any
	# other ABI. Without a syscall_name the expression matches any
	# sys_oabi_* function; with one, it matches that syscall only (the name
	# may be given with or without the 'sys_oabi_' prefix).
	#
	if self.abi != 'oabi':
		return None

	if syscall_name is None:
		return r'\basmlinkage\s*(unsigned\s+)?\w+\s*sys_oabi_\w+\s*\('

	if not syscall_name.startswith('sys_oabi_'):
		return rf'\basmlinkage\s*(unsigned\s+)?\w+\s*sys_oabi_{syscall_name}\s*\('

	# Already prefixed: use the name verbatim
	return rf'\basmlinkage\s*(unsigned\s+)?\w+\s*{syscall_name}\s*\('
@staticmethod
def match(vmlinux: ELF) -> Optional[Tuple[bool,List[str]]]:
	# Tell whether vmlinux is an arm64 kernel and, if so, which ABIs it can
	# serve. Returns None if the ELF machine is not EM_AARCH64, otherwise a
	# (bits32, abis) tuple where bits32 is always False. The 'aarch64' ABI is
	# always listed; 'aarch32' is added only when the compat syscall table
	# (compat_sys_call_table) is among the symbols.
	#
	if vmlinux.e_machine != E_MACHINE.EM_AARCH64:
		return None

	assert not vmlinux.bits32, 'EM_AARCH64 32-bit? WAT'

	abis = ['aarch64']
	if 'compat_sys_call_table' in vmlinux.symbols:
		abis.append('aarch32')

	return False, abis
70 | return noprefix(name, 'aarch32_', 'arm64_') 71 | -------------------------------------------------------------------------------- /src/systrack/arch/mips.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple, List, Optional 2 | 3 | from ..elf import Symbol, ELF, E_MACHINE 4 | from ..kconfig_options import VERSION_ZERO, VERSION_INF 5 | from ..syscall import Syscall 6 | from ..type_hints import KernelVersion 7 | from ..utils import VersionedDict, anyprefix, noprefix 8 | 9 | from .arch_base import Arch 10 | 11 | class ArchMips(Arch): 12 | name = 'mips' 13 | syscall_num_reg = 'v0' 14 | 15 | kconfig = VersionedDict(( 16 | # kexec[_file]_load 17 | ((2,6,20), (3,9) , 'KEXEC=y' , ['EXPERIMENTAL=y']), 18 | ((3,9) , VERSION_INF, 'KEXEC=y' , []), 19 | # seccomp 20 | ((2,6,15), (5,10) , 'SECCOMP=y', []), 21 | )) 22 | 23 | def __init__(self, kernel_version: KernelVersion, abi: str, bits32: bool = False): 24 | super().__init__(kernel_version, abi, bits32) 25 | assert self.abi in ('o32', 'n32', 'n64') 26 | 27 | if self.abi == 'o32': 28 | self.abi_bits32 = True 29 | # Interestingly, man 2 syscall states: "The mips/o32 system call 30 | # convention passes arguments 5 through 8 on the user stack". 31 | # What syscall takes 8 arguments on MIPS o32? WTF. 32 | self.syscall_num_base = 4000 33 | self.syscall_arg_regs = ('a0', 'a1', 'a2', 'a3', 'stack', 'stack', 'stack', 'stack') 34 | 35 | if not self.bits32: 36 | self.syscall_table_name = 'sys32_call_table' 37 | else: 38 | self.abi_bits32 = False 39 | self.syscall_arg_regs = ('a0', 'a1', 'a2', 'a3', 'a4', 'a5') 40 | 41 | if self.abi == 'n64': 42 | self.syscall_num_base = 5000 43 | else: # n32 44 | self.syscall_num_base = 6000 45 | self.syscall_table_name = 'sysn32_call_table' 46 | 47 | if self.bits32: 48 | # MIPS 32bit means o32 ABI. 
49 | assert self.abi == 'o32' 50 | 51 | # Just to be clear: for 32-bit we are ok with defconfig 52 | self.config_targets = ('defconfig',) 53 | 54 | self.kconfig.add(VERSION_ZERO, VERSION_INF, '32BIT=y', []) 55 | self.kconfig.add(VERSION_ZERO, VERSION_INF, '64BIT=n', []) 56 | 57 | # Select CPU release. It does not seem to matter much, so select R2, 58 | # which has the best kernel version compatibility (along with R1). 59 | # These are a multiple choice menu, so better set all of them. 60 | self.kconfig.add((2,6,15), VERSION_INF, 'CPU_MIPS32_R1=n', []) 61 | self.kconfig.add((2,6,15), VERSION_INF, 'CPU_MIPS32_R2=y', ['SYS_HAS_CPU_MIPS32_R2=y']) 62 | self.kconfig.add((4,0) , VERSION_INF, 'CPU_MIPS32_R6=n', []) 63 | else: 64 | self.compat = self.abi != 'n64' 65 | 66 | # Grab SGI IP27 (Origin200/2000), which apparently is one of the 67 | # only two MIPS machine with NUMA support along with Longsoon64 68 | # (loongson3_defconfig), as the latter is more of a pain in the ass 69 | # to build. No need to select CPU release for this, it's R10000. 70 | self.config_targets = ('ip27_defconfig',) 71 | 72 | self.kconfig.add(VERSION_ZERO, VERSION_INF, '32BIT=n', []) 73 | self.kconfig.add(VERSION_ZERO, VERSION_INF, '64BIT=y', []) 74 | 75 | # 32-bit has no NUMA support (apparently), but 64-bit does and 76 | # ip27_defconfig should include it. Make sure an error is raised in 77 | # case of no NUMA. Needed for mbind, migrate_pages, 78 | # {get,set}_mempolicy. 79 | self.kconfig.add(VERSION_ZERO, VERSION_INF, 'NUMA=y', ['SYS_SUPPORTS_NUMA=y']) 80 | 81 | # MIPS 64bit supports all ABIs: 32bit o32, 64bit n32, 64bit n64. 82 | # Enable all of them regardless, we will be able to extract the 83 | # right syscall table anyway. 
@staticmethod
def match(vmlinux: ELF) -> Optional[Tuple[bool,List[str]]]:
	# Tell whether vmlinux is a MIPS kernel and, if so, which ABIs it can
	# serve. Returns None if the ELF machine is not EM_MIPS, otherwise a
	# (bits32, abis) tuple where bits32 mirrors vmlinux.bits32.
	#
	# A 32-bit kernel only offers 'o32'. A 64-bit kernel always offers 'n64',
	# plus 'o32' and/or 'n32' when the corresponding syscall table
	# (sys32_call_table / sysn32_call_table) is among the symbols.
	#
	if vmlinux.e_machine != E_MACHINE.EM_MIPS:
		return None

	if vmlinux.bits32:
		return vmlinux.bits32, ['o32']

	abis = ['n64']
	for table, abi in (('sys32_call_table', 'o32'), ('sysn32_call_table', 'n32')):
		if table in vmlinux.symbols:
			abis.append(abi)

	return vmlinux.bits32, abis
def syscall_def_regexp(self, syscall_name: Optional[str]=None) -> Optional[str]:
	# Build a regular expression matching the old-style, MIPS-specific
	# syscall definitions (`asmlinkage [unsigned] <type> <prefix>_<name>(`),
	# where the prefix is sysm_ or mipsmt_sys_ and, for the n32 ABI only,
	# also sysn32_.
	#
	# Without a syscall_name the expression matches any function with one of
	# those prefixes. With one, it matches that syscall only: a name that
	# already carries a known prefix is used verbatim, otherwise the prefix
	# is added by the expression. Alternatives are joined with '|'.
	#
	is_n32 = self.abi == 'n32'

	if syscall_name is None:
		patterns = [r'\basmlinkage\s*(unsigned\s+)?\w+\s*(sysm|mipsmt_sys)_\w+\s*\(']
		if is_n32:
			patterns.append(r'\basmlinkage\s*(unsigned\s+)?\w+\s*sysn32_\w+\s*\(')
		return '|'.join(patterns)

	# Absolutely insane old-style prefixes on MIPS...
	if anyprefix(syscall_name, 'sysm_', 'mipsmt_sys_'):
		patterns = [rf'\basmlinkage\s*(unsigned\s+)?\w+\s*{syscall_name}\s*\(']
	else:
		patterns = [rf'\basmlinkage\s*(unsigned\s+)?\w+\s*(sysm|mipsmt_sys)_{syscall_name}\s*\(']

	if is_n32:
		if anyprefix(syscall_name, 'sysn32_'):
			patterns.append(rf'\basmlinkage\s*(unsigned\s+)?\w+\s*{syscall_name}\s*\(')
		else:
			patterns.append(rf'\basmlinkage\s*(unsigned\s+)?\w+\s*sysn32_{syscall_name}\s*\(')

	return '|'.join(patterns)
| from ..utils import VersionedDict, noprefix 10 | 11 | from .arch_base import Arch 12 | 13 | class ArchPowerPC(Arch): 14 | name = 'powerpc' 15 | syscall_num_base = 0 16 | syscall_num_reg = 'r0' 17 | 18 | # NOTE: We treat "SPU" as an ABI, even though it's not a real ABI. It stands 19 | # for "Synergistic Processor Unit", one of the CPUs composing a Cell 20 | # processor: https://en.wikipedia.org/wiki/Cell_(processor). SPUs are quite 21 | # peculiar: as the comment in arch/powerpc/platforms/cell/spu_callbacks.c 22 | # (v5.0) explains, they can only use a subset of the syscalls defined for 23 | # the "64" ABI. 24 | 25 | # NOTE: we are assuming to have PPC_BOOK3S=y (and therefore PPC_BOOK3S_32=y 26 | # for 32-bit or PPC_BOOK3S_64=y for 64-bit) 27 | kconfig = VersionedDict(( 28 | # These are needed for RELOCATABLE=n, we do not really need to list 29 | # dependencies since we are disabling them. 30 | ((2,6,30) , VERSION_INF, 'PPC_OF_BOOT_TRAMPOLINE=n', []), 31 | ((2,6,16) , (2,6,27) , 'CRASH_DUMP=n' , []), 32 | ((2,6,27) , VERSION_INF, 'CRASH_DUMP=n' , []), 33 | ((4,12) , VERSION_INF, 'CRASH_DUMP=n' , []), 34 | ((3,4) , VERSION_INF, 'FA_DUMP=n' , []), 35 | # Needs to be set here too because arch-specific kconfigs are applied 36 | # after those listed in KCONFIG_DEBUGGING (kconfig_options.py) 37 | (VERSION_ZERO, VERSION_INF, 'RELOCATABLE=n', ['PPC_OF_BOOT_TRAMPOLINE=n', 'CRASH_DUMP=n', 'FA_DUMP=n']), 38 | # kexec_load 39 | ((2,6,15) , (3,9) , 'KEXEC=y', ['PPC_BOOK3S=y', 'EXPERIMENTAL=y']), 40 | ((3,9) , VERSION_INF, 'KEXEC=y', ['PPC_BOOK3S=y']), 41 | # seccomp 42 | ((2,6,15) , (5,10) , 'SECCOMP=y', ['PROC_FS=y']), 43 | # rtas 44 | ((2,6,15) , VERSION_INF, 'PPC_RTAS=y', []), 45 | )) 46 | 47 | # FIXME: more like a curiosity, but why the hell do migrate_pages and 48 | # move_pages look like they depend on MIGRATION and not necessarily on NUMA, 49 | # but then aren't available for PPC 32-bit which has NUMA=n??? 
50 | 51 | kconfig_syscall_deps = VersionedDict(( 52 | (VERSION_ZERO, VERSION_INF, 'pkey_alloc' , 'PPC_MEM_KEYS'), 53 | (VERSION_ZERO, VERSION_INF, 'pkey_free' , 'PPC_MEM_KEYS'), 54 | (VERSION_ZERO, VERSION_INF, 'pkey_mprotect', 'PPC_MEM_KEYS'), 55 | )) 56 | 57 | def __init__(self, kernel_version: KernelVersion, abi: str, bits32: bool = False): 58 | super().__init__(kernel_version, abi, bits32) 59 | assert self.abi in ('ppc32', 'ppc64', 'spu') 60 | 61 | # The "powerpc" directory was added under arch in v2.6.15 and it weirdly 62 | # coexisted with "ppc" until v2.6.27, when the latter was removed. 63 | assert self.kernel_version >= (2,6,15), 'kernel too old, sorry!' 64 | 65 | if self.abi == 'spu': 66 | # spu_syscall_table only exists since v2.6.16, I have no idea how 67 | # things were handled before then. This is a rather old kernel 68 | # version, we'll worry about it in the future (if ever). 69 | assert self.kernel_version >= (2,6,16), 'kernel too old, sorry!' 70 | 71 | if self.abi == 'ppc32': 72 | self.syscall_arg_regs = ('r3', 'r4', 'r5', 'r6', 'r7', 'r8', 'r9') 73 | self.abi_bits32 = True 74 | else: 75 | self.syscall_arg_regs = ('r3', 'r4', 'r5', 'r6', 'r7', 'r8') 76 | self.abi_bits32 = False 77 | 78 | if self.bits32: 79 | self.compat = False 80 | self.uses_function_descriptors = False 81 | self.syscall_table_name = 'sys_call_table' 82 | 83 | # PPC_BOOK3S_32 was introduced in v2.6.31. We'll worry about 84 | # older kernels in the future (if ever). 85 | assert self.kernel_version >= (2,6,31), 'kernel too old, sorry!' 86 | 87 | # Apparently there isn't a nice 32-bit defconfig and one needs 88 | # to manually disable 64-bit??? What in tarnation >:( lame! 89 | # There's ppc_defconfig from v5.2, which also takes half the time to 90 | # build so it'd be nice to use... but using it as is without tweaks 91 | # compiles a kernel without memfd_create. 
92 | self.config_targets = ('ppc64_defconfig',) 93 | self.kconfig.add(VERSION_ZERO, VERSION_INF, 'PPC64=n', []) 94 | self.kconfig.add(VERSION_ZERO, VERSION_INF, 'PPC_BOOK3S_32=y', []) 95 | else: 96 | self.compat = self.abi != 'ppc64' 97 | self.abi_bits32 = self.abi == 'ppc32' 98 | self.config_targets = ('ppc64_defconfig',) 99 | self.uses_function_descriptors = True 100 | 101 | if self.abi == 'spu': 102 | self.syscall_table_name = 'spu_syscall_table' 103 | elif self.abi == 'ppc32' and self.kernel_version >= (5,0): 104 | # 32-bit and 64-bit syscalls before v5.0 share the same table 105 | # (see skip_syscall() below), they are split in two tables only 106 | # from v5.0. 107 | self.syscall_table_name = 'compat_sys_call_table' 108 | 109 | # PowerPC64 supports all ABIs: 64, 32, "spu". Enable all of them, we 110 | # will be able to extract the right syscall table regardless. 111 | self.kconfig.add((2,6,15), (5,7) , 'COMPAT=y', ['PPC64=y']) 112 | self.kconfig.add((5,7) , VERSION_INF, 'COMPAT=y', ['PPC64=y', 'CPU_LITTLE_ENDIAN=n', 'CC_IS_CLANG=n']) 113 | 114 | # Needed for NUMA=y 115 | self.kconfig.add((2,6,15), (2,6,22) , 'PPC_PSERIES=y', ['PPC64=y', 'PPC_MULTIPLATFORM=y']), 116 | self.kconfig.add((2,6,22), VERSION_INF, 'PPC_PSERIES=y', ['PPC64=y', 'PPC_BOOK3S=y']), 117 | # mbind, migrate_pages, {get,set}_mempolicy 118 | # NOTE: in theory depends on (PPC_PSERIES || PPC_POWERNV) after 119 | # 5.10, but we are assuming PPC_PSERIES=y 120 | self.kconfig.add((2,6,15), VERSION_INF, 'NUMA=y', ['PPC64=y', 'SMP=y', 'PPC_PSERIES=y']) 121 | # kexec_file_load 122 | self.kconfig.add((4,10) , VERSION_INF, 'KEXEC_FILE=y', ['PPC64=y', 'CRYPTO=y', 'CRYPTO_SHA256=y']) 123 | # Needed for PPC_SUBPAGE_PROT=y 124 | # NOTE: in theory depends on (44x || PPC_BOOK3S_64), but we are 125 | # assuming PPC_BOOK3S_64=y 126 | self.kconfig.add((2,6,15), VERSION_INF, 'PPC_64K_PAGES=y', ['PPC_BOOK3S_64=y']) 127 | # subpage_prot (ppc only, 64-bit only) 128 | self.kconfig.add((2,6,25), (5,9) , 
def matches(self, vmlinux: ELF) -> bool:
	# Tell whether vmlinux is a PowerPC kernel with the same bitness as this
	# Arch instance: EM_PPC is expected for 32-bit and EM_PPC64 for 64-bit.
	#
	# Linux PPC 32-bit should be big-endian only, so a little-endian EM_PPC
	# ELF is considered a bug and trips the assertion. The check is limited
	# to EM_PPC on purpose: 64-bit PowerPC kernels can be little-endian (the
	# instruction decoders in this class handle both byte orders), and an
	# ELF for any other machine must simply yield False instead of raising.
	assert vmlinux.e_machine != E_MACHINE.EM_PPC or vmlinux.big_endian, \
		'Little-endian PowerPC 32-bit kernel? WAT'

	expected_machine = E_MACHINE.EM_PPC if self.bits32 else E_MACHINE.EM_PPC64
	return (
		vmlinux.e_machine == expected_machine
		and vmlinux.bits32 == self.bits32
	)
def _dummy_syscall_code(self, sc: Syscall, vmlinux: ELF) -> Optional[bytes]:
	# Check for `li r3,-ENOSYS; blr` optionally accompanied by some other
	# known non-branching instructions along the way:
	#
	# - {mflr,mtlr} r0
	# - {stw,std,lwz,ld} r0,X(r1)
	# - matching stwu/stdu and addi on r1 (stack pointer)
	# - bl (to call _mcount() or other func, which *has* to return)
	# - nop (ori 0,0,0)
	#
	# Returns the whole code of the symbol if it matches, None otherwise.
	#
	# TODO: relies on the symbol having a valid size (!= 0), improve?
	if sc.symbol.size < 8:
		# Too short to hold the two mandatory instructions (4 bytes each)
		return None

	code = vmlinux.read_symbol(sc.symbol)
	# Stack frame size seen in stwu/stdu (r1_dec) and in addi (r1_inc)
	r1_dec = r1_inc = None
	# Instructions that are NOT part of the tolerated set listed above
	insns = []

	# Decode the code as 32-bit words using the byte order of the ELF:
	# '<>'[False] is '<' (little-endian), '<>'[True] is '>' (big-endian).
	# iter_unpack() yields 1-tuples, hence the itemgetter(0).
	for insn in map(itemgetter(0), iter_unpack('<>'[vmlinux.big_endian] + 'L', code)):
		# Upper halfword: enough to tell apart the load/store forms below,
		# whose lower halfword is just the displacement/immediate
		hi = insn >> 16

		# mflr r0 / mtlr r0 / nop (ori 0,0,0)
		if insn in (0x7c0802a6, 0x7c0803a6, 0x60000000):
			continue
		# bl X
		# NOTE(review): this only tests the top byte (0x4b), it does not
		# look at the low LK bit; presumably good enough in practice, verify
		if (hi >> 8) == 0x4b:
			continue
		# stw r0,X(r1) / std r0,X(r1) / lwz r0,X(r1) / ld r0,X(r1)
		if hi in (0x9001, 0xf801, 0xe801, 0x8001):
			continue
		# stdu r1,X(r1)
		if insn & 0xffff0003 == 0xf8210001:
			# Low two bits select the instruction variant and are masked out
			# of the displacement; the displacement is presumably negative,
			# so 0x10000 - X gives the (positive) amount r1 is decreased by
			r1_dec = 0x10000 - (insn & 0xfffc)
			continue
		# stwu r1,X(r1)
		# NOTE(review): 0xf821 is also accepted here, i.e. what is left of
		# the 0xf821 encodings after the stdu check above; confirm intended
		if hi in (0x9421, 0xf821):
			r1_dec = 0x10000 - (insn & 0xffff)
			continue
		# addi r1,r1,X (after stwu/stdu)
		if hi == 0x3821 and r1_dec is not None:
			r1_inc = insn & 0xffff
			continue

		# Anything else is collected for the final comparison. Bail out
		# early once more instructions than expected piled up.
		if len(insns) > 2:
			return None

		insns.append(insn)

	# Stack pointer decrement/increment must match
	if (r1_dec is not None or r1_inc is not None) and r1_dec != r1_inc:
		return None

	# li r3,-ENOSYS; blr
	# (0xffda is -38 as a signed 16-bit immediate)
	if insns == [0x3860ffda, 0x4e800020]:
		return code

	return None
def syscall_def_regexp(self, syscall_name: Optional[str]=None) -> Optional[str]:
	# Build a regular expression matching syscall definitions made through
	# the PPC32_SYSCALL_DEFINE<n>() macros.
	#
	# These are only looked for with the 'ppc32' ABI: None is returned for
	# any other ABI. Without a syscall_name the expression matches the
	# opening of any such macro; with one, it also requires that name as
	# the first macro argument.
	#
	if self.abi == 'ppc32':
		if syscall_name is None:
			return r'\bPPC32_SYSCALL_DEFINE\d\s*\('
		return rf'\bPPC32_SYSCALL_DEFINE\d\s*\({syscall_name}\b'

	return None
Tuple, List, Optional 2 | 3 | from ..elf import Symbol, ELF, E_MACHINE 4 | from ..kconfig_options import VERSION_INF 5 | from ..type_hints import KernelVersion 6 | from ..utils import VersionedDict 7 | 8 | from .arch_base import Arch 9 | 10 | class ArchRiscV(Arch): 11 | name = 'riscv' 12 | syscall_num_reg = 'a7' 13 | syscall_arg_regs = ('a0', 'a1', 'a2', 'a3', 'a4', 'a5') 14 | 15 | kconfig = VersionedDict(( 16 | # kexec_load 17 | ((4,8) , VERSION_INF, 'KEXEC=y' , ['MMU=y']), 18 | # seccomp 19 | ((5,5) , (5,10) , 'SECCOMP=y' , []), 20 | # mbind, migrate_pages, {get,set}_mempolicy 21 | ((5,12), VERSION_INF, 'NUMA=y' , ['SMP=y', 'MMU=y']), 22 | )) 23 | 24 | def __init__(self, kernel_version: KernelVersion, abi: str, bits32: bool=False): 25 | super().__init__(kernel_version, abi, bits32) 26 | assert kernel_version >= (4,15), 'Linux only supports RISC-V from v4.15' 27 | assert self.abi in ('rv32', 'rv64') 28 | 29 | if self.abi == 'rv32': 30 | self.abi_bits32 = True 31 | 32 | if not self.bits32: 33 | assert self.kernel_version >= (5,19), 'Linux only supports compat RV32 from v5.19' 34 | self.compat = True 35 | self.syscall_table_name = 'compat_sys_call_table' 36 | 37 | if self.bits32: 38 | if self.kernel_version >= (6,8): 39 | # rv32_defconfig removed in v6.8 40 | self.config_targets = ('defconfig', '32-bit.config') 41 | elif self.kernel_version >= (5,1): 42 | self.config_targets = ('rv32_defconfig',) 43 | else: 44 | self.config_targets = ('defconfig',) 45 | 46 | # No "easy" make target for 32-bit before 5.1. Need manual config. 
@staticmethod
def match(vmlinux: ELF) -> Optional[Tuple[bool,List[str]]]:
	# Tell whether vmlinux is a RISC-V kernel and, if so, which ABIs it can
	# serve. Returns None if the ELF machine is not EM_RISCV, otherwise a
	# (bits32, abis) tuple where bits32 mirrors vmlinux.bits32.
	#
	# A 32-bit kernel only offers 'rv32'. A 64-bit kernel always offers
	# 'rv64', plus 'rv32' when the compat syscall table
	# (compat_sys_call_table) is among the symbols.
	#
	if vmlinux.e_machine != E_MACHINE.EM_RISCV:
		return None

	if vmlinux.bits32:
		return vmlinux.bits32, ['rv32']

	abis = ['rv64']
	if 'compat_sys_call_table' in vmlinux.symbols:
		abis.append('rv32')

	return vmlinux.bits32, abis
JE, JNE 8 | from iced_x86.OpKind import REGISTER 9 | 10 | from ..elf import Symbol, ELF, E_MACHINE 11 | from ..kconfig_options import VERSION_ZERO, VERSION_INF 12 | from ..syscall import Syscall 13 | from ..type_hints import KernelVersion 14 | from ..utils import VersionedDict, noprefix 15 | 16 | from .arch_base import Arch 17 | 18 | class ArchX86(Arch): 19 | name = 'x86' 20 | 21 | kconfig = VersionedDict(( 22 | # Disable retpoline mitigations for better compiler compatibility 23 | ((4,15) , VERSION_INF, 'RETPOLINE=n' , []), 24 | # kexec_load 25 | ((2,6,13), (2,6,19) , 'KEXEC=y' , ['EXPERIMENTAL=y']), 26 | ((2,6,19), VERSION_INF, 'KEXEC=y' , []), 27 | # seccomp 28 | ((2,6,12), (2,6,24) , 'SECCOMP=y' , ['PROC_FS=y']), 29 | ((2,6,24), (5,10) , 'SECCOMP=y' , []), 30 | # iopl, ioperm (x86 only) 31 | ((5,5) , VERSION_INF, 'X86_IOPL_IOPERM=y' , []), 32 | # modify_ldt 33 | ((4,3) , VERSION_INF, 'MODIFY_LDT_SYSCALL=y', []), 34 | ((4,3) , VERSION_INF, 'MODIFY_LDT_SYSCALL=y', []), 35 | )) 36 | 37 | kconfig_syscall_deps = VersionedDict(( 38 | (VERSION_ZERO, VERSION_INF, 'pkey_alloc' , 'X86_INTEL_MEMORY_PROTECTION_KEYS'), 39 | (VERSION_ZERO, VERSION_INF, 'pkey_free' , 'X86_INTEL_MEMORY_PROTECTION_KEYS'), 40 | (VERSION_ZERO, VERSION_INF, 'pkey_mprotect', 'X86_INTEL_MEMORY_PROTECTION_KEYS'), 41 | )) 42 | 43 | # Numbers marked as "64" in syscall_64.tbl before v5.4 (when x64 and x32 44 | # still shared the same table), which should therefore NOT be used in x32 45 | # mode. These also include the (lower) x64 numbers for the misnumbered 46 | # 512-547 syscalls. 
47 | # 48 | # cat arch/x86/entry/syscalls/syscall_64.tbl | rg '\t64' | cut -f1 49 | # 50 | __bad_x32_numbers = { 51 | 13, 15, 16, 19, 20, 45, 46, 47, 54, 55, 59, 101, 127, 128, 129, 131, 52 | 134, 156, 174, 177, 178, 180, 205, 206, 209, 211, 214, 215, 222, 236, 53 | 244, 246, 247, 273, 274, 278, 279, 295, 296, 297, 299, 307, 310, 311, 54 | 322, 327, 328 55 | } 56 | 57 | def __init__(self, kernel_version: KernelVersion, abi: str, bits32: bool = False): 58 | super().__init__(kernel_version, abi, bits32) 59 | assert self.abi in ('x64', 'ia32', 'x32') 60 | 61 | # i386_defconfig and x86_64_defconfig don't exist before v2.6.24: need 62 | # a different configuration in such case. We'll think about it when (if) 63 | # we ever get to supporting such old kernels. Additionally, there were 64 | # two directories under arch before v2.6.24 ("i386" and "x86_64"), so 65 | # self.name should reflect that too. 66 | assert self.kernel_version >= (2,6,24), 'kernel too old, sorry!' 67 | 68 | # Syscall tables are no longer guaranteed to exist since v6.9 69 | # (see commit 1e3ad78334a69b36e107232e337f9d693dcc9df2). We will 70 | # determine later in adjust_abi() if we actually have a table for the 71 | # selected ABI (in case of FTRACE_SYSCALLS=y we may have one).
72 | if self.kernel_version < (6,9): 73 | self.syscall_table_name = 'sys_call_table' 74 | 75 | if not self.bits32: 76 | if self.abi == 'ia32': 77 | self.syscall_table_name = 'ia32_sys_call_table' 78 | elif self.abi == 'x32' and self.kernel_version >= (5,4): 79 | self.syscall_table_name = 'x32_sys_call_table' 80 | else: 81 | self.syscall_table_name = None 82 | 83 | if self.abi == 'ia32': 84 | self.syscall_num_reg = 'eax' 85 | self.syscall_arg_regs = ('ebx', 'ecx', 'edx', 'esi', 'edi', 'ebp') 86 | else: 87 | self.syscall_num_reg = 'rax' 88 | self.syscall_arg_regs = ('rdi', 'rsi', 'rdx', 'r10', 'r8', 'r9') 89 | 90 | if self.bits32: 91 | assert self.abi == 'ia32' 92 | self.abi_bits32 = True 93 | self.config_targets = ('i386_defconfig',) 94 | 95 | # vm86 (x86 only, 32-bit only, no compat support in 64-bit kernels) 96 | self.kconfig.add((2,6,16), (2,6,18) , 'VM86=y' , ['X86=y', 'EMBEDDED=y']), 97 | self.kconfig.add((2,6,18), (2,6,24) , 'VM86=y' , ['EMBEDDED=y']), 98 | self.kconfig.add((2,6,24), (4,3) , 'VM86=y' , ['X86_32=y', 'EXPERT=y']), 99 | self.kconfig.add((4,3) , VERSION_INF, 'X86_LEGACY_VM86=y', ['X86_32=y']), 100 | self.kconfig.add((4,3) , VERSION_INF, 'X86_LEGACY_VM86=y', ['X86_32=y']), 101 | # Needed for NUMA=y 102 | self.kconfig.add(VERSION_ZERO, VERSION_INF, 'NOHIGHMEM=n', []) 103 | self.kconfig.add(VERSION_ZERO, VERSION_INF, 'HIGHMEM4G=n', []) 104 | self.kconfig.add(VERSION_ZERO, VERSION_INF, 'HIGHMEM64G=y', []) 105 | self.kconfig.add(VERSION_ZERO, VERSION_INF, 'X86_BIGSMP=y', ['SMP=y']) 106 | # mbind, migrate_pages, {get,set}_mempolicy 107 | # NOTE: before v2.6.29 NUMA actually also needs more options in 108 | # OR, but we don't support checking kconfig expressions 109 | self.kconfig.add(VERSION_ZERO, (2,6,23) , 'NUMA=y', ['SMP=y', 'HIGHMEM64G=y']) 110 | self.kconfig.add((2,6,23) , (2,6,29) , 'NUMA=y', ['SMP=y', 'HIGHMEM64G=y', 'EXPERIMENTAL=y']) 111 | self.kconfig.add((2,6,29) , VERSION_INF, 'NUMA=y', ['SMP=y', 'HIGHMEM64G=y', 'X86_BIGSMP=y']) 112 | else: 
113 | self.abi_bits32 = self.abi == 'ia32' 114 | self.compat = self.abi != 'x64' 115 | self.config_targets = ('x86_64_defconfig',) 116 | 117 | if self.abi == 'x32': 118 | # x32 syscalls have this bit set (__X32_SYSCALL_BIT) 119 | self.syscall_num_base = 0x40000000 120 | 121 | # x86-64 supports all ABIs: ia32, x64, x32. Enable all of them, we 122 | # will be able to extract the right syscall table regardless. 123 | self.kconfig.add(VERSION_ZERO, VERSION_INF, 'IA32_EMULATION=y', []) 124 | self.kconfig.add((3,4) , (3,9) , 'X86_X32=y' , ['EXPERIMENTAL=y']) 125 | self.kconfig.add((3,9) , (5,18) , 'X86_X32=y' , []) 126 | self.kconfig.add((5,18) , VERSION_INF, 'X86_X32_ABI=y' , []) 127 | 128 | # kexec_file_load 129 | self.kconfig.add((3,17) , VERSION_INF, 'KEXEC_FILE=y', ['X86_64=y', 'CRYPTO=y', 'CRYPTO_SHA256=y']) 130 | # mbind, migrate_pages, {get,set}_mempolicy 131 | self.kconfig.add(VERSION_ZERO, (2,6,15) , 'NUMA=y', []) 132 | self.kconfig.add((2,6,15) , (2,6,29) , 'NUMA=y', ['SMP=y']) 133 | self.kconfig.add((2,6,29) , VERSION_INF, 'NUMA=y', ['SMP=y']) 134 | # pkey_alloc, pkey_free, pkey_mprotect 135 | # NOTE: in theory depends on (CPU_SUP_INTEL || CPU_SUP_AMD) but we 136 | # are pretty sure that CPU_SUP_INTEL will be =y 137 | self.kconfig.add((4,6) , VERSION_INF, 'X86_INTEL_MEMORY_PROTECTION_KEYS=y', ['X86_64=y', 'CPU_SUP_INTEL=y']) 138 | # map_shadow_stack 139 | # NOTE: depends on assembler support for WRUSS instruction 140 | # (GNU binutils >= 2.31) 141 | self.kconfig.add((6,6) , VERSION_INF, 'X86_USER_SHADOW_STACK=y', ['AS_WRUSS=y']) 142 | 143 | @staticmethod 144 | def match(vmlinux: ELF) -> Optional[Tuple[bool,List[str]]]: 145 | if vmlinux.e_machine == E_MACHINE.EM_386: 146 | assert vmlinux.bits32, 'EM_386 64-bit? WAT' 147 | elif vmlinux.e_machine == E_MACHINE.EM_X86_64: 148 | assert not vmlinux.bits32, 'EM_X86_64 32-bit? 
def adjust_abi(self, vmlinux: ELF):
    # Before v6.9 there is nothing to adjust. From v6.9 onwards, check
    # whether this vmlinux exposes a 'sys_call_table' symbol and, if it does
    # and the selected ABI is not a compat one, record it as the table to
    # use. The original comment explains why: the table may exist with
    # FTRACE_SYSCALLS=y, and 64-bit kernels have no ia32/x32 table.
    if self.kernel_version >= (6,9):
        table_present = 'sys_call_table' in vmlinux.symbols
        if table_present and not self.compat:
            self.syscall_table_name = 'sys_call_table'
def skip_syscall(self, sc: Syscall) -> bool:
    # Tell whether syscall `sc` must be ignored for the ABI being analyzed.
    #
    # x64 / x32 background (see the comment in v5.10
    # arch/x86/entry/syscalls/syscall_64.tbl): numbers 512 through 547 are
    # historically misnumbered and x32 only. x32 is supposed to use them
    # ORed with __X32_SYSCALL_BIT, while x64 is supposed to use its own
    # (lower) numbers for the same syscalls.
    #
    # Before v5.4 both ABIs shared one table, so the numbers could be mixed
    # up; e.g. in v5.3 execve was reachable through four numbers from both
    # modes:
    #
    #   1. 0x3b      : x64 number (technically only correct for x64 mode)
    #   2. 0x208     : x32 number without __X32_SYSCALL_BIT
    #                  (technically incorrect in both modes)
    #   3. 0x4000003b: x64 number with __X32_SYSCALL_BIT
    #                  (technically incorrect in both modes)
    #   4. 0x40000208: x32 number with __X32_SYSCALL_BIT
    #                  (technically only correct for x32 mode)
    #
    # v5.4 (commit 6365b842aae4490ebfafadfc6bb27a6d3cc54757) introduced a
    # separate x32 table: since then 2 and 3 give -ENOSYS, 1 and 4 work.
    number = sc.number
    abi = self.abi

    if abi == 'x64':
        # x64 cannot use x32 numbers even though they are in the table
        return 512 <= number <= 547

    if abi == 'x32':
        if self.kernel_version >= (5,4):
            # Dedicated x32 table: anything found there is acceptable
            return False

        # Shared table: reject the numbers reserved to x64, ignoring
        # __X32_SYSCALL_BIT
        return (number & ~0x40000000) in self.__bad_x32_numbers

    if abi == 'ia32':
        if self.bits32:
            # pkey_{alloc,free,mprotect} depend on X86_64=y, so they are
            # never available on 32-bit kernels
            return number in (380, 381, 382)

        # vm86 and vm86old only exist for real in 32-bit kernels; 64-bit
        # kernels may still carry stubs that warn and return -ENOSYS
        return number in (113, 166)

    return False
in v4.0 stub_execve in arch/x86/kernel/entry_64.S or 285 | # stub32_execve in arch/x86/ia32/ia32entry.S. These stubs with prefix 286 | # "stub[32]_" make calls to the actual syscall function. 287 | # 288 | # Removing the prefix helps locate the actual syscall definition through 289 | # source code grepping IFF they do not have any other prefix/suffix in 290 | # the source (stub_fork -> fork -> easily find SYSCALL_DEFINE0(fork)). 291 | # 292 | # In some cases this is not enough though, because the actual function 293 | # has another prefix: e.g. stub_rt_sigreturn, which calls 294 | # sys_rt_sigreturn, defined as `asmlinkage long sys_rt_sigreturn` 295 | # and not `asmlinkage long rt_sigreturn` or 296 | # `SYSCALL_DEFINE0(rt_sigreturn)`. Kind of a bummer, but I don't really 297 | # want to become insane to accomodate all these quirks. 298 | return noprefix(sym_name, 'stub32_', 'stub_') 299 | 300 | def _normalize_syscall_name(self, name: str) -> str: 301 | # E.g. v5.18 COMPAT_SYSCALL_DEFINE1(ia32_mmap, ...) 302 | return noprefix(name, 'ia32_', 'x86_', 'x32_') 303 | 304 | def _dummy_syscall_code(self, sc: Syscall, vmlinux: ELF) -> Optional[bytes]: 305 | # Check if the code of the syscall only consists of 306 | # `MOV rax/eax, -ENOSYS/-EINVAL` followed by a RET or relative JMP and 307 | # optionally preceded by an ENDBR64/32. E.G., lookup_dcookie in v6.3: 308 | # 309 | # <__x64_sys_lookup_dcookie>: 310 | # f3 0f 1e fa endbr64 311 | # 48 c7 c0 da ff ff ff mov rax,0xffffffffffffffda 312 | # e9 74 8d 90 00 jmp ffffffff819b8b84 <__x86_return_thunk> 313 | # 314 | # TODO: relies on the symbol having a valid size (!= 0), improve? 
315 | sz = sc.symbol.size 316 | if sz < 6 or sz > 16: 317 | return None 318 | 319 | orig = code = vmlinux.read_symbol(sc.symbol) 320 | bad_imm = (b'\xda\xff\xff\xff', b'\xea\xff\xff\xff') 321 | 322 | # endbr64/endbr32 323 | if code.startswith(b'\xf3\x0f\x1e\xfa') or code.startswith(b'\xf3\x0f\x1e\xfb'): 324 | code = code[4:] 325 | sz -= 4 326 | 327 | # 32-bit kernel 328 | if code[:1] == b'\xb8' and code[1:5] in bad_imm: # mov eax, -ENOSYS/-EINVAL 329 | if sz == 6 and code[5] == 0xc3: return orig # ret 330 | if sz == 7 and code[5] == 0xeb: return orig # jmp rel8 331 | if sz == 10 and code[5] == 0xe9: return orig # jmp rel32 332 | 333 | # 64-bit kernel 334 | if code[:3] == b'\x48\xc7\xc0' and code[3:7] in bad_imm: # mov rax, -ENOSYS/-EINVAL 335 | if sz == 8 and code[7] == 0xc3: return orig # ret 336 | if sz == 9 and code[7] == 0xeb: return orig # jmp rel8 337 | if sz == 12 and code[7] == 0xe9: return orig # jmp rel32 338 | 339 | return None 340 | 341 | def __emulate_syscall_switch(self, func: Symbol, func_code: bytes) -> Optional[Tuple[DefaultDict[int,Set[int]],Set[Instruction]]]: 342 | start = func.real_vaddr 343 | end = func.real_vaddr + func.size 344 | insns = list(Decoder(32 if self.bits32 else 64, func_code, ip=start)) 345 | 346 | # Register used to hold syscall number 347 | nr_reg = None 348 | 349 | # Assume first compared register holds syscall number 350 | for insn in insns: 351 | if insn.op_code().mnemonic in (CMP, TEST): 352 | for i in range(insn.op_count): 353 | if insn.op_kind(i) == REGISTER: 354 | nr_reg = insn.op_register(i) 355 | break 356 | 357 | if nr_reg is not None: 358 | break 359 | 360 | if nr_reg is None: 361 | logging.error('Could not find syscall number register') 362 | return None 363 | 364 | # Supported Jcc instructions 365 | jccs = {JA, JAE, JB, JBE, JE, JNE} 366 | # Maximum syscall number supported plus 1 367 | nr_max = 0x1000 368 | # Possible syscall numbers at a given address (instruction pointer) 369 | nrs: 
DefaultDict[int,FrozenSet[int]] = defaultdict(frozenset, {start: frozenset(range(nr_max))}) 370 | # Candidate branches to syscall functions 371 | candidate_insns: Set[Instruction] = set() 372 | # Accumulate non-NOP skipped insns for logging/debugging purposes 373 | skipped_insns: DefaultDict[Instruction,int] = defaultdict(int) 374 | 375 | keep_going = True 376 | iteration = 0 377 | 378 | # Symbolically trace the function code to determine the possible syscall 379 | # numbers and the instructions that lead to them 380 | while keep_going: 381 | iteration += 1 382 | keep_going = False 383 | 384 | invert_condition = False 385 | mnemonic: Optional[Mnemonic] = None 386 | last_cmp_immediate: Optional[int] = None 387 | 388 | for insn in insns: 389 | ip = insn.ip 390 | next_ip = insn.next_ip 391 | prev_mnemonic = mnemonic 392 | mnemonic = insn.op_code().mnemonic 393 | cur_nrs = nrs[ip] 394 | 395 | # Only support a TEST that appears right before JE/JNE, which is 396 | # functionally equal to a CMP with 0. 397 | if prev_mnemonic == TEST and mnemonic not in (JE, JNE): 398 | logging.error('Unsupported instruction after TEST: %#x: %r', ip, insn) 399 | return None 400 | 401 | if mnemonic == RET: 402 | continue 403 | 404 | if mnemonic == TEST: 405 | if insn.op0_kind != REGISTER or insn.op1_kind != REGISTER: 406 | logging.error('Unsupported TEST instruction %#x: %r', ip, insn) 407 | return None 408 | 409 | # Treat `TEST reg, reg` as `CMP reg, 0`. We make sure that 410 | # this is the only possible case above. 411 | last_cmp_immediate = 0 412 | nrs[next_ip] |= cur_nrs 413 | continue 414 | 415 | if mnemonic == CMP: 416 | if insn.op0_kind == REGISTER: 417 | reg = insn.op0_register 418 | imm_op_idx = 1 419 | invert_condition = False 420 | elif insn.op1_kind == REGISTER: 421 | reg = insn.op1_register 422 | imm_op_idx = 0 423 | invert_condition = True 424 | else: 425 | # Should not happen, but guard against it anyway. 
426 | imm_op_idx = None 427 | 428 | try: 429 | last_cmp_immediate = insn.immediate(imm_op_idx) 430 | except (ValueError, TypeError): 431 | logging.error('Unsupported CMP instruction %#x: %r', ip, insn) 432 | return None 433 | 434 | if reg != nr_reg: 435 | logging.error('Unexpected register in CMP instruction ' 436 | '%#x: %r', ip, insn) 437 | return None 438 | 439 | nrs[next_ip] |= cur_nrs 440 | continue 441 | 442 | new_taken_nrs = frozenset() 443 | new_not_taken_nrs = frozenset() 444 | 445 | if insn.is_jmp_short_or_near: 446 | target_ip = insn.near_branch_target 447 | new_taken_nrs = cur_nrs 448 | elif insn.is_jcc_short_or_near: 449 | if mnemonic not in jccs: 450 | logging.error('Unsupported Jcc instruction %#x: %r', ip, insn) 451 | return None 452 | if last_cmp_immediate is None: 453 | logging.error('No previous CMP/TEST instruction for Jcc: ' 454 | '%#x: %r', ip, insn) 455 | return None 456 | 457 | target_ip = insn.near_branch_target 458 | 459 | if mnemonic == JA: 460 | taken_filter = frozenset(range(last_cmp_immediate + 1, nr_max)) 461 | elif mnemonic == JAE: 462 | taken_filter = frozenset(range(last_cmp_immediate, nr_max)) 463 | elif mnemonic == JB: 464 | taken_filter = frozenset(range(last_cmp_immediate)) 465 | elif mnemonic == JBE: 466 | taken_filter = frozenset(range(last_cmp_immediate + 1)) 467 | elif mnemonic == JE: 468 | taken_filter = frozenset((last_cmp_immediate,)) 469 | elif mnemonic == JNE: 470 | taken_filter = frozenset(range(0, last_cmp_immediate)) 471 | taken_filter |= frozenset(range(last_cmp_immediate + 1, nr_max)) 472 | 473 | new_taken_nrs = cur_nrs & taken_filter 474 | new_not_taken_nrs = cur_nrs - taken_filter 475 | 476 | if invert_condition: 477 | new_taken_nrs, new_not_taken_nrs = new_not_taken_nrs, new_taken_nrs 478 | elif insn.is_call_near: 479 | target_ip = insn.near_branch_target 480 | new_taken_nrs = cur_nrs 481 | if start <= target_ip < end: 482 | logging.error('%s calling itself??? 
%r', func.name, insn) 483 | return None 484 | else: 485 | if iteration == 1 and not insn.op_code().is_nop: 486 | skipped_insns[insn] += 1 487 | 488 | # YOLO 489 | nrs[next_ip] |= cur_nrs 490 | continue 491 | 492 | # We get here for JMP, Jcc and CALL near 493 | if start <= target_ip < end: 494 | # Branch target inside function 495 | if target_ip < ip: 496 | # Backward branch: new numbers may be added to the 497 | # target instruction, but we are already past it. In 498 | # such case, we'll need an additional iteration to 499 | # propagate the information. 500 | if not new_taken_nrs.issubset(nrs[target_ip]): 501 | keep_going = True 502 | else: 503 | # Branch target outside function, assume it's a branch to a 504 | # syscall function 505 | candidate_insns.add(insn) 506 | 507 | nrs[target_ip] |= new_taken_nrs 508 | nrs[next_ip] |= new_not_taken_nrs 509 | 510 | logging.info('Symbolic emulation done in %d iteration%s', iteration, 511 | 's'[:iteration ^ 1]) 512 | 513 | if skipped_insns: 514 | n_skipped = sum(skipped_insns.values()) 515 | skipped = sorted(skipped_insns.items(), key=itemgetter(1, 0), reverse=True) 516 | skipped = '; '.join((f'{i:r} (x{n})' for i, n in skipped)) 517 | logging.debug('Skipped %d instruction%s: %s', n_skipped, 518 | 's'[:n_skipped ^ 1], skipped) 519 | 520 | return nrs, candidate_insns 521 | 522 | def extract_syscall_vaddrs(self, vmlinux: ELF) -> Dict[int,int]: 523 | # We need to go through a painful examination of the switch statement 524 | # implemented by {x64,x32,ia32}_sys_call(): 525 | # 526 | # #define __SYSCALL(nr, sym) case nr: return __x64_##sym(regs); 527 | # 528 | # long x64_sys_call(const struct pt_regs *regs, unsigned int nr) 529 | # { 530 | # switch (nr) { 531 | # #include 532 | # default: return __x64_sys_ni_syscall(regs); 533 | # } 534 | # } 535 | # 536 | # The switch statement on the second argument is implemented as a binary 537 | # search. Therefore, the generated instructions should simply be a bunch 538 | # of CMP/Jcc/JMP. 
No other implementation is supported right now. 539 | # 540 | assert self.syscall_table_name is None 541 | 542 | func_name = f'{self.abi}_sys_call' 543 | sym = vmlinux.functions.get(func_name) 544 | if sym is None: 545 | logging.error('Could not find function %s', func_name) 546 | return {} 547 | 548 | if sym.size < 0x10: 549 | logging.error('%s is too small (%d bytes)', sym.name, sym.size) 550 | return {} 551 | 552 | logging.info('Extracting syscalls from code of %s() at %#x', sym.name, 553 | sym.real_vaddr) 554 | 555 | res = self.__emulate_syscall_switch(sym, vmlinux.read_symbol(sym)) 556 | if res is None: 557 | return {} 558 | 559 | nrs, candidate_insns = res 560 | vaddrs: Dict[int,int] = {} 561 | found_default_case = False 562 | 563 | for insn in candidate_insns: 564 | # Guaranteed to have .near_branch_target by the code in 565 | # __emulate_syscall_switch() above 566 | vaddr = insn.near_branch_target 567 | numbers = nrs[vaddr] 568 | 569 | if len(numbers) == 0: 570 | # This should never happen, bail out 571 | logging.error('Empty set of syscall numbers for %#x (target of ' 572 | '%r). Unreachable!?', vaddr, insn) 573 | return {} 574 | 575 | if len(numbers) > 100: 576 | logging.debug('Default switch case at %#x (reachable %d ' 577 | 'times): %r => %#x is ni_syscall', insn.ip, 578 | len(numbers), insn, vaddr) 579 | 580 | if found_default_case: 581 | logging.error('Multiple default switch cases!?') 582 | return {} 583 | 584 | found_default_case = True 585 | continue 586 | 587 | # Let the caller handle de-duplication in case a single vaddr can be 588 | # reached by multiple syscall numbers 589 | for nr in numbers: 590 | if nr in vaddrs: 591 | if vaddrs[nr] != vaddr: 592 | logging.error('Number %d leads to multiple vaddrs!? ' 593 | 'Got %#x and %#x. 
def syscall_def_regexp(self, syscall_name: Optional[str]=None) -> Optional[str]:
    # Build a regular expression matching the `asmlinkage ... sys32_x32_*(`
    # style of syscall definition. Only produced for the x32 ABI; any other
    # ABI gets None.
    #
    # With no syscall_name the expression matches any sys32_x32_* function;
    # otherwise it matches that specific one (the 'sys32_x32_' prefix is
    # added unless the given name already carries it).
    if self.abi != 'x32':
        return None

    if syscall_name is None:
        func = r'sys32_x32_\w+'
    elif syscall_name.startswith('sys32_x32_'):
        func = syscall_name
    else:
        func = 'sys32_x32_' + syscall_name

    return rf'\basmlinkage\s*(unsigned\s+)?\w+\s*{func}\s*\('
class ELF:
    '''Minimal ELF reader exposing what is needed to inspect a vmlinux.

    The constructor opens the file and parses the few header fields stored
    as attributes (bits32, big_endian, e_machine, e_flags). Sections and
    symbols are extracted lazily, on first access, by parsing the output of
    the readelf command, and are then cached on the instance.
    '''
    __slots__ = (
        'path', 'file', 'bits32', 'big_endian', 'e_machine', 'e_flags',
        '__sections', '__symbols', '__functions'
    )

    def __init__(self, path: Union[str,Path]):
        '''Open the ELF at path and parse its identification/header fields.

        A bad magic is only reported with a warning; an invalid EI_CLASS or
        EI_DATA is logged as critical and terminates through sys.exit(1).
        '''
        self.path = Path(path)
        self.file = self.path.open('rb')
        self.__sections = None
        self.__symbols = None
        self.__functions = None

        magic, ei_class, ei_data = unpack('<4sBB', self.file.read(6))

        if magic != b'\x7fELF':
            logging.warning('Bad ELF magic: %r', magic)

        if ei_class == 1:
            self.bits32 = True
        elif ei_class == 2:
            self.bits32 = False
        else:
            # Report the offending EI_CLASS value (not EI_DATA)
            logging.critical('Invalid ELF e_ident[EI_CLASS] = %d', ei_class)
            sys.exit(1)

        if ei_data == 1:
            self.big_endian = False
        elif ei_data == 2:
            self.big_endian = True
        else:
            logging.critical('Invalid ELF e_ident[EI_DATA] = %d', ei_data)
            sys.exit(1)

        unpack_endian = '<>'[self.big_endian]

        # NOTE: seeks must not live inside assert statements, or they would
        # be skipped altogether when running with python -O.
        self.file.seek(0x12)
        self.e_machine = unpack(unpack_endian + 'H', self.file.read(2))[0]

        self.file.seek(0x24)
        self.e_flags = unpack(unpack_endian + 'L', self.file.read(4))[0]

    @property
    def sections(self) -> Dict[str,Section]:
        '''PROGBITS/NOBITS sections by name, parsed from `readelf -WS`.'''
        if self.__sections is not None:
            return self.__sections

        # We actually only really care about SHT_PROGBITS or SHT_NOBITS
        exp = re.compile(r'\s([.\w]+)\s+(PROGBITS|NOBITS)\s+([0-9a-fA-F]+)\s+([0-9a-fA-F]+)\s+([0-9a-fA-F]+)')
        out = ensure_command(['readelf', '-WS', self.path])
        secs = {}

        for match in exp.finditer(out):
            name, _, va, off, sz = match.groups()
            secs[name] = Section(name, int(va, 16), int(off, 16), int(sz, 16))

        self.__sections = secs
        return secs

    @property
    def symbols(self) -> Dict[str, Symbol]:
        '''All symbols by name, extracted on first access.'''
        if self.__symbols is None:
            self.__extract_symbols()
        return self.__symbols

    @property
    def functions(self) -> Dict[str, Symbol]:
        '''Only the symbols of type FUNC by name, extracted on first access.'''
        if self.__functions is None:
            self.__extract_symbols()
        return self.__functions

    @property
    def has_debug_info(self) -> bool:
        '''Whether a .debug_line section is among the known sections.'''
        return '.debug_line' in self.sections

    def __extract_symbols(self):
        # Parse `readelf -Ws` output, filling both the symbol and the
        # function caches in one pass. Symbols sharing a name overwrite each
        # other: the last one listed wins.
        exp = re.compile(r'\d+:\s+([0-9a-fA-F]+)\s+(\d+)\s+(\w+).+\s+(\S+)$')
        out = ensure_command(['readelf', '-Ws', self.path]).splitlines()
        syms = {}
        funcs = {}

        for line in out:
            match = exp.search(line)
            if not match:
                continue

            vaddr, sz, typ, name = match.groups()
            vaddr = real_vaddr = int(vaddr, 16)

            # Unaligned vaddr on ARM 32-bit means the function code is in
            # Thumb mode. Nonetheless, the actual code is aligned, so the
            # real vaddr is a multiple of 2.
            if self.e_machine == E_MACHINE.EM_ARM and typ == 'FUNC' and vaddr & 1:
                real_vaddr &= 0xfffffffe

            sym = Symbol(vaddr, real_vaddr, int(sz), typ, name)
            syms[sym.name] = sym

            if typ == 'FUNC':
                funcs[sym.name] = sym

        self.__symbols = syms
        self.__functions = funcs

    def vaddr_to_file_offset(self, vaddr: int) -> int:
        '''Translate a virtual address into an offset into the file.

        Raises ValueError if vaddr is outside of every known section.
        '''
        for sec in self.sections.values():
            if sec.vaddr <= vaddr < sec.vaddr + sec.size:
                return sec.off + vaddr - sec.vaddr
        raise ValueError('vaddr not in range of any known section')

    def vaddr_read_string(self, vaddr: int) -> str:
        '''Read and decode the NUL-terminated string located at vaddr.

        Raises ValueError if vaddr is outside of every known section or if
        the file ends before a NUL terminator is found.
        '''
        off = self.vaddr_to_file_offset(vaddr)
        self.file.seek(off)

        data = bytearray()
        while True:
            byte = self.file.read(1)
            if not byte:
                # EOF: read() keeps returning b'' from here on, so bail out
                # instead of spinning forever
                raise ValueError(f'unterminated string at vaddr {vaddr:#x}')
            if byte == b'\x00':
                return data.decode()
            data += byte

    def vaddr_read(self, vaddr: int, size: int) -> bytes:
        '''Read size bytes starting at vaddr (fewer if the file ends first).'''
        off = self.vaddr_to_file_offset(vaddr)
        self.file.seek(off)
        return self.file.read(size)

    def read_symbol(self, sym: Union[str,Symbol]) -> bytes:
        '''Read the content of a symbol, given either as Symbol or by name.'''
        if not isinstance(sym, Symbol):
            sym = self.symbols[sym]

        return self.vaddr_read(sym.real_vaddr, sym.size)

    # NOTE: the cache is keyed on (self, sym), so it also holds a reference
    # to this ELF for as long as its entries are alive.
    @lru_cache(maxsize=128)
    def next_symbol(self, sym: Symbol) -> Optional[Symbol]:
        '''Find and return the symbol (if any) with the lowest real virtual
        address higher than the one of sym.
        '''
        candidates = filter(lambda s: s.real_vaddr > sym.real_vaddr, self.symbols.values())

        try:
            # Rank by the same (real) address used for filtering
            return min(candidates, key=attrgetter('real_vaddr'))
        except ValueError:
            return None
# TODO: maybe turn this into a class with a __getitem__ that invokes
# scripts/config -s VAL if VAL is not explicitly set so that we can avoid
# manually checking?
def parse_config(config_file: Path) -> Dict[str,str]:
    '''Parse a kernel configuration file into a {name: value} dictionary.

    Blank lines and lines starting with '#' (including "# CONFIG_X is not
    set") are ignored. Names are stored without the 'CONFIG_' prefix and
    values are kept verbatim (quotes included).

    Raises ValueError for a line without '=' and AssertionError for a name
    lacking the 'CONFIG_' prefix.
    '''
    config = {}

    # Use a context manager so that the file is closed deterministically
    # instead of leaking the handle until garbage collection
    with config_file.open() as f:
        for line in f:
            line = line.strip()
            if not line or line.startswith('#'):
                continue

            name, val = line.split('=', 1)
            assert name.startswith('CONFIG_')
            config[name[7:]] = val

    return config
# TODO: check if the options were set correctly?
def edit_config(kdir: Path, config_file: Path, options: Iterable[str]):
    '''Apply a series of 'NAME=value' options through scripts/config.

    Values 'y', 'n' and 'm' are translated to the -e, -d and -m switches
    respectively; any other value is passed through --set-val. Nothing is
    executed if there is no option to apply.

    Raises ValueError if an option does not contain '='.
    '''
    if not options:
        return

    switches = {'y': '-e', 'n': '-d', 'm': '-m'}
    args = []

    for opt in options:
        name, val = opt.split('=', 1)
        switch = switches.get(val)

        if switch is None:
            args += ['--set-val', name, val]
        else:
            args += [switch, name]

    # Generators and other iterators are always truthy, so the check above
    # cannot catch an empty one: avoid invoking the script with no arguments
    if not args:
        return

    run_config_script(kdir, config_file, args)
CONFIG_{dep_name}=' 111 | f'{dep_wanted}, but {dep_name_and_val}!') 112 | 113 | edit_config(kdir, config_file, options.keys()) 114 | -------------------------------------------------------------------------------- /src/systrack/kernel.py: -------------------------------------------------------------------------------- 1 | import re 2 | import logging 3 | import struct 4 | import atexit 5 | from pathlib import Path 6 | from time import monotonic 7 | from os import sched_getaffinity 8 | from operator import itemgetter, attrgetter 9 | from collections import defaultdict, Counter 10 | from typing import Tuple, List, Dict, Iterator, Union, Any, Optional 11 | 12 | from .arch import arch_from_name, arch_from_vmlinux 13 | from .elf import ELF, Symbol, Section 14 | from .kconfig import edit_config, edit_config_check_deps 15 | from .kconfig import kconfig_more_syscalls, kconfig_debugging 16 | from .kconfig import kconfig_compatibility, kconfig_syscall_deps 17 | from .location import extract_syscall_locations 18 | from .signature import extract_syscall_signatures 19 | from .syscall import Syscall, common_syscall_symbol_prefixes 20 | from .type_hints import KernelVersion 21 | from .utils import run_command, ensure_command, high_verbosity 22 | from .utils import maybe_rel, noprefix 23 | 24 | class KernelVersionError(RuntimeError): 25 | pass 26 | 27 | class KernelArchError(RuntimeError): 28 | pass 29 | 30 | class KernelWithoutSymbolsError(RuntimeError): 31 | pass 32 | 33 | class KernelMultiABIError(RuntimeError): 34 | pass 35 | 36 | class Kernel: 37 | __version = None 38 | __version_source = None 39 | __syscalls = None 40 | __backup_makefile = None 41 | __long_size = None 42 | __long_pack_fmt = None 43 | 44 | def __init__(self, arch_name: Optional[str] = None, 45 | vmlinux: Optional[Path] = None, kdir: Optional[Path] = None, 46 | outdir: Optional[Path] = None, rdir: Optional[Path] = None, 47 | toolchain_prefix: Optional[str] = None): 48 | if not kdir and not vmlinux: 49 | raise 
ValueError('at least one of vmlinux or kdir is needed') 50 | if arch_name is None and vmlinux is None: 51 | raise ValueError('need vmlinux to determine arch if not supplied') 52 | 53 | self.kdir = kdir 54 | self.outdir = outdir 55 | self.rdir = rdir 56 | self.vmlinux = ELF(vmlinux) if vmlinux else None 57 | self.arch_name = arch_name 58 | self.toolchain_prefix = toolchain_prefix 59 | 60 | if self.vmlinux and not self.vmlinux.symbols: 61 | raise KernelWithoutSymbolsError('Provided vmlinux ELF has no symbols') 62 | 63 | if self.arch_name is None: 64 | m = arch_from_vmlinux(self.vmlinux) 65 | if m is None: 66 | raise KernelArchError('Failed to detect kernel architecture/ABI') 67 | 68 | arch_class, bits32, abis = m 69 | if len(abis) > 1: 70 | raise KernelMultiABIError('Multiple ABIs supported, need to ' 71 | 'select one', arch_class, abis) 72 | 73 | self.arch = arch_class(self.version, abis[0], bits32) 74 | else: 75 | self.arch = arch_from_name(self.arch_name, self.version) 76 | 77 | if self.vmlinux: 78 | if not self.arch.matches(self.vmlinux): 79 | raise KernelArchError(f'Architecture {arch_name} does not ' 80 | 'match provided vmlinux') 81 | 82 | self.__long_size = (8, 4)[self.vmlinux.bits32] 83 | self.__long_pack_fmt = '<>'[self.vmlinux.big_endian] + 'QL'[self.vmlinux.bits32] 84 | 85 | @staticmethod 86 | def version_from_str(s: str) -> KernelVersion: 87 | m = re.match(r'(\d+)\.(\d+)(\.(\d+))?', s) 88 | if not m: 89 | return None 90 | 91 | a, b, c = int(m.group(1)), int(m.group(2)), m.group(4) 92 | return (a, b) if c is None else (a, b, int(c)) 93 | 94 | @staticmethod 95 | def version_from_banner(banner: Union[str,bytes]) -> KernelVersion: 96 | if isinstance(banner, bytes): 97 | banner = banner.decode() 98 | 99 | if not banner.startswith('Linux version '): 100 | return None 101 | return Kernel.version_from_str(banner[14:]) 102 | 103 | def __version_from_vmlinux(self) -> KernelVersion: 104 | banner = self.vmlinux.symbols.get('linux_banner') 105 | if banner is None: 
106 | return None 107 | 108 | if banner.size: 109 | banner = self.vmlinux.read_symbol(banner) 110 | else: 111 | banner = self.vmlinux.vaddr_read_string(banner.vaddr) 112 | 113 | return self.version_from_banner(banner) 114 | 115 | def __version_from_make(self) -> KernelVersion: 116 | v = ensure_command('make kernelversion', self.kdir) 117 | return self.version_from_str(v) 118 | 119 | @property 120 | def version(self) -> KernelVersion: 121 | if self.__version is None: 122 | if self.vmlinux: 123 | self.__version = self.__version_from_vmlinux() 124 | self.__version_source = 'vmlinux' 125 | elif self.kdir: 126 | # This could in theory be tried even if __version_from_vmlinux() 127 | # fails... but if that fails there are probably bigger problems. 128 | self.__version = self.__version_from_make() 129 | self.__version_source = 'make' 130 | 131 | if self.__version is None: 132 | raise KernelVersionError('unable to determine kernel version') 133 | return self.__version 134 | 135 | @property 136 | def version_str(self) -> str: 137 | return '.'.join(map(str, self.version)) + f' (from {self.__version_source})' 138 | 139 | @property 140 | def version_tag(self) -> str: 141 | a, b, c = self.version 142 | if c == 0: 143 | return f'v{a}.{b}' 144 | return f'v{a}.{b}.{c}' 145 | 146 | @property 147 | def version_source(self) -> str: 148 | if self.__version_source or self.version: 149 | return self.__version_source 150 | return None 151 | 152 | @property 153 | def can_extract_location_info(self): 154 | return self.vmlinux.has_debug_info 155 | 156 | @property 157 | def can_extract_signature_info(self): 158 | return ( 159 | '__start_syscalls_metadata' in self.vmlinux.symbols 160 | or self.vmlinux.has_debug_info 161 | ) 162 | 163 | @property 164 | def syscalls(self) -> List[Syscall]: 165 | if self.__syscalls is None: 166 | self.__syscalls = self.__extract_syscalls() 167 | return self.__syscalls 168 | 169 | def __rel(self, path: Path) -> Path: 170 | return maybe_rel(path, self.kdir) 171 | 
172 | def __unpack_long(self, vaddr: int) -> int: 173 | return struct.unpack(self.__long_pack_fmt, self.vmlinux.vaddr_read(vaddr, self.__long_size))[0] 174 | 175 | def __iter_unpack_vmlinux(self, fmt: str, off: int, size: int = None) -> Iterator[Tuple[Any, ...]]: 176 | f = self.vmlinux.file 177 | assert f.seek(off) == off 178 | 179 | if size is None: 180 | chunk_size = struct.calcsize(fmt) 181 | while 1: 182 | yield struct.unpack(fmt, f.read(chunk_size)) 183 | else: 184 | yield from struct.iter_unpack(fmt, f.read(size)) 185 | 186 | def __iter_unpack_vmlinux_long(self, off: int, size: int = None) -> Iterator[int]: 187 | yield from map(itemgetter(0), self.__iter_unpack_vmlinux(self.__long_pack_fmt, off, size)) 188 | 189 | def __unpack_syscall_table(self, tbl: Symbol, target_section: Section) -> List[int]: 190 | tbl_file_off = self.vmlinux.vaddr_to_file_offset(tbl.vaddr) 191 | 192 | # This is the section we would like the function pointers to point to, 193 | # we'll warn or halt in case we find fptrs pointing outside 194 | vstart = target_section.vaddr 195 | vend = vstart + target_section.size 196 | 197 | if tbl.size > 0x80: 198 | logging.info('Syscall table (%s) is %d bytes, %d entries', tbl.name, 199 | tbl.size, tbl.size // self.__long_size) 200 | 201 | vaddrs = list(self.__iter_unpack_vmlinux_long(tbl_file_off, tbl.size)) 202 | 203 | # Sanity check: ensure all vaddrs are within the target section 204 | for idx, vaddr in enumerate(vaddrs): 205 | if not (vstart <= vaddr < vend): 206 | logging.warn('Virtual address 0x%x idx %d is outside %s: ' 207 | 'something is off!', vaddr, tbl.name, idx, target_section.name) 208 | else: 209 | # Apparently on some archs (e.g. MIPS, PPC) the syscall table symbol 210 | # can have size 0. In this case we'll just warn the user and keep 211 | # extracting vaddrs as long as they are valid, stopping at the first 212 | # invalid one or at the next symbol we encounter. 
213 | logging.warn('Syscall table (%s) has bad size (%d), doing my best ' 214 | 'to figure out when to stop', tbl.name, tbl.size) 215 | 216 | cur_idx_vaddr = tbl.vaddr 217 | boundary = self.vmlinux.next_symbol(tbl) 218 | boundary = boundary.vaddr if boundary else float('inf') 219 | vaddrs = [] 220 | 221 | for vaddr in self.__iter_unpack_vmlinux_long(tbl_file_off): 222 | # Stop at the first vaddr pointing outside target_section 223 | if not (vstart <= vaddr < vend): 224 | break 225 | 226 | # Stop if we collide with another symbol right after the syscall 227 | # table (may be another syscall table e.g. the compat one) 228 | if cur_idx_vaddr >= boundary: 229 | break 230 | 231 | vaddrs.append(vaddr) 232 | cur_idx_vaddr += self.__long_size 233 | 234 | logging.info('Syscall table seems to be %d bytes, %d entries', 235 | cur_idx_vaddr - tbl.vaddr, len(vaddrs)) 236 | 237 | return vaddrs 238 | 239 | def __syscall_vaddrs_from_syscall_table(self) -> Dict[int,int]: 240 | tbl = self.vmlinux.symbols.get(self.arch.syscall_table_name) 241 | if not tbl: 242 | logging.critical('Unable to find %s symbol!', 243 | self.arch.syscall_table_name) 244 | return {} 245 | 246 | logging.debug('Syscall table: %r', tbl) 247 | 248 | # Read and parse the syscall table unpacking all virtual addresses it 249 | # contains. Depending on arch, we might need to parse function 250 | # descriptors for the function pointers in the syscall table. 251 | 252 | text = self.vmlinux.sections['.text'] 253 | vaddrs = {} 254 | 255 | if self.arch.uses_function_descriptors: 256 | text_vstart = text.vaddr 257 | text_vend = text_vstart + text.size 258 | 259 | # Even if this arch uses function descriptors, we don't know if they 260 | # are effectively used for function pointers in the syscall table. 261 | # This needs to be tested, and in case they aren't used, we can 262 | # fallback to "normal" parsing instead. 
263 | if not (text_vstart <= self.__unpack_long(tbl.vaddr) < text_vend): 264 | logging.debug('Syscall table uses function descriptors') 265 | 266 | opd = self.vmlinux.sections.get('.opd') 267 | if not opd: 268 | logging.critical('Arch uses function descriptors, but ' 269 | 'vmlinux has no .opd section!') 270 | return {} 271 | 272 | descriptors = self.__unpack_syscall_table(tbl, opd) 273 | 274 | # Translate function descriptors (one more level of indirection) 275 | for i, desc_vaddr in enumerate(descriptors): 276 | vaddr = self.vmlinux.vaddr_read(desc_vaddr, self.__long_size) 277 | vaddr = struct.unpack(self.__long_pack_fmt, vaddr)[0] 278 | 279 | if not (text_vstart <= vaddr < text_vend): 280 | logging.warn('Function descriptor at 0x%x points ' 281 | 'outside .text: something is off!', desc_vaddr) 282 | 283 | vaddrs[i] = vaddr 284 | else: 285 | logging.debug('Syscall table does NOT use function descriptors') 286 | 287 | if not vaddrs: 288 | vaddrs = dict(enumerate(self.__unpack_syscall_table(tbl, text))) 289 | 290 | if not vaddrs: 291 | logging.critical('Could not extract any valid function pointer ' 292 | 'from %s, giving up!', self.arch.syscall_table_name) 293 | logging.critical('Is the kernel relocatable? Relocation entries ' 294 | 'for the syscall table are not supported.') 295 | return {} 296 | 297 | return vaddrs 298 | 299 | def __extract_syscalls(self) -> List[Syscall]: 300 | if self.arch.bits32 != self.vmlinux.bits32: 301 | a, b = (32, 64) if self.arch.bits32 else (64, 32) 302 | logging.critical('Selected arch is %d-bit, but kernel is %d-bit', a, b) 303 | return [] 304 | 305 | self.arch.adjust_abi(self.vmlinux) 306 | logging.debug('Arch: %r', self.arch) 307 | 308 | have_syscall_table = self.arch.syscall_table_name is not None 309 | 310 | if have_syscall_table: 311 | vaddrs = self.__syscall_vaddrs_from_syscall_table() 312 | else: 313 | logging.warn('No syscall table available! 
Trying my best...') 314 | vaddrs = self.arch.extract_syscall_vaddrs(self.vmlinux) 315 | 316 | if not vaddrs: 317 | logging.critical('Unable to extract any syscall vaddr, giving up!') 318 | return [] 319 | 320 | # Find all ni_syscall symbols (there might be multiple) and keep track 321 | # of them for later in order to detect non-implemented syscalls. 322 | ni_syscalls = set() 323 | 324 | for sym in self.vmlinux.functions.values(): 325 | if self.arch.symbol_is_ni_syscall(sym): 326 | ni_syscalls.add(sym) 327 | 328 | for sym in sorted(ni_syscalls, key=attrgetter('name')): 329 | logging.debug('Found ni_syscall: %r', sym) 330 | 331 | if not ni_syscalls: 332 | logging.critical('No ni_syscall found!') 333 | return [] 334 | 335 | seen = set(vaddrs.values()) 336 | symbols_by_vaddr = {sym.vaddr: sym for sym in ni_syscalls} 337 | discarded_logs = [] 338 | preferred_logs = [] 339 | 340 | # Create a mapping vaddr -> symbol for every vaddr in the syscall table 341 | # for convenience. Sort symbols by name for reproducible results. We 342 | # look at .symbols instead of .functions here because (of course) some 343 | # of these symbols may not be classified as FUNC. 
344 | for sym in sorted(self.vmlinux.symbols.values(), key=attrgetter('name')): 345 | vaddr = sym.vaddr 346 | if vaddr not in seen: 347 | continue 348 | 349 | other = symbols_by_vaddr.get(vaddr) 350 | if sym == other: 351 | continue 352 | 353 | if other is not None: 354 | if other in ni_syscalls and sym not in ni_syscalls: 355 | # Don't allow other symbols to "override" a ni_syscall 356 | if logging.root.isEnabledFor(logging.DEBUG): 357 | discarded_logs.append((sym.name, other.name)) 358 | continue 359 | 360 | pref = self.arch.preferred_symbol(sym, other) 361 | sym, other = pref, (other if pref is sym else sym) 362 | 363 | if high_verbosity(): 364 | preferred_logs.append((pref.name, other.name)) 365 | 366 | symbols_by_vaddr[vaddr] = sym 367 | 368 | # Sort logs for reproducible output (the above sorting does not 369 | # guarantee that these are sorted as well). 370 | discarded_logs.sort() 371 | preferred_logs.sort() 372 | 373 | for sym, other in discarded_logs: 374 | logging.debug('Discarding %s as alias for %s', sym, other) 375 | 376 | for sym, other in preferred_logs: 377 | logging.debug('Preferring %s over %s', sym, other) 378 | 379 | del discarded_logs 380 | del preferred_logs 381 | del seen 382 | 383 | if not symbols_by_vaddr: 384 | logging.critical('Unable to find any symbol in the syscall table, giving up!') 385 | logging.critical('Is "%s" the correct arch/ABI combination for ' 386 | 'this kernel?', self.arch_name) 387 | return [] 388 | 389 | # Sanity check: the only repeated vaddrs in the syscall table should be 390 | # the ones for *_ni_syscall. Warn in case there are others. 391 | counts = Counter(vaddrs.values()).items() 392 | counts = filter(lambda c: c[1] > 1, counts) 393 | counts = sorted(counts, key=itemgetter(1), reverse=True) 394 | 395 | if counts: 396 | # In case of no syscall table, ni_syscalls may have already been 397 | # filtered by arch-specific extraction code, so don't sweat it. 
398 | if any(sym in ni_syscalls for sym in vaddrs.values()): 399 | best = symbols_by_vaddr[counts[0][0]] 400 | 401 | if best not in ni_syscalls: 402 | logging.error('Interesting! I was expecting *_ni_syscall to be the ' 403 | 'most frequent symbol in the syscall table, but %s is (' 404 | 'appearing %d times).', best.name, counts[0][1]) 405 | 406 | for va, n in counts: 407 | sym = symbols_by_vaddr.get(va, f'{va:#x} ') 408 | if sym not in ni_syscalls: 409 | logging.warn('Interesting! Vaddr found %d times: %s', n, sym) 410 | 411 | symbols = [] 412 | symbol_names = [] 413 | ni_count = defaultdict(int) 414 | 415 | # Filter out only defined syscalls 416 | for idx, vaddr in sorted(vaddrs.items()): 417 | sym = symbols_by_vaddr.get(vaddr) 418 | if sym is None: 419 | if have_syscall_table: 420 | logging.error('Unable to find symbol for %s[%d]: 0x%x', 421 | self.arch.syscall_table_name, idx, vaddr) 422 | else: 423 | logging.error('Unable to find symbol for #%d 0x%x', idx, 424 | vaddr) 425 | continue 426 | 427 | if high_verbosity(): 428 | if have_syscall_table: 429 | logging.debug('%s[%d]: %s', self.arch.syscall_table_name, 430 | idx, sym) 431 | else: 432 | logging.debug('#%d: %s', idx, sym) 433 | 434 | if sym in ni_syscalls: 435 | ni_count[sym] += 1 436 | continue 437 | 438 | symbols.append((idx, sym)) 439 | symbol_names.append(sym.name) 440 | 441 | # Find common syscall symbol prefixes (e.g. "__x64_sys_") in order to be 442 | # able to strip them later to obtain the actual syscall name 443 | prefixes = common_syscall_symbol_prefixes(symbol_names, 20) 444 | if prefixes: 445 | logging.info('Common syscall symbol prefixes: %s', ', '.join(prefixes)) 446 | else: 447 | logging.warn('No common syscall symbol prefixes found (weird!)') 448 | 449 | syscalls = [] 450 | n_skipped = 0 451 | 452 | # Build list of syscalls (with prefixes stripped from the names) and 453 | # skip uneeded ones (e.g. 
implemented for other ABIs) 454 | for idx, sym in symbols: 455 | num = self.arch.syscall_num_base + idx 456 | origname = self.arch.translate_syscall_symbol_name(sym.name) 457 | origname = noprefix(origname, *prefixes) 458 | name = self.arch.normalize_syscall_name(origname) 459 | kdeps = kconfig_syscall_deps(name, self.version, self.arch) 460 | 461 | # We could need the original name to differentiate some syscalls 462 | # in order to understand if they need some Kconfig or not 463 | if not kdeps: 464 | kdeps = kconfig_syscall_deps(origname, self.version, self.arch) 465 | 466 | num = self.arch.adjust_syscall_number(num) 467 | sc = Syscall(idx, num, name, origname, sym, kdeps) 468 | 469 | if self.arch.skip_syscall(sc): 470 | logging.debug('Skipping %s', sym.name) 471 | n_skipped += 1 472 | continue 473 | 474 | syscalls.append(sc) 475 | 476 | ni_total = 0 477 | for sym, n in sorted(ni_count.items(), key=itemgetter(1), reverse=True): 478 | logging.info('%d entries point to %s', n, sym.name) 479 | ni_total += n 480 | 481 | # Add esoteric syscalls to the list, if any. These do not need any name 482 | # translation or signature search. Some may need tailored static binary 483 | # analysis. Very fun. 
484 | esoteric = self.arch.extract_esoteric_syscalls(self.vmlinux) 485 | n_esoteric = len(esoteric) 486 | 487 | # Log these, they are interesting 488 | if esoteric: 489 | logging.info('Found %d esoteric syscall%s: %s', n_esoteric, 490 | 's'[:n_esoteric ^ 1], ', '.join(map(itemgetter(1), esoteric))) 491 | 492 | for num, name, sym_name, sig, kconf in esoteric: 493 | sym = self.vmlinux.symbols[sym_name] 494 | syscalls.append(Syscall(None, num, name, name, sym, kconf, signature=sig, esoteric=True)) 495 | 496 | assert len(syscalls) == len(vaddrs) - ni_total - n_skipped + n_esoteric 497 | 498 | # Extract the most common ni_syscall symbol we found (if any) and its 499 | # code to use it as a reference to detect other non-implemented syscalls 500 | # (whose handlers could simply be inlined ni_syscall code). 501 | ni_ref_sym = max(ni_count, key=ni_count.get) if ni_count else None 502 | ni_ref_code = self.vmlinux.read_symbol(ni_ref_sym) if ni_ref_sym else None 503 | 504 | # Some syscalls are just a dummy function that does `return -ENOSYS` or 505 | # some other error, meaning that the syscall is not actually 506 | # implemented, even if present in the syscall table. We can filter those 507 | # out using .is_dummy_syscall() by either exactly matching the reference 508 | # ni_syscall code extracted above or invoking arch-specific logic (if 509 | # any). 510 | # 511 | # We are however not guaranteed to catch everything. For example, 512 | # .is_dummy_syscall() may be useless if the symbol has bad/zero size or 513 | # if the compiler does something funny and uses weird instructions. 514 | # Unless we check sources, we can always have false positives even after 515 | # this step. 516 | syscalls = list(filter( 517 | lambda s: not self.arch.is_dummy_syscall(s, self.vmlinux, ni_ref_sym, ni_ref_code), 518 | syscalls 519 | )) 520 | 521 | # Find locations and signatures for all the syscalls we found (except 522 | # esoteric ones). 
523 | extract_syscall_locations(syscalls, self.vmlinux, self.arch, self.kdir, self.rdir) 524 | extract_syscall_signatures(syscalls, self.vmlinux, self.kdir is not None) 525 | 526 | # Second pass to extract only implemented syscalls: warn for potentially 527 | # bad matches and filter out invalid ones. 528 | implemented = [] 529 | bad_loc_info = [] 530 | no_loc_info = [] 531 | no_sig_info = [] 532 | 533 | for sc in syscalls: 534 | file, line, good = sc.file, sc.line, sc.good_location 535 | 536 | if not sc.esoteric and not good and file is not None: 537 | if self.__rel(file).match('kernel/sys_ni.c'): 538 | # If we got to this point the location is still not 539 | # "good" and points to sys_ni.c even after 540 | # adjusting/grepping. Assume the syscall is not 541 | # implemented. Granted, this could in theory lead to 542 | # false negatives, but I did not encounter one yet. 543 | # Since we are grepping the source code this should NOT 544 | # happen for implemented syscalls. Nonetheless warn 545 | # about it, so we can double check and make sure 546 | # everything is fine. 
547 | logging.warn('Assuming %s is not implemented as it ' 548 | 'points to %s:%d after adjustments', sc.name, 549 | self.__rel(file), line) 550 | continue 551 | 552 | if self.kdir: 553 | if file.match('*.S'): 554 | hint = ' (implemented in asm?)' 555 | elif file.match('*.c'): 556 | hint = ' (normal function w/o asmlinkage?)' 557 | else: 558 | hint = '' 559 | 560 | bad_loc_info.append(( 561 | sc.name, 562 | sc.symbol.name, 563 | self.__rel(file), 564 | str(line), 565 | hint 566 | )) 567 | 568 | if file is None and self.can_extract_location_info: 569 | no_loc_info.append((sc.name, sc.symbol.name)) 570 | 571 | if sc.signature is None and self.can_extract_signature_info: 572 | no_sig_info.append((sc.name, sc.symbol.name)) 573 | 574 | implemented.append(sc) 575 | 576 | for info in bad_loc_info: 577 | logging.warn('Potentially bad location for %s (%s): %s:%s%s', *info) 578 | 579 | for info in no_loc_info: 580 | logging.error('Unable to find location for %s (%s)', *info) 581 | 582 | for info in no_sig_info: 583 | logging.error('Unable to extract signature for %s (%s)', *info) 584 | 585 | return implemented 586 | 587 | def __try_set_optimization_level(self, lvl: int) -> bool: 588 | # Might be the most ignorant thing in this whole codebase :') 589 | 590 | with (self.kdir / 'Makefile').open('r+') as f: 591 | self.__backup_makefile = data = f.read() 592 | assert f.seek(0) == 0 593 | 594 | match = re.search(r'^KBUILD_CFLAGS\s*\+=\s*-O(2)\n', data, re.MULTILINE) 595 | if not match: 596 | return False 597 | 598 | start, end = match.span(1) 599 | f.write(data[:start] + str(lvl) + data[end:]) 600 | f.truncate() 601 | 602 | return True 603 | 604 | def __restore_makefile(self): 605 | if self.__backup_makefile: 606 | with (self.kdir / 'Makefile').open('w') as f: 607 | f.write(self.__backup_makefile) 608 | else: 609 | logging.error('Restoring Makefile without backing it up first???') 610 | 611 | atexit.unregister(self.__restore_makefile) 612 | 613 | def make(self, target: str, 
stdin=None, ensure=True) -> int: 614 | j = max(len(sched_getaffinity(0)) - 1, 1) 615 | cmd = ['make', f'-j{j}', f'ARCH={self.arch.name}'] 616 | 617 | # Generate debug info with relative paths to make our life easier for 618 | # later analysis. 619 | cmd += [f"KCFLAGS='-fdebug-prefix-map={self.kdir.absolute()}=.'"] 620 | 621 | if self.toolchain_prefix: 622 | cmd += [f'CROSS_COMPILE={self.toolchain_prefix}'] 623 | if self.outdir: 624 | cmd += [f'O={self.outdir}'] 625 | 626 | if ensure: 627 | ensure_command(cmd + [target], self.kdir, stdin, False, high_verbosity()) 628 | return 0 629 | 630 | return run_command(cmd + [target], self.kdir, stdin, high_verbosity()) 631 | 632 | def sync_config(self): 633 | '''Set any config that was "unlocked" by others to its default value. 634 | The make target for this depends on the kernel version. 635 | ''' 636 | if self.version >= (3, 7): 637 | self.make('olddefconfig') 638 | else: 639 | # Ugly, but oldconfig can error out if no input is given. 640 | self.make('oldconfig', stdin='\n' * 1000) 641 | 642 | def clean(self): 643 | self.__version = None 644 | self.make('distclean') 645 | 646 | def configure(self): 647 | config_file = (self.outdir or self.kdir) / '.config' 648 | self.__version = None 649 | 650 | logging.info('Configuring for Arch: %r', self.arch) 651 | logging.info('Base config target(s): %s', ', '.join(self.arch.config_targets)) 652 | 653 | for target in self.arch.config_targets: 654 | self.make(target) 655 | 656 | # TODO: maybe create a check that ensures these are actually applied and 657 | # consistent? E.G. check if all the configs that are supposed to exist 658 | # in a version actually exist when running the tool and keep the wanted 659 | # value after `make olddefconfig`. 
660 | 661 | logging.info('Applying debugging configs') 662 | edit_config(self.kdir, config_file, kconfig_debugging(self.version)) 663 | self.sync_config() 664 | 665 | logging.info('Applying compatibility configs') 666 | edit_config(self.kdir, config_file, kconfig_compatibility(self.version)) 667 | self.sync_config() 668 | 669 | logging.info('Enabling more syscalls') 670 | edit_config_check_deps(self.kdir, config_file, kconfig_more_syscalls(self.version)) 671 | self.sync_config() 672 | 673 | logging.info('Applying arch-specific configs') 674 | edit_config_check_deps(self.kdir, config_file, self.arch.kconfig[self.version]) 675 | self.sync_config() 676 | 677 | def build(self, try_disable_opt: bool = False) -> float: 678 | start = monotonic() 679 | self.__version = None 680 | 681 | if try_disable_opt: 682 | logging.info('Trying to build with optimizations disabled (-O0)') 683 | 684 | # This will either work or fail for any level. If it fails, just 685 | # do a normal build with ensure=True, which will exit in case of 686 | # failure. 
687 | if self.__try_set_optimization_level(0): 688 | atexit.register(self.__restore_makefile) 689 | res = self.make('vmlinux', ensure=False) 690 | self.__restore_makefile() 691 | 692 | if res == 0: 693 | return monotonic() - start 694 | 695 | logging.error('Failed to build with -O0, trying -O1') 696 | 697 | self.__try_set_optimization_level(1) 698 | res = self.make('vmlinux', ensure=False) 699 | self.__restore_makefile() 700 | 701 | if res == 0: 702 | return monotonic() - start 703 | 704 | logging.error('Failed to build with -O1, doing a normal build') 705 | else: 706 | logging.warn('Unable to patch Makefile to disable ' 707 | 'optimizations, doing a normal build instead') 708 | 709 | self.make('vmlinux') 710 | return monotonic() - start 711 | -------------------------------------------------------------------------------- /src/systrack/location.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import re 3 | import sys 4 | 5 | from operator import attrgetter 6 | from pathlib import Path 7 | from typing import Tuple, List, Set, Iterable, Iterator, Optional 8 | 9 | from .arch import Arch 10 | from .elf import ELF 11 | from .syscall import Syscall 12 | from .utils import ensure_command, command_available, maybe_rel 13 | 14 | def addr2line(elf: Path, addrs: Iterable[int]) -> Iterator[Tuple[Optional[Path],Optional[int]]]: 15 | out = ensure_command(['addr2line', '-e', elf, *map(hex, addrs)]) 16 | 17 | for file, line in map(lambda d: d.split(':'), out.splitlines()): 18 | if file == '??': 19 | yield None, None 20 | continue 21 | 22 | line = int(line) if line.isdigit() else None 23 | yield Path(file), line 24 | 25 | def smart_addr2line(elf: Path, addrs: Iterable[int], srcdir: Path = None) -> Iterator[Tuple[Optional[Path],Optional[int]]]: 26 | '''Run addr2line on the given elf for the given virtual addresses remapping 27 | any returned paths to the given srcdir. 28 | 29 | addr2line will always output absolute paths. 
In case the paths in the ELF 30 | DWARF sections are relative (i.e. don't start with "/"), the directory 31 | containing the ELF is taken as base. This is problematic because if the ELF 32 | is moved from the original source directory the paths returned by addr2line 33 | will be invalid. 34 | 35 | To avoid this problem, whenever we know a different source directory, this 36 | function remaps the paths returned by addr2line to that directory instead. 37 | ''' 38 | locs = addr2line(elf, addrs) 39 | if srcdir is None: 40 | yield from locs 41 | 42 | elfdir = elf.parent 43 | for file, line in locs: 44 | if file is not None and file.is_relative_to(elfdir): 45 | yield srcdir / file.relative_to(elfdir), line 46 | else: 47 | yield file, line 48 | 49 | def grep_file(root: Path, exp: re.Pattern, file: Path) -> Iterator[str]: 50 | # Use binary mode since some kernel source files may contain weird 51 | # non-unicode chars and break everything (go figure). Decode a line only in 52 | # case of a match. 
53 | with file.open('rb') as f: 54 | for lineno, line in enumerate(f, 1): 55 | if exp.search(line): 56 | yield f'{file.relative_to(root)}:{lineno}:{line.rstrip().decode()}' 57 | 58 | def grep_recursive(root: Path, exp: re.Pattern, exclude: Set[str], 59 | curdir: Path = None) -> Iterator[str]: 60 | if curdir is None: 61 | curdir = root 62 | 63 | for path in curdir.iterdir(): 64 | if path.match('.*'): 65 | continue 66 | 67 | if path.is_file() and path.match('*.c'): 68 | yield from grep_file(root, exp, path) 69 | elif path.is_dir() and path.resolve() not in exclude: 70 | yield from grep_recursive(root, exp, exclude, path) 71 | 72 | def grep_kernel_sources(kdir: Path, arch: Arch, syscalls: List[Syscall]) -> Iterator[Tuple[Syscall,Path,int]]: 73 | if arch.compat: 74 | base_exp = r'\b(COMPAT_)?SYSCALL(32)?_DEFINE\d\s*\(' 75 | elif arch.bits32: 76 | base_exp = r'\bSYSCALL(32)?_DEFINE\d\s*\(' 77 | else: 78 | base_exp = r'\bSYSCALL_DEFINE\d\s*\(' 79 | 80 | oddstyle = arch.syscall_def_regexp() 81 | if oddstyle is not None: 82 | exp = fr'({base_exp}|{oddstyle})\s*\w+' 83 | else: 84 | exp = base_exp + r'\s*\w+' 85 | 86 | if not command_available('rg'): 87 | logging.debug('No ripgrep available :( falling back to slow python implementation') 88 | 89 | exclude = { 90 | (kdir / 'Documentation').resolve(), 91 | (kdir / 'drivers').resolve(), 92 | (kdir / 'lib').resolve(), 93 | (kdir / 'samples').resolve(), 94 | (kdir / 'sound').resolve(), 95 | (kdir / 'tools').resolve(), 96 | (kdir / 'usr').resolve(), 97 | } 98 | 99 | # Ignore other architectures 100 | for path in (kdir / 'arch').iterdir(): 101 | if not path.match(arch.name): 102 | exclude.add(path.resolve()) 103 | 104 | out = list(grep_recursive(kdir, re.compile(exp.encode()), exclude)) 105 | else: 106 | out = ensure_command(( 107 | 'rg', '--line-number', 108 | '--glob', '!Documentation/*', 109 | '--glob', '!drivers/*', 110 | '--glob', '!lib/*', 111 | '--glob', '!samples/*', 112 | '--glob', '!sound/*', 113 | '--glob', '!tools/*', 
114 | '--glob', '!usr/*', 115 | '--glob', '!arch/*', # ignore other architectures (important) 116 | '--glob', f'arch/{arch.name}', # include the correct one 117 | '--glob', '*.c', 118 | exp 119 | ), cwd=kdir).splitlines() 120 | 121 | exps = {s: re.compile(rf':{base_exp}{s.origname}[,)]') for s in syscalls} 122 | 123 | if arch.compat: 124 | key = lambda l: (l.startswith('arch'), ('COMPAT' in l) + ('SYSCALL32' in l)) 125 | elif arch.bits32: 126 | key = lambda l: (l.startswith('arch'), 'SYSCALL32' in l) 127 | else: 128 | key = lambda l: l.startswith('arch') 129 | 130 | # Prioritize files under arch/ and prefer compat/32bit syscalls if needed 131 | out.sort(key=key, reverse=True) 132 | 133 | for line in out: 134 | for sc, exp in exps.items(): 135 | if exp.search(line): 136 | file, line = line.split(':')[:2] 137 | yield sc, kdir / file, int(line) 138 | del exps[sc] 139 | break 140 | 141 | # Report failed matches 142 | for sc in exps: 143 | yield sc, None, None 144 | 145 | def good_definition(arch: Arch, definition: str, syscall_name: str) -> bool: 146 | # There are a lot of legacy/weird syscall definitions and some symbols can 147 | # therefore point (addr2line output) to old-style `asmlinkage` functions 148 | newstyle = ('^(COMPAT_)?' if arch.compat else '^') 149 | newstyle += rf'SYSCALL(32)?_DEFINE\d\s*\({syscall_name}\b' 150 | oldstyle = rf'^asmlinkage\s*(unsigned\s+)?\w+\s*sys(32)?_{syscall_name}\(' 151 | 152 | if re.match(f'{newstyle}|{oldstyle}', definition) is not None: 153 | return True 154 | 155 | # Also try matching old-style name if equal to full function name 156 | if syscall_name.startswith('sys_') and re.match(r'^asmlinkage\s*' 157 | rf'(unsigned\s+)?\w+\s*{syscall_name}\s*\(', definition) is not None: 158 | return True 159 | 160 | # Some archs use weirdly named SYSCALL_DEFINEn macros, e.g. PPC32 ABI on 161 | # PowerPC 64-bit with its "PPC32_SYSCALL_DEFINEn", or weirdly named sys_xxx 162 | # functions, e.g. 
def adjust_line(file: Path, line: int) -> int:
    '''Given a 1-based line number that may point inside a function body,
    return the 1-based line number of the function signature instead.

    The file is scanned upwards from the given line looking for a line that
    only holds an opening brace. If one is found, the first line above it that
    starts at column zero and is not a preprocessor directive is taken as the
    signature. If a line holding only a closing brace is met first, or no
    opening brace is found at all, line is returned unchanged.

    The program is terminated (exit status 1) if the file does not exist or
    has fewer lines than line.
    '''
    try:
        with file.open('rb') as src:
            content = src.readlines()
    except FileNotFoundError:
        # This will happen if we mismatch vmlinux and kernel sources. There's no
        # way we can keep going if kernel sources do not match the kernel we are
        # analyzing, and we only realize something's wrong when we encounter a
        # missing file or a file that is too short.
        logging.critical('File "%s" does not exist!', file)
        logging.critical('Do you have the correct source code version for this kernel?')
        sys.exit(1)

    # line is 1-based
    start = line - 1

    if start >= len(content):
        # Same vmlinux/sources mismatch as above
        logging.critical('File "%s" does not have enough lines of code!', file)
        logging.critical('Do you have the correct source code version for this kernel?')
        sys.exit(1)

    # Try getting up to the top of the current function body
    brace_idx = None

    for idx in range(start, -1, -1):
        stripped = content[idx].rstrip()

        if stripped == b'}' and idx != start:
            # We went up at least one line and found a '}': this means we were
            # not inside a function, give up
            return line

        if stripped == b'{':
            brace_idx = idx
            break

    if brace_idx is None:
        return line

    # Look above the brace for the signature: skip indented/blank lines and
    # preprocessor directives (a SYSCALL_DEFINE macro can be wrapped in
    # preprocessor guards, e.g., clone in v5.0 at kernel/fork.c:2328)
    for idx in range(brace_idx - 1, -1, -1):
        first = content[idx][:1]
        if first.isspace() or first == b'#':
            continue

        # Found function signature
        return idx + 1

    # Found start of function body (weird)
    logging.debug('Found start of function body, but not the actual '
        'signature: %s:%d', file, brace_idx + 1)
    return brace_idx + 1
239 | 240 | vmlinux = vmlinux.path 241 | locs = smart_addr2line(vmlinux, map(lambda s: s.symbol.real_vaddr, syscalls), kdir) 242 | locs = list(locs) 243 | 244 | if not kdir: 245 | for sc, (file, line) in zip(syscalls, locs): 246 | sc.file = file 247 | sc.line = line 248 | sc.good_location = False 249 | 250 | if any(map(attrgetter('file'), syscalls)): 251 | logging.warning('No kernel source available, trusting addr2line output for location info') 252 | else: 253 | logging.warning('No kernel source available and no addr2line output, cannot extract location info') 254 | 255 | return 256 | 257 | rel = lambda p: maybe_rel(p, kdir) 258 | bad_paths = False 259 | to_adjust = [] 260 | to_retry = [] 261 | to_grep = [] 262 | 263 | if rdir: 264 | remap = lambda p: kdir / maybe_rel(p, rdir) if p is not None else None 265 | else: 266 | remap = lambda p: kdir / p if p is not None else None 267 | 268 | # Try a simple line adjustment: lineno might point inside a function, but we 269 | # want the function signature. 
270 | 271 | for sc, loc in zip(syscalls, locs): 272 | file, line = loc 273 | sc.file = file = remap(file) 274 | sc.good_location = False 275 | 276 | if file is None or not file.is_file() or line is None: 277 | if sc.symbol.size > 1: 278 | to_adjust.append(sc) 279 | logging.debug('Location needs adjustment (invalid): %s (%s) -> ' 280 | '%s:%s', sc.origname, sc.symbol.name, *loc) 281 | else: 282 | to_grep.append(sc) 283 | logging.debug('Location needs grepping (invalid and sz <= 0): ' 284 | '%s (%s) -> %s:%s', sc.origname, sc.symbol.name, *loc) 285 | continue 286 | 287 | if not file.is_relative_to(kdir.resolve()): 288 | bad_paths = True 289 | 290 | sc.line = line = adjust_line(file, line) 291 | 292 | # For esoteric syscalls, only find a decent location for the symbol, 293 | # it's pointless to go deeper 294 | if sc.esoteric: 295 | continue 296 | 297 | if good_location(file, line, arch, sc): 298 | sc.good_location = True 299 | elif sc.symbol.size > 1: 300 | to_adjust.append(sc) 301 | logging.debug('Location needs adjustment (bad): %s (%s) -> %s:%d', 302 | sc.origname, sc.symbol.name, rel(file), line) 303 | else: 304 | to_grep.append(sc) 305 | logging.debug('Location needs grepping (bad and sz <= 0): %s (%s) ' 306 | '-> %s:%d', sc.origname, sc.symbol.name, rel(file), line) 307 | 308 | # STEP 2: Simple adjustment for bad/invalid locations: ask addr2line again 309 | # for vaddr + sz - 1 (except for symbols with sz <= 1). 310 | # 311 | # Rationale: The debug info for some syscall symbols points to the wrong 312 | # file/line, however the last few instructions of the function have a 313 | # better chance of pointing to the correct place. This is because in simple 314 | # syscalls (e.g. getuid, which only extracts a field from current) there is 315 | # no prolog/epilog, and since function calls like get_current() are 316 | # inlined, almost all instructions in the function body come from a macro or 317 | # function defined somewhere else. 
The final RET instruction is basically 318 | # the only one that truly belongs to the function. The workaround is to also 319 | # try checking vaddr + symbol_size - 1 with addr2line. 320 | to_adjust.sort(key=attrgetter('name')) 321 | 322 | if to_adjust: 323 | if len(to_adjust) == len(locs): 324 | # If we need to adjust every single location it's very likely that 325 | # the user gave us a wrong path as KDIR. This will make us attempt 326 | # full adjustment and grepping for every single syscall, which is 327 | # very slow. Warn so that the user figures this out without having 328 | # to wait for everything to complete. 329 | logging.warn('All the locations obtained from addr2line look bad, ' 330 | 'did you provide the correct KDIR?') 331 | logging.warn('Skipping location information extraction') 332 | return 333 | 334 | vaddrs = tuple(map(lambda s: s.symbol.real_vaddr + s.symbol.size - 1, to_adjust)) 335 | new_locs = smart_addr2line(vmlinux, vaddrs, kdir) 336 | 337 | for sc, loc in zip(to_adjust, new_locs): 338 | file, line = loc 339 | file = remap(file) 340 | 341 | if file is None or not file.is_file() or line is None: 342 | if sc.symbol.size > 2: 343 | to_retry.append(sc) 344 | logging.debug('Location needs full-range adjustment ' 345 | '(invalid): %s (%s+0x%x) -> %s:%s', sc.origname, 346 | sc.symbol.name, sc.symbol.size - 1, *loc) 347 | else: 348 | to_grep.append(sc) 349 | logging.debug('Location needs grepping (invalid and sz <= ' 350 | '1): %s (%s+0x%x) -> %s:%s', sc.origname, sc.symbol.name, 351 | sc.symbol.size - 1, *loc) 352 | continue 353 | 354 | line = adjust_line(file, line) 355 | 356 | if good_location(file, line, arch, sc): 357 | sc.file = file 358 | sc.line = line 359 | sc.good_location = True 360 | else: 361 | if sc.symbol.size > 2: 362 | to_retry.append(sc) 363 | logging.debug('Location needs full-range adjustment (bad): ' 364 | '%s (%s+0x%x) -> %s:%d', sc.origname, sc.symbol.name, 365 | sc.symbol.size - 1, rel(file), line) 366 | else: 367 | 
to_grep.append(sc) 368 | logging.debug('Location needs grepping (bad and sz <= 1): ' 369 | '%s (%s+0x%x) -> %s:%s', sc.origname, sc.symbol.name, 370 | sc.symbol.size - 1, rel(file), line) 371 | 372 | # STEP 3: Full-range adjustment for locations that are still bad/invalid: 373 | # ask addr2line again for ALL addresses from vaddr + 1 to vaddr + sz - 2 374 | # (except for symbols with sz <= 1). 375 | # 376 | # Reasoning: not much, just being optimistic. This is unlikely to work if 377 | # addr2line didn't find anything for vaddr nor for vaddr + sz - 1. If we get 378 | # to this point, there is probably no file/line debug info for it at all. 379 | to_retry.sort(key=attrgetter('name')) 380 | 381 | for sc in to_retry: 382 | addrs = range(sc.symbol.real_vaddr + 1, sc.symbol.real_vaddr + sc.symbol.size - 2) 383 | invalid = True 384 | 385 | for offset, loc in enumerate(smart_addr2line(vmlinux, addrs, kdir), 1): 386 | file, line = loc 387 | file = remap(file) 388 | 389 | if file is None or not file.is_file() or line is None: 390 | continue 391 | 392 | invalid = False 393 | line = adjust_line(file, int(line)) 394 | 395 | if good_location(file, line, arch, sc): 396 | sc.file = file 397 | sc.line = line 398 | sc.good_location = True 399 | logging.debug('Location found through full-range adjustment: %s' 400 | ' (%s+0x%x) -> %s:%d', sc.origname, sc.symbol.name, offset, 401 | rel(file), line) 402 | break 403 | else: 404 | if invalid: 405 | logging.debug('Location needs grepping (invalid): %s (%s) -> ' 406 | '%s:%s', sc.origname, sc.symbol.name, *loc) 407 | else: 408 | logging.debug('Location needs grepping (bad): %s (%s) -> %s:%d', 409 | sc.origname, sc.symbol.name, rel(file), line) 410 | 411 | to_grep.append(sc) 412 | 413 | if bad_paths: 414 | logging.error('Cannot grep source code, debug info points outside ' 415 | 'provided kernel source directory') 416 | return 417 | 418 | # STEP 4: Still bad? Use the big hammer: recursively grep kernel sources. 
419 | # 420 | # Rationale: On x86 (and maybe others) some syscalls wrongly point to the 421 | # first COND_SYSCALL() macro found in kernel/sys_ni.c (e.g. userfaultfd). 422 | # Clang gives a slightly better location than gcc: still in this file, but 423 | # at the right line and not just pointing to the first COND_SYSCALL(). 424 | # Still, this is not the real location for the actual syscall code. The 425 | # symbols for these syscalls are also marked WEAK for some reason (as can be 426 | # seen from `readelf -Ws`). The only real workaround I can think of in this 427 | # case is to just grep the source code for definitions of the form 428 | # "SYSCALL_DEFINEx(name, ...". 429 | # 430 | # Disabling compiler optimizations could help, but the kernel does not have 431 | # a CONFIG_ option for that, and generally highly relies on optimizations. 432 | # Granted, the point here is not to build a runnable kernel, but still. 433 | # 434 | # In any case, even if they look legitimate, we cannot be sure of the 435 | # correctness of definitions found through grepping. For example, we could 436 | # be working with a 64-bit kernel with compat 32-bit support and find two 437 | # definitions using the exact same SYSCALL_DEFINEx macro guarded by some 438 | # preprocessor guards: we cannot know which one is correct in such case, the 439 | # only way would be to manually analyze the code or magically invoke the 440 | # preprocessor (which we are not even going to bother trying). 
def output_syscalls_text(syscalls: Iterable[Syscall], spacing: int = 2):
    '''Print the given syscalls to standard output as a plain-text table with
    left-aligned columns separated by spacing spaces.

    A blank line is printed between two consecutive syscalls whose numbers
    differ by more than one, to visually separate groups of contiguous syscall
    numbers. Locations that are not known to be good are prefixed with "(?)".

    Any iterable is accepted (it is consumed exactly once). If it is empty,
    only the table header is printed.
    '''
    header = (
        'INDEX', 'NUMBER', 'NAME', 'ORIG NAME', 'SYMBOL', 'LOCATION', 'KCONFIG',
        'SIGNATURE'
    )
    table = [header]
    # Number of the previously seen syscall, None until the first one is seen.
    # Tracking it this way avoids indexing syscalls[0], which fails on empty
    # input and on iterables that are not sequences.
    prevnum = None

    for sc in syscalls:
        if prevnum is not None and sc.number - prevnum > 1:
            # Blank line to separate groups of contiguous syscall numbers
            table.append(None)

        prevnum = sc.number

        if sc.file and sc.line:
            loc = f'{sc.file}:{sc.line}'
        elif sc.file:
            loc = str(sc.file)
        else:
            loc = ''

        if loc and not sc.good_location:
            loc = '(?) ' + loc

        if sc.signature:
            signature = ', '.join(sc.signature)
        elif sc.signature is None:
            # Signature could not be determined
            signature = '?'
        else:
            # Known signature with no arguments
            signature = 'void'

        table.append((
            f'{sc.index:-3d}' if sc.index is not None else '- ',
            hex(sc.number),
            sc.name,
            sc.origname if sc.origname != sc.name else '',
            sc.symbol.name,
            loc,
            sc.kconfig.replace('CONFIG_', '') if sc.kconfig else '',
            signature
        ))

    # Each column is as wide as its widest cell (separator rows are None)
    widths = [max(len(row[i]) for row in table if row) for i in range(len(header))]
    sep = ' ' * spacing

    for row in table:
        if row:
            print(sep.join(cell.ljust(width) for cell, width in zip(row, widths)))
        else:
            print()

    sys.stdout.flush()
def output_syscalls(kernel: Kernel, fmt: str):
    '''Write the syscalls of the given kernel to standard output in the
    requested format: 'text', 'json' or 'html'.

    Any other format terminates the program with an error message.
    '''
    if fmt not in ('text', 'json', 'html'):
        sys.exit('Output format not implemented!')

    if fmt == 'text':
        # The text writer only needs the list of syscalls
        output_syscalls_text(kernel.syscalls)
        return

    if fmt == 'json':
        output_syscalls_json(kernel)
        return

    output_syscalls_html(kernel)
def syscall_signature_from_source(file: Path, line: int, big_endian: bool) -> Tuple[str, ...]:
    '''Extract the signature of a syscall from its definition in the source
    code, which is expected to start at the given (1-based) line of file.

    The definition is read line by line until a line ending with a closing
    parenthesis is found. Only two kinds of definition are handled:

        SYSCALL_DEFINEx(name, type1, arg1, type2, arg2, ...)
        asmlinkage xxx sys_xxx(type1 arg1, type2 arg2, ...)

    Returns a tuple of "type name" strings (one per argument, empty if the
    syscall takes no arguments), or None if the text at the given location
    does not look like a syscall definition or the file ends before the
    definition is complete (an error is logged in both cases). big_endian is
    forwarded to parse_signature() for the expansion of 64-bit argument macros.
    '''
    with file.open('r') as f:
        # Skip to the requested line; on a file that is too short this simply
        # reaches EOF, which is detected right below
        for _ in range(line - 1):
            if not f.readline():
                break

        sig = f.readline().strip()
        while not sig.endswith(')'):
            chunk = f.readline()
            if not chunk:
                # readline() returns an empty string at EOF: without this
                # check an unterminated definition would loop forever
                logging.error('Unterminated syscall signature (reached end '
                    'of file): %s:%d', file, line)
                return None

            sig += chunk.strip()

    newsig = noprefix(sig, 'SYSCALL_DEFINE', 'SYSCALL32_DEFINE',
        'COMPAT_SYSCALL_DEFINE', 'COMPAT_SYSCALL32_DEFINE',
        'PPC32_SYSCALL_DEFINE')

    if sig != newsig:
        # SYSCALL_DEFINEx(name, ...): what is left starts with the digit x
        sig = newsig
        start = sig.find(',') + 1
        nargs = int(sig[0])
        assert nargs <= 6, f'SYSCALL_DEFINE{nargs}? {file}:{line}'

        if start == 0:
            assert nargs == 0, f'Expected {nargs} arguments, but found 0: {file}:{line}'
            return () # no arguments

        sig = sig[start:sig.rfind(')')]
        # Remove __user annotation, collapse multiple spaces into one and remove
        # spaces between double pointers
        sig = ' '.join(sig.replace(' __user', '').split()).replace('* *', '**')
        sig = parse_signature(sig, big_endian)

        assert len(sig) % 2 == 0 and len(sig) // 2 == nargs, f'Bad signature after parsing: {file}:{line}'
        # Types and names alternate: pair them up, with no space after a
        # pointer asterisk
        sig = tuple(f'{t}{" " * (t[-1] != "*")}{n}' for t, n in zip(sig[::2], sig[1::2]))
    elif sig.startswith('asmlinkage'):
        start = sig.find('(') + 1
        sig = sig[start:sig.rfind(')')].strip()
        if not sig or sig == 'void':
            return () # no arguments

        # Remove __user annotation, collapse multiple spaces into one and remove
        # spaces between asterisks of pointers
        sig = ' '.join(sig.replace(' __user', '').split()).replace('* *', '**')

        # We are assuming macros like arg_u32p are only used for SYSCALL_DEFINEx
        assert '(' not in sig and ')' not in sig, f'Unexpected parentheses in signature: {file}:{line}'
        sig = tuple(map(str.strip, sig.split(',')))
        assert len(sig) <= 7, f'Syscall with {len(sig)} arguments? {file}:{line}'
    else:
        logging.error("This doesn't look like a syscall signature: %s:%d", file, line)
        return None

    return sig
If the kernel was compiled 108 | # with CONFIG_FTRACE_SYSCALLS=y we have signature information available in a 109 | # bunch of `struct syscall_metadata` objects. 110 | if have_syscall_metadata: 111 | logging.info('Kernel has ftrace syscall metadata from FTRACE_SYSCALLS=y') 112 | 113 | start = vmlinux.symbols['__start_syscalls_metadata'].real_vaddr 114 | stop = vmlinux.symbols['__stop_syscalls_metadata'].real_vaddr 115 | ptr_fmt = '<>'[vmlinux.big_endian] + 'QL'[vmlinux.bits32] 116 | meta_fmt = '<>'[vmlinux.big_endian] + ('QllQQ', 'LllLL')[vmlinux.bits32] 117 | ptr_sz = 4 if vmlinux.bits32 else 8 118 | meta_sz = 8 + 3 * ptr_sz 119 | ptrs = map(itemgetter(0), iter_unpack(ptr_fmt, vmlinux.vaddr_read(start, stop - start))) 120 | 121 | # Sanity check 122 | open_meta = vmlinux.symbols.get('__syscall_meta__open') 123 | if open_meta and open_meta.size: 124 | assert open_meta.size >= meta_sz 125 | 126 | for ptr in ptrs: 127 | # Number (second field) is filled at boot and always -1 128 | name, _, nargs, types, args = unpack(meta_fmt, vmlinux.vaddr_read(ptr, meta_sz)) 129 | # Sanity check: nargs > 0 => types != NULL and args != NULL 130 | assert nargs >= 0 and (nargs == 0 or (types and args)) 131 | 132 | name = vmlinux.vaddr_read_string(name).strip() 133 | name = noprefix(name, 'sys_') 134 | sig = [] 135 | 136 | for i in range(nargs): 137 | typ = unpack(ptr_fmt, vmlinux.vaddr_read(types + i * ptr_sz, ptr_sz))[0] 138 | arg = unpack(ptr_fmt, vmlinux.vaddr_read(args + i * ptr_sz, ptr_sz))[0] 139 | typ = vmlinux.vaddr_read_string(typ).strip() 140 | arg = vmlinux.vaddr_read_string(arg).strip() 141 | # Double pointers can have spaces between asterisks 142 | typ = typ.replace('* *', '**') 143 | sig.append(f'{typ}{" " * (typ[-1] != "*")}{arg}') 144 | 145 | meta[name] = tuple(sig) 146 | else: 147 | logging.info('Kernel DOES NOT have ftrace syscall metadata') 148 | 149 | # Now extract signatures from the source code based on the location info we 150 | # [should] already have 151 | for 
sc in filter(lambda s: not s.esoteric, syscalls): 152 | if sc.good_location: 153 | # We know that this location points to a `SYSCALL_DEFINEx` or an 154 | # `asmlinkage` function: extract signature right from source code 155 | assert have_source, 'good location with no kernel source? WHAT' 156 | sc.signature = syscall_signature_from_source(sc.file, sc.line, vmlinux.big_endian) 157 | elif have_syscall_metadata: 158 | # Weird/bad location, fallback to FTRACE_SYSCALLS metadata if 159 | # possible 160 | sig = meta.get(sc.origname) 161 | if sig is None: 162 | sig = meta.get(sc.name) 163 | if sig is None: 164 | logging.debug('Signature NOT found in ftrace metadata: %s', sc.name) 165 | continue 166 | 167 | sc.signature = sig 168 | logging.debug('Signature extracted from ftrace metadata: %s', sc.name) 169 | else: 170 | # Weird/bad location and no FTRACE_SYSCALLS metadata :( 171 | if sc.file is not None and sc.line is not None: 172 | logging.debug('Signature extraction skipped: %s at %s:%d', 173 | sc.name, sc.file, sc.line) 174 | 175 | return res 176 | 177 | -------------------------------------------------------------------------------- /src/systrack/syscall.py: -------------------------------------------------------------------------------- 1 | from collections import Counter 2 | from pathlib import Path 3 | from typing import List 4 | 5 | from .elf import Symbol 6 | 7 | class Syscall: 8 | '''Class representing a Linux syscall. 
def common_syscall_symbol_prefixes(names: List[str], threshold: int) -> List[str]:
    '''Given a list of symbol names, find and return a list of common prefixes
    of the form "xxx_" that appear in a number of symbols greater than or equal
    to threshold.

    Prefixes are returned from the longest to the shortest and are at least two
    characters long. An empty list of names yields an empty list of prefixes.

    For example, given that a bunch of syscalls in x86-64 start with __x64_sys_,
    this function returns ['__x64_sys_', '__x64_', '__'].
    '''
    res = []

    # default=0 makes the range below empty when there are no names at all
    # (a bare max() would raise ValueError)
    longest = max(map(len, names), default=0)

    for l in range(longest, 1, -1):
        # Count every distinct prefix of length l that ends with an underscore
        counts = Counter(n[:l] for n in names if len(n) >= l and n[l - 1] == '_')
        res.extend(prefix for prefix, count in counts.items() if count >= threshold)

    return res
font-style: italic; 72 | } 73 | 74 | /* Emojis! Use U+202f (narrow no-break space) to space header sort arrows and 75 | U+2002 (en space) to space emojis for bad locations and esoteric syscalls. */ 76 | 77 | table th.ascending::before { content: '\2b07\fe0f\202f'; } 78 | table th.descending::before { content: '\2b06\fe0f\202f'; } 79 | table td.bad::after { content: '\2002\26a0\fe0f'; } 80 | table td.esoteric::after { content: '\2002\1f984\fe0f'; } 81 | 82 | @media (any-hover: hover) { 83 | table th.sortable:hover { 84 | cursor: pointer; 85 | background-color: var(--table-head-bg-hover); 86 | } 87 | 88 | table tr:hover td { 89 | background-color: var(--table-row-bg-hover); 90 | } 91 | 92 | table a:hover { 93 | text-decoration: underline; 94 | } 95 | } 96 | -------------------------------------------------------------------------------- /src/systrack/templates/syscall_table.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Linux {{kernel_version_tag}} {{arch}} {{bits}}-bit, {{'compat ' if compat else ''}}{{abi_bits}}-bit {{abi}} syscall table 5 | 6 | 7 | 8 | 9 | 10 |

Linux {{kernel_version_tag}} syscall table

11 |

Architecture: {{arch}} {{bits}}-bit

12 |

ABI: {{'compat ' if compat else ''}}{{abi_bits}}-bit {{abi}}

13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | # for i in range(max_args): 22 | 23 | # endfor 24 | 25 | # for sc in syscalls: 26 | 27 | 28 | 29 | {{sc.name}} 30 | 31 | # if sc.file and sc.line is not none 32 | # if not sc.good_location 33 | 45 | # elif sc.file 46 | 53 | # else 54 | 55 | # endif 56 | 57 | # if sc.signature is none 58 | 59 | # elif sc.signature | length == 0 60 | 61 | # else 62 | # for arg in sc.signature: 63 | # set i = arg.rfind(' ') + 1 64 | 65 | # endfor 66 | # set span = max_args - sc.signature|length 67 | # if span > 0: 68 | 69 | # endif 70 | # endif 71 | 72 | # endfor 73 | 74 |
Number{% if num_reg %} ({{num_reg}}){% endif %}NameSymbolDefinition locationKconfigArg {{i + 1}} ({{arg_regs[i]}})
{{sc.number}}{{ '0x%x' | format(sc.number) }}{{sc.symbol.name}} 34 | # elif sc.grepped_location 35 | 36 | # else 37 | 38 | # endif 39 | # if sc.file.is_absolute() 40 | {{sc.file}}:{{sc.line}} 41 | # else 42 | {{sc.file}}:{{sc.line}} 43 | # endif 44 | 47 | # if sc.file.is_absolute() 48 | {{sc.file}}:?? 49 | # else 50 | {{sc.file}}:?? 51 | # endif 52 | unknown{{(sc.kconfig | replace("CONFIG_", "") + "=y") if sc.kconfig else ''}}unknown signaturevoid{{arg[:i]}}{{arg[i:]}}
75 |

Auto-generated by Systrack v{{systrack_version}} — {{systrack_copy}}

76 | 77 | 78 | 79 | -------------------------------------------------------------------------------- /src/systrack/templates/syscall_table.js: -------------------------------------------------------------------------------- 1 | const table = document.getElementsByTagName('table')[0] 2 | 3 | function sortTable(e) { 4 | const header = e.target 5 | const idx = Array.from(header.parentNode.children).indexOf(e.target) 6 | const rows = Array.from(table.querySelectorAll('tr')).slice(1) 7 | const desc = header.classList.contains('ascending') 8 | const body = rows[0].parentElement 9 | let getValue 10 | 11 | if (idx === 0) { 12 | getValue = el => parseInt(el.children[0].textContent, 16) 13 | } else { 14 | // The "number" header spans two columns (for decimal and hexadecimal) 15 | getValue = el => el.children[idx + 1].textContent 16 | } 17 | 18 | rows.forEach(el => body.removeChild(el)) 19 | rows.sort((a, b) => { 20 | let va = getValue(a) 21 | let vb = getValue(b) 22 | 23 | if (desc) 24 | [va, vb] = [vb, va] 25 | 26 | if (va > vb) return 1 27 | if (va < vb) return -1 28 | return 0 29 | }) 30 | 31 | rows.forEach(el => body.appendChild(el)) 32 | table.querySelectorAll('th').forEach(h => h.classList.remove('ascending', 'descending')) 33 | header.classList.add(desc ? 
'descending' : 'ascending') 34 | } 35 | 36 | function highlightRow(e) { 37 | const row = e.currentTarget 38 | row.classList.toggle('highlight') 39 | } 40 | 41 | document.querySelectorAll('th.sortable').forEach(el => el.addEventListener('click', sortTable)) 42 | document.querySelectorAll('tr:not(:first-child)').forEach(el => el.addEventListener('click', highlightRow)) 43 | -------------------------------------------------------------------------------- /src/systrack/type_hints.py: -------------------------------------------------------------------------------- 1 | from typing import Union, Tuple, List, Optional 2 | 3 | KernelVersion = Union[Tuple[int],Tuple[int,int],Tuple[int,int,int]] 4 | EsotericSyscall = List[Tuple[int,str,str,Tuple[str,...],Optional[str]]] 5 | -------------------------------------------------------------------------------- /src/systrack/utils.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import logging 3 | 4 | from collections import defaultdict 5 | from pathlib import Path 6 | from shlex import join as shlex_join, quote as shlex_quote 7 | from shutil import which 8 | from subprocess import Popen, DEVNULL, PIPE 9 | from textwrap import indent 10 | from typing import Union, Iterable, Tuple, Any, AnyStr, Hashable, Optional 11 | 12 | 13 | AnyStrOrPath = Union[AnyStr,Path] 14 | 15 | 16 | class VersionedDict: 17 | '''A dict that can have multiple versions with different contents. Accessing 18 | d[version] will return the value of the dict for the given version. Adding 19 | a {key: value} for a range of versions can be done through the .add() 20 | method. 21 | ''' 22 | __slots__ = ('versions', 'cache') 23 | 24 | def __init__(self, iterable: Iterable[Tuple[Hashable,Hashable,Hashable,Any]] = None): 25 | '''Instantiate a VersionedDict given initial version ranges and relative 26 | key-value pairs, or an empty VersionedDict if iterable is not given. 
class VersionedList:
	'''A list that can have multiple versions with different contents. Accessing
	lst[version] will return the value of the list for the given version. Adding
	values for a range of versions can be done through the .add() method.

	The list computed for a version is memoized; the memo is discarded every
	time .add() is called, so later lookups always see the added values. The
	returned list is the memoized object itself: callers should not mutate it.
	'''
	__slots__ = ('versions', 'cache')

	def __init__(self, iterable: Optional[Iterable[Tuple[Hashable,Hashable,Iterable[Any]]]] = None):
		'''Instantiate a VersionedList given initial version ranges and relative
		values, or an empty VersionedList if iterable is not given.

		The given iterable= is an iterable of tuples where each tuple is of the
		form (vstart, vend, iterable_of_values), i.e., the same parameters taken
		by the .add() method.
		'''
		# version -> list already computed for that version
		self.cache = {}
		# (vstart, vend) -> values valid for vstart <= version < vend
		self.versions = defaultdict(list)

		if iterable is not None:
			for vstart, vend, values in iterable:
				self.versions[vstart, vend].extend(values)

	def __getitem__(self, version: Hashable) -> list:
		'''Get the list corresponding to the given version, computing and
		memoizing it if this version was not requested since the last .add().
		'''
		if version not in self.cache:
			self.cache[version] = self.__getversion(version)
		return self.cache[version]

	def __getversion(self, version: Hashable) -> list:
		'''Build a new list with the values of every range that contains the
		given version (vstart <= version < vend), in range insertion order.
		'''
		res = []
		for (vstart, vend), lst in self.versions.items():
			if vstart <= version < vend:
				res.extend(lst)
		return res

	def add(self, vstart: Hashable, vend: Hashable, values: Iterable[Any]):
		'''Add all the given values for all versions of this VersionedList
		between vstart (included) and vend (not included).
		'''
		self.versions[vstart, vend].extend(values)
		# Lists memoized before this call may be missing the new values
		self.cache.clear()
def do_popen(cmd: Union[AnyStr,Iterable[AnyStr]], cwd: Optional[Union[AnyStr,Path]], **kwargs) -> Optional[Popen]:
	'''Convenience wrapper around subprocess.Popen, which gracefully handles
	FileNotFoundError and NotADirectoryError providing useful logging to the
	user.

	cmd and kwargs are passed to Popen as they are. cwd is the working
	directory for the child and can be a str, bytes, Path, or None to inherit
	the current one. The new Popen object is returned on success; None is
	returned, after logging a critical message, if the command or the
	directory cannot be found.
	'''
	try:
		return Popen(cmd, cwd=cwd, **kwargs)
	except FileNotFoundError:
		# We can also get here if the passed cwd= is invalid, so differentiate
		# between the two. Yes this is racy... see if I care.
		if cwd is None:
			# No directory was requested: the command must be the culprit
			cwd_exists = True
		else:
			# cwd is not necessarily a Path, normalize it before checking
			cwd_exists = Path(cwd.decode() if isinstance(cwd, bytes) else cwd).exists()

		if cwd_exists:
			if isinstance(cmd, (str, bytes)):
				# Command line string: the program is its first word
				words = cmd.split()
				name = words[0] if words else cmd
			elif isinstance(cmd, Path):
				name = cmd
			else:
				# argv-style sequence: the program is its first element
				name = next(iter(cmd), cmd)

			logging.critical('Command not found: %s', name)
		else:
			logging.critical('Directory does not exist: %s', cwd)
	except NotADirectoryError:
		logging.critical('Path is not a directory: %s', cwd)

	return None
def ensure_command(cmd: Union[AnyStrOrPath,Iterable[AnyStrOrPath]],
		cwd: Optional[AnyStrOrPath]=None, stdin: Optional[AnyStr]=None,
		capture_stdout: bool=True, console_output: bool=False) -> AnyStr:
	'''Run the given command (cmd), optionally under the given working directory
	(cwd), optionally passing the given data to standard input (stdin),
	capturing and returning its standard output (if capture_stdout=True) and
	optionally enabling console output (if console_output=True).

	A str command is run through the shell, anything else is executed
	directly. The child always runs in text mode: bytes passed as stdin are
	decoded first, and the captured output is returned as str (None is
	returned when the output is not captured). If stdin is not given, the
	child inherits the standard input of the caller.

	If the given command is not found or exits with a non-zero exit code, the
	standard error produced by the command is logged and the caller is
	terminated by means of sys.exit().
	'''
	# console_output implies not capture_stdout
	assert not console_output or not capture_stdout

	if HIGH_VERBOSITY:
		logging.debug('Running command: %s', command_argv_to_string(cmd))

	if console_output:
		stdout, stderr = sys.stdout, sys.stderr
	else:
		stdout = PIPE if capture_stdout else DEVNULL
		stderr = PIPE

	# The child is opened with text=True, so its input must be a str
	if isinstance(stdin, bytes):
		stdin = stdin.decode()

	# communicate() can only deliver input through a pipe: without stdin=PIPE
	# the given data would be silently dropped.
	child = do_popen(cmd, cwd=cwd, shell=isinstance(cmd, str),
		stdin=None if stdin is None else PIPE,
		stdout=stdout, stderr=stderr, text=True)
	if child is None:
		sys.exit(127)

	out, err = child.communicate(stdin)

	if child.returncode != 0:
		# Standard error is only available here if it was not sent to console
		if stderr == PIPE:
			err = ('\n' + indent(err, '\t')) if err.strip() else ' (no stderr output)'
		else:
			err = ''

		logging.critical('Command returned %d: %s%s', child.returncode, cmd, err)
		sys.exit(1)

	return out
300 | ''' 301 | s = round(s) 302 | h = s // 3600 303 | s %= 3600 304 | m = s // 60 305 | s %= 60 306 | 307 | if h > 0: 308 | return f'{h}h {m:02d}m {s:02d}s' 309 | if m > 0: 310 | return f'{m}m {s:02d}s' 311 | return f'{s}s' 312 | -------------------------------------------------------------------------------- /src/systrack/version.py: -------------------------------------------------------------------------------- 1 | VERSION = '0.7' 2 | VERSION_COPY = '''\ 3 | Copyright (C) 2023-2025 Marco Bonelli 4 | Licensed under the GNU General Public License v3.0 5 | ''' 6 | VERSION_HELP = f'''\ 7 | Systrack version {VERSION} 8 | {VERSION_COPY}\ 9 | ''' 10 | 11 | if __name__ == '__main__': 12 | print(VERSION) 13 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mebeim/systrack/e45f94d06d39f162b0939f1f45b4913d37609dc6/tests/__init__.py -------------------------------------------------------------------------------- /tests/data/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | !.gitignore 3 | !Makefile 4 | !*.s 5 | -------------------------------------------------------------------------------- /tests/data/Makefile: -------------------------------------------------------------------------------- 1 | ASMS = $(wildcard *.s) 2 | BINS = $(ASMS:.s=) 3 | 4 | .PHONY: all clean 5 | all: $(BINS) 6 | 7 | # Need to link because GNU AS generates relocations for the call insns. 8 | # OTOH clang generates a .o w/o relocations for call insns, might consider 9 | # using it in the future. 
def test_dummy_syscall_64():
	'''Check that two-instruction stubs which only return a negative errno
	are recognized as dummy syscalls for every MIPS ABI, in both byte orders.
	'''
	# (big_endian, machine code) pairs
	stubs = (
		# Big-endian
		(True, bytes.fromhex(
			'03e00008' # jr ra
			'2402ffa7' # li v0,-89 (-ENOSYS)
		)),
		# Little-endian
		(False, bytes.fromhex(
			'0800e003' # jr ra
			'eaff0224' # li v0,-22 (-EINVAL)
		)),
	)

	for abi in ('n64', 'n32', 'o32'):
		for big_endian, code in stubs:
			assert arch_is_dummy_syscall(ArchMips((6,9), abi, False), big_endian, code)
def test_esoteric_fast_endian_switch_simple():
	'''Check detection of the switch_endian esoteric syscall (number 7870) on
	a minimal hand-written code sequence, for every accepted encoding of the
	conditional branch that follows the syscall number comparison.
	'''
	# Minimal code example that should match
	minimal_code = bytes.fromhex(
		'2c 20 1e be' # cmpdi r0,7870
		'41 c2 00 04' # beq- X
		'7d 9b 02 a6' # X: mfsrr1 r12
		'69 8c 00 01' # xori r12,r12,1
		'7d 9b 03 a6' # mtsrr1 r12
		'4c 00 00 24' # rfid
	)
	sym = Symbol(0x0, 0x0, 0x0, 'NOTYPE', 'exc_real_0xc00_system_call')
	elf = MockELF(True, {sym: minimal_code})
	expected = (7870, 'switch_endian', sym.name)

	# Should only be available for 64-bit ppc64 ABI
	for abi, flag in (('ppc32', True), ('ppc32', False), ('spu', False)):
		assert not ArchPowerPC((6,8), abi, flag).extract_esoteric_syscalls(elf)

	arch = ArchPowerPC((6,8), 'ppc64', False)
	found = arch.extract_esoteric_syscalls(elf)
	assert found and found[0][:3] == expected

	# Also test beq+, and beq (2 encodings)
	for beq in (0x41e2, 0x4182, 0x41a2):
		# Only the upper half of the second instruction (the branch) changes
		patched = elf.symbols_code[sym]
		elf.symbols_code[sym] = patched[:4] + beq.to_bytes(2, 'big') + patched[6:]

		found = arch.extract_esoteric_syscalls(elf)
		assert found and found[0][:3] == expected
def test_esoteric_fast_endian_switch_real():
	'''Check detection of the switch_endian esoteric syscall (number 7870) on
	a full-length code listing registered under the symbol
	exc_real_0xc00_system_call, as opposed to the minimal sequence used by the
	"simple" variant of this test.

	The mocked ELF is big-endian. Extraction must return nothing for the ppc32
	and spu ABIs, and for ppc64 the first result must start with
	(7870, 'switch_endian', <symbol name>).
	'''
	sym = Symbol(0x0, 0x0, 0x0, 'NOTYPE', 'exc_real_0xc00_system_call')
	elf = MockELF(True, {sym: bytes.fromhex(
		# <exc_real_0xc00_system_call>: (64-bit, v6.8)
		'7d a9 03 a6' # mtctr r13
		'7d b1 42 a6' # mfsprg r13,1
		'f9 4d 00 88' # std r10,136(r13)
		'60 00 00 00' # nop
		'60 00 00 00' # nop
		'60 00 00 00' # nop
		'60 00 00 00' # nop
		'60 00 00 00' # nop
		'60 00 00 00' # nop
		'89 4d 0a b8' # lbz r10,2744(r13)
		'2c 0a 00 00' # cmpwi r10,0
		'39 40 0c 00' # li r10,3072
		'40 82 0d e0' # bne c000000000001a10
		'7d 29 02 a6' # mfctr r9
		# The syscall number comparison and branch the detection looks for
		'2c 20 1e be' # cmpdi r0,7870
		'41 c2 00 20' # beq- c000000000000c5c
		'7d 7a 02 a6' # mfsrr0 r11
		'7d 9b 02 a6' # mfsrr1 r12
		'7c 42 13 78' # mr r2,r2
		'e9 4d 00 18' # ld r10,24(r13)
		'61 4a ca d0' # ori r10,r10,51920
		'7d 49 03 a6' # mtctr r10
		'4e 80 04 20' # bctr
		# Same mfsrr1/xori/mtsrr1 ... rfid sequence as in the minimal example
		'7d 9b 02 a6' # mfsrr1 r12
		'69 8c 00 01' # xori r12,r12,1
		'7d 9b 03 a6' # mtsrr1 r12
		'7d 2d 4b 78' # mr r13,r9
		'60 00 00 00' # nop
		'60 00 00 00' # nop
		'60 00 00 00' # nop
		'60 00 00 00' # nop
		'60 00 00 00' # nop
		'60 00 00 00' # nop
		'60 00 00 00' # nop
		'60 00 00 00' # nop
		'60 00 00 00' # nop
		'4c 00 00 24' # rfid
		'48 00 14 ac' # b c000000000002140
		'48 00 00 00' # b c000000000000c98 (infinite loop)
	)})

	# Should only be available for 64-bit ppc64 ABI
	assert not ArchPowerPC((6,8), 'ppc32', True).extract_esoteric_syscalls(elf)
	assert not ArchPowerPC((6,8), 'ppc32', False).extract_esoteric_syscalls(elf)
	assert not ArchPowerPC((6,8), 'spu', False).extract_esoteric_syscalls(elf)

	res = ArchPowerPC((6,8), 'ppc64', False).extract_esoteric_syscalls(elf)
	assert res and res[0][:3] == (7870, 'switch_endian', sym.name)
class MockELF:
	'''Mock ELF class to be used in place of the ELF class provided by Systrack
	for testing.

	Symbol code is served from an in-memory dict instead of a real ELF file.
	Only the attributes and methods defined here are provided.
	'''
	def __init__(self, big_endian: bool, symbols_with_code: Dict[Symbol,bytes]):
		'''Create a mock ELF with the given endianness and symbols.

		symbols_with_code maps each Symbol to the bytes of its code. The dict
		is stored as is (not copied) in .symbols_code, and a name -> Symbol
		index is built in .symbols.
		'''
		self.big_endian = big_endian
		self.symbols_code = symbols_with_code
		self.symbols = {}

		for sym in symbols_with_code:
			self.symbols[sym.name] = sym

	def next_symbol(self, sym: Symbol) -> Union[Symbol,None]:
		'''Always return None, whatever the given symbol is.
		'''
		return None

	def vaddr_read(self, vaddr: int, size: int) -> bytes:
		'''Return the code of the first symbol whose real_vaddr is equal to
		vaddr, padded on the right with NUL bytes up to size bytes. The code
		is not truncated if it is longer than size.

		An assertion fails if no symbol has the given vaddr.
		'''
		for sym in self.symbols_code:
			if sym.real_vaddr == vaddr:
				code = self.symbols_code[sym]
				return code.ljust(size, b'\x00')

		assert False, 'Bad call to mocked ELF.vaddr_read()'

	def read_symbol(self, sym: Union[str,Symbol]) -> bytes:
		'''Return the code of the given symbol, which can be passed either as
		a Symbol or by name. A name is looked up in .symbols, so an unknown
		name raises KeyError.
		'''
		if not isinstance(sym, Symbol):
			sym = self.symbols[sym]

		return self.vaddr_read(sym.real_vaddr, sym.size)