├── .github └── workflows │ └── CI.yml ├── .gitignore ├── README.md ├── black ├── build-vmlinux ├── ci ├── find-kallsyms ├── find_kallsyms.py ├── flake8 ├── ghidra-kallsyms.py ├── ida-kallsyms-import.py ├── ida-kallsyms.py ├── ida-like-import.py ├── ida_utils.py ├── ls-py0 ├── requirements.txt └── test ├── __init__.py ├── kallsyms-3.10.0-862.11.6.el7.x86_64.gz ├── kallsyms-4.16.3-301.fc28.s390x.gz ├── kallsyms-4.4.0-1085-raspi2.arm.gz ├── kallsyms-4.4.223.defcon2020.ooofs.i686.gz ├── kallsyms-4.4.223.defconfig.i686.gz ├── kallsyms-5.1.0.tasteless2019.tee.aarch64.gz ├── kallsyms-5.1.9.balsn2019.krazynote.x86_64.gz ├── kallsyms-5.3.0.hitcon2019.poe.x86_64.gz ├── kallsyms-6.2.8.seccon2023.kmemo.x86_64.gz ├── kallsyms-6.5.0-rc4.ctfzone2023.pwnfinity.x86_64.gz ├── kallsyms-6.8.0-48-generic.noble.x86_64.gz └── test_find_kallsyms.py /.github/workflows/CI.yml: -------------------------------------------------------------------------------- 1 | on: 2 | push: 3 | branches: ["master"] 4 | pull_request: 5 | branches: ["master"] 6 | 7 | name: Continuous integration 8 | 9 | jobs: 10 | build: 11 | runs-on: ubuntu-22.04 12 | 13 | steps: 14 | - uses: actions/checkout@v3 15 | 16 | - name: Install packages 17 | run: sudo apt-get update && 18 | sudo apt-get install -y black flake8 python2 python3 jython pypy3 19 | 20 | - name: Check formatting 21 | run: ./black --check 22 | 23 | - name: Test python2 24 | run: python2 -m unittest discover 25 | 26 | - name: Test python3 27 | run: python3 -m unittest discover 28 | 29 | - name: Test jython 30 | run: jython -m unittest discover 31 | 32 | - name: Test pypy3 33 | run: pypy3 -m unittest discover 34 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.class 2 | .idea/ 3 | .mypy_cache/ 4 | *.pyc 5 | __pycache__/ 6 | -------------------------------------------------------------------------------- /README.md: 
-------------------------------------------------------------------------------- 1 | # ida-kallsyms 2 | 3 | IDA script for parsing kallsyms. 4 | 5 | ## Usage 6 | 7 | * `git clone https://github.com/mephi42/ida-kallsyms.git` 8 | * Open the kernel in IDA, let the autoanalysis finish. 9 | * From `File` → `Script file...` (Alt+F7 / Alt+F9) run 10 | `ida-kallsyms/ida-kallsyms.py` script. 11 | 12 | ## Usage with Ghidra 13 | 14 | * `git clone https://github.com/mephi42/ida-kallsyms.git` 15 | * Open the kernel in Ghidra, let the autoanalysis finish. 16 | * Go to `Window` → `Script manager`. 17 | * Once: press `Script Directories` button and add `ida-kallsyms`. 18 | * In `Filter` edit box, type `kallsyms`. 19 | * Double-click `ghidra-kallsyms.py` and wait. 20 | 21 | ## Stand-alone usage 22 | 23 | * `git clone https://github.com/mephi42/ida-kallsyms.git` 24 | * `ida-kallsyms/find-kallsyms vmlinux >vmlinux.kallsyms` 25 | * The resulting `vmlinux.kallsyms` file can be imported into IDA using 26 | `ida-kallsyms-import.py` script. 27 | 28 | # build-vmlinux 29 | 30 | Script for obtaining function signatures and struct layouts. Works by building 31 | a Linux Kernel that is similar to the one being analyzed and extracting debug 32 | information from it. 33 | 34 | ## Usage 35 | 36 | * Load kallsyms into IDA as described above. 37 | * `ida-kallsyms/build-vmlinux --like vmlinux` 38 | 39 | This will run for a while and generate `vmlinux.like.json` file. 40 | 41 | Check out `ida-kallsyms/build-vmlinux --help` in case you already have 42 | `binutils-gdb` / `gcc` / `linux` local git repos or a `.config` that 43 | matches `vmlinux`. 44 | * Import `vmlinux.like.json` into IDA using `ida-kallsyms/ida-like-import.py` 45 | script. 46 | * If there are import errors, check `vmlinux.like.json.log` file. 
47 | -------------------------------------------------------------------------------- /black: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | set -e -u -x 3 | cd "$(dirname "$0")" 4 | ./ls-py0 | xargs -0 black "$@" 5 | -------------------------------------------------------------------------------- /build-vmlinux: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env pypy3 2 | import argparse 3 | from contextlib import contextmanager 4 | import io 5 | import json 6 | import logging 7 | from multiprocessing import cpu_count 8 | import os 9 | from pathlib import Path 10 | import re 11 | import shutil 12 | import struct 13 | import subprocess 14 | import tempfile 15 | from typing import Any, Dict, Generator, List, NamedTuple, Optional, Set, Tuple 16 | 17 | from elftools.dwarf.die import DIE # type: ignore 18 | from elftools.elf.elffile import ELFFile # type: ignore 19 | from kconfiglib import Kconfig # type: ignore 20 | 21 | from find_kallsyms import find_kallsyms_in_rodata 22 | 23 | 24 | def get_elf_host(header: Any) -> str: 25 | return { 26 | ("EM_386", "ELFCLASS32", "ELFDATA2LSB"): "i686-linux-gnu", 27 | ("EM_MIPS", "ELFCLASS32", "ELFDATA2MSB"): "mips-linux-gnu", 28 | ("EM_PPC", "ELFCLASS32", "ELFDATA2MSB"): "powerpc-linux-gnu", 29 | ("EM_PPC64", "ELFCLASS64", "ELFDATA2MSB"): "powerpc64-linux-gnu", 30 | ("EM_S390", "ELFCLASS64", "ELFDATA2MSB"): "s390x-linux-gnu", 31 | ("EM_ARM", "ELFCLASS32", "ELFDATA2LSB"): "arm-linux-gnueabi", 32 | ("EM_X86_64", "ELFCLASS64", "ELFDATA2LSB"): "x86_64-linux-gnu", 33 | ("EM_AARCH64", "ELFCLASS64", "ELFDATA2LSB"): "aarch64-linux-gnu", 34 | }[header.e_machine, header.e_ident.EI_CLASS, header.e_ident.EI_DATA] 35 | 36 | 37 | def get_pe_host(machine: int) -> str: 38 | return { 39 | 0x14C: "i686-linux-gnu", 40 | 0x1C0: "arm-linux-gnueabi", 41 | 0x8664: "x86_64-linux-gnu", 42 | 0xAA64: "aarch64-linux-gnu", 43 | }[machine] 44 | 45 | 46 | def 
get_linux_arch(host: str) -> str: 47 | return { 48 | "i686": "i386", 49 | "mips": "mips", 50 | "powerpc": "ppc", 51 | "powerpc64": "ppc64", 52 | "s390x": "s390x", 53 | "arm": "arm", 54 | "x86_64": "x86_64", 55 | "aarch64": "arm64", 56 | }[host[: host.index("-")]] 57 | 58 | 59 | def arch2srcarch(arch: str) -> str: 60 | if arch in ("i386", "x86_64"): 61 | return "x86" 62 | return arch 63 | 64 | 65 | def fetch_tag(git: Path, remote: str, tag: str) -> None: 66 | git.mkdir(parents=True, exist_ok=True) 67 | subprocess.check_call(["git", "init"], cwd=git) 68 | subprocess.check_call(["git", "fetch", remote, f"{tag}:{tag}"], cwd=git) 69 | 70 | 71 | def prepare_worktree(worktree: Path, git: Path, remote: str, tag: str) -> None: 72 | fetch_tag(git, remote, tag) 73 | try: 74 | shutil.rmtree(worktree) 75 | except FileNotFoundError: 76 | pass 77 | subprocess.check_call( 78 | ["git", "worktree", "add", "-f", "-f", "--detach", worktree, tag], 79 | cwd=git, 80 | ) 81 | 82 | 83 | def build_or_reuse_toolchain( 84 | binutils_git: Path, 85 | binutils_version: str, 86 | gcc_git: Path, 87 | gcc_version: str, 88 | host: str, 89 | ) -> Path: 90 | toolchain = f"{host}-toolchain-{binutils_version}-{gcc_version}" 91 | install = Path.cwd() / toolchain 92 | bin = install / "bin" 93 | if (bin / f"{host}-gcc").exists(): 94 | return bin 95 | worktree = Path.cwd() / f"{toolchain}-build" 96 | binutils_worktree = worktree / "binutils-gdb" 97 | prepare_worktree( 98 | worktree=binutils_worktree, 99 | git=binutils_git, 100 | remote="git://sourceware.org/git/binutils-gdb.git", 101 | tag="refs/tags/binutils-" + binutils_version.replace(".", "_"), 102 | ) 103 | subprocess.check_call( 104 | [ 105 | "./configure", 106 | f"--target={host}", 107 | "--disable-multilib", 108 | "--disable-nls", 109 | f"--prefix={install}", 110 | ], 111 | cwd=binutils_worktree, 112 | env={**os.environ, "CXXFLAGS": "-fpermissive"}, 113 | ) 114 | for target in ("all", "install"): 115 | subprocess.check_call( 116 | [ 117 | "make", 
def putenv(name: str, value: Optional[str]) -> None:
    """Set environment variable *name* to *value*; unset it if *value* is None."""
    if value is None:
        # Removing a variable that is already absent is not an error.
        os.environ.pop(name, None)
    else:
        os.environ[name] = value


@contextmanager
def env(tmp_env: Dict[str, str]) -> Generator[None, None, None]:
    """Temporarily overlay *tmp_env* onto ``os.environ``.

    On exit (normal or through an exception) every variable named in
    *tmp_env* is put back to the value it had on entry, or removed if it was
    not set on entry.
    """
    # Snapshot by variable *name*, i.e. by the keys of tmp_env. Snapshotting by
    # its values would save unrelated variables and leave the overridden ones
    # permanently changed after the block exits.
    orig_env = {k: os.environ.get(k) for k in tmp_env}
    try:
        os.environ.update(tmp_env)
        yield
    finally:
        for k, v in orig_env.items():
            putenv(k, v)


def cc_version_text(exe):
    """Return the first line printed by ``exe --version``, stripped.

    Raises:
        subprocess.CalledProcessError: if the command exits with a non-zero
            status.
    """
    p = subprocess.Popen(
        [exe, "--version"], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL
    )
    try:
        return p.stdout.readline().decode().strip()
    finally:
        # Drain the remaining output so the child is never blocked on a full
        # pipe while we wait for it to exit.
        while p.stdout.read(8192) != b"":
            pass
        p.stdout.close()
        if p.wait() != 0:
            raise subprocess.CalledProcessError(p.returncode, p.args)
def build_or_reuse_vmlinux(
    output: Optional[Path],
    git: Path,
    host: str,
    version: str,
    linux_config: Optional[Path],
    toolchain_bin: Path,
    template_vmlinux: Optional[Path],
) -> Path:
    """Build a vmlinux for *host* / *version*, or reuse an existing one.

    Args:
        output: where to put the resulting vmlinux; defaults to
            ``vmlinux-<arch>-<version>`` in the current directory.
        git: local linux git repository used for fetching the release tag.
        host: toolchain triplet, e.g. ``x86_64-linux-gnu``.
        version: kernel version; the ``v<version>`` tag is checked out.
        linux_config: explicit ``.config`` to use. When None, the config is
            extracted from *template_vmlinux* if possible, otherwise
            ``defconfig`` is used, and DEBUG_INFO is switched on.
        toolchain_bin: directory with the cross toolchain executables.
        template_vmlinux: kernel image to try ``extract-ikconfig`` on.

    Returns:
        Path of the vmlinux (a symlink into the build worktree).

    Raises:
        subprocess.CalledProcessError: if git or make fails.
    """
    arch = get_linux_arch(host)
    srcarch = arch2srcarch(arch)
    if output is None:
        output = Path(f"vmlinux-{arch}-{version}")
    if output.exists():
        return output
    worktree = Path.cwd() / f"{arch}-linux-{version}-build"
    prepare_worktree(
        worktree=worktree,
        git=git,
        remote="git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git",  # noqa: E501
        tag=f"refs/tags/v{version}",
    )
    # ccache is optional. A missing executable surfaces as OSError
    # (FileNotFoundError), a broken one as a non-zero exit status; in both
    # cases simply build without it.
    try:
        subprocess.check_call(
            args=["ccache", "--version"],
            stdin=subprocess.DEVNULL,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
    except (OSError, subprocess.CalledProcessError):
        ccache = ""
    else:
        ccache = "ccache "
    env_path = str(toolchain_bin) + os.pathsep + os.environ["PATH"]

    def make(args: List[str]) -> None:
        """Run ``make`` in the worktree with the cross toolchain on PATH."""
        args = [
            "make",
            f"ARCH={arch}",
            f"CROSS_COMPILE={ccache}{host}-",
            f"-j{cpu_count()}",
        ] + args
        subprocess.check_call(args, cwd=worktree, env={**os.environ, "PATH": env_path})

    worktree_config = worktree / ".config"
    if linux_config is None:
        if template_vmlinux is None:
            need_defconfig = True
        else:
            with open(worktree_config, "wb") as fp:
                returncode = subprocess.call(
                    [
                        worktree / "scripts" / "extract-ikconfig",
                        template_vmlinux,
                    ],
                    stdout=fp,
                )
            if returncode == 0:
                logging.info("Extracted: %s", worktree_config)
                make(["olddefconfig"])
                need_defconfig = False
            else:
                need_defconfig = True
        if need_defconfig:
            make(["defconfig"])
        cc = f"{host}-gcc"
        ld = f"{host}-ld"
        # Kconfig files read these variables from the environment while being
        # parsed, so they must be set for the duration of the Kconfig calls.
        with env(
            {
                "srctree": str(worktree),
                "ARCH": arch,
                "SRCARCH": srcarch,
                "KERNELVERSION": version,
                "CC": cc,
                "HOSTCC": "gcc",
                "HOSTCXX": "g++",
                "CC_VERSION_TEXT": cc_version_text(toolchain_bin / cc),
                "PATH": env_path,
                "LD": ld,
            }
        ):
            kconf = Kconfig()
            kconf.load_config(worktree_config)
            kconf.syms["DEBUG_INFO"].set_value("y")
            kconf.write_config(worktree_config)
    else:
        shutil.copyfile(linux_config, worktree_config)
    make(["olddefconfig"])
    make(["vmlinux"])
    # Create the symlink under a temporary name and rename it into place, so
    # that *output* never exists in a half-created state.
    with tempfile.TemporaryDirectory(dir=output.parent) as tempdir:
        vmlinux_tmp = Path(tempdir) / "vmlinux"
        vmlinux_tmp.symlink_to(worktree / "vmlinux")
        vmlinux_tmp.rename(output)
    return output
None: 344 | return None 345 | members.append(member) 346 | return Struct( 347 | kind="struct" if die.tag == "DW_TAG_structure_type" else "union", 348 | name=convert_name(die), 349 | size=die.attributes["DW_AT_byte_size"].value, 350 | members=members, 351 | ) 352 | 353 | 354 | class Typedef(NamedTuple): 355 | kind: str 356 | name: str 357 | type: int 358 | 359 | 360 | def convert_typedef(die: DIE) -> Optional[Typedef]: 361 | name = convert_name(die) 362 | if name is None: 363 | return None 364 | type = convert_type(die) 365 | if type is None: 366 | return None 367 | return Typedef( 368 | kind="typedef", 369 | name=name, 370 | type=type, 371 | ) 372 | 373 | 374 | class Pointer(NamedTuple): 375 | kind: str 376 | type: Optional[int] 377 | 378 | 379 | def convert_pointer(die: DIE) -> Optional[Pointer]: 380 | return Pointer( 381 | kind="pointer", 382 | type=convert_type(die), 383 | ) 384 | 385 | 386 | class Int(NamedTuple): 387 | kind: str 388 | is_signed: bool 389 | size: int 390 | 391 | 392 | def convert_int(die: DIE) -> Optional[Int]: 393 | if "DW_AT_declaration" in die.attributes: 394 | return None 395 | return Int( 396 | kind="base", 397 | is_signed={ 398 | 2: False, # boolean 399 | 4: False, # float 400 | 5: True, # signed 401 | 6: True, # signed char 402 | 7: False, # unsigned 403 | 8: False, # unsigned char 404 | }[die.attributes["DW_AT_encoding"].value], 405 | size=die.attributes["DW_AT_byte_size"].value, 406 | ) 407 | 408 | 409 | class Qualified(NamedTuple): 410 | kind: str 411 | type: int 412 | 413 | 414 | def convert_qualified(die: DIE) -> Optional[Qualified]: 415 | type = convert_type(die) 416 | if type is None: 417 | return None 418 | return Qualified( 419 | kind={ 420 | "DW_TAG_const_type": "const", 421 | "DW_TAG_volatile_type": "volatile", 422 | }[die.tag], 423 | type=type, 424 | ) 425 | 426 | 427 | class Array(NamedTuple): 428 | kind: str 429 | type: int 430 | size: int 431 | 432 | 433 | def convert_array(die: DIE) -> Optional[Array]: 434 | type = 
class Parameter(NamedTuple):
    # Global DIE offset of the parameter's type.
    type: int
    # Parameter name as recorded in DW_AT_name.
    name: str


def convert_parameter(die: DIE) -> Optional[Parameter]:
    """Convert a DW_TAG_formal_parameter DIE; None if it lacks a type or a name."""
    type = convert_type(die)
    if type is None:
        return None
    name = convert_name(die)
    if name is None:
        return None
    return Parameter(
        type=type,
        name=name,
    )


class Subprogram(NamedTuple):
    kind: str
    # Global DIE offset of the return type; None when DW_AT_type is absent.
    return_type: Optional[int]
    name: str
    parameters: List[Parameter]
    has_varargs: bool


def convert_subprogram(die: DIE, kallsyms_set: Set[str]) -> Optional[Subprogram]:
    """Convert a DW_TAG_subprogram DIE into a Subprogram record.

    Returns None for inlined functions, declarations, functions whose name is
    not in *kallsyms_set*, and functions with a parameter that cannot be
    converted.
    """
    if "DW_AT_inline" in die.attributes or "DW_AT_declaration" in die.attributes:
        return None
    name = convert_name(die)
    if name is None or name not in kallsyms_set:
        return None
    parameters = []
    has_varargs = False
    for child_die in die.iter_children():
        if child_die.tag == "DW_TAG_unspecified_parameters":
            has_varargs = True
            continue
        if child_die.tag != "DW_TAG_formal_parameter":
            continue
        parameter = convert_parameter(child_die)
        if parameter is None:
            return None
        parameters.append(parameter)
    return Subprogram(
        kind="subprogram",
        # A subprogram DIE without DW_AT_type describes a function returning
        # void. Such functions must be kept, which is why return_type is
        # Optional; rejecting them would drop every void function.
        # NOTE(review): confirm the JSON consumer treats a null return_type as
        # void, the same way Pointer.type may be null.
        return_type=convert_type(die),
        name=name,
        parameters=parameters,
        has_varargs=has_varargs,
    )


def extract_debug_info(output: Path, vmlinux: Path, kallsyms: List[str]) -> None:
    """Dump types and function signatures from *vmlinux* DWARF into JSON.

    Args:
        output: path of the JSON file to write.
        vmlinux: ELF file with debug information.
        kallsyms: kallsyms names; the first character of each entry is
            stripped before matching against DWARF function names.
    """
    # Mostly copied from
    # https://github.com/mephi42/linetrace-cmd-record/blob/9769e9505cb2/linetrace-cmd-record#L179
    # https://github.com/mephi42/linetrace-cmd-record/blob/9769e9505cb2/linetrace-cmd-record#L59
    logging.info("Extracting debug info...")
    kallsyms_set = {kallsym[1:] for kallsym in kallsyms}
    tag2convertor = {
        "DW_TAG_structure_type": convert_struct,
        "DW_TAG_union_type": convert_struct,
        "DW_TAG_typedef": convert_typedef,
        "DW_TAG_pointer_type": convert_pointer,
        "DW_TAG_base_type": convert_int,
        "DW_TAG_enumeration_type": convert_int,
        "DW_TAG_const_type": convert_qualified,
        "DW_TAG_volatile_type": convert_qualified,
        "DW_TAG_array_type": convert_array,
        "DW_TAG_subprogram": lambda die: convert_subprogram(die, kallsyms_set),
    }
    # Keyed by global DIE offset, which is what convert_type() produces for
    # type references.
    items: Dict[int, Any] = {}
    with open(vmlinux, "rb") as fp:
        elf = ELFFile(fp)
        dwarf = elf.get_dwarf_info(relocate_dwarf_sections=elf["e_type"] == "ET_REL")
        for cu in dwarf.iter_CUs():
            # Only top-level DIEs of each compilation unit are considered.
            for die in cu.get_top_DIE().iter_children():
                convertor = tag2convertor.get(die.tag)
                if convertor is not None:
                    item = convertor(die)
                    if item is not None:
                        items[die.offset] = item
    logging.info("Saving extracted debug info...")
    with open(output, "w") as output_fp:
        json.dump(items, output_fp, indent=4, separators=(",", ": "))
"__main__": 682 | main() 683 | -------------------------------------------------------------------------------- /ci: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | set -e -u -x 3 | cd "$(dirname "$0")" 4 | ./black 5 | ./flake8 6 | mypy build-vmlinux 7 | python2 -m unittest discover 8 | python3 -m unittest discover 9 | jython -m unittest discover 10 | pypy3 -m unittest discover 11 | -------------------------------------------------------------------------------- /find-kallsyms: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import argparse 3 | import logging 4 | 5 | from find_kallsyms import find_kallsyms_in_rodata 6 | 7 | 8 | def main(): 9 | parser = argparse.ArgumentParser() 10 | parser.add_argument("--verbose", action="store_true") 11 | parser.add_argument("path") 12 | args = parser.parse_args() 13 | if args.verbose: 14 | logging.basicConfig(level=logging.DEBUG) 15 | with open(args.path, "rb") as fp: 16 | rodata = fp.read() 17 | for address, name in find_kallsyms_in_rodata(rodata): 18 | print("{:016X} {}".format(address, name)) 19 | 20 | 21 | if __name__ == "__main__": 22 | main() 23 | -------------------------------------------------------------------------------- /find_kallsyms.py: -------------------------------------------------------------------------------- 1 | from collections import namedtuple 2 | import logging 3 | import struct 4 | 5 | 6 | def align_up(p, a): 7 | return p & ~(a - 1) 8 | 9 | 10 | def align(p, a): 11 | return (p + (a - 1)) & ~(a - 1) 12 | 13 | 14 | def try_parse_token_index(rodata, endianness, offset): 15 | index_fmt = endianness + "H" 16 | (index,) = struct.unpack(index_fmt, rodata[offset : offset + 2]) 17 | assert index == 0, "The first token index must be 0" 18 | indices = [index] 19 | for _ in range(255): 20 | offset += 2 21 | (index,) = struct.unpack(index_fmt, rodata[offset : offset + 2]) 22 | if index <= indices[-1]: 
23 | return None # Token indices must be monotonically increasing. 24 | indices.append(index) 25 | return indices 26 | 27 | 28 | def find_token_indices(rodata, endianness): 29 | token_index_offset = 0 30 | while True: 31 | # kallsyms_token_index is an array of monotonically increasing 256 32 | # shorts, the first of which is 0. It is located right after 33 | # kallsyms_token_table, which is a sequence of null-terminated strings. 34 | # Therefore, look for 1+2 consecutive zeroes. 35 | token_index_offset = rodata.find(b"\x00\x00\x00", token_index_offset) + 1 36 | if token_index_offset == 0 or token_index_offset + 512 > len(rodata): 37 | break 38 | token_index = try_parse_token_index(rodata, endianness, token_index_offset) 39 | if token_index is not None: 40 | yield token_index_offset, token_index 41 | 42 | 43 | def try_parse_token_table(rodata, token_index, start_offset, end_offset): 44 | tokens = [] 45 | for i in range(256): 46 | token_start_offset = start_offset + token_index[i] 47 | if i == 255: 48 | # Last token ends at the end of the table. 49 | token_end_offset = end_offset 50 | else: 51 | # Other tokens end at the start of the next respective token. 52 | token_end_offset = start_offset + token_index[i + 1] 53 | token = rodata[token_start_offset:token_end_offset] 54 | if b"\x00" in token[:-1]: 55 | return None # Tokens must be printable. 56 | if token[-1:] != b"\x00": 57 | return None # Tokens must be null-terminated. 58 | if token[:-1] in tokens: 59 | return None # Tokens must not repeat 60 | tokens.append(token[:-1]) 61 | return tokens 62 | 63 | 64 | def find_token_tables(rodata, token_index, token_index_offset): 65 | # kallsyms_token_table is a sequence of 256 null-terminated strings. 66 | # There may be some padding at the end; skip it. 
def find_markers(rodata, marker_fmt, marker_size, marker_offset):
    """Scan backwards from *marker_offset* for the kallsyms_markers array.

    Args:
        rodata: raw image bytes.
        marker_fmt: struct format of one marker, including endianness.
        marker_size: size of one marker in bytes (4 or 8).
        marker_offset: offset of the presumed last marker.

    Yields:
        At most one ``(offset_of_first_marker, markers)`` pair, where
        ``markers`` is in ascending order. Nothing is yielded when no
        plausible array ends at *marker_offset*.
    """
    first = True
    markers = []
    while True:
        # kallsyms_markers is an array of monotonically increasing offsets,
        # which starts with 0. It is aligned on an 8-byte boundary, so if the
        # element size is 4 bytes and their number is odd, it is zero-padded at
        # the end.
        if marker_offset < 0 or marker_offset + marker_size > len(rodata):
            # Ran off the buffer before reaching the leading 0 marker. Slicing
            # with a negative start would produce an empty buffer and make
            # struct.unpack() raise, so reject this candidate instead.
            return
        (marker,) = struct.unpack(
            marker_fmt, rodata[marker_offset : marker_offset + marker_size]
        )
        if first:
            first = False
            if marker == 0 and marker_size == 4:
                # Skip padding.
                marker_offset -= marker_size
                continue
        elif len(markers) > 0 and marker >= markers[-1]:
            # The array is not monotonically increasing.
            return
        markers.append(marker)
        if marker == 0:
            # We found the first element.
            break
        marker_offset -= marker_size
    if marker_size == 4 and len(markers) == 2:
        # Marker size must be 8 bytes, and we must be taking the upper
        # part, which is always 0, for the first marker.
        return
    markers.reverse()
    yield marker_offset, markers


# Since v6.1 (commit b8a94bfb3395).
# Upper bound (exclusive) on the length of a symbol name.
KSYM_NAME_LEN = 512


def is_name_ok(rodata, token_lengths, offset):
    """Check whether a plausible kallsyms_names entry starts at *offset*.

    An entry is a token count byte followed by that many token index bytes.
    *token_lengths* maps a token index to the length of the expanded token.
    """
    n_tokens = ord(rodata[offset : offset + 1])
    if n_tokens == 0 or n_tokens >= KSYM_NAME_LEN:
        # Tokens are at least one byte long. Names must not be empty, and they
        # must be at most KSYM_NAME_LEN-1 characters long.
        return False
    offset += 1
    name_length = 0
    for _ in range(n_tokens):
        # The caller is expected to have verified that the name entry does not
        # span past the end of kallsyms_names, so just fetch the next token.
        name_length += token_lengths[ord(rodata[offset : offset + 1])]
        if name_length >= KSYM_NAME_LEN:
            # Name is longer than KSYM_NAME_LEN-1 characters.
            return False
        offset += 1
    return True


def extract_name(rodata, token_table, offset):
    """Expand the kallsyms_names entry at *offset* into the symbol name bytes."""
    # Name must have already been checked, just expand tokens.
    n_tokens = ord(rodata[offset : offset + 1])
    first = offset + 1
    # join() instead of repeated "+=", which copies the partial name each time.
    return b"".join(
        token_table[ord(rodata[i : i + 1])] for i in range(first, first + n_tokens)
    )


def find_num_syms(rodata, endianness, token_table, markers_offset):
    """Locate kallsyms_num_syms and decode kallsyms_names.

    Scans backwards from *markers_offset* (the start of kallsyms_markers).

    Yields:
        At most one ``(num_syms_offset, names)`` pair, where ``names`` is the
        list of decoded symbol names (each still prefixed with its type
        character). Nothing is yielded if no consistent layout is found.
    """
    # kallsyms_names is a sequence of length-prefixed entries ending with
    # padding to an 8-byte boundary, followed by kallsyms_markers.
    # Unfortunately, some guesswork is required to locate the start of
    # kallsyms_names given that we know the start of kallsyms_markers.
    num_syms_fmt = endianness + "I"
    token_lengths = [len(token) for token in token_table]
    # Indexed by (markers_offset - offset). Each element is a number of name
    # entries that follow the respective offset, or None if that offset is not
    # a start of a valid name entry.
    name_counts = [0]
    # Whether offset still points to one of the trailing zeroes.
    trailing_zeroes = True
    offset = markers_offset
    while offset >= 9:
        offset -= 1
        current_byte = ord(rodata[offset : offset + 1])
        if current_byte != 0:
            # Trailing zeroes have ended.
            trailing_zeroes = False
        next_name_offset = offset + current_byte + 1
        if next_name_offset > markers_offset:
            # The current name entry spans past the end of kallsyms_names. This
            # is allowed if we are still looking at trailing zeroes.
            name_counts.append(0 if trailing_zeroes else None)
            continue
        next_name_count = name_counts[markers_offset - next_name_offset]
        if next_name_count is None:
            # The next name entry is invalid, which means the current name
            # entry cannot be valid.
            name_counts.append(None)
            continue
        if is_name_ok(rodata, token_lengths, offset):
            # The current name entry is valid. Check whether it is preceded by
            # kallsyms_num_syms value, which is consistent with the number of
            # name entries we've seen so far.
            name_counts.append(next_name_count + 1)
            num_syms_offset = None
            # How kallsyms_num_syms is aligned depends on a particular kernel,
            # so try different offsets.
            for i in (-4, -8, -12, -16):
                if offset + i < 0:
                    # The candidate lies before the start of the buffer. A
                    # negative slice start would yield an empty buffer and
                    # make struct.unpack() raise.
                    break
                (num_syms,) = struct.unpack(
                    num_syms_fmt, rodata[offset + i : offset + i + 4]
                )
                if name_counts[-1] == num_syms:
                    num_syms_offset = offset + i
                    break
                if num_syms != 0:
                    # Only zero padding may separate kallsyms_num_syms from
                    # kallsyms_names.
                    break
            if num_syms_offset is not None:
                break
        else:
            # The current name entry is not valid. This is allowed if we are
            # still looking at trailing zeroes.
            name_counts.append(0 if trailing_zeroes else None)
    else:
        return
    # We've found kallsyms_names, now parse it.
    names = []
    for _ in range(name_counts[-1]):
        names.append(extract_name(rodata, token_table, offset).decode())
        offset += ord(rodata[offset : offset + 1]) + 1
    yield num_syms_offset, names
# Description of a machine word: size in bytes, struct format character and
# the C type name used in debug messages.
Word = namedtuple("Word", ("size", "fmt", "ctype"))
WORD32 = Word(4, "I", "u32")
WORD64 = Word(8, "Q", "u64")


def find_addresses_no_kallsyms_base_relative(
    rodata, endianness, addresses_offset, num_syms, word
):
    """Try to parse kallsyms_addresses[] (absolute, word-sized addresses).

    Yields:
        At most one ``(addresses_offset, addresses_end, addresses)`` triple.
        Nothing is yielded if the array does not fit into *rodata* or is not
        sorted in ascending order.
    """
    if addresses_offset + num_syms * word.size > len(rodata):
        return
    address_fmt = endianness + word.fmt
    if word.size == 8 and addresses_offset % 8 != 0:
        addresses_offset -= 4
    if addresses_offset < 0:
        # A wrong word size or symbol count guess can place the array before
        # the start of the buffer. Negative slice bounds would produce an
        # empty buffer and make struct.unpack() raise, so reject the guess.
        return
    offset = addresses_offset
    addresses = []
    for _ in range(num_syms):
        (address,) = struct.unpack(address_fmt, rodata[offset : offset + word.size])
        if len(addresses) > 0 and address < addresses[-1]:
            # The resulting addresses are not sorted.
            return
        addresses.append(address)
        offset += word.size
    logging.debug(
        "0x%08X: %s kallsyms_addresses[]",
        addresses_offset,
        word.ctype,
    )
    yield addresses_offset, offset, addresses


def find_addresses_kallsyms_base_relative(
    rodata, endianness, addresses_offset, num_syms, word
):
    """Try to parse kallsyms_offsets[] followed by kallsyms_relative_base.

    Both the !KALLSYMS_ABSOLUTE_PERCPU and the KALLSYMS_ABSOLUTE_PERCPU
    interpretations are tried, in that order.

    Yields:
        Up to two ``(addresses_offset, end_offset, addresses)`` triples, one
        per interpretation that results in sorted addresses. ``end_offset``
        is the end of kallsyms_relative_base, i.e. the end of everything this
        function has consumed.
    """
    if addresses_offset < 0:
        return
    addresses_end = addresses_offset + num_syms * 4
    relative_base_offset = align(addresses_end, word.size)
    relative_base_end = relative_base_offset + word.size
    if relative_base_end > len(rodata):
        return
    raw_addresses = struct.unpack(
        endianness + "i" * num_syms, rodata[addresses_offset:addresses_end]
    )
    (kallsyms_relative_base,) = struct.unpack(
        endianness + word.fmt,
        rodata[relative_base_offset:relative_base_end],
    )

    def log_ok():
        logging.debug(
            "0x%08X: %s kallsyms_relative_base=0x%016X",
            relative_base_offset,
            word.ctype,
            kallsyms_relative_base,
        )
        logging.debug("0x%08X: u32 kallsyms_offsets[]", addresses_offset)

    # Try !KALLSYMS_ABSOLUTE_PERCPU first.
    # A lot of small nonnegative numbers will match KALLSYMS_ABSOLUTE_PERCPU
    # too, but it's more likely to be !KALLSYMS_ABSOLUTE_PERCPU.
    addresses = []
    for raw in raw_addresses:
        address = kallsyms_relative_base + (raw & 0xFFFFFFFF)
        if len(addresses) > 0 and address < addresses[-1]:
            # The resulting addresses are not sorted.
            break
        addresses.append(address)
    else:
        log_ok()
        # Report the end of kallsyms_relative_base, same as the
        # KALLSYMS_ABSOLUTE_PERCPU branch below: the layout is identical,
        # only the interpretation of the offsets differs.
        yield addresses_offset, relative_base_end, addresses

    # Try KALLSYMS_ABSOLUTE_PERCPU.
    addresses = []
    for raw in raw_addresses:
        if raw >= 0:
            address = raw
        else:
            address = kallsyms_relative_base - 1 - raw
        if len(addresses) > 0 and address < addresses[-1]:
            # The resulting addresses are not sorted.
            break
        addresses.append(address)
    else:
        log_ok()
        yield addresses_offset, relative_base_end, addresses
325 | for markers_end_offset in range(token_table_offset, -4, -4) 326 | # In 4.20 the size of markers was reduced to 4 bytes. 327 | for marker_fmt, marker_size in ( 328 | (endianness + "I", 4), 329 | (endianness + "Q", 8), 330 | ) 331 | for markers_offset, markers in find_markers( 332 | rodata, marker_fmt, marker_size, markers_end_offset - marker_size 333 | ) 334 | for _ in ( 335 | logging.debug("0x%08X: kallsyms_markers=%s", markers_offset, markers), 336 | ) 337 | for num_syms_offset, names in find_num_syms( 338 | rodata, endianness, token_table, markers_offset 339 | ) 340 | for _ in ( 341 | logging.debug("0x%08X: kallsyms_num_syms=%s", num_syms_offset, len(names)), 342 | ) 343 | for word in (WORD64, WORD32) 344 | for _ in (logging.debug("WORD%d", word.size),) 345 | # KALLSYMS_BASE_RELATIVE means that kallsyms_offsets are followed by 346 | # kallsyms_relative_base. This was introduced in 4.6 by commit 347 | # 2213e9a66bb8. 348 | for base_relative in (False, True) 349 | for _ in ( 350 | logging.debug("KALLSYMS_BASE_RELATIVE=%s", "y" if base_relative else "n"), 351 | ) 352 | # Since kernel 6.4 (commit 404bad70fcf7) kallsyms_addresses come after 353 | # kallsyms_token_index. 
354 | for addresses_first in (True, False) 355 | for _ in (logging.debug("addresses_first=%s", addresses_first),) 356 | for addresses_offset, addresses_end, addresses in ( 357 | find_addresses_kallsyms_base_relative( 358 | rodata, 359 | endianness, 360 | ( 361 | align_up( 362 | align_up(num_syms_offset, word.size) 363 | - word.size 364 | - len(names) * 4, 365 | word.size, 366 | ) 367 | if addresses_first 368 | else align(token_index_offset + 512, word.size) 369 | ), 370 | len(names), 371 | word, 372 | ) 373 | if base_relative 374 | else find_addresses_no_kallsyms_base_relative( 375 | rodata, 376 | endianness, 377 | ( 378 | num_syms_offset - len(names) * word.size 379 | if addresses_first 380 | else token_index_offset + 512 381 | ), 382 | len(names), 383 | word, 384 | ) 385 | ) 386 | for _ in ( 387 | logging.debug( 388 | "0x%08X: kallsyms[0x%08X]", 389 | addresses_offset if addresses_first else num_syms_offset, 390 | ( 391 | token_index_offset + 512 - addresses_offset 392 | if addresses_first 393 | else ( 394 | align(addresses_end, word.size) 395 | + len(names) * 3 396 | - num_syms_offset 397 | ) 398 | ), 399 | ), 400 | ) 401 | ): 402 | return zip(addresses, names) 403 | return [] 404 | -------------------------------------------------------------------------------- /flake8: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | set -e -u -x 3 | cd "$(dirname "$0")" 4 | ./ls-py0 | xargs -0 flake8 --extend-ignore=E203 --max-line-length=88 "$@" 5 | -------------------------------------------------------------------------------- /ghidra-kallsyms.py: -------------------------------------------------------------------------------- 1 | # Add or rename symbols based on kallsyms 2 | import jarray 3 | import json 4 | 5 | from ghidra.app.cmd.function import CreateFunctionCmd 6 | from ghidra.program.model.data import VoidDataType, Undefined1DataType 7 | from ghidra.program.model.listing import Function, ParameterImpl, 
def load_like_json(program, symbols, functions, types):
    """Apply parameter names and varargs flags from "<executable>.like.json".

    program is the Ghidra program, symbols its symbol table and functions its
    function manager. types is accepted for interface compatibility and is
    not used. If the JSON file does not exist, nothing is done.

    For every subprogram in the file, the global symbol with the same name is
    looked up. Names that map to more than one label or more than one
    function are skipped, as are names with no symbol at all. A function is
    created at the label if none exists yet, and its parameters are then
    renamed while keeping the existing parameter data types.
    """
    import errno

    like_json_path = program.getExecutablePath() + ".like.json"
    try:
        fp = open(like_json_path)
    except IOError as e:
        # FileNotFoundError does not exist on Python 2 / Jython, so naming it
        # in the except clause raised NameError exactly when the file was
        # missing. IOError exists on both Python 2 and 3.
        if e.errno != errno.ENOENT:
            raise
        return
    try:
        like_json = json.load(fp)
    finally:
        fp.close()
    # NOTE(review): ida_utils.apply_like() iterates the top-level values and
    # filters on item[0] == "subprogram"; confirm that build-vmlinux really
    # emits a "subprograms" key for this importer.
    for return_type, name, parameters, has_varargs in like_json["subprograms"].values():
        ambiguous = False
        existing_label = None
        existing_function = None
        for existing_symbol in symbols.getGlobalSymbols(name):
            symbol_type = existing_symbol.getSymbolType()
            if symbol_type == SymbolType.LABEL:
                if existing_label is not None:
                    ambiguous = True
                    break
                existing_label = existing_symbol
            elif symbol_type == SymbolType.FUNCTION:
                if existing_function is not None:
                    ambiguous = True
                    break
                existing_function = existing_symbol
        if ambiguous:
            continue
        if existing_function is None:
            if existing_label is None:
                continue
            try:
                function = functions.createFunction(
                    name,
                    existing_label.getAddress(),
                    CreateFunctionCmd.getFunctionBody(
                        program, existing_label.getAddress()
                    ),
                    SourceType.ANALYSIS,
                )
            except:  # noqa: E722
                # E.g. OverlappingFunctionException.
                continue
        else:
            function = functions.getFunction(existing_function.getID())
        if return_type is None:
            return_var = ReturnParameterImpl(VoidDataType.dataType, program)
        else:
            # Keep whatever return type the function already has.
            return_var = function.getReturn()
        new_params = []
        for i, (_, param_name) in enumerate(parameters):
            # Only the name comes from the JSON; the type is taken from the
            # existing parameter when there is one.
            param_type = Undefined1DataType.dataType
            existing_param = function.getParameter(i)
            if existing_param is not None:
                param_type = existing_param.getDataType()
            new_params.append(ParameterImpl(param_name, param_type, program))
        function.updateFunction(
            function.getCallingConventionName(),
            return_var,
            new_params,
            Function.FunctionUpdateType.DYNAMIC_STORAGE_ALL_PARAMS,
            True,
            SourceType.ANALYSIS,
        )
        function.setVarArgs(has_varargs)
# IDA script: locate kallsyms in the kernel image and rename the symbols.
from idaapi import get_bytes, require
from ida_segment import get_segm_by_name

require("find_kallsyms")
require("ida_utils")
rodata_segm = get_segm_by_name(".rodata")
if rodata_segm is None:
    # Fall back to .text when there is no separate .rodata segment.
    rodata_segm = get_segm_by_name(".text")
# end_ea is exclusive, so the segment size is exactly end_ea - start_ea;
# adding 1 asked for one byte past the end of the segment.
rodata_size = rodata_segm.end_ea - rodata_segm.start_ea
rodata = get_bytes(rodata_segm.start_ea, rodata_size)
kallsyms = find_kallsyms.find_kallsyms_in_rodata(rodata)  # noqa: F821
ida_utils.apply_kallsyms(kallsyms)  # noqa: F821
def parse_kallsyms(fp):
    """Parse "<hex address> <name>" lines into (address, name) tuples.

    fp is any iterable of text lines. Blank or whitespace-only lines (for
    example a trailing empty line) are skipped. A non-blank line that does
    not consist of exactly two whitespace-separated fields raises ValueError.
    """
    for line in fp:
        fields = line.split()
        if not fields:
            continue
        address, name = fields
        yield int(address, 16), name


def _is_uname(name):
    """Return whether name is usable as a user-defined name in IDA.

    "class" and "new" are rejected in addition to whatever is_uname() rejects.
    """
    return is_uname(name) and name not in (
        "class",
        "new",
    )


def apply_kallsyms(kallsyms):
    """Name addresses in the IDA database from (address, name) pairs.

    The first character of each name is the symbol type; entries of type "A"
    are skipped. A name that IDA does not accept gets a "_" prefix and is
    skipped if it is still unacceptable. When set_name() fails, the items at
    the address are deleted and naming is retried once.
    """
    for address, name in kallsyms:
        if name[0] != "A":
            new_name = str(name[1:])
            if not _is_uname(new_name):
                new_name = "_" + new_name
            if _is_uname(new_name):
                if not set_name(address, new_name):
                    del_items(address)
                    set_name(address, new_name)
# Fallbacks used whenever a type cannot be resolved from the JSON description.
DEFAULT_TYPE = "char"
DEFAULT_TYPE_SIZE = 1
DEFAULT_TYPE_FLAGS = FF_DATA | FF_BYTE
DEFAULT_TYPE_ID = -1
INFO = get_inf_structure()
if INFO.is_64bit():
    PTR_SIZE = 8
elif INFO.is_32bit():
    PTR_SIZE = 4
else:
    PTR_SIZE = 2


def get_type_size(like, die_offset):
    """Return the size in bytes of the type stored under die_offset in like.

    like maps str(die_offset) to a list whose first element is the kind.
    Unknown offsets and unhandled kinds yield DEFAULT_TYPE_SIZE.
    """
    entry = like.get(str(die_offset))
    if entry is None:
        return DEFAULT_TYPE_SIZE
    kind = entry[0]
    if kind in ("struct", "union"):
        return entry[2]
    if kind == "typedef":
        return get_type_size(like, entry[2])
    if kind == "pointer":
        return PTR_SIZE
    if kind == "base":
        return entry[2]
    if kind in ("const", "volatile"):
        return get_type_size(like, entry[1])
    if kind == "array":
        return get_type_size(like, entry[1]) * entry[2]
    return DEFAULT_TYPE_SIZE


def add_end_member(struct_id, struct_name, struct_size, log_fp):
    """Forces struct size by creating a byte field at the end"""
    end_member_name = "field_{:X}".format(struct_size - 1)
    log_fp.write("{}.{}: ...\n".format(struct_name, end_member_name))
    log_fp.flush()
    ret = add_struc_member(
        struct_id,
        end_member_name,
        struct_size - 1,
        DEFAULT_TYPE_FLAGS,
        DEFAULT_TYPE_ID,
        DEFAULT_TYPE_SIZE,
    )
    log_fp.write("... ret={}\n".format(ret))
    log_fp.flush()
    return ret


def resolve_type(like, die_offset, log_fp, alias=None):
    """Return a C type string for die_offset, creating IDA structs as needed.

    A die_offset of None resolves to "void"; unknown offsets and unhandled
    kinds resolve to DEFAULT_TYPE. Structs and unions that do not exist in
    the database yet are created together with their members, and the new
    struct id is appended to the JSON entry. alias is the name used for an
    anonymous struct/union that is reached through a typedef. Progress is
    written to log_fp and flushed after every step.
    """
    if die_offset is None:
        return "void"
    entry = like.get(str(die_offset))
    if entry is None:
        return DEFAULT_TYPE
    kind = entry[0]
    if kind in ("struct", "union"):
        if entry[1] is None:
            if alias is None:
                struct_name = "{}_{}".format(kind, hex(die_offset))
            else:
                struct_name = alias
        else:
            struct_name = entry[1]
        # Prefix the name when IDA rejects it, or when it is already taken by
        # something that is not a struct.
        if not _is_uname(str(struct_name)) or (
            get_struc_id(str(struct_name)) == BADADDR
            and get_name_ea(BADADDR, str(struct_name)) != BADADDR
        ):
            struct_name = "_" + struct_name
        struct_id = get_struc_id(str(struct_name))
        if struct_id != BADADDR:
            # The struct already exists; just remember its id on the entry.
            if len(entry) == 4:
                entry.append(struct_id)
            return struct_name
        log_fp.write("{}: ...\n".format(struct_name))
        log_fp.flush()
        struct_id = add_struc(BADADDR, str(struct_name), kind == "union")
        log_fp.write("... id={}\n".format(hex(struct_id)))
        log_fp.flush()
        if struct_id == BADADDR:
            return DEFAULT_TYPE
        # Record the id before resolving members, so that a member referring
        # back to this struct finds it instead of recursing forever.
        entry.append(struct_id)
        if kind == "struct" and entry[2] != 0:
            ret = add_end_member(struct_id, struct_name, entry[2], log_fp)
            have_end_member = ret == 0
        else:
            have_end_member = False
        for member_type_die_offset, member_name, member_offset in entry[3]:
            if member_name is None:
                if kind == "struct":
                    field_n = member_offset
                else:
                    field_n = sum(1 for _ in StructMembers(struct_id))
                member_name = "field_{:X}".format(field_n)
            elif not _is_uname(str(member_name)):
                member_name = "_" + member_name
            member_type_str = str(resolve_type(like, member_type_die_offset, log_fp))
            member_size = get_type_size(like, member_type_die_offset)
            if have_end_member and member_offset + member_size == entry[2]:
                # The real last member overlaps the placeholder byte.
                del_struc_member(struct_id, entry[2] - 1)
                have_end_member = False
            log_fp.write(
                "{} {}.{}: ...\n".format(member_type_str, struct_name, member_name)
            )
            log_fp.flush()
            ret = add_struc_member(
                struct_id,
                str(member_name),
                member_offset,
                DEFAULT_TYPE_FLAGS,
                DEFAULT_TYPE_ID,
                DEFAULT_TYPE_SIZE,
            )
            log_fp.write("... ret={}\n".format(ret))
            log_fp.flush()
            if ret == 0:
                member_id = get_name_ea(
                    BADADDR, "{}.{}".format(struct_name, member_name)
                )
                apply_type(member_id, parse_decl(member_type_str, 0))
        return struct_name
    if kind == "typedef":
        return resolve_type(like, entry[2], log_fp, entry[1])
    if kind == "pointer":
        return resolve_type(like, entry[1], log_fp) + "*"
    if kind == "base":
        if entry[1]:
            return "__int" + str(entry[2] * 8)
        else:
            return "unsigned __int" + str(entry[2] * 8)
    if kind in ("const", "volatile"):
        return resolve_type(like, entry[1], log_fp)
    if kind == "array":
        return "{}[{}]".format(resolve_type(like, entry[1], log_fp), entry[2])
    return DEFAULT_TYPE


def apply_like(path):
    """Apply function prototypes from the JSON file at path to the database.

    Every "subprogram" entry whose name exists in the database gets a C
    declaration built from its return type, parameters and varargs flag, and
    that declaration is applied at the function's address. A log is written
    to "<path>.log".
    """
    with open("{}.log".format(path), "w") as log_fp:
        with open(path) as fp:
            like = json.load(fp)
        for item in like.values():
            if item[0] != "subprogram":
                continue
            _, return_type, name, parameters, has_varargs = item
            address = get_name_ea(BADADDR, str(name))
            if address == BADADDR:
                log_fp.write("Subprogram not found: {}\n".format(name))
                log_fp.flush()
                continue
            decl = resolve_type(like, return_type, log_fp) + " " + name + "("
            first = True
            for parameter_type, parameter_name in parameters:
                if first:
                    first = False
                else:
                    decl += ", "
                # An unnamed parameter (None) is emitted as a bare type; string
                # concatenation with None would raise TypeError.
                if parameter_name is not None and not _is_uname(str(parameter_name)):
                    parameter_name = "_" + parameter_name
                decl += resolve_type(like, parameter_type, log_fp)
                if parameter_name is not None and _is_uname(str(parameter_name)):
                    decl += " " + parameter_name
            if has_varargs:
                if not first:
                    decl += ", "
                decl += "..."
            decl += ")"
            log_fp.write("{}: ...\n".format(decl))
            log_fp.flush()
            ret = apply_type(address, parse_decl(str(decl), 0))
            log_fp.write("... ret={}\n".format(ret))
            log_fp.flush()
-------------------------------------------------------------------------------- /test/kallsyms-4.4.223.defcon2020.ooofs.i686.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mephi42/ida-kallsyms/79b9e141ba90bf7f1381e2adc669445afecc0108/test/kallsyms-4.4.223.defcon2020.ooofs.i686.gz -------------------------------------------------------------------------------- /test/kallsyms-4.4.223.defconfig.i686.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mephi42/ida-kallsyms/79b9e141ba90bf7f1381e2adc669445afecc0108/test/kallsyms-4.4.223.defconfig.i686.gz -------------------------------------------------------------------------------- /test/kallsyms-5.1.0.tasteless2019.tee.aarch64.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mephi42/ida-kallsyms/79b9e141ba90bf7f1381e2adc669445afecc0108/test/kallsyms-5.1.0.tasteless2019.tee.aarch64.gz -------------------------------------------------------------------------------- /test/kallsyms-5.1.9.balsn2019.krazynote.x86_64.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mephi42/ida-kallsyms/79b9e141ba90bf7f1381e2adc669445afecc0108/test/kallsyms-5.1.9.balsn2019.krazynote.x86_64.gz -------------------------------------------------------------------------------- /test/kallsyms-5.3.0.hitcon2019.poe.x86_64.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mephi42/ida-kallsyms/79b9e141ba90bf7f1381e2adc669445afecc0108/test/kallsyms-5.3.0.hitcon2019.poe.x86_64.gz -------------------------------------------------------------------------------- /test/kallsyms-6.2.8.seccon2023.kmemo.x86_64.gz: -------------------------------------------------------------------------------- 
class TestFindKallsyms(unittest.TestCase):
    """Runs find_kallsyms_in_rodata() against the gzipped images in this directory."""

    def __init__(self, *args, **kwargs):
        unittest.TestCase.__init__(self, *args, **kwargs)
        # Fixtures live next to this file.
        self.basedir = os.path.dirname(__file__)

    def _read(self, name):
        """Return the decompressed contents of the fixture called name."""
        path = os.path.join(self.basedir, name)
        with gzip.GzipFile(path) as fp:
            return fp.read()

    def _find(self, name):
        """Return the (address, name) pairs found in the fixture called name."""
        return list(find_kallsyms_in_rodata(self._read(name)))

    def test_kallsyms_4_16_3_s390x(self):
        syms = self._find("kallsyms-4.16.3-301.fc28.s390x.gz")
        self.assertEqual(62766, len(syms))
        self.assertEqual((0x200, "T_text"), syms[0])
        self.assertEqual((0xD32000, "B__bss_stop"), syms[-1])

    def test_kallsyms_3_10_0_x86_64(self):
        syms = self._find("kallsyms-3.10.0-862.11.6.el7.x86_64.gz")
        self.assertEqual(82619, len(syms))
        self.assertEqual((0, "Airq_stack_union"), syms[0])
        # Exactly one symbol must be called dump_stack.
        (dump_stack_address,) = [
            address for address, name in syms if name == "Tdump_stack"
        ]
        self.assertEqual(0xFFFFFFFF817135BB, dump_stack_address)
        self.assertEqual((0xFFFFFFFF82657000, "B__brk_limit"), syms[-1])

    def test_kallsyms_5_1_9_x86_64(self):
        syms = self._find("kallsyms-5.1.9.balsn2019.krazynote.x86_64.gz")
        self.assertEqual(74045, len(syms))
        self.assertEqual((0, "Airq_stack_union"), syms[0])
        self.assertEqual((0xFFFFFFFF82A2C000, "B__brk_limit"), syms[-1])

    def test_kallsyms_5_1_0_aarch64(self):
        syms = self._find("kallsyms-5.1.0.tasteless2019.tee.aarch64.gz")
        self.assertEqual(117079, len(syms))
        self.assertEqual((0xFFFF000010080000, "t_head"), syms[0])
        self.assertEqual((0xFFFF00001144E000, "B_end"), syms[-1])

    def test_kallsyms_5_3_0_x86_64(self):
        syms = self._find("kallsyms-5.3.0.hitcon2019.poe.x86_64.gz")
        self.assertEqual(88612, len(syms))
        self.assertEqual((0, "Afixed_percpu_data"), syms[0])
        self.assertEqual((0xFFFFFFFF83200000, "T__init_scratch_end"), syms[-1])

    def test_kallsyms_4_4_0_arm(self):
        syms = self._find("kallsyms-4.4.0-1085-raspi2.arm.gz")
        self.assertEqual(78413, len(syms))
        self.assertEqual((0x80008000, "Tstext"), syms[0])
        self.assertEqual((0x80F56454, "B__bss_stop"), syms[-1])

    def test_kallsyms_4_4_223_i686(self):
        syms = self._find("kallsyms-4.4.223.defcon2020.ooofs.i686.gz")
        self.assertEqual(80397, len(syms))
        self.assertEqual((0xC1000000, "Tstartup_32"), syms[0])
        self.assertEqual((0xC1E9B000, "B__brk_limit"), syms[-1])

    def test_kallsyms_4_4_223_i686_v2(self):
        syms = self._find("kallsyms-4.4.223.defconfig.i686.gz")
        self.assertEqual(39874, len(syms))
        self.assertEqual((0xC1000338, "tsanitize_boot_params.constprop.0"), syms[0])
        self.assertEqual((0xC1BE29BD, "T_einittext"), syms[-1])

    def test_kallsyms_6_5_0_x86_64(self):
        syms = self._find("kallsyms-6.5.0-rc4.ctfzone2023.pwnfinity.x86_64.gz")
        self.assertEqual(140153, len(syms))
        self.assertEqual((0, "Afixed_percpu_data"), syms[0])
        self.assertEqual((0xFFFFFFFF84400000, "D__init_scratch_end"), syms[-1])

    def test_kallsyms_6_2_8_x86_64(self):
        syms = self._find("kallsyms-6.2.8.seccon2023.kmemo.x86_64.gz")
        self.assertEqual(24624, len(syms))
        self.assertEqual((0xFFFFFFFF81000000, "Tstartup_64"), syms[0])
        self.assertEqual((0xFFFFFFFF81B4E01E, "T_einittext"), syms[-1])

    def test_kallsyms_6_8_0_x86_64(self):
        syms = self._find("kallsyms-6.8.0-48-generic.noble.x86_64.gz")
        self.assertEqual(203185, len(syms))
        self.assertEqual((0, "Afixed_percpu_data"), syms[0])
        self.assertEqual((0xFFFFFFFF84800000, "D__init_scratch_end"), syms[-1])


if __name__ == "__main__":
    unittest.main()