├── .github
    └── workflows
    │   └── CI.yml
├── .gitignore
├── README.md
├── black
├── build-vmlinux
├── ci
├── find-kallsyms
├── find_kallsyms.py
├── flake8
├── ghidra-kallsyms.py
├── ida-kallsyms-import.py
├── ida-kallsyms.py
├── ida-like-import.py
├── ida_utils.py
├── ls-py0
├── requirements.txt
└── test
    ├── __init__.py
    ├── kallsyms-3.10.0-862.11.6.el7.x86_64.gz
    ├── kallsyms-4.16.3-301.fc28.s390x.gz
    ├── kallsyms-4.4.0-1085-raspi2.arm.gz
    ├── kallsyms-4.4.223.defcon2020.ooofs.i686.gz
    ├── kallsyms-4.4.223.defconfig.i686.gz
    ├── kallsyms-5.1.0.tasteless2019.tee.aarch64.gz
    ├── kallsyms-5.1.9.balsn2019.krazynote.x86_64.gz
    ├── kallsyms-5.3.0.hitcon2019.poe.x86_64.gz
    ├── kallsyms-6.2.8.seccon2023.kmemo.x86_64.gz
    ├── kallsyms-6.5.0-rc4.ctfzone2023.pwnfinity.x86_64.gz
    ├── kallsyms-6.8.0-48-generic.noble.x86_64.gz
    └── test_find_kallsyms.py


/.github/workflows/CI.yml:
--------------------------------------------------------------------------------
 1 | on:
 2 |   push:
 3 |     branches: ["master"]
 4 |   pull_request:
 5 |     branches: ["master"]
 6 | 
 7 | name: Continuous integration
 8 | 
 9 | jobs:
10 |   build:
11 |     runs-on: ubuntu-22.04
12 | 
13 |     steps:
14 |     - uses: actions/checkout@v3
15 | 
16 |     - name: Install packages
17 |       run: sudo apt-get update &&
18 |            sudo apt-get install -y black flake8 python2 python3 jython pypy3
19 | 
20 |     - name: Check formatting
21 |       run: ./black --check
22 | 
23 |     - name: Test python2
24 |       run: python2 -m unittest discover
25 | 
26 |     - name: Test python3
27 |       run: python3 -m unittest discover
28 | 
29 |     - name: Test jython
30 |       run: jython -m unittest discover
31 | 
32 |     - name: Test pypy3
33 |       run: pypy3 -m unittest discover
34 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.class
2 | .idea/
3 | .mypy_cache/
4 | *.pyc
5 | __pycache__/
6 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # ida-kallsyms
 2 | 
 3 | IDA script for parsing kallsyms.
 4 | 
 5 | ## Usage
 6 | 
 7 | * `git clone https://github.com/mephi42/ida-kallsyms.git`
 8 | * Open the kernel in IDA, let the autoanalysis finish.
 9 | * From `File` &#8594; `Script file...` (Alt+F7 / Alt+F9) run
10 |   `ida-kallsyms/ida-kallsyms.py` script.
11 | 
12 | ## Usage with Ghidra
13 | 
14 | * `git clone https://github.com/mephi42/ida-kallsyms.git`
15 | * Open the kernel in Ghidra, let the autoanalysis finish.
16 | * Go to `Window` &#8594; `Script manager`.
17 | * Once: press `Script Directories` button and add `ida-kallsyms`.
18 | * In `Filter` edit box, type `kallsyms`.
19 | * Double-click `ghidra-kallsyms.py` and wait.
20 | 
21 | ## Stand-alone usage
22 | 
23 | * `git clone https://github.com/mephi42/ida-kallsyms.git`
24 | * `ida-kallsyms/find-kallsyms vmlinux >vmlinux.kallsyms`
25 | * The resulting `vmlinux.kallsyms` file can be imported into IDA using
26 |   `ida-kallsyms-import.py` script.
27 | 
28 | # build-vmlinux
29 | 
30 | Script for obtaining function signatures and struct layouts. Works by building
31 | a Linux Kernel that is similar to the one being analyzed and extracting debug
32 | information from it.
33 | 
34 | ## Usage
35 | 
36 | * Load kallsyms into IDA as described above.
37 | * `ida-kallsyms/build-vmlinux --like vmlinux`
38 | 
39 |   This will run for a while and generate `vmlinux.like.json` file.
40 | 
41 |   Check out `ida-kallsyms/build-vmlinux --help` in case you already have
42 |   `binutils-gdb` / `gcc` / `linux` local git repos or a `.config` that
43 |   matches `vmlinux`.
44 | * Import `vmlinux.like.json` into IDA using `ida-kallsyms/ida-like-import.py`
45 |   script.
46 | * If there are import errors, check `vmlinux.like.json.log` file.
47 | 


--------------------------------------------------------------------------------
/black:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | set -e -u -x
3 | cd "$(dirname "$0")"
4 | ./ls-py0 | xargs -0 black "$@"
5 | 


--------------------------------------------------------------------------------
/build-vmlinux:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env pypy3
  2 | import argparse
  3 | from contextlib import contextmanager
  4 | import io
  5 | import json
  6 | import logging
  7 | from multiprocessing import cpu_count
  8 | import os
  9 | from pathlib import Path
 10 | import re
 11 | import shutil
 12 | import struct
 13 | import subprocess
 14 | import tempfile
 15 | from typing import Any, Dict, Generator, List, NamedTuple, Optional, Set, Tuple
 16 | 
 17 | from elftools.dwarf.die import DIE  # type: ignore
 18 | from elftools.elf.elffile import ELFFile  # type: ignore
 19 | from kconfiglib import Kconfig  # type: ignore
 20 | 
 21 | from find_kallsyms import find_kallsyms_in_rodata
 22 | 
 23 | 
 24 | def get_elf_host(header: Any) -> str:
 25 |     return {
 26 |         ("EM_386", "ELFCLASS32", "ELFDATA2LSB"): "i686-linux-gnu",
 27 |         ("EM_MIPS", "ELFCLASS32", "ELFDATA2MSB"): "mips-linux-gnu",
 28 |         ("EM_PPC", "ELFCLASS32", "ELFDATA2MSB"): "powerpc-linux-gnu",
 29 |         ("EM_PPC64", "ELFCLASS64", "ELFDATA2MSB"): "powerpc64-linux-gnu",
 30 |         ("EM_S390", "ELFCLASS64", "ELFDATA2MSB"): "s390x-linux-gnu",
 31 |         ("EM_ARM", "ELFCLASS32", "ELFDATA2LSB"): "arm-linux-gnueabi",
 32 |         ("EM_X86_64", "ELFCLASS64", "ELFDATA2LSB"): "x86_64-linux-gnu",
 33 |         ("EM_AARCH64", "ELFCLASS64", "ELFDATA2LSB"): "aarch64-linux-gnu",
 34 |     }[header.e_machine, header.e_ident.EI_CLASS, header.e_ident.EI_DATA]
 35 | 
 36 | 
 37 | def get_pe_host(machine: int) -> str:
 38 |     return {
 39 |         0x14C: "i686-linux-gnu",
 40 |         0x1C0: "arm-linux-gnueabi",
 41 |         0x8664: "x86_64-linux-gnu",
 42 |         0xAA64: "aarch64-linux-gnu",
 43 |     }[machine]
 44 | 
 45 | 
 46 | def get_linux_arch(host: str) -> str:
 47 |     return {
 48 |         "i686": "i386",
 49 |         "mips": "mips",
 50 |         "powerpc": "ppc",
 51 |         "powerpc64": "ppc64",
 52 |         "s390x": "s390x",
 53 |         "arm": "arm",
 54 |         "x86_64": "x86_64",
 55 |         "aarch64": "arm64",
 56 |     }[host[: host.index("-")]]
 57 | 
 58 | 
 59 | def arch2srcarch(arch: str) -> str:
 60 |     if arch in ("i386", "x86_64"):
 61 |         return "x86"
 62 |     return arch
 63 | 
 64 | 
 65 | def fetch_tag(git: Path, remote: str, tag: str) -> None:
 66 |     git.mkdir(parents=True, exist_ok=True)
 67 |     subprocess.check_call(["git", "init"], cwd=git)
 68 |     subprocess.check_call(["git", "fetch", remote, f"{tag}:{tag}"], cwd=git)
 69 | 
 70 | 
 71 | def prepare_worktree(worktree: Path, git: Path, remote: str, tag: str) -> None:
 72 |     fetch_tag(git, remote, tag)
 73 |     try:
 74 |         shutil.rmtree(worktree)
 75 |     except FileNotFoundError:
 76 |         pass
 77 |     subprocess.check_call(
 78 |         ["git", "worktree", "add", "-f", "-f", "--detach", worktree, tag],
 79 |         cwd=git,
 80 |     )
 81 | 
 82 | 
 83 | def build_or_reuse_toolchain(
 84 |     binutils_git: Path,
 85 |     binutils_version: str,
 86 |     gcc_git: Path,
 87 |     gcc_version: str,
 88 |     host: str,
 89 | ) -> Path:
 90 |     toolchain = f"{host}-toolchain-{binutils_version}-{gcc_version}"
 91 |     install = Path.cwd() / toolchain
 92 |     bin = install / "bin"
 93 |     if (bin / f"{host}-gcc").exists():
 94 |         return bin
 95 |     worktree = Path.cwd() / f"{toolchain}-build"
 96 |     binutils_worktree = worktree / "binutils-gdb"
 97 |     prepare_worktree(
 98 |         worktree=binutils_worktree,
 99 |         git=binutils_git,
100 |         remote="git://sourceware.org/git/binutils-gdb.git",
101 |         tag="refs/tags/binutils-" + binutils_version.replace(".", "_"),
102 |     )
103 |     subprocess.check_call(
104 |         [
105 |             "./configure",
106 |             f"--target={host}",
107 |             "--disable-multilib",
108 |             "--disable-nls",
109 |             f"--prefix={install}",
110 |         ],
111 |         cwd=binutils_worktree,
112 |         env={**os.environ, "CXXFLAGS": "-fpermissive"},
113 |     )
114 |     for target in ("all", "install"):
115 |         subprocess.check_call(
116 |             [
117 |                 "make",
118 |                 f"-j{cpu_count()}",
119 |                 f"{target}-binutils",
120 |                 f"{target}-gas",
121 |                 f"{target}-ld",
122 |             ],
123 |             cwd=binutils_worktree,
124 |         )
125 |     gcc_worktree = worktree / "gcc"
126 |     prepare_worktree(
127 |         worktree=gcc_worktree,
128 |         git=gcc_git,
129 |         remote="git://gcc.gnu.org/git/gcc.git",
130 |         tag=f"refs/tags/releases/gcc-{gcc_version}",
131 |     )
132 |     subprocess.check_call(
133 |         [
134 |             "./configure",
135 |             f"--target={host}",
136 |             "--enable-languages=c",
137 |             "--disable-bootstrap",
138 |             "--disable-multilib",
139 |             "--disable-nls",
140 |             f"--prefix={install}",
141 |         ],
142 |         cwd=gcc_worktree,
143 |         env={**os.environ, "CXXFLAGS": "-fpermissive"},
144 |     )
145 |     for target in ("all", "install"):
146 |         subprocess.check_call(
147 |             [
148 |                 "make",
149 |                 f"-j{cpu_count()}",
150 |                 f"{target}-gcc",
151 |             ],
152 |             cwd=gcc_worktree,
153 |         )
154 |     shutil.rmtree(worktree)
155 |     return bin
156 | 
157 | 
def putenv(name: str, value: Optional[str]) -> None:
    """Set ``os.environ[name]`` to ``value``, or remove it when ``value`` is None.

    Removing a variable that is not set is not an error.
    """
    if value is not None:
        os.environ[name] = value
        return
    os.environ.pop(name, None)
166 | 
167 | 
@contextmanager
def env(tmp_env: Dict[str, str]) -> Generator[None, None, None]:
    """Temporarily apply ``tmp_env`` to ``os.environ``.

    On exit (normal or via exception) every variable named in ``tmp_env`` is
    put back to the value it had on entry; variables that did not exist on
    entry are removed again.
    """
    # Snapshot by variable *name* (the dict keys), not by the new values.
    orig_env = {name: os.environ.get(name) for name in tmp_env}
    try:
        os.environ.update(tmp_env)
        yield
    finally:
        for name, value in orig_env.items():
            if value is None:
                os.environ.pop(name, None)
            else:
                os.environ[name] = value
177 | 
178 | 
def cc_version_text(exe):
    """Return the first line printed by ``exe --version``, decoded and stripped.

    stderr is discarded. All output is consumed and the pipe is closed before
    returning, so no file descriptor is left to the garbage collector.

    Raises:
        subprocess.CalledProcessError: the command exited with non-zero status.
    """
    completed = subprocess.run(
        [exe, "--version"],
        stdout=subprocess.PIPE,
        stderr=subprocess.DEVNULL,
        check=True,
    )
    # Only the first line is of interest; an empty output yields "".
    first_line = completed.stdout.split(b"\n", 1)[0]
    return first_line.decode().strip()
190 | 
191 | 
def build_or_reuse_vmlinux(
    output: Optional[Path],
    git: Path,
    host: str,
    version: str,
    linux_config: Optional[Path],
    toolchain_bin: Path,
    template_vmlinux: Optional[Path],
) -> Path:
    """Build ``vmlinux`` for kernel ``version``, or reuse an existing ``output``.

    Args:
        output: where to place the result; defaults to
            ``vmlinux-<arch>-<version>`` in the current directory.
        git: local Linux git repository used to fetch the version tag.
        host: GNU target triplet of the cross toolchain.
        version: kernel version; tag ``v<version>`` is checked out.
        linux_config: ``.config`` to use. When None, the config is extracted
            from ``template_vmlinux`` with ``scripts/extract-ikconfig`` if
            possible, otherwise ``defconfig`` is used; in both of these cases
            ``DEBUG_INFO`` is then switched on.
        toolchain_bin: directory with the ``<host>-gcc`` / ``<host>-ld`` tools.
        template_vmlinux: kernel image to take the embedded config from.

    Returns:
        ``output``. It is a symlink to ``vmlinux`` inside the build worktree.

    Raises:
        subprocess.CalledProcessError: a git or make step failed.
    """
    arch = get_linux_arch(host)
    srcarch = arch2srcarch(arch)
    if output is None:
        output = Path(f"vmlinux-{arch}-{version}")
    if output.exists():
        return output
    worktree = Path.cwd() / f"{arch}-linux-{version}-build"
    prepare_worktree(
        worktree=worktree,
        git=git,
        remote="git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git",  # noqa: E501
        tag=f"refs/tags/v{version}",
    )
    # Use ccache only if it can actually be executed. check_call() raises
    # CalledProcessError on a non-zero exit status and OSError (for example
    # FileNotFoundError) when the binary cannot be started at all.
    try:
        subprocess.check_call(
            args=["ccache", "--version"],
            stdin=subprocess.DEVNULL,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
    except (subprocess.CalledProcessError, OSError):
        ccache = ""
    else:
        ccache = "ccache "
    # The cross toolchain must come first on PATH.
    env_path = str(toolchain_bin) + os.pathsep + os.environ["PATH"]

    def make(args: List[str]) -> None:
        """Run ``make`` in the worktree for the target arch and toolchain."""
        args = [
            "make",
            f"ARCH={arch}",
            f"CROSS_COMPILE={ccache}{host}-",
            f"-j{cpu_count()}",
        ] + args
        subprocess.check_call(args, cwd=worktree, env={**os.environ, "PATH": env_path})

    worktree_config = worktree / ".config"
    if linux_config is None:
        if template_vmlinux is None:
            need_defconfig = True
        else:
            # Try to recover the config embedded in the template kernel.
            with open(worktree_config, "wb") as fp:
                returncode = subprocess.call(
                    [
                        worktree / "scripts" / "extract-ikconfig",
                        template_vmlinux,
                    ],
                    stdout=fp,
                )
            if returncode == 0:
                logging.info("Extracted: %s", worktree_config)
                make(["olddefconfig"])
                need_defconfig = False
            else:
                need_defconfig = True
        if need_defconfig:
            make(["defconfig"])
        cc = f"{host}-gcc"
        ld = f"{host}-ld"
        # Kconfig reads these settings from the process environment.
        with env(
            {
                "srctree": str(worktree),
                "ARCH": arch,
                "SRCARCH": srcarch,
                "KERNELVERSION": version,
                "CC": cc,
                "HOSTCC": "gcc",
                "HOSTCXX": "g++",
                "CC_VERSION_TEXT": cc_version_text(toolchain_bin / cc),
                "PATH": env_path,
                "LD": ld,
            }
        ):
            kconf = Kconfig()
            kconf.load_config(worktree_config)
            kconf.syms["DEBUG_INFO"].set_value("y")
            kconf.write_config(worktree_config)
    else:
        shutil.copyfile(linux_config, worktree_config)
        make(["olddefconfig"])
    make(["vmlinux"])
    # Create the symlink under a temporary name next to the destination and
    # rename it into place, so ``output`` never exists half-made.
    with tempfile.TemporaryDirectory(dir=output.parent) as tempdir:
        vmlinux_tmp = Path(tempdir) / "vmlinux"
        vmlinux_tmp.symlink_to(worktree / "vmlinux")
        vmlinux_tmp.rename(output)
    return output
286 | 
287 | 
def convert_name(die: DIE) -> Optional[str]:
    """Return the decoded ``DW_AT_name`` of ``die``, or None if it has no name."""
    name_attr = die.attributes.get("DW_AT_name")
    return None if name_attr is None else name_attr.value.decode()
293 | 
294 | 
def convert_type(die: DIE) -> Optional[int]:
    """Return the offset of the DIE referenced by ``DW_AT_type``, or None.

    The result is the compilation unit's ``cu_offset`` plus the attribute's
    raw value. None is returned when ``die`` has no ``DW_AT_type``.
    """
    type_attr = die.attributes.get("DW_AT_type")
    return None if type_attr is None else die.cu.cu_offset + type_attr.raw_value
300 | 
301 | 
class Member(NamedTuple):
    """One struct/union member, as produced by ``convert_member``."""

    # Offset of the member's type DIE (see convert_type).
    type: int
    # Member name; None when the DIE has no DW_AT_name.
    name: Optional[str]
    # DW_AT_data_member_location value, or 0 when the attribute is absent.
    offset: int
306 | 
307 | 
def convert_member(die: DIE) -> Optional[Member]:
    """Convert a ``DW_TAG_member`` DIE into a ``Member``.

    Returns None when the member has no ``DW_AT_type``. A missing
    ``DW_AT_data_member_location`` is treated as offset 0.
    """
    member_type = convert_type(die)
    if member_type is None:
        return None
    location = die.attributes.get("DW_AT_data_member_location")
    return Member(
        type=member_type,
        name=convert_name(die),
        offset=0 if location is None else location.value,
    )
322 | 
323 | 
class Struct(NamedTuple):
    """A struct or union definition, as produced by ``convert_struct``."""

    # "struct" or "union".
    kind: str
    # Type name; None when the DIE has no DW_AT_name.
    name: Optional[str]
    # DW_AT_byte_size value.
    size: int
    # Converted members; bit-field members are not included.
    members: List[Member]
329 | 
330 | 
def convert_struct(die: DIE) -> Optional[Struct]:
    """Convert a structure or union DIE into a ``Struct``.

    Returns None for declarations (``DW_AT_declaration``) and when any
    non-bit-field member cannot be converted. Members carrying
    ``DW_AT_bit_size`` or ``DW_AT_bit_offset`` are left out.
    """
    if "DW_AT_declaration" in die.attributes:
        return None
    members = []
    for child in die.iter_children():
        if child.tag != "DW_TAG_member":
            continue
        child_attrs = child.attributes
        is_bitfield = (
            "DW_AT_bit_size" in child_attrs or "DW_AT_bit_offset" in child_attrs
        )
        if is_bitfield:
            continue
        converted = convert_member(child)
        if converted is None:
            return None
        members.append(converted)
    if die.tag == "DW_TAG_structure_type":
        kind = "struct"
    else:
        kind = "union"
    return Struct(
        kind=kind,
        name=convert_name(die),
        size=die.attributes["DW_AT_byte_size"].value,
        members=members,
    )
352 | 
353 | 
class Typedef(NamedTuple):
    """A typedef, as produced by ``convert_typedef``."""

    # Always "typedef".
    kind: str
    # The typedef's name.
    name: str
    # Offset of the aliased type's DIE (see convert_type).
    type: int
358 | 
359 | 
def convert_typedef(die: DIE) -> Optional[Typedef]:
    """Convert a ``DW_TAG_typedef`` DIE; None if it lacks a name or a type."""
    alias = convert_name(die)
    if alias is None:
        return None
    target = convert_type(die)
    if target is None:
        return None
    return Typedef(kind="typedef", name=alias, type=target)
372 | 
373 | 
class Pointer(NamedTuple):
    """A pointer type, as produced by ``convert_pointer``."""

    # Always "pointer".
    kind: str
    # Offset of the pointee's type DIE; None when the DIE has no DW_AT_type.
    type: Optional[int]
377 | 
378 | 
def convert_pointer(die: DIE) -> Optional[Pointer]:
    """Convert a ``DW_TAG_pointer_type`` DIE; the pointee type may be None."""
    pointee = convert_type(die)
    return Pointer(kind="pointer", type=pointee)
384 | 
385 | 
class Int(NamedTuple):
    """A scalar type, as produced by ``convert_int``."""

    # Always "base".
    kind: str
    # Signedness derived from DW_AT_encoding.
    is_signed: bool
    # DW_AT_byte_size value.
    size: int
390 | 
391 | 
def convert_int(die: DIE) -> Optional[Int]:
    """Convert a scalar type DIE into an ``Int``; None for declarations.

    Raises:
        KeyError: ``DW_AT_encoding`` or ``DW_AT_byte_size`` is missing, or the
            encoding value is not in the table below.
    """
    if "DW_AT_declaration" in die.attributes:
        return None
    signedness_by_encoding = {
        2: False,  # boolean
        4: False,  # float
        5: True,  # signed
        6: True,  # signed char
        7: False,  # unsigned
        8: False,  # unsigned char
    }
    is_signed = signedness_by_encoding[die.attributes["DW_AT_encoding"].value]
    byte_size = die.attributes["DW_AT_byte_size"].value
    return Int(kind="base", is_signed=is_signed, size=byte_size)
407 | 
408 | 
class Qualified(NamedTuple):
    """A const- or volatile-qualified type, as produced by ``convert_qualified``."""

    # "const" or "volatile".
    kind: str
    # Offset of the qualified type's DIE (see convert_type).
    type: int
412 | 
413 | 
def convert_qualified(die: DIE) -> Optional[Qualified]:
    """Convert a const/volatile type DIE; None when it has no ``DW_AT_type``.

    Raises:
        KeyError: ``die.tag`` is neither the const nor the volatile tag.
    """
    inner = convert_type(die)
    if inner is None:
        return None
    qualifier_by_tag = {
        "DW_TAG_const_type": "const",
        "DW_TAG_volatile_type": "volatile",
    }
    return Qualified(kind=qualifier_by_tag[die.tag], type=inner)
425 | 
426 | 
class Array(NamedTuple):
    """A one-dimensional array type, as produced by ``convert_array``."""

    # Always "array".
    kind: str
    # Offset of the element type's DIE (see convert_type).
    type: int
    # The subrange's DW_AT_upper_bound value, stored unchanged.
    size: int
431 | 
432 | 
def convert_array(die: DIE) -> Optional[Array]:
    """Convert a ``DW_TAG_array_type`` DIE with exactly one simple subrange.

    Returns None when the element type is missing, when there is any child
    other than a single ``DW_TAG_subrange_type``, when that subrange has a
    ``DW_AT_lower_bound``, or when it has no ``DW_AT_upper_bound``.

    NOTE(review): ``size`` is the raw upper bound, not upper bound + 1;
    confirm that this is what the consumer of the JSON expects.
    """
    element_type = convert_type(die)
    if element_type is None:
        return None
    bound = None
    for child in die.iter_children():
        # Anything but one lone subrange child is unsupported.
        if child.tag != "DW_TAG_subrange_type" or bound is not None:
            return None
        subrange_attrs = child.attributes
        if "DW_AT_lower_bound" in subrange_attrs:
            return None
        upper = subrange_attrs.get("DW_AT_upper_bound")
        if upper is None:
            return None
        bound = upper.value
    if bound is None:
        return None
    return Array(kind="array", type=element_type, size=bound)
456 | 
457 | 
class Parameter(NamedTuple):
    """One named function parameter, as produced by ``convert_parameter``."""

    # Offset of the parameter's type DIE (see convert_type).
    type: int
    # Parameter name.
    name: str
461 | 
462 | 
def convert_parameter(die: DIE) -> Optional[Parameter]:
    """Convert a formal parameter DIE; None if it lacks a type or a name."""
    param_type = convert_type(die)
    param_name = None if param_type is None else convert_name(die)
    if param_type is None or param_name is None:
        return None
    return Parameter(type=param_type, name=param_name)
474 | 
475 | 
class Subprogram(NamedTuple):
    """A function signature, as produced by ``convert_subprogram``."""

    # Always "subprogram".
    kind: str
    # Offset of the return type's DIE (see convert_type); declared Optional.
    return_type: Optional[int]
    # Function name.
    name: str
    # Converted DW_TAG_formal_parameter children, in iteration order.
    parameters: List[Parameter]
    # True when a DW_TAG_unspecified_parameters child is present.
    has_varargs: bool
482 | 
483 | 
def convert_subprogram(die: DIE, kallsyms_set: Set[str]) -> Optional[Subprogram]:
    """Convert a ``DW_TAG_subprogram`` DIE into a ``Subprogram``.

    Args:
        die: the subprogram DIE.
        kallsyms_set: names of the symbols of interest; functions whose name
            is not in this set are skipped.

    Returns:
        None for inline instances, declarations, unnamed or uninteresting
        functions, and functions with a parameter that cannot be converted.
        Otherwise the signature. ``return_type`` is None when the DIE has no
        ``DW_AT_type``, which is how DWARF describes a function returning
        ``void``; such functions are kept rather than dropped.
    """
    if "DW_AT_inline" in die.attributes or "DW_AT_declaration" in die.attributes:
        return None
    name = convert_name(die)
    if name is None or name not in kallsyms_set:
        return None
    parameters = []
    has_varargs = False
    for child_die in die.iter_children():
        if child_die.tag == "DW_TAG_unspecified_parameters":
            has_varargs = True
            continue
        if child_die.tag != "DW_TAG_formal_parameter":
            continue
        parameter = convert_parameter(child_die)
        if parameter is None:
            return None
        parameters.append(parameter)
    return Subprogram(
        kind="subprogram",
        # A missing DW_AT_type means "void", not "unknown".
        return_type=convert_type(die),
        name=name,
        parameters=parameters,
        has_varargs=has_varargs,
    )
512 | 
513 | 
def extract_debug_info(output: Path, vmlinux: Path, kallsyms: List[str]) -> None:
    """Dump selected DWARF type and function info from ``vmlinux`` as JSON.

    Only direct children of each compilation unit's top DIE are examined.
    Convertible DIEs are stored under their DIE offset and written to
    ``output``. Each ``kallsyms`` entry has its first character dropped
    before being used as the set of function names to keep.
    """
    # Mostly copied from
    # https://github.com/mephi42/linetrace-cmd-record/blob/9769e9505cb2/linetrace-cmd-record#L179
    # https://github.com/mephi42/linetrace-cmd-record/blob/9769e9505cb2/linetrace-cmd-record#L59
    logging.info("Extracting debug info...")
    kallsyms_set = {kallsym[1:] for kallsym in kallsyms}

    def subprogram_convertor(die):
        return convert_subprogram(die, kallsyms_set)

    tag2convertor = {
        "DW_TAG_structure_type": convert_struct,
        "DW_TAG_union_type": convert_struct,
        "DW_TAG_typedef": convert_typedef,
        "DW_TAG_pointer_type": convert_pointer,
        "DW_TAG_base_type": convert_int,
        "DW_TAG_enumeration_type": convert_int,
        "DW_TAG_const_type": convert_qualified,
        "DW_TAG_volatile_type": convert_qualified,
        "DW_TAG_array_type": convert_array,
        "DW_TAG_subprogram": subprogram_convertor,
    }
    items: Dict[int, Any] = {}
    with open(vmlinux, "rb") as fp:
        elf = ELFFile(fp)
        needs_relocation = elf["e_type"] == "ET_REL"
        dwarf = elf.get_dwarf_info(relocate_dwarf_sections=needs_relocation)
        for cu in dwarf.iter_CUs():
            for die in cu.get_top_DIE().iter_children():
                convertor = tag2convertor.get(die.tag)
                if convertor is None:
                    continue
                item = convertor(die)
                if item is None:
                    continue
                items[die.offset] = item
    logging.info("Saving extracted debug info...")
    with open(output, "w") as output_fp:
        json.dump(items, output_fp, indent=4, separators=(",", ": "))
546 | 
547 | 
def detect_host(vmlinux_bytes: bytes) -> str:
    """Guess the GNU target triplet from a kernel image's file header.

    An image starting with ``MZ`` whose little-endian dword at 0x3C points at
    a ``PE\\0\\0`` signature is treated as PE and classified by its machine
    field. Everything else is parsed as ELF.
    """
    if vmlinux_bytes[:2] == b"MZ":
        pe_offset = struct.unpack("<I", vmlinux_bytes[0x3C:0x40])[0]
        signature = vmlinux_bytes[pe_offset : pe_offset + 4]
        if signature == b"PE\0\0":
            machine_field = vmlinux_bytes[pe_offset + 4 : pe_offset + 6]
            machine = struct.unpack("<H", machine_field)[0]
            return get_pe_host(machine)
    elf = ELFFile(io.BytesIO(vmlinux_bytes))
    return get_elf_host(elf.header)
558 | 
559 | 
def main():
    """Command-line entry point.

    Works out the target triplet and the Linux, GCC and Binutils versions
    (from the command line, or by scanning the ``--like`` image), builds or
    reuses a matching cross toolchain and kernel, and, when ``--like`` was
    given and kallsyms were found in it, writes ``<like>.like.json`` with the
    extracted debug info.
    """
    logging.basicConfig(
        level=logging.INFO,
        format="%(relativeCreated)7dms| %(message)s",
    )
    parser = argparse.ArgumentParser(description="Build Linux Kernel")
    parser.add_argument("--like", help="Use an existing vmlinux as a template")
    parser.add_argument("--host", help="Build for a specific architecture")
    parser.add_argument(
        "--binutils-git",
        default="binutils-gdb",
        help="Local Binutils git repo",
    )
    parser.add_argument("--binutils-version", help="Use a specific Binutils version")
    parser.add_argument("--gcc-git", default="gcc", help="Local GCC git repo")
    parser.add_argument("--gcc-version", help="Use a specific GCC version")
    parser.add_argument(
        "--linux-git", default="linux", help="Local Linux Kernel git repo"
    )
    parser.add_argument("--linux-version", help="Build a specific Linux Kernel version")
    parser.add_argument(
        "--linux-config", help="Use a specific Linux Kernel .config file"
    )
    args = parser.parse_args()
    host: Optional[str] = args.host
    gcc_version: Optional[str] = args.gcc_version
    linux_version: Optional[str] = args.linux_version
    binutils_version: Optional[str] = args.binutils_version
    kallsyms: List[Tuple[int, str]] = []
    if args.like is None:
        template_vmlinux = None
    else:
        template_vmlinux = Path(args.like)
        with open(args.like, "rb") as fp:
            vmlinux_bytes = fp.read()
        # Anything not given explicitly is detected from the template image.
        if host is None:
            host = detect_host(vmlinux_bytes)
            logging.info("Detected host: %s", host)
        if linux_version is None:
            m_linux = re.search(
                pattern=rb"Linux version ([0-9.]+)",
                string=vmlinux_bytes,
                flags=re.MULTILINE,
            )
            if m_linux is None:
                raise Exception("Could not detect Linux Kernel version")
            linux_version = m_linux.group(1).decode()
            # A trailing ".0" is dropped before the version is used as a tag.
            if linux_version.endswith(".0"):
                linux_version = linux_version[:-2]
            logging.info("Detected Linux Kernel version: %s", linux_version)
        if gcc_version is None:
            m_gcc = re.search(
                pattern=rb"\(gcc version ([0-9.]+)",
                string=vmlinux_bytes,
                flags=re.MULTILINE,
            )
            if m_gcc is None:
                m_gcc = re.search(
                    pattern=rb"\(gcc \(Ubuntu [^ ]+\) ([0-9.]+)",
                    string=vmlinux_bytes,
                    flags=re.MULTILINE,
                )
            if m_gcc is None:
                raise Exception("Could not detect GCC version")
            gcc_version = m_gcc.group(1).decode()
            logging.info("Detected GCC version: %s", gcc_version)
        if binutils_version is None:
            m_binutils = re.search(
                pattern=rb"GNU ld \(GNU Binutils for Ubuntu\) ([0-9.]+)",
                string=vmlinux_bytes,
                flags=re.MULTILINE,
            )
            if m_binutils is None:
                binutils_version = "2.32"
                logging.info("Assuming binutils version: %s", binutils_version)
            else:
                binutils_version = m_binutils.group(1).decode()
                logging.info("Detected binutils version: %s", binutils_version)
        kallsyms = list(find_kallsyms_in_rodata(vmlinux_bytes))
        logging.info("Found kallsyms: %d", len(kallsyms))
    if host is None:
        raise Exception("Use --like or --host to specify architecture")
    if linux_version is None:
        raise Exception(
            "Use --like or --linux-version to specify a Linux Kernel version"
        )
    if gcc_version is None:
        raise Exception("Use --like or --gcc-version to specify a GCC version")
    if binutils_version is None:
        # Without --like and --binutils-version nothing has set the version
        # yet; use the same assumption as the --like path instead of passing
        # None on to the toolchain build.
        binutils_version = "2.32"
        logging.info("Assuming binutils version: %s", binutils_version)
    toolchain_bin = build_or_reuse_toolchain(
        binutils_git=Path(args.binutils_git).expanduser(),
        binutils_version=binutils_version,
        gcc_git=Path(args.gcc_git).expanduser(),
        gcc_version=gcc_version,
        host=host,
    )
    linux_config = args.linux_config
    if linux_config is not None:
        linux_config = Path(linux_config).expanduser()
    output = args.like
    if output is not None:
        output = Path(f"{output}.like")
    vmlinux = build_or_reuse_vmlinux(
        output=output,
        git=Path(args.linux_git).expanduser(),
        host=host,
        version=linux_version,
        linux_config=linux_config,
        toolchain_bin=toolchain_bin,
        template_vmlinux=template_vmlinux,
    )
    logging.info("Built: %s", vmlinux)
    if args.like is not None and len(kallsyms) > 0:
        output = Path(f"{args.like}.like.json")
        extract_debug_info(
            output=output,
            vmlinux=vmlinux,
            kallsyms=[kallsym for _, kallsym in kallsyms],
        )
        logging.info("Extracted: %s", output)
679 | 
680 | 
# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    main()
683 | 


--------------------------------------------------------------------------------
/ci:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | set -e -u -x
 3 | cd "$(dirname "$0")"
 4 | ./black
 5 | ./flake8
 6 | mypy build-vmlinux
 7 | python2 -m unittest discover
 8 | python3 -m unittest discover
 9 | jython -m unittest discover
10 | pypy3 -m unittest discover
11 | 


--------------------------------------------------------------------------------
/find-kallsyms:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | import argparse
 3 | import logging
 4 | 
 5 | from find_kallsyms import find_kallsyms_in_rodata
 6 | 
 7 | 
 8 | def main():
 9 |     parser = argparse.ArgumentParser()
10 |     parser.add_argument("--verbose", action="store_true")
11 |     parser.add_argument("path")
12 |     args = parser.parse_args()
13 |     if args.verbose:
14 |         logging.basicConfig(level=logging.DEBUG)
15 |     with open(args.path, "rb") as fp:
16 |         rodata = fp.read()
17 |     for address, name in find_kallsyms_in_rodata(rodata):
18 |         print("{:016X} {}".format(address, name))
19 | 
20 | 
# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    main()
23 | 


--------------------------------------------------------------------------------
/find_kallsyms.py:
--------------------------------------------------------------------------------
  1 | from collections import namedtuple
  2 | import logging
  3 | import struct
  4 | 
  5 | 
  6 | def align_up(p, a):
  7 |     return p & ~(a - 1)
  8 | 
  9 | 
 10 | def align(p, a):
 11 |     return (p + (a - 1)) & ~(a - 1)
 12 | 
 13 | 
 14 | def try_parse_token_index(rodata, endianness, offset):
 15 |     index_fmt = endianness + "H"
 16 |     (index,) = struct.unpack(index_fmt, rodata[offset : offset + 2])
 17 |     assert index == 0, "The first token index must be 0"
 18 |     indices = [index]
 19 |     for _ in range(255):
 20 |         offset += 2
 21 |         (index,) = struct.unpack(index_fmt, rodata[offset : offset + 2])
 22 |         if index <= indices[-1]:
 23 |             return None  # Token indices must be monotonically increasing.
 24 |         indices.append(index)
 25 |     return indices
 26 | 
 27 | 
 28 | def find_token_indices(rodata, endianness):
 29 |     token_index_offset = 0
 30 |     while True:
 31 |         # kallsyms_token_index is an array of monotonically increasing 256
 32 |         # shorts, the first of which is 0. It is located right after
 33 |         # kallsyms_token_table, which is a sequence of null-terminated strings.
 34 |         # Therefore, look for 1+2 consecutive zeroes.
 35 |         token_index_offset = rodata.find(b"\x00\x00\x00", token_index_offset) + 1
 36 |         if token_index_offset == 0 or token_index_offset + 512 > len(rodata):
 37 |             break
 38 |         token_index = try_parse_token_index(rodata, endianness, token_index_offset)
 39 |         if token_index is not None:
 40 |             yield token_index_offset, token_index
 41 | 
 42 | 
 43 | def try_parse_token_table(rodata, token_index, start_offset, end_offset):
 44 |     tokens = []
 45 |     for i in range(256):
 46 |         token_start_offset = start_offset + token_index[i]
 47 |         if i == 255:
 48 |             # Last token ends at the end of the table.
 49 |             token_end_offset = end_offset
 50 |         else:
 51 |             # Other tokens end at the start of the next respective token.
 52 |             token_end_offset = start_offset + token_index[i + 1]
 53 |         token = rodata[token_start_offset:token_end_offset]
 54 |         if b"\x00" in token[:-1]:
 55 |             return None  # Tokens must be printable.
 56 |         if token[-1:] != b"\x00":
 57 |             return None  # Tokens must be null-terminated.
 58 |         if token[:-1] in tokens:
 59 |             return None  # Tokens must not repeat
 60 |         tokens.append(token[:-1])
 61 |     return tokens
 62 | 
 63 | 
 64 | def find_token_tables(rodata, token_index, token_index_offset):
 65 |     # kallsyms_token_table is a sequence of 256 null-terminated strings.
 66 |     # There may be some padding at the end; skip it.
 67 |     token_table_end_offset = token_index_offset
 68 |     while rodata[token_table_end_offset - 2 : token_table_end_offset - 1] == b"\x00":
 69 |         token_table_end_offset -= 1
 70 |     # Find the last token by looking for a trailing \0.
 71 |     last_token_offset = rodata.rfind(b"\x00", 0, token_table_end_offset - 1) + 1
 72 |     if last_token_offset == 0:
 73 |         return
 74 |     # The last kallsyms_token_index element corresponds to the last token.
 75 |     # Use that information to locate kallsyms_token_table.
 76 |     token_table_offset = last_token_offset - token_index[-1]
 77 |     if token_table_offset < 0:
 78 |         return
 79 |     token_table = try_parse_token_table(
 80 |         rodata, token_index, token_table_offset, token_table_end_offset
 81 |     )
 82 |     if token_table is not None:
 83 |         yield token_table_offset, token_table
 84 | 
 85 | 
 86 | def find_markers(rodata, marker_fmt, marker_size, marker_offset):
 87 |     first = True
 88 |     markers = []
 89 |     while True:
 90 |         # kallsyms_markers is an array of monotonically increasing offsets,
 91 |         # which starts with 0. It is aligned on an 8-byte boundary, so if the
 92 |         # element size is 4 bytes and their number is odd, it is zero-padded at
 93 |         # the end.
 94 |         (marker,) = struct.unpack(
 95 |             marker_fmt, rodata[marker_offset : marker_offset + marker_size]
 96 |         )
 97 |         if first:
 98 |             first = False
 99 |             if marker == 0 and marker_size == 4:
100 |                 # Skip padding.
101 |                 marker_offset -= marker_size
102 |                 continue
103 |         elif len(markers) > 0 and marker >= markers[-1]:
104 |             # The array is not monotonically increasing.
105 |             return
106 |         markers.append(marker)
107 |         if marker == 0:
108 |             # We found the first element.
109 |             break
110 |         marker_offset -= marker_size
111 |     if marker_size == 4 and len(markers) == 2:
112 |         # Marker size must be 8 bytes, and we must be taking the upper
113 |         # part, which is always 0, for the first marker.
114 |         return
115 |     markers.reverse()
116 |     yield marker_offset, markers
117 | 
118 | 
# Since v6.1 (commit b8a94bfb3395).
KSYM_NAME_LEN = 512


def is_name_ok(rodata, token_lengths, offset):
    """Check whether a plausible name entry starts at offset.

    An entry is a one-byte token count followed by that many one-byte token
    numbers. token_lengths maps a token number to the length of the expanded
    token. The entry is accepted if it has at least one token and expands to
    at most KSYM_NAME_LEN-1 characters.

    The caller is expected to have verified that the entry does not span
    past the end of kallsyms_names.
    """
    count = ord(rodata[offset : offset + 1])
    if not 0 < count < KSYM_NAME_LEN:
        # Tokens are at least one byte long, so an empty entry is an empty
        # name, and too many tokens make the name too long.
        return False
    expanded_length = 0
    for position in range(offset + 1, offset + 1 + count):
        expanded_length += token_lengths[ord(rodata[position : position + 1])]
        if expanded_length >= KSYM_NAME_LEN:
            # Name is longer than KSYM_NAME_LEN-1 characters.
            return False
    return True
140 | 
141 | 
def extract_name(rodata, token_table, offset):
    """Expand the name entry at offset into the full name.

    The entry must have already been checked; the token numbers that follow
    the one-byte token count are simply replaced by their expansions.
    """
    n_tokens = ord(rodata[offset : offset + 1])
    first_token = offset + 1
    return b"".join(
        token_table[ord(rodata[position : position + 1])]
        for position in range(first_token, first_token + n_tokens)
    )
150 | 
151 | 
def find_num_syms(rodata, endianness, token_table, markers_offset):
    """Locate kallsyms_num_syms and decode kallsyms_names.

    kallsyms_names is a sequence of length-prefixed entries ending with
    padding to an 8-byte boundary, followed by kallsyms_markers.
    Unfortunately, some guesswork is required to locate the start of
    kallsyms_names given that we know the start of kallsyms_markers.

    The search walks backwards from markers_offset and stops at the first
    valid name entry that is preceded by a 32-bit value equal to the number
    of entries between it and markers_offset.

    Yields at most one (num_syms_offset, names) pair, where names is the list
    of decoded symbol names. Yields nothing if no consistent position exists.
    """
    num_syms_fmt = endianness + "I"
    token_lengths = [len(token) for token in token_table]
    # Indexed by (markers_offset - offset). Each element is a number of name
    # entries that follow the respective offset, or None if that offset is not
    # a start of a valid name entry.
    name_counts = [0]
    # Whether offset still points to one of the trailing zeroes.
    trailing_zeroes = True
    offset = markers_offset
    while offset >= 9:
        offset -= 1
        current_byte = ord(rodata[offset : offset + 1])
        if current_byte != 0:
            # Trailing zeroes have ended.
            trailing_zeroes = False
        next_name_offset = offset + current_byte + 1
        if next_name_offset > markers_offset:
            # The current name entry spans past the end of kallsyms_names. This
            # is allowed if we are still looking at trailing zeroes.
            name_counts.append(0 if trailing_zeroes else None)
            continue
        next_name_count = name_counts[markers_offset - next_name_offset]
        if next_name_count is None:
            # The next name entry is invalid, which means the current name
            # entry cannot be valid.
            name_counts.append(None)
            continue
        if is_name_ok(rodata, token_lengths, offset):
            # The current name entry is valid. Check whether it is preceded by
            # kallsyms_num_syms value, which is consistent with the number of
            # name entries we've seen so far.
            name_counts.append(next_name_count + 1)
            num_syms_offset = None
            # How kallsyms_num_syms is aligned depends on a particular kernel,
            # so try different offsets.
            for i in (-4, -8, -12, -16):
                if offset + i < 0:
                    # There is no room for kallsyms_num_syms that far back.
                    # Slicing with a negative start would yield a short
                    # buffer and make struct.unpack() raise.
                    break
                (num_syms,) = struct.unpack(
                    num_syms_fmt, rodata[offset + i : offset + i + 4]
                )
                if name_counts[-1] == num_syms:
                    num_syms_offset = offset + i
                    break
                if num_syms != 0:
                    # Only zero padding may separate kallsyms_num_syms from
                    # kallsyms_names.
                    break
            if num_syms_offset is not None:
                break
        else:
            # The current name entry is not valid. This is allowed if we are
            # still looking at trailing zeroes.
            name_counts.append(0 if trailing_zeroes else None)
    else:
        # The start of rodata was reached without finding kallsyms_num_syms.
        return
    # We've found kallsyms_names, now parse it.
    names = []
    for _ in range(name_counts[-1]):
        names.append(extract_name(rodata, token_table, offset).decode())
        offset += ord(rodata[offset : offset + 1]) + 1
    yield num_syms_offset, names
215 | 
216 | 
# Describes a machine word: its size in bytes, the struct format character
# used to unpack it, and the C type name shown in debug logs.
Word = namedtuple("Word", ("size", "fmt", "ctype"))
WORD32 = Word(4, "I", "u32")
WORD64 = Word(8, "Q", "u64")
220 | 
221 | 
def find_addresses_no_kallsyms_base_relative(
    rodata, endianness, addresses_offset, num_syms, word
):
    """Try to read kallsyms_addresses as an array of absolute addresses.

    addresses_offset is the presumed start of the array, num_syms the number
    of elements, and word describes the element type. For 8-byte words a
    start that is not 8-byte aligned is moved back by 4 bytes.

    Yields at most one (start_offset, end_offset, addresses) triple. Yields
    nothing if the array does not fit into rodata or if the values are not
    sorted in ascending order.
    """
    if addresses_offset + num_syms * word.size > len(rodata):
        return
    address_fmt = endianness + word.fmt
    if word.size == 8 and addresses_offset % 8 != 0:
        addresses_offset -= 4
    if addresses_offset < 0:
        # The array would start before rodata. Slicing with a negative
        # offset would wrap around to the end of the buffer or produce a
        # short read that makes struct.unpack() raise.
        return
    offset = addresses_offset
    addresses = []
    for _ in range(num_syms):
        (address,) = struct.unpack(address_fmt, rodata[offset : offset + word.size])
        if len(addresses) > 0 and address < addresses[-1]:
            # The resulting addresses are not sorted.
            return
        addresses.append(address)
        offset += word.size
    logging.debug(
        "0x%08X: %s kallsyms_addresses[]",
        addresses_offset,
        word.ctype,
    )
    yield addresses_offset, offset, addresses
245 | 
246 | 
def find_addresses_kallsyms_base_relative(
    rodata, endianness, addresses_offset, num_syms, word
):
    """Try to read kallsyms_offsets followed by kallsyms_relative_base.

    kallsyms_offsets is read as num_syms signed 32-bit values starting at
    addresses_offset; kallsyms_relative_base is one word located at the next
    word-aligned offset after them.

    Two decodings are attempted, and a (start_offset, end_offset, addresses)
    triple is yielded for each one that produces addresses sorted in
    ascending order: first !KALLSYMS_ABSOLUTE_PERCPU, then
    KALLSYMS_ABSOLUTE_PERCPU. Yields nothing if addresses_offset is negative
    or the data does not fit into rodata.
    """
    if addresses_offset < 0:
        return
    addresses_end = addresses_offset + 4 * num_syms
    relative_base_offset = align(addresses_end, word.size)
    relative_base_end = relative_base_offset + word.size
    if relative_base_end > len(rodata):
        return
    raw_addresses = struct.unpack(
        endianness + "i" * num_syms, rodata[addresses_offset:addresses_end]
    )
    (kallsyms_relative_base,) = struct.unpack(
        endianness + word.fmt,
        rodata[relative_base_offset:relative_base_end],
    )

    def report():
        logging.debug(
            "0x%08X: %s kallsyms_relative_base=0x%016X",
            relative_base_offset,
            word.ctype,
            kallsyms_relative_base,
        )
        logging.debug("0x%08X: u32 kallsyms_offsets[]", addresses_offset)

    def decode_sorted(decode):
        # Decode every raw offset; give up as soon as the order is broken.
        decoded = []
        for raw in raw_addresses:
            value = decode(raw)
            if decoded and value < decoded[-1]:
                return None
            decoded.append(value)
        return decoded

    # Try !KALLSYMS_ABSOLUTE_PERCPU first.
    # A lot of small nonnegative numbers will match KALLSYMS_ABSOLUTE_PERCPU
    # too, but it's more likely to be !KALLSYMS_ABSOLUTE_PERCPU.
    relative = decode_sorted(lambda raw: kallsyms_relative_base + (raw & 0xFFFFFFFF))
    if relative is not None:
        report()
        yield addresses_offset, addresses_end, relative

    # Try KALLSYMS_ABSOLUTE_PERCPU: nonnegative values are absolute, negative
    # ones are relative to the base.
    percpu = decode_sorted(
        lambda raw: raw if raw >= 0 else kallsyms_relative_base - 1 - raw
    )
    if percpu is not None:
        report()
        yield addresses_offset, relative_base_end, percpu
302 | 
303 | 
def find_kallsyms_in_rodata(rodata):
    """Search rodata for the kallsyms tables and decode them.

    The nested generator below enumerates layout hypotheses (endianness,
    position of each table, marker size, word size, KALLSYMS_BASE_RELATIVE,
    position of the addresses) in order. The single-element
    `for _ in (logging.debug(...),)` clauses only log the hypothesis that is
    currently being tried.

    Returns the (address, name) pairs of the first hypothesis for which all
    tables parse, or an empty list if there is none.
    """
    for addresses, names in (
        (addresses, names)
        for endianness in ("<", ">")
        for _ in (logging.debug("Endianness: %s", endianness),)
        for token_index_offset, token_index in find_token_indices(rodata, endianness)
        for _ in (
            logging.debug(
                "0x%08X: kallsyms_token_index=%s", token_index_offset, token_index
            ),
        )
        for token_table_offset, token_table in find_token_tables(
            rodata, token_index, token_index_offset
        )
        for _ in (
            logging.debug(
                "0x%08X: kallsyms_token_table=%s", token_table_offset, token_table
            ),
        )
        # In 6.2 (commits 60443c88f3a8 and 19bd8981dc2e) kallsyms_seqs_of_names
        # was added between kallsyms_markers and kallsyms_token_table.
        for markers_end_offset in range(token_table_offset, -4, -4)
        # In 4.20 the size of markers was reduced to 4 bytes.
        for marker_fmt, marker_size in (
            (endianness + "I", 4),
            (endianness + "Q", 8),
        )
        for markers_offset, markers in find_markers(
            rodata, marker_fmt, marker_size, markers_end_offset - marker_size
        )
        for _ in (
            logging.debug("0x%08X: kallsyms_markers=%s", markers_offset, markers),
        )
        for num_syms_offset, names in find_num_syms(
            rodata, endianness, token_table, markers_offset
        )
        for _ in (
            logging.debug("0x%08X: kallsyms_num_syms=%s", num_syms_offset, len(names)),
        )
        for word in (WORD64, WORD32)
        for _ in (logging.debug("WORD%d", word.size),)
        # KALLSYMS_BASE_RELATIVE means that kallsyms_offsets are followed by
        # kallsyms_relative_base. This was introduced in 4.6 by commit
        # 2213e9a66bb8.
        for base_relative in (False, True)
        for _ in (
            logging.debug("KALLSYMS_BASE_RELATIVE=%s", "y" if base_relative else "n"),
        )
        # Since kernel 6.4 (commit 404bad70fcf7) kallsyms_addresses come after
        # kallsyms_token_index.
        for addresses_first in (True, False)
        for _ in (logging.debug("addresses_first=%s", addresses_first),)
        for addresses_offset, addresses_end, addresses in (
            find_addresses_kallsyms_base_relative(
                rodata,
                endianness,
                (
                    # NOTE: align_up() clears the low bits, i.e. rounds down.
                    align_up(
                        align_up(num_syms_offset, word.size)
                        - word.size
                        - len(names) * 4,
                        word.size,
                    )
                    if addresses_first
                    else align(token_index_offset + 512, word.size)
                ),
                len(names),
                word,
            )
            if base_relative
            else find_addresses_no_kallsyms_base_relative(
                rodata,
                endianness,
                (
                    num_syms_offset - len(names) * word.size
                    if addresses_first
                    else token_index_offset + 512
                ),
                len(names),
                word,
            )
        )
        for _ in (
            logging.debug(
                "0x%08X: kallsyms[0x%08X]",
                addresses_offset if addresses_first else num_syms_offset,
                (
                    token_index_offset + 512 - addresses_offset
                    if addresses_first
                    else (
                        align(addresses_end, word.size)
                        + len(names) * 3
                        - num_syms_offset
                    )
                ),
            ),
        )
    ):
        # The first complete match wins; the remaining hypotheses are not tried.
        return zip(addresses, names)
    return []
404 | 


--------------------------------------------------------------------------------
/flake8:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | set -e -u -x
3 | cd "$(dirname "$0")"
4 | ./ls-py0 | xargs -0 flake8 --extend-ignore=E203 --max-line-length=88 "$@"
5 | 


--------------------------------------------------------------------------------
/ghidra-kallsyms.py:
--------------------------------------------------------------------------------
  1 | # Add or rename symbols based on kallsyms
  2 | import jarray
  3 | import json
  4 | 
  5 | from ghidra.app.cmd.function import CreateFunctionCmd
  6 | from ghidra.program.model.data import VoidDataType, Undefined1DataType
  7 | from ghidra.program.model.listing import Function, ParameterImpl, ReturnParameterImpl
  8 | from ghidra.program.model.symbol import SourceType, SymbolType
  9 | 
 10 | from find_kallsyms import find_kallsyms_in_rodata
 11 | 
 12 | 
 13 | def load_like_json(program, symbols, functions, types):
 14 |     like_json_path = program.getExecutablePath() + ".like.json"
 15 |     try:
 16 |         fp = open(like_json_path)
 17 |     except FileNotFoundError:
 18 |         return
 19 |     try:
 20 |         like_json = json.load(fp)
 21 |     finally:
 22 |         fp.close()
 23 |     for return_type, name, parameters, has_varargs in like_json["subprograms"].values():
 24 |         wtf = False
 25 |         existing_label = None
 26 |         existing_function = None
 27 |         for existing_symbol in symbols.getGlobalSymbols(name):
 28 |             symbol_type = existing_symbol.getSymbolType()
 29 |             if symbol_type == SymbolType.LABEL:
 30 |                 if existing_label is not None:
 31 |                     wtf = True
 32 |                     break
 33 |                 existing_label = existing_symbol
 34 |             elif symbol_type == SymbolType.FUNCTION:
 35 |                 if existing_function is not None:
 36 |                     wtf = True
 37 |                     break
 38 |                 existing_function = existing_symbol
 39 |         if wtf:
 40 |             continue
 41 |         if existing_function is None:
 42 |             if existing_label is None:
 43 |                 continue
 44 |             try:
 45 |                 function = functions.createFunction(
 46 |                     name,
 47 |                     existing_label.getAddress(),
 48 |                     CreateFunctionCmd.getFunctionBody(
 49 |                         program, existing_label.getAddress()
 50 |                     ),
 51 |                     SourceType.ANALYSIS,
 52 |                 )
 53 |             except:  # noqa: E722
 54 |                 # E.g. OverlappingFunctionException.
 55 |                 continue
 56 |         else:
 57 |             function = functions.getFunction(existing_function.getID())
 58 |         if return_type is None:
 59 |             return_var = ReturnParameterImpl(VoidDataType.dataType, program)
 60 |         else:
 61 |             return_var = function.getReturn()
 62 |         new_params = []
 63 |         for i, (_, param_name) in enumerate(parameters):
 64 |             param_type = Undefined1DataType.dataType
 65 |             existing_param = function.getParameter(i)
 66 |             if existing_param is not None:
 67 |                 param_type = existing_param.getDataType()
 68 |             new_params.append(ParameterImpl(param_name, param_type, program))
 69 |         function.updateFunction(
 70 |             function.getCallingConventionName(),
 71 |             return_var,
 72 |             new_params,
 73 |             Function.FunctionUpdateType.DYNAMIC_STORAGE_ALL_PARAMS,
 74 |             True,
 75 |             SourceType.ANALYSIS,
 76 |         )
 77 |         function.setVarArgs(has_varargs)
 78 | 
 79 | 
# Script body: read the .rodata block, recover kallsyms from it, label the
# corresponding addresses, then apply the signatures from the .like.json file.
program = currentProgram  # noqa: F821
memory = program.getMemory()
rodata_block = memory.getBlock(".rodata")
if rodata_block is None:
    # Fall back to .text when there is no .rodata block.
    rodata_block = memory.getBlock(".text")
rodata = jarray.zeros(rodata_block.getSize(), "b")
rodata_block.getBytes(rodata_block.getStart(), rodata)
# Convert the signed Java bytes into a byte string.
rodata = b"".join([chr(x & 0xFF) for x in rodata])  # it's py2
ram = program.getAddressFactory().getDefaultAddressSpace()
symbols = program.getSymbolTable()
for address, name in find_kallsyms_in_rodata(rodata):
    # The first character of a name is not part of the label; names starting
    # with "A" are skipped.
    # NOTE(review): presumably this character is the nm-style symbol type and
    # "A" means an absolute symbol - confirm.
    if name[0] != "A":
        address = ram.getAddress(address)
        existing = list(symbols.getSymbols(address))
        if len(existing) == 0:
            symbols.createLabel(address, name[1:], SourceType.ANALYSIS)
        elif len(existing) == 1:
            existing[0].setName(name[1:], SourceType.ANALYSIS)
        else:
            # Several symbols already exist at this address; leave them alone.
            pass
load_like_json(
    program=program,
    symbols=symbols,
    functions=program.getFunctionManager(),
    types=program.getDataTypeManager(),
)
106 | 


--------------------------------------------------------------------------------
/ida-kallsyms-import.py:
--------------------------------------------------------------------------------
# IDA script: ask for a file with find-kallsyms output and apply the names
# it contains to the database.
from ida_kernwin import ask_file
from idaapi import require

require("ida_utils")
path = ask_file(False, "*.kallsyms", "find-kallsyms output")
# Nothing is done when no path is returned.
if path is not None:
    with open(path) as fp:
        ida_utils.apply_kallsyms(ida_utils.parse_kallsyms(fp))  # noqa: F821
9 | 


--------------------------------------------------------------------------------
/ida-kallsyms.py:
--------------------------------------------------------------------------------
 1 | from idaapi import get_bytes, require
 2 | from ida_segment import get_segm_by_name
 3 | 
 4 | require("find_kallsyms")
 5 | require("ida_utils")
 6 | rodata_segm = get_segm_by_name(".rodata")
 7 | if rodata_segm is None:
 8 |     rodata_segm = get_segm_by_name(".text")
 9 | rodata_size = rodata_segm.end_ea - rodata_segm.start_ea + 1
10 | rodata = get_bytes(rodata_segm.start_ea, rodata_size)
11 | kallsyms = find_kallsyms.find_kallsyms_in_rodata(rodata)  # noqa: F821
12 | ida_utils.apply_kallsyms(kallsyms)  # noqa: F821
13 | 


--------------------------------------------------------------------------------
/ida-like-import.py:
--------------------------------------------------------------------------------
# IDA script: ask for a .like.json file produced by build-vmlinux and apply
# it to the database.
from ida_kernwin import ask_file
from idaapi import require

require("ida_utils")
path = ask_file(False, "*.like.json", "build-vmlinux output")
# Nothing is done when no path is returned.
if path is not None:
    ida_utils.apply_like(path)  # noqa: F821
8 | 


--------------------------------------------------------------------------------
/ida_utils.py:
--------------------------------------------------------------------------------
  1 | import json
  2 | 
  3 | from idc import add_struc, add_struc_member, apply_type, del_struc_member, parse_decl
  4 | from ida_bytes import del_items, FF_BYTE, FF_DATA
  5 | from ida_name import get_name_ea, is_uname, set_name
  6 | from ida_struct import get_struc_id
  7 | from idaapi import BADADDR, get_inf_structure
  8 | from idautils import StructMembers
  9 | 
 10 | 
 11 | def parse_kallsyms(fp):
 12 |     for line in fp:
 13 |         address, name = line.strip().split()
 14 |         yield int(address, 16), name
 15 | 
 16 | 
 17 | def _is_uname(name):
 18 |     return is_uname(name) and name not in (
 19 |         "class",
 20 |         "new",
 21 |     )
 22 | 
 23 | 
 24 | def apply_kallsyms(kallsyms):
 25 |     for address, name in kallsyms:
 26 |         if name[0] != "A":
 27 |             new_name = str(name[1:])
 28 |             if not _is_uname(new_name):
 29 |                 new_name = "_" + new_name
 30 |             if _is_uname(new_name):
 31 |                 if not set_name(address, new_name):
 32 |                     del_items(address)
 33 |                     set_name(address, new_name)
 34 | 
 35 | 
# Fallbacks used when a type is unknown or cannot be created: a one-byte
# "char", together with the matching member flags and type id.
DEFAULT_TYPE = "char"
DEFAULT_TYPE_SIZE = 1
DEFAULT_TYPE_FLAGS = FF_DATA | FF_BYTE
DEFAULT_TYPE_ID = -1
# Pointer size in bytes, derived from the bitness of the current database.
INFO = get_inf_structure()
if INFO.is_64bit():
    PTR_SIZE = 8
elif INFO.is_32bit():
    PTR_SIZE = 4
else:
    PTR_SIZE = 2
 47 | 
 48 | 
 49 | def get_type_size(like, die_offset):
 50 |     type = like.get(str(die_offset))
 51 |     if type is None:
 52 |         return DEFAULT_TYPE_SIZE
 53 |     kind = type[0]
 54 |     if kind in ("struct", "union"):
 55 |         return type[2]
 56 |     if kind == "typedef":
 57 |         return get_type_size(like, type[2])
 58 |     if kind == "pointer":
 59 |         return PTR_SIZE
 60 |     if kind == "base":
 61 |         return type[2]
 62 |     if kind in ("const", "volatile"):
 63 |         return get_type_size(like, type[1])
 64 |     if kind == "array":
 65 |         return get_type_size(like, type[1]) * type[2]
 66 |     return DEFAULT_TYPE_SIZE
 67 | 
 68 | 
 69 | def add_end_member(struct_id, struct_name, struct_size, log_fp):
 70 |     """Forces struct size by creating a byte field at the end"""
 71 |     end_member_name = "field_{:X}".format(struct_size - 1)
 72 |     log_fp.write("{}.{}: ...\n".format(struct_name, end_member_name))
 73 |     log_fp.flush()
 74 |     ret = add_struc_member(
 75 |         struct_id,
 76 |         end_member_name,
 77 |         struct_size - 1,
 78 |         DEFAULT_TYPE_FLAGS,
 79 |         DEFAULT_TYPE_ID,
 80 |         DEFAULT_TYPE_SIZE,
 81 |     )
 82 |     log_fp.write("... ret={}\n".format(ret))
 83 |     log_fp.flush()
 84 |     return ret
 85 | 
 86 | 
def resolve_type(like, die_offset, log_fp, alias=None):
    """Return the type declaration string for the type at die_offset.

    like maps stringified DIE offsets to type entries whose first element is
    the kind of the type. Structs and unions that do not exist in the
    database yet are created together with their members, and the resulting
    struct id is appended to the entry in like. Progress is written to
    log_fp.

    alias is the name to use for an anonymous struct or union; it is supplied
    when the type is reached through a typedef.

    Returns "void" if die_offset is None, and DEFAULT_TYPE for unknown
    offsets, unknown kinds and structs that could not be created.
    """
    if die_offset is None:
        return "void"
    type = like.get(str(die_offset))
    if type is None:
        return DEFAULT_TYPE
    kind = type[0]
    if kind in ("struct", "union"):
        # Pick a name: the declared one, the typedef alias, or a synthetic
        # one based on the DIE offset.
        if type[1] is None:
            if alias is None:
                struct_name = "{}_{}".format(kind, hex(die_offset))
            else:
                struct_name = alias
        else:
            struct_name = type[1]
        # Prefix the name if it is not acceptable or if it is already taken
        # by something that is not a struct.
        if not _is_uname(str(struct_name)) or (
            get_struc_id(str(struct_name)) == BADADDR
            and get_name_ea(BADADDR, str(struct_name)) != BADADDR
        ):
            struct_name = "_" + struct_name
        struct_id = get_struc_id(str(struct_name))
        if struct_id != BADADDR:
            # The struct already exists; remember its id in the entry.
            if len(type) == 4:
                type.append(struct_id)
            return struct_name
        log_fp.write("{}: ...\n".format(struct_name))
        log_fp.flush()
        struct_id = add_struc(BADADDR, str(struct_name), kind == "union")
        log_fp.write("... id={}\n".format(hex(struct_id)))
        log_fp.flush()
        if struct_id == BADADDR:
            return DEFAULT_TYPE
        # Record the id before resolving the members.
        type.append(struct_id)
        if kind == "struct" and type[2] != 0:
            ret = add_end_member(struct_id, struct_name, type[2], log_fp)
            have_end_member = ret == 0
        else:
            have_end_member = False
        for member_type_die_offset, member_name, member_offset in type[3]:
            if member_name is None:
                # Anonymous member: name it after its offset (struct) or its
                # ordinal (union).
                if kind == "struct":
                    field_n = member_offset
                else:
                    field_n = sum(1 for _ in StructMembers(struct_id))
                member_name = "field_{:X}".format(field_n)
            elif not _is_uname(str(member_name)):
                member_name = "_" + member_name
            member_type_str = str(resolve_type(like, member_type_die_offset, log_fp))
            member_size = get_type_size(like, member_type_die_offset)
            if have_end_member and member_offset + member_size == type[2]:
                # This member reaches the end of the struct, so the
                # placeholder byte at the end must make room for it.
                del_struc_member(struct_id, type[2] - 1)
                have_end_member = False
            log_fp.write(
                "{} {}.{}: ...\n".format(member_type_str, struct_name, member_name)
            )
            log_fp.flush()
            # The member is created as a single byte first; its real type is
            # applied afterwards.
            ret = add_struc_member(
                struct_id,
                str(member_name),
                member_offset,
                DEFAULT_TYPE_FLAGS,
                DEFAULT_TYPE_ID,
                DEFAULT_TYPE_SIZE,
            )
            log_fp.write("... ret={}\n".format(ret))
            log_fp.flush()
            if ret == 0:
                member_id = get_name_ea(
                    BADADDR, "{}.{}".format(struct_name, member_name)
                )
                apply_type(member_id, parse_decl(member_type_str, 0))
        return struct_name
    if kind == "typedef":
        # The typedef name serves as the alias for an anonymous target.
        return resolve_type(like, type[2], log_fp, type[1])
    if kind == "pointer":
        return resolve_type(like, type[1], log_fp) + "*"
    if kind == "base":
        # type[1] selects the signed variant, type[2] is the size in bytes.
        if type[1]:
            return "__int" + str(type[2] * 8)
        else:
            return "unsigned __int" + str(type[2] * 8)
    if kind in ("const", "volatile"):
        # Qualifiers are dropped.
        return resolve_type(like, type[1], log_fp)
    if kind == "array":
        return "{}[{}]".format(resolve_type(like, type[1], log_fp), type[2])
    return DEFAULT_TYPE
173 | 
174 | 
def apply_like(path):
    """Apply the function prototypes described by the JSON file at path.

    For every "subprogram" entry whose name exists in the database, a C
    declaration is assembled from the resolved return and parameter types and
    applied at the address of the name. Progress is logged to "<path>.log".
    """
    with open("{}.log".format(path), "w") as log_fp:
        with open(path) as fp:
            like = json.load(fp)

        def log(message):
            # Flush after every message, so that the log is up to date.
            log_fp.write(message)
            log_fp.flush()

        for item in like.values():
            if item[0] != "subprogram":
                continue
            _, return_type, name, parameters, has_varargs = item
            address = get_name_ea(BADADDR, str(name))
            if address == BADADDR:
                log("Subprogram not found: {}\n".format(name))
                continue
            return_decl = resolve_type(like, return_type, log_fp)
            argument_decls = []
            for parameter_type, parameter_name in parameters:
                if not _is_uname(str(parameter_name)):
                    parameter_name = "_" + parameter_name
                argument_decl = resolve_type(like, parameter_type, log_fp)
                # Parameters without an acceptable name stay anonymous.
                if _is_uname(str(parameter_name)):
                    argument_decl += " " + parameter_name
                argument_decls.append(argument_decl)
            if has_varargs:
                argument_decls.append("...")
            decl = return_decl + " " + name + "(" + ", ".join(argument_decls) + ")"
            log("{}: ...\n".format(decl))
            ret = apply_type(address, parse_decl(str(decl), 0))
            log("... ret={}\n".format(ret))
210 | 


--------------------------------------------------------------------------------
/ls-py0:
--------------------------------------------------------------------------------
#!/bin/sh
# Print the NUL-separated list of tracked Python files: everything matching
# *.py plus every file that contains a python3 or pypy3 shebang line.
set -e -u -x
cd "$(dirname "$0")"
{
    git ls-files -z '*.py'
    git grep --name-only -z '^#!/usr/bin/env python3'
    git grep --name-only -z '^#!/usr/bin/env pypy3'
} | sort -u -z
# sort -u removes the files that were listed more than once.
9 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | kconfiglib
2 | nose
3 | pyelftools
4 | 


--------------------------------------------------------------------------------
/test/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mephi42/ida-kallsyms/79b9e141ba90bf7f1381e2adc669445afecc0108/test/__init__.py


--------------------------------------------------------------------------------
/test/kallsyms-3.10.0-862.11.6.el7.x86_64.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mephi42/ida-kallsyms/79b9e141ba90bf7f1381e2adc669445afecc0108/test/kallsyms-3.10.0-862.11.6.el7.x86_64.gz


--------------------------------------------------------------------------------
/test/kallsyms-4.16.3-301.fc28.s390x.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mephi42/ida-kallsyms/79b9e141ba90bf7f1381e2adc669445afecc0108/test/kallsyms-4.16.3-301.fc28.s390x.gz


--------------------------------------------------------------------------------
/test/kallsyms-4.4.0-1085-raspi2.arm.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mephi42/ida-kallsyms/79b9e141ba90bf7f1381e2adc669445afecc0108/test/kallsyms-4.4.0-1085-raspi2.arm.gz


--------------------------------------------------------------------------------
/test/kallsyms-4.4.223.defcon2020.ooofs.i686.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mephi42/ida-kallsyms/79b9e141ba90bf7f1381e2adc669445afecc0108/test/kallsyms-4.4.223.defcon2020.ooofs.i686.gz


--------------------------------------------------------------------------------
/test/kallsyms-4.4.223.defconfig.i686.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mephi42/ida-kallsyms/79b9e141ba90bf7f1381e2adc669445afecc0108/test/kallsyms-4.4.223.defconfig.i686.gz


--------------------------------------------------------------------------------
/test/kallsyms-5.1.0.tasteless2019.tee.aarch64.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mephi42/ida-kallsyms/79b9e141ba90bf7f1381e2adc669445afecc0108/test/kallsyms-5.1.0.tasteless2019.tee.aarch64.gz


--------------------------------------------------------------------------------
/test/kallsyms-5.1.9.balsn2019.krazynote.x86_64.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mephi42/ida-kallsyms/79b9e141ba90bf7f1381e2adc669445afecc0108/test/kallsyms-5.1.9.balsn2019.krazynote.x86_64.gz


--------------------------------------------------------------------------------
/test/kallsyms-5.3.0.hitcon2019.poe.x86_64.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mephi42/ida-kallsyms/79b9e141ba90bf7f1381e2adc669445afecc0108/test/kallsyms-5.3.0.hitcon2019.poe.x86_64.gz


--------------------------------------------------------------------------------
/test/kallsyms-6.2.8.seccon2023.kmemo.x86_64.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mephi42/ida-kallsyms/79b9e141ba90bf7f1381e2adc669445afecc0108/test/kallsyms-6.2.8.seccon2023.kmemo.x86_64.gz


--------------------------------------------------------------------------------
/test/kallsyms-6.5.0-rc4.ctfzone2023.pwnfinity.x86_64.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mephi42/ida-kallsyms/79b9e141ba90bf7f1381e2adc669445afecc0108/test/kallsyms-6.5.0-rc4.ctfzone2023.pwnfinity.x86_64.gz


--------------------------------------------------------------------------------
/test/kallsyms-6.8.0-48-generic.noble.x86_64.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mephi42/ida-kallsyms/79b9e141ba90bf7f1381e2adc669445afecc0108/test/kallsyms-6.8.0-48-generic.noble.x86_64.gz


--------------------------------------------------------------------------------
/test/test_find_kallsyms.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | import gzip
  3 | import os
  4 | import unittest
  5 | 
  6 | from find_kallsyms import find_kallsyms_in_rodata
  7 | 
  8 | 
  9 | class TestFindKallsyms(unittest.TestCase):
 10 |     def __init__(self, *args, **kwargs):
 11 |         unittest.TestCase.__init__(self, *args, **kwargs)
 12 |         self.basedir = os.path.dirname(__file__)
 13 | 
 14 |     def _read(self, name):
 15 |         with gzip.GzipFile(os.path.join(self.basedir, name)) as fp:
 16 |             return fp.read()
 17 | 
 18 |     def test_kallsyms_4_16_3_s390x(self):
 19 |         addresses_and_names = list(
 20 |             find_kallsyms_in_rodata(self._read("kallsyms-4.16.3-301.fc28.s390x.gz"))
 21 |         )
 22 |         self.assertEqual(62766, len(addresses_and_names))
 23 |         self.assertEqual((0x200, "T_text"), addresses_and_names[0])
 24 |         self.assertEqual((0xD32000, "B__bss_stop"), addresses_and_names[-1])
 25 | 
 26 |     def test_kallsyms_3_10_0_x86_64(self):
 27 |         addresses_and_names = list(
 28 |             find_kallsyms_in_rodata(
 29 |                 self._read("kallsyms-3.10.0-862.11.6.el7.x86_64.gz")
 30 |             )
 31 |         )
 32 |         self.assertEqual(82619, len(addresses_and_names))
 33 |         self.assertEqual((0, "Airq_stack_union"), addresses_and_names[0])
 34 |         (dump_stack_address,) = [
 35 |             address for address, name in addresses_and_names if name == "Tdump_stack"
 36 |         ]
 37 |         self.assertEqual(0xFFFFFFFF817135BB, dump_stack_address)
 38 |         self.assertEqual(
 39 |             (0xFFFFFFFF82657000, "B__brk_limit"),
 40 |             addresses_and_names[-1],
 41 |         )
 42 | 
 43 |     def test_kallsyms_5_1_9_x86_64(self):
 44 |         addresses_and_names = list(
 45 |             find_kallsyms_in_rodata(
 46 |                 self._read("kallsyms-5.1.9.balsn2019.krazynote.x86_64.gz")
 47 |             )
 48 |         )
 49 |         self.assertEqual(74045, len(addresses_and_names))
 50 |         self.assertEqual((0, "Airq_stack_union"), addresses_and_names[0])
 51 |         self.assertEqual((0xFFFFFFFF82A2C000, "B__brk_limit"), addresses_and_names[-1])
 52 | 
 53 |     def test_kallsyms_5_1_0_aarch64(self):
 54 |         addresses_and_names = list(
 55 |             find_kallsyms_in_rodata(
 56 |                 self._read("kallsyms-5.1.0.tasteless2019.tee.aarch64.gz")
 57 |             )
 58 |         )
 59 |         self.assertEqual(117079, len(addresses_and_names))
 60 |         self.assertEqual((0xFFFF000010080000, "t_head"), addresses_and_names[0])
 61 |         self.assertEqual((0xFFFF00001144E000, "B_end"), addresses_and_names[-1])
 62 | 
 63 |     def test_kallsyms_5_3_0_x86_64(self):
 64 |         addresses_and_names = list(
 65 |             find_kallsyms_in_rodata(
 66 |                 self._read("kallsyms-5.3.0.hitcon2019.poe.x86_64.gz")
 67 |             )
 68 |         )
 69 |         self.assertEqual(88612, len(addresses_and_names))
 70 |         self.assertEqual((0, "Afixed_percpu_data"), addresses_and_names[0])
 71 |         self.assertEqual(
 72 |             (0xFFFFFFFF83200000, "T__init_scratch_end"),
 73 |             addresses_and_names[-1],
 74 |         )
 75 | 
 76 |     def test_kallsyms_4_4_0_arm(self):
 77 |         addresses_and_names = list(
 78 |             find_kallsyms_in_rodata(self._read("kallsyms-4.4.0-1085-raspi2.arm.gz"))
 79 |         )
 80 |         self.assertEqual(78413, len(addresses_and_names))
 81 |         self.assertEqual((0x80008000, "Tstext"), addresses_and_names[0])
 82 |         self.assertEqual((0x80F56454, "B__bss_stop"), addresses_and_names[-1])
 83 | 
 84 |     def test_kallsyms_4_4_223_i686(self):
 85 |         addresses_and_names = list(
 86 |             find_kallsyms_in_rodata(
 87 |                 self._read("kallsyms-4.4.223.defcon2020.ooofs.i686.gz")
 88 |             )
 89 |         )
 90 |         self.assertEqual(80397, len(addresses_and_names))
 91 |         self.assertEqual((0xC1000000, "Tstartup_32"), addresses_and_names[0])
 92 |         self.assertEqual((0xC1E9B000, "B__brk_limit"), addresses_and_names[-1])
 93 | 
 94 |     def test_kallsyms_4_4_223_i686_v2(self):
 95 |         addresses_and_names = list(
 96 |             find_kallsyms_in_rodata(self._read("kallsyms-4.4.223.defconfig.i686.gz"))
 97 |         )
 98 |         self.assertEqual(39874, len(addresses_and_names))
 99 |         self.assertEqual(
100 |             (0xC1000338, "tsanitize_boot_params.constprop.0"),
101 |             addresses_and_names[0],
102 |         )
103 |         self.assertEqual((0xC1BE29BD, "T_einittext"), addresses_and_names[-1])
104 | 
105 |     def test_kallsyms_6_5_0_x86_64(self):
106 |         addresses_and_names = list(
107 |             find_kallsyms_in_rodata(
108 |                 self._read("kallsyms-6.5.0-rc4.ctfzone2023.pwnfinity.x86_64.gz")
109 |             )
110 |         )
111 |         self.assertEqual(140153, len(addresses_and_names))
112 |         self.assertEqual(
113 |             (0, "Afixed_percpu_data"),
114 |             addresses_and_names[0],
115 |         )
116 |         self.assertEqual(
117 |             (0xFFFFFFFF84400000, "D__init_scratch_end"), addresses_and_names[-1]
118 |         )
119 | 
120 |     def test_kallsyms_6_2_8_x86_64(self):
121 |         addresses_and_names = list(
122 |             find_kallsyms_in_rodata(
123 |                 self._read("kallsyms-6.2.8.seccon2023.kmemo.x86_64.gz")
124 |             )
125 |         )
126 |         self.assertEqual(24624, len(addresses_and_names))
127 |         self.assertEqual(
128 |             (0xFFFFFFFF81000000, "Tstartup_64"),
129 |             addresses_and_names[0],
130 |         )
131 |         self.assertEqual((0xFFFFFFFF81B4E01E, "T_einittext"), addresses_and_names[-1])
132 | 
133 |     def test_kallsyms_6_8_0_x86_64(self):
134 |         addresses_and_names = list(
135 |             find_kallsyms_in_rodata(
136 |                 self._read("kallsyms-6.8.0-48-generic.noble.x86_64.gz")
137 |             )
138 |         )
139 |         self.assertEqual(203185, len(addresses_and_names))
140 |         self.assertEqual(
141 |             (0, "Afixed_percpu_data"),
142 |             addresses_and_names[0],
143 |         )
144 |         self.assertEqual(
145 |             (0xFFFFFFFF84800000, "D__init_scratch_end"), addresses_and_names[-1]
146 |         )
147 | 
148 | 
149 | if __name__ == "__main__":
150 |     unittest.main()
151 | 


--------------------------------------------------------------------------------