├── docs ├── api.md ├── examples.md ├── install.md ├── README.md ├── requirements.txt ├── api │ ├── .pages │ ├── bochscpu.InstructionType.md │ ├── README.md │ ├── bochscpu.Session.md │ ├── bochscpu.Segment.md │ ├── bochscpu.Hook.md │ ├── bochscpu.State.md │ └── bochscpu._bochscpu.md ├── gallery.md └── bochscpu.Hook.md ├── python ├── bochscpu │ ├── memory │ │ └── __init__.py │ ├── __init__.py │ ├── _bochscpu │ │ └── memory │ │ │ └── __init__.pyi │ ├── utils │ │ ├── __init__.py │ │ ├── cpu.py │ │ └── callbacks.py │ └── cpu │ │ └── __init__.py ├── CMakeLists.txt ├── src │ ├── bochscpu_callbacks.cpp │ └── bochscpu_mem.cpp └── inc │ └── bochscpu.hpp ├── bochscpu ├── lib │ └── README.md └── bochscpu.hpp ├── mkdocs.yml ├── CMakeLists.txt ├── .gitignore ├── LICENSE ├── .github ├── build-bochscpu.sh ├── Invoke-VisualStudio.ps1 ├── build-bochscpu.ps1 └── workflows │ └── build.yml ├── pyproject.toml ├── .clang-format ├── README.md └── examples ├── long_mode_fibonacci.py ├── template.py ├── real_mode_print_hello_world.py ├── long_mode_emulate_windows_kdump.py ├── long_mode_emulate_windows_udump.py └── long_mode_emulate_linux_udump.py /docs/api.md: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/examples.md: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/install.md: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | # `bochscpu-python` 2 | -------------------------------------------------------------------------------- /docs/requirements.txt: 
-------------------------------------------------------------------------------- 1 | mkdocs-material 2 | lazydocs 3 | -------------------------------------------------------------------------------- /docs/api/.pages: -------------------------------------------------------------------------------- 1 | title: API Reference 2 | nav: 3 | - Overview: README.md 4 | - ... 5 | -------------------------------------------------------------------------------- /python/bochscpu/memory/__init__.py: -------------------------------------------------------------------------------- 1 | from bochscpu._bochscpu.memory import * # type: ignore 2 | -------------------------------------------------------------------------------- /docs/gallery.md: -------------------------------------------------------------------------------- 1 | # Screenshots 2 | 3 | 4 | ## Fibonacci (long mode) 5 | 6 | ![image](https://i.imgur.com/YvXg2Tz.png) 7 | 8 | ## Windows user-mode dump emulation (long mode) 9 | 10 | -------------------------------------------------------------------------------- /bochscpu/lib/README.md: -------------------------------------------------------------------------------- 1 | ## BochsCPU build 2 | 3 | Place the built libraries (`.lib`, `.a`) generated by bochscpu-ffi in this folder, inside a subfolder named after the CMake build type, for instance `Debug`, `Release`, or `RelWithDebInfo`. 4 | -------------------------------------------------------------------------------- /docs/api/bochscpu.InstructionType.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | # class `InstructionType` 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | --- 14 | 15 | _This file was automatically generated via [lazydocs](https://github.com/ml-tooling/lazydocs)._ 16 | -------------------------------------------------------------------------------- /docs/api/README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | # API Overview 4 | 5
| ## Modules 6 | 7 | - No modules 8 | 9 | ## Classes 10 | 11 | - No classes 12 | 13 | ## Functions 14 | 15 | - No functions 16 | 17 | 18 | --- 19 | 20 | _This file was automatically generated via [lazydocs](https://github.com/ml-tooling/lazydocs)._ 21 | -------------------------------------------------------------------------------- /python/bochscpu/__init__.py: -------------------------------------------------------------------------------- 1 | """Root module of `bochscpu` Python package.""" 2 | 3 | # 4 | # `_bochscpu` is the C++ module 5 | # 6 | from ._bochscpu import ( # type: ignore 7 | OpcodeOperationType, 8 | HookType, 9 | OpcodeOperationType, 10 | PrefetchType, 11 | CacheControlType, 12 | TlbControlType, 13 | InstructionType, 14 | Segment, 15 | GlobalSegment, 16 | Hook, 17 | State, 18 | Session, 19 | ) 20 | -------------------------------------------------------------------------------- /mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: Python bindings for `bochscpu` 2 | theme: 3 | name: material 4 | font: 5 | text: Roboto 6 | code: Roboto Mono 7 | features: 8 | - navigation.instant 9 | - navigation.tabs 10 | 11 | 12 | nav: 13 | - Home: README.md 14 | - Gallery: gallery.md 15 | - Setup: 16 | - Installation: install.md 17 | - Building: build.md 18 | - Usage: 19 | - Basic Examples: examples.md 20 | - Development: 21 | - API: api/README.md 22 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.20) 2 | set(CMAKE_CXX_STANDARD 20) 3 | set(CMAKE_CXX_STANDARD_REQUIRED True) 4 | set(CMAKE_CXX_EXTENSIONS OFF) 5 | set(CMAKE_INTERPROCEDURAL_OPTIMIZATION True) 6 | 7 | project( 8 | bochscpu-python 9 | LANGUAGES CXX 10 | VERSION 0.4.0 11 | DESCRIPTION "Python bindings for BochsCPU" 12 | HOMEPAGE_URL https://github.com/hugsy/bochscpu-python 13 | ) 14 | 
15 | set(PROJECT_AUTHOR hugsy) 16 | set(PROJECT_LICENSE MIT) 17 | 18 | set(CXX_STANDARD 20) 19 | 20 | add_subdirectory(python) 21 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Prerequisites 2 | *.d 3 | 4 | # Compiled Object files 5 | *.slo 6 | *.lo 7 | *.o 8 | *.obj 9 | 10 | # Precompiled Headers 11 | *.gch 12 | *.pch 13 | 14 | # Compiled Dynamic libraries 15 | *.so 16 | *.dylib 17 | *.dll 18 | *.pyd 19 | *.pyc 20 | 21 | __pycache__ 22 | 23 | # Fortran module files 24 | *.mod 25 | *.smod 26 | 27 | # Compiled Static libraries 28 | *.lai 29 | *.la 30 | *.a 31 | *.lib 32 | 33 | # Executables 34 | *.exe 35 | *.out 36 | *.app 37 | 38 | build 39 | wheel 40 | .vscode 41 | bochscpu/lib/* 42 | !bochscpu/lib/README.md 43 | -------------------------------------------------------------------------------- /docs/api/bochscpu.Session.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | # class `Session` 4 | Class session 5 | 6 | 7 | --- 8 | 9 | ### property Session.cpu 10 | 11 | Get the CPU associated to the session 12 | 13 | --- 14 | 15 | ### property Session.missing_page_handler 16 | 17 | Set the missing page callback 18 | 19 | 20 | --- 21 | 22 | ### handler Session.run 23 | 24 | --- 25 | 26 | ### handler Session.stop 27 | 28 | 29 | 30 | 31 | --- 32 | 33 | _This file was automatically generated via [lazydocs](https://github.com/ml-tooling/lazydocs)._ 34 | -------------------------------------------------------------------------------- /docs/api/bochscpu.Segment.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | # class `Segment` 4 | Segment class 5 | 6 | 7 | --- 8 | 9 | ### property Segment.attr 10 | 11 | Get/Set the Segment `attr` attribute 12 | 13 | --- 14 | 15 | ### property Segment.base 16 | 17 | Get/Set the Segment `base` attribute 18 | 19 | --- 20 | 
21 | ### property Segment.limit 22 | 23 | Get/Set the Segment `limit` attribute 24 | 25 | --- 26 | 27 | ### property Segment.present 28 | 29 | Get/Set the Segment `present` attribute 30 | 31 | --- 32 | 33 | ### property Segment.selector 34 | 35 | Get/Set the Segment `selector` attribute 36 | 37 | 38 | 39 | 40 | 41 | --- 42 | 43 | _This file was automatically generated via [lazydocs](https://github.com/ml-tooling/lazydocs)._ 44 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 crazy hugsy 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /.github/build-bochscpu.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Script taken from WTF (https://github.com/0vercl0k/wtf/) 3 | # 4 | # Axel '0vercl0k' Souchet - May 2 2020 5 | # Build / configure bxcpu-ffi (requires bash: uses pushd/popd) 6 | set -x 7 | set -e 8 | 9 | pushd . 10 | 11 | NB_CPU="${NB_CPU:-1}" 12 | 13 | mkdir bxbuild 14 | cd bxbuild 15 | 16 | git clone https://github.com/yrp604/bochscpu-build.git 17 | git clone https://github.com/yrp604/bochscpu.git 18 | git clone https://github.com/yrp604/bochscpu-ffi.git 19 | 20 | cd bochscpu-build 21 | bash prep.sh && cd Bochs/bochs && sh .conf.cpu 22 | make -j "${NB_CPU}" -C cpu/fpu 23 | make -j "${NB_CPU}" -C cpu/avx 24 | make -j "${NB_CPU}" -C cpu/cpudb 25 | make -j "${NB_CPU}" -C cpu/softfloat3e 26 | make -j "${NB_CPU}" -C cpu 27 | 28 | # Remove old files in bochscpu. 29 | rm -rf ../../../bochscpu/bochs 30 | rm -rf ../../../bochscpu/lib 31 | 32 | # Create the libs directory where we stuff all the libs. 33 | mkdir ../../../bochscpu/lib 34 | find . -type f -name 'lib*.a' -exec cp -v {} ../../../bochscpu/lib/ \; 35 | make all-clean 36 | 37 | # Now we want to copy the bochs directory over there. 38 | cd .. 39 | mv bochs ../../bochscpu/bochs 40 | 41 | # Now it's time to build it. 42 | cd ../../bochscpu-ffi 43 | 44 | cargo clean 45 | cargo build -j "${NB_CPU}" 46 | cargo build -j "${NB_CPU}" --release 47 | 48 | # Get back to where we were.
49 | popd 50 | -------------------------------------------------------------------------------- /.github/Invoke-VisualStudio.ps1: -------------------------------------------------------------------------------- 1 | Function Invoke-CmdScript { 2 | param( 3 | [String] $scriptName 4 | ) 5 | $cmdLine = """$scriptName"" $args & set" 6 | & $env:SystemRoot\system32\cmd.exe /c $cmdLine | 7 | Select-String '^([^=]*)=(.*)$' | ForEach-Object { 8 | $varName = $_.Matches[0].Groups[1].Value 9 | $varValue = $_.Matches[0].Groups[2].Value 10 | Set-Item Env:$varName $varValue 11 | } 12 | } 13 | 14 | 15 | Function Invoke-VisualStudio2019win32 { 16 | Invoke-CmdScript "C:/Program Files (x86)/Microsoft Visual Studio/2019/Enterprise/VC/Auxiliary/Build/vcvars32.bat" 17 | } 18 | 19 | Function Invoke-VisualStudio2019x64 { 20 | Invoke-CmdScript "C:/Program Files (x86)/Microsoft Visual Studio/2019/Enterprise/VC/Auxiliary/Build/vcvars64.bat" 21 | } 22 | 23 | Function Invoke-VisualStudio2019arm64 { 24 | Invoke-CmdScript "C:/Program Files (x86)/Microsoft Visual Studio/2019/Enterprise/VC/Auxiliary/Build/vcvarsamd64_arm64.bat" 25 | } 26 | 27 | Function Invoke-VisualStudio2022win32 { 28 | Invoke-CmdScript "C:/Program Files/Microsoft Visual Studio/2022/Enterprise/VC/Auxiliary/Build/vcvars32.bat" 29 | } 30 | 31 | Function Invoke-VisualStudio2022x64 { 32 | Invoke-CmdScript "C:/Program Files/Microsoft Visual Studio/2022/Enterprise/VC/Auxiliary/Build/vcvars64.bat" 33 | } 34 | 35 | Function Invoke-VisualStudio2022arm64 { 36 | Invoke-CmdScript "C:/Program Files/Microsoft Visual Studio/2022/Enterprise/VC/Auxiliary/Build/vcvarsamd64_arm64.bat" 37 | } 38 | -------------------------------------------------------------------------------- /.github/build-bochscpu.ps1: -------------------------------------------------------------------------------- 1 | $ErrorActionPreference = "Stop" 2 | 3 | Push-Location 4 | 5 | New-Item -ItemType Directory -Name bxbuild 6 | Set-Location bxbuild 7 | 8 | git clone 
https://github.com/yrp604/bochscpu-build.git 9 | git clone https://github.com/yrp604/bochscpu.git 10 | git clone https://github.com/yrp604/bochscpu-ffi.git 11 | 12 | bash -c "cd bochscpu-build && bash prep.sh && cd Bochs/bochs && bash .conf.cpu-msvc" 13 | 14 | Set-Location bochscpu-build\Bochs\bochs 15 | $env:CL = "/MP$env:NUMBER_OF_PROCESSORS" 16 | nmake cpu\softfloat3e\libsoftfloat.a 17 | nmake cpu\fpu\libfpu.a 18 | nmake cpu\avx\libavx.a 19 | nmake cpu\cpudb\libcpudb.a 20 | nmake cpu\libcpu.a 21 | 22 | # Don't actually need the rest 23 | # nmake 24 | 25 | Remove-Item -Recurse -Force -ErrorAction Ignore ..\..\..\bochscpu\bochs 26 | Remove-Item -Recurse -Force -ErrorAction Ignore ..\..\..\bochscpu\lib 27 | 28 | New-Item -ItemType Directory -Name ..\..\..\bochscpu\lib 29 | Copy-Item cpu\libcpu.a ..\..\..\bochscpu\lib\cpu.lib 30 | Copy-Item cpu\fpu\libfpu.a ..\..\..\bochscpu\lib\fpu.lib 31 | Copy-Item cpu\avx\libavx.a ..\..\..\bochscpu\lib\avx.lib 32 | Copy-Item cpu\cpudb\libcpudb.a ..\..\..\bochscpu\lib\cpudb.lib 33 | Copy-Item cpu\softfloat3e\libsoftfloat.a ..\..\..\bochscpu\lib\softfloat.lib 34 | 35 | New-Item -ItemType Directory -Name ..\..\..\bochscpu\bochs 36 | Copy-Item -Recurse -Force . ..\..\..\bochscpu 37 | 38 | Set-Location ..\..\..\bochscpu-ffi 39 | cargo clean 40 | cargo build --jobs $env:NUMBER_OF_PROCESSORS 41 | cargo build --jobs $env:NUMBER_OF_PROCESSORS --release 42 | 43 | Pop-Location 44 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["scikit-build-core >=0.4.3", "nanobind >=1.3.2"] 3 | build-backend = "scikit_build_core.build" 4 | 5 | [project] 6 | name = "bochscpu-python" 7 | version = "0.4.0" 8 | description = "Python bindings for BochsCPU." 
9 | readme = "./README.md" 10 | license.file = "./LICENSE" 11 | requires-python = ">=3.9" 12 | authors = [{ name = "hugsy", email = "hugsy@blah.cat" }] 13 | classifiers = [ 14 | "Development Status :: 4 - Beta", 15 | "License :: OSI Approved :: MIT License", 16 | "Programming Language :: Python :: 3", 17 | "Programming Language :: Python :: 3.9", 18 | "Programming Language :: Python :: 3.10", 19 | "Programming Language :: Python :: 3.11", 20 | "Programming Language :: Python :: 3.12", 21 | "Programming Language :: Python :: 3.13", 22 | "Topic :: System :: Emulators", 23 | "Natural Language :: English", 24 | ] 25 | dependencies = ["setuptools", "wheel", "nanobind"] 26 | 27 | [project.optional-dependencies] 28 | tests = ["pytest", "black", "capstone", "keystone-engine"] 29 | 30 | [project.urls] 31 | Homepage = "https://github.com/hugsy/bochscpu-python" 32 | 33 | [tool.isort] 34 | profile = "black" 35 | 36 | [tool.scikit-build] 37 | wheel.py-api = "cp313" 38 | minimum-version = "0.4" 39 | build-dir = "build/{wheel_tag}" 40 | cmake.minimum-version = "3.20" 41 | 42 | # Uncomment for debug (+ASAN) 43 | # cmake.verbose = true 44 | # logging.level = "DEBUG" 45 | # cmake.build-type = "Debug" 46 | 47 | [tool.cibuildwheel] 48 | build = "" 49 | skip = "cp27-* cp35-* cp36-* cp37-* cp38-* pp* *musllinux*" 50 | test-skip = "" 51 | free-threaded-support = false 52 | # use images from https://github.com/pypa/manylinux 53 | archs = ["x86_64", "aarch64"] 54 | manylinux-x86_64-image = "manylinux_2_28" 55 | manylinux-aarch64-image = "manylinux_2_28" 56 | musllinux-x86_64-image = "musllinux_1_2" 57 | musllinux-aarch64-image = "musllinux_1_2" 58 | -------------------------------------------------------------------------------- /python/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | find_package(Python 3 2 | REQUIRED COMPONENTS Interpreter Development.Module 3 | OPTIONAL_COMPONENTS Development.SABIModule 4 | ) 5 | 6 | if(NOT 
CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES) 7 | set(CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build." FORCE) 8 | set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo") 9 | endif() 10 | 11 | execute_process( 12 | COMMAND "${Python_EXECUTABLE}" -m nanobind --cmake_dir 13 | OUTPUT_STRIP_TRAILING_WHITESPACE OUTPUT_VARIABLE NB_DIR) 14 | list(APPEND CMAKE_PREFIX_PATH "${NB_DIR}") 15 | 16 | find_package(nanobind CONFIG REQUIRED) 17 | 18 | nanobind_add_module( 19 | _bochscpu NB_STATIC 20 | src/bochscpu_callbacks.cpp 21 | src/bochscpu_cpu.cpp 22 | src/bochscpu_mem.cpp 23 | src/bochscpu.cpp 24 | ) 25 | 26 | target_include_directories(_bochscpu PRIVATE ${CMAKE_SOURCE_DIR} ${CMAKE_SOURCE_DIR}/python/inc) 27 | target_link_directories(_bochscpu PRIVATE ${CMAKE_SOURCE_DIR}/bochscpu/lib/$<CONFIG>) # $<CONFIG> (not CMAKE_BUILD_TYPE) so multi-config generators resolve the per-config lib folder 28 | target_link_libraries(_bochscpu PRIVATE) 29 | 30 | if(MSVC) 31 | target_link_libraries(_bochscpu PRIVATE Userenv.lib Bcrypt.lib Ws2_32.lib kernel32.lib ntdll.lib bochscpu_ffi.lib) 32 | else() 33 | target_link_libraries(_bochscpu PRIVATE bochscpu_ffi) 34 | endif() 35 | 36 | if(APPLE) 37 | target_link_libraries(_bochscpu PRIVATE "-framework CoreFoundation" -Wl -lm) 38 | endif() 39 | 40 | install(DIRECTORY bochscpu DESTINATION .) 41 | install(TARGETS _bochscpu LIBRARY DESTINATION bochscpu) 42 | 43 | if(MSVC) 44 | install(FILES $<TARGET_PDB_FILE:_bochscpu> DESTINATION .
OPTIONAL) 45 | endif(MSVC) 46 | 47 | # 48 | # Build with ASAN if available AND debug build 49 | # 50 | include(CheckCXXCompilerFlag) 51 | CHECK_CXX_COMPILER_FLAG("-fsanitize=address" HAS_ASAN_AVAILABLE) 52 | 53 | if(HAS_ASAN_AVAILABLE) 54 | message(STATUS "Found Address Sanitizer") 55 | target_compile_definitions(_bochscpu 56 | PUBLIC 57 | "$<$<CONFIG:Debug>:_DISABLE_VECTOR_ANNOTATION;_DISABLE_STRING_ANNOTATION>" # Debug only; quoted because the expression contains a list 58 | ) 59 | target_compile_options(_bochscpu 60 | PUBLIC 61 | $<$<CONFIG:Debug>:-fsanitize=address> 62 | ) 63 | else() 64 | message(STATUS "Address Sanitizer not found") 65 | endif(HAS_ASAN_AVAILABLE) 66 | 67 | -------------------------------------------------------------------------------- /python/bochscpu/_bochscpu/memory/__init__.pyi: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | from enum import Enum 3 | 4 | class AccessType(Enum): 5 | Execute: AccessType 6 | Read: AccessType 7 | Write: AccessType 8 | 9 | class PageMapLevel4Table: 10 | def __init__(self) -> None: 11 | """ 12 | Initialize the memory layout 13 | """ 14 | ... 15 | def commit(self, pml4_pa: int, /) -> list[tuple[int, int]]: 16 | """ 17 | Commit the layout of the tree to memory 18 | """ 19 | ... 20 | def insert(self, va: int, pa: int, flags: int, /) -> None: 21 | """ 22 | Associate the VA to PA 23 | """ 24 | ... 25 | def translate(self, va: int, /) -> Optional[int]: 26 | """ 27 | Translate a VA -> PA 28 | """ 29 | ... 30 | 31 | def align_address_to_page(addr: int, /) -> int: 32 | """ 33 | Align an address to the page it's in 34 | """ 35 | ... 36 | 37 | def page_size() -> int: 38 | """ 39 | Get the page size 40 | """ 41 | ... 42 | 43 | def allocate_host_page() -> int: 44 | """ 45 | Allocate (VirtualAlloc/mmap) a page on the host, returns the HVA on success, 0 otherwise 46 | """ 47 | ... 48 | 49 | def release_host_page(hva: int) -> None: 50 | """ 51 | Release (VirtualFree/munmap) a page on the host 52 | """ 53 | ...
54 | 55 | def page_insert(gpa: int, hva: int, /) -> None: 56 | """ 57 | Map a GPA to a HVA in Bochs 58 | """ 59 | ... 60 | 61 | def page_remove(gpa: int) -> None: 62 | """ 63 | Remove a page by its GPA 64 | """ 65 | ... 66 | 67 | def phy_read(gpa: int, size: int) -> list[int]: 68 | """ 69 | Read from GPA 70 | """ 71 | ... 72 | 73 | def phy_translate(gpa: int) -> int: 74 | """ 75 | Translate from GPA to HVA 76 | """ 77 | ... 78 | 79 | def phy_write(gpa: int, bytes: bytes) -> None: 80 | """ 81 | Write to GPA 82 | """ 83 | ... 84 | 85 | def virt_read(cr3: int, gva: int, sz: int) -> list[int]: 86 | """ 87 | Read from GVA 88 | """ 89 | ... 90 | 91 | def virt_translate(cr3: int, gva: int) -> int: 92 | """ 93 | Translate from GVA to HVA 94 | """ 95 | ... 96 | 97 | def virt_write(cr3: int, gva: int, bytes: bytes) -> bool: 98 | """ 99 | Write to GVA 100 | """ 101 | ... 102 | -------------------------------------------------------------------------------- /docs/bochscpu.Hook.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | # class `Hook` 4 | Class Hook 5 | 6 | 7 | --- 8 | 9 | ### property Hook.after_execution 10 | 11 | Callback for Bochs `after_execution` callback 12 | 13 | --- 14 | 15 | ### property Hook.before_execution 16 | 17 | Callback for Bochs `before_execution` callback 18 | 19 | --- 20 | 21 | ### property Hook.cache_cntrl 22 | 23 | Callback for Bochs `cache_cntrl` callback 24 | 25 | --- 26 | 27 | ### property Hook.clflush 28 | 29 | Callback for Bochs `clflush` callback 30 | 31 | --- 32 | 33 | ### property Hook.cnear_branch_not_taken 34 | 35 | Callback for Bochs `cnear_branch_not_taken` callback 36 | 37 | --- 38 | 39 | ### property Hook.cnear_branch_taken 40 | 41 | Callback for Bochs `cnear_branch_taken` callback 42 | 43 | --- 44 | 45 | ### property Hook.ctx 46 | 47 | A raw pointer to the Session object 48 | 49 | --- 50 | 51 | ### property Hook.exception 52 | 53 | Callback for Bochs `exception` callback 54 | 55 | --- 56 
| 57 | ### property Hook.far_branch 58 | 59 | Callback for Bochs `far_branch` callback 60 | 61 | --- 62 | 63 | ### property Hook.hlt 64 | 65 | Callback for Bochs `hlt` callback 66 | 67 | --- 68 | 69 | ### property Hook.hw_interrupt 70 | 71 | Callback for Bochs `hw_interrupt` callback 72 | 73 | --- 74 | 75 | ### property Hook.inp 76 | 77 | Callback for Bochs `inp` callback 78 | 79 | --- 80 | 81 | ### property Hook.inp2 82 | 83 | Callback for Bochs `inp2` callback 84 | 85 | --- 86 | 87 | ### property Hook.interrupt 88 | 89 | Callback for Bochs `interrupt` callback 90 | 91 | --- 92 | 93 | ### property Hook.lin_access 94 | 95 | Callback for Bochs `lin_access` callback 96 | 97 | --- 98 | 99 | ### property Hook.mwait 100 | 101 | Callback for Bochs `mwait` callback 102 | 103 | --- 104 | 105 | ### property Hook.opcode 106 | 107 | Callback for Bochs `opcode` callback 108 | 109 | --- 110 | 111 | ### property Hook.outp 112 | 113 | Callback for Bochs `outp` callback 114 | 115 | --- 116 | 117 | ### property Hook.phy_access 118 | 119 | Callback for Bochs `phy_access` callback 120 | 121 | --- 122 | 123 | ### property Hook.prefetch_hint 124 | 125 | Callback for Bochs `prefetch_hint` callback 126 | 127 | --- 128 | 129 | ### property Hook.repeat_iteration 130 | 131 | Callback for Bochs `repeat_iteration` callback 132 | 133 | --- 134 | 135 | ### property Hook.reset 136 | 137 | Callback for Bochs `reset` callback 138 | 139 | --- 140 | 141 | ### property Hook.tlb_cntrl 142 | 143 | Callback for Bochs `tlb_cntrl` callback 144 | 145 | --- 146 | 147 | ### property Hook.ucnear_branch 148 | 149 | Callback for Bochs `ucnear_branch` callback 150 | 151 | --- 152 | 153 | ### property Hook.vmexit 154 | 155 | Callback for Bochs `vmexit` callback 156 | 157 | --- 158 | 159 | ### property Hook.wrmsr 160 | 161 | Callback for Bochs `wrmsr` callback 162 | 163 | 164 | 165 | 166 | 167 | --- 168 | 169 | _This file was automatically generated via [lazydocs](https://github.com/ml-tooling/lazydocs)._ 170 | 
-------------------------------------------------------------------------------- /docs/api/bochscpu.Hook.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | # class `Hook` 4 | Class Hook 5 | 6 | 7 | --- 8 | 9 | ### property Hook.after_execution 10 | 11 | Callback for Bochs `after_execution` callback 12 | 13 | --- 14 | 15 | ### property Hook.before_execution 16 | 17 | Callback for Bochs `before_execution` callback 18 | 19 | --- 20 | 21 | ### property Hook.cache_cntrl 22 | 23 | Callback for Bochs `cache_cntrl` callback 24 | 25 | --- 26 | 27 | ### property Hook.clflush 28 | 29 | Callback for Bochs `clflush` callback 30 | 31 | --- 32 | 33 | ### property Hook.cnear_branch_not_taken 34 | 35 | Callback for Bochs `cnear_branch_not_taken` callback 36 | 37 | --- 38 | 39 | ### property Hook.cnear_branch_taken 40 | 41 | Callback for Bochs `cnear_branch_taken` callback 42 | 43 | --- 44 | 45 | ### property Hook.ctx 46 | 47 | A raw pointer to the Session object 48 | 49 | --- 50 | 51 | ### property Hook.exception 52 | 53 | Callback for Bochs `exception` callback 54 | 55 | --- 56 | 57 | ### property Hook.far_branch 58 | 59 | Callback for Bochs `far_branch` callback 60 | 61 | --- 62 | 63 | ### property Hook.hlt 64 | 65 | Callback for Bochs `hlt` callback 66 | 67 | --- 68 | 69 | ### property Hook.hw_interrupt 70 | 71 | Callback for Bochs `hw_interrupt` callback 72 | 73 | --- 74 | 75 | ### property Hook.inp 76 | 77 | Callback for Bochs `inp` callback 78 | 79 | --- 80 | 81 | ### property Hook.inp2 82 | 83 | Callback for Bochs `inp2` callback 84 | 85 | --- 86 | 87 | ### property Hook.interrupt 88 | 89 | Callback for Bochs `interrupt` callback 90 | 91 | --- 92 | 93 | ### property Hook.lin_access 94 | 95 | Callback for Bochs `lin_access` callback 96 | 97 | --- 98 | 99 | ### property Hook.mwait 100 | 101 | Callback for Bochs `mwait` callback 102 | 103 | --- 104 | 105 | ### property Hook.opcode 106 | 107 | Callback for Bochs `opcode` callback 108 
| 109 | --- 110 | 111 | ### property Hook.outp 112 | 113 | Callback for Bochs `outp` callback 114 | 115 | --- 116 | 117 | ### property Hook.phy_access 118 | 119 | Callback for Bochs `phy_access` callback 120 | 121 | --- 122 | 123 | ### property Hook.prefetch_hint 124 | 125 | Callback for Bochs `prefetch_hint` callback 126 | 127 | --- 128 | 129 | ### property Hook.repeat_iteration 130 | 131 | Callback for Bochs `repeat_iteration` callback 132 | 133 | --- 134 | 135 | ### property Hook.reset 136 | 137 | Callback for Bochs `reset` callback 138 | 139 | --- 140 | 141 | ### property Hook.tlb_cntrl 142 | 143 | Callback for Bochs `tlb_cntrl` callback 144 | 145 | --- 146 | 147 | ### property Hook.ucnear_branch 148 | 149 | Callback for Bochs `ucnear_branch` callback 150 | 151 | --- 152 | 153 | ### property Hook.vmexit 154 | 155 | Callback for Bochs `vmexit` callback 156 | 157 | --- 158 | 159 | ### property Hook.wrmsr 160 | 161 | Callback for Bochs `wrmsr` callback 162 | 163 | 164 | 165 | 166 | 167 | --- 168 | 169 | _This file was automatically generated via [lazydocs](https://github.com/ml-tooling/lazydocs)._ 170 | -------------------------------------------------------------------------------- /python/bochscpu/utils/__init__.py: -------------------------------------------------------------------------------- 1 | import struct 2 | from typing import Optional 3 | 4 | import bochscpu.cpu 5 | import bochscpu.memory 6 | import bochscpu._bochscpu as _bochscpu 7 | 8 | from . 
def dump_registers(state: _bochscpu.State, include_kernel: bool = False):
    """Print the registers of a `State` using a WinDbg-like layout.

    General-purpose registers are printed three per line, under the name that
    matches the current CPU mode (`ax` / `eax` / `rax`), followed by the flags
    and the segment selectors. `r8`-`r15` only have a name in long mode and are
    skipped otherwise.

    Args:
        state (_bochscpu.State): the CPU state to display
        include_kernel (bool): when True, also print the control registers
            (cr0, cr2, cr3, cr4), the debug registers (dr0-dr3, dr6, dr7),
            xcr0 and efer

    Raises:
        Exception: if the state is not in real, protected or long mode
    """

    gprs: dict[str, tuple[Optional[str], Optional[str], str]] = {
        # keyname: (real, protected, long)
        "rax": ("ax", "eax", "rax"),
        "rbx": ("bx", "ebx", "rbx"),
        # This entry used to be keyed "rdx" as well: the duplicate key was
        # silently overwritten by the next line and rcx was never displayed.
        "rcx": ("cx", "ecx", "rcx"),
        "rdx": ("dx", "edx", "rdx"),
        "rsi": ("si", "esi", "rsi"),
        "rdi": ("di", "edi", "rdi"),
        "rbp": ("bp", "ebp", "rbp"),
        "rsp": ("sp", "esp", "rsp"),
        "rip": ("ip", "eip", "rip"),
        "r8": (None, None, " r8"),
        "r9": (None, None, " r9"),
        "r10": (None, None, "r10"),
        "r11": (None, None, "r11"),
        "r12": (None, None, "r12"),
        "r13": (None, None, "r13"),
        "r14": (None, None, "r14"),
        "r15": (None, None, "r15"),
    }

    # `idx` selects the column of `gprs` to use, `fmt` the zero-padded hex width.
    if bochscpu.cpu.is_real_mode(state):
        idx = 0
        fmt = 8
    elif bochscpu.cpu.is_protected_mode(state):
        idx = 1
        fmt = 8
    elif bochscpu.cpu.is_long_mode(state):
        idx = 2
        fmt = 16
    else:
        raise Exception("invalid state")

    printed = 0
    max_regs_per_line = 3
    for reg, names in gprs.items():
        name = names[idx]
        if not name:
            # Register does not exist in this mode. Skip it *before* the
            # line-break check, otherwise every skipped register emits an
            # empty line while `printed` sits on a multiple of 3.
            continue
        if printed % max_regs_per_line == 0:
            print("")
        value = getattr(state, reg)
        print(f"{name}={value:0{fmt}x}", end=" ")
        printed += 1

    print(
        f"""
efl={state.rflags:08x} {str(bochscpu.cpu.FlagRegister(state.rflags))}
cs={int(state.cs):04x} ss={int(state.ss):04x} ds={int(state.ds):04x} es={int(state.es):04x} fs={int(state.fs):04x} gs={int(state.gs):04x}
"""
    )

    if not include_kernel:
        return

    print(
        f"""
cr0={state.cr0:016x} cr2={state.cr2:016x} cr3={state.cr3:016x} cr4={state.cr4:016x}
dr0={state.dr0:016x} dr1={state.dr1:016x} dr2={state.dr2:016x} dr3={state.dr3:016x}
dr6={state.dr6:016x} dr7={state.dr7:016x} xcr0={state.xcr0:016x} efer={state.efer:016x}
"""
    )
166 | return cr0.PE and not efer.LME and not cr4.PAE and not cs.L and rflags.VM 167 | 168 | 169 | def is_long_mode(state: State) -> bool: 170 | """Indicates whether the given will run the CPU in long mode 171 | 172 | Args: 173 | state (State): the CPU state 174 | 175 | Returns: 176 | bool: True if the CPU will run in long mode 177 | """ 178 | cr0 = ControlRegister(state.cr0) 179 | cr4 = ControlRegister(state.cr4) 180 | efer = FeatureRegister(state.efer) 181 | cs = SegmentFlags(state.cs.attr) 182 | return efer.LME and cr4.PAE and cr0.PE and cs.L 183 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

2 | logo3 3 |

4 | 5 | # bochscpu-python 6 | 7 | [![Python 3.8+](https://img.shields.io/pypi/v/bochscpu-python.svg)](https://pypi.org/project/bochscpu-python/) 8 | [![Downloads](https://static.pepy.tech/badge/bochscpu-python)](https://pepy.tech/project/bochscpu-python) 9 | [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) 10 | [![Licence MIT](https://img.shields.io/packagist/l/doctrine/orm.svg?maxAge=2592000?style=plastic)](https://github.com/hugsy/bochscpu-python/blob/main/LICENSE) 11 | [![Builds](https://github.com/hugsy/bochscpu-python/actions/workflows/build.yml/badge.svg)](https://github.com/hugsy/bochscpu-python/actions/workflows/build.yml) 12 | 13 | Python bindings for [@yrp](https://github.com/yrp604/)'s [BochsCPU](https://github.com/yrp604/bochscpu) using [FFI](https://github.com/yrp604/bochscpu-ffi) to easily and accurately emulate x86 code. 14 | 15 | 16 | ## Install 17 | 18 | `bochscpu-python` requires Python 3.8 or later. 19 | 20 | ### Via PyPI (preferred) 21 | 22 | By far the simplest way to get things up and running is using the [stable packaged version](https://pypi.org/project/bochscpu-python/) on [PyPI](https://pypi.org/) 23 | 24 | ```bash 25 | python -m pip install bochscpu-python 26 | ``` 27 | 28 | 29 | ### Via the generated builds 30 | 31 | Download the latest working artifact from [the repository Github Actions tab](https://github.com/hugsy/bochscpu-python/actions). Extract the ZIP file, install the `.whl` file you'll find inside the `wheel` folder. 32 | 33 | ```bash 34 | python -m pip install wheel/bochscpu-$version-$os-$arch.whl 35 | ``` 36 | 37 | ### From the source repository 38 | 39 | Fairly straightforward: 40 | 41 | ```bash 42 | python -m pip install .
43 | ``` 44 | 45 | Or without cloning 46 | 47 | ```bash 48 | python -m pip install git+https://github.com/hugsy/bochscpu-python.git#egg=bochscpu-python 49 | ``` 50 | 51 | Note that this approach requires all the necessary build tools to be installed (as described below) 52 | 53 | ## Build 54 | 55 | ### Requirements 56 | 57 | * Python 3.8+ (with development kit) 58 | * `cmake` 59 | * `pip` 60 | * a C++20 compatible compiler (tested `cl` for Windows, `clang++` for MacOS and `g++` for Linux) 61 | 62 | ### Steps 63 | 64 | * Build BochsCPU, BochsCPU-FFI, and BochsCPU-Build following the instructions on their respective pages 65 | * ... Alternatively BochsCPU-FFI for Windows & Linux libraries object files can be downloaded from the [`build` Github Actions](https://github.com/hugsy/bochscpu-python/actions/workflows/build.yml?query=branch%3Amain+is%3Asuccess+event%3Apush) 66 | * Move the `*.lib` in `bochscpu/lib/$BuildType/` (where `BuildType` is the cmake build type: `Debug`, `Release`, `RelWithDebInfo`, etc.) 67 | * Install the requirements: `python -m pip install -r requirements.txt` 68 | * Build with `cmake` 69 | This will generate the bochscpu `pyd` file (and its PDB) which you can import from a Python session with `import bochscpu`. 70 | * ... Alternatively you can also generate a `.whl` from the root of the project: 71 | 72 | ```bash 73 | python -m pip wheel . 74 | ``` 75 | 76 | ## Usage 77 | 78 | 79 | Just import the `bochscpu` module and let the fun begin! Installing the package will also install interface files, allowing modern IDEs (VSCode, PyCharm, etc.) to offer useful completion.
80 | 81 | A very crude template can be made as such: 82 | 83 | ```python 84 | import bochscpu, bochscpu.utils, bochscpu.cpu 85 | 86 | sess = bochscpu.Session() 87 | sess.missing_page_handler = bochscpu.utils.callbacks.missing_page_cb 88 | state = bochscpu.State() 89 | bochscpu.cpu.set_real_mode(state) 90 | hook = bochscpu.Hook() 91 | bochscpu.utils.callbacks.install_default_callbacks(hook) 92 | sess.run([hook,]) 93 | ``` 94 | 95 | This will create a bochscpu session, set the CPU state for real mode, and pre-populate hooks with basic callbacks (that only perform a `print` upon execution). Note that this sample will actually do nothing as we haven't allocated code pages nor correctly set registers to point to it. 96 | 97 | A more complete template can be found in the [`examples/` folder](https://github.com/hugsy/bochscpu-python/blob/main/examples/template.py) 98 | 99 | 100 | ## Some Examples 101 | 102 |
103 | 104 | 105 | Emulate a Fibonacci sequence in x64 long mode 106 | 107 | 108 | 109 | 110 | 111 | https://github.com/hugsy/bochscpu-python/assets/590234/adc5ac5a-a8eb-4982-9537-5ece3f32f8f8 112 | 113 | 114 | [Code](examples/long_mode_fibonacci.py) 115 |
116 | 117 | 118 |
119 | 120 | 121 | Emulate code from a Windows 11 x64 memory dump 122 | 123 | 124 | ![image](https://github.com/hugsy/bochscpu-python/assets/590234/2ea77b17-cf59-4ec3-a38b-602d63e201f8) 125 | 126 | [Code](examples/long_mode_emulate_windows_kdump.py) 127 |
128 | 129 | 130 |
131 | 132 | Emulate a print("hello world")-like assembly code in 16 bit real mode 133 | 134 | 135 | 136 | https://github.com/hugsy/bochscpu-python/assets/590234/eb06af06-4b10-490e-ae40-a1d0aed333ca 137 | 138 | 139 | [Code](examples/real_mode_print_hello_world.py) 140 |
141 | 142 |
143 | 144 | Emulate Linux Glibc's rand() function on x64 145 | 146 | 147 | 148 | https://github.com/hugsy/bochscpu-python/assets/590234/2486adbc-0878-46f5-83ed-3bcf9774fd26 149 | 150 | 151 | [Code](examples/long_mode_emulate_linux_udump.py) 152 | 153 |
154 | 155 | ## Enjoy 🍻 156 | 157 | 158 | 159 | -------------------------------------------------------------------------------- /python/src/bochscpu_callbacks.cpp: -------------------------------------------------------------------------------- 1 | #include "bochscpu.hpp" 2 | 3 | #define ExecuteCallback(Context, Name, ...) \ 4 | { \ 5 | if ( !Context ) \ 6 | { \ 7 | err("Context for callback '" #Name "' is unexpectedly null"); \ 8 | return; \ 9 | } \ 10 | BochsCPU::Hook* hook = reinterpret_cast(Context); \ 11 | BochsCPU::Session* sess = (BochsCPU::Session*)hook->ctx; \ 12 | if ( !sess ) \ 13 | { \ 14 | err("Session for BochsCPU::Hook(%p)->" #Name " is null", hook); \ 15 | return; \ 16 | } \ 17 | if ( hook->Name ) \ 18 | { \ 19 | hook->Name(sess, __VA_ARGS__); \ 20 | return; \ 21 | } \ 22 | dbg("Callback BochsCPU::Hook(%p)->" #Name " in Session(%p) is null", sess, hook); \ 23 | return; \ 24 | } 25 | 26 | namespace BochsCPU::Callbacks 27 | { 28 | 29 | 30 | void 31 | before_execution_cb(context_t* ctx, uint32_t cpu_id, void* insn) 32 | { 33 | ExecuteCallback(ctx, before_execution, cpu_id, insn); 34 | } 35 | 36 | void 37 | after_execution_cb(context_t* ctx, uint32_t cpu_id, void* insn) 38 | { 39 | ExecuteCallback(ctx, after_execution, cpu_id, insn); 40 | } 41 | 42 | void 43 | reset_cb(context_t* ctx, uint32_t cpu_id, unsigned int type) 44 | { 45 | ExecuteCallback(ctx, reset, cpu_id, type); 46 | } 47 | 48 | void 49 | hlt_cb(context_t* ctx, uint32_t cpu_id) 50 | { 51 | ExecuteCallback(ctx, hlt, cpu_id); 52 | } 53 | 54 | void 55 | mwait_cb(context_t* ctx, uint32_t cpu_id, uint64_t addr, uintptr_t len, uint32_t flags) 56 | { 57 | ExecuteCallback(ctx, mwait, cpu_id, addr, len, flags); 58 | } 59 | 60 | void 61 | cnear_branch_taken_cb(context_t* ctx, uint32_t cpu_id, uint64_t branch_eip, uint64_t new_branch_eip) 62 | { 63 | ExecuteCallback(ctx, cnear_branch_taken, cpu_id, branch_eip, new_branch_eip); 64 | } 65 | 66 | void 67 | cnear_branch_not_taken_cb(context_t* ctx, 
uint32_t cpu_id, uint64_t branch_eip, uint64_t new_branch_eip) 68 | { 69 | ExecuteCallback(ctx, cnear_branch_not_taken, cpu_id, branch_eip, new_branch_eip); 70 | } 71 | 72 | void 73 | ucnear_branch_cb(context_t* ctx, uint32_t cpu_id, unsigned what, uint64_t branch_eip, uint64_t new_eip) 74 | { 75 | ExecuteCallback(ctx, ucnear_branch, cpu_id, what, branch_eip, new_eip); 76 | } 77 | 78 | void 79 | far_branch_cb( 80 | context_t* ctx, 81 | uint32_t cpu_id, 82 | uint32_t what, 83 | uint16_t new_cs, 84 | uint64_t new_eip, 85 | uint16_t cs, 86 | uint64_t eip) 87 | { 88 | ExecuteCallback(ctx, far_branch, cpu_id, what, new_cs, new_eip, cs, eip); 89 | } 90 | 91 | void 92 | vmexit_cb(context_t* ctx, uint32_t cpu_id, uint32_t reason, uint64_t qualification) 93 | 94 | { 95 | ExecuteCallback(ctx, vmexit, cpu_id, reason, qualification); 96 | } 97 | 98 | void 99 | interrupt_cb(context_t* ctx, uint32_t cpu_id, unsigned vector) 100 | { 101 | ExecuteCallback(ctx, interrupt, cpu_id, vector); 102 | } 103 | 104 | void 105 | hw_interrupt_cb(context_t* ctx, uint32_t cpu_id, unsigned vector, uint16_t cs, uint64_t eip) 106 | { 107 | ExecuteCallback(ctx, hw_interrupt, cpu_id, vector, cs, eip); 108 | } 109 | 110 | void 111 | clflush_cb(context_t* ctx, uint32_t cpu_id, uint64_t laddr, uint64_t paddr) 112 | { 113 | ExecuteCallback(ctx, clflush, cpu_id, laddr, paddr); 114 | } 115 | 116 | void 117 | tlb_cntrl_cb(context_t* ctx, uint32_t cpu_id, unsigned what, uint64_t new_cr_value) 118 | { 119 | ExecuteCallback(ctx, tlb_cntrl, cpu_id, what, new_cr_value); 120 | } 121 | 122 | void 123 | cache_cntrl_cb(context_t* ctx, uint32_t cpu_id, unsigned what) 124 | { 125 | ExecuteCallback(ctx, cache_cntrl, cpu_id, what); 126 | } 127 | 128 | void 129 | prefetch_hint_cb(context_t* ctx, uint32_t cpu_id, unsigned what, unsigned seg, uint64_t offset) 130 | { 131 | ExecuteCallback(ctx, prefetch_hint, cpu_id, what, seg, offset); 132 | } 133 | 134 | void 135 | wrmsr_cb(context_t* ctx, uint32_t cpu_id, unsigned msr, 
uint64_t value) 136 | { 137 | ExecuteCallback(ctx, wrmsr, cpu_id, msr, value); 138 | } 139 | 140 | void 141 | repeat_iteration_cb(context_t* ctx, uint32_t cpu_id, void* insn) 142 | { 143 | ExecuteCallback(ctx, repeat_iteration, cpu_id, insn); 144 | } 145 | 146 | void 147 | lin_access_cb(context_t* ctx, uint32_t cpu_id, uint64_t lin, uint64_t phy, uintptr_t len, uint32_t rw, uint32_t access) 148 | { 149 | ExecuteCallback(ctx, lin_access, cpu_id, lin, phy, len, rw, access); 150 | } 151 | 152 | void 153 | phy_access_cb(context_t* ctx, uint32_t cpu_id, uint64_t lin, uintptr_t phy, uint32_t len, uint32_t rw) 154 | { 155 | ExecuteCallback(ctx, phy_access, cpu_id, lin, phy, len, rw); 156 | } 157 | 158 | void 159 | inp_cb(context_t* ctx, uint16_t cpu_id, uintptr_t len) 160 | { 161 | ExecuteCallback(ctx, inp, cpu_id, len); 162 | } 163 | 164 | void 165 | inp2_cb(context_t* ctx, uint16_t cpu_id, uintptr_t len, unsigned val) 166 | { 167 | ExecuteCallback(ctx, inp2, cpu_id, len, val); 168 | } 169 | 170 | void 171 | outp_cb(context_t* ctx, uint16_t cpu_id, uintptr_t len, unsigned val) 172 | { 173 | ExecuteCallback(ctx, outp, cpu_id, len, val); 174 | } 175 | 176 | void 177 | opcode_cb(context_t* ctx, uint32_t cpu_id, void* insn, uint8_t* opcode, uintptr_t len, bool is32, bool is64) 178 | { 179 | ExecuteCallback(ctx, opcode, cpu_id, insn, opcode, len, is32, is64); 180 | } 181 | 182 | void 183 | exception_cb(context_t* ctx, uint32_t cpu_id, unsigned vector, unsigned error_code) 184 | { 185 | ExecuteCallback(ctx, exception, cpu_id, vector, error_code); 186 | } 187 | 188 | 189 | }; // namespace BochsCPU::Callbacks -------------------------------------------------------------------------------- /examples/long_mode_fibonacci.py: -------------------------------------------------------------------------------- 1 | # 2 | # Example: Run in bochscpu Fibonacci sequence in long mode 3 | # Requires: keystone-engine, capstone 4 | # 5 | 6 | import time 7 | 8 | import capstone 9 | import keystone 
def emulate(code: bytes):
    """Run `code` in a fresh bochscpu session configured for long mode.

    Two host pages are allocated (code and stack), mapped through a PML4 page
    table committed at guest physical address 0x10_0000, and `code` is copied
    to the code page. The CPU state is then set up for long mode and the
    session is run with hooks that count executed instructions and linear
    memory accesses into the module-level `stats`.

    Args:
        code (bytes): the machine code to execute

    Reads the module-level `INSNS` list to report the instruction around which
    execution stopped.
    """
    CODE = 0
    RW = 1

    #
    # Setup the PF handler very early to let Python handle it, rather than rust panicking
    #
    sess = bochscpu.Session()
    dbg(f"created session for cpu#{sess.cpu.id}")

    sess.missing_page_handler = missing_page_cb
    dbg("registered our own missing page handler")

    #
    # Manually craft the guest virtual & physical memory layout into a pagetable
    # Once done, bind the resulting GPAs to bochs
    #
    shellcode_hva = bochscpu.memory.allocate_host_page()
    shellcode_gva = 0x0000_0041_0000_0000
    shellcode_gpa = 0x0000_0000_1400_0000
    dbg(f"inserting {shellcode_gva=:#x} -> {shellcode_gpa=:#x} -> {shellcode_hva=:#x}")
    bochscpu.memory.page_insert(shellcode_gpa, shellcode_hva)

    stack_hva = bochscpu.memory.allocate_host_page()
    stack_gva = 0x0401_0000_0000
    stack_gpa = 0x1401_0000
    dbg(f"inserting {stack_gva=:#x} -> {stack_gpa=:#x} -> {stack_hva=:#x}")
    bochscpu.memory.page_insert(stack_gpa, stack_hva)

    pt = bochscpu.memory.PageMapLevel4Table()
    pt.insert(stack_gva, stack_gpa, RW)
    pt.insert(shellcode_gva, shellcode_gpa, CODE)

    assert pt.translate(stack_gva) == stack_gpa
    assert pt.translate(shellcode_gva) == shellcode_gpa

    pml4 = 0x10_0000
    layout = pt.commit(pml4)

    # Expose every page backing the page table itself to bochs, and check that
    # each mapping resolves back to the host page it was given.
    for hva, gpa in layout:
        bochscpu.memory.page_insert(gpa, hva)
        evaled_gpa = bochscpu.memory.phy_translate(gpa)
        assert evaled_gpa == hva, f"{evaled_gpa=:#x} != {hva=:#x}"

    evaled_gpa = bochscpu.memory.virt_translate(pml4, shellcode_gva)
    assert evaled_gpa == shellcode_gpa, f"{evaled_gpa=:#x} != {shellcode_gpa=:#x}"
    evaled_gpa = bochscpu.memory.virt_translate(pml4, stack_gva)
    assert evaled_gpa == stack_gpa, f"{evaled_gpa=:#x} != {stack_gpa=:#x}"

    bochscpu.utils.dump_page_table(pml4)

    dbg(f"copy code to {shellcode_gva=:#x}")
    assert bochscpu.memory.virt_write(pml4, shellcode_gva, bytearray(code))
    dbg(f"copied to {shellcode_gva=:#x}, testing...")
    data = bochscpu.memory.virt_read(pml4, shellcode_gva, len(code))
    assert data
    assert bytes(data) == bytes(code), f"{bytes(data).hex()} != {bytes(code).hex()}"
    dbg("success")

    #
    # Create a state and load it into a new CPU
    #
    state = bochscpu.State()

    #
    # Setup control registers to enable PG/PE and long mode
    #
    bochscpu.cpu.set_long_mode(state)

    #
    # Initialize CR3 with PML4 base.
    #
    state.cr3 = pml4

    #
    # Set the other registers
    #
    state.rsp = stack_gva + PAGE_SIZE // 2
    state.rip = shellcode_gva

    #
    # Set the selectors
    #
    cs = bochscpu.Segment()
    cs.present = True
    cs.selector = 0x33
    cs.base = 0
    cs.limit = 0xFFFF_FFFF
    cs.attr = 0x22FB
    state.cs = cs
    ds = bochscpu.Segment()
    ds.present = True
    ds.selector = 0x2B
    ds.base = 0
    ds.limit = 0xFFFF_FFFF
    ds.attr = 0xCF3
    state.ds = ds
    state.ss = ds
    state.es = ds
    state.fs = ds
    state.gs = ds

    #
    # Assign the state
    #
    sess.cpu.state = state
    dbg("loaded state for cpu#0")
    dbg("dumping start state")
    bochscpu.utils.dump_registers(sess.cpu.state)

    hook = bochscpu.Hook()
    hook.exception = exception_cb
    hook.after_execution = after_execution_cb
    hook.lin_access = lin_access_cb
    dbg("hook setup ok")

    dbg("starting the vm...")
    t1 = time.time_ns()
    sess.run(
        [
            hook,
        ]
    )
    t2 = time.time_ns()
    # The clock may not have advanced if the run stopped immediately; clamp the
    # divisor so the throughput computation cannot raise ZeroDivisionError.
    elapsed_ns = max(t2 - t1, 1)
    dbg(
        f"vm stopped, execution: {stats.insn_nb} insns in {t2-t1}ns (~{int(stats.insn_nb // (elapsed_ns/1_000_000_000))}) insn/s"
    )
    dbg(
        f"mem accesses: read={stats.mem_access[bochscpu.memory.AccessType.Read]} "
        f"write={stats.mem_access[bochscpu.memory.AccessType.Write]} "
        f"execute={stats.mem_access[bochscpu.memory.AccessType.Execute]}"
    )

    if stats.insn_nb < len(INSNS):
        dbg(f"last insn executed: {INSNS[stats.insn_nb]}")
    # The guard must cover the index actually used (`insn_nb + 1`); the previous
    # `insn_nb - 1 < len(INSNS)` allowed an out-of-range access (IndexError).
    if stats.insn_nb + 1 < len(INSNS):
        dbg(f"next insn: {INSNS[stats.insn_nb+1]}")

    dbg("reading new state")
    new_state = sess.cpu.state
    dbg("dumping final state")
    bochscpu.utils.dump_registers(new_state)

    bochscpu.memory.release_host_page(stack_hva)
    bochscpu.memory.release_host_page(shellcode_hva)
36 | ): 37 | v = bochscpu.cpu.ExceptionType(vector) 38 | match (vector, error_code): 39 | case _: 40 | logging.warning(f"cpu#{cpu_id} received exception({v}, {error_code=:d}) ") 41 | sess.stop() 42 | 43 | 44 | # 45 | # All the other callback prototypes - see `instrumentation.txt` 46 | # 47 | def cache_cntrl_cb(sess: bochscpu.Session, cpu_id: int, what: int): 48 | pass 49 | 50 | 51 | def clflush_cb(sess: bochscpu.Session, cpu_id: int, lin_addr: int, phy_addr: int): 52 | pass 53 | 54 | 55 | def cnear_branch_not_taken_cb( 56 | sess: bochscpu.Session, cpu_id: int, branch_ip: int, new_ip: int 57 | ): 58 | pass 59 | 60 | 61 | def cnear_branch_taken_cb( 62 | sess: bochscpu.Session, cpu_id: int, branch_ip: int, new_ip: int 63 | ): 64 | pass 65 | 66 | 67 | def far_branch_cb( 68 | sess: bochscpu.Session, 69 | cpu_id: int, 70 | what: int, 71 | prev_cs: int, 72 | prev_ip: int, 73 | new_cs: int, 74 | new_ip: int, 75 | ): 76 | pass 77 | 78 | 79 | def hlt_cb(sess: bochscpu.Session, cpu_id: int): 80 | pass 81 | 82 | 83 | def hw_interrupt_cb(sess: bochscpu.Session, cpu_id: int, vector: int, cs: int, ip: int): 84 | pass 85 | 86 | 87 | def inp_cb(sess: bochscpu.Session, cpu_id: int, len: int): 88 | pass 89 | 90 | 91 | def inp2_cb(sess: bochscpu.Session, cpu_id: int, len: int, val: int): 92 | pass 93 | 94 | 95 | def interrupt_cb(sess: bochscpu.Session, cpu_id: int, int_num: int): 96 | pass 97 | 98 | 99 | def lin_access_cb( 100 | sess: bochscpu.Session, 101 | cpu_id: int, 102 | lin: int, 103 | phy: int, 104 | len: int, 105 | memtype: int, 106 | rw: int, 107 | ): 108 | pass 109 | 110 | 111 | def mwait_cb(sess: bochscpu.Session, cpu_id: int, addr: int, len: int, flags: int): 112 | pass 113 | 114 | 115 | def opcode_cb( 116 | sess: bochscpu.Session, 117 | cpu_id: int, 118 | insn: int, 119 | opcode: int, 120 | len: int, 121 | is32: bool, 122 | is64: bool, 123 | ): 124 | pass 125 | 126 | 127 | def outp_cb(sess: bochscpu.Session, cpu_id: int, len: int, val: int): 128 | pass 129 | 130 | 131 | 
def emulate():
    """Template emulation run: one 0xCC-filled page executed in real mode.

    Allocates a host page, maps it at guest physical address 0x7000, creates a
    session and a real-mode CPU state pointing at that page, installs every
    callback defined in this module on a single hook, runs the session, then
    prints the final RIP and releases the host page.
    """
    #
    # Allocate a page on host, expose it to bochs, and fill it up
    #
    code_hva = bochscpu.memory.allocate_host_page()
    code_gpa = 0x0000_7000
    bochscpu.memory.page_insert(code_gpa, code_hva)
    bochscpu.memory.phy_write(code_gpa, b"\xcc" * bochscpu.memory.page_size())

    #
    # Create a session. A session **MUST** have at least a callback pointing to a custom
    # page fault handler
    #
    sess = bochscpu.Session()
    sess.missing_page_handler = bochscpu.utils.callbacks.missing_page_cb

    #
    # Create a CPU state and assign it to the session. The different x86 modes can be set
    # thanks to helpers located in `bochscpu.cpu.set_XXXX_mode` where XXXX can be:
    #  - real
    #  - virtual8086
    #  - protected
    #  - long
    #
    state = bochscpu.State()
    bochscpu.cpu.set_real_mode(state)

    state.rip = code_gpa
    state.rsp = code_gpa + 0x0800
    sess.cpu.state = state

    #
    # Define a hook: a hook is one specific set of callbacks. Many hooks can be created, and
    # they are all chained together when running the code
    #
    # The instrumentation bible is here
    # https://github.com/bochs-emu/Bochs/blob/master/bochs/instrument/instrumentation.txt
    #
    hook = bochscpu.Hook()
    hook.after_execution = after_execution_cb
    hook.before_execution = before_execution_cb
    hook.cache_cntrl = cache_cntrl_cb
    hook.clflush = clflush_cb
    hook.cnear_branch_not_taken = cnear_branch_not_taken_cb
    hook.cnear_branch_taken = cnear_branch_taken_cb
    hook.exception = exception_cb
    hook.far_branch = far_branch_cb
    hook.hlt = hlt_cb
    hook.hw_interrupt = hw_interrupt_cb
    hook.inp = inp_cb
    # `inp2_cb` is defined with the other prototypes but was never installed.
    hook.inp2 = inp2_cb
    hook.interrupt = interrupt_cb
    hook.lin_access = lin_access_cb
    hook.mwait = mwait_cb
    hook.opcode = opcode_cb
    hook.outp = outp_cb
    hook.phy_access = phy_access_cb
    hook.prefetch_hint = prefetch_hint_cb
    hook.repeat_iteration = repeat_iteration_cb
    hook.reset = reset_cb
    hook.tlb_cntrl = tlb_cntrl_cb
    hook.ucnear_branch = ucnear_branch_cb
    hook.vmexit = vmexit_cb
    hook.wrmsr = wrmsr_cb

    #
    # Since 0.2.0 you can use the `callbacks` helper module to quickly install default
    # callbacks as such:
    #
    # bochscpu.utils.callbacks.install_default_callbacks(hook)
    #

    #
    # Create the hook chain
    #
    hooks = [
        hook,  # here we only have one but we can set many
    ]

    #
    # Start the emulation
    #
    sess.run(hooks)

    #
    # With the execution finished, you can read the final state of the CPU
    #
    final_state = sess.cpu.state
    print(f"RIP={final_state.rip:#x}")
    bochscpu.memory.release_host_page(code_hva)
register `dr1` in the current state 66 | 67 | --- 68 | 69 | ### property State.dr2 70 | 71 | Get/Set the register `dr2` in the current state 72 | 73 | --- 74 | 75 | ### property State.dr3 76 | 77 | Get/Set the register `dr3` in the current state 78 | 79 | --- 80 | 81 | ### property State.dr6 82 | 83 | Get/Set the register `dr6` in the current state 84 | 85 | --- 86 | 87 | ### property State.dr7 88 | 89 | Get/Set the register `dr7` in the current state 90 | 91 | --- 92 | 93 | ### property State.ds 94 | 95 | Get/Set the register `ds` in the current state 96 | 97 | --- 98 | 99 | ### property State.efer 100 | 101 | Get/Set the register `efer` in the current state 102 | 103 | --- 104 | 105 | ### property State.es 106 | 107 | Get/Set the register `es` in the current state 108 | 109 | --- 110 | 111 | ### property State.fpcw 112 | 113 | Get/Set the register `fpcw` in the current state 114 | 115 | --- 116 | 117 | ### property State.fpop 118 | 119 | Get/Set the register `fpop` in the current state 120 | 121 | --- 122 | 123 | ### property State.fpst 124 | 125 | Get/Set the register `fpst` in the current state 126 | 127 | --- 128 | 129 | ### property State.fpsw 130 | 131 | Get/Set the register `fpsw` in the current state 132 | 133 | --- 134 | 135 | ### property State.fptw 136 | 137 | Get/Set the register `fptw` in the current state 138 | 139 | --- 140 | 141 | ### property State.fs 142 | 143 | Get/Set the register `fs` in the current state 144 | 145 | --- 146 | 147 | ### property State.gdtr 148 | 149 | Get/Set the register `gdtr` in the current state 150 | 151 | --- 152 | 153 | ### property State.gs 154 | 155 | Get/Set the register `gs` in the current state 156 | 157 | --- 158 | 159 | ### property State.idtr 160 | 161 | Get/Set the register `idtr` in the current state 162 | 163 | --- 164 | 165 | ### property State.kernel_gs_base 166 | 167 | Get/Set the register `kernel_gs_base` in the current state 168 | 169 | --- 170 | 171 | ### property State.ldtr 172 | 173 | Get/Set the 
register `ldtr` in the current state 174 | 175 | --- 176 | 177 | ### property State.lstar 178 | 179 | Get/Set the register `lstar` in the current state 180 | 181 | --- 182 | 183 | ### property State.mxcsr 184 | 185 | Get/Set the register `mxcsr` in the current state 186 | 187 | --- 188 | 189 | ### property State.mxcsr_mask 190 | 191 | Get/Set the register `mxcsr_mask` in the current state 192 | 193 | --- 194 | 195 | ### property State.pat 196 | 197 | Get/Set the register `pat` in the current state 198 | 199 | --- 200 | 201 | ### property State.r10 202 | 203 | Get/Set the register `r10` in the current state 204 | 205 | --- 206 | 207 | ### property State.r11 208 | 209 | Get/Set the register `r11` in the current state 210 | 211 | --- 212 | 213 | ### property State.r12 214 | 215 | Get/Set the register `r12` in the current state 216 | 217 | --- 218 | 219 | ### property State.r13 220 | 221 | Get/Set the register `r13` in the current state 222 | 223 | --- 224 | 225 | ### property State.r14 226 | 227 | Get/Set the register `r14` in the current state 228 | 229 | --- 230 | 231 | ### property State.r15 232 | 233 | Get/Set the register `r15` in the current state 234 | 235 | --- 236 | 237 | ### property State.r8 238 | 239 | Get/Set the register `r8` in the current state 240 | 241 | --- 242 | 243 | ### property State.r9 244 | 245 | Get/Set the register `r9` in the current state 246 | 247 | --- 248 | 249 | ### property State.rax 250 | 251 | Get/Set the register `rax` in the current state 252 | 253 | --- 254 | 255 | ### property State.rbp 256 | 257 | Get/Set the register `rbp` in the current state 258 | 259 | --- 260 | 261 | ### property State.rbx 262 | 263 | Get/Set the register `rbx` in the current state 264 | 265 | --- 266 | 267 | ### property State.rcx 268 | 269 | Get/Set the register `rcx` in the current state 270 | 271 | --- 272 | 273 | ### property State.rdi 274 | 275 | Get/Set the register `rdi` in the current state 276 | 277 | --- 278 | 279 | ### property State.rdx 280 | 
281 | Get/Set the register `rdx` in the current state 282 | 283 | --- 284 | 285 | ### property State.rflags 286 | 287 | Get/Set the register `rflags` in the current state 288 | 289 | --- 290 | 291 | ### property State.rip 292 | 293 | Get/Set the register `rip` in the current state 294 | 295 | --- 296 | 297 | ### property State.rsi 298 | 299 | Get/Set the register `rsi` in the current state 300 | 301 | --- 302 | 303 | ### property State.rsp 304 | 305 | Get/Set the register `rsp` in the current state 306 | 307 | --- 308 | 309 | ### property State.seed 310 | 311 | Get/Set the seed in the current state 312 | 313 | --- 314 | 315 | ### property State.sfmask 316 | 317 | Get/Set the register `sfmask` in the current state 318 | 319 | --- 320 | 321 | ### property State.ss 322 | 323 | Get/Set the register `ss` in the current state 324 | 325 | --- 326 | 327 | ### property State.star 328 | 329 | Get/Set the register `star` in the current state 330 | 331 | --- 332 | 333 | ### property State.sysenter_cs 334 | 335 | Get/Set the register `sysenter_cs` in the current state 336 | 337 | --- 338 | 339 | ### property State.sysenter_eip 340 | 341 | Get/Set the register `sysenter_eip` in the current state 342 | 343 | --- 344 | 345 | ### property State.sysenter_esp 346 | 347 | Get/Set the register `sysenter_esp` in the current state 348 | 349 | --- 350 | 351 | ### property State.tr 352 | 353 | Get/Set the register `tr` in the current state 354 | 355 | --- 356 | 357 | ### property State.tsc 358 | 359 | Get/Set the register `tsc` in the current state 360 | 361 | --- 362 | 363 | ### property State.tsc_aux 364 | 365 | Get/Set the register `tsc_aux` in the current state 366 | 367 | --- 368 | 369 | ### property State.xcr0 370 | 371 | Get/Set the register `xcr0` in the current state 372 | 373 | --- 374 | 375 | ### property State.zmm 376 | 377 | Get/Set the register `zmm` in the current state 378 | 379 | 380 | 381 | 382 | 383 | --- 384 | 385 | _This file was automatically generated via 
[lazydocs](https://github.com/ml-tooling/lazydocs)._ 386 | -------------------------------------------------------------------------------- /examples/real_mode_print_hello_world.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import sys 3 | 4 | import keystone 5 | import bochscpu 6 | import bochscpu.cpu 7 | import bochscpu.memory 8 | import bochscpu.utils 9 | 10 | PAGE_SIZE = bochscpu.utils.PAGE_SIZE 11 | 12 | 13 | def missing_page_cb(gpa: int): 14 | raise Exception(f"missing_page_cb({gpa=:#x})") 15 | 16 | 17 | def before_execution_cb(sess: bochscpu.Session, cpu_id: int, insn: int): 18 | logging.debug(f"[CPU#{cpu_id}] before PC={sess.cpu.rip:#08x}") 19 | 20 | 21 | def after_execution_cb(sess: bochscpu.Session, cpu_id: int, insn: int): 22 | logging.debug(f"[CPU#{cpu_id}] after PC={sess.cpu.rip:#08x}") 23 | 24 | 25 | def cnear_branch_not_taken_cb( 26 | sess: bochscpu.Session, cpu_id: int, before: int, after: int 27 | ): 28 | logging.debug( 29 | f"in cnear_branch_not_taken_cb, {cpu_id=:#x} {before=:#x} {after=:#x}" 30 | ) 31 | 32 | 33 | def cnear_branch_taken_cb(sess: bochscpu.Session, cpu_id: int, before: int, after: int): 34 | logging.debug(f"in cnear_branch_taken_cb, {cpu_id=:#x} {before=:#x} {after=:#x}") 35 | 36 | 37 | def hlt_cb(sess: bochscpu.Session, reason: int): 38 | logging.debug(f"in hlt_cb, {reason=:#x}") 39 | sess.stop() 40 | 41 | 42 | def interrupt_cb(sess: bochscpu.Session, cpu_id: int, int_num: int): 43 | logging.debug(f"in interrupt_cb, {cpu_id=} received {int_num=:#x}") 44 | mode = sess.cpu.state.rax >> 8 45 | match int_num, mode: 46 | case 0x10, 0x0E: 47 | # 48 | # This is the main juice of the emulated interruption 49 | # ref: https://en.wikipedia.org/wiki/INT_10H 50 | # 51 | char = chr(sess.cpu.state.rax & 0xFF) 52 | print(f"{char}", end="") 53 | 54 | # 55 | # We've emulated a print, let's cheap and just resume executing at the next IP 56 | # (avoid creating an IDT etc.) 
def _real_mode_segments(code_gpa: int, data_gpa: int, stack_gpa: int):
    """Build the CS, SS and DS segments used by the real-mode demo.

    Intel 3A - 3.4.3
    > For virtually any kind of program execution to take place, at least the code-segment (CS),
    > data-segment (DS), and stack-segment (SS) registers must be loaded with valid segment selectors

    Returns:
        the `(cs, ss, ds)` tuple of `bochscpu.Segment` objects
    """
    logging.debug("Setting up CPU segments")

    # CS segment
    _cs = bochscpu.Segment()
    _cs.base = code_gpa
    _cs.limit = PAGE_SIZE
    _cs.selector = 6 << 3 | 0 << 2 | 0  # DescriptorTable[6], TI=0->GDT, RPL=0
    cs_attr = bochscpu.cpu.SegmentFlags()
    cs_attr.P = True
    cs_attr.E = True
    cs_attr.DB = False
    cs_attr.G = False
    cs_attr.S = True
    cs_attr.DPL = 0
    cs_attr.R = True
    _cs.attr = int(cs_attr)
    _cs.present = True

    # SS segment
    _ss = bochscpu.Segment()
    _ss.present = True
    _ss.base = stack_gpa
    _ss.limit = PAGE_SIZE >> 4  # Granularity is 16b (D off)
    _ss.selector = 8 << 3 | 0 << 2 | 0  # DescriptorTable[8], TI=0->GDT, RPL=0
    ss_attr = bochscpu.cpu.SegmentFlags()
    ss_attr.P = True
    ss_attr.DB = False
    ss_attr.E = False
    ss_attr.D = True
    ss_attr.W = True
    ss_attr.A = True
    ss_attr.S = True
    ss_attr.DPL = 0
    _ss.attr = int(ss_attr)

    # DS segment
    _ds = bochscpu.Segment()
    _ds.present = True
    _ds.base = data_gpa
    _ds.limit = PAGE_SIZE
    _ds.selector = 10 << 3 | 0 << 2 | 0  # DescriptorTable[10], TI=0->GDT, RPL=0
    ds_attr = bochscpu.cpu.SegmentFlags()
    ds_attr.P = True
    ds_attr.E = False
    ds_attr.D = False
    ds_attr.W = True
    ds_attr.G = False
    ds_attr.S = True
    _ds.attr = int(ds_attr)

    return _cs, _ss, _ds


def emulate(code_str: str, data: bytes):
    """Assemble `code_str` as 16-bit code and run it in a real-mode session.

    One code page, one data page and one stack page are mapped, the CS/DS/SS
    segments are pointed at them, and the session is run with a set of demo
    hooks. The host pages are released when the function exits, including
    when the setup or the emulation raises.

    Args:
        code_str: the assembly source, compiled with Keystone in 16-bit mode
        data: the bytes written at the start of the data page
    """
    #
    # Use Keystone to compile the assembly
    #
    ks = keystone.Ks(keystone.KS_ARCH_X86, keystone.KS_MODE_16)
    code, _ = ks.asm(code_str)
    assert isinstance(code, list)
    code = bytes(code)
    logging.debug(f"Compiled {len(code)} bytes")
    code = code.ljust(
        PAGE_SIZE, b"\xcc"
    )  # Make sure we hit an exception, helpful for debug
    assert len(code) == PAGE_SIZE

    code_gpa = 0x0000_1000
    data_gpa = 0x0000_2000
    stack_gpa = 0x0000_8000

    # Every allocated host page is recorded here right away so that the
    # `finally` clause below can release it whatever happens afterwards.
    host_pages: list[int] = []
    try:
        #
        # Create the pages (the stack page has no initial content)
        #
        logging.debug("Allocating host pages")
        for gpa, content in ((code_gpa, code), (data_gpa, data), (stack_gpa, None)):
            hva = bochscpu.memory.allocate_host_page()
            host_pages.append(hva)
            bochscpu.memory.page_insert(gpa, hva)
            if content is not None:
                bochscpu.memory.phy_write(gpa, bytearray(content))

        _cs, _ss, _ds = _real_mode_segments(code_gpa, data_gpa, stack_gpa)

        #
        # Create the VM session, add the missing page handler
        #
        sess = bochscpu.Session()
        sess.missing_page_handler = missing_page_cb

        #
        # And initialize a CPU state
        #
        state = bochscpu.State()
        bochscpu.cpu.set_real_mode(state)

        #
        # Assign the segments to the state: CS, DS and SS are always required
        #
        state.ds = _ds

        # The SP will be at ss:[0x0800]
        state.ss = _ss
        state.rsp = 0x0800

        # The PC will be at cs:[0x0000]
        state.cs = _cs
        state.rip = 0x0000

        #
        # Ok! Our CPU is now in a valid state for segmentation, which allows
        # logical addresses to be resolved. All that is left to do is to apply
        # the state to the emulation session.
        #
        logging.debug("Applying CPU state")
        sess.cpu.state = state

        #
        # For demo purposes, we add a few callbacks but they're not all useful.
        #
        hook = bochscpu.Hook()
        hook.after_execution = after_execution_cb
        hook.before_execution = before_execution_cb
        hook.cnear_branch_not_taken = cnear_branch_not_taken_cb
        hook.cnear_branch_taken = cnear_branch_taken_cb
        hook.exception = exception_cb
        hook.hlt = hlt_cb
        hook.interrupt = interrupt_cb

        hooks = [hook]

        logging.debug(
            f"Starting emulation at cs:{state.rip:#08x} with {len(hooks)} hookchain(s)"
        )
        if logging.getLogger().isEnabledFor(logging.DEBUG):
            bochscpu.utils.dump_registers(sess.cpu.state)
        sess.run(hooks)
        sess.stop()

        logging.debug(
            f"Stopped emulation at cs:{sess.cpu.rip:#08x}, final register state"
        )
        if logging.getLogger().isEnabledFor(logging.DEBUG):
            bochscpu.utils.dump_registers(sess.cpu.state)
    finally:
        #
        # Cleanup
        #
        logging.debug("Cleaning up")
        for hva in host_pages:
            bochscpu.memory.release_host_page(hva)
class GenericControlRegister:
    """Wrapper to easily manipulate a control register flag by flag.

    Subclasses are expected to set three attributes:
      * `name`: the display name of the register
      * `value`: the backing object; every flag listed in `bit` is read and
        written as an attribute of that object, and `int(value)` gives the raw
        register value
      * `bit`: the flag table, one `FlagType` tuple per flag

    Flags can then be accessed directly as attributes of the wrapper
    (e.g. `reg.PE = True`); writes to flags declared read-only are refused.
    """

    # tuple(bitposition, name, description, rw)
    FlagType = tuple[int, str, str, bool]
    bit: list[FlagType] = []
    name: str

    def __int__(self) -> int:
        return int(self.value)

    def __repr__(self) -> str:
        return str(self)

    def __str__(self) -> str:
        flags = ",".join([x[1] for x in self.bit if getattr(self.value, x[1])])
        return f"{self.name}({flags=})"

    def _lookup(self, name: str):
        """Return the unique flag entry called `name`, or None when there is
        no such entry (or when the name is ambiguous)."""
        matches = [x for x in self.bit if x[1] == name]
        return matches[0] if len(matches) == 1 else None

    def find(self, name: str) -> FlagType:
        """Return the flag entry called `name`.

        Raises:
            IndexError: if the register has no unique flag with that name
        """
        flag = self._lookup(name)
        if flag is None:
            raise IndexError(f"No index {name}")
        return flag

    def __getattr__(self, __name: str) -> bool:
        """Facilitate read access to flags.

        Only called when the regular attribute lookup failed: the name is then
        resolved against the flag table and read from the backing value.

        Args:
            __name (str): the flag name

        Returns:
            bool: the current state of the flag

        Raises:
            AttributeError: if `__name` is not a known flag
        """
        attr = __name
        flag = self._lookup(attr)
        if flag is None:
            raise AttributeError(
                f"'{type(self).__name__}' object has no attribute '{attr}'"
            )
        return getattr(self.value, flag[1])

    def __setattr__(self, __name: str, __value: Any) -> None:
        """Facilitate direct write access to flags, and validate the access.

        Names that are not flags are stored as regular attributes. Writing to
        a flag declared read-only prints a message and leaves it unchanged.

        Args:
            __name (str): the attribute or flag name
            __value (Any): the new value
        """
        flag = self._lookup(__name)
        if flag is None:
            return super().__setattr__(__name, __value)
        if not flag[3]:
            print(f"Field '{flag[1]}' (bit {flag[0]}) is read-only")
        else:
            setattr(self.value, __name, __value)

    def __ior__(self, other: int):
        """Load every writable flag from `other` (`reg |= other`).

        Note that despite the operator, writable flags that are clear in
        `other` are cleared in the register; read-only flags are skipped.

        Args:
            other: a raw integer value, or another control register
        """
        # Convert first: applying `&` directly to a register operand would
        # invoke its `__and__`, which takes a bit position rather than a mask.
        raw = int(other)
        for pos, name, _, rw in self.bit:
            if not rw:
                continue
            new_value = raw & (1 << pos) != 0
            setattr(self, name, new_value)
        return self

    def __and__(self, bitpos: int) -> bool:
        """Tell whether the bit at position `bitpos` is set (`reg & bitpos`)."""
        return (int(self) & (1 << bitpos)) != 0
class EFER(GenericControlRegister):
    """Flag-level wrapper for the EFER register.

    Ref:
        AMD Vol2 3.1.7

    Every flag of the table is declared writable. The flags are backed by a
    `bochscpu.cpu.FeatureRegister` object.
    """

    def __init__(self, initial_value: int):
        # (bit position, flag name, description)
        layout = (
            (15, "TCE", "Translation Cache Extension"),
            (14, "FFXSR", "Fast FXSAVE/FXRSTOR"),
            (13, "LMSLE", "Long Mode Segment Limit Enable"),
            (12, "SVME", "Secure Virtual Machine Enable"),
            (11, "NXE", "No-Execute Enable"),
            (10, "LMA", "Long Mode Active"),
            (8, "LME", "Long Mode Enable"),
            (0, "SCE", "System Call Extensions"),
        )
        self.name = "EFER"
        self.value = bochscpu.cpu.FeatureRegister()
        self.bit = [(position, flag, text, True) for position, flag, text in layout]

        # A zero initial value leaves the backing register untouched
        if not initial_value:
            return
        self |= initial_value
class XCR0(GenericControlRegister):
    """Flag-level wrapper for the XCR0 register.

    Ref:
        AMD Vol2 11.5.2

    The `X` and `x87` flags are declared read-only in the table below; the
    other flags are writable. The flags are backed by a
    `bochscpu.cpu.ControlRegister` object.
    """

    def __init__(self, initial_value: int):
        read_only, writable = False, True

        reserved = (
            63,
            "X",
            "Reserved specifically for XCR0 bit vector expansion.",
            read_only,
        )
        lwp = (
            62,
            "LWP",
            "When set, Lightweight Profiling (LWP) extensions are enabled and XSAVE/XRSTOR supports LWP state management.",
            writable,
        )
        ymm = (
            2,
            "YMM",
            "When set, 256-bit SSE state management is supported by XSAVE/XRSTOR. Must be set to enable AVX extensions",
            writable,
        )
        sse = (
            1,
            "SSE",
            "When set, 128-bit SSE state management is supported by XSAVE/XRSTOR. This bit must be set if YMM is set. Must be set to enable AVX extensions.",
            writable,
        )
        x87 = (
            0,
            "x87",
            "x87 FPU state management is supported by XSAVE/XRSTOR. Must be set to 1.",
            read_only,
        )

        self.name = "XCR0"
        self.value = bochscpu.cpu.ControlRegister()
        self.bit = [reserved, lwp, ymm, sse, x87]

        # A zero initial value leaves the backing register untouched
        if not initial_value:
            return
        self |= initial_value
@dataclass
class SessionPerf:
    """Counters collected during one emulation run."""

    # Timestamps in nanoseconds, taken right before and right after the run
    start_time_ns: int = 0
    end_time_ns: int = 0
    # Number of instructions executed during the run
    executed_instruction: int = 0
    # Number of host pages allocated to back guest pages
    allocated_pages: int = 0

    @property
    def execution_time_ns(self) -> int:
        """Duration of the run, in nanoseconds."""
        return self.end_time_ns - self.start_time_ns

    @property
    def execution_time(self) -> float:
        """Duration of the run, in seconds."""
        return self.execution_time_ns / 1_000_000_000

    @property
    def average(self) -> float:
        """Average number of instructions executed per second.

        Returns 0.0 when no meaningful duration is available, i.e. when the
        duration is zero or negative (for instance while the end timestamp has
        not been recorded yet).
        """
        elapsed = self.execution_time
        if elapsed <= 0:
            return 0.0

        return self.executed_instruction / elapsed
def missing_page_cb(pa):
    """Lazily map a guest physical page from the kernel dump.

    The page-aligned address of `pa` is looked up in the dump. When the dump
    holds that page, a host page is allocated, recorded in `hvas`, mapped at
    the guest physical address and filled with the content read from the dump.

    Otherwise the page is really missing: the session is stopped and an
    exception is raised.

    Args:
        pa: the guest physical address that triggered the callback

    Raises:
        Exception: if the page cannot be provided from the dump
    """
    global session, dmp, hvas, perf
    assert dmp and session

    gpa = bochscpu.memory.align_address_to_page(pa)
    logging.debug(f"Missing GPA={gpa:#x}")

    if gpa in dmp.pages:
        # lazily handle missing page: first try to look into the dump, if found load it to mem.
        # The dump is read *before* allocating so that an unreadable page does
        # not leave an orphaned host page behind.
        page = dmp.read_physical_page(gpa)
        hva = bochscpu.memory.allocate_host_page() if page else 0
        if hva:
            # Track the host page immediately: `hvas` is the list used for the
            # final cleanup, so the page is released even if mapping raises.
            hvas.append(hva)
            perf.allocated_pages += 1
            bochscpu.memory.page_insert(gpa, hva)
            bochscpu.memory.phy_write(gpa, page)
            logging.debug(f"{gpa=:#x} -> {hva=:#x}")
            # we've successfully mapped it
            return

    # otherwise the page is really missing, bail
    session.stop()
    raise Exception(f"Missing page at GPA={gpa:#x}")
| else: 128 | reason += ( 129 | "page-level protection violation" 130 | if error_code & 1 131 | else "non-present page" 132 | ) 133 | reason += ", write access" if error_code & (1 << 1) else ", read access" 134 | reason += ( 135 | ", user mode" if error_code & (1 << 2) else ", supervisor mode" 136 | ) 137 | reason += ( 138 | ", instruction fetch" 139 | if error_code & (1 << 4) 140 | else ", not an instruction fetch" 141 | ) 142 | 143 | logging.warning( 144 | f"[CPU#{cpu_id}] pagefault on VA={state.cr2:#016x} at IP={state.rip:#016x}: {reason=}" 145 | ) 146 | 147 | case _: 148 | logging.error( 149 | f"[CPU#{cpu_id}] received exception({exception=}, {error_code=:#x}) " 150 | ) 151 | sess.stop() 152 | 153 | 154 | def before_execution_cb(sess: bochscpu.Session, cpu_id: int, _: int): 155 | global perf 156 | perf.executed_instruction += 1 157 | state = sess.cpu.state 158 | insn = disass(state, state.rip) 159 | logging.debug( 160 | f"[CPU#{cpu_id}] PC={state.rip:#x} {insn.bytes.hex()} - {insn.mnemonic} {insn.op_str}" 161 | ) 162 | 163 | 164 | def after_execution_cb(sess: bochscpu.Session, cpu_id: int, _: int): 165 | global emulation_end_address 166 | 167 | if not emulation_end_address: 168 | return 169 | 170 | if emulation_end_address == sess.cpu.state.rip: 171 | logging.info( 172 | f"[CPU#{cpu_id}] Reaching end address @ {emulation_end_address}, ending emulation" 173 | ) 174 | sess.stop() 175 | 176 | 177 | def resolve_function(symbol: str) -> int: 178 | kernel32 = ctypes.windll.kernel32 179 | kernel32.GetModuleHandleW.argtypes = [ctypes.c_wchar_p] 180 | kernel32.GetModuleHandleW.restype = ctypes.c_void_p 181 | kernel32.GetProcAddress.argtypes = [ctypes.c_void_p, ctypes.c_char_p] 182 | kernel32.GetProcAddress.restype = ctypes.c_void_p 183 | 184 | dll, func = symbol.split("!", 1) 185 | if not dll.lower().endswith(".dll"): 186 | dll += ".dll" 187 | logging.info(f"Looking up {func} in {dll}") 188 | handle = kernel32.GetModuleHandleW(dll) 189 | address: int = 
def emulate(dmp_path: pathlib.Path):
    """Resume the execution captured in a Windows kernel dump.

    The dump is parsed, a long-mode CPU state is rebuilt from the dump context
    (control registers, flags, general purpose registers, segments and
    descriptor tables), and the session is run with the exception and
    before/after-execution hooks. Guest pages are mapped lazily by the
    missing page handler. Timing information is stored in the global `perf`.

    Args:
        dmp_path: path to the kernel dump file
    """
    global session, dmp

    assert session is None

    logging.info(f"Parsing {dmp_path}")
    dmp = kdmp_parser.KernelDumpParser(dmp_path)
    assert dmp

    logging.info(f"Successfully parsed {dmp_path}")
    logging.debug(f"{dmp=}")

    session = bochscpu.Session()
    session.missing_page_handler = missing_page_cb

    logging.debug("Preparing CPU state")
    state = bochscpu.State()
    bochscpu.cpu.set_long_mode(state)

    logging.debug("Enabling MMX (SSE/AVX) instructions")
    cr0 = bochscpu.utils.cpu.CR0(state.cr0)
    cr4 = bochscpu.utils.cpu.CR4(state.cr4)
    xcr0 = bochscpu.utils.cpu.XCR0(state.xcr0)
    # See AMD Vol2 - 11.3
    cr0.MP = True
    cr0.EM = False
    cr4.OSFXSR = True
    cr4.OSXSAVE = True
    # See AMD Vol2 - 11.5.2
    xcr0.x87 = True
    xcr0.SSE = True
    xcr0.YMM = True

    # TODO use bdump.js::regs.json instead
    logging.debug(f"Setting {cr0=:}")
    logging.debug(f"Setting {cr4=:}")
    logging.debug(f"Setting {xcr0=:}")
    state.cr0 = int(cr0)
    state.cr4 = int(cr4)
    state.xcr0 = int(xcr0)

    cr3 = dmp._KernelDumpParser__dump.GetDirectoryTableBase()  # type: ignore # HACK
    logging.debug(f"Setting CR3={cr3:#x}")
    state.cr3 = cr3

    logging.debug("Setting the flag register")
    # The flags register is saved in the `EFlags` field of the context.
    # `ContextFlags` is the mask describing which parts of the CONTEXT
    # structure are valid, not a register value.
    state.rflags = dmp.context.EFlags

    logging.debug("Setting the other GPRs")
    for regname in (
        "rax",
        "rbx",
        "rcx",
        "rdx",
        "rsi",
        "rdi",
        "rip",
        "rsp",
        "rbp",
        "r8",
        "r9",
        "r10",
        "r11",
        "r12",
        "r13",
        "r14",
        "r15",
    ):
        # the context fields are capitalized (`Rax`, `R8`, ...)
        value = int(getattr(dmp.context, regname.capitalize()))
        setattr(state, regname, value)

    logging.debug("Setting the segment selectors")
    _cs = bochscpu.Segment()
    _cs.base = 0
    _cs.limit = 0xFFFF_FFFF
    _cs.selector = dmp.context.SegCs
    _cs_attr = bochscpu.cpu.SegmentFlags()
    _cs_attr.A = True
    _cs_attr.R = True
    _cs_attr.E = True
    _cs_attr.S = True
    _cs_attr.P = True
    _cs_attr.L = True
    _cs.attr = int(_cs_attr)
    _ds = bochscpu.Segment()
    _ds.base = 0
    _ds.limit = 0xFFFF_FFFF
    _ds.selector = dmp.context.SegDs
    _ds.attr = 0xCF3
    _es = bochscpu.Segment()
    _es.base = 0
    _es.limit = 0xFFFF_FFFF
    _es.selector = dmp.context.SegEs
    _es.attr = 0xCF3
    _ss = bochscpu.Segment()
    _ss.base = 0
    _ss.limit = 0xFFFF_FFFF
    _ss.selector = dmp.context.SegSs
    _ss.attr = 0xCF3
    _fs = bochscpu.Segment()
    _fs.base = 0
    _fs.limit = 0xFFFF_FFFF
    _fs.selector = dmp.context.SegFs
    _fs.present = True
    _fs.attr = 0x4F3
    _gs = bochscpu.Segment()
    _gs.base = 0
    _gs.limit = 0xFFFF_FFFF
    _gs.selector = dmp.context.SegGs
    _gs.present = True
    _gs.attr = 0xCF3

    state.ss = _ss
    state.cs = _cs
    state.ds = _ds
    state.es = _es
    state.fs = _fs
    state.gs = _gs

    # NOTE(review): the descriptor table bases below are hard-coded values,
    # presumably taken from one specific dump - confirm before reusing.
    _idtr = bochscpu.GlobalSegment()
    _idtr.base = 0xFFFFF8065ED68000
    _idtr.limit = 0x0FFF
    state.idtr = _idtr

    _gdtr = bochscpu.GlobalSegment()
    _gdtr.base = 0xFFFFF8065ED6AFB0
    _gdtr.limit = 0x57
    state.gdtr = _gdtr

    # NOTE(review): this task register segment is built but never assigned to
    # the state (see the TODO below); its base also looks truncated compared
    # to the IDTR/GDTR bases - confirm the value before applying it.
    _tr = bochscpu.Segment()
    _tr.base = 0x00000005ED69000
    _tr.limit = 0x67
    _tr.selector = 0x40
    _tr.attr = 0x8B

    # TODO missing kernel_gs_base
    # TODO missing gdtr/ldtr/idtr/tr
    # TODO missing simd/fpu

    # RIP should be at an int3 instruction, so skip over it so we can resume execution
    state.rip += 1

    logging.debug("Apply the created state to the session CPU")
    session.cpu.state = state

    logging.debug("Preparing hooks")
    hook = bochscpu.Hook()
    hook.exception = exception_cb
    hook.before_execution = before_execution_cb
    hook.after_execution = after_execution_cb

    logging.debug("Initial register state")
    bochscpu.utils.dump_registers(session.cpu.state, True)

    logging.debug("Let's go baby!")

    perf.start_time_ns = int(time.time_ns())
    session.run(
        [
            hook,
        ]
    )

    session.stop()
    perf.end_time_ns = int(time.time_ns())

    logging.debug("Final register state")
    bochscpu.utils.dump_registers(session.cpu.state, True)

    logging.debug(f"{perf=}")
    logging.info(f"{perf.average=} insn/s")
if __name__ == "__main__":
    logging.basicConfig(format="%(levelname)s:%(message)s", level=logging.DEBUG)

    # Fail with a usage message rather than an IndexError/AssertionError
    if len(sys.argv) < 2:
        sys.exit(f"usage: {sys.argv[0]} <path/to/kernel/dump>")
    arg = pathlib.Path(sys.argv[1]).resolve()
    if not arg.exists():
        sys.exit(f"{arg}: no such file")

    # Register the cleanup *before* emulating: the host pages allocated by the
    # missing page handler are then released even if the emulation raises.
    atexit.register(clean)
    emulate(arg)
arm64} 29 | - {os: macos-15, arch: arm64} 30 | runs-on: ${{ matrix.variant.os }} 31 | name: bochscpu / ${{ matrix.variant.os }} / ${{ matrix.variant.arch }} 32 | steps : 33 | - name: Checkout 34 | uses: actions/checkout@v4 35 | 36 | - name: Cache Artifacts 37 | id: cache-artifacts 38 | uses: actions/cache@v4 39 | with: 40 | path: artifact 41 | key: bochscpu-libs-${{ matrix.variant.os }}-${{ matrix.variant.arch }}-${{ env.BOCHS_REV }} 42 | 43 | - if: steps.cache-artifacts.outputs.cache-hit != 'true' && startsWith(matrix.variant.os, 'windows-') 44 | run: echo NB_CPU=$env:NUMBER_OF_PROCESSORS | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append 45 | 46 | - if: steps.cache-artifacts.outputs.cache-hit != 'true' && startsWith(matrix.variant.os, 'ubuntu-') 47 | run: echo "NB_CPU=$(grep -c ^processor /proc/cpuinfo)" >> $GITHUB_ENV 48 | 49 | - if: steps.cache-artifacts.outputs.cache-hit != 'true' && startsWith(matrix.variant.os, 'macos-') 50 | run: echo NB_CPU=$(sysctl -n hw.ncpu) >> $GITHUB_ENV 51 | 52 | - uses: microsoft/setup-msbuild@v2 53 | if: steps.cache-artifacts.outputs.cache-hit != 'true' && startsWith(matrix.variant.os, 'windows-') 54 | 55 | - uses: ilammy/msvc-dev-cmd@v1 56 | if: steps.cache-artifacts.outputs.cache-hit != 'true' && startsWith(matrix.variant.os, 'windows-') 57 | 58 | - name: Setup 59 | run: | 60 | rustup update 61 | 62 | - name: Build BochsCPU (Windows) 63 | if: steps.cache-artifacts.outputs.cache-hit != 'true' && startsWith(matrix.variant.os, 'windows-') 64 | run: | 65 | .\.github\build-bochscpu.ps1 66 | mkdir artifact; mkdir artifact/release; mkdir artifact/debug; mkdir artifact/relwithdebinfo 67 | cp -Verbose bxbuild/bochscpu-ffi/target/release/*.lib artifact/release/ 68 | cp -Verbose bxbuild/bochscpu-ffi/target/debug/*.lib artifact/debug/ 69 | cp -Verbose bxbuild/bochscpu-ffi/target/debug/*.lib artifact/relwithdebinfo/ 70 | 71 | - name: Build BochsCPU (Linux & MacOS) 72 | if: steps.cache-artifacts.outputs.cache-hit != 'true' && 
startsWith(matrix.variant.os, 'windows-') == false 73 | run: | 74 | bash .github/build-bochscpu.sh 75 | mkdir artifact artifact/Release artifact/Debug artifact/RelWithDebInfo 76 | cp -v bxbuild/bochscpu-ffi/target/release/lib*.a artifact/Release/ 77 | cp -v bxbuild/bochscpu-ffi/target/debug/lib*.a artifact/Debug/ 78 | cp -v bxbuild/bochscpu-ffi/target/debug/lib*.a artifact/RelWithDebInfo/ 79 | 80 | - name: Upload artifacts 81 | uses: actions/upload-artifact@v4 82 | with: 83 | if-no-files-found: error 84 | name: bochscpu-libs-${{ matrix.variant.os }}-${{ matrix.variant.arch }}-${{ env.BOCHS_REV }} 85 | path: artifact 86 | 87 | bindings: 88 | needs: bochscpu 89 | strategy: 90 | fail-fast: false 91 | matrix: 92 | python-version: ['3.9', '3.10', '3.11', '3.12', '3.13'] 93 | variant: 94 | - {os: windows-2019, arch: x64, config: RelWithDebInfo, py-arch: x64} 95 | - {os: windows-2022, arch: x64, config: RelWithDebInfo, py-arch: x64} 96 | - {os: ubuntu-22.04, arch: x64, config: RelWithDebInfo, py-arch: x64} 97 | - {os: ubuntu-24.04, arch: x64, config: RelWithDebInfo, py-arch: x64} 98 | - {os: ubuntu-24.04-arm, arch: arm64, config: RelWithDebInfo, py-arch: arm64} 99 | - {os: macos-13, arch: x64, config: Release, py-arch: x64} 100 | - {os: macos-14, arch: arm64, config: Release, py-arch: arm64} 101 | - {os: macos-15, arch: arm64, config: Release, py-arch: arm64} 102 | runs-on: ${{ matrix.variant.os }} 103 | name: bindings / ${{ matrix.variant.os }} / ${{ matrix.python-version }} / ${{ matrix.variant.config }} 104 | env: 105 | CMAKE_FLAGS: "" 106 | steps: 107 | - name: Checkout 108 | uses: actions/checkout@v4 109 | 110 | - name: Download BochsCPU libs 111 | uses: actions/download-artifact@v4 112 | id: download_artifact 113 | with: 114 | name: bochscpu-libs-${{ matrix.variant.os }}-${{ matrix.variant.arch }}-${{ env.BOCHS_REV }} 115 | path: bochscpu-artifact 116 | 117 | - name: Setup BochsCPU libs 118 | run: | 119 | mv ${{steps.download_artifact.outputs.download-path}}/* 
bochscpu/lib/ 120 | 121 | - name: Setup Python 122 | uses: actions/setup-python@v5 123 | with: 124 | python-version: ${{ matrix.python-version }} 125 | architecture: ${{ matrix.variant.py-arch }} 126 | 127 | - name: Install Python pre-requisites 128 | run: | 129 | python -m pip install --user --upgrade nanobind 130 | 131 | - name: Environment Setup (Windows) 132 | if: startsWith(matrix.variant.os, 'windows-') 133 | run: | 134 | echo NB_CPU=$env:NUMBER_OF_PROCESSORS | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append 135 | echo CMAKE_ARCH='-A ${{ matrix.variant.arch }}' | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append 136 | Import-Module .\.github\Invoke-VisualStudio.ps1 137 | Invoke-VisualStudio2022${{ matrix.variant.arch }} 138 | 139 | - name: Environment Setup (Linux) 140 | if: startsWith(matrix.variant.os, 'ubuntu-') 141 | run: | 142 | echo "NB_CPU=$(grep -c ^processor /proc/cpuinfo)" >> $GITHUB_ENV 143 | sudo apt-get -y update 144 | sudo apt install -y g++ ninja-build 145 | echo CC=gcc >> $GITHUB_ENV 146 | echo CXX=g++ >> $GITHUB_ENV 147 | 148 | - name: Environment Setup (MacOS) 149 | if: startsWith(matrix.variant.os, 'macos-') 150 | run: | 151 | echo NB_CPU=$(sysctl -n hw.ncpu) >> $GITHUB_ENV 152 | echo CC=gcc >> $GITHUB_ENV 153 | echo CXX=g++ >> $GITHUB_ENV 154 | 155 | - name: Build PYD and WHL 156 | run: | 157 | mkdir build 158 | mkdir artifact 159 | mkdir wheel 160 | cmake -S . 
-B ./build ${{ env.CMAKE_FLAGS }} 161 | cmake --build ./build --verbose --config ${{ matrix.variant.config }} --parallel ${{ env.NB_CPU }} 162 | cmake --install ./build --verbose --config ${{ matrix.variant.config }} --prefix ./artifact 163 | 164 | 165 | - name: Install 166 | run: | 167 | python -m pip install .[tests] --user --upgrade 168 | 169 | - name: Build wheelhouse (x64) 170 | if: matrix.python-version == '3.13' && matrix.variant.arch == 'x64' && matrix.variant.os == 'ubuntu-24.04' 171 | run: | 172 | python -m pip install cibuildwheel==2.22.0 173 | python -m cibuildwheel --output-dir wheelhouse --archs x86_64 174 | 175 | - name: Build wheelhouse (arm64) 176 | if: matrix.python-version == '3.13' && matrix.variant.arch == 'arm64' && matrix.variant.os == 'ubuntu-24.04-arm' 177 | run: | 178 | python -m pip install cibuildwheel==2.22.0 179 | python -m cibuildwheel --output-dir wheelhouse --archs aarch64 180 | 181 | - name: Build wheel (other) 182 | if: matrix.variant.os != 'ubuntu-24.04' || matrix.variant.os != 'ubuntu-24.04-arm' 183 | run: | 184 | python -m pip wheel . 
-w ./wheel 185 | 186 | - name: Upload artifacts 187 | uses: actions/upload-artifact@v4 188 | with: 189 | name: bochscpu-${{ env.VERSION }}-py${{ matrix.python-version }}-${{ matrix.variant.os }}.${{ matrix.variant.config }}-${{ matrix.variant.arch }} 190 | path: | 191 | artifact/ 192 | wheel/ 193 | wheelhouse/ 194 | 195 | tests: 196 | needs: bindings 197 | strategy: 198 | fail-fast: false 199 | matrix: 200 | python-version: ['3.13'] 201 | variant: 202 | - {os: windows-2022, arch: x64, config: RelWithDebInfo, py-arch: x64} 203 | - {os: ubuntu-24.04, arch: x64, config: RelWithDebInfo, py-arch: x64} 204 | - {os: ubuntu-24.04-arm, arch: arm64, config: RelWithDebInfo, py-arch: arm64} 205 | - {os: macos-15, arch: arm64, config: Release, py-arch: arm64} 206 | runs-on: ${{ matrix.variant.os }} 207 | steps: 208 | - uses: actions/download-artifact@v4 209 | with: 210 | name: bochscpu-${{ env.VERSION }}-py${{ matrix.python-version }}-${{ matrix.variant.os }}.${{ matrix.variant.config }}-${{ matrix.variant.arch }} 211 | path: . 
212 | 213 | - name: Setup Python 214 | uses: actions/setup-python@v5 215 | with: 216 | python-version: ${{ matrix.python-version }} 217 | architecture: ${{ matrix.variant.py-arch }} 218 | 219 | - name: Install 220 | shell: bash 221 | run: | 222 | python -m pip install wheel/*.whl --user --upgrade 223 | python -m pip install capstone keystone-engine --user --upgrade 224 | 225 | - name: Checkout 226 | uses: actions/checkout@v4 227 | 228 | - name: Tests 229 | run: | 230 | python -c "import bochscpu" 231 | python examples/long_mode_fibonacci.py 232 | python examples/real_mode_print_hello_world.py --debug 233 | 234 | publish: 235 | needs: tests 236 | strategy: 237 | fail-fast: false 238 | matrix: 239 | python-version: ['3.9', '3.10', '3.11', '3.12', '3.13'] 240 | variant: 241 | - {os: windows-2019, arch: x64, config: RelWithDebInfo, py-arch: x64} 242 | - {os: windows-2022, arch: x64, config: RelWithDebInfo, py-arch: x64} 243 | - {os: ubuntu-22.04, arch: x64, config: RelWithDebInfo, py-arch: x64} 244 | - {os: ubuntu-24.04, arch: x64, config: RelWithDebInfo, py-arch: x64} 245 | - {os: ubuntu-24.04-arm, arch: arm64, config: RelWithDebInfo, py-arch: arm64} 246 | - {os: macos-13, arch: x64, config: Release, py-arch: x64} 247 | - {os: macos-14, arch: arm64, config: Release, py-arch: arm64} 248 | - {os: macos-15, arch: arm64, config: Release, py-arch: arm64} 249 | runs-on: ubuntu-24.04 250 | if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags') 251 | name: PyPI upload of ${{ matrix.variant.os }}/${{ matrix.variant.arch }}/${{ matrix.variant.config }}/py${{ matrix.python-version }} 252 | environment: 253 | name: pypi 254 | url: https://pypi.org/p/bochscpu-python 255 | permissions: 256 | id-token: write 257 | steps: 258 | - name: Download artifact for ${{ matrix.variant.os }}/${{ matrix.variant.arch }}/${{ matrix.python-version }}/${{ matrix.variant.config }} 259 | uses: actions/download-artifact@v4 260 | with: 261 | name: bochscpu-${{ env.VERSION }}-py${{ 
matrix.python-version }}-${{ matrix.variant.os }}.${{ matrix.variant.config }}-${{ matrix.variant.arch }} 262 | path: wheel 263 | - name: Cleanup 264 | run: | 265 | rm wheel/wheel/nanobind-* 266 | rm wheel/wheel/setuptools-* 267 | rm wheel/wheel/wheel-* 268 | - name: Publish package distributions to PyPI 269 | uses: pypa/gh-action-pypi-publish@release/v1 270 | with: 271 | packages-dir: wheel/wheel/ 272 | print-hash: true 273 | -------------------------------------------------------------------------------- /python/bochscpu/utils/callbacks.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import bochscpu 4 | import bochscpu.cpu 5 | import bochscpu.memory 6 | import bochscpu.utils 7 | 8 | 9 | def missing_page_cb(gpa: int): 10 | """Edit this function to change the page fault behavior 11 | Args: 12 | gpa (int): the physical address of where the page fault occured 13 | """ 14 | raise RuntimeError(f"missing_page_cb({gpa=:#x})") 15 | 16 | 17 | def before_execution_cb(sess: bochscpu.Session, cpu_id: int, _: int): 18 | """Default callback for `before_execution_cb` 19 | 20 | The callback is called each time, when Bochs simulator starts a new 21 | instruction execution. In case of repeat instruction the callback will 22 | be called only once before the first iteration will be started. 23 | """ 24 | logging.debug(f"[CPU#{cpu_id}] before PC={sess.cpu.rip:#x}") 25 | 26 | 27 | def after_execution_cb(sess: bochscpu.Session, cpu_id: int, _: int): 28 | """Default callback for `after_execution_cb` 29 | 30 | The callback is called each time, when Bochs simulator finishes any 31 | instruction execution. In case of repeat instruction the callback will 32 | be called only once after all repeat iterations. 
33 | """ 34 | logging.debug(f"[CPU#{cpu_id}] after PC={sess.cpu.rip:#x}") 35 | 36 | 37 | def exception_cb( 38 | sess: bochscpu.Session, 39 | cpu_id: int, 40 | vector: int, 41 | error_code: int, 42 | ): 43 | """Default callback for `exception` 44 | 45 | The callback is called each time, when Bochs simulator executes an exception. 46 | """ 47 | whatwhat = bochscpu.cpu.ExceptionType(vector) 48 | match (vector, error_code): 49 | case _: 50 | logging.warning( 51 | f"cpu#{cpu_id} received exception({whatwhat}, {error_code=:d}) " 52 | ) 53 | sess.stop() 54 | 55 | 56 | def cache_cntrl_cb(sess: bochscpu.Session, cpu_id: int, what: int): 57 | """Default callback for `cache_cntrl` 58 | 59 | The callback is called each time, when Bochs simulator executes a cache/tlb 60 | control instruction. 61 | """ 62 | whatwhat = bochscpu.CacheControlType(what) 63 | logging.debug(f"cache_cntrl_cb({sess=}, {cpu_id=}, {whatwhat})") 64 | 65 | 66 | def clflush_cb(sess: bochscpu.Session, cpu_id: int, lin_addr: int, phy_addr: int): 67 | """Default callback for `clflush` 68 | 69 | The callback is called each time the CLFLUSH instruction is executed. 70 | """ 71 | logging.debug(f"clflush_cb({sess=}, {cpu_id=}, {lin_addr=:#x}, {phy_addr=:#x})") 72 | 73 | 74 | def cnear_branch_not_taken_cb( 75 | sess: bochscpu.Session, cpu_id: int, branch_ip: int, new_ip: int 76 | ): 77 | """Default callback for `cnear_branch_not_taken` 78 | 79 | The callback is called each time, when currently executed instruction is a 80 | conditional near branch and it is not taken. 81 | """ 82 | logging.debug( 83 | f"cnear_branch_not_taken_cb({sess=}, {cpu_id=}, {branch_ip=:#x}, {new_ip=:#x})" 84 | ) 85 | 86 | 87 | def cnear_branch_taken_cb( 88 | sess: bochscpu.Session, cpu_id: int, branch_ip: int, new_ip: int 89 | ): 90 | """Default callback for `cnear_branch_taken` 91 | 92 | The callback is called each time, when currently executed instruction is a 93 | conditional near branch and it is taken. 
94 | """ 95 | logging.debug( 96 | f"cnear_branch_taken_cb({sess=}, {cpu_id=}, {branch_ip=:#x}, {new_ip=:#x})" 97 | ) 98 | 99 | 100 | def far_branch_cb( 101 | sess: bochscpu.Session, 102 | cpu_id: int, 103 | what: int, 104 | prev_cs: int, 105 | prev_ip: int, 106 | new_cs: int, 107 | new_ip: int, 108 | ): 109 | """Default callback for `far_branch` 110 | 111 | The callback is called each time, when currently executed instruction is an 112 | unconditional far branch (always taken). 113 | """ 114 | whatwhat = bochscpu.InstructionType(what) 115 | logging.debug( 116 | f"far_branch_cb({sess=}, {cpu_id=}, {whatwhat}, {prev_cs=:#x}, {prev_ip=:#x}, {new_cs=:#x}, {new_ip=:#x})" 117 | ) 118 | 119 | 120 | def hlt_cb(sess: bochscpu.Session, cpu_id: int): 121 | """Default callback for `hlt` 122 | 123 | The callback is called each time, when Bochs' emulated CPU enters HALT or 124 | SHUTDOWN state. 125 | """ 126 | logging.debug(f"hlt_cb({sess=}, {cpu_id=})") 127 | 128 | 129 | def hw_interrupt_cb(sess: bochscpu.Session, cpu_id: int, vector: int, cs: int, ip: int): 130 | """Default callback for `hw_interrupt` 131 | 132 | The callback is called each time, when Bochs simulator executes a hardware 133 | interrupt. 134 | """ 135 | logging.debug(f"hw_interrupt_cb({sess=}, {cpu_id=}, {vector=}, {cs=}, {ip=:#x})") 136 | 137 | 138 | def inp_cb(sess: bochscpu.Session, cpu_id: int, len: int): 139 | """Default callback for `inp` 140 | 141 | These callback functions are a feedback from various system devices. 142 | """ 143 | logging.debug(f"inp_cb({sess=}, {cpu_id=}, {len=})") 144 | 145 | 146 | def inp2_cb(sess: bochscpu.Session, cpu_id: int, len: int, val: int): 147 | """Default callback for `inp2` 148 | 149 | These callback functions are a feedback from various system devices. 
150 | """ 151 | logging.debug(f"inp2_cb({sess=}, {cpu_id=}, {len=}, {val=})") 152 | 153 | 154 | def interrupt_cb(sess: bochscpu.Session, cpu_id: int, int_num: int): 155 | """Default callback for `interrupt` 156 | 157 | The callback is called each time, when Bochs simulator executes an interrupt 158 | (software interrupt, hardware interrupt or an exception). 159 | """ 160 | logging.debug(f"interrupt_cb({sess=}, {cpu_id=}, {int_num=})") 161 | 162 | 163 | def lin_access_cb( 164 | sess: bochscpu.Session, 165 | cpu_id: int, 166 | lin: int, 167 | phy: int, 168 | len: int, 169 | memtype: int, 170 | rw: int, 171 | ): 172 | """Default callback for `lin_access` 173 | 174 | The callback is called each time, when Bochs simulator executes a linear 175 | memory access. Note that no page split accesses will be generated because 176 | Bochs splits page split accesses to two different memory accesses during its 177 | execution flow. The callback also will not be generated in case of direct 178 | physical memory access like page walks, SMM, VMM or SVM operations. 179 | """ 180 | whatwhat = bochscpu.memory.AccessType(memtype) 181 | logging.debug( 182 | f"lin_access_cb( {sess=}, {cpu_id=}, {lin=:#x}, {phy=:#x}, {len=}, {whatwhat=}, {rw=})" 183 | ) 184 | 185 | 186 | def mwait_cb(sess: bochscpu.Session, cpu_id: int, addr: int, len: int, flags: int): 187 | """Default callback for `mwait` 188 | 189 | The callback is called each time, when Bochs' emulated CPU enters to the MWAIT 190 | state. The callback receives monitored memory range and MWAIT flags as a 191 | parameters. 192 | """ 193 | logging.debug(f"mwait_cb({sess=}, {cpu_id=}, {addr=:#x}, {len=}, {flags=})") 194 | 195 | 196 | def opcode_cb( 197 | sess: bochscpu.Session, 198 | cpu_id: int, 199 | insn: int, 200 | opcode: int, 201 | len: int, 202 | is32: bool, 203 | is64: bool, 204 | ): 205 | """Default callback for `opcode` 206 | 207 | The callback is called each time, when Bochs completes to decode a new 208 | instruction. 
Through this callback function Bochs could provide an opcode of 209 | the instruction, opcode length and an execution mode (16/32/64). 210 | """ 211 | logging.debug( 212 | f"opcode_cb({sess=}, {cpu_id=}, {insn=:#x}, {opcode=:#x}, {len=}, {is32=}, {is64=})" 213 | ) 214 | 215 | 216 | def outp_cb(sess: bochscpu.Session, cpu_id: int, len: int, val: int): 217 | """Default callback for `outp` 218 | 219 | These callback functions are a feedback from various system devices. 220 | """ 221 | logging.debug(f"outp_cb({sess=}, {cpu_id=}, {len=}, {val=})") 222 | 223 | 224 | def phy_access_cb( 225 | sess: bochscpu.Session, cpu_id: int, lin: int, phy: int, len: int, memtype: int 226 | ): 227 | """Default callback for `phy_access` 228 | 229 | The callback is called each time, when Bochs simulator executes a physical 230 | memory access. Physical accesses include memory accesses generated by the 231 | CPU during page walks, SMM, VMM or SVM operations. Note that no page split 232 | accesses will be generated because Bochs splits page split accesses to two 233 | different memory accesses during its execution flow. 234 | """ 235 | whatwhat = bochscpu.memory.AccessType(memtype) 236 | logging.debug( 237 | f"phy_access_cb({sess=}, {cpu_id=}, {lin=:#x}, {phy=:#x}, {len=}, {whatwhat})" 238 | ) 239 | 240 | 241 | def prefetch_hint_cb( 242 | sess: bochscpu.Session, cpu_id: int, what: int, seg: int, offset: int 243 | ): 244 | """Default callback for `prefetch_hint` 245 | 246 | The callback is called each time, when Bochs simulator executes a PREFETCH 247 | instruction. 248 | """ 249 | whatwhat = bochscpu.PrefetchType(what) 250 | logging.debug( 251 | f"prefetch_hint_cb({sess=}, {cpu_id=}, {whatwhat}, {seg=:#x}, {offset=})" 252 | ) 253 | 254 | 255 | def repeat_iteration_cb(sess: bochscpu.Session, cpu_id: int, insn: int): 256 | """Default callback for `repeat_iteration` 257 | 258 | The callback is called each time, when Bochs simulator starts a new repeat 259 | iteration. 
260 | """ 261 | logging.debug(f"repeat_iteration_cb({sess=}, {cpu_id=}, {insn=:#x})") 262 | 263 | 264 | def reset_cb(sess: bochscpu.Session, cpu_id: int, a2: int): 265 | """Default callback for `reset` 266 | 267 | The callback is called each time, when Bochs resets the CPU object. It would 268 | be executed once at the start of simulation and each time that user presses 269 | RESET BUTTON on the simulator's control panel. 270 | """ 271 | logging.debug(f"reset_cb({sess=}, {cpu_id=}, {a2=})") 272 | 273 | 274 | def tlb_cntrl_cb(sess: bochscpu.Session, cpu_id: int, what: int, new_cr_value: int): 275 | """Default callback for `tlb_cntrl` 276 | 277 | The callback is called each time, when Bochs simulator executes a tlb 278 | control instruction. 279 | """ 280 | whatwhat = bochscpu.TlbControlType(what) 281 | logging.debug(f"tlb_cntrl_cb({sess=}, {cpu_id=}, {whatwhat}, {new_cr_value=:#x})") 282 | 283 | 284 | def ucnear_branch_cb( 285 | sess: bochscpu.Session, cpu_id: int, what: int, branch_ip: int, new_branch_ip: int 286 | ): 287 | """Default callback for `ucnear_branch` 288 | 289 | The callback is called each time, when currently executed instruction is an 290 | unconditional near branch (always taken). 291 | """ 292 | whatwhat = bochscpu.InstructionType(what) 293 | logging.debug( 294 | f"ucnear_branch_cb({sess=}, {cpu_id=}, {whatwhat}, {branch_ip=:#x}, {new_branch_ip=:#x})" 295 | ) 296 | 297 | 298 | def vmexit_cb(sess: bochscpu.Session, cpu_id: int, reason: int, qualification: int): 299 | """Default callback for `vmexit` 300 | 301 | This callback is called right before Bochs executes a VMEXIT. 302 | """ 303 | logging.debug(f"vmexit_cb({sess=}, {cpu_id=}, {reason=}, {qualification=})") 304 | 305 | 306 | def wrmsr_cb(sess: bochscpu.Session, cpu_id: int, msr: int, value: int): 307 | """Default callback for `wrmsr` 308 | 309 | This callback is called each time when WRMSR instruction is executed. 
310 | MSR number and written value passed as parameters to the callback function. 311 | """ 312 | logging.debug(f"wrmsr_cb({sess=}, {cpu_id=}, {msr=:#x}, {value=})") 313 | 314 | 315 | def install_default_callbacks(hook: bochscpu.Hook): 316 | """Install all default callbacks to the given hook. 317 | 318 | Args: 319 | hook (bochscpu.Hook): the hook to populate. Set callbacks will be replaced. 320 | """ 321 | hook.after_execution = after_execution_cb 322 | hook.before_execution = before_execution_cb 323 | hook.cache_cntrl = cache_cntrl_cb 324 | hook.clflush = clflush_cb 325 | hook.cnear_branch_not_taken = cnear_branch_not_taken_cb 326 | hook.cnear_branch_taken = cnear_branch_taken_cb 327 | hook.exception = exception_cb 328 | hook.far_branch = far_branch_cb 329 | hook.hlt = hlt_cb 330 | hook.hw_interrupt = hw_interrupt_cb 331 | hook.inp = inp_cb 332 | hook.inp2 = inp2_cb 333 | hook.interrupt = interrupt_cb 334 | hook.lin_access = lin_access_cb 335 | hook.mwait = mwait_cb 336 | hook.opcode = opcode_cb 337 | hook.outp = outp_cb 338 | hook.phy_access = phy_access_cb 339 | hook.prefetch_hint = prefetch_hint_cb 340 | hook.repeat_iteration = repeat_iteration_cb 341 | hook.reset = reset_cb 342 | hook.tlb_cntrl = tlb_cntrl_cb 343 | hook.ucnear_branch = ucnear_branch_cb 344 | hook.vmexit = vmexit_cb 345 | hook.wrmsr = wrmsr_cb 346 | -------------------------------------------------------------------------------- /examples/long_mode_emulate_windows_udump.py: -------------------------------------------------------------------------------- 1 | import ctypes 2 | import logging 3 | import os 4 | import pathlib 5 | import sys 6 | import enum 7 | 8 | import capstone 9 | import udmp_parser 10 | 11 | import bochscpu 12 | import bochscpu.cpu 13 | import bochscpu.memory 14 | import bochscpu.utils 15 | 16 | 17 | kernel32 = ctypes.windll.kernel32 18 | kernel32.GetModuleHandleW.argtypes = [ctypes.c_wchar_p] 19 | kernel32.GetModuleHandleW.restype = ctypes.c_void_p 20 | 
kernel32.GetProcAddress.argtypes = [ctypes.c_void_p, ctypes.c_char_p] 21 | kernel32.GetProcAddress.restype = ctypes.c_void_p 22 | 23 | PAGE_SIZE = bochscpu.utils.PAGE_SIZE 24 | PA_START_ADDRESS = 0x100_0000 25 | PML4_ADDRESS = 0x10_0000 26 | MEM_FREE = 0x00010000 27 | PAGE_NOACCESS = 0x01 28 | 29 | 30 | class Permission(enum.IntEnum): 31 | CODE = 0 32 | RW = 1 33 | 34 | 35 | cs = capstone.Cs(capstone.CS_ARCH_X86, capstone.CS_MODE_64) 36 | 37 | emulation_end_address = 0 38 | 39 | 40 | def hexdump( 41 | source: bytes, length: int = 0x10, separator: str = ".", base: int = 0x00 42 | ) -> str: 43 | result = [] 44 | align = 0x8 * 2 + 2 45 | 46 | def chunk2hexstr(chunk: bytes): 47 | return " ".join(map(lambda x: f"{x:02X}", chunk)) 48 | 49 | def chunk2ascii(chunk: bytes): 50 | return "".join([chr(b) if 0x20 <= b < 0x7F else separator for b in chunk]) 51 | 52 | for i in range(0, len(source), length): 53 | chunk = bytearray(source[i : i + length]) 54 | hexa = chunk2hexstr(chunk) 55 | text = chunk2ascii(chunk) 56 | result.append(f"{base + i:#0{align}x} {hexa:<{3 * length}} {text}") 57 | return os.linesep.join(result) 58 | 59 | 60 | def missing_page_cb(gpa): 61 | raise Exception(f"missing_page_cb({gpa=:#x})") 62 | 63 | 64 | def phy_access_cb( 65 | sess: bochscpu.Session, cpu_id: int, lin: int, phy: int, len: int, rw: int 66 | ): 67 | logging.debug(f"{lin=:#x} -> {phy=:#x}, {len=:#x}, {bool(rw)=}") 68 | 69 | 70 | def exception_cb( 71 | sess: bochscpu.Session, 72 | cpu_id: int, 73 | vector: int, 74 | error_code: int, 75 | ): 76 | excpt = bochscpu.cpu.ExceptionType(vector) 77 | match excpt: 78 | case bochscpu.cpu.ExceptionType.BreakPoint: 79 | logging.info("breakpoint hit") 80 | 81 | case bochscpu.cpu.ExceptionType.PageFault: 82 | logging.warning( 83 | f"pagefault on VA={sess.cpu.cr2:#016x} at IP={sess.cpu.rip:#016x}" 84 | ) 85 | 86 | case _: 87 | logging.error( 88 | f"cpu#{cpu_id} received exception({excpt=}, {error_code=:d}) " 89 | ) 90 | sess.stop() 91 | 92 | 93 | def 
before_execution_cb(sess: bochscpu.Session, cpu_id: int, _: int): 94 | state = sess.cpu.state 95 | raw = bytes(bochscpu.memory.virt_read(PML4_ADDRESS, state.rip, 16)) 96 | insn = next(cs.disasm(raw, state.rip)) 97 | logging.debug( 98 | f"[CPU#{cpu_id}] PC={state.rip:#x} {insn.bytes.hex()} - {insn.mnemonic} {insn.op_str}" 99 | ) 100 | 101 | 102 | def after_execution_cb(sess: bochscpu.Session, cpu_id: int, _: int): 103 | global emulation_end_address 104 | if not emulation_end_address: 105 | return 106 | 107 | if emulation_end_address == sess.cpu.state.rip: 108 | logging.info( 109 | f"Reaching end address @ {emulation_end_address}, ending emulation" 110 | ) 111 | sess.stop() 112 | 113 | 114 | def convert_region_protection(protect: int) -> int: 115 | match protect: 116 | case 0x02: # PAGE_READONLY 117 | return Permission.RW 118 | case 0x04: # PAGE_READWRITE 119 | return Permission.RW 120 | case 0x08: # PAGE_WRITECOPY 121 | return Permission.RW 122 | case 0x10: # PAGE_EXECUTE 123 | return Permission.CODE 124 | case 0x20: # PAGE_EXECUTE_READ 125 | return Permission.CODE 126 | case 0x40: # PAGE_EXECUTE_READWRITE 127 | return Permission.CODE 128 | case 0x80: # PAGE_EXECUTE_WRITECOPY 129 | return Permission.RW 130 | 131 | # logging.warning(f"Unknown {protect=:#x})") 132 | return -1 133 | 134 | 135 | def switch_to_thread(state: bochscpu.State, thread: udmp_parser.Thread): 136 | assert isinstance(thread.Context, udmp_parser.Context64) 137 | 138 | # 139 | # AMD Vol2 - A.1 System Software MSRs 140 | # 141 | FSBase = 0xC000_0100 142 | GSBase = 0xC000_0101 143 | KernelGSBase = 0xC000_0102 144 | 145 | logging.debug(f"Switching context to {thread}") 146 | _cs = bochscpu.Segment() 147 | _cs.base = 0 148 | _cs.limit = 0xFFFF_FFFF 149 | _cs.selector = thread.Context.SegCs 150 | _cs_attr = bochscpu.cpu.SegmentFlags() 151 | _cs_attr.A = True 152 | _cs_attr.R = True 153 | _cs_attr.E = True 154 | _cs_attr.S = True 155 | _cs_attr.P = True 156 | _cs_attr.L = True 157 | _cs.attr = 
int(_cs_attr) 158 | 159 | _ds = bochscpu.Segment() 160 | _ds.base = 0 161 | _ds.limit = 0xFFFF_FFFF 162 | _ds.selector = thread.Context.SegDs 163 | _ds.attr = 0xCF3 164 | 165 | _es = bochscpu.Segment() 166 | _es.base = 0 167 | _es.limit = 0xFFFF_FFFF 168 | _es.selector = thread.Context.SegEs 169 | _es.attr = 0xCF3 170 | _ss = bochscpu.Segment() 171 | _ss.base = 0 172 | _ss.limit = 0xFFFF_FFFF 173 | _ss.selector = thread.Context.SegSs 174 | _ss.attr = 0xCF3 175 | # AMD Vol2 - 4.5.3 176 | # > In 64-bit mode, FS-segment and GS-segment overrides are not checked for limit or attributes. Instead, 177 | # > the processor checks that all virtual-address references are in canonical form 178 | _fs = bochscpu.Segment() 179 | _fs.base = 0 180 | _fs.limit = 0xFFFF_FFFF 181 | _fs.selector = thread.Context.SegFs 182 | _fs.present = True 183 | _fs.attr = 0xCF3 184 | _gs = bochscpu.Segment() 185 | _gs.base = thread.Teb 186 | _gs.limit = 0x0000_0FFF 187 | _gs.selector = thread.Context.SegGs 188 | _gs.present = True 189 | _gs.attr = 0x4F3 190 | 191 | state.ss = _ss 192 | state.cs = _cs 193 | state.ds = _ds 194 | state.es = _es 195 | state.fs = _fs 196 | state.gs = _gs 197 | 198 | state.rip = thread.Context.Rip 199 | state.rsp = thread.Context.Rsp 200 | return 201 | 202 | 203 | def call_function( 204 | sess: bochscpu.Session, 205 | start_address: int, 206 | end_address: int, 207 | args: list[int], 208 | ) -> None: 209 | global emulation_end_address 210 | 211 | state = sess.cpu.state 212 | state.rip = start_address 213 | 214 | if len(args) >= 1: 215 | state.rcx = args[0] 216 | if len(args) >= 2: 217 | state.rdx = args[1] 218 | if len(args) >= 3: 219 | state.r8 = args[2] 220 | if len(args) >= 4: 221 | state.r9 = args[3] 222 | 223 | logging.debug("Preparing hooks") 224 | hook = bochscpu.Hook() 225 | hook.exception = exception_cb 226 | hook.before_execution = before_execution_cb 227 | hook.after_execution = after_execution_cb 228 | 229 | logging.debug("Preparing emulation environment") 
230 | sess.cpu.state = state 231 | 232 | if logging.getLogger().level == logging.DEBUG: 233 | logging.debug("Dumping initial register state") 234 | bochscpu.utils.dump_registers(sess.cpu.state) 235 | 236 | emulation_end_address = end_address 237 | 238 | logging.debug("Start emulation") 239 | sess.run( 240 | [ 241 | hook, 242 | ] 243 | ) 244 | 245 | if logging.getLogger().level == logging.DEBUG: 246 | logging.debug("Dumping final register state") 247 | bochscpu.utils.dump_registers(sess.cpu.state) 248 | return 249 | 250 | 251 | def resolve_function(symbol: str) -> int: 252 | dll, func = symbol.split("!", 1) 253 | if not dll.lower().endswith(".dll"): 254 | dll += ".dll" 255 | logging.info(f"Looking up {func} in {dll}") 256 | handle = kernel32.GetModuleHandleW(dll) 257 | address: int = kernel32.GetProcAddress(handle, func.encode()) 258 | if not address: 259 | raise RuntimeError(f"Failed to resolve {symbol}") 260 | logging.info(f"Resolved '{symbol:s}' -> {address:#x}") 261 | return address 262 | 263 | 264 | def emulate(dmp_path: pathlib.Path): 265 | logging.info(f"Parsing {dmp_path}") 266 | dmp = udmp_parser.UserDumpParser() 267 | assert dmp.Parse(dmp_path) 268 | logging.info(f"Successfully parsed {dmp_path}") 269 | logging.debug(f"{dmp=}") 270 | 271 | sess = bochscpu.Session() 272 | sess.missing_page_handler = missing_page_cb 273 | 274 | logging.debug("Preparing page table") 275 | pt = bochscpu.memory.PageMapLevel4Table() 276 | pa = PA_START_ADDRESS 277 | pgnb = 0 278 | 279 | for _, region in dmp.Memory().items(): 280 | # logging.debug(f"mapping {region=}") 281 | if region.State == MEM_FREE or region.Protect == PAGE_NOACCESS: 282 | continue 283 | start, end = region.BaseAddress, region.BaseAddress + region.RegionSize 284 | for va in range(start, end, PAGE_SIZE): 285 | flags = convert_region_protection(region.Protect) 286 | if flags < 0: 287 | continue 288 | pt.insert(va, pa, flags) 289 | assert pt.translate(va) == pa 290 | hva = bochscpu.memory.allocate_host_page() 
291 | bochscpu.memory.page_insert(pa, hva) 292 | print(f"\bmapped {va=:#x} to {pa=:#x} with {flags=}\r", end="") 293 | pa += PAGE_SIZE 294 | pgnb += 1 295 | 296 | logging.debug(f"{pgnb} pages inserted") 297 | 298 | buffer_hva = bochscpu.memory.allocate_host_page() 299 | buffer_pa = 0x4100_0000 300 | buffer_va = 0x41_0000_0000 301 | pt.insert(buffer_va, buffer_pa, Permission.RW) 302 | bochscpu.memory.page_insert(buffer_pa, buffer_hva) 303 | 304 | stack_hva = bochscpu.memory.allocate_host_page() 305 | stack_pa = 0x4200_0000 306 | stack_va = 0x42_0000_0000 307 | pt.insert(stack_va, stack_pa, Permission.RW) 308 | bochscpu.memory.page_insert(stack_pa, stack_hva) 309 | 310 | logging.debug(f"Committing {pgnb} pages") 311 | layout = pt.commit(PML4_ADDRESS) 312 | for hva, gpa in layout: 313 | bochscpu.memory.page_insert(gpa, hva) 314 | evaled_hva = bochscpu.memory.phy_translate(gpa) 315 | assert evaled_hva == hva, f"{evaled_hva=:#x} == {hva=:#x}" 316 | # print(f"mapped {gpa=:#x} to {hva=:#x}\r", end="") 317 | 318 | # bochscpu.utils.dump_page_table(PML4_ADDRESS) 319 | 320 | logging.debug("Copy memory content") 321 | for _, region in dmp.Memory().items(): 322 | if region.State == MEM_FREE or region.AllocationProtect == PAGE_NOACCESS: 323 | continue 324 | start, end = region.BaseAddress, region.BaseAddress + region.RegionSize 325 | content = dmp.ReadMemory(start, end) 326 | assert content is not None 327 | bochscpu.memory.virt_write(PML4_ADDRESS, start, bytearray(content)) 328 | del content 329 | 330 | logging.debug("Preparing CPU state") 331 | state = bochscpu.State() 332 | bochscpu.cpu.set_long_mode(state) 333 | 334 | logging.debug("Enabling MMX (SSE/AVX) instructions") 335 | cr0 = bochscpu.utils.cpu.CR0(state.cr0) 336 | cr4 = bochscpu.utils.cpu.CR4(state.cr4) 337 | xcr0 = bochscpu.utils.cpu.XCR0(state.xcr0) 338 | # See AMD Vol2 - 11.3 339 | cr0.MP = True 340 | cr0.EM = False 341 | cr4.OSFXSR = True 342 | cr4.OSXSAVE = True 343 | # See AMD Vol2 - 11.5.2 344 | xcr0.x87 = True 
345 | xcr0.SSE = True 346 | xcr0.YMM = True 347 | 348 | logging.debug(f"Setting {cr0=:}") 349 | logging.debug(f"Setting {cr4=:}") 350 | logging.debug(f"Setting {xcr0=:}") 351 | state.cr0 = int(cr0) 352 | state.cr4 = int(cr4) 353 | state.xcr0 = int(xcr0) 354 | 355 | logging.debug(f"Setting PML4 to {PML4_ADDRESS:#x}") 356 | state.cr3 = PML4_ADDRESS 357 | 358 | threads = dmp.Threads() 359 | tids = list(threads.keys()) 360 | switch_to_thread(state, threads[tids[0]]) 361 | 362 | sess.cpu.state = state 363 | 364 | fn_sym = "cryptbase!SystemFunction036" 365 | logging.debug(f"Resolving '{fn_sym}'") 366 | fn_start = resolve_function(fn_sym) 367 | fn_end = fn_start + 0x1C 368 | logging.info(f"{fn_sym} found at {fn_start:#x}") 369 | 370 | call_function( 371 | sess, 372 | fn_start, 373 | fn_end, 374 | [ 375 | buffer_va, 376 | 16, 377 | ], 378 | ) 379 | data = bytes(bochscpu.memory.virt_read(PML4_ADDRESS, buffer_va, 0x10)) 380 | print(hexdump(data)) 381 | 382 | bochscpu.memory.release_host_page(stack_pa) 383 | bochscpu.memory.release_host_page(buffer_pa) 384 | 385 | 386 | if __name__ == "__main__": 387 | logging.basicConfig(format="%(levelname)s:%(message)s", level=logging.DEBUG) 388 | arg = pathlib.Path(sys.argv[1]) 389 | emulate(arg) 390 | -------------------------------------------------------------------------------- /python/src/bochscpu_mem.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include 10 | #include 11 | 12 | #include "bochscpu.hpp" 13 | 14 | /// 15 | /// @brief Protects `g_GlobalPageAllocation` 16 | /// 17 | static std::mutex g_GlobalPageMutex; 18 | 19 | /// 20 | /// @brief Keep track of the allocated pages on the host so we can deallocate them on exit 21 | /// 22 | static std::vector g_GlobalPageAllocation; 23 | 24 | namespace nb = nanobind; 25 | using namespace nb::literals; 26 | 27 | /// 28 | /// @brief BochsCPU Memory 
submodule Python interface 29 | /// 30 | void 31 | bochscpu_memory_module(nb::module_& base_module) 32 | { 33 | auto m = base_module.def_submodule("memory", "Memory module"); 34 | 35 | nb::enum_(m, "AccessType") 36 | .value("Read", BochsCPU::Memory::Access::Read) 37 | .value("Write", BochsCPU::Memory::Access::Write) 38 | .value("Execute", BochsCPU::Memory::Access::Execute); 39 | 40 | m.def("page_size", &BochsCPU::Memory::PageSize); 41 | m.def("align_address_to_page", &BochsCPU::Memory::AlignAddressToPage); 42 | 43 | m.def( 44 | "page_insert", 45 | [](uint64_t gpa, uintptr_t hva) 46 | { 47 | dbg("mapping GPA=%#llx <-> HVA=%#llx", gpa, hva); 48 | ::bochscpu_mem_page_insert(gpa, (uint8_t*)hva); 49 | }, 50 | "Map a GPA to a HVA"); 51 | m.def("page_remove", &bochscpu_mem_page_remove, "gpa"_a); 52 | m.def( 53 | "phy_translate", 54 | [](const uint64_t gpa) 55 | { 56 | return (uintptr_t)(::bochscpu_mem_phy_translate(gpa)); 57 | }, 58 | "gpa"_a); 59 | m.def("virt_translate", &bochscpu_mem_virt_translate, "cr3"_a, "gva"_a); 60 | m.def( 61 | "phy_read", 62 | [](uint64_t gpa, uintptr_t sz) -> std::vector 63 | { 64 | std::vector hva(sz); 65 | ::bochscpu_mem_phy_read(gpa, hva.data(), hva.size()); 66 | return hva; 67 | }, 68 | "gpa"_a, 69 | "size"_a, 70 | "Read from GPA"); 71 | m.def( 72 | "phy_write", 73 | [](uint64_t gpa, std::vector const& bytes) 74 | { 75 | ::bochscpu_mem_phy_write(gpa, bytes.data(), bytes.size()); 76 | }, 77 | "gpa"_a, 78 | "hva"_a, 79 | "Write to GPA"); 80 | m.def( 81 | "virt_write", 82 | [](uint64_t cr3, uint64_t gva, std::vector const& bytes) 83 | { 84 | return ::bochscpu_mem_virt_write(cr3, gva, bytes.data(), bytes.size()) == 0; 85 | }, 86 | "cr3"_a, 87 | "gva"_a, 88 | "bytes"_a, 89 | "Write to GVA"); 90 | m.def( 91 | "virt_read", 92 | [](uint64_t cr3, uint64_t gva, const uint64_t sz) -> std::vector 93 | { 94 | std::vector bytes(sz); 95 | if ( ::bochscpu_mem_virt_read(cr3, gva, bytes.data(), bytes.size()) != 0 ) 96 | { 97 | throw 
std::runtime_error("Invalid access"); 98 | } 99 | return bytes; 100 | }, 101 | "cr3"_a, 102 | "gva"_a, 103 | "sz"_a, 104 | "Read from GVA"); 105 | m.def( 106 | "allocate_host_page", 107 | []() -> uint64_t 108 | { 109 | uint64_t addr = BochsCPU::Memory::AllocatePage(); 110 | if ( !addr ) 111 | { 112 | throw std::runtime_error("page allocation failed"); 113 | } 114 | return addr; 115 | }, 116 | "Allocate a page on the host, returns the HVA on success, 0 otherwise"); 117 | m.def("release_host_page", &BochsCPU::Memory::FreePage, "hva"_a, "Release a page on the host"); 118 | 119 | nb::class_(m, "PageMapLevel4Table") 120 | .def(nb::init<>()) 121 | .def("translate", &BochsCPU::Memory::PageMapLevel4Table::Translate, "gva"_a, "Translate a VA -> PA") 122 | .def( 123 | "insert", 124 | &BochsCPU::Memory::PageMapLevel4Table::Insert, 125 | "va"_a, 126 | "pa"_a, 127 | "flags"_a, 128 | "Associate the VA to PA") 129 | .def( 130 | "commit", 131 | &BochsCPU::Memory::PageMapLevel4Table::Commit, 132 | "pml4_pa"_a, 133 | "Commit the layout of the tree to memory"); 134 | } 135 | 136 | namespace BochsCPU::Memory 137 | { 138 | 139 | uintptr_t 140 | PageSize() 141 | { 142 | return 0x1000; 143 | // TODO: handle 2MB & 1GB pages, see Vol 2 5.1 144 | } 145 | 146 | 147 | uint64_t 148 | AlignAddressToPage(uint64_t va) 149 | { 150 | return va & ~0xfff; 151 | } 152 | 153 | 154 | uint64_t 155 | AllocatePage() 156 | { 157 | #if defined(_WIN32) 158 | auto addr = (uint64_t)::VirtualAlloc(nullptr, Memory::PageSize(), MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE); 159 | #else 160 | auto addr = 161 | (uint64_t)::mmap(nullptr, Memory::PageSize(), PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); 162 | #endif // _WIN32 163 | if ( addr ) 164 | { 165 | std::lock_guard scoped_lock(g_GlobalPageMutex); 166 | g_GlobalPageAllocation.push_back(addr); 167 | } 168 | return addr; 169 | } 170 | 171 | 172 | bool 173 | FreePage(uint64_t addr) 174 | { 175 | #if defined(_WIN32) 176 | bool res = 
::VirtualFree((LPVOID)addr, 0, MEM_RELEASE) == TRUE; 177 | #else 178 | bool res = ::munmap((void*)addr, Memory::PageSize()) == 0; 179 | #endif // _WIN32 180 | if ( res ) 181 | { 182 | std::lock_guard scoped_lock(g_GlobalPageMutex); 183 | std::erase_if( 184 | g_GlobalPageAllocation, 185 | [addr](uint64_t cur_addr) 186 | { 187 | return cur_addr == addr; 188 | }); 189 | } 190 | return res; 191 | } 192 | 193 | 194 | // 195 | // shameless port of @yrp's rust implementation, because it was late and I wanted to finish 196 | // kudos to him 197 | // 198 | 199 | PageMapLevel4Table::~PageMapLevel4Table() 200 | { 201 | for ( auto addr : m_AllocatedPages ) 202 | { 203 | FreePage(addr); 204 | } 205 | } 206 | 207 | std::optional 208 | PageMapLevel4Table::Translate(uint64_t va) 209 | { 210 | // L4 -> L3 211 | auto& pdpe = Entries.at(PageMapLevel4Index(va)); 212 | if ( !pdpe || !pdpe->Flags.test((int)PageDirectoryPointerTable::Flag::Present) ) 213 | return std::nullopt; 214 | 215 | 216 | // L3 -> L2 217 | auto& pde = pdpe->Entries.at(PageDirectoryPointerTableIndex(va)); 218 | if ( !pde || !pde->Flags.test((int)PageDirectory::Flag::Present) ) 219 | return std::nullopt; 220 | 221 | 222 | // L2 -> L1 223 | auto& pte = pde->Entries.at(PageDirectoryIndex(va)); 224 | if ( !pte || !pte->Flags.test((int)PageTable::Flag::Present) ) 225 | return std::nullopt; 226 | 227 | 228 | // L1 -> PA 229 | auto& page = pte->Entries.at(PageTableIndex(va)); 230 | if ( !page || !page->Flags.test((int)PageTableEntry::Flag::Present) ) 231 | return std::nullopt; 232 | 233 | return page->Address; 234 | } 235 | 236 | void 237 | PageMapLevel4Table::Insert(uint64_t va, uint64_t pa, int type) 238 | { 239 | // L4 insertion 240 | uint64_t idx = PageMapLevel4Index(va); 241 | if ( !Entries[idx] ) 242 | Entries[idx] = std::make_unique(); 243 | 244 | auto& pdpe = Entries.at(idx); 245 | pdpe->Flags.set((int)PageDirectoryPointerTable::Flag::Present); 246 | pdpe->Flags.set((int)PageDirectoryPointerTable::Flag::User); 247 
| 248 | if ( type == 1 ) // RW 249 | pdpe->Flags.set((int)PageDirectoryPointerTable::Flag::Writable); 250 | 251 | 252 | // L3 insertion 253 | idx = PageDirectoryPointerTableIndex(va); 254 | if ( !pdpe->Entries[idx] ) 255 | pdpe->Entries[idx] = std::make_unique(); 256 | 257 | auto& pde = pdpe->Entries.at(idx); 258 | pde->Flags.set((int)PageDirectoryPointerTable::Flag::Present); 259 | pde->Flags.set((int)PageDirectoryPointerTable::Flag::User); 260 | 261 | if ( type == 1 ) // RW 262 | pde->Flags.set((int)PageDirectoryPointerTable::Flag::Writable); 263 | 264 | 265 | // L2 insertion 266 | idx = PageDirectoryIndex(va); 267 | if ( !pde->Entries[idx] ) 268 | pde->Entries[idx] = std::make_unique(); 269 | 270 | auto& pte = pde->Entries.at(idx); 271 | pte->Flags.set((int)PageTable::Flag::Present); 272 | pte->Flags.set((int)PageTable::Flag::User); 273 | 274 | if ( type == 1 ) // RW 275 | pte->Flags.set((int)PageDirectoryPointerTable::Flag::Writable); 276 | 277 | 278 | // L1 insertion 279 | idx = PageTableIndex(va); 280 | if ( !pte->Entries[idx] ) 281 | pte->Entries[idx] = std::make_unique(); 282 | 283 | auto& page = pte->Entries.at(idx); 284 | page->Flags.set((int)PageTableEntry::Flag::Present); 285 | page->Flags.set((int)PageTableEntry::Flag::User); 286 | 287 | if ( type == 1 ) // RW 288 | page->Flags.set((int)PageTableEntry::Flag::Writable); 289 | 290 | page->Address = pa; 291 | } 292 | 293 | std::vector> 294 | PageMapLevel4Table::Commit(uint64_t BasePA) 295 | { 296 | std::vector> mapped_locations; 297 | uint64_t PageSize = BochsCPU::Memory::PageSize(); 298 | uint64_t CurrentPA {BasePA}; 299 | 300 | // pair 301 | auto AllocatePageAndPA = [this, PageSize, &CurrentPA]() -> std::pair 302 | { 303 | auto h = BochsCPU::Memory::AllocatePage(); 304 | if ( !h ) 305 | throw std::bad_alloc(); 306 | 307 | // 308 | // Keep track of the allocated pages for deletion 309 | // 310 | m_AllocatedPages.push_back(h); 311 | 312 | uint64_t pa {CurrentPA}; 313 | CurrentPA += PageSize; 314 | return 
{h, pa}; 315 | }; 316 | 317 | const size_t view_size = PageSize / sizeof(uint64_t); 318 | const auto mapped_pml4 = AllocatePageAndPA(); 319 | std::span mapped_pml4_view {(uint64_t*)mapped_pml4.first, view_size}; 320 | 321 | for ( int i = -1; auto const& pdpt : this->Entries ) 322 | { 323 | i++; 324 | 325 | if ( !pdpt ) 326 | continue; 327 | 328 | if ( !pdpt->Flags.test((int)PageDirectoryPointerTable::Flag::Present) ) 329 | continue; 330 | 331 | auto mapped_pdpt = AllocatePageAndPA(); 332 | std::span mapped_pdpt_view {(uint64_t*)mapped_pdpt.first, view_size}; 333 | 334 | for ( int j = -1; auto const& pd : pdpt->Entries ) 335 | { 336 | j++; 337 | 338 | if ( !pd ) 339 | continue; 340 | 341 | if ( !pd->Flags.test((int)PageDirectory::Flag::Present) ) 342 | continue; 343 | 344 | auto mapped_pd = AllocatePageAndPA(); 345 | std::span mapped_pd_view {(uint64_t*)mapped_pd.first, view_size}; 346 | 347 | for ( int k = -1; auto const& pt : pd->Entries ) 348 | { 349 | k++; 350 | 351 | if ( !pt ) 352 | continue; 353 | 354 | if ( !pt->Flags.test((int)PageTable::Flag::Present) ) 355 | continue; 356 | 357 | auto mapped_pt = AllocatePageAndPA(); 358 | std::span mapped_pt_view {(uint64_t*)mapped_pt.first, view_size}; 359 | 360 | for ( int l = -1; auto const& page : pt->Entries ) 361 | { 362 | l++; 363 | 364 | if ( !page ) 365 | continue; 366 | 367 | if ( !page->Flags.test((int)PageTableEntry::Flag::Present) ) 368 | continue; 369 | 370 | mapped_pt_view[l] = page->Address | page->Flags.to_ulong(); 371 | } 372 | 373 | mapped_pd_view[k] = mapped_pt.second | pt->Flags.to_ulong(); 374 | mapped_locations.push_back(mapped_pt); 375 | } 376 | 377 | mapped_pdpt_view[j] = mapped_pd.second | pd->Flags.to_ulong(); 378 | mapped_locations.push_back(mapped_pd); 379 | } 380 | 381 | mapped_pml4_view[i] = mapped_pdpt.second | pdpt->Flags.to_ulong(); 382 | mapped_locations.push_back(mapped_pdpt); 383 | } 384 | 385 | mapped_locations.push_back(mapped_pml4); 386 | return mapped_locations; 387 | } 388 | 389 
| uint64_t 390 | PageMapLevel4Table::PageMapLevel4Index(uint64_t va) 391 | { 392 | return (va >> (12 + (9 * 3))) & 0b1'1111'1111; 393 | } 394 | 395 | uint64_t 396 | PageMapLevel4Table::PageDirectoryPointerTableIndex(uint64_t va) 397 | { 398 | return (va >> (12 + (9 * 2))) & 0b1'1111'1111; 399 | } 400 | 401 | uint64_t 402 | PageMapLevel4Table::PageDirectoryIndex(uint64_t va) 403 | { 404 | return (va >> (12 + 9)) & 0b1'1111'1111; 405 | } 406 | 407 | uint64_t 408 | PageMapLevel4Table::PageTableIndex(uint64_t va) 409 | { 410 | return (va >> 12) & 0b1'1111'1111; 411 | } 412 | 413 | uint64_t 414 | PageMapLevel4Table::PageOffset(uint64_t va) 415 | { 416 | return va & 0xfff; 417 | } 418 | 419 | } // namespace BochsCPU::Memory -------------------------------------------------------------------------------- /examples/long_mode_emulate_linux_udump.py: -------------------------------------------------------------------------------- 1 | # 2 | # Using LIEF to emulate ELF coredump 3 | # 4 | # @ref 5 | # - https://lief-project.github.io/doc/latest/tutorials/12_elf_coredump.html 6 | # - https://www.gabriel.urdhr.fr/2015/05/29/core-file/ 7 | # 8 | import logging 9 | import os 10 | import pathlib 11 | import sys 12 | import enum 13 | 14 | import capstone 15 | import lief 16 | 17 | import bochscpu 18 | import bochscpu.cpu 19 | import bochscpu.memory 20 | import bochscpu.utils 21 | 22 | PAGE_SIZE = bochscpu.utils.PAGE_SIZE 23 | PA_START_ADDRESS = 0x100_0000 24 | PML4_ADDRESS = 0x10_0000 25 | MEM_FREE = 0x00010000 26 | PAGE_NOACCESS = 0x01 27 | 28 | 29 | class Permission(enum.IntEnum): 30 | CODE = 0 31 | DATA = 1 32 | 33 | 34 | cs = capstone.Cs(capstone.CS_ARCH_X86, capstone.CS_MODE_64) 35 | 36 | 37 | def disass(state, va: int) -> capstone.CsInsn: 38 | global cs 39 | raw = bytes(bochscpu.memory.virt_read(state.cr3, va, 16)) 40 | insn = next(cs.disasm(raw, va)) 41 | return insn 42 | 43 | 44 | emulation_end_address = 0 45 | 46 | 47 | def hexdump( 48 | source: bytes, length: int = 0x10, 
separator: str = ".", base: int = 0x00 49 | ) -> str: 50 | result = [] 51 | align = 0x8 * 2 + 2 52 | 53 | def chunk2hexstr(chunk: bytes): 54 | return " ".join(map(lambda x: f"{x:02X}", chunk)) 55 | 56 | def chunk2ascii(chunk: bytes): 57 | return "".join([chr(b) if 0x20 <= b < 0x7F else separator for b in chunk]) 58 | 59 | for i in range(0, len(source), length): 60 | chunk = bytearray(source[i : i + length]) 61 | hexa = chunk2hexstr(chunk) 62 | text = chunk2ascii(chunk) 63 | result.append(f"{base + i:#0{align}x} {hexa:<{3 * length}} {text}") 64 | return os.linesep.join(result) 65 | 66 | 67 | def missing_page_cb(gpa): 68 | raise Exception(f"missing_page_cb({gpa=:#x})") 69 | 70 | 71 | def far_branch_cb( 72 | sess: bochscpu.Session, cpud_id: int, a1: int, a2: int, a3: int, a4: int, a5: int 73 | ): 74 | logging.debug(f"FAR_BRANCH[{cpud_id=:#x}] -> {a1=:#x}, {a2=:#x}, {a3=:#x}") 75 | 76 | 77 | def exception_cb( 78 | sess: bochscpu.Session, 79 | cpu_id: int, 80 | vector: int, 81 | error_code: int, 82 | ): 83 | excpt = bochscpu.cpu.ExceptionType(vector) 84 | match excpt: 85 | case bochscpu.cpu.ExceptionType.BreakPoint: 86 | logging.info("breakpoint hit") 87 | 88 | case bochscpu.cpu.ExceptionType.PageFault: 89 | logging.warning( 90 | f"pagefault on VA={sess.cpu.cr2:#016x} at IP={sess.cpu.rip:#016x}" 91 | ) 92 | 93 | case _: 94 | logging.error( 95 | f"cpu#{cpu_id} received exception({excpt=}, {error_code=:d}) " 96 | ) 97 | sess.stop() 98 | 99 | 100 | def before_execution_cb(sess: bochscpu.Session, cpu_id: int, _: int): 101 | state = sess.cpu.state 102 | raw = bytes(bochscpu.memory.virt_read(PML4_ADDRESS, state.rip, 16)) 103 | insn = next(cs.disasm(raw, state.rip)) 104 | logging.debug( 105 | f"BEFORE[CPU#{cpu_id}] PC={state.rip:#x} {insn.bytes.hex()} - {insn.mnemonic} {insn.op_str}" 106 | ) 107 | 108 | 109 | def after_execution_cb(sess: bochscpu.Session, cpu_id: int, _: int): 110 | global emulation_end_address 111 | if not emulation_end_address: 112 | return 113 | 114 | if 
emulation_end_address == sess.cpu.state.rip: 115 | logging.info( 116 | f"Reaching end address @ {emulation_end_address}, ending emulation" 117 | ) 118 | sess.stop() 119 | 120 | 121 | def phy_access_cb(sess: bochscpu.Session, cpu_id, a2, a3, a4, a5): 122 | logging.debug(f"PHY[CPU#{cpu_id}] {a2=:#x}") 123 | 124 | 125 | def lin_access_cb( 126 | sess: bochscpu.Session, cpu_id: int, a2: int, a3: int, a4: int, a5: int, a6: int 127 | ): 128 | logging.debug(f"LIN[CPU#{cpu_id}] {a2=:#x}") 129 | 130 | 131 | def convert_region_protection(flags: lief.ELF.SEGMENT_FLAGS) -> int: 132 | logging.debug(f"{flags.value=}") 133 | flags_i = flags.value 134 | if flags_i & 1: 135 | return Permission.CODE 136 | if flags_i & 4: 137 | return Permission.DATA 138 | 139 | logging.warning(f"Unknown {flags.value=:#x})") 140 | return -1 141 | 142 | 143 | def switch_to_thread(state: bochscpu.State, regs: dict): 144 | # 145 | # AMD Vol2 - A.1 System Software MSRs 146 | # 147 | FSBase = 0xC000_0100 148 | GSBase = 0xC000_0101 149 | KernelGSBase = 0xC000_0102 150 | 151 | _cs = bochscpu.Segment() 152 | _cs.base = 0 153 | _cs.limit = 0xFFFF_FFFF 154 | _cs.selector = regs["Cs"] 155 | _cs_attr = bochscpu.cpu.SegmentFlags() 156 | _cs_attr.A = True 157 | _cs_attr.R = True 158 | _cs_attr.E = True 159 | _cs_attr.S = True 160 | _cs_attr.P = True 161 | _cs_attr.L = True 162 | _cs.attr = int(_cs_attr) 163 | 164 | _ds = bochscpu.Segment() 165 | _ds.base = 0 166 | _ds.limit = 0xFFFF_FFFF 167 | _ds.selector = 0 168 | _ds.attr = 0xCF3 169 | 170 | _es = bochscpu.Segment() 171 | _es.base = 0 172 | _es.limit = 0xFFFF_FFFF 173 | _es.selector = 0 174 | _es.attr = 0xCF3 175 | _ss = bochscpu.Segment() 176 | _ss.base = 0 177 | _ss.limit = 0xFFFF_FFFF 178 | _ss.selector = regs["Ss"] 179 | _ss.attr = 0xCF3 180 | # AMD Vol2 - 4.5.3 181 | # > In 64-bit mode, FS-segment and GS-segment overrides are not checked for limit or attributes. 
Instead, 182 | # > the processor checks that all virtual-address references are in canonical form 183 | _fs = bochscpu.Segment() 184 | _fs.base = 0 185 | _fs.limit = 0xFFFF_FFFF 186 | _fs.selector = 0 187 | _fs.present = True 188 | _fs.attr = 0xCF3 189 | _gs = bochscpu.Segment() 190 | _gs.base = 0 191 | _gs.limit = 0x0000_0FFF 192 | _gs.selector = 0 193 | _gs.present = True 194 | _gs.attr = 0x4F3 195 | 196 | state.ss = _ss 197 | state.cs = _cs 198 | state.ds = _ds 199 | state.es = _es 200 | state.fs = _fs 201 | state.gs = _gs 202 | 203 | for name, value in regs.items(): 204 | if name.startswith("R"): 205 | setattr(state, name.lower(), value) 206 | 207 | return 208 | 209 | 210 | def call_function( 211 | sess: bochscpu.Session, 212 | start_address: int, 213 | end_address: int, 214 | args: list[int], 215 | ) -> None: 216 | global emulation_end_address 217 | 218 | state = sess.cpu.state 219 | state.rip = start_address 220 | 221 | ## Linux calling convention 222 | if len(args) >= 1: 223 | state.rdi = args[0] 224 | if len(args) >= 2: 225 | state.rsi = args[1] 226 | if len(args) >= 3: 227 | state.rdx = args[2] 228 | if len(args) >= 4: 229 | state.rcx = args[3] 230 | if len(args) >= 5: 231 | state.r8 = args[4] 232 | if len(args) >= 6: 233 | state.r9 = args[5] 234 | 235 | ## Windows calling convention 236 | # if len(args) >= 1: 237 | # state.rcx = args[0] 238 | # if len(args) >= 2: 239 | # state.rdx = args[1] 240 | # if len(args) >= 3: 241 | # state.r8 = args[2] 242 | # if len(args) >= 4: 243 | # state.r9 = args[3] 244 | 245 | logging.debug("Preparing hooks") 246 | hook = bochscpu.Hook() 247 | hook.exception = exception_cb 248 | hook.before_execution = before_execution_cb 249 | hook.after_execution = after_execution_cb 250 | hook.far_branch = far_branch_cb 251 | hook.lin_access = lin_access_cb 252 | 253 | logging.debug("Preparing emulation environment") 254 | sess.cpu.state = state 255 | 256 | if logging.getLogger().level == logging.DEBUG: 257 | logging.debug("Dumping 
initial register state") 258 | bochscpu.utils.dump_registers(sess.cpu.state) 259 | 260 | emulation_end_address = end_address 261 | 262 | logging.debug("Start emulation") 263 | sess.run( 264 | [ 265 | hook, 266 | ] 267 | ) 268 | 269 | if logging.getLogger().level == logging.DEBUG: 270 | logging.debug("Dumping final register state") 271 | bochscpu.utils.dump_registers(sess.cpu.state) 272 | return 273 | 274 | 275 | def emulate(dmp_path: str): 276 | logging.info(f"Parsing {dmp_path}") 277 | dmp = lief.ELF.parse(dmp_path) 278 | assert isinstance(dmp, lief.ELF.Binary), f"Invalid type {type(dmp)}" 279 | 280 | logging.info(f"Successfully parsed {dmp_path}") 281 | logging.debug(f"{dmp=}") 282 | 283 | sess = bochscpu.Session() 284 | sess.missing_page_handler = missing_page_cb 285 | 286 | logging.debug("Preparing page table") 287 | pt = bochscpu.memory.PageMapLevel4Table() 288 | pa = PA_START_ADDRESS 289 | pgnb = 0 290 | 291 | for segment in dmp.segments: 292 | assert isinstance(segment, lief.ELF.Segment) 293 | if segment.type == lief.ELF.SEGMENT_TYPES.NOTE: 294 | continue 295 | logging.debug(f"mapping {segment.virtual_address=:#x}") 296 | 297 | start, end = ( 298 | segment.virtual_address, 299 | segment.virtual_address + segment.virtual_size, 300 | ) 301 | for va in range(start, end, PAGE_SIZE): 302 | flags = convert_region_protection(segment.flags) 303 | if flags < 0: 304 | continue 305 | pt.insert(va, pa, flags) 306 | assert pt.translate(va) == pa 307 | hva = bochscpu.memory.allocate_host_page() 308 | bochscpu.memory.page_insert(pa, hva) 309 | print(f"\bmapped {va=:#x} to {pa=:#x} with {flags=}\r", end="") 310 | pa += PAGE_SIZE 311 | pgnb += 1 312 | 313 | logging.debug(f"{pgnb} pages inserted") 314 | 315 | buffer_hva = bochscpu.memory.allocate_host_page() 316 | buffer_pa = 0x4100_0000 317 | buffer_va = 0x41_0000_0000 318 | pt.insert(buffer_va, buffer_pa, Permission.DATA) 319 | bochscpu.memory.page_insert(buffer_pa, buffer_hva) 320 | 321 | stack_hva = 
bochscpu.memory.allocate_host_page() 322 | stack_pa = 0x4200_0000 323 | stack_va = 0x42_0000_0000 324 | pt.insert(stack_va, stack_pa, Permission.DATA) 325 | bochscpu.memory.page_insert(stack_pa, stack_hva) 326 | 327 | # Create a mock TLS and map it at VA 0 so we don't have to bother with FS 328 | fake_tls_hva = bochscpu.memory.allocate_host_page() 329 | fake_tls_pa = 0x4300_0000 330 | fake_tls_va = 0x0000_0000 331 | pt.insert(fake_tls_va, fake_tls_pa, Permission.DATA) 332 | bochscpu.memory.page_insert(fake_tls_pa, fake_tls_hva) 333 | 334 | logging.debug(f"Committing {pgnb} pages") 335 | layout = pt.commit(PML4_ADDRESS) 336 | for hva, gpa in layout: 337 | bochscpu.memory.page_insert(gpa, hva) 338 | evaled_hva = bochscpu.memory.phy_translate(gpa) 339 | assert evaled_hva == hva, f"{evaled_hva=:#x} == {hva=:#x}" 340 | # print(f"mapped {gpa=:#x} to {hva=:#x}\r", end="") 341 | 342 | # bochscpu.utils.dump_page_table(PML4_ADDRESS) 343 | 344 | logging.debug("Copy memory content") 345 | for segment in dmp.segments: 346 | assert isinstance(segment, lief.ELF.Segment) 347 | if segment.type == lief.ELF.SEGMENT_TYPES.NOTE: 348 | continue 349 | logging.debug(f"write content of {segment.virtual_address=:x}") 350 | 351 | start, end = ( 352 | segment.virtual_address, 353 | segment.virtual_address + segment.virtual_size, 354 | ) 355 | 356 | content = segment.content 357 | assert content is not None 358 | bochscpu.memory.virt_write(PML4_ADDRESS, start, bytearray(content)) 359 | del content 360 | 361 | logging.debug("Preparing CPU state") 362 | state = bochscpu.State() 363 | bochscpu.cpu.set_long_mode(state) 364 | 365 | logging.debug("Enabling MMX (SSE/AVX) instructions") 366 | cr0 = bochscpu.utils.cpu.CR0(state.cr0) 367 | cr4 = bochscpu.utils.cpu.CR4(state.cr4) 368 | xcr0 = bochscpu.utils.cpu.XCR0(state.xcr0) 369 | # See AMD Vol2 - 11.3 370 | cr0.MP = True 371 | cr0.EM = False 372 | cr4.OSFXSR = True 373 | cr4.OSXSAVE = True 374 | # See AMD Vol2 - 11.5.2 375 | xcr0.x87 = True 376 | 
xcr0.SSE = True 377 | xcr0.YMM = True 378 | 379 | logging.debug(f"Setting {cr0=:}") 380 | logging.debug(f"Setting {cr4=:}") 381 | logging.debug(f"Setting {xcr0=:}") 382 | state.cr0 = int(cr0) 383 | state.cr4 = int(cr4) 384 | state.xcr0 = int(xcr0) 385 | 386 | logging.debug(f"Setting PML4 to {PML4_ADDRESS:#x}") 387 | state.cr3 = PML4_ADDRESS 388 | 389 | prstatus = dmp.get(lief.ELF.Note.TYPE.CORE_PRSTATUS) 390 | regs = { name: prstatus.get(getattr(lief.ELF.CorePrStatus.Registers.X86_64, name.upper())) \ 391 | for name in ( 392 | "R15", 393 | "R14", 394 | "R13", 395 | "R12", 396 | "Rbp", 397 | "Rbx", 398 | "R11", 399 | "R10", 400 | "R9", 401 | "R8", 402 | "Rax", 403 | "Rcx", 404 | "Rdx", 405 | "Rsi", 406 | "Rdi", 407 | "Rip", 408 | "Eflags", 409 | "Rsp", 410 | "Cs", 411 | "Ss" 412 | ) 413 | } 414 | 415 | assert regs 416 | 417 | switch_to_thread(state, regs) 418 | 419 | sess.cpu.state = state 420 | 421 | fn_start = 0x0000000000401803 422 | fn_end = 0x0000000000401808 423 | 424 | for _ in range(1): 425 | call_function( 426 | sess, 427 | fn_start, 428 | fn_end, 429 | [ 430 | buffer_va, 431 | 16, 432 | ], 433 | ) 434 | data = bytes(bochscpu.memory.virt_read(PML4_ADDRESS, buffer_va, 0x10)) 435 | print(hexdump(data)) 436 | 437 | bochscpu.memory.release_host_page(stack_hva) 438 | bochscpu.memory.release_host_page(buffer_hva) 439 | bochscpu.memory.release_host_page(fake_tls_hva) 440 | 441 | 442 | if __name__ == "__main__": 443 | logging.basicConfig(format="%(levelname)s:%(message)s", level=logging.DEBUG) 444 | arg = sys.argv[1] 445 | emulate(arg) 446 | -------------------------------------------------------------------------------- /docs/api/bochscpu._bochscpu.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | # module `bochscpu._bochscpu` 4 | The native `bochscpu` module 5 | 6 | **Global Variables** 7 | --------------- 8 | - **memory** 9 | - **cpu** 10 | - **BX_INSTR_IS_JMP** 11 | - **BOCHSCPU_INSTR_IS_JMP_INDIRECT** 12 | - 
**BOCHSCPU_INSTR_IS_CALL** 13 | - **BOCHSCPU_INSTR_IS_CALL_INDIRECT** 14 | - **BOCHSCPU_INSTR_IS_RET** 15 | - **BOCHSCPU_INSTR_IS_IRET** 16 | - **BOCHSCPU_INSTR_IS_INT** 17 | - **BOCHSCPU_INSTR_IS_SYSCALL** 18 | - **BOCHSCPU_INSTR_IS_SYSRET** 19 | - **BOCHSCPU_INSTR_IS_SYSENTER** 20 | - **BOCHSCPU_INSTR_IS_SYSEXIT** 21 | - **BOCHSCPU_HOOK_MEM_READ** 22 | - **BOCHSCPU_HOOK_MEM_WRITE** 23 | - **BOCHSCPU_HOOK_MEM_EXECUTE** 24 | - **BOCHSCPU_HOOK_MEM_RW** 25 | - **BOCHSCPU_HOOK_TLB_CR0** 26 | - **BOCHSCPU_HOOK_TLB_CR3** 27 | - **BOCHSCPU_HOOK_TLB_CR4** 28 | - **BOCHSCPU_HOOK_TLB_TASKSWITCH** 29 | - **BOCHSCPU_HOOK_TLB_CONTEXTSWITCH** 30 | - **BOCHSCPU_HOOK_TLB_INVLPG** 31 | - **BOCHSCPU_HOOK_TLB_INVEPT** 32 | - **BOCHSCPU_HOOK_TLB_INVVPID** 33 | - **BOCHSCPU_HOOK_TLB_INVPCID** 34 | - **BOCHSCPU_OPCODE_ERROR** 35 | - **BOCHSCPU_OPCODE_INSERTED** 36 | 37 | 38 | --- 39 | 40 | ## class `GlobalSegment` 41 | GlobalSegment class 42 | 43 | 44 | --- 45 | 46 | #### property GlobalSegment.base 47 | 48 | Get/Set the GlobalSegment `base` attribute 49 | 50 | --- 51 | 52 | #### property GlobalSegment.limit 53 | 54 | Get/Set the GlobalSegment `limit` attribute 55 | 56 | 57 | 58 | 59 | --- 60 | 61 | ## class `Hook` 62 | Class Hook 63 | 64 | 65 | --- 66 | 67 | #### property Hook.after_execution 68 | 69 | Callback for Bochs `after_execution` callback 70 | 71 | --- 72 | 73 | #### property Hook.before_execution 74 | 75 | Callback for Bochs `before_execution` callback 76 | 77 | --- 78 | 79 | #### property Hook.cache_cntrl 80 | 81 | Callback for Bochs `cache_cntrl` callback 82 | 83 | --- 84 | 85 | #### property Hook.clflush 86 | 87 | Callback for Bochs `clflush` callback 88 | 89 | --- 90 | 91 | #### property Hook.cnear_branch_not_taken 92 | 93 | Callback for Bochs `cnear_branch_not_taken` callback 94 | 95 | --- 96 | 97 | #### property Hook.cnear_branch_taken 98 | 99 | Callback for Bochs `cnear_branch_taken` callback 100 | 101 | --- 102 | 103 | #### property Hook.ctx 104 | 105 | A raw pointer 
to the Session object 106 | 107 | --- 108 | 109 | #### property Hook.exception 110 | 111 | Callback for Bochs `exception` callback 112 | 113 | --- 114 | 115 | #### property Hook.far_branch 116 | 117 | Callback for Bochs `far_branch` callback 118 | 119 | --- 120 | 121 | #### property Hook.hlt 122 | 123 | Callback for Bochs `hlt` callback 124 | 125 | --- 126 | 127 | #### property Hook.hw_interrupt 128 | 129 | Callback for Bochs `hw_interrupt` callback 130 | 131 | --- 132 | 133 | #### property Hook.inp 134 | 135 | Callback for Bochs `inp` callback 136 | 137 | --- 138 | 139 | #### property Hook.inp2 140 | 141 | Callback for Bochs `inp2` callback 142 | 143 | --- 144 | 145 | #### property Hook.interrupt 146 | 147 | Callback for Bochs `interrupt` callback 148 | 149 | --- 150 | 151 | #### property Hook.lin_access 152 | 153 | Callback for Bochs `lin_access` callback 154 | 155 | --- 156 | 157 | #### property Hook.mwait 158 | 159 | Callback for Bochs `mwait` callback 160 | 161 | --- 162 | 163 | #### property Hook.opcode 164 | 165 | Callback for Bochs `opcode` callback 166 | 167 | --- 168 | 169 | #### property Hook.outp 170 | 171 | Callback for Bochs `outp` callback 172 | 173 | --- 174 | 175 | #### property Hook.phy_access 176 | 177 | Callback for Bochs `phy_access` callback 178 | 179 | --- 180 | 181 | #### property Hook.prefetch_hint 182 | 183 | Callback for Bochs `prefetch_hint` callback 184 | 185 | --- 186 | 187 | #### property Hook.repeat_iteration 188 | 189 | Callback for Bochs `repeat_iteration` callback 190 | 191 | --- 192 | 193 | #### property Hook.reset 194 | 195 | Callback for Bochs `reset` callback 196 | 197 | --- 198 | 199 | #### property Hook.tlb_cntrl 200 | 201 | Callback for Bochs `tlb_cntrl` callback 202 | 203 | --- 204 | 205 | #### property Hook.ucnear_branch 206 | 207 | Callback for Bochs `ucnear_branch` callback 208 | 209 | --- 210 | 211 | #### property Hook.vmexit 212 | 213 | Callback for Bochs `vmexit` callback 214 | 215 | --- 216 | 217 | #### property 
Hook.wrmsr 218 | 219 | Callback for Bochs `wrmsr` callback 220 | 221 | 222 | 223 | 224 | --- 225 | 226 | ## class `HookType` 227 | 228 | 229 | 230 | 231 | 232 | 233 | 234 | 235 | --- 236 | 237 | ## class `InstructionType` 238 | 239 | 240 | 241 | 242 | 243 | 244 | 245 | 246 | --- 247 | 248 | ## class `OpcodeOperationType` 249 | 250 | 251 | 252 | 253 | 254 | 255 | 256 | 257 | --- 258 | 259 | ## class `Segment` 260 | Segment class 261 | 262 | 263 | --- 264 | 265 | #### property Segment.attr 266 | 267 | Get/Set the Segment `attr` attribute 268 | 269 | --- 270 | 271 | #### property Segment.base 272 | 273 | Get/Set the Segment `base` attribute 274 | 275 | --- 276 | 277 | #### property Segment.limit 278 | 279 | Get/Set the Segment `limit` attribute 280 | 281 | --- 282 | 283 | #### property Segment.present 284 | 285 | Get/Set the Segment `present` attribute 286 | 287 | --- 288 | 289 | #### property Segment.selector 290 | 291 | Get/Set the Segment `selector` attribute 292 | 293 | 294 | 295 | 296 | --- 297 | 298 | ## class `Session` 299 | Class session 300 | 301 | 302 | --- 303 | 304 | #### property Session.cpu 305 | 306 | Get the CPU associated to the session 307 | 308 | --- 309 | 310 | #### property Session.missing_page_handler 311 | 312 | Set the missing page callback 313 | 314 | 315 | --- 316 | 317 | #### handler Session.run 318 | 319 | --- 320 | 321 | #### handler Session.stop 322 | 323 | 324 | 325 | --- 326 | 327 | ## class `State` 328 | Class State 329 | 330 | 331 | --- 332 | 333 | #### property State.apic_base 334 | 335 | Get/Set the register `apic_base` in the current state 336 | 337 | --- 338 | 339 | #### property State.cr0 340 | 341 | Get/Set the register `cr0` in the current state 342 | 343 | --- 344 | 345 | #### property State.cr2 346 | 347 | Get/Set the register `cr2` in the current state 348 | 349 | --- 350 | 351 | #### property State.cr3 352 | 353 | Get/Set the register `cr3` in the current state 354 | 355 | --- 356 | 357 | #### property State.cr4 358 | 359 | 
Get/Set the register `cr4` in the current state 360 | 361 | --- 362 | 363 | #### property State.cr8 364 | 365 | Get/Set the register `cr8` in the current state 366 | 367 | --- 368 | 369 | #### property State.cs 370 | 371 | Get/Set the register `cs` in the current state 372 | 373 | --- 374 | 375 | #### property State.cstar 376 | 377 | Get/Set the register `cstar` in the current state 378 | 379 | --- 380 | 381 | #### property State.dr0 382 | 383 | Get/Set the register `dr0` in the current state 384 | 385 | --- 386 | 387 | #### property State.dr1 388 | 389 | Get/Set the register `dr1` in the current state 390 | 391 | --- 392 | 393 | #### property State.dr2 394 | 395 | Get/Set the register `dr2` in the current state 396 | 397 | --- 398 | 399 | #### property State.dr3 400 | 401 | Get/Set the register `dr3` in the current state 402 | 403 | --- 404 | 405 | #### property State.dr6 406 | 407 | Get/Set the register `dr6` in the current state 408 | 409 | --- 410 | 411 | #### property State.dr7 412 | 413 | Get/Set the register `dr7` in the current state 414 | 415 | --- 416 | 417 | #### property State.ds 418 | 419 | Get/Set the register `ds` in the current state 420 | 421 | --- 422 | 423 | #### property State.efer 424 | 425 | Get/Set the register `efer` in the current state 426 | 427 | --- 428 | 429 | #### property State.es 430 | 431 | Get/Set the register `es` in the current state 432 | 433 | --- 434 | 435 | #### property State.fpcw 436 | 437 | Get/Set the register `fpcw` in the current state 438 | 439 | --- 440 | 441 | #### property State.fpop 442 | 443 | Get/Set the register `fpop` in the current state 444 | 445 | --- 446 | 447 | #### property State.fpst 448 | 449 | Get/Set the register `fpst` in the current state 450 | 451 | --- 452 | 453 | #### property State.fpsw 454 | 455 | Get/Set the register `fpsw` in the current state 456 | 457 | --- 458 | 459 | #### property State.fptw 460 | 461 | Get/Set the register `fptw` in the current state 462 | 463 | --- 464 | 465 | #### 
property State.fs 466 | 467 | Get/Set the register `fs` in the current state 468 | 469 | --- 470 | 471 | #### property State.gdtr 472 | 473 | Get/Set the register `gdtr` in the current state 474 | 475 | --- 476 | 477 | #### property State.gs 478 | 479 | Get/Set the register `gs` in the current state 480 | 481 | --- 482 | 483 | #### property State.idtr 484 | 485 | Get/Set the register `idtr` in the current state 486 | 487 | --- 488 | 489 | #### property State.kernel_gs_base 490 | 491 | Get/Set the register `kernel_gs_base` in the current state 492 | 493 | --- 494 | 495 | #### property State.ldtr 496 | 497 | Get/Set the register `ldtr` in the current state 498 | 499 | --- 500 | 501 | #### property State.lstar 502 | 503 | Get/Set the register `lstar` in the current state 504 | 505 | --- 506 | 507 | #### property State.mxcsr 508 | 509 | Get/Set the register `mxcsr` in the current state 510 | 511 | --- 512 | 513 | #### property State.mxcsr_mask 514 | 515 | Get/Set the register `mxcsr_mask` in the current state 516 | 517 | --- 518 | 519 | #### property State.pat 520 | 521 | Get/Set the register `pat` in the current state 522 | 523 | --- 524 | 525 | #### property State.r10 526 | 527 | Get/Set the register `r10` in the current state 528 | 529 | --- 530 | 531 | #### property State.r11 532 | 533 | Get/Set the register `r11` in the current state 534 | 535 | --- 536 | 537 | #### property State.r12 538 | 539 | Get/Set the register `r12` in the current state 540 | 541 | --- 542 | 543 | #### property State.r13 544 | 545 | Get/Set the register `r13` in the current state 546 | 547 | --- 548 | 549 | #### property State.r14 550 | 551 | Get/Set the register `r14` in the current state 552 | 553 | --- 554 | 555 | #### property State.r15 556 | 557 | Get/Set the register `r15` in the current state 558 | 559 | --- 560 | 561 | #### property State.r8 562 | 563 | Get/Set the register `r8` in the current state 564 | 565 | --- 566 | 567 | #### property State.r9 568 | 569 | Get/Set the register 
`r9` in the current state 570 | 571 | --- 572 | 573 | #### property State.rax 574 | 575 | Get/Set the register `rax` in the current state 576 | 577 | --- 578 | 579 | #### property State.rbp 580 | 581 | Get/Set the register `rbp` in the current state 582 | 583 | --- 584 | 585 | #### property State.rbx 586 | 587 | Get/Set the register `rbx` in the current state 588 | 589 | --- 590 | 591 | #### property State.rcx 592 | 593 | Get/Set the register `rcx` in the current state 594 | 595 | --- 596 | 597 | #### property State.rdi 598 | 599 | Get/Set the register `rdi` in the current state 600 | 601 | --- 602 | 603 | #### property State.rdx 604 | 605 | Get/Set the register `rdx` in the current state 606 | 607 | --- 608 | 609 | #### property State.rflags 610 | 611 | Get/Set the register `rflags` in the current state 612 | 613 | --- 614 | 615 | #### property State.rip 616 | 617 | Get/Set the register `rip` in the current state 618 | 619 | --- 620 | 621 | #### property State.rsi 622 | 623 | Get/Set the register `rsi` in the current state 624 | 625 | --- 626 | 627 | #### property State.rsp 628 | 629 | Get/Set the register `rsp` in the current state 630 | 631 | --- 632 | 633 | #### property State.seed 634 | 635 | Get/Set the seed in the current state 636 | 637 | --- 638 | 639 | #### property State.sfmask 640 | 641 | Get/Set the register `sfmask` in the current state 642 | 643 | --- 644 | 645 | #### property State.ss 646 | 647 | Get/Set the register `ss` in the current state 648 | 649 | --- 650 | 651 | #### property State.star 652 | 653 | Get/Set the register `star` in the current state 654 | 655 | --- 656 | 657 | #### property State.sysenter_cs 658 | 659 | Get/Set the register `sysenter_cs` in the current state 660 | 661 | --- 662 | 663 | #### property State.sysenter_eip 664 | 665 | Get/Set the register `sysenter_eip` in the current state 666 | 667 | --- 668 | 669 | #### property State.sysenter_esp 670 | 671 | Get/Set the register `sysenter_esp` in the current state 672 | 673 | 
--- 674 | 675 | #### property State.tr 676 | 677 | Get/Set the register `tr` in the current state 678 | 679 | --- 680 | 681 | #### property State.tsc 682 | 683 | Get/Set the register `tsc` in the current state 684 | 685 | --- 686 | 687 | #### property State.tsc_aux 688 | 689 | Get/Set the register `tsc_aux` in the current state 690 | 691 | --- 692 | 693 | #### property State.xcr0 694 | 695 | Get/Set the register `xcr0` in the current state 696 | 697 | --- 698 | 699 | #### property State.zmm 700 | 701 | Get/Set the register `zmm` in the current state 702 | 703 | 704 | 705 | 706 | --- 707 | 708 | ## class `Zmm` 709 | 710 | 711 | 712 | 713 | 714 | --- 715 | 716 | #### property Zmm.q 717 | 718 | (self) -> list[int] 719 | 720 | 721 | 722 | 723 | 724 | 725 | --- 726 | 727 | _This file was automatically generated via [lazydocs](https://github.com/ml-tooling/lazydocs)._ 728 | -------------------------------------------------------------------------------- /bochscpu/bochscpu.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | static const uint32_t BX_INSTR_IS_JMP = 10; 11 | 12 | static const uint32_t BOCHSCPU_INSTR_IS_JMP_INDIRECT = 11; 13 | 14 | static const uint32_t BOCHSCPU_INSTR_IS_CALL = 12; 15 | 16 | static const uint32_t BOCHSCPU_INSTR_IS_CALL_INDIRECT = 13; 17 | 18 | static const uint32_t BOCHSCPU_INSTR_IS_RET = 14; 19 | 20 | static const uint32_t BOCHSCPU_INSTR_IS_IRET = 15; 21 | 22 | static const uint32_t BOCHSCPU_INSTR_IS_INT = 16; 23 | 24 | static const uint32_t BOCHSCPU_INSTR_IS_SYSCALL = 17; 25 | 26 | static const uint32_t BOCHSCPU_INSTR_IS_SYSRET = 18; 27 | 28 | static const uint32_t BOCHSCPU_INSTR_IS_SYSENTER = 19; 29 | 30 | static const uint32_t BOCHSCPU_INSTR_IS_SYSEXIT = 20; 31 | 32 | static const uint32_t BOCHSCPU_HOOK_MEM_READ = 0; 33 | 34 | static const uint32_t BOCHSCPU_HOOK_MEM_WRITE = 1; 35 | 36 | static const 
uint32_t BOCHSCPU_HOOK_MEM_EXECUTE = 2; 37 | 38 | static const uint32_t BOCHSCPU_HOOK_MEM_RW = 3; 39 | 40 | static const uint32_t BOCHSCPU_HOOK_TLB_CR0 = 10; 41 | 42 | static const uint32_t BOCHSCPU_HOOK_TLB_CR3 = 11; 43 | 44 | static const uint32_t BOCHSCPU_HOOK_TLB_CR4 = 12; 45 | 46 | static const uint32_t BOCHSCPU_HOOK_TLB_TASKSWITCH = 13; 47 | 48 | static const uint32_t BOCHSCPU_HOOK_TLB_CONTEXTSWITCH = 14; 49 | 50 | static const uint32_t BOCHSCPU_HOOK_TLB_INVLPG = 15; 51 | 52 | static const uint32_t BOCHSCPU_HOOK_TLB_INVEPT = 16; 53 | 54 | static const uint32_t BOCHSCPU_HOOK_TLB_INVVPID = 17; 55 | 56 | static const uint32_t BOCHSCPU_HOOK_TLB_INVPCID = 18; 57 | 58 | static const uint32_t BOCHSCPU_OPCODE_ERROR = 0; 59 | 60 | static const uint32_t BOCHSCPU_OPCODE_INSERTED = 1; 61 | 62 | using bochscpu_cpu_t = void*; 63 | using context_t = void; 64 | using Address = uint64_t; 65 | 66 | /// FFI Hook object 67 | /// 68 | /// Full descriptions of hook points can be found here: 69 | /// http://bochs.sourceforge.net/cgi-bin/lxr/source/instrument/instrumentation.txt 70 | /// 71 | /// If the hook value is NULL it will be treated as a no-op. 
The value of the 72 | /// ctx field will be passed as the first parameter to every hook and is fully 73 | /// controlled by the API author 74 | struct bochscpu_hooks_t 75 | { 76 | void* ctx {nullptr}; 77 | 78 | void (*reset)(void*, uint32_t, uint32_t) {}; 79 | void (*hlt)(void*, uint32_t) {}; 80 | void (*mwait)(void*, uint32_t, uint64_t, uintptr_t, uint32_t) {}; 81 | void (*cnear_branch_taken)(void*, uint32_t, uint64_t, uint64_t) {}; 82 | void (*cnear_branch_not_taken)(void*, uint32_t, uint64_t, uint64_t) {}; 83 | void (*ucnear_branch)(void*, uint32_t, uint32_t, uint64_t, uint64_t) {}; 84 | void (*far_branch)(void*, uint32_t, uint32_t, uint16_t, uint64_t, uint16_t, uint64_t) {}; 85 | void (*opcode)(void*, uint32_t, void*, uint8_t*, uintptr_t, bool, bool) {}; 86 | void (*interrupt)(void*, uint32_t, uint32_t) {}; 87 | void (*exception)(void*, uint32_t, uint32_t, uint32_t) {}; 88 | void (*hw_interrupt)(void*, uint32_t, uint32_t, uint16_t, uint64_t) {}; 89 | void (*tlb_cntrl)(void*, uint32_t, uint32_t, uint64_t) {}; 90 | void (*cache_cntrl)(void*, uint32_t, uint32_t) {}; 91 | void (*prefetch_hint)(void*, uint32_t, uint32_t, uint32_t, uint64_t) {}; 92 | void (*clflush)(void*, uint32_t, uint64_t, uint64_t) {}; 93 | void (*before_execution)(void*, uint32_t, void*) {}; 94 | void (*after_execution)(void*, uint32_t, void*) {}; 95 | void (*repeat_iteration)(void*, uint32_t, void*) {}; 96 | void (*inp)(void*, uint16_t, uintptr_t) {}; 97 | void (*inp2)(void*, uint16_t, uintptr_t, uint32_t) {}; 98 | void (*outp)(void*, uint16_t, uintptr_t, uint32_t) {}; 99 | void (*lin_access)(void*, uint32_t, uint64_t, uint64_t, uintptr_t, uint32_t, uint32_t) {}; 100 | void (*phy_access)(void*, uint32_t, uint64_t, uintptr_t, uint32_t, uint32_t) {}; 101 | void (*wrmsr)(void*, uint32_t, uint32_t, uint64_t) {}; 102 | void (*vmexit)(void*, uint32_t, uint32_t, uint64_t) {}; 103 | 104 | bochscpu_hooks_t() = default; 105 | }; 106 | 107 | 108 | struct Seg 109 | { 110 | bool present {}; 111 | uint16_t 
selector {}; 112 | Address base {}; 113 | uint32_t limit {}; 114 | uint16_t attr {}; 115 | 116 | Seg() = default; 117 | }; 118 | 119 | struct GlobalSeg 120 | { 121 | Address base {}; 122 | uint16_t limit {}; 123 | 124 | GlobalSeg() = default; 125 | }; 126 | 127 | struct Zmm 128 | { 129 | std::array q {}; 130 | 131 | Zmm() = default; 132 | }; 133 | 134 | struct Floatx80 135 | { 136 | uint64_t fraction {}; 137 | uint16_t exp {}; 138 | 139 | Floatx80() = default; 140 | }; 141 | 142 | struct State 143 | { 144 | uint64_t bochscpu_seed {}; 145 | uint64_t rax {}; 146 | uint64_t rcx {}; 147 | uint64_t rdx {}; 148 | uint64_t rbx {}; 149 | uint64_t rsp {}; 150 | uint64_t rbp {}; 151 | uint64_t rsi {}; 152 | uint64_t rdi {}; 153 | uint64_t r8 {}; 154 | uint64_t r9 {}; 155 | uint64_t r10 {}; 156 | uint64_t r11 {}; 157 | uint64_t r12 {}; 158 | uint64_t r13 {}; 159 | uint64_t r14 {}; 160 | uint64_t r15 {}; 161 | uint64_t rip {}; 162 | uint64_t rflags {}; 163 | Seg es {}; 164 | Seg cs {}; 165 | Seg ss {}; 166 | Seg ds {}; 167 | Seg fs {}; 168 | Seg gs {}; 169 | Seg ldtr {}; 170 | Seg tr {}; 171 | GlobalSeg gdtr {}; 172 | GlobalSeg idtr {}; 173 | uint32_t cr0 {}; 174 | uint64_t cr2 {}; 175 | uint64_t cr3 {}; 176 | uint32_t cr4 {}; 177 | uint64_t cr8 {}; 178 | uint64_t dr0 {}; 179 | uint64_t dr1 {}; 180 | uint64_t dr2 {}; 181 | uint64_t dr3 {}; 182 | uint32_t dr6 {}; 183 | uint32_t dr7 {}; 184 | uint32_t xcr0 {}; 185 | std::array zmm {}; 186 | uint16_t fpcw {}; 187 | uint16_t fpsw {}; 188 | uint16_t fptw {}; 189 | uint16_t fpop {}; 190 | std::array fpst {}; 191 | uint32_t mxcsr {}; 192 | uint32_t mxcsr_mask {}; 193 | uint64_t tsc {}; 194 | uint32_t efer {}; 195 | uint64_t kernel_gs_base {}; 196 | uint64_t apic_base {}; 197 | uint64_t pat {}; 198 | uint64_t sysenter_cs {}; 199 | uint64_t sysenter_eip {}; 200 | uint64_t sysenter_esp {}; 201 | uint64_t star {}; 202 | uint64_t lstar {}; 203 | uint64_t cstar {}; 204 | uint64_t sfmask {}; 205 | uint64_t tsc_aux {}; 206 | 207 | State() = 
default; 208 | }; 209 | 210 | using bochscpu_cpu_state_t = State; 211 | 212 | using bochscpu_cpu_seg_t = Seg; 213 | 214 | using bochscpu_cpu_global_seg_t = GlobalSeg; 215 | 216 | using bochscpu_cpu_zmm_t = Zmm; 217 | 218 | using bochscpu_cpu_floatx80_t = Floatx80; 219 | 220 | using bochscpu_instr_t = const void*; 221 | 222 | extern "C" 223 | { 224 | 225 | /// Create a new Cpu 226 | /// 227 | /// Create a new Cpu with the specified id. If SMP is not enabled, the id is 228 | /// ignored. 229 | bochscpu_cpu_t 230 | bochscpu_cpu_new(uint32_t id); 231 | 232 | /// Create a new Cpu 233 | /// 234 | /// Instantiate an already existing cpu with the specified id. 235 | bochscpu_cpu_t 236 | bochscpu_cpu_from(uint32_t id); 237 | 238 | void 239 | bochscpu_cpu_forget(bochscpu_cpu_t p); 240 | 241 | /// Delete a cpu 242 | void 243 | bochscpu_cpu_delete(bochscpu_cpu_t p); 244 | 245 | void 246 | bochscpu_cpu_set_mode(bochscpu_cpu_t p); 247 | 248 | /// Start emulation 249 | /// 250 | /// To hook emulation, pass in a NULL terminated list of one or more pointers to 251 | /// bochscpu_hooks_t structs. 
252 | void 253 | bochscpu_cpu_run(bochscpu_cpu_t p, bochscpu_hooks_t** h); 254 | 255 | /// Stop emulation 256 | /// 257 | void 258 | bochscpu_cpu_stop(bochscpu_cpu_t p); 259 | 260 | void 261 | bochscpu_cpu_state(bochscpu_cpu_t p, bochscpu_cpu_state_t* s); 262 | 263 | void 264 | bochscpu_cpu_set_state(bochscpu_cpu_t p, const bochscpu_cpu_state_t* s); 265 | 266 | void 267 | bochscpu_cpu_set_state_no_flush(bochscpu_cpu_t p, const bochscpu_cpu_state_t* s); 268 | 269 | void 270 | bochscpu_cpu_set_exception(bochscpu_cpu_t p, uint32_t vector, uint16_t error); 271 | 272 | uint64_t 273 | bochscpu_cpu_rax(bochscpu_cpu_t p); 274 | 275 | void 276 | bochscpu_cpu_set_rax(bochscpu_cpu_t p, uint64_t val); 277 | 278 | uint64_t 279 | bochscpu_cpu_rcx(bochscpu_cpu_t p); 280 | 281 | void 282 | bochscpu_cpu_set_rcx(bochscpu_cpu_t p, uint64_t val); 283 | 284 | uint64_t 285 | bochscpu_cpu_rdx(bochscpu_cpu_t p); 286 | 287 | void 288 | bochscpu_cpu_set_rdx(bochscpu_cpu_t p, uint64_t val); 289 | 290 | uint64_t 291 | bochscpu_cpu_rbx(bochscpu_cpu_t p); 292 | 293 | void 294 | bochscpu_cpu_set_rbx(bochscpu_cpu_t p, uint64_t val); 295 | 296 | uint64_t 297 | bochscpu_cpu_rsp(bochscpu_cpu_t p); 298 | 299 | void 300 | bochscpu_cpu_set_rsp(bochscpu_cpu_t p, uint64_t val); 301 | 302 | uint64_t 303 | bochscpu_cpu_rbp(bochscpu_cpu_t p); 304 | 305 | void 306 | bochscpu_cpu_set_rbp(bochscpu_cpu_t p, uint64_t val); 307 | 308 | uint64_t 309 | bochscpu_cpu_rsi(bochscpu_cpu_t p); 310 | 311 | void 312 | bochscpu_cpu_set_rsi(bochscpu_cpu_t p, uint64_t val); 313 | 314 | uint64_t 315 | bochscpu_cpu_rdi(bochscpu_cpu_t p); 316 | 317 | void 318 | bochscpu_cpu_set_rdi(bochscpu_cpu_t p, uint64_t val); 319 | 320 | uint64_t 321 | bochscpu_cpu_r8(bochscpu_cpu_t p); 322 | 323 | void 324 | bochscpu_cpu_set_r8(bochscpu_cpu_t p, uint64_t val); 325 | 326 | uint64_t 327 | bochscpu_cpu_r9(bochscpu_cpu_t p); 328 | 329 | void 330 | bochscpu_cpu_set_r9(bochscpu_cpu_t p, uint64_t val); 331 | 332 | uint64_t 333 | 
bochscpu_cpu_r10(bochscpu_cpu_t p); 334 | 335 | void 336 | bochscpu_cpu_set_r10(bochscpu_cpu_t p, uint64_t val); 337 | 338 | uint64_t 339 | bochscpu_cpu_r11(bochscpu_cpu_t p); 340 | 341 | void 342 | bochscpu_cpu_set_r11(bochscpu_cpu_t p, uint64_t val); 343 | 344 | uint64_t 345 | bochscpu_cpu_r12(bochscpu_cpu_t p); 346 | 347 | void 348 | bochscpu_cpu_set_r12(bochscpu_cpu_t p, uint64_t val); 349 | 350 | uint64_t 351 | bochscpu_cpu_r13(bochscpu_cpu_t p); 352 | 353 | void 354 | bochscpu_cpu_set_r13(bochscpu_cpu_t p, uint64_t val); 355 | 356 | uint64_t 357 | bochscpu_cpu_r14(bochscpu_cpu_t p); 358 | 359 | void 360 | bochscpu_cpu_set_r14(bochscpu_cpu_t p, uint64_t val); 361 | 362 | uint64_t 363 | bochscpu_cpu_r15(bochscpu_cpu_t p); 364 | 365 | void 366 | bochscpu_cpu_set_r15(bochscpu_cpu_t p, uint64_t val); 367 | 368 | uint64_t 369 | bochscpu_cpu_rip(bochscpu_cpu_t p); 370 | 371 | void 372 | bochscpu_cpu_set_rip(bochscpu_cpu_t p, uint64_t val); 373 | 374 | uint64_t 375 | bochscpu_cpu_rflags(bochscpu_cpu_t p); 376 | 377 | void 378 | bochscpu_cpu_set_rflags(bochscpu_cpu_t p, uint64_t val); 379 | 380 | void 381 | bochscpu_cpu_es(bochscpu_cpu_t p, bochscpu_cpu_seg_t* s); 382 | 383 | void 384 | bochscpu_cpu_set_es(bochscpu_cpu_t p, const bochscpu_cpu_seg_t* s); 385 | 386 | void 387 | bochscpu_cpu_cs(bochscpu_cpu_t p, bochscpu_cpu_seg_t* s); 388 | 389 | void 390 | bochscpu_cpu_set_cs(bochscpu_cpu_t p, const bochscpu_cpu_seg_t* s); 391 | 392 | void 393 | bochscpu_cpu_ss(bochscpu_cpu_t p, bochscpu_cpu_seg_t* s); 394 | 395 | void 396 | bochscpu_cpu_set_ss(bochscpu_cpu_t p, const bochscpu_cpu_seg_t* s); 397 | 398 | void 399 | bochscpu_cpu_ds(bochscpu_cpu_t p, bochscpu_cpu_seg_t* s); 400 | 401 | void 402 | bochscpu_cpu_set_ds(bochscpu_cpu_t p, const bochscpu_cpu_seg_t* s); 403 | 404 | void 405 | bochscpu_cpu_fs(bochscpu_cpu_t p, bochscpu_cpu_seg_t* s); 406 | 407 | void 408 | bochscpu_cpu_set_fs(bochscpu_cpu_t p, const bochscpu_cpu_seg_t* s); 409 | 410 | void 411 | 
bochscpu_cpu_gs(bochscpu_cpu_t p, bochscpu_cpu_seg_t* s); 412 | 413 | void 414 | bochscpu_cpu_set_gs(bochscpu_cpu_t p, const bochscpu_cpu_seg_t* s); 415 | 416 | void 417 | bochscpu_cpu_ldtr(bochscpu_cpu_t p, bochscpu_cpu_seg_t* s); 418 | 419 | void 420 | bochscpu_cpu_set_ldtr(bochscpu_cpu_t p, const bochscpu_cpu_seg_t* s); 421 | 422 | void 423 | bochscpu_cpu_tr(bochscpu_cpu_t p, bochscpu_cpu_seg_t* s); 424 | 425 | void 426 | bochscpu_cpu_set_tr(bochscpu_cpu_t p, const bochscpu_cpu_seg_t* s); 427 | 428 | void 429 | bochscpu_cpu_gdtr(bochscpu_cpu_t p, bochscpu_cpu_global_seg_t* s); 430 | 431 | void 432 | bochscpu_cpu_set_gdtr(bochscpu_cpu_t p, const bochscpu_cpu_global_seg_t* s); 433 | 434 | void 435 | bochscpu_cpu_idtr(bochscpu_cpu_t p, bochscpu_cpu_global_seg_t* s); 436 | 437 | void 438 | bochscpu_cpu_set_idtr(bochscpu_cpu_t p, const bochscpu_cpu_global_seg_t* s); 439 | 440 | uint64_t 441 | bochscpu_cpu_cr2(bochscpu_cpu_t p); 442 | 443 | void 444 | bochscpu_cpu_set_cr2(bochscpu_cpu_t p, uint64_t val); 445 | 446 | uint64_t 447 | bochscpu_cpu_cr3(bochscpu_cpu_t p); 448 | 449 | void 450 | bochscpu_cpu_set_cr3(bochscpu_cpu_t p, uint64_t val); 451 | 452 | void 453 | bochscpu_cpu_zmm(bochscpu_cpu_t p, uintptr_t idx, bochscpu_cpu_zmm_t* z); 454 | 455 | void 456 | bochscpu_cpu_set_zmm(bochscpu_cpu_t p, uintptr_t idx, const bochscpu_cpu_zmm_t* z); 457 | 458 | uint32_t 459 | bochscpu_instr_bx_opcode(bochscpu_instr_t p); 460 | 461 | uint16_t 462 | bochscpu_instr_imm16(bochscpu_instr_t p); 463 | 464 | uint32_t 465 | bochscpu_instr_imm32(bochscpu_instr_t p); 466 | 467 | uint64_t 468 | bochscpu_instr_imm64(bochscpu_instr_t p); 469 | 470 | /// Add GPA mapping to HVA 471 | /// 472 | /// If the GPA was already mapped, this replaces the existing mapping 473 | /// 474 | /// # Panics 475 | /// 476 | /// Panics if the added page is not page aligned. 
477 | void 478 | bochscpu_mem_page_insert(uint64_t gpa, uint8_t* hva); 479 | 480 | /// Delete GPA mapping 481 | /// 482 | /// If the GPA is not valid, this is a no-op. 483 | void 484 | bochscpu_mem_page_remove(uint64_t gpa); 485 | 486 | /// Install a physical page fault handler 487 | /// 488 | /// This function will be called any time a request is made to physical memory 489 | /// and the GPA is not present. This function should add a page using 490 | /// `bochscpu_mem_page_insert()` 491 | /// 492 | /// The parameter should have the type `void handler(gpa_t)` 493 | /// 494 | /// This allows you to lazily page in your backing physical memory. 495 | /// 496 | /// # Note 497 | /// 498 | /// This is a global singleton, and installing a new physical page fault 499 | /// handler will overwrite the existing handler. 500 | void 501 | bochscpu_mem_missing_page(void (*handler)(uint64_t gpa)); 502 | 503 | /// Translate GPA to HVA 504 | /// 505 | /// # Panics 506 | /// 507 | /// If the GPA does not exist, it will call the missing page handler. If no 508 | /// missing page handler is set or the missing page handler does not add the 509 | /// appropriate page, this will panic. 510 | /// 511 | /// # Example 512 | uint8_t* 513 | bochscpu_mem_phy_translate(uint64_t gpa); 514 | 515 | /// Translate GVA to GPA 516 | /// 517 | /// Use the provided cr3 to translate the GVA into a GPA. 518 | /// 519 | /// # Returns 520 | /// 521 | /// Translated gpa on success, -1 on failure 522 | uint64_t 523 | bochscpu_mem_virt_translate(uint64_t cr3, uint64_t gva); 524 | 525 | /// Read from GPA 526 | /// 527 | /// # Panics 528 | /// 529 | /// If the GPA does not exist, it will call the missing page function. 
If 530 | /// that function does not exist or does not resolve the fault, this routine 531 | /// will panic 532 | void 533 | bochscpu_mem_phy_read(uint64_t gpa, uint8_t* hva, uintptr_t sz); 534 | 535 | /// Write to GPA 536 | /// 537 | /// # Panics 538 | /// 539 | /// If the GPA does not exist, it will call the missing page function. If 540 | /// that function does not exist or does not resolve the fault, this routine 541 | /// will panic 542 | void 543 | bochscpu_mem_phy_write(uint64_t gpa, const uint8_t* hva, uintptr_t sz); 544 | 545 | /// Write to GVA 546 | /// 547 | /// Write to GVA, using specified cr3 to translate. 548 | /// 549 | /// # Returns 550 | /// 551 | /// Zero on success, non-zero on failure 552 | int32_t 553 | bochscpu_mem_virt_write(uint64_t cr3, uint64_t gva, const uint8_t* hva, uintptr_t sz); 554 | 555 | /// Read from GVA 556 | /// 557 | /// Read from GVA, using specified cr3 to translate. 558 | /// 559 | /// # Returns 560 | /// 561 | /// Zero on success, non-zero on failure 562 | int32_t 563 | bochscpu_mem_virt_read(uint64_t cr3, uint64_t gva, uint8_t* hva, uintptr_t sz); 564 | 565 | void 566 | bochscpu_log_set_level(uintptr_t level); 567 | 568 | 569 | } // extern "C" 570 | -------------------------------------------------------------------------------- /python/inc/bochscpu.hpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #if defined(_WIN32) 10 | #include 11 | #elif defined(__LINUX__) || defined(__linux__) 12 | #include 13 | #elif defined(__APPLE__) || defined(__MACH__) 14 | #include 15 | #else 16 | #error Not supported 17 | #endif // _WIN32 18 | 19 | #include "bochscpu/bochscpu.hpp" 20 | 21 | // #define DEBUG 22 | 23 | #ifdef DEBUG 24 | #define dbg(fmt, ...) ::printf("[*] %s:%d - " fmt "\n", __FUNCTION__, __LINE__, __VA_ARGS__) 25 | #define info(fmt, ...) 
::printf("[+] %s:%d - " fmt "\n", __FUNCTION__, __LINE__, __VA_ARGS__) 26 | #define warn(fmt, ...) ::printf("[!] %s:%d - " fmt "\n", __FUNCTION__, __LINE__, __VA_ARGS__) 27 | #define err(fmt, ...) ::printf("[-] %s:%d - " fmt "\n", __FUNCTION__, __LINE__, __VA_ARGS__) 28 | #else 29 | #define dbg(fmt, ...) 30 | #if defined(_WIN32) 31 | #define info(fmt, ...) ::printf("[+] " fmt "\n", __VA_ARGS__) 32 | #define warn(fmt, ...) ::printf("[!] " fmt "\n", __VA_ARGS__) 33 | #define err(fmt, ...) ::printf("[-] " fmt "\n", __VA_ARGS__) 34 | #elif defined(__LINUX__) || defined(__linux__) || defined(__APPLE__) || defined(__MACH__) 35 | #define info(fmt, ...) ::printf("[+] " fmt "\n" __VA_OPT__(, ) __VA_ARGS__) 36 | #define warn(fmt, ...) ::printf("[!] " fmt "\n" __VA_OPT__(, ) __VA_ARGS__) 37 | #define err(fmt, ...) ::printf("[-] " fmt "\n" __VA_OPT__(, ) __VA_ARGS__) 38 | #endif 39 | #endif // DEBUG 40 | 41 | // 42 | // Some missing defines 43 | // 44 | 45 | // For bx_instr_tlb_cntrl 46 | #define BX_INSTR_MOV_CR0 10 47 | #define BX_INSTR_MOV_CR3 11 48 | #define BX_INSTR_MOV_CR4 12 49 | #define BX_INSTR_TASK_SWITCH 13 50 | #define BX_INSTR_CONTEXT_SWITCH 14 /* VMM and SMM enter/exit */ 51 | #define BX_INSTR_INVLPG 15 52 | #define BX_INSTR_INVEPT 16 53 | #define BX_INSTR_INVVPID 17 54 | #define BX_INSTR_INVPCID 18 55 | 56 | // For bx_instr_cache_cntrl 57 | #define BX_INSTR_INVD 10 58 | #define BX_INSTR_WBINVD 11 59 | 60 | // For bx_instr_prefetch_hint (what) 61 | #define BX_INSTR_PREFETCH_NTA 00 62 | #define BX_INSTR_PREFETCH_T0 01 63 | #define BX_INSTR_PREFETCH_T1 02 64 | #define BX_INSTR_PREFETCH_T2 03 65 | 66 | 67 | namespace BochsCPU 68 | { 69 | /// 70 | /// @brief Src https://github.com/bochs-emu/Bochs/blob/86eff7597d72af912d708a10c0a2000d0b9973c2/bochs/cpu/cpu.h#L312 71 | /// 72 | /// 73 | enum class BochsException : uint32_t 74 | { 75 | BX_DE_EXCEPTION = 0, // Divide Error (fault) 76 | BX_DB_EXCEPTION = 1, // Debug (fault/trap) 77 | BX_BP_EXCEPTION = 3, // Breakpoint 
(trap) 78 | BX_OF_EXCEPTION = 4, // Overflow (trap) 79 | BX_BR_EXCEPTION = 5, // BOUND (fault) 80 | BX_UD_EXCEPTION = 6, 81 | BX_NM_EXCEPTION = 7, 82 | BX_DF_EXCEPTION = 8, 83 | BX_TS_EXCEPTION = 10, 84 | BX_NP_EXCEPTION = 11, 85 | BX_SS_EXCEPTION = 12, 86 | BX_GP_EXCEPTION = 13, 87 | BX_PF_EXCEPTION = 14, 88 | BX_MF_EXCEPTION = 16, 89 | BX_AC_EXCEPTION = 17, 90 | BX_MC_EXCEPTION = 18, 91 | BX_XM_EXCEPTION = 19, 92 | BX_VE_EXCEPTION = 20, 93 | BX_CP_EXCEPTION = 21 // Control Protection (fault) 94 | }; 95 | 96 | enum class InstructionType : uint32_t 97 | { 98 | BX_INSTR_IS_JMP = BX_INSTR_IS_JMP, 99 | BOCHSCPU_INSTR_IS_JMP_INDIRECT = BOCHSCPU_INSTR_IS_JMP_INDIRECT, 100 | BOCHSCPU_INSTR_IS_CALL = BOCHSCPU_INSTR_IS_CALL, 101 | BOCHSCPU_INSTR_IS_CALL_INDIRECT = BOCHSCPU_INSTR_IS_CALL_INDIRECT, 102 | BOCHSCPU_INSTR_IS_RET = BOCHSCPU_INSTR_IS_RET, 103 | BOCHSCPU_INSTR_IS_IRET = BOCHSCPU_INSTR_IS_IRET, 104 | BOCHSCPU_INSTR_IS_INT = BOCHSCPU_INSTR_IS_INT, 105 | BOCHSCPU_INSTR_IS_SYSCALL = BOCHSCPU_INSTR_IS_SYSCALL, 106 | BOCHSCPU_INSTR_IS_SYSRET = BOCHSCPU_INSTR_IS_SYSRET, 107 | BOCHSCPU_INSTR_IS_SYSENTER = BOCHSCPU_INSTR_IS_SYSENTER, 108 | BOCHSCPU_INSTR_IS_SYSEXIT = BOCHSCPU_INSTR_IS_SYSEXIT, 109 | }; 110 | 111 | enum class HookType : uint32_t 112 | { 113 | BOCHSCPU_HOOK_MEM_READ = BOCHSCPU_HOOK_MEM_READ, 114 | BOCHSCPU_HOOK_MEM_WRITE = BOCHSCPU_HOOK_MEM_WRITE, 115 | BOCHSCPU_HOOK_MEM_EXECUTE = BOCHSCPU_HOOK_MEM_EXECUTE, 116 | BOCHSCPU_HOOK_MEM_RW = BOCHSCPU_HOOK_MEM_RW, 117 | BOCHSCPU_HOOK_TLB_CR0 = BOCHSCPU_HOOK_TLB_CR0, 118 | BOCHSCPU_HOOK_TLB_CR3 = BOCHSCPU_HOOK_TLB_CR3, 119 | BOCHSCPU_HOOK_TLB_CR4 = BOCHSCPU_HOOK_TLB_CR4, 120 | BOCHSCPU_HOOK_TLB_TASKSWITCH = BOCHSCPU_HOOK_TLB_TASKSWITCH, 121 | BOCHSCPU_HOOK_TLB_CONTEXTSWITCH = BOCHSCPU_HOOK_TLB_CONTEXTSWITCH, 122 | BOCHSCPU_HOOK_TLB_INVLPG = BOCHSCPU_HOOK_TLB_INVLPG, 123 | BOCHSCPU_HOOK_TLB_INVEPT = BOCHSCPU_HOOK_TLB_INVEPT, 124 | BOCHSCPU_HOOK_TLB_INVVPID = BOCHSCPU_HOOK_TLB_INVVPID, 125 | BOCHSCPU_HOOK_TLB_INVPCID 
= BOCHSCPU_HOOK_TLB_INVPCID, 126 | }; 127 | 128 | enum class OpcodeOperationType 129 | { 130 | BOCHSCPU_OPCODE_ERROR = BOCHSCPU_OPCODE_ERROR, 131 | BOCHSCPU_OPCODE_INSERTED = BOCHSCPU_OPCODE_INSERTED, 132 | }; 133 | 134 | /// 135 | /// @brief https://github.com/lubomyr/bochs/blob/8e0b9abcd81cd24d4d9c68f7fdef2f53bc180d33/cpu/cpu.h#L336 136 | /// 137 | /// 138 | enum class BochsCpuMode : uint32_t 139 | { 140 | BX_MODE_IA32_REAL = 0, // CR0.PE=0 | 141 | BX_MODE_IA32_V8086 = 1, // CR0.PE=1, EFLAGS.VM=1 | EFER.LMA=0 142 | BX_MODE_IA32_PROTECTED = 2, // CR0.PE=1, EFLAGS.VM=0 | 143 | BX_MODE_LONG_COMPAT = 3, // EFER.LMA = 1, CR0.PE=1, CS.L=0 144 | BX_MODE_LONG_64 = 4 // EFER.LMA = 1, CR0.PE=1, CS.L=1 145 | }; 146 | 147 | // For bx_instr_tlb_cntrl 148 | enum class TlbControlType 149 | { 150 | INSTR_MOV_CR0 = BX_INSTR_MOV_CR0, 151 | INSTR_MOV_CR3 = BX_INSTR_MOV_CR3, 152 | INSTR_MOV_CR4 = BX_INSTR_MOV_CR4, 153 | INSTR_TASK_SWITCH = BX_INSTR_TASK_SWITCH, 154 | INSTR_CONTEXT_SWITCH = BX_INSTR_CONTEXT_SWITCH, 155 | INSTR_INVLPG = BX_INSTR_INVLPG, 156 | INSTR_INVEPT = BX_INSTR_INVEPT, 157 | INSTR_INVVPID = BX_INSTR_INVVPID, 158 | INSTR_INVPCID = BX_INSTR_INVPCID, 159 | }; 160 | 161 | // For bx_instr_cache_cntrl 162 | enum class CacheControlType 163 | { 164 | INSTR_INVD = BX_INSTR_INVD, 165 | INSTR_WBINVD = BX_INSTR_WBINVD, 166 | }; 167 | 168 | // For bx_instr_prefetch_hint (what) 169 | enum class PrefetchType 170 | { 171 | INSTR_PREFETCH_NTA = BX_INSTR_PREFETCH_NTA, 172 | INSTR_PREFETCH_T0 = BX_INSTR_PREFETCH_T0, 173 | INSTR_PREFETCH_T1 = BX_INSTR_PREFETCH_T1, 174 | INSTR_PREFETCH_T2 = BX_INSTR_PREFETCH_T2, 175 | }; 176 | 177 | namespace Callbacks 178 | { 179 | namespace Memory 180 | { 181 | void 182 | missing_page_cb(uint64_t gpa); 183 | } // namespace Memory 184 | 185 | void 186 | before_execution_cb(context_t* ctx, uint32_t cpu_id, void* insn); 187 | 188 | void 189 | after_execution_cb(context_t* ctx, uint32_t cpu_id, void* insn); 190 | 191 | void 192 | reset_cb(context_t* 
ctx, uint32_t cpu_id, unsigned int type); 193 | 194 | void 195 | hlt_cb(context_t* ctx, uint32_t cpu_id); 196 | 197 | void 198 | mwait_cb(context_t* ctx, uint32_t cpu_id, uint64_t addr, uintptr_t len, uint32_t flags); 199 | 200 | void 201 | cnear_branch_taken_cb(context_t* ctx, uint32_t cpu_id, uint64_t branch_eip, uint64_t new_branch_eip); 202 | 203 | void 204 | cnear_branch_not_taken_cb(context_t* ctx, uint32_t cpu_id, uint64_t branch_eip, uint64_t new_branch_eip); 205 | 206 | void 207 | ucnear_branch_cb(context_t* ctx, uint32_t cpu_id, unsigned what, uint64_t branch_eip, uint64_t new_eip); 208 | 209 | void 210 | far_branch_cb(context_t* ctx, uint32_t cpu_id, uint32_t, uint16_t, uint64_t, uint16_t, uint64_t); 211 | 212 | void 213 | vmexit_cb(context_t* ctx, uint32_t cpu_id, uint32_t, uint64_t); 214 | 215 | void 216 | interrupt_cb(context_t* ctx, uint32_t cpu_id, unsigned); 217 | 218 | void 219 | hw_interrupt_cb(context_t* ctx, uint32_t cpu_id, unsigned, uint16_t, uint64_t); 220 | 221 | void 222 | clflush_cb(context_t* ctx, uint32_t cpu_id, uint64_t, uint64_t); 223 | 224 | void 225 | tlb_cntrl_cb(context_t* ctx, uint32_t cpu_id, unsigned, uint64_t); 226 | 227 | void 228 | cache_cntrl_cb(context_t* ctx, uint32_t cpu_id, unsigned); 229 | 230 | void 231 | prefetch_hint_cb(context_t* ctx, uint32_t cpu_id, unsigned, unsigned, uint64_t); 232 | 233 | void 234 | wrmsr_cb(context_t* ctx, uint32_t cpu_id, unsigned, uint64_t); 235 | 236 | void 237 | repeat_iteration_cb(context_t* ctx, uint32_t cpu_id, void*); 238 | 239 | void 240 | lin_access_cb(context_t* ctx, uint32_t cpu_id, uint64_t, uint64_t, uintptr_t, uint32_t, uint32_t); 241 | 242 | void 243 | phy_access_cb(context_t* ctx, uint32_t cpu_id, uint64_t, uintptr_t, uint32_t, uint32_t); 244 | 245 | void 246 | inp_cb(context_t* ctx, uint16_t cpu_id, uintptr_t); 247 | 248 | void 249 | inp2_cb(context_t* ctx, uint16_t cpu_id, uintptr_t, unsigned); 250 | 251 | void 252 | outp_cb(context_t* ctx, uint16_t cpu_id, uintptr_t, 
unsigned); 253 | 254 | void 255 | opcode_cb(context_t* ctx, uint32_t cpu_id, void*, uint8_t*, uintptr_t, bool, bool); 256 | 257 | void 258 | exception_cb(context_t* ctx, uint32_t cpu_id, unsigned vector, unsigned error_code); 259 | 260 | } // namespace Callbacks 261 | 262 | 263 | namespace Cpu 264 | { 265 | 266 | enum class SegmentRegisterFlag : uint16_t 267 | { 268 | RPL0 = 0, // Low-bit for Requested Privilege Level 269 | RPL1 = 1, // High-bit for Requested Privilege Level 270 | }; 271 | 272 | 273 | enum class SegmentFlag : uint16_t 274 | { 275 | /// CS - AMD Manual Vol2 - 4.7.2 276 | /// DS - AMD Manual Vol2 - 4.7.3 277 | A = 0, // Accessed bit 278 | R = 1, // Readable bit - CS only 279 | W = 1, // Writable bit - DS/ES/FS/SS only 280 | C = 2, // Conforming bit 281 | D = 2, // Expand-down (Data) 282 | E = 3, // Executable bit (Code - 1) 283 | S = 4, // SegmentType bit - CS/SS only (1) 284 | DPL0 = 5, // Low-bit for Descriptor Privilege Level 285 | DPL1 = 6, // High-bit for Descriptor Privilege Level 286 | P = 7, // Present bit 287 | AVL = 12, // Available bit 288 | L = 13, // Long bit - CS only 289 | DB = 14, // (32b) Default-Operand Size (D) Bit - CS only (1) 290 | G = 15, // Granularity (G) Bit - CS only 291 | }; 292 | 293 | 294 | enum class ControlRegisterFlag : uint64_t 295 | { 296 | /// CR0 - AMD Manual Vol2 - 3.1.1 297 | PG = 31, // Paging R/W 298 | CD = 30, // Cache Disable R/W 299 | NW = 29, // Not Writethrough R/W 300 | AM = 18, // Alignment Mask R/W 301 | WP = 16, // Write Protect R/W 302 | NE = 5, // Numeric Error R/W 303 | ET = 4, // Extension Type R 304 | TS = 3, // Task Switched R/W 305 | EM = 2, // Emulation R/W 306 | MP = 1, // Monitor Coprocessor R/W 307 | PE = 0, // Protection Enabled R/W 308 | 309 | 310 | /// CR4 - AMD Manual Vol2 - 3.7.1 311 | OSXSAVE = 18, // XSAVE and Processor Extended States Enable Bit R/W 312 | FSGSBASE = 16, // Enable RDFSBASE, RDGSBASE, WRFSBASE, and WRGSBASE instructions R/W 313 | OSXMMEXCPT = 10, // Operating System 
Unmasked Exception Support R/W 314 | OSFXSR = 9, // Operating System FXSAVE/FXRSTOR Support R/W 315 | PCE = 8, // Performance-Monitoring Counter Enable R/W 316 | PGE = 7, // Page-Global Enable R/W 317 | MCE = 6, // Machine Check Enable R/W 318 | PAE = 5, // Physical-Address Extension R/W 319 | PSE = 4, // Page Size Extensions R/W 320 | DE = 3, // Debugging Extensions R/W 321 | TSD = 2, // Time Stamp Disable R/W 322 | PVI = 1, // Protected-Mode Virtual Interrupts R/W 323 | VME = 0, // Virtual-8086 Mode Extensions R/W 324 | 325 | 326 | /// XCR0 - AMD Manual Vol2 - 3.7.1 327 | X = 63, // Reserved specifically for XCR0 bit vector expansion. 328 | LWP = 62, // When set, Lightweight Profiling (LWP) extensions are enabled and XSAVE/XRSTOR supports LWP state 329 | // management. 330 | YMM = 2, // When set, 256-bit SSE state management is supported by XSAVE/XRSTOR. Must be set to enable AVX 331 | // extensions 332 | SSE = 1, // When set, 128-bit SSE state management is supported by XSAVE/XRSTOR. This bit must be set if YMM is set. 333 | // Must be set to enable AVX extensions. 334 | x87 = 0, // x87 FPU state management is supported by XSAVE/XRSTOR. Must be set to 1. 
335 | }; 336 | 337 | enum class FlagRegisterFlag : uint64_t 338 | { 339 | // RFLAGS - AMD Manual Vol2 - 3.8 340 | ID = 21, // ID Flag R/W 341 | VIP = 20, // Virtual Interrupt Pending R/W 342 | VIF = 19, // Virtual Interrupt Flag R/W 343 | AC = 18, // Alignment Check R/W 344 | VM = 17, // Virtual-8086 Mode R/W 345 | RF = 16, // Resume Flag R/W 346 | Reserved4 = 15, // Read as Zero 347 | NT = 14, // Nested Task R/W 348 | IOPL2 = 13, // IOPL I/O Privilege Level R/W 349 | IOPL1 = 12, // IOPL I/O Privilege Level R/W 350 | OF = 11, // Overflow Flag R/W 351 | DF = 10, // Direction Flag R/W 352 | IF = 9, // Interrupt Flag R/W 353 | TF = 8, // Trap Flag R/W 354 | SF = 7, // Sign Flag R/W 355 | ZF = 6, // Zero Flag R/W 356 | Reserved3 = 5, // Read as Zero 357 | AF = 4, // Auxiliary Flag R/W 358 | Reserved2 = 3, // Read as Zero 359 | PF = 2, // Parity Flag R/W 360 | Reserved1 = 1, // Read as One 361 | CF = 0, // Carry Flag R/W 362 | }; 363 | 364 | enum class FeatureRegisterFlag : uint64_t 365 | { 366 | TCE = 15, // Translation Cache Extension R/W 367 | FFXSR = 14, // Fast FXSAVE/FXRSTOR R/W 368 | LMSLE = 13, // Long Mode Segment Limit Enable R/W 369 | SVME = 12, // Secure Virtual Machine Enable R/W 370 | NXE = 11, // No-Execute Enable R/W 371 | LMA = 10, // Long Mode Active R/W 372 | LME = 8, // Long Mode Enable R/W 373 | SCE = 0, // System Call Extensions R/W 374 | }; 375 | 376 | struct ControlRegister : std::bitset<64> 377 | { 378 | }; 379 | 380 | struct FlagRegister : std::bitset<64> 381 | { 382 | }; 383 | 384 | struct FeatureRegister : std::bitset<64> 385 | { 386 | }; 387 | 388 | struct SegmentRegisterFlags : std::bitset<2> 389 | { 390 | }; 391 | 392 | struct SegmentFlags : std::bitset<16> 393 | { 394 | }; 395 | 396 | 397 | static uint32_t g_sessionId = 0; 398 | 399 | struct CPU 400 | { 401 | CPU() 402 | { 403 | this->id = g_sessionId++; 404 | // this->__cpu = ::bochscpu_cpu_new(this->id); 405 | this->__cpu = ::bochscpu_cpu_new(0); 406 | if ( !this->__cpu ) 407 | throw 
std::runtime_error("Invalid CPU ID"); 408 | dbg("Created CPU#%lu at %#x", this->id, this->__cpu); 409 | } 410 | 411 | ~CPU() 412 | { 413 | dbg("Destroying CPU#%lu at %#x", this->id, this->__cpu); 414 | ::bochscpu_cpu_delete(this->__cpu); 415 | this->__cpu = nullptr; 416 | } 417 | 418 | uint32_t id {0}; 419 | bochscpu_cpu_t __cpu {nullptr}; 420 | }; 421 | } // namespace Cpu 422 | 423 | 424 | namespace Memory 425 | { 426 | 427 | enum class Access : uint32_t 428 | { 429 | Read = (uint32_t)BochsCPU::HookType::BOCHSCPU_HOOK_MEM_READ, 430 | Write = (uint32_t)BochsCPU::HookType::BOCHSCPU_HOOK_MEM_WRITE, 431 | Execute = (uint32_t)BochsCPU::HookType::BOCHSCPU_HOOK_MEM_EXECUTE, 432 | }; 433 | 434 | uintptr_t 435 | PageSize(); 436 | 437 | uint64_t 438 | AlignAddressToPage(uint64_t va); 439 | 440 | uint64_t 441 | AllocatePage(); 442 | 443 | bool 444 | FreePage(uint64_t addr); 445 | 446 | 447 | // 448 | // @ref AMD Programmer's Manual Volume 2, Figure 5.17 449 | // 450 | 451 | struct PageTableEntry 452 | { 453 | enum class Flag 454 | { 455 | Present = 0, 456 | Writable = 1, 457 | User = 2, 458 | WriteThrough = 3, 459 | CacheDisabled = 4, 460 | Accessed = 5, 461 | Dirty = 6, 462 | AttributeTable = 7, 463 | Global = 8, 464 | NX = 63, 465 | }; 466 | 467 | uint64_t Address {}; 468 | std::bitset<64> Flags {}; 469 | 470 | PageTableEntry() = default; 471 | }; 472 | 473 | struct PageTable 474 | { 475 | enum class Flag 476 | { 477 | Present = 0, 478 | Writable = 1, 479 | User = 2, 480 | WriteThrough = 3, 481 | CacheDisabled = 4, 482 | Accessed = 5, 483 | Size = 7, 484 | NX = 63, 485 | }; 486 | 487 | std::array, 512> Entries {}; 488 | std::bitset<64> Flags {}; 489 | 490 | PageTable() = default; 491 | }; 492 | 493 | struct PageDirectory 494 | { 495 | enum class Flag 496 | { 497 | Present = 0, 498 | Writable = 1, 499 | User = 2, 500 | WriteThrough = 3, 501 | CacheDisabled = 4, 502 | Accessed = 5, 503 | Size = 7, 504 | NX = 63, 505 | }; 506 | 507 | std::array, 512> Entries {}; 508 | 
std::bitset<64> Flags {}; 509 | 510 | PageDirectory() = default; 511 | }; 512 | 513 | struct PageDirectoryPointerTable 514 | { 515 | enum class Flag : int 516 | { 517 | Present = 0, 518 | Writable = 1, 519 | User = 2, 520 | WriteThrough = 3, 521 | CacheDisabled = 4, 522 | Accessed = 5, 523 | Size = 7, 524 | NX = 63, 525 | }; 526 | 527 | std::array, 512> Entries {}; 528 | std::bitset<64> Flags {}; 529 | 530 | PageDirectoryPointerTable() = default; 531 | }; 532 | 533 | 534 | class PageMapLevel4Table 535 | { 536 | public: 537 | enum class Flag : int 538 | { 539 | Present = 0, 540 | Writable = 1, 541 | User = 2, 542 | WriteThrough = 3, 543 | CacheDisabled = 4, 544 | Accessed = 5, 545 | Size = 7, 546 | NX = 63, 547 | }; 548 | 549 | PageMapLevel4Table() = default; 550 | 551 | ~PageMapLevel4Table(); 552 | 553 | std::optional 554 | Translate(uint64_t va); 555 | 556 | void 557 | Insert(uint64_t va, uint64_t pa, int type); 558 | 559 | 560 | std::vector> 561 | Commit(uint64_t BasePA); 562 | 563 | void 564 | Decommit(); 565 | 566 | public: // members 567 | std::array, 512> Entries {}; 568 | std::bitset<64> Flags {}; 569 | 570 | 571 | private: 572 | uint64_t 573 | PageMapLevel4Index(uint64_t va); 574 | 575 | uint64_t 576 | PageDirectoryPointerTableIndex(uint64_t va); 577 | 578 | uint64_t 579 | PageDirectoryIndex(uint64_t va); 580 | 581 | uint64_t 582 | PageTableIndex(uint64_t va); 583 | 584 | uint64_t 585 | PageOffset(uint64_t va); 586 | 587 | private: 588 | std::vector m_AllocatedPages {}; 589 | }; 590 | 591 | static inline std::unique_ptr> missing_page_handler; 592 | 593 | static void 594 | missing_page_cb(uint64_t gpa) 595 | { 596 | if ( BochsCPU::Memory::missing_page_handler ) 597 | { 598 | dbg("Missing GPA=%#llx", gpa); 599 | (*missing_page_handler)(gpa); 600 | } 601 | else 602 | { 603 | err("Missing GPA=%#llx - no handler defined", gpa); 604 | } 605 | } 606 | } // namespace Memory 607 | 608 | 609 | struct Session 610 | { 611 | Session() : cpu {}, auxiliaries {} 612 | { 
613 | ::bochscpu_mem_missing_page(BochsCPU::Memory::missing_page_cb); 614 | BochsCPU::Memory::missing_page_handler = 615 | std::unique_ptr>(&this->missing_page_handler); 616 | } 617 | 618 | ~Session() 619 | { 620 | BochsCPU::Memory::missing_page_handler.release(); 621 | } 622 | 623 | const static inline size_t MaxAuxiliaryVariables = 16; 624 | std::function missing_page_handler; 625 | BochsCPU::Cpu::CPU cpu; 626 | std::array auxiliaries; 627 | }; 628 | 629 | 630 | struct Hook 631 | { 632 | void* ctx {nullptr}; 633 | std::function before_execution; 634 | std::function after_execution; 635 | std::function reset; 636 | std::function hlt; 637 | std::function mwait; 638 | std::function cnear_branch_taken; 639 | std::function cnear_branch_not_taken; 640 | std::function ucnear_branch; 641 | std::function far_branch; 642 | std::function vmexit; 643 | std::function interrupt; 644 | std::function hw_interrupt; 645 | std::function clflush; 646 | std::function tlb_cntrl; 647 | std::function cache_cntrl; 648 | std::function prefetch_hint; 649 | std::function wrmsr; 650 | std::function repeat_iteration; 651 | std::function lin_access; 652 | std::function phy_access; 653 | std::function inp; 654 | std::function inp2; 655 | std::function outp; 656 | std::function opcode; 657 | std::function exception; 658 | }; 659 | 660 | 661 | } // namespace BochsCPU --------------------------------------------------------------------------------