├── .python-version ├── .github ├── CODEOWNERS ├── scripts │ ├── branch-order.txt │ └── cherry-picker.py └── workflows │ ├── test.yml │ ├── lint.yml │ ├── cherry-pick-bot.yml │ └── boot.bash ├── .gitattributes ├── .vscode └── settings.json ├── .gitignore ├── assets ├── ymir.png └── samples │ ├── bzImage │ └── rootfs.cpio.gz ├── _typos.toml ├── pyproject.toml ├── surtr ├── arch │ └── x86 │ │ ├── arch.zig │ │ ├── asm.zig │ │ └── page.zig ├── arch.zig ├── log.zig ├── defs.zig └── boot.zig ├── ymir ├── arch.zig ├── interrupts.zig ├── arch │ └── x86 │ │ ├── vmx │ │ ├── arch.zig │ │ ├── vmc.zig │ │ ├── cpuid.zig │ │ ├── cr.zig │ │ ├── msr.zig │ │ ├── asm.zig │ │ ├── io.zig │ │ └── ept.zig │ │ ├── vmx.zig │ │ ├── apic.zig │ │ ├── arch.zig │ │ ├── serial.zig │ │ ├── idt.zig │ │ ├── isr.zig │ │ ├── cpuid.zig │ │ ├── pic.zig │ │ ├── interrupt.zig │ │ └── gdt.zig ├── ymir.zig ├── linker.ld ├── spin.zig ├── panic.zig ├── bits.zig ├── log.zig ├── serial.zig ├── main.zig ├── mem.zig ├── mem │ ├── BinAllocator.zig │ └── PageAllocator.zig ├── linux.zig └── vmx.zig ├── ymirsh └── main.zig ├── .travis.yml ├── LICENSE └── README.md /.python-version: -------------------------------------------------------------------------------- 1 | 3.12 2 | -------------------------------------------------------------------------------- /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | * @smallkirby 2 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | assets/samples/** filter=lfs diff=lfs merge=lfs -text 2 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "typos.config": "_typos.toml" 3 | } 4 | 
-------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /.zig-cache 2 | /zig-out 3 | /docs 4 | 5 | .gdb_history 6 | .venv 7 | -------------------------------------------------------------------------------- /assets/ymir.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/smallkirby/ymir/HEAD/assets/ymir.png -------------------------------------------------------------------------------- /_typos.toml: -------------------------------------------------------------------------------- 1 | [default.extend-words] 2 | # ba: acronym for BinAllocator. 3 | ba = "ba" 4 | # IST: acronym for Interrupt Stack Table. 5 | ist = "ist" 6 | ists = "ists" 7 | -------------------------------------------------------------------------------- /assets/samples/bzImage: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:ef6d797e94d7cdc2fd4b48a9d8cca051a7218f2f94bee6b3c9fef298d398b872 3 | size 13132800 4 | -------------------------------------------------------------------------------- /assets/samples/rootfs.cpio.gz: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:0a4f86ef5dc53819a1cea4108c38033572626277dcb34ed3fcb9682a55e4e979 3 | size 3526571 4 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "ymir-cherry-pick-bot" 3 | version = "0.0.0" 4 | requires-python = ">=3.12" 5 | dependencies = [ 6 | "pygithub>=2.5.0", 7 | ] 8 | -------------------------------------------------------------------------------- /surtr/arch/x86/arch.zig: 
-------------------------------------------------------------------------------- 1 | pub const page = @import("page.zig"); 2 | 3 | /// Enable NX-bit. FIXME(review): 0xC000_0080 is the IA32_EFER MSR *number*, not a physical/virtual address — EFER is not memory-mapped, so the pointer write below cannot set EFER.NXE (bit 11) and will fault or clobber unrelated memory; this needs rdmsr/wrmsr inline asm (cf. surtr/arch/x86/asm.zig helpers). TODO confirm intended target. 4 | pub fn enableNxBit() void { 5 | const efer_reg: *volatile u64 = @ptrFromInt(0xC000_0080); 6 | efer_reg.* = efer_reg.* | (1 << 11); 7 | } 8 | -------------------------------------------------------------------------------- /surtr/arch.zig: -------------------------------------------------------------------------------- 1 | //! Thin wrapper to access arch-specific modules. 2 | 3 | const builtin = @import("builtin"); 4 | pub usingnamespace switch (builtin.target.cpu.arch) { 5 | .x86_64 => @import("arch/x86/arch.zig"), 6 | else => @compileError("Unsupported architecture."), 7 | }; 8 | -------------------------------------------------------------------------------- /ymir/arch.zig: -------------------------------------------------------------------------------- 1 | //! Thin wrapper to access arch-specific modules. 2 | 3 | const builtin = @import("builtin"); 4 | 5 | // Export arch-specific implementation. 6 | pub usingnamespace switch (builtin.target.cpu.arch) { 7 | .x86_64 => @import("arch/x86/arch.zig"), 8 | else => @compileError("Unsupported architecture."), 9 | }; 10 | -------------------------------------------------------------------------------- /ymir/interrupts.zig: -------------------------------------------------------------------------------- 1 | //! User-defined interrupts. 2 | 3 | const arch = @import("ymir").arch; 4 | 5 | /// The start of user-defined interrupts number. 
6 | pub const user_intr_base = arch.intr.num_system_exceptions; 7 | 8 | pub const pic_timer = 0 + user_intr_base; 9 | pub const pic_keyboard = 1 + user_intr_base; 10 | pub const pic_secondary = 2 + user_intr_base; 11 | pub const pic_serial2 = 3 + user_intr_base; 12 | pub const pic_serial1 = 4 + user_intr_base; 13 | -------------------------------------------------------------------------------- /ymir/arch/x86/vmx/arch.zig: -------------------------------------------------------------------------------- 1 | //! Expected to be used only within x86/vmx. 2 | //! Should not be exported outside arch directory. 3 | 4 | pub const am = @import("../asm.zig"); 5 | pub const apic = @import("../apic.zig"); 6 | pub const cpuid = @import("../cpuid.zig"); 7 | pub const gdt = @import("../gdt.zig"); 8 | pub const intr = @import("../interrupt.zig"); 9 | pub const isr = @import("../isr.zig"); 10 | pub const pg = @import("../page.zig"); 11 | pub const pic = @import("../pic.zig"); 12 | pub const serial = @import("../serial.zig"); 13 | -------------------------------------------------------------------------------- /surtr/arch/x86/asm.zig: -------------------------------------------------------------------------------- 1 | pub inline fn loadCr3(cr3: u64) void { 2 | asm volatile ( 3 | \\mov %[cr3], %%cr3 4 | : 5 | : [cr3] "r" (cr3), 6 | ); 7 | } 8 | 9 | pub inline fn readCr3() u64 { 10 | var cr3: u64 = undefined; 11 | asm volatile ( 12 | \\mov %%cr3, %[cr3] 13 | : [cr3] "=r" (cr3), 14 | ); 15 | return cr3; 16 | } 17 | 18 | pub inline fn flushTlbSingle(virt: u64) void { 19 | asm volatile ( 20 | \\invlpg (%[virt]) 21 | : 22 | : [virt] "r" (virt), 23 | : "memory" 24 | ); 25 | } 26 | -------------------------------------------------------------------------------- /ymirsh/main.zig: -------------------------------------------------------------------------------- 1 | const std = @import("std"); 2 | const option = @import("option"); 3 | const log = std.log.scoped(.main); 4 | 5 | pub const 
default_log_options = std.Options{ 6 | .log_level = switch (option.log_level) { 7 | .debug => .debug, 8 | .info => .info, 9 | .warn => .warn, 10 | .err => .err, 11 | }, 12 | .logFn = log, // FIXME(review): `log` here is `std.log.scoped(.main)` — a logging namespace, not a log function; std.Options.logFn expects fn(level, scope, fmt, args) void. Also, for these options to take effect in the root module the decl should be named `std_options` (cf. how surtr/ymir mains consume default_log_options) — TODO confirm build wiring. 13 | }; 14 | 15 | fn asmVmcall(nr: u64) void { 16 | asm volatile ( 17 | \\movq %[nr], %%rax 18 | \\vmcall 19 | : 20 | : [nr] "rax" (nr), // FIXME(review): "rax" is not valid Zig/LLVM constraint syntax — a register constraint is spelled "{rax}"; given the explicit movq above, a generic "r" input is likely what was intended. TODO confirm. 21 | : "memory" 22 | ); 23 | } 24 | 25 | pub fn main() !void { 26 | asmVmcall(0); 27 | } 28 | -------------------------------------------------------------------------------- /.github/scripts/branch-order.txt: -------------------------------------------------------------------------------- 1 | whiz-vmm-vmc 2 | whiz-vmm-initramfs 3 | whiz-vmm-intr_injection 4 | whiz-vmm-io 5 | whiz-vmm-cr 6 | whiz-vmm-msr 7 | whiz-vmm-cpuid 8 | whiz-vmm-ept 9 | whiz-vmm-linux_boot 10 | whiz-vmm-vmentry_vmexit 11 | whiz-vmm-vmcs 12 | whiz-vmm-vmlaunch 13 | whiz-vmm-vmx_root 14 | whiz-ymir-general_allocator 15 | whiz-ymir-pic 16 | whiz-ymir-page_allocator 17 | whiz-ymir-paging 18 | whiz-ymir-panic 19 | whiz-ymir-interrupt 20 | whiz-ymir-gdt 21 | whiz-ymir-serial_logsystem 22 | whiz-ymir-bit_and_test 23 | whiz-ymir-serial_output 24 | whiz-surtr-jump_to_ymir 25 | whiz-surtr-cleanup_memmap 26 | whiz-surtr-load_kernel 27 | whiz-surtr-simple_pg 28 | whiz-surtr-parse_kernel 29 | whiz-surtr-uefi_log 30 | whiz-surtr-hello_uefi 31 | -------------------------------------------------------------------------------- /ymir/arch/x86/vmx.zig: -------------------------------------------------------------------------------- 1 | const std = @import("std"); 2 | const Allocator = std.mem.Allocator; 3 | 4 | const ymir = @import("ymir"); 5 | const mem = ymir.mem; 6 | 7 | const vmx = @import("vmx/common.zig"); 8 | const ept = @import("vmx/ept.zig"); 9 | 10 | pub const Vcpu = @import("vmx/vcpu.zig").Vcpu; 11 | pub const VmxError = vmx.VmxError; 12 | 13 | /// Maps host pages to guest. 14 | /// Host pages are mapped to 0 in the guest. 
15 | pub fn mapGuest(host_pages: []u8, allocator: Allocator) VmxError!ept.Eptp { 16 | return ept.initEpt( 17 | 0, 18 | mem.virt2phys(host_pages.ptr), 19 | host_pages.len, 20 | allocator, 21 | ); 22 | } 23 | 24 | test { 25 | std.testing.refAllDeclsRecursive(@This()); 26 | } 27 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | branches: 2 | only: 3 | - master 4 | language: cpp 5 | dist: noble 6 | 7 | env: 8 | # Both variables on one matrix entry: two separate list items would expand into a two-job matrix, each job missing one of the variables. 9 | - ZIG_VERSION=0.14.0 BUILD_TYPE=ReleaseFast 10 | 11 | before_install: 12 | - sudo apt install -y sudo binutils curl wget tar xz-utils qemu-system-x86 qemu-utils ovmf 13 | - curl https://ziglang.org/download/$ZIG_VERSION/zig-linux-x86_64-$ZIG_VERSION.tar.xz -L -o zig.tar.xz 14 | - tar -xf zig.tar.xz 15 | - export PATH="$(pwd)/zig-linux-x86_64-$ZIG_VERSION:$PATH" 16 | 17 | - git lfs pull 18 | - sudo apt update 19 | 20 | - zig version 21 | - uname -a 22 | - ls /dev/kvm -la 23 | 24 | script: 25 | - zig build install-samples 26 | - zig build install -Dlog_level=debug -Doptimize=$BUILD_TYPE 27 | - bash ./.github/workflows/boot.bash 28 | -------------------------------------------------------------------------------- /ymir/arch/x86/apic.zig: -------------------------------------------------------------------------------- 1 | const ymir = @import("ymir"); 2 | const phys2virt = ymir.mem.phys2virt; 3 | 4 | /// Local APIC ID registers 5 | const lapic_id_register: u64 = 0xFEE0_0020; 6 | /// Local APIC version register 7 | const lapic_version_register: u64 = 0xFEE0_0030; 8 | /// Task Priority Register 9 | const tpr: u64 = 0xFEE0_0080; 10 | /// Arbitration Priority Register 11 | const apr: u64 = 0xFEE0_0090; 12 | /// Processor Priority Register 13 | const ppr: u64 = 0xFEE0_00A0; 14 | /// EOI Register 15 | const eoi: u64 = 0xFEE0_00B0; 16 | /// LVT (Local Vector Table) Timer Register 17 | const lvt_timer_register: u64 = 0xFEE0_0320; 18 | /// 
Initial Count Register for Timer 19 | const initial_count_register: u64 = 0xFEE0_0380; 20 | /// Current Count Register for Timer 21 | const current_count_register: u64 = 0xFEE0_0390; 22 | /// Divide Configuration Register for Timer 23 | const divide_config_register: u64 = 0xFEE0_03E0; 24 | 25 | /// Get a Local APIC ID of the current core. 26 | pub fn getLapicId() u8 { 27 | const addr: *u32 = @ptrFromInt(phys2virt(lapic_id_register)); 28 | return @truncate(addr.* >> 24); 29 | } 30 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Unit Tests 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | pull_request: 8 | branches: 9 | - master 10 | 11 | env: 12 | ZIG_VERSION: 0.14.0 13 | LLVM_VERSION: 19 14 | 15 | jobs: 16 | test: 17 | runs-on: ubuntu-24.04 18 | steps: 19 | - uses: actions/checkout@v3 20 | 21 | - name: Install dependency 22 | run: | 23 | wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | sudo apt-key add - 24 | sudo add-apt-repository 'deb http://apt.llvm.org/noble/ llvm-toolchain-noble-${{env.LLVM_VERSION}} main' 25 | sudo apt update 26 | sudo apt install -y \ 27 | git sudo binutils curl wget tar xz-utils 28 | 29 | - name: Install zig 30 | run: | 31 | curl https://ziglang.org/download/${{env.ZIG_VERSION}}/zig-linux-x86_64-${{env.ZIG_VERSION}}.tar.xz -L -o zig.tar.xz 32 | tar -xf zig.tar.xz 33 | echo "$(pwd)/zig-linux-x86_64-${{env.ZIG_VERSION}}" >> $GITHUB_PATH 34 | 35 | - name: Unit Tests 36 | run: | 37 | zig build test --summary all 38 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 smallkirby 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the 
"Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /.github/workflows/lint.yml: -------------------------------------------------------------------------------- 1 | name: Lint 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | pull_request: 8 | branches: 9 | - master 10 | 11 | env: 12 | ZIG_VERSION: 0.14.0 13 | LLVM_VERSION: 19 14 | 15 | jobs: 16 | lint: 17 | runs-on: ubuntu-24.04 18 | steps: 19 | - uses: actions/checkout@v3 20 | 21 | - name: Install clang-format 22 | run: | 23 | wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | sudo apt-key add - 24 | sudo add-apt-repository 'deb http://apt.llvm.org/noble/ llvm-toolchain-noble main' 25 | sudo apt update 26 | sudo apt install clang-format-${{env.LLVM_VERSION}} make 27 | 28 | - name: Install zig 29 | run: | 30 | curl https://ziglang.org/download/${{env.ZIG_VERSION}}/zig-linux-x86_64-${{env.ZIG_VERSION}}.tar.xz -L -o zig.tar.xz 31 | tar -xf zig.tar.xz 32 | echo "$(pwd)/zig-linux-x86_64-${{env.ZIG_VERSION}}" >> $GITHUB_PATH 33 | 34 | - name: 
Lint Zig 35 | run: | 36 | zig fmt --check **/*.zig --exclude .zig-cache/**/*.zig 37 | 38 | - name: Spell Check 39 | uses: crate-ci/typos@v1.29.5 40 | with: 41 | files: .github/** surtr/** ymir/** *.zig *.zig.zon *.md 42 | config: _typos.toml 43 | -------------------------------------------------------------------------------- /.github/workflows/cherry-pick-bot.yml: -------------------------------------------------------------------------------- 1 | name: Cherry Pick Bot 2 | 3 | on: 4 | issue_comment: 5 | types: [created] 6 | 7 | jobs: 8 | cherry-pick: 9 | if: contains(github.event.comment.body, '@.ymir cherry-pick') && github.event.comment.user.login == 'smallkirby' 10 | runs-on: ubuntu-latest 11 | steps: 12 | - uses: actions/checkout@v3 13 | 14 | - name: Generate a token 15 | id: generate-token 16 | uses: actions/create-github-app-token@v1 17 | with: 18 | app_id: ${{ vars.APP_ID }} 19 | private_key: ${{ secrets.APP_PRIVATE_KEY }} 20 | 21 | - name: Setup uv 22 | uses: astral-sh/setup-uv@v5 23 | 24 | - name: Setup Python 25 | run: | 26 | uv python install 27 | uv sync --all-extras 28 | 29 | - name: Setup Git 30 | run: | 31 | git config --global user.name 'smallkirby-ymir[bot]' 32 | git config --global user.email '195134948+smallkirby-ymir[bot]@users.noreply.github.com' 33 | git fetch --all 34 | 35 | - name: Run Cherry Pick Script 36 | env: 37 | GITHUB_TOKEN: ${{ steps.generate-token.outputs.token }} 38 | PR_NUMBER: ${{ github.event.issue.number }} 39 | REPO: ${{ github.repository }} 40 | run: uv run .github/scripts/cherry-picker.py 41 | -------------------------------------------------------------------------------- /.github/workflows/boot.bash: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -o pipefail 4 | 5 | TIMEOUT=60 6 | TMPFILE=$(mktemp) 7 | 8 | HEYSTACK=( 9 | "Starting syslogd: OK" 10 | "Starting klogd: OK" 11 | "Running sysctl: OK" 12 | "seedrng: applet not found" 13 | ) 14 | 15 | function 
check_success() 16 | { 17 | ret=0 18 | 19 | for needle in "${HEYSTACK[@]}"; do 20 | if ! grep -qF -- "$needle" "$TMPFILE"; then 21 | echo "[ERROR] Missing: '$needle'" 22 | ret=1 23 | fi 24 | done 25 | 26 | return $ret 27 | } 28 | 29 | function cleanup() 30 | { 31 | rm -f "$TMPFILE" 32 | set +o pipefail 33 | } 34 | 35 | echo "[+] stdout/stderr will be saved to $TMPFILE" 36 | 37 | echo "[+] Running Norn on QEMU..." 38 | timeout --foreground $TIMEOUT \ 39 | qemu-system-x86_64 \ 40 | -m 512M \ 41 | -bios /usr/share/ovmf/OVMF.fd \ 42 | -drive file=fat:rw:zig-out/img,format=raw \ 43 | -nographic \ 44 | -serial mon:stdio \ 45 | -no-reboot \ 46 | -cpu host \ 47 | -enable-kvm \ 48 | 2>&1 \ 49 | | tee "$TMPFILE" 50 | 51 | ret=$? 52 | 53 | echo "" 54 | 55 | if [ $ret -eq 124 ]; then 56 | echo "[-] Timeout." 57 | fi 58 | 59 | echo "[+] Checking output..." 60 | if ! check_success; then 61 | echo "[ERROR] Output does not contain expected strings." 62 | cleanup 63 | exit 1 64 | fi 65 | echo "[+] All expected strings found." 66 | 67 | cleanup 68 | -------------------------------------------------------------------------------- /ymir/ymir.zig: -------------------------------------------------------------------------------- 1 | const builtin = @import("builtin"); 2 | pub const is_debug = builtin.mode == .Debug; 3 | 4 | pub const intr = @import("interrupts.zig"); 5 | pub const serial = @import("serial.zig"); 6 | pub const arch = @import("arch.zig"); 7 | pub const klog = @import("log.zig"); 8 | pub const linux = @import("linux.zig"); 9 | pub const mem = @import("mem.zig"); 10 | pub const spin = @import("spin.zig"); 11 | pub const vmx = @import("vmx.zig"); 12 | pub const panic = @import("panic.zig"); 13 | pub const bits = @import("bits.zig"); 14 | 15 | /// Base virtual address of direct mapping. 16 | /// The virtual address starting from the address is directly mapped to the physical address at 0x0. 
17 | pub const direct_map_base = 0xFFFF_8880_0000_0000; 18 | /// Size in bytes of the direct mapping region. 19 | pub const direct_map_size = 512 * mem.gib; 20 | /// The base virtual address of the kernel. 21 | /// The virtual address starting from the address is directly mapped to the physical address at 0x0. 22 | pub const kernel_base = 0xFFFF_FFFF_8000_0000; 23 | 24 | /// Set the default VM. 25 | pub fn setVm(target_vm: *vmx.Vm) void { 26 | panic.setVm(target_vm); 27 | } 28 | 29 | /// Halt endlessly with interrupts disabled. 30 | pub fn endlessHalt() noreturn { 31 | arch.disableIntr(); 32 | while (true) arch.halt(); 33 | } 34 | 35 | test { 36 | @import("std").testing.refAllDeclsRecursive(@This()); 37 | } 38 | -------------------------------------------------------------------------------- /ymir/arch/x86/vmx/vmc.zig: -------------------------------------------------------------------------------- 1 | const std = @import("std"); 2 | const log = std.log.scoped(.vmc); 3 | 4 | const vmx = @import("common.zig"); 5 | const Vcpu = @import("vcpu.zig").Vcpu; 6 | 7 | const VmxError = vmx.VmxError; 8 | 9 | // Font Title: flowerpower.flf 10 | // Font Author: Myflix, LG Beard 11 | /// Logo of Ymir in ASCII art. 12 | const logo = 13 | \\ ____ __ ,---. ,---..-./`) .-------. 14 | \\ \ \ / /| \ / |\ .-.')| _ _ \ 15 | \\ \ _. / ' | , \/ , |/ `-' \| ( ' ) | 16 | \\ _( )_ .' | |\_ /| | `-'`"`|(_ o _) / 17 | \\ ___(_ o _)' | _( )_/ | | .---. | (_,_).' 
__ 18 | \\| |(_,_)' | (_ o _) | | | | | |\ \ | | 19 | \\| `-' / | (_,_) | | | | | | \ `' / 20 | \\ \ / | | | | | | | | \ / 21 | \\ `-..-' '--' '--' '---' ''-' `'-' 22 | ; 23 | 24 | const VmcallNr = enum(u64) { 25 | hello = 0, 26 | 27 | _, 28 | }; 29 | 30 | pub fn handleVmcall(vcpu: *Vcpu) VmxError!void { 31 | const rax = vcpu.guest_regs.rax; 32 | const nr: VmcallNr = @enumFromInt(rax); 33 | 34 | switch (nr) { 35 | .hello => try vmcHello(vcpu), 36 | _ => log.err("Unhandled VMCALL: nr={d}", .{rax}), 37 | } 38 | } 39 | 40 | fn vmcHello(_: *Vcpu) VmxError!void { 41 | log.info("GREETINGS FROM VMX-ROOT...\n{s}\n", .{logo}); 42 | log.info("This OS is hypervisored by Ymir.\n", .{}); 43 | } 44 | -------------------------------------------------------------------------------- /ymir/linker.ld: -------------------------------------------------------------------------------- 1 | KERNEL_VADDR_BASE = 0xFFFFFFFF80000000; 2 | KERNEL_VADDR_TEXT = 0xFFFFFFFF80100000; 3 | 4 | STACK_SIZE = 0x5000; 5 | 6 | PHDRS { 7 | text PT_LOAD; 8 | rodata PT_LOAD; 9 | data PT_LOAD; 10 | bss PT_LOAD; 11 | 12 | __stackguard_upper PT_LOAD FLAGS(4); 13 | __stack PT_LOAD FLAGS(6); 14 | __stackguard_lower PT_LOAD FLAGS(4); 15 | } 16 | 17 | SECTIONS { 18 | . = KERNEL_VADDR_TEXT; 19 | 20 | .text ALIGN(4K) : AT (ADDR(.text) - KERNEL_VADDR_BASE) { 21 | *(.text) 22 | *(.ltext) 23 | } :text 24 | 25 | .rodata ALIGN(4K) : AT (ADDR(.rodata) - KERNEL_VADDR_BASE) { 26 | *(.rodata) 27 | } :rodata 28 | 29 | .data ALIGN(4K) : AT (ADDR(.data) - KERNEL_VADDR_BASE) { 30 | *(.data) 31 | *(.ldata) 32 | } :data 33 | 34 | .bss ALIGN(4K) : AT (ADDR(.bss) - KERNEL_VADDR_BASE) { 35 | *(COMMON) 36 | *(.bss) 37 | *(.lbss) 38 | } :bss 39 | 40 | __stackguard_upper ALIGN(4K) (NOLOAD) : AT (ADDR(__stackguard_upper) - KERNEL_VADDR_BASE) { 41 | . += 4K; 42 | } :__stackguard_upper 43 | 44 | __stack ALIGN(4K) (NOLOAD) : AT (ADDR(__stack) - KERNEL_VADDR_BASE) { 45 | . 
+= STACK_SIZE; 46 | } :__stack 47 | 48 | __stackguard_lower ALIGN(4K) (NOLOAD) : AT (ADDR(__stackguard_lower) - KERNEL_VADDR_BASE) { 49 | __stackguard_lower = .; 50 | . += 4K; 51 | } :__stackguard_lower 52 | } 53 | -------------------------------------------------------------------------------- /ymir/spin.zig: -------------------------------------------------------------------------------- 1 | const is_test = @import("builtin").is_test; 2 | 3 | const atomic = @import("std").atomic; 4 | 5 | const ymir = @import("ymir"); 6 | const arch = ymir.arch; 7 | 8 | pub const SpinLock = struct { 9 | const State = atomic.Value(bool); 10 | 11 | /// State of the spin lock. 12 | /// true when locked, false when unlocked. 13 | _state: State = State.init(false), 14 | 15 | /// Lock the spin lock. 16 | pub inline fn lock(self: *SpinLock) void { 17 | atomic.spinLoopHint(); // NOTE(review): the hint is issued once before the CAS loop; it usually belongs inside the retry loop (arch.pause() below partially serves that role) — TODO confirm intent. 18 | while (self._state.cmpxchgWeak( 19 | false, 20 | true, 21 | .acq_rel, 22 | .monotonic, 23 | ) != null) { 24 | arch.pause(); 25 | } 26 | } 27 | 28 | /// Lock the spin lock and disable IRQ. 29 | /// Must be paired with `unlockRestoreIrq()`. 30 | pub fn lockSaveIrq(self: *SpinLock) u16 { 31 | if (!is_test) { 32 | const mask = arch.pic.getIrqMask(); 33 | arch.pic.setIrqMask(0xFFFF); 34 | lock(self); 35 | return mask; 36 | } else { 37 | lock(self); 38 | return 0; 39 | } 40 | } 41 | 42 | /// Unlock the spin lock. 43 | pub inline fn unlock(self: *SpinLock) void { 44 | self._state.store(false, .release); 45 | } 46 | 47 | /// Unlock the spin lock and restore IRQ mask. 48 | pub fn unlockRestoreIrq(self: *SpinLock, mask: u16) void { 49 | self.unlock(); 50 | if (!is_test) { 51 | arch.pic.setIrqMask(mask); 52 | } 53 | } 54 | }; 55 | -------------------------------------------------------------------------------- /ymir/panic.zig: -------------------------------------------------------------------------------- 1 | //! This module provides a panic implementation. 2 | //! Zig has panic implementations for each target platform. 
3 | //! However, the impl for .freestanding is just a @breakpoint. 4 | //! Therefore, we implement a simple panic handler here. 5 | 6 | const std = @import("std"); 7 | const builtin = std.builtin; 8 | const debug = std.debug; 9 | const log = std.log.scoped(.panic); 10 | const format = std.fmt.format; 11 | 12 | const ymir = @import("ymir"); 13 | const vmx = ymir.vmx; 14 | const arch = ymir.arch; 15 | 16 | /// Implementation of the panic function. 17 | pub const panic_fn = panic; 18 | 19 | /// Instance of the virtual machine. 20 | var vm: ?*vmx.Vm = null; 21 | 22 | /// Flag to indicate that a panic occurred. 23 | var panicked = false; 24 | 25 | /// Set the target VM that is dumped when a panic occurs. 26 | pub fn setVm(target_vm: *vmx.Vm) void { 27 | vm = target_vm; 28 | } 29 | 30 | fn panic(msg: []const u8, _: ?*builtin.StackTrace, _: ?usize) noreturn { 31 | @branchHint(.cold); 32 | 33 | arch.disableIntr(); 34 | 35 | log.err("{s}", .{msg}); 36 | 37 | if (panicked) { 38 | log.err("Double panic detected. 
Halting.", .{}); 39 | ymir.endlessHalt(); 40 | } 41 | panicked = true; 42 | 43 | var it = std.debug.StackIterator.init(@returnAddress(), null); 44 | var ix: usize = 0; 45 | log.err("=== Stack Trace ==============", .{}); 46 | while (it.next()) |frame| : (ix += 1) { 47 | log.err("#{d:0>2}: 0x{X:0>16}", .{ ix, frame }); 48 | } 49 | 50 | if (vm) |v| { 51 | v.vcpu.dump() catch |err| { 52 | log.err("Failed to dump VM information: {?}\n", .{err}); 53 | }; 54 | } 55 | 56 | ymir.endlessHalt(); 57 | } 58 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Ymir: The Type-1 Hypervisor 2 | 3 | ![Zig](https://shields.io/badge/Zig-v0%2E14%2E0-blue?logo=zig&color=F7A41D&style=for-the-badge) 4 | 5 | ![Lint](https://github.com/smallkirby/ymir/actions/workflows/lint.yml/badge.svg) 6 | ![Test](https://github.com/smallkirby/ymir/actions/workflows/test.yml/badge.svg) 7 | ![Boot Linux](https://img.shields.io/travis/com/smallkirby/ymir?style=flat&logo=travis&label=Boot%20Linux) 8 | 9 | Ymir is an Intel VT-x based bare-metal hypervisor written in Zig. 10 | It is minimalistic but can boot the latest Linux kernel (v6.9). 11 | 12 | ![Ymir Logo](assets/ymir.png) 13 | 14 | ## Features 15 | 16 | - Supports Intel x64 architecture. 17 | - Can boot the latest Linux kernel with initramfs. 18 | - Uses EPT (Extended Page Tables) for memory virtualization. 19 | - Super thin (passthrough) and minimalistic hypervisor. 20 | - No dependencies. Everything is written from scratch in Zig. 21 | 22 | ## Limitations / Future Work 23 | 24 | - No support for SMP. 25 | - No support for APIC and other fundamental features. 26 | - Can boot only Linux kernels that use the [x86 boot protocol](https://www.kernel.org/doc/html/v6.1/x86/boot.html). 27 | - Can host only one VM at a time. 28 | - And many more... 
29 | 30 | ## Requirements 31 | 32 | Install `ovmf` package to run UEFI firmware on QEMU: 33 | 34 | ```sh 35 | sudo apt install ovmf 36 | ``` 37 | 38 | ## Development 39 | 40 | ```sh 41 | zig build install-samples 42 | zig build run -Dlog_level=info -Doptimize=ReleaseFast 43 | ``` 44 | 45 | If you want to use your own kernel image and initramfs, 46 | you can put your kernel image (`bzImage`) and initramfs (`initramfs.cpio`) to `/zig-out/img`. 47 | It is tested that Ymir can boot Linux kernel v6.9 that is compiled with `defconfig` of x86-64. 48 | -------------------------------------------------------------------------------- /surtr/log.zig: -------------------------------------------------------------------------------- 1 | //! Log module for Surtr. 2 | //! Surtr outputs logs to the UEFI console output utilizing SimpleTextOutput protocol. 3 | //! You must call `init` function before using this module. 4 | 5 | const std = @import("std"); 6 | const uefi = std.os.uefi; 7 | const stdlog = std.log; 8 | const option = @import("option"); 9 | 10 | const Sto = uefi.protocol.SimpleTextOutput; 11 | 12 | const LogError = error{}; 13 | 14 | const Writer = std.io.Writer( 15 | void, 16 | LogError, 17 | writerFunction, 18 | ); 19 | 20 | /// Default log options. 21 | /// You can override std_options in your main file. 22 | pub const default_log_options = std.Options{ 23 | .log_level = switch (option.log_level) { 24 | .debug => .debug, 25 | .info => .info, 26 | .warn => .warn, 27 | .err => .err, 28 | }, 29 | .logFn = log, 30 | }; 31 | 32 | var con_out: *Sto = undefined; 33 | 34 | /// Initialize bootloader log. 35 | pub fn init(out: *Sto) void { 36 | con_out = out; 37 | } 38 | 39 | fn writerFunction(_: void, bytes: []const u8) LogError!usize { 40 | for (bytes) |b| { 41 | // EFI uses UCS-2 encoding. 
42 | con_out.outputString(&[_:0]u16{b}).err() catch unreachable; 43 | } 44 | return bytes.len; 45 | } 46 | 47 | fn log( 48 | comptime level: stdlog.Level, 49 | comptime scope: @Type(.enum_literal), 50 | comptime fmt: []const u8, 51 | args: anytype, 52 | ) void { 53 | const level_str = comptime switch (level) { 54 | .debug => "[DEBUG]", 55 | .info => "[INFO ]", 56 | .warn => "[WARN ]", 57 | .err => "[ERROR]", 58 | }; 59 | const scope_str = if (scope == .default) ": " else "(" ++ @tagName(scope) ++ "): "; 60 | 61 | std.fmt.format( 62 | Writer{ .context = {} }, 63 | level_str ++ " " ++ scope_str ++ fmt ++ "\r\n", 64 | args, 65 | ) catch unreachable; 66 | } 67 | -------------------------------------------------------------------------------- /ymir/bits.zig: -------------------------------------------------------------------------------- 1 | const std = @import("std"); 2 | 3 | /// Set the integer where only the nth bit is set. 4 | pub fn tobit(T: type, nth: anytype) T { 5 | const val = switch (@typeInfo(@TypeOf(nth))) { 6 | .int, .comptime_int => nth, 7 | .@"enum" => @intFromEnum(nth), 8 | else => @compileError("setbit: invalid type"), // NOTE(review): message says "setbit" but this function is `tobit` — likely a stale name. 9 | }; 10 | return @as(T, 1) << @intCast(val); 11 | } 12 | 13 | /// Check if the nth bit is set. 14 | pub inline fn isset(val: anytype, nth: anytype) bool { 15 | const int_nth = switch (@typeInfo(@TypeOf(nth))) { 16 | .int, .comptime_int => nth, 17 | .@"enum" => @intFromEnum(nth), 18 | else => @compileError("isset: invalid type"), 19 | }; 20 | return ((val >> @intCast(int_nth)) & 1) != 0; 21 | } 22 | 23 | /// Concatenate two values and return a new value with twice the bit width. 
24 | pub inline fn concat(T: type, a: anytype, b: @TypeOf(a)) T { 25 | const U = @TypeOf(a); 26 | const width_T = @typeInfo(T).int.bits; 27 | const width_U = switch (@typeInfo(U)) { 28 | .int => |t| t.bits, 29 | .comptime_int => width_T / 2, 30 | else => @compileError("concat: invalid type"), 31 | }; 32 | if (width_T != width_U * 2) @compileError("concat: invalid type"); 33 | return (@as(T, a) << width_U) | @as(T, b); 34 | } 35 | 36 | const testing = std.testing; 37 | 38 | test "tobit" { 39 | try testing.expectEqual(0b0000_0001, tobit(u8, 0)); 40 | try testing.expectEqual(0b0001_0000, tobit(u8, 4)); 41 | try testing.expectEqual(0b1000_0000, tobit(u8, 7)); 42 | } 43 | 44 | test "isset" { 45 | try testing.expectEqual(true, isset(0b10, 1)); 46 | try testing.expectEqual(false, isset(0b10, 0)); 47 | try testing.expectEqual(true, isset(0b1000_0000, 7)); 48 | try testing.expectEqual(false, isset(0b1000_0000, 99)); 49 | } 50 | 51 | test "concat" { 52 | try testing.expectEqual(0b10, concat(u2, @as(u1, 1), @as(u1, 0))); 53 | try testing.expectEqual(0x1234, concat(u16, 0x12, 0x34)); 54 | } 55 | -------------------------------------------------------------------------------- /ymir/log.zig: -------------------------------------------------------------------------------- 1 | //! This module provides a logging to the serial console. 2 | 3 | const std = @import("std"); 4 | const stdlog = std.log; 5 | const io = std.io; 6 | const option = @import("option"); 7 | 8 | const ymir = @import("ymir"); 9 | const Serial = ymir.serial.Serial; 10 | 11 | /// Instance of the initialized serial console. 12 | var serial: Serial = undefined; 13 | 14 | /// Skeleton for the error type. 15 | /// Not used but required by std.io.Writer interface. 16 | const LogError = error{}; 17 | 18 | const Writer = std.io.Writer( 19 | void, 20 | LogError, 21 | write, 22 | ); 23 | 24 | /// Log options. 25 | /// Can be configured by compile-time options. See build.zig. 
26 | pub const default_log_options = std.Options{ 27 | .log_level = switch (option.log_level) { 28 | .debug => .debug, 29 | .info => .info, 30 | .warn => .warn, 31 | .err => .err, 32 | }, 33 | .logFn = log, 34 | }; 35 | 36 | /// Initialize the logger with the given serial console. 37 | /// You MUST call this function before using the logger. 38 | pub fn init(s: Serial) void { 39 | serial = s; 40 | } 41 | 42 | fn write(_: void, bytes: []const u8) LogError!usize { 43 | serial.writeString(bytes); 44 | return bytes.len; 45 | } 46 | 47 | fn log( 48 | comptime level: stdlog.Level, 49 | comptime scope: @Type(.enum_literal), 50 | comptime fmt: []const u8, 51 | args: anytype, 52 | ) void { 53 | const level_str = comptime switch (level) { 54 | .debug => "[DEBUG]", 55 | .info => "[INFO ]", 56 | .warn => "[WARN ]", 57 | .err => "[ERROR]", 58 | }; 59 | 60 | const scope_str = if (@tagName(scope).len <= 7) b: { 61 | break :b std.fmt.comptimePrint("{s: <7} | ", .{@tagName(scope)}); 62 | } else b: { 63 | break :b std.fmt.comptimePrint("{s: <7}-| ", .{@tagName(scope)[0..7]}); 64 | }; 65 | 66 | std.fmt.format( 67 | Writer{ .context = {} }, 68 | level_str ++ " " ++ scope_str ++ fmt ++ "\n", 69 | args, 70 | ) catch {}; 71 | } 72 | -------------------------------------------------------------------------------- /ymir/serial.zig: -------------------------------------------------------------------------------- 1 | const ymir = @import("ymir"); 2 | const spin = ymir.spin; 3 | const arch = ymir.arch; 4 | 5 | /// Spin lock for the serial console. 6 | var spin_lock: spin.SpinLock = spin.SpinLock{}; 7 | 8 | /// Serial console. 9 | pub const Serial = struct { 10 | const WriteFn = *const fn (u8) void; 11 | const ReadFn = *const fn () ?u8; 12 | 13 | /// Pointer to the arch-specific write-function. 14 | /// Do NOT access this field directly, use the `write` function instead. 15 | _write_fn: WriteFn = undefined, 16 | /// Pointer to the arch-specific read-function. 
17 | /// Do NOT access this field directly, use the `read` function instead. 18 | _read_fn: ReadFn = undefined, 19 | 20 | const Self = @This(); 21 | 22 | /// Write a single byte to the serial console. 23 | pub fn write(self: Self, c: u8) void { 24 | const mask = spin_lock.lockSaveIrq(); 25 | defer spin_lock.unlockRestoreIrq(mask); 26 | self._write_fn(c); 27 | } 28 | 29 | fn writeUnlocked(self: Self, c: u8) void { 30 | self._write_fn(c); 31 | } 32 | 33 | /// Write a string to the serial console. 34 | pub fn writeString(self: Self, s: []const u8) void { 35 | const mask = spin_lock.lockSaveIrq(); 36 | defer spin_lock.unlockRestoreIrq(mask); 37 | for (s) |c| { 38 | self.writeUnlocked(c); 39 | } 40 | } 41 | 42 | /// Try to read a character from the serial console. 43 | /// Returns null if no character is available in Rx-buffer. 44 | pub fn tryRead(self: Self) ?u8 { 45 | const mask = spin_lock.lockSaveIrq(); 46 | defer spin_lock.unlockRestoreIrq(mask); 47 | return self._read_fn(); 48 | } 49 | }; 50 | 51 | /// Initialize the serial console. 52 | /// You MUST call this function before using the serial console. 53 | pub fn init() Serial { 54 | var serial = Serial{}; 55 | arch.serial.initSerial(&serial, .com1, 115200); 56 | 57 | return serial; 58 | } 59 | 60 | /// Get the serial console. 61 | /// You MUST call `init` before calling this function. 62 | pub fn get() Serial { 63 | var serial = Serial{}; 64 | arch.serial.getSerial(&serial, .com1); 65 | 66 | return serial; 67 | } 68 | -------------------------------------------------------------------------------- /surtr/defs.zig: -------------------------------------------------------------------------------- 1 | //! This file defines structures shared among Surtr and Ymir. 2 | 3 | const uefi = @import("std").os.uefi; 4 | 5 | pub const magic: usize = 0xDEADBEEF_CAFEBABE; 6 | 7 | /// Boot information. 8 | /// This struct is passed from the bootloader to the kernel in Win64 calling convention. 
9 | pub const BootInfo = extern struct { 10 | /// Magic number to check if the boot info is valid. 11 | magic: usize = magic, 12 | memory_map: MemoryMap, 13 | guest_info: GuestInfo, 14 | acpi_table: *anyopaque, 15 | }; 16 | 17 | /// Memory map provided by UEFI. 18 | pub const MemoryMap = extern struct { 19 | /// Total buffer size prepared to store the memory map. 20 | buffer_size: usize, 21 | /// Memory descriptors. 22 | descriptors: [*]uefi.tables.MemoryDescriptor, 23 | /// Total memory map size. 24 | map_size: usize, 25 | /// Map key used to check if the memory map has been changed. 26 | map_key: usize, 27 | /// Size in bytes of each memory descriptor. 28 | descriptor_size: usize, 29 | /// UEFI memory descriptor version. 30 | descriptor_version: u32, 31 | }; 32 | 33 | /// Guest kernel information. 34 | pub const GuestInfo = extern struct { 35 | /// Physical address the guest image is loaded. 36 | guest_image: [*]u8, 37 | /// Size in bytes of the guest image. 38 | guest_size: usize, 39 | /// Physical address the initrd is loaded. 40 | initrd_addr: [*]u8, 41 | /// Size in bytes of the initrd. 42 | initrd_size: usize, 43 | }; 44 | 45 | /// Memory descriptor iterator. 
/// Memory descriptor iterator.
/// Walks the UEFI memory map using the firmware-reported descriptor stride,
/// which may be larger than @sizeOf(MemoryDescriptor).
pub const MemoryDescriptorIterator = struct {
    const Self = @This();
    const Md = uefi.tables.MemoryDescriptor;

    descriptors: [*]Md,
    current: *Md,
    descriptor_size: usize,
    total_size: usize,

    /// Create an iterator positioned at the first descriptor of `map`.
    pub fn new(map: MemoryMap) Self {
        const head: *Md = @ptrCast(map.descriptors);
        return .{
            .descriptors = map.descriptors,
            .current = head,
            .descriptor_size = map.descriptor_size,
            .total_size = map.map_size,
        };
    }

    /// Return the next descriptor, or null once the whole map has been consumed.
    pub fn next(self: *Self) ?*Md {
        const cur_addr = @intFromPtr(self.current);
        const end_addr = @intFromPtr(self.descriptors) + self.total_size;
        if (cur_addr >= end_addr) return null;

        const found = self.current;
        // Advance by the stride reported by the firmware, not by @sizeOf(Md).
        self.current = @ptrFromInt(cur_addr + self.descriptor_size);
        return found;
    }
};
-------------------------------------------------------------------------------- /ymir/arch/x86/arch.zig: --------------------------------------------------------------------------------
//! This module exposes x86_64-specific functions.

const std = @import("std");
const log = std.log.scoped(.arch);

const ymir = @import("ymir");
const mem = ymir.mem;

pub const gdt = @import("gdt.zig");
pub const intr = @import("interrupt.zig");
pub const page = @import("page.zig");
pub const pic = @import("pic.zig");
pub const serial = @import("serial.zig");
pub const apic = @import("apic.zig");

const cpuid = @import("cpuid.zig");
const am = @import("asm.zig");

/// Pause a CPU for a short period of time.
pub fn relax() void {
    am.relax();
}

/// Disable interrupts.
/// Note that exceptions and NMI are not ignored.
pub inline fn disableIntr() void {
    am.cli();
}

/// Enable interrupts.
pub inline fn enableIntr() void {
    am.sti();
}

/// Halt the current CPU.
pub inline fn halt() void {
    am.hlt();
}

/// Pause the CPU for a wait loop.
41 | pub inline fn pause() void { 42 | asm volatile ("pause"); 43 | } 44 | 45 | /// Port I/O In instruction. 46 | pub inline fn in(T: type, port: u16) T { 47 | return switch (T) { 48 | u8 => am.inb(port), 49 | u16 => am.inw(port), 50 | u32 => am.inl(port), 51 | else => @compileError("Unsupported type for asm in()"), 52 | }; 53 | } 54 | 55 | /// Enable CPUID instruction. 56 | pub inline fn enableCpuid() void { 57 | var eflags = am.readRflags(); 58 | if (!eflags.id) { 59 | eflags.id = true; 60 | _ = am.writeRflags(eflags); 61 | } 62 | } 63 | 64 | /// Get CPU Vendr ID string. 65 | /// Note that the string is not null-terminated. 66 | pub fn getCpuVendorId() [12]u8 { 67 | var ret: [12]u8 = undefined; 68 | const regs = cpuid.Leaf.query(.maximum_input, null); 69 | 70 | for ([_]u32{ regs.ebx, regs.edx, regs.ecx }, 0..) |reg, i| { 71 | for (0..4) |j| { 72 | const b: usize = (reg >> @truncate(j * 8)); 73 | ret[i * 4 + j] = @as(u8, @truncate(b)); 74 | } 75 | } 76 | return ret; 77 | } 78 | 79 | /// Check if virtualization technology is supported. 80 | pub fn isVmxSupported() bool { 81 | // Check CPUID if VMX is supported. 82 | const regs = cpuid.Leaf.vers_and_feat_info.query(null); 83 | const ecx: cpuid.FeatureInfoEcx = @bitCast(regs.ecx); 84 | if (!ecx.vmx) return false; 85 | 86 | // Check VMXON is allowed outside SMX. 87 | var msr_fctl = am.readMsrFeatureControl(); 88 | if (!msr_fctl.vmx_outside_smx) { 89 | // Enable VMX outside SMX. 90 | if (msr_fctl.lock) @panic("IA32_FEATURE_CONTROL is locked while VMX outside SMX is disabled"); 91 | msr_fctl.vmx_outside_smx = true; 92 | msr_fctl.lock = true; 93 | am.writeMsrFeatureControl(msr_fctl); 94 | } 95 | msr_fctl = am.readMsrFeatureControl(); 96 | if (!msr_fctl.vmx_outside_smx) return false; 97 | 98 | return true; 99 | } 100 | 101 | /// Enable supported XSAVE features. 102 | pub fn enableXstateFeature() void { 103 | // Enable XSAVE in CR4, which is necessary to access XCR0. 
104 | var cr4 = am.readCr4(); 105 | cr4.osxsave = true; 106 | am.loadCr4(cr4); 107 | 108 | // Enable supported XSAVE features. 109 | const ext_info = cpuid.Leaf.ext_enumeration.query(0); 110 | const max_features = ((@as(u64, ext_info.edx) & 0xFFFF_FFFF) << 32) + ext_info.eax; 111 | am.xsetbv(0, max_features); // XCR0 enabled mask 112 | } 113 | 114 | test { 115 | std.testing.refAllDeclsRecursive(@This()); 116 | } 117 | -------------------------------------------------------------------------------- /.github/scripts/cherry-picker.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | from github import Github 4 | 5 | 6 | def read_branch_order(filename=".github/scripts/branch-order.txt"): 7 | with open(filename, "r") as f: 8 | return [ 9 | line.strip() 10 | for line in f 11 | if (line.strip() and not line.strip().startswith("#")) 12 | ] 13 | 14 | 15 | def get_current_branch(pr_number): 16 | g = Github(os.environ["GITHUB_TOKEN"]) 17 | repo = g.get_repo(os.environ["REPO"]) 18 | pr = repo.get_pull(int(pr_number)) 19 | return pr.base.ref 20 | 21 | 22 | def get_child_branches(current_branch, branches): 23 | try: 24 | current_index = branches.index(current_branch) 25 | return branches[:current_index] 26 | except ValueError: 27 | return [] 28 | 29 | 30 | def cherry_pick_to_branch(commit_sha, target_branch): 31 | try: 32 | # Fetch and checkout target branch 33 | subprocess.run(["git", "fetch", "origin", target_branch], check=True) 34 | subprocess.run(["git", "checkout", target_branch], check=True) 35 | 36 | # Cherry-pick the commit 37 | result = subprocess.run( 38 | ["git", "cherry-pick", "-m", "1", commit_sha], 39 | capture_output=True, 40 | text=True, 41 | ) 42 | print(result.stderr) 43 | 44 | if result.returncode == 0: 45 | # Push the changes 46 | subprocess.run( 47 | ["git", "push", "origin", target_branch], 48 | check=True 49 | ) 50 | return True 51 | else: 52 | # If there's a conflict, abort the 
cherry-pick 53 | subprocess.run(["git", "cherry-pick", "--abort"]) 54 | return False 55 | except subprocess.CalledProcessError: 56 | return False 57 | 58 | 59 | def main(): 60 | # Get PR number from environment 61 | pr_number = os.environ["PR_NUMBER"] 62 | 63 | # Initialize GitHub client 64 | g = Github(os.environ["GITHUB_TOKEN"]) 65 | repo = g.get_repo(os.environ["REPO"]) 66 | pr = repo.get_pull(int(pr_number)) 67 | 68 | # Check if the PR is merged 69 | if not pr.merged: 70 | print("❌ PR is not yet merged.") 71 | return 72 | 73 | # Get the merge commit SHA 74 | merge_commit_sha = pr.merge_commit_sha 75 | if not merge_commit_sha: 76 | print("No merge commit found") 77 | return 78 | print(f"merge_commit_sha: {merge_commit_sha}") 79 | 80 | # Read branch order 81 | branches = read_branch_order() 82 | 83 | # Get current branch and find children 84 | current_branch = get_current_branch(pr_number) 85 | child_branches = get_child_branches(current_branch, branches) 86 | 87 | if len(child_branches) == 0: 88 | pr.create_issue_comment( 89 | "💤 No target branches found. Cherry-pick skipped." 90 | ) 91 | return 92 | 93 | # Cherry-pick to each child branch 94 | failed_branches = [] 95 | for branch in child_branches: 96 | success = cherry_pick_to_branch(merge_commit_sha, branch) 97 | if not success: 98 | failed_branches.append(branch) 99 | 100 | if len(failed_branches) == 0: 101 | pr.create_issue_comment( 102 | f"✅ Successfully cherry-picked to {len(child_branches)} branches." 103 | ) 104 | else: 105 | pr.create_issue_comment( 106 | "❌ Cherry-pick failed for the following branches " 107 | "(manual intervention required):\n" + 108 | "\n".join([f"- {branch}" for branch in failed_branches]) 109 | ) 110 | 111 | 112 | if __name__ == "__main__": 113 | main() 114 | -------------------------------------------------------------------------------- /ymir/arch/x86/serial.zig: -------------------------------------------------------------------------------- 1 | //! Serial 8250 UART. 

const ymir = @import("ymir");
const bits = ymir.bits;
const am = @import("asm.zig");

const Serial = ymir.serial.Serial;

/// Available serial ports and their base I/O port addresses.
pub const Ports = enum(u16) {
    com1 = 0x3F8,
    com2 = 0x2F8,
    com3 = 0x3E8,
    com4 = 0x2E8,
};

/// IRQs to which serial ports can generate interrupts.
/// COM1/COM3 share IRQ4; COM2/COM4 share IRQ3.
const Irq = struct {
    pub const com1 = 4;
    pub const com2 = 3;
    pub const com3 = 4;
    pub const com4 = 3;
};

/// UART input clock divided by 16; baud rate = numerator / divisor.
const divisor_latch_numerator = 115200;
const default_baud_rate = 9600;

/// UART register offsets from the port base address.
const offsets = struct {
    /// Transmitter Holding Buffer: DLAB=0, W
    pub const txr = 0;
    /// Receiver Buffer: DLAB=0, R
    pub const rxr = 0;
    /// Divisor Latch Low Byte: DLAB=1, R/W
    pub const dll = 0;
    /// Interrupt Enable Register: DLAB=0, R/W
    pub const ier = 1;
    /// Divisor Latch High Byte: DLAB=1, R/W
    pub const dlm = 1;
    /// Interrupt Identification Register: DLAB=X, R
    pub const iir = 2;
    /// FIFO Control Register: DLAB=X, W
    pub const fcr = 2;
    /// Line Control Register: DLAB=X, R/W
    pub const lcr = 3;
    /// Modem Control Register: DLAB=X, R/W
    pub const mcr = 4;
    /// Line Status Register: DLAB=X, R
    pub const lsr = 5;
    /// Modem Status Register: DLAB=X, R
    pub const msr = 6;
    /// Scratch Register: DLAB=X, R/W
    pub const sr = 7;
};

/// Initialize a serial console, then set a write-function to `Serial.write_fn`.
pub fn initSerial(serial: *Serial, port: Ports, baud: u32) void {
    const base = @intFromEnum(port);

    am.outb(0b00_000_0_00, base + offsets.lcr); // 8n1: no parity, 1 stop bit, 8 data bit
    am.outb(0, base + offsets.ier); // Disable interrupts
    am.outb(0, base + offsets.fcr); // Disable FIFO

    // Program the baud-rate divisor. The divisor latch registers are only
    // visible while DLAB (bit 7 of LCR) is set.
    const divisor = divisor_latch_numerator / baud;
    const lcr = am.inb(base + offsets.lcr);
    am.outb(lcr | 0b1000_0000, base + offsets.lcr); // Enable DLAB
    am.outb(@truncate(divisor & 0xFF), base + offsets.dll);
    am.outb(@truncate((divisor >> 8) & 0xFF), base + offsets.dlm);
    am.outb(lcr & 0b0111_1111, base + offsets.lcr); // Disable DLAB

    getSerial(serial, port);
}

/// Get a serial console, then set a write-function to `Serial.write_fn`.
/// You MUST ensure that the console of the `port` is initialized before calling this function.
pub fn getSerial(serial: *Serial, port: Ports) void {
    // Dispatch to per-port wrappers so the Serial vtable stores plain fn pointers.
    serial._write_fn = switch (port) {
        .com1 => writeByteCom1,
        .com2 => writeByteCom2,
        .com3 => writeByteCom3,
        .com4 => writeByteCom4,
    };
    serial._read_fn = switch (port) {
        .com1 => tryReadByteCom1,
        .com2 => tryReadByteCom2,
        .com3 => tryReadByteCom3,
        .com4 => tryReadByteCom4,
    };
}

/// Enable serial console interrupt for Rx-available and Tx-empty.
pub fn enableInterrupt(port: Ports) void {
    const base = @intFromEnum(port);
    const enabled = am.inb(base + offsets.ier) | 0b0000_0011; // Rx-available, Tx-empty
    am.outb(enabled, base + offsets.ier);
}

/// Check if the given port is any of serial ports.
pub fn isSerialPort(port: u16) bool {
    inline for (@typeInfo(Ports).@"enum".fields) |field| {
        if (field.value == port) return true;
    }
    return false;
}

/// Write a single byte to the serial console.
106 | pub fn writeByte(byte: u8, port: Ports) void { 107 | // Wait until the transmitter holding buffer is empty 108 | while (!bits.isset(am.inb(@intFromEnum(port) + offsets.lsr), 5)) { 109 | am.relax(); 110 | } 111 | 112 | // Put char to the transmitter holding buffer 113 | am.outb(byte, @intFromEnum(port)); 114 | } 115 | 116 | fn writeByteCom1(byte: u8) void { 117 | writeByte(byte, .com1); 118 | } 119 | 120 | fn writeByteCom2(byte: u8) void { 121 | writeByte(byte, .com2); 122 | } 123 | 124 | fn writeByteCom3(byte: u8) void { 125 | writeByte(byte, .com3); 126 | } 127 | 128 | fn writeByteCom4(byte: u8) void { 129 | writeByte(byte, .com4); 130 | } 131 | 132 | /// Read a byte from Rx buffer. 133 | /// If Rx buffer is empty, return null. 134 | fn tryReadByte(port: Ports) ?u8 { 135 | // Check if Rx buffer is not empty 136 | if (!bits.isset(am.inb(@intFromEnum(port) + offsets.lsr), 0)) { 137 | return null; 138 | } 139 | 140 | // read char from the receiver buffer 141 | return am.inb(@intFromEnum(port)); 142 | } 143 | 144 | fn tryReadByteCom1() ?u8 { 145 | return tryReadByte(.com1); 146 | } 147 | 148 | fn tryReadByteCom2() ?u8 { 149 | return tryReadByte(.com2); 150 | } 151 | 152 | fn tryReadByteCom3() ?u8 { 153 | return tryReadByte(.com3); 154 | } 155 | 156 | fn tryReadByteCom4() ?u8 { 157 | return tryReadByte(.com4); 158 | } 159 | -------------------------------------------------------------------------------- /ymir/main.zig: -------------------------------------------------------------------------------- 1 | //! Ymir: The hypervisor. 2 | //! 
3 | 4 | const std = @import("std"); 5 | const log = std.log.scoped(.main); 6 | const surtr = @import("surtr"); 7 | 8 | const ymir = @import("ymir"); 9 | const idefs = ymir.intr; 10 | const serial = ymir.serial; 11 | const klog = ymir.klog; 12 | const arch = ymir.arch; 13 | const mem = ymir.mem; 14 | const vmx = ymir.vmx; 15 | 16 | const page_size = mem.page_size; 17 | 18 | pub const panic = ymir.panic.panic_fn; 19 | pub const std_options = klog.default_log_options; 20 | 21 | /// Guard page placed below the kernel stack. 22 | extern const __stackguard_lower: [*]const u8; 23 | 24 | /// Kernel entry point called by surtr. 25 | /// The function switches stack from the surtr stack to the kernel stack. 26 | export fn kernelEntry() callconv(.naked) noreturn { 27 | asm volatile ( 28 | \\movq %[new_stack], %%rsp 29 | \\call kernelTrampoline 30 | : 31 | : [new_stack] "r" (@intFromPtr(&__stackguard_lower) - 0x10), 32 | ); 33 | } 34 | 35 | /// Trampoline function to call the kernel main function. 36 | /// The role of this function is to make main function return errors. 37 | export fn kernelTrampoline(boot_info: surtr.BootInfo) callconv(.{ .x86_64_win = .{} }) noreturn { 38 | kernelMain(boot_info) catch |err| { 39 | log.err("Kernel aborted with error: {}", .{err}); 40 | @panic("Exiting..."); 41 | }; 42 | 43 | unreachable; 44 | } 45 | 46 | /// Kernel main function. 47 | fn kernelMain(boot_info: surtr.BootInfo) !void { 48 | // Initialize the serial console and logger. 49 | const sr = serial.init(); 50 | klog.init(sr); 51 | log.info("Booting Ymir...", .{}); 52 | 53 | // Validate the boot info. 54 | validateBootInfo(boot_info) catch |err| { 55 | log.err("Invalid boot info: {}", .{err}); 56 | return error.InvalidBootInfo; 57 | }; 58 | 59 | // Copy boot_info into Ymir's stack since it becomes inaccessible soon. 60 | const guest_info = boot_info.guest_info; 61 | const memory_map = boot_info.memory_map; 62 | 63 | // Enable CPUID instruction. 
64 | arch.enableCpuid(); 65 | 66 | // Initialize GDT. 67 | // It switches GDT from the one prepared by surtr to the ymir GDT. 68 | arch.gdt.init(); 69 | log.info("Initialized GDT.", .{}); 70 | 71 | // Initialize IDT. 72 | // From this moment, interrupts are enabled. 73 | arch.intr.init(); 74 | log.info("Initialized IDT.", .{}); 75 | 76 | // Initialize page allocator. 77 | ymir.mem.initPageAllocator(memory_map); 78 | log.info("Initialized page allocator.", .{}); 79 | 80 | // Reconstruct memory mapping from the one provided by UEFI and Sutr. 81 | log.info("Reconstructing memory mapping...", .{}); 82 | try mem.reconstructMapping(mem.page_allocator); 83 | 84 | // Now, stack, GDT, and page tables are switched to the ymir's ones. 85 | // We are ready to destroy any usable regions in UEFI memory map. 86 | 87 | // Initialize general allocator. 88 | ymir.mem.initGeneralAllocator(); 89 | log.info("Initialized general allocator.", .{}); 90 | 91 | // Initialize PIC. 92 | arch.pic.init(); 93 | log.info("Initialized PIC.", .{}); 94 | 95 | // Enable PIT. 96 | arch.intr.registerHandler(idefs.pic_timer, blobIrqHandler); 97 | arch.pic.unsetMask(.timer); 98 | log.info("Enabled PIT.", .{}); 99 | 100 | // Unmask serial interrupt. 101 | arch.intr.registerHandler(idefs.pic_serial1, blobIrqHandler); 102 | arch.pic.unsetMask(.serial1); 103 | arch.serial.enableInterrupt(.com1); 104 | 105 | // Enable XSAVE features. 106 | arch.enableXstateFeature(); 107 | 108 | // Enter VMX root operation. 109 | var vm = try vmx.Vm.new(); 110 | try vm.init(ymir.mem.page_allocator); 111 | log.info("Entered VMX root operation.", .{}); 112 | 113 | // Set the default VM for panic. 114 | ymir.setVm(&vm); 115 | 116 | // Setup guest memory and load guest. 
117 | const guest_kernel = b: { 118 | const ptr: [*]u8 = @ptrFromInt(ymir.mem.phys2virt(guest_info.guest_image)); 119 | break :b ptr[0..guest_info.guest_size]; 120 | }; 121 | const initrd = b: { 122 | const ptr: [*]u8 = @ptrFromInt(ymir.mem.phys2virt(guest_info.initrd_addr)); 123 | break :b ptr[0..guest_info.initrd_size]; 124 | }; 125 | try vm.setupGuestMemory( 126 | guest_kernel, 127 | initrd, 128 | ymir.mem.page_allocator, 129 | &ymir.mem.page_allocator_instance, 130 | ); 131 | log.info("Setup guest memory.", .{}); 132 | 133 | // Launch 134 | log.info("Starting the virtual machine...", .{}); 135 | try vm.loop(); 136 | 137 | // Exit VMX root operation. 138 | vm.devirtualize(); 139 | 140 | // EOL 141 | log.info("Reached EOL.", .{}); 142 | ymir.endlessHalt(); 143 | } 144 | 145 | fn validateBootInfo(boot_info: surtr.BootInfo) !void { 146 | if (boot_info.magic != surtr.magic) { 147 | return error.InvalidMagic; 148 | } 149 | } 150 | 151 | fn blobIrqHandler(ctx: *arch.intr.Context) void { 152 | const vector: u16 = @intCast(ctx.vector - idefs.user_intr_base); 153 | arch.pic.notifyEoi(@enumFromInt(vector)); 154 | } 155 | -------------------------------------------------------------------------------- /ymir/arch/x86/idt.zig: -------------------------------------------------------------------------------- 1 | //! LICENSE NOTICE 2 | //! 3 | //! The impletentation is heavily inspired by https://github.com/AndreaOrru/zen 4 | //! Original LICENSE follows: 5 | //! 6 | //! BSD 3-Clause License 7 | //! 8 | //! Copyright (c) 2017, Andrea Orru 9 | //! All rights reserved. 10 | //! 11 | //! Redistribution and use in source and binary forms, with or without 12 | //! modification, are permitted provided that the following conditions are met: 13 | //! 14 | //! * Redistributions of source code must retain the above copyright notice, this 15 | //! list of conditions and the following disclaimer. 16 | //! 17 | //! 
* Redistributions in binary form must reproduce the above copyright notice, 18 | //! this list of conditions and the following disclaimer in the documentation 19 | //! and/or other materials provided with the distribution. 20 | //! 21 | //! * Neither the name of the copyright holder nor the names of its 22 | //! contributors may be used to endorse or promote products derived from 23 | //! this software without specific prior written permission. 24 | //! 25 | //! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 26 | //! AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 | //! IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 28 | //! DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 29 | //! FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 | //! DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 31 | //! SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 32 | //! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 33 | //! OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 34 | //! OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 35 | //! 36 | 37 | const std = @import("std"); 38 | const log = std.log.scoped(.idt); 39 | 40 | const am = @import("asm.zig"); 41 | const gdt = @import("gdt.zig"); 42 | 43 | /// Maximum number of gates in the IDT. 44 | pub const max_num_gates = 256; 45 | /// Interrupt Descriptor Table. 46 | var idt: [max_num_gates]GateDescriptor align(4096) = [_]GateDescriptor{std.mem.zeroes(GateDescriptor)} ** max_num_gates; 47 | /// IDT Register. 48 | var idtr = IdtRegister{ 49 | .limit = @sizeOf(@TypeOf(idt)) - 1, 50 | .base = &idt, 51 | }; 52 | 53 | /// ISR signature. 54 | pub const Isr = fn () callconv(.naked) void; 55 | 56 | /// Initialize the IDT. 
pub fn init() void {
    // Point the IDTR at our statically-allocated table, then load it.
    idtr.base = &idt;
    am.lidt(@intFromPtr(&idtr));
}

/// Set a gate descriptor in the IDT.
pub fn setGate(
    index: usize,
    gate_type: GateType,
    offset: Isr,
) void {
    // Split the ISR address into the three fields required by the gate format.
    const isr_addr: u64 = @intFromPtr(&offset);
    idt[index] = GateDescriptor{
        .offset_low = @truncate(isr_addr),
        .seg_selector = gdt.kernel_cs_index << 3,
        .gate_type = gate_type,
        .offset_middle = @truncate(isr_addr >> 16),
        .offset_high = @truncate(isr_addr >> 32),
        .dpl = 0,
    };
}

/// Entry in the Interrupt Descriptor Table.
pub const GateDescriptor = packed struct(u128) {
    /// Lower 16 bits of the offset to the ISR.
    offset_low: u16,
    /// Segment Selector that must point to a valid code segment in the GDT.
    seg_selector: u16,
    /// Interrupt Stack Table. Not used.
    ist: u3 = 0,
    /// Reserved.
    _reserved1: u5 = 0,
    /// Gate Type.
    gate_type: GateType,
    /// Reserved.
    _reserved2: u1 = 0,
    /// Descriptor Privilege Level is the required CPL to call the ISR via the INT inst.
    /// Hardware interrupts ignore this field.
    dpl: u2,
    /// Present flag. Must be 1.
    present: bool = true,
    /// Middle 16 bits of the offset to the ISR.
    offset_middle: u16,
    /// Higher 32 bits of the offset to the ISR.
    offset_high: u32,
    /// Reserved.
    _reserved3: u32 = 0,

    /// Reassemble the full 64-bit ISR address from the three offset fields.
    pub fn offset(self: GateDescriptor) u64 {
        var addr: u64 = self.offset_high;
        addr = (addr << 16) | self.offset_middle;
        addr = (addr << 16) | self.offset_low;
        return addr;
    }
};

/// Value loaded into the IDTR by the LIDT instruction.
const IdtRegister = packed struct {
    limit: u16,
    base: *[max_num_gates]GateDescriptor,
};

/// Gate type of the gate descriptor in IDT.
pub const GateType = enum(u4) {
    Invalid = 0b0000,
    /// Interrupt Gate.
    /// Interrupts are disabled when the ISR is called.
119 | Interrupt64 = 0b1110, 120 | /// Trap Gate. 121 | Trap64 = 0b1111, 122 | }; 123 | 124 | const testing = std.testing; 125 | 126 | test "gate descriptor" { 127 | const gate = GateDescriptor{ 128 | .offset_low = 0x1234, 129 | .seg_selector = 0x5678, 130 | .gate_type = .Interrupt64, 131 | .offset_middle = 0x9abc, 132 | .offset_high = 0x0123def0, 133 | .dpl = 0, 134 | }; 135 | 136 | try testing.expectEqual(0x0123def0_9abc_1234, gate.offset()); 137 | } 138 | 139 | test "IDTR limit" { 140 | try testing.expectEqual(256 * 16 - 1, idtr.limit); 141 | } 142 | -------------------------------------------------------------------------------- /ymir/arch/x86/vmx/cpuid.zig: -------------------------------------------------------------------------------- 1 | //! Handle CPUID instruction. 2 | //! Information returned by CPUID instruction is listed in SDM Chapter 3.3 Table 3-8. 3 | 4 | const std = @import("std"); 5 | const log = std.log.scoped(.vmcpuid); 6 | 7 | const arch = @import("arch.zig"); 8 | const cpuid = arch.cpuid; 9 | 10 | const vmx = @import("common.zig"); 11 | const Vcpu = @import("vcpu.zig").Vcpu; 12 | 13 | const VmxError = vmx.VmxError; 14 | const Leaf = cpuid.Leaf; 15 | 16 | var feature_info_ecx = cpuid.FeatureInfoEcx{ 17 | .pcid = true, 18 | }; 19 | const feature_info_edx = cpuid.FeatureInfoEdx{ 20 | .fpu = true, 21 | .vme = true, 22 | .de = true, 23 | .pse = true, 24 | .msr = true, 25 | .pae = true, 26 | .cx8 = true, 27 | .sep = true, 28 | .pge = true, 29 | .cmov = true, 30 | .pse36 = true, 31 | .acpi = false, 32 | .fxsr = true, 33 | .sse = true, 34 | .sse2 = true, 35 | }; 36 | const ext_feature0_ebx = cpuid.ExtFeatureEbx0{ 37 | .fsgsbase = false, // NOTE: rdfsbase seemingly cannot be intercepted. 38 | .smep = true, 39 | .invpcid = true, 40 | .smap = true, 41 | }; 42 | 43 | /// Handle VM-exit caused by CPUID instruction. 44 | /// Note that this function does not increment the RIP. 
45 | pub fn handleCpuidExit(vcpu: *Vcpu) VmxError!void { 46 | const regs = &vcpu.guest_regs; 47 | 48 | switch (Leaf.from(regs.rax)) { 49 | .maximum_input => { 50 | setValue(®s.rax, 0x20); // Maximum input value for basic CPUID. 51 | setValue(®s.rbx, 0x72_69_6D_59); // Ymir 52 | setValue(®s.rcx, 0x72_69_6D_59); // Ymir 53 | setValue(®s.rdx, 0x72_69_6D_59); // Ymir 54 | }, 55 | .vers_and_feat_info => { 56 | const orig = Leaf.query(.vers_and_feat_info, null); 57 | 58 | // BUG: Intel Core CPU, since Alder Lake (12th Gen), 59 | // has a bug that INVLPG does not flush global translatinos. 60 | // To mitigate it, Linux kernel 6.4 and later disables PCID. 61 | // So check if PCID is supported for Ymir, and if not, disable it also for the guest. 62 | // 63 | // See: https://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git/commit/?h=x86/urgent&id=ce0b15d11ad837fbacc5356941712218e38a0a83 64 | const feat: cpuid.FeatureInfoEcx = @bitCast(cpuid.Leaf.from(0).query(null).ecx); 65 | if (!feat.pcid) { 66 | feature_info_ecx.pcid = false; 67 | } 68 | 69 | setValue(®s.rax, orig.eax); // Version information. 70 | setValue(®s.rbx, orig.ebx); // Brand index / CLFLUSH line size / Addressable IDs / Initial APIC ID 71 | setValue(®s.rcx, @as(u32, @bitCast(feature_info_ecx))); 72 | setValue(®s.rdx, @as(u32, @bitCast(feature_info_edx))); 73 | }, 74 | .ext_func => { 75 | setValue(®s.rax, 0x8000_0000 + 1); // Maximum input value for extended function CPUID. 76 | setValue(®s.rbx, 0); // Reserved. 77 | setValue(®s.rcx, 0); // Reserved. 78 | setValue(®s.rdx, 0); // Reserved. 79 | }, 80 | .ext_proc_signature => { 81 | const orig = Leaf.ext_proc_signature.query(null); 82 | setValue(®s.rax, 0); // Extended processor signature and feature bits. 83 | setValue(®s.rbx, 0); // Reserved. 
84 | setValue(®s.rcx, orig.ecx); // LAHF in 64-bit mode / LZCNT / PREFETCHW 85 | setValue(®s.rdx, orig.edx); // SYSCALL / XD / 1GB large page / RDTSCP and IA32_TSC_AUX / Intel64 86 | }, 87 | .thermal_power => invalid(vcpu), 88 | .ext_feature => { 89 | switch (regs.rcx) { 90 | 0 => { 91 | setValue(®s.rax, 1); // Maximum input value for supported leaf 7 sub-leaves. 92 | setValue(®s.rbx, @as(u32, @bitCast(ext_feature0_ebx))); 93 | setValue(®s.rcx, 0); // Unimplemented. 94 | setValue(®s.rdx, 0); // Unimplemented. 95 | }, 96 | 1, 2 => invalid(vcpu), 97 | else => { 98 | log.err("Unhandled CPUID: Leaf=0x{X:0>8}, Sub=0x{X:0>8}", .{ regs.rax, regs.rcx }); 99 | vcpu.abort(); 100 | }, 101 | } 102 | }, 103 | .ext_enumeration => { 104 | switch (regs.rcx) { 105 | 1 => invalid(vcpu), 106 | else => { 107 | log.err("Unhandled CPUID: Leaf=0x{X:0>8}, Sub=0x{X:0>8}", .{ regs.rax, regs.rcx }); 108 | vcpu.abort(); 109 | }, 110 | } 111 | }, 112 | _ => { 113 | log.warn("Unhandled CPUID: Leaf=0x{X:0>8}, Sub=0x{X:0>8}", .{ regs.rax, regs.rcx }); 114 | invalid(vcpu); 115 | }, 116 | } 117 | } 118 | 119 | /// Set a 32-bit value to the given 64-bit without modifying the upper 32-bits. 120 | inline fn setValue(reg: *u64, val: u64) void { 121 | @as(*u32, @ptrCast(reg)).* = @as(u32, @truncate(val)); 122 | } 123 | 124 | /// Set an invalid value to the registers. 
125 | fn invalid(vcpu: *Vcpu) void { 126 | const gregs = &vcpu.guest_regs; 127 | setValue(&gregs.rax, 0); 128 | setValue(&gregs.rbx, 0); 129 | setValue(&gregs.rcx, 0); 130 | setValue(&gregs.rdx, 0); 131 | } 132 | -------------------------------------------------------------------------------- /ymir/arch/x86/vmx/cr.zig: -------------------------------------------------------------------------------- 1 | const std = @import("std"); 2 | const log = std.log.scoped(.cr); 3 | 4 | const arch = @import("arch.zig"); 5 | const am = arch.am; 6 | 7 | const vmx = @import("common.zig"); 8 | const vmcs = @import("vmcs.zig"); 9 | 10 | const Vcpu = @import("vcpu.zig").Vcpu; 11 | const VmxError = vmx.VmxError; 12 | const QualCr = vmx.qual.QualCr; 13 | 14 | /// Handle VM-exit caused by mov to CR3 instruction. 15 | /// Note that this function does not increment the RIP. 16 | pub fn handleAccessCr(vcpu: *Vcpu, qual: QualCr) VmxError!void { 17 | switch (qual.access_type) { 18 | .mov_to => { 19 | switch (qual.index) { 20 | 0, 4 => { 21 | try passthroughWrite(vcpu, qual); 22 | try updateIa32e(vcpu); 23 | }, 24 | 3 => { 25 | const val = try getValue(vcpu, qual); 26 | // NOTE: If CR3.PCIDE is set and bit 63 of `val` is 1, it means the operation is not necessarily invalidate TLBs. 27 | // However, SDM says that the bit 63 MUST be cleared even if CR4.PCIDE is set. 28 | try vmx.vmwrite(vmcs.guest.cr3, val & ~@as(u64, (1 << 63))); 29 | // Invalidate the combined mappings (GVA to HPA mappings) 30 | am.invvpid(.single_context, vcpu.vpid); 31 | }, 32 | else => try passthroughWrite(vcpu, qual), 33 | } 34 | }, 35 | .mov_from => try passthroughRead(vcpu, qual), 36 | else => { 37 | log.err("Unimplemented CR access: {?}", .{qual}); 38 | vcpu.abort(); 39 | }, 40 | } 41 | } 42 | 43 | /// Update IA-32e mode of the vCPU. 
44 | fn updateIa32e(vcpu: *Vcpu) VmxError!void { 45 | const cr0: am.Cr0 = @bitCast(try vmx.vmread(vmcs.guest.cr0)); 46 | const cr4: am.Cr4 = @bitCast(try vmx.vmread(vmcs.guest.cr4)); 47 | const ia32e_enabled = cr0.pg and cr4.pae; 48 | 49 | vcpu.ia32_enabled = ia32e_enabled; 50 | 51 | var entry_ctrl = try vmcs.EntryCtrl.store(); 52 | entry_ctrl.ia32e_mode_guest = ia32e_enabled; 53 | try entry_ctrl.load(); 54 | 55 | var efer: am.Efer = @bitCast(try vmx.vmread(vmcs.guest.efer)); 56 | efer.lma = vcpu.ia32_enabled; 57 | efer.lme = if (cr0.pg) efer.lma else efer.lme; 58 | try vmx.vmwrite(vmcs.guest.efer, efer); 59 | } 60 | 61 | fn passthroughRead(vcpu: *Vcpu, qual: QualCr) VmxError!void { 62 | const value = switch (qual.index) { 63 | 0 => try vmx.vmread(vmcs.guest.cr0), 64 | 3 => try vmx.vmread(vmcs.guest.cr3), 65 | 4 => try vmx.vmread(vmcs.guest.cr4), 66 | else => { 67 | log.err("Unhandled CR read: {}", .{qual.index}); 68 | vcpu.abort(); 69 | }, 70 | }; 71 | 72 | try setValue(vcpu, qual, value); 73 | } 74 | 75 | fn passthroughWrite(vcpu: *Vcpu, qual: QualCr) VmxError!void { 76 | const value = try getValue(vcpu, qual); 77 | switch (qual.index) { 78 | 0 => { 79 | try vmx.vmwrite(vmcs.guest.cr0, adjustCr0(value)); 80 | try vmx.vmwrite(vmcs.ctrl.cr0_read_shadow, value); 81 | }, 82 | 4 => { 83 | try vmx.vmwrite(vmcs.guest.cr4, adjustCr4(value)); 84 | try vmx.vmwrite(vmcs.ctrl.cr4_read_shadow, value); 85 | }, 86 | else => { 87 | log.err("Unhandled CR write to: {}", .{qual.index}); 88 | vcpu.abort(); 89 | }, 90 | } 91 | } 92 | 93 | fn getValue(vcpu: *Vcpu, qual: QualCr) VmxError!u64 { 94 | const gregs = &vcpu.guest_regs; 95 | return switch (qual.reg) { 96 | .rax => gregs.rax, 97 | .rcx => gregs.rcx, 98 | .rdx => gregs.rdx, 99 | .rbx => gregs.rbx, 100 | .rbp => gregs.rbp, 101 | .rsi => gregs.rsi, 102 | .rdi => gregs.rdi, 103 | .r8 => gregs.r8, 104 | .r9 => gregs.r9, 105 | .r10 => gregs.r10, 106 | .r11 => gregs.r11, 107 | .r12 => gregs.r12, 108 | .r13 => gregs.r13, 109 | .r14 => 
gregs.r14, 110 | .r15 => gregs.r15, 111 | .rsp => try vmx.vmread(vmcs.guest.rsp), 112 | }; 113 | } 114 | 115 | fn setValue(vcpu: *Vcpu, qual: QualCr, value: u64) VmxError!void { 116 | const gregs = &vcpu.guest_regs; 117 | switch (qual.reg) { 118 | .rax => gregs.rax = value, 119 | .rcx => gregs.rcx = value, 120 | .rdx => gregs.rdx = value, 121 | .rbx => gregs.rbx = value, 122 | .rbp => gregs.rbp = value, 123 | .rsi => gregs.rsi = value, 124 | .rdi => gregs.rdi = value, 125 | .r8 => gregs.r8 = value, 126 | .r9 => gregs.r9 = value, 127 | .r10 => gregs.r10 = value, 128 | .r11 => gregs.r11 = value, 129 | .r12 => gregs.r12 = value, 130 | .r13 => gregs.r13 = value, 131 | .r14 => gregs.r14 = value, 132 | .r15 => gregs.r15 = value, 133 | .rsp => try vmx.vmwrite(vmcs.guest.rsp, value), 134 | } 135 | } 136 | 137 | fn adjustCr0(value: u64) u64 { 138 | var ret: u64 = @bitCast(value); 139 | const vmx_cr0_fixed0: u32 = @truncate(am.readMsr(.vmx_cr0_fixed0)); 140 | const vmx_cr0_fixed1: u32 = @truncate(am.readMsr(.vmx_cr0_fixed1)); 141 | 142 | ret |= vmx_cr0_fixed0; 143 | ret &= vmx_cr0_fixed1; 144 | 145 | return ret; 146 | } 147 | 148 | fn adjustCr4(value: u64) u64 { 149 | var ret: u64 = @bitCast(value); 150 | const vmx_cr4_fixed0: u32 = @truncate(am.readMsr(.vmx_cr4_fixed0)); 151 | const vmx_cr4_fixed1: u32 = @truncate(am.readMsr(.vmx_cr4_fixed1)); 152 | 153 | ret |= vmx_cr4_fixed0; 154 | ret &= vmx_cr4_fixed1; 155 | 156 | return ret; 157 | } 158 | -------------------------------------------------------------------------------- /ymir/arch/x86/isr.zig: -------------------------------------------------------------------------------- 1 | //! LICENSE NOTICE 2 | //! 3 | //! The impletentation is heavily inspired by https://github.com/AndreaOrru/zen 4 | //! Original LICENSE follows: 5 | //! 6 | //! BSD 3-Clause License 7 | //! 8 | //! Copyright (c) 2017, Andrea Orru 9 | //! All rights reserved. 10 | //! 11 | //! 
Redistribution and use in source and binary forms, with or without 12 | //! modification, are permitted provided that the following conditions are met: 13 | //! 14 | //! * Redistributions of source code must retain the above copyright notice, this 15 | //! list of conditions and the following disclaimer. 16 | //! 17 | //! * Redistributions in binary form must reproduce the above copyright notice, 18 | //! this list of conditions and the following disclaimer in the documentation 19 | //! and/or other materials provided with the distribution. 20 | //! 21 | //! * Neither the name of the copyright holder nor the names of its 22 | //! contributors may be used to endorse or promote products derived from 23 | //! this software without specific prior written permission. 24 | //! 25 | //! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 26 | //! AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 | //! IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 28 | //! DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 29 | //! FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 | //! DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 31 | //! SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 32 | //! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 33 | //! OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 34 | //! OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 35 | //! 36 | //! 37 | 38 | const std = @import("std"); 39 | const log = std.log.scoped(.isr); 40 | 41 | const intr = @import("interrupt.zig"); 42 | const idt = @import("idt.zig"); 43 | 44 | // Execution Context 45 | pub const Context = packed struct { 46 | /// General purpose registers. 47 | registers: Registers, 48 | /// Interrupt Vector. 
49 | vector: u64, 50 | /// Error Code. 51 | error_code: u64, 52 | 53 | // CPU status: 54 | rip: u64, 55 | cs: u64, 56 | rflags: u64, 57 | }; 58 | 59 | /// Structure holding general purpose registers as saved by PUSHA. 60 | const Registers = packed struct { 61 | r8: u64, 62 | r9: u64, 63 | r10: u64, 64 | r11: u64, 65 | r12: u64, 66 | r13: u64, 67 | r14: u64, 68 | r15: u64, 69 | rdi: u64, 70 | rsi: u64, 71 | rbp: u64, 72 | rsp: u64, 73 | rbx: u64, 74 | rdx: u64, 75 | rcx: u64, 76 | rax: u64, 77 | }; 78 | 79 | /// Zig entry point of the interrupt handler. 80 | export fn intrZigEntry(ctx: *Context) callconv(.c) void { 81 | intr.dispatch(ctx); 82 | } 83 | 84 | /// Get ISR function for the given vector. 85 | pub fn generateIsr(comptime vector: usize) idt.Isr { 86 | return struct { 87 | fn handler() callconv(.naked) void { 88 | // Clear the interrupt flag. 89 | asm volatile ( 90 | \\cli 91 | ); 92 | 93 | // If the interrupt does not provide an error code, push a dummy one. 94 | if (vector != 8 and !(vector >= 10 and vector <= 14) and vector != 17) { 95 | asm volatile ( 96 | \\pushq $0 97 | ); 98 | } 99 | 100 | // Push the vector. 101 | asm volatile ( 102 | \\pushq %[vector] 103 | : 104 | : [vector] "n" (vector), 105 | ); 106 | // Jump to the common ISR. 107 | asm volatile ( 108 | \\jmp isrCommon 109 | ); 110 | } 111 | }.handler; 112 | } 113 | 114 | /// Common stub for all ISR, that all the ISRs will use. 115 | /// This function assumes that `Context` is saved at the top of the stack except for general-purpose registers. 116 | export fn isrCommon() callconv(.naked) void { 117 | // Save the general-purpose registers. 
118 | asm volatile ( 119 | \\pushq %%rax 120 | \\pushq %%rcx 121 | \\pushq %%rdx 122 | \\pushq %%rbx 123 | \\pushq %%rsp 124 | \\pushq %%rbp 125 | \\pushq %%rsi 126 | \\pushq %%rdi 127 | \\pushq %%r15 128 | \\pushq %%r14 129 | \\pushq %%r13 130 | \\pushq %%r12 131 | \\pushq %%r11 132 | \\pushq %%r10 133 | \\pushq %%r9 134 | \\pushq %%r8 135 | ); 136 | 137 | // Push the context and call the handler. 138 | asm volatile ( 139 | \\pushq %%rsp 140 | \\popq %%rdi 141 | // Align stack to 16 bytes. 142 | \\pushq %%rsp 143 | \\pushq (%%rsp) 144 | \\andq $-0x10, %%rsp 145 | // Call the dispatcher. 146 | \\call intrZigEntry 147 | // Restore the stack. 148 | \\movq 8(%%rsp), %%rsp 149 | ); 150 | 151 | // Remove general-purpose registers, error code, and vector from the stack. 152 | asm volatile ( 153 | \\popq %%r8 154 | \\popq %%r9 155 | \\popq %%r10 156 | \\popq %%r11 157 | \\popq %%r12 158 | \\popq %%r13 159 | \\popq %%r14 160 | \\popq %%r15 161 | \\popq %%rdi 162 | \\popq %%rsi 163 | \\popq %%rbp 164 | \\popq %%rsp 165 | \\popq %%rbx 166 | \\popq %%rdx 167 | \\popq %%rcx 168 | \\popq %%rax 169 | \\add $0x10, %%rsp 170 | \\iretq 171 | ); 172 | } 173 | -------------------------------------------------------------------------------- /ymir/arch/x86/vmx/msr.zig: -------------------------------------------------------------------------------- 1 | const std = @import("std"); 2 | const log = std.log.scoped(.vmmsr); 3 | const Allocator = std.mem.Allocator; 4 | 5 | const ymir = @import("ymir"); 6 | const mem = ymir.mem; 7 | 8 | const arch = @import("arch.zig"); 9 | const am = arch.am; 10 | 11 | const vmx = @import("common.zig"); 12 | const vmcs = @import("vmcs.zig"); 13 | 14 | const Vcpu = @import("vcpu.zig").Vcpu; 15 | const VmxError = vmx.VmxError; 16 | 17 | /// Handle VM-exit caused by RDMSR instruction. 18 | /// Note that this function does not increment the RIP. 
/// Handle VM-exit caused by RDMSR instruction.
/// The MSR index is taken from guest RCX; the value is returned to the guest
/// in EDX:EAX (the upper 32 bits of guest RDX/RAX are preserved by `setRetVal`).
/// Note that this function does not increment the RIP.
pub fn handleRdmsrExit(vcpu: *Vcpu) VmxError!void {
    const guest_regs = &vcpu.guest_regs;
    const msr_kind: am.Msr = @enumFromInt(guest_regs.rcx);

    switch (msr_kind) {
        // NOTE(review): returns all-ones for the APIC base — presumably to
        // report an unusable APIC base to the guest; confirm against the
        // APIC virtualization code.
        .apic_base => setRetVal(vcpu, std.math.maxInt(u64)),
        // MSRs backed directly by VMCS guest-state fields.
        .efer => setRetVal(vcpu, try vmx.vmread(vmcs.guest.efer)),
        .fs_base => setRetVal(vcpu, try vmx.vmread(vmcs.guest.fs_base)),
        .gs_base => setRetVal(vcpu, try vmx.vmread(vmcs.guest.gs_base)),
        // MSRs served from the per-vCPU shadow MSR storage.
        .kernel_gs_base => shadowRead(vcpu, msr_kind),
        // Any other MSR read is fatal for the guest.
        else => {
            log.err("Unhandled RDMSR: {?}", .{msr_kind});
            vcpu.abort();
        },
    }
}

/// Handle VM-exit caused by WRMSR instruction.
/// The MSR index is taken from guest RCX; the value written is EDX:EAX.
/// Note that this function does not increment the RIP.
pub fn handleWrmsrExit(vcpu: *Vcpu) VmxError!void {
    const regs = &vcpu.guest_regs;
    const value = concat(regs.rdx, regs.rax);
    const msr_kind: am.Msr = @enumFromInt(regs.rcx);

    switch (msr_kind) {
        // MSRs stored in the per-vCPU shadow MSR storage.
        .star,
        .lstar,
        .cstar,
        .tsc_aux,
        .fmask,
        .kernel_gs_base,
        => shadowWrite(vcpu, msr_kind),
        // MSRs backed directly by VMCS guest-state fields.
        .sysenter_cs => try vmx.vmwrite(vmcs.guest.sysenter_cs, value),
        .sysenter_eip => try vmx.vmwrite(vmcs.guest.sysenter_eip, value),
        .sysenter_esp => try vmx.vmwrite(vmcs.guest.sysenter_esp, value),
        .efer => try vmx.vmwrite(vmcs.guest.efer, value),
        .gs_base => try vmx.vmwrite(vmcs.guest.gs_base, value),
        .fs_base => try vmx.vmwrite(vmcs.guest.fs_base, value),
        // Any other MSR write is fatal for the guest.
        else => {
            log.err("Unhandled WRMSR: {?}", .{msr_kind});
            vcpu.abort();
        },
    }
}

/// Concatenate two 32-bit values into a 64-bit value.
/// `r1` supplies the upper 32 bits and `r2` the lower 32 bits (EDX:EAX order).
fn concat(r1: u64, r2: u64) u64 {
    return ((r1 & 0xFFFF_FFFF) << 32) | (r2 & 0xFFFF_FFFF);
}

/// Set the 64-bit return value to the guest registers without modifying upper 32-bits.
/// Set a 64-bit value into the guest's EDX:AX return pair (RDMSR convention)
/// without modifying the upper 32 bits of guest RDX/RAX.
/// NOTE: writes through `*u32` views of the low halves; assumes little-endian (x86-64).
fn setRetVal(vcpu: *Vcpu, val: u64) void {
    const regs = &vcpu.guest_regs;
    // FIX: "&regs" was mis-encoded as "®s" (HTML-entity damage) on both lines below.
    @as(*u32, @ptrCast(&regs.rdx)).* = @as(u32, @truncate(val >> 32));
    @as(*u32, @ptrCast(&regs.rax)).* = @as(u32, @truncate(val));
}

/// Read from the shadow MSR and return the value to the guest in EDX:EAX.
/// Aborts the vCPU if the MSR has not been registered.
fn shadowRead(vcpu: *Vcpu, msr_kind: am.Msr) void {
    if (vcpu.guest_msr.find(msr_kind)) |msr| {
        setRetVal(vcpu, msr.data);
    } else {
        log.err("RDMSR: MSR is not registered: {s}", .{@tagName(msr_kind)});
        vcpu.abort();
    }
}

/// Write the guest's EDX:EAX to the shadow MSR.
/// Aborts the vCPU if the MSR has not been registered.
fn shadowWrite(vcpu: *Vcpu, msr_kind: am.Msr) void {
    const regs = &vcpu.guest_regs;
    if (vcpu.guest_msr.find(msr_kind)) |_| {
        vcpu.guest_msr.set(msr_kind, concat(regs.rdx, regs.rax));
    } else {
        log.err("WRMSR: MSR is not registered: {s}", .{@tagName(msr_kind)});
        vcpu.abort();
    }
}

/// Shadow MSR page.
pub const ShadowMsr = struct {
    /// Maximum number of MSR entries in a page.
    const max_num_ents = 512;

    /// MSR entries.
    ents: []SavedMsr,
    /// Number of registered MSR entries.
    num_ents: usize = 0,

    /// MSR Entry.
    /// cf. SDM Vol.3C. 25.7.2. Table 25-15.
    pub const SavedMsr = packed struct(u128) {
        index: u32,
        reserved: u32 = 0,
        data: u64,
    };

    /// Initialize saved MSR page.
    /// The caller owns the returned entry slice (freed via the same allocator).
    pub fn init(allocator: Allocator) !ShadowMsr {
        const ents = try allocator.alloc(SavedMsr, max_num_ents);
        @memset(ents, std.mem.zeroes(SavedMsr));

        return ShadowMsr{
            .ents = ents,
        };
    }

    /// Register or update MSR entry.
    pub fn set(self: *ShadowMsr, index: am.Msr, data: u64) void {
        return self.setByIndex(@intFromEnum(index), data);
    }

    /// Register or update MSR entry indexed by `index`.
    /// Panics when the page is full and `index` is not already registered.
    pub fn setByIndex(self: *ShadowMsr, index: u32, data: u64) void {
        for (0..self.num_ents) |i| {
            if (self.ents[i].index == index) {
                self.ents[i].data = data;
                return;
            }
        }
        // FIX: the capacity must be checked BEFORE appending. The original
        // wrote `ents[num_ents]` first, indexing out of bounds on the 513th
        // registration; its after-the-fact `> max_num_ents` check could never
        // fire before the bad write.
        if (self.num_ents >= max_num_ents) {
            @panic("Too many MSR entries registered.");
        }
        self.ents[self.num_ents] = SavedMsr{ .index = index, .data = data };
        self.num_ents += 1;
    }

    /// Get the saved MSRs.
    pub fn savedEnts(self: *ShadowMsr) []SavedMsr {
        return self.ents[0..self.num_ents];
    }

    /// Find the saved MSR entry.
    pub fn find(self: *ShadowMsr, index: am.Msr) ?*SavedMsr {
        const index_num = @intFromEnum(index);
        for (0..self.num_ents) |i| {
            if (self.ents[i].index == index_num) {
                return &self.ents[i];
            }
        }
        return null;
    }

    /// Get the host physical address of the MSR page.
    pub fn phys(self: *ShadowMsr) u64 {
        return mem.virt2phys(self.ents.ptr);
    }
};
// ---------------------------------------------------------------------------
// /ymir/mem.zig (module documentation of the next file in this dump):
// Ymir has three memory regions.
// - Initial direct mapping
// - Direct mapping
// - Kernel text mapping
//
// Initial direct mapping is used until page tables provided by UEFI are reconstructed.
// It directly maps entire VA to PA without offset.
// After the UEFI page tables are cloned and new tables are created, the second direct mapping is used.
// The direct mapping maps entire memory with offset of `ymir.direct_map_base`.
// At the same time, kernel text is mapped to `ymir.kernel_base` as the ELF image requests.
// That means the kernel image is mapped to two VA: direct mapping and kernel text mapping.
// Page allocator allocates pages from the direct mapping region.
//
//! While the initial direct mapping is in use, VA is equal to PA.
//! After the initial direct mapping is discarded, VA-to-PA translation is done by simple calculation.
//! If the VA is in the direct mapping region, the PA can be calculated by subtracting the base address.
//! If the VA is in the kernel text mapping region, the PA can be calculated by subtracting the kernel base.

const std = @import("std");
const builtin = @import("builtin");
const atomic = std.atomic;
const Allocator = std.mem.Allocator;
const log = std.log.scoped(.mem);
const surtr = @import("surtr");
const MemoryMap = surtr.MemoryMap;

const ymir = @import("ymir");
const arch = ymir.arch;

/// Page allocator.
pub const page_allocator = Allocator{
    .ptr = &page_allocator_instance,
    .vtable = &PageAllocator.vtable,
};
/// General memory allocator.
pub const general_allocator = Allocator{
    .ptr = &bin_allocator_instance,
    .vtable = &BinAllocator.vtable,
};

pub const PageAllocator = @import("mem/PageAllocator.zig");
/// Page allocator instance.
/// You should use this allocator via `page_allocator` interface.
pub var page_allocator_instance = PageAllocator.newUninit();

const BinAllocator = @import("mem/BinAllocator.zig");
var bin_allocator_instance = BinAllocator.newUninit();

/// Physical address.
pub const Phys = u64;
/// Virtual address.
pub const Virt = u64;

pub const kib = 1024;
pub const mib = 1024 * kib;
pub const gib = 1024 * mib;

pub const page_size: u64 = page_size_4k;
pub const page_shift: u64 = page_shift_4k;
pub const page_mask: u64 = page_mask_4k;

/// Size in bytes of a 4K page.
pub const page_size_4k = 4 * kib;
/// Size in bytes of a 2M page.
pub const page_size_2mb = page_size_4k << 9;
/// Size in bytes of a 1G page.
pub const page_size_1gb = page_size_2mb << 9;
/// Shift in bits for a 4K page.
pub const page_shift_4k = 12;
/// Shift in bits for a 2M page.
pub const page_shift_2mb = 21;
/// Shift in bits for a 1G page.
pub const page_shift_1gb = 30;
/// Mask for a 4K page.
pub const page_mask_4k: u64 = page_size_4k - 1;
/// Mask for a 2M page.
pub const page_mask_2mb: u64 = page_size_2mb - 1;
/// Mask for a 1G page.
pub const page_mask_1gb: u64 = page_size_1gb - 1;

/// Status of the page remap.
var mapping_reconstructed = atomic.Value(bool).init(false);

/// Initialize the page allocator.
/// You MUST call this function before using `page_allocator`.
pub fn initPageAllocator(map: MemoryMap) void {
    page_allocator_instance.init(map);
}

/// Initialize the general allocator.
/// You MUST call this function before using `general_allocator`.
pub fn initGeneralAllocator() void {
    bin_allocator_instance.init(page_allocator);
}

/// Check if the address is canonical form.
/// If the architecture does not have a concept of canonical form, this function always returns true.
pub const isCanonical = switch (builtin.target.cpu.arch) {
    .x86_64 => arch.page.isCanonical,
    else => @compileError("Unsupported architecture."),
};

/// Discard the initial direct mapping and construct Ymir's page tables.
/// It creates two mappings: direct mapping and kernel text mapping.
/// For the detail, refer to this module documentation.
pub fn reconstructMapping(allocator: Allocator) !void {
    arch.disableIntr();
    defer arch.enableIntr();

    try arch.page.reconstruct(allocator);

    // Remap pages.
    mapping_reconstructed.store(true, .release);

    // Notify that BootServicesData region is no longer needed.
    page_allocator_instance.discardBootService();
}

/// Translate the given virtual address to physical address.
/// This function just uses simple calculation and does not walk page tables.
/// To do page table walk, use arch-specific functions.
pub fn virt2phys(addr: anytype) Phys {
    const value = switch (@typeInfo(@TypeOf(addr))) {
        .int, .comptime_int => @as(u64, addr),
        .pointer => @as(u64, @intFromPtr(addr)),
        // FIX: the message previously said "phys2virt" — a copy-paste from
        // the sibling function, which made compile errors point at the
        // wrong translation direction.
        else => @compileError("virt2phys: invalid type"),
    };
    return if (!mapping_reconstructed.load(.acquire)) b: {
        break :b value;
    } else if (value < ymir.kernel_base) b: {
        // Direct mapping region.
        break :b value - ymir.direct_map_base;
    } else b: {
        // Kernel image mapping region.
        break :b value - ymir.kernel_base;
    };
}

/// Translate the given physical address to virtual address.
/// This function just uses simple calculation and does not walk page tables.
/// To do page table walk, use arch-specific functions.
142 | pub fn phys2virt(addr: anytype) Virt { 143 | const value = switch (@typeInfo(@TypeOf(addr))) { 144 | .int, .comptime_int => @as(u64, addr), 145 | .pointer => @as(u64, @intFromPtr(addr)), 146 | else => @compileError("phys2virt: invalid type"), 147 | }; 148 | return if (!mapping_reconstructed.load(.acquire)) b: { 149 | break :b value; 150 | } else b: { 151 | break :b value + ymir.direct_map_base; 152 | }; 153 | } 154 | 155 | // ======================================== 156 | 157 | const testing = std.testing; 158 | 159 | test { 160 | testing.refAllDeclsRecursive(@This()); 161 | } 162 | 163 | test "address translation" { 164 | const direct_map_base = ymir.direct_map_base; 165 | const kernel_base = ymir.kernel_base; 166 | 167 | mapping_reconstructed.store(true, .release); 168 | 169 | // virt -> phys 170 | try testing.expectEqual(0x0, virt2phys(direct_map_base)); 171 | try testing.expectEqual(0x100, virt2phys(direct_map_base + 0x100)); 172 | try testing.expectEqual(page_size * 0x100, virt2phys(direct_map_base + page_size * 0x100)); 173 | try testing.expectEqual(kernel_base - direct_map_base - 1, virt2phys(kernel_base - 1)); 174 | try testing.expectEqual(0, virt2phys(kernel_base)); 175 | try testing.expectEqual(0x100000, virt2phys(kernel_base + 0x100000)); 176 | 177 | // phys -> virt 178 | try testing.expectEqual(direct_map_base, phys2virt(0x0)); 179 | try testing.expectEqual(direct_map_base + 0x100, phys2virt(0x100)); 180 | } 181 | -------------------------------------------------------------------------------- /ymir/mem/BinAllocator.zig: -------------------------------------------------------------------------------- 1 | //! General purpose allocator. 
2 | 3 | const std = @import("std"); 4 | const Allocator = std.mem.Allocator; 5 | const Alignment = std.mem.Alignment; 6 | const spin = @import("ymir").spin; 7 | 8 | const Self = @This(); 9 | 10 | pub const vtable = Allocator.VTable{ 11 | .alloc = allocate, 12 | .free = free, 13 | .resize = resize, 14 | .remap = remap, 15 | }; 16 | 17 | const bin_sizes = [_]usize{ 18 | 0x20, 0x40, 0x80, 0x100, 0x200, 0x400, 0x800, 19 | }; 20 | 21 | comptime { 22 | if (bin_sizes[0] < @sizeOf(ChunkMetaNode)) { 23 | @compileError("The smallest bin size is smaller than the size of ChunkMetaNode"); 24 | } 25 | if (bin_sizes[bin_sizes.len - 1] > 4096) { 26 | @compileError("The largest bin size exceeds a 4KiB page size"); 27 | } 28 | } 29 | 30 | /// Backing page allocator. 31 | page_allocator: Allocator, 32 | /// Heads of the chunk lists. 33 | list_heads: [bin_sizes.len]ChunkMetaPointer, 34 | /// Spin lock. 35 | lock: spin.SpinLock = spin.SpinLock{}, 36 | 37 | /// Get a instance of BinAllocator without initialization. 38 | pub fn newUninit() Self { 39 | return Self{ 40 | .page_allocator = undefined, 41 | .list_heads = undefined, 42 | .lock = spin.SpinLock{}, 43 | }; 44 | } 45 | 46 | /// Initialize the BinAllocator. 47 | pub fn init(self: *Self, page_allocator: Allocator) void { 48 | self.page_allocator = page_allocator; 49 | @memset(self.list_heads[0..self.list_heads.len], null); 50 | self.lock = spin.SpinLock{}; 51 | } 52 | 53 | /// Get the bin index for the given size. 54 | /// If the size exceeds the largest bin size, return null. 55 | fn binIndex(size: usize) ?usize { 56 | for (bin_sizes, 0..) 
|bin_size, i| { 57 | if (size <= bin_size) { 58 | return i; 59 | } 60 | } 61 | return null; 62 | } 63 | 64 | fn allocFromBin(self: *Self, bin_index: usize) ?[*]u8 { 65 | const mask = self.lock.lockSaveIrq(); 66 | defer self.lock.unlockRestoreIrq(mask); 67 | 68 | if (self.list_heads[bin_index] == null) { 69 | initBinPage(self, bin_index) orelse return null; 70 | } 71 | return @ptrCast(pop(&self.list_heads[bin_index])); 72 | } 73 | 74 | fn freeToBin(self: *Self, bin_index: usize, ptr: [*]u8) void { 75 | const mask = self.lock.lockSaveIrq(); 76 | defer self.lock.unlockRestoreIrq(mask); 77 | 78 | const chunk: *ChunkMetaNode = @alignCast(@ptrCast(ptr)); 79 | push(&self.list_heads[bin_index], chunk); 80 | } 81 | 82 | fn initBinPage(self: *Self, bin_index: usize) ?void { 83 | const new_page = self.page_allocator.alloc(u8, 4096) catch return null; 84 | const bin_size = bin_sizes[bin_index]; 85 | 86 | var i: usize = 4096 / bin_size - 1; 87 | while (true) : (i -= 1) { 88 | const chunk: *ChunkMetaNode = @ptrFromInt(@intFromPtr(new_page.ptr) + i * bin_size); 89 | push(&self.list_heads[bin_index], chunk); 90 | 91 | if (i == 0) break; 92 | } 93 | } 94 | 95 | fn push(list_head: *ChunkMetaPointer, node: *ChunkMetaNode) void { 96 | if (list_head.*) |next| { 97 | node.next = next; 98 | list_head.* = node; 99 | } else { 100 | list_head.* = node; 101 | node.next = null; 102 | } 103 | } 104 | 105 | fn pop(list_head: *ChunkMetaPointer) *ChunkMetaNode { 106 | if (list_head.*) |first| { 107 | list_head.* = first.next; 108 | return first; 109 | } else { 110 | @panic("BinAllocator: pop from empty list"); 111 | } 112 | } 113 | 114 | fn allocate(ctx: *anyopaque, n: usize, log2_align: Alignment, _: usize) ?[*]u8 { 115 | const self: *Self = @alignCast(@ptrCast(ctx)); 116 | 117 | const ptr_align = log2_align.toByteUnits(); 118 | const bin_index = binIndex(@max(ptr_align, n)); 119 | 120 | if (bin_index) |index| { 121 | return self.allocFromBin(index); 122 | } else { 123 | // Requested size 
including alignment exceeds a 4KiB page size. 124 | // Zig's Allocator does not assume an align larger than a page size. 125 | // So we can safely ignore the alignment, ang just return for requested size. 126 | const ret = self.page_allocator.alloc(u8, n) catch return null; 127 | return @ptrCast(ret.ptr); 128 | } 129 | } 130 | 131 | fn free(ctx: *anyopaque, slice: []u8, log2_align: Alignment, _: usize) void { 132 | const self: *Self = @alignCast(@ptrCast(ctx)); 133 | 134 | const ptr_align = log2_align.toByteUnits(); 135 | const bin_index = binIndex(@max(ptr_align, slice.len)); 136 | 137 | if (bin_index) |index| { 138 | self.freeToBin(index, @ptrCast(slice.ptr)); 139 | } else { 140 | self.page_allocator.free(slice); 141 | } 142 | } 143 | 144 | fn resize(_: *anyopaque, _: []u8, _: Alignment, _: usize, _: usize) bool { 145 | @panic("BinAllocator does not support resizing"); 146 | } 147 | 148 | fn remap(_: *anyopaque, _: []u8, _: Alignment, _: usize, _: usize) ?[*]u8 { 149 | @panic("BinAllocator does not support remap"); 150 | } 151 | 152 | /// Metadata of free chunk. 153 | /// NOTE: In zig, we don't need to store the size of the in-use chunk. 154 | const ChunkMetaNode = packed struct { 155 | next: ChunkMetaPointer = null, 156 | }; 157 | const ChunkMetaPointer = ?*ChunkMetaNode; 158 | 159 | // ======================================== 160 | 161 | const testing = std.testing; 162 | 163 | test { 164 | testing.refAllDeclsRecursive(@This()); 165 | } 166 | 167 | fn getTestingAllocator() Allocator { 168 | var bin_allocator_instance // we don't want an error check 169 | = std.heap.page_allocator.create(Self) catch unreachable; 170 | bin_allocator_instance.init(std.heap.page_allocator); 171 | 172 | return Allocator{ 173 | .ptr = bin_allocator_instance, 174 | .vtable = &vtable, 175 | }; 176 | } 177 | 178 | test "allocation order" { 179 | const ba = getTestingAllocator(); 180 | 181 | // Chunks are allocated in ascending order. 
182 | // The distance between the chunks is the same as the chunk size. 183 | const sizes = bin_sizes; 184 | for (sizes) |size| { 185 | var prev = try ba.alloc(u8, size); 186 | for (0..4096 / size - 1) |_| { 187 | const ptr = try ba.alloc(u8, size); 188 | try testing.expectEqual(size, @intFromPtr(ptr.ptr) - @intFromPtr(prev.ptr)); 189 | prev = ptr; 190 | } 191 | } 192 | 193 | // Most recently freed chunk is allocated first. 194 | for (0..3) |_| _ = try ba.alloc(u8, 0x10); 195 | const ptr = try ba.alloc(u8, 0x10); 196 | for (0..3) |_| _ = try ba.alloc(u8, 0x10); 197 | ba.free(ptr); 198 | try testing.expectEqual(ptr, try ba.alloc(u8, 0x10)); 199 | } 200 | 201 | test "allocation size" { 202 | const ba = getTestingAllocator(); 203 | 204 | for (0..5000) |size| { 205 | const ptr = try ba.alloc(u8, size); 206 | try testing.expectEqual(size, ptr.len); 207 | ba.free(ptr); 208 | } 209 | } 210 | 211 | test "allocation exceeds page size" { 212 | const ba = getTestingAllocator(); 213 | 214 | for (0..4096 / 0x20 + 8) |_| { 215 | const ptr = try ba.alloc(u8, 0x20); 216 | try testing.expectEqual(0x20, ptr.len); 217 | } 218 | } 219 | 220 | test "no mitigation against double free" { 221 | const ba = getTestingAllocator(); 222 | 223 | const ptr = try ba.alloc(u8, 0x20); 224 | ba.free(ptr); 225 | ba.free(ptr); 226 | } 227 | -------------------------------------------------------------------------------- /ymir/linux.zig: -------------------------------------------------------------------------------- 1 | const std = @import("std"); 2 | 3 | pub const layout = struct { 4 | /// Where the kernel boot parameters are loaded, known as "zero page". 5 | /// Must be initialized with zeros. 6 | pub const bootparam = 0x0001_0000; 7 | /// Where the kernel cmdline is located. 8 | pub const cmdline = 0x0002_0000; 9 | /// Where the protected-mode kernel code is loaded 10 | pub const kernel_base = 0x0010_0000; 11 | /// Where the initrd is loaded. 
12 | pub const initrd = 0x0600_0000; 13 | }; 14 | 15 | /// Representation of the linux kernel header. 16 | /// This header compiles with protocol v2.15. 17 | pub const SetupHeader = extern struct { 18 | /// RO. The number of setup sectors. 19 | setup_sects: u8 align(1), 20 | root_flags: u16 align(1), 21 | syssize: u32 align(1), 22 | ram_size: u16 align(1), 23 | vid_mode: u16 align(1), 24 | root_dev: u16 align(1), 25 | boot_flag: u16 align(1), 26 | jump: u16 align(1), 27 | header: u32 align(1), 28 | /// RO. Boot protocol version supported. 29 | version: u16 align(1), 30 | realmode_switch: u32 align(1), 31 | start_sys_seg: u16 align(1), 32 | kernel_version: u16 align(1), 33 | /// M. The type of loader. Specify 0xFF if no ID is assigned. 34 | type_of_loader: u8 align(1), 35 | /// M. Bitmask. 36 | loadflags: LoadflagBitfield align(1), 37 | setup_move_size: u16 align(1), 38 | code32_start: u32 align(1), 39 | /// M. The 32-bit linear address of initial ramdisk or ramfs. 40 | /// Specify 0 if there is no ramdisk or ramfs. 41 | ramdisk_image: u32 align(1), 42 | /// M. The size of the initial ramdisk or ramfs. 43 | ramdisk_size: u32 align(1), 44 | bootsect_kludge: u32 align(1), 45 | /// W. Offset of the end of the setup/heap minus 0x200. 46 | heap_end_ptr: u16 align(1), 47 | /// W(opt). Extension of the loader ID. 48 | ext_loader_ver: u8 align(1), 49 | ext_loader_type: u8 align(1), 50 | /// W. The 32-bit linear address of the kernel command line. 51 | cmd_line_ptr: u32 align(1), 52 | /// R. Highest address that can be used for initrd. 53 | initrd_addr_max: u32 align(1), 54 | kernel_alignment: u32 align(1), 55 | relocatable_kernel: u8 align(1), 56 | min_alignment: u8 align(1), 57 | xloadflags: u16 align(1), 58 | /// R. Maximum size of the cmdline. 
59 | cmdline_size: u32 align(1), 60 | hardware_subarch: u32 align(1), 61 | hardware_subarch_data: u64 align(1), 62 | payload_offset: u32 align(1), 63 | payload_length: u32 align(1), 64 | setup_data: u64 align(1), 65 | pref_address: u64 align(1), 66 | init_size: u32 align(1), 67 | handover_offset: u32 align(1), 68 | kernel_info_offset: u32 align(1), 69 | 70 | /// Bitfield for loadflags. 71 | const LoadflagBitfield = packed struct(u8) { 72 | /// If true, the protected-mode code is loaded at 0x100000. 73 | loaded_high: bool = false, 74 | /// If true, KASLR enabled. 75 | kaslr_flag: bool = false, 76 | /// Unused. 77 | _unused: u3 = 0, 78 | /// If false, print early messages. 79 | quiet_flag: bool = false, 80 | /// If false, reload the segment registers in the 32 bit entry point. 81 | keep_segments: bool = false, 82 | /// Set true to indicate that the value entered in the `heap_end_ptr` is valid. 83 | can_use_heap: bool = false, 84 | 85 | /// Convert to u8. 86 | pub fn to_u8(self: @This()) u8 { 87 | return @bitCast(self); 88 | } 89 | }; 90 | 91 | /// The offset where the header starts in the bzImage. 92 | pub const header_offset = 0x1F1; 93 | 94 | comptime { 95 | if (@sizeOf(@This()) != 0x7B) { 96 | @compileError("Unexpected SetupHeader size"); 97 | } 98 | } 99 | 100 | /// Instantiate a header from bzImage. 101 | pub fn from(bytes: []u8) @This() { 102 | var hdr = std.mem.bytesToValue( 103 | @This(), 104 | bytes[header_offset .. header_offset + @sizeOf(@This())], 105 | ); 106 | if (hdr.setup_sects == 0) { 107 | hdr.setup_sects = 4; 108 | } 109 | 110 | return hdr; 111 | } 112 | 113 | /// Get the offset of the protected-mode kernel code. 114 | /// Real-mode code consists of the boot sector (1 sector == 512 bytes) 115 | /// plus the setup code (`setup_sects` sectors). 
116 | pub fn getProtectedCodeOffset(self: @This()) usize { 117 | return (@as(usize, self.setup_sects) + 1) * 512; 118 | } 119 | }; 120 | 121 | pub const E820Entry = extern struct { 122 | addr: u64 align(1), 123 | size: u64 align(1), 124 | type: Type align(1), 125 | 126 | pub const Type = enum(u32) { 127 | /// RAM. 128 | ram = 1, 129 | /// Reserved. 130 | reserved = 2, 131 | /// ACPI reclaimable memory. 132 | acpi = 3, 133 | /// ACPI NVS memory. 134 | nvs = 4, 135 | /// Unusable memory region. 136 | unusable = 5, 137 | }; 138 | 139 | comptime { 140 | std.debug.assert(@bitSizeOf(@This()) == 0x14 * 8); 141 | } 142 | }; 143 | 144 | /// Port of struct boot_params in linux kernel. 145 | /// Note that fields prefixed with `_` are not implemented and have incorrect types. 146 | pub const BootParams = extern struct { 147 | /// Maximum number of entries in the E820 map. 148 | const e820max = 128; 149 | 150 | _screen_info: [0x40]u8 align(1), 151 | _apm_bios_info: [0x14]u8 align(1), 152 | _pad2: [4]u8 align(1), 153 | tboot_addr: u64 align(1), 154 | ist_info: [0x10]u8 align(1), 155 | _pad3: [0x10]u8 align(1), 156 | hd0_info: [0x10]u8 align(1), 157 | hd1_info: [0x10]u8 align(1), 158 | _sys_desc_table: [0x10]u8 align(1), 159 | _olpc_ofw_header: [0x10]u8 align(1), 160 | _pad4: [0x80]u8 align(1), 161 | _edid_info: [0x80]u8 align(1), 162 | _efi_info: [0x20]u8 align(1), 163 | alt_mem_k: u32 align(1), 164 | scratch: u32 align(1), 165 | /// Number of entries in the E820 map. 166 | e820_entries: u8 align(1), 167 | eddbuf_entries: u8 align(1), 168 | edd_mbr_sig_buf_entries: u8 align(1), 169 | kbd_status: u8 align(1), 170 | _pad6: [5]u8 align(1), 171 | /// Setup header. 172 | hdr: SetupHeader, 173 | _pad7: [0x290 - SetupHeader.header_offset - @sizeOf(SetupHeader)]u8 align(1), 174 | _edd_mbr_sig_buffer: [0x10]u32 align(1), 175 | /// System memory map that can be retrieved by INT 15, E820h. 
176 | e820_map: [e820max]E820Entry align(1), 177 | _unimplemented: [0x330]u8 align(1), 178 | 179 | comptime { 180 | if (@sizeOf(@This()) != 0x1000) { 181 | @compileError("Unexpected BootParams size"); 182 | } 183 | } 184 | 185 | /// Instantiate boot params from bzImage. 186 | pub fn from(bytes: []u8) @This() { 187 | return std.mem.bytesToValue( 188 | @This(), 189 | bytes[0..@sizeOf(@This())], 190 | ); 191 | } 192 | 193 | /// Add an entry to the E820 map. 194 | pub fn addE820entry( 195 | self: *@This(), 196 | addr: u64, 197 | size: u64, 198 | type_: E820Entry.Type, 199 | ) void { 200 | self.e820_map[self.e820_entries].addr = addr; 201 | self.e820_map[self.e820_entries].size = size; 202 | self.e820_map[self.e820_entries].type = type_; 203 | self.e820_entries += 1; 204 | } 205 | }; 206 | -------------------------------------------------------------------------------- /ymir/arch/x86/cpuid.zig: -------------------------------------------------------------------------------- 1 | /// CPUID Leaf. 2 | /// SDM Vol2A Chapter 3.3 Table 3-8. 3 | pub const Leaf = enum(u32) { 4 | /// Maximum input value for basic CPUID. 5 | maximum_input = 0x0, 6 | /// Version and feature information. 7 | vers_and_feat_info = 0x1, 8 | /// Thermal and power management. 9 | thermal_power = 0x6, 10 | /// Structured extended feature enumeration. 11 | /// Output depends on the value of ECX. 12 | ext_feature = 0x7, 13 | /// Processor extended state enumeration. 14 | /// Output depends on the ECX input value. 15 | ext_enumeration = 0xD, 16 | /// Maximum input value for extended function CPUID information. 17 | ext_func = 0x80000000, 18 | /// EAX: Extended processor signature and feature bits. 19 | ext_proc_signature = 0x80000001, 20 | /// Unimplemented 21 | _, 22 | 23 | /// Convert u64 to Leaf. 24 | pub fn from(rax: u64) Leaf { 25 | return @enumFromInt(rax); 26 | } 27 | 28 | /// Issues CPUID instruction to query the leaf and sub-leaf. 
29 | pub fn query(self: Leaf, subleaf: ?u32) CpuidRegisters { 30 | return cpuid(@intFromEnum(self), subleaf orelse 0); 31 | } 32 | }; 33 | 34 | /// Return value of CPUID. 35 | const CpuidRegisters = struct { 36 | eax: u32, 37 | ebx: u32, 38 | ecx: u32, 39 | edx: u32, 40 | }; 41 | 42 | /// Asm CPUID instruction. 43 | fn cpuid(leaf: u32, subleaf: u32) CpuidRegisters { 44 | var eax: u32 = undefined; 45 | var ebx: u32 = undefined; 46 | var ecx: u32 = undefined; 47 | var edx: u32 = undefined; 48 | 49 | asm volatile ( 50 | \\mov %[leaf], %%eax 51 | \\mov %[subleaf], %%ecx 52 | \\cpuid 53 | \\mov %%eax, %[eax] 54 | \\mov %%ebx, %[ebx] 55 | \\mov %%ecx, %[ecx] 56 | \\mov %%edx, %[edx] 57 | : [eax] "=r" (eax), 58 | [ebx] "=r" (ebx), 59 | [ecx] "=r" (ecx), 60 | [edx] "=r" (edx), 61 | : [leaf] "r" (leaf), 62 | [subleaf] "r" (subleaf), 63 | : "rax", "rbx", "rcx", "rdx" 64 | ); 65 | 66 | return .{ 67 | .eax = eax, 68 | .ebx = ebx, 69 | .ecx = ecx, 70 | .edx = edx, 71 | }; 72 | } 73 | 74 | /// CPUID Feature Flags bitfield for ECX. 75 | /// Leaf=1, Sub-Leaf=null, 76 | pub const FeatureInfoEcx = packed struct(u32) { 77 | /// Streaming SIMD Extensions 3 (SSE3). 78 | sse3: bool = false, 79 | /// PCLMULQDQ. 80 | pclmulqdq: bool = false, 81 | /// 64-bit DS Area. 82 | dtes64: bool = false, 83 | /// MONITOR/MWAIT. 84 | monitor: bool = false, 85 | // CPL Qualified Debug Store. 86 | ds_cpl: bool = false, 87 | /// Virtual Machine Extensions. 88 | vmx: bool = false, 89 | /// Safer Mode Extensions. 90 | smx: bool = false, 91 | /// Enhanced Intel SpeedStep Technology. 92 | eist: bool = false, 93 | /// Thermal Monitor 2. 94 | tm2: bool = false, 95 | /// SSSE3 extensions. 96 | ssse3: bool = false, 97 | /// L1 context ID. 98 | cnxt_id: bool = false, 99 | /// IA32_DEBUG_INTERFACE. 100 | sdbg: bool = false, 101 | /// FMA extensions using YMM state. 102 | fma: bool = false, 103 | /// CMPXCHG16B available. 104 | cmpxchg16b: bool = false, 105 | /// xTPR update control. 
106 | xtpr: bool = false, 107 | /// Perfmon and Debug Capability. 108 | pdcm: bool = false, 109 | /// Reserved. 110 | _reserved_0: bool = false, 111 | /// Process-context identifiers. 112 | pcid: bool = false, 113 | /// Ability to prevent data from memory mapped devices. 114 | dca: bool = false, 115 | /// SSE4.1 extensions. 116 | sse4_1: bool = false, 117 | /// SSE4.2 extensions. 118 | sse4_2: bool = false, 119 | /// x2APIC support. 120 | x2apic: bool = false, 121 | /// MOVBE instruction. 122 | movbe: bool = false, 123 | /// POPCNT instruction. 124 | popcnt: bool = false, 125 | /// Local APIC timer supports one-shot operation using TSC deadline. 126 | tsc_deadline: bool = false, 127 | /// AES instruction. 128 | aesni: bool = false, 129 | /// XSAVE/XRSTOR states. 130 | xsave: bool = false, 131 | /// OS has enabled XSETBV/XGETBV instructions to access XCR0. 132 | osxsave: bool = false, 133 | /// AVX. 134 | avx: bool = false, 135 | /// 16-bit floating-point conversion instructions. 136 | f16c: bool = false, 137 | /// RDRAND instruction. 138 | rdrand: bool = false, 139 | /// Not used. 140 | hypervisor: bool = false, 141 | }; 142 | 143 | /// CPUID Feature Flags bitfield for ECX. 144 | /// Leaf=1, Sub-Leaf=null, 145 | pub const FeatureInfoEdx = packed struct(u32) { 146 | /// x87 FPU. 147 | fpu: bool = false, 148 | /// Virtual 8086 mode enhancements. 149 | vme: bool = false, 150 | /// Debugging extensions. 151 | de: bool = false, 152 | /// Page Size Extension. 153 | pse: bool = false, 154 | /// Time Stamp Counter. 155 | tsc: bool = false, 156 | /// RDMSR and WRMSR instructions. 157 | msr: bool = false, 158 | /// Physical Address Extension. 159 | pae: bool = false, 160 | /// Machine Check Exception. 161 | mce: bool = false, 162 | /// CMPXCHG8B instruction. 163 | cx8: bool = false, 164 | /// APIC on-chip. 165 | apic: bool = false, 166 | /// Reserved. 167 | _reserved_0: bool = false, 168 | /// SYSENTER/SYSEXIT instructions. 
169 | sep: bool = false, 170 | /// Memory Type Range Registers. 171 | mtrr: bool = false, 172 | /// Page Global Bit. 173 | pge: bool = false, 174 | /// Machine check architecture. 175 | mca: bool = false, 176 | /// Conditional move instructions. 177 | cmov: bool = false, 178 | /// Page attribute table. 179 | pat: bool = false, 180 | /// 36-bit Page Size Extension. 181 | pse36: bool = false, 182 | /// Processor serial number. 183 | psn: bool = false, 184 | /// CLFLUSH instruction. 185 | clfsh: bool = false, 186 | /// Reserved. 187 | _reserved_1: bool = false, 188 | /// Debug store. 189 | ds: bool = false, 190 | /// Thermal monitor and software controlled clock facilities. 191 | acpi: bool = false, 192 | /// Intel MMX Technology. 193 | mmx: bool = false, 194 | /// FXSAVE and FXRSTOR instructions. 195 | fxsr: bool = false, 196 | /// SSE extensions. 197 | sse: bool = false, 198 | /// SSE2 extensions. 199 | sse2: bool = false, 200 | /// Self snoop. 201 | ss: bool = false, 202 | /// Max APIC IDs reserved field. 203 | htt: bool = false, 204 | /// Thermal monitor. 205 | tm: bool = false, 206 | /// Reserved. 207 | _reserved_2: bool = false, 208 | /// Pending Break Enable. 209 | pbe: bool = false, 210 | }; 211 | 212 | /// CPUID Extended Feature Flags bitfield for EBX. 
213 | /// Leaf=7, Sub-Leaf=0, 214 | pub const ExtFeatureEbx0 = packed struct(u32) { 215 | fsgsbase: bool = false, 216 | tsc_adjust: bool = false, 217 | sgx: bool = false, 218 | bmi1: bool = false, 219 | hle: bool = false, 220 | avx2: bool = false, 221 | fdp: bool = false, 222 | smep: bool = false, 223 | bmi2: bool = false, 224 | erms: bool = false, 225 | invpcid: bool = false, 226 | rtm: bool = false, 227 | rdtm: bool = false, 228 | fpucsds: bool = false, 229 | mpx: bool = false, 230 | rdta: bool = false, 231 | avx512f: bool = false, 232 | avx512dq: bool = false, 233 | rdseed: bool = false, 234 | adx: bool = false, 235 | smap: bool = false, 236 | avx512ifma: bool = false, 237 | _reserved1: u1 = 0, 238 | clflushopt: bool = false, 239 | clwb: bool = false, 240 | pt: bool = false, 241 | avx512pf: bool = false, 242 | avx512er: bool = false, 243 | avx512cd: bool = false, 244 | sha: bool = false, 245 | avx512bw: bool = false, 246 | avx512vl: bool = false, 247 | }; 248 | -------------------------------------------------------------------------------- /ymir/vmx.zig: -------------------------------------------------------------------------------- 1 | const builtin = @import("builtin"); 2 | const std = @import("std"); 3 | const log = std.log.scoped(.vmx); 4 | const Allocator = std.mem.Allocator; 5 | 6 | const ymir = @import("ymir"); 7 | const arch = ymir.arch; 8 | const mem = ymir.mem; 9 | const PageAllocator = mem.PageAllocator; 10 | const vmx = ymir.vmx; 11 | const linux = ymir.linux; 12 | const BootParams = linux.BootParams; 13 | const spin = ymir.spin; 14 | 15 | const impl = switch (builtin.target.cpu.arch) { 16 | .x86_64 => @import("arch/x86/vmx.zig"), 17 | else => @compileError("Unsupported architecture."), 18 | }; 19 | 20 | const VmError = error{ 21 | /// Memory allocation failed. 22 | OutOfMemory, 23 | /// The system does not support virtualization. 24 | SystemNotSupported, 25 | /// Unknown error. 
26 | UnknownError, 27 | }; 28 | pub const Error = VmError || impl.VmxError; 29 | 30 | /// Next virtual processor ID. 31 | var vpid_next: u16 = 1; 32 | /// Global VMX lock. 33 | var global_lock = spin.SpinLock{}; 34 | 35 | /// Size in bytes of the guest memory. 36 | const guest_memory_size = 100 * mem.mib; 37 | comptime { 38 | if (guest_memory_size % (2 * mem.mib) != 0) { 39 | @compileError("Guest memory size must be a multiple of 2MiB."); 40 | } 41 | } 42 | 43 | /// Virtual machine instance. 44 | /// TODO: currently, supports only single CPU. 45 | pub const Vm = struct { 46 | const Self = @This(); 47 | 48 | /// Guest memory. 49 | guest_mem: []u8 = undefined, 50 | /// Virtualized logical CPU. 51 | vcpu: impl.Vcpu, 52 | 53 | /// Create a new virtual machine instance. 54 | /// You MUST initialize the VM before using it. 55 | pub fn new() VmError!Self { 56 | // TODO: check the number of CPUs and abort if it's not 1. 57 | // TODO: repeat the same process for all CPUs. 58 | 59 | // Check CPU vendor. 60 | const vendor = arch.getCpuVendorId(); 61 | if (!std.mem.eql(u8, vendor[0..], "GenuineIntel")) { 62 | log.err("Unsupported CPU vendor: {s}", .{vendor}); 63 | return Error.SystemNotSupported; 64 | } 65 | 66 | // Check if VMX is supported. 67 | if (!arch.isVmxSupported()) { 68 | log.err("Virtualization is not supported.", .{}); 69 | return Error.SystemNotSupported; 70 | } 71 | 72 | const irq = global_lock.lockSaveIrq(); 73 | const vpid = vpid_next; 74 | vpid_next += 1; 75 | global_lock.unlockRestoreIrq(irq); 76 | 77 | const vcpu = impl.Vcpu.new(vpid); 78 | return Self{ 79 | .vcpu = vcpu, 80 | }; 81 | } 82 | 83 | /// Initialize the virtual machine, entering VMX root operation. 84 | pub fn init(self: *Self, allocator: Allocator) Error!void { 85 | // Initialize vCPU. 86 | try self.vcpu.virtualize(allocator); 87 | log.info("vCPU #{X} is created.", .{self.vcpu.id}); 88 | 89 | // Setup VMCS. 
90 | try self.vcpu.setupVmcs(allocator); 91 | } 92 | 93 | /// Deinitialize the virtual machine, exiting VMX root operation. 94 | pub fn devirtualize(self: *Self) void { 95 | self.vcpu.devirtualize(); 96 | } 97 | 98 | /// Setup guest memory and load a guest kernel on the memory. 99 | pub fn setupGuestMemory( 100 | self: *Self, 101 | guest_image: []u8, 102 | initrd: []u8, 103 | allocator: Allocator, 104 | page_allocator: *PageAllocator, 105 | ) Error!void { 106 | // Allocate guest memory. 107 | self.guest_mem = page_allocator.allocPages( 108 | guest_memory_size / mem.page_size_4k, 109 | mem.page_size_2mb, // This alignment is required because EPT maps 2MiB pages. 110 | ) orelse return Error.OutOfMemory; 111 | 112 | try self.loadKernel(guest_image, initrd); 113 | 114 | // Create simple EPT mapping. 115 | const eptp = try impl.mapGuest(self.guest_mem, allocator); 116 | try self.vcpu.setEptp(eptp, self.guest_mem.ptr); 117 | log.info("Guest memory is mapped: HVA=0x{X:0>16} (size=0x{X})", .{ @intFromPtr(self.guest_mem.ptr), self.guest_mem.len }); 118 | 119 | // Make the pages read only. 120 | for (0..self.guest_mem.len / mem.page_size_2mb) |i| { 121 | arch.page.changePageAttribute( 122 | .m2, 123 | @intFromPtr(self.guest_mem.ptr) + i * mem.page_size_2mb, 124 | .read_only, 125 | allocator, 126 | ) catch { 127 | @panic("Failed to make guest memory read-only."); 128 | }; 129 | } 130 | log.info("Guest memory is made read-only for Ymir.", .{}); 131 | } 132 | 133 | /// Kick off the virtual machine. 134 | pub fn loop(self: *Self) Error!void { 135 | arch.disableIntr(); 136 | try self.vcpu.loop(); 137 | } 138 | 139 | /// Load a protected kernel image and cmdline to the guest physical memory. 
140 | fn loadKernel(self: *Self, kernel: []u8, initrd: []u8) Error!void { 141 | const guest_mem = self.guest_mem; 142 | 143 | if (kernel.len + initrd.len >= guest_mem.len) { 144 | return Error.OutOfMemory; 145 | } 146 | 147 | var bp = BootParams.from(kernel); 148 | bp.e820_entries = 0; 149 | 150 | // Setup necessary fields 151 | bp.hdr.type_of_loader = 0xFF; 152 | bp.hdr.ext_loader_ver = 0; 153 | bp.hdr.loadflags.loaded_high = true; // load kernel at 0x10_0000 154 | bp.hdr.loadflags.can_use_heap = true; // use memory 0..BOOTPARAM as heap 155 | bp.hdr.heap_end_ptr = linux.layout.bootparam - 0x200; 156 | bp.hdr.loadflags.keep_segments = true; // we set CS/DS/SS/ES to flag segments with a base of 0. 157 | bp.hdr.cmd_line_ptr = linux.layout.cmdline; 158 | bp.hdr.vid_mode = 0xFFFF; // VGA (normal) 159 | 160 | // Setup E820 map 161 | bp.addE820entry(0, linux.layout.kernel_base, .ram); 162 | bp.addE820entry( 163 | linux.layout.kernel_base, 164 | guest_mem.len - linux.layout.kernel_base, 165 | .ram, 166 | ); 167 | 168 | // Setup cmdline 169 | const cmdline_max_size = if (bp.hdr.cmdline_size < 256) bp.hdr.cmdline_size else 256; 170 | const cmdline = guest_mem[linux.layout.cmdline .. 
linux.layout.cmdline + cmdline_max_size]; 171 | const cmdline_val = "console=ttyS0 earlyprintk=serial nokaslr"; 172 | @memset(cmdline, 0); 173 | @memcpy(cmdline[0..cmdline_val.len], cmdline_val); 174 | 175 | // Load initrd 176 | if (guest_mem.len - linux.layout.initrd < initrd.len) { 177 | return Error.OutOfMemory; 178 | } 179 | if (bp.hdr.initrd_addr_max < linux.layout.initrd + initrd.len) { 180 | return Error.OutOfMemory; 181 | } 182 | bp.hdr.ramdisk_image = linux.layout.initrd; 183 | bp.hdr.ramdisk_size = @truncate(initrd.len); 184 | try loadImage(guest_mem, initrd, linux.layout.initrd); 185 | 186 | // Copy boot_params 187 | try loadImage( 188 | guest_mem, 189 | std.mem.asBytes(&bp), 190 | linux.layout.bootparam, 191 | ); 192 | 193 | // Load protected-mode kernel code 194 | const code_offset = bp.hdr.getProtectedCodeOffset(); 195 | const code_size = kernel.len - code_offset; 196 | try loadImage( 197 | guest_mem, 198 | kernel[code_offset .. code_offset + code_size], 199 | linux.layout.kernel_base, 200 | ); 201 | if (linux.layout.kernel_base + code_size > guest_mem.len) { 202 | return Error.OutOfMemory; 203 | } 204 | 205 | log.info("Guest memory region: 0x{X:0>16} - 0x{X:0>16}", .{ 0, guest_mem.len }); 206 | log.info("Guest kernel code offset: 0x{X:0>16}", .{code_offset}); 207 | } 208 | 209 | fn loadImage(memory: []u8, image: []u8, addr: usize) !void { 210 | if (memory.len < addr + image.len) { 211 | return Error.OutOfMemory; 212 | } 213 | @memcpy(memory[addr .. addr + image.len], image); 214 | } 215 | }; 216 | -------------------------------------------------------------------------------- /ymir/arch/x86/pic.zig: -------------------------------------------------------------------------------- 1 | //! Legacy Intel 8259 Programmable Interrupt Controller (PIC) driver. 2 | //! 3 | //! You can check the status of the PIC in QEMU by running: info pic 4 | //! 5 | //! Reference: 6 | //! - https://wiki.osdev.org/8259_PIC 7 | //! 
- https://pdos.csail.mit.edu/6.828/2014/readings/hardware/8259A.pdf 8 | 9 | const std = @import("std"); 10 | 11 | const ymir = @import("ymir"); 12 | const bits = ymir.bits; 13 | 14 | const am = @import("asm.zig"); 15 | 16 | /// Interrupt vector for the primary PIC. 17 | /// Must be divisible by 8. 18 | pub const primary_vector_offset: usize = 32; 19 | /// Interrupt vector for the secondary PIC. 20 | /// Must be divisible by 8. 21 | pub const secondary_vector_offset: usize = primary_vector_offset + 8; 22 | 23 | /// Primary command port 24 | const primary_command_port: u16 = 0x20; 25 | /// Primary data port 26 | const primary_data_port: u16 = primary_command_port + 1; 27 | /// Secondary command port 28 | const secondary_command_port: u16 = 0xA0; 29 | /// Secondary data port 30 | const secondary_data_port: u16 = secondary_command_port + 1; 31 | 32 | const icw = enum { icw1, icw2, icw3, icw4 }; 33 | const ocw = enum { ocw1, ocw2, ocw3 }; 34 | 35 | const Icw = union(icw) { 36 | icw1: Icw1, 37 | icw2: Icw2, 38 | icw3: Icw3, 39 | icw4: Icw4, 40 | 41 | const Icw1 = packed struct(u8) { 42 | /// ICW4 is needed. 43 | icw4: bool = true, 44 | /// Single or cascade mode. 45 | single: bool = false, 46 | /// CALL address interval 4 or 8. 47 | interval4: bool = false, 48 | /// Level triggered or edge triggered. 49 | level: bool = false, 50 | /// Initialization command. 51 | _icw1: u1 = 1, 52 | /// Unused in 8085 mode. 53 | _unused: u3 = 0, 54 | }; 55 | const Icw2 = packed struct(u8) { 56 | /// Vector offset. 57 | offset: u8, 58 | }; 59 | const Icw3 = packed struct(u8) { 60 | /// For primary PIC, IRQ that is cascaded. 61 | /// For secondary PIC, cascade identity. 62 | cascade_id: u8, 63 | }; 64 | const Icw4 = packed struct(u8) { 65 | /// 8086/8088 mode or MCS-80/85 mode. 66 | mode_8086: bool = true, 67 | /// Auto EOI or normal EOI. 68 | auto_eoi: bool = false, 69 | /// Buffered mode. 70 | buf: u2 = 0, 71 | /// Special fully nested mode. 
72 | full_nested: bool = false, 73 | /// ReservedZ. 74 | _reserved: u3 = 0, 75 | }; 76 | }; 77 | 78 | const Ocw = union(ocw) { 79 | ocw1: Ocw1, 80 | ocw2: Ocw2, 81 | ocw3: Ocw3, 82 | 83 | const Ocw1 = packed struct(u8) { 84 | /// Interrupt mask. 85 | imr: u8, 86 | }; 87 | const Ocw2 = packed struct(u8) { 88 | /// Target IRQ. 89 | level: u3 = 0, 90 | /// ReservedZ. 91 | _reserved: u2 = 0, 92 | /// EOI 93 | eoi: bool, 94 | /// If set, specific EOI. 95 | sl: bool, 96 | /// Rotate priority. 97 | rotate: bool = false, 98 | }; 99 | const Ocw3 = packed struct(u8) { 100 | /// Target register to read. 101 | ris: Reg, 102 | /// Read register command. 103 | read: bool, 104 | /// Unused in Ymir. 105 | _unused1: u1 = 0, 106 | /// Reserved 01. 107 | _reserved1: u2 = 0b01, 108 | /// Unused in Ymir. 109 | _unused2: u2 = 0, 110 | /// ReservedZ. 111 | _reserved2: u1 = 0, 112 | 113 | const Reg = enum(u1) { irr = 0, isr = 1 }; 114 | }; 115 | }; 116 | 117 | // PS/2 I/O Ports 118 | const ps2_data_port: u16 = 0x60; 119 | const ps2_status_port: u16 = 0x64; 120 | const ps2_command_port: u16 = ps2_status_port; 121 | 122 | /// Initialize the PIC remapping its interrupt vectors. 123 | /// All interrupts are masked after initialization. 124 | /// You MUST call this function before using the PIC. 125 | pub fn init() void { 126 | // We have to disable interrupts to prevent PIC-driven interrupts before registering handlers. 127 | am.cli(); 128 | defer am.sti(); 129 | 130 | // Start initialization sequence. 131 | issue(Icw{ .icw1 = .{} }, primary_command_port); 132 | issue(Icw{ .icw1 = .{} }, secondary_command_port); 133 | 134 | // Set the vector offsets. 135 | issue(Icw{ .icw2 = .{ .offset = primary_vector_offset } }, primary_data_port); 136 | issue(Icw{ .icw2 = .{ .offset = secondary_vector_offset } }, secondary_data_port); 137 | 138 | // Tell primary PIC that there is a slave PIC at IRQ2. 
139 | issue(Icw{ .icw3 = .{ .cascade_id = 0b100 } }, primary_data_port); 140 | // Tell secondary PIC its cascade identity. 141 | issue(Icw{ .icw3 = .{ .cascade_id = 2 } }, secondary_data_port); 142 | 143 | // Set the mode. 144 | issue(Icw{ .icw4 = .{} }, primary_data_port); 145 | issue(Icw{ .icw4 = .{} }, secondary_data_port); 146 | 147 | // Mask all IRQ lines. 148 | setImr(0xFF, primary_data_port); 149 | setImr(0xFF, secondary_data_port); 150 | } 151 | 152 | /// Issue the CW to the PIC. 153 | fn issue(cw: anytype, port: u16) void { 154 | const T = @TypeOf(cw); 155 | if (T != Icw and T != Ocw) { 156 | @compileError("Unsupported type for pic.issue()"); 157 | } 158 | switch (cw) { 159 | inline else => |s| am.outb(@bitCast(s), port), 160 | } 161 | am.relax(); 162 | } 163 | 164 | /// Set IMR. 165 | fn setImr(imr: u8, port: u16) void { 166 | issue(Ocw{ .ocw1 = .{ .imr = imr } }, port); 167 | } 168 | 169 | /// Mask the given IRQ line. 170 | pub fn setMask(irq: IrqLine) void { 171 | const port = irq.dataPort(); 172 | setImr(am.inb(port) | bits.tobit(u8, irq.delta()), port); 173 | } 174 | 175 | /// Unset the mask of the given IRQ line. 176 | pub fn unsetMask(irq: IrqLine) void { 177 | const port = irq.dataPort(); 178 | setImr(am.inb(port) & ~bits.tobit(u8, irq.delta()), port); 179 | } 180 | 181 | /// Notify the end of interrupt (EOI) to the PIC. 182 | /// This function uses specific-EOI. 183 | pub fn notifyEoi(irq: IrqLine) void { 184 | issue( 185 | Ocw{ .ocw2 = .{ .eoi = true, .sl = true, .level = irq.delta() } }, 186 | irq.commandPort(), 187 | ); 188 | if (!irq.isPrimary()) { 189 | issue( 190 | Ocw{ .ocw2 = .{ .eoi = true, .sl = true, .level = 2 } }, 191 | primary_command_port, 192 | ); 193 | } 194 | } 195 | 196 | /// Get IRQ mask from the PIC. 197 | pub inline fn getIrqMask() u16 { 198 | const val1: u16 = am.inb(primary_data_port); 199 | const val2: u16 = am.inb(secondary_data_port); 200 | return (val2 << 8) | val1; 201 | } 202 | 203 | /// Set IRQ mask to the PIC. 
204 | pub inline fn setIrqMask(mask: u16) void { 205 | setImr(@truncate(mask), primary_data_port); 206 | setImr(@truncate(mask >> 8), secondary_data_port); 207 | } 208 | 209 | /// Line numbers for the PIC. 210 | pub const IrqLine = enum(u8) { 211 | /// Timer 212 | timer = 0, 213 | /// Keyboard 214 | keyboard = 1, 215 | /// Secondary PIC 216 | secondary = 2, 217 | /// Serial Port 2 218 | serial2 = 3, 219 | /// Serial Port 1 220 | serial1 = 4, 221 | /// Parallel Port 2/3 222 | parallel23 = 5, 223 | /// Floppy Disk 224 | floppy = 6, 225 | /// Parallel Port 1 226 | parallel1 = 7, 227 | /// Real Time Clock 228 | rtc = 8, 229 | /// ACPI 230 | acpi = 9, 231 | /// Available 1 232 | open1 = 10, 233 | /// Available 2 234 | open2 = 11, 235 | /// Mouse 236 | mouse = 12, 237 | /// Coprocessor 238 | cop = 13, 239 | /// Primary ATA 240 | primary_ata = 14, 241 | /// Secondary ATA 242 | secondary_ata = 15, 243 | 244 | /// Return true if the IRQ belongs to the primary PIC. 245 | pub fn isPrimary(self: IrqLine) bool { 246 | return @intFromEnum(self) < 8; 247 | } 248 | 249 | /// Get the command port for this IRQ. 250 | pub inline fn commandPort(self: IrqLine) u16 { 251 | return if (self.isPrimary()) primary_command_port else secondary_command_port; 252 | } 253 | 254 | /// Get the data port for this IRQ. 255 | pub inline fn dataPort(self: IrqLine) u16 { 256 | return if (self.isPrimary()) primary_data_port else secondary_data_port; 257 | } 258 | 259 | /// Get the offset of the IRQ within the PIC. 260 | pub fn delta(self: IrqLine) u3 { 261 | return @intCast(if (self.isPrimary()) @intFromEnum(self) else (@intFromEnum(self) - 8)); 262 | } 263 | }; 264 | -------------------------------------------------------------------------------- /ymir/arch/x86/interrupt.zig: -------------------------------------------------------------------------------- 1 | //! LICENSE NOTICE 2 | //! 3 | //! The impletentation is heavily inspired by https://github.com/AndreaOrru/zen 4 | //! 
Original LICENSE follows: 5 | //! 6 | //! BSD 3-Clause License 7 | //! 8 | //! Copyright (c) 2017, Andrea Orru 9 | //! All rights reserved. 10 | //! 11 | //! Redistribution and use in source and binary forms, with or without 12 | //! modification, are permitted provided that the following conditions are met: 13 | //! 14 | //! * Redistributions of source code must retain the above copyright notice, this 15 | //! list of conditions and the following disclaimer. 16 | //! 17 | //! * Redistributions in binary form must reproduce the above copyright notice, 18 | //! this list of conditions and the following disclaimer in the documentation 19 | //! and/or other materials provided with the distribution. 20 | //! 21 | //! * Neither the name of the copyright holder nor the names of its 22 | //! contributors may be used to endorse or promote products derived from 23 | //! this software without specific prior written permission. 24 | //! 25 | //! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 26 | //! AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 | //! IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 28 | //! DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 29 | //! FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 | //! DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 31 | //! SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 32 | //! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 33 | //! OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 34 | //! OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 35 | //! 
36 | 37 | const std = @import("std"); 38 | const log = std.log.scoped(.intr); 39 | 40 | const ymir = @import("ymir"); 41 | const am = @import("asm.zig"); 42 | const idt = @import("idt.zig"); 43 | const isr = @import("isr.zig"); 44 | const page = @import("page.zig"); 45 | 46 | /// Context for interrupt handlers. 47 | pub const Context = isr.Context; 48 | 49 | /// Subscriber to interrupts. 50 | pub const Subscriber = struct { 51 | /// Context of the subscriber. 52 | self: *anyopaque, 53 | /// Context of the interrupt. 54 | callback: Callback, 55 | 56 | pub const Callback = *const fn (*anyopaque, *Context) void; 57 | }; 58 | 59 | /// Interrupt handler function signature. 60 | pub const Handler = *const fn (*Context) void; 61 | 62 | /// Maximum number of subscribers. 63 | const max_subscribers = 10; 64 | /// Subscribers to interrupts. 65 | var subscribers: [max_subscribers]?Subscriber = [_]?Subscriber{null} ** max_subscribers; 66 | 67 | /// Interrupt handlers. 68 | var handlers: [256]Handler = [_]Handler{unhandledHandler} ** 256; 69 | 70 | /// Initialize the IDT. 71 | pub fn init() void { 72 | inline for (0..idt.max_num_gates) |i| { 73 | idt.setGate( 74 | i, 75 | .Interrupt64, 76 | isr.generateIsr(i), 77 | ); 78 | } 79 | 80 | // Detailed handling for page faults. 81 | // TODO: For page fault, we have to allocate an interrupt stack, 82 | // register it to the TSS, and switch to it because it can be stack overflow. 83 | registerHandler(page_fault, unhandledFaultHandler); 84 | 85 | idt.init(); 86 | 87 | am.sti(); 88 | } 89 | 90 | /// Register interrupt handler. 91 | pub fn registerHandler(comptime vector: u8, handler: Handler) void { 92 | handlers[vector] = handler; 93 | idt.setGate( 94 | vector, 95 | .Interrupt64, 96 | isr.generateIsr(vector), 97 | ); 98 | } 99 | 100 | /// Subscribe to interrupts. 101 | /// Subscribers are called when an interrupt is triggered before the interrupt handler. 
/// Returns `error.SubscriberFull` when all subscriber slots are occupied.
pub fn subscribe(ctx: *anyopaque, callback: Subscriber.Callback) !void {
    // Take the first free (null) slot.
    for (subscribers, 0..) |sub, i| {
        if (sub == null) {
            subscribers[i] = Subscriber{
                .callback = callback,
                .self = ctx,
            };
            return;
        }
    }
    return error.SubscriberFull;
}

/// Called from the ISR stub.
/// Dispatches the interrupt to the appropriate handler.
pub fn dispatch(context: *Context) void {
    const vector = context.vector;
    // Notify subscribers.
    // All subscribers are notified for every vector, before the handler runs.
    for (subscribers) |subscriber| {
        if (subscriber) |s| s.callback(s.self, context);
    }
    // Call the handler.
    handlers[vector](context);
}

/// Fallback handler for vectors without a registered handler.
/// Dumps the saved CPU state and halts forever; never returns.
fn unhandledHandler(context: *Context) void {
    @branchHint(.cold);

    log.err("============ Oops! ===================", .{});
    log.err("Unhandled interrupt: {s} ({})", .{
        exceptionName(context.vector),
        context.vector,
    });
    log.err("Error Code: 0x{X}", .{context.error_code});
    log.err("RIP : 0x{X:0>16}", .{context.rip});
    log.err("EFLAGS : 0x{X:0>16}", .{context.rflags});
    log.err("RAX : 0x{X:0>16}", .{context.registers.rax});
    log.err("RBX : 0x{X:0>16}", .{context.registers.rbx});
    log.err("RCX : 0x{X:0>16}", .{context.registers.rcx});
    log.err("RDX : 0x{X:0>16}", .{context.registers.rdx});
    log.err("RSI : 0x{X:0>16}", .{context.registers.rsi});
    log.err("RDI : 0x{X:0>16}", .{context.registers.rdi});
    // NOTE(review): rsp/rbp here are the values saved by the ISR stub --
    // presumably the interrupted context's; confirm against isr.zig.
    log.err("RSP : 0x{X:0>16}", .{context.registers.rsp});
    log.err("RBP : 0x{X:0>16}", .{context.registers.rbp});
    log.err("R8 : 0x{X:0>16}", .{context.registers.r8});
    log.err("R9 : 0x{X:0>16}", .{context.registers.r9});
    log.err("R10 : 0x{X:0>16}", .{context.registers.r10});
    log.err("R11 : 0x{X:0>16}", .{context.registers.r11});
    log.err("R12 : 0x{X:0>16}", .{context.registers.r12});
    log.err("R13 : 0x{X:0>16}", .{context.registers.r13});
    log.err("R14 : 0x{X:0>16}", .{context.registers.r14});
    log.err("R15 : 0x{X:0>16}", .{context.registers.r15});
    log.err("CS : 0x{X:0>4}", .{context.cs});

    ymir.endlessHalt();
}

/// Page-fault handler installed by `init`.
/// Dumps the faulting address (CR2) and the page-table walk for it, then
/// falls through to the common unhandled handler (which halts).
fn unhandledFaultHandler(context: *Context) void {
    @branchHint(.cold);

    log.err("============ Unhandled Fault ===================", .{});

    // CR2 holds the linear address that caused the fault.
    const cr2 = am.readCr2();
    log.err("Fault Address: 0x{X:0>16}", .{cr2});
    page.showPageTable(cr2, log);
    log.err("\nCommon unhandled handler continues...\n", .{});

    unhandledHandler(context);
}

// Exception vectors.
// cf. Intel SDM Vol.3A, Table 6-1. Vector 15 and 22..31 are reserved.
const divide_by_zero = 0;
const debug = 1;
const non_maskable_interrupt = 2;
const breakpoint = 3;
const overflow = 4;
const bound_range_exceeded = 5;
const invalid_opcode = 6;
const device_not_available = 7;
const double_fault = 8;
const coprocessor_segment_overrun = 9;
const invalid_tss = 10;
const segment_not_present = 11;
const stack_segment_fault = 12;
const general_protection_fault = 13;
const page_fault = 14;
const floating_point_exception = 16;
const alignment_check = 17;
const machine_check = 18;
const simd_exception = 19;
const virtualization_exception = 20;
const control_protection_exception = 21;

/// Number of vectors the CPU reserves for exceptions (0..31).
pub const num_system_exceptions = 32;

/// Get the name of an exception.
/// Returns a human-readable name including the conventional mnemonic (e.g. "#PF").
/// Vectors outside the architectural exception range map to "Unknown exception".
pub inline fn exceptionName(vector: u64) []const u8 {
    return switch (vector) {
        divide_by_zero => "#DE: Divide by zero",
        debug => "#DB: Debug",
        non_maskable_interrupt => "NMI: Non-maskable interrupt",
        breakpoint => "#BP: Breakpoint",
        overflow => "#OF: Overflow",
        bound_range_exceeded => "#BR: Bound range exceeded",
        invalid_opcode => "#UD: Invalid opcode",
        device_not_available => "#NM: Device not available",
        double_fault => "#DF: Double fault",
        coprocessor_segment_overrun => "Coprocessor segment overrun",
        invalid_tss => "#TS: Invalid TSS",
        segment_not_present => "#NP: Segment not present",
        stack_segment_fault => "#SS: Stack-segment fault",
        general_protection_fault => "#GP: General protection fault",
        page_fault => "#PF: Page fault",
        floating_point_exception => "#MF: Floating-point exception",
        alignment_check => "#AC: Alignment check",
        machine_check => "#MC: Machine check",
        simd_exception => "#XM: SIMD exception",
        virtualization_exception => "#VE: Virtualization exception",
        control_protection_exception => "#CP: Control protection exception",
        else => "Unknown exception",
    };
}
-------------------------------------------------------------------------------- /ymir/arch/x86/gdt.zig: --------------------------------------------------------------------------------
const std = @import("std");

const ymir = @import("ymir");
const Virt = ymir.mem.Virt;

const am = @import("asm.zig");

/// Maximum number of GDT entries.
const max_num_gdt = 0x10;

/// Global Descriptor Table.
/// Every entry starts as a null descriptor; `init()` fills in the kernel
/// code/data segments and the TSS.
var gdt: [max_num_gdt]SegmentDescriptor align(16) = [_]SegmentDescriptor{
    SegmentDescriptor.newNull(),
} ** max_num_gdt;
/// GDT Register.
/// Loaded into GDTR by `init()` via LGDT. `limit` is the table size minus one,
/// as the hardware expects.
var gdtr = GdtRegister{
    .limit = @sizeOf(@TypeOf(gdt)) - 1,
    .base = &gdt,
};

/// Index of the kernel data segment.
pub const kernel_ds_index: u16 = 0x01;
/// Index of the kernel code segment.
pub const kernel_cs_index: u16 = 0x02;
/// Index of the kernel TSS.
/// Note that TSS descriptor occupies two GDT entries.
pub const kernel_tss_index: u16 = 0x04;

/// Unused TSS segment.
/// Zero-filled page used only so a valid TSS descriptor can be loaded (VMX requires TR).
const tssUnused: [4096]u8 align(4096) = [_]u8{0} ** 4096;

/// Initialize the GDT.
/// Installs flat 4-GiB kernel code/data segments, loads GDTR, reloads the
/// segment registers to flush their hidden parts, and loads a dummy TSS.
pub fn init() void {
    // Init GDT.
    gdtr.base = &gdt;

    // Flat kernel code segment: readable, non-conforming, executable,
    // base 0, limit 0xFFFFF, DPL 0, 4-KiB granularity.
    gdt[kernel_cs_index] = SegmentDescriptor.new(
        true,
        false,
        true,
        0,
        std.math.maxInt(u20),
        0,
        .kbyte,
    );
    // Flat kernel data segment: writable, grows-up, non-executable.
    gdt[kernel_ds_index] = SegmentDescriptor.new(
        true,
        false,
        false,
        0,
        std.math.maxInt(u20),
        0,
        .kbyte,
    );

    am.lgdt(@intFromPtr(&gdtr));

    // Changing the entries in the GDT, or setting GDTR
    // does not automatically update the hidden(shadow) part.
    // To flush the changes, we need to set segment registers.
    loadKernelDs();
    loadKernelCs();

    // TSS is not used by Ymir. But we have to set it for VMX.
    setTss(@intFromPtr(&tssUnused));
}

/// Load the kernel data segment selector.
/// This function flushes the changes of DS in the GDT.
fn loadKernelDs() void {
    // All data segment registers (DS/ES/FS/GS/SS) are pointed at the kernel
    // data segment. DI is clobbered as scratch.
    asm volatile (
        \\mov %[kernel_ds], %di
        \\mov %%di, %%ds
        \\mov %%di, %%es
        \\mov %%di, %%fs
        \\mov %%di, %%gs
        \\mov %%di, %%ss
        :
        : [kernel_ds] "n" (@as(u16, @bitCast(SegmentSelector{
            .rpl = 0,
            .index = kernel_ds_index,
        }))),
        : "di"
    );
}

/// Set the TSS.
/// Writes a 16-byte TSS descriptor over the two GDT slots starting at
/// `kernel_tss_index`, then loads TR.
fn setTss(tss: Virt) void {
    const desc = TssDescriptor.new(tss, std.math.maxInt(u20));
    @as(*TssDescriptor, @ptrCast(&gdt[kernel_tss_index])).* = desc;

    loadKernelTss();
}

/// Load the kernel code segment selector.
/// This function flushes the changes of CS in the GDT.
97 | /// CS cannot be loaded directly by mov, so we use far-return. 98 | fn loadKernelCs() void { 99 | asm volatile ( 100 | \\ 101 | // Push CS 102 | \\mov %[kernel_cs], %%rax 103 | \\push %%rax 104 | // Push RIP 105 | \\leaq next(%%rip), %%rax 106 | \\pushq %%rax 107 | \\lretq 108 | \\next: 109 | \\ 110 | : 111 | : [kernel_cs] "n" (@as(u16, @bitCast(SegmentSelector{ 112 | .rpl = 0, 113 | .index = kernel_cs_index, 114 | }))), 115 | ); 116 | } 117 | 118 | /// Load the kernel TSS selector to TR. 119 | /// Not used in Ymir. 120 | fn loadKernelTss() void { 121 | asm volatile ( 122 | \\mov %[kernel_tss], %%di 123 | \\ltr %%di 124 | : 125 | : [kernel_tss] "n" (@as(u16, @bitCast(SegmentSelector{ 126 | .rpl = 0, 127 | .index = kernel_tss_index, 128 | }))), 129 | : "di" 130 | ); 131 | } 132 | 133 | /// Segment Descriptor Entry. 134 | /// SDM Vol.3A 3.4.5 135 | pub const SegmentDescriptor = packed struct(u64) { 136 | /// Lower 16 bits of the segment limit. 137 | limit_low: u16, 138 | /// Lower 24 bits of the base address. 139 | base_low: u24, 140 | 141 | /// Segment is accessed. 142 | /// You should set to true in case the descriptor is stored in the read-only pages. 143 | accessed: bool = true, 144 | /// Readable / Writable. 145 | /// For code segment, true means the segment is readable (write access is not allowed for CS). 146 | /// For data segment, true means the segment is writable (read access is always allowed for DS). 147 | rw: bool, 148 | /// Direction / Conforming. 149 | /// For code selectors, conforming bit. If set to 1, code in the segment can be executed from an equal or lower privilege level. 150 | /// For data selectors, direction bit. If set to 0, the segment grows up; if set to 1, the segment grows down. 151 | dc: bool, 152 | /// Executable. 153 | /// If set to true, code segment. If set to false, data segment. 154 | executable: bool, 155 | /// Descriptor type. 156 | desc_type: DescriptorType, 157 | /// Descriptor Privilege Level. 
158 | dpl: u2, 159 | /// Segment present. 160 | present: bool = true, 161 | 162 | /// Upper 4 bits of the segment limit. 163 | limit_high: u4, 164 | /// Available for use by system software. 165 | avl: u1 = 0, 166 | /// 64-bit code segment. 167 | /// If set to true, the code segment contains native 64-bit code. 168 | /// For data segments, this bit must be cleared to 0. 169 | long: bool, 170 | /// Size flag. 171 | db: u1, 172 | /// Granularity. 173 | /// If set to .Byte, the segment limit is interpreted in byte units. 174 | /// Otherwise, the limit is interpreted in 4-KByte units. 175 | /// This field is ignored in 64-bit mode. 176 | granularity: Granularity, 177 | /// Upper 8 bits of the base address. 178 | base_high: u8, 179 | 180 | /// Create a null segment selector. 181 | pub fn newNull() SegmentDescriptor { 182 | return @bitCast(@as(u64, 0)); 183 | } 184 | 185 | /// Create a new segment descriptor. 186 | pub fn new( 187 | rw: bool, 188 | dc: bool, 189 | executable: bool, 190 | base: u32, 191 | limit: u20, 192 | dpl: u2, 193 | granularity: Granularity, 194 | ) SegmentDescriptor { 195 | return SegmentDescriptor{ 196 | .limit_low = @truncate(limit), 197 | .base_low = @truncate(base), 198 | .rw = rw, 199 | .dc = dc, 200 | .executable = executable, 201 | .desc_type = .code_data, 202 | .dpl = dpl, 203 | .present = true, 204 | .limit_high = @truncate(limit >> 16), 205 | .avl = 0, 206 | .long = executable, 207 | .db = @intFromBool(!executable), 208 | .granularity = granularity, 209 | .base_high = @truncate(base >> 24), 210 | }; 211 | } 212 | }; 213 | 214 | /// TSS Descriptor in 64-bit mode. 215 | /// 216 | /// Note that the descriptor is 16 bytes long and occupies two GDT entries. 217 | /// cf. SDM Vol.3A Figure 8-4. 218 | const TssDescriptor = packed struct(u128) { 219 | /// Lower 16 bits of the segment limit. 220 | limit_low: u16, 221 | /// Lower 24 bits of the base address. 222 | base_low: u24, 223 | 224 | /// Type: TSS. 
225 | type: u4 = 0b1001, // tss-avail 226 | /// Descriptor type: System. 227 | desc_type: DescriptorType = .system, 228 | /// Descriptor Privilege Level. 229 | dpl: u2 = 0, 230 | present: bool = true, 231 | 232 | /// Upper 4 bits of the segment limit. 233 | limit_high: u4, 234 | /// Available for use by system software. 235 | avl: u1 = 0, 236 | /// Reserved. 237 | long: bool = true, 238 | /// Size flag. 239 | db: u1 = 0, 240 | /// Granularity. 241 | granularity: Granularity = .kbyte, 242 | /// Upper 40 bits of the base address. 243 | base_high: u40, 244 | /// Reserved. 245 | _reserved: u32 = 0, 246 | 247 | /// Create a new 64-bit TSS descriptor. 248 | pub fn new(base: Virt, limit: u20) TssDescriptor { 249 | return TssDescriptor{ 250 | .limit_low = @truncate(limit), 251 | .base_low = @truncate(base), 252 | .limit_high = @truncate(limit >> 16), 253 | .base_high = @truncate(base >> 24), 254 | }; 255 | } 256 | }; 257 | 258 | /// Descriptor Type. 259 | pub const DescriptorType = enum(u1) { 260 | /// System Descriptor. 261 | /// Must be System for TSS. 262 | system = 0, 263 | /// Application Descriptor. 264 | code_data = 1, 265 | }; 266 | 267 | /// Granularity of the descriptor. 268 | pub const Granularity = enum(u1) { 269 | byte = 0, 270 | kbyte = 1, 271 | }; 272 | 273 | /// Segment selector. 274 | pub const SegmentSelector = packed struct(u16) { 275 | /// Requested Privilege Level. 276 | rpl: u2, 277 | /// Table Indicator. 278 | ti: u1 = 0, 279 | /// Index. 280 | index: u13, 281 | 282 | pub fn from(val: anytype) SegmentSelector { 283 | return @bitCast(@as(u16, @truncate(val))); 284 | } 285 | }; 286 | 287 | /// GDTR. 288 | const GdtRegister = packed struct { 289 | limit: u16, 290 | base: *[max_num_gdt]SegmentDescriptor, 291 | }; 292 | -------------------------------------------------------------------------------- /ymir/mem/PageAllocator.zig: -------------------------------------------------------------------------------- 1 | //! Page allocator. 2 | //! 3 | //! 
This allocator allocates pages from direct map region. 4 | //! Therefore, returned pages are ensured to be physically contiguous. 5 | 6 | const std = @import("std"); 7 | const log = std.log.scoped(.pa); 8 | const uefi = std.os.uefi; 9 | const Allocator = std.mem.Allocator; 10 | const Alignment = std.mem.Alignment; 11 | const surtr = @import("surtr"); 12 | const MemoryMap = surtr.MemoryMap; 13 | const MemoryDescriptorIterator = surtr.MemoryDescriptorIterator; 14 | 15 | const ymir = @import("ymir"); 16 | const bits = ymir.bits; 17 | const mem = ymir.mem; 18 | const spin = ymir.spin; 19 | const arch = ymir.arch; 20 | const p2v = ymir.mem.phys2virt; 21 | const v2p = ymir.mem.virt2phys; 22 | const Phys = ymir.mem.Phys; 23 | const Virt = ymir.mem.Virt; 24 | const page_size = mem.page_size; 25 | const page_mask = mem.page_mask; 26 | const kib = mem.kib; 27 | const mib = mem.mib; 28 | const gib = mem.gib; 29 | 30 | pub const vtable = Allocator.VTable{ 31 | .alloc = allocate, 32 | .free = free, 33 | .resize = resize, 34 | .remap = remap, 35 | }; 36 | 37 | /// Physical page frame ID. 38 | const FrameId = u64; 39 | /// Bytes per page frame. 40 | const bytes_per_frame = 4 * kib; 41 | 42 | const Self = @This(); 43 | const PageAllocator = Self; 44 | 45 | /// Maximum physical memory size in bytes that can be managed by this allocator. 46 | const max_physical_size = 128 * gib; 47 | /// Maximum page frame count. 48 | const frame_count = max_physical_size / 4096; 49 | 50 | /// Single unit of bitmap line. 51 | const MapLineType = u64; 52 | /// Bits per map line. 53 | const bits_per_mapline = @sizeOf(MapLineType) * 8; 54 | /// Number of map lines. 55 | const num_maplines = frame_count / bits_per_mapline; 56 | /// Bitmap type. 57 | const BitMap = [num_maplines]MapLineType; 58 | 59 | /// First frame ID. 60 | /// Frame ID 0 is reserved. 61 | frame_begin: FrameId = 1, 62 | /// First frame ID that is not managed by this allocator. 
frame_end: FrameId,

/// Bitmap to manage page frames.
/// NOTE: starts `undefined`; `init` must assign a status to every frame below
/// `frame_end`, otherwise unmarked frames carry garbage status bits.
bitmap: BitMap = undefined,
/// Spin lock.
lock: spin.SpinLock = spin.SpinLock{},

/// Memory map provided by UEFI.
memmap: MemoryMap = undefined,

/// Instantiate an uninitialized PageAllocator.
/// Returned instance must be initialized by calling `init`.
pub fn newUninit() Self {
    return Self{
        .frame_end = undefined,
        .bitmap = undefined,
    };
}

/// Initialize the allocator.
/// This function MUST be called before the direct mapping w/ offset 0x0 is unmapped.
///
/// Walks the UEFI memory map in ascending order, marking usable regions free
/// and everything else (including the holes between regions) as used.
pub fn init(self: *Self, map: MemoryMap) void {
    const mask = self.lock.lockSaveIrq();
    defer self.lock.unlockRestoreIrq(mask);

    self.memmap = map;
    // End (exclusive) of the highest usable region scanned so far.
    var avail_end: Phys = 0;

    // Scan memory map and mark usable regions.
    var desc_iter = MemoryDescriptorIterator.new(map);
    while (true) {
        const desc: *uefi.tables.MemoryDescriptor = desc_iter.next() orelse break;

        // Everything beyond the managed range is treated as used.
        if (desc.physical_start >= max_physical_size) {
            self.markAllocated(phys2frame(avail_end), frame_count - phys2frame(avail_end));
            break;
        }

        // Mark holes between regions as allocated (used).
        // BUGFIX: the hole length is the gap up to this descriptor's start.
        // The previous code marked `desc.number_of_pages` frames, i.e. the size
        // of the *following* region, which under-marks large holes and leaves
        // frames with undefined bitmap bits.
        if (avail_end < desc.physical_start) {
            const hole_frames = phys2frame(desc.physical_start) - phys2frame(avail_end);
            self.markAllocated(phys2frame(avail_end), hole_frames);
        }
        // Mark the region described by the descriptor as used or unused.
        const phys_end = desc.physical_start + desc.number_of_pages * page_size;
        if (isUsableMemory(desc)) {
            avail_end = phys_end;
            self.markNotUsed(phys2frame(desc.physical_start), desc.number_of_pages);
        } else {
            self.markAllocated(phys2frame(desc.physical_start), desc.number_of_pages);
        }

        self.frame_end = phys2frame(avail_end);
    }
}

/// Notify that BootServicesData region is no longer needed.
/// This function makes these regions available for the page allocator.
pub fn discardBootService(self: *Self) void {
    // The memory map was stored with a physical pointer; rebase it into the
    // direct-map region before walking it again.
    self.memmap.descriptors = @ptrFromInt(p2v(self.memmap.descriptors));

    var iter = MemoryDescriptorIterator.new(self.memmap);
    while (true) {
        const descriptor: *uefi.tables.MemoryDescriptor = iter.next() orelse break;
        if (descriptor.type == .boot_services_data) {
            // Release every frame of this BootServicesData region.
            self.markNotUsed(phys2frame(descriptor.physical_start), descriptor.number_of_pages);
        }
    }
}

/// Mark `num_frames` frames starting at `frame` as in use.
fn markAllocated(self: *Self, frame: FrameId, num_frames: usize) void {
    var offset: usize = 0;
    while (offset < num_frames) : (offset += 1) {
        self.set(frame + offset, .used);
    }
}

/// Mark `num_frames` frames starting at `frame` as free.
fn markNotUsed(self: *Self, frame: FrameId, num_frames: usize) void {
    var offset: usize = 0;
    while (offset < num_frames) : (offset += 1) {
        self.set(frame + offset, .unused);
    }
}

/// Page frame status.
const Status = enum(u1) {
    /// Page frame is in use.
    used = 0,
    /// Page frame is unused.
    unused = 1,

    /// Map a raw bitmap bit test (`true` = bit set) to a status.
    pub inline fn from(boolean: bool) Status {
        return switch (boolean) {
            true => .used,
            false => .unused,
        };
    }
};

/// Read the status of a single frame from the bitmap.
fn get(self: *Self, frame: FrameId) Status {
    const line = frame / bits_per_mapline;
    const bit: u6 = @truncate(frame % bits_per_mapline);
    const mask = bits.tobit(MapLineType, bit);
    return Status.from((self.bitmap[line] & mask) != 0);
}

/// Write the status of a single frame into the bitmap.
fn set(self: *Self, frame: FrameId, status: Status) void {
    const line = frame / bits_per_mapline;
    const bit: u6 = @truncate(frame % bits_per_mapline);
    const mask = bits.tobit(MapLineType, bit);
    if (status == .used) {
        self.bitmap[line] |= mask;
    } else {
        self.bitmap[line] &= ~mask;
    }
}

/// Allocate physically contiguous and aligned pages.
/// `align_size` must be a non-zero multiple of the page size.
/// Returns `null` on invalid alignment or when no suitable run of free frames exists.
pub fn allocPages(self: *Self, num_pages: usize, align_size: usize) ?[]u8 {
    const mask = self.lock.lockSaveIrq();
    defer self.lock.unlockRestoreIrq(mask);

    // BUGFIX: also reject a zero alignment. With align_size == 0 the old code
    // computed align_frame == 0, started the scan at the reserved frame 0, and
    // `start_frame += align_frame` never advanced (infinite loop).
    if (align_size == 0 or align_size % page_size != 0) {
        log.err("Invalid alignment size: {}", .{align_size});
        return null;
    }

    const num_frames = num_pages;
    // Frames per alignment unit; candidate start frames are multiples of this.
    // (align_size is a non-zero multiple of page_size here, so plain division is exact.)
    const align_frame = align_size / page_size;
    var start_frame = align_frame;

    while (true) {
        // Check whether [start_frame, start_frame + num_frames) is entirely free.
        var i: usize = 0;
        while (i < num_frames) : (i += 1) {
            if (start_frame + i >= self.frame_end) return null;
            if (self.get(start_frame + i) == .used) break;
        }
        if (i == num_frames) {
            self.markAllocated(start_frame, num_frames);
            const virt_addr: [*]u8 = @ptrFromInt(p2v(frame2phys(start_frame)));
            return virt_addr[0 .. num_pages * page_size];
        }

        // Advance to the next aligned candidate.
        start_frame += align_frame;
        if (start_frame + num_frames >= self.frame_end) return null;
    }
}

/// Allocator-vtable alloc: first-fit scan for `n` bytes rounded up to whole pages.
fn allocate(ctx: *anyopaque, n: usize, _: Alignment, _: usize) ?[*]u8 {
    // NOTE: The alignment argument can be safely ignored for the page allocator,
    // because the allocator always returns a page-aligned address and the Zig
    // Allocator interface never requests an alignment larger than a page.

    const self: *Self = @alignCast(@ptrCast(ctx));
    const mask = self.lock.lockSaveIrq();
    defer self.lock.unlockRestoreIrq(mask);

    const num_frames = (n + page_size - 1) / page_size;
    var start_frame = self.frame_begin;

    while (true) {
        // Check whether [start_frame, start_frame + num_frames) is entirely free.
        var i: usize = 0;
        while (i < num_frames) : (i += 1) {
            if (start_frame + i >= self.frame_end) return null;
            if (self.get(start_frame + i) == .used) break;
        }
        if (i == num_frames) {
            self.markAllocated(start_frame, num_frames);
            return @ptrFromInt(p2v(frame2phys(start_frame)));
        }

        // Skip just past the used frame that broke the run.
        start_frame += i + 1;
    }
}

/// Allocator-vtable free: releases the whole page range backing `slice`.
fn free(ctx: *anyopaque, slice: []u8, _: Alignment, _: usize) void {
    // NOTE: The alignment argument can be safely ignored for the page allocator.
    // See the comment in `allocate` function.

    const self: *Self = @alignCast(@ptrCast(ctx));
    const mask = self.lock.lockSaveIrq();
    defer self.lock.unlockRestoreIrq(mask);

    const num_frames = (slice.len + page_size - 1) / page_size;
    // Round the address down to its page start before translating to a frame.
    const start_frame_vaddr: Virt = @intFromPtr(slice.ptr) & ~page_mask;
    const start_frame = phys2frame(v2p(start_frame_vaddr));
    self.markNotUsed(start_frame, num_frames);
}

fn resize(_: *anyopaque, _: []u8, _: Alignment, _: usize, _: usize) bool {
    @panic("PageAllocator does not support resizing");
}

fn remap(_: *anyopaque, _: []u8, _: Alignment, _: usize, _: usize) ?[*]u8 {
    @panic("PageAllocator does not support remap");
}

/// Physical address -> frame ID.
inline fn phys2frame(phys: Phys) FrameId {
    return phys / bytes_per_frame;
}

/// Frame ID -> physical address.
inline fn frame2phys(frame: FrameId) Phys {
    return frame * bytes_per_frame;
}

/// Check if the memory region described by the descriptor is usable for ymir kernel.
260 | /// Note that these memory areas may contain crucial data for the kernel, 261 | /// including page tables, stack, and GDT. 262 | /// You MUST copy them before using the area. 263 | inline fn isUsableMemory(descriptor: *uefi.tables.MemoryDescriptor) bool { 264 | return switch (descriptor.type) { 265 | .conventional_memory, 266 | .boot_services_code, 267 | => true, 268 | else => false, 269 | }; 270 | } 271 | -------------------------------------------------------------------------------- /ymir/arch/x86/vmx/asm.zig: -------------------------------------------------------------------------------- 1 | const std = @import("std"); 2 | 3 | const vmx = @import("common.zig"); 4 | 5 | const Vcpu = @import("vcpu.zig").Vcpu; 6 | 7 | /// VMLAUNCH or VMRESUME. 8 | /// Returns 0 if succeeded, 1 if failed. 9 | export fn asmVmEntry() callconv(.naked) u8 { 10 | // Save callee saved registers. 11 | asm volatile ( 12 | \\push %%rbp 13 | \\push %%r15 14 | \\push %%r14 15 | \\push %%r13 16 | \\push %%r12 17 | \\push %%rbx 18 | ); 19 | 20 | // Save a pointer to guest registers 21 | asm volatile (std.fmt.comptimePrint( 22 | \\lea {d}(%%rdi), %%rbx 23 | \\push %%rbx 24 | , 25 | .{@offsetOf(Vcpu, "guest_regs")}, 26 | )); 27 | 28 | // Set host stack 29 | asm volatile ( 30 | \\push %%rdi 31 | \\lea 8(%%rsp), %%rdi 32 | \\call setHostStack 33 | \\pop %%rdi 34 | ); 35 | 36 | // Determine VMLAUNCH or VMRESUME. 37 | asm volatile (std.fmt.comptimePrint( 38 | \\testb $1, {d}(%%rdi) 39 | , 40 | .{@offsetOf(Vcpu, "launch_done")}, 41 | )); 42 | 43 | // Restore guest registers. 
44 | asm volatile (std.fmt.comptimePrint( 45 | \\lea {[guest_regs]}(%%rdi), %%rax 46 | \\mov {[rcx]}(%%rax), %%rcx 47 | \\mov {[rdx]}(%%rax), %%rdx 48 | \\mov {[rbx]}(%%rax), %%rbx 49 | \\mov {[rsi]}(%%rax), %%rsi 50 | \\mov {[rdi]}(%%rax), %%rdi 51 | \\mov {[rbp]}(%%rax), %%rbp 52 | \\mov {[r8]}(%%rax), %%r8 53 | \\mov {[r9]}(%%rax), %%r9 54 | \\mov {[r10]}(%%rax), %%r10 55 | \\mov {[r11]}(%%rax), %%r11 56 | \\mov {[r12]}(%%rax), %%r12 57 | \\mov {[r13]}(%%rax), %%r13 58 | \\mov {[r14]}(%%rax), %%r14 59 | \\mov {[r15]}(%%rax), %%r15 60 | , .{ 61 | .guest_regs = @offsetOf(Vcpu, "guest_regs"), 62 | .rcx = @offsetOf(vmx.GuestRegisters, "rcx"), 63 | .rdx = @offsetOf(vmx.GuestRegisters, "rdx"), 64 | .rbx = @offsetOf(vmx.GuestRegisters, "rbx"), 65 | .rsi = @offsetOf(vmx.GuestRegisters, "rsi"), 66 | .rdi = @offsetOf(vmx.GuestRegisters, "rdi"), 67 | .rbp = @offsetOf(vmx.GuestRegisters, "rbp"), 68 | .r8 = @offsetOf(vmx.GuestRegisters, "r8"), 69 | .r9 = @offsetOf(vmx.GuestRegisters, "r9"), 70 | .r10 = @offsetOf(vmx.GuestRegisters, "r10"), 71 | .r11 = @offsetOf(vmx.GuestRegisters, "r11"), 72 | .r12 = @offsetOf(vmx.GuestRegisters, "r12"), 73 | .r13 = @offsetOf(vmx.GuestRegisters, "r13"), 74 | .r14 = @offsetOf(vmx.GuestRegisters, "r14"), 75 | .r15 = @offsetOf(vmx.GuestRegisters, "r15"), 76 | })); 77 | asm volatile (std.fmt.comptimePrint( 78 | \\movaps {[xmm0]}(%%rax), %%xmm0 79 | \\movaps {[xmm1]}(%%rax), %%xmm1 80 | \\movaps {[xmm2]}(%%rax), %%xmm2 81 | \\movaps {[xmm3]}(%%rax), %%xmm3 82 | \\movaps {[xmm4]}(%%rax), %%xmm4 83 | \\movaps {[xmm5]}(%%rax), %%xmm5 84 | \\movaps {[xmm6]}(%%rax), %%xmm6 85 | \\movaps {[xmm7]}(%%rax), %%xmm7 86 | \\movaps {[xmm8]}(%%rax), %%xmm8 87 | \\movaps {[xmm9]}(%%rax), %%xmm9 88 | \\movaps {[xmm10]}(%%rax), %%xmm10 89 | \\movaps {[xmm11]}(%%rax), %%xmm11 90 | \\movaps {[xmm12]}(%%rax), %%xmm12 91 | \\movaps {[xmm13]}(%%rax), %%xmm13 92 | \\movaps {[xmm14]}(%%rax), %%xmm14 93 | \\movaps {[xmm15]}(%%rax), %%xmm15 94 | \\mov {[rax]}(%%rax), 
%%rax 95 | , .{ 96 | .rax = @offsetOf(vmx.GuestRegisters, "rax"), 97 | .xmm0 = @offsetOf(vmx.GuestRegisters, "xmm0"), 98 | .xmm1 = @offsetOf(vmx.GuestRegisters, "xmm1"), 99 | .xmm2 = @offsetOf(vmx.GuestRegisters, "xmm2"), 100 | .xmm3 = @offsetOf(vmx.GuestRegisters, "xmm3"), 101 | .xmm4 = @offsetOf(vmx.GuestRegisters, "xmm4"), 102 | .xmm5 = @offsetOf(vmx.GuestRegisters, "xmm5"), 103 | .xmm6 = @offsetOf(vmx.GuestRegisters, "xmm6"), 104 | .xmm7 = @offsetOf(vmx.GuestRegisters, "xmm7"), 105 | .xmm8 = @offsetOf(vmx.GuestRegisters, "xmm8"), 106 | .xmm9 = @offsetOf(vmx.GuestRegisters, "xmm9"), 107 | .xmm10 = @offsetOf(vmx.GuestRegisters, "xmm10"), 108 | .xmm11 = @offsetOf(vmx.GuestRegisters, "xmm11"), 109 | .xmm12 = @offsetOf(vmx.GuestRegisters, "xmm12"), 110 | .xmm13 = @offsetOf(vmx.GuestRegisters, "xmm13"), 111 | .xmm14 = @offsetOf(vmx.GuestRegisters, "xmm14"), 112 | .xmm15 = @offsetOf(vmx.GuestRegisters, "xmm15"), 113 | })); 114 | 115 | // VMLAUNCH or VMRESUME. 116 | asm volatile ( 117 | \\jz .L_vmlaunch 118 | \\vmresume 119 | \\.L_vmlaunch: 120 | \\vmlaunch 121 | ::: "cc", "memory"); 122 | 123 | // Failed to launch. 124 | 125 | // Set return value to 1. 126 | asm volatile ( 127 | \\mov $1, %%al 128 | ); 129 | 130 | // Restore callee saved registers. 131 | asm volatile ( 132 | \\add $0x8, %%rsp 133 | \\pop %%rbx 134 | \\pop %%r12 135 | \\pop %%r13 136 | \\pop %%r14 137 | \\pop %%r15 138 | \\pop %%rbp 139 | ); 140 | 141 | // Return to caller of asmVmEntry() 142 | asm volatile ( 143 | \\ret 144 | ); 145 | } 146 | 147 | pub fn asmVmExit() callconv(.naked) void { 148 | // Disable IRQ. 149 | asm volatile ( 150 | \\cli 151 | ); 152 | 153 | // Save guest RAX, get &guest_regs 154 | asm volatile ( 155 | \\push %%rax 156 | \\movq 8(%%rsp), %%rax 157 | ); 158 | 159 | // Save guest registers. 160 | // TODO: should save/restore host AVX registers? 161 | asm volatile (std.fmt.comptimePrint( 162 | \\ 163 | // Save pushed RAX. 
164 | \\pop {[rax]}(%%rax) 165 | // Discard pushed &guest_regs. 166 | \\add $0x8, %%rsp 167 | // Save guest registers. 168 | \\mov %%rcx, {[rcx]}(%%rax) 169 | \\mov %%rdx, {[rdx]}(%%rax) 170 | \\mov %%rbx, {[rbx]}(%%rax) 171 | \\mov %%rsi, {[rsi]}(%%rax) 172 | \\mov %%rdi, {[rdi]}(%%rax) 173 | \\mov %%rbp, {[rbp]}(%%rax) 174 | \\mov %%r8, {[r8]}(%%rax) 175 | \\mov %%r9, {[r9]}(%%rax) 176 | \\mov %%r10, {[r10]}(%%rax) 177 | \\mov %%r11, {[r11]}(%%rax) 178 | \\mov %%r12, {[r12]}(%%rax) 179 | \\mov %%r13, {[r13]}(%%rax) 180 | \\mov %%r14, {[r14]}(%%rax) 181 | \\mov %%r15, {[r15]}(%%rax) 182 | \\movaps %%xmm0, {[xmm0]}(%%rax) 183 | \\movaps %%xmm1, {[xmm1]}(%%rax) 184 | \\movaps %%xmm2, {[xmm2]}(%%rax) 185 | \\movaps %%xmm3, {[xmm3]}(%%rax) 186 | \\movaps %%xmm4, {[xmm4]}(%%rax) 187 | \\movaps %%xmm5, {[xmm5]}(%%rax) 188 | \\movaps %%xmm6, {[xmm6]}(%%rax) 189 | \\movaps %%xmm7, {[xmm7]}(%%rax) 190 | \\movaps %%xmm8, {[xmm8]}(%%rax) 191 | \\movaps %%xmm9, {[xmm9]}(%%rax) 192 | \\movaps %%xmm10, {[xmm10]}(%%rax) 193 | \\movaps %%xmm11, {[xmm11]}(%%rax) 194 | \\movaps %%xmm12, {[xmm12]}(%%rax) 195 | \\movaps %%xmm13, {[xmm13]}(%%rax) 196 | \\movaps %%xmm14, {[xmm14]}(%%rax) 197 | \\movaps %%xmm15, {[xmm15]}(%%rax) 198 | , 199 | .{ 200 | .rax = @offsetOf(vmx.GuestRegisters, "rax"), 201 | .rcx = @offsetOf(vmx.GuestRegisters, "rcx"), 202 | .rdx = @offsetOf(vmx.GuestRegisters, "rdx"), 203 | .rbx = @offsetOf(vmx.GuestRegisters, "rbx"), 204 | .rsi = @offsetOf(vmx.GuestRegisters, "rsi"), 205 | .rdi = @offsetOf(vmx.GuestRegisters, "rdi"), 206 | .rbp = @offsetOf(vmx.GuestRegisters, "rbp"), 207 | .r8 = @offsetOf(vmx.GuestRegisters, "r8"), 208 | .r9 = @offsetOf(vmx.GuestRegisters, "r9"), 209 | .r10 = @offsetOf(vmx.GuestRegisters, "r10"), 210 | .r11 = @offsetOf(vmx.GuestRegisters, "r11"), 211 | .r12 = @offsetOf(vmx.GuestRegisters, "r12"), 212 | .r13 = @offsetOf(vmx.GuestRegisters, "r13"), 213 | .r14 = @offsetOf(vmx.GuestRegisters, "r14"), 214 | .r15 = @offsetOf(vmx.GuestRegisters, 
"r15"), 215 | .xmm0 = @offsetOf(vmx.GuestRegisters, "xmm0"), 216 | .xmm1 = @offsetOf(vmx.GuestRegisters, "xmm1"), 217 | .xmm2 = @offsetOf(vmx.GuestRegisters, "xmm2"), 218 | .xmm3 = @offsetOf(vmx.GuestRegisters, "xmm3"), 219 | .xmm4 = @offsetOf(vmx.GuestRegisters, "xmm4"), 220 | .xmm5 = @offsetOf(vmx.GuestRegisters, "xmm5"), 221 | .xmm6 = @offsetOf(vmx.GuestRegisters, "xmm6"), 222 | .xmm7 = @offsetOf(vmx.GuestRegisters, "xmm7"), 223 | .xmm8 = @offsetOf(vmx.GuestRegisters, "xmm8"), 224 | .xmm9 = @offsetOf(vmx.GuestRegisters, "xmm9"), 225 | .xmm10 = @offsetOf(vmx.GuestRegisters, "xmm10"), 226 | .xmm11 = @offsetOf(vmx.GuestRegisters, "xmm11"), 227 | .xmm12 = @offsetOf(vmx.GuestRegisters, "xmm12"), 228 | .xmm13 = @offsetOf(vmx.GuestRegisters, "xmm13"), 229 | .xmm14 = @offsetOf(vmx.GuestRegisters, "xmm14"), 230 | .xmm15 = @offsetOf(vmx.GuestRegisters, "xmm15"), 231 | }, 232 | )); 233 | 234 | // Restore callee saved registers. 235 | asm volatile ( 236 | \\pop %%rbx 237 | \\pop %%r12 238 | \\pop %%r13 239 | \\pop %%r14 240 | \\pop %%r15 241 | \\pop %%rbp 242 | ); 243 | 244 | // Return to caller of asmVmEntry() 245 | asm volatile ( 246 | \\mov $0, %%rax 247 | \\ret 248 | ); 249 | } 250 | -------------------------------------------------------------------------------- /ymir/arch/x86/vmx/io.zig: -------------------------------------------------------------------------------- 1 | const std = @import("std"); 2 | const log = std.log.scoped(.vmio); 3 | 4 | const arch = @import("arch.zig"); 5 | const sr = arch.serial; 6 | const am = arch.am; 7 | 8 | const vmx = @import("common.zig"); 9 | const QualIo = vmx.qual.QualIo; 10 | const VmxError = vmx.VmxError; 11 | 12 | const Vcpu = @import("vcpu.zig").Vcpu; 13 | const IrqLine = arch.pic.IrqLine; 14 | 15 | pub fn handleIo(vcpu: *Vcpu, qual: QualIo) VmxError!void { 16 | return switch (qual.direction) { 17 | .in => try handleIoIn(vcpu, qual), 18 | .out => try handleIoOut(vcpu, qual), 19 | }; 20 | } 21 | 22 | fn handleIoIn(vcpu: *Vcpu, 
qual: QualIo) VmxError!void { 23 | const regs = &vcpu.guest_regs; 24 | switch (qual.port) { 25 | 0x0020...0x0021 => try handlePicIn(vcpu, qual), 26 | 0x0060...0x0064 => regs.rax = 0, // PS/2. Unimplemented. 27 | 0x0070...0x0071 => regs.rax = 0, // RTC. Unimplemented. 28 | 0x0080...0x008F => {}, // DMA. Unimplemented. 29 | 0x00A0...0x00A1 => try handlePicIn(vcpu, qual), 30 | 0x02E8...0x02EF => {}, // Fourth serial port. Ignore. 31 | 0x02F8...0x02FF => {}, // Second serial port. Ignore. 32 | 0x03B0...0x03DF => regs.rax = 0, // VGA. Uniimplemented. 33 | 0x03E8...0x03EF => {}, // Third serial port. Ignore. 34 | 0x03F8...0x03FF => try handleSerialIn(vcpu, qual), 35 | 0x0CF8...0x0CFF => regs.rax = 0, // PCI. Unimplemented. 36 | 0xC000...0xCFFF => {}, // Old PCI. Ignore. 37 | else => { 38 | log.err("Unhandled I/O-in port: 0x{X}", .{qual.port}); 39 | log.err("I/O size: {s}", .{@tagName(qual.size)}); 40 | vcpu.abort(); 41 | }, 42 | } 43 | } 44 | 45 | fn handleIoOut(vcpu: *Vcpu, qual: QualIo) VmxError!void { 46 | switch (qual.port) { 47 | 0x0020...0x0021 => try handlePicOut(vcpu, qual), 48 | 0x0060...0x0064 => {}, // PS/2. Unimplemented. 49 | 0x0070...0x0071 => {}, // RTC. Unimplemented. 50 | 0x0080...0x008F => {}, // DMA. Unimplemented. 51 | 0x00A0...0x00A1 => try handlePicOut(vcpu, qual), 52 | 0x02E8...0x02EF => {}, // Fourth serial port. Ignore. 53 | 0x02F8...0x02FF => {}, // Second serial port. Ignore. 54 | 0x03B0...0x03DF => {}, // VGA. Uniimplemented. 55 | 0x03F8...0x03FF => try handleSerialOut(vcpu, qual), 56 | 0x03E8...0x03EF => {}, // Third serial port. Ignore. 57 | 0x0CF8...0x0CFF => {}, // PCI. Unimplemented. 58 | 0xC000...0xCFFF => {}, // Old PCI. Ignore. 
59 | else => { 60 | log.err("Unhandled I/O-out port: 0x{X}", .{qual.port}); 61 | log.err("I/O size: {s}", .{@tagName(qual.size)}); 62 | vcpu.abort(); 63 | }, 64 | } 65 | } 66 | 67 | // ============================================================================= 68 | 69 | fn handleSerialIn(vcpu: *Vcpu, qual: QualIo) VmxError!void { 70 | const regs = &vcpu.guest_regs; 71 | switch (qual.port) { 72 | // Receive buffer. 73 | 0x3F8 => regs.rax = am.inb(qual.port), // pass-through 74 | // Interrupt Enable Register (DLAB=1) / Divisor Latch High Register (DLAB=0). 75 | 0x3F9 => regs.rax = vcpu.serial.ier, 76 | // Interrupt Identification Register. 77 | 0x3FA => regs.rax = am.inb(qual.port), // pass-through 78 | // Line Control Register (MSB is DLAB). 79 | 0x3FB => regs.rax = 0x00, // ignore 80 | // Modem Control Register. 81 | 0x3FC => regs.rax = vcpu.serial.mcr, 82 | // Line Status Register. 83 | 0x3FD => regs.rax = am.inb(qual.port), // pass-through 84 | // Modem Status Register. 85 | 0x3FE => regs.rax = am.inb(qual.port), // pass-through 86 | // Scratch Register. 87 | 0x3FF => regs.rax = 0, // 8250 88 | else => { 89 | log.err("Unsupported I/O-in to the first serial port: 0x{X}", .{qual.port}); 90 | vcpu.abort(); 91 | }, 92 | } 93 | } 94 | 95 | fn handleSerialOut(vcpu: *Vcpu, qual: QualIo) VmxError!void { 96 | const regs = &vcpu.guest_regs; 97 | switch (qual.port) { 98 | // Transmit buffer. 99 | 0x3F8 => sr.writeByte(@truncate(regs.rax), .com1), 100 | // Interrupt Enable Register. 101 | 0x3F9 => vcpu.serial.ier = @truncate(regs.rax), 102 | // FIFO control registers. 103 | 0x3FA => {}, // ignore 104 | // Line Control Register (MSB is DLAB). 105 | 0x3FB => {}, // ignore 106 | // Modem Control Register. 107 | 0x3FC => vcpu.serial.mcr = @truncate(regs.rax), 108 | // Scratch Register. 
109 | 0x3FF => {}, // ignore 110 | else => { 111 | log.err("Unsupported I/O-out to the first serial port: 0x{X}", .{qual.port}); 112 | vcpu.abort(); 113 | }, 114 | } 115 | } 116 | 117 | fn handlePicIn(vcpu: *Vcpu, qual: QualIo) VmxError!void { 118 | if (qual.size != .byte) { 119 | log.err("Unsupported I/O-in size to PIC: size={s}, port=0x{X}", .{ @tagName(qual.size), qual.port }); 120 | vcpu.abort(); 121 | } 122 | 123 | const regs = &vcpu.guest_regs; 124 | const pic = &vcpu.pic; 125 | 126 | switch (qual.port) { 127 | // Primary PIC data. 128 | 0x21 => switch (pic.primary_phase) { 129 | .uninitialized, .initialized => regs.rax = pic.primary_mask, 130 | else => { 131 | log.err("Unsupported I/O-in to primary PIC: phase={s}", .{@tagName(pic.primary_phase)}); 132 | vcpu.abort(); 133 | }, 134 | }, 135 | // Secondary PIC data. 136 | 0xA1 => switch (pic.secondary_phase) { 137 | .uninitialized, .initialized => regs.rax = pic.secondary_mask, 138 | else => { 139 | log.err("Unsupported I/O-in to secondary PIC: phase={s}", .{@tagName(pic.secondary_phase)}); 140 | vcpu.abort(); 141 | }, 142 | }, 143 | else => { 144 | log.err("Unsupported I/O-in to PIC: port=0x{X}", .{qual.port}); 145 | vcpu.abort(); 146 | }, 147 | } 148 | } 149 | 150 | fn handlePicOut(vcpu: *Vcpu, qual: QualIo) VmxError!void { 151 | if (qual.size != .byte) { 152 | log.err("Unsupported I/O-out size to PIC: size={s}, port=0x{X}", .{ @tagName(qual.size), qual.port }); 153 | vcpu.abort(); 154 | } 155 | 156 | const regs = &vcpu.guest_regs; 157 | const pic = &vcpu.pic; 158 | const dx: u8 = @truncate(regs.rax); 159 | 160 | switch (qual.port) { 161 | // Primary PIC command. 162 | 0x20 => switch (dx) { 163 | 0x11 => pic.primary_phase = .phase1, 164 | // Specific-EOI. 165 | // It's Ymir's responsibility to send EOI, so guests are not allowed to send EOI. 
166 | 0x60...0x67 => {}, 167 | else => { 168 | log.err("Unsupported command to primary PIC: command=0x{X}", .{dx}); 169 | vcpu.abort(); 170 | }, 171 | }, 172 | // Primary PIC data. 173 | 0x21 => switch (pic.primary_phase) { 174 | .uninitialized, .initialized => pic.primary_mask = dx, 175 | .phase1 => { 176 | log.info("Primary PIC vector offset: 0x{X}", .{dx}); 177 | pic.primary_base = dx; 178 | pic.primary_phase = .phase2; 179 | }, 180 | .phase2 => if (dx != (1 << 2)) { 181 | log.err("Invalid secondary PIC location: 0x{X}", .{dx}); 182 | vcpu.abort(); 183 | } else { 184 | pic.primary_phase = .phase3; 185 | }, 186 | .phase3 => pic.primary_phase = .initialized, 187 | }, 188 | // Secondary PIC command. 189 | 0xA0 => switch (dx) { 190 | 0x11 => pic.secondary_phase = .phase1, 191 | // Specific-EOI. 192 | // It's Ymir's responsibility to send EOI, so guests are not allowed to send EOI. 193 | 0x60...0x67 => {}, 194 | else => { 195 | log.err("Unsupported command to secondary PIC: command=0x{X}", .{dx}); 196 | vcpu.abort(); 197 | }, 198 | }, 199 | // Secondary PIC data. 200 | 0xA1 => switch (pic.secondary_phase) { 201 | .uninitialized, .initialized => pic.secondary_mask = dx, 202 | .phase1 => { 203 | log.info("Secondary PIC vector offset: 0x{X}", .{dx}); 204 | pic.secondary_base = dx; 205 | pic.secondary_phase = .phase2; 206 | }, 207 | .phase2 => if (dx != 2) { 208 | log.err("Invalid PIC cascade identity: 0x{X}", .{dx}); 209 | vcpu.abort(); 210 | } else { 211 | pic.secondary_phase = .phase3; 212 | }, 213 | .phase3 => pic.secondary_phase = .initialized, 214 | }, 215 | else => { 216 | log.err("Unsupported I/O-out to PIC: port=0x{X}", .{qual.port}); 217 | vcpu.abort(); 218 | }, 219 | } 220 | } 221 | 222 | /// 8259 Programmable Interrupt Controller. 223 | pub const Pic = struct { 224 | /// Mask of the primary PIC. 225 | primary_mask: u8, 226 | /// Mask of the secondary PIC. 227 | secondary_mask: u8, 228 | /// Initialization phase of the primary PIC. 
229 | primary_phase: InitPhase = .uninitialized, 230 | /// Initialization phase of the secondary PIC. 231 | secondary_phase: InitPhase = .uninitialized, 232 | /// Vector offset of the primary PIC. 233 | primary_base: u8 = 0, 234 | /// Vector offset of the secondary PIC. 235 | secondary_base: u8 = 0, 236 | 237 | const InitPhase = enum { 238 | uninitialized, 239 | phase1, 240 | phase2, 241 | phase3, 242 | initialized, 243 | }; 244 | 245 | pub fn new() Pic { 246 | return Pic{ 247 | .primary_mask = 0xFF, 248 | .secondary_mask = 0xFF, 249 | }; 250 | } 251 | }; 252 | 253 | pub const Serial = struct { 254 | /// Interrupt Enable Register. 255 | ier: u8 = 0, 256 | /// Modem Control Register. 257 | mcr: u8 = 0, 258 | 259 | pub fn new() Serial { 260 | return Serial{}; 261 | } 262 | }; 263 | -------------------------------------------------------------------------------- /ymir/arch/x86/vmx/ept.zig: -------------------------------------------------------------------------------- 1 | //! Extended Page Table support. 2 | //! cf. SDM Vol.3C 29.3. 3 | 4 | const std = @import("std"); 5 | const log = std.log.scoped(.ept); 6 | const Allocator = std.mem.Allocator; 7 | 8 | const ymir = @import("ymir"); 9 | const mem = ymir.mem; 10 | const Phys = mem.Phys; 11 | const Virt = mem.Virt; 12 | 13 | const page_mask_4k = mem.page_mask_4k; 14 | const page_mask_2mb = mem.page_mask_2mb; 15 | const page_mask_1gb = mem.page_mask_1gb; 16 | const page_shift_4k = mem.page_shift_4k; 17 | const page_size_4k = mem.page_size_4k; 18 | const page_size_2mb = mem.page_size_2mb; 19 | const page_size_1gb = mem.page_size_1gb; 20 | 21 | const virt2phys = ymir.mem.virt2phys; 22 | const phys2virt = ymir.mem.phys2virt; 23 | 24 | /// Shift in bits to extract the level-4 index from a guest physical address. 25 | const lv4_shift = 39; 26 | /// Shift in bits to extract the level-3 index from a guest physical address. 
27 | const lv3_shift = 30; 28 | /// Shift in bits to extract the level-2 index from a guest physical address. 29 | const lv2_shift = 21; 30 | /// Shift in bits to extract the level-1 index from a guest physical address. 31 | const lv1_shift = 12; 32 | /// Mask to extract page entry index from a shifted guest physical address. 33 | const index_mask = 0x1FF; 34 | 35 | /// Number of entries in a page table. 36 | const num_table_entries: usize = 512; 37 | 38 | const Error = error{ 39 | /// The page is already mapped. 40 | AlreadyMapped, 41 | /// No memory. 42 | OutOfMemory, 43 | }; 44 | 45 | /// Extended Page Table Pointer. 46 | /// cf. SDM Vol.3C 25.6.11. 47 | pub const Eptp = packed struct(u64) { 48 | /// Memory type. 49 | type: MemoryType = .write_back, 50 | /// EPT page-walk length. 51 | level: PageLevel = .four, 52 | /// Enable dirty and accessed flags for EPT. 53 | enable_ad: bool = true, 54 | /// Enable enforcement of access rights for supervisor shadow-stack pages. 55 | enable_ar: bool = false, 56 | /// Reserved. 57 | _reserved1: u4 = 0, 58 | /// 4KB aligned address of the Level-4 EPT table. 59 | phys: u52, 60 | 61 | pub fn new(lv4tbl: []Lv4Entry) Eptp { 62 | return Eptp{ 63 | .phys = @truncate(virt2phys(lv4tbl.ptr) >> page_shift_4k), 64 | }; 65 | } 66 | 67 | /// Get the host virtual address of the Level-4 EPT table. 68 | pub fn getLv4(self: *Eptp) []Lv4Entry { 69 | const virt: [*]Lv4Entry = @ptrFromInt(phys2virt(@as(u64, @intCast(self.phys)) << page_shift_4k)); 70 | return virt[0..num_table_entries]; 71 | } 72 | 73 | const PageLevel = enum(u3) { 74 | four = 3, 75 | five = 4, 76 | }; 77 | }; 78 | 79 | /// Init guest EPT. 80 | pub fn initEpt( 81 | /// Guest physical address to map. 82 | guest_start: Phys, 83 | /// Host physical address to map. 84 | host_start: Phys, 85 | /// Size in bytes of the memory region to map. 86 | size: usize, 87 | /// Page allocator. 
88 | allocator: Allocator, 89 | ) Error!Eptp { 90 | if (size & page_mask_2mb != 0) { 91 | @panic("Requested end address is not 2MiB page aligned."); 92 | } 93 | if (size > page_size_1gb * num_table_entries) { 94 | @panic("Requested end address is too large."); 95 | } 96 | 97 | const lv4tbl = try initTable(Lv4Entry, allocator); 98 | log.debug("EPT Level4 Table @ {X:0>16}", .{@intFromPtr(lv4tbl.ptr)}); 99 | 100 | for (0..size / page_size_2mb) |i| { 101 | try map2m( 102 | guest_start + page_size_2mb * i, 103 | host_start + page_size_2mb * i, 104 | lv4tbl, 105 | allocator, 106 | ); 107 | } 108 | 109 | return Eptp.new(lv4tbl); 110 | } 111 | 112 | /// Translate guest physical address to host physical address. 113 | pub fn translate(guest: Phys, lv4tbl: []Lv4Entry) ?Phys { 114 | const lv4index = (guest >> lv4_shift) & index_mask; 115 | const lv4ent = lv4tbl[lv4index]; 116 | if (!lv4ent.present()) return null; 117 | 118 | const lv3ent = getLv3Entry(guest, lv4ent.address()); 119 | if (!lv3ent.present()) return null; 120 | if (lv3ent.map_memory) return lv3ent.address() + (guest & page_mask_1gb); 121 | 122 | const lv2ent = getLv2Entry(guest, lv3ent.address()); 123 | if (!lv2ent.present()) return null; 124 | if (lv2ent.map_memory) return lv2ent.address() + (guest & page_mask_2mb); 125 | 126 | const lv1ent = getLv1Entry(guest, lv2ent.address()); 127 | if (!lv1ent.present()) return null; 128 | return lv1ent.address() + (guest & page_mask_4k); 129 | } 130 | 131 | /// Maps the given 2MiB host physical memory to the guest physical memory. 132 | /// Caller must flush TLB. 
133 | fn map2m( 134 | guest_phys: Phys, 135 | host_phys: Phys, 136 | lv4tbl: []Lv4Entry, 137 | allocator: Allocator, 138 | ) Error!void { 139 | const lv4index = (guest_phys >> lv4_shift) & index_mask; 140 | const lv4ent = &lv4tbl[lv4index]; 141 | if (!lv4ent.present()) { 142 | const lv3tbl = try initTable(Lv3Entry, allocator); 143 | lv4ent.* = Lv4Entry.newMapTable(lv3tbl); 144 | } 145 | 146 | const lv3ent = getLv3Entry(guest_phys, lv4ent.address()); 147 | if (!lv3ent.present()) { 148 | const lv2tbl = try initTable(Lv2Entry, allocator); 149 | lv3ent.* = Lv3Entry.newMapTable(lv2tbl); 150 | } 151 | if (lv3ent.map_memory) return error.AlreadyMapped; 152 | 153 | const lv2ent = getLv2Entry(guest_phys, lv3ent.address()); 154 | if (lv2ent.present()) return error.AlreadyMapped; 155 | lv2ent.* = Lv2Entry{ 156 | .map_memory = true, 157 | .phys = @truncate(host_phys >> page_shift_4k), 158 | }; 159 | } 160 | 161 | fn getTable(T: type, table: Phys) []T { 162 | const ents: [*]T = @ptrFromInt(phys2virt(table)); 163 | return ents[0..num_table_entries]; 164 | } 165 | 166 | fn getLv3Table(lv3tbl: Phys) []Lv3Entry { 167 | return getTable(Lv3Entry, lv3tbl); 168 | } 169 | 170 | fn getLv2Table(lv2tbl: Phys) []Lv2Entry { 171 | return getTable(Lv2Entry, lv2tbl); 172 | } 173 | 174 | fn getLv1Table(lv1tbl: Phys) []Lv1Entry { 175 | return getTable(Lv1Entry, lv1tbl); 176 | } 177 | 178 | fn getEntry(T: type, gpa: Phys, tbl_paddr: Phys) *T { 179 | const table = getTable(T, tbl_paddr); 180 | const shift = switch (T) { 181 | Lv4Entry => lv4_shift, 182 | Lv3Entry => lv3_shift, 183 | Lv2Entry => lv2_shift, 184 | Lv1Entry => lv1_shift, 185 | else => @compileError("Invalid type"), 186 | }; 187 | return &table[(gpa >> shift) & index_mask]; 188 | } 189 | 190 | fn getLv3Entry(gpa: Phys, lv3tbl_paddr: Phys) *Lv3Entry { 191 | return getEntry(Lv3Entry, gpa, lv3tbl_paddr); 192 | } 193 | 194 | fn getLv2Entry(gpa: Phys, lv2tbl_paddr: Phys) *Lv2Entry { 195 | return getEntry(Lv2Entry, gpa, lv2tbl_paddr); 196 | } 
197 | 198 | fn getLv1Entry(gpa: Phys, lv1tbl_paddr: Phys) *Lv1Entry { 199 | return getEntry(Lv1Entry, gpa, lv1tbl_paddr); 200 | } 201 | 202 | fn initTable(T: type, allocator: Allocator) Error![]T { 203 | const tbl = try allocator.alloc(T, num_table_entries); 204 | for (0..tbl.len) |i| { 205 | tbl[i].read = false; 206 | tbl[i].write = false; 207 | tbl[i].exec_super = false; 208 | tbl[i].map_memory = false; 209 | tbl[i].type = @enumFromInt(0); 210 | } 211 | return tbl; 212 | } 213 | 214 | const MemoryType = enum(u3) { 215 | uncacheable = 0, 216 | write_back = 6, 217 | }; 218 | 219 | const TableLevel = enum { 220 | lv4, 221 | lv3, 222 | lv2, 223 | lv1, 224 | }; 225 | 226 | fn EntryBase(table_level: TableLevel) type { 227 | return packed struct(u64) { 228 | const Self = @This(); 229 | const level = table_level; 230 | const LowerType = switch (level) { 231 | .lv4 => Lv3Entry, 232 | .lv3 => Lv2Entry, 233 | .lv2 => Lv1Entry, 234 | .lv1 => struct {}, 235 | }; 236 | 237 | /// Whether reads are allowed. 238 | read: bool = true, 239 | /// Whether writes are allowed. 240 | write: bool = true, 241 | /// If "mode-based execute control for EPT" is 0, execute access. 242 | /// If that field is 1, execute access for supervisor-mode linear address. 243 | exec_super: bool = true, 244 | /// EPT memory type. 245 | /// ReservedZ when the entry maps a page. 246 | type: MemoryType = .uncacheable, 247 | /// Ignore PAT memory type. 248 | ignore_pat: bool = false, 249 | /// If true, this entry maps memory. Otherwise, this references a page table. 250 | map_memory: bool, 251 | /// If EPTP[6] is 1, accessed flag. Otherwise, ignored. 252 | accessed: bool = false, 253 | // If EPTP[6] is 1, dirty flag. Otherwise, ignored. 254 | dirty: bool = false, 255 | /// Execute access for user-mode linear address. 256 | exec_user: bool = true, 257 | /// Ignored 258 | _ignored2: u1 = 0, 259 | /// 4KB aligned physical address of the mapped page or page table. 
260 | phys: u52, 261 | 262 | /// Return true if the entry is present. 263 | pub fn present(self: Self) bool { 264 | return self.read or self.write or self.exec_super; 265 | } 266 | 267 | /// Get the physical address of the page or page table that this entry references or maps. 268 | pub inline fn address(self: Self) Phys { 269 | return @as(u64, @intCast(self.phys)) << page_shift_4k; 270 | } 271 | 272 | /// Get a new page table entry that references a page table. 273 | pub fn newMapTable(table: []LowerType) Self { 274 | if (level == .lv1) @compileError("Lv1 EPT entry cannot reference a page table"); 275 | return Self{ 276 | .map_memory = false, 277 | .type = .uncacheable, 278 | .phys = @truncate(virt2phys(table.ptr) >> page_shift_4k), 279 | }; 280 | } 281 | 282 | /// Get a new page table entry that maps a page. 283 | pub fn newMapPage(phys: Phys) Self { 284 | if (level == .lv4) @compileError("Lv4 EPT entry cannot map a page"); 285 | return Self{ 286 | .read = true, 287 | .write = true, 288 | .exec_super = true, 289 | .exec_user = true, 290 | .map_memory = true, 291 | .type = @enumFromInt(0), 292 | .phys = @truncate(virt2phys(phys) >> page_shift_4k), 293 | }; 294 | } 295 | }; 296 | } 297 | 298 | const Lv4Entry = EntryBase(.lv4); 299 | const Lv3Entry = EntryBase(.lv3); 300 | const Lv2Entry = EntryBase(.lv2); 301 | const Lv1Entry = EntryBase(.lv1); 302 | -------------------------------------------------------------------------------- /surtr/arch/x86/page.zig: -------------------------------------------------------------------------------- 1 | const std = @import("std"); 2 | const log = std.log.scoped(.archp); 3 | const uefi = std.os.uefi; 4 | const elf = std.elf; 5 | const BootServices = uefi.tables.BootServices; 6 | 7 | const am = @import("asm.zig"); 8 | 9 | pub const PageError = error{ 10 | /// Failed to allocate memory. 11 | NoMemory, 12 | /// Requested page table entry is not present. 13 | NotPresent, 14 | /// Given virtual address is not canonical. 
15 | NotCanonical, 16 | /// Given address is invalid. 17 | InvalidAddress, 18 | /// Requested mapping already exists. 19 | AlreadyMapped, 20 | }; 21 | 22 | pub const kib = 1024; 23 | pub const mib = 1024 * kib; 24 | pub const gib = 1024 * mib; 25 | 26 | /// Size in bytes of a 4K page. 27 | pub const page_size_4k = 4 * kib; 28 | /// Size in bytes of a 2M page. 29 | pub const page_size_2mb = page_size_4k << 9; 30 | /// Size in bytes of a 1G page. 31 | pub const page_size_1gb = page_size_2mb << 9; 32 | /// Shift in bits for a 4K page. 33 | pub const page_shift_4k = 12; 34 | /// Shift in bits for a 2M page. 35 | pub const page_shift_2mb = 21; 36 | /// Shift in bits for a 1G page. 37 | pub const page_shift_1gb = 30; 38 | /// Mask for a 4K page. 39 | pub const page_mask_4k: u64 = page_size_4k - 1; 40 | /// Mask for a 2M page. 41 | pub const page_mask_2mb: u64 = page_size_2mb - 1; 42 | /// Mask for a 1G page. 43 | pub const page_mask_1gb: u64 = page_size_1gb - 1; 44 | /// Number of entries in a page table. 45 | const num_table_entries: usize = 512; 46 | 47 | /// Shift in bits to extract the level-4 index from a virtual address. 48 | const lv4_shift = 39; 49 | /// Shift in bits to extract the level-3 index from a virtual address. 50 | const lv3_shift = 30; 51 | /// Shift in bits to extract the level-2 index from a virtual address. 52 | const lv2_shift = 21; 53 | /// Shift in bits to extract the level-1 index from a virtual address. 54 | const lv1_shift = 12; 55 | /// Mask to extract page entry index from a shifted virtual address. 56 | const index_mask = 0x1FF; 57 | 58 | /// Length of the implemented bits. 59 | const implemented_bit_length = 48; 60 | /// Most significant implemented bit in 0-origin. 61 | const msi_bit = 47; 62 | 63 | /// Physical address. 64 | pub const Phys = u64; 65 | /// Virtual address. 
66 | pub const Virt = u64; 67 | 68 | pub const PageAttribute = enum { 69 | /// RO 70 | read_only, 71 | /// RW 72 | read_write, 73 | /// RX 74 | executable, 75 | 76 | pub fn fromFlags(flags: u32) PageAttribute { 77 | return if (flags & elf.PF_X != 0) .executable else if (flags & elf.PF_W != 0) .read_write else .read_only; 78 | } 79 | }; 80 | 81 | /// Return true if the given address is canonical form. 82 | /// The address is in canonical form if address bits 63 through 48 are copies of bit 47. 83 | pub fn isCanonical(addr: Virt) bool { 84 | if ((addr >> msi_bit) & 1 == 0) { 85 | return (addr >> (implemented_bit_length)) == 0; 86 | } else { 87 | return addr >> (implemented_bit_length) == 0xFFFF; 88 | } 89 | } 90 | 91 | fn getTable(T: type, addr: Phys) []T { 92 | const ptr: [*]T = @ptrFromInt(addr & ~page_mask_4k); 93 | return ptr[0..num_table_entries]; 94 | } 95 | 96 | fn getLv4Table(cr3: Phys) []Lv4Entry { 97 | return getTable(Lv4Entry, cr3); 98 | } 99 | 100 | fn getLv3Table(lv3_paddr: Phys) []Lv3Entry { 101 | return getTable(Lv3Entry, lv3_paddr); 102 | } 103 | 104 | fn getLv2Table(lv2_paddr: Phys) []Lv2Entry { 105 | return getTable(Lv2Entry, lv2_paddr); 106 | } 107 | 108 | fn getLv1Table(lv1_paddr: Phys) []Lv1Entry { 109 | return getTable(Lv1Entry, lv1_paddr); 110 | } 111 | 112 | fn getEntry(T: type, vaddr: Virt, paddr: Phys) *T { 113 | const table = getTable(T, paddr); 114 | const shift = switch (T) { 115 | Lv4Entry => lv4_shift, 116 | Lv3Entry => lv3_shift, 117 | Lv2Entry => lv2_shift, 118 | Lv1Entry => lv1_shift, 119 | else => @compileError("Unsupported type"), 120 | }; 121 | return &table[(vaddr >> shift) & index_mask]; 122 | } 123 | 124 | fn getLv4Entry(addr: Virt, cr3: Phys) *Lv4Entry { 125 | return getEntry(Lv4Entry, addr, cr3); 126 | } 127 | 128 | fn getLv3Entry(addr: Virt, lv3tbl_paddr: Phys) *Lv3Entry { 129 | return getEntry(Lv3Entry, addr, lv3tbl_paddr); 130 | } 131 | 132 | fn getLv2Entry(addr: Virt, lv2tbl_paddr: Phys) *Lv2Entry { 133 | return 
getEntry(Lv2Entry, addr, lv2tbl_paddr); 134 | } 135 | 136 | fn getLv1Entry(addr: Virt, lv1tbl_paddr: Phys) *Lv1Entry { 137 | return getEntry(Lv1Entry, addr, lv1tbl_paddr); 138 | } 139 | 140 | /// Make level-4 page table writable. 141 | /// The page tables prepared by the bootloader are marked as read-only. 142 | /// To modify page mappings, this function duplicates the level-4 page table 143 | /// and load the new level-4 page table to CR3. 144 | pub fn setLv4Writable(bs: *BootServices) PageError!void { 145 | var new_lv4ptr: [*]Lv4Entry = undefined; 146 | const status = bs.allocatePages(.allocate_any_pages, .boot_services_data, 1, @ptrCast(&new_lv4ptr)); 147 | if (status != .success) return PageError.NoMemory; 148 | 149 | const new_lv4tbl = new_lv4ptr[0..num_table_entries]; 150 | const lv4tbl = getLv4Table(am.readCr3()); 151 | @memcpy(new_lv4tbl, lv4tbl); 152 | 153 | am.loadCr3(@intFromPtr(new_lv4tbl.ptr)); 154 | } 155 | 156 | /// Change the attribute of the 4KiB page. 157 | pub fn changeMap4k(virt: Virt, attr: PageAttribute) PageError!void { 158 | if (virt & 0xFFF != 0) return PageError.InvalidAddress; 159 | if (!isCanonical(virt)) return PageError.NotCanonical; 160 | 161 | const rw = switch (attr) { 162 | .read_only, .executable => false, 163 | .read_write => true, 164 | }; 165 | const xd = attr != .executable; 166 | 167 | const lv4ent = getLv4Entry(virt, am.readCr3()); 168 | if (!lv4ent.present) return PageError.NotPresent; 169 | const lv3ent = getLv3Entry(virt, lv4ent.address()); 170 | if (!lv3ent.present) return PageError.NotPresent; 171 | const lv2ent = getLv2Entry(virt, lv3ent.address()); 172 | if (!lv2ent.present) return PageError.NotPresent; 173 | const lv1ent = getLv1Entry(virt, lv2ent.address()); 174 | if (!lv1ent.present) return PageError.NotPresent; 175 | 176 | lv1ent.rw = rw; 177 | lv1ent.xd = xd; 178 | am.flushTlbSingle(virt); 179 | } 180 | 181 | /// Maps 4KiB page at the given virtual address to the given physical address. 
182 | /// If the mapping already exists, this function modifies the existing mapping. 183 | /// If the mapping does not exist, this function creates a new mapping, 184 | /// where new memory is allocated for page tables using BootServices. 185 | /// New page tables are allocated as 4KiB BootServicesData pages. 186 | pub fn map4kTo(virt: Virt, phys: Phys, attr: PageAttribute, bs: *BootServices) PageError!void { 187 | if (virt & page_mask_4k != 0) return PageError.InvalidAddress; 188 | if (phys & page_mask_4k != 0) return PageError.InvalidAddress; 189 | if (!isCanonical(virt)) return PageError.NotCanonical; 190 | 191 | const rw = switch (attr) { 192 | .read_only, .executable => false, 193 | .read_write => true, 194 | }; 195 | const xd = attr != .executable; // Set XD (execute-disable) only for non-executable pages, matching changeMap4k. 196 | 197 | const lv4ent = getLv4Entry(virt, am.readCr3()); 198 | if (!lv4ent.present) try allocateNewTable(Lv4Entry, lv4ent, bs); 199 | 200 | const lv3ent = getLv3Entry(virt, lv4ent.address()); 201 | if (!lv3ent.present) try allocateNewTable(Lv3Entry, lv3ent, bs); 202 | 203 | const lv2ent = getLv2Entry(virt, lv3ent.address()); 204 | if (!lv2ent.present) try allocateNewTable(Lv2Entry, lv2ent, bs); 205 | 206 | const lv1ent = getLv1Entry(virt, lv2ent.address()); 207 | if (lv1ent.present) return PageError.AlreadyMapped; 208 | var new_lv1ent = Lv1Entry.newMapPage(phys, true); 209 | 210 | new_lv1ent.rw = rw; 211 | new_lv1ent.xd = xd; 212 | lv1ent.* = new_lv1ent; 213 | // No need to flush TLB because the page was not present before. 214 | } 215 | 216 | /// Allocate new page tables and update the given page table entry.
217 | fn allocateNewTable(T: type, entry: *T, bs: *BootServices) PageError!void { 218 | var ptr: Phys = undefined; 219 | const status = bs.allocatePages(.allocate_any_pages, .boot_services_data, 1, @ptrCast(&ptr)); 220 | if (status != .success) return PageError.NoMemory; 221 | 222 | clearPage(ptr); 223 | entry.* = T.newMapTable(@ptrFromInt(ptr), true); 224 | } 225 | 226 | /// Zero-clear the given 4KiB page. 227 | fn clearPage(addr: Phys) void { 228 | const page_ptr: [*]u8 = @ptrFromInt(addr); 229 | @memset(page_ptr[0..page_size_4k], 0); 230 | } 231 | 232 | const TableLevel = enum { 233 | lv4, 234 | lv3, 235 | lv2, 236 | lv1, 237 | }; 238 | 239 | fn EntryBase(table_level: TableLevel) type { 240 | return packed struct(u64) { 241 | const Self = @This(); 242 | const level = table_level; 243 | const LowerType = switch (level) { 244 | .lv4 => Lv3Entry, 245 | .lv3 => Lv2Entry, 246 | .lv2 => Lv1Entry, 247 | .lv1 => struct {}, 248 | }; 249 | 250 | /// Present. 251 | present: bool = true, 252 | /// Read/Write. 253 | /// If set to false, write access is not allowed to the region. 254 | rw: bool, 255 | /// User/Supervisor. 256 | /// If set to false, user-mode access is not allowed to the region. 257 | us: bool, 258 | /// Page-level writh-through. 259 | /// Indirectly determines the memory type used to access the page or page table. 260 | pwt: bool = false, 261 | /// Page-level cache disable. 262 | /// Indirectly determines the memory type used to access the page or page table. 263 | pcd: bool = false, 264 | /// Accessed. 265 | /// Indicates whether this entry has been used for translation. 266 | accessed: bool = false, 267 | /// Dirty bit. 268 | /// Indicates whether software has written to this page. 269 | /// Ignored when this entry references a page table. 270 | dirty: bool = false, 271 | /// Page Size. 272 | /// If set to true, the entry maps a page. 273 | /// If set to false, the entry references a page table. 274 | ps: bool, 275 | /// Ignored when CR4.PGE != 1. 
276 | /// Ignored when this entry references a page table. 277 | /// Ignored for level-4 entries. 278 | global: bool = true, 279 | /// Ignored 280 | _ignored1: u2 = 0, 281 | /// Ignored 282 | restart: bool = false, 283 | /// When the entry maps a page, physical address of the page. 284 | /// When the entry references a page table, physical address of the page table. 285 | phys: u51, 286 | /// Execute Disable. 287 | xd: bool = false, 288 | 289 | /// Get the physical address of the page or page table that this entry references or maps. 290 | pub inline fn address(self: Self) Phys { 291 | return @as(u64, @intCast(self.phys)) << page_shift_4k; 292 | } 293 | 294 | /// Get a new page table entry that references a page table. 295 | pub fn newMapTable(table: [*]LowerType, present: bool) Self { 296 | if (level == .lv1) @compileError("Lv1 entry cannot reference a page table"); 297 | return Self{ 298 | .present = present, 299 | .rw = true, 300 | .us = false, 301 | .ps = false, 302 | .phys = @truncate(@intFromPtr(table) >> page_shift_4k), 303 | }; 304 | } 305 | 306 | /// Get a new page table entry that maps a page. 307 | pub fn newMapPage(phys: Phys, present: bool) Self { 308 | if (level == .lv4) @compileError("Lv4 entry cannot map a page"); 309 | return Self{ 310 | .present = present, 311 | .rw = true, 312 | .us = false, 313 | .ps = true, 314 | .phys = @truncate(phys >> page_shift_4k), 315 | }; 316 | } 317 | }; 318 | } 319 | 320 | const Lv4Entry = EntryBase(.lv4); 321 | const Lv3Entry = EntryBase(.lv3); 322 | const Lv2Entry = EntryBase(.lv2); 323 | const Lv1Entry = EntryBase(.lv1); 324 | -------------------------------------------------------------------------------- /surtr/boot.zig: -------------------------------------------------------------------------------- 1 | //! Surtr: The bootloader for Ymir. 2 | //! 3 | //! Surtr is a simple bootloader that runs on UEFI firmware. 4 | //! Most of this file is based on programs listed in "Reference". 5 | //! 6 | //! 
Reference: 7 | //! - https://github.com/ssstoyama/bootloader_zig : Unlicense 8 | //! 9 | 10 | const std = @import("std"); 11 | const uefi = std.os.uefi; 12 | const elf = std.elf; 13 | const log = std.log.scoped(.surtr); 14 | 15 | const blog = @import("log.zig"); 16 | const defs = @import("defs.zig"); 17 | const arch = @import("arch.zig"); 18 | 19 | const page_size = arch.page.page_size_4k; 20 | const page_mask = arch.page.page_mask_4k; 21 | 22 | // Override the default log options 23 | pub const std_options = blog.default_log_options; 24 | 25 | // Bootloader entry point. 26 | pub fn main() uefi.Status { 27 | var status: uefi.Status = undefined; 28 | 29 | // Initialize log. 30 | const con_out = uefi.system_table.con_out orelse return .aborted; 31 | status = con_out.clearScreen(); 32 | blog.init(con_out); 33 | 34 | log.info("Initialized bootloader log.", .{}); 35 | 36 | // Get boot services. 37 | const boot_service: *uefi.tables.BootServices = uefi.system_table.boot_services orelse { 38 | log.err("Failed to get boot services.", .{}); 39 | return .aborted; 40 | }; 41 | log.info("Got boot services.", .{}); 42 | 43 | // Locate simple file system protocol. 44 | var fs: *uefi.protocol.SimpleFileSystem = undefined; 45 | status = boot_service.locateProtocol(&uefi.protocol.SimpleFileSystem.guid, null, @ptrCast(&fs)); 46 | if (status != .success) { 47 | log.err("Failed to locate simple file system protocol.", .{}); 48 | return status; 49 | } 50 | log.info("Located simple file system protocol.", .{}); 51 | 52 | // Open volume. 53 | var root_dir: *const uefi.protocol.File = undefined; 54 | status = fs.openVolume(&root_dir); 55 | if (status != .success) { 56 | log.err("Failed to open volume.", .{}); 57 | return status; 58 | } 59 | log.info("Opened filesystem volume.", .{}); 60 | 61 | // Open kernel file. 
62 | const kernel = openFile(root_dir, "ymir.elf") catch return .aborted; 63 | log.info("Opened kernel file.", .{}); 64 | 65 | // Read kernel ELF header 66 | var header_size: usize = @sizeOf(elf.Elf64_Ehdr); 67 | var header_buffer: [*]align(8) u8 = undefined; 68 | status = boot_service.allocatePool(.loader_data, header_size, &header_buffer); 69 | if (status != .success) { 70 | log.err("Failed to allocate memory for kernel ELF header.", .{}); 71 | return status; 72 | } 73 | 74 | status = kernel.read(&header_size, header_buffer); 75 | if (status != .success) { 76 | log.err("Failed to read kernel ELF header.", .{}); 77 | return status; 78 | } 79 | 80 | const elf_header = elf.Header.parse(header_buffer[0..@sizeOf(elf.Elf64_Ehdr)]) catch |err| { 81 | log.err("Failed to parse kernel ELF header: {?}", .{err}); 82 | return .aborted; 83 | }; 84 | log.info("Parsed kernel ELF header.", .{}); 85 | log.debug( 86 | \\Kernel ELF information: 87 | \\ Entry Point : 0x{X} 88 | \\ Is 64-bit : {d} 89 | \\ # of Program Headers: {d} 90 | \\ # of Section Headers: {d} 91 | , 92 | .{ 93 | elf_header.entry, 94 | @intFromBool(elf_header.is_64), 95 | elf_header.phnum, 96 | elf_header.shnum, 97 | }, 98 | ); 99 | 100 | // Calculate necessary memory size for kernel image. 
101 | const Addr = elf.Elf64_Addr; 102 | var kernel_start_virt: Addr = std.math.maxInt(Addr); 103 | var kernel_start_phys: Addr align(page_size) = std.math.maxInt(Addr); 104 | var kernel_end_phys: Addr = 0; 105 | var iter = elf_header.program_header_iterator(@constCast(kernel)); 106 | while (true) { 107 | const phdr = iter.next() catch |err| { 108 | log.err("Failed to get program header: {?}\n", .{err}); 109 | return .load_error; 110 | } orelse break; 111 | if (phdr.p_type != elf.PT_LOAD) continue; 112 | if (phdr.p_paddr < kernel_start_phys) kernel_start_phys = phdr.p_paddr; 113 | if (phdr.p_vaddr < kernel_start_virt) kernel_start_virt = phdr.p_vaddr; 114 | if (phdr.p_paddr + phdr.p_memsz > kernel_end_phys) kernel_end_phys = phdr.p_paddr + phdr.p_memsz; 115 | } 116 | const pages_4kib = (kernel_end_phys - kernel_start_phys + (page_size - 1)) / page_size; 117 | log.info("Kernel image: 0x{X:0>16} - 0x{X:0>16} (0x{X} pages)", .{ kernel_start_phys, kernel_end_phys, pages_4kib }); 118 | 119 | // Allocate memory for kernel image. 120 | status = boot_service.allocatePages(.allocate_address, .loader_data, pages_4kib, @ptrCast(&kernel_start_phys)); 121 | if (status != .success) { 122 | log.err("Failed to allocate memory for kernel image: {?}", .{status}); 123 | return status; 124 | } 125 | log.info("Allocated memory for kernel image @ 0x{X:0>16} ~ 0x{X:0>16}", .{ kernel_start_phys, kernel_start_phys + pages_4kib * page_size }); 126 | 127 | // Map memory for kernel image. 
128 | arch.page.setLv4Writable(boot_service) catch |err| { 129 | log.err("Failed to set page table writable: {?}", .{err}); 130 | return .load_error; 131 | }; 132 | log.debug("Set page table writable.", .{}); 133 | 134 | for (0..pages_4kib) |i| { 135 | arch.page.map4kTo( 136 | kernel_start_virt + page_size * i, 137 | kernel_start_phys + page_size * i, 138 | .read_write, 139 | boot_service, 140 | ) catch |err| { 141 | log.err("Failed to map memory for kernel image: {?}", .{err}); 142 | return .load_error; 143 | }; 144 | } 145 | log.info("Mapped memory for kernel image.", .{}); 146 | 147 | // Load kernel image. 148 | log.info("Loading kernel image...", .{}); 149 | iter = elf_header.program_header_iterator(@constCast(kernel)); 150 | while (true) { 151 | const phdr = iter.next() catch |err| { 152 | log.err("Failed to get program header: {?}\n", .{err}); 153 | return .load_error; 154 | } orelse break; 155 | if (phdr.p_type != elf.PT_LOAD) continue; 156 | 157 | // Load data 158 | status = kernel.setPosition(phdr.p_offset); 159 | if (status != .success) { 160 | log.err("Failed to set position for kernel image.", .{}); 161 | return status; 162 | } 163 | const segment: [*]u8 = @ptrFromInt(phdr.p_vaddr); 164 | var mem_size = phdr.p_memsz; 165 | status = kernel.read(&mem_size, segment); 166 | if (status != .success) { 167 | log.err("Failed to read kernel image.", .{}); 168 | return status; 169 | } 170 | const chr_x: u8 = if (phdr.p_flags & elf.PF_X != 0) 'X' else '-'; 171 | const chr_w: u8 = if (phdr.p_flags & elf.PF_W != 0) 'W' else '-'; 172 | const chr_r: u8 = if (phdr.p_flags & elf.PF_R != 0) 'R' else '-'; 173 | log.info( 174 | " Seg @ 0x{X:0>16} - 0x{X:0>16} [{c}{c}{c}]", 175 | .{ phdr.p_vaddr, phdr.p_vaddr + phdr.p_memsz, chr_x, chr_w, chr_r }, 176 | ); 177 | 178 | // Zero-clear the BSS section and uninitialized data. 
        // p_memsz > p_filesz means the trailing region is BSS / uninitialized
        // data that must be zero-filled.
        const zero_count = phdr.p_memsz - phdr.p_filesz;
        if (zero_count > 0) {
            boot_service.setMem(@ptrFromInt(phdr.p_vaddr + phdr.p_filesz), zero_count, 0);
        }

        // Change memory protection.
        // Round the segment down/up to page boundaries and apply the
        // attributes derived from the ELF segment flags to every page.
        const page_start = phdr.p_vaddr & ~page_mask;
        const page_end = (phdr.p_vaddr + phdr.p_memsz + (page_size - 1)) & ~page_mask;
        const size = (page_end - page_start) / page_size;
        const attribute = arch.page.PageAttribute.fromFlags(phdr.p_flags);
        for (0..size) |i| {
            arch.page.changeMap4k(
                page_start + page_size * i,
                attribute,
            ) catch |err| {
                log.err("Failed to change memory protection: {?}", .{err});
                return .load_error;
            };
        }
    }

    // Enable NX-bit.
    arch.enableNxBit();

    // Get guest kernel image info.
    const guest = openFile(root_dir, "bzImage") catch return .aborted;
    log.info("Opened guest kernel file.", .{});

    // FileInfo is variable-length (trailing file name); reserve 0x100 extra bytes.
    const guest_info_buffer_size: usize = @sizeOf(uefi.FileInfo) + 0x100;
    var guest_info_actual_size = guest_info_buffer_size;
    var guest_info_buffer: [guest_info_buffer_size]u8 align(@alignOf(uefi.FileInfo)) = undefined;
    status = guest.getInfo(&uefi.FileInfo.guid, &guest_info_actual_size, &guest_info_buffer);
    if (status != .success) {
        log.err("Failed to get guest kernel file info.", .{});
        return status;
    }
    const guest_info: *const uefi.FileInfo = @alignCast(@ptrCast(&guest_info_buffer));
    log.info("Guest kernel size: {X} bytes", .{guest_info.file_size});

    // Load guest kernel image.
    // Any physical location is acceptable for the guest image; its address is
    // handed to Ymir via the boot info below.
    var guest_start: u64 align(page_size) = undefined;
    const guest_size_pages = (guest_info.file_size + (page_size - 1)) / page_size;
    status = boot_service.allocatePages(.allocate_any_pages, .loader_data, guest_size_pages, @ptrCast(&guest_start));
    if (status != .success) {
        log.err("Failed to allocate memory for guest kernel image.", .{});
        return status;
    }
    var guest_size = guest_info.file_size;

    // read() updates guest_size to the number of bytes actually read.
    status = guest.read(&guest_size, @ptrFromInt(guest_start));
    if (status != .success) {
        log.err("Failed to read guest kernel image.", .{});
        return status;
    }
    log.info("Loaded guest kernel image @ 0x{X:0>16} ~ 0x{X:0>16}", .{ guest_start, guest_start + guest_size });

    // Load initrd.
    const initrd = openFile(root_dir, "rootfs.cpio.gz") catch return .aborted;
    log.info("Opened initrd file.", .{});

    // FileInfo is variable-length (trailing file name); reserve 0x100 extra bytes.
    const initrd_info_buffer_size: usize = @sizeOf(uefi.FileInfo) + 0x100;
    var initrd_info_actual_size = initrd_info_buffer_size;
    var initrd_info_buffer: [initrd_info_buffer_size]u8 align(@alignOf(uefi.FileInfo)) = undefined;
    status = initrd.getInfo(&uefi.FileInfo.guid, &initrd_info_actual_size, &initrd_info_buffer);
    if (status != .success) {
        log.err("Failed to get initrd file info.", .{});
        return status;
    }
    const initrd_info: *const uefi.FileInfo = @alignCast(@ptrCast(&initrd_info_buffer));
    var initrd_size = initrd_info.file_size;
    log.info("Initrd size: 0x{X:0>16} bytes", .{initrd_size});

    var initrd_start: u64 = undefined;
    const initrd_size_pages = (initrd_size + (page_size - 1)) / page_size;
    status = boot_service.allocatePages(.allocate_any_pages, .loader_data, initrd_size_pages, @ptrCast(&initrd_start));
    if (status != .success) {
        log.err("Failed to allocate memory for initrd.", .{});
        return status;
    }

    status = initrd.read(&initrd_size, @ptrFromInt(initrd_start));
    if (status != .success) {
        log.err("Failed to read initrd.", .{});
        return status;
    }
    log.info("Loaded initrd @ 0x{X:0>16} ~ 0x{X:0>16}", .{ initrd_start, initrd_start + initrd_size });

    // Find RSDP.
    // EFI_ACPI_TABLE_GUID (8868E871-E4F1-11D3-BC22-0080C73C8881): identifies
    // the ACPI 2.0+ RSDP entry in the UEFI configuration table.
    const acpi_table_guid = uefi.Guid{
        .time_low = 0x8868E871,
        .time_mid = 0xE4F1,
        .time_high_and_version = 0x11D3,
        .clock_seq_high_and_reserved = 0xBC,
        .clock_seq_low = 0x22,
        .node = [_]u8{ 0x0, 0x80, 0xC7, 0x3C, 0x88, 0x81 },
    };
    // Linear scan of the configuration table; for-else yields the matching
    // vendor_table pointer or falls through to the error branch.
    const acpi_table = for (0..uefi.system_table.number_of_table_entries) |i| {
        const guid = uefi.system_table.configuration_table[i].vendor_guid;
        if (uefi.Guid.eql(acpi_table_guid, guid)) {
            break uefi.system_table.configuration_table[i].vendor_table;
        }
    } else {
        log.err("Failed to find ACPI table.", .{});
        return .load_error;
    };
    log.info("ACPI table @ 0x{X:0>16}", .{@intFromPtr(acpi_table)});

    // Clean up memory.
    status = boot_service.freePool(header_buffer);
    if (status != .success) {
        log.err("Failed to free memory for kernel ELF header.", .{});
        return status;
    }
    status = initrd.close();
    if (status != .success) {
        log.err("Failed to close initrd file.", .{});
        return status;
    }
    status = kernel.close();
    if (status != .success) {
        log.err("Failed to close kernel file.", .{});
        return status;
    }
    status = root_dir.close();
    if (status != .success) {
        log.err("Failed to close filesystem volume.", .{});
        return status;
    }

    // Get memory map.
    // Fixed 4-page stack buffer for the memory map. It must remain valid
    // after exitBootServices(), so pool allocation is not an option.
    const map_buffer_size = page_size * 4;
    var map_buffer: [map_buffer_size]u8 = undefined;
    var map = defs.MemoryMap{
        .buffer_size = map_buffer.len,
        .descriptors = @alignCast(@ptrCast(&map_buffer)),
        .map_key = 0,
        .map_size = map_buffer.len,
        .descriptor_size = 0,
        .descriptor_version = 0,
    };
    status = getMemoryMap(&map, boot_service);
    if (status != .success) {
        log.err("Failed to get memory map.", .{});
        return status;
    }

    // Print memory map.
    log.debug("Memory Map (Physical): Buf=0x{X}, MapSize=0x{X}, DescSize=0x{X}", .{
        @intFromPtr(map.descriptors),
        map.map_size,
        map.descriptor_size,
    });
    var map_iter = defs.MemoryDescriptorIterator.new(map);
    while (true) {
        if (map_iter.next()) |md| {
            log.debug(" 0x{X:0>16} - 0x{X:0>16} : {s}", .{
                md.physical_start,
                md.physical_start + md.number_of_pages * page_size,
                @tagName(md.type),
            });
        } else break;
    }

    // Exit boot services.
    // After this point, we can't use any boot services including logging.
    log.info("Exiting boot services.", .{});
    // exitBootServices() requires the map_key of the *current* memory map.
    status = boot_service.exitBootServices(uefi.handle, map.map_key);
    if (status != .success) {
        // May fail if the memory map has been changed.
        // Retry after getting the memory map again.
        map.buffer_size = map_buffer.len;
        map.map_size = map_buffer.len;
        status = getMemoryMap(&map, boot_service);
        if (status != .success) {
            log.err("Failed to get memory map after failed to exit boot services.", .{});
            return status;
        }
        status = boot_service.exitBootServices(uefi.handle, map.map_key);
        if (status != .success) {
            log.err("Failed to exit boot services.", .{});
            return status;
        }
    }

    // Jump to kernel entry point.
364 | const KernelEntryType = fn (defs.BootInfo) callconv(.{ .x86_64_win = .{} }) noreturn; 365 | const kernel_entry: *KernelEntryType = @ptrFromInt(elf_header.entry); 366 | const boot_info = defs.BootInfo{ 367 | .magic = defs.magic, 368 | .memory_map = map, 369 | .guest_info = .{ 370 | .guest_image = @ptrFromInt(guest_start), 371 | .guest_size = guest_size, 372 | .initrd_addr = @ptrFromInt(initrd_start), 373 | .initrd_size = initrd_info.file_size, 374 | }, 375 | .acpi_table = acpi_table, 376 | }; 377 | kernel_entry(boot_info); 378 | 379 | unreachable; 380 | } 381 | 382 | inline fn toUcs2(comptime s: [:0]const u8) [s.len * 2:0]u16 { 383 | var ucs2: [s.len * 2:0]u16 = [_:0]u16{0} ** (s.len * 2); 384 | for (s, 0..) |c, i| { 385 | ucs2[i] = c; 386 | ucs2[i + 1] = 0; 387 | } 388 | return ucs2; 389 | } 390 | 391 | /// Open a file using Simple File System protocol. 392 | fn openFile( 393 | root: *const uefi.protocol.File, 394 | comptime name: [:0]const u8, 395 | ) !*const uefi.protocol.File { 396 | var file: *const uefi.protocol.File = undefined; 397 | const status = root.open( 398 | &file, 399 | &toUcs2(name), 400 | uefi.protocol.File.efi_file_mode_read, 401 | 0, 402 | ); 403 | 404 | if (status != .success) { 405 | log.err("Failed to open file: {s}", .{name}); 406 | return error.aborted; 407 | } 408 | return file; 409 | } 410 | 411 | fn getMemoryMap(map: *defs.MemoryMap, boot_services: *uefi.tables.BootServices) uefi.Status { 412 | return boot_services.getMemoryMap( 413 | &map.map_size, 414 | map.descriptors, 415 | &map.map_key, 416 | &map.descriptor_size, 417 | &map.descriptor_version, 418 | ); 419 | } 420 | 421 | fn halt() void { 422 | asm volatile ("hlt"); 423 | } 424 | --------------------------------------------------------------------------------