├── guest └── bios │ ├── .gitignore │ ├── rvm-bios.lds │ ├── Makefile │ └── boot16.S ├── .gitignore ├── .gitmodules ├── hypervisor ├── src │ ├── arch │ │ ├── mod.rs │ │ └── x86_64 │ │ │ ├── mod.rs │ │ │ ├── instructions.rs │ │ │ ├── boot.rs │ │ │ ├── lapic.rs │ │ │ ├── timer.rs │ │ │ ├── idt.rs │ │ │ ├── trap.S │ │ │ ├── gdt.rs │ │ │ ├── trap.rs │ │ │ ├── uart16550.rs │ │ │ └── multiboot.S │ ├── lang_items.rs │ ├── mm │ │ ├── mod.rs │ │ ├── address.rs │ │ ├── heap.rs │ │ └── frame.rs │ ├── timer.rs │ ├── config.rs │ ├── hv │ │ ├── gconfig.rs │ │ ├── device_emu │ │ │ ├── i8259_pic.rs │ │ │ ├── mod.rs │ │ │ ├── lapic.rs │ │ │ └── uart16550.rs │ │ ├── hal.rs │ │ ├── mod.rs │ │ ├── gpm.rs │ │ └── vmexit.rs │ ├── main.rs │ └── logging.rs ├── x86_64.json ├── Cargo.toml ├── linker.lds ├── Makefile └── Cargo.lock ├── rvm ├── src │ ├── arch │ │ ├── mod.rs │ │ └── x86_64 │ │ │ ├── mod.rs │ │ │ ├── regs.rs │ │ │ ├── msr.rs │ │ │ ├── vmx │ │ │ ├── instructions.rs │ │ │ ├── ept.rs │ │ │ ├── mod.rs │ │ │ ├── definitions.rs │ │ │ ├── structs.rs │ │ │ ├── vcpu.rs │ │ │ └── vmcs.rs │ │ │ └── lapic.rs │ ├── hal.rs │ ├── error.rs │ ├── lib.rs │ └── mm │ │ ├── mod.rs │ │ └── page_table.rs └── Cargo.toml ├── rust-toolchain.toml ├── .github └── workflows │ └── build.yml └── README.md /guest/bios/.gitignore: -------------------------------------------------------------------------------- 1 | out/ 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | target 2 | .vscode 3 | .DS_Store 4 | rvm/Cargo.lock 5 | !hypervisor/Cargo.lock 6 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "guest/nimbos"] 2 | path = guest/nimbos 3 | url = https://github.com/equation314/nimbos.git 4 | 
-------------------------------------------------------------------------------- /hypervisor/src/arch/mod.rs: -------------------------------------------------------------------------------- 1 | cfg_if::cfg_if! { 2 | if #[cfg(target_arch = "x86_64")] { 3 | mod x86_64; 4 | pub use self::x86_64::*; 5 | } 6 | } 7 | -------------------------------------------------------------------------------- /hypervisor/src/lang_items.rs: -------------------------------------------------------------------------------- 1 | use core::panic::PanicInfo; 2 | 3 | #[panic_handler] 4 | fn panic(info: &PanicInfo) -> ! { 5 | error!("{}", info); 6 | loop {} 7 | } 8 | -------------------------------------------------------------------------------- /rvm/src/arch/mod.rs: -------------------------------------------------------------------------------- 1 | //! Architecture dependent structures. 2 | 3 | cfg_if::cfg_if! { 4 | if #[cfg(target_arch = "x86_64")] { 5 | mod x86_64; 6 | pub use self::x86_64::*; 7 | } 8 | } 9 | -------------------------------------------------------------------------------- /hypervisor/src/mm/mod.rs: -------------------------------------------------------------------------------- 1 | mod heap; 2 | 3 | pub mod address; 4 | pub mod frame; 5 | 6 | pub const PAGE_SIZE: usize = 0x1000; 7 | 8 | pub fn init_heap_early() { 9 | heap::init(); 10 | } 11 | 12 | pub fn init() { 13 | frame::init(); 14 | } 15 | -------------------------------------------------------------------------------- /rust-toolchain.toml: -------------------------------------------------------------------------------- 1 | [toolchain] 2 | profile = "minimal" 3 | # use the nightly version of the last stable toolchain, see 4 | channel = "nightly-2022-11-03" 5 | components = ["rust-src", "llvm-tools-preview", "rustfmt", "clippy"] 6 | -------------------------------------------------------------------------------- /hypervisor/src/timer.rs: -------------------------------------------------------------------------------- 
1 | use core::time::Duration; 2 | 3 | use crate::arch::timer; 4 | 5 | pub type TimeValue = Duration; 6 | 7 | pub fn current_time() -> TimeValue { 8 | TimeValue::from_nanos(timer::ticks_to_nanos(timer::current_ticks())) 9 | } 10 | -------------------------------------------------------------------------------- /guest/bios/rvm-bios.lds: -------------------------------------------------------------------------------- 1 | OUTPUT_ARCH(i386) 2 | 3 | BASE_ADDRESS = 0x8000; 4 | 5 | ENTRY(entry16) 6 | SECTIONS 7 | { 8 | . = BASE_ADDRESS; 9 | .text : { 10 | *(.text .text.*) 11 | } 12 | 13 | /DISCARD/ : { 14 | *(.eh_frame) *(.eh_frame_hdr) 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /hypervisor/src/arch/x86_64/mod.rs: -------------------------------------------------------------------------------- 1 | mod boot; 2 | mod gdt; 3 | mod idt; 4 | mod lapic; 5 | mod trap; 6 | 7 | pub mod instructions; 8 | pub mod timer; 9 | pub mod uart16550; 10 | 11 | pub use trap::handle_irq; 12 | pub use uart16550 as uart; 13 | 14 | pub fn init_early() { 15 | uart::init(); 16 | } 17 | 18 | pub fn init() { 19 | gdt::init(); 20 | idt::init(); 21 | lapic::init(); 22 | timer::init(); 23 | } 24 | -------------------------------------------------------------------------------- /hypervisor/src/config.rs: -------------------------------------------------------------------------------- 1 | pub const PHYS_VIRT_OFFSET: usize = 0xffff_ff80_0000_0000; 2 | 3 | pub const BOOT_KERNEL_STACK_SIZE: usize = 4096 * 4; // 16K 4 | pub const KERNEL_HEAP_SIZE: usize = 0x40_0000; // 4M 5 | 6 | pub const PHYS_MEMORY_BASE: usize = 0; 7 | pub const PHYS_MEMORY_SIZE: usize = 0x400_0000; // 64M 8 | pub const PHYS_MEMORY_END: usize = PHYS_MEMORY_BASE + PHYS_MEMORY_SIZE; 9 | 10 | pub const TICKS_PER_SEC: u64 = 100; 11 | -------------------------------------------------------------------------------- /hypervisor/src/hv/gconfig.rs: 
-------------------------------------------------------------------------------- 1 | use rvm::{GuestPhysAddr, HostPhysAddr}; 2 | 3 | pub const BIOS_PADDR: HostPhysAddr = 0x400_0000; 4 | pub const BIOS_SIZE: usize = 0x1000; 5 | 6 | pub const GUEST_IMAGE_PADDR: HostPhysAddr = 0x400_1000; 7 | pub const GUEST_IMAGE_SIZE: usize = 0x10_0000; // 1M 8 | 9 | pub const GUEST_PHYS_MEMORY_BASE: GuestPhysAddr = 0; 10 | pub const BIOS_ENTRY: GuestPhysAddr = 0x8000; 11 | pub const GUEST_ENTRY: GuestPhysAddr = 0x20_0000; 12 | pub const GUEST_PHYS_MEMORY_SIZE: usize = 0x100_0000; // 16M 13 | -------------------------------------------------------------------------------- /rvm/src/arch/x86_64/mod.rs: -------------------------------------------------------------------------------- 1 | mod lapic; 2 | pub(crate) mod msr; 3 | 4 | #[macro_use] 5 | pub(crate) mod regs; 6 | 7 | cfg_if::cfg_if! { 8 | if #[cfg(feature = "vmx")] { 9 | mod vmx; 10 | use vmx as vender; 11 | pub use vmx::{VmxExitInfo, VmxExitReason, VmxInterruptInfo, VmxIoExitInfo}; 12 | } 13 | } 14 | 15 | pub(crate) use vender::{has_hardware_support, ArchPerCpuState}; 16 | 17 | pub use lapic::ApicTimer; 18 | pub use regs::GeneralRegisters; 19 | pub use vender::{NestedPageTable, RvmVcpu}; 20 | -------------------------------------------------------------------------------- /rvm/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "rvm" 3 | version = "0.1.0" 4 | edition = "2021" 5 | authors = ["Yuekai Jia "] 6 | 7 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 8 | 9 | [features] 10 | default = ["vmx"] 11 | vmx = [] 12 | 13 | [dependencies] 14 | log = "0.4" 15 | cfg-if = "1.0" 16 | bitflags = "1.3" 17 | bit_field = "0.10" 18 | numeric-enum-macro = "0.2" 19 | 20 | [target.'cfg(target_arch = "x86_64")'.dependencies] 21 | x86 = "0.52" 22 | x86_64 = "0.14" 23 | raw-cpuid = "10.6" 24 | 
-------------------------------------------------------------------------------- /hypervisor/src/arch/x86_64/instructions.rs: -------------------------------------------------------------------------------- 1 | #![allow(dead_code)] 2 | 3 | use core::arch::asm; 4 | 5 | use x86_64::registers::{rflags, rflags::RFlags}; 6 | 7 | #[inline] 8 | pub fn enable_irqs() { 9 | unsafe { asm!("sti") }; 10 | } 11 | 12 | #[inline] 13 | pub fn disable_irqs() { 14 | unsafe { asm!("cli") }; 15 | } 16 | 17 | #[inline] 18 | pub fn irqs_disabled() -> bool { 19 | !rflags::read().contains(RFlags::INTERRUPT_FLAG) 20 | } 21 | 22 | #[inline] 23 | pub fn wait_for_ints() { 24 | if !irqs_disabled() { 25 | x86_64::instructions::hlt(); 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /hypervisor/x86_64.json: -------------------------------------------------------------------------------- 1 | { 2 | "arch": "x86_64", 3 | "data-layout": "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", 4 | "disable-redzone": true, 5 | "executables": true, 6 | "features": "-mmx,-sse,+soft-float", 7 | "linker": "rust-lld", 8 | "linker-flavor": "ld.lld", 9 | "llvm-target": "x86_64-unknown-none", 10 | "max-atomic-width": 64, 11 | "panic-strategy": "abort", 12 | "pre-link-args": { 13 | "ld.lld": [ 14 | "-Tlinker.lds" 15 | ] 16 | }, 17 | "target-pointer-width": "64" 18 | } 19 | -------------------------------------------------------------------------------- /hypervisor/src/mm/address.rs: -------------------------------------------------------------------------------- 1 | use super::PAGE_SIZE; 2 | use crate::config::PHYS_VIRT_OFFSET; 3 | 4 | pub(super) type PhysAddr = usize; 5 | pub(super) type VirtAddr = usize; 6 | 7 | pub const fn phys_to_virt(paddr: PhysAddr) -> VirtAddr { 8 | paddr + PHYS_VIRT_OFFSET 9 | } 10 | 11 | pub const fn virt_to_phys(vaddr: VirtAddr) -> PhysAddr { 12 | vaddr - PHYS_VIRT_OFFSET 13 | } 14 | 15 | pub const fn 
align_down(addr: usize) -> usize { 16 | addr & !(PAGE_SIZE - 1) 17 | } 18 | 19 | pub const fn align_up(addr: usize) -> usize { 20 | (addr + PAGE_SIZE - 1) & !(PAGE_SIZE - 1) 21 | } 22 | 23 | pub const fn is_aligned(addr: usize) -> bool { 24 | (addr & (PAGE_SIZE - 1)) == 0 25 | } 26 | -------------------------------------------------------------------------------- /hypervisor/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "rvm-hypervisor" 3 | version = "0.1.0" 4 | edition = "2021" 5 | authors = ["Yuekai Jia "] 6 | 7 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 8 | 9 | [dependencies] 10 | log = "0.4" 11 | spin = "0.9" 12 | cfg-if = "1.0" 13 | bitflags = "1.3" 14 | buddy_system_allocator = "0.8" 15 | lazy_static = { version = "1.4.0", features = ["spin_no_std"] } 16 | bitmap-allocator = { git = "https://github.com/rcore-os/bitmap-allocator", rev = "88e871a" } 17 | rvm = { path = "../rvm" } 18 | 19 | [target.'cfg(target_arch = "x86_64")'.dependencies] 20 | x86 = "0.52" 21 | x86_64 = "0.14" 22 | x2apic = "0.4" 23 | raw-cpuid = "10.6" 24 | 25 | [profile.release] 26 | lto = true 27 | -------------------------------------------------------------------------------- /hypervisor/src/hv/device_emu/i8259_pic.rs: -------------------------------------------------------------------------------- 1 | //! Emulated Intel 8259 Programmable Interrupt Controller. 
(ref: https://wiki.osdev.org/8259_PIC) 2 | 3 | use super::PortIoDevice; 4 | use rvm::{RvmError, RvmResult}; 5 | 6 | pub struct I8259Pic { 7 | port_base: u16, 8 | } 9 | 10 | impl PortIoDevice for I8259Pic { 11 | fn port_range(&self) -> core::ops::Range { 12 | self.port_base..self.port_base + 2 13 | } 14 | 15 | fn read(&self, _port: u16, _access_size: u8) -> RvmResult { 16 | Err(RvmError::Unsupported) // report error for read 17 | } 18 | 19 | fn write(&self, _port: u16, _access_size: u8, _value: u32) -> RvmResult { 20 | Ok(()) // ignore write 21 | } 22 | } 23 | 24 | impl I8259Pic { 25 | pub const fn new(port_base: u16) -> Self { 26 | Self { port_base } 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /guest/bios/Makefile: -------------------------------------------------------------------------------- 1 | OUT ?= out 2 | 3 | SRC := boot16.S 4 | ldscript := rvm-bios.lds 5 | target := $(OUT)/rvm-bios 6 | target-obj := $(target).o 7 | target-elf := $(target).elf 8 | target-bin := $(target).bin 9 | target-disasm := $(target).asm 10 | 11 | AS ?= as 12 | LD ?= ld 13 | OBJCOPY ?= objcopy 14 | OBJDUMP ?= objdump 15 | 16 | all: $(OUT) $(target).bin 17 | 18 | disasm: 19 | $(OBJDUMP) -d -m i8086 -M intel $(target).elf | less 20 | 21 | $(OUT): 22 | mkdir -p $(OUT) 23 | 24 | $(target-obj): $(SRC) 25 | $(AS) --32 -msyntax=intel -mnaked-reg $< -o $@ 26 | 27 | $(target-elf): $(target-obj) $(ldscript) 28 | $(LD) -T$(ldscript) $< -o $@ 29 | $(OBJDUMP) -d -m i8086 -M intel $@ > $(target-disasm) 30 | 31 | $(target-bin): $(target-elf) 32 | $(OBJCOPY) $< --strip-all -O binary $@ 33 | 34 | clean: 35 | rm -rf $(OUT) 36 | 37 | .PHONY: all disasm clean 38 | -------------------------------------------------------------------------------- /hypervisor/src/mm/heap.rs: -------------------------------------------------------------------------------- 1 | use buddy_system_allocator::LockedHeap; 2 | use core::{alloc::Layout, mem::size_of}; 3 | 4 | use 
crate::config::KERNEL_HEAP_SIZE; 5 | 6 | #[global_allocator] 7 | static HEAP_ALLOCATOR: LockedHeap<32> = LockedHeap::empty(); 8 | 9 | #[alloc_error_handler] 10 | fn handle_alloc_error(layout: Layout) -> ! { 11 | panic!("Heap allocation error, layout = {:?}", layout); 12 | } 13 | 14 | static mut HEAP_SPACE: [u64; KERNEL_HEAP_SIZE / size_of::()] = 15 | [0; KERNEL_HEAP_SIZE / size_of::()]; 16 | 17 | pub(super) fn init() { 18 | let heap_start = unsafe { HEAP_SPACE.as_ptr() as usize }; 19 | println!( 20 | "Initializing heap at: [{:#x}, {:#x})", 21 | heap_start, 22 | heap_start + KERNEL_HEAP_SIZE 23 | ); 24 | unsafe { HEAP_ALLOCATOR.lock().init(heap_start, KERNEL_HEAP_SIZE) } 25 | } 26 | -------------------------------------------------------------------------------- /rvm/src/hal.rs: -------------------------------------------------------------------------------- 1 | use crate::{HostPhysAddr, HostVirtAddr}; 2 | 3 | /// The interfaces which the underlying software (kernel or hypervisor) must implement. 4 | pub trait RvmHal: Sized { 5 | /// Allocates a 4K-sized contiguous physical page, returns its physical address. 6 | fn alloc_page() -> Option; 7 | /// Deallocates the given physical page. 8 | fn dealloc_page(paddr: HostPhysAddr); 9 | /// Converts a physical address to a virtual address which can access. 10 | fn phys_to_virt(paddr: HostPhysAddr) -> HostVirtAddr; 11 | /// Converts a virtual address to the corresponding physical address. 12 | fn virt_to_phys(vaddr: HostVirtAddr) -> HostPhysAddr; 13 | /// VM-Exit handler. 14 | fn vmexit_handler(vcpu: &mut crate::RvmVcpu); 15 | /// Current time in nanoseconds. 16 | fn current_time_nanos() -> u64; 17 | } 18 | -------------------------------------------------------------------------------- /rvm/src/error.rs: -------------------------------------------------------------------------------- 1 | /// The error type for RVM operations. 
/// The error type for RVM operations.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RvmError {
    AlreadyExists,
    BadState,
    InvalidParam,
    OutOfMemory,
    ResourceBusy,
    Unsupported,
}

/// A [`Result`] type with [`RvmError`] as the error type.
///
/// The success type defaults to `()`, so operations that return nothing on
/// success can be written as plain `RvmResult`.
pub type RvmResult<T = ()> = Result<T, RvmError>;

/// Logs a warning naming the given [`RvmError`] variant (and an optional
/// message) and evaluates to that variant.
macro_rules! rvm_err_type {
    ($err: ident) => {{
        use $crate::error::RvmError::*;
        warn!("[RvmError::{:?}]", $err);
        $err
    }};
    ($err: ident, $msg: expr) => {{
        use $crate::error::RvmError::*;
        warn!("[RvmError::{:?}] {}", $err, $msg);
        $err
    }};
}

/// Like `rvm_err_type!`, but wraps the variant in `Err(..)` so it can be
/// returned directly from a function yielding [`RvmResult`].
macro_rules! rvm_err {
    ($err: ident) => {
        Err(rvm_err_type!($err))
    };
    ($err: ident, $msg: expr) => {
        Err(rvm_err_type!($err, $msg))
    };
}
use x86_64::registers::control::{Cr0Flags, Cr4Flags}; 4 | use x86_64::registers::model_specific::EferFlags; 5 | 6 | use crate::config::{BOOT_KERNEL_STACK_SIZE, PHYS_VIRT_OFFSET}; 7 | 8 | const CR0: u64 = Cr0Flags::PROTECTED_MODE_ENABLE.bits() 9 | | Cr0Flags::MONITOR_COPROCESSOR.bits() 10 | | Cr0Flags::TASK_SWITCHED.bits() 11 | | Cr0Flags::NUMERIC_ERROR.bits() 12 | | Cr0Flags::WRITE_PROTECT.bits() 13 | | Cr0Flags::PAGING.bits(); 14 | const CR4: u64 = Cr4Flags::PHYSICAL_ADDRESS_EXTENSION.bits() | Cr4Flags::PAGE_GLOBAL.bits(); 15 | const EFER: u64 = EferFlags::LONG_MODE_ENABLE.bits() | EferFlags::NO_EXECUTE_ENABLE.bits(); 16 | 17 | global_asm!( 18 | include_str!("multiboot.S"), 19 | main_entry = sym crate::main, 20 | offset = const PHYS_VIRT_OFFSET, 21 | boot_stack_size = const BOOT_KERNEL_STACK_SIZE, 22 | cr0 = const CR0, 23 | cr4 = const CR4, 24 | efer_msr = const x86::msr::IA32_EFER, 25 | efer = const EFER, 26 | ); 27 | -------------------------------------------------------------------------------- /hypervisor/linker.lds: -------------------------------------------------------------------------------- 1 | OUTPUT_ARCH(x86_64) 2 | 3 | BASE_ADDRESS = 0xffffff8000200000; 4 | 5 | ENTRY(_start) 6 | SECTIONS 7 | { 8 | . = BASE_ADDRESS; 9 | skernel = .; 10 | 11 | .text : { 12 | stext = .; 13 | *(.text.boot) 14 | *(.text .text.*) 15 | . = ALIGN(4K); 16 | etext = .; 17 | } 18 | 19 | .rodata : { 20 | srodata = .; 21 | *(.rodata .rodata.*) 22 | *(.srodata .srodata.*) 23 | . = ALIGN(4K); 24 | erodata = .; 25 | } 26 | 27 | .data : { 28 | sdata = .; 29 | *(.data.boot_page_table) 30 | *(.data .data.*) 31 | *(.sdata .sdata.*) 32 | *(.got .got.*) 33 | . = ALIGN(4K); 34 | edata = .; 35 | } 36 | 37 | .bss : { 38 | boot_stack = .; 39 | *(.bss.stack) 40 | . = ALIGN(4K); 41 | boot_stack_top = .; 42 | 43 | sbss = .; 44 | *(.bss .bss.*) 45 | *(.sbss .sbss.*) 46 | . 
= ALIGN(4K); 47 | ebss = .; 48 | } 49 | 50 | ekernel = .; 51 | 52 | /DISCARD/ : { 53 | *(.eh_frame) *(.eh_frame_hdr) 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /hypervisor/src/arch/x86_64/lapic.rs: -------------------------------------------------------------------------------- 1 | use x2apic::lapic::{LocalApic, LocalApicBuilder}; 2 | use x86_64::instructions::port::Port; 3 | 4 | use self::vectors::*; 5 | 6 | pub mod vectors { 7 | pub const APIC_TIMER_VECTOR: u8 = 0xf0; 8 | pub const APIC_SPURIOUS_VECTOR: u8 = 0xf1; 9 | pub const APIC_ERROR_VECTOR: u8 = 0xf2; 10 | } 11 | 12 | static mut LOCAL_APIC: Option = None; 13 | 14 | pub fn local_apic<'a>() -> &'a mut LocalApic { 15 | // It's safe as LAPIC is per-cpu. 16 | unsafe { LOCAL_APIC.as_mut().unwrap() } 17 | } 18 | 19 | pub fn init() { 20 | println!("Initializing Local APIC..."); 21 | 22 | unsafe { 23 | // Disable 8259A interrupt controllers 24 | Port::::new(0x20).write(0xff); 25 | Port::::new(0xA0).write(0xff); 26 | } 27 | 28 | let mut lapic = LocalApicBuilder::new() 29 | .timer_vector(APIC_TIMER_VECTOR as _) 30 | .error_vector(APIC_ERROR_VECTOR as _) 31 | .spurious_vector(APIC_SPURIOUS_VECTOR as _) 32 | .build() 33 | .unwrap(); 34 | unsafe { 35 | lapic.enable(); 36 | LOCAL_APIC = Some(lapic); 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /hypervisor/src/arch/x86_64/timer.rs: -------------------------------------------------------------------------------- 1 | use raw_cpuid::CpuId; 2 | use x2apic::lapic::{TimerDivide, TimerMode}; 3 | 4 | use crate::config::TICKS_PER_SEC; 5 | 6 | use super::lapic::local_apic; 7 | 8 | const LAPIC_TICKS_PER_SEC: u64 = 1_000_000_000; // TODO: need to calibrate 9 | 10 | static mut CPU_FREQ_MHZ: u64 = 4_000; 11 | 12 | pub fn current_ticks() -> u64 { 13 | unsafe { core::arch::x86_64::_rdtsc() } 14 | } 15 | 16 | pub fn ticks_to_nanos(ticks: u64) -> u64 { 17 | ticks * 1_000 / unsafe 
{ CPU_FREQ_MHZ } 18 | } 19 | 20 | pub fn init() { 21 | if let Some(freq) = CpuId::new() 22 | .get_processor_frequency_info() 23 | .map(|info| info.processor_base_frequency()) 24 | { 25 | if freq > 0 { 26 | println!("Got TSC frequency by CPUID: {} MHz", freq); 27 | unsafe { CPU_FREQ_MHZ = freq as u64 } 28 | } 29 | } 30 | 31 | let lapic = local_apic(); 32 | unsafe { 33 | lapic.set_timer_mode(TimerMode::Periodic); 34 | lapic.set_timer_divide(TimerDivide::Div256); // indeed it is Div1, the name is confusing. 35 | lapic.set_timer_initial((LAPIC_TICKS_PER_SEC / TICKS_PER_SEC) as u32); 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /hypervisor/src/arch/x86_64/idt.rs: -------------------------------------------------------------------------------- 1 | use x86_64::structures::idt::{Entry, HandlerFunc, InterruptDescriptorTable}; 2 | 3 | const NUM_INT: usize = 256; 4 | 5 | lazy_static::lazy_static! { 6 | static ref IDT: IdtStruct = IdtStruct::new(); 7 | } 8 | 9 | struct IdtStruct { 10 | table: InterruptDescriptorTable, 11 | } 12 | 13 | impl IdtStruct { 14 | fn new() -> Self { 15 | extern "C" { 16 | #[link_name = "trap_handler_table"] 17 | static ENTRIES: [extern "C" fn(); NUM_INT]; 18 | } 19 | let mut idt = Self { 20 | table: InterruptDescriptorTable::new(), 21 | }; 22 | 23 | let entries = unsafe { 24 | core::slice::from_raw_parts_mut( 25 | &mut idt.table as *mut _ as *mut Entry, 26 | NUM_INT, 27 | ) 28 | }; 29 | for i in 0..NUM_INT { 30 | entries[i].set_handler_fn(unsafe { core::mem::transmute(ENTRIES[i]) }); 31 | } 32 | idt 33 | } 34 | 35 | fn load(&'static self) { 36 | self.table.load(); 37 | } 38 | } 39 | 40 | pub fn init() { 41 | println!("Initializing IDT..."); 42 | lazy_static::initialize(&IDT); 43 | IDT.load(); 44 | } 45 | -------------------------------------------------------------------------------- /guest/bios/boot16.S: -------------------------------------------------------------------------------- 1 | .section 
.text 2 | .code16 3 | .global entry16 4 | entry16: 5 | cli 6 | cld 7 | 8 | xor ax, ax 9 | mov ds, ax 10 | mov es, ax 11 | mov ss, ax 12 | 13 | lgdt [prot_gdt_desc] 14 | mov eax, cr0 15 | or eax, 0x1 16 | mov cr0, eax 17 | 18 | ljmp 0x8, entry32 19 | 20 | .code32 21 | .global entry32 22 | entry32: 23 | mov ax, 0x10 24 | mov ds, ax 25 | mov es, ax 26 | mov ss, ax 27 | mov fs, ax 28 | mov gs, ax 29 | 30 | mov esp, 0x7000 # temporary stack 31 | mov ecx, 0x200000 # kernel entry 32 | mov eax, 0x1BADB002 # multiboot magic 33 | mov ebx, 0 # multiboot information (unsupported) 34 | jmp ecx 35 | 36 | .balign 16 37 | prot_gdt: 38 | .quad 0x0000000000000000 # 0x00: null 39 | .quad 0x00cf9b000000ffff # 0x08: code segment (base=0, limit=0xfffff, type=32bit code exec/read, DPL=0, 4k) 40 | .quad 0x00cf93000000ffff # 0x10: data segment (base=0, limit=0xfffff, type=32bit data read/write, DPL=0, 4k) 41 | 42 | prot_gdt_desc: 43 | .short prot_gdt_desc - prot_gdt - 1 # limit 44 | .long prot_gdt # base 45 | -------------------------------------------------------------------------------- /hypervisor/src/hv/device_emu/mod.rs: -------------------------------------------------------------------------------- 1 | mod i8259_pic; 2 | mod lapic; 3 | mod uart16550; 4 | 5 | use alloc::{sync::Arc, vec, vec::Vec}; 6 | 7 | pub use self::lapic::VirtLocalApic; 8 | 9 | pub trait PortIoDevice: Send + Sync { 10 | fn port_range(&self) -> core::ops::Range; 11 | fn read(&self, port: u16, access_size: u8) -> rvm::RvmResult; 12 | fn write(&self, port: u16, access_size: u8, value: u32) -> rvm::RvmResult; 13 | } 14 | 15 | pub struct VirtDeviceList { 16 | port_io_devices: Vec>, 17 | } 18 | 19 | impl VirtDeviceList { 20 | pub fn find_port_io_device(&self, port: u16) -> Option<&Arc> { 21 | self.port_io_devices 22 | .iter() 23 | .find(|dev| dev.port_range().contains(&port)) 24 | } 25 | } 26 | 27 | lazy_static::lazy_static!
{ 28 | static ref VIRT_DEVICES : VirtDeviceList = VirtDeviceList { 29 | port_io_devices: vec![ 30 | Arc::new(uart16550::Uart16550::new(0x3f8)), // COM1 31 | Arc::new(i8259_pic::I8259Pic::new(0x20)), // PIC1 32 | Arc::new(i8259_pic::I8259Pic::new(0xA0)), // PIC2 33 | ], 34 | }; 35 | } 36 | 37 | pub fn all_virt_devices() -> &'static VirtDeviceList { 38 | &VIRT_DEVICES 39 | } 40 | -------------------------------------------------------------------------------- /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: Build CI 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | clippy: 7 | runs-on: ubuntu-latest 8 | strategy: 9 | fail-fast: false 10 | matrix: 11 | arch: [x86_64] 12 | steps: 13 | - uses: actions/checkout@v2 14 | - uses: actions-rs/toolchain@v1 15 | with: 16 | profile: minimal 17 | toolchain: nightly-2022-11-03 18 | override: true 19 | components: rust-src, clippy, rustfmt 20 | - name: Clippy 21 | run: make -C hypervisor clippy ARCH=${{ matrix.arch }} 22 | - name: Check code format 23 | run: cd hypervisor && cargo fmt -- --check 24 | 25 | build: 26 | runs-on: ${{ matrix.os }} 27 | strategy: 28 | fail-fast: false 29 | matrix: 30 | os: [ubuntu-latest] 31 | arch: [x86_64] 32 | steps: 33 | - uses: actions/checkout@v2 34 | - uses: actions-rs/toolchain@v1 35 | with: 36 | profile: minimal 37 | toolchain: nightly-2022-11-03 38 | components: rust-src, llvm-tools-preview 39 | - uses: actions-rs/install@v0.1 40 | with: 41 | crate: cargo-binutils 42 | version: latest 43 | use-tool-cache: true 44 | - name: Build hypervisor 45 | run: make -C hypervisor ARCH=${{ matrix.arch }} 46 | -------------------------------------------------------------------------------- /rvm/src/arch/x86_64/regs.rs: -------------------------------------------------------------------------------- 1 | /// General-Purpose Registers for 64-bit x86 architecture. 
/// Snapshot of the x86-64 general-purpose registers.
///
/// With `repr(C)` the fields sit in memory in declaration order, lowest
/// address first: `rax`, `rcx`, `rdx`, `rbx`, a reserved slot where `rsp`
/// would go, `rbp`, `rsi`, `rdi`, then `r8` through `r15`.
#[repr(C)]
#[derive(Clone, Debug, Default)]
pub struct GeneralRegisters {
    // Legacy registers.
    pub rax: u64,
    pub rcx: u64,
    pub rdx: u64,
    pub rbx: u64,
    // Placeholder keeping the layout contiguous; the stack pointer itself is
    // not stored here.
    _unused_rsp: u64,
    pub rbp: u64,
    pub rsi: u64,
    pub rdi: u64,
    // Registers added by the 64-bit extension.
    pub r8: u64,
    pub r9: u64,
    pub r10: u64,
    pub r11: u64,
    pub r12: u64,
    pub r13: u64,
    pub r14: u64,
    pub r15: u64,
}
qemu_args += -cpu host,+x2apic,+vmx -accel kvm \ 32 | -device loader,addr=0x4000000,file=$(BIOS_IMG),force-raw=on \ 33 | -device loader,addr=0x4001000,file=$(GUEST_IMG),force-raw=on 34 | 35 | ifeq ($(ARCH), x86_64) 36 | qemu_args += \ 37 | -machine q35 \ 38 | -serial mon:stdio \ 39 | -kernel $(target_elf) 40 | endif 41 | 42 | build: $(target_bin) 43 | 44 | $(target_bin): elf 45 | @$(OBJCOPY) $(target_elf) --strip-all -O binary $@ 46 | 47 | elf: 48 | @echo Arch: $(ARCH) 49 | cargo build $(build_args) 50 | 51 | clean: 52 | cargo clean 53 | 54 | clippy: 55 | cargo clippy $(build_args) 56 | 57 | fmt: 58 | cargo fmt 59 | 60 | disasm: 61 | @$(OBJDUMP) $(target_elf) | less 62 | 63 | run: build justrun 64 | 65 | justrun: 66 | $(qemu) $(qemu_args) 67 | 68 | .PHONY: build elf clean clippy disasm run justrun 69 | -------------------------------------------------------------------------------- /hypervisor/src/arch/x86_64/trap.S: -------------------------------------------------------------------------------- 1 | .equ NUM_INT, 256 2 | 3 | .altmacro 4 | .macro DEF_HANDLER, i 5 | .Ltrap_handler_\i: 6 | .if \i == 8 || (\i >= 10 && \i <= 14) || \i == 17 7 | // error code pushed by CPU 8 | push \i // interrupt vector 9 | jmp .Ltrap_common 10 | .else 11 | push 0 // fill in error code in TrapFrame 12 | push \i // interrupt vector 13 | jmp .Ltrap_common 14 | .endif 15 | .endm 16 | 17 | .macro DEF_TABLE_ENTRY, i 18 | .quad .Ltrap_handler_\i 19 | .endm 20 | 21 | .section .text 22 | _trap_handlers: 23 | .set i, 0 24 | .rept NUM_INT 25 | DEF_HANDLER %i 26 | .set i, i + 1 27 | .endr 28 | 29 | .Ltrap_common: 30 | push r15 31 | push r14 32 | push r13 33 | push r12 34 | push r11 35 | push r10 36 | push r9 37 | push r8 38 | push rdi 39 | push rsi 40 | push rbp 41 | push rbx 42 | push rdx 43 | push rcx 44 | push rax 45 | 46 | mov rdi, rsp 47 | call x86_trap_handler 48 | 49 | pop rax 50 | pop rcx 51 | pop rdx 52 | pop rbx 53 | pop rbp 54 | pop rsi 55 | pop rdi 56 | pop r8 57 | pop r9 58 | pop r10 
59 | pop r11 60 | pop r12 61 | pop r13 62 | pop r14 63 | pop r15 64 | 65 | add rsp, 16 // pop vector, error_code 66 | iretq 67 | 68 | .section .rodata 69 | .global trap_handler_table 70 | trap_handler_table: 71 | .set i, 0 72 | .rept NUM_INT 73 | DEF_TABLE_ENTRY %i 74 | .set i, i + 1 75 | .endr 76 | -------------------------------------------------------------------------------- /hypervisor/src/main.rs: -------------------------------------------------------------------------------- 1 | #![no_std] 2 | #![no_main] 3 | #![feature(asm_const)] 4 | #![feature(panic_info_message, alloc_error_handler)] 5 | 6 | #[macro_use] 7 | extern crate log; 8 | 9 | extern crate alloc; 10 | 11 | #[macro_use] 12 | mod logging; 13 | 14 | mod arch; 15 | mod config; 16 | mod hv; 17 | mod mm; 18 | mod timer; 19 | 20 | #[cfg(not(test))] 21 | mod lang_items; 22 | 23 | use core::sync::atomic::{AtomicBool, Ordering}; 24 | 25 | static INIT_OK: AtomicBool = AtomicBool::new(false); 26 | 27 | const LOGO: &str = r" 28 | 29 | RRRRRR VV VV MM MM 30 | RR RR VV VV MMM MMM 31 | RRRRRR VV VV MM MM MM 32 | RR RR VV VV MM MM 33 | RR RR VVV MM MM 34 | ___ ____ ___ ___ 35 | |__ \ / __ \ |__ \ |__ \ 36 | __/ / / / / / __/ / __/ / 37 | / __/ / /_/ / / __/ / __/ 38 | /____/ \____/ /____/ /____/ 39 | "; 40 | 41 | fn clear_bss() { 42 | extern "C" { 43 | fn sbss(); 44 | fn ebss(); 45 | } 46 | unsafe { 47 | core::slice::from_raw_parts_mut(sbss as usize as *mut u8, ebss as usize - sbss as usize) 48 | .fill(0); 49 | } 50 | } 51 | 52 | pub fn init_ok() -> bool { 53 | INIT_OK.load(Ordering::SeqCst) 54 | } 55 | 56 | fn main() -> ! 
{ 57 | clear_bss(); 58 | arch::init_early(); 59 | println!("{}", LOGO); 60 | println!( 61 | "\ 62 | arch = {}\n\ 63 | build_mode = {}\n\ 64 | log_level = {}\n\ 65 | ", 66 | option_env!("ARCH").unwrap_or(""), 67 | option_env!("MODE").unwrap_or(""), 68 | option_env!("LOG").unwrap_or(""), 69 | ); 70 | 71 | mm::init_heap_early(); 72 | logging::init(); 73 | info!("Logging is enabled."); 74 | 75 | arch::init(); 76 | mm::init(); 77 | INIT_OK.store(true, Ordering::SeqCst); 78 | println!("Initialization completed.\n"); 79 | 80 | hv::run(); 81 | } 82 | -------------------------------------------------------------------------------- /rvm/src/arch/x86_64/msr.rs: -------------------------------------------------------------------------------- 1 | use x86::msr::{rdmsr, wrmsr}; 2 | 3 | /// X86 model-specific registers. (SDM Vol. 4) 4 | #[repr(u32)] 5 | #[derive(Debug, Copy, Clone)] 6 | #[allow(non_camel_case_types, dead_code)] 7 | pub enum Msr { 8 | IA32_FEATURE_CONTROL = 0x3a, 9 | 10 | IA32_PAT = 0x277, 11 | 12 | IA32_VMX_BASIC = 0x480, 13 | IA32_VMX_PINBASED_CTLS = 0x481, 14 | IA32_VMX_PROCBASED_CTLS = 0x482, 15 | IA32_VMX_EXIT_CTLS = 0x483, 16 | IA32_VMX_ENTRY_CTLS = 0x484, 17 | IA32_VMX_MISC = 0x485, 18 | IA32_VMX_CR0_FIXED0 = 0x486, 19 | IA32_VMX_CR0_FIXED1 = 0x487, 20 | IA32_VMX_CR4_FIXED0 = 0x488, 21 | IA32_VMX_CR4_FIXED1 = 0x489, 22 | IA32_VMX_PROCBASED_CTLS2 = 0x48b, 23 | IA32_VMX_EPT_VPID_CAP = 0x48c, 24 | IA32_VMX_TRUE_PINBASED_CTLS = 0x48d, 25 | IA32_VMX_TRUE_PROCBASED_CTLS = 0x48e, 26 | IA32_VMX_TRUE_EXIT_CTLS = 0x48f, 27 | IA32_VMX_TRUE_ENTRY_CTLS = 0x490, 28 | 29 | IA32_EFER = 0xc000_0080, 30 | IA32_STAR = 0xc000_0081, 31 | IA32_LSTAR = 0xc000_0082, 32 | IA32_CSTAR = 0xc000_0083, 33 | IA32_FMASK = 0xc000_0084, 34 | 35 | IA32_FS_BASE = 0xc000_0100, 36 | IA32_GS_BASE = 0xc000_0101, 37 | IA32_KERNEL_GSBASE = 0xc000_0102, 38 | } 39 | 40 | impl Msr { 41 | /// Read 64 bits msr register. 
42 | #[inline(always)] 43 | pub fn read(self) -> u64 { 44 | unsafe { rdmsr(self as _) } 45 | } 46 | 47 | /// Write 64 bits to msr register. 48 | /// 49 | /// # Safety 50 | /// 51 | /// The caller must ensure that this write operation has no unsafe side 52 | /// effects. 53 | #[inline(always)] 54 | pub unsafe fn write(self, value: u64) { 55 | wrmsr(self as _, value) 56 | } 57 | } 58 | 59 | pub(super) trait MsrReadWrite { 60 | const MSR: Msr; 61 | 62 | fn read_raw() -> u64 { 63 | Self::MSR.read() 64 | } 65 | 66 | unsafe fn write_raw(flags: u64) { 67 | Self::MSR.write(flags); 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /hypervisor/src/mm/frame.rs: -------------------------------------------------------------------------------- 1 | use bitmap_allocator::BitAlloc; 2 | use spin::Mutex; 3 | 4 | use super::address::{align_down, align_up, virt_to_phys, PhysAddr}; 5 | use super::PAGE_SIZE; 6 | use crate::config::PHYS_MEMORY_END; 7 | 8 | // Support max 1M * 4096 = 1GB memory. 
9 | type FrameAlloc = bitmap_allocator::BitAlloc1M; 10 | 11 | static FRAME_ALLOCATOR: Mutex = Mutex::new(FrameAllocator::empty()); 12 | 13 | struct FrameAllocator { 14 | base: PhysAddr, 15 | inner: FrameAlloc, 16 | } 17 | 18 | impl FrameAllocator { 19 | const fn empty() -> Self { 20 | Self { 21 | base: 0, 22 | inner: FrameAlloc::DEFAULT, 23 | } 24 | } 25 | 26 | fn init(&mut self, base: PhysAddr, size: usize) { 27 | self.base = align_up(base); 28 | let page_count = align_up(size) / PAGE_SIZE; 29 | self.inner.insert(0..page_count); 30 | } 31 | 32 | unsafe fn alloc(&mut self) -> Option { 33 | let ret = self.inner.alloc().map(|idx| idx * PAGE_SIZE + self.base); 34 | trace!("Allocate frame: {:x?}", ret); 35 | ret 36 | } 37 | 38 | unsafe fn dealloc(&mut self, target: PhysAddr) { 39 | trace!("Deallocate frame: {:x}", target); 40 | self.inner.dealloc((target - self.base) / PAGE_SIZE) 41 | } 42 | } 43 | 44 | pub unsafe fn alloc_page() -> Option { 45 | FRAME_ALLOCATOR.lock().alloc() 46 | } 47 | 48 | pub unsafe fn dealloc_page(paddr: PhysAddr) { 49 | FRAME_ALLOCATOR.lock().dealloc(paddr) 50 | } 51 | 52 | pub(super) fn init() { 53 | extern "C" { 54 | fn ekernel(); 55 | } 56 | 57 | let mem_pool_start = align_up(virt_to_phys(ekernel as usize)); 58 | let mem_pool_end = align_down(PHYS_MEMORY_END); 59 | let mem_pool_size = mem_pool_end - mem_pool_start; 60 | println!( 61 | "Initializing frame allocator at: [{:#x?}, {:#x?})", 62 | mem_pool_start, mem_pool_end 63 | ); 64 | FRAME_ALLOCATOR.lock().init(mem_pool_start, mem_pool_size); 65 | } 66 | -------------------------------------------------------------------------------- /rvm/src/arch/x86_64/vmx/instructions.rs: -------------------------------------------------------------------------------- 1 | use core::arch::asm; 2 | use x86::bits64::rflags::{self, RFlags}; 3 | use x86::vmx::{Result, VmFail}; 4 | 5 | /// Helper used to extract VMX-specific Result in accordance with 6 | /// conventions described in Intel SDM, Volume 3C, Section 
30.2. 7 | // We inline this to provide an obstruction-free path from this function's 8 | // call site to the moment where `rflags::read()` reads RFLAGS. Otherwise it's 9 | // possible for RFLAGS register to be clobbered by a function prologue, 10 | // see https://github.com/gz/rust-x86/pull/50. 11 | #[inline(always)] 12 | fn vmx_capture_status() -> Result<()> { 13 | let flags = rflags::read(); 14 | 15 | if flags.contains(RFlags::FLAGS_ZF) { 16 | Err(VmFail::VmFailValid) 17 | } else if flags.contains(RFlags::FLAGS_CF) { 18 | Err(VmFail::VmFailInvalid) 19 | } else { 20 | Ok(()) 21 | } 22 | } 23 | 24 | /// INVEPT type. (SDM Vol. 3C, Section 30.3) 25 | #[repr(u64)] 26 | #[derive(Debug)] 27 | #[allow(dead_code)] 28 | pub enum InvEptType { 29 | /// The logical processor invalidates all mappings associated with bits 30 | /// 51:12 of the EPT pointer (EPTP) specified in the INVEPT descriptor. 31 | /// It may invalidate other mappings as well. 32 | SingleContext = 1, 33 | /// The logical processor invalidates mappings associated with all EPTPs. 34 | Global = 2, 35 | } 36 | 37 | /// Invalidate Translations Derived from EPT. (SDM Vol. 3C, Section 30.3) 38 | /// 39 | /// Invalidates mappings in the translation lookaside buffers (TLBs) and 40 | /// paging-structure caches that were derived from extended page tables (EPT). 41 | /// (See Chapter 28, “VMX Support for Address Translation”.) Invalidation is 42 | /// based on the INVEPT type specified in the register operand and the INVEPT 43 | /// descriptor specified in the memory operand. 
44 | pub unsafe fn invept(inv_type: InvEptType, eptp: u64) -> Result<()> { 45 | let invept_desc = [eptp, 0]; 46 | asm!("invept {0}, [{1}]", in(reg) inv_type as u64, in(reg) &invept_desc); 47 | vmx_capture_status() 48 | } 49 | -------------------------------------------------------------------------------- /hypervisor/src/arch/x86_64/gdt.rs: -------------------------------------------------------------------------------- 1 | use x86_64::instructions::tables::{lgdt, load_tss}; 2 | use x86_64::registers::segmentation::{Segment, SegmentSelector, CS}; 3 | use x86_64::structures::gdt::{Descriptor, DescriptorFlags}; 4 | use x86_64::structures::{tss::TaskStateSegment, DescriptorTablePointer}; 5 | use x86_64::{addr::VirtAddr, PrivilegeLevel}; 6 | 7 | lazy_static::lazy_static! { 8 | static ref TSS: TaskStateSegment = TaskStateSegment::new(); 9 | static ref GDT: GdtStruct = GdtStruct::new(&TSS); 10 | } 11 | 12 | struct GdtStruct { 13 | table: [u64; 16], 14 | } 15 | 16 | impl GdtStruct { 17 | pub const KCODE_SELECTOR: SegmentSelector = SegmentSelector::new(1, PrivilegeLevel::Ring0); 18 | pub const _KDATA_SELECTOR: SegmentSelector = SegmentSelector::new(2, PrivilegeLevel::Ring0); 19 | pub const TSS_SELECTOR: SegmentSelector = SegmentSelector::new(3, PrivilegeLevel::Ring0); 20 | 21 | pub fn new(tss: &'static TaskStateSegment) -> Self { 22 | let mut table = [0; 16]; 23 | table[1] = DescriptorFlags::KERNEL_CODE64.bits(); // 0x00af9b000000ffff 24 | table[2] = DescriptorFlags::KERNEL_DATA.bits(); // 0x00cf93000000ffff 25 | if let Descriptor::SystemSegment(low, high) = Descriptor::tss_segment(tss) { 26 | table[3] = low; 27 | table[4] = high; 28 | } 29 | Self { table } 30 | } 31 | 32 | fn pointer(&self) -> DescriptorTablePointer { 33 | DescriptorTablePointer { 34 | base: VirtAddr::new(self.table.as_ptr() as u64), 35 | limit: (core::mem::size_of_val(&self.table) - 1) as u16, 36 | } 37 | } 38 | 39 | pub fn load(&'static self) { 40 | unsafe { 41 | lgdt(&self.pointer()); 42 | 
CS::set_reg(GdtStruct::KCODE_SELECTOR); 43 | } 44 | } 45 | 46 | pub fn load_tss(&'static self, selector: SegmentSelector) { 47 | unsafe { load_tss(selector) }; 48 | } 49 | } 50 | 51 | pub fn init() { 52 | println!("Initializing GDT..."); 53 | lazy_static::initialize(&GDT); 54 | GDT.load(); 55 | GDT.load_tss(GdtStruct::TSS_SELECTOR); 56 | } 57 | -------------------------------------------------------------------------------- /hypervisor/src/arch/x86_64/trap.rs: -------------------------------------------------------------------------------- 1 | use core::arch::global_asm; 2 | 3 | use x86::{controlregs::cr2, irq::*}; 4 | 5 | use super::lapic::{local_apic, vectors::*}; 6 | 7 | global_asm!(include_str!("trap.S")); 8 | 9 | const IRQ_VECTOR_START: u8 = 0x20; 10 | const IRQ_VECTOR_END: u8 = 0xff; 11 | 12 | #[repr(C)] 13 | #[derive(Debug, Default, Clone, Copy)] 14 | pub struct TrapFrame { 15 | pub rax: u64, 16 | pub rcx: u64, 17 | pub rdx: u64, 18 | pub rbx: u64, 19 | pub rbp: u64, 20 | pub rsi: u64, 21 | pub rdi: u64, 22 | pub r8: u64, 23 | pub r9: u64, 24 | pub r10: u64, 25 | pub r11: u64, 26 | pub r12: u64, 27 | pub r13: u64, 28 | pub r14: u64, 29 | pub r15: u64, 30 | 31 | // Pushed by 'trap.S' 32 | pub vector: u64, 33 | pub error_code: u64, 34 | 35 | // Pushed by CPU 36 | pub rip: u64, 37 | pub cs: u64, 38 | pub rflags: u64, 39 | pub rsp: u64, 40 | pub ss: u64, 41 | } 42 | 43 | #[no_mangle] 44 | fn x86_trap_handler(tf: &mut TrapFrame) { 45 | trace!("trap {} @ {:#x}: {:#x?}", tf.vector, tf.rip, tf); 46 | match tf.vector as u8 { 47 | PAGE_FAULT_VECTOR => { 48 | panic!( 49 | "Hypervisor Page Fault @ {:#x}, fault_vaddr={:#x}, error_code={:#x}", 50 | tf.rip, 51 | unsafe { cr2() }, 52 | tf.error_code, 53 | ); 54 | } 55 | GENERAL_PROTECTION_FAULT_VECTOR => { 56 | panic!( 57 | "General Protection Exception @ {:#x}, error_code = {:#x}, kernel killed it.", 58 | tf.rip, tf.error_code, 59 | ); 60 | } 61 | IRQ_VECTOR_START..=IRQ_VECTOR_END => handle_irq(tf.vector as u8), 62 | _ => 
{ 63 | panic!( 64 | "Unhandled exception {} (error_code = {:#x}) @ {:#x}:\n{:#x?}", 65 | tf.vector, tf.error_code, tf.rip, tf 66 | ); 67 | } 68 | } 69 | } 70 | 71 | pub fn handle_irq(vector: u8) { 72 | match vector { 73 | APIC_TIMER_VECTOR => { 74 | trace!("TIMER"); 75 | unsafe { local_apic().end_of_interrupt() }; 76 | } 77 | _ => warn!("Unhandled IRQ {}", vector), 78 | } 79 | } 80 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # RVM-Tutorial 2 | 3 | Let's write an x86 hypervisor in Rust from scratch! 4 | 5 | ## Features 6 | 7 | * Lightweight enough, only 3K+ LoC 8 | * Supported guest OS: [NimbOS](https://github.com/equation314/nimbos) 9 | * Guest/host memory isolation with nested paging 10 | * Device emulation: 11 | + serial port I/O 12 | + APIC timer 13 | * Currently, only supports single core single vCPU and single guest 14 | 15 | ## Install Build Dependencies 16 | 17 | Install [cargo-binutils](https://github.com/rust-embedded/cargo-binutils) to use `rust-objcopy` and `rust-objdump` tools: 18 | 19 | ```console 20 | $ cargo install cargo-binutils 21 | ``` 22 | 23 | You also need to install [musl-gcc](http://musl.cc/x86_64-linux-musl-cross.tgz) to build guest user applications. 24 | 25 | ## Build Guest OS 26 | 27 | ```console 28 | $ git submodule init && git submodule update 29 | $ cd guest/nimbos/kernel 30 | $ make user 31 | $ make GUEST=on 32 | ``` 33 | 34 | ## Build Guest BIOS 35 | 36 | ```console 37 | $ cd guest/bios 38 | $ make 39 | ``` 40 | 41 | ## Build & Run Hypervisor 42 | 43 | ```console 44 | $ cd hypervisor 45 | $ make run [LOG=warn|info|debug|trace] 46 | ...... 47 | Booting from ROM..
48 | 49 | RRRRRR VV VV MM MM 50 | RR RR VV VV MMM MMM 51 | RRRRRR VV VV MM MM MM 52 | RR RR VV VV MM MM 53 | RR RR VVV MM MM 54 | ___ ____ ___ ___ 55 | |__ \ / __ \ |__ \ |__ \ 56 | __/ / / / / / __/ / __/ / 57 | / __/ / /_/ / / __/ / __/ 58 | /____/ \____/ /____/ /____/ 59 | 60 | arch = x86_64 61 | build_mode = release 62 | log_level = info 63 | ...... 64 | Running guest... 65 | 66 | NN NN iii bb OOOOO SSSSS 67 | NNN NN mm mm mmmm bb OO OO SS 68 | NN N NN iii mmm mm mm bbbbbb OO OO SSSSS 69 | NN NNN iii mmm mm mm bb bb OO OO SS 70 | NN NN iii mmm mm mm bbbbbb OOOO0 SSSSS 71 | ___ ____ ___ ___ 72 | |__ \ / __ \ |__ \ |__ \ 73 | __/ / / / / / __/ / __/ / 74 | / __/ / /_/ / / __/ / __/ 75 | /____/ \____/ /____/ /____/ 76 | 77 | arch = x86_64 78 | platform = rvm-guest-x86_64 79 | build_mode = release 80 | log_level = warn 81 | ...... 82 | ``` 83 | 84 | ## Documents 85 | 86 | * [in Chinese](https://github.com/equation314/RVM-Tutorial/wiki) 87 | -------------------------------------------------------------------------------- /rvm/src/lib.rs: -------------------------------------------------------------------------------- 1 | #![no_std] 2 | #![feature(asm_const)] 3 | #![feature(concat_idents)] 4 | #![feature(naked_functions)] 5 | 6 | extern crate alloc; 7 | #[macro_use] 8 | extern crate log; 9 | 10 | #[macro_use] 11 | mod error; 12 | mod hal; 13 | mod mm; 14 | 15 | pub mod arch; 16 | 17 | use arch::ArchPerCpuState; 18 | 19 | pub use arch::{NestedPageTable, RvmVcpu}; 20 | pub use error::{RvmError, RvmResult}; 21 | pub use hal::RvmHal; 22 | pub use mm::{GuestPhysAddr, GuestVirtAddr, HostPhysAddr, HostVirtAddr}; 23 | pub use mm::{Level4PageTable, MemFlags, NestedPageFaultInfo}; 24 | 25 | /// Whether the hardware has virtualization support. 26 | pub fn has_hardware_support() -> bool { 27 | arch::has_hardware_support() 28 | } 29 | 30 | /// Host per-CPU states to run the guest. All methods must be called on the corresponding CPU. 
31 | pub struct RvmPerCpu { 32 | _cpu_id: usize, 33 | arch: ArchPerCpuState, 34 | } 35 | 36 | impl RvmPerCpu { 37 | /// Create an uninitialized instance. 38 | pub fn new(cpu_id: usize) -> Self { 39 | Self { 40 | _cpu_id: cpu_id, 41 | arch: ArchPerCpuState::new(), 42 | } 43 | } 44 | 45 | /// Whether the current CPU has hardware virtualization enabled. 46 | pub fn is_enabled(&self) -> bool { 47 | self.arch.is_enabled() 48 | } 49 | 50 | /// Enable hardware virtualization on the current CPU. 51 | pub fn hardware_enable(&mut self) -> RvmResult { 52 | self.arch.hardware_enable() 53 | } 54 | 55 | /// Disable hardware virtualization on the current CPU. 56 | pub fn hardware_disable(&mut self) -> RvmResult { 57 | self.arch.hardware_disable() 58 | } 59 | 60 | /// Create a [`RvmVcpu`], set the entry point to `entry`, set the nested 61 | /// page table root to `npt_root`. 62 | pub fn create_vcpu( 63 | &self, 64 | entry: GuestPhysAddr, 65 | npt_root: HostPhysAddr, 66 | ) -> RvmResult> { 67 | if !self.is_enabled() { 68 | rvm_err!(BadState, "virtualization is not enabled") 69 | } else { 70 | RvmVcpu::new(&self.arch, entry, npt_root) 71 | } 72 | } 73 | } 74 | 75 | impl Drop for RvmPerCpu { 76 | fn drop(&mut self) { 77 | if self.is_enabled() { 78 | self.hardware_disable().unwrap(); 79 | } 80 | } 81 | } 82 | -------------------------------------------------------------------------------- /rvm/src/mm/mod.rs: -------------------------------------------------------------------------------- 1 | mod page_table; 2 | 3 | use core::marker::PhantomData; 4 | 5 | use crate::{RvmHal, RvmResult}; 6 | 7 | pub use page_table::{GenericPTE, Level4PageTable}; 8 | 9 | pub const PAGE_SIZE: usize = 0x1000; 10 | 11 | /// Guest virtual address. 12 | pub type GuestVirtAddr = usize; 13 | /// Guest physical address. 14 | pub type GuestPhysAddr = usize; 15 | /// Host virtual address. 16 | pub type HostVirtAddr = usize; 17 | /// Host physical address. 
18 | pub type HostPhysAddr = usize; 19 | 20 | bitflags::bitflags! { 21 | /// Permission and type of a guest physical memory region. 22 | pub struct MemFlags: u64 { 23 | const READ = 1 << 0; 24 | const WRITE = 1 << 1; 25 | const EXECUTE = 1 << 2; 26 | const DEVICE = 1 << 3; 27 | } 28 | } 29 | 30 | /// Information about nested page faults. 31 | #[derive(Debug)] 32 | pub struct NestedPageFaultInfo { 33 | /// Access type that caused the nested page fault. 34 | pub access_flags: MemFlags, 35 | /// Guest physical address that caused the nested page fault. 36 | pub fault_guest_paddr: GuestPhysAddr, 37 | } 38 | 39 | /// A 4K-sized contiguous physical memory page, it will deallocate the page 40 | /// automatically on drop. 41 | #[derive(Debug)] 42 | pub struct PhysFrame { 43 | start_paddr: HostPhysAddr, 44 | _phantom: PhantomData, 45 | } 46 | 47 | impl PhysFrame { 48 | pub fn alloc() -> RvmResult { 49 | let start_paddr = H::alloc_page() 50 | .ok_or_else(|| rvm_err_type!(OutOfMemory, "allocate physical frame failed"))?; 51 | assert_ne!(start_paddr, 0); 52 | debug!("[RVM] allocated PhysFrame({:#x})", start_paddr); 53 | Ok(Self { 54 | start_paddr, 55 | _phantom: PhantomData, 56 | }) 57 | } 58 | 59 | pub fn alloc_zero() -> RvmResult { 60 | let mut f = Self::alloc()?; 61 | f.fill(0); 62 | Ok(f) 63 | } 64 | 65 | pub const unsafe fn uninit() -> Self { 66 | Self { 67 | start_paddr: 0, 68 | _phantom: PhantomData, 69 | } 70 | } 71 | 72 | pub fn start_paddr(&self) -> HostPhysAddr { 73 | self.start_paddr 74 | } 75 | 76 | pub fn as_mut_ptr(&self) -> *mut u8 { 77 | H::phys_to_virt(self.start_paddr) as *mut u8 78 | } 79 | 80 | pub fn fill(&mut self, byte: u8) { 81 | unsafe { core::ptr::write_bytes(self.as_mut_ptr(), byte, PAGE_SIZE) } 82 | } 83 | } 84 | 85 | impl Drop for PhysFrame { 86 | fn drop(&mut self) { 87 | if self.start_paddr > 0 { 88 | H::dealloc_page(self.start_paddr); 89 | debug!("[RVM] deallocated PhysFrame({:#x})", self.start_paddr); 90 | } 91 | } 92 | } 93 | 
-------------------------------------------------------------------------------- /hypervisor/src/arch/x86_64/uart16550.rs: -------------------------------------------------------------------------------- 1 | //! Uart 16550. 2 | 3 | use spin::Mutex; 4 | use x86_64::instructions::port::{Port, PortReadOnly, PortWriteOnly}; 5 | 6 | const UART_CLOCK_FACTOR: usize = 16; 7 | const OSC_FREQ: usize = 1_843_200; 8 | 9 | static COM1: Mutex = Mutex::new(Uart16550::new(0x3f8)); 10 | 11 | bitflags::bitflags! { 12 | /// Line status flags 13 | struct LineStsFlags: u8 { 14 | const INPUT_FULL = 1; 15 | // 1 to 4 unknown 16 | const OUTPUT_EMPTY = 1 << 5; 17 | // 6 and 7 unknown 18 | } 19 | } 20 | 21 | struct Uart16550 { 22 | data: Port, 23 | int_en: PortWriteOnly, 24 | fifo_ctrl: PortWriteOnly, 25 | line_ctrl: PortWriteOnly, 26 | modem_ctrl: PortWriteOnly, 27 | line_sts: PortReadOnly, 28 | } 29 | 30 | impl Uart16550 { 31 | const fn new(port: u16) -> Self { 32 | Self { 33 | data: Port::new(port), 34 | int_en: PortWriteOnly::new(port + 1), 35 | fifo_ctrl: PortWriteOnly::new(port + 2), 36 | line_ctrl: PortWriteOnly::new(port + 3), 37 | modem_ctrl: PortWriteOnly::new(port + 4), 38 | line_sts: PortReadOnly::new(port + 5), 39 | } 40 | } 41 | 42 | fn init(&mut self, baud_rate: usize) { 43 | unsafe { 44 | // Disable interrupts 45 | self.int_en.write(0x00); 46 | 47 | // Enable DLAB 48 | self.line_ctrl.write(0x80); 49 | 50 | // Set maximum speed according the input baud rate by configuring DLL and DLM 51 | let divisor = OSC_FREQ / (baud_rate * UART_CLOCK_FACTOR); 52 | self.data.write((divisor & 0xff) as u8); 53 | self.int_en.write((divisor >> 8) as u8); 54 | 55 | // Disable DLAB and set data word length to 8 bits 56 | self.line_ctrl.write(0x03); 57 | 58 | // Enable FIFO, clear TX/RX queues and 59 | // set interrupt watermark at 14 bytes 60 | self.fifo_ctrl.write(0xC7); 61 | 62 | // Mark data terminal ready, signal request to send 63 | // and enable auxilliary output #2 (used as interrupt line 
for CPU) 64 | self.modem_ctrl.write(0x0B); 65 | } 66 | } 67 | 68 | fn line_sts(&mut self) -> LineStsFlags { 69 | unsafe { LineStsFlags::from_bits_truncate(self.line_sts.read()) } 70 | } 71 | 72 | fn putchar(&mut self, c: u8) { 73 | while !self.line_sts().contains(LineStsFlags::OUTPUT_EMPTY) {} 74 | unsafe { self.data.write(c) }; 75 | } 76 | 77 | fn getchar(&mut self) -> Option { 78 | if self.line_sts().contains(LineStsFlags::INPUT_FULL) { 79 | unsafe { Some(self.data.read()) } 80 | } else { 81 | None 82 | } 83 | } 84 | } 85 | 86 | pub fn console_putchar(c: u8) { 87 | COM1.lock().putchar(c); 88 | } 89 | 90 | pub fn console_getchar() -> Option { 91 | COM1.lock().getchar() 92 | } 93 | 94 | pub fn init() { 95 | COM1.lock().init(115200); 96 | } 97 | -------------------------------------------------------------------------------- /hypervisor/src/hv/mod.rs: -------------------------------------------------------------------------------- 1 | mod device_emu; 2 | mod gconfig; 3 | mod gpm; 4 | mod hal; 5 | mod vmexit; 6 | 7 | use rvm::{GuestPhysAddr, HostPhysAddr, HostVirtAddr, MemFlags, RvmPerCpu, RvmResult}; 8 | 9 | use self::gconfig::*; 10 | use self::gpm::{GuestMemoryRegion, GuestPhysMemorySet}; 11 | use self::hal::RvmHalImpl; 12 | use crate::mm::address::{phys_to_virt, virt_to_phys}; 13 | 14 | #[repr(align(4096))] 15 | struct AlignedMemory([u8; LEN]); 16 | 17 | static mut GUEST_PHYS_MEMORY: AlignedMemory = 18 | AlignedMemory([0; GUEST_PHYS_MEMORY_SIZE]); 19 | 20 | fn gpa_as_mut_ptr(guest_paddr: GuestPhysAddr) -> *mut u8 { 21 | let offset = unsafe { &GUEST_PHYS_MEMORY as *const _ as usize }; 22 | let host_vaddr = guest_paddr + offset; 23 | host_vaddr as *mut u8 24 | } 25 | 26 | fn load_guest_image(hpa: HostPhysAddr, load_gpa: GuestPhysAddr, size: usize) { 27 | let image_ptr = phys_to_virt(hpa) as *const u8; 28 | let image = unsafe { core::slice::from_raw_parts(image_ptr, size) }; 29 | unsafe { 30 | core::slice::from_raw_parts_mut(gpa_as_mut_ptr(load_gpa), 
size).copy_from_slice(image) 31 | } 32 | } 33 | 34 | fn setup_gpm() -> RvmResult { 35 | // copy BIOS and guest images 36 | load_guest_image(BIOS_PADDR, BIOS_ENTRY, BIOS_SIZE); 37 | load_guest_image(GUEST_IMAGE_PADDR, GUEST_ENTRY, GUEST_IMAGE_SIZE); 38 | 39 | // create nested page table and add mapping 40 | let mut gpm = GuestPhysMemorySet::new()?; 41 | let guest_memory_regions = [ 42 | GuestMemoryRegion { 43 | // RAM 44 | gpa: GUEST_PHYS_MEMORY_BASE, 45 | hpa: virt_to_phys(gpa_as_mut_ptr(GUEST_PHYS_MEMORY_BASE) as HostVirtAddr), 46 | size: GUEST_PHYS_MEMORY_SIZE, 47 | flags: MemFlags::READ | MemFlags::WRITE | MemFlags::EXECUTE, 48 | }, 49 | GuestMemoryRegion { 50 | // IO APIC 51 | gpa: 0xfec0_0000, 52 | hpa: 0xfec0_0000, 53 | size: 0x1000, 54 | flags: MemFlags::READ | MemFlags::WRITE | MemFlags::DEVICE, 55 | }, 56 | GuestMemoryRegion { 57 | // HPET 58 | gpa: 0xfed0_0000, 59 | hpa: 0xfed0_0000, 60 | size: 0x1000, 61 | flags: MemFlags::READ | MemFlags::WRITE | MemFlags::DEVICE, 62 | }, 63 | GuestMemoryRegion { 64 | // Local APIC 65 | gpa: 0xfee0_0000, 66 | hpa: 0xfee0_0000, 67 | size: 0x1000, 68 | flags: MemFlags::READ | MemFlags::WRITE | MemFlags::DEVICE, 69 | }, 70 | ]; 71 | for r in guest_memory_regions.into_iter() { 72 | gpm.map_region(r.into())?; 73 | } 74 | Ok(gpm) 75 | } 76 | 77 | pub fn run() -> ! { 78 | println!("Starting virtualization..."); 79 | println!("Hardware support: {:?}", rvm::has_hardware_support()); 80 | 81 | let mut percpu = RvmPerCpu::::new(0); 82 | percpu.hardware_enable().unwrap(); 83 | 84 | let gpm = setup_gpm().unwrap(); 85 | info!("{:#x?}", gpm); 86 | 87 | let mut vcpu = percpu 88 | .create_vcpu(BIOS_ENTRY, gpm.nest_page_table_root()) 89 | .unwrap(); 90 | 91 | println!("Running guest..."); 92 | vcpu.run(); 93 | } 94 | -------------------------------------------------------------------------------- /hypervisor/src/hv/device_emu/lapic.rs: -------------------------------------------------------------------------------- 1 | //! 
Emulated Local APIC. (SDM Vol. 3A, Chapter 10) 2 | 3 | #![allow(dead_code)] 4 | 5 | use rvm::{RvmError, RvmResult, RvmVcpu}; 6 | 7 | type Vcpu = RvmVcpu; 8 | 9 | /// ID register. 10 | const APICID: u32 = 0x2; 11 | /// Version register. 12 | const VERSION: u32 = 0x3; 13 | /// EOI register. 14 | const EOI: u32 = 0xB; 15 | /// Logical Destination Register. 16 | const LDR: u32 = 0xD; 17 | /// Spurious Interrupt Vector register. 18 | const SIVR: u32 = 0xF; 19 | /// Interrupt Command register. 20 | const ICR: u32 = 0x30; 21 | /// LVT Timer Interrupt register. 22 | const LVT_TIMER: u32 = 0x32; 23 | /// LVT Thermal Sensor Interrupt register. 24 | const LVT_THERMAL: u32 = 0x33; 25 | /// LVT Performance Monitor register. 26 | const LVT_PMI: u32 = 0x34; 27 | /// LVT LINT0 register. 28 | const LVT_LINT0: u32 = 0x35; 29 | /// LVT LINT1 register. 30 | const LVT_LINT1: u32 = 0x36; 31 | /// LVT Error register. 32 | const LVT_ERR: u32 = 0x37; 33 | /// Initial Count register. 34 | const INIT_COUNT: u32 = 0x38; 35 | /// Current Count register. 36 | const CUR_COUNT: u32 = 0x39; 37 | /// Divide Configuration register. 38 | const DIV_CONF: u32 = 0x3E; 39 | 40 | pub struct VirtLocalApic; 41 | 42 | impl VirtLocalApic { 43 | pub const fn msr_range() -> core::ops::Range { 44 | 0x800..0x840 45 | } 46 | 47 | pub fn rdmsr(vcpu: &mut Vcpu, msr: u32) -> RvmResult { 48 | Self::read(vcpu, msr - 0x800) 49 | } 50 | 51 | pub fn wrmsr(vcpu: &mut Vcpu, msr: u32, value: u64) -> RvmResult { 52 | Self::write(vcpu, msr - 0x800, value) 53 | } 54 | } 55 | 56 | impl VirtLocalApic { 57 | fn read(vcpu: &mut Vcpu, offset: u32) -> RvmResult { 58 | let apic_timer = vcpu.apic_timer_mut(); 59 | match offset { 60 | SIVR => Ok(0x1ff), // SDM Vol. 3A, Section 10.9, Figure 10-23 (with Software Enable bit) 61 | LVT_THERMAL | LVT_PMI | LVT_LINT0 | LVT_LINT1 | LVT_ERR => { 62 | Ok(0x1_0000) // SDM Vol. 
3A, Section 10.5.1, Figure 10-8 (with Mask bit) 63 | } 64 | LVT_TIMER => Ok(apic_timer.lvt_timer() as u64), 65 | INIT_COUNT => Ok(apic_timer.initial_count() as u64), 66 | DIV_CONF => Ok(apic_timer.divide() as u64), 67 | CUR_COUNT => Ok(apic_timer.current_counter() as u64), 68 | _ => Err(RvmError::Unsupported), 69 | } 70 | } 71 | 72 | fn write(vcpu: &mut Vcpu, offset: u32, value: u64) -> RvmResult { 73 | if offset != ICR && (value >> 32) != 0 { 74 | return Err(RvmError::InvalidParam); // all registers except ICR are 32-bits 75 | } 76 | let apic_timer = vcpu.apic_timer_mut(); 77 | match offset { 78 | EOI => { 79 | if value != 0 { 80 | Err(RvmError::InvalidParam) // write a non-zero value causes #GP 81 | } else { 82 | Ok(()) 83 | } 84 | } 85 | SIVR | LVT_THERMAL | LVT_PMI | LVT_LINT0 | LVT_LINT1 | LVT_ERR => { 86 | Ok(()) // ignore these register writes 87 | } 88 | LVT_TIMER => apic_timer.set_lvt_timer(value as u32), 89 | INIT_COUNT => apic_timer.set_initial_count(value as u32), 90 | DIV_CONF => apic_timer.set_divide(value as u32), 91 | _ => Err(RvmError::Unsupported), 92 | } 93 | } 94 | } 95 | -------------------------------------------------------------------------------- /hypervisor/src/arch/x86_64/multiboot.S: -------------------------------------------------------------------------------- 1 | .equ MULTIBOOT_HEADER_MAGIC, 0x1BADB002 2 | .equ MULTIBOOT_HEADER_FLAGS, 0x00010002 3 | .equ MULTIBOOT_CHECKSUM, -(MULTIBOOT_HEADER_MAGIC + MULTIBOOT_HEADER_FLAGS) 4 | 5 | .section .text.boot 6 | .code32 7 | .global _start 8 | _start: 9 | mov edi, eax // magic 10 | mov esi, ebx // multiboot info 11 | jmp entry32 12 | 13 | .balign 4 14 | .type multiboot_header, STT_OBJECT 15 | multiboot_header: 16 | .int MULTIBOOT_HEADER_MAGIC 17 | .int MULTIBOOT_HEADER_FLAGS 18 | .int MULTIBOOT_CHECKSUM 19 | .int multiboot_header - {offset} // header_addr 20 | .int skernel - {offset} // load_addr 21 | .int edata - {offset} // load_end 22 | .int ebss - {offset} // bss_end_addr 23 | .int 
_start - {offset} // entry_addr 24 | 25 | entry32: 26 | // load the temporary GDT 27 | lgdt [.Ltmp_gdt_desc_phys - {offset}] 28 | mov ax, 0x18 // data segment selector 29 | mov ss, ax 30 | mov ds, ax 31 | mov es, ax 32 | mov fs, ax 33 | mov gs, ax 34 | 35 | // set PAE, PGE bit in CR4 36 | mov eax, {cr4} 37 | mov cr4, eax 38 | 39 | // load the temporary page table 40 | lea eax, [.Ltmp_pml4 - {offset}] 41 | mov cr3, eax 42 | 43 | // set LME, NXE bit in IA32_EFER 44 | mov ecx, {efer_msr} 45 | mov edx, 0 46 | mov eax, {efer} 47 | wrmsr 48 | 49 | // set protected mode, write protect, paging bit in CR0 50 | mov eax, {cr0} 51 | mov cr0, eax 52 | 53 | // long return to the 64-bit entry 54 | push 0x10 // code64 segment selector 55 | lea eax, [entry64 - {offset}] 56 | push eax 57 | retf 58 | 59 | .code64 60 | entry64: 61 | // reload GDT by high address 62 | movabs rax, offset .Ltmp_gdt_desc 63 | lgdt [rax] 64 | 65 | // clear segment selectors 66 | xor ax, ax 67 | mov ss, ax 68 | mov ds, ax 69 | mov es, ax 70 | mov fs, ax 71 | mov gs, ax 72 | 73 | // set stack and jump to rust_main 74 | movabs rsp, offset boot_stack_top 75 | movabs rax, offset {main_entry} 76 | call rax 77 | 1: jmp 1b 78 | 79 | .section .rodata 80 | .balign 8 81 | .Ltmp_gdt_desc_phys: 82 | .short .Ltmp_gdt_end - .Ltmp_gdt - 1 // limit 83 | .long .Ltmp_gdt - {offset} // base 84 | 85 | .balign 8 86 | .Ltmp_gdt_desc: 87 | .short .Ltmp_gdt_end - .Ltmp_gdt - 1 // limit 88 | .quad .Ltmp_gdt // base 89 | 90 | .section .data 91 | .balign 16 92 | .Ltmp_gdt: 93 | .quad 0x0000000000000000 // 0x00: null 94 | .quad 0x00cf9b000000ffff // 0x08: code segment (base=0, limit=0xfffff, type=32bit code exec/read, DPL=0, 4k) 95 | .quad 0x00af9b000000ffff // 0x10: code segment (base=0, limit=0xfffff, type=64bit code exec/read, DPL=0, 4k) 96 | .quad 0x00cf93000000ffff // 0x18: data segment (base=0, limit=0xfffff, type=32bit data read/write, DPL=0, 4k) 97 | .Ltmp_gdt_end: 98 | 99 | .balign 4096 100 | .Ltmp_pml4: 101 | // 0x0000_0000 
~ 0x8000_0000 102 | .quad .Ltmp_pdpt_low - {offset} + 0x3 // PRESENT | WRITABLE | paddr(tmp_pdpt) 103 | .zero 8 * 510 104 | // 0xffff_ff80_0000_0000 ~ 0xffff_ff80_8000_0000 105 | .quad .Ltmp_pdpt_high - {offset} + 0x3 // PRESENT | WRITABLE | paddr(tmp_pdpt) 106 | 107 | .Ltmp_pdpt_low: 108 | .quad 0x0000 | 0x83 // PRESENT | WRITABLE | HUGE_PAGE | paddr(0x0) 109 | .quad 0x40000000 | 0x83 // PRESENT | WRITABLE | HUGE_PAGE | paddr(0x4000_0000) 110 | .zero 8 * 510 111 | 112 | .Ltmp_pdpt_high: 113 | .quad 0x0000 | 0x83 // PRESENT | WRITABLE | HUGE_PAGE | paddr(0x0) 114 | .quad 0x40000000 | 0x83 // PRESENT | WRITABLE | HUGE_PAGE | paddr(0x4000_0000) 115 | .zero 8 * 510 116 | 117 | .section .bss.stack 118 | .balign 4096 119 | boot_stack: 120 | .space {boot_stack_size} 121 | boot_stack_top: 122 | -------------------------------------------------------------------------------- /hypervisor/src/logging.rs: -------------------------------------------------------------------------------- 1 | use core::fmt::{self, Write}; 2 | 3 | use log::{self, Level, LevelFilter, Log, Metadata, Record}; 4 | use spin::Mutex; 5 | 6 | use crate::arch::uart; 7 | 8 | struct Stdout; 9 | 10 | static PRINT_LOCK: Mutex<()> = Mutex::new(()); 11 | 12 | impl Write for Stdout { 13 | fn write_str(&mut self, s: &str) -> fmt::Result { 14 | for c in s.chars() { 15 | match c { 16 | '\n' => { 17 | uart::console_putchar(b'\r'); 18 | uart::console_putchar(b'\n'); 19 | } 20 | _ => uart::console_putchar(c as u8), 21 | } 22 | } 23 | Ok(()) 24 | } 25 | } 26 | 27 | pub fn init() { 28 | static LOGGER: SimpleLogger = SimpleLogger; 29 | log::set_logger(&LOGGER).unwrap(); 30 | log::set_max_level(match option_env!("LOG") { 31 | Some("error") => LevelFilter::Error, 32 | Some("warn") => LevelFilter::Warn, 33 | Some("info") => LevelFilter::Info, 34 | Some("debug") => LevelFilter::Debug, 35 | Some("trace") => LevelFilter::Trace, 36 | _ => LevelFilter::Off, 37 | }); 38 | } 39 | 40 | pub fn print(args: fmt::Arguments) { 41 | let _locked =
PRINT_LOCK.lock();
    Stdout.write_fmt(args).unwrap();
}

/// Prints formatted text through `crate::logging::print` without appending a
/// newline. The first argument must be a format string literal.
#[macro_export]
macro_rules! print {
    ($fmt: literal $(, $($arg: tt)+)?) => {
        $crate::logging::print(format_args!($fmt $(, $($arg)+)?));
    }
}

/// Like `print!`, but appends `"\n"` to the format string. With no arguments
/// it prints just a newline.
#[macro_export]
macro_rules! println {
    () => { print!("\n") };
    ($fmt: literal $(, $($arg: tt)+)?) => {
        $crate::logging::print(format_args!(concat!($fmt, "\n") $(, $($arg)+)?));
    }
}

/// Wraps formatted arguments in an `ESC[<code>m` ... `ESC[m` escape pair so the
/// text is rendered in the given colour.
macro_rules! with_color {
    ($color_code:expr, $($arg:tt)*) => {{
        format_args!("\u{1B}[{}m{}\u{1B}[m", $color_code as u8, format_args!($($arg)*))
    }};
}

/// Numeric colour codes that `with_color!` places inside the escape sequence.
#[repr(u8)]
#[allow(dead_code)]
enum ColorCode {
    Black = 30,
    Red = 31,
    Green = 32,
    Yellow = 33,
    Blue = 34,
    Magenta = 35,
    Cyan = 36,
    White = 37,
    BrightBlack = 90,
    BrightRed = 91,
    BrightGreen = 92,
    BrightYellow = 93,
    BrightBlue = 94,
    BrightMagenta = 95,
    BrightCyan = 96,
    BrightWhite = 97,
}

/// `log::Log` backend that writes coloured records through `print`.
struct SimpleLogger;

impl Log for SimpleLogger {
    /// Accepts every record; no filtering is done here.
    fn enabled(&self, _metadata: &Metadata) -> bool {
        true
    }

    /// Emits one line per record: `[<time> LEVEL target:line] message`.
    ///
    /// The timestamp is only included once `super::init_ok()` reports true.
    fn log(&self, record: &Record) {
        if !self.enabled(record.metadata()) {
            return;
        }

        let level = record.level();
        // One table for both colours: the level tag uses the bright variant,
        // the message body the regular one (trace uses the same for both).
        let (level_color, args_color) = match level {
            Level::Error => (ColorCode::BrightRed, ColorCode::Red),
            Level::Warn => (ColorCode::BrightYellow, ColorCode::Yellow),
            Level::Info => (ColorCode::BrightGreen, ColorCode::Green),
            Level::Debug => (ColorCode::BrightCyan, ColorCode::Cyan),
            Level::Trace => (ColorCode::BrightBlack, ColorCode::BrightBlack),
        };
        let target = record.target();
        let line = record.line().unwrap_or(0);

        if !super::init_ok() {
            print(with_color!(
                ColorCode::White,
                "[{} {} {}\n",
                with_color!(level_color, "{:<5}", level),
                with_color!(ColorCode::White, "{}:{}]", target, line),
                with_color!(args_color, "{}", record.args()),
            ));
        } else {
            let now = crate::timer::current_time();
            print(with_color!(
                ColorCode::White,
                "[{:>3}.{:06} {} {} {}\n",
                now.as_secs(),
                now.subsec_micros(),
                with_color!(level_color, "{:<5}", level),
                with_color!(ColorCode::White, "{}:{}]", target, line),
                with_color!(args_color, "{}", record.args()),
            ));
        }
    }

    /// Nothing is buffered, so there is nothing to flush.
    fn flush(&self) {}
}

--------------------------------------------------------------------------------
/hypervisor/src/hv/device_emu/uart16550.rs:
--------------------------------------------------------------------------------
//! Emulated UART 16550. (ref: https://wiki.osdev.org/Serial_Ports)

use super::PortIoDevice;
use crate::arch::uart;

use rvm::{RvmError, RvmResult};
use spin::Mutex;

const DATA_REG: u16 = 0;
const INT_EN_REG: u16 = 1;
const FIFO_CTRL_REG: u16 = 2;
const LINE_CTRL_REG: u16 = 3;
const MODEM_CTRL_REG: u16 = 4;
const LINE_STATUS_REG: u16 = 5;
const MODEM_STATUS_REG: u16 = 6;
const SCRATCH_REG: u16 = 7;

const UART_FIFO_CAPACITY: usize = 16;

bitflags::bitflags! {
    /// Line status flags
    struct LineStsFlags: u8 {
        const INPUT_FULL = 1;
        // 1 to 4 unknown
        const OUTPUT_EMPTY = 1 << 5;
        // 6 and 7 unknown
    }
}

/// FIFO queue for caching bytes read.
///
/// Fixed-capacity ring buffer: `head` is the index of the oldest byte and
/// `num` the number of bytes currently stored.
struct Fifo<const CAP: usize> {
    buf: [u8; CAP],
    head: usize,
    num: usize,
}

impl<const CAP: usize> Fifo<CAP> {
    /// Creates an empty queue.
    const fn new() -> Self {
        Self {
            buf: [0; CAP],
            head: 0,
            num: 0,
        }
    }

    /// Whether no byte is stored.
    fn is_empty(&self) -> bool {
        self.num == 0
    }

    /// Whether `CAP` bytes are stored.
    fn is_full(&self) -> bool {
        self.num == CAP
    }

    /// Appends `value` at the tail.
    ///
    /// # Panics
    /// Panics if the queue is full.
    fn push(&mut self, value: u8) {
        assert!(self.num < CAP);
        self.buf[(self.head + self.num) % CAP] = value;
        self.num += 1;
    }

    /// Removes and returns the oldest byte.
    ///
    /// # Panics
    /// Panics if the queue is empty.
    fn pop(&mut self) -> u8 {
        assert!(self.num > 0);
        let ret = self.buf[self.head];
        self.head = (self.head + 1) % CAP;
        self.num -= 1;
        ret
    }
}

/// Emulated 16550 UART occupying the eight I/O ports starting at `port_base`.
pub struct Uart16550 {
    port_base: u16,
    fifo: Mutex<Fifo<UART_FIFO_CAPACITY>>,
}

impl PortIoDevice for Uart16550 {
    /// The eight consecutive ports `port_base..port_base + 8`.
    fn port_range(&self) -> core::ops::Range<u16> {
        self.port_base..self.port_base + 8
    }

    /// Handles a guest port read.
    ///
    /// Only 1-byte accesses are accepted; any other size yields
    /// `RvmError::InvalidParam`. `port` must lie inside `port_range()`.
    fn read(&self, port: u16, access_size: u8) -> RvmResult<u32> {
        if access_size != 1 {
            error!("Invalid serial port I/O read size: {} != 1", access_size);
            return Err(RvmError::InvalidParam);
        }
        let ret = match port - self.port_base {
            DATA_REG => {
                // read a byte from FIFO
                let mut fifo = self.fifo.lock();
                if fifo.is_empty() {
                    0
                } else {
                    fifo.pop()
                }
            }
            LINE_STATUS_REG => {
                // check if the physical serial port has an available byte, and push it to FIFO.
                let mut fifo = self.fifo.lock();
                if !fifo.is_full() {
                    if let Some(c) = uart::console_getchar() {
                        fifo.push(c);
                    }
                }
                // Output is always reported as ready; input is ready whenever
                // the FIFO holds at least one byte.
                let mut lsr = LineStsFlags::OUTPUT_EMPTY;
                if !fifo.is_empty() {
                    lsr |= LineStsFlags::INPUT_FULL;
                }
                lsr.bits()
            }
            INT_EN_REG | FIFO_CTRL_REG | LINE_CTRL_REG | MODEM_CTRL_REG | MODEM_STATUS_REG
            | SCRATCH_REG => {
                info!("Unimplemented serial port I/O read: {:#x}", port); // unimplemented
                0
            }
            _ => unreachable!(),
        };
        Ok(ret as u32)
    }

    /// Handles a guest port write.
    ///
    /// Only 1-byte accesses are accepted. Writes to the data register are
    /// forwarded to `uart::console_putchar`; all other registers are ignored.
    fn write(&self, port: u16, access_size: u8, value: u32) -> RvmResult {
        if access_size != 1 {
            error!("Invalid serial port I/O write size: {} != 1", access_size);
            return Err(RvmError::InvalidParam);
        }
        match port - self.port_base {
            DATA_REG => uart::console_putchar(value as u8),
            INT_EN_REG | FIFO_CTRL_REG | LINE_CTRL_REG | MODEM_CTRL_REG | SCRATCH_REG => {
                info!("Unimplemented serial port I/O write: {:#x}", port); // unimplemented
            }
            LINE_STATUS_REG => {} // ignore
            _ => unreachable!(),
        }
        Ok(())
    }
}

impl Uart16550 {
    /// Creates an emulated UART whose registers start at `port_base`.
    pub const fn new(port_base: u16) -> Self {
        Self {
            port_base,
            fifo: Mutex::new(Fifo::new()),
        }
    }
}

--------------------------------------------------------------------------------
/rvm/src/arch/x86_64/lapic.rs:
--------------------------------------------------------------------------------
use bit_field::BitField;
use core::marker::PhantomData;

use crate::{RvmHal, RvmResult};

const APIC_FREQ_MHZ: u64 = 1000; // 1000 MHz
const APIC_CYCLE_NANOS: u64 = 1000 / APIC_FREQ_MHZ;

/// Local APIC timer modes.
#[derive(Debug, Copy, Clone)]
#[repr(u8)]
#[allow(dead_code)]
pub enum TimerMode {
    /// Timer only fires once.
    OneShot = 0b00,
    /// Timer fires periodically.
Periodic = 0b01,
    /// Timer fires at an absolute time.
    TscDeadline = 0b10,
}

/// A virtual local APIC timer. (SDM Vol. 3C, Section 10.5.4)
///
/// Time is taken from `H::current_time_nanos()`; a `deadline_ns` of 0 means
/// the timer is not armed.
pub struct ApicTimer<H: RvmHal> {
    lvt_timer_bits: u32,
    divide_shift: u8,
    initial_count: u32,
    last_start_ns: u64,
    deadline_ns: u64,
    _phantom: PhantomData<H>,
}

impl<H: RvmHal> ApicTimer<H> {
    /// Creates a stopped timer with the LVT entry masked.
    pub(crate) const fn new() -> Self {
        Self {
            lvt_timer_bits: 0x1_0000, // masked
            divide_shift: 0,
            initial_count: 0,
            last_start_ns: 0,
            deadline_ns: 0,
            _phantom: PhantomData,
        }
    }

    /// Checks whether the timer has expired and, if so, updates its state.
    ///
    /// On expiry a periodic timer is re-armed one interval later, and any
    /// other mode is disarmed. Returns `true` only if the timer expired and
    /// its interrupt is not masked.
    pub fn check_interrupt(&mut self) -> bool {
        if self.deadline_ns == 0 {
            false
        } else if H::current_time_nanos() >= self.deadline_ns {
            if self.is_periodic() {
                self.deadline_ns += self.interval_ns();
            } else {
                self.deadline_ns = 0;
            }
            !self.is_masked()
        } else {
            false
        }
    }

    /// Whether the timer interrupt is masked.
    pub const fn is_masked(&self) -> bool {
        self.lvt_timer_bits & (1 << 16) != 0
    }

    /// Whether the timer mode is periodic.
    pub const fn is_periodic(&self) -> bool {
        let timer_mode = (self.lvt_timer_bits >> 17) & 0b11;
        timer_mode == TimerMode::Periodic as _
    }

    /// The timer interrupt vector number.
    pub const fn vector(&self) -> u8 {
        (self.lvt_timer_bits & 0xff) as u8
    }

    /// LVT Timer Register. (SDM Vol. 3A, Section 10.5.1, Figure 10-8)
    pub const fn lvt_timer(&self) -> u32 {
        self.lvt_timer_bits
    }

    /// Divide Configuration Register. (SDM Vol. 3A, Section 10.5.4, Figure 10-10)
    ///
    /// Inverse of `set_divide`: re-encodes `divide_shift` into register bits
    /// 0, 1 and 3.
    pub const fn divide(&self) -> u32 {
        let dcr = self.divide_shift.wrapping_sub(1) as u32 & 0b111;
        (dcr & 0b11) | ((dcr & 0b100) << 1)
    }

    /// Initial Count Register.
    pub const fn initial_count(&self) -> u32 {
        self.initial_count
    }

    /// Current Count Register.
    ///
    /// Returns 0 when the initial count is 0. Otherwise the value counts down
    /// from the initial count: it wraps around in periodic mode and stays at 0
    /// after expiry in the other modes.
    pub fn current_counter(&self) -> u32 {
        // With an initial count of 0 the timer is stopped. Returning early
        // also avoids the `% 0` panic in the periodic branch below.
        if self.initial_count == 0 {
            return 0;
        }
        let elapsed_ns = H::current_time_nanos() - self.last_start_ns;
        let elapsed_cycles = (elapsed_ns / APIC_CYCLE_NANOS) >> self.divide_shift;
        if self.is_periodic() {
            self.initial_count - (elapsed_cycles % self.initial_count as u64) as u32
        } else if elapsed_cycles < self.initial_count as u64 {
            self.initial_count - elapsed_cycles as u32
        } else {
            0
        }
    }

    /// Set LVT Timer Register.
    ///
    /// # Errors
    /// Rejects TSC-deadline mode as unsupported and the reserved mode `0b11`
    /// as an invalid parameter. The stored register is left untouched in both
    /// cases.
    pub fn set_lvt_timer(&mut self, bits: u32) -> RvmResult {
        let timer_mode = bits.get_bits(17..19);
        if timer_mode == TimerMode::TscDeadline as _ {
            return rvm_err!(Unsupported); // TSC deadline mode was not supported
        } else if timer_mode == 0b11 {
            return rvm_err!(InvalidParam); // reserved
        }
        self.lvt_timer_bits = bits;
        self.start_timer();
        Ok(())
    }

    /// Set Initial Count Register.
    pub fn set_initial_count(&mut self, initial: u32) -> RvmResult {
        self.initial_count = initial;
        self.start_timer();
        Ok(())
    }

    /// Set Divide Configuration Register.
    ///
    /// The divide value is encoded in register bits 0, 1 and 3. They are
    /// gathered into a 3-bit number and stored as `(n + 1) mod 8`, so the
    /// encoding `0b111` maps to a shift of 0.
    pub fn set_divide(&mut self, dcr: u32) -> RvmResult {
        let shift = (dcr & 0b11) | ((dcr & 0b1000) >> 1);
        self.divide_shift = (shift + 1) as u8 & 0b111;
        self.start_timer();
        Ok(())
    }

    /// Length of one full countdown in nanoseconds.
    const fn interval_ns(&self) -> u64 {
        (self.initial_count as u64 * APIC_CYCLE_NANOS) << self.divide_shift
    }

    /// (Re)arms the timer from the current time, or disarms it when the
    /// initial count is 0.
    fn start_timer(&mut self) {
        if self.initial_count != 0 {
            self.last_start_ns = H::current_time_nanos();
            self.deadline_ns = self.last_start_ns + self.interval_ns();
        } else {
            self.deadline_ns = 0;
        }
    }
}

--------------------------------------------------------------------------------
/rvm/src/arch/x86_64/vmx/ept.rs:
--------------------------------------------------------------------------------
use core::{convert::TryFrom, fmt};

use bit_field::BitField;

use crate::mm::{GenericPTE, HostPhysAddr, Level4PageTable, MemFlags};

bitflags::bitflags! {
    /// EPT entry flags. (SDM Vol. 3C, Section 28.3.2)
    struct EPTFlags: u64 {
        /// Read access.
        const READ = 1 << 0;
        /// Write access.
        const WRITE = 1 << 1;
        /// Execute access.
        const EXECUTE = 1 << 2;
        /// EPT memory type. Only for terminate pages.
        const MEM_TYPE_MASK = 0b111 << 3;
        /// Ignore PAT memory type. Only for terminate pages.
        const IGNORE_PAT = 1 << 6;
        /// Specifies that the entry maps a huge frame instead of a page table.
        /// Only allowed in P2 or P3 tables.
        const HUGE_PAGE = 1 << 7;
        /// If bit 6 of EPTP is 1, accessed flag for EPT.
        const ACCESSED = 1 << 8;
        /// If bit 6 of EPTP is 1, dirty flag for EPT.
        const DIRTY = 1 << 9;
        /// Execute access for user-mode linear addresses.
        const EXECUTE_FOR_USER = 1 << 10;
    }
}

numeric_enum_macro::numeric_enum! {
    #[repr(u8)]
    #[derive(Debug, PartialEq, Clone, Copy)]
    /// EPT memory typing. (SDM Vol.
3C, Section 28.3.7) 36 | enum EPTMemType { 37 | Uncached = 0, 38 | WriteCombining = 1, 39 | WriteThrough = 4, 40 | WriteProtected = 5, 41 | WriteBack = 6, 42 | } 43 | } 44 | 45 | impl EPTFlags { 46 | fn set_mem_type(&mut self, mem_type: EPTMemType) { 47 | let mut bits = self.bits(); 48 | bits.set_bits(3..6, mem_type as u64); 49 | *self = Self::from_bits_truncate(bits) 50 | } 51 | fn mem_type(&self) -> Result { 52 | EPTMemType::try_from(self.bits().get_bits(3..6) as u8) 53 | } 54 | } 55 | 56 | impl From for EPTFlags { 57 | fn from(f: MemFlags) -> Self { 58 | if f.is_empty() { 59 | return Self::empty(); 60 | } 61 | let mut ret = Self::empty(); 62 | if f.contains(MemFlags::READ) { 63 | ret |= Self::READ; 64 | } 65 | if f.contains(MemFlags::WRITE) { 66 | ret |= Self::WRITE; 67 | } 68 | if f.contains(MemFlags::EXECUTE) { 69 | ret |= Self::EXECUTE; 70 | } 71 | if !f.contains(MemFlags::DEVICE) { 72 | ret.set_mem_type(EPTMemType::WriteBack); 73 | } 74 | ret 75 | } 76 | } 77 | 78 | impl From for MemFlags { 79 | fn from(f: EPTFlags) -> Self { 80 | let mut ret = MemFlags::empty(); 81 | if f.contains(EPTFlags::READ) { 82 | ret |= Self::READ; 83 | } 84 | if f.contains(EPTFlags::WRITE) { 85 | ret |= Self::WRITE; 86 | } 87 | if f.contains(EPTFlags::EXECUTE) { 88 | ret |= Self::EXECUTE; 89 | } 90 | if let Ok(EPTMemType::Uncached) = f.mem_type() { 91 | ret |= Self::DEVICE; 92 | } 93 | ret 94 | } 95 | } 96 | 97 | #[derive(Clone, Copy)] 98 | #[repr(transparent)] 99 | pub struct EPTEntry(u64); 100 | 101 | const PHYS_ADDR_MASK: usize = 0x000f_ffff_ffff_f000; // 12..52 102 | 103 | impl GenericPTE for EPTEntry { 104 | fn new_page(paddr: HostPhysAddr, flags: MemFlags, is_huge: bool) -> Self { 105 | let mut flags = EPTFlags::from(flags); 106 | if is_huge { 107 | flags |= EPTFlags::HUGE_PAGE; 108 | } 109 | Self(flags.bits() | (paddr & PHYS_ADDR_MASK) as u64) 110 | } 111 | fn new_table(paddr: HostPhysAddr) -> Self { 112 | let flags = EPTFlags::READ | EPTFlags::WRITE | EPTFlags::EXECUTE; 113 
| Self(flags.bits() | (paddr & PHYS_ADDR_MASK) as u64) 114 | } 115 | fn paddr(&self) -> HostPhysAddr { 116 | self.0 as usize & PHYS_ADDR_MASK 117 | } 118 | fn flags(&self) -> MemFlags { 119 | EPTFlags::from_bits_truncate(self.0).into() 120 | } 121 | fn is_unused(&self) -> bool { 122 | self.0 == 0 123 | } 124 | fn is_present(&self) -> bool { 125 | self.0 & 0x7 != 0 // RWX != 0 126 | } 127 | fn is_huge(&self) -> bool { 128 | EPTFlags::from_bits_truncate(self.0).contains(EPTFlags::HUGE_PAGE) 129 | } 130 | fn clear(&mut self) { 131 | self.0 = 0 132 | } 133 | } 134 | 135 | impl fmt::Debug for EPTEntry { 136 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 137 | f.debug_struct("EPTEntry") 138 | .field("raw", &self.0) 139 | .field("hpaddr", &self.paddr()) 140 | .field("flags", &self.flags()) 141 | .field("mem_type", &EPTFlags::from_bits_truncate(self.0).mem_type()) 142 | .finish() 143 | } 144 | } 145 | 146 | /// The VMX extended page table. (SDM Vol. 3C, Section 28.3) 147 | pub type ExtendedPageTable = Level4PageTable; 148 | -------------------------------------------------------------------------------- /hypervisor/src/hv/gpm.rs: -------------------------------------------------------------------------------- 1 | use alloc::collections::BTreeMap; 2 | use core::fmt::{Debug, Formatter, Result}; 3 | 4 | use rvm::{GuestPhysAddr, HostPhysAddr, MemFlags, NestedPageTable, RvmError, RvmResult}; 5 | 6 | use super::hal::RvmHalImpl; 7 | use crate::mm::{address::is_aligned, PAGE_SIZE}; 8 | 9 | #[derive(Debug)] 10 | enum Mapper { 11 | Offset(usize), 12 | } 13 | 14 | #[derive(Debug)] 15 | pub struct GuestMemoryRegion { 16 | pub gpa: GuestPhysAddr, 17 | pub hpa: HostPhysAddr, 18 | pub size: usize, 19 | pub flags: MemFlags, 20 | } 21 | 22 | pub struct MapRegion { 23 | pub start: GuestPhysAddr, 24 | pub size: usize, 25 | pub flags: MemFlags, 26 | mapper: Mapper, 27 | } 28 | 29 | impl MapRegion { 30 | pub fn new_offset( 31 | start_gpa: GuestPhysAddr, 32 | start_hpa: HostPhysAddr, 
size: usize,
        flags: MemFlags,
    ) -> Self {
        assert!(is_aligned(start_gpa));
        assert!(is_aligned(start_hpa));
        assert!(is_aligned(size));
        // The offset is only ever applied with `wrapping_sub` in `target()`,
        // so compute it with wrapping arithmetic too. A plain `-` panics in
        // overflow-checked builds whenever the host address is above the
        // guest address.
        let offset = start_gpa.wrapping_sub(start_hpa);
        Self {
            start: start_gpa,
            size,
            flags,
            mapper: Mapper::Offset(offset),
        }
    }

    /// Whether the half-open guest ranges of `self` and `other` intersect.
    fn is_overlap_with(&self, other: &Self) -> bool {
        let s0 = self.start;
        let e0 = s0 + self.size;
        let s1 = other.start;
        let e1 = s1 + other.size;
        !(e0 <= s1 || e1 <= s0)
    }

    /// Host physical address that backs guest physical address `gpa`.
    fn target(&self, gpa: GuestPhysAddr) -> HostPhysAddr {
        match self.mapper {
            Mapper::Offset(off) => gpa.wrapping_sub(off),
        }
    }

    /// Maps the region into `npt` one `PAGE_SIZE` page at a time.
    ///
    /// Stops at the first mapping error; pages mapped before it stay mapped.
    fn map_to(&self, npt: &mut NestedPageTable<RvmHalImpl>) -> RvmResult {
        let mut start = self.start;
        let end = start + self.size;
        while start < end {
            let target = self.target(start);
            npt.map(start, target, self.flags)?;
            start += PAGE_SIZE;
        }
        Ok(())
    }

    /// Unmaps the region from `npt` one `PAGE_SIZE` page at a time.
    fn unmap_to(&self, npt: &mut NestedPageTable<RvmHalImpl>) -> RvmResult {
        let mut start = self.start;
        let end = start + self.size;
        while start < end {
            npt.unmap(start)?;
            start += PAGE_SIZE;
        }
        Ok(())
    }
}

impl Debug for MapRegion {
    fn fmt(&self, f: &mut Formatter) -> Result {
        f.debug_struct("MapRegion")
            .field("range", &(self.start..self.start + self.size))
            .field("size", &self.size)
            .field("flags", &self.flags)
            .field("mapper", &self.mapper)
            .finish()
    }
}

impl From<GuestMemoryRegion> for MapRegion {
    fn from(r: GuestMemoryRegion) -> Self {
        Self::new_offset(r.gpa, r.hpa, r.size, r.flags)
    }
}

/// The set of guest-physical regions of one guest together with the nested
/// page table that maps them. Regions are keyed by their start address.
pub struct GuestPhysMemorySet {
    regions: BTreeMap<GuestPhysAddr, MapRegion>,
    npt: NestedPageTable<RvmHalImpl>,
}

impl GuestPhysMemorySet {
    /// Creates an empty set with a fresh nested page table.
    pub fn new() -> RvmResult<Self> {
        Ok(Self {
            npt: NestedPageTable::new()?,
            regions: BTreeMap::new(),
        })
    }

    /// Physical address of the nested page table root.
    pub fn nest_page_table_root(&self) -> HostPhysAddr {
        self.npt.root_paddr()
    }

    /// Whether `other` overlaps none of the stored regions.
    ///
    /// Stored regions never overlap each other, so only the nearest region
    /// starting below `other.start` and the nearest one starting at or above
    /// it need to be checked.
    fn test_free_area(&self, other: &MapRegion) -> bool {
        if let Some((_, before)) = self.regions.range(..other.start).next_back() {
            if before.is_overlap_with(other) {
                return false;
            }
        }
        if let Some((_, after)) = self.regions.range(other.start..).next() {
            if after.is_overlap_with(other) {
                return false;
            }
        }
        true
    }

    /// Maps `region` into the nested page table and records it.
    ///
    /// An empty region is accepted and ignored.
    ///
    /// # Errors
    /// Returns `RvmError::InvalidParam` if `region` overlaps an existing one,
    /// or the page-table error if mapping fails.
    pub fn map_region(&mut self, region: MapRegion) -> RvmResult {
        if region.size == 0 {
            return Ok(());
        }
        if !self.test_free_area(&region) {
            warn!(
                "MapRegion({:#x}..{:#x}) overlapped in:\n{:#x?}",
                region.start,
                region.start + region.size,
                self
            );
            return Err(RvmError::InvalidParam);
        }
        region.map_to(&mut self.npt)?;
        self.regions.insert(region.start, region);
        Ok(())
    }

    /// Unmaps and forgets every region.
    ///
    /// # Panics
    /// Panics if unmapping a page fails.
    pub fn clear(&mut self) {
        for region in self.regions.values() {
            region.unmap_to(&mut self.npt).unwrap();
        }
        self.regions.clear();
    }
}

impl Drop for GuestPhysMemorySet {
    fn drop(&mut self) {
        self.clear();
    }
}

impl Debug for GuestPhysMemorySet {
    fn fmt(&self, f: &mut Formatter) -> Result {
        f.debug_struct("GuestPhysMemorySet")
            .field("page_table_root", &self.nest_page_table_root())
            .field("regions", &self.regions)
            .finish()
    }
}

--------------------------------------------------------------------------------
/rvm/src/arch/x86_64/vmx/mod.rs:
--------------------------------------------------------------------------------
mod definitions;
mod ept;
mod instructions;
mod structs;
mod vcpu;
mod vmcs;

use raw_cpuid::CpuId;
use x86::{bits64::vmx, vmx::VmFail};
use x86_64::registers::control::{Cr0, Cr4, Cr4Flags};

use self::structs::{FeatureControl, FeatureControlFlags, VmxBasic, VmxRegion};
13 | use crate::arch::msr::Msr; 14 | use crate::error::{RvmError, RvmResult}; 15 | use crate::hal::RvmHal; 16 | 17 | pub use self::definitions::VmxExitReason; 18 | pub use self::ept::ExtendedPageTable as NestedPageTable; 19 | pub use self::vcpu::VmxVcpu as RvmVcpu; 20 | pub use self::vmcs::{VmxExitInfo, VmxInterruptInfo, VmxIoExitInfo}; 21 | pub use self::VmxPerCpuState as ArchPerCpuState; 22 | 23 | pub fn has_hardware_support() -> bool { 24 | if let Some(feature) = CpuId::new().get_feature_info() { 25 | feature.has_vmx() 26 | } else { 27 | false 28 | } 29 | } 30 | 31 | pub struct VmxPerCpuState { 32 | vmcs_revision_id: u32, 33 | vmx_region: VmxRegion, 34 | } 35 | 36 | impl VmxPerCpuState { 37 | pub const fn new() -> Self { 38 | Self { 39 | vmcs_revision_id: 0, 40 | vmx_region: unsafe { VmxRegion::uninit() }, 41 | } 42 | } 43 | 44 | pub fn is_enabled(&self) -> bool { 45 | Cr4::read().contains(Cr4Flags::VIRTUAL_MACHINE_EXTENSIONS) 46 | } 47 | 48 | pub fn hardware_enable(&mut self) -> RvmResult { 49 | if !has_hardware_support() { 50 | return rvm_err!(Unsupported, "CPU does not support feature VMX"); 51 | } 52 | if self.is_enabled() { 53 | return rvm_err!(ResourceBusy, "VMX is already turned on"); 54 | } 55 | 56 | // Enable VMXON, if required. 57 | let ctrl = FeatureControl::read(); 58 | let locked = ctrl.contains(FeatureControlFlags::LOCKED); 59 | let vmxon_outside = ctrl.contains(FeatureControlFlags::VMXON_ENABLED_OUTSIDE_SMX); 60 | if !locked { 61 | FeatureControl::write( 62 | ctrl | FeatureControlFlags::LOCKED | FeatureControlFlags::VMXON_ENABLED_OUTSIDE_SMX, 63 | ) 64 | } else if !vmxon_outside { 65 | return rvm_err!(Unsupported, "VMX disabled by BIOS"); 66 | } 67 | 68 | // Check control registers are in a VMX-friendly state. (SDM Vol. 3C, Appendix A.7, A.8) 69 | macro_rules! 
cr_is_valid { 70 | ($value: expr, $crx: ident) => {{ 71 | use Msr::*; 72 | let value = $value; 73 | let fixed0 = concat_idents!(IA32_VMX_, $crx, _FIXED0).read(); 74 | let fixed1 = concat_idents!(IA32_VMX_, $crx, _FIXED1).read(); 75 | (!fixed0 | value != 0) && (fixed1 | !value != 0) 76 | }}; 77 | } 78 | if !cr_is_valid!(Cr0::read().bits(), CR0) { 79 | return rvm_err!(BadState, "host CR0 is not valid in VMX operation"); 80 | } 81 | if !cr_is_valid!(Cr4::read().bits(), CR4) { 82 | return rvm_err!(BadState, "host CR4 is not valid in VMX operation"); 83 | } 84 | 85 | // Get VMCS revision identifier in IA32_VMX_BASIC MSR. 86 | let vmx_basic = VmxBasic::read(); 87 | if vmx_basic.region_size as usize != crate::mm::PAGE_SIZE { 88 | return rvm_err!(Unsupported); 89 | } 90 | if vmx_basic.mem_type != VmxBasic::VMX_MEMORY_TYPE_WRITE_BACK { 91 | return rvm_err!(Unsupported); 92 | } 93 | if vmx_basic.is_32bit_address { 94 | return rvm_err!(Unsupported); 95 | } 96 | if !vmx_basic.io_exit_info { 97 | return rvm_err!(Unsupported); 98 | } 99 | if !vmx_basic.vmx_flex_controls { 100 | return rvm_err!(Unsupported); 101 | } 102 | self.vmcs_revision_id = vmx_basic.revision_id; 103 | self.vmx_region = VmxRegion::new(vmx_basic.revision_id, false)?; 104 | 105 | unsafe { 106 | // Enable VMX using the VMXE bit. 107 | Cr4::write(Cr4::read() | Cr4Flags::VIRTUAL_MACHINE_EXTENSIONS); 108 | // Execute VMXON. 109 | vmx::vmxon(self.vmx_region.phys_addr() as _)?; 110 | } 111 | info!("[RVM] successed to turn on VMX."); 112 | 113 | Ok(()) 114 | } 115 | 116 | pub fn hardware_disable(&mut self) -> RvmResult { 117 | if !self.is_enabled() { 118 | return rvm_err!(BadState, "VMX is not enabled"); 119 | } 120 | 121 | unsafe { 122 | // Execute VMXOFF. 123 | vmx::vmxoff()?; 124 | // Remove VMXE bit in CR4. 
125 | Cr4::update(|cr4| cr4.remove(Cr4Flags::VIRTUAL_MACHINE_EXTENSIONS)); 126 | }; 127 | info!("[RVM] successed to turn off VMX."); 128 | 129 | self.vmx_region = unsafe { VmxRegion::uninit() }; 130 | Ok(()) 131 | } 132 | } 133 | 134 | impl From for RvmError { 135 | fn from(err: VmFail) -> Self { 136 | match err { 137 | VmFail::VmFailValid => rvm_err_type!(BadState, vmcs::instruction_error().as_str()), 138 | _ => rvm_err_type!(BadState, format_args!("VMX instruction failed: {:?}", err)), 139 | } 140 | } 141 | } 142 | -------------------------------------------------------------------------------- /hypervisor/Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 3 | version = 3 4 | 5 | [[package]] 6 | name = "autocfg" 7 | version = "1.1.0" 8 | source = "registry+https://github.com/rust-lang/crates.io-index" 9 | checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" 10 | 11 | [[package]] 12 | name = "bit" 13 | version = "0.1.1" 14 | source = "registry+https://github.com/rust-lang/crates.io-index" 15 | checksum = "2b645c5c09a7d4035949cfce1a915785aaad6f17800c35fda8a8c311c491f284" 16 | 17 | [[package]] 18 | name = "bit_field" 19 | version = "0.10.1" 20 | source = "registry+https://github.com/rust-lang/crates.io-index" 21 | checksum = "dcb6dd1c2376d2e096796e234a70e17e94cc2d5d54ff8ce42b28cef1d0d359a4" 22 | 23 | [[package]] 24 | name = "bitflags" 25 | version = "1.3.2" 26 | source = "registry+https://github.com/rust-lang/crates.io-index" 27 | checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" 28 | 29 | [[package]] 30 | name = "bitmap-allocator" 31 | version = "0.1.0" 32 | source = "git+https://github.com/rcore-os/bitmap-allocator?rev=88e871a#88e871a54f28a3d6795478f237466b3332e2fb1d" 33 | dependencies = [ 34 | "bit_field", 35 | ] 36 | 37 | [[package]] 38 | name = "buddy_system_allocator" 
39 | version = "0.8.0" 40 | source = "registry+https://github.com/rust-lang/crates.io-index" 41 | checksum = "55703ac5f02c246ce6158eff6ae2dd9e9069917969682b6831f8a5123abb8a48" 42 | dependencies = [ 43 | "spin 0.7.1", 44 | ] 45 | 46 | [[package]] 47 | name = "cfg-if" 48 | version = "1.0.0" 49 | source = "registry+https://github.com/rust-lang/crates.io-index" 50 | checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" 51 | 52 | [[package]] 53 | name = "lazy_static" 54 | version = "1.4.0" 55 | source = "registry+https://github.com/rust-lang/crates.io-index" 56 | checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" 57 | dependencies = [ 58 | "spin 0.5.2", 59 | ] 60 | 61 | [[package]] 62 | name = "lock_api" 63 | version = "0.4.9" 64 | source = "registry+https://github.com/rust-lang/crates.io-index" 65 | checksum = "435011366fe56583b16cf956f9df0095b405b82d76425bc8981c0e22e60ec4df" 66 | dependencies = [ 67 | "autocfg", 68 | "scopeguard", 69 | ] 70 | 71 | [[package]] 72 | name = "log" 73 | version = "0.4.17" 74 | source = "registry+https://github.com/rust-lang/crates.io-index" 75 | checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e" 76 | dependencies = [ 77 | "cfg-if", 78 | ] 79 | 80 | [[package]] 81 | name = "numeric-enum-macro" 82 | version = "0.2.0" 83 | source = "registry+https://github.com/rust-lang/crates.io-index" 84 | checksum = "300e4bdb6b46b592948e700ea1ef24a4296491f6a0ee722b258040abd15a3714" 85 | 86 | [[package]] 87 | name = "paste" 88 | version = "1.0.9" 89 | source = "registry+https://github.com/rust-lang/crates.io-index" 90 | checksum = "b1de2e551fb905ac83f73f7aedf2f0cb4a0da7e35efa24a202a936269f1f18e1" 91 | 92 | [[package]] 93 | name = "raw-cpuid" 94 | version = "10.6.0" 95 | source = "registry+https://github.com/rust-lang/crates.io-index" 96 | checksum = "a6823ea29436221176fe662da99998ad3b4db2c7f31e7b6f5fe43adccd6320bb" 97 | dependencies = [ 98 | "bitflags", 99 | ] 100 | 101 | 
[[package]] 102 | name = "rustversion" 103 | version = "1.0.9" 104 | source = "registry+https://github.com/rust-lang/crates.io-index" 105 | checksum = "97477e48b4cf8603ad5f7aaf897467cf42ab4218a38ef76fb14c2d6773a6d6a8" 106 | 107 | [[package]] 108 | name = "rvm" 109 | version = "0.1.0" 110 | dependencies = [ 111 | "bit_field", 112 | "bitflags", 113 | "cfg-if", 114 | "log", 115 | "numeric-enum-macro", 116 | "raw-cpuid", 117 | "x86", 118 | "x86_64", 119 | ] 120 | 121 | [[package]] 122 | name = "rvm-hypervisor" 123 | version = "0.1.0" 124 | dependencies = [ 125 | "bitflags", 126 | "bitmap-allocator", 127 | "buddy_system_allocator", 128 | "cfg-if", 129 | "lazy_static", 130 | "log", 131 | "raw-cpuid", 132 | "rvm", 133 | "spin 0.9.4", 134 | "x2apic", 135 | "x86", 136 | "x86_64", 137 | ] 138 | 139 | [[package]] 140 | name = "scopeguard" 141 | version = "1.1.0" 142 | source = "registry+https://github.com/rust-lang/crates.io-index" 143 | checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" 144 | 145 | [[package]] 146 | name = "spin" 147 | version = "0.5.2" 148 | source = "registry+https://github.com/rust-lang/crates.io-index" 149 | checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d" 150 | 151 | [[package]] 152 | name = "spin" 153 | version = "0.7.1" 154 | source = "registry+https://github.com/rust-lang/crates.io-index" 155 | checksum = "13287b4da9d1207a4f4929ac390916d64eacfe236a487e9a9f5b3be392be5162" 156 | 157 | [[package]] 158 | name = "spin" 159 | version = "0.9.4" 160 | source = "registry+https://github.com/rust-lang/crates.io-index" 161 | checksum = "7f6002a767bff9e83f8eeecf883ecb8011875a21ae8da43bffb817a57e78cc09" 162 | dependencies = [ 163 | "lock_api", 164 | ] 165 | 166 | [[package]] 167 | name = "volatile" 168 | version = "0.4.5" 169 | source = "registry+https://github.com/rust-lang/crates.io-index" 170 | checksum = "e3ca98349dda8a60ae74e04fd90c7fb4d6a4fbe01e6d3be095478aa0b76f6c0c" 171 | 172 | [[package]] 173 | name 
= "x2apic" 174 | version = "0.4.1" 175 | source = "registry+https://github.com/rust-lang/crates.io-index" 176 | checksum = "32b6a3e030cfc71d614954e1de6dcb09e40bf1437f620c27b4526f978bee912e" 177 | dependencies = [ 178 | "bit", 179 | "bitflags", 180 | "paste", 181 | "raw-cpuid", 182 | "x86_64", 183 | ] 184 | 185 | [[package]] 186 | name = "x86" 187 | version = "0.52.0" 188 | source = "registry+https://github.com/rust-lang/crates.io-index" 189 | checksum = "2781db97787217ad2a2845c396a5efe286f87467a5810836db6d74926e94a385" 190 | dependencies = [ 191 | "bit_field", 192 | "bitflags", 193 | "raw-cpuid", 194 | ] 195 | 196 | [[package]] 197 | name = "x86_64" 198 | version = "0.14.10" 199 | source = "registry+https://github.com/rust-lang/crates.io-index" 200 | checksum = "100555a863c0092238c2e0e814c1096c1e5cf066a309c696a87e907b5f8c5d69" 201 | dependencies = [ 202 | "bit_field", 203 | "bitflags", 204 | "rustversion", 205 | "volatile", 206 | ] 207 | -------------------------------------------------------------------------------- /rvm/src/arch/x86_64/vmx/definitions.rs: -------------------------------------------------------------------------------- 1 | use core::fmt::{Debug, Formatter, Result}; 2 | 3 | /// VM instruction error numbers. (SDM Vol. 
3C, Section 30.4) 4 | pub struct VmxInstructionError(u32); 5 | 6 | impl VmxInstructionError { 7 | pub fn as_str(&self) -> &str { 8 | match self.0 { 9 | 0 => "OK", 10 | 1 => "VMCALL executed in VMX root operation", 11 | 2 => "VMCLEAR with invalid physical address", 12 | 3 => "VMCLEAR with VMXON pointer", 13 | 4 => "VMLAUNCH with non-clear VMCS", 14 | 5 => "VMRESUME with non-launched VMCS", 15 | 6 => "VMRESUME after VMXOFF (VMXOFF and VMXON between VMLAUNCH and VMRESUME)", 16 | 7 => "VM entry with invalid control field(s)", 17 | 8 => "VM entry with invalid host-state field(s)", 18 | 9 => "VMPTRLD with invalid physical address", 19 | 10 => "VMPTRLD with VMXON pointer", 20 | 11 => "VMPTRLD with incorrect VMCS revision identifier", 21 | 12 => "VMREAD/VMWRITE from/to unsupported VMCS component", 22 | 13 => "VMWRITE to read-only VMCS component", 23 | 15 => "VMXON executed in VMX root operation", 24 | 16 => "VM entry with invalid executive-VMCS pointer", 25 | 17 => "VM entry with non-launched executive VMCS", 26 | 18 => "VM entry with executive-VMCS pointer not VMXON pointer (when attempting to deactivate the dual-monitor treatment of SMIs and SMM)", 27 | 19 => "VMCALL with non-clear VMCS (when attempting to activate the dual-monitor treatment of SMIs and SMM)", 28 | 20 => "VMCALL with invalid VM-exit control fields", 29 | 22 => "VMCALL with incorrect MSEG revision identifier (when attempting to activate the dual-monitor treatment of SMIs and SMM)", 30 | 23 => "VMXOFF under dual-monitor treatment of SMIs and SMM", 31 | 24 => "VMCALL with invalid SMM-monitor features (when attempting to activate the dual-monitor treatment of SMIs and SMM)", 32 | 25 => "VM entry with invalid VM-execution control fields in executive VMCS (when attempting to return from SMM)", 33 | 26 => "VM entry with events blocked by MOV SS", 34 | 28 => "Invalid operand to INVEPT/INVVPID", 35 | _ => "[INVALID]", 36 | } 37 | } 38 | } 39 | 40 | impl From for VmxInstructionError { 41 | fn from(value: u32) -> 
Self { 42 | Self(value) 43 | } 44 | } 45 | 46 | impl Debug for VmxInstructionError { 47 | fn fmt(&self, f: &mut Formatter) -> Result { 48 | write!(f, "VmxInstructionError({}, {:?})", self.0, self.as_str()) 49 | } 50 | } 51 | 52 | numeric_enum_macro::numeric_enum! { 53 | #[repr(u32)] 54 | #[derive(Debug, Copy, Clone, Eq, PartialEq)] 55 | #[allow(non_camel_case_types)] 56 | /// VMX basic exit reasons. (SDM Vol. 3D, Appendix C) 57 | pub enum VmxExitReason { 58 | EXCEPTION_NMI = 0, 59 | EXTERNAL_INTERRUPT = 1, 60 | TRIPLE_FAULT = 2, 61 | INIT = 3, 62 | SIPI = 4, 63 | SMI = 5, 64 | OTHER_SMI = 6, 65 | INTERRUPT_WINDOW = 7, 66 | NMI_WINDOW = 8, 67 | TASK_SWITCH = 9, 68 | CPUID = 10, 69 | GETSEC = 11, 70 | HLT = 12, 71 | INVD = 13, 72 | INVLPG = 14, 73 | RDPMC = 15, 74 | RDTSC = 16, 75 | RSM = 17, 76 | VMCALL = 18, 77 | VMCLEAR = 19, 78 | VMLAUNCH = 20, 79 | VMPTRLD = 21, 80 | VMPTRST = 22, 81 | VMREAD = 23, 82 | VMRESUME = 24, 83 | VMWRITE = 25, 84 | VMOFF = 26, 85 | VMON = 27, 86 | CR_ACCESS = 28, 87 | DR_ACCESS = 29, 88 | IO_INSTRUCTION = 30, 89 | MSR_READ = 31, 90 | MSR_WRITE = 32, 91 | INVALID_GUEST_STATE = 33, 92 | MSR_LOAD_FAIL = 34, 93 | MWAIT_INSTRUCTION = 36, 94 | MONITOR_TRAP_FLAG = 37, 95 | MONITOR_INSTRUCTION = 39, 96 | PAUSE_INSTRUCTION = 40, 97 | MCE_DURING_VMENTRY = 41, 98 | TPR_BELOW_THRESHOLD = 43, 99 | APIC_ACCESS = 44, 100 | VIRTUALIZED_EOI = 45, 101 | GDTR_IDTR = 46, 102 | LDTR_TR = 47, 103 | EPT_VIOLATION = 48, 104 | EPT_MISCONFIG = 49, 105 | INVEPT = 50, 106 | RDTSCP = 51, 107 | PREEMPTION_TIMER = 52, 108 | INVVPID = 53, 109 | WBINVD = 54, 110 | XSETBV = 55, 111 | APIC_WRITE = 56, 112 | RDRAND = 57, 113 | INVPCID = 58, 114 | VMFUNC = 59, 115 | ENCLS = 60, 116 | RDSEED = 61, 117 | PML_FULL = 62, 118 | XSAVES = 63, 119 | XRSTORS = 64, 120 | PCONFIG = 65, 121 | SPP_EVENT = 66, 122 | UMWAIT = 67, 123 | TPAUSE = 68, 124 | LOADIWKEY = 69, 125 | } 126 | } 127 | 128 | numeric_enum_macro::numeric_enum! 
{ 129 | #[repr(u8)] 130 | #[derive(Debug, Copy, Clone, Eq, PartialEq)] 131 | /// The interruption type (bits 10:8) in VM-Entry Interruption-Information Field 132 | /// and VM-Exit Interruption-Information Field. (SDM Vol. 3C, Section 24.8.3, 24.9.2) 133 | pub enum VmxInterruptionType { 134 | /// External interrupt 135 | External = 0, 136 | /// Reserved 137 | Reserved = 1, 138 | /// Non-maskable interrupt (NMI) 139 | NMI = 2, 140 | /// Hardware exception (e.g,. #PF) 141 | HardException = 3, 142 | /// Software interrupt (INT n) 143 | SoftIntr = 4, 144 | /// Privileged software exception (INT1) 145 | PrivSoftException = 5, 146 | /// Software exception (INT3 or INTO) 147 | SoftException = 6, 148 | /// Other event 149 | Other = 7, 150 | } 151 | } 152 | 153 | impl VmxInterruptionType { 154 | /// Whether the exception/interrupt with `vector` has an error code. 155 | pub const fn vector_has_error_code(vector: u8) -> bool { 156 | use x86::irq::*; 157 | matches!( 158 | vector, 159 | DOUBLE_FAULT_VECTOR 160 | | INVALID_TSS_VECTOR 161 | | SEGMENT_NOT_PRESENT_VECTOR 162 | | STACK_SEGEMENT_FAULT_VECTOR 163 | | GENERAL_PROTECTION_FAULT_VECTOR 164 | | PAGE_FAULT_VECTOR 165 | | ALIGNMENT_CHECK_VECTOR 166 | ) 167 | } 168 | 169 | /// Determine interruption type by the interrupt vector. 170 | pub const fn from_vector(vector: u8) -> Self { 171 | // SDM Vol. 3C, Section 24.8.3 172 | use x86::irq::*; 173 | match vector { 174 | DEBUG_VECTOR => Self::PrivSoftException, 175 | NONMASKABLE_INTERRUPT_VECTOR => Self::NMI, 176 | BREAKPOINT_VECTOR | OVERFLOW_VECTOR => Self::SoftException, 177 | // SDM Vol. 3A, Section 6.15: All other vectors from 0 to 21 are exceptions. 178 | 0..=VIRTUALIZATION_VECTOR => Self::HardException, 179 | 32..=255 => Self::External, 180 | _ => Self::Other, 181 | } 182 | } 183 | 184 | /// For software interrupt, software exception, or privileged software 185 | /// exception,we need to set VM-Entry Instruction Length Field. 
186 | pub const fn is_soft(&self) -> bool { 187 | matches!( 188 | *self, 189 | Self::SoftIntr | Self::SoftException | Self::PrivSoftException 190 | ) 191 | } 192 | } 193 | -------------------------------------------------------------------------------- /rvm/src/arch/x86_64/vmx/structs.rs: -------------------------------------------------------------------------------- 1 | use bit_field::BitField; 2 | use bitflags::bitflags; 3 | 4 | use crate::arch::msr::{Msr, MsrReadWrite}; 5 | use crate::mm::{PhysFrame, PAGE_SIZE}; 6 | use crate::{HostPhysAddr, RvmHal, RvmResult}; 7 | 8 | /// VMCS/VMXON region in 4K size. (SDM Vol. 3C, Section 24.2) 9 | #[derive(Debug)] 10 | pub struct VmxRegion { 11 | frame: PhysFrame, 12 | } 13 | 14 | impl VmxRegion { 15 | pub const unsafe fn uninit() -> Self { 16 | Self { 17 | frame: PhysFrame::uninit(), 18 | } 19 | } 20 | 21 | pub fn new(revision_id: u32, shadow_indicator: bool) -> RvmResult { 22 | let frame = PhysFrame::alloc_zero()?; 23 | unsafe { 24 | (*(frame.as_mut_ptr() as *mut u32)) 25 | .set_bits(0..=30, revision_id) 26 | .set_bit(31, shadow_indicator); 27 | } 28 | Ok(Self { frame }) 29 | } 30 | 31 | pub fn phys_addr(&self) -> HostPhysAddr { 32 | self.frame.start_paddr() 33 | } 34 | } 35 | 36 | #[derive(Debug)] 37 | pub struct MsrBitmap { 38 | frame: PhysFrame, 39 | } 40 | 41 | impl MsrBitmap { 42 | pub fn passthrough_all() -> RvmResult { 43 | Ok(Self { 44 | frame: PhysFrame::alloc_zero()?, 45 | }) 46 | } 47 | 48 | #[allow(unused)] 49 | pub fn intercept_all() -> RvmResult { 50 | let mut frame = PhysFrame::alloc()?; 51 | frame.fill(u8::MAX); 52 | Ok(Self { frame }) 53 | } 54 | 55 | pub fn phys_addr(&self) -> HostPhysAddr { 56 | self.frame.start_paddr() 57 | } 58 | 59 | fn set_intercept(&mut self, msr: u32, is_write: bool, intercept: bool) { 60 | let offset = if msr <= 0x1fff { 61 | if !is_write { 62 | 0 // Read bitmap for low MSRs (0x0000_0000..0x0000_1FFF) 63 | } else { 64 | 2 // Write bitmap for low MSRs (0x0000_0000..0x0000_1FFF) 65 
| } 66 | } else if (0xc000_0000..=0xc000_1fff).contains(&msr) { 67 | if !is_write { 68 | 1 // Read bitmap for high MSRs (0xC000_0000..0xC000_1FFF) 69 | } else { 70 | 3 // Write bitmap for high MSRs (0xC000_0000..0xC000_1FFF) 71 | } 72 | } else { 73 | unreachable!() 74 | } * 1024; 75 | let bitmap = 76 | unsafe { core::slice::from_raw_parts_mut(self.frame.as_mut_ptr().add(offset), 1024) }; 77 | let msr = msr & 0x1fff; 78 | let byte = (msr / 8) as usize; 79 | let bits = msr % 8; 80 | if intercept { 81 | bitmap[byte] |= 1 << bits; 82 | } else { 83 | bitmap[byte] &= !(1 << bits); 84 | } 85 | } 86 | 87 | pub fn set_read_intercept(&mut self, msr: u32, intercept: bool) { 88 | self.set_intercept(msr, false, intercept); 89 | } 90 | 91 | pub fn set_write_intercept(&mut self, msr: u32, intercept: bool) { 92 | self.set_intercept(msr, true, intercept); 93 | } 94 | } 95 | 96 | /// Reporting Register of Basic VMX Capabilities. (SDM Vol. 3D, Appendix A.1) 97 | #[derive(Debug)] 98 | pub struct VmxBasic { 99 | /// The 31-bit VMCS revision identifier used by the processor. 100 | pub revision_id: u32, 101 | /// The number of bytes that software should allocate for the VMXON region 102 | /// and any VMCS region. 103 | pub region_size: u16, 104 | /// The width of the physical addresses that may be used for the VMXON 105 | /// region, each VMCS, and data structures referenced by pointers in a VMCS. 106 | pub is_32bit_address: bool, 107 | /// The memory type that should be used for the VMCS, for data structures 108 | /// referenced by pointers in the VMCS. 109 | pub mem_type: u8, 110 | /// The processor reports information in the VM-exit instruction-information 111 | /// field on VM exits due to execution of the INS and OUTS instructions. 112 | pub io_exit_info: bool, 113 | /// If any VMX controls that default to 1 may be cleared to 0. 
114 | pub vmx_flex_controls: bool, 115 | } 116 | 117 | impl MsrReadWrite for VmxBasic { 118 | const MSR: Msr = Msr::IA32_VMX_BASIC; 119 | } 120 | 121 | impl VmxBasic { 122 | pub const VMX_MEMORY_TYPE_WRITE_BACK: u8 = 6; 123 | 124 | /// Read the current IA32_VMX_BASIC flags. 125 | pub fn read() -> Self { 126 | let msr = Self::read_raw(); 127 | Self { 128 | revision_id: msr.get_bits(0..31) as u32, 129 | region_size: msr.get_bits(32..45) as u16, 130 | is_32bit_address: msr.get_bit(48), 131 | mem_type: msr.get_bits(50..54) as u8, 132 | io_exit_info: msr.get_bit(54), 133 | vmx_flex_controls: msr.get_bit(55), 134 | } 135 | } 136 | } 137 | 138 | bitflags! { 139 | /// IA32_FEATURE_CONTROL flags. 140 | pub struct FeatureControlFlags: u64 { 141 | /// Lock bit: when set, locks this MSR from being written. when clear, 142 | /// VMXON causes a #GP. 143 | const LOCKED = 1 << 0; 144 | /// Enable VMX inside SMX operation. 145 | const VMXON_ENABLED_INSIDE_SMX = 1 << 1; 146 | /// Enable VMX outside SMX operation. 147 | const VMXON_ENABLED_OUTSIDE_SMX = 1 << 2; 148 | } 149 | } 150 | 151 | /// Control Features in Intel 64 Processor. (SDM Vol. 3C, Section 23.7) 152 | pub struct FeatureControl; 153 | 154 | impl MsrReadWrite for FeatureControl { 155 | const MSR: Msr = Msr::IA32_FEATURE_CONTROL; 156 | } 157 | 158 | impl FeatureControl { 159 | /// Read the current IA32_FEATURE_CONTROL flags. 160 | pub fn read() -> FeatureControlFlags { 161 | FeatureControlFlags::from_bits_truncate(Self::read_raw()) 162 | } 163 | 164 | /// Write IA32_FEATURE_CONTROL flags, preserving reserved values. 165 | pub fn write(flags: FeatureControlFlags) { 166 | let old_value = Self::read_raw(); 167 | let reserved = old_value & !(FeatureControlFlags::all().bits()); 168 | let new_value = reserved | flags.bits(); 169 | unsafe { Self::write_raw(new_value) }; 170 | } 171 | } 172 | 173 | bitflags! { 174 | /// Extended-Page-Table Pointer. (SDM Vol. 
3C, Section 24.6.11) 175 | pub struct EPTPointer: u64 { 176 | /// EPT paging-structure memory type: Uncacheable (UC). 177 | #[allow(clippy::identity_op)] 178 | const MEM_TYPE_UC = 0 << 0; 179 | /// EPT paging-structure memory type: Write-back (WB). 180 | #[allow(clippy::identity_op)] 181 | const MEM_TYPE_WB = 6 << 0; 182 | /// EPT page-walk length 1. 183 | const WALK_LENGTH_1 = 0 << 3; 184 | /// EPT page-walk length 2. 185 | const WALK_LENGTH_2 = 1 << 3; 186 | /// EPT page-walk length 3. 187 | const WALK_LENGTH_3 = 2 << 3; 188 | /// EPT page-walk length 4. 189 | const WALK_LENGTH_4 = 3 << 3; 190 | /// Setting this control to 1 enables accessed and dirty flags for EPT. 191 | const ENABLE_ACCESSED_DIRTY = 1 << 6; 192 | } 193 | } 194 | 195 | impl EPTPointer { 196 | pub fn from_table_phys(pml4_paddr: HostPhysAddr) -> Self { 197 | let aligned_addr = pml4_paddr & !(PAGE_SIZE - 1); 198 | let flags = unsafe { Self::from_bits_unchecked(aligned_addr as u64) }; 199 | flags | Self::MEM_TYPE_WB | Self::WALK_LENGTH_4 | Self::ENABLE_ACCESSED_DIRTY 200 | } 201 | } 202 | -------------------------------------------------------------------------------- /hypervisor/src/hv/vmexit.rs: -------------------------------------------------------------------------------- 1 | use super::device_emu::{self, VirtLocalApic}; 2 | use super::hal::RvmHalImpl; 3 | use rvm::arch::{VmxExitInfo, VmxExitReason}; 4 | use rvm::{RvmError, RvmResult, RvmVcpu}; 5 | 6 | type Vcpu = RvmVcpu; 7 | 8 | const VM_EXIT_INSTR_LEN_CPUID: u8 = 2; 9 | const VM_EXIT_INSTR_LEN_RDMSR: u8 = 2; 10 | const VM_EXIT_INSTR_LEN_WRMSR: u8 = 2; 11 | const VM_EXIT_INSTR_LEN_VMCALL: u8 = 3; 12 | 13 | fn handle_external_interrupt(vcpu: &mut Vcpu) -> RvmResult { 14 | let int_info = vcpu.interrupt_exit_info()?; 15 | trace!("VM-exit: external interrupt: {:#x?}", int_info); 16 | assert!(int_info.valid); 17 | crate::arch::handle_irq(int_info.vector); 18 | Ok(()) 19 | } 20 | 21 | fn handle_cpuid(vcpu: &mut Vcpu) -> RvmResult { 22 | use 
raw_cpuid::{cpuid, CpuIdResult}; 23 | 24 | const LEAF_FEATURE_INFO: u32 = 0x1; 25 | const LEAF_HYPERVISOR_INFO: u32 = 0x4000_0000; 26 | const LEAF_HYPERVISOR_FEATURE: u32 = 0x4000_0001; 27 | const VENDOR_STR: &[u8; 12] = b"RVMRVMRVMRVM"; 28 | let vendor_regs = unsafe { &*(VENDOR_STR.as_ptr() as *const [u32; 3]) }; 29 | 30 | let regs = vcpu.regs_mut(); 31 | let function = regs.rax as u32; 32 | let res = match function { 33 | LEAF_FEATURE_INFO => { 34 | const FEATURE_VMX: u32 = 1 << 5; 35 | const FEATURE_HYPERVISOR: u32 = 1 << 31; 36 | let mut res = cpuid!(regs.rax, regs.rcx); 37 | res.ecx &= !FEATURE_VMX; 38 | res.ecx |= FEATURE_HYPERVISOR; 39 | res 40 | } 41 | LEAF_HYPERVISOR_INFO => CpuIdResult { 42 | eax: LEAF_HYPERVISOR_FEATURE, 43 | ebx: vendor_regs[0], 44 | ecx: vendor_regs[1], 45 | edx: vendor_regs[2], 46 | }, 47 | LEAF_HYPERVISOR_FEATURE => CpuIdResult { 48 | eax: 0, 49 | ebx: 0, 50 | ecx: 0, 51 | edx: 0, 52 | }, 53 | _ => cpuid!(regs.rax, regs.rcx), 54 | }; 55 | 56 | debug!( 57 | "VM exit: CPUID({:#x}, {:#x}): {:?}", 58 | regs.rax, regs.rcx, res 59 | ); 60 | regs.rax = res.eax as _; 61 | regs.rbx = res.ebx as _; 62 | regs.rcx = res.ecx as _; 63 | regs.rdx = res.edx as _; 64 | vcpu.advance_rip(VM_EXIT_INSTR_LEN_CPUID)?; 65 | Ok(()) 66 | } 67 | 68 | fn handle_hypercall(vcpu: &mut Vcpu) -> RvmResult { 69 | let regs = vcpu.regs(); 70 | info!( 71 | "VM exit: VMCALL({:#x}): {:?}", 72 | regs.rax, 73 | [regs.rdi, regs.rsi, regs.rdx, regs.rcx] 74 | ); 75 | vcpu.advance_rip(VM_EXIT_INSTR_LEN_VMCALL)?; 76 | Ok(()) 77 | } 78 | 79 | fn handle_io_instruction(vcpu: &mut Vcpu, exit_info: &VmxExitInfo) -> RvmResult { 80 | let io_info = vcpu.io_exit_info()?; 81 | trace!( 82 | "VM exit: I/O instruction @ {:#x}: {:#x?}", 83 | exit_info.guest_rip, 84 | io_info, 85 | ); 86 | if io_info.is_string { 87 | error!("INS/OUTS instructions are not supported!"); 88 | return Err(RvmError::Unsupported); 89 | } 90 | if io_info.is_repeat { 91 | error!("REP prefixed I/O instructions are not 
supported!"); 92 | return Err(RvmError::Unsupported); 93 | } 94 | 95 | if let Some(dev) = device_emu::all_virt_devices().find_port_io_device(io_info.port) { 96 | if io_info.is_in { 97 | let value = dev.read(io_info.port, io_info.access_size)?; 98 | let rax = &mut vcpu.regs_mut().rax; 99 | // SDM Vol. 1, Section 3.4.1.1: 100 | // * 32-bit operands generate a 32-bit result, zero-extended to a 64-bit result in the 101 | // destination general-purpose register. 102 | // * 8-bit and 16-bit operands generate an 8-bit or 16-bit result. The upper 56 bits or 103 | // 48 bits (respectively) of the destination general-purpose register are not modified 104 | // by the operation. 105 | match io_info.access_size { 106 | 1 => *rax = (*rax & !0xff) | (value & 0xff) as u64, 107 | 2 => *rax = (*rax & !0xffff) | (value & 0xffff) as u64, 108 | 4 => *rax = value as u64, 109 | _ => unreachable!(), 110 | } 111 | } else { 112 | let rax = vcpu.regs().rax; 113 | let value = match io_info.access_size { 114 | 1 => rax & 0xff, 115 | 2 => rax & 0xffff, 116 | 4 => rax, 117 | _ => unreachable!(), 118 | } as u32; 119 | dev.write(io_info.port, io_info.access_size, value)?; 120 | } 121 | } else { 122 | panic!( 123 | "Unsupported I/O port {:#x} access: {:#x?}", 124 | io_info.port, io_info 125 | ) 126 | } 127 | vcpu.advance_rip(exit_info.exit_instruction_length as _)?; 128 | Ok(()) 129 | } 130 | 131 | fn handle_msr_read(vcpu: &mut Vcpu) -> RvmResult { 132 | let msr = vcpu.regs().rcx as u32; 133 | 134 | use x86::msr::*; 135 | let res = if msr == IA32_APIC_BASE { 136 | let mut apic_base = unsafe { rdmsr(IA32_APIC_BASE) }; 137 | apic_base |= 1 << 11 | 1 << 10; // enable xAPIC and x2APIC 138 | Ok(apic_base) 139 | } else if VirtLocalApic::msr_range().contains(&msr) { 140 | VirtLocalApic::rdmsr(vcpu, msr) 141 | } else { 142 | Err(RvmError::Unsupported) 143 | }; 144 | 145 | if let Ok(value) = res { 146 | debug!("VM exit: RDMSR({:#x}) -> {:#x}", msr, value); 147 | vcpu.regs_mut().rax = value & 0xffff_ffff; 
148 | vcpu.regs_mut().rdx = value >> 32; 149 | } else { 150 | panic!("Failed to handle RDMSR({:#x}): {:?}", msr, res); 151 | } 152 | vcpu.advance_rip(VM_EXIT_INSTR_LEN_RDMSR)?; 153 | Ok(()) 154 | } 155 | 156 | fn handle_msr_write(vcpu: &mut Vcpu) -> RvmResult { 157 | let msr = vcpu.regs().rcx as u32; 158 | let value = (vcpu.regs().rax & 0xffff_ffff) | (vcpu.regs().rdx << 32); 159 | debug!("VM exit: WRMSR({:#x}) <- {:#x}", msr, value); 160 | 161 | use x86::msr::*; 162 | let res = if msr == IA32_APIC_BASE { 163 | Ok(()) // ignore 164 | } else if VirtLocalApic::msr_range().contains(&msr) { 165 | VirtLocalApic::wrmsr(vcpu, msr, value) 166 | } else { 167 | Err(RvmError::Unsupported) 168 | }; 169 | 170 | if res.is_err() { 171 | panic!( 172 | "Failed to handle WRMSR({:#x}) <- {:#x}: {:?}", 173 | msr, value, res 174 | ); 175 | } 176 | vcpu.advance_rip(VM_EXIT_INSTR_LEN_WRMSR)?; 177 | Ok(()) 178 | } 179 | 180 | fn handle_ept_violation(vcpu: &Vcpu, guest_rip: usize) -> RvmResult { 181 | let fault_info = vcpu.nested_page_fault_info()?; 182 | panic!( 183 | "VM exit: EPT violation @ {:#x}, fault_paddr={:#x}, access_flags=({:?})", 184 | guest_rip, fault_info.fault_guest_paddr, fault_info.access_flags 185 | ); 186 | } 187 | 188 | pub fn vmexit_handler(vcpu: &mut Vcpu) -> RvmResult { 189 | let exit_info = vcpu.exit_info()?; 190 | trace!("VM exit: {:#x?}", exit_info); 191 | 192 | if exit_info.entry_failure { 193 | panic!("VM entry failed: {:#x?}", exit_info); 194 | } 195 | 196 | let res = match exit_info.exit_reason { 197 | VmxExitReason::EXTERNAL_INTERRUPT => handle_external_interrupt(vcpu), 198 | VmxExitReason::INTERRUPT_WINDOW => vcpu.set_interrupt_window(false), 199 | VmxExitReason::CPUID => handle_cpuid(vcpu), 200 | VmxExitReason::VMCALL => handle_hypercall(vcpu), 201 | VmxExitReason::IO_INSTRUCTION => handle_io_instruction(vcpu, &exit_info), 202 | VmxExitReason::MSR_READ => handle_msr_read(vcpu), 203 | VmxExitReason::MSR_WRITE => handle_msr_write(vcpu), 204 | 
VmxExitReason::EPT_VIOLATION => handle_ept_violation(vcpu, exit_info.guest_rip), 205 | _ => panic!( 206 | "Unhandled VM-Exit reason {:?}:\n{:#x?}", 207 | exit_info.exit_reason, vcpu 208 | ), 209 | }; 210 | 211 | if res.is_err() { 212 | panic!( 213 | "Failed to handle VM-exit {:?}:\n{:#x?}", 214 | exit_info.exit_reason, vcpu 215 | ); 216 | } 217 | 218 | Ok(()) 219 | } 220 | -------------------------------------------------------------------------------- /rvm/src/mm/page_table.rs: -------------------------------------------------------------------------------- 1 | use alloc::{vec, vec::Vec}; 2 | use core::{fmt::Debug, marker::PhantomData}; 3 | 4 | use super::{MemFlags, PhysFrame, PAGE_SIZE}; 5 | use crate::{RvmHal, RvmResult}; 6 | 7 | const LEVELS: usize = 4; 8 | const ENTRY_COUNT: usize = 512; 9 | 10 | type VirtAddr = super::GuestPhysAddr; 11 | type PhysAddr = super::HostPhysAddr; 12 | 13 | const fn p4_index(vaddr: VirtAddr) -> usize { 14 | (vaddr >> (12 + 27)) & (ENTRY_COUNT - 1) 15 | } 16 | 17 | const fn p3_index(vaddr: VirtAddr) -> usize { 18 | (vaddr >> (12 + 18)) & (ENTRY_COUNT - 1) 19 | } 20 | 21 | const fn p2_index(vaddr: VirtAddr) -> usize { 22 | (vaddr >> (12 + 9)) & (ENTRY_COUNT - 1) 23 | } 24 | 25 | const fn p1_index(vaddr: VirtAddr) -> usize { 26 | (vaddr >> 12) & (ENTRY_COUNT - 1) 27 | } 28 | 29 | const fn align_down(addr: usize) -> usize { 30 | addr & !(PAGE_SIZE - 1) 31 | } 32 | 33 | const fn page_offset(addr: usize) -> usize { 34 | addr & (PAGE_SIZE - 1) 35 | } 36 | 37 | pub trait GenericPTE: Debug + Clone + Copy + Sync + Send + Sized { 38 | // Create a page table entry point to a terminate 4K-sized page or a huge page. 39 | fn new_page(paddr: PhysAddr, flags: MemFlags, is_huge: bool) -> Self; 40 | // Create a page table entry point to a next level page table. 41 | fn new_table(paddr: PhysAddr) -> Self; 42 | 43 | /// Returns the physical address mapped by this entry. 44 | fn paddr(&self) -> PhysAddr; 45 | /// Returns the flags of this entry. 
46 | fn flags(&self) -> MemFlags; 47 | /// Returns whether this entry is zero. 48 | fn is_unused(&self) -> bool; 49 | /// Returns whether this entry flag indicates present. 50 | fn is_present(&self) -> bool; 51 | /// For non-last level translation, returns whether this entry maps to a 52 | /// huge frame. 53 | fn is_huge(&self) -> bool; 54 | /// Set this entry to zero. 55 | fn clear(&mut self); 56 | } 57 | 58 | /// A generic 4-level page table structures. 59 | pub struct Level4PageTable { 60 | root_paddr: PhysAddr, 61 | intrm_tables: Vec>, 62 | _phantom: PhantomData, 63 | } 64 | 65 | impl Level4PageTable { 66 | /// Create a page table instance. 67 | pub fn new() -> RvmResult { 68 | let root_frame = PhysFrame::alloc_zero()?; 69 | Ok(Self { 70 | root_paddr: root_frame.start_paddr(), 71 | intrm_tables: vec![root_frame], 72 | _phantom: PhantomData, 73 | }) 74 | } 75 | 76 | /// Physical address of the page table root. 77 | pub fn root_paddr(&self) -> PhysAddr { 78 | self.root_paddr 79 | } 80 | 81 | /// Create a mapping from the virtual address `vaddr` to the physical address 82 | /// `paddr`, with memory permissions and types described by `flags`. 83 | pub fn map(&mut self, vaddr: VirtAddr, paddr: PhysAddr, flags: MemFlags) -> RvmResult { 84 | let entry = self.get_entry_mut_or_create(vaddr)?; 85 | if !entry.is_unused() { 86 | return rvm_err!( 87 | InvalidParam, 88 | format_args!("try to map an already mapped page {:#x}", vaddr) 89 | ); 90 | } 91 | *entry = GenericPTE::new_page(align_down(paddr), flags, false); 92 | Ok(()) 93 | } 94 | 95 | /// Remove mappings for the virtual address `vaddr`. 
96 | pub fn unmap(&mut self, vaddr: VirtAddr) -> RvmResult { 97 | let entry = self.get_entry_mut(vaddr)?; 98 | if entry.is_unused() { 99 | return rvm_err!( 100 | InvalidParam, 101 | format_args!("try to unmap an unmapped page {:#x}", vaddr) 102 | ); 103 | } 104 | let paddr = entry.paddr(); 105 | entry.clear(); 106 | Ok(paddr) 107 | } 108 | 109 | /// Query the mapping target for the virtual address `vaddr`, return the 110 | /// target physical address and memory permissions. 111 | pub fn query(&self, vaddr: VirtAddr) -> RvmResult<(PhysAddr, MemFlags)> { 112 | let entry = self.get_entry_mut(vaddr)?; 113 | if entry.is_unused() { 114 | return rvm_err!( 115 | InvalidParam, 116 | format_args!("queried page {:#x} is not mapped", vaddr) 117 | ); 118 | } 119 | let off = page_offset(vaddr); 120 | Ok((entry.paddr() + off, entry.flags())) 121 | } 122 | 123 | /// Update the mapping target for the virtual address `vaddr`. 124 | pub fn update( 125 | &mut self, 126 | vaddr: VirtAddr, 127 | paddr: Option, 128 | flags: Option, 129 | ) -> RvmResult { 130 | let entry = self.get_entry_mut(vaddr)?; 131 | let paddr = align_down(paddr.unwrap_or_else(|| entry.paddr())); 132 | let flags = flags.unwrap_or_else(|| entry.flags()); 133 | *entry = GenericPTE::new_page(paddr, flags, entry.is_huge()); 134 | Ok(()) 135 | } 136 | 137 | /// Print the page table contents recursively for debugging. 
138 | pub fn dump(&self, limit: usize) { 139 | info!("Root: {:x?}", self.root_paddr()); 140 | self.walk( 141 | self.table_of(self.root_paddr()), 142 | 0, 143 | 0, 144 | limit, 145 | &|level: usize, idx: usize, vaddr: VirtAddr, entry: &PTE| { 146 | for _ in 0..level { 147 | info!(" "); 148 | } 149 | info!("[{} - {:x}], 0x{:08x?}: {:x?}", level, idx, vaddr, entry); 150 | }, 151 | ); 152 | } 153 | } 154 | 155 | impl Level4PageTable { 156 | fn table_of<'a>(&self, paddr: PhysAddr) -> &'a [PTE] { 157 | let ptr = H::phys_to_virt(paddr) as *const PTE; 158 | unsafe { core::slice::from_raw_parts(ptr, ENTRY_COUNT) } 159 | } 160 | 161 | fn table_of_mut<'a>(&self, paddr: PhysAddr) -> &'a mut [PTE] { 162 | let ptr = H::phys_to_virt(paddr) as *mut PTE; 163 | unsafe { core::slice::from_raw_parts_mut(ptr, ENTRY_COUNT) } 164 | } 165 | 166 | fn next_table_mut<'a>(&self, entry: &PTE) -> RvmResult<&'a mut [PTE]> { 167 | if !entry.is_present() { 168 | rvm_err!(BadState, "next table entry not present") 169 | } else if entry.is_huge() { 170 | rvm_err!(BadState, "next table entry is huge") 171 | } else { 172 | Ok(self.table_of_mut(entry.paddr())) 173 | } 174 | } 175 | 176 | fn next_table_mut_or_create<'a>(&mut self, entry: &mut PTE) -> RvmResult<&'a mut [PTE]> { 177 | if entry.is_unused() { 178 | let paddr = self.alloc_intrm_table()?; 179 | *entry = GenericPTE::new_table(paddr); 180 | Ok(self.table_of_mut(paddr)) 181 | } else { 182 | self.next_table_mut(entry) 183 | } 184 | } 185 | 186 | fn alloc_intrm_table(&mut self) -> RvmResult { 187 | let frame = PhysFrame::alloc_zero()?; 188 | let paddr = frame.start_paddr(); 189 | self.intrm_tables.push(frame); 190 | Ok(paddr) 191 | } 192 | 193 | fn get_entry_mut(&self, vaddr: VirtAddr) -> RvmResult<&mut PTE> { 194 | let p4 = self.table_of_mut(self.root_paddr()); 195 | let p4e = &mut p4[p4_index(vaddr)]; 196 | 197 | let p3 = self.next_table_mut(p4e)?; 198 | let p3e = &mut p3[p3_index(vaddr)]; 199 | 200 | let p2 = self.next_table_mut(p3e)?; 201 | let 
/// Visits the present entries of `table` in index order, descending into
/// lower-level tables depth-first.
///
/// * `table` - the entries of the table to scan.
/// * `level` - depth of `table`, `0` for the root.
/// * `start_vaddr` - address covered by entry 0 of `table`.
/// * `limit` - maximum number of present entries visited in *this* table;
///   the counter is local to each call, so every nested table gets the same
///   budget again.
/// * `func` - callback receiving `(level, index, vaddr, entry)`.
///
/// Panics if a present, non-huge entry above the last level cannot be
/// followed to its next table.
fn walk(
    &self,
    table: &[PTE],
    level: usize,
    start_vaddr: VirtAddr,
    limit: usize,
    func: &impl Fn(usize, usize, VirtAddr, &PTE),
) {
    let mut n = 0;
    for (i, entry) in table.iter().enumerate() {
        // Each entry at this level spans 2^(12 + 9 * remaining levels) bytes.
        let vaddr = start_vaddr + (i << (12 + (LEVELS - 1 - level) * 9));
        if entry.is_present() {
            func(level, i, vaddr, entry);
            // Only recurse through entries that point at another table.
            if level < LEVELS - 1 && !entry.is_huge() {
                let table_entry = self.next_table_mut(entry).unwrap();
                self.walk(table_entry, level + 1, vaddr, limit, func);
            }
            n += 1;
            if n >= limit {
                break;
            }
        }
    }
}
VmcsControlNW, VmcsGuest16, VmcsGuest32, VmcsGuest64, 14 | VmcsGuestNW, VmcsHost16, VmcsHost32, VmcsHost64, VmcsHostNW, 15 | }; 16 | use super::VmxPerCpuState; 17 | use crate::arch::{msr::Msr, ApicTimer, GeneralRegisters}; 18 | use crate::{GuestPhysAddr, HostPhysAddr, NestedPageFaultInfo, RvmHal, RvmResult}; 19 | 20 | /// A virtual CPU within a guest. 21 | #[repr(C)] 22 | pub struct VmxVcpu { 23 | guest_regs: GeneralRegisters, 24 | host_stack_top: u64, 25 | vmcs: VmxRegion, 26 | msr_bitmap: MsrBitmap, 27 | apic_timer: ApicTimer, 28 | pending_events: VecDeque<(u8, Option)>, 29 | } 30 | 31 | impl VmxVcpu { 32 | pub(crate) fn new( 33 | percpu: &VmxPerCpuState, 34 | entry: GuestPhysAddr, 35 | ept_root: HostPhysAddr, 36 | ) -> RvmResult { 37 | let mut vcpu = Self { 38 | guest_regs: GeneralRegisters::default(), 39 | host_stack_top: 0, 40 | vmcs: VmxRegion::new(percpu.vmcs_revision_id, false)?, 41 | msr_bitmap: MsrBitmap::passthrough_all()?, 42 | apic_timer: ApicTimer::new(), 43 | pending_events: VecDeque::with_capacity(8), 44 | }; 45 | vcpu.setup_msr_bitmap()?; 46 | vcpu.setup_vmcs(entry, ept_root)?; 47 | info!("[RVM] created VmxVcpu(vmcs: {:#x})", vcpu.vmcs.phys_addr()); 48 | Ok(vcpu) 49 | } 50 | 51 | /// Run the guest, never return. 52 | pub fn run(&mut self) -> ! { 53 | VmcsHostNW::RSP 54 | .write(&self.host_stack_top as *const _ as usize) 55 | .unwrap(); 56 | unsafe { self.vmx_launch() } 57 | } 58 | 59 | /// Basic information about VM exits. 60 | pub fn exit_info(&self) -> RvmResult { 61 | vmcs::exit_info() 62 | } 63 | 64 | /// Information for VM exits due to external interrupts. 65 | pub fn interrupt_exit_info(&self) -> RvmResult { 66 | vmcs::interrupt_exit_info() 67 | } 68 | 69 | /// Information for VM exits due to I/O instructions. 70 | pub fn io_exit_info(&self) -> RvmResult { 71 | vmcs::io_exit_info() 72 | } 73 | 74 | /// Information for VM exits due to nested page table faults (EPT violation). 
75 | pub fn nested_page_fault_info(&self) -> RvmResult { 76 | vmcs::ept_violation_info() 77 | } 78 | 79 | /// Guest general-purpose registers. 80 | pub fn regs(&self) -> &GeneralRegisters { 81 | &self.guest_regs 82 | } 83 | 84 | /// Mutable reference of guest general-purpose registers. 85 | pub fn regs_mut(&mut self) -> &mut GeneralRegisters { 86 | &mut self.guest_regs 87 | } 88 | 89 | /// Guest stack pointer. (`RSP`) 90 | pub fn stack_pointer(&self) -> usize { 91 | VmcsGuestNW::RSP.read().unwrap() 92 | } 93 | 94 | /// Set guest stack pointer. (`RSP`) 95 | pub fn set_stack_pointer(&mut self, rsp: usize) { 96 | VmcsGuestNW::RSP.write(rsp).unwrap() 97 | } 98 | 99 | /// Advance guest `RIP` by `instr_len` bytes. 100 | pub fn advance_rip(&mut self, instr_len: u8) -> RvmResult { 101 | Ok(VmcsGuestNW::RIP.write(VmcsGuestNW::RIP.read()? + instr_len as usize)?) 102 | } 103 | 104 | /// Add a virtual interrupt or exception to the pending events list, 105 | /// and try to inject it before later VM entries. 106 | pub fn inject_event(&mut self, vector: u8, err_code: Option) { 107 | self.pending_events.push_back((vector, err_code)); 108 | } 109 | 110 | /// If enable, a VM exit occurs at the beginning of any instruction if 111 | /// `RFLAGS.IF` = 1 and there are no other blocking of interrupts. 112 | /// (see SDM, Vol. 3C, Section 24.4.2) 113 | pub fn set_interrupt_window(&mut self, enable: bool) -> RvmResult { 114 | let mut ctrl = VmcsControl32::PRIMARY_PROCBASED_EXEC_CONTROLS.read()?; 115 | let bits = vmcs::controls::PrimaryControls::INTERRUPT_WINDOW_EXITING.bits(); 116 | if enable { 117 | ctrl |= bits 118 | } else { 119 | ctrl &= !bits 120 | } 121 | VmcsControl32::PRIMARY_PROCBASED_EXEC_CONTROLS.write(ctrl)?; 122 | Ok(()) 123 | } 124 | 125 | /// Returns the mutable reference of [`ApicTimer`]. 
126 | pub fn apic_timer_mut(&mut self) -> &mut ApicTimer { 127 | &mut self.apic_timer 128 | } 129 | } 130 | 131 | // Implementation of private methods 132 | impl VmxVcpu { 133 | fn setup_msr_bitmap(&mut self) -> RvmResult { 134 | // Intercept IA32_APIC_BASE MSR accesses 135 | let msr = x86::msr::IA32_APIC_BASE; 136 | self.msr_bitmap.set_read_intercept(msr, true); 137 | self.msr_bitmap.set_write_intercept(msr, true); 138 | // Intercept all x2APIC MSR accesses 139 | for msr in 0x800..=0x83f { 140 | self.msr_bitmap.set_read_intercept(msr, true); 141 | self.msr_bitmap.set_write_intercept(msr, true); 142 | } 143 | Ok(()) 144 | } 145 | 146 | fn setup_vmcs(&mut self, entry: GuestPhysAddr, ept_root: HostPhysAddr) -> RvmResult { 147 | let paddr = self.vmcs.phys_addr() as u64; 148 | unsafe { 149 | vmx::vmclear(paddr)?; 150 | vmx::vmptrld(paddr)?; 151 | } 152 | self.setup_vmcs_host()?; 153 | self.setup_vmcs_guest(entry)?; 154 | self.setup_vmcs_control(ept_root)?; 155 | Ok(()) 156 | } 157 | 158 | fn setup_vmcs_host(&mut self) -> RvmResult { 159 | VmcsHost64::IA32_PAT.write(Msr::IA32_PAT.read())?; 160 | VmcsHost64::IA32_EFER.write(Msr::IA32_EFER.read())?; 161 | 162 | VmcsHostNW::CR0.write(Cr0::read_raw() as _)?; 163 | VmcsHostNW::CR3.write(Cr3::read_raw().0.start_address().as_u64() as _)?; 164 | VmcsHostNW::CR4.write(Cr4::read_raw() as _)?; 165 | 166 | VmcsHost16::ES_SELECTOR.write(x86::segmentation::es().bits())?; 167 | VmcsHost16::CS_SELECTOR.write(x86::segmentation::cs().bits())?; 168 | VmcsHost16::SS_SELECTOR.write(x86::segmentation::ss().bits())?; 169 | VmcsHost16::DS_SELECTOR.write(x86::segmentation::ds().bits())?; 170 | VmcsHost16::FS_SELECTOR.write(x86::segmentation::fs().bits())?; 171 | VmcsHost16::GS_SELECTOR.write(x86::segmentation::gs().bits())?; 172 | VmcsHostNW::FS_BASE.write(Msr::IA32_FS_BASE.read() as _)?; 173 | VmcsHostNW::GS_BASE.write(Msr::IA32_GS_BASE.read() as _)?; 174 | 175 | let tr = unsafe { x86::task::tr() }; 176 | let mut gdtp = 
DescriptorTablePointer::::default(); 177 | let mut idtp = DescriptorTablePointer::::default(); 178 | unsafe { 179 | dtables::sgdt(&mut gdtp); 180 | dtables::sidt(&mut idtp); 181 | } 182 | VmcsHost16::TR_SELECTOR.write(tr.bits())?; 183 | VmcsHostNW::TR_BASE.write(get_tr_base(tr, &gdtp) as _)?; 184 | VmcsHostNW::GDTR_BASE.write(gdtp.base as _)?; 185 | VmcsHostNW::IDTR_BASE.write(idtp.base as _)?; 186 | VmcsHostNW::RIP.write(Self::vmx_exit as usize)?; 187 | 188 | VmcsHostNW::IA32_SYSENTER_ESP.write(0)?; 189 | VmcsHostNW::IA32_SYSENTER_EIP.write(0)?; 190 | VmcsHost32::IA32_SYSENTER_CS.write(0)?; 191 | Ok(()) 192 | } 193 | 194 | fn setup_vmcs_guest(&mut self, entry: GuestPhysAddr) -> RvmResult { 195 | let cr0_guest = Cr0Flags::EXTENSION_TYPE | Cr0Flags::NUMERIC_ERROR; 196 | let cr0_host_owned = 197 | Cr0Flags::NUMERIC_ERROR | Cr0Flags::NOT_WRITE_THROUGH | Cr0Flags::CACHE_DISABLE; 198 | let cr0_read_shadow = Cr0Flags::NUMERIC_ERROR; 199 | VmcsGuestNW::CR0.write(cr0_guest.bits() as _)?; 200 | VmcsControlNW::CR0_GUEST_HOST_MASK.write(cr0_host_owned.bits() as _)?; 201 | VmcsControlNW::CR0_READ_SHADOW.write(cr0_read_shadow.bits() as _)?; 202 | 203 | let cr4_guest = Cr4Flags::VIRTUAL_MACHINE_EXTENSIONS; 204 | let cr4_host_owned = Cr4Flags::VIRTUAL_MACHINE_EXTENSIONS; 205 | let cr4_read_shadow = 0; 206 | VmcsGuestNW::CR4.write(cr4_guest.bits() as _)?; 207 | VmcsControlNW::CR4_GUEST_HOST_MASK.write(cr4_host_owned.bits() as _)?; 208 | VmcsControlNW::CR4_READ_SHADOW.write(cr4_read_shadow)?; 209 | 210 | macro_rules! 
set_guest_segment { 211 | ($seg: ident, $access_rights: expr) => {{ 212 | use VmcsGuest16::*; 213 | use VmcsGuest32::*; 214 | use VmcsGuestNW::*; 215 | concat_idents!($seg, _SELECTOR).write(0)?; 216 | concat_idents!($seg, _BASE).write(0)?; 217 | concat_idents!($seg, _LIMIT).write(0xffff)?; 218 | concat_idents!($seg, _ACCESS_RIGHTS).write($access_rights)?; 219 | }}; 220 | } 221 | 222 | set_guest_segment!(ES, 0x93); // 16-bit, present, data, read/write, accessed 223 | set_guest_segment!(CS, 0x9b); // 16-bit, present, code, exec/read, accessed 224 | set_guest_segment!(SS, 0x93); 225 | set_guest_segment!(DS, 0x93); 226 | set_guest_segment!(FS, 0x93); 227 | set_guest_segment!(GS, 0x93); 228 | set_guest_segment!(TR, 0x8b); // present, system, 32-bit TSS busy 229 | set_guest_segment!(LDTR, 0x82); // present, system, LDT 230 | 231 | VmcsGuestNW::GDTR_BASE.write(0)?; 232 | VmcsGuest32::GDTR_LIMIT.write(0xffff)?; 233 | VmcsGuestNW::IDTR_BASE.write(0)?; 234 | VmcsGuest32::IDTR_LIMIT.write(0xffff)?; 235 | 236 | VmcsGuestNW::CR3.write(0)?; 237 | VmcsGuestNW::DR7.write(0x400)?; 238 | VmcsGuestNW::RSP.write(0)?; 239 | VmcsGuestNW::RIP.write(entry)?; 240 | VmcsGuestNW::RFLAGS.write(0x2)?; 241 | VmcsGuestNW::PENDING_DBG_EXCEPTIONS.write(0)?; 242 | VmcsGuestNW::IA32_SYSENTER_ESP.write(0)?; 243 | VmcsGuestNW::IA32_SYSENTER_EIP.write(0)?; 244 | VmcsGuest32::IA32_SYSENTER_CS.write(0)?; 245 | 246 | VmcsGuest32::INTERRUPTIBILITY_STATE.write(0)?; 247 | VmcsGuest32::ACTIVITY_STATE.write(0)?; 248 | VmcsGuest32::VMX_PREEMPTION_TIMER_VALUE.write(0)?; 249 | 250 | VmcsGuest64::LINK_PTR.write(u64::MAX)?; // SDM Vol. 3C, Section 24.4.2 251 | VmcsGuest64::IA32_DEBUGCTL.write(0)?; 252 | VmcsGuest64::IA32_PAT.write(Msr::IA32_PAT.read())?; 253 | VmcsGuest64::IA32_EFER.write(0)?; 254 | Ok(()) 255 | } 256 | 257 | fn setup_vmcs_control(&mut self, ept_root: HostPhysAddr) -> RvmResult { 258 | // Intercept NMI and external interrupts. 
use super::vmcs::controls::*;
        // Every `vmcs::set_control` call below passes, in order: the control field, its
        // capability MSR, the value that supplies the bits not mentioned explicitly, the
        // bits that must be set, and the bits that must be cleared.
        use PinbasedControls as PinCtrl;
        vmcs::set_control(
            VmcsControl32::PINBASED_EXEC_CONTROLS,
            Msr::IA32_VMX_TRUE_PINBASED_CTLS,
            Msr::IA32_VMX_PINBASED_CTLS.read() as u32,
            (PinCtrl::NMI_EXITING | PinCtrl::EXTERNAL_INTERRUPT_EXITING).bits(),
            0,
        )?;

        // Intercept all I/O instructions, use MSR bitmaps, activate secondary controls,
        // disable CR3 load/store interception.
        use PrimaryControls as CpuCtrl;
        vmcs::set_control(
            VmcsControl32::PRIMARY_PROCBASED_EXEC_CONTROLS,
            Msr::IA32_VMX_TRUE_PROCBASED_CTLS,
            Msr::IA32_VMX_PROCBASED_CTLS.read() as u32,
            (CpuCtrl::UNCOND_IO_EXITING | CpuCtrl::USE_MSR_BITMAPS | CpuCtrl::SECONDARY_CONTROLS)
                .bits(),
            (CpuCtrl::CR3_LOAD_EXITING | CpuCtrl::CR3_STORE_EXITING).bits(),
        )?;

        // Enable EPT, RDTSCP, INVPCID, and unrestricted guest.
        use SecondaryControls as CpuCtrl2;
        vmcs::set_control(
            VmcsControl32::SECONDARY_PROCBASED_EXEC_CONTROLS,
            Msr::IA32_VMX_PROCBASED_CTLS2,
            0,
            (CpuCtrl2::ENABLE_EPT
                | CpuCtrl2::ENABLE_RDTSCP
                | CpuCtrl2::ENABLE_INVPCID
                | CpuCtrl2::UNRESTRICTED_GUEST)
                .bits(),
            0,
        )?;

        // Switch to 64-bit host, acknowledge interrupt info, switch IA32_PAT/IA32_EFER on VM exit.
        use ExitControls as ExitCtrl;
        vmcs::set_control(
            VmcsControl32::VMEXIT_CONTROLS,
            Msr::IA32_VMX_TRUE_EXIT_CTLS,
            Msr::IA32_VMX_EXIT_CTLS.read() as u32,
            (ExitCtrl::HOST_ADDRESS_SPACE_SIZE
                | ExitCtrl::ACK_INTERRUPT_ON_EXIT
                | ExitCtrl::SAVE_IA32_PAT
                | ExitCtrl::LOAD_IA32_PAT
                | ExitCtrl::SAVE_IA32_EFER
                | ExitCtrl::LOAD_IA32_EFER)
                .bits(),
            0,
        )?;

        // Load guest IA32_PAT/IA32_EFER on VM entry.
        use EntryControls as EntryCtrl;
        vmcs::set_control(
            VmcsControl32::VMENTRY_CONTROLS,
            Msr::IA32_VMX_TRUE_ENTRY_CTLS,
            Msr::IA32_VMX_ENTRY_CTLS.read() as u32,
            (EntryCtrl::LOAD_IA32_PAT | EntryCtrl::LOAD_IA32_EFER).bits(),
            0,
        )?;

        vmcs::set_ept_pointer(ept_root)?;

        // No MSR switching on VM entry/exit: the hypervisor does not rely on it, and there
        // is only one vCPU.
        VmcsControl32::VMEXIT_MSR_STORE_COUNT.write(0)?;
        VmcsControl32::VMEXIT_MSR_LOAD_COUNT.write(0)?;
        VmcsControl32::VMENTRY_MSR_LOAD_COUNT.write(0)?;

        // Pass-through exceptions, don't use I/O bitmap, set MSR bitmaps.
        VmcsControl32::EXCEPTION_BITMAP.write(0)?;
        VmcsControl64::IO_BITMAP_A_ADDR.write(0)?;
        VmcsControl64::IO_BITMAP_B_ADDR.write(0)?;
        VmcsControl64::MSR_BITMAPS_ADDR.write(self.msr_bitmap.phys_addr() as _)?;
        Ok(())
    }

    /// Enters the guest for the first time with `vmlaunch`; never returns to the caller.
    ///
    /// Stores the current RSP at offset `host_stack_top` from `self` (passed in `rdi`),
    /// points RSP at `self` so that `restore_regs_from_stack!` loads the guest registers,
    /// then executes `vmlaunch`. If `vmlaunch` falls through, control jumps to
    /// `vmx_entry_failed`.
    #[naked]
    unsafe extern "C" fn vmx_launch(&mut self) -> ! {
        asm!(
            "mov [rdi + {host_stack_top}], rsp", // save current RSP to Vcpu::host_stack_top
            "mov rsp, rdi", // set RSP to guest regs area
            restore_regs_from_stack!(),
            "vmlaunch",
            "jmp {failed}",
            // NOTE(review): the type argument of `size_of` was lost in this listing —
            // presumably the guest register block that precedes `host_stack_top`; confirm.
            host_stack_top = const size_of::(),
            failed = sym Self::vmx_entry_failed,
            options(noreturn),
        )
    }

    /// VM-exit trampoline: saves the guest registers, runs `vmexit_handler` on the host
    /// stack, restores the guest registers and re-enters the guest with `vmresume`.
    #[naked]
    unsafe extern "C" fn vmx_exit(&mut self) -> !
{
        asm!(
            save_regs_to_stack!(),
            "mov r15, rsp", // save temporary RSP to r15
            "mov rdi, rsp", // set the first arg to &Vcpu
            "mov rsp, [rsp + {host_stack_top}]", // set RSP to Vcpu::host_stack_top
            "call {vmexit_handler}", // call vmexit_handler
            "mov rsp, r15", // load temporary RSP from r15
            restore_regs_from_stack!(),
            "vmresume",
            "jmp {failed}",
            // NOTE(review): the type argument of `size_of` was lost in this listing —
            // presumably the guest register block that precedes `host_stack_top`; confirm.
            host_stack_top = const size_of::(),
            vmexit_handler = sym Self::vmexit_handler,
            failed = sym Self::vmx_entry_failed,
            options(noreturn),
        );
    }

    /// Reached when `vmlaunch`/`vmresume` falls through: panics with the textual form of
    /// the VM-instruction error reported by `vmcs::instruction_error`.
    fn vmx_entry_failed() -> ! {
        panic!("{}", vmcs::instruction_error().as_str())
    }

    /// Whether an interrupt can be delivered to the guest right now, i.e. the guest's
    /// `RFLAGS.IF` is set and its interruptibility state is 0 (nothing is blocking).
    /// (SDM Vol. 3C, Section 24.4.2, Table 24-3)
    ///
    /// Panics if either VMCS read fails.
    fn allow_interrupt(&self) -> bool {
        let rflags = VmcsGuestNW::RFLAGS.read().unwrap();
        let block_state = VmcsGuest32::INTERRUPTIBILITY_STATE.read().unwrap();
        rflags as u64 & x86_64::registers::rflags::RFlags::INTERRUPT_FLAG.bits() != 0
            && block_state == 0
    }

    /// Try to inject a pending event before next VM entry.
    ///
    /// Only the event at the front of `pending_events` is considered. It is removed from
    /// the queue only after it has been written to the VMCS successfully.
    fn check_pending_events(&mut self) -> RvmResult {
        if let Some(event) = self.pending_events.front() {
            if event.0 < 32 || self.allow_interrupt() {
                // if it's an exception, or an interrupt that is not blocked, inject it directly.
                vmcs::inject_event(event.0, event.1)?;
                self.pending_events.pop_front();
            } else {
                // interrupts are blocked, enable interrupt-window exiting.
self.set_interrupt_window(true)?;
            }
        }
        Ok(())
    }

    /// Rust-side VM-exit handler, called from the `vmx_exit` trampoline.
    ///
    /// Delegates to `H::vmexit_handler`, queues the APIC timer vector if the timer reports
    /// an interrupt, then tries to inject the next pending event.
    ///
    /// Panics if `check_pending_events` fails.
    fn vmexit_handler(&mut self) {
        H::vmexit_handler(self);
        // Check if there is an APIC timer interrupt
        if self.apic_timer.check_interrupt() {
            self.inject_event(self.apic_timer.vector(), None);
        }
        self.check_pending_events().unwrap();
    }
}

// NOTE(review): `H` is used above, so the `impl` headers below have presumably lost their
// generic parameters in this listing — confirm against the original file.
impl Drop for VmxVcpu {
    /// Executes `vmclear` on this vCPU's VMCS region and logs the drop.
    ///
    /// Panics if `vmclear` fails.
    fn drop(&mut self) {
        unsafe { vmx::vmclear(self.vmcs.phys_addr() as u64).unwrap() };
        info!("[RVM] dropped VmxVcpu(vmcs: {:#x})", self.vmcs.phys_addr());
    }
}

/// Returns the base address of the TSS selected by `tr`, decoded from the descriptor table
/// described by `gdt`, or 0 when the descriptor's present bit (bit 47) is clear.
///
/// The table is read as 8-byte slots. Base bits 0..24 and 24..32 come from bits 16..40 and
/// 56..64 of the slot at `tr.index()`; base bits 32..64 come from the low 32 bits of the
/// following slot.
///
/// # Panics
///
/// Panics if `tr.index()` — or, for a present descriptor, the slot after it — lies beyond
/// the table limit.
fn get_tr_base(tr: SegmentSelector, gdt: &DescriptorTablePointer<u64>) -> u64 {
    const PRESENT: u64 = 1 << 47;

    let index = tr.index() as usize;
    let table_len = (gdt.limit as usize + 1) / core::mem::size_of::<u64>();
    // SAFETY: relies on `gdt` describing a readable descriptor table of `limit + 1` bytes;
    // the caller visible earlier in this file passes the pointer filled in by `sgdt`.
    let table = unsafe { core::slice::from_raw_parts(gdt.base, table_len) };
    let entry = table[index];
    if entry & PRESENT == 0 {
        // not present
        return 0;
    }
    let base_low = entry.get_bits(16..40) | (entry.get_bits(56..64) << 24);
    let base_high = table[index + 1] & 0xffff_ffff;
    base_low | (base_high << 32)
}

impl Debug for VmxVcpu {
    /// Formats the saved guest registers together with a selection of guest-state fields
    /// read from the VMCS. Panics if any of those reads fails.
    fn fmt(&self, f: &mut Formatter) -> Result {
        // The closure lets every VMCS read use `?`; its `Ok` payload is the formatter result.
        (|| -> RvmResult<Result> {
            Ok(f.debug_struct("VmxVcpu")
                .field("guest_regs", &self.guest_regs)
                .field("rip", &VmcsGuestNW::RIP.read()?)
                .field("rsp", &VmcsGuestNW::RSP.read()?)
                .field("rflags", &VmcsGuestNW::RFLAGS.read()?)
                .field("cr0", &VmcsGuestNW::CR0.read()?)
                .field("cr3", &VmcsGuestNW::CR3.read()?)
                .field("cr4", &VmcsGuestNW::CR4.read()?)
                .field("cs", &VmcsGuest16::CS_SELECTOR.read()?)
                .field("fs_base", &VmcsGuestNW::FS_BASE.read()?)
                .field("gs_base", &VmcsGuestNW::GS_BASE.read()?)
                .field("tss", &VmcsGuest16::TR_SELECTOR.read()?)
.finish())
        })()
        .unwrap()
    }
}

--------------------------------------------------------------------------------
/rvm/src/arch/x86_64/vmx/vmcs.rs:
--------------------------------------------------------------------------------
//! VMCS field encodings with typed `read`/`write` accessors, plus helpers that program
//! VM controls and decode VM-exit information.

#![allow(dead_code)]
#![deny(missing_docs)]
#![allow(non_camel_case_types)]
#![allow(clippy::upper_case_acronyms)]

use bit_field::BitField;
use x86::bits64::vmx;

use super::definitions::{VmxExitReason, VmxInstructionError, VmxInterruptionType};
use crate::{arch::msr::Msr, HostPhysAddr, MemFlags, NestedPageFaultInfo, RvmResult};

// Generates `read(self)` for a field enum whose discriminants are VMCS field encodings.
//
// * `u64` arm: one `vmread` on 64-bit targets; on 32-bit targets the low half is read at
//   the encoding and the high half at `encoding + 1`.
// * generic arm: one `vmread`, with the result cast down to `$ux`.
macro_rules! vmcs_read {
    ($field_enum: ident, u64) => {
        impl $field_enum {
            pub fn read(self) -> x86::vmx::Result<u64> {
                #[cfg(target_pointer_width = "64")]
                unsafe {
                    vmx::vmread(self as u32)
                }
                #[cfg(target_pointer_width = "32")]
                unsafe {
                    let field = self as u32;
                    Ok(vmx::vmread(field)? + (vmx::vmread(field + 1)? << 32))
                }
            }
        }
    };
    ($field_enum: ident, $ux: ty) => {
        impl $field_enum {
            pub fn read(self) -> x86::vmx::Result<$ux> {
                unsafe { vmx::vmread(self as u32).map(|v| v as $ux) }
            }
        }
    };
}

// Generates `write(self, value)` for a field enum; mirrors `vmcs_read!`.
//
// * `u64` arm: one `vmwrite` on 64-bit targets; on 32-bit targets the low 32 bits go to the
//   encoding and the high 32 bits to `encoding + 1`.
// * generic arm: the value is widened to `u64` and written with one `vmwrite`.
macro_rules! vmcs_write {
    ($field_enum: ident, u64) => {
        impl $field_enum {
            pub fn write(self, value: u64) -> x86::vmx::Result<()> {
                #[cfg(target_pointer_width = "64")]
                unsafe {
                    vmx::vmwrite(self as u32, value)
                }
                #[cfg(target_pointer_width = "32")]
                unsafe {
                    let field = self as u32;
                    vmx::vmwrite(field, value & 0xffff_ffff)?;
                    vmx::vmwrite(field + 1, value >> 32)?;
                    Ok(())
                }
            }
        }
    };
    ($field_enum: ident, $ux: ty) => {
        impl $field_enum {
            pub fn write(self, value: $ux) -> x86::vmx::Result<()> {
                unsafe { vmx::vmwrite(self as u32, value as u64) }
            }
        }
    };
}

/// 16-Bit Control Fields. (SDM Vol.
3D, Appendix B.1.1)
#[derive(Clone, Copy, Debug)]
pub enum VmcsControl16 {
    /// Virtual-processor identifier (VPID).
    VPID = 0x0,
    /// Posted-interrupt notification vector.
    POSTED_INTERRUPT_NOTIFICATION_VECTOR = 0x2,
    /// EPTP index.
    EPTP_INDEX = 0x4,
}
// `read`/`write` accessors; each discriminant is passed to `vmread`/`vmwrite` as the
// field encoding.
vmcs_read!(VmcsControl16, u16);
vmcs_write!(VmcsControl16, u16);

/// 64-Bit Control Fields. (SDM Vol. 3D, Appendix B.2.1)
///
/// Every discriminant is the "full" encoding of its field; on 32-bit targets the generated
/// accessors reach the upper 32 bits through `encoding + 1`.
#[derive(Clone, Copy, Debug)]
pub enum VmcsControl64 {
    /// Address of I/O bitmap A (full).
    IO_BITMAP_A_ADDR = 0x2000,
    /// Address of I/O bitmap B (full).
    IO_BITMAP_B_ADDR = 0x2002,
    /// Address of MSR bitmaps (full).
    MSR_BITMAPS_ADDR = 0x2004,
    /// VM-exit MSR-store address (full).
    VMEXIT_MSR_STORE_ADDR = 0x2006,
    /// VM-exit MSR-load address (full).
    VMEXIT_MSR_LOAD_ADDR = 0x2008,
    /// VM-entry MSR-load address (full).
    VMENTRY_MSR_LOAD_ADDR = 0x200A,
    /// Executive-VMCS pointer (full).
    EXECUTIVE_VMCS_PTR = 0x200C,
    /// PML address (full).
    PML_ADDR = 0x200E,
    /// TSC offset (full).
    TSC_OFFSET = 0x2010,
    /// Virtual-APIC address (full).
    VIRT_APIC_ADDR = 0x2012,
    /// APIC-access address (full).
    APIC_ACCESS_ADDR = 0x2014,
    /// Posted-interrupt descriptor address (full).
    POSTED_INTERRUPT_DESC_ADDR = 0x2016,
    /// VM-function controls (full).
    VM_FUNCTION_CONTROLS = 0x2018,
    /// EPT pointer (full).
    EPTP = 0x201A,
    /// EOI-exit bitmap 0 (full).
    EOI_EXIT0 = 0x201C,
    /// EOI-exit bitmap 1 (full).
    EOI_EXIT1 = 0x201E,
    /// EOI-exit bitmap 2 (full).
    EOI_EXIT2 = 0x2020,
    /// EOI-exit bitmap 3 (full).
    EOI_EXIT3 = 0x2022,
    /// EPTP-list address (full).
    EPTP_LIST_ADDR = 0x2024,
    /// VMREAD-bitmap address (full).
    VMREAD_BITMAP_ADDR = 0x2026,
    /// VMWRITE-bitmap address (full).
    VMWRITE_BITMAP_ADDR = 0x2028,
    /// Virtualization-exception information address (full).
    VIRT_EXCEPTION_INFO_ADDR = 0x202A,
    /// XSS-exiting bitmap (full).
    XSS_EXITING_BITMAP = 0x202C,
    /// ENCLS-exiting bitmap (full).
    ENCLS_EXITING_BITMAP = 0x202E,
    /// Sub-page-permission-table pointer (full).
    SUBPAGE_PERM_TABLE_PTR = 0x2030,
    /// TSC multiplier (full).
    TSC_MULTIPLIER = 0x2032,
}
vmcs_read!(VmcsControl64, u64);
vmcs_write!(VmcsControl64, u64);

/// 32-Bit Control Fields. (SDM Vol. 3D, Appendix B.3.1)
#[derive(Clone, Copy, Debug)]
pub enum VmcsControl32 {
    /// Pin-based VM-execution controls.
    PINBASED_EXEC_CONTROLS = 0x4000,
    /// Primary processor-based VM-execution controls.
    PRIMARY_PROCBASED_EXEC_CONTROLS = 0x4002,
    /// Exception bitmap.
    EXCEPTION_BITMAP = 0x4004,
    /// Page-fault error-code mask.
    PAGE_FAULT_ERR_CODE_MASK = 0x4006,
    /// Page-fault error-code match.
    PAGE_FAULT_ERR_CODE_MATCH = 0x4008,
    /// CR3-target count.
    CR3_TARGET_COUNT = 0x400A,
    /// VM-exit controls.
    VMEXIT_CONTROLS = 0x400C,
    /// VM-exit MSR-store count.
    VMEXIT_MSR_STORE_COUNT = 0x400E,
    /// VM-exit MSR-load count.
    VMEXIT_MSR_LOAD_COUNT = 0x4010,
    /// VM-entry controls.
    VMENTRY_CONTROLS = 0x4012,
    /// VM-entry MSR-load count.
    VMENTRY_MSR_LOAD_COUNT = 0x4014,
    /// VM-entry interruption-information field.
    VMENTRY_INTERRUPTION_INFO_FIELD = 0x4016,
    /// VM-entry exception error code.
    VMENTRY_EXCEPTION_ERR_CODE = 0x4018,
    /// VM-entry instruction length.
    VMENTRY_INSTRUCTION_LEN = 0x401A,
    /// TPR threshold.
    TPR_THRESHOLD = 0x401C,
    /// Secondary processor-based VM-execution controls.
    SECONDARY_PROCBASED_EXEC_CONTROLS = 0x401E,
    /// PLE_Gap.
    PLE_GAP = 0x4020,
    /// PLE_Window.
PLE_WINDOW = 0x4022,
}
vmcs_read!(VmcsControl32, u32);
vmcs_write!(VmcsControl32, u32);

/// Natural-Width Control Fields. (SDM Vol. 3D, Appendix B.4.1)
#[derive(Clone, Copy, Debug)]
pub enum VmcsControlNW {
    /// CR0 guest/host mask.
    CR0_GUEST_HOST_MASK = 0x6000,
    /// CR4 guest/host mask.
    CR4_GUEST_HOST_MASK = 0x6002,
    /// CR0 read shadow.
    CR0_READ_SHADOW = 0x6004,
    /// CR4 read shadow.
    CR4_READ_SHADOW = 0x6006,
    /// CR3-target value 0.
    CR3_TARGET_VALUE0 = 0x6008,
    /// CR3-target value 1.
    CR3_TARGET_VALUE1 = 0x600A,
    /// CR3-target value 2.
    CR3_TARGET_VALUE2 = 0x600C,
    /// CR3-target value 3.
    CR3_TARGET_VALUE3 = 0x600E,
}
vmcs_read!(VmcsControlNW, usize);
vmcs_write!(VmcsControlNW, usize);

/// 16-Bit Guest-State Fields. (SDM Vol. 3D, Appendix B.1.2)
// Derives added so this enum matches every other field enum in this file: values can be
// copied and printed with `{:?}`.
#[derive(Clone, Copy, Debug)]
pub enum VmcsGuest16 {
    /// Guest ES selector.
    ES_SELECTOR = 0x800,
    /// Guest CS selector.
    CS_SELECTOR = 0x802,
    /// Guest SS selector.
    SS_SELECTOR = 0x804,
    /// Guest DS selector.
    DS_SELECTOR = 0x806,
    /// Guest FS selector.
    FS_SELECTOR = 0x808,
    /// Guest GS selector.
    GS_SELECTOR = 0x80A,
    /// Guest LDTR selector.
    LDTR_SELECTOR = 0x80C,
    /// Guest TR selector.
    TR_SELECTOR = 0x80E,
    /// Guest interrupt status.
    INTERRUPT_STATUS = 0x810,
    /// PML index.
    PML_INDEX = 0x812,
}
vmcs_read!(VmcsGuest16, u16);
vmcs_write!(VmcsGuest16, u16);

/// 64-Bit Guest-State Fields. (SDM Vol. 3D, Appendix B.2.3)
#[derive(Clone, Copy, Debug)]
pub enum VmcsGuest64 {
    /// VMCS link pointer (full).
    LINK_PTR = 0x2800,
    /// Guest IA32_DEBUGCTL (full).
    IA32_DEBUGCTL = 0x2802,
    /// Guest IA32_PAT (full).
    IA32_PAT = 0x2804,
    /// Guest IA32_EFER (full).
IA32_EFER = 0x2806,
    /// Guest IA32_PERF_GLOBAL_CTRL (full).
    IA32_PERF_GLOBAL_CTRL = 0x2808,
    /// Guest PDPTE0 (full).
    PDPTE0 = 0x280A,
    /// Guest PDPTE1 (full).
    PDPTE1 = 0x280C,
    /// Guest PDPTE2 (full).
    PDPTE2 = 0x280E,
    /// Guest PDPTE3 (full).
    PDPTE3 = 0x2810,
    /// Guest IA32_BNDCFGS (full).
    IA32_BNDCFGS = 0x2812,
    /// Guest IA32_RTIT_CTL (full).
    IA32_RTIT_CTL = 0x2814,
}
vmcs_read!(VmcsGuest64, u64);
vmcs_write!(VmcsGuest64, u64);

/// 32-Bit Guest-State Fields. (SDM Vol. 3D, Appendix B.3.3)
#[derive(Clone, Copy, Debug)]
pub enum VmcsGuest32 {
    /// Guest ES limit.
    ES_LIMIT = 0x4800,
    /// Guest CS limit.
    CS_LIMIT = 0x4802,
    /// Guest SS limit.
    SS_LIMIT = 0x4804,
    /// Guest DS limit.
    DS_LIMIT = 0x4806,
    /// Guest FS limit.
    FS_LIMIT = 0x4808,
    /// Guest GS limit.
    GS_LIMIT = 0x480A,
    /// Guest LDTR limit.
    LDTR_LIMIT = 0x480C,
    /// Guest TR limit.
    TR_LIMIT = 0x480E,
    /// Guest GDTR limit.
    GDTR_LIMIT = 0x4810,
    /// Guest IDTR limit.
    IDTR_LIMIT = 0x4812,
    /// Guest ES access rights.
    ES_ACCESS_RIGHTS = 0x4814,
    /// Guest CS access rights.
    CS_ACCESS_RIGHTS = 0x4816,
    /// Guest SS access rights.
    SS_ACCESS_RIGHTS = 0x4818,
    /// Guest DS access rights.
    DS_ACCESS_RIGHTS = 0x481A,
    /// Guest FS access rights.
    FS_ACCESS_RIGHTS = 0x481C,
    /// Guest GS access rights.
    GS_ACCESS_RIGHTS = 0x481E,
    /// Guest LDTR access rights.
    LDTR_ACCESS_RIGHTS = 0x4820,
    /// Guest TR access rights.
    TR_ACCESS_RIGHTS = 0x4822,
    /// Guest interruptibility state.
    INTERRUPTIBILITY_STATE = 0x4824,
    /// Guest activity state.
    ACTIVITY_STATE = 0x4826,
    /// Guest SMBASE.
    SMBASE = 0x4828,
    /// Guest IA32_SYSENTER_CS.
    IA32_SYSENTER_CS = 0x482A,
    /// VMX-preemption timer value.
    VMX_PREEMPTION_TIMER_VALUE = 0x482E,
}
vmcs_read!(VmcsGuest32, u32);
vmcs_write!(VmcsGuest32, u32);

/// Natural-Width Guest-State Fields. (SDM Vol. 3D, Appendix B.4.3)
#[derive(Clone, Copy, Debug)]
pub enum VmcsGuestNW {
    /// Guest CR0.
    CR0 = 0x6800,
    /// Guest CR3.
    CR3 = 0x6802,
    /// Guest CR4.
    CR4 = 0x6804,
    /// Guest ES base.
    ES_BASE = 0x6806,
    /// Guest CS base.
    CS_BASE = 0x6808,
    /// Guest SS base.
    SS_BASE = 0x680A,
    /// Guest DS base.
    DS_BASE = 0x680C,
    /// Guest FS base.
    FS_BASE = 0x680E,
    /// Guest GS base.
    GS_BASE = 0x6810,
    /// Guest LDTR base.
    LDTR_BASE = 0x6812,
    /// Guest TR base.
    TR_BASE = 0x6814,
    /// Guest GDTR base.
    GDTR_BASE = 0x6816,
    /// Guest IDTR base.
    IDTR_BASE = 0x6818,
    /// Guest DR7.
    DR7 = 0x681A,
    /// Guest RSP.
    RSP = 0x681C,
    /// Guest RIP.
    RIP = 0x681E,
    /// Guest RFLAGS.
    RFLAGS = 0x6820,
    /// Guest pending debug exceptions.
    PENDING_DBG_EXCEPTIONS = 0x6822,
    /// Guest IA32_SYSENTER_ESP.
    IA32_SYSENTER_ESP = 0x6824,
    /// Guest IA32_SYSENTER_EIP.
    IA32_SYSENTER_EIP = 0x6826,
}
vmcs_read!(VmcsGuestNW, usize);
vmcs_write!(VmcsGuestNW, usize);

/// 16-Bit Host-State Fields. (SDM Vol. 3D, Appendix B.1.3)
#[derive(Clone, Copy, Debug)]
pub enum VmcsHost16 {
    /// Host ES selector.
    ES_SELECTOR = 0xC00,
    /// Host CS selector.
    CS_SELECTOR = 0xC02,
    /// Host SS selector.
    SS_SELECTOR = 0xC04,
    /// Host DS selector.
    DS_SELECTOR = 0xC06,
    /// Host FS selector.
    FS_SELECTOR = 0xC08,
    /// Host GS selector.
    GS_SELECTOR = 0xC0A,
    /// Host TR selector.
    TR_SELECTOR = 0xC0C,
}
vmcs_read!(VmcsHost16, u16);
vmcs_write!(VmcsHost16, u16);

/// 64-Bit Host-State Fields. (SDM Vol. 3D, Appendix B.2.4)
#[derive(Clone, Copy, Debug)]
pub enum VmcsHost64 {
    /// Host IA32_PAT (full).
    IA32_PAT = 0x2C00,
    /// Host IA32_EFER (full).
    IA32_EFER = 0x2C02,
    /// Host IA32_PERF_GLOBAL_CTRL (full).
    IA32_PERF_GLOBAL_CTRL = 0x2C04,
}
vmcs_read!(VmcsHost64, u64);
vmcs_write!(VmcsHost64, u64);

/// 32-Bit Host-State Field. (SDM Vol. 3D, Appendix B.3.4)
#[derive(Clone, Copy, Debug)]
pub enum VmcsHost32 {
    /// Host IA32_SYSENTER_CS.
    IA32_SYSENTER_CS = 0x4C00,
}
vmcs_read!(VmcsHost32, u32);
vmcs_write!(VmcsHost32, u32);

/// Natural-Width Host-State Fields. (SDM Vol. 3D, Appendix B.4.4)
#[derive(Clone, Copy, Debug)]
pub enum VmcsHostNW {
    /// Host CR0.
    CR0 = 0x6C00,
    /// Host CR3.
    CR3 = 0x6C02,
    /// Host CR4.
    CR4 = 0x6C04,
    /// Host FS base.
    FS_BASE = 0x6C06,
    /// Host GS base.
    GS_BASE = 0x6C08,
    /// Host TR base.
    TR_BASE = 0x6C0A,
    /// Host GDTR base.
    GDTR_BASE = 0x6C0C,
    /// Host IDTR base.
    IDTR_BASE = 0x6C0E,
    /// Host IA32_SYSENTER_ESP.
    IA32_SYSENTER_ESP = 0x6C10,
    /// Host IA32_SYSENTER_EIP.
    IA32_SYSENTER_EIP = 0x6C12,
    /// Host RSP.
    RSP = 0x6C14,
    /// Host RIP.
    RIP = 0x6C16,
}
vmcs_read!(VmcsHostNW, usize);
vmcs_write!(VmcsHostNW, usize);

/// 64-Bit Read-Only Data Fields. (SDM Vol. 3D, Appendix B.2.2)
#[derive(Clone, Copy, Debug)]
pub enum VmcsReadOnly64 {
    /// Guest-physical address (full).
    GUEST_PHYSICAL_ADDR = 0x2400,
}
// Read-only field group: only the `read` accessor is generated.
vmcs_read!(VmcsReadOnly64, u64);

/// 32-Bit Read-Only Data Fields. (SDM Vol.
3D, Appendix B.3.2)
#[derive(Clone, Copy, Debug)]
pub enum VmcsReadOnly32 {
    /// VM-instruction error.
    VM_INSTRUCTION_ERROR = 0x4400,
    /// Exit reason.
    EXIT_REASON = 0x4402,
    /// VM-exit interruption information.
    VMEXIT_INTERRUPTION_INFO = 0x4404,
    /// VM-exit interruption error code.
    VMEXIT_INTERRUPTION_ERR_CODE = 0x4406,
    /// IDT-vectoring information field.
    IDT_VECTORING_INFO = 0x4408,
    /// IDT-vectoring error code.
    IDT_VECTORING_ERR_CODE = 0x440A,
    /// VM-exit instruction length.
    VMEXIT_INSTRUCTION_LEN = 0x440C,
    /// VM-exit instruction information.
    VMEXIT_INSTRUCTION_INFO = 0x440E,
}
// Read-only field group: only the `read` accessor is generated.
vmcs_read!(VmcsReadOnly32, u32);

/// Natural-Width Read-Only Data Fields. (SDM Vol. 3D, Appendix B.4.2)
#[derive(Clone, Copy, Debug)]
pub enum VmcsReadOnlyNW {
    /// Exit qualification.
    EXIT_QUALIFICATION = 0x6400,
    /// I/O RCX.
    IO_RCX = 0x6402,
    /// I/O RSI.
    IO_RSI = 0x6404,
    /// I/O RDI.
    IO_RDI = 0x6406,
    /// I/O RIP.
    IO_RIP = 0x6408,
    /// Guest-linear address.
    GUEST_LINEAR_ADDR = 0x640A,
}
// Read-only field group: only the `read` accessor is generated.
vmcs_read!(VmcsReadOnlyNW, usize);

/// VM-Exit Information. (SDM Vol. 3C, Section 24.9.1)
#[derive(Debug)]
pub struct VmxExitInfo {
    /// VM-entry failure. (0 = true VM exit; 1 = VM-entry failure)
    pub entry_failure: bool,
    /// Basic exit reason.
    pub exit_reason: VmxExitReason,
    /// For VM exits resulting from instruction execution, this field receives
    /// the length in bytes of the instruction whose execution led to the VM exit.
    pub exit_instruction_length: u32,
    /// Guest `RIP` where the VM exit occurs.
    pub guest_rip: usize,
}

/// VM-Entry/VM-Exit Interruption-Information Field. (SDM Vol.
3C, Section 24.8.3, 24.9.2)
#[derive(Debug)]
pub struct VmxInterruptInfo {
    /// Vector of interrupt or exception.
    pub vector: u8,
    /// Determines details of how the injection is performed.
    pub int_type: VmxInterruptionType,
    /// For hardware exceptions that would have delivered an error code on the stack.
    pub err_code: Option<u32>,
    /// Whether the field is valid.
    pub valid: bool,
}

impl VmxInterruptInfo {
    /// Convert from the interrupt vector and the error code.
    ///
    /// The interruption type is derived from `vector` via
    /// `VmxInterruptionType::from_vector`, and the result is always marked valid.
    pub fn from(vector: u8, err_code: Option<u32>) -> Self {
        Self {
            vector,
            int_type: VmxInterruptionType::from_vector(vector),
            err_code,
            valid: true,
        }
    }

    /// Raw bits for writing to VMCS.
    ///
    /// Layout produced: vector in bits 0..8, interruption type from bit 8 upwards, bit 11
    /// set when an error code is present, bit 31 set when the field is valid.
    pub fn bits(&self) -> u32 {
        let mut bits = self.vector as u32;
        bits |= (self.int_type as u32) << 8;
        bits.set_bit(11, self.err_code.is_some());
        bits.set_bit(31, self.valid);
        bits
    }
}

/// Exit Qualification for I/O Instructions. (SDM Vol. 3C, Section 27.2.1, Table 27-5)
#[derive(Debug)]
pub struct VmxIoExitInfo {
    /// Size of access.
    pub access_size: u8,
    /// Direction of the attempted access (0 = OUT, 1 = IN).
    pub is_in: bool,
    /// String instruction (0 = not string; 1 = string).
    pub is_string: bool,
    /// REP prefixed (0 = not REP; 1 = REP).
    pub is_repeat: bool,
    /// Port number.
(as specified in DX or in an immediate operand)
    pub port: u16,
}

/// Re-exports of the VM-control bitflag types from the `x86` crate.
pub mod controls {
    pub use x86::vmx::vmcs::control::{EntryControls, ExitControls};
    pub use x86::vmx::vmcs::control::{PinbasedControls, PrimaryControls, SecondaryControls};
}

/// Writes a 32-bit VM control field while honouring the settings the CPU allows.
///
/// * `control` – the control field to write.
/// * `capability_msr` – capability MSR for that field: its low 32 bits are the bits that
///   must be 1 (`allowed0`), its high 32 bits the bits that may be 1 (`allowed1`).
/// * `old_value` – supplies the value of every flexible bit that appears in neither `set`
///   nor `clear`.
/// * `set` / `clear` – bits the caller requires to be 1 / 0.
///
/// # Errors
///
/// * `InvalidParam` if `set` and `clear` overlap.
/// * `Unsupported` if `set` contains a bit that may not be 1, or `clear` contains a bit
///   that must be 1.
/// * The error of the underlying VMCS write.
///
/// # Panics
///
/// Panics if the capability MSR reports a must-be-1 bit that is not allowed to be 1.
pub fn set_control(
    control: VmcsControl32,
    capability_msr: Msr,
    old_value: u32,
    set: u32,
    clear: u32,
) -> RvmResult {
    let cap = capability_msr.read();
    let allowed0 = cap as u32;
    let allowed1 = (cap >> 32) as u32;
    assert_eq!(allowed0 & allowed1, allowed0);
    debug!(
        "set {:?}: {:#x} (+{:#x}, -{:#x})",
        control, old_value, set, clear
    );
    if (set & clear) != 0 {
        return rvm_err!(
            InvalidParam,
            format_args!("can not set and clear the same bit in {:?}", control)
        );
    }
    if (allowed1 & set) != set {
        // fail when asked to set a bit that is 0 in allowed1
        return rvm_err!(
            Unsupported,
            format_args!("can not set bits {:#x} in {:?}", set, control)
        );
    }
    if (allowed0 & clear) != 0 {
        // fail when asked to clear a bit that is 1 in allowed0
        return rvm_err!(
            Unsupported,
            format_args!("can not clear bits {:#x} in {:?}", clear, control)
        );
    }
    // SDM Vol. 3C, Section 31.5.1, Algorithm 3
    let flexible = !allowed0 & allowed1; // these bits can be either 0 or 1
    let unknown = flexible & !(set | clear); // bits the hypervisor did not touch
    let default = unknown & old_value; // untouched bits keep their value from `old_value`
    let fixed1 = allowed0; // these bits are fixed to 1
    control.write(fixed1 | default | set)?;
    Ok(())
}

/// Writes the EPT pointer built from the root table at `pml4_paddr`, then issues a
/// single-context `invept` for that pointer.
pub fn set_ept_pointer(pml4_paddr: HostPhysAddr) -> RvmResult {
    use super::instructions::{invept, InvEptType};
    let eptp = super::structs::EPTPointer::from_table_phys(pml4_paddr).bits();
    VmcsControl64::EPTP.write(eptp)?;
    unsafe { invept(InvEptType::SingleContext, eptp)? };
    Ok(())
}

/// Returns the VM-instruction error recorded in the current VMCS.
///
/// Panics if the field cannot be read.
pub fn instruction_error() -> VmxInstructionError {
    VmcsReadOnly32::VM_INSTRUCTION_ERROR.read().unwrap().into()
}

/// Collects the basic information about the latest VM exit: exit reason (bits 0..16),
/// VM-entry-failure flag (bit 31), instruction length and guest RIP.
///
/// Panics if the basic exit reason does not convert to a `VmxExitReason`.
pub fn exit_info() -> RvmResult<VmxExitInfo> {
    let full_reason = VmcsReadOnly32::EXIT_REASON.read()?;
    Ok(VmxExitInfo {
        exit_reason: full_reason
            .get_bits(0..16)
            .try_into()
            .expect("Unknown VM-exit reason"),
        entry_failure: full_reason.get_bit(31),
        exit_instruction_length: VmcsReadOnly32::VMEXIT_INSTRUCTION_LEN.read()?,
        guest_rip: VmcsGuestNW::RIP.read()?,
    })
}

/// Decodes the VM-exit interruption-information field; the error code is read only when
/// bit 11 of the field is set.
///
/// Panics if the interruption type (bits 8..11) does not convert to a
/// `VmxInterruptionType`.
pub fn interrupt_exit_info() -> RvmResult<VmxInterruptInfo> {
    // SDM Vol. 3C, Section 24.9.2
    let info = VmcsReadOnly32::VMEXIT_INTERRUPTION_INFO.read()?;
    Ok(VmxInterruptInfo {
        vector: info.get_bits(0..8) as u8,
        int_type: VmxInterruptionType::try_from(info.get_bits(8..11) as u8).unwrap(),
        err_code: if info.get_bit(11) {
            Some(VmcsReadOnly32::VMEXIT_INTERRUPTION_ERR_CODE.read()?)
        } else {
            None
        },
        valid: info.get_bit(31),
    })
}

/// Programs the VM-entry fields so that `vector` is delivered on the next VM entry.
///
/// For vectors that carry an error code, `err_code` is used when given; otherwise the
/// error code recorded for the latest VM exit is reused. For vectors without an error
/// code, `err_code` is ignored. Software interruption types also copy the VM-exit
/// instruction length into the VM-entry instruction length.
///
/// # Errors
///
/// Returns the error of any failing VMCS access, including the fallback error-code read
/// (which previously panicked).
pub fn inject_event(vector: u8, err_code: Option<u32>) -> RvmResult {
    // SDM Vol. 3C, Section 24.8.3
    let err_code = if VmxInterruptionType::vector_has_error_code(vector) {
        Some(match err_code {
            Some(code) => code,
            None => VmcsReadOnly32::VMEXIT_INTERRUPTION_ERR_CODE.read()?,
        })
    } else {
        None
    };
    let int_info = VmxInterruptInfo::from(vector, err_code);
    if let Some(err_code) = int_info.err_code {
        VmcsControl32::VMENTRY_EXCEPTION_ERR_CODE.write(err_code)?;
    }
    if int_info.int_type.is_soft() {
        VmcsControl32::VMENTRY_INSTRUCTION_LEN
            .write(VmcsReadOnly32::VMEXIT_INSTRUCTION_LEN.read()?)?;
    }
    VmcsControl32::VMENTRY_INTERRUPTION_INFO_FIELD.write(int_info.bits())?;
    Ok(())
}

/// Decodes the exit qualification of an I/O-instruction VM exit.
///
/// `access_size` is the value of bits 0..3 plus one; `port` comes from bits 16..32.
pub fn io_exit_info() -> RvmResult<VmxIoExitInfo> {
    // SDM Vol. 3C, Section 27.2.1, Table 27-5
    let qualification = VmcsReadOnlyNW::EXIT_QUALIFICATION.read()?;
    Ok(VmxIoExitInfo {
        access_size: qualification.get_bits(0..3) as u8 + 1,
        is_in: qualification.get_bit(3),
        is_string: qualification.get_bit(4),
        is_repeat: qualification.get_bit(5),
        port: qualification.get_bits(16..32) as u16,
    })
}

/// Decodes an EPT-violation VM exit: the faulting guest-physical address plus the access
/// kind taken from exit-qualification bits 0 (read), 1 (write) and 2 (execute).
pub fn ept_violation_info() -> RvmResult<NestedPageFaultInfo> {
    // SDM Vol. 3C, Section 27.2.1, Table 27-7
    let qualification = VmcsReadOnlyNW::EXIT_QUALIFICATION.read()?;
    let fault_guest_paddr = VmcsReadOnly64::GUEST_PHYSICAL_ADDR.read()? as usize;
    let mut access_flags = MemFlags::empty();
    if qualification.get_bit(0) {
        access_flags |= MemFlags::READ;
    }
    if qualification.get_bit(1) {
        access_flags |= MemFlags::WRITE;
    }
    if qualification.get_bit(2) {
        access_flags |= MemFlags::EXECUTE;
    }
    Ok(NestedPageFaultInfo {
        access_flags,
        fault_guest_paddr,
    })
}

--------------------------------------------------------------------------------