├── guest
└── bios
│ ├── .gitignore
│ ├── rvm-bios.lds
│ ├── Makefile
│ └── boot16.S
├── .gitignore
├── .gitmodules
├── hypervisor
├── src
│ ├── arch
│ │ ├── mod.rs
│ │ └── x86_64
│ │ │ ├── mod.rs
│ │ │ ├── instructions.rs
│ │ │ ├── boot.rs
│ │ │ ├── lapic.rs
│ │ │ ├── timer.rs
│ │ │ ├── idt.rs
│ │ │ ├── trap.S
│ │ │ ├── gdt.rs
│ │ │ ├── trap.rs
│ │ │ ├── uart16550.rs
│ │ │ └── multiboot.S
│ ├── lang_items.rs
│ ├── mm
│ │ ├── mod.rs
│ │ ├── address.rs
│ │ ├── heap.rs
│ │ └── frame.rs
│ ├── timer.rs
│ ├── config.rs
│ ├── hv
│ │ ├── gconfig.rs
│ │ ├── device_emu
│ │ │ ├── i8259_pic.rs
│ │ │ ├── mod.rs
│ │ │ ├── lapic.rs
│ │ │ └── uart16550.rs
│ │ ├── hal.rs
│ │ ├── mod.rs
│ │ ├── gpm.rs
│ │ └── vmexit.rs
│ ├── main.rs
│ └── logging.rs
├── x86_64.json
├── Cargo.toml
├── linker.lds
├── Makefile
└── Cargo.lock
├── rvm
├── src
│ ├── arch
│ │ ├── mod.rs
│ │ └── x86_64
│ │ │ ├── mod.rs
│ │ │ ├── regs.rs
│ │ │ ├── msr.rs
│ │ │ ├── vmx
│ │ │ ├── instructions.rs
│ │ │ ├── ept.rs
│ │ │ ├── mod.rs
│ │ │ ├── definitions.rs
│ │ │ ├── structs.rs
│ │ │ ├── vcpu.rs
│ │ │ └── vmcs.rs
│ │ │ └── lapic.rs
│ ├── hal.rs
│ ├── error.rs
│ ├── lib.rs
│ └── mm
│ │ ├── mod.rs
│ │ └── page_table.rs
└── Cargo.toml
├── rust-toolchain.toml
├── .github
└── workflows
│ └── build.yml
└── README.md
/guest/bios/.gitignore:
--------------------------------------------------------------------------------
1 | out/
2 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | target
2 | .vscode
3 | .DS_Store
4 | rvm/Cargo.lock
5 | !hypervisor/Cargo.lock
6 |
--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "guest/nimbos"]
2 | path = guest/nimbos
3 | url = https://github.com/equation314/nimbos.git
4 |
--------------------------------------------------------------------------------
/hypervisor/src/arch/mod.rs:
--------------------------------------------------------------------------------
1 | cfg_if::cfg_if! {
2 | if #[cfg(target_arch = "x86_64")] {
3 | mod x86_64;
4 | pub use self::x86_64::*;
5 | }
6 | }
7 |
--------------------------------------------------------------------------------
/hypervisor/src/lang_items.rs:
--------------------------------------------------------------------------------
1 | use core::panic::PanicInfo;
2 |
3 | #[panic_handler]
4 | fn panic(info: &PanicInfo) -> ! {
5 | error!("{}", info);
6 | loop {}
7 | }
8 |
--------------------------------------------------------------------------------
/rvm/src/arch/mod.rs:
--------------------------------------------------------------------------------
1 | //! Architecture dependent structures.
2 |
3 | cfg_if::cfg_if! {
4 | if #[cfg(target_arch = "x86_64")] {
5 | mod x86_64;
6 | pub use self::x86_64::*;
7 | }
8 | }
9 |
--------------------------------------------------------------------------------
/hypervisor/src/mm/mod.rs:
--------------------------------------------------------------------------------
1 | mod heap;
2 |
3 | pub mod address;
4 | pub mod frame;
5 |
6 | pub const PAGE_SIZE: usize = 0x1000;
7 |
8 | pub fn init_heap_early() {
9 | heap::init();
10 | }
11 |
12 | pub fn init() {
13 | frame::init();
14 | }
15 |
--------------------------------------------------------------------------------
/rust-toolchain.toml:
--------------------------------------------------------------------------------
1 | [toolchain]
2 | profile = "minimal"
3 | # use the nightly version of the last stable toolchain, see <https://forge.rust-lang.org/>
4 | channel = "nightly-2022-11-03"
5 | components = ["rust-src", "llvm-tools-preview", "rustfmt", "clippy"]
6 |
--------------------------------------------------------------------------------
/hypervisor/src/timer.rs:
--------------------------------------------------------------------------------
1 | use core::time::Duration;
2 |
3 | use crate::arch::timer;
4 |
5 | pub type TimeValue = Duration;
6 |
7 | pub fn current_time() -> TimeValue {
8 | TimeValue::from_nanos(timer::ticks_to_nanos(timer::current_ticks()))
9 | }
10 |
--------------------------------------------------------------------------------
/guest/bios/rvm-bios.lds:
--------------------------------------------------------------------------------
1 | OUTPUT_ARCH(i386)
2 |
3 | BASE_ADDRESS = 0x8000;
4 |
5 | ENTRY(entry16)
6 | SECTIONS
7 | {
8 | . = BASE_ADDRESS;
9 | .text : {
10 | *(.text .text.*)
11 | }
12 |
13 | /DISCARD/ : {
14 | *(.eh_frame) *(.eh_frame_hdr)
15 | }
16 | }
17 |
--------------------------------------------------------------------------------
/hypervisor/src/arch/x86_64/mod.rs:
--------------------------------------------------------------------------------
1 | mod boot;
2 | mod gdt;
3 | mod idt;
4 | mod lapic;
5 | mod trap;
6 |
7 | pub mod instructions;
8 | pub mod timer;
9 | pub mod uart16550;
10 |
11 | pub use trap::handle_irq;
12 | pub use uart16550 as uart;
13 |
14 | pub fn init_early() {
15 | uart::init();
16 | }
17 |
18 | pub fn init() {
19 | gdt::init();
20 | idt::init();
21 | lapic::init();
22 | timer::init();
23 | }
24 |
--------------------------------------------------------------------------------
/hypervisor/src/config.rs:
--------------------------------------------------------------------------------
1 | pub const PHYS_VIRT_OFFSET: usize = 0xffff_ff80_0000_0000;
2 |
3 | pub const BOOT_KERNEL_STACK_SIZE: usize = 4096 * 4; // 16K
4 | pub const KERNEL_HEAP_SIZE: usize = 0x40_0000; // 4M
5 |
6 | pub const PHYS_MEMORY_BASE: usize = 0;
7 | pub const PHYS_MEMORY_SIZE: usize = 0x400_0000; // 64M
8 | pub const PHYS_MEMORY_END: usize = PHYS_MEMORY_BASE + PHYS_MEMORY_SIZE;
9 |
10 | pub const TICKS_PER_SEC: u64 = 100;
11 |
--------------------------------------------------------------------------------
/hypervisor/src/hv/gconfig.rs:
--------------------------------------------------------------------------------
1 | use rvm::{GuestPhysAddr, HostPhysAddr};
2 |
3 | pub const BIOS_PADDR: HostPhysAddr = 0x400_0000;
4 | pub const BIOS_SIZE: usize = 0x1000;
5 |
6 | pub const GUEST_IMAGE_PADDR: HostPhysAddr = 0x400_1000;
7 | pub const GUEST_IMAGE_SIZE: usize = 0x10_0000; // 1M
8 |
9 | pub const GUEST_PHYS_MEMORY_BASE: GuestPhysAddr = 0;
10 | pub const BIOS_ENTRY: GuestPhysAddr = 0x8000;
11 | pub const GUEST_ENTRY: GuestPhysAddr = 0x20_0000;
12 | pub const GUEST_PHYS_MEMORY_SIZE: usize = 0x100_0000; // 16M
13 |
--------------------------------------------------------------------------------
/rvm/src/arch/x86_64/mod.rs:
--------------------------------------------------------------------------------
1 | mod lapic;
2 | pub(crate) mod msr;
3 |
4 | #[macro_use]
5 | pub(crate) mod regs;
6 |
7 | cfg_if::cfg_if! {
8 | if #[cfg(feature = "vmx")] {
9 | mod vmx;
10 | use vmx as vender;
11 | pub use vmx::{VmxExitInfo, VmxExitReason, VmxInterruptInfo, VmxIoExitInfo};
12 | }
13 | }
14 |
15 | pub(crate) use vender::{has_hardware_support, ArchPerCpuState};
16 |
17 | pub use lapic::ApicTimer;
18 | pub use regs::GeneralRegisters;
19 | pub use vender::{NestedPageTable, RvmVcpu};
20 |
--------------------------------------------------------------------------------
/rvm/Cargo.toml:
--------------------------------------------------------------------------------
1 | [package]
2 | name = "rvm"
3 | version = "0.1.0"
4 | edition = "2021"
5 | authors = ["Yuekai Jia "]
6 |
7 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
8 |
9 | [features]
10 | default = ["vmx"]
11 | vmx = []
12 |
13 | [dependencies]
14 | log = "0.4"
15 | cfg-if = "1.0"
16 | bitflags = "1.3"
17 | bit_field = "0.10"
18 | numeric-enum-macro = "0.2"
19 |
20 | [target.'cfg(target_arch = "x86_64")'.dependencies]
21 | x86 = "0.52"
22 | x86_64 = "0.14"
23 | raw-cpuid = "10.6"
24 |
--------------------------------------------------------------------------------
/hypervisor/src/arch/x86_64/instructions.rs:
--------------------------------------------------------------------------------
1 | #![allow(dead_code)]
2 |
3 | use core::arch::asm;
4 |
5 | use x86_64::registers::{rflags, rflags::RFlags};
6 |
7 | #[inline]
8 | pub fn enable_irqs() {
9 | unsafe { asm!("sti") };
10 | }
11 |
12 | #[inline]
13 | pub fn disable_irqs() {
14 | unsafe { asm!("cli") };
15 | }
16 |
17 | #[inline]
18 | pub fn irqs_disabled() -> bool {
19 | !rflags::read().contains(RFlags::INTERRUPT_FLAG)
20 | }
21 |
22 | #[inline]
23 | pub fn wait_for_ints() {
24 | if !irqs_disabled() {
25 | x86_64::instructions::hlt();
26 | }
27 | }
28 |
--------------------------------------------------------------------------------
/hypervisor/x86_64.json:
--------------------------------------------------------------------------------
1 | {
2 | "arch": "x86_64",
3 | "data-layout": "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128",
4 | "disable-redzone": true,
5 | "executables": true,
6 | "features": "-mmx,-sse,+soft-float",
7 | "linker": "rust-lld",
8 | "linker-flavor": "ld.lld",
9 | "llvm-target": "x86_64-unknown-none",
10 | "max-atomic-width": 64,
11 | "panic-strategy": "abort",
12 | "pre-link-args": {
13 | "ld.lld": [
14 | "-Tlinker.lds"
15 | ]
16 | },
17 | "target-pointer-width": "64"
18 | }
19 |
--------------------------------------------------------------------------------
/hypervisor/src/mm/address.rs:
--------------------------------------------------------------------------------
1 | use super::PAGE_SIZE;
2 | use crate::config::PHYS_VIRT_OFFSET;
3 |
4 | pub(super) type PhysAddr = usize;
5 | pub(super) type VirtAddr = usize;
6 |
7 | pub const fn phys_to_virt(paddr: PhysAddr) -> VirtAddr {
8 | paddr + PHYS_VIRT_OFFSET
9 | }
10 |
11 | pub const fn virt_to_phys(vaddr: VirtAddr) -> PhysAddr {
12 | vaddr - PHYS_VIRT_OFFSET
13 | }
14 |
15 | pub const fn align_down(addr: usize) -> usize {
16 | addr & !(PAGE_SIZE - 1)
17 | }
18 |
19 | pub const fn align_up(addr: usize) -> usize {
20 | (addr + PAGE_SIZE - 1) & !(PAGE_SIZE - 1)
21 | }
22 |
23 | pub const fn is_aligned(addr: usize) -> bool {
24 | (addr & (PAGE_SIZE - 1)) == 0
25 | }
26 |
--------------------------------------------------------------------------------
/hypervisor/Cargo.toml:
--------------------------------------------------------------------------------
1 | [package]
2 | name = "rvm-hypervisor"
3 | version = "0.1.0"
4 | edition = "2021"
5 | authors = ["Yuekai Jia "]
6 |
7 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
8 |
9 | [dependencies]
10 | log = "0.4"
11 | spin = "0.9"
12 | cfg-if = "1.0"
13 | bitflags = "1.3"
14 | buddy_system_allocator = "0.8"
15 | lazy_static = { version = "1.4.0", features = ["spin_no_std"] }
16 | bitmap-allocator = { git = "https://github.com/rcore-os/bitmap-allocator", rev = "88e871a" }
17 | rvm = { path = "../rvm" }
18 |
19 | [target.'cfg(target_arch = "x86_64")'.dependencies]
20 | x86 = "0.52"
21 | x86_64 = "0.14"
22 | x2apic = "0.4"
23 | raw-cpuid = "10.6"
24 |
25 | [profile.release]
26 | lto = true
27 |
--------------------------------------------------------------------------------
/hypervisor/src/hv/device_emu/i8259_pic.rs:
--------------------------------------------------------------------------------
1 | //! Emulated Intel 8259 Programmable Interrupt Controller. (ref: https://wiki.osdev.org/8259_PIC)
2 |
3 | use super::PortIoDevice;
4 | use rvm::{RvmError, RvmResult};
5 |
/// State of one emulated 8259 PIC.
pub struct I8259Pic {
    /// First I/O port claimed by this controller.
    port_base: u16,
}
9 |
10 | impl PortIoDevice for I8259Pic {
11 | fn port_range(&self) -> core::ops::Range {
12 | self.port_base..self.port_base + 2
13 | }
14 |
15 | fn read(&self, _port: u16, _access_size: u8) -> RvmResult {
16 | Err(RvmError::Unsupported) // report error for read
17 | }
18 |
19 | fn write(&self, _port: u16, _access_size: u8, _value: u32) -> RvmResult {
20 | Ok(()) // ignore write
21 | }
22 | }
23 |
24 | impl I8259Pic {
25 | pub const fn new(port_base: u16) -> Self {
26 | Self { port_base }
27 | }
28 | }
29 |
--------------------------------------------------------------------------------
/guest/bios/Makefile:
--------------------------------------------------------------------------------
1 | OUT ?= out
2 |
3 | SRC := boot16.S
4 | ldscript := rvm-bios.lds
5 | target := $(OUT)/rvm-bios
6 | target-obj := $(target).o
7 | target-elf := $(target).elf
8 | target-bin := $(target).bin
9 | target-disasm := $(target).asm
10 |
11 | AS ?= as
12 | LD ?= ld
13 | OBJCOPY ?= objcopy
14 | OBJDUMP ?= objdump
15 |
16 | all: $(OUT) $(target).bin
17 |
18 | disasm:
19 | $(OBJDUMP) -d -m i8086 -M intel $(target).elf | less
20 |
21 | $(OUT):
22 | mkdir -p $(OUT)
23 |
24 | $(target-obj): $(SRC)
25 | $(AS) --32 -msyntax=intel -mnaked-reg $< -o $@
26 |
27 | $(target-elf): $(target-obj) $(ldscript)
28 | $(LD) -T$(ldscript) $< -o $@
29 | $(OBJDUMP) -d -m i8086 -M intel $@ > $(target-disasm)
30 |
31 | $(target-bin): $(target-elf)
32 | $(OBJCOPY) $< --strip-all -O binary $@
33 |
34 | clean:
35 | rm -rf $(OUT)
36 |
37 | .PHONY: all disasm clean
38 |
--------------------------------------------------------------------------------
/hypervisor/src/mm/heap.rs:
--------------------------------------------------------------------------------
1 | use buddy_system_allocator::LockedHeap;
2 | use core::{alloc::Layout, mem::size_of};
3 |
4 | use crate::config::KERNEL_HEAP_SIZE;
5 |
6 | #[global_allocator]
7 | static HEAP_ALLOCATOR: LockedHeap<32> = LockedHeap::empty();
8 |
9 | #[alloc_error_handler]
10 | fn handle_alloc_error(layout: Layout) -> ! {
11 | panic!("Heap allocation error, layout = {:?}", layout);
12 | }
13 |
14 | static mut HEAP_SPACE: [u64; KERNEL_HEAP_SIZE / size_of::()] =
15 | [0; KERNEL_HEAP_SIZE / size_of::()];
16 |
17 | pub(super) fn init() {
18 | let heap_start = unsafe { HEAP_SPACE.as_ptr() as usize };
19 | println!(
20 | "Initializing heap at: [{:#x}, {:#x})",
21 | heap_start,
22 | heap_start + KERNEL_HEAP_SIZE
23 | );
24 | unsafe { HEAP_ALLOCATOR.lock().init(heap_start, KERNEL_HEAP_SIZE) }
25 | }
26 |
--------------------------------------------------------------------------------
/rvm/src/hal.rs:
--------------------------------------------------------------------------------
1 | use crate::{HostPhysAddr, HostVirtAddr};
2 |
3 | /// The interfaces which the underlying software (kernel or hypervisor) must implement.
4 | pub trait RvmHal: Sized {
5 | /// Allocates a 4K-sized contiguous physical page, returns its physical address.
6 | fn alloc_page() -> Option;
7 | /// Deallocates the given physical page.
8 | fn dealloc_page(paddr: HostPhysAddr);
9 | /// Converts a physical address to a virtual address which can access.
10 | fn phys_to_virt(paddr: HostPhysAddr) -> HostVirtAddr;
11 | /// Converts a virtual address to the corresponding physical address.
12 | fn virt_to_phys(vaddr: HostVirtAddr) -> HostPhysAddr;
13 | /// VM-Exit handler.
14 | fn vmexit_handler(vcpu: &mut crate::RvmVcpu);
15 | /// Current time in nanoseconds.
16 | fn current_time_nanos() -> u64;
17 | }
18 |
--------------------------------------------------------------------------------
/rvm/src/error.rs:
--------------------------------------------------------------------------------
/// The error type for RVM operations.
///
/// The enum is a plain list of field-less variants, so it is cheap to copy
/// and can be compared for equality (e.g. `err == RvmError::Unsupported`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RvmError {
    AlreadyExists,
    BadState,
    InvalidParam,
    OutOfMemory,
    ResourceBusy,
    Unsupported,
}
11 |
12 | /// A [`Result`] type with [`RvmError`] as the error type.
13 | pub type RvmResult = Result;
14 |
/// Logs an [`RvmError`](crate::error::RvmError) variant with `warn!` and
/// evaluates to that variant.
///
/// Takes the bare variant name, optionally followed by a message that is
/// appended to the log line.
macro_rules! rvm_err_type {
    ($err:ident) => {{
        use $crate::error::RvmError::*;
        warn!("[RvmError::{:?}]", $err);
        $err
    }};
    ($err:ident, $msg:expr) => {{
        use $crate::error::RvmError::*;
        warn!("[RvmError::{:?}] {}", $err, $msg);
        $err
    }};
}
27 |
/// Same as `rvm_err_type!`, but wraps the resulting error in `Err(..)` so it
/// can be returned directly from a function.
macro_rules! rvm_err {
    ($err:ident) => {
        Err(rvm_err_type!($err))
    };
    ($err:ident, $msg:expr) => {
        Err(rvm_err_type!($err, $msg))
    };
}
36 |
--------------------------------------------------------------------------------
/hypervisor/src/hv/hal.rs:
--------------------------------------------------------------------------------
1 | use rvm::{HostPhysAddr, HostVirtAddr, RvmHal, RvmVcpu};
2 |
3 | use super::vmexit;
4 | use crate::arch::timer;
5 | use crate::mm::{address, frame};
6 |
7 | pub struct RvmHalImpl;
8 |
9 | impl RvmHal for RvmHalImpl {
10 | fn alloc_page() -> Option {
11 | unsafe { frame::alloc_page() }
12 | }
13 |
14 | fn dealloc_page(paddr: HostPhysAddr) {
15 | unsafe { frame::dealloc_page(paddr) }
16 | }
17 |
18 | fn phys_to_virt(paddr: HostPhysAddr) -> HostVirtAddr {
19 | address::phys_to_virt(paddr)
20 | }
21 |
22 | fn virt_to_phys(vaddr: HostVirtAddr) -> HostPhysAddr {
23 | address::virt_to_phys(vaddr)
24 | }
25 |
26 | fn vmexit_handler(vcpu: &mut RvmVcpu) {
27 | vmexit::vmexit_handler(vcpu).unwrap()
28 | }
29 |
30 | fn current_time_nanos() -> u64 {
31 | timer::ticks_to_nanos(timer::current_ticks())
32 | }
33 | }
34 |
--------------------------------------------------------------------------------
/hypervisor/src/arch/x86_64/boot.rs:
--------------------------------------------------------------------------------
1 | use core::arch::global_asm;
2 |
3 | use x86_64::registers::control::{Cr0Flags, Cr4Flags};
4 | use x86_64::registers::model_specific::EferFlags;
5 |
6 | use crate::config::{BOOT_KERNEL_STACK_SIZE, PHYS_VIRT_OFFSET};
7 |
8 | const CR0: u64 = Cr0Flags::PROTECTED_MODE_ENABLE.bits()
9 | | Cr0Flags::MONITOR_COPROCESSOR.bits()
10 | | Cr0Flags::TASK_SWITCHED.bits()
11 | | Cr0Flags::NUMERIC_ERROR.bits()
12 | | Cr0Flags::WRITE_PROTECT.bits()
13 | | Cr0Flags::PAGING.bits();
14 | const CR4: u64 = Cr4Flags::PHYSICAL_ADDRESS_EXTENSION.bits() | Cr4Flags::PAGE_GLOBAL.bits();
15 | const EFER: u64 = EferFlags::LONG_MODE_ENABLE.bits() | EferFlags::NO_EXECUTE_ENABLE.bits();
16 |
17 | global_asm!(
18 | include_str!("multiboot.S"),
19 | main_entry = sym crate::main,
20 | offset = const PHYS_VIRT_OFFSET,
21 | boot_stack_size = const BOOT_KERNEL_STACK_SIZE,
22 | cr0 = const CR0,
23 | cr4 = const CR4,
24 | efer_msr = const x86::msr::IA32_EFER,
25 | efer = const EFER,
26 | );
27 |
--------------------------------------------------------------------------------
/hypervisor/linker.lds:
--------------------------------------------------------------------------------
1 | OUTPUT_ARCH(x86_64)
2 |
3 | BASE_ADDRESS = 0xffffff8000200000;
4 |
5 | ENTRY(_start)
6 | SECTIONS
7 | {
8 | . = BASE_ADDRESS;
9 | skernel = .;
10 |
11 | .text : {
12 | stext = .;
13 | *(.text.boot)
14 | *(.text .text.*)
15 | . = ALIGN(4K);
16 | etext = .;
17 | }
18 |
19 | .rodata : {
20 | srodata = .;
21 | *(.rodata .rodata.*)
22 | *(.srodata .srodata.*)
23 | . = ALIGN(4K);
24 | erodata = .;
25 | }
26 |
27 | .data : {
28 | sdata = .;
29 | *(.data.boot_page_table)
30 | *(.data .data.*)
31 | *(.sdata .sdata.*)
32 | *(.got .got.*)
33 | . = ALIGN(4K);
34 | edata = .;
35 | }
36 |
37 | .bss : {
38 | boot_stack = .;
39 | *(.bss.stack)
40 | . = ALIGN(4K);
41 | boot_stack_top = .;
42 |
43 | sbss = .;
44 | *(.bss .bss.*)
45 | *(.sbss .sbss.*)
46 | . = ALIGN(4K);
47 | ebss = .;
48 | }
49 |
50 | ekernel = .;
51 |
52 | /DISCARD/ : {
53 | *(.eh_frame) *(.eh_frame_hdr)
54 | }
55 | }
56 |
--------------------------------------------------------------------------------
/hypervisor/src/arch/x86_64/lapic.rs:
--------------------------------------------------------------------------------
1 | use x2apic::lapic::{LocalApic, LocalApicBuilder};
2 | use x86_64::instructions::port::Port;
3 |
4 | use self::vectors::*;
5 |
/// Interrupt vector numbers assigned to the local APIC sources.
pub mod vectors {
    /// Vector raised by the APIC timer.
    pub const APIC_TIMER_VECTOR: u8 = 0xf0;
    /// Vector used for spurious interrupts.
    pub const APIC_SPURIOUS_VECTOR: u8 = 0xf1;
    /// Vector used for APIC error interrupts.
    pub const APIC_ERROR_VECTOR: u8 = 0xf2;
}
11 |
12 | static mut LOCAL_APIC: Option = None;
13 |
14 | pub fn local_apic<'a>() -> &'a mut LocalApic {
15 | // It's safe as LAPIC is per-cpu.
16 | unsafe { LOCAL_APIC.as_mut().unwrap() }
17 | }
18 |
19 | pub fn init() {
20 | println!("Initializing Local APIC...");
21 |
22 | unsafe {
23 | // Disable 8259A interrupt controllers
24 | Port::::new(0x20).write(0xff);
25 | Port::::new(0xA0).write(0xff);
26 | }
27 |
28 | let mut lapic = LocalApicBuilder::new()
29 | .timer_vector(APIC_TIMER_VECTOR as _)
30 | .error_vector(APIC_ERROR_VECTOR as _)
31 | .spurious_vector(APIC_SPURIOUS_VECTOR as _)
32 | .build()
33 | .unwrap();
34 | unsafe {
35 | lapic.enable();
36 | LOCAL_APIC = Some(lapic);
37 | }
38 | }
39 |
--------------------------------------------------------------------------------
/hypervisor/src/arch/x86_64/timer.rs:
--------------------------------------------------------------------------------
1 | use raw_cpuid::CpuId;
2 | use x2apic::lapic::{TimerDivide, TimerMode};
3 |
4 | use crate::config::TICKS_PER_SEC;
5 |
6 | use super::lapic::local_apic;
7 |
8 | const LAPIC_TICKS_PER_SEC: u64 = 1_000_000_000; // TODO: need to calibrate
9 |
10 | static mut CPU_FREQ_MHZ: u64 = 4_000;
11 |
/// Returns the current value of the CPU time-stamp counter (`rdtsc`).
pub fn current_ticks() -> u64 {
    let tsc = unsafe { core::arch::x86_64::_rdtsc() };
    tsc
}
15 |
16 | pub fn ticks_to_nanos(ticks: u64) -> u64 {
17 | ticks * 1_000 / unsafe { CPU_FREQ_MHZ }
18 | }
19 |
20 | pub fn init() {
21 | if let Some(freq) = CpuId::new()
22 | .get_processor_frequency_info()
23 | .map(|info| info.processor_base_frequency())
24 | {
25 | if freq > 0 {
26 | println!("Got TSC frequency by CPUID: {} MHz", freq);
27 | unsafe { CPU_FREQ_MHZ = freq as u64 }
28 | }
29 | }
30 |
31 | let lapic = local_apic();
32 | unsafe {
33 | lapic.set_timer_mode(TimerMode::Periodic);
34 | lapic.set_timer_divide(TimerDivide::Div256); // indeed it is Div1, the name is confusing.
35 | lapic.set_timer_initial((LAPIC_TICKS_PER_SEC / TICKS_PER_SEC) as u32);
36 | }
37 | }
38 |
--------------------------------------------------------------------------------
/hypervisor/src/arch/x86_64/idt.rs:
--------------------------------------------------------------------------------
1 | use x86_64::structures::idt::{Entry, HandlerFunc, InterruptDescriptorTable};
2 |
3 | const NUM_INT: usize = 256;
4 |
5 | lazy_static::lazy_static! {
6 | static ref IDT: IdtStruct = IdtStruct::new();
7 | }
8 |
9 | struct IdtStruct {
10 | table: InterruptDescriptorTable,
11 | }
12 |
13 | impl IdtStruct {
14 | fn new() -> Self {
15 | extern "C" {
16 | #[link_name = "trap_handler_table"]
17 | static ENTRIES: [extern "C" fn(); NUM_INT];
18 | }
19 | let mut idt = Self {
20 | table: InterruptDescriptorTable::new(),
21 | };
22 |
23 | let entries = unsafe {
24 | core::slice::from_raw_parts_mut(
25 | &mut idt.table as *mut _ as *mut Entry,
26 | NUM_INT,
27 | )
28 | };
29 | for i in 0..NUM_INT {
30 | entries[i].set_handler_fn(unsafe { core::mem::transmute(ENTRIES[i]) });
31 | }
32 | idt
33 | }
34 |
35 | fn load(&'static self) {
36 | self.table.load();
37 | }
38 | }
39 |
40 | pub fn init() {
41 | println!("Initializing IDT...");
42 | lazy_static::initialize(&IDT);
43 | IDT.load();
44 | }
45 |
--------------------------------------------------------------------------------
/guest/bios/boot16.S:
--------------------------------------------------------------------------------
1 | .section .text
2 | .code16
3 | .global entry16
4 | entry16:
5 | cli
6 | cld
7 |
8 | xor ax, ax
9 | mov ds, ax
10 | mov es, ax
11 | mov ss, ax
12 |
13 | lgdt [prot_gdt_desc]
14 | mov eax, cr0
15 | or eax, 0x1
16 | mov cr0, eax
17 |
18 | ljmp 0x8, entry32
19 |
20 | .code32
21 | .global entry32
22 | entry32:
23 | mov ax, 0x10
24 | mov ds, ax
25 | mov es, ax
26 | mov ss, ax
27 | mov fs, ax
28 | mov gs, ax
29 |
30 | mov esp, 0x7000 # temporary stack
31 | mov ecx, 0x200000 # kernel entry
32 | mov eax, 0x1BADB002 # multiboot magic
33 | mov ebx, 0 # multiboot information (unsupported)
34 | jmp ecx
35 |
36 | .balign 16
37 | prot_gdt:
38 | .quad 0x0000000000000000 # 0x00: null
39 | .quad 0x00cf9b000000ffff # 0x08: code segment (base=0, limit=0xfffff, type=32bit code exec/read, DPL=0, 4k)
40 | .quad 0x00cf93000000ffff # 0x10: data segment (base=0, limit=0xfffff, type=32bit data read/write, DPL=0, 4k)
41 |
42 | prot_gdt_desc:
43 | .short prot_gdt_desc - prot_gdt - 1 # limit
44 | .long prot_gdt # base
45 |
--------------------------------------------------------------------------------
/hypervisor/src/hv/device_emu/mod.rs:
--------------------------------------------------------------------------------
1 | mod i8259_pic;
2 | mod lapic;
3 | mod uart16550;
4 |
5 | use alloc::{sync::Arc, vec, vec::Vec};
6 |
7 | pub use self::lapic::VirtLocalApic;
8 |
9 | pub trait PortIoDevice: Send + Sync {
10 | fn port_range(&self) -> core::ops::Range;
11 | fn read(&self, port: u16, access_size: u8) -> rvm::RvmResult;
12 | fn write(&self, port: u16, access_size: u8, value: u32) -> rvm::RvmResult;
13 | }
14 |
15 | pub struct VirtDeviceList {
16 | port_io_devices: Vec>,
17 | }
18 |
19 | impl VirtDeviceList {
20 | pub fn find_port_io_device(&self, port: u16) -> Option<&Arc> {
21 | self.port_io_devices
22 | .iter()
23 | .find(|dev| dev.port_range().contains(&port))
24 | }
25 | }
26 |
27 | lazy_static::lazy_static! {
28 | static ref VIRT_DEVICES : VirtDeviceList = VirtDeviceList {
29 | port_io_devices: vec![
30 | Arc::new(uart16550::Uart16550::new(0x3f8)), // COM1
31 | Arc::new(i8259_pic::I8259Pic::new(0x20)), // PIC1
32 | Arc::new(i8259_pic::I8259Pic::new(0xA0)), // PIC2
33 | ],
34 | };
35 | }
36 |
37 | pub fn all_virt_devices() -> &'static VirtDeviceList {
38 | &VIRT_DEVICES
39 | }
40 |
--------------------------------------------------------------------------------
/.github/workflows/build.yml:
--------------------------------------------------------------------------------
1 | name: Build CI
2 |
3 | on: [push, pull_request]
4 |
5 | jobs:
6 | clippy:
7 | runs-on: ubuntu-latest
8 | strategy:
9 | fail-fast: false
10 | matrix:
11 | arch: [x86_64]
12 | steps:
13 | - uses: actions/checkout@v2
14 | - uses: actions-rs/toolchain@v1
15 | with:
16 | profile: minimal
17 | toolchain: nightly-2022-11-03
18 | override: true
19 | components: rust-src, clippy, rustfmt
20 | - name: Clippy
21 | run: make -C hypervisor clippy ARCH=${{ matrix.arch }}
22 | - name: Check code format
23 | run: cd hypervisor && cargo fmt -- --check
24 |
25 | build:
26 | runs-on: ${{ matrix.os }}
27 | strategy:
28 | fail-fast: false
29 | matrix:
30 | os: [ubuntu-latest]
31 | arch: [x86_64]
32 | steps:
33 | - uses: actions/checkout@v2
34 | - uses: actions-rs/toolchain@v1
35 | with:
36 | profile: minimal
37 | toolchain: nightly-2022-11-03
38 | components: rust-src, llvm-tools-preview
39 | - uses: actions-rs/install@v0.1
40 | with:
41 | crate: cargo-binutils
42 | version: latest
43 | use-tool-cache: true
44 | - name: Build hypervisor
45 | run: make -C hypervisor ARCH=${{ matrix.arch }}
46 |
--------------------------------------------------------------------------------
/rvm/src/arch/x86_64/regs.rs:
--------------------------------------------------------------------------------
/// General-Purpose Registers for 64-bit x86 architecture.
///
/// The field order matches the stack layout produced by
/// `save_regs_to_stack!`: `rax` first, `r15` last, with one unused
/// slot where `rsp` would be.
#[repr(C)]
#[derive(Debug, Default, Clone)]
pub struct GeneralRegisters {
    pub rax: u64,
    pub rcx: u64,
    pub rdx: u64,
    pub rbx: u64,
    /// Placeholder slot; `rsp` is not saved here.
    _unused_rsp: u64,
    pub rbp: u64,
    pub rsi: u64,
    pub rdi: u64,
    pub r8: u64,
    pub r9: u64,
    pub r10: u64,
    pub r11: u64,
    pub r12: u64,
    pub r13: u64,
    pub r14: u64,
    pub r15: u64,
}
22 |
/// Expands to an assembly snippet that pushes the general-purpose registers
/// onto the stack, from `r15` down to `rax`.
///
/// `sub rsp, 8` leaves an empty slot in place of `rsp`, so the resulting stack
/// contents line up with the fields of `GeneralRegisters`.
macro_rules! save_regs_to_stack {
    () => {
        "
push r15
push r14
push r13
push r12
push r11
push r10
push r9
push r8
push rdi
push rsi
push rbp
sub rsp, 8
push rbx
push rdx
push rcx
push rax"
    };
}
44 |
/// Expands to an assembly snippet that pops the general-purpose registers
/// from the stack, from `rax` up to `r15`.
///
/// `add rsp, 8` skips the slot that `save_regs_to_stack!` reserves in place of
/// `rsp`.
macro_rules! restore_regs_from_stack {
    () => {
        "
pop rax
pop rcx
pop rdx
pop rbx
add rsp, 8
pop rbp
pop rsi
pop rdi
pop r8
pop r9
pop r10
pop r11
pop r12
pop r13
pop r14
pop r15"
    };
}
66 |
--------------------------------------------------------------------------------
/hypervisor/Makefile:
--------------------------------------------------------------------------------
1 | # Arguments
2 | ARCH ?= x86_64
3 | MODE ?= release
4 | LOG ?= warn
5 |
6 | BIOS_IMG ?= ../guest/bios/out/rvm-bios.bin
7 | GUEST_IMG ?= ../guest/nimbos/kernel/target/x86_64/release/nimbos.bin
8 |
9 | export ARCH
10 | export MODE
11 | export LOG
12 |
13 | # Paths
14 | target_elf := target/$(ARCH)/$(MODE)/rvm-hypervisor
15 | target_bin := $(target_elf).bin
16 |
17 | build_args := --target $(ARCH).json -Zbuild-std=core,alloc -Zbuild-std-features=compiler-builtins-mem
18 | ifeq ($(MODE), release)
19 | build_args += --release
20 | endif
21 |
22 | # Binutils
23 | OBJDUMP := rust-objdump -d --print-imm-hex --x86-asm-syntax=intel
24 | OBJCOPY := rust-objcopy --binary-architecture=$(ARCH)
25 | GDB := gdb-multiarch
26 |
27 | # QEMU
28 | qemu := qemu-system-$(ARCH)
29 | qemu_args := -nographic -m 128M
30 |
31 | qemu_args += -cpu host,+x2apic,+vmx -accel kvm \
32 | -device loader,addr=0x4000000,file=$(BIOS_IMG),force-raw=on \
33 | -device loader,addr=0x4001000,file=$(GUEST_IMG),force-raw=on
34 |
35 | ifeq ($(ARCH), x86_64)
36 | qemu_args += \
37 | -machine q35 \
38 | -serial mon:stdio \
39 | -kernel $(target_elf)
40 | endif
41 |
42 | build: $(target_bin)
43 |
44 | $(target_bin): elf
45 | @$(OBJCOPY) $(target_elf) --strip-all -O binary $@
46 |
47 | elf:
48 | @echo Arch: $(ARCH)
49 | cargo build $(build_args)
50 |
51 | clean:
52 | cargo clean
53 |
54 | clippy:
55 | cargo clippy $(build_args)
56 |
57 | fmt:
58 | cargo fmt
59 |
60 | disasm:
61 | @$(OBJDUMP) $(target_elf) | less
62 |
63 | run: build justrun
64 |
65 | justrun:
66 | $(qemu) $(qemu_args)
67 |
# None of these targets produce a file of the same name, so mark them all as
# phony (including `fmt`, which was missing from the list).
.PHONY: build elf clean clippy fmt disasm run justrun
69 |
--------------------------------------------------------------------------------
/hypervisor/src/arch/x86_64/trap.S:
--------------------------------------------------------------------------------
1 | .equ NUM_INT, 256
2 |
3 | .altmacro
4 | .macro DEF_HANDLER, i
5 | .Ltrap_handler_\i:
6 | .if \i == 8 || (\i >= 10 && \i <= 14) || \i == 17
7 | // error code pushed by CPU
8 | push \i // interrupt vector
9 | jmp .Ltrap_common
10 | .else
11 | push 0 // fill in error code in TrapFrame
12 | push \i // interrupt vector
13 | jmp .Ltrap_common
14 | .endif
15 | .endm
16 |
17 | .macro DEF_TABLE_ENTRY, i
18 | .quad .Ltrap_handler_\i
19 | .endm
20 |
21 | .section .text
22 | _trap_handlers:
23 | .set i, 0
24 | .rept NUM_INT
25 | DEF_HANDLER %i
26 | .set i, i + 1
27 | .endr
28 |
29 | .Ltrap_common:
30 | push r15
31 | push r14
32 | push r13
33 | push r12
34 | push r11
35 | push r10
36 | push r9
37 | push r8
38 | push rdi
39 | push rsi
40 | push rbp
41 | push rbx
42 | push rdx
43 | push rcx
44 | push rax
45 |
46 | mov rdi, rsp
47 | call x86_trap_handler
48 |
49 | pop rax
50 | pop rcx
51 | pop rdx
52 | pop rbx
53 | pop rbp
54 | pop rsi
55 | pop rdi
56 | pop r8
57 | pop r9
58 | pop r10
59 | pop r11
60 | pop r12
61 | pop r13
62 | pop r14
63 | pop r15
64 |
65 | add rsp, 16 // pop vector, error_code
66 | iretq
67 |
68 | .section .rodata
69 | .global trap_handler_table
70 | trap_handler_table:
71 | .set i, 0
72 | .rept NUM_INT
73 | DEF_TABLE_ENTRY %i
74 | .set i, i + 1
75 | .endr
76 |
--------------------------------------------------------------------------------
/hypervisor/src/main.rs:
--------------------------------------------------------------------------------
1 | #![no_std]
2 | #![no_main]
3 | #![feature(asm_const)]
4 | #![feature(panic_info_message, alloc_error_handler)]
5 |
6 | #[macro_use]
7 | extern crate log;
8 |
9 | extern crate alloc;
10 |
11 | #[macro_use]
12 | mod logging;
13 |
14 | mod arch;
15 | mod config;
16 | mod hv;
17 | mod mm;
18 | mod timer;
19 |
20 | #[cfg(not(test))]
21 | mod lang_items;
22 |
23 | use core::sync::atomic::{AtomicBool, Ordering};
24 |
25 | static INIT_OK: AtomicBool = AtomicBool::new(false);
26 |
27 | const LOGO: &str = r"
28 |
29 | RRRRRR VV VV MM MM
30 | RR RR VV VV MMM MMM
31 | RRRRRR VV VV MM MM MM
32 | RR RR VV VV MM MM
33 | RR RR VVV MM MM
34 | ___ ____ ___ ___
35 | |__ \ / __ \ |__ \ |__ \
36 | __/ / / / / / __/ / __/ /
37 | / __/ / /_/ / / __/ / __/
38 | /____/ \____/ /____/ /____/
39 | ";
40 |
/// Zero-fills the memory between the `sbss` and `ebss` symbols.
///
/// Both symbols are declared as external functions only so that their
/// addresses can be taken; they are never called.
fn clear_bss() {
    extern "C" {
        fn sbss();
        fn ebss();
    }
    let start = sbss as usize;
    let len = ebss as usize - start;
    // SAFETY: assumes `sbss..ebss` delimits the kernel's own `.bss` as laid
    // out by the linker script (not visible here) — TODO confirm.
    unsafe { core::ptr::write_bytes(start as *mut u8, 0, len) };
}
51 |
52 | pub fn init_ok() -> bool {
53 | INIT_OK.load(Ordering::SeqCst)
54 | }
55 |
56 | fn main() -> ! {
57 | clear_bss();
58 | arch::init_early();
59 | println!("{}", LOGO);
60 | println!(
61 | "\
62 | arch = {}\n\
63 | build_mode = {}\n\
64 | log_level = {}\n\
65 | ",
66 | option_env!("ARCH").unwrap_or(""),
67 | option_env!("MODE").unwrap_or(""),
68 | option_env!("LOG").unwrap_or(""),
69 | );
70 |
71 | mm::init_heap_early();
72 | logging::init();
73 | info!("Logging is enabled.");
74 |
75 | arch::init();
76 | mm::init();
77 | INIT_OK.store(true, Ordering::SeqCst);
78 | println!("Initialization completed.\n");
79 |
80 | hv::run();
81 | }
82 |
--------------------------------------------------------------------------------
/rvm/src/arch/x86_64/msr.rs:
--------------------------------------------------------------------------------
1 | use x86::msr::{rdmsr, wrmsr};
2 |
/// X86 model-specific registers. (SDM Vol. 4)
///
/// The discriminant of each variant is the MSR index, so a variant can be
/// cast directly to the `u32` expected by `rdmsr`/`wrmsr`.
#[repr(u32)]
#[derive(Debug, Copy, Clone)]
#[allow(non_camel_case_types, dead_code)]
pub enum Msr {
    IA32_FEATURE_CONTROL = 0x3a,

    IA32_PAT = 0x277,

    // IA32_VMX_* registers.
    IA32_VMX_BASIC = 0x480,
    IA32_VMX_PINBASED_CTLS = 0x481,
    IA32_VMX_PROCBASED_CTLS = 0x482,
    IA32_VMX_EXIT_CTLS = 0x483,
    IA32_VMX_ENTRY_CTLS = 0x484,
    IA32_VMX_MISC = 0x485,
    IA32_VMX_CR0_FIXED0 = 0x486,
    IA32_VMX_CR0_FIXED1 = 0x487,
    IA32_VMX_CR4_FIXED0 = 0x488,
    IA32_VMX_CR4_FIXED1 = 0x489,
    IA32_VMX_PROCBASED_CTLS2 = 0x48b,
    IA32_VMX_EPT_VPID_CAP = 0x48c,
    IA32_VMX_TRUE_PINBASED_CTLS = 0x48d,
    IA32_VMX_TRUE_PROCBASED_CTLS = 0x48e,
    IA32_VMX_TRUE_EXIT_CTLS = 0x48f,
    IA32_VMX_TRUE_ENTRY_CTLS = 0x490,

    // Registers in the 0xc000_008x range.
    IA32_EFER = 0xc000_0080,
    IA32_STAR = 0xc000_0081,
    IA32_LSTAR = 0xc000_0082,
    IA32_CSTAR = 0xc000_0083,
    IA32_FMASK = 0xc000_0084,

    // Segment base registers.
    IA32_FS_BASE = 0xc000_0100,
    IA32_GS_BASE = 0xc000_0101,
    IA32_KERNEL_GSBASE = 0xc000_0102,
}
39 |
40 | impl Msr {
41 | /// Read 64 bits msr register.
42 | #[inline(always)]
43 | pub fn read(self) -> u64 {
44 | unsafe { rdmsr(self as _) }
45 | }
46 |
47 | /// Write 64 bits to msr register.
48 | ///
49 | /// # Safety
50 | ///
51 | /// The caller must ensure that this write operation has no unsafe side
52 | /// effects.
53 | #[inline(always)]
54 | pub unsafe fn write(self, value: u64) {
55 | wrmsr(self as _, value)
56 | }
57 | }
58 |
59 | pub(super) trait MsrReadWrite {
60 | const MSR: Msr;
61 |
62 | fn read_raw() -> u64 {
63 | Self::MSR.read()
64 | }
65 |
66 | unsafe fn write_raw(flags: u64) {
67 | Self::MSR.write(flags);
68 | }
69 | }
70 |
--------------------------------------------------------------------------------
/hypervisor/src/mm/frame.rs:
--------------------------------------------------------------------------------
1 | use bitmap_allocator::BitAlloc;
2 | use spin::Mutex;
3 |
4 | use super::address::{align_down, align_up, virt_to_phys, PhysAddr};
5 | use super::PAGE_SIZE;
6 | use crate::config::PHYS_MEMORY_END;
7 |
8 | // Support max 1M * 4096 = 4GB memory.
9 | type FrameAlloc = bitmap_allocator::BitAlloc1M;
10 |
11 | static FRAME_ALLOCATOR: Mutex = Mutex::new(FrameAllocator::empty());
12 |
13 | struct FrameAllocator {
14 | base: PhysAddr,
15 | inner: FrameAlloc,
16 | }
17 |
18 | impl FrameAllocator {
19 | const fn empty() -> Self {
20 | Self {
21 | base: 0,
22 | inner: FrameAlloc::DEFAULT,
23 | }
24 | }
25 |
26 | fn init(&mut self, base: PhysAddr, size: usize) {
27 | self.base = align_up(base);
28 | let page_count = align_up(size) / PAGE_SIZE;
29 | self.inner.insert(0..page_count);
30 | }
31 |
32 | unsafe fn alloc(&mut self) -> Option {
33 | let ret = self.inner.alloc().map(|idx| idx * PAGE_SIZE + self.base);
34 | trace!("Allocate frame: {:x?}", ret);
35 | ret
36 | }
37 |
38 | unsafe fn dealloc(&mut self, target: PhysAddr) {
39 | trace!("Deallocate frame: {:x}", target);
40 | self.inner.dealloc((target - self.base) / PAGE_SIZE)
41 | }
42 | }
43 |
44 | pub unsafe fn alloc_page() -> Option {
45 | FRAME_ALLOCATOR.lock().alloc()
46 | }
47 |
48 | pub unsafe fn dealloc_page(paddr: PhysAddr) {
49 | FRAME_ALLOCATOR.lock().dealloc(paddr)
50 | }
51 |
52 | pub(super) fn init() {
53 | extern "C" {
54 | fn ekernel();
55 | }
56 |
57 | let mem_pool_start = align_up(virt_to_phys(ekernel as usize));
58 | let mem_pool_end = align_down(PHYS_MEMORY_END);
59 | let mem_pool_size = mem_pool_end - mem_pool_start;
60 | println!(
61 | "Initializing frame allocator at: [{:#x?}, {:#x?})",
62 | mem_pool_start, mem_pool_end
63 | );
64 | FRAME_ALLOCATOR.lock().init(mem_pool_start, mem_pool_size);
65 | }
66 |
--------------------------------------------------------------------------------
/rvm/src/arch/x86_64/vmx/instructions.rs:
--------------------------------------------------------------------------------
1 | use core::arch::asm;
2 | use x86::bits64::rflags::{self, RFlags};
3 | use x86::vmx::{Result, VmFail};
4 |
5 | /// Helper used to extract VMX-specific Result in accordance with
6 | /// conventions described in Intel SDM, Volume 3C, Section 30.2.
7 | // We inline this to provide an obstruction-free path from this function's
8 | // call site to the moment where `rflags::read()` reads RFLAGS. Otherwise it's
9 | // possible for RFLAGS register to be clobbered by a function prologue,
10 | // see https://github.com/gz/rust-x86/pull/50.
11 | #[inline(always)]
12 | fn vmx_capture_status() -> Result<()> {
13 | let flags = rflags::read();
14 |
15 | if flags.contains(RFlags::FLAGS_ZF) {
16 | Err(VmFail::VmFailValid)
17 | } else if flags.contains(RFlags::FLAGS_CF) {
18 | Err(VmFail::VmFailInvalid)
19 | } else {
20 | Ok(())
21 | }
22 | }
23 |
/// INVEPT type. (SDM Vol. 3C, Section 30.3)
///
/// The discriminant is the value passed to `invept` as its register operand.
#[repr(u64)]
#[derive(Debug)]
#[allow(dead_code)]
pub enum InvEptType {
    /// The logical processor invalidates all mappings associated with bits
    /// 51:12 of the EPT pointer (EPTP) specified in the INVEPT descriptor.
    /// It may invalidate other mappings as well.
    SingleContext = 1,
    /// The logical processor invalidates mappings associated with all EPTPs.
    Global = 2,
}
36 |
37 | /// Invalidate Translations Derived from EPT. (SDM Vol. 3C, Section 30.3)
38 | ///
39 | /// Invalidates mappings in the translation lookaside buffers (TLBs) and
40 | /// paging-structure caches that were derived from extended page tables (EPT).
41 | /// (See Chapter 28, “VMX Support for Address Translation”.) Invalidation is
42 | /// based on the INVEPT type specified in the register operand and the INVEPT
43 | /// descriptor specified in the memory operand.
44 | pub unsafe fn invept(inv_type: InvEptType, eptp: u64) -> Result<()> {
45 | let invept_desc = [eptp, 0];
46 | asm!("invept {0}, [{1}]", in(reg) inv_type as u64, in(reg) &invept_desc);
47 | vmx_capture_status()
48 | }
49 |
--------------------------------------------------------------------------------
/hypervisor/src/arch/x86_64/gdt.rs:
--------------------------------------------------------------------------------
1 | use x86_64::instructions::tables::{lgdt, load_tss};
2 | use x86_64::registers::segmentation::{Segment, SegmentSelector, CS};
3 | use x86_64::structures::gdt::{Descriptor, DescriptorFlags};
4 | use x86_64::structures::{tss::TaskStateSegment, DescriptorTablePointer};
5 | use x86_64::{addr::VirtAddr, PrivilegeLevel};
6 |
7 | lazy_static::lazy_static! {
8 | static ref TSS: TaskStateSegment = TaskStateSegment::new();
9 | static ref GDT: GdtStruct = GdtStruct::new(&TSS);
10 | }
11 |
12 | struct GdtStruct {
13 | table: [u64; 16],
14 | }
15 |
16 | impl GdtStruct {
17 | pub const KCODE_SELECTOR: SegmentSelector = SegmentSelector::new(1, PrivilegeLevel::Ring0);
18 | pub const _KDATA_SELECTOR: SegmentSelector = SegmentSelector::new(2, PrivilegeLevel::Ring0);
19 | pub const TSS_SELECTOR: SegmentSelector = SegmentSelector::new(3, PrivilegeLevel::Ring0);
20 |
21 | pub fn new(tss: &'static TaskStateSegment) -> Self {
22 | let mut table = [0; 16];
23 | table[1] = DescriptorFlags::KERNEL_CODE64.bits(); // 0x00af9b000000ffff
24 | table[2] = DescriptorFlags::KERNEL_DATA.bits(); // 0x00cf93000000ffff
25 | if let Descriptor::SystemSegment(low, high) = Descriptor::tss_segment(tss) {
26 | table[3] = low;
27 | table[4] = high;
28 | }
29 | Self { table }
30 | }
31 |
32 | fn pointer(&self) -> DescriptorTablePointer {
33 | DescriptorTablePointer {
34 | base: VirtAddr::new(self.table.as_ptr() as u64),
35 | limit: (core::mem::size_of_val(&self.table) - 1) as u16,
36 | }
37 | }
38 |
39 | pub fn load(&'static self) {
40 | unsafe {
41 | lgdt(&self.pointer());
42 | CS::set_reg(GdtStruct::KCODE_SELECTOR);
43 | }
44 | }
45 |
46 | pub fn load_tss(&'static self, selector: SegmentSelector) {
47 | unsafe { load_tss(selector) };
48 | }
49 | }
50 |
51 | pub fn init() {
52 | println!("Initializing GDT...");
53 | lazy_static::initialize(&GDT);
54 | GDT.load();
55 | GDT.load_tss(GdtStruct::TSS_SELECTOR);
56 | }
57 |
--------------------------------------------------------------------------------
/hypervisor/src/arch/x86_64/trap.rs:
--------------------------------------------------------------------------------
1 | use core::arch::global_asm;
2 |
3 | use x86::{controlregs::cr2, irq::*};
4 |
5 | use super::lapic::{local_apic, vectors::*};
6 |
7 | global_asm!(include_str!("trap.S"));
8 |
/// First vector treated as an IRQ by the trap handler.
const IRQ_VECTOR_START: u8 = 0x20;
/// Last vector treated as an IRQ by the trap handler.
const IRQ_VECTOR_END: u8 = 0xff;

/// Register state saved on the stack when a trap is taken.
///
/// The field order, from lowest to highest address, mirrors what `trap.S`
/// leaves on the stack: the general-purpose registers, then the vector and
/// error code, then the frame pushed by the CPU.
#[repr(C)]
#[derive(Debug, Default, Clone, Copy)]
pub struct TrapFrame {
    // General-purpose registers, pushed by the common path in 'trap.S'.
    pub rax: u64,
    pub rcx: u64,
    pub rdx: u64,
    pub rbx: u64,
    pub rbp: u64,
    pub rsi: u64,
    pub rdi: u64,
    pub r8: u64,
    pub r9: u64,
    pub r10: u64,
    pub r11: u64,
    pub r12: u64,
    pub r13: u64,
    pub r14: u64,
    pub r15: u64,

    // Pushed by the per-vector stub in 'trap.S'. For vectors that carry an
    // error code, the CPU pushes `error_code` itself.
    pub vector: u64,
    pub error_code: u64,

    // Pushed by CPU
    pub rip: u64,
    pub cs: u64,
    pub rflags: u64,
    pub rsp: u64,
    pub ss: u64,
}
42 |
43 | #[no_mangle]
44 | fn x86_trap_handler(tf: &mut TrapFrame) {
45 | trace!("trap {} @ {:#x}: {:#x?}", tf.vector, tf.rip, tf);
46 | match tf.vector as u8 {
47 | PAGE_FAULT_VECTOR => {
48 | panic!(
49 | "Hypervisor Page Fault @ {:#x}, fault_vaddr={:#x}, error_code={:#x}",
50 | tf.rip,
51 | unsafe { cr2() },
52 | tf.error_code,
53 | );
54 | }
55 | GENERAL_PROTECTION_FAULT_VECTOR => {
56 | panic!(
57 | "General Protection Exception @ {:#x}, error_code = {:#x}, kernel killed it.",
58 | tf.rip, tf.error_code,
59 | );
60 | }
61 | IRQ_VECTOR_START..=IRQ_VECTOR_END => handle_irq(tf.vector as u8),
62 | _ => {
63 | panic!(
64 | "Unhandled exception {} (error_code = {:#x}) @ {:#x}:\n{:#x?}",
65 | tf.vector, tf.error_code, tf.rip, tf
66 | );
67 | }
68 | }
69 | }
70 |
71 | pub fn handle_irq(vector: u8) {
72 | match vector {
73 | APIC_TIMER_VECTOR => {
74 | trace!("TIMER");
75 | unsafe { local_apic().end_of_interrupt() };
76 | }
77 | _ => warn!("Unhandled IRQ {}", vector),
78 | }
79 | }
80 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # RVM-Tutorial
2 |
3 | Let's write an x86 hypervisor in Rust from scratch!
4 |
5 | ## Features
6 |
7 | * Lightweight enough, only 3K+ LoC
8 | * Supported guest OS: [NimbOS](https://github.com/equation314/nimbos)
9 | * Guest/host memory isolation with nested paging
10 | * Device emulation:
11 | + serial port I/O
12 | + APIC timer
13 | * Currently supports only a single core, a single vCPU, and a single guest
14 |
15 | ## Install Build Dependencies
16 |
17 | Install [cargo-binutils](https://github.com/rust-embedded/cargo-binutils) to use `rust-objcopy` and `rust-objdump` tools:
18 |
19 | ```console
20 | $ cargo install cargo-binutils
21 | ```
22 |
23 | You also need to install [musl-gcc](http://musl.cc/x86_64-linux-musl-cross.tgz) to build guest user applications.
24 |
25 | ## Build Guest OS
26 |
27 | ```console
28 | $ git submodule init && git submodule update
29 | $ cd guest/nimbos/kernel
30 | $ make user
31 | $ make GUEST=on
32 | ```
33 |
34 | ## Build Guest BIOS
35 |
36 | ```console
37 | $ cd guest/bios
38 | $ make
39 | ```
40 |
41 | ## Build & Run Hypervisor
42 |
43 | ```console
44 | $ cd hypervisor
45 | $ make run [LOG=warn|info|debug|trace]
46 | ......
47 | Booting from ROM..
48 |
49 | RRRRRR VV VV MM MM
50 | RR RR VV VV MMM MMM
51 | RRRRRR VV VV MM MM MM
52 | RR RR VV VV MM MM
53 | RR RR VVV MM MM
54 | ___ ____ ___ ___
55 | |__ \ / __ \ |__ \ |__ \
56 | __/ / / / / / __/ / __/ /
57 | / __/ / /_/ / / __/ / __/
58 | /____/ \____/ /____/ /____/
59 |
60 | arch = x86_64
61 | build_mode = release
62 | log_level = info
63 | ......
64 | Running guest...
65 |
66 | NN NN iii bb OOOOO SSSSS
67 | NNN NN mm mm mmmm bb OO OO SS
68 | NN N NN iii mmm mm mm bbbbbb OO OO SSSSS
69 | NN NNN iii mmm mm mm bb bb OO OO SS
70 | NN NN iii mmm mm mm bbbbbb OOOO0 SSSSS
71 | ___ ____ ___ ___
72 | |__ \ / __ \ |__ \ |__ \
73 | __/ / / / / / __/ / __/ /
74 | / __/ / /_/ / / __/ / __/
75 | /____/ \____/ /____/ /____/
76 |
77 | arch = x86_64
78 | platform = rvm-guest-x86_64
79 | build_mode = release
80 | log_level = warn
81 | ......
82 | ```
83 |
84 | ## Documents
85 |
86 | * [in Chinese](https://github.com/equation314/RVM-Tutorial/wiki)
87 |
--------------------------------------------------------------------------------
/rvm/src/lib.rs:
--------------------------------------------------------------------------------
1 | #![no_std]
2 | #![feature(asm_const)]
3 | #![feature(concat_idents)]
4 | #![feature(naked_functions)]
5 |
6 | extern crate alloc;
7 | #[macro_use]
8 | extern crate log;
9 |
10 | #[macro_use]
11 | mod error;
12 | mod hal;
13 | mod mm;
14 |
15 | pub mod arch;
16 |
17 | use arch::ArchPerCpuState;
18 |
19 | pub use arch::{NestedPageTable, RvmVcpu};
20 | pub use error::{RvmError, RvmResult};
21 | pub use hal::RvmHal;
22 | pub use mm::{GuestPhysAddr, GuestVirtAddr, HostPhysAddr, HostVirtAddr};
23 | pub use mm::{Level4PageTable, MemFlags, NestedPageFaultInfo};
24 |
25 | /// Whether the hardware has virtualization support.
26 | pub fn has_hardware_support() -> bool {
27 | arch::has_hardware_support()
28 | }
29 |
30 | /// Host per-CPU states to run the guest. All methods must be called on the corresponding CPU.
31 | pub struct RvmPerCpu {
32 | _cpu_id: usize,
33 | arch: ArchPerCpuState,
34 | }
35 |
36 | impl RvmPerCpu {
37 | /// Create an uninitialized instance.
38 | pub fn new(cpu_id: usize) -> Self {
39 | Self {
40 | _cpu_id: cpu_id,
41 | arch: ArchPerCpuState::new(),
42 | }
43 | }
44 |
45 | /// Whether the current CPU has hardware virtualization enabled.
46 | pub fn is_enabled(&self) -> bool {
47 | self.arch.is_enabled()
48 | }
49 |
50 | /// Enable hardware virtualization on the current CPU.
51 | pub fn hardware_enable(&mut self) -> RvmResult {
52 | self.arch.hardware_enable()
53 | }
54 |
55 | /// Disable hardware virtualization on the current CPU.
56 | pub fn hardware_disable(&mut self) -> RvmResult {
57 | self.arch.hardware_disable()
58 | }
59 |
60 | /// Create a [`RvmVcpu`], set the entry point to `entry`, set the nested
61 | /// page table root to `npt_root`.
62 | pub fn create_vcpu(
63 | &self,
64 | entry: GuestPhysAddr,
65 | npt_root: HostPhysAddr,
66 | ) -> RvmResult> {
67 | if !self.is_enabled() {
68 | rvm_err!(BadState, "virtualization is not enabled")
69 | } else {
70 | RvmVcpu::new(&self.arch, entry, npt_root)
71 | }
72 | }
73 | }
74 |
75 | impl Drop for RvmPerCpu {
76 | fn drop(&mut self) {
77 | if self.is_enabled() {
78 | self.hardware_disable().unwrap();
79 | }
80 | }
81 | }
82 |
--------------------------------------------------------------------------------
/rvm/src/mm/mod.rs:
--------------------------------------------------------------------------------
1 | mod page_table;
2 |
3 | use core::marker::PhantomData;
4 |
5 | use crate::{RvmHal, RvmResult};
6 |
7 | pub use page_table::{GenericPTE, Level4PageTable};
8 |
9 | pub const PAGE_SIZE: usize = 0x1000;
10 |
11 | /// Guest virtual address.
12 | pub type GuestVirtAddr = usize;
13 | /// Guest physical address.
14 | pub type GuestPhysAddr = usize;
15 | /// Host virtual address.
16 | pub type HostVirtAddr = usize;
17 | /// Host physical address.
18 | pub type HostPhysAddr = usize;
19 |
20 | bitflags::bitflags! {
21 | /// Permission and type of a guest physical memory region.
22 | pub struct MemFlags: u64 {
23 | const READ = 1 << 0;
24 | const WRITE = 1 << 1;
25 | const EXECUTE = 1 << 2;
26 | const DEVICE = 1 << 3;
27 | }
28 | }
29 |
30 | /// Information about nested page faults.
31 | #[derive(Debug)]
32 | pub struct NestedPageFaultInfo {
33 | /// Access type that caused the nested page fault.
34 | pub access_flags: MemFlags,
35 | /// Guest physical address that caused the nested page fault.
36 | pub fault_guest_paddr: GuestPhysAddr,
37 | }
38 |
39 | /// A 4K-sized contiguous physical memory page, it will deallocate the page
40 | /// automatically on drop.
41 | #[derive(Debug)]
42 | pub struct PhysFrame {
43 | start_paddr: HostPhysAddr,
44 | _phantom: PhantomData,
45 | }
46 |
47 | impl PhysFrame {
48 | pub fn alloc() -> RvmResult {
49 | let start_paddr = H::alloc_page()
50 | .ok_or_else(|| rvm_err_type!(OutOfMemory, "allocate physical frame failed"))?;
51 | assert_ne!(start_paddr, 0);
52 | debug!("[RVM] allocated PhysFrame({:#x})", start_paddr);
53 | Ok(Self {
54 | start_paddr,
55 | _phantom: PhantomData,
56 | })
57 | }
58 |
59 | pub fn alloc_zero() -> RvmResult {
60 | let mut f = Self::alloc()?;
61 | f.fill(0);
62 | Ok(f)
63 | }
64 |
65 | pub const unsafe fn uninit() -> Self {
66 | Self {
67 | start_paddr: 0,
68 | _phantom: PhantomData,
69 | }
70 | }
71 |
72 | pub fn start_paddr(&self) -> HostPhysAddr {
73 | self.start_paddr
74 | }
75 |
76 | pub fn as_mut_ptr(&self) -> *mut u8 {
77 | H::phys_to_virt(self.start_paddr) as *mut u8
78 | }
79 |
80 | pub fn fill(&mut self, byte: u8) {
81 | unsafe { core::ptr::write_bytes(self.as_mut_ptr(), byte, PAGE_SIZE) }
82 | }
83 | }
84 |
85 | impl Drop for PhysFrame {
86 | fn drop(&mut self) {
87 | if self.start_paddr > 0 {
88 | H::dealloc_page(self.start_paddr);
89 | debug!("[RVM] deallocated PhysFrame({:#x})", self.start_paddr);
90 | }
91 | }
92 | }
93 |
--------------------------------------------------------------------------------
/hypervisor/src/arch/x86_64/uart16550.rs:
--------------------------------------------------------------------------------
1 | //! Uart 16550.
2 |
3 | use spin::Mutex;
4 | use x86_64::instructions::port::{Port, PortReadOnly, PortWriteOnly};
5 |
6 | const UART_CLOCK_FACTOR: usize = 16;
7 | const OSC_FREQ: usize = 1_843_200;
8 |
9 | static COM1: Mutex = Mutex::new(Uart16550::new(0x3f8));
10 |
11 | bitflags::bitflags! {
12 | /// Line status flags
13 | struct LineStsFlags: u8 {
14 | const INPUT_FULL = 1;
15 | // 1 to 4 unknown
16 | const OUTPUT_EMPTY = 1 << 5;
17 | // 6 and 7 unknown
18 | }
19 | }
20 |
21 | struct Uart16550 {
22 | data: Port,
23 | int_en: PortWriteOnly,
24 | fifo_ctrl: PortWriteOnly,
25 | line_ctrl: PortWriteOnly,
26 | modem_ctrl: PortWriteOnly,
27 | line_sts: PortReadOnly,
28 | }
29 |
30 | impl Uart16550 {
31 | const fn new(port: u16) -> Self {
32 | Self {
33 | data: Port::new(port),
34 | int_en: PortWriteOnly::new(port + 1),
35 | fifo_ctrl: PortWriteOnly::new(port + 2),
36 | line_ctrl: PortWriteOnly::new(port + 3),
37 | modem_ctrl: PortWriteOnly::new(port + 4),
38 | line_sts: PortReadOnly::new(port + 5),
39 | }
40 | }
41 |
42 | fn init(&mut self, baud_rate: usize) {
43 | unsafe {
44 | // Disable interrupts
45 | self.int_en.write(0x00);
46 |
47 | // Enable DLAB
48 | self.line_ctrl.write(0x80);
49 |
50 | // Set maximum speed according the input baud rate by configuring DLL and DLM
51 | let divisor = OSC_FREQ / (baud_rate * UART_CLOCK_FACTOR);
52 | self.data.write((divisor & 0xff) as u8);
53 | self.int_en.write((divisor >> 8) as u8);
54 |
55 | // Disable DLAB and set data word length to 8 bits
56 | self.line_ctrl.write(0x03);
57 |
58 | // Enable FIFO, clear TX/RX queues and
59 | // set interrupt watermark at 14 bytes
60 | self.fifo_ctrl.write(0xC7);
61 |
62 | // Mark data terminal ready, signal request to send
63 | // and enable auxilliary output #2 (used as interrupt line for CPU)
64 | self.modem_ctrl.write(0x0B);
65 | }
66 | }
67 |
68 | fn line_sts(&mut self) -> LineStsFlags {
69 | unsafe { LineStsFlags::from_bits_truncate(self.line_sts.read()) }
70 | }
71 |
72 | fn putchar(&mut self, c: u8) {
73 | while !self.line_sts().contains(LineStsFlags::OUTPUT_EMPTY) {}
74 | unsafe { self.data.write(c) };
75 | }
76 |
77 | fn getchar(&mut self) -> Option {
78 | if self.line_sts().contains(LineStsFlags::INPUT_FULL) {
79 | unsafe { Some(self.data.read()) }
80 | } else {
81 | None
82 | }
83 | }
84 | }
85 |
86 | pub fn console_putchar(c: u8) {
87 | COM1.lock().putchar(c);
88 | }
89 |
90 | pub fn console_getchar() -> Option {
91 | COM1.lock().getchar()
92 | }
93 |
94 | pub fn init() {
95 | COM1.lock().init(115200);
96 | }
97 |
--------------------------------------------------------------------------------
/hypervisor/src/hv/mod.rs:
--------------------------------------------------------------------------------
1 | mod device_emu;
2 | mod gconfig;
3 | mod gpm;
4 | mod hal;
5 | mod vmexit;
6 |
7 | use rvm::{GuestPhysAddr, HostPhysAddr, HostVirtAddr, MemFlags, RvmPerCpu, RvmResult};
8 |
9 | use self::gconfig::*;
10 | use self::gpm::{GuestMemoryRegion, GuestPhysMemorySet};
11 | use self::hal::RvmHalImpl;
12 | use crate::mm::address::{phys_to_virt, virt_to_phys};
13 |
14 | #[repr(align(4096))]
15 | struct AlignedMemory([u8; LEN]);
16 |
17 | static mut GUEST_PHYS_MEMORY: AlignedMemory =
18 | AlignedMemory([0; GUEST_PHYS_MEMORY_SIZE]);
19 |
20 | fn gpa_as_mut_ptr(guest_paddr: GuestPhysAddr) -> *mut u8 {
21 | let offset = unsafe { &GUEST_PHYS_MEMORY as *const _ as usize };
22 | let host_vaddr = guest_paddr + offset;
23 | host_vaddr as *mut u8
24 | }
25 |
26 | fn load_guest_image(hpa: HostPhysAddr, load_gpa: GuestPhysAddr, size: usize) {
27 | let image_ptr = phys_to_virt(hpa) as *const u8;
28 | let image = unsafe { core::slice::from_raw_parts(image_ptr, size) };
29 | unsafe {
30 | core::slice::from_raw_parts_mut(gpa_as_mut_ptr(load_gpa), size).copy_from_slice(image)
31 | }
32 | }
33 |
34 | fn setup_gpm() -> RvmResult {
35 | // copy BIOS and guest images
36 | load_guest_image(BIOS_PADDR, BIOS_ENTRY, BIOS_SIZE);
37 | load_guest_image(GUEST_IMAGE_PADDR, GUEST_ENTRY, GUEST_IMAGE_SIZE);
38 |
39 | // create nested page table and add mapping
40 | let mut gpm = GuestPhysMemorySet::new()?;
41 | let guest_memory_regions = [
42 | GuestMemoryRegion {
43 | // RAM
44 | gpa: GUEST_PHYS_MEMORY_BASE,
45 | hpa: virt_to_phys(gpa_as_mut_ptr(GUEST_PHYS_MEMORY_BASE) as HostVirtAddr),
46 | size: GUEST_PHYS_MEMORY_SIZE,
47 | flags: MemFlags::READ | MemFlags::WRITE | MemFlags::EXECUTE,
48 | },
49 | GuestMemoryRegion {
50 | // IO APIC
51 | gpa: 0xfec0_0000,
52 | hpa: 0xfec0_0000,
53 | size: 0x1000,
54 | flags: MemFlags::READ | MemFlags::WRITE | MemFlags::DEVICE,
55 | },
56 | GuestMemoryRegion {
57 | // HPET
58 | gpa: 0xfed0_0000,
59 | hpa: 0xfed0_0000,
60 | size: 0x1000,
61 | flags: MemFlags::READ | MemFlags::WRITE | MemFlags::DEVICE,
62 | },
63 | GuestMemoryRegion {
64 | // Local APIC
65 | gpa: 0xfee0_0000,
66 | hpa: 0xfee0_0000,
67 | size: 0x1000,
68 | flags: MemFlags::READ | MemFlags::WRITE | MemFlags::DEVICE,
69 | },
70 | ];
71 | for r in guest_memory_regions.into_iter() {
72 | gpm.map_region(r.into())?;
73 | }
74 | Ok(gpm)
75 | }
76 |
77 | pub fn run() -> ! {
78 | println!("Starting virtualization...");
79 | println!("Hardware support: {:?}", rvm::has_hardware_support());
80 |
81 | let mut percpu = RvmPerCpu::::new(0);
82 | percpu.hardware_enable().unwrap();
83 |
84 | let gpm = setup_gpm().unwrap();
85 | info!("{:#x?}", gpm);
86 |
87 | let mut vcpu = percpu
88 | .create_vcpu(BIOS_ENTRY, gpm.nest_page_table_root())
89 | .unwrap();
90 |
91 | println!("Running guest...");
92 | vcpu.run();
93 | }
94 |
--------------------------------------------------------------------------------
/hypervisor/src/hv/device_emu/lapic.rs:
--------------------------------------------------------------------------------
1 | //! Emulated Local APIC. (SDM Vol. 3A, Chapter 10)
2 |
3 | #![allow(dead_code)]
4 |
5 | use rvm::{RvmError, RvmResult, RvmVcpu};
6 |
7 | type Vcpu = RvmVcpu;
8 |
9 | /// ID register.
10 | const APICID: u32 = 0x2;
11 | /// Version register.
12 | const VERSION: u32 = 0x3;
13 | /// EOI register.
14 | const EOI: u32 = 0xB;
15 | /// Logical Destination Register.
16 | const LDR: u32 = 0xD;
17 | /// Spurious Interrupt Vector register.
18 | const SIVR: u32 = 0xF;
19 | /// Interrupt Command register.
20 | const ICR: u32 = 0x30;
21 | /// LVT Timer Interrupt register.
22 | const LVT_TIMER: u32 = 0x32;
23 | /// LVT Thermal Sensor Interrupt register.
24 | const LVT_THERMAL: u32 = 0x33;
25 | /// LVT Performance Monitor register.
26 | const LVT_PMI: u32 = 0x34;
27 | /// LVT LINT0 register.
28 | const LVT_LINT0: u32 = 0x35;
29 | /// LVT LINT1 register.
30 | const LVT_LINT1: u32 = 0x36;
31 | /// LVT Error register.
32 | const LVT_ERR: u32 = 0x37;
33 | /// Initial Count register.
34 | const INIT_COUNT: u32 = 0x38;
35 | /// Current Count register.
36 | const CUR_COUNT: u32 = 0x39;
37 | /// Divide Configuration register.
38 | const DIV_CONF: u32 = 0x3E;
39 |
40 | pub struct VirtLocalApic;
41 |
42 | impl VirtLocalApic {
43 | pub const fn msr_range() -> core::ops::Range {
44 | 0x800..0x840
45 | }
46 |
47 | pub fn rdmsr(vcpu: &mut Vcpu, msr: u32) -> RvmResult {
48 | Self::read(vcpu, msr - 0x800)
49 | }
50 |
51 | pub fn wrmsr(vcpu: &mut Vcpu, msr: u32, value: u64) -> RvmResult {
52 | Self::write(vcpu, msr - 0x800, value)
53 | }
54 | }
55 |
56 | impl VirtLocalApic {
57 | fn read(vcpu: &mut Vcpu, offset: u32) -> RvmResult {
58 | let apic_timer = vcpu.apic_timer_mut();
59 | match offset {
60 | SIVR => Ok(0x1ff), // SDM Vol. 3A, Section 10.9, Figure 10-23 (with Software Enable bit)
61 | LVT_THERMAL | LVT_PMI | LVT_LINT0 | LVT_LINT1 | LVT_ERR => {
62 | Ok(0x1_0000) // SDM Vol. 3A, Section 10.5.1, Figure 10-8 (with Mask bit)
63 | }
64 | LVT_TIMER => Ok(apic_timer.lvt_timer() as u64),
65 | INIT_COUNT => Ok(apic_timer.initial_count() as u64),
66 | DIV_CONF => Ok(apic_timer.divide() as u64),
67 | CUR_COUNT => Ok(apic_timer.current_counter() as u64),
68 | _ => Err(RvmError::Unsupported),
69 | }
70 | }
71 |
72 | fn write(vcpu: &mut Vcpu, offset: u32, value: u64) -> RvmResult {
73 | if offset != ICR && (value >> 32) != 0 {
74 | return Err(RvmError::InvalidParam); // all registers except ICR are 32-bits
75 | }
76 | let apic_timer = vcpu.apic_timer_mut();
77 | match offset {
78 | EOI => {
79 | if value != 0 {
80 | Err(RvmError::InvalidParam) // write a non-zero value causes #GP
81 | } else {
82 | Ok(())
83 | }
84 | }
85 | SIVR | LVT_THERMAL | LVT_PMI | LVT_LINT0 | LVT_LINT1 | LVT_ERR => {
86 | Ok(()) // ignore these register writes
87 | }
88 | LVT_TIMER => apic_timer.set_lvt_timer(value as u32),
89 | INIT_COUNT => apic_timer.set_initial_count(value as u32),
90 | DIV_CONF => apic_timer.set_divide(value as u32),
91 | _ => Err(RvmError::Unsupported),
92 | }
93 | }
94 | }
95 |
--------------------------------------------------------------------------------
/hypervisor/src/arch/x86_64/multiboot.S:
--------------------------------------------------------------------------------
1 | .equ MULTIBOOT_HEADER_MAGIC, 0x1BADB002
2 | .equ MULTIBOOT_HEADER_FLAGS, 0x00010002
3 | .equ MULTIBOOT_CHECKSUM, -(MULTIBOOT_HEADER_MAGIC + MULTIBOOT_HEADER_FLAGS)
4 |
// 32-bit entry point. Saves the two values found in eax/ebx into edi/esi and
// continues at entry32. The code up to the call of the Rust entry point does
// not write edi/esi again.
5 | .section .text.boot
6 | .code32
7 | .global _start
8 | _start:
9 | mov edi, eax // magic
10 | mov esi, ebx // multiboot info
11 | jmp entry32
12 |
// Multiboot header: magic, flags and checksum, followed by the address fields.
// NOTE(review): `{offset}` is a template parameter filled in by the file that
// includes this assembly; presumably the virtual-to-physical offset — confirm.
13 | .balign 4
14 | .type multiboot_header, STT_OBJECT
15 | multiboot_header:
16 | .int MULTIBOOT_HEADER_MAGIC
17 | .int MULTIBOOT_HEADER_FLAGS
18 | .int MULTIBOOT_CHECKSUM
19 | .int multiboot_header - {offset} // header_addr
20 | .int skernel - {offset} // load_addr
21 | .int edata - {offset} // load_end_addr
22 | .int ebss - {offset} // bss_end_addr
23 | .int _start - {offset} // entry_addr
24 |
// 32-bit setup: load the temporary GDT and data segments, program
// CR4, CR3, IA32_EFER and CR0 from the template parameters, then far-return
// into the 64-bit code segment at entry64.
// Symbol addresses are adjusted by `{offset}` throughout this block.
25 | entry32:
26 | // load the temporary GDT
27 | lgdt [.Ltmp_gdt_desc_phys - {offset}]
28 | mov ax, 0x18 // data segment selector
29 | mov ss, ax
30 | mov ds, ax
31 | mov es, ax
32 | mov fs, ax
33 | mov gs, ax
34 |
35 | // set PAE, PGE bit in CR4
36 | mov eax, {cr4}
37 | mov cr4, eax
38 |
39 | // load the temporary page table
40 | lea eax, [.Ltmp_pml4 - {offset}]
41 | mov cr3, eax
42 |
43 | // set LME, NXE bit in IA32_EFER
44 | mov ecx, {efer_msr}
45 | mov edx, 0
46 | mov eax, {efer}
47 | wrmsr
48 |
49 | // set protected mode, write protect, paging bit in CR0
50 | mov eax, {cr0}
51 | mov cr0, eax
52 |
53 | // long return to the 64-bit entry
54 | push 0x10 // code64 segment selector
55 | lea eax, [entry64 - {offset}]
56 | push eax
57 | retf
58 |
// 64-bit setup: reload the GDT through its link-time address, clear the data
// segment selectors, switch to the boot stack and call the Rust entry point.
59 | .code64
60 | entry64:
61 | // reload GDT by high address
62 | movabs rax, offset .Ltmp_gdt_desc
63 | lgdt [rax]
64 |
65 | // clear segment selectors
66 | xor ax, ax
67 | mov ss, ax
68 | mov ds, ax
69 | mov es, ax
70 | mov fs, ax
71 | mov gs, ax
72 |
73 | // set stack and call the Rust entry point ({main_entry})
74 | movabs rsp, offset boot_stack_top
75 | movabs rax, offset {main_entry}
76 | call rax
77 | 1: jmp 1b // spin forever if the entry point ever returns
78 |
// Two descriptors for the same temporary GDT: one with a 32-bit base adjusted
// by `{offset}` (used in entry32) and one with the 64-bit link-time base (used
// in entry64).
79 | .section .rodata
80 | .balign 8
81 | .Ltmp_gdt_desc_phys:
82 | .short .Ltmp_gdt_end - .Ltmp_gdt - 1 // limit
83 | .long .Ltmp_gdt - {offset} // base
84 |
85 | .balign 8
86 | .Ltmp_gdt_desc:
87 | .short .Ltmp_gdt_end - .Ltmp_gdt - 1 // limit
88 | .quad .Ltmp_gdt // base
89 |
// Temporary GDT used during boot. The selectors 0x10 and 0x18 used by the
// boot code above index the 64-bit code and data entries below.
90 | .section .data
91 | .balign 16
92 | .Ltmp_gdt:
93 | .quad 0x0000000000000000 // 0x00: null
94 | .quad 0x00cf9b000000ffff // 0x08: code segment (base=0, limit=0xfffff, type=32bit code exec/read, DPL=0, 4k)
95 | .quad 0x00af9b000000ffff // 0x10: code segment (base=0, limit=0xfffff, type=64bit code exec/read, DPL=0, 4k)
96 | .quad 0x00cf93000000ffff // 0x18: data segment (base=0, limit=0xfffff, type=32bit data read/write, DPL=0, 4k)
97 | .Ltmp_gdt_end:
98 |
// Temporary boot page tables. PML4 entry 0 and PML4 entry 511 each point to a
// PDPT whose first two entries map physical 0x0 and 0x4000_0000 with
// huge pages, so the first 2 GiB of physical memory are visible at both the
// low and the high virtual range noted below.
99 | .balign 4096
100 | .Ltmp_pml4:
101 | // 0x0000_0000 ~ 0x8000_0000
102 | .quad .Ltmp_pdpt_low - {offset} + 0x3 // PRESENT | WRITABLE | paddr(tmp_pdpt_low)
103 | .zero 8 * 510
104 | // 0xffff_ff80_0000_0000 ~ 0xffff_ff80_8000_0000
105 | .quad .Ltmp_pdpt_high - {offset} + 0x3 // PRESENT | WRITABLE | paddr(tmp_pdpt_high)
106 |
107 | .Ltmp_pdpt_low:
108 | .quad 0x0000 | 0x83 // PRESENT | WRITABLE | HUGE_PAGE | paddr(0x0)
109 | .quad 0x40000000 | 0x83 // PRESENT | WRITABLE | HUGE_PAGE | paddr(0x4000_0000)
110 | .zero 8 * 510
111 |
112 | .Ltmp_pdpt_high:
113 | .quad 0x0000 | 0x83 // PRESENT | WRITABLE | HUGE_PAGE | paddr(0x0)
114 | .quad 0x40000000 | 0x83 // PRESENT | WRITABLE | HUGE_PAGE | paddr(0x4000_0000)
115 | .zero 8 * 510
116 |
// Boot stack: `{boot_stack_size}` bytes, page aligned. `boot_stack_top` labels
// the end of the area and is what entry64 loads into rsp.
117 | .section .bss.stack
118 | .balign 4096
119 | boot_stack:
120 | .space {boot_stack_size}
121 | boot_stack_top:
122 |
--------------------------------------------------------------------------------
/hypervisor/src/logging.rs:
--------------------------------------------------------------------------------
1 | use core::fmt::{self, Write};
2 |
3 | use log::{self, Level, LevelFilter, Log, Metadata, Record};
4 | use spin::Mutex;
5 |
6 | use crate::arch::uart;
7 |
8 | struct Stdout;
9 |
10 | static PRINT_LOCK: Mutex<()> = Mutex::new(());
11 |
12 | impl Write for Stdout {
13 | fn write_str(&mut self, s: &str) -> fmt::Result {
14 | for c in s.chars() {
15 | match c {
16 | '\n' => {
17 | uart::console_putchar(b'\r');
18 | uart::console_putchar(b'\n');
19 | }
20 | _ => uart::console_putchar(c as u8),
21 | }
22 | }
23 | Ok(())
24 | }
25 | }
26 |
27 | pub fn init() { // installs `SimpleLogger`; panics via `unwrap` if `log::set_logger` fails
28 | static LOGGER: SimpleLogger = SimpleLogger;
29 | log::set_logger(&LOGGER).unwrap();
30 | log::set_max_level(match option_env!("LOG") { // `LOG` is read at compile time
31 | Some("error") => LevelFilter::Error,
32 | Some("warn") => LevelFilter::Warn,
33 | Some("info") => LevelFilter::Info,
34 | Some("debug") => LevelFilter::Debug,
35 | Some("trace") => LevelFilter::Trace,
36 | _ => LevelFilter::Off, // unset or unrecognized value
37 | });
38 | }
39 |
40 | pub fn print(args: fmt::Arguments) { // writes `args` through `Stdout` while holding `PRINT_LOCK`
41 | let _locked = PRINT_LOCK.lock(); // guard is dropped when the function returns
42 | Stdout.write_fmt(args).unwrap();
43 | }
44 |
45 | #[macro_export]
46 | macro_rules! print { // takes a literal format string plus optional arguments
47 | ($fmt: literal $(, $($arg: tt)+)?) => {
48 | $crate::logging::print(format_args!($fmt $(, $($arg)+)?));
49 | }
50 | }
51 |
52 | #[macro_export]
53 | macro_rules! println { // like `print!`, with "\n" appended; the empty form prints only "\n"
54 | () => { $crate::logging::print(format_args!("\n")) };
55 | ($fmt: literal $(, $($arg: tt)+)?) => {
56 | $crate::logging::print(format_args!(concat!($fmt, "\n") $(, $($arg)+)?));
57 | }
58 | }
59 |
60 | macro_rules! with_color { // wraps formatted text in `ESC[<code>m` ... `ESC[m`
61 | ($color_code:expr, $($arg:tt)*) => {{
62 | format_args!("\u{1B}[{}m{}\u{1B}[m", $color_code as u8, format_args!($($arg)*))
63 | }};
64 | }
65 |
66 | #[repr(u8)]
67 | #[allow(dead_code)]
68 | enum ColorCode { // numeric codes that `with_color!` places inside the `ESC[<code>m` sequence
69 | Black = 30,
70 | Red = 31,
71 | Green = 32,
72 | Yellow = 33,
73 | Blue = 34,
74 | Magenta = 35,
75 | Cyan = 36,
76 | White = 37,
77 | BrightBlack = 90,
78 | BrightRed = 91,
79 | BrightGreen = 92,
80 | BrightYellow = 93,
81 | BrightBlue = 94,
82 | BrightMagenta = 95,
83 | BrightCyan = 96,
84 | BrightWhite = 97,
85 | }
86 |
87 | struct SimpleLogger; // `Log` implementation that prints colored records through `print`
88 |
89 | impl Log for SimpleLogger {
90 | fn enabled(&self, _metadata: &Metadata) -> bool {
91 | true // no per-record filtering here; `init` sets the maximum level
92 | }
93 |
94 | fn log(&self, record: &Record) {
95 | if !self.enabled(record.metadata()) {
96 | return;
97 | }
98 |
99 | let level = record.level();
100 | let line = record.line().unwrap_or(0); // 0 when the record carries no line number
101 | let target = record.target();
102 | let level_color = match level {
103 | Level::Error => ColorCode::BrightRed,
104 | Level::Warn => ColorCode::BrightYellow,
105 | Level::Info => ColorCode::BrightGreen,
106 | Level::Debug => ColorCode::BrightCyan,
107 | Level::Trace => ColorCode::BrightBlack,
108 | };
109 | let args_color = match level {
110 | Level::Error => ColorCode::Red,
111 | Level::Warn => ColorCode::Yellow,
112 | Level::Info => ColorCode::Green,
113 | Level::Debug => ColorCode::Cyan,
114 | Level::Trace => ColorCode::BrightBlack,
115 | };
116 | if super::init_ok() { // the timestamped form is used only once `init_ok()` is true
117 | let now = crate::timer::current_time();
118 | print(with_color!(
119 | ColorCode::White,
120 | "[{:>3}.{:06} {} {} {}\n",
121 | now.as_secs(),
122 | now.subsec_micros(),
123 | with_color!(level_color, "{:<5}", level),
124 | with_color!(ColorCode::White, "{}:{}]", target, line),
125 | with_color!(args_color, "{}", record.args()),
126 | ));
127 | } else { // same layout without the timestamp
128 | print(with_color!(
129 | ColorCode::White,
130 | "[{} {} {}\n",
131 | with_color!(level_color, "{:<5}", level),
132 | with_color!(ColorCode::White, "{}:{}]", target, line),
133 | with_color!(args_color, "{}", record.args()),
134 | ));
135 | }
136 | }
137 |
138 | fn flush(&self) {} // nothing to flush
139 | }
140 |
--------------------------------------------------------------------------------
/hypervisor/src/hv/device_emu/uart16550.rs:
--------------------------------------------------------------------------------
1 | //! Emulated UART 16550. (ref: https://wiki.osdev.org/Serial_Ports)
2 |
3 | use super::PortIoDevice;
4 | use crate::arch::uart;
5 |
6 | use rvm::{RvmError, RvmResult};
7 | use spin::Mutex;
8 |
9 | const DATA_REG: u16 = 0;
10 | const INT_EN_REG: u16 = 1;
11 | const FIFO_CTRL_REG: u16 = 2;
12 | const LINE_CTRL_REG: u16 = 3;
13 | const MODEM_CTRL_REG: u16 = 4;
14 | const LINE_STATUS_REG: u16 = 5;
15 | const MODEM_STATUS_REG: u16 = 6;
16 | const SCRATCH_REG: u16 = 7;
17 |
18 | const UART_FIFO_CAPACITY: usize = 16;
19 |
20 | bitflags::bitflags! {
21 | /// Line status flags
22 | struct LineStsFlags: u8 {
23 | const INPUT_FULL = 1; // bit 0: set by `read` when the FIFO holds at least one byte
24 | // 1 to 4 unknown
25 | const OUTPUT_EMPTY = 1 << 5; // bit 5: always reported by `read`
26 | // 6 and 7 unknown
27 | }
28 | }
29 |
30 | /// FIFO queue (ring buffer holding up to `CAP` bytes) for caching bytes read.
31 | struct Fifo<const CAP: usize> {
32 | buf: [u8; CAP],
33 | head: usize, // index of the oldest buffered byte
34 | num: usize, // number of buffered bytes
35 | }
36 |
37 | impl<const CAP: usize> Fifo<CAP> {
38 | const fn new() -> Self {
39 | Self {
40 | buf: [0; CAP],
41 | head: 0,
42 | num: 0,
43 | }
44 | }
45 |
46 | fn is_empty(&self) -> bool {
47 | self.num == 0
48 | }
49 |
50 | fn is_full(&self) -> bool {
51 | self.num == CAP
52 | }
53 |
54 | fn push(&mut self, value: u8) { // appends at the tail; panics if the queue is full
55 | assert!(self.num < CAP);
56 | self.buf[(self.head + self.num) % CAP] = value;
57 | self.num += 1;
58 | }
59 |
60 | fn pop(&mut self) -> u8 { // removes the oldest byte; panics if the queue is empty
61 | assert!(self.num > 0);
62 | let ret = self.buf[self.head];
63 | self.head += 1;
64 | self.head %= CAP;
65 | self.num -= 1;
66 | ret
67 | }
68 | }
69 |
70 | pub struct Uart16550 {
71 | port_base: u16, // first of the 8 consecutive I/O ports handled by this device
72 | fifo: Mutex<Fifo<UART_FIFO_CAPACITY>>,
73 | }
74 |
75 | impl PortIoDevice for Uart16550 {
76 | fn port_range(&self) -> core::ops::Range<u16> {
77 | self.port_base..self.port_base + 8
78 | }
79 |
80 | fn read(&self, port: u16, access_size: u8) -> RvmResult<u32> {
81 | if access_size != 1 {
82 | error!("Invalid serial port I/O read size: {} != 1", access_size);
83 | return Err(RvmError::InvalidParam);
84 | }
85 | let ret = match port - self.port_base {
86 | DATA_REG => {
87 | // read a byte from FIFO
88 | let mut fifo = self.fifo.lock();
89 | if fifo.is_empty() {
90 | 0
91 | } else {
92 | fifo.pop()
93 | }
94 | }
95 | LINE_STATUS_REG => {
96 | // check if the physical serial port has an available byte, and push it to FIFO.
97 | let mut fifo = self.fifo.lock();
98 | if !fifo.is_full() {
99 | if let Some(c) = uart::console_getchar() {
100 | fifo.push(c);
101 | }
102 | }
103 | let mut lsr = LineStsFlags::OUTPUT_EMPTY;
104 | if !fifo.is_empty() {
105 | lsr |= LineStsFlags::INPUT_FULL;
106 | }
107 | lsr.bits()
108 | }
109 | INT_EN_REG | FIFO_CTRL_REG | LINE_CTRL_REG | MODEM_CTRL_REG | MODEM_STATUS_REG
110 | | SCRATCH_REG => {
111 | info!("Unimplemented serial port I/O read: {:#x}", port); // unimplemented
112 | 0
113 | }
114 | _ => unreachable!(), // offsets 0..8 are all matched above; assumes `port` is inside `port_range()`
115 | };
116 | Ok(ret as u32)
117 | }
118 |
119 | fn write(&self, port: u16, access_size: u8, value: u32) -> RvmResult {
120 | if access_size != 1 {
121 | error!("Invalid serial port I/O write size: {} != 1", access_size);
122 | return Err(RvmError::InvalidParam);
123 | }
124 | match port - self.port_base {
125 | DATA_REG => uart::console_putchar(value as u8),
126 | INT_EN_REG | FIFO_CTRL_REG | LINE_CTRL_REG | MODEM_CTRL_REG | SCRATCH_REG => {
127 | info!("Unimplemented serial port I/O write: {:#x}", port); // unimplemented
128 | }
129 | LINE_STATUS_REG | MODEM_STATUS_REG => {} // ignore; offset 6 used to fall into `unreachable!()`
130 | _ => unreachable!(), // offsets 0..8 are all matched above; assumes `port` is inside `port_range()`
131 | }
132 | Ok(())
133 | }
134 | }
135 |
136 | impl Uart16550 {
137 | pub const fn new(port_base: u16) -> Self { // device at `port_base` with an empty FIFO
138 | Self {
139 | port_base,
140 | fifo: Mutex::new(Fifo::new()),
141 | }
142 | }
143 | }
144 |
--------------------------------------------------------------------------------
/rvm/src/arch/x86_64/lapic.rs:
--------------------------------------------------------------------------------
1 | use bit_field::BitField;
2 | use core::marker::PhantomData;
3 |
4 | use crate::{RvmHal, RvmResult};
5 |
6 | const APIC_FREQ_MHZ: u64 = 1000; // 1000 MHz
7 | const APIC_CYCLE_NANOS: u64 = 1000 / APIC_FREQ_MHZ;
8 |
9 | /// Local APIC timer modes (compared against bits 17..19 of the LVT Timer Register).
10 | #[derive(Debug, Copy, Clone)]
11 | #[repr(u8)]
12 | #[allow(dead_code)]
13 | pub enum TimerMode {
14 | /// Timer only fires once.
15 | OneShot = 0b00,
16 | /// Timer fires periodically.
17 | Periodic = 0b01,
18 | /// Timer fires at an absolute time.
19 | TscDeadline = 0b10,
20 | }
21 |
22 | /// A virtual local APIC timer. (SDM Vol. 3A, Section 10.5.4)
23 | pub struct ApicTimer<H: RvmHal> {
24 | lvt_timer_bits: u32,
25 | divide_shift: u8,
26 | initial_count: u32,
27 | last_start_ns: u64,
28 | deadline_ns: u64, // 0 means no deadline is armed
29 | _phantom: PhantomData<H>,
30 | }
31 |
32 | impl<H: RvmHal> ApicTimer<H> {
33 | pub(crate) const fn new() -> Self {
34 | Self {
35 | lvt_timer_bits: 0x1_0000, // masked
36 | divide_shift: 0,
37 | initial_count: 0,
38 | last_start_ns: 0,
39 | deadline_ns: 0,
40 | _phantom: PhantomData,
41 | }
42 | }
43 |
44 | /// Check if an interrupt was generated. If yes, update the timer state.
45 | pub fn check_interrupt(&mut self) -> bool {
46 | if self.deadline_ns == 0 {
47 | false
48 | } else if H::current_time_nanos() >= self.deadline_ns {
49 | if self.is_periodic() {
50 | self.deadline_ns += self.interval_ns();
51 | } else {
52 | self.deadline_ns = 0;
53 | }
54 | !self.is_masked() // an expired deadline is consumed even when the interrupt is masked
55 | } else {
56 | false
57 | }
58 | }
59 |
60 | /// Whether the timer interrupt is masked.
61 | pub const fn is_masked(&self) -> bool {
62 | self.lvt_timer_bits & (1 << 16) != 0
63 | }
64 |
65 | /// Whether the timer mode is periodic.
66 | pub const fn is_periodic(&self) -> bool {
67 | let timer_mode = (self.lvt_timer_bits >> 17) & 0b11;
68 | timer_mode == TimerMode::Periodic as _
69 | }
70 |
71 | /// The timer interrupt vector number.
72 | pub const fn vector(&self) -> u8 {
73 | (self.lvt_timer_bits & 0xff) as u8
74 | }
75 |
76 | /// LVT Timer Register. (SDM Vol. 3A, Section 10.5.1, Figure 10-8)
77 | pub const fn lvt_timer(&self) -> u32 {
78 | self.lvt_timer_bits
79 | }
80 |
81 | /// Divide Configuration Register. (SDM Vol. 3A, Section 10.5.4, Figure 10-10)
82 | pub const fn divide(&self) -> u32 {
83 | let dcr = self.divide_shift.wrapping_sub(1) as u32 & 0b111; // inverse of `set_divide`
84 | (dcr & 0b11) | ((dcr & 0b100) << 1)
85 | }
86 |
87 | /// Initial Count Register.
88 | pub const fn initial_count(&self) -> u32 {
89 | self.initial_count
90 | }
91 |
92 | /// Current Count Register. Reads as 0 while the initial count is 0.
93 | pub fn current_counter(&self) -> u32 {
94 | let elapsed_ns = H::current_time_nanos() - self.last_start_ns;
95 | let elapsed_cycles = (elapsed_ns / APIC_CYCLE_NANOS) >> self.divide_shift;
96 | if self.initial_count == 0 {
97 | 0 // nothing to count down; also keeps the periodic branch from computing `% 0`
98 | } else if self.is_periodic() {
99 | self.initial_count - (elapsed_cycles % self.initial_count as u64) as u32
100 | } else {
101 | (self.initial_count as u64).saturating_sub(elapsed_cycles) as u32
102 | }
103 | }
104 |
105 | /// Set LVT Timer Register.
106 | pub fn set_lvt_timer(&mut self, bits: u32) -> RvmResult {
107 | let timer_mode = bits.get_bits(17..19);
108 | if timer_mode == TimerMode::TscDeadline as _ {
109 | return rvm_err!(Unsupported); // TSC deadline mode was not supported
110 | } else if timer_mode == 0b11 {
111 | return rvm_err!(InvalidParam); // reserved
112 | }
113 | self.lvt_timer_bits = bits;
114 | self.start_timer();
115 | Ok(())
116 | }
117 |
118 | /// Set Initial Count Register.
119 | pub fn set_initial_count(&mut self, initial: u32) -> RvmResult {
120 | self.initial_count = initial;
121 | self.start_timer();
122 | Ok(())
123 | }
124 |
125 | /// Set Divide Configuration Register.
126 | pub fn set_divide(&mut self, dcr: u32) -> RvmResult {
127 | let shift = (dcr & 0b11) | ((dcr & 0b1000) >> 1); // pack DCR bits 0, 1 and 3 into 3 bits
128 | self.divide_shift = (shift + 1) as u8 & 0b111;
129 | self.start_timer();
130 | Ok(())
131 | }
132 |
133 | const fn interval_ns(&self) -> u64 {
134 | (self.initial_count as u64 * APIC_CYCLE_NANOS) << self.divide_shift
135 | }
136 |
137 | fn start_timer(&mut self) {
138 | if self.initial_count != 0 {
139 | self.last_start_ns = H::current_time_nanos();
140 | self.deadline_ns = self.last_start_ns + self.interval_ns();
141 | } else {
142 | self.deadline_ns = 0; // an initial count of 0 disarms the timer
143 | }
144 | }
145 | }
146 |
--------------------------------------------------------------------------------
/rvm/src/arch/x86_64/vmx/ept.rs:
--------------------------------------------------------------------------------
1 | use core::{convert::TryFrom, fmt};
2 |
3 | use bit_field::BitField;
4 |
5 | use crate::mm::{GenericPTE, HostPhysAddr, Level4PageTable, MemFlags};
6 |
7 | bitflags::bitflags! {
8 | /// EPT entry flags. (SDM Vol. 3C, Section 28.3.2)
9 | struct EPTFlags: u64 {
10 | /// Read access.
11 | const READ = 1 << 0;
12 | /// Write access.
13 | const WRITE = 1 << 1;
14 | /// Execute access.
15 | const EXECUTE = 1 << 2;
16 | /// EPT memory type (bits 3..6). Only for entries that map a page.
17 | const MEM_TYPE_MASK = 0b111 << 3;
18 | /// Ignore PAT memory type. Only for entries that map a page.
19 | const IGNORE_PAT = 1 << 6;
20 | /// Specifies that the entry maps a huge frame instead of a page table.
21 | /// Only allowed in P2 or P3 tables.
22 | const HUGE_PAGE = 1 << 7;
23 | /// If bit 6 of EPTP is 1, accessed flag for EPT.
24 | const ACCESSED = 1 << 8;
25 | /// If bit 6 of EPTP is 1, dirty flag for EPT.
26 | const DIRTY = 1 << 9;
27 | /// Execute access for user-mode linear addresses.
28 | const EXECUTE_FOR_USER = 1 << 10;
29 | }
30 | }
31 |
32 | numeric_enum_macro::numeric_enum! {
33 | #[repr(u8)]
34 | #[derive(Debug, PartialEq, Clone, Copy)]
35 | /// EPT memory typing. (SDM Vol. 3C, Section 28.3.7)
36 | enum EPTMemType { // discriminants are the values stored in entry bits 3..6
37 | Uncached = 0,
38 | WriteCombining = 1,
39 | WriteThrough = 4,
40 | WriteProtected = 5,
41 | WriteBack = 6,
42 | }
43 | }
44 |
45 | impl EPTFlags {
46 | fn set_mem_type(&mut self, mem_type: EPTMemType) { // overwrites bits 3..6 with `mem_type`
47 | let mut bits = self.bits();
48 | bits.set_bits(3..6, mem_type as u64);
49 | *self = Self::from_bits_truncate(bits)
50 | }
51 | fn mem_type(&self) -> Result<EPTMemType, u8> { // `Err(raw)` when bits 3..6 match no variant
52 | EPTMemType::try_from(self.bits().get_bits(3..6) as u8)
53 | }
54 | }
55 |
56 | impl From<MemFlags> for EPTFlags {
57 | fn from(f: MemFlags) -> Self {
58 | if f.is_empty() {
59 | return Self::empty();
60 | }
61 | let mut ret = Self::empty();
62 | if f.contains(MemFlags::READ) {
63 | ret |= Self::READ;
64 | }
65 | if f.contains(MemFlags::WRITE) {
66 | ret |= Self::WRITE;
67 | }
68 | if f.contains(MemFlags::EXECUTE) {
69 | ret |= Self::EXECUTE;
70 | }
71 | if !f.contains(MemFlags::DEVICE) { // DEVICE keeps memory type 0 (`Uncached`)
72 | ret.set_mem_type(EPTMemType::WriteBack);
73 | }
74 | ret
75 | }
76 | }
77 |
78 | impl From<EPTFlags> for MemFlags {
79 | fn from(f: EPTFlags) -> Self {
80 | let mut ret = MemFlags::empty();
81 | if f.contains(EPTFlags::READ) {
82 | ret |= Self::READ;
83 | }
84 | if f.contains(EPTFlags::WRITE) {
85 | ret |= Self::WRITE;
86 | }
87 | if f.contains(EPTFlags::EXECUTE) {
88 | ret |= Self::EXECUTE;
89 | }
90 | if let Ok(EPTMemType::Uncached) = f.mem_type() { // only `Uncached` is reported as DEVICE
91 | ret |= Self::DEVICE;
92 | }
93 | ret
94 | }
95 | }
96 |
97 | #[derive(Clone, Copy)]
98 | #[repr(transparent)]
99 | pub struct EPTEntry(u64); // raw 64-bit entry: flag bits plus a physical address
100 |
101 | const PHYS_ADDR_MASK: usize = 0x000f_ffff_ffff_f000; // 12..52
102 |
103 | impl GenericPTE for EPTEntry {
104 | fn new_page(paddr: HostPhysAddr, flags: MemFlags, is_huge: bool) -> Self {
105 | let mut flags = EPTFlags::from(flags);
106 | if is_huge {
107 | flags |= EPTFlags::HUGE_PAGE;
108 | }
109 | Self(flags.bits() | (paddr & PHYS_ADDR_MASK) as u64)
110 | }
111 | fn new_table(paddr: HostPhysAddr) -> Self { // table entries always get READ | WRITE | EXECUTE
112 | let flags = EPTFlags::READ | EPTFlags::WRITE | EPTFlags::EXECUTE;
113 | Self(flags.bits() | (paddr & PHYS_ADDR_MASK) as u64)
114 | }
115 | fn paddr(&self) -> HostPhysAddr {
116 | self.0 as usize & PHYS_ADDR_MASK
117 | }
118 | fn flags(&self) -> MemFlags {
119 | EPTFlags::from_bits_truncate(self.0).into()
120 | }
121 | fn is_unused(&self) -> bool {
122 | self.0 == 0
123 | }
124 | fn is_present(&self) -> bool {
125 | self.0 & 0x7 != 0 // RWX != 0
126 | }
127 | fn is_huge(&self) -> bool {
128 | EPTFlags::from_bits_truncate(self.0).contains(EPTFlags::HUGE_PAGE)
129 | }
130 | fn clear(&mut self) {
131 | self.0 = 0
132 | }
133 | }
134 |
135 | impl fmt::Debug for EPTEntry {
136 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { // prints the raw value and its decoded parts
137 | f.debug_struct("EPTEntry")
138 | .field("raw", &self.0)
139 | .field("hpaddr", &self.paddr())
140 | .field("flags", &self.flags())
141 | .field("mem_type", &EPTFlags::from_bits_truncate(self.0).mem_type())
142 | .finish()
143 | }
144 | }
145 |
146 | /// The VMX extended page table. (SDM Vol. 3C, Section 28.3)
147 | pub type ExtendedPageTable<H> = Level4PageTable<H, EPTEntry>; // NOTE(review): parameter order assumed — confirm against `mm::page_table`
148 |
--------------------------------------------------------------------------------
/hypervisor/src/hv/gpm.rs:
--------------------------------------------------------------------------------
1 | use alloc::collections::BTreeMap;
2 | use core::fmt::{Debug, Formatter, Result};
3 |
4 | use rvm::{GuestPhysAddr, HostPhysAddr, MemFlags, NestedPageTable, RvmError, RvmResult};
5 |
6 | use super::hal::RvmHalImpl;
7 | use crate::mm::{address::is_aligned, PAGE_SIZE};
8 |
9 | #[derive(Debug)]
10 | enum Mapper {
11 | Offset(usize), // value subtracted from a guest physical address in `MapRegion::target`
12 | }
13 |
14 | #[derive(Debug)]
15 | pub struct GuestMemoryRegion { // plain description of a region; converts into `MapRegion` via `From`
16 | pub gpa: GuestPhysAddr,
17 | pub hpa: HostPhysAddr,
18 | pub size: usize,
19 | pub flags: MemFlags,
20 | }
21 |
22 | pub struct MapRegion {
23 | pub start: GuestPhysAddr, // first guest physical address of the region
24 | pub size: usize,
25 | pub flags: MemFlags,
26 | mapper: Mapper,
27 | }
28 |
29 | impl MapRegion {
30 | pub fn new_offset(
31 | start_gpa: GuestPhysAddr,
32 | start_hpa: HostPhysAddr,
33 | size: usize,
34 | flags: MemFlags,
35 | ) -> Self {
36 | assert!(is_aligned(start_gpa));
37 | assert!(is_aligned(start_hpa));
38 | assert!(is_aligned(size));
39 | let offset = start_gpa.wrapping_sub(start_hpa); // may wrap when hpa > gpa; `target` undoes it with `wrapping_sub`
40 | Self {
41 | start: start_gpa,
42 | size,
43 | flags,
44 | mapper: Mapper::Offset(offset),
45 | }
46 | }
47 |
48 | fn is_overlap_with(&self, other: &Self) -> bool { // half-open ranges [start, start + size)
49 | let s0 = self.start;
50 | let e0 = s0 + self.size;
51 | let s1 = other.start;
52 | let e1 = s1 + other.size;
53 | !(e0 <= s1 || e1 <= s0)
54 | }
55 |
56 | fn target(&self, gpa: GuestPhysAddr) -> HostPhysAddr {
57 | match self.mapper {
58 | Mapper::Offset(off) => gpa.wrapping_sub(off),
59 | }
60 | }
61 |
62 | fn map_to(&self, npt: &mut NestedPageTable<RvmHalImpl>) -> RvmResult {
63 | let mut start = self.start;
64 | let end = start + self.size;
65 | while start < end { // one mapping per PAGE_SIZE step; stops at the first error
66 | let target = self.target(start);
67 | npt.map(start, target, self.flags)?;
68 | start += PAGE_SIZE;
69 | }
70 | Ok(())
71 | }
72 |
73 | fn unmap_to(&self, npt: &mut NestedPageTable<RvmHalImpl>) -> RvmResult {
74 | let mut start = self.start;
75 | let end = start + self.size;
76 | while start < end {
77 | npt.unmap(start)?;
78 | start += PAGE_SIZE;
79 | }
80 | Ok(())
81 | }
82 | }
83 |
84 | impl Debug for MapRegion {
85 | fn fmt(&self, f: &mut Formatter) -> Result { // shows the region as a `start..start + size` range
86 | f.debug_struct("MapRegion")
87 | .field("range", &(self.start..self.start + self.size))
88 | .field("size", &self.size)
89 | .field("flags", &self.flags)
90 | .field("mapper", &self.mapper)
91 | .finish()
92 | }
93 | }
94 |
95 | impl From<GuestMemoryRegion> for MapRegion {
96 | fn from(r: GuestMemoryRegion) -> Self { // panics if gpa, hpa or size fails `new_offset`'s alignment asserts
97 | Self::new_offset(r.gpa, r.hpa, r.size, r.flags)
98 | }
99 | }
100 |
101 | pub struct GuestPhysMemorySet {
102 | regions: BTreeMap<GuestPhysAddr, MapRegion>, // keyed by each region's `start`
103 | npt: NestedPageTable<RvmHalImpl>,
104 | }
105 |
106 | impl GuestPhysMemorySet {
107 | pub fn new() -> RvmResult<Self> {
108 | Ok(Self {
109 | npt: NestedPageTable::new()?,
110 | regions: BTreeMap::new(),
111 | })
112 | }
113 |
114 | pub fn nest_page_table_root(&self) -> HostPhysAddr {
115 | self.npt.root_paddr()
116 | }
117 |
118 | fn test_free_area(&self, other: &MapRegion) -> bool { // checks only the nearest region on each side of `other.start`
119 | if let Some((_, before)) = self.regions.range(..other.start).last() {
120 | if before.is_overlap_with(other) {
121 | return false;
122 | }
123 | }
124 | if let Some((_, after)) = self.regions.range(other.start..).next() {
125 | if after.is_overlap_with(other) {
126 | return false;
127 | }
128 | }
129 | true
130 | }
131 |
132 | pub fn map_region(&mut self, region: MapRegion) -> RvmResult {
133 | if region.size == 0 {
134 | return Ok(()); // empty regions are accepted and not recorded
135 | }
136 | if !self.test_free_area(&region) {
137 | warn!(
138 | "MapRegion({:#x}..{:#x}) overlapped in:\n{:#x?}",
139 | region.start,
140 | region.start + region.size,
141 | self
142 | );
143 | return Err(RvmError::InvalidParam);
144 | }
145 | region.map_to(&mut self.npt)?;
146 | self.regions.insert(region.start, region);
147 | Ok(())
148 | }
149 |
150 | pub fn clear(&mut self) { // unmaps every recorded region; panics if an unmap fails
151 | for region in self.regions.values() {
152 | region.unmap_to(&mut self.npt).unwrap();
153 | }
154 | self.regions.clear();
155 | }
156 | }
157 |
158 | impl Drop for GuestPhysMemorySet {
159 | fn drop(&mut self) { // unmaps all regions through `clear`
160 | self.clear();
161 | }
162 | }
163 |
164 | impl Debug for GuestPhysMemorySet {
165 | fn fmt(&self, f: &mut Formatter) -> Result { // prints the table root address and the region map
166 | f.debug_struct("GuestPhysMemorySet")
167 | .field("page_table_root", &self.nest_page_table_root())
168 | .field("regions", &self.regions)
169 | .finish()
170 | }
171 | }
172 |
--------------------------------------------------------------------------------
/rvm/src/arch/x86_64/vmx/mod.rs:
--------------------------------------------------------------------------------
1 | mod definitions;
2 | mod ept;
3 | mod instructions;
4 | mod structs;
5 | mod vcpu;
6 | mod vmcs;
7 |
8 | use raw_cpuid::CpuId;
9 | use x86::{bits64::vmx, vmx::VmFail};
10 | use x86_64::registers::control::{Cr0, Cr4, Cr4Flags};
11 |
12 | use self::structs::{FeatureControl, FeatureControlFlags, VmxBasic, VmxRegion};
13 | use crate::arch::msr::Msr;
14 | use crate::error::{RvmError, RvmResult};
15 | use crate::hal::RvmHal;
16 |
17 | pub use self::definitions::VmxExitReason;
18 | pub use self::ept::ExtendedPageTable as NestedPageTable;
19 | pub use self::vcpu::VmxVcpu as RvmVcpu;
20 | pub use self::vmcs::{VmxExitInfo, VmxInterruptInfo, VmxIoExitInfo};
21 | pub use self::VmxPerCpuState as ArchPerCpuState;
22 |
23 | /// Whether the CPUID feature information reports VMX (`false` if it is unavailable).
24 | pub fn has_hardware_support() -> bool {
25 | match CpuId::new().get_feature_info() {
26 | Some(info) => info.has_vmx(),
27 | None => false,
28 | }
29 | }
30 |
31 | pub struct VmxPerCpuState<H: RvmHal> {
32 | vmcs_revision_id: u32,
33 | vmx_region: VmxRegion<H>, // region handed to VMXON in `hardware_enable`
34 | }
35 |
36 | impl<H: RvmHal> VmxPerCpuState<H> {
37 | pub const fn new() -> Self {
38 | Self {
39 | vmcs_revision_id: 0,
40 | vmx_region: unsafe { VmxRegion::uninit() },
41 | }
42 | }
43 |
44 | pub fn is_enabled(&self) -> bool {
45 | Cr4::read().contains(Cr4Flags::VIRTUAL_MACHINE_EXTENSIONS)
46 | }
47 |
48 | pub fn hardware_enable(&mut self) -> RvmResult {
49 | if !has_hardware_support() {
50 | return rvm_err!(Unsupported, "CPU does not support feature VMX");
51 | }
52 | if self.is_enabled() {
53 | return rvm_err!(ResourceBusy, "VMX is already turned on");
54 | }
55 |
56 | // Enable VMXON, if required.
57 | let ctrl = FeatureControl::read();
58 | let locked = ctrl.contains(FeatureControlFlags::LOCKED);
59 | let vmxon_outside = ctrl.contains(FeatureControlFlags::VMXON_ENABLED_OUTSIDE_SMX);
60 | if !locked {
61 | FeatureControl::write(
62 | ctrl | FeatureControlFlags::LOCKED | FeatureControlFlags::VMXON_ENABLED_OUTSIDE_SMX,
63 | )
64 | } else if !vmxon_outside {
65 | return rvm_err!(Unsupported, "VMX disabled by BIOS");
66 | }
67 |
68 | // Check control registers are in a VMX-friendly state. (SDM Vol. 3C, Appendix A.7, A.8)
69 | macro_rules! cr_is_valid {
70 | ($value: expr, $crx: ident) => {{
71 | use Msr::*;
72 | let value = $value;
73 | let fixed0 = concat_idents!(IA32_VMX_, $crx, _FIXED0).read(); // bits that must be 1
74 | let fixed1 = concat_idents!(IA32_VMX_, $crx, _FIXED1).read(); // bits that are allowed to be 1
75 | (value & fixed0) == fixed0 && (value & !fixed1) == 0
76 | }};
77 | }
78 | if !cr_is_valid!(Cr0::read().bits(), CR0) {
79 | return rvm_err!(BadState, "host CR0 is not valid in VMX operation");
80 | }
81 | if !cr_is_valid!((Cr4::read() | Cr4Flags::VIRTUAL_MACHINE_EXTENSIONS).bits(), CR4) { // VMXE is only set below, so check the value CR4 will hold at VMXON
82 | return rvm_err!(BadState, "host CR4 is not valid in VMX operation");
83 | }
84 |
85 | // Get VMCS revision identifier in IA32_VMX_BASIC MSR.
86 | let vmx_basic = VmxBasic::read();
87 | if vmx_basic.region_size as usize != crate::mm::PAGE_SIZE {
88 | return rvm_err!(Unsupported);
89 | }
90 | if vmx_basic.mem_type != VmxBasic::VMX_MEMORY_TYPE_WRITE_BACK {
91 | return rvm_err!(Unsupported);
92 | }
93 | if vmx_basic.is_32bit_address {
94 | return rvm_err!(Unsupported);
95 | }
96 | if !vmx_basic.io_exit_info {
97 | return rvm_err!(Unsupported);
98 | }
99 | if !vmx_basic.vmx_flex_controls {
100 | return rvm_err!(Unsupported);
101 | }
102 | self.vmcs_revision_id = vmx_basic.revision_id;
103 | self.vmx_region = VmxRegion::new(vmx_basic.revision_id, false)?;
104 |
105 | unsafe {
106 | // Enable VMX using the VMXE bit.
107 | Cr4::write(Cr4::read() | Cr4Flags::VIRTUAL_MACHINE_EXTENSIONS);
108 | // Execute VMXON.
109 | vmx::vmxon(self.vmx_region.phys_addr() as _)?;
110 | }
111 | info!("[RVM] successed to turn on VMX.");
112 |
113 | Ok(())
114 | }
115 |
116 | pub fn hardware_disable(&mut self) -> RvmResult {
117 | if !self.is_enabled() {
118 | return rvm_err!(BadState, "VMX is not enabled");
119 | }
120 |
121 | unsafe {
122 | // Execute VMXOFF.
123 | vmx::vmxoff()?;
124 | // Remove VMXE bit in CR4.
125 | Cr4::update(|cr4| cr4.remove(Cr4Flags::VIRTUAL_MACHINE_EXTENSIONS));
126 | };
127 | info!("[RVM] successed to turn off VMX.");
128 |
129 | self.vmx_region = unsafe { VmxRegion::uninit() }; // replace the region used for VMXON
130 | Ok(())
131 | }
132 | }
133 |
134 | impl From<VmFail> for RvmError {
135 | fn from(err: VmFail) -> Self { // every VMX failure is reported as `BadState`
136 | match err {
137 | VmFail::VmFailValid => rvm_err_type!(BadState, vmcs::instruction_error().as_str()),
138 | _ => rvm_err_type!(BadState, format_args!("VMX instruction failed: {:?}", err)),
139 | }
140 | }
141 | }
142 |
--------------------------------------------------------------------------------
/hypervisor/Cargo.lock:
--------------------------------------------------------------------------------
1 | # This file is automatically @generated by Cargo.
2 | # It is not intended for manual editing.
3 | version = 3
4 |
5 | [[package]]
6 | name = "autocfg"
7 | version = "1.1.0"
8 | source = "registry+https://github.com/rust-lang/crates.io-index"
9 | checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
10 |
11 | [[package]]
12 | name = "bit"
13 | version = "0.1.1"
14 | source = "registry+https://github.com/rust-lang/crates.io-index"
15 | checksum = "2b645c5c09a7d4035949cfce1a915785aaad6f17800c35fda8a8c311c491f284"
16 |
17 | [[package]]
18 | name = "bit_field"
19 | version = "0.10.1"
20 | source = "registry+https://github.com/rust-lang/crates.io-index"
21 | checksum = "dcb6dd1c2376d2e096796e234a70e17e94cc2d5d54ff8ce42b28cef1d0d359a4"
22 |
23 | [[package]]
24 | name = "bitflags"
25 | version = "1.3.2"
26 | source = "registry+https://github.com/rust-lang/crates.io-index"
27 | checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
28 |
29 | [[package]]
30 | name = "bitmap-allocator"
31 | version = "0.1.0"
32 | source = "git+https://github.com/rcore-os/bitmap-allocator?rev=88e871a#88e871a54f28a3d6795478f237466b3332e2fb1d"
33 | dependencies = [
34 | "bit_field",
35 | ]
36 |
37 | [[package]]
38 | name = "buddy_system_allocator"
39 | version = "0.8.0"
40 | source = "registry+https://github.com/rust-lang/crates.io-index"
41 | checksum = "55703ac5f02c246ce6158eff6ae2dd9e9069917969682b6831f8a5123abb8a48"
42 | dependencies = [
43 | "spin 0.7.1",
44 | ]
45 |
46 | [[package]]
47 | name = "cfg-if"
48 | version = "1.0.0"
49 | source = "registry+https://github.com/rust-lang/crates.io-index"
50 | checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
51 |
52 | [[package]]
53 | name = "lazy_static"
54 | version = "1.4.0"
55 | source = "registry+https://github.com/rust-lang/crates.io-index"
56 | checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
57 | dependencies = [
58 | "spin 0.5.2",
59 | ]
60 |
61 | [[package]]
62 | name = "lock_api"
63 | version = "0.4.9"
64 | source = "registry+https://github.com/rust-lang/crates.io-index"
65 | checksum = "435011366fe56583b16cf956f9df0095b405b82d76425bc8981c0e22e60ec4df"
66 | dependencies = [
67 | "autocfg",
68 | "scopeguard",
69 | ]
70 |
71 | [[package]]
72 | name = "log"
73 | version = "0.4.17"
74 | source = "registry+https://github.com/rust-lang/crates.io-index"
75 | checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e"
76 | dependencies = [
77 | "cfg-if",
78 | ]
79 |
80 | [[package]]
81 | name = "numeric-enum-macro"
82 | version = "0.2.0"
83 | source = "registry+https://github.com/rust-lang/crates.io-index"
84 | checksum = "300e4bdb6b46b592948e700ea1ef24a4296491f6a0ee722b258040abd15a3714"
85 |
86 | [[package]]
87 | name = "paste"
88 | version = "1.0.9"
89 | source = "registry+https://github.com/rust-lang/crates.io-index"
90 | checksum = "b1de2e551fb905ac83f73f7aedf2f0cb4a0da7e35efa24a202a936269f1f18e1"
91 |
92 | [[package]]
93 | name = "raw-cpuid"
94 | version = "10.6.0"
95 | source = "registry+https://github.com/rust-lang/crates.io-index"
96 | checksum = "a6823ea29436221176fe662da99998ad3b4db2c7f31e7b6f5fe43adccd6320bb"
97 | dependencies = [
98 | "bitflags",
99 | ]
100 |
101 | [[package]]
102 | name = "rustversion"
103 | version = "1.0.9"
104 | source = "registry+https://github.com/rust-lang/crates.io-index"
105 | checksum = "97477e48b4cf8603ad5f7aaf897467cf42ab4218a38ef76fb14c2d6773a6d6a8"
106 |
107 | [[package]]
108 | name = "rvm"
109 | version = "0.1.0"
110 | dependencies = [
111 | "bit_field",
112 | "bitflags",
113 | "cfg-if",
114 | "log",
115 | "numeric-enum-macro",
116 | "raw-cpuid",
117 | "x86",
118 | "x86_64",
119 | ]
120 |
121 | [[package]]
122 | name = "rvm-hypervisor"
123 | version = "0.1.0"
124 | dependencies = [
125 | "bitflags",
126 | "bitmap-allocator",
127 | "buddy_system_allocator",
128 | "cfg-if",
129 | "lazy_static",
130 | "log",
131 | "raw-cpuid",
132 | "rvm",
133 | "spin 0.9.4",
134 | "x2apic",
135 | "x86",
136 | "x86_64",
137 | ]
138 |
139 | [[package]]
140 | name = "scopeguard"
141 | version = "1.1.0"
142 | source = "registry+https://github.com/rust-lang/crates.io-index"
143 | checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd"
144 |
145 | [[package]]
146 | name = "spin"
147 | version = "0.5.2"
148 | source = "registry+https://github.com/rust-lang/crates.io-index"
149 | checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d"
150 |
151 | [[package]]
152 | name = "spin"
153 | version = "0.7.1"
154 | source = "registry+https://github.com/rust-lang/crates.io-index"
155 | checksum = "13287b4da9d1207a4f4929ac390916d64eacfe236a487e9a9f5b3be392be5162"
156 |
157 | [[package]]
158 | name = "spin"
159 | version = "0.9.4"
160 | source = "registry+https://github.com/rust-lang/crates.io-index"
161 | checksum = "7f6002a767bff9e83f8eeecf883ecb8011875a21ae8da43bffb817a57e78cc09"
162 | dependencies = [
163 | "lock_api",
164 | ]
165 |
166 | [[package]]
167 | name = "volatile"
168 | version = "0.4.5"
169 | source = "registry+https://github.com/rust-lang/crates.io-index"
170 | checksum = "e3ca98349dda8a60ae74e04fd90c7fb4d6a4fbe01e6d3be095478aa0b76f6c0c"
171 |
172 | [[package]]
173 | name = "x2apic"
174 | version = "0.4.1"
175 | source = "registry+https://github.com/rust-lang/crates.io-index"
176 | checksum = "32b6a3e030cfc71d614954e1de6dcb09e40bf1437f620c27b4526f978bee912e"
177 | dependencies = [
178 | "bit",
179 | "bitflags",
180 | "paste",
181 | "raw-cpuid",
182 | "x86_64",
183 | ]
184 |
185 | [[package]]
186 | name = "x86"
187 | version = "0.52.0"
188 | source = "registry+https://github.com/rust-lang/crates.io-index"
189 | checksum = "2781db97787217ad2a2845c396a5efe286f87467a5810836db6d74926e94a385"
190 | dependencies = [
191 | "bit_field",
192 | "bitflags",
193 | "raw-cpuid",
194 | ]
195 |
196 | [[package]]
197 | name = "x86_64"
198 | version = "0.14.10"
199 | source = "registry+https://github.com/rust-lang/crates.io-index"
200 | checksum = "100555a863c0092238c2e0e814c1096c1e5cf066a309c696a87e907b5f8c5d69"
201 | dependencies = [
202 | "bit_field",
203 | "bitflags",
204 | "rustversion",
205 | "volatile",
206 | ]
207 |
--------------------------------------------------------------------------------
/rvm/src/arch/x86_64/vmx/definitions.rs:
--------------------------------------------------------------------------------
1 | use core::fmt::{Debug, Formatter, Result};
2 |
3 | /// VM instruction error numbers. (SDM Vol. 3C, Section 30.4)
4 | pub struct VmxInstructionError(u32);
5 |
6 | impl VmxInstructionError {
7 | pub fn as_str(&self) -> &str { // description for the wrapped error number
8 | match self.0 {
9 | 0 => "OK",
10 | 1 => "VMCALL executed in VMX root operation",
11 | 2 => "VMCLEAR with invalid physical address",
12 | 3 => "VMCLEAR with VMXON pointer",
13 | 4 => "VMLAUNCH with non-clear VMCS",
14 | 5 => "VMRESUME with non-launched VMCS",
15 | 6 => "VMRESUME after VMXOFF (VMXOFF and VMXON between VMLAUNCH and VMRESUME)",
16 | 7 => "VM entry with invalid control field(s)",
17 | 8 => "VM entry with invalid host-state field(s)",
18 | 9 => "VMPTRLD with invalid physical address",
19 | 10 => "VMPTRLD with VMXON pointer",
20 | 11 => "VMPTRLD with incorrect VMCS revision identifier",
21 | 12 => "VMREAD/VMWRITE from/to unsupported VMCS component",
22 | 13 => "VMWRITE to read-only VMCS component",
23 | 15 => "VMXON executed in VMX root operation",
24 | 16 => "VM entry with invalid executive-VMCS pointer",
25 | 17 => "VM entry with non-launched executive VMCS",
26 | 18 => "VM entry with executive-VMCS pointer not VMXON pointer (when attempting to deactivate the dual-monitor treatment of SMIs and SMM)",
27 | 19 => "VMCALL with non-clear VMCS (when attempting to activate the dual-monitor treatment of SMIs and SMM)",
28 | 20 => "VMCALL with invalid VM-exit control fields",
29 | 22 => "VMCALL with incorrect MSEG revision identifier (when attempting to activate the dual-monitor treatment of SMIs and SMM)",
30 | 23 => "VMXOFF under dual-monitor treatment of SMIs and SMM",
31 | 24 => "VMCALL with invalid SMM-monitor features (when attempting to activate the dual-monitor treatment of SMIs and SMM)",
32 | 25 => "VM entry with invalid VM-execution control fields in executive VMCS (when attempting to return from SMM)",
33 | 26 => "VM entry with events blocked by MOV SS",
34 | 28 => "Invalid operand to INVEPT/INVVPID",
35 | _ => "[INVALID]", // any number without an arm above, including 14, 21 and 27
36 | }
37 | }
38 | }
39 |
40 | impl From for VmxInstructionError {
41 | fn from(value: u32) -> Self {
42 | Self(value)
43 | }
44 | }
45 |
46 | impl Debug for VmxInstructionError {
47 | fn fmt(&self, f: &mut Formatter) -> Result {
48 | write!(f, "VmxInstructionError({}, {:?})", self.0, self.as_str())
49 | }
50 | }
51 |
52 | numeric_enum_macro::numeric_enum! {
53 | #[repr(u32)]
54 | #[derive(Debug, Copy, Clone, Eq, PartialEq)]
55 | #[allow(non_camel_case_types)]
56 | /// VMX basic exit reasons. (SDM Vol. 3D, Appendix C)
57 | pub enum VmxExitReason {
58 | EXCEPTION_NMI = 0,
59 | EXTERNAL_INTERRUPT = 1,
60 | TRIPLE_FAULT = 2,
61 | INIT = 3,
62 | SIPI = 4,
63 | SMI = 5,
64 | OTHER_SMI = 6,
65 | INTERRUPT_WINDOW = 7,
66 | NMI_WINDOW = 8,
67 | TASK_SWITCH = 9,
68 | CPUID = 10,
69 | GETSEC = 11,
70 | HLT = 12,
71 | INVD = 13,
72 | INVLPG = 14,
73 | RDPMC = 15,
74 | RDTSC = 16,
75 | RSM = 17,
76 | VMCALL = 18,
77 | VMCLEAR = 19,
78 | VMLAUNCH = 20,
79 | VMPTRLD = 21,
80 | VMPTRST = 22,
81 | VMREAD = 23,
82 | VMRESUME = 24,
83 | VMWRITE = 25,
84 | VMOFF = 26,
85 | VMON = 27,
86 | CR_ACCESS = 28,
87 | DR_ACCESS = 29,
88 | IO_INSTRUCTION = 30,
89 | MSR_READ = 31,
90 | MSR_WRITE = 32,
91 | INVALID_GUEST_STATE = 33,
92 | MSR_LOAD_FAIL = 34,
93 | MWAIT_INSTRUCTION = 36,
94 | MONITOR_TRAP_FLAG = 37,
95 | MONITOR_INSTRUCTION = 39,
96 | PAUSE_INSTRUCTION = 40,
97 | MCE_DURING_VMENTRY = 41,
98 | TPR_BELOW_THRESHOLD = 43,
99 | APIC_ACCESS = 44,
100 | VIRTUALIZED_EOI = 45,
101 | GDTR_IDTR = 46,
102 | LDTR_TR = 47,
103 | EPT_VIOLATION = 48,
104 | EPT_MISCONFIG = 49,
105 | INVEPT = 50,
106 | RDTSCP = 51,
107 | PREEMPTION_TIMER = 52,
108 | INVVPID = 53,
109 | WBINVD = 54,
110 | XSETBV = 55,
111 | APIC_WRITE = 56,
112 | RDRAND = 57,
113 | INVPCID = 58,
114 | VMFUNC = 59,
115 | ENCLS = 60,
116 | RDSEED = 61,
117 | PML_FULL = 62,
118 | XSAVES = 63,
119 | XRSTORS = 64,
120 | PCONFIG = 65,
121 | SPP_EVENT = 66,
122 | UMWAIT = 67,
123 | TPAUSE = 68,
124 | LOADIWKEY = 69,
125 | }
126 | }
127 |
128 | numeric_enum_macro::numeric_enum! {
129 | #[repr(u8)]
130 | #[derive(Debug, Copy, Clone, Eq, PartialEq)]
131 | /// The interruption type (bits 10:8) in VM-Entry Interruption-Information Field
132 | /// and VM-Exit Interruption-Information Field. (SDM Vol. 3C, Section 24.8.3, 24.9.2)
133 | pub enum VmxInterruptionType {
134 | /// External interrupt
135 | External = 0,
136 | /// Reserved
137 | Reserved = 1,
138 | /// Non-maskable interrupt (NMI)
139 | NMI = 2,
140 | /// Hardware exception (e.g,. #PF)
141 | HardException = 3,
142 | /// Software interrupt (INT n)
143 | SoftIntr = 4,
144 | /// Privileged software exception (INT1)
145 | PrivSoftException = 5,
146 | /// Software exception (INT3 or INTO)
147 | SoftException = 6,
148 | /// Other event
149 | Other = 7,
150 | }
151 | }
152 |
153 | impl VmxInterruptionType {
154 | /// Whether the exception/interrupt with `vector` has an error code.
155 | pub const fn vector_has_error_code(vector: u8) -> bool {
156 | use x86::irq::*;
157 | matches!(
158 | vector,
159 | DOUBLE_FAULT_VECTOR
160 | | INVALID_TSS_VECTOR
161 | | SEGMENT_NOT_PRESENT_VECTOR
162 | | STACK_SEGEMENT_FAULT_VECTOR
163 | | GENERAL_PROTECTION_FAULT_VECTOR
164 | | PAGE_FAULT_VECTOR
165 | | ALIGNMENT_CHECK_VECTOR
166 | )
167 | }
168 |
169 | /// Determine interruption type by the interrupt vector.
170 | pub const fn from_vector(vector: u8) -> Self {
171 | // SDM Vol. 3C, Section 24.8.3
172 | use x86::irq::*;
173 | match vector {
174 | DEBUG_VECTOR => Self::PrivSoftException,
175 | NONMASKABLE_INTERRUPT_VECTOR => Self::NMI,
176 | BREAKPOINT_VECTOR | OVERFLOW_VECTOR => Self::SoftException,
177 | // SDM Vol. 3A, Section 6.15: All other vectors from 0 to 21 are exceptions.
178 | 0..=VIRTUALIZATION_VECTOR => Self::HardException,
179 | 32..=255 => Self::External,
180 | _ => Self::Other,
181 | }
182 | }
183 |
184 | /// For software interrupt, software exception, or privileged software
185 | /// exception,we need to set VM-Entry Instruction Length Field.
186 | pub const fn is_soft(&self) -> bool {
187 | matches!(
188 | *self,
189 | Self::SoftIntr | Self::SoftException | Self::PrivSoftException
190 | )
191 | }
192 | }
193 |
--------------------------------------------------------------------------------
/rvm/src/arch/x86_64/vmx/structs.rs:
--------------------------------------------------------------------------------
1 | use bit_field::BitField;
2 | use bitflags::bitflags;
3 |
4 | use crate::arch::msr::{Msr, MsrReadWrite};
5 | use crate::mm::{PhysFrame, PAGE_SIZE};
6 | use crate::{HostPhysAddr, RvmHal, RvmResult};
7 |
8 | /// VMCS/VMXON region in 4K size. (SDM Vol. 3C, Section 24.2)
9 | #[derive(Debug)]
10 | pub struct VmxRegion {
11 | frame: PhysFrame,
12 | }
13 |
14 | impl VmxRegion {
15 | pub const unsafe fn uninit() -> Self {
16 | Self {
17 | frame: PhysFrame::uninit(),
18 | }
19 | }
20 |
21 | pub fn new(revision_id: u32, shadow_indicator: bool) -> RvmResult {
22 | let frame = PhysFrame::alloc_zero()?;
23 | unsafe {
24 | (*(frame.as_mut_ptr() as *mut u32))
25 | .set_bits(0..=30, revision_id)
26 | .set_bit(31, shadow_indicator);
27 | }
28 | Ok(Self { frame })
29 | }
30 |
31 | pub fn phys_addr(&self) -> HostPhysAddr {
32 | self.frame.start_paddr()
33 | }
34 | }
35 |
36 | #[derive(Debug)]
37 | pub struct MsrBitmap {
38 | frame: PhysFrame,
39 | }
40 |
41 | impl MsrBitmap {
42 | pub fn passthrough_all() -> RvmResult {
43 | Ok(Self {
44 | frame: PhysFrame::alloc_zero()?,
45 | })
46 | }
47 |
48 | #[allow(unused)]
49 | pub fn intercept_all() -> RvmResult {
50 | let mut frame = PhysFrame::alloc()?;
51 | frame.fill(u8::MAX);
52 | Ok(Self { frame })
53 | }
54 |
55 | pub fn phys_addr(&self) -> HostPhysAddr {
56 | self.frame.start_paddr()
57 | }
58 |
59 | fn set_intercept(&mut self, msr: u32, is_write: bool, intercept: bool) {
60 | let offset = if msr <= 0x1fff {
61 | if !is_write {
62 | 0 // Read bitmap for low MSRs (0x0000_0000..0x0000_1FFF)
63 | } else {
64 | 2 // Write bitmap for low MSRs (0x0000_0000..0x0000_1FFF)
65 | }
66 | } else if (0xc000_0000..=0xc000_1fff).contains(&msr) {
67 | if !is_write {
68 | 1 // Read bitmap for high MSRs (0xC000_0000..0xC000_1FFF)
69 | } else {
70 | 3 // Write bitmap for high MSRs (0xC000_0000..0xC000_1FFF)
71 | }
72 | } else {
73 | unreachable!()
74 | } * 1024;
75 | let bitmap =
76 | unsafe { core::slice::from_raw_parts_mut(self.frame.as_mut_ptr().add(offset), 1024) };
77 | let msr = msr & 0x1fff;
78 | let byte = (msr / 8) as usize;
79 | let bits = msr % 8;
80 | if intercept {
81 | bitmap[byte] |= 1 << bits;
82 | } else {
83 | bitmap[byte] &= !(1 << bits);
84 | }
85 | }
86 |
87 | pub fn set_read_intercept(&mut self, msr: u32, intercept: bool) {
88 | self.set_intercept(msr, false, intercept);
89 | }
90 |
91 | pub fn set_write_intercept(&mut self, msr: u32, intercept: bool) {
92 | self.set_intercept(msr, true, intercept);
93 | }
94 | }
95 |
96 | /// Reporting Register of Basic VMX Capabilities. (SDM Vol. 3D, Appendix A.1)
97 | #[derive(Debug)]
98 | pub struct VmxBasic {
99 | /// The 31-bit VMCS revision identifier used by the processor.
100 | pub revision_id: u32,
101 | /// The number of bytes that software should allocate for the VMXON region
102 | /// and any VMCS region.
103 | pub region_size: u16,
104 | /// The width of the physical addresses that may be used for the VMXON
105 | /// region, each VMCS, and data structures referenced by pointers in a VMCS.
106 | pub is_32bit_address: bool,
107 | /// The memory type that should be used for the VMCS, for data structures
108 | /// referenced by pointers in the VMCS.
109 | pub mem_type: u8,
110 | /// The processor reports information in the VM-exit instruction-information
111 | /// field on VM exits due to execution of the INS and OUTS instructions.
112 | pub io_exit_info: bool,
113 | /// If any VMX controls that default to 1 may be cleared to 0.
114 | pub vmx_flex_controls: bool,
115 | }
116 |
117 | impl MsrReadWrite for VmxBasic {
118 | const MSR: Msr = Msr::IA32_VMX_BASIC;
119 | }
120 |
121 | impl VmxBasic {
122 | pub const VMX_MEMORY_TYPE_WRITE_BACK: u8 = 6;
123 |
124 | /// Read the current IA32_VMX_BASIC flags.
125 | pub fn read() -> Self {
126 | let msr = Self::read_raw();
127 | Self {
128 | revision_id: msr.get_bits(0..31) as u32,
129 | region_size: msr.get_bits(32..45) as u16,
130 | is_32bit_address: msr.get_bit(48),
131 | mem_type: msr.get_bits(50..54) as u8,
132 | io_exit_info: msr.get_bit(54),
133 | vmx_flex_controls: msr.get_bit(55),
134 | }
135 | }
136 | }
137 |
138 | bitflags! {
139 | /// IA32_FEATURE_CONTROL flags.
140 | pub struct FeatureControlFlags: u64 {
141 | /// Lock bit: when set, locks this MSR from being written. when clear,
142 | /// VMXON causes a #GP.
143 | const LOCKED = 1 << 0;
144 | /// Enable VMX inside SMX operation.
145 | const VMXON_ENABLED_INSIDE_SMX = 1 << 1;
146 | /// Enable VMX outside SMX operation.
147 | const VMXON_ENABLED_OUTSIDE_SMX = 1 << 2;
148 | }
149 | }
150 |
151 | /// Control Features in Intel 64 Processor. (SDM Vol. 3C, Section 23.7)
152 | pub struct FeatureControl;
153 |
154 | impl MsrReadWrite for FeatureControl {
155 | const MSR: Msr = Msr::IA32_FEATURE_CONTROL;
156 | }
157 |
158 | impl FeatureControl {
159 | /// Read the current IA32_FEATURE_CONTROL flags.
160 | pub fn read() -> FeatureControlFlags {
161 | FeatureControlFlags::from_bits_truncate(Self::read_raw())
162 | }
163 |
164 | /// Write IA32_FEATURE_CONTROL flags, preserving reserved values.
165 | pub fn write(flags: FeatureControlFlags) {
166 | let old_value = Self::read_raw();
167 | let reserved = old_value & !(FeatureControlFlags::all().bits());
168 | let new_value = reserved | flags.bits();
169 | unsafe { Self::write_raw(new_value) };
170 | }
171 | }
172 |
173 | bitflags! {
174 | /// Extended-Page-Table Pointer. (SDM Vol. 3C, Section 24.6.11)
175 | pub struct EPTPointer: u64 {
176 | /// EPT paging-structure memory type: Uncacheable (UC).
177 | #[allow(clippy::identity_op)]
178 | const MEM_TYPE_UC = 0 << 0;
179 | /// EPT paging-structure memory type: Write-back (WB).
180 | #[allow(clippy::identity_op)]
181 | const MEM_TYPE_WB = 6 << 0;
182 | /// EPT page-walk length 1.
183 | const WALK_LENGTH_1 = 0 << 3;
184 | /// EPT page-walk length 2.
185 | const WALK_LENGTH_2 = 1 << 3;
186 | /// EPT page-walk length 3.
187 | const WALK_LENGTH_3 = 2 << 3;
188 | /// EPT page-walk length 4.
189 | const WALK_LENGTH_4 = 3 << 3;
190 | /// Setting this control to 1 enables accessed and dirty flags for EPT.
191 | const ENABLE_ACCESSED_DIRTY = 1 << 6;
192 | }
193 | }
194 |
195 | impl EPTPointer {
196 | pub fn from_table_phys(pml4_paddr: HostPhysAddr) -> Self {
197 | let aligned_addr = pml4_paddr & !(PAGE_SIZE - 1);
198 | let flags = unsafe { Self::from_bits_unchecked(aligned_addr as u64) };
199 | flags | Self::MEM_TYPE_WB | Self::WALK_LENGTH_4 | Self::ENABLE_ACCESSED_DIRTY
200 | }
201 | }
202 |
--------------------------------------------------------------------------------
/hypervisor/src/hv/vmexit.rs:
--------------------------------------------------------------------------------
1 | use super::device_emu::{self, VirtLocalApic};
2 | use super::hal::RvmHalImpl;
3 | use rvm::arch::{VmxExitInfo, VmxExitReason};
4 | use rvm::{RvmError, RvmResult, RvmVcpu};
5 |
6 | type Vcpu = RvmVcpu;
7 |
8 | const VM_EXIT_INSTR_LEN_CPUID: u8 = 2;
9 | const VM_EXIT_INSTR_LEN_RDMSR: u8 = 2;
10 | const VM_EXIT_INSTR_LEN_WRMSR: u8 = 2;
11 | const VM_EXIT_INSTR_LEN_VMCALL: u8 = 3;
12 |
13 | fn handle_external_interrupt(vcpu: &mut Vcpu) -> RvmResult {
14 | let int_info = vcpu.interrupt_exit_info()?;
15 | trace!("VM-exit: external interrupt: {:#x?}", int_info);
16 | assert!(int_info.valid);
17 | crate::arch::handle_irq(int_info.vector);
18 | Ok(())
19 | }
20 |
21 | fn handle_cpuid(vcpu: &mut Vcpu) -> RvmResult {
22 | use raw_cpuid::{cpuid, CpuIdResult};
23 |
24 | const LEAF_FEATURE_INFO: u32 = 0x1;
25 | const LEAF_HYPERVISOR_INFO: u32 = 0x4000_0000;
26 | const LEAF_HYPERVISOR_FEATURE: u32 = 0x4000_0001;
27 | const VENDOR_STR: &[u8; 12] = b"RVMRVMRVMRVM";
28 | let vendor_regs = unsafe { &*(VENDOR_STR.as_ptr() as *const [u32; 3]) };
29 |
30 | let regs = vcpu.regs_mut();
31 | let function = regs.rax as u32;
32 | let res = match function {
33 | LEAF_FEATURE_INFO => {
34 | const FEATURE_VMX: u32 = 1 << 5;
35 | const FEATURE_HYPERVISOR: u32 = 1 << 31;
36 | let mut res = cpuid!(regs.rax, regs.rcx);
37 | res.ecx &= !FEATURE_VMX;
38 | res.ecx |= FEATURE_HYPERVISOR;
39 | res
40 | }
41 | LEAF_HYPERVISOR_INFO => CpuIdResult {
42 | eax: LEAF_HYPERVISOR_FEATURE,
43 | ebx: vendor_regs[0],
44 | ecx: vendor_regs[1],
45 | edx: vendor_regs[2],
46 | },
47 | LEAF_HYPERVISOR_FEATURE => CpuIdResult {
48 | eax: 0,
49 | ebx: 0,
50 | ecx: 0,
51 | edx: 0,
52 | },
53 | _ => cpuid!(regs.rax, regs.rcx),
54 | };
55 |
56 | debug!(
57 | "VM exit: CPUID({:#x}, {:#x}): {:?}",
58 | regs.rax, regs.rcx, res
59 | );
60 | regs.rax = res.eax as _;
61 | regs.rbx = res.ebx as _;
62 | regs.rcx = res.ecx as _;
63 | regs.rdx = res.edx as _;
64 | vcpu.advance_rip(VM_EXIT_INSTR_LEN_CPUID)?;
65 | Ok(())
66 | }
67 |
68 | fn handle_hypercall(vcpu: &mut Vcpu) -> RvmResult {
69 | let regs = vcpu.regs();
70 | info!(
71 | "VM exit: VMCALL({:#x}): {:?}",
72 | regs.rax,
73 | [regs.rdi, regs.rsi, regs.rdx, regs.rcx]
74 | );
75 | vcpu.advance_rip(VM_EXIT_INSTR_LEN_VMCALL)?;
76 | Ok(())
77 | }
78 |
79 | fn handle_io_instruction(vcpu: &mut Vcpu, exit_info: &VmxExitInfo) -> RvmResult {
80 | let io_info = vcpu.io_exit_info()?;
81 | trace!(
82 | "VM exit: I/O instruction @ {:#x}: {:#x?}",
83 | exit_info.guest_rip,
84 | io_info,
85 | );
86 | if io_info.is_string {
87 | error!("INS/OUTS instructions are not supported!");
88 | return Err(RvmError::Unsupported);
89 | }
90 | if io_info.is_repeat {
91 | error!("REP prefixed I/O instructions are not supported!");
92 | return Err(RvmError::Unsupported);
93 | }
94 |
95 | if let Some(dev) = device_emu::all_virt_devices().find_port_io_device(io_info.port) {
96 | if io_info.is_in {
97 | let value = dev.read(io_info.port, io_info.access_size)?;
98 | let rax = &mut vcpu.regs_mut().rax;
99 | // SDM Vol. 1, Section 3.4.1.1:
100 | // * 32-bit operands generate a 32-bit result, zero-extended to a 64-bit result in the
101 | // destination general-purpose register.
102 | // * 8-bit and 16-bit operands generate an 8-bit or 16-bit result. The upper 56 bits or
103 | // 48 bits (respectively) of the destination general-purpose register are not modified
104 | // by the operation.
105 | match io_info.access_size {
106 | 1 => *rax = (*rax & !0xff) | (value & 0xff) as u64,
107 | 2 => *rax = (*rax & !0xffff) | (value & 0xffff) as u64,
108 | 4 => *rax = value as u64,
109 | _ => unreachable!(),
110 | }
111 | } else {
112 | let rax = vcpu.regs().rax;
113 | let value = match io_info.access_size {
114 | 1 => rax & 0xff,
115 | 2 => rax & 0xffff,
116 | 4 => rax,
117 | _ => unreachable!(),
118 | } as u32;
119 | dev.write(io_info.port, io_info.access_size, value)?;
120 | }
121 | } else {
122 | panic!(
123 | "Unsupported I/O port {:#x} access: {:#x?}",
124 | io_info.port, io_info
125 | )
126 | }
127 | vcpu.advance_rip(exit_info.exit_instruction_length as _)?;
128 | Ok(())
129 | }
130 |
131 | fn handle_msr_read(vcpu: &mut Vcpu) -> RvmResult {
132 | let msr = vcpu.regs().rcx as u32;
133 |
134 | use x86::msr::*;
135 | let res = if msr == IA32_APIC_BASE {
136 | let mut apic_base = unsafe { rdmsr(IA32_APIC_BASE) };
137 | apic_base |= 1 << 11 | 1 << 10; // enable xAPIC and x2APIC
138 | Ok(apic_base)
139 | } else if VirtLocalApic::msr_range().contains(&msr) {
140 | VirtLocalApic::rdmsr(vcpu, msr)
141 | } else {
142 | Err(RvmError::Unsupported)
143 | };
144 |
145 | if let Ok(value) = res {
146 | debug!("VM exit: RDMSR({:#x}) -> {:#x}", msr, value);
147 | vcpu.regs_mut().rax = value & 0xffff_ffff;
148 | vcpu.regs_mut().rdx = value >> 32;
149 | } else {
150 | panic!("Failed to handle RDMSR({:#x}): {:?}", msr, res);
151 | }
152 | vcpu.advance_rip(VM_EXIT_INSTR_LEN_RDMSR)?;
153 | Ok(())
154 | }
155 |
156 | fn handle_msr_write(vcpu: &mut Vcpu) -> RvmResult {
157 | let msr = vcpu.regs().rcx as u32;
158 | let value = (vcpu.regs().rax & 0xffff_ffff) | (vcpu.regs().rdx << 32);
159 | debug!("VM exit: WRMSR({:#x}) <- {:#x}", msr, value);
160 |
161 | use x86::msr::*;
162 | let res = if msr == IA32_APIC_BASE {
163 | Ok(()) // ignore
164 | } else if VirtLocalApic::msr_range().contains(&msr) {
165 | VirtLocalApic::wrmsr(vcpu, msr, value)
166 | } else {
167 | Err(RvmError::Unsupported)
168 | };
169 |
170 | if res.is_err() {
171 | panic!(
172 | "Failed to handle WRMSR({:#x}) <- {:#x}: {:?}",
173 | msr, value, res
174 | );
175 | }
176 | vcpu.advance_rip(VM_EXIT_INSTR_LEN_WRMSR)?;
177 | Ok(())
178 | }
179 |
180 | fn handle_ept_violation(vcpu: &Vcpu, guest_rip: usize) -> RvmResult {
181 | let fault_info = vcpu.nested_page_fault_info()?;
182 | panic!(
183 | "VM exit: EPT violation @ {:#x}, fault_paddr={:#x}, access_flags=({:?})",
184 | guest_rip, fault_info.fault_guest_paddr, fault_info.access_flags
185 | );
186 | }
187 |
188 | pub fn vmexit_handler(vcpu: &mut Vcpu) -> RvmResult {
189 | let exit_info = vcpu.exit_info()?;
190 | trace!("VM exit: {:#x?}", exit_info);
191 |
192 | if exit_info.entry_failure {
193 | panic!("VM entry failed: {:#x?}", exit_info);
194 | }
195 |
196 | let res = match exit_info.exit_reason {
197 | VmxExitReason::EXTERNAL_INTERRUPT => handle_external_interrupt(vcpu),
198 | VmxExitReason::INTERRUPT_WINDOW => vcpu.set_interrupt_window(false),
199 | VmxExitReason::CPUID => handle_cpuid(vcpu),
200 | VmxExitReason::VMCALL => handle_hypercall(vcpu),
201 | VmxExitReason::IO_INSTRUCTION => handle_io_instruction(vcpu, &exit_info),
202 | VmxExitReason::MSR_READ => handle_msr_read(vcpu),
203 | VmxExitReason::MSR_WRITE => handle_msr_write(vcpu),
204 | VmxExitReason::EPT_VIOLATION => handle_ept_violation(vcpu, exit_info.guest_rip),
205 | _ => panic!(
206 | "Unhandled VM-Exit reason {:?}:\n{:#x?}",
207 | exit_info.exit_reason, vcpu
208 | ),
209 | };
210 |
211 | if res.is_err() {
212 | panic!(
213 | "Failed to handle VM-exit {:?}:\n{:#x?}",
214 | exit_info.exit_reason, vcpu
215 | );
216 | }
217 |
218 | Ok(())
219 | }
220 |
--------------------------------------------------------------------------------
/rvm/src/mm/page_table.rs:
--------------------------------------------------------------------------------
1 | use alloc::{vec, vec::Vec};
2 | use core::{fmt::Debug, marker::PhantomData};
3 |
4 | use super::{MemFlags, PhysFrame, PAGE_SIZE};
5 | use crate::{RvmHal, RvmResult};
6 |
7 | const LEVELS: usize = 4;
8 | const ENTRY_COUNT: usize = 512;
9 |
10 | type VirtAddr = super::GuestPhysAddr;
11 | type PhysAddr = super::HostPhysAddr;
12 |
13 | const fn p4_index(vaddr: VirtAddr) -> usize {
14 | (vaddr >> (12 + 27)) & (ENTRY_COUNT - 1)
15 | }
16 |
17 | const fn p3_index(vaddr: VirtAddr) -> usize {
18 | (vaddr >> (12 + 18)) & (ENTRY_COUNT - 1)
19 | }
20 |
21 | const fn p2_index(vaddr: VirtAddr) -> usize {
22 | (vaddr >> (12 + 9)) & (ENTRY_COUNT - 1)
23 | }
24 |
25 | const fn p1_index(vaddr: VirtAddr) -> usize {
26 | (vaddr >> 12) & (ENTRY_COUNT - 1)
27 | }
28 |
29 | const fn align_down(addr: usize) -> usize {
30 | addr & !(PAGE_SIZE - 1)
31 | }
32 |
33 | const fn page_offset(addr: usize) -> usize {
34 | addr & (PAGE_SIZE - 1)
35 | }
36 |
37 | pub trait GenericPTE: Debug + Clone + Copy + Sync + Send + Sized {
38 | // Create a page table entry point to a terminate 4K-sized page or a huge page.
39 | fn new_page(paddr: PhysAddr, flags: MemFlags, is_huge: bool) -> Self;
40 | // Create a page table entry point to a next level page table.
41 | fn new_table(paddr: PhysAddr) -> Self;
42 |
43 | /// Returns the physical address mapped by this entry.
44 | fn paddr(&self) -> PhysAddr;
45 | /// Returns the flags of this entry.
46 | fn flags(&self) -> MemFlags;
47 | /// Returns whether this entry is zero.
48 | fn is_unused(&self) -> bool;
49 | /// Returns whether this entry flag indicates present.
50 | fn is_present(&self) -> bool;
51 | /// For non-last level translation, returns whether this entry maps to a
52 | /// huge frame.
53 | fn is_huge(&self) -> bool;
54 | /// Set this entry to zero.
55 | fn clear(&mut self);
56 | }
57 |
58 | /// A generic 4-level page table structures.
59 | pub struct Level4PageTable {
60 | root_paddr: PhysAddr,
61 | intrm_tables: Vec>,
62 | _phantom: PhantomData,
63 | }
64 |
65 | impl Level4PageTable {
66 | /// Create a page table instance.
67 | pub fn new() -> RvmResult {
68 | let root_frame = PhysFrame::alloc_zero()?;
69 | Ok(Self {
70 | root_paddr: root_frame.start_paddr(),
71 | intrm_tables: vec![root_frame],
72 | _phantom: PhantomData,
73 | })
74 | }
75 |
76 | /// Physical address of the page table root.
77 | pub fn root_paddr(&self) -> PhysAddr {
78 | self.root_paddr
79 | }
80 |
81 | /// Create a mapping from the virtual address `vaddr` to the physical address
82 | /// `paddr`, with memory permissions and types described by `flags`.
83 | pub fn map(&mut self, vaddr: VirtAddr, paddr: PhysAddr, flags: MemFlags) -> RvmResult {
84 | let entry = self.get_entry_mut_or_create(vaddr)?;
85 | if !entry.is_unused() {
86 | return rvm_err!(
87 | InvalidParam,
88 | format_args!("try to map an already mapped page {:#x}", vaddr)
89 | );
90 | }
91 | *entry = GenericPTE::new_page(align_down(paddr), flags, false);
92 | Ok(())
93 | }
94 |
95 | /// Remove mappings for the virtual address `vaddr`.
96 | pub fn unmap(&mut self, vaddr: VirtAddr) -> RvmResult {
97 | let entry = self.get_entry_mut(vaddr)?;
98 | if entry.is_unused() {
99 | return rvm_err!(
100 | InvalidParam,
101 | format_args!("try to unmap an unmapped page {:#x}", vaddr)
102 | );
103 | }
104 | let paddr = entry.paddr();
105 | entry.clear();
106 | Ok(paddr)
107 | }
108 |
109 | /// Query the mapping target for the virtual address `vaddr`, return the
110 | /// target physical address and memory permissions.
111 | pub fn query(&self, vaddr: VirtAddr) -> RvmResult<(PhysAddr, MemFlags)> {
112 | let entry = self.get_entry_mut(vaddr)?;
113 | if entry.is_unused() {
114 | return rvm_err!(
115 | InvalidParam,
116 | format_args!("queried page {:#x} is not mapped", vaddr)
117 | );
118 | }
119 | let off = page_offset(vaddr);
120 | Ok((entry.paddr() + off, entry.flags()))
121 | }
122 |
123 | /// Update the mapping target for the virtual address `vaddr`.
124 | pub fn update(
125 | &mut self,
126 | vaddr: VirtAddr,
127 | paddr: Option,
128 | flags: Option,
129 | ) -> RvmResult {
130 | let entry = self.get_entry_mut(vaddr)?;
131 | let paddr = align_down(paddr.unwrap_or_else(|| entry.paddr()));
132 | let flags = flags.unwrap_or_else(|| entry.flags());
133 | *entry = GenericPTE::new_page(paddr, flags, entry.is_huge());
134 | Ok(())
135 | }
136 |
137 | /// Print the page table contents recursively for debugging.
138 | pub fn dump(&self, limit: usize) {
139 | info!("Root: {:x?}", self.root_paddr());
140 | self.walk(
141 | self.table_of(self.root_paddr()),
142 | 0,
143 | 0,
144 | limit,
145 | &|level: usize, idx: usize, vaddr: VirtAddr, entry: &PTE| {
146 | for _ in 0..level {
147 | info!(" ");
148 | }
149 | info!("[{} - {:x}], 0x{:08x?}: {:x?}", level, idx, vaddr, entry);
150 | },
151 | );
152 | }
153 | }
154 |
155 | impl Level4PageTable {
156 | fn table_of<'a>(&self, paddr: PhysAddr) -> &'a [PTE] {
157 | let ptr = H::phys_to_virt(paddr) as *const PTE;
158 | unsafe { core::slice::from_raw_parts(ptr, ENTRY_COUNT) }
159 | }
160 |
161 | fn table_of_mut<'a>(&self, paddr: PhysAddr) -> &'a mut [PTE] {
162 | let ptr = H::phys_to_virt(paddr) as *mut PTE;
163 | unsafe { core::slice::from_raw_parts_mut(ptr, ENTRY_COUNT) }
164 | }
165 |
166 | fn next_table_mut<'a>(&self, entry: &PTE) -> RvmResult<&'a mut [PTE]> {
167 | if !entry.is_present() {
168 | rvm_err!(BadState, "next table entry not present")
169 | } else if entry.is_huge() {
170 | rvm_err!(BadState, "next table entry is huge")
171 | } else {
172 | Ok(self.table_of_mut(entry.paddr()))
173 | }
174 | }
175 |
176 | fn next_table_mut_or_create<'a>(&mut self, entry: &mut PTE) -> RvmResult<&'a mut [PTE]> {
177 | if entry.is_unused() {
178 | let paddr = self.alloc_intrm_table()?;
179 | *entry = GenericPTE::new_table(paddr);
180 | Ok(self.table_of_mut(paddr))
181 | } else {
182 | self.next_table_mut(entry)
183 | }
184 | }
185 |
186 | fn alloc_intrm_table(&mut self) -> RvmResult {
187 | let frame = PhysFrame::alloc_zero()?;
188 | let paddr = frame.start_paddr();
189 | self.intrm_tables.push(frame);
190 | Ok(paddr)
191 | }
192 |
193 | fn get_entry_mut(&self, vaddr: VirtAddr) -> RvmResult<&mut PTE> {
194 | let p4 = self.table_of_mut(self.root_paddr());
195 | let p4e = &mut p4[p4_index(vaddr)];
196 |
197 | let p3 = self.next_table_mut(p4e)?;
198 | let p3e = &mut p3[p3_index(vaddr)];
199 |
200 | let p2 = self.next_table_mut(p3e)?;
201 | let p2e = &mut p2[p2_index(vaddr)];
202 |
203 | let p1 = self.next_table_mut(p2e)?;
204 | let p1e = &mut p1[p1_index(vaddr)];
205 | Ok(p1e)
206 | }
207 |
208 | fn get_entry_mut_or_create(&mut self, vaddr: VirtAddr) -> RvmResult<&mut PTE> {
209 | let p4 = self.table_of_mut(self.root_paddr());
210 | let p4e = &mut p4[p4_index(vaddr)];
211 |
212 | let p3 = self.next_table_mut_or_create(p4e)?;
213 | let p3e = &mut p3[p3_index(vaddr)];
214 |
215 | let p2 = self.next_table_mut_or_create(p3e)?;
216 | let p2e = &mut p2[p2_index(vaddr)];
217 |
218 | let p1 = self.next_table_mut_or_create(p2e)?;
219 | let p1e = &mut p1[p1_index(vaddr)];
220 | Ok(p1e)
221 | }
222 |
223 | fn walk(
224 | &self,
225 | table: &[PTE],
226 | level: usize,
227 | start_vaddr: VirtAddr,
228 | limit: usize,
229 | func: &impl Fn(usize, usize, VirtAddr, &PTE),
230 | ) {
231 | let mut n = 0;
232 | for (i, entry) in table.iter().enumerate() {
233 | let vaddr = start_vaddr + (i << (12 + (LEVELS - 1 - level) * 9));
234 | if entry.is_present() {
235 | func(level, i, vaddr, entry);
236 | if level < LEVELS - 1 && !entry.is_huge() {
237 | let table_entry = self.next_table_mut(entry).unwrap();
238 | self.walk(table_entry, level + 1, vaddr, limit, func);
239 | }
240 | n += 1;
241 | if n >= limit {
242 | break;
243 | }
244 | }
245 | }
246 | }
247 | }
248 |
--------------------------------------------------------------------------------
/rvm/src/arch/x86_64/vmx/vcpu.rs:
--------------------------------------------------------------------------------
1 | use alloc::collections::VecDeque;
2 | use core::fmt::{Debug, Formatter, Result};
3 | use core::{arch::asm, mem::size_of};
4 |
5 | use bit_field::BitField;
6 | use x86::bits64::vmx;
7 | use x86::dtables::{self, DescriptorTablePointer};
8 | use x86::segmentation::SegmentSelector;
9 | use x86_64::registers::control::{Cr0, Cr0Flags, Cr3, Cr4, Cr4Flags};
10 |
11 | use super::structs::{MsrBitmap, VmxRegion};
12 | use super::vmcs::{
13 | self, VmcsControl32, VmcsControl64, VmcsControlNW, VmcsGuest16, VmcsGuest32, VmcsGuest64,
14 | VmcsGuestNW, VmcsHost16, VmcsHost32, VmcsHost64, VmcsHostNW,
15 | };
16 | use super::VmxPerCpuState;
17 | use crate::arch::{msr::Msr, ApicTimer, GeneralRegisters};
18 | use crate::{GuestPhysAddr, HostPhysAddr, NestedPageFaultInfo, RvmHal, RvmResult};
19 |
20 | /// A virtual CPU within a guest. NOTE(review): generic arguments look stripped in this listing (e.g. `Option)` below) — confirm against the real file.
21 | #[repr(C)]
22 | pub struct VmxVcpu {
23 | guest_regs: GeneralRegisters, // guest general-purpose registers, handed out by `regs()` / `regs_mut()`
24 | host_stack_top: u64, // `run()` writes the address of this field into the host RSP VMCS field
25 | vmcs: VmxRegion, // VMCS region, created in `new()` from the per-CPU revision id
26 | msr_bitmap: MsrBitmap, // starts as pass-through; `setup_msr_bitmap()` then adds intercepts
27 | apic_timer: ApicTimer, // exposed mutably through `apic_timer_mut()`
28 | pending_events: VecDeque<(u8, Option)>, // queued `(vector, err_code)` pairs pushed by `inject_event()`
29 | }
30 |
31 | impl VmxVcpu { // public and crate-visible API of the vCPU
32 | pub(crate) fn new( // builds a vCPU: allocates the VMCS region and MSR bitmap, then configures both
33 | percpu: &VmxPerCpuState, // supplies `vmcs_revision_id` for the new VMCS region
34 | entry: GuestPhysAddr, // forwarded to `setup_vmcs()`, which passes it to `setup_vmcs_guest()`
35 | ept_root: HostPhysAddr, // forwarded to `setup_vmcs()`, which passes it to `setup_vmcs_control()`
36 | ) -> RvmResult { // NOTE(review): the body returns `Ok(vcpu)`, so a type argument looks stripped here — confirm
37 | let mut vcpu = Self {
38 | guest_regs: GeneralRegisters::default(),
39 | host_stack_top: 0,
40 | vmcs: VmxRegion::new(percpu.vmcs_revision_id, false)?, // shadow indicator bit left clear
41 | msr_bitmap: MsrBitmap::passthrough_all()?,
42 | apic_timer: ApicTimer::new(),
43 | pending_events: VecDeque::with_capacity(8),
44 | };
45 | vcpu.setup_msr_bitmap()?; // any setup error aborts construction
46 | vcpu.setup_vmcs(entry, ept_root)?;
47 | info!("[RVM] created VmxVcpu(vmcs: {:#x})", vcpu.vmcs.phys_addr());
48 | Ok(vcpu)
49 | }
50 |
51 | /// Run the guest, never return. Stores the address of `host_stack_top` in the host RSP VMCS field, then calls `vmx_launch()`.
52 | pub fn run(&mut self) -> ! {
53 | VmcsHostNW::RSP
54 | .write(&self.host_stack_top as *const _ as usize)
55 | .unwrap(); // panics if the VMCS write fails
56 | unsafe { self.vmx_launch() }
57 | }
58 |
59 | /// Basic information about VM exits (delegates to `vmcs::exit_info()`).
60 | pub fn exit_info(&self) -> RvmResult {
61 | vmcs::exit_info()
62 | }
63 |
64 | /// Information for VM exits due to external interrupts (delegates to `vmcs::interrupt_exit_info()`).
65 | pub fn interrupt_exit_info(&self) -> RvmResult {
66 | vmcs::interrupt_exit_info()
67 | }
68 |
69 | /// Information for VM exits due to I/O instructions (delegates to `vmcs::io_exit_info()`).
70 | pub fn io_exit_info(&self) -> RvmResult {
71 | vmcs::io_exit_info()
72 | }
73 |
74 | /// Information for VM exits due to nested page table faults (EPT violation); delegates to `vmcs::ept_violation_info()`.
75 | pub fn nested_page_fault_info(&self) -> RvmResult {
76 | vmcs::ept_violation_info()
77 | }
78 |
79 | /// Guest general-purpose registers (shared reference to the `guest_regs` field).
80 | pub fn regs(&self) -> &GeneralRegisters {
81 | &self.guest_regs
82 | }
83 |
84 | /// Mutable reference of guest general-purpose registers (the `guest_regs` field).
85 | pub fn regs_mut(&mut self) -> &mut GeneralRegisters {
86 | &mut self.guest_regs
87 | }
88 |
89 | /// Guest stack pointer. (`RSP`) Read from the guest RSP VMCS field; panics if the read fails.
90 | pub fn stack_pointer(&self) -> usize {
91 | VmcsGuestNW::RSP.read().unwrap()
92 | }
93 |
94 | /// Set guest stack pointer. (`RSP`) Written to the guest RSP VMCS field; panics if the write fails.
95 | pub fn set_stack_pointer(&mut self, rsp: usize) {
96 | VmcsGuestNW::RSP.write(rsp).unwrap()
97 | }
98 |
99 | /// Advance guest `RIP` by `instr_len` bytes. An error from the VMCS read or write is returned to the caller.
100 | pub fn advance_rip(&mut self, instr_len: u8) -> RvmResult {
101 | Ok(VmcsGuestNW::RIP.write(VmcsGuestNW::RIP.read()? + instr_len as usize)?) // read-modify-write of the guest RIP field
102 | }
103 |
104 | /// Add a virtual interrupt or exception to the pending events list,
105 | /// and try to inject it before later VM entries. This method itself only appends to the queue.
106 | pub fn inject_event(&mut self, vector: u8, err_code: Option) { // NOTE(review): the type argument of `Option` looks stripped in this listing — confirm
107 | self.pending_events.push_back((vector, err_code));
108 | }
109 |
110 | /// If enable, a VM exit occurs at the beginning of any instruction if
111 | /// `RFLAGS.IF` = 1 and there are no other blocking of interrupts.
112 | /// (see SDM, Vol. 3C, Section 24.4.2) An error from the VMCS read or write is returned to the caller.
113 | pub fn set_interrupt_window(&mut self, enable: bool) -> RvmResult {
114 | let mut ctrl = VmcsControl32::PRIMARY_PROCBASED_EXEC_CONTROLS.read()?;
115 | let bits = vmcs::controls::PrimaryControls::INTERRUPT_WINDOW_EXITING.bits();
116 | if enable {
117 | ctrl |= bits // set only the interrupt-window-exiting bit
118 | } else {
119 | ctrl &= !bits // clear only that bit; all other controls are kept
120 | }
121 | VmcsControl32::PRIMARY_PROCBASED_EXEC_CONTROLS.write(ctrl)?;
122 | Ok(())
123 | }
124 |
125 | /// Returns the mutable reference of [`ApicTimer`] (the `apic_timer` field).
126 | pub fn apic_timer_mut(&mut self) -> &mut ApicTimer {
127 | &mut self.apic_timer
128 | }
129 | }
130 |
131 | // Implementation of private methods
132 | impl VmxVcpu {
133 | fn setup_msr_bitmap(&mut self) -> RvmResult {
134 | // IA32_APIC_BASE first, then every x2APIC MSR (0x800..=0x83f):
135 | // both reads and writes of each of them are intercepted.
136 | let apic_base = core::iter::once(x86::msr::IA32_APIC_BASE);
137 | let x2apic_msrs = 0x800..=0x83f;
138 | for msr in apic_base.chain(x2apic_msrs) {
139 | self.msr_bitmap.set_read_intercept(msr, true);
140 | self.msr_bitmap.set_write_intercept(msr, true);
141 | }
142 | // Nothing above can fail; the `RvmResult` return is always `Ok`.
143 | Ok(())
144 | }
145 |
146 | fn setup_vmcs(&mut self, entry: GuestPhysAddr, ept_root: HostPhysAddr) -> RvmResult { // fills this vCPU's VMCS: host state, guest state, then controls
147 | let paddr = self.vmcs.phys_addr() as u64;
148 | unsafe {
149 | vmx::vmclear(paddr)?; // VMCLEAR first, then VMPTRLD, so the field writes below target this VMCS
150 | vmx::vmptrld(paddr)?;
151 | }
152 | self.setup_vmcs_host()?;
153 | self.setup_vmcs_guest(entry)?; // `entry` becomes the initial guest RIP
154 | self.setup_vmcs_control(ept_root)?; // `ept_root` is installed as the EPT pointer
155 | Ok(())
156 | }
157 |
158 | fn setup_vmcs_host(&mut self) -> RvmResult { // snapshot the running host state into the VMCS host-state fields
159 | VmcsHost64::IA32_PAT.write(Msr::IA32_PAT.read())?;
160 | VmcsHost64::IA32_EFER.write(Msr::IA32_EFER.read())?;
161 | // Control registers are copied from their current values.
162 | VmcsHostNW::CR0.write(Cr0::read_raw() as _)?;
163 | VmcsHostNW::CR3.write(Cr3::read_raw().0.start_address().as_u64() as _)?;
164 | VmcsHostNW::CR4.write(Cr4::read_raw() as _)?;
165 | // Segment selectors and FS/GS bases are copied from the live CPU state.
166 | VmcsHost16::ES_SELECTOR.write(x86::segmentation::es().bits())?;
167 | VmcsHost16::CS_SELECTOR.write(x86::segmentation::cs().bits())?;
168 | VmcsHost16::SS_SELECTOR.write(x86::segmentation::ss().bits())?;
169 | VmcsHost16::DS_SELECTOR.write(x86::segmentation::ds().bits())?;
170 | VmcsHost16::FS_SELECTOR.write(x86::segmentation::fs().bits())?;
171 | VmcsHost16::GS_SELECTOR.write(x86::segmentation::gs().bits())?;
172 | VmcsHostNW::FS_BASE.write(Msr::IA32_FS_BASE.read() as _)?;
173 | VmcsHostNW::GS_BASE.write(Msr::IA32_GS_BASE.read() as _)?;
174 | // Task register and descriptor tables: the TR base is looked up in the GDT.
175 | let tr = unsafe { x86::task::tr() };
176 | let mut gdtp = DescriptorTablePointer::<u64>::default(); // u64 entries, as `get_tr_base` expects
177 | let mut idtp = DescriptorTablePointer::<u64>::default();
178 | unsafe {
179 | dtables::sgdt(&mut gdtp);
180 | dtables::sidt(&mut idtp);
181 | }
182 | VmcsHost16::TR_SELECTOR.write(tr.bits())?;
183 | VmcsHostNW::TR_BASE.write(get_tr_base(tr, &gdtp) as _)?;
184 | VmcsHostNW::GDTR_BASE.write(gdtp.base as _)?;
185 | VmcsHostNW::IDTR_BASE.write(idtp.base as _)?;
186 | VmcsHostNW::RIP.write(Self::vmx_exit as usize)?; // every VM exit resumes the host at `vmx_exit`
187 | // SYSENTER state is zeroed for the host.
188 | VmcsHostNW::IA32_SYSENTER_ESP.write(0)?;
189 | VmcsHostNW::IA32_SYSENTER_EIP.write(0)?;
190 | VmcsHost32::IA32_SYSENTER_CS.write(0)?;
191 | Ok(())
192 | }
193 |
194 | fn setup_vmcs_guest(&mut self, entry: GuestPhysAddr) -> RvmResult { // initial guest state; `entry` is written to guest RIP
195 | let cr0_guest = Cr0Flags::EXTENSION_TYPE | Cr0Flags::NUMERIC_ERROR; // initial guest CR0: only ET and NE are set
196 | let cr0_host_owned =
197 | Cr0Flags::NUMERIC_ERROR | Cr0Flags::NOT_WRITE_THROUGH | Cr0Flags::CACHE_DISABLE; // bits placed in the CR0 guest/host mask
198 | let cr0_read_shadow = Cr0Flags::NUMERIC_ERROR;
199 | VmcsGuestNW::CR0.write(cr0_guest.bits() as _)?;
200 | VmcsControlNW::CR0_GUEST_HOST_MASK.write(cr0_host_owned.bits() as _)?;
201 | VmcsControlNW::CR0_READ_SHADOW.write(cr0_read_shadow.bits() as _)?;
202 |
203 | let cr4_guest = Cr4Flags::VIRTUAL_MACHINE_EXTENSIONS; // guest CR4 starts with VMXE set ...
204 | let cr4_host_owned = Cr4Flags::VIRTUAL_MACHINE_EXTENSIONS; // ... VMXE is in the guest/host mask ...
205 | let cr4_read_shadow = 0; // ... and the read shadow for it is 0
206 | VmcsGuestNW::CR4.write(cr4_guest.bits() as _)?;
207 | VmcsControlNW::CR4_GUEST_HOST_MASK.write(cr4_host_owned.bits() as _)?;
208 | VmcsControlNW::CR4_READ_SHADOW.write(cr4_read_shadow)?;
209 |
210 | macro_rules! set_guest_segment { // writes selector 0, base 0, limit 0xffff and the given access rights for one segment
211 | ($seg: ident, $access_rights: expr) => {{
212 | use VmcsGuest16::*;
213 | use VmcsGuest32::*;
214 | use VmcsGuestNW::*;
215 | concat_idents!($seg, _SELECTOR).write(0)?;
216 | concat_idents!($seg, _BASE).write(0)?;
217 | concat_idents!($seg, _LIMIT).write(0xffff)?;
218 | concat_idents!($seg, _ACCESS_RIGHTS).write($access_rights)?;
219 | }};
220 | }
221 |
222 | set_guest_segment!(ES, 0x93); // 16-bit, present, data, read/write, accessed
223 | set_guest_segment!(CS, 0x9b); // 16-bit, present, code, exec/read, accessed
224 | set_guest_segment!(SS, 0x93);
225 | set_guest_segment!(DS, 0x93);
226 | set_guest_segment!(FS, 0x93);
227 | set_guest_segment!(GS, 0x93);
228 | set_guest_segment!(TR, 0x8b); // present, system, 32-bit TSS busy
229 | set_guest_segment!(LDTR, 0x82); // present, system, LDT
230 |
231 | VmcsGuestNW::GDTR_BASE.write(0)?;
232 | VmcsGuest32::GDTR_LIMIT.write(0xffff)?;
233 | VmcsGuestNW::IDTR_BASE.write(0)?;
234 | VmcsGuest32::IDTR_LIMIT.write(0xffff)?;
235 |
236 | VmcsGuestNW::CR3.write(0)?;
237 | VmcsGuestNW::DR7.write(0x400)?; // NOTE(review): presumably the architectural reset value of DR7 — confirm
238 | VmcsGuestNW::RSP.write(0)?;
239 | VmcsGuestNW::RIP.write(entry)?; // guest execution starts at `entry`
240 | VmcsGuestNW::RFLAGS.write(0x2)?; // NOTE(review): presumably only the always-one reserved bit, so IF starts clear — confirm
241 | VmcsGuestNW::PENDING_DBG_EXCEPTIONS.write(0)?;
242 | VmcsGuestNW::IA32_SYSENTER_ESP.write(0)?;
243 | VmcsGuestNW::IA32_SYSENTER_EIP.write(0)?;
244 | VmcsGuest32::IA32_SYSENTER_CS.write(0)?;
245 |
246 | VmcsGuest32::INTERRUPTIBILITY_STATE.write(0)?;
247 | VmcsGuest32::ACTIVITY_STATE.write(0)?;
248 | VmcsGuest32::VMX_PREEMPTION_TIMER_VALUE.write(0)?;
249 |
250 | VmcsGuest64::LINK_PTR.write(u64::MAX)?; // SDM Vol. 3C, Section 24.4.2
251 | VmcsGuest64::IA32_DEBUGCTL.write(0)?;
252 | VmcsGuest64::IA32_PAT.write(Msr::IA32_PAT.read())?; // guest PAT starts as a copy of the host's current PAT
253 | VmcsGuest64::IA32_EFER.write(0)?;
254 | Ok(())
255 | }
256 |
257 | fn setup_vmcs_control(&mut self, ept_root: HostPhysAddr) -> RvmResult { // programs the VM-execution, VM-exit and VM-entry controls
258 | // Intercept NMI and external interrupts.
259 | use super::vmcs::controls::*;
260 | use PinbasedControls as PinCtrl;
261 | vmcs::set_control(
262 | VmcsControl32::PINBASED_EXEC_CONTROLS,
263 | Msr::IA32_VMX_TRUE_PINBASED_CTLS, // capability MSR: its halves give the must-be-1 and may-be-1 bits
264 | Msr::IA32_VMX_PINBASED_CTLS.read() as u32, // old_value: source of the bits that are neither set nor cleared here
265 | (PinCtrl::NMI_EXITING | PinCtrl::EXTERNAL_INTERRUPT_EXITING).bits(),
266 | 0,
267 | )?;
268 |
269 | // Intercept all I/O instructions, use MSR bitmaps, activate secondary controls,
270 | // disable CR3 load/store interception.
271 | use PrimaryControls as CpuCtrl;
272 | vmcs::set_control(
273 | VmcsControl32::PRIMARY_PROCBASED_EXEC_CONTROLS,
274 | Msr::IA32_VMX_TRUE_PROCBASED_CTLS,
275 | Msr::IA32_VMX_PROCBASED_CTLS.read() as u32,
276 | (CpuCtrl::UNCOND_IO_EXITING | CpuCtrl::USE_MSR_BITMAPS | CpuCtrl::SECONDARY_CONTROLS)
277 | .bits(),
278 | (CpuCtrl::CR3_LOAD_EXITING | CpuCtrl::CR3_STORE_EXITING).bits(),
279 | )?;
280 |
281 | // Enable EPT, RDTSCP, INVPCID, and unrestricted guest.
282 | use SecondaryControls as CpuCtrl2;
283 | vmcs::set_control(
284 | VmcsControl32::SECONDARY_PROCBASED_EXEC_CONTROLS,
285 | Msr::IA32_VMX_PROCBASED_CTLS2,
286 | 0, // old_value 0: every bit not requested below stays clear
287 | (CpuCtrl2::ENABLE_EPT
288 | | CpuCtrl2::ENABLE_RDTSCP
289 | | CpuCtrl2::ENABLE_INVPCID
290 | | CpuCtrl2::UNRESTRICTED_GUEST)
291 | .bits(),
292 | 0,
293 | )?;
294 |
295 | // Switch to 64-bit host, acknowledge interrupt info, switch IA32_PAT/IA32_EFER on VM exit.
296 | use ExitControls as ExitCtrl;
297 | vmcs::set_control(
298 | VmcsControl32::VMEXIT_CONTROLS,
299 | Msr::IA32_VMX_TRUE_EXIT_CTLS,
300 | Msr::IA32_VMX_EXIT_CTLS.read() as u32,
301 | (ExitCtrl::HOST_ADDRESS_SPACE_SIZE
302 | | ExitCtrl::ACK_INTERRUPT_ON_EXIT
303 | | ExitCtrl::SAVE_IA32_PAT
304 | | ExitCtrl::LOAD_IA32_PAT
305 | | ExitCtrl::SAVE_IA32_EFER
306 | | ExitCtrl::LOAD_IA32_EFER)
307 | .bits(),
308 | 0,
309 | )?;
310 |
311 | // Load guest IA32_PAT/IA32_EFER on VM entry.
312 | use EntryControls as EntryCtrl;
313 | vmcs::set_control(
314 | VmcsControl32::VMENTRY_CONTROLS,
315 | Msr::IA32_VMX_TRUE_ENTRY_CTLS,
316 | Msr::IA32_VMX_ENTRY_CTLS.read() as u32,
317 | (EntryCtrl::LOAD_IA32_PAT | EntryCtrl::LOAD_IA32_EFER).bits(),
318 | 0,
319 | )?;
320 |
321 | vmcs::set_ept_pointer(ept_root)?; // writes EPTP and issues a single-context INVEPT
322 |
323 | // No MSR switches if hypervisor doesn't use and there is only one vCPU.
324 | VmcsControl32::VMEXIT_MSR_STORE_COUNT.write(0)?;
325 | VmcsControl32::VMEXIT_MSR_LOAD_COUNT.write(0)?;
326 | VmcsControl32::VMENTRY_MSR_LOAD_COUNT.write(0)?;
327 |
328 | // Pass-through exceptions, don't use I/O bitmap, set MSR bitmaps.
329 | VmcsControl32::EXCEPTION_BITMAP.write(0)?;
330 | VmcsControl64::IO_BITMAP_A_ADDR.write(0)?;
331 | VmcsControl64::IO_BITMAP_B_ADDR.write(0)?;
332 | VmcsControl64::MSR_BITMAPS_ADDR.write(self.msr_bitmap.phys_addr() as _)?; // the bitmap filled in by `setup_msr_bitmap`
333 | Ok(())
334 | }
335 |
336 | #[naked]
337 | unsafe extern "C" fn vmx_launch(&mut self) -> ! { // first entry into the guest; `rdi` holds `&mut self`
338 | asm!(
339 | "mov [rdi + {host_stack_top}], rsp", // save current RSP to Vcpu::host_stack_top
340 | "mov rsp, rdi", // set RSP to guest regs area
341 | restore_regs_from_stack!(),
342 | "vmlaunch",
343 | "jmp {failed}", // only reached when VMLAUNCH itself fails
344 | host_stack_top = const size_of::<GeneralRegisters>(), // offset of `host_stack_top`: assumes it directly follows the guest register area at the start of the vCPU — confirm against the struct layout
345 | failed = sym Self::vmx_entry_failed,
346 | options(noreturn),
347 | )
348 | }
349 |
350 | #[naked]
351 | unsafe extern "C" fn vmx_exit(&mut self) -> ! { // host RIP after every VM exit (installed by `setup_vmcs_host`)
352 | asm!(
353 | save_regs_to_stack!(),
354 | "mov r15, rsp", // save temporary RSP to r15
355 | "mov rdi, rsp", // set the first arg to &Vcpu
356 | "mov rsp, [rsp + {host_stack_top}]", // set RSP to Vcpu::host_stack_top
357 | "call {vmexit_handler}", // call vmexit_handler
358 | "mov rsp, r15", // load temporary RSP from r15
359 | restore_regs_from_stack!(),
360 | "vmresume",
361 | "jmp {failed}", // only reached when VMRESUME itself fails
362 | host_stack_top = const size_of::<GeneralRegisters>(), // offset of `host_stack_top`: assumes it directly follows the guest register area at the start of the vCPU — confirm against the struct layout
363 | vmexit_handler = sym Self::vmexit_handler,
364 | failed = sym Self::vmx_entry_failed,
365 | options(noreturn),
366 | );
367 | }
368 |
369 | fn vmx_entry_failed() -> ! { // jump target of the launch/resume stubs when VM entry fails
370 | panic!("{}", vmcs::instruction_error().as_str()) // panic message is the decoded VM-instruction error
371 | }
372 |
373 | /// Whether an interrupt can be delivered to the guest right now: `RFLAGS.IF` is set and the interruptibility state is 0. (SDM Vol. 3C, Section 24.4.2, Table 24-3)
374 | fn allow_interrupt(&self) -> bool {
375 | let rflags = VmcsGuestNW::RFLAGS.read().unwrap(); // panics if the VMCS read fails
376 | let block_state = VmcsGuest32::INTERRUPTIBILITY_STATE.read().unwrap();
377 | rflags as u64 & x86_64::registers::rflags::RFlags::INTERRUPT_FLAG.bits() != 0
378 | && block_state == 0
379 | }
380 |
381 | /// Try to inject the oldest pending event before the next VM entry.
382 | fn check_pending_events(&mut self) -> RvmResult {
383 | if let Some(&(vector, err_code)) = self.pending_events.front() {
384 | // exceptions (vector < 32) always go in; interrupts only when not blocked.
385 | if vector < 32 || self.allow_interrupt() {
386 | vmcs::inject_event(vector, err_code)?;
387 | self.pending_events.pop_front();
388 | } else {
389 | // blocked interrupt: request a VM exit once the guest can accept it.
390 | self.set_interrupt_window(true)?;
391 | }
392 | }
393 | Ok(())
394 | }
395 |
396 | fn vmexit_handler(&mut self) { // called from the `vmx_exit` stub on the host stack
397 | H::vmexit_handler(self); // the HAL-provided handler runs first
398 | // Check if there is an APIC timer interrupt
399 | if self.apic_timer.check_interrupt() {
400 | self.inject_event(self.apic_timer.vector(), None); // queued only; actual injection happens below
401 | }
402 | self.check_pending_events().unwrap(); // panics if a VMCS access fails while injecting
403 | }
404 | }
405 |
406 | impl Drop for VmxVcpu { // VMCLEARs this vCPU's VMCS when the vCPU is dropped
407 | fn drop(&mut self) {
408 | unsafe { vmx::vmclear(self.vmcs.phys_addr() as u64).unwrap() }; // panics inside `drop` if VMCLEAR fails
409 | info!("[RVM] dropped VmxVcpu(vmcs: {:#x})", self.vmcs.phys_addr());
410 | }
411 | }
412 |
413 | fn get_tr_base(tr: SegmentSelector, gdt: &DescriptorTablePointer<u64>) -> u64 { // base address of the TSS selected by `tr`, or 0 if its descriptor is not present
414 | let index = tr.index() as usize;
415 | let table_len = (gdt.limit as usize + 1) / core::mem::size_of::<u64>(); // `limit` is the last valid byte offset, hence the +1
416 | let table = unsafe { core::slice::from_raw_parts(gdt.base, table_len) }; // SAFETY: relies on `gdt` describing the live GDT (the caller fills it with `sgdt`)
417 | let entry = table[index]; // indexing panics if the selector lies outside the table
418 | if entry & (1 << 47) != 0 {
419 | // present
420 | let base_low = entry.get_bits(16..40) | entry.get_bits(56..64) << 24; // base bits 0..32 live in two pieces of the first word
421 | let base_high = table[index + 1] & 0xffff_ffff; // the descriptor spans two entries; base bits 32..64 are in the second
422 | base_low | base_high << 32
423 | } else {
424 | // not present
425 | 0
426 | }
427 | }
428 |
429 | impl Debug for VmxVcpu { // prints the saved registers plus a selection of guest-state VMCS fields
430 | fn fmt(&self, f: &mut Formatter) -> Result {
431 | (|| -> RvmResult { // closure so that `?` can be used on the VMCS reads below
432 | Ok(f.debug_struct("VmxVcpu")
433 | .field("guest_regs", &self.guest_regs)
434 | .field("rip", &VmcsGuestNW::RIP.read()?)
435 | .field("rsp", &VmcsGuestNW::RSP.read()?)
436 | .field("rflags", &VmcsGuestNW::RFLAGS.read()?)
437 | .field("cr0", &VmcsGuestNW::CR0.read()?)
438 | .field("cr3", &VmcsGuestNW::CR3.read()?)
439 | .field("cr4", &VmcsGuestNW::CR4.read()?)
440 | .field("cs", &VmcsGuest16::CS_SELECTOR.read()?)
441 | .field("fs_base", &VmcsGuestNW::FS_BASE.read()?)
442 | .field("gs_base", &VmcsGuestNW::GS_BASE.read()?)
443 | .field("tss", &VmcsGuest16::TR_SELECTOR.read()?)
444 | .finish())
445 | })()
446 | .unwrap() // panics if any of the VMCS reads above failed
447 | }
448 | }
449 |
--------------------------------------------------------------------------------
/rvm/src/arch/x86_64/vmx/vmcs.rs:
--------------------------------------------------------------------------------
1 | #![allow(dead_code)]
2 | #![deny(missing_docs)]
3 | #![allow(non_camel_case_types)]
4 | #![allow(clippy::upper_case_acronyms)]
5 |
6 | use bit_field::BitField;
7 | use x86::bits64::vmx;
8 |
9 | use super::definitions::{VmxExitReason, VmxInstructionError, VmxInterruptionType};
10 | use crate::{arch::msr::Msr, HostPhysAddr, MemFlags, NestedPageFaultInfo, RvmResult};
11 |
12 | macro_rules! vmcs_read { // generates `read(self)` for a VMCS field enum; the discriminant is the field encoding
13 | ($field_enum: ident, u64) => {
14 | impl $field_enum {
15 | pub fn read(self) -> x86::vmx::Result<u64> {
16 | #[cfg(target_pointer_width = "64")]
17 | unsafe {
18 | vmx::vmread(self as u32)
19 | }
20 | #[cfg(target_pointer_width = "32")]
21 | unsafe {
22 | let field = self as u32; // 32-bit targets read the low half at `field` and the high half at `field + 1`
23 | Ok(vmx::vmread(field)? + (vmx::vmread(field + 1)? << 32))
24 | }
25 | }
26 | }
27 | };
28 | ($field_enum: ident, $ux: ty) => {
29 | impl $field_enum {
30 | pub fn read(self) -> x86::vmx::Result<$ux> {
31 | unsafe { vmx::vmread(self as u32).map(|v| v as $ux) } // narrow the raw value to the field's declared width
32 | }
33 | }
34 | };
35 | }
36 |
37 | macro_rules! vmcs_write { // generates `write(self, value)` for a VMCS field enum; the discriminant is the field encoding
38 | ($field_enum: ident, u64) => {
39 | impl $field_enum {
40 | pub fn write(self, value: u64) -> x86::vmx::Result<()> {
41 | #[cfg(target_pointer_width = "64")]
42 | unsafe {
43 | vmx::vmwrite(self as u32, value)
44 | }
45 | #[cfg(target_pointer_width = "32")]
46 | unsafe {
47 | let field = self as u32; // 32-bit targets write the low half to `field` and the high half to `field + 1`
48 | vmx::vmwrite(field, value & 0xffff_ffff)?;
49 | vmx::vmwrite(field + 1, value >> 32)?;
50 | Ok(())
51 | }
52 | }
53 | }
54 | };
55 | ($field_enum: ident, $ux: ty) => {
56 | impl $field_enum {
57 | pub fn write(self, value: $ux) -> x86::vmx::Result<()> {
58 | unsafe { vmx::vmwrite(self as u32, value as u64) } // widen to the u64 operand `vmwrite` takes
59 | }
60 | }
61 | };
62 | }
63 |
64 | /// 16-Bit Control Fields; each discriminant is the field encoding used by `read`/`write`. (SDM Vol. 3D, Appendix B.1.1)
65 | #[derive(Clone, Copy, Debug)]
66 | pub enum VmcsControl16 {
67 | /// Virtual-processor identifier (VPID).
68 | VPID = 0x0,
69 | /// Posted-interrupt notification vector.
70 | POSTED_INTERRUPT_NOTIFICATION_VECTOR = 0x2,
71 | /// EPTP index.
72 | EPTP_INDEX = 0x4,
73 | }
74 | vmcs_read!(VmcsControl16, u16); // adds `read(self) -> x86::vmx::Result<u16>`
75 | vmcs_write!(VmcsControl16, u16); // adds `write(self, value: u16)`
76 |
77 | /// 64-Bit Control Fields; each discriminant is the field encoding used by `read`/`write`. (SDM Vol. 3D, Appendix B.2.1)
78 | #[derive(Clone, Copy, Debug)]
79 | pub enum VmcsControl64 {
80 | /// Address of I/O bitmap A (full).
81 | IO_BITMAP_A_ADDR = 0x2000,
82 | /// Address of I/O bitmap B (full).
83 | IO_BITMAP_B_ADDR = 0x2002,
84 | /// Address of MSR bitmaps (full).
85 | MSR_BITMAPS_ADDR = 0x2004,
86 | /// VM-exit MSR-store address (full).
87 | VMEXIT_MSR_STORE_ADDR = 0x2006,
88 | /// VM-exit MSR-load address (full).
89 | VMEXIT_MSR_LOAD_ADDR = 0x2008,
90 | /// VM-entry MSR-load address (full).
91 | VMENTRY_MSR_LOAD_ADDR = 0x200A,
92 | /// Executive-VMCS pointer (full).
93 | EXECUTIVE_VMCS_PTR = 0x200C,
94 | /// PML address (full).
95 | PML_ADDR = 0x200E,
96 | /// TSC offset (full).
97 | TSC_OFFSET = 0x2010,
98 | /// Virtual-APIC address (full).
99 | VIRT_APIC_ADDR = 0x2012,
100 | /// APIC-access address (full).
101 | APIC_ACCESS_ADDR = 0x2014,
102 | /// Posted-interrupt descriptor address (full).
103 | POSTED_INTERRUPT_DESC_ADDR = 0x2016,
104 | /// VM-function controls (full).
105 | VM_FUNCTION_CONTROLS = 0x2018,
106 | /// EPT pointer (full).
107 | EPTP = 0x201A,
108 | /// EOI-exit bitmap 0 (full).
109 | EOI_EXIT0 = 0x201C,
110 | /// EOI-exit bitmap 1 (full).
111 | EOI_EXIT1 = 0x201E,
112 | /// EOI-exit bitmap 2 (full).
113 | EOI_EXIT2 = 0x2020,
114 | /// EOI-exit bitmap 3 (full).
115 | EOI_EXIT3 = 0x2022,
116 | /// EPTP-list address (full).
117 | EPTP_LIST_ADDR = 0x2024,
118 | /// VMREAD-bitmap address (full).
119 | VMREAD_BITMAP_ADDR = 0x2026,
120 | /// VMWRITE-bitmap address (full).
121 | VMWRITE_BITMAP_ADDR = 0x2028,
122 | /// Virtualization-exception information address (full).
123 | VIRT_EXCEPTION_INFO_ADDR = 0x202A,
124 | /// XSS-exiting bitmap (full).
125 | XSS_EXITING_BITMAP = 0x202C,
126 | /// ENCLS-exiting bitmap (full).
127 | ENCLS_EXITING_BITMAP = 0x202E,
128 | /// Sub-page-permission-table pointer (full).
129 | SUBPAGE_PERM_TABLE_PTR = 0x2030,
130 | /// TSC multiplier (full).
131 | TSC_MULTIPLIER = 0x2032,
132 | }
133 | vmcs_read!(VmcsControl64, u64); // u64 arm: one access on 64-bit targets, two halves on 32-bit targets
134 | vmcs_write!(VmcsControl64, u64); // u64 arm: one access on 64-bit targets, two halves on 32-bit targets
135 |
136 | /// 32-Bit Control Fields; each discriminant is the field encoding used by `read`/`write`. (SDM Vol. 3D, Appendix B.3.1)
137 | #[derive(Clone, Copy, Debug)]
138 | pub enum VmcsControl32 {
139 | /// Pin-based VM-execution controls.
140 | PINBASED_EXEC_CONTROLS = 0x4000,
141 | /// Primary processor-based VM-execution controls.
142 | PRIMARY_PROCBASED_EXEC_CONTROLS = 0x4002,
143 | /// Exception bitmap.
144 | EXCEPTION_BITMAP = 0x4004,
145 | /// Page-fault error-code mask.
146 | PAGE_FAULT_ERR_CODE_MASK = 0x4006,
147 | /// Page-fault error-code match.
148 | PAGE_FAULT_ERR_CODE_MATCH = 0x4008,
149 | /// CR3-target count.
150 | CR3_TARGET_COUNT = 0x400A,
151 | /// VM-exit controls.
152 | VMEXIT_CONTROLS = 0x400C,
153 | /// VM-exit MSR-store count.
154 | VMEXIT_MSR_STORE_COUNT = 0x400E,
155 | /// VM-exit MSR-load count.
156 | VMEXIT_MSR_LOAD_COUNT = 0x4010,
157 | /// VM-entry controls.
158 | VMENTRY_CONTROLS = 0x4012,
159 | /// VM-entry MSR-load count.
160 | VMENTRY_MSR_LOAD_COUNT = 0x4014,
161 | /// VM-entry interruption-information field.
162 | VMENTRY_INTERRUPTION_INFO_FIELD = 0x4016,
163 | /// VM-entry exception error code.
164 | VMENTRY_EXCEPTION_ERR_CODE = 0x4018,
165 | /// VM-entry instruction length.
166 | VMENTRY_INSTRUCTION_LEN = 0x401A,
167 | /// TPR threshold.
168 | TPR_THRESHOLD = 0x401C,
169 | /// Secondary processor-based VM-execution controls.
170 | SECONDARY_PROCBASED_EXEC_CONTROLS = 0x401E,
171 | /// PLE_Gap.
172 | PLE_GAP = 0x4020,
173 | /// PLE_Window.
174 | PLE_WINDOW = 0x4022,
175 | }
176 | vmcs_read!(VmcsControl32, u32); // adds `read(self) -> x86::vmx::Result<u32>`
177 | vmcs_write!(VmcsControl32, u32); // adds `write(self, value: u32)`
178 |
179 | /// Natural-Width Control Fields, accessed as `usize`; each discriminant is the field encoding. (SDM Vol. 3D, Appendix B.4.1)
180 | #[derive(Clone, Copy, Debug)]
181 | pub enum VmcsControlNW {
182 | /// CR0 guest/host mask.
183 | CR0_GUEST_HOST_MASK = 0x6000,
184 | /// CR4 guest/host mask.
185 | CR4_GUEST_HOST_MASK = 0x6002,
186 | /// CR0 read shadow.
187 | CR0_READ_SHADOW = 0x6004,
188 | /// CR4 read shadow.
189 | CR4_READ_SHADOW = 0x6006,
190 | /// CR3-target value 0.
191 | CR3_TARGET_VALUE0 = 0x6008,
192 | /// CR3-target value 1.
193 | CR3_TARGET_VALUE1 = 0x600A,
194 | /// CR3-target value 2.
195 | CR3_TARGET_VALUE2 = 0x600C,
196 | /// CR3-target value 3.
197 | CR3_TARGET_VALUE3 = 0x600E,
198 | }
199 | vmcs_read!(VmcsControlNW, usize); // adds `read(self) -> x86::vmx::Result<usize>`
200 | vmcs_write!(VmcsControlNW, usize); // adds `write(self, value: usize)`
201 |
202 | /// 16-Bit Guest-State Fields; each discriminant is the field encoding used by `read`/`write`. (SDM Vol. 3D, Appendix B.1.2)
203 | #[derive(Clone, Copy, Debug)] pub enum VmcsGuest16 {
204 | /// Guest ES selector.
205 | ES_SELECTOR = 0x800,
206 | /// Guest CS selector.
207 | CS_SELECTOR = 0x802,
208 | /// Guest SS selector.
209 | SS_SELECTOR = 0x804,
210 | /// Guest DS selector.
211 | DS_SELECTOR = 0x806,
212 | /// Guest FS selector.
213 | FS_SELECTOR = 0x808,
214 | /// Guest GS selector.
215 | GS_SELECTOR = 0x80A,
216 | /// Guest LDTR selector.
217 | LDTR_SELECTOR = 0x80C,
218 | /// Guest TR selector.
219 | TR_SELECTOR = 0x80E,
220 | /// Guest interrupt status.
221 | INTERRUPT_STATUS = 0x810,
222 | /// PML index.
223 | PML_INDEX = 0x812,
224 | }
225 | vmcs_read!(VmcsGuest16, u16); // adds `read(self) -> x86::vmx::Result<u16>`
226 | vmcs_write!(VmcsGuest16, u16); // adds `write(self, value: u16)`
227 |
228 | /// 64-Bit Guest-State Fields; each discriminant is the field encoding used by `read`/`write`. (SDM Vol. 3D, Appendix B.2.3)
229 | #[derive(Clone, Copy, Debug)]
230 | pub enum VmcsGuest64 {
231 | /// VMCS link pointer (full).
232 | LINK_PTR = 0x2800,
233 | /// Guest IA32_DEBUGCTL (full).
234 | IA32_DEBUGCTL = 0x2802,
235 | /// Guest IA32_PAT (full).
236 | IA32_PAT = 0x2804,
237 | /// Guest IA32_EFER (full).
238 | IA32_EFER = 0x2806,
239 | /// Guest IA32_PERF_GLOBAL_CTRL (full).
240 | IA32_PERF_GLOBAL_CTRL = 0x2808,
241 | /// Guest PDPTE0 (full).
242 | PDPTE0 = 0x280A,
243 | /// Guest PDPTE1 (full).
244 | PDPTE1 = 0x280C,
245 | /// Guest PDPTE2 (full).
246 | PDPTE2 = 0x280E,
247 | /// Guest PDPTE3 (full).
248 | PDPTE3 = 0x2810,
249 | /// Guest IA32_BNDCFGS (full).
250 | IA32_BNDCFGS = 0x2812,
251 | /// Guest IA32_RTIT_CTL (full).
252 | IA32_RTIT_CTL = 0x2814,
253 | }
254 | vmcs_read!(VmcsGuest64, u64); // u64 arm: one access on 64-bit targets, two halves on 32-bit targets
255 | vmcs_write!(VmcsGuest64, u64); // u64 arm: one access on 64-bit targets, two halves on 32-bit targets
256 |
257 | /// 32-Bit Guest-State Fields; each discriminant is the field encoding used by `read`/`write`. (SDM Vol. 3D, Appendix B.3.3)
258 | #[derive(Clone, Copy, Debug)]
259 | pub enum VmcsGuest32 {
260 | /// Guest ES limit.
261 | ES_LIMIT = 0x4800,
262 | /// Guest CS limit.
263 | CS_LIMIT = 0x4802,
264 | /// Guest SS limit.
265 | SS_LIMIT = 0x4804,
266 | /// Guest DS limit.
267 | DS_LIMIT = 0x4806,
268 | /// Guest FS limit.
269 | FS_LIMIT = 0x4808,
270 | /// Guest GS limit.
271 | GS_LIMIT = 0x480A,
272 | /// Guest LDTR limit.
273 | LDTR_LIMIT = 0x480C,
274 | /// Guest TR limit.
275 | TR_LIMIT = 0x480E,
276 | /// Guest GDTR limit.
277 | GDTR_LIMIT = 0x4810,
278 | /// Guest IDTR limit.
279 | IDTR_LIMIT = 0x4812,
280 | /// Guest ES access rights.
281 | ES_ACCESS_RIGHTS = 0x4814,
282 | /// Guest CS access rights.
283 | CS_ACCESS_RIGHTS = 0x4816,
284 | /// Guest SS access rights.
285 | SS_ACCESS_RIGHTS = 0x4818,
286 | /// Guest DS access rights.
287 | DS_ACCESS_RIGHTS = 0x481A,
288 | /// Guest FS access rights.
289 | FS_ACCESS_RIGHTS = 0x481C,
290 | /// Guest GS access rights.
291 | GS_ACCESS_RIGHTS = 0x481E,
292 | /// Guest LDTR access rights.
293 | LDTR_ACCESS_RIGHTS = 0x4820,
294 | /// Guest TR access rights.
295 | TR_ACCESS_RIGHTS = 0x4822,
296 | /// Guest interruptibility state.
297 | INTERRUPTIBILITY_STATE = 0x4824,
298 | /// Guest activity state.
299 | ACTIVITY_STATE = 0x4826,
300 | /// Guest SMBASE.
301 | SMBASE = 0x4828,
302 | /// Guest IA32_SYSENTER_CS.
303 | IA32_SYSENTER_CS = 0x482A,
304 | /// VMX-preemption timer value.
305 | VMX_PREEMPTION_TIMER_VALUE = 0x482E,
306 | }
307 | vmcs_read!(VmcsGuest32, u32); // adds `read(self) -> x86::vmx::Result<u32>`
308 | vmcs_write!(VmcsGuest32, u32); // adds `write(self, value: u32)`
309 |
310 | /// Natural-Width Guest-State Fields, accessed as `usize`; each discriminant is the field encoding. (SDM Vol. 3D, Appendix B.4.3)
311 | #[derive(Clone, Copy, Debug)]
312 | pub enum VmcsGuestNW {
313 | /// Guest CR0.
314 | CR0 = 0x6800,
315 | /// Guest CR3.
316 | CR3 = 0x6802,
317 | /// Guest CR4.
318 | CR4 = 0x6804,
319 | /// Guest ES base.
320 | ES_BASE = 0x6806,
321 | /// Guest CS base.
322 | CS_BASE = 0x6808,
323 | /// Guest SS base.
324 | SS_BASE = 0x680A,
325 | /// Guest DS base.
326 | DS_BASE = 0x680C,
327 | /// Guest FS base.
328 | FS_BASE = 0x680E,
329 | /// Guest GS base.
330 | GS_BASE = 0x6810,
331 | /// Guest LDTR base.
332 | LDTR_BASE = 0x6812,
333 | /// Guest TR base.
334 | TR_BASE = 0x6814,
335 | /// Guest GDTR base.
336 | GDTR_BASE = 0x6816,
337 | /// Guest IDTR base.
338 | IDTR_BASE = 0x6818,
339 | /// Guest DR7.
340 | DR7 = 0x681A,
341 | /// Guest RSP.
342 | RSP = 0x681C,
343 | /// Guest RIP.
344 | RIP = 0x681E,
345 | /// Guest RFLAGS.
346 | RFLAGS = 0x6820,
347 | /// Guest pending debug exceptions.
348 | PENDING_DBG_EXCEPTIONS = 0x6822,
349 | /// Guest IA32_SYSENTER_ESP.
350 | IA32_SYSENTER_ESP = 0x6824,
351 | /// Guest IA32_SYSENTER_EIP.
352 | IA32_SYSENTER_EIP = 0x6826,
353 | }
354 | vmcs_read!(VmcsGuestNW, usize); // adds `read(self) -> x86::vmx::Result<usize>`
355 | vmcs_write!(VmcsGuestNW, usize); // adds `write(self, value: usize)`
356 |
357 | /// 16-Bit Host-State Fields; each discriminant is the field encoding used by `read`/`write`. (SDM Vol. 3D, Appendix B.1.3)
358 | #[derive(Clone, Copy, Debug)]
359 | pub enum VmcsHost16 {
360 | /// Host ES selector.
361 | ES_SELECTOR = 0xC00,
362 | /// Host CS selector.
363 | CS_SELECTOR = 0xC02,
364 | /// Host SS selector.
365 | SS_SELECTOR = 0xC04,
366 | /// Host DS selector.
367 | DS_SELECTOR = 0xC06,
368 | /// Host FS selector.
369 | FS_SELECTOR = 0xC08,
370 | /// Host GS selector.
371 | GS_SELECTOR = 0xC0A,
372 | /// Host TR selector.
373 | TR_SELECTOR = 0xC0C,
374 | }
375 | vmcs_read!(VmcsHost16, u16); // adds `read(self) -> x86::vmx::Result<u16>`
376 | vmcs_write!(VmcsHost16, u16); // adds `write(self, value: u16)`
377 |
378 | /// 64-Bit Host-State Fields; each discriminant is the field encoding used by `read`/`write`. (SDM Vol. 3D, Appendix B.2.4)
379 | #[derive(Clone, Copy, Debug)]
380 | pub enum VmcsHost64 {
381 | /// Host IA32_PAT (full).
382 | IA32_PAT = 0x2C00,
383 | /// Host IA32_EFER (full).
384 | IA32_EFER = 0x2C02,
385 | /// Host IA32_PERF_GLOBAL_CTRL (full).
386 | IA32_PERF_GLOBAL_CTRL = 0x2C04,
387 | }
388 | vmcs_read!(VmcsHost64, u64); // u64 arm: one access on 64-bit targets, two halves on 32-bit targets
389 | vmcs_write!(VmcsHost64, u64); // u64 arm: one access on 64-bit targets, two halves on 32-bit targets
390 |
391 | /// 32-Bit Host-State Field; the discriminant is the field encoding used by `read`/`write`. (SDM Vol. 3D, Appendix B.3.4)
392 | #[derive(Clone, Copy, Debug)]
393 | pub enum VmcsHost32 {
394 | /// Host IA32_SYSENTER_CS.
395 | IA32_SYSENTER_CS = 0x4C00,
396 | }
397 | vmcs_read!(VmcsHost32, u32); // adds `read(self) -> x86::vmx::Result<u32>`
398 | vmcs_write!(VmcsHost32, u32); // adds `write(self, value: u32)`
399 |
400 | /// Natural-Width Host-State Fields, accessed as `usize`; each discriminant is the field encoding. (SDM Vol. 3D, Appendix B.4.4)
401 | #[derive(Clone, Copy, Debug)]
402 | pub enum VmcsHostNW {
403 | /// Host CR0.
404 | CR0 = 0x6C00,
405 | /// Host CR3.
406 | CR3 = 0x6C02,
407 | /// Host CR4.
408 | CR4 = 0x6C04,
409 | /// Host FS base.
410 | FS_BASE = 0x6C06,
411 | /// Host GS base.
412 | GS_BASE = 0x6C08,
413 | /// Host TR base.
414 | TR_BASE = 0x6C0A,
415 | /// Host GDTR base.
416 | GDTR_BASE = 0x6C0C,
417 | /// Host IDTR base.
418 | IDTR_BASE = 0x6C0E,
419 | /// Host IA32_SYSENTER_ESP.
420 | IA32_SYSENTER_ESP = 0x6C10,
421 | /// Host IA32_SYSENTER_EIP.
422 | IA32_SYSENTER_EIP = 0x6C12,
423 | /// Host RSP.
424 | RSP = 0x6C14,
425 | /// Host RIP.
426 | RIP = 0x6C16,
427 | }
428 | vmcs_read!(VmcsHostNW, usize); // adds `read(self) -> x86::vmx::Result<usize>`
429 | vmcs_write!(VmcsHostNW, usize); // adds `write(self, value: usize)`
430 |
431 | /// 64-Bit Read-Only Data Fields; the discriminant is the field encoding used by `read`. (SDM Vol. 3D, Appendix B.2.2)
432 | #[derive(Clone, Copy, Debug)]
433 | pub enum VmcsReadOnly64 {
434 | /// Guest-physical address (full).
435 | GUEST_PHYSICAL_ADDR = 0x2400,
436 | }
437 | vmcs_read!(VmcsReadOnly64, u64); // read-only: no `write` is generated for this enum
438 |
439 | /// 32-Bit Read-Only Data Fields; each discriminant is the field encoding used by `read`. (SDM Vol. 3D, Appendix B.3.2)
440 | #[derive(Clone, Copy, Debug)]
441 | pub enum VmcsReadOnly32 {
442 | /// VM-instruction error.
443 | VM_INSTRUCTION_ERROR = 0x4400,
444 | /// Exit reason.
445 | EXIT_REASON = 0x4402,
446 | /// VM-exit interruption information.
447 | VMEXIT_INTERRUPTION_INFO = 0x4404,
448 | /// VM-exit interruption error code.
449 | VMEXIT_INTERRUPTION_ERR_CODE = 0x4406,
450 | /// IDT-vectoring information field.
451 | IDT_VECTORING_INFO = 0x4408,
452 | /// IDT-vectoring error code.
453 | IDT_VECTORING_ERR_CODE = 0x440A,
454 | /// VM-exit instruction length.
455 | VMEXIT_INSTRUCTION_LEN = 0x440C,
456 | /// VM-exit instruction information.
457 | VMEXIT_INSTRUCTION_INFO = 0x440E,
458 | }
459 | vmcs_read!(VmcsReadOnly32, u32); // read-only: no `write` is generated for this enum
460 |
461 | /// Natural-Width Read-Only Data Fields, read as `usize`; each discriminant is the field encoding. (SDM Vol. 3D, Appendix B.4.2)
462 | #[derive(Clone, Copy, Debug)]
463 | pub enum VmcsReadOnlyNW {
464 | /// Exit qualification.
465 | EXIT_QUALIFICATION = 0x6400,
466 | /// I/O RCX.
467 | IO_RCX = 0x6402,
468 | /// I/O RSI.
469 | IO_RSI = 0x6404,
470 | /// I/O RDI.
471 | IO_RDI = 0x6406,
472 | /// I/O RIP.
473 | IO_RIP = 0x6408,
474 | /// Guest-linear address.
475 | GUEST_LINEAR_ADDR = 0x640A,
476 | }
477 | vmcs_read!(VmcsReadOnlyNW, usize); // read-only: no `write` is generated for this enum
478 |
479 | /// VM-Exit Information, as decoded by `exit_info`. (SDM Vol. 3C, Section 24.9.1)
480 | #[derive(Debug)]
481 | pub struct VmxExitInfo {
482 | /// VM-entry failure (bit 31 of the exit reason field). (0 = true VM exit; 1 = VM-entry failure)
483 | pub entry_failure: bool,
484 | /// Basic exit reason (bits 15:0 of the exit reason field).
485 | pub exit_reason: VmxExitReason,
486 | /// For VM exits resulting from instruction execution, this field receives
487 | /// the length in bytes of the instruction whose execution led to the VM exit.
488 | pub exit_instruction_length: u32,
489 | /// Guest `RIP` where the VM exit occurs.
490 | pub guest_rip: usize,
491 | }
492 |
493 | /// VM-Entry/VM-Exit Interruption-Information Field. (SDM Vol. 3C, Section 24.8.3, 24.9.2)
494 | #[derive(Debug)]
495 | pub struct VmxInterruptInfo {
496 | /// Vector of interrupt or exception (bits 7:0 of the raw field).
497 | pub vector: u8,
498 | /// Determines details of how the injection is performed (bits 10:8 of the raw field).
499 | pub int_type: VmxInterruptionType,
500 | /// For hardware exceptions that would have delivered an error code on the stack.
501 | pub err_code: Option<u32>,
502 | /// Whether the field is valid (bit 31 of the raw field).
503 | pub valid: bool,
504 | }
505 |
506 | impl VmxInterruptInfo {
507 | /// Build a valid entry from the interrupt vector and the optional error code; the type is derived from the vector.
508 | pub fn from(vector: u8, err_code: Option<u32>) -> Self {
509 | Self {
510 | vector,
511 | int_type: VmxInterruptionType::from_vector(vector),
512 | err_code,
513 | valid: true,
514 | }
515 | }
516 |
517 | /// Raw bits for writing to VMCS: vector in bits 7:0, type from bit 8, error-code flag in bit 11, valid in bit 31.
518 | pub fn bits(&self) -> u32 {
519 | let mut bits = self.vector as u32;
520 | bits |= (self.int_type as u32) << 8;
521 | bits.set_bit(11, self.err_code.is_some()); // only the presence of an error code is encoded here, not its value
522 | bits.set_bit(31, self.valid);
523 | bits
524 | }
525 | }
526 |
527 | /// Exit Qualification for I/O Instructions, as decoded by `io_exit_info`. (SDM Vol. 3C, Section 27.2.1, Table 27-5)
528 | #[derive(Debug)]
529 | pub struct VmxIoExitInfo {
530 | /// Size of access (exit-qualification bits 2:0 plus one).
531 | pub access_size: u8,
532 | /// Direction of the attempted access (0 = OUT, 1 = IN).
533 | pub is_in: bool,
534 | /// String instruction (0 = not string; 1 = string).
535 | pub is_string: bool,
536 | /// REP prefixed (0 = not REP; 1 = REP).
537 | pub is_repeat: bool,
538 | /// Port number. (as specified in DX or in an immediate operand)
539 | pub port: u16,
540 | }
541 |
542 | pub mod controls { // re-exports the VMX control bitflag types of the `x86` crate under one path
543 | pub use x86::vmx::vmcs::control::{EntryControls, ExitControls};
544 | pub use x86::vmx::vmcs::control::{PinbasedControls, PrimaryControls, SecondaryControls};
545 | }
546 |
547 | pub fn set_control( // computes and writes one 32-bit control field, validated against its capability MSR
548 | control: VmcsControl32, // the control field to write
549 | capability_msr: Msr, // low 32 bits: bits that must be 1; high 32 bits: bits that may be 1
550 | old_value: u32, // supplies the value of flexible bits that are in neither `set` nor `clear`
551 | set: u32, // bits the caller wants set
552 | clear: u32, // bits the caller wants cleared
553 | ) -> RvmResult {
554 | let cap = capability_msr.read();
555 | let allowed0 = cap as u32;
556 | let allowed1 = (cap >> 32) as u32;
557 | assert_eq!(allowed0 & allowed1, allowed0); // sanity check: every must-be-1 bit is also allowed to be 1
558 | debug!(
559 | "set {:?}: {:#x} (+{:#x}, -{:#x})",
560 | control, old_value, set, clear
561 | );
562 | if (set & clear) != 0 {
563 | return rvm_err!(
564 | InvalidParam,
565 | format_args!("can not set and clear the same bit in {:?}", control)
566 | );
567 | }
568 | if (allowed1 & set) != set {
569 | // failed if set 0-bits in allowed1
570 | return rvm_err!(
571 | Unsupported,
572 | format_args!("can not set bits {:#x} in {:?}", set, control)
573 | );
574 | }
575 | if (allowed0 & clear) != 0 {
576 | // failed if clear 1-bits in allowed0
577 | return rvm_err!(
578 | Unsupported,
579 | format_args!("can not clear bits {:#x} in {:?}", clear, control)
580 | );
581 | }
582 | // SDM Vol. 3C, Section 31.5.1, Algorithm 3
583 | let flexible = !allowed0 & allowed1; // these bits can be either 0 or 1
584 | let unknown = flexible & !(set | clear); // flexible bits the hypervisor neither sets nor clears
585 | let default = unknown & old_value; // these bits keep the value they have in `old_value`
586 | let fixed1 = allowed0; // these bits are fixed to 1
587 | control.write(fixed1 | default | set)?;
588 | Ok(())
589 | }
590 |
591 | pub fn set_ept_pointer(pml4_paddr: HostPhysAddr) -> RvmResult { // installs the EPT root table and invalidates cached EPT translations for it
592 | use super::instructions::{invept, InvEptType};
593 | let eptp = super::structs::EPTPointer::from_table_phys(pml4_paddr).bits(); // encode the table's physical address as an EPTP value
594 | VmcsControl64::EPTP.write(eptp)?;
595 | unsafe { invept(InvEptType::SingleContext, eptp)? }; // single-context INVEPT for the EPTP just written
596 | Ok(())
597 | }
598 |
599 | pub fn instruction_error() -> VmxInstructionError { // decodes the VM-instruction error field
600 | VmcsReadOnly32::VM_INSTRUCTION_ERROR.read().unwrap().into() // panics if the VMCS read itself fails
601 | }
602 |
603 | pub fn exit_info() -> RvmResult<VmxExitInfo> { // decodes the basic information about the last VM exit
604 | let full_reason = VmcsReadOnly32::EXIT_REASON.read()?;
605 | Ok(VmxExitInfo {
606 | exit_reason: full_reason
607 | .get_bits(0..16) // basic exit reason lives in the low 16 bits
608 | .try_into()
609 | .expect("Unknown VM-exit reason"), // panics on an exit reason `VmxExitReason` cannot represent
610 | entry_failure: full_reason.get_bit(31),
611 | exit_instruction_length: VmcsReadOnly32::VMEXIT_INSTRUCTION_LEN.read()?,
612 | guest_rip: VmcsGuestNW::RIP.read()?,
613 | })
614 | }
615 |
616 | pub fn interrupt_exit_info() -> RvmResult<VmxInterruptInfo> { // decodes the VM-exit interruption-information field
617 | // SDM Vol. 3C, Section 24.9.2
618 | let info = VmcsReadOnly32::VMEXIT_INTERRUPTION_INFO.read()?;
619 | Ok(VmxInterruptInfo {
620 | vector: info.get_bits(0..8) as u8,
621 | int_type: VmxInterruptionType::try_from(info.get_bits(8..11) as u8).unwrap(), // panics on a type value the enum does not define
622 | err_code: if info.get_bit(11) {
623 | Some(VmcsReadOnly32::VMEXIT_INTERRUPTION_ERR_CODE.read()?) // bit 11 says an error code was recorded
624 | } else {
625 | None
626 | },
627 | valid: info.get_bit(31),
628 | })
629 | }
630 |
631 | pub fn inject_event(vector: u8, err_code: Option<u32>) -> RvmResult { // programs the VM-entry fields so `vector` is delivered on the next VM entry
632 | // SDM Vol. 3C, Section 24.8.3
633 | let err_code = match (VmxInterruptionType::vector_has_error_code(vector), err_code) {
634 | (true, None) => Some(VmcsReadOnly32::VMEXIT_INTERRUPTION_ERR_CODE.read()?), // none supplied: reuse the VM-exit interruption error code; a failed read is returned, not a panic
635 | (true, supplied) => supplied,
636 | (false, _) => None, // vectors without an error code never carry one
637 | };
638 | let int_info = VmxInterruptInfo::from(vector, err_code);
639 | if let Some(err_code) = int_info.err_code {
640 | VmcsControl32::VMENTRY_EXCEPTION_ERR_CODE.write(err_code)?;
641 | }
642 | if int_info.int_type.is_soft() {
643 | VmcsControl32::VMENTRY_INSTRUCTION_LEN
644 | .write(VmcsReadOnly32::VMEXIT_INSTRUCTION_LEN.read()?)?; // soft events also copy the VM-exit instruction length into the VM-entry field
645 | }
646 | VmcsControl32::VMENTRY_INTERRUPTION_INFO_FIELD.write(int_info.bits())?; // written last, after the fields it refers to
647 | Ok(())
648 | }
649 |
650 | pub fn io_exit_info() -> RvmResult<VmxIoExitInfo> { // decodes the exit qualification of an I/O-instruction VM exit
651 | // SDM Vol. 3C, Section 27.2.1, Table 27-5
652 | let qualification = VmcsReadOnlyNW::EXIT_QUALIFICATION.read()?;
653 | Ok(VmxIoExitInfo {
654 | access_size: qualification.get_bits(0..3) as u8 + 1, // the field stores the size minus one
655 | is_in: qualification.get_bit(3),
656 | is_string: qualification.get_bit(4),
657 | is_repeat: qualification.get_bit(5),
658 | port: qualification.get_bits(16..32) as u16,
659 | })
660 | }
661 |
662 | pub fn ept_violation_info() -> RvmResult<NestedPageFaultInfo> { // decodes the exit qualification of an EPT-violation VM exit
663 | // SDM Vol. 3C, Section 27.2.1, Table 27-7
664 | let qualification = VmcsReadOnlyNW::EXIT_QUALIFICATION.read()?;
665 | let fault_guest_paddr = VmcsReadOnly64::GUEST_PHYSICAL_ADDR.read()? as usize;
666 | let mut access_flags = MemFlags::empty(); // bits 0, 1, 2 of the qualification map to READ, WRITE, EXECUTE
667 | if qualification.get_bit(0) {
668 | access_flags |= MemFlags::READ;
669 | }
670 | if qualification.get_bit(1) {
671 | access_flags |= MemFlags::WRITE;
672 | }
673 | if qualification.get_bit(2) {
674 | access_flags |= MemFlags::EXECUTE;
675 | }
676 | Ok(NestedPageFaultInfo {
677 | access_flags,
678 | fault_guest_paddr,
679 | })
680 | }
681 |
--------------------------------------------------------------------------------