├── rust-toolchain ├── src ├── guest.dtb ├── sum.rs ├── mlinker.ld ├── empty.rs ├── constants.rs ├── loadaddress.S ├── riscv │ ├── sbi.rs │ ├── mod.rs │ ├── bits.rs │ ├── instructions.rs │ └── csr.rs ├── slinker.ld ├── backtrace.rs ├── scode.S ├── drivers │ ├── macb.rs │ └── mod.rs ├── statics.rs ├── pmptest.rs ├── machine.rs ├── elf.rs ├── mcode.S ├── lib.rs ├── plic.rs ├── memory_region.rs ├── print.rs ├── pfault.rs ├── pmp.rs ├── pagedebug.rs ├── supervisor.rs ├── virtio.rs ├── trap.rs └── fdt.rs ├── hifive_u540.dtb ├── .gitignore ├── .gdbinit ├── Cargo.toml ├── notes.txt ├── .travis.yml ├── third_party └── travis-qemu.sh ├── uEnv.txt ├── uboot-fit-image.its ├── GETTING-STARTED.md ├── README.md ├── Makefile ├── Cargo.lock ├── virtio-order.md ├── interrupt-bug.md ├── LICENSE └── pmp-test-strategy.txt /rust-toolchain: -------------------------------------------------------------------------------- 1 | nightly-2019-12-17 -------------------------------------------------------------------------------- /src/guest.dtb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-pdos/RVirt/HEAD/src/guest.dtb -------------------------------------------------------------------------------- /hifive_u540.dtb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mit-pdos/RVirt/HEAD/hifive_u540.dtb -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | **/*.rs.bk 3 | *~ 4 | */*~ 5 | 6 | fedora-vmlinux 7 | stage4-disk.img 8 | vmlinux.asm 9 | -------------------------------------------------------------------------------- /src/sum.rs: -------------------------------------------------------------------------------- 1 | 2 | // use crate::trap::constants::STATUS_SUM; 3 | 4 | #[inline(always)] 5 | pub fn access_user_memory T>(f: F) -> T { 6 | // csrs!(sstatus, STATUS_SUM); 7 | let t = f(); 8 | // csrc!(sstatus, STATUS_SUM); 9 | t 10 | } 11 | -------------------------------------------------------------------------------- /.gdbinit: -------------------------------------------------------------------------------- 1 | echo + set arch riscv\n 2 | set arch riscv 3 | 4 | echo + target remote localhost:26000\n 5 | target remote localhost:26000 6 | 7 | echo + symbol-file target/riscv64imac-unknown-none-elf/release/rvirt\n 8 | symbol-file target/riscv64imac-unknown-none-elf/release/rvirt 9 | -------------------------------------------------------------------------------- /src/mlinker.ld: -------------------------------------------------------------------------------- 1 | OUTPUT_ARCH( "riscv" ) 2 | ENTRY( _start ) 3 | 4 | SECTIONS 5 | { 6 | . = 0x80200000; 7 | .payload : 8 | { 9 | *(.payload) 10 | } 11 | 12 | /* 13 | Seek backwards to the start of memory. Any additional sections will be 14 | appended directly after this one, and an error will be generated if there 15 | are any overlaps with the payload. 16 | */ 17 | . = 0x80000000; 18 | .text.entrypoint : AT(0x80000000) 19 | { 20 | *(.text.entrypoint) 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /src/empty.rs: -------------------------------------------------------------------------------- 1 | #![no_std] 2 | #![feature(asm)] 3 | #![feature(const_str_len)] 4 | #![feature(global_asm)] 5 | #![feature(lang_items)] 6 | #![feature(linkage)] 7 | #![feature(naked_functions)] 8 | #![feature(proc_macro_hygiene)] 9 | #![feature(ptr_offset_from)] 10 | #![feature(start)] 11 | #![feature(try_blocks)] 12 | 13 | // mandatory rust environment setup 14 | #[lang = "eh_personality"] extern fn eh_personality() {} 15 | #[panic_handler] fn panic(_info: &::core::panic::PanicInfo) -> ! { loop{} } 16 | #[start] fn start(_argc: isize, _argv: *const *const u8) -> isize {0} 17 | #[no_mangle] fn abort() -> ! { loop {} } 18 | 19 | -------------------------------------------------------------------------------- /src/constants.rs: -------------------------------------------------------------------------------- 1 | 2 | /// The shift between the physical addresses of symbols and the virtual addresses for those same 3 | /// symbols. This value must match the one used in the linker script (src/linker.ld). 4 | pub const SYMBOL_PA2VA_OFFSET: u64 = 0xffffffff40000000; 5 | 6 | /// Maximum number of harts on the host. If the platform has more than this many harts, it might 7 | /// result in buffer overflows in various places. 8 | pub const MAX_HOST_HARTS: usize = 16; 9 | 10 | pub const MAX_GUEST_HARTS: usize = 8; 11 | 12 | pub const MACHINE_SHARED_STATIC_ADDRESS: u64 = 0x80400000; 13 | pub const SUPERVISOR_SHARED_STATIC_ADDRESS: u64 = 0xffffffffc0200000; 14 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "rvirt" 3 | version = "0.1.0" 4 | authors = ["Jonathan Behrens "] 5 | edition = "2018" 6 | publish = false 7 | 8 | [dependencies] 9 | spin = "0.5.0" 10 | riscv-decode = "0.2.0" 11 | arrayvec = { version = "0.4.10", default-features = false } 12 | byteorder = { version = "1.3.1", default-features = false } 13 | arr_macro = "0.1.2" 14 | 15 | [profile.release] 16 | debug = true 17 | 18 | [[bin]] 19 | name = "rvirt-bare-metal" 20 | path = "src/machine.rs" 21 | required-features = ["physical_symbol_addresses"] 22 | 23 | [[bin]] 24 | name = "rvirt" 25 | path = "src/supervisor.rs" 26 | 27 | [features] 28 | physical_symbol_addresses = [] 29 | embed_guest_kernel = [] -------------------------------------------------------------------------------- /src/loadaddress.S: -------------------------------------------------------------------------------- 1 | 2 | // LLVM's assembler is currently buggy and cannot produce position independent 3 | // code or static code that references addresses which aren't sign extended 4 | // 32-bit ints. Unfortunately none of physical memory falls into that 5 | // range. This macro helps work around the bug by using a hacky trick to support 6 | // addresses in the first couple MBs above 0x800000000. 7 | // 8 | // Uses of this macro can be replaced by the `la rd, symbol` psuedo-instruction 9 | // where supported. 10 | .macro LOAD_ADDRESS rd, symbol 11 | lui \rd, %hi(\symbol - (2047<<12)) 12 | srli \rd, \rd, 12 13 | addi \rd, \rd, 2047 14 | slli \rd, \rd, 12 15 | addi \rd, \rd, %lo(\symbol - (2047<<12)) 16 | .endm -------------------------------------------------------------------------------- /src/riscv/sbi.rs: -------------------------------------------------------------------------------- 1 | #[naked] 2 | #[inline(never)] 3 | fn ecall(_a0: u64, _a1: u64, _a2: u64, _a3: u64, _a4: u64, _a5: u64, _a6: u64, _a7: u64) { 4 | unsafe { asm!("ecall" :: : "a0" : "volatile") } 5 | } 6 | 7 | pub fn set_timer(stime_value: u64) { 8 | ecall(stime_value, 0, 0, 0, 0, 0, 0, 0); 9 | } 10 | 11 | pub fn clear_ipi() { 12 | ecall(0, 0, 0, 0, 0, 0, 0, 3); 13 | } 14 | 15 | pub fn send_ipi(hart_mask_pointer: u64) { 16 | ecall(hart_mask_pointer, 0, 0, 0, 0, 0, 0, 4); 17 | } 18 | 19 | pub fn shutdown() { 20 | ecall(0, 0, 0, 0, 0, 0, 0, 8); 21 | } 22 | 23 | pub fn send_ipi_to_hart(hart: u64) { 24 | let mask: u64 = 1 << hart; 25 | send_ipi(&mask as *const u64 as u64); 26 | } 27 | -------------------------------------------------------------------------------- /notes.txt: -------------------------------------------------------------------------------- 1 | 2 | RISC-V Spec 3 | ----------- 4 | https://content.riscv.org/wp-content/uploads/2017/05/riscv-spec-v2.2.pdf 5 | 6 | RISC-V Privileged Spec 7 | ---------------------- 8 | https://content.riscv.org/wp-content/uploads/2017/05/riscv-privileged-v1.10.pdf 9 | 10 | Draft Spec Releases 11 | ------------------- 12 | https://github.com/riscv/riscv-isa-manual/releases 13 | 14 | Device Configuration 15 | -------------------- 16 | https://github.com/qemu/qemu/blob/master/hw/riscv/virt.c 17 | 18 | Description of PLIC / CLINT 19 | --------------------------- 20 | https://static.dev.sifive.com/U54-MC-RVCoreIP.pdf 21 | 22 | Virtual I/O Device Spec 23 | ----------------------- 24 | https://docs.oasis-open.org/virtio/virtio/v1.1/csprd01/virtio-v1.1-csprd01.pdf 25 | 26 | Linux Constants 27 | --------------- 28 | 0xffffffe000000000: _start 29 | 0xffffffe000000000: PAGE_OFFSET 30 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: rust 2 | rust: 3 | - nightly 4 | 5 | cache: 6 | - directories: 7 | - $HOME/qemu 8 | 9 | install: 10 | - sudo apt-get install sshpass 11 | 12 | before_script: 13 | - bash -e third_party/travis-qemu.sh 14 | - export PATH=$PATH:$HOME/qemu/bin 15 | - wget https://fedorapeople.org/groups/risc-v/disk-images/vmlinux 16 | - mv vmlinux fedora-vmlinux 17 | - wget https://fedorapeople.org/groups/risc-v/disk-images/stage4-disk.img.xz 18 | - unxz --verbose stage4-disk.img.xz 19 | 20 | script: 21 | - make target/riscv64imac-unknown-none-elf/release/rvirt 22 | - make target/riscv64imac-unknown-none-elf/release/rvirt-bare-metal 23 | - (sleep 8m && sshpass -p riscv ssh -o PreferredAuthentications=password -o PubkeyAuthentication=no -o StrictHostKeyChecking=no -p 10001 root@localhost 'shutdown now') & 24 | - make qemu 25 | 26 | notifications: 27 | email: 28 | on_success: never 29 | -------------------------------------------------------------------------------- /src/slinker.ld: -------------------------------------------------------------------------------- 1 | OUTPUT_ARCH( "riscv" ) 2 | ENTRY( sstart ) 3 | 4 | SECTIONS 5 | { 6 | . = 0xffffffffc0000000; 7 | .text.supervisor : AT(0x80000000) 8 | { 9 | *(.text.entrypoint) 10 | *(.text) *(.text.*) 11 | *(.gnu.linkonce.t.*) 12 | } 13 | 14 | . = ALIGN(0x1000); 15 | .rodata.supervisor : 16 | { 17 | *(.rdata) *(.rodata) *(.rodata.*) 18 | *(.gnu.linkonce.r.*) 19 | } 20 | 21 | . = 0xffffffffc0200000; 22 | .shared.data : { 23 | *(.shared.data) 24 | } 25 | 26 | . = 0xffffffffc0400000; 27 | .data : 28 | { 29 | *(.data) 30 | *(.data.*) 31 | *(.srodata*) 32 | *(.gnu.linkonce.d.*) 33 | *(.comment) 34 | } 35 | 36 | .bss : 37 | { 38 | *(.bss) 39 | *(.bss.*) 40 | *(.sbss*) 41 | *(.gnu.linkonce.b.*) 42 | *(COMMON) 43 | } 44 | 45 | ASSERT(. < 0xffffffffc0600000, "") 46 | 47 | . = 0xffffffffe0000000; 48 | .initrd : 49 | { 50 | *(.initrd) 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /src/riscv/mod.rs: -------------------------------------------------------------------------------- 1 | #[macro_use] 2 | pub mod instructions; 3 | 4 | pub mod csr; 5 | pub mod bits; 6 | pub mod sbi; 7 | 8 | pub use instructions::*; 9 | 10 | pub const CAUSE_STRINGS: [&str; 16] = [ 11 | "Instruction address misaligned", 12 | "Instruction access fault", 13 | "Illegal instruction", 14 | "Breakpoint", 15 | "Load address misaligned", 16 | "Load access fault", 17 | "Store/AMO address misaligned", 18 | "Store/AMO access fault", 19 | "Environment call from U-mode", 20 | "Environment call from S-mode", 21 | "Reserved (10)", 22 | "Environment call from M-mode", 23 | "Instruction page fault", 24 | "Load page fault", 25 | "Reserved (13)", 26 | "Store/AMO page fault" 27 | ]; 28 | 29 | pub fn cause_to_str(cause: u64) -> &'static str { 30 | if (cause as i64) < 0 { 31 | "Interrupt" 32 | } else if cause >= 16 { 33 | "Reserved (>=16)" 34 | } else { 35 | CAUSE_STRINGS[cause as usize] 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /src/backtrace.rs: -------------------------------------------------------------------------------- 1 | use crate::context::Context; 2 | use crate::memory_region::MemoryRegion; 3 | use crate::riscv::bits; 4 | use crate::pmap; 5 | 6 | #[allow(unused)] 7 | pub unsafe fn print_guest_backtrace(guest_memory: &MemoryRegion, state: &mut Context, pc: u64) { 8 | println!(" {:x}", pc); 9 | 10 | let mut ra = state.saved_registers.get(1); 11 | let mut sp = state.saved_registers.get(2); 12 | let mut fp = state.saved_registers.get(8); 13 | 14 | let page_table_ppn = state.csrs.satp & bits::SATP_PPN; 15 | 16 | let mut old_fp = 0; 17 | while old_fp != fp { 18 | println!(" {:x}", ra); 19 | 20 | ra = match fp.checked_sub(8).and_then(|a| pmap::read64(guest_memory, page_table_ppn, a)) { 21 | Some(v) => v, 22 | None => break, 23 | }; 24 | 25 | old_fp = fp; 26 | fp = match fp.checked_sub(16).and_then(|a| pmap::read64(guest_memory, page_table_ppn, a)) { 27 | Some(v) => v, 28 | None => break, 29 | }; 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /third_party/travis-qemu.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Based on https://github.com/jdub/travis-qemu 4 | 5 | set -e 6 | 7 | VERSION=${QEMU_VERSION:=4.0.0} 8 | ARCHES=${QEMU_ARCHES:=riscv64} 9 | TARGETS=${QEMU_TARGETS:=$(echo $ARCHES | sed 's#$# #;s#\([^ ]*\) #\1-softmmu \1-linux-user #g')} 10 | 11 | if echo "$VERSION $TARGETS" | cmp --silent $HOME/qemu/.build -; then 12 | echo "qemu $VERSION up to date!" 13 | exit 0 14 | fi 15 | 16 | echo "VERSION: $VERSION" 17 | echo "TARGETS: $TARGETS" 18 | 19 | cd $HOME 20 | rm -rf qemu 21 | 22 | # Checking for a tarball before downloading makes testing easier :-) 23 | test -f "qemu-$VERSION.tar.bz2" || wget "http://wiki.qemu-project.org/download/qemu-$VERSION.tar.bz2" 24 | tar -xf "qemu-$VERSION.tar.bz2" 25 | cd "qemu-$VERSION" 26 | 27 | ./configure \ 28 | --prefix="$HOME/qemu" \ 29 | --target-list="$TARGETS" \ 30 | --disable-docs \ 31 | --disable-sdl \ 32 | --disable-gtk \ 33 | --disable-gnutls \ 34 | --disable-gcrypt \ 35 | --disable-nettle \ 36 | --disable-curses \ 37 | --disable-kvm \ 38 | --static 39 | 40 | make -j4 41 | make install 42 | 43 | echo "$VERSION $TARGETS" > $HOME/qemu/.build 44 | -------------------------------------------------------------------------------- /uEnv.txt: -------------------------------------------------------------------------------- 1 | # This is the sample uEnv.txt file for HiFive Unleashed U-boot 2 | # The current convention (SUBJECT TO CHANGE) is that this file 3 | # will be loaded from the first MSDOS(fat) GPT partition on the 4 | # MMC card. 5 | 6 | bootargs=debug console=tty0 console=ttySIF0 root=/dev/mmcblk0p2 rootwait 7 | 8 | # below much match what's in FIT (ugha) 9 | bbladdr=80000000 10 | fdtaddr=81f00000 11 | vmladdr=80200000 12 | irdaddr=82000000 13 | # oh the hack.. use a large size.. ugh 14 | irdsize=01000000 15 | vmlsize=00800000 16 | newfdt=f0000000 17 | 18 | #Use fit image, but don't call fdt move (TODO: understand later) 19 | setupfdt1=fdt addr ${newfdt}; fdt resize; fdt chosen 20 | 21 | setupfdt2=fdt print /chosen; fdt set /chosen bootargs "${bootargs}"; fdt set /firmware uboot,ver ${ver}; fdt print /chosen 22 | 23 | setupird=setexpr irdend ${irdaddr} + ${irdsize}; fdt set /chosen linux,initrd-start <0x0 0x${irdaddr}>; fdt set /chosen linux,initrd-end <0x0 0x${irdend}> 24 | 25 | setupvml=setexpr vmlend ${vmladdr} + ${vmlsize}; fdt set /chosen riscv,kernel-start <0x0 0x${vmladdr}>; fdt set /chosen riscv,kernel-end <0x0 0x${vmlend}> 26 | 27 | bootwait=setenv _delay ${bootdelay}; echo ${_delay}; while test ${_delay} > 0; do sleep 1; setexpr _delay ${_delay} - 1; echo ${_delay}; done 28 | 29 | autoload=true 30 | bootfile=hifiveu.fit 31 | 32 | serverip=10.0.2.2 33 | ipaddr=10.0.2.50 34 | netmask=255.255.255.0 35 | boot2=dhcp; bootm start ${fileaddr}; run setupfdt1; run setupvml; run setupird; run setupfdt2; bootm loados ${fileaddr}; echo "Booting kernel"; go 80000000 36 | -------------------------------------------------------------------------------- /src/scode.S: -------------------------------------------------------------------------------- 1 | .globl sstart 2 | .section .text.entrypoint 3 | sstart: 4 | // a2 = offset all code/data is shifted by ("shared_segment_shift") 5 | auipc a2, 0 6 | li t0, 0x80000000 // = SUPERVISOR_START_ADDRESS - SYMBOL_PA2VA_OFFSET 7 | sub a2, a2, t0 8 | 9 | // sp = M_MODE_STACK_BASE + hartid * M_MODE_STACK_STRIDE 10 | li sp, 0x80810000 // = M_MODE_STACK_BASE 11 | slli t0, a0, 16 // = a0 * M_MODE_STACK_STRIDE 12 | add sp, sp, t0 13 | 14 | // sp = pa2va(sp) 15 | li t0, 0xffffffff40000000 // = SYMBOL_PA2VA_OFFSET 16 | add sp, sp, t0 17 | 18 | // s4 = &boot_page_tables[hartid][0] 19 | li s4, 0x80200000 // s4 = 0x80200000 20 | add s4, s4, a2 // + shared_segment_shift 21 | slli t0, a0, 13 22 | add s4, s4, t0 // + 1024 * hartid 23 | 24 | // s5 = &boot_page_tables[hartid][511] 25 | li s5, 511 * 8 26 | add s5, s5, s4 27 | 28 | // s6 = &boot_page_tables[hartid][1024] 29 | li s6, 1024 * 8 30 | add s6, s6, s4 31 | 32 | // boot_page_tables[hartid][511..1024] += shared_segment_shift >> 2 33 | srli t0, a2, 21 34 | slli t0, t0, 19 35 | 1: ld t1, 0(s5) 36 | add t1, t1, t0 37 | sd t1, 0(s5) 38 | addi s5, s5, 8 39 | blt s5, s6, 1b 40 | 41 | // stvec = trampoline 42 | lui t0, %hi(trampoline) 43 | addi t0, t0, %lo(trampoline) 44 | csrw stvec, t0 45 | 46 | // satp = &boot_page_tables[hartid][0] | 8 << 60 47 | srli t0, s4, 12 48 | li t1, 8 << 60 49 | or t0, t0, t1 50 | csrw satp, t0 51 | sfence.vma 52 | 53 | .align 3 54 | trampoline: 55 | lui t0, %hi(sstart2) 56 | addi t0, t0, %lo(sstart2) 57 | jr t0 58 | 59 | .align 3 60 | .globl hart_entry 61 | hart_entry: 62 | csrr a0, sscratch 63 | j hart_entry2 64 | 65 | .align 3 66 | .globl panic_trap_handler 67 | panic_trap_handler: 68 | call panic_trap_handler2 69 | -------------------------------------------------------------------------------- /src/riscv/bits.rs: -------------------------------------------------------------------------------- 1 | pub const TVEC_MODE: u64 = 0x3; 2 | pub const TVEC_BASE: u64 = !TVEC_MODE; 3 | 4 | pub const STATUS_UIE: u64 = 1 << 0; 5 | pub const STATUS_SIE: u64 = 1 << 1; 6 | pub const STATUS_UPIE: u64 = 1 << 4; 7 | pub const STATUS_SPIE: u64 = 1 << 5; 8 | pub const STATUS_SPP: u64 = 1 << 8; 9 | pub const STATUS_FS: u64 = 3 << 13; 10 | pub const STATUS_XS: u64 = 3 << 15; 11 | pub const STATUS_SUM: u64 = 1 << 18; 12 | pub const STATUS_MXR: u64 = 1 << 19; 13 | pub const STATUS_SD: u64 = 1 << 63; 14 | 15 | pub const STATUS_MPP_M: u64 = 3 << 11; 16 | pub const STATUS_MPP_S: u64 = 1 << 11; 17 | pub const STATUS_MPP_U: u64 = 0 << 11; 18 | 19 | // Mask of writable bits in sstatus. 20 | pub const SSTATUS_WRITABLE_MASK: u64 = 21 | STATUS_MXR | 22 | STATUS_SUM | 23 | STATUS_FS | 24 | STATUS_SPP | 25 | STATUS_SPIE | 26 | STATUS_SIE; 27 | pub const SSTATUS_DYNAMIC_MASK: u64 = STATUS_SD | STATUS_FS; 28 | 29 | pub const IP_SSIP: u64 = 1 << 1; 30 | pub const IP_STIP: u64 = 1 << 5; 31 | pub const IP_SEIP: u64 = 1 << 9; 32 | 33 | pub const IE_SSIE: u64 = 1 << 1; 34 | pub const IE_STIE: u64 = 1 << 5; 35 | pub const IE_SEIE: u64 = 1 << 9; 36 | 37 | pub const SATP_MODE: u64 = 0xf << 60; 38 | pub const SATP_ASID: u64 = 0xffff << 44; 39 | pub const SATP_PPN: u64 = 0xfff_ffffffff; 40 | 41 | pub const SSTACK_BASE: u64 = 0xffffffffc0a00000 - 32*8; 42 | 43 | pub const SCAUSE_INSN_MISALIGNED: u64 = 0; 44 | pub const SCAUSE_INSN_ACCESS_FAULT: u64 = 1; 45 | pub const SCAUSE_ILLEGAL_INSN: u64 = 2; 46 | pub const SCAUSE_BREAKPOINT: u64 = 3; 47 | pub const SCAUSE_LOAD_ACCESS_FAULT: u64 = 5; 48 | pub const SCAUSE_ATOMIC_MISALIGNED: u64 = 6; 49 | pub const SCAUSE_STORE_ACCESS_FAULT: u64 = 7; 50 | pub const SCAUSE_ENV_CALL: u64 = 8; 51 | pub const SCAUSE_INSN_PAGE_FAULT: u64 = 12; 52 | pub const SCAUSE_LOAD_PAGE_FAULT: u64 = 13; 53 | pub const SCAUSE_STORE_PAGE_FAULT: u64 = 15; 54 | -------------------------------------------------------------------------------- /uboot-fit-image.its: -------------------------------------------------------------------------------- 1 | /dts-v1/; 2 | 3 | / { 4 | description = "U-boot FIT image for HiFive Unleashed"; 5 | #address-cells = <2>; 6 | 7 | images { 8 | rvirt { 9 | description = "RVirt"; 10 | data = /incbin/("target/riscv64imac-unknown-none-elf/release/rvirt-bare-metal.bin"); 11 | type = "kernel"; 12 | arch = "riscv"; 13 | os = "linux"; 14 | load = <0x80000000>; 15 | entry = <0x80000000>; 16 | compression = "none"; 17 | hash-1 { 18 | algo = "sha256"; 19 | }; 20 | }; 21 | 22 | kernel { 23 | description = "Linux kernel"; 24 | data = /incbin/("fedora-vmlinux"); 25 | type = "ramdisk"; 26 | arch = "riscv"; 27 | os = "linux"; 28 | load = <0x82000000>; 29 | compression = "none"; 30 | hash-1 { 31 | algo = "sha256"; 32 | }; 33 | }; 34 | 35 | fdt { 36 | data = /incbin/("hifive_u540.dtb"); 37 | type = "flat_dt"; 38 | arch = "riscv"; 39 | load = <0x81f00000>; 40 | compression = "none"; 41 | hash-1 { 42 | algo = "sha256"; 43 | }; 44 | }; 45 | }; 46 | 47 | configurations { 48 | default = "config-1"; 49 | 50 | config-1 { 51 | description = "HiFive Unleashed with RVirt"; 52 | kernel = "rvirt"; 53 | fdt = "fdt"; 54 | loadables = "kernel"; 55 | }; 56 | }; 57 | }; 58 | 59 | -------------------------------------------------------------------------------- /src/drivers/macb.rs: -------------------------------------------------------------------------------- 1 | 2 | // References: 3 | // 4 | // https://github.com/qemu/qemu/blob/d522fba24478474911b0e6e488b6d1dcf1af54f8/hw/net/cadence_gem.c 5 | // https://github.com/torvalds/linux/blob/master/drivers/net/ethernet/cadence/macb_main.c 6 | // https://www.yumpu.com/en/document/view/31739994/gigabit-ethernet-mac-gem-technical-data-sheet-cadence- 7 | 8 | #![allow(unused)] 9 | 10 | use crate::memory_region::MemoryRegion; 11 | use super::*; 12 | 13 | const GEM_DMACFG: u64 = 0x00000010; 14 | 15 | const GEM_DMACFG_ADDR_64B: u32 = 1 << 30; 16 | 17 | const VIRTIO_MTU: u16 = 2048; 18 | 19 | #[repr(transparent)] 20 | struct RxDesc([u32; 4]); 21 | #[repr(transparent)] 22 | struct TxDesc([u32; 4]); 23 | 24 | /// Driver for the Cadence GEM Ethernet device. 25 | pub struct MacbDriver { 26 | control_registers: MemoryRegion, 27 | mac: [u8; 6], 28 | 29 | rx_buffers: [[u8; 2048]; 8], 30 | rx_queue: [RxDesc; 8], 31 | tx_buffers: [[u8; 2048]; 8], 32 | tx_queue: [TxDesc; 8], 33 | } 34 | 35 | impl Driver for MacbDriver { 36 | const DEVICE_ID: u32 = 1; 37 | const FEATURES: u64 = VIRTIO_NET_F_MAC | VIRTIO_NET_F_MTU; 38 | const QUEUE_NUM_MAX: u32 = 2; 39 | 40 | fn interrupt(device: &mut GuestDevice, _guest_memory: &mut MemoryRegion) -> bool { 41 | false 42 | } 43 | fn doorbell(device: &mut GuestDevice, _guest_memory: &mut MemoryRegion, queue: u32) { 44 | 45 | } 46 | 47 | fn read_config_u8(device: &GuestDevice, _guest_memory: &mut MemoryRegion, offset: u64) -> u8 { 48 | match offset { 49 | 0..=5 => device.host_driver.mac[offset as usize], 50 | 10 => VIRTIO_MTU.to_le_bytes()[0], 51 | 11 => VIRTIO_MTU.to_le_bytes()[1], 52 | _ => 0 53 | } 54 | } 55 | fn write_config_u8(device: &mut GuestDevice, _guest_memory: &mut MemoryRegion, offset: u64, value: u8) { 56 | match offset { 57 | 0..=5 => { 58 | device.host_driver.mac[offset as usize] = value; 59 | unimplemented!(); // TODO: set device MAC to updated value 60 | } 61 | _ => {} 62 | } 63 | } 64 | 65 | fn reset(device: &mut GuestDevice, _guest_memory: &mut MemoryRegion) { 66 | 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /src/statics.rs: -------------------------------------------------------------------------------- 1 | use arr_macro::arr; 2 | use core::sync::atomic::AtomicBool; 3 | use spin::Mutex; 4 | use crate::constants::*; 5 | use crate::print::{self, UartWriter}; 6 | use crate::pmap; 7 | 8 | #[derive(Copy, Clone, Debug)] 9 | pub enum IpiReason { 10 | TriggerHartEntry { 11 | a0: u64, 12 | a1: u64, 13 | a2: u64, 14 | a3: u64, 15 | a4: u64, 16 | sp: u64, 17 | satp: u64, 18 | } 19 | } 20 | 21 | #[repr(C,align(4096))] 22 | pub struct Shared { 23 | pub boot_page_tables: [[u64; 1024]; MAX_HOST_HARTS], 24 | pub ipi_reason_array: [Mutex>; MAX_HOST_HARTS], 25 | pub uart_writer: Mutex, 26 | pub hart_lottery: AtomicBool, 27 | } 28 | 29 | pub struct ConditionalPointer(u64); 30 | 31 | 32 | #[cfg(feature = "physical_symbol_addresses")] 33 | pub const SHARED_STATICS: ConditionalPointer = ConditionalPointer(MACHINE_SHARED_STATIC_ADDRESS); 34 | #[cfg(not(feature = "physical_symbol_addresses"))] 35 | pub const SHARED_STATICS: ConditionalPointer = ConditionalPointer(SUPERVISOR_SHARED_STATIC_ADDRESS); 36 | 37 | impl core::ops::Deref for ConditionalPointer { 38 | type Target = Shared; 39 | 40 | #[inline(always)] 41 | fn deref(&self) -> &Self::Target { 42 | unsafe { &*(self.0 as *const Shared) } 43 | } 44 | } 45 | 46 | const fn make_boot_page_tables_array() -> [[u64; 1024]; MAX_HOST_HARTS] { 47 | const BASE: u64 = SUPERVISOR_SHARED_STATIC_ADDRESS - SYMBOL_PA2VA_OFFSET; 48 | const STRIDE: u64 = 1024 * 8; 49 | 50 | let mut i = 0; 51 | arr![pmap::make_boot_page_table({i += 1; BASE + (i - 1) * STRIDE}); 16] 52 | } 53 | 54 | /// This static is never accessed directly, but is needed so that the memory backing SHARED_STATICS 55 | /// is properly initialized. 56 | /// 57 | /// We hard code an address for the UART. This value will be replaced once the device tree has been 58 | /// parsed, but until then this provides a way to debug early boot issues. Once the memory subsystem 59 | /// is initialized, this will again be updated to use virtual addresses instead of physical 60 | /// addresses. 61 | #[link_section = ".shared.data"] 62 | pub static __SHARED_STATICS_IMPL: Shared = Shared { 63 | boot_page_tables: make_boot_page_tables_array(), 64 | ipi_reason_array: arr![Mutex::new(None); 16], 65 | // see also: print::early_guess_uart 66 | uart_writer: Mutex::new(UartWriter { 67 | pa: 0x10000000, 68 | inner: print::UartWriterInner::Ns16550a { initialized: false }, 69 | }), 70 | hart_lottery: AtomicBool::new(true), 71 | }; 72 | -------------------------------------------------------------------------------- /src/pmptest.rs: -------------------------------------------------------------------------------- 1 | 2 | use rvirt::*; 3 | use crate::pagedebug; 4 | use crate::pmp; 5 | 6 | global_asm!(include_str!("mcode.S")); 7 | 8 | #[inline(never)] 9 | pub unsafe fn pmptest_mstart(hartid: u64, device_tree_blob: u64) { 10 | if hartid > 0 { 11 | loop {} 12 | } 13 | 14 | // Initialize some control registers 15 | csrs!(mideleg, 0x0222); 16 | csrs!(medeleg, 0xb1ff); 17 | csrw!(mie, 0x888); 18 | csrs!(mstatus, STATUS_MPP_S); 19 | // csrw!(mepc, sstart as u64); -- TODO!!!!!!!!!!!!!!1 20 | csrw!(mcounteren, 0xffffffff); 21 | csrw!(mscratch, 0x80800000 + 0x1000 * (hartid+1)); 22 | 23 | asm!("LOAD_ADDRESS t0, mtrap_entry 24 | csrw mtvec, t0" 25 | ::: "t0" : "volatile"); 26 | 27 | // Minimal page table to boot into S mode. 28 | let boot_page_table_pa = SHARED_STATICS.boot_page_tables[0].as_ptr() as u64; 29 | *((boot_page_table_pa) as *mut u64) = 0x00000000 | 0xcf; 30 | *((boot_page_table_pa+16) as *mut u64) = 0x20000000 | 0xcf; 31 | *((boot_page_table_pa+4088) as *mut u64) = 0x20000000 | 0xcf; 32 | csrw!(satp, 8 << 60 | (boot_page_table_pa >> 12)); 33 | 34 | // Text segment 35 | pmp::install_pmp_napot(0, pmp::LOCK | pmp::READ | pmp::EXEC, 0x80000000, 0x200000); 36 | // Shared data segment 37 | pmp::install_pmp_napot(1, pmp::LOCK | pmp::READ | pmp::WRITE, 0x80200000, 0x200000); 38 | 39 | // // M-mode stack 40 | // csrw!(pmpaddr2, pmpaddr(0x80180000, 1<<19)); 41 | // csrs!(pmpcfg0, M_ONLY << 16); 42 | // csrw!(pmpaddr3, pmpaddr(0x80200000 - (hartid+1) * 64*1024, 32*1024)); 43 | // csrs!(pmpcfg0, LRW << 24); 44 | // csrw!(pmpaddr2, pmpaddr(0x80180000, 1<<19)); 45 | // csrs!(pmpcfg0, LOCKED << 32); 46 | 47 | pmp::debug_pmp(); 48 | pagedebug::debug_paging(); 49 | 50 | asm!("mv a0, $0 51 | mret" :: "r"(device_tree_blob) : "a0", "a1" : "volatile"); 52 | } 53 | 54 | // unsafe fn sstart(device_tree_blob: u64) { 55 | // asm!("li t0, $0 56 | // add sp, sp, t0" :: "i"(SYMBOL_PA2VA_OFFSET) : "t0" : "volatile"); 57 | // csrw!(stvec, (||{panic!("Trap on hart 0?!")}) as fn() as *const () as u64); 58 | 59 | // // Read and process host FDT. 60 | // let fdt = Fdt::new(device_tree_blob); 61 | // assert!(fdt.magic_valid()); 62 | // assert!(fdt.version() >= 17 && fdt.last_comp_version() <= 17); 63 | // assert!(fdt.total_size() < 64 * 1024); 64 | // let machine = fdt.parse(); 65 | 66 | // // Initialize UART 67 | // if let Some(ty) = machine.uart_type { 68 | // print::UART_WRITER.lock().init(machine.uart_address, ty); 69 | // } 70 | 71 | // println!("ONLINE"); 72 | 73 | // loop {} 74 | // } 75 | 76 | // #[allow(unused)] 77 | // unsafe fn ustart() { 78 | 79 | // } 80 | -------------------------------------------------------------------------------- /src/machine.rs: -------------------------------------------------------------------------------- 1 | #![no_std] 2 | #![feature(asm)] 3 | #![feature(global_asm)] 4 | #![feature(lang_items)] 5 | #![feature(naked_functions)] 6 | #![feature(start)] 7 | 8 | use rvirt::*; 9 | 10 | // mandatory rust environment setup 11 | #[lang = "eh_personality"] extern fn eh_personality() {} 12 | #[panic_handler] fn panic(info: &::core::panic::PanicInfo) -> ! { println!("{}", info); loop {}} 13 | #[start] fn start(_argc: isize, _argv: *const *const u8) -> isize {0} 14 | #[no_mangle] fn abort() -> ! { println!("Abort!"); loop {}} 15 | 16 | const M_MODE_STACK_BASE: u64 = 0x80810000; 17 | const M_MODE_STACK_STRIDE: u64 = 0x10000; 18 | 19 | #[link_section = ".payload"] 20 | static PAYLOAD: [u8; include_bytes!(concat!("../", env!("PAYLOAD"))).len()] = 21 | *include_bytes!(concat!("../", env!("PAYLOAD"))); 22 | 23 | global_asm!(include_str!("mcode.S")); 24 | 25 | #[naked] 26 | #[no_mangle] 27 | #[link_section = ".text.entrypoint"] 28 | unsafe fn _start(hartid: u64, device_tree_blob: u64) { 29 | asm!("li sp, $0 30 | li t1, $1 31 | mul t0, a0, t1 32 | add sp, sp, t0" :: "i"(M_MODE_STACK_BASE), "i"(M_MODE_STACK_STRIDE) :: "volatile"); 33 | 34 | // Simple trick to loop forever if this hart does not support supervisor mode. 35 | csrw!(mtvec, 0x80000000); 36 | csrw!(stvec, 0); 37 | 38 | mstart(hartid, device_tree_blob); 39 | } 40 | 41 | #[inline(never)] 42 | unsafe fn mstart(hartid: u64, device_tree_blob: u64) { 43 | csrs!(mideleg, 0x0222); 44 | csrs!(medeleg, 0xb1ff); 45 | csrw!(mie, 0x088); 46 | csrc!(mstatus, STATUS_MPP_M); 47 | csrs!(mstatus, STATUS_MPP_S); 48 | csrw!(mepc, PAYLOAD.as_ptr() as u64); 49 | csrw!(mcounteren, 0xffffffff); 50 | csrw!(mscratch, M_MODE_STACK_BASE + M_MODE_STACK_STRIDE * hartid); 51 | csrw!(pmpaddr0, 0xffffffffffffffff); 52 | csrw!(pmpcfg0, csrr!(pmpcfg0) | 0x1f); 53 | csrw!(satp, 0); 54 | 55 | asm!("lla t0, mtrap_entry 56 | csrw mtvec, t0" 57 | ::: "t0" : "volatile"); 58 | 59 | riscv::sfence_vma(); 60 | 61 | enter_supervisor(hartid, device_tree_blob); 62 | } 63 | 64 | #[naked] 65 | #[inline(never)] 66 | unsafe fn enter_supervisor(_hartid: u64, _device_tree_blob: u64) { 67 | asm!("mret" :::: "volatile"); 68 | } 69 | 70 | #[no_mangle] 71 | pub unsafe fn forward_exception() { 72 | use crate::riscv::bits::*; 73 | 74 | csrw!(sepc, csrr!(mepc)); 75 | csrw!(scause, csrr!(mcause)); 76 | csrw!(stval, csrr!(mtval)); 77 | csrw!(mepc, csrr!(stvec) & !0x3); 78 | 79 | let status = csrr!(mstatus); 80 | if status & STATUS_SIE != 0 { 81 | csrs!(mstatus, STATUS_SPIE); 82 | } else { 83 | csrc!(mstatus, STATUS_SPIE); 84 | } 85 | if status & STATUS_MPP_S != 0 { 86 | csrs!(mstatus, STATUS_SPP); 87 | } else { 88 | csrc!(mstatus, STATUS_SPP); 89 | } 90 | csrc!(mstatus, STATUS_SIE | STATUS_MPP_M); 91 | csrs!(mstatus, STATUS_MPP_S); 92 | } 93 | -------------------------------------------------------------------------------- /src/elf.rs: -------------------------------------------------------------------------------- 1 | #![allow(unused)] 2 | 3 | // Values for ProgramHeader::type_ 4 | const ELF_PROG_LOAD: u32 = 1; 5 | 6 | // Flag bits for ProgramHeader::flags 7 | const ELF_PROG_FLAG_EXEC: u32 = 1; 8 | const ELF_PROG_FLAG_WRITE: u32 = 2; 9 | const ELF_PROG_FLAG_READ: u32 = 4; 10 | 11 | // Values for SectionHeader::type_ 12 | const ELF_SHT_NULL: u32 = 0; 13 | const ELF_SHT_PROGBITS: u32 = 1; 14 | const ELF_SHT_SYMTAB: u32 = 2; 15 | const ELF_SHT_STRTAB: u32 = 3; 16 | 17 | // Values for SectionHeader::name 18 | const ELF_SHN_UNDEF: u32 = 0; 19 | 20 | const ELF_MAGIC: u32 = 0; 21 | 22 | #[repr(C)] 23 | #[derive(Debug)] 24 | pub struct Ident { 25 | magic: u32, 26 | class: u8, 27 | data: u8, 28 | version: u8, 29 | osabi: u8, 30 | abiversion: u8, 31 | padding: [u8; 7], 32 | } 33 | 34 | #[repr(C)] 35 | #[derive(Debug)] 36 | pub struct Elf64 { 37 | ident: Ident, 38 | type_: u16, 39 | machine: u16, 40 | version: u32, 41 | entry: u64, 42 | phoff: u64, 43 | shoff: u64, 44 | flags: u32, 45 | ehsize: u16, 46 | phentsize: u16, 47 | phnum: u16, 48 | shentsize: u16, 49 | shnum: u16, 50 | shstrndx: u16, 51 | } 52 | 53 | #[repr(C)] 54 | #[derive(Debug)] 55 | pub struct ProgramHeader64 { 56 | type_: u32, 57 | flags: u32, 58 | offset: u64, 59 | va: u64, 60 | pa: u64, 61 | file_size: u64, 62 | memory_size: u64, 63 | align: u64, 64 | } 65 | 66 | // Returns (program entry point, max_address) 67 | pub unsafe fn load_elf(data: *const u8, base_address: *mut u8) -> (u64, u64) { 68 | let elf = &*(data as *const Elf64); 69 | assert_eq!(elf.ident.magic, 0x464C457F); 70 | assert_eq!(elf.ident.class, 2); // 64-bit 71 | assert_eq!(elf.ident.data, 1); // Little endian 72 | assert_eq!(elf.machine, 243); // Machine = RISCV 73 | assert_eq!(elf.type_, 2); // 64-bit 74 | assert_eq!(elf.version, 1); 75 | 76 | let mut max_addr = 0; 77 | for i in 0..(elf.phnum as usize) { 78 | let ph = &*(data.add(elf.phoff as usize + i * elf.phentsize as usize) as *const ProgramHeader64); 79 | 80 | if ph.type_ == ELF_PROG_LOAD { 81 | if ph.file_size > 0 { 82 | let dst = base_address.add(ph.pa as usize); 83 | let src = data.add(ph.offset as usize); 84 | core::ptr::copy(src, dst, ph.file_size as usize); 85 | } 86 | if ph.memory_size > ph.file_size { 87 | let dst = base_address.add((ph.pa + ph.file_size) as usize); 88 | core::ptr::write_bytes(dst, 0, (ph.memory_size - ph.file_size) as usize); 89 | } 90 | 91 | if max_addr < ph.pa + ph.memory_size { 92 | max_addr = ph.pa + ph.memory_size; 93 | } 94 | } 95 | } 96 | 97 | // base_address.add(elf.entry as usize) 98 | (0x80000000, 0x80000000 + max_addr) 99 | } 100 | -------------------------------------------------------------------------------- /src/mcode.S: -------------------------------------------------------------------------------- 1 | // This is the default M-mode trap handler. It forwards timer interrupts to 2 | // S-mode and loops for all other interrupt and exception causes. 3 | .align 4 4 | mtrap_entry: 5 | csrrw sp, mscratch, sp 6 | addi sp, sp, -128 7 | sd ra, 0(sp) 8 | sd t0, 8(sp) 9 | sd t1, 16(sp) 10 | sd t2, 24(sp) 11 | sd t3, 32(sp) 12 | sd t4, 40(sp) 13 | sd t5, 48(sp) 14 | sd t6, 56(sp) 15 | sd a0, 64(sp) 16 | sd a1, 72(sp) 17 | sd a2, 80(sp) 18 | sd a3, 88(sp) 19 | sd a4, 96(sp) 20 | sd a5, 104(sp) 21 | sd a6, 112(sp) 22 | sd a7, 120(sp) 23 | 24 | csrr t0, mcause 25 | bgez t0, exception 26 | 27 | li t1, 0x8000000000000003 28 | beq t0, t1, msoftware_interrupt 29 | 30 | li t1, 0x8000000000000007 31 | beq t0, t1, mtimer_interrupt 32 | 33 | li t1, 0x800000000000000b 34 | beq t0, t1, mexternal_interrupt 35 | 36 | unknown_cause: 37 | j unknown_cause 38 | 39 | msoftware_interrupt: 40 | csrsi mip, 0x2 // mip.ssip = 1 41 | 42 | csrr t0, mhartid 43 | slli t0, t0, 2 44 | li t1, 0x2000000 45 | add t1, t0, t1 46 | sw zero, 0(t1) // mtimecmp[hartid] = zero 47 | 48 | j return 49 | 50 | mtimer_interrupt: 51 | li t0, 0x20 52 | csrs mip, t0 // mip.stip = 1 53 | 54 | csrr t0, mhartid 55 | slli t0, t0, 3 56 | li t1, 0x2004000 57 | add t1, t0, t1 58 | li t0, 0xffffffffffff 59 | sd t0, 0(t1) // mtimecmp[hartid] = 2^48 - 1 60 | 61 | j return 62 | 63 | mexternal_interrupt: 64 | j mexternal_interrupt 65 | 66 | exception: 67 | li t1, 9 68 | beq t0, t1, ecall_exception 69 | call forward_exception 70 | j return 71 | 72 | ecall_exception: 73 | csrr t0, mepc 74 | addi t0, t0, 4 75 | csrw mepc, t0 76 | 77 | beqz a7, sbi_set_timer 78 | 79 | li t1, 3 80 | beq a7, t1, sbi_clear_ipi 81 | 82 | li t1, 4 83 | beq a7, t1, sbi_send_ipi 84 | 85 | li t1, 8 86 | beq a7, t1, sbi_shutdown 87 | 88 | j unknown_cause 89 | 90 | sbi_set_timer: 91 | csrr t0, mhartid 92 | slli t0, t0, 3 93 | li t1, 0x2004000 94 | add t1, t0, t1 95 | sd a0, 0(t1) // mtimecmp[hartid] = a0 96 | 97 | li t0, 0x20 98 | csrc mip, t0 // mip.stip = 0 99 | 100 | li a0, 0 101 | j return_with_value 102 | 103 | sbi_clear_ipi: 104 | csrci mip, 0x2 105 | li a0, 0 106 | j return_with_value 107 | 108 | sbi_send_ipi: 109 | li t2, 1 << 17 // t2 = MPRV 110 | csrrs t1, mstatus, t2 111 | ld t0, 0(a0) 112 | csrw mstatus, t1 113 | 114 | li t2, (0x02000000 - 4) 115 | 1: addi t2, t2, 4 116 | andi t1, t0, 0x1 117 | srli t0, t0, 1 118 | beqz t1, 1b 119 | sw t1, 0(t2) 120 | bnez t0, 1b 121 | 122 | li a0, 0 123 | j return_with_value 124 | 125 | sbi_shutdown: 126 | j sbi_shutdown 127 | 128 | return: 129 | ld a0, 64(sp) 130 | return_with_value: 131 | ld ra, 0(sp) 132 | ld t0, 8(sp) 133 | ld t1, 16(sp) 134 | ld t2, 24(sp) 135 | ld t3, 32(sp) 136 | ld t4, 40(sp) 137 | ld t5, 48(sp) 138 | ld t6, 56(sp) 139 | ld a1, 72(sp) 140 | ld a2, 80(sp) 141 | ld a3, 88(sp) 142 | ld a4, 96(sp) 143 | ld a5, 104(sp) 144 | ld a6, 112(sp) 145 | ld a7, 120(sp) 146 | addi sp, sp, 128 147 | csrrw sp, mscratch, sp 148 | mret 149 | -------------------------------------------------------------------------------- /GETTING-STARTED.md: -------------------------------------------------------------------------------- 1 | # Getting Started 2 | 3 | ## Installing dependencies 4 | 5 | - rustup: https://rustup.rs/ 6 | - customize configuration to select the "nightly" build during setup. 7 | 8 | - qemu: 9 | - if the right version is available for your system (4.0.0-rc0 or greater): 10 | 11 | $ sudo apt-get install qemu-system-misc 12 | 13 | - if not: 14 | 15 | $ wget https://download.qemu.org/qemu-4.0.0-rc0.tar.xz 16 | $ tar -xf qemu-4.0.0-rc0.tar.xz qemu-4.0.0-rc0/ 17 | $ cd qemu-4.0.0-rc0 18 | $ ./configure --target-list=riscv64-softmmu 19 | $ make 20 | $ sudo make install 21 | 22 | - gdb (optional): 23 | 24 | to download and build: 25 | 26 | $ wget https://ftp.gnu.org/gnu/gdb/gdb-8.2.1.tar.xz 27 | $ tar -xf gdb-8.2.1.tar.xz gdb-8.2.1/ 28 | $ cd gdb-8.2.1 29 | $ ./configure --target=riscv64-unknown-elf --disable-nls 30 | $ make 31 | $ sudo make install 32 | 33 | ## Prepare guest operating system 34 | 35 | You'll need guest binaries to run RVirt. The easiest option are to get a kernel binary (vmlinux) and a disk image (stage4-disk.img) from [here](https://fedorapeople.org/groups/risc-v/disk-images/): 36 | 37 | # make sure to be in the root of the repository 38 | $ wget https://fedorapeople.org/groups/risc-v/disk-images/vmlinux 39 | $ mv vmlinux fedora-vmlinux 40 | $ wget https://fedorapeople.org/groups/risc-v/disk-images/stage4-disk.img.xz 41 | $ unxz stage4-disk.img.xz 42 | 43 | 44 | Instead of that disk image, you can also use a more recent one from [here](http://185.97.32.145/koji/tasks?state=closed&view=flat&method=createAppliance&order=-id) (Some links there have invalid TLS certs, replace 'https://fedora-riscv.tranquillity.se' with the IP address version 'http://185.97.32.145'). If you do, you may have to replace the disk image name or kernel boot arguments to select the right boot partition. 45 | 46 | ### Configure COW disk for the guest 47 | 48 | If you want to avoid accidentally corrupting the your base disk image, you can use a copy-on-write disk instead: 49 | 50 | $ chmod -w Fedora-Developer-Rawhide-20190506.n.0-sda 51 | $ qemu-img create -f qcow2 -b Fedora-Developer-Rawhide-20190506.n.0-sda.raw -F raw img01.qcow2 52 | 53 | ## Instructions 54 | 55 | Download RVirt's source code: 56 | 57 | $ git clone https://github.com/mit-pdos/rvirt && cd rvirt 58 | 59 | Make any necessary edits to Makefile 60 | 61 | - If your kernel image isn't named 'fedora-vmlinux' or your disk 'stage4-disk.img' then you'll want to change the appropriate line. 62 | - If you want to pass different arguments to Linux (say because the root directory of your disk image is /dev/vda1 instead of /dev/vda) edit the -append "..." line accordingly. 63 | 64 | Build and run RVirt: 65 | 66 | $ make qemu 67 | 68 | Once boot is complete (which can take 4-5 minutes) you can SSH into the guest machine. The root password is likely to be 'riscv': 69 | 70 | $ ssh -p 10001 root@localhost 71 | 72 | If you want to debug using gdb, run these commands in the project directory in separate shells: 73 | 74 | $ make qemu-gdb 75 | $ riscv64-unknown-elf-gdb -------------------------------------------------------------------------------- /src/riscv/instructions.rs: -------------------------------------------------------------------------------- 1 | 2 | use crate::riscv::bits::STATUS_FS; 3 | 4 | /// atomic read from CSR 5 | #[macro_export] 6 | macro_rules! csrr { 7 | ( $r:ident ) => {{ 8 | let value: u64; 9 | #[allow(unused_unsafe)] 10 | unsafe { asm!("csrr $0, $1" : "=r"(value) : "i"(crate::riscv::csr::$r)) }; 11 | value 12 | }}; 13 | } 14 | 15 | /// atomic write to CSR 16 | #[macro_export] 17 | macro_rules! csrw { 18 | ( $r:ident, $x:expr ) => {{ 19 | let x: u64 = $x; 20 | asm!("csrw $0, $1" :: "i"(crate::riscv::csr::$r), "r"(x) :: "volatile"); 21 | }}; 22 | } 23 | 24 | /// atomic write to CSR from immediate 25 | #[macro_export] 26 | macro_rules! csrwi { 27 | ( $r:ident, $x:expr ) => {{ 28 | const X: u64 = $x; 29 | asm!("csrwi $0, $1" :: "i"(crate::riscv::csr::$r), "i"(X) :: "volatile"); 30 | }}; 31 | } 32 | 33 | /// atomic read and set bits in CSR 34 | #[macro_export] 35 | macro_rules! csrs { 36 | ( $r:ident, $x:expr ) => {{ 37 | let x: u64 = $x; 38 | asm!("csrs $0, $1" :: "i"(crate::riscv::csr::$r), "r"(x) :: "volatile"); 39 | }}; 40 | } 41 | 42 | /// atomic read and set bits in CSR using immediate 43 | #[macro_export] 44 | macro_rules! csrsi { 45 | ( $r:ident, $x:expr ) => {{ 46 | const X: u64 = $x; 47 | asm!("csrsi $0, $1" :: "i"(crate::riscv::csr::$r), "i"(X) :: "volatile"); 48 | }}; 49 | } 50 | 51 | /// atomic read and clear bits in CSR 52 | #[macro_export] 53 | macro_rules! csrc { 54 | ( $r:ident, $x:expr ) => {{ 55 | let x: u64 = $x; 56 | asm!("csrc $0, $1" :: "i"(crate::riscv::csr::$r), "r"(x) :: "volatile"); 57 | }}; 58 | } 59 | 60 | /// atomic read and clear bits in CSR using immediate 61 | #[macro_export] 62 | macro_rules! csrci { 63 | ( $r:ident, $x:expr ) => {{ 64 | const X: u64 = $x; 65 | asm!("csrci $0, $1" :: "i"(crate::riscv::csr::$r), "i"(X) :: "volatile"); 66 | }}; 67 | } 68 | 69 | pub fn sfence_vma() { 70 | unsafe { asm!("sfence.vma" ::: "memory" : "volatile") } 71 | } 72 | 73 | pub fn sfence_vma_addr(vaddr: u64) { 74 | unsafe { asm!("sfence.vma $0" :: "r"(vaddr) : "memory" : "volatile") } 75 | } 76 | 77 | pub fn barrier() { 78 | unsafe { asm!("" ::: "memory" : "volatile") } 79 | } 80 | 81 | pub fn fence_i() { 82 | unsafe { asm!("fence.i" :::: "volatile") } 83 | } 84 | 85 | pub fn wfi() { 86 | unsafe { asm!("wfi" :::: "volatile") } 87 | } 88 | 89 | /// Set the `sepc` CSR to the indicated value. 90 | /// 91 | /// Since traps from S-mode always cause a hyperivsor panic, the value of `sstatus.spp` will always 92 | /// be zero. Thus, mret will always cause a vmexit and so any value for sepc is safe. 93 | pub fn set_sepc(value: u64) { 94 | unsafe { csrw!(sepc, value) } 95 | } 96 | 97 | /// Set the `sscratch` CSR. This is safe because `sscratch` does not impact processor execution. 98 | pub fn set_sscratch(value: u64) { 99 | unsafe { csrw!(sscratch, value) } 100 | } 101 | 102 | /// Clear the indicated bits of `sip`. This is safe because interrupt state is not used to enforce 103 | /// safety invariants. 104 | pub fn clear_sip(mask: u64) { 105 | unsafe { csrc!(sip, mask) } 106 | } 107 | 108 | /// Set the FS bits of `sstatus`. This is safe because rvirt does not use hardware floating point 109 | /// support. 110 | pub fn set_sstatus_fs(new: u64) { 111 | unsafe { csrw!(sstatus, (new & STATUS_FS) | (csrr!(sstatus) & !STATUS_FS)) } 112 | } 113 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # RVirt 2 | ![Travis](https://img.shields.io/travis/mit-pdos/rvirt.svg) 3 | ![License](https://img.shields.io/github/license/mit-pdos/rvirt.svg) 4 | ![Language](https://img.shields.io/github/languages/top/mit-pdos/rvirt.svg) 5 | 6 | RVirt is an S-mode trap-and-emulate hypervisor for RISC-V. It is currently targeted at QEMU's virt machine type, but partially supports the HiFive Unleashed as well. It can run either on either the [Berkeley Boot Loader](https://github.com/riscv/riscv-pk) or with its own (considerably faster) M-mode stub. It is powerful enough to run multiple instances of Linux as guest operating systems. 7 | 8 | ## FAQ 9 | 10 | ### How is RVirt different from other hypervisors like [Firecracker](https://github.com/firecracker-microvm/firecracker), [Cloud Hypervisor](https://github.com/intel/cloud-hypervisor) or [xvisor](https://github.com/avpatel/xvisor-next)? 11 | 12 | All three of the other projects can only run on processors that have hardware virtualization extensions like Intel VT-x or RISC-V's planned H-extension. Firecracker and Cloud Hypervisor additionally depend on KVM (and by extension the entire Linux kernel). By contrast, RVirt doesn't need KVM or Linux and can run on any sufficiently powerful 64-bit RISC-V processor with an MMU. 13 | 14 | ### Why RISC-V? 15 | 16 | RISC-V is [classically virtualizable](https://en.wikipedia.org/wiki/Popek_and_Goldberg_virtualization_requirements) which means that a hypervisor can rely on any privileged instruction triggering an illegal instruction fault when executed by the (unprivileged) guest OS. This is in contrast to other ISAs like x86 which have instructions that behave differently in user and kernel mode but never trap. Additionally, RISC-V has only 12 supervisor level control registers and only a handful of privileged instructions making the work to implement trap and emulate much more manageable. 17 | 18 | ### Why Rust? 19 | 20 | Why not? Rust is a pleasant language to work with and can directly target bare metal platforms. I was also excited by Rust's ability to guarantee memory safety for safe code, but I found the amount of unsafe code required for initialization and vm entry/exit partially negated this benefit. 21 | 22 | ## Getting Started 23 | 24 | For more detailed instructions, see the [getting started guide](GETTING-STARTED.md). 25 | 26 | ### Install Dependencies 27 | 28 | $ curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh 29 | $ sudo apt-get install qemu-system-misc 30 | 31 | ### Clone the repository 32 | 33 | $ git clone https://github.com/mit-pdos/rvirt && cd rvirt 34 | 35 | ### Download guest images 36 | 37 | $ wget https://fedorapeople.org/groups/risc-v/disk-images/vmlinux 38 | $ mv vmlinux fedora-vmlinux 39 | $ wget https://fedorapeople.org/groups/risc-v/disk-images/stage4-disk.img.xz 40 | $ unxz stage4-disk.img.xz 41 | 42 | ### Compile and run 43 | 44 | $ make qemu 45 | 46 | This command launches an instance of QEMU with RVirt running inside and Linux running inside that. Once the boot process has completed you can SSH in through all the layers and directly interact with the guest (root password is 'riscv'): 47 | 48 | $ ssh -p 10001 root@localhost 49 | 50 | ## Current Status 51 | 52 | RVirt supports running both inside an emulator and on real hardware and does runtime detection to learn what platform it is executing on. It has so far been tested with Fedora RISC-V builds, but may work with other distributions as well. 53 | 54 | ### Supported Platforms 55 | 56 | Tier 1: Boots fully and supports interaction via SSH / serial console 57 | 58 | * QEMU virt machine type 59 | 60 | Tier 2: Boots partially but lacks driver support for block/network device to complete boot process 61 | 62 | * HiFive Unleashed board 63 | * QEMU sifiveu machine type 64 | 65 | ### Correctness 66 | 67 | - [x] Trap and emulate of privileged instructions (CSR related and SFENCE.VMA) 68 | - [x] Shadow page tables 69 | - [x] Update PTE accessed and dirty bits 70 | - [x] Timers 71 | - [x] Expose and/or emulate peripherals 72 | - [x] Address lingering bugs in boot process 73 | 74 | ### Functionality 75 | In addition to being able to boot and run a single guest, RVirt also supports some features not needed for the correct virtualization of a single guest: 76 | 77 | - [x] multiple guests 78 | - [x] passthrough of virtio block and network devices 79 | - [ ] paravirtualized network devices backed by HiFive Unleashed's NIC *(in progress)* 80 | - [ ] multicore guests and inter-processor interrupts between them 81 | 82 | Other features not used by Linux / not supported by current platforms are unlikely to be implemented: 83 | 84 | - [ ] ASID support 85 | - [ ] Sv48 or Sv57 guest page tables (only Sv39 currently allowed) 86 | - [ ] SR-IOV PCIe devices 87 | - [ ] 32-bit guests 88 | 89 | 90 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | OUT=target/riscv64imac-unknown-none-elf/release 2 | 3 | ################################################################################ 4 | # COMPILE BINARIES # 5 | ################################################################################ 6 | 7 | GUEST_KERNEL_FEATURE=$(if $(RVIRT_GUEST_KERNEL), --features embed_guest_kernel, ) 8 | 9 | # Build the main rvirt binary. Relies on an SBI inteface for some functionality. 10 | $(OUT)/rvirt: src/*.rs src/*/*.rs src/*.S Cargo.toml src/slinker.ld rustup-target 11 | cargo rustc --release --target riscv64imac-unknown-none-elf --bin rvirt \ 12 | $(GUEST_KERNEL_FEATURE) -- -C link-arg=-Tsrc/slinker.ld 13 | 14 | # Flattened version of rvirt binary. 15 | $(OUT)/rvirt.bin: $(OUT)/rvirt 16 | objcopy -S -I elf64-little -O binary --change-addresses -0x80000000 \ 17 | --set-section-flags .bss=alloc,load,contents \ 18 | $(OUT)/rvirt $(OUT)/rvirt.bin 19 | 20 | # Build a free standing binary that can run directly on bare metal without any 21 | # SBI provider. 22 | $(OUT)/rvirt-bare-metal: $(OUT)/rvirt.bin src/*.rs src/*/*.rs src/*.S Cargo.toml src/mlinker.ld rustup-target 23 | PAYLOAD=$(OUT)/rvirt.bin cargo rustc --release --target \ 24 | riscv64imac-unknown-none-elf --bin rvirt-bare-metal --features \ 25 | "physical_symbol_addresses" -- -C link-arg=-Tsrc/mlinker.ld 26 | 27 | # Flattened version of rvirt-bare-metal binary. 28 | $(OUT)/rvirt-bare-metal.bin: $(OUT)/rvirt-bare-metal 29 | objcopy -S -I elf64-little -O binary --change-addresses -0x80000000 \ 30 | $(OUT)/rvirt-bare-metal $(OUT)/rvirt-bare-metal.bin 31 | 32 | ################################################################################ 33 | # QEMU RUN COMMANDS # 34 | ################################################################################ 35 | 36 | # Run rvirt inside QEMU. 37 | qemu: $(OUT)/rvirt-bare-metal 38 | qemu-system-riscv64 -machine virt -nographic -m 2G -smp 1 $(GDBOPTS) \ 39 | -kernel $(OUT)/rvirt-bare-metal -initrd fedora-vmlinux \ 40 | -append "console=ttyS0 ro root=/dev/vda" \ 41 | -object rng-random,filename=/dev/urandom,id=rng1 \ 42 | -device virtio-rng-device,rng=rng1,bus=virtio-mmio-bus.0 \ 43 | -device virtio-blk-device,drive=hd1,bus=virtio-mmio-bus.1 \ 44 | -drive file=stage4-disk.img,format=raw,id=hd1 \ 45 | -device virtio-net-device,netdev=usernet1,bus=virtio-mmio-bus.2 \ 46 | -netdev user,id=usernet1,hostfwd=tcp::10001-:22 47 | 48 | # Run rvirt inside QEMU with BBL as the SBI provider. Requires a build of QEMU 49 | # with support for the `-bios` flag which mainline QEMU doesn't yet have. 50 | qemu-bbl: $(OUT)/rvirt.bin 51 | qemu-system-riscv64 -machine virt -nographic -m 2G -smp 1 \ 52 | -bios bbl -kernel $(OUT)/rvirt.bin -initrd fedora-vmlinux \ 53 | -append "console=ttyS0 root=/dev/vda2" \ 54 | -object rng-random,filename=/dev/urandom,id=rng1 \ 55 | -device virtio-rng-device,rng=rng1,bus=virtio-mmio-bus.0 \ 56 | -device virtio-blk-device,drive=hd1,bus=virtio-mmio-bus.1 \ 57 | -drive file=img01.qcow2,format=qcow2,id=hd1 \ 58 | -device virtio-net-device,netdev=usernet1,bus=virtio-mmio-bus.2 \ 59 | -netdev user,id=usernet1,hostfwd=tcp::10001-:22 60 | 61 | # Run rvirt inside QEMU but target the sifive_u machine type. 62 | qemu-sifive: $(OUT)/rvirt-bare-metal 63 | qemu-system-riscv64 -machine sifive_u -nographic -m 2G \ 64 | -kernel $(OUT)/rvirt-bare-metal 65 | 66 | # Run rvirt inside QEMU but wait for GDB to attach on port 26000 first. 67 | GDBOPTS=$(if $(DEBUG),-gdb tcp::26000 -S,) 68 | qemu-gdb: DEBUG=1 69 | qemu-gdb: qemu 70 | 71 | ################################################################################ 72 | # HIFIVE UNLEASHED COMMANDS # 73 | ################################################################################ 74 | 75 | # Prepare a `.fit` file and place it in /srv/tftp so the HiFive Unleashed can 76 | # boot from it. Requires atftpd with target directory set to /srv/tftp/. 77 | fit: $(OUT)/rvirt-bare-metal.bin uboot-fit-image.its 78 | mkimage -f uboot-fit-image.its -A riscv -O linux -T flat_dt $(OUT)/rvirt.fit 79 | cp $(OUT)/rvirt.fit /srv/tftp/hifiveu.fit 80 | 81 | # Display serial output from the HiFive Unleashed. To get line endings to be 82 | # correct, follow steps described on: 83 | # https://unix.stackexchange.com/questions/283924/how-can-minicom-permanently-translate-incoming-newline-n-to-crlf 84 | serial-output: 85 | sudo minicom -D /dev/serial/by-id/usb-FTDI_Dual_RS232-HS-if01-port0 86 | 87 | ################################################################################ 88 | # MISC COMMANDS # 89 | ################################################################################ 90 | 91 | rustup-target: 92 | rustup target add riscv64imac-unknown-none-elf || true 93 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | 2 | //! ## Start-up sequence summary: 3 | //! - QEMU loads hypervisor kernel (this program) and linux kernel (held in initrd) into memory 4 | //! - QEMU launches hardcoded mrom reset vector, which jumps to 0x80000000 5 | //! - _start is located at 0x80000000 as the only function in the .init.entrypoint section 6 | //! - `_start` sets up the stack and calls into mstart 7 | //! - `mstart` initializes machine-mode control registers as needed by the hypervisor 8 | //! - `mstart` returns into supervisor-mode in sstart 9 | //! - `sstart` returns into user-mode at the guest kernel entrypoint 10 | //! (running in emulated-supervisor-mode) 11 | //! 12 | //! ## Physical memory layout according to machine-mode 13 | //! (see also linker.ld, pmap.rs, qemu riscv/virt.c @ 4717595) 14 | //! note: although only 36 bits are described here, the address space is wider. 15 | //! ```text 16 | //! START - END REGION 17 | //! 0x 0 - 0x 100 QEMU VIRT_DEBUG 18 | //! 0x 100 - 0x 1000 unmapped 19 | //! 0x 1000 - 0x 12000 QEMU MROM (includes hard-coded reset vector; device tree) 20 | //! 0x 12000 - 0x 100000 unmapped 21 | //! 0x 100000 - 0x 101000 QEMU VIRT_TEST 22 | //! 0x 101000 - 0x 2000000 unmapped 23 | //! 0x 2000000 - 0x 2010000 QEMU VIRT_CLINT 24 | //! 0x 2010000 - 0x 3000000 unmapped 25 | //! 0x 3000000 - 0x 3010000 QEMU VIRT_PCIE_PIO 26 | //! 0x 3010000 - 0x c000000 unmapped 27 | //! 0x c000000 - 0x 10000000 QEMU VIRT_PLIC 28 | //! 0x 10000000 - 0x 10000100 QEMU VIRT_UART0 29 | //! 0x 10000100 - 0x 10001000 unmapped 30 | //! 0x 10001000 - 0x 10002000 QEMU VIRT_VIRTIO 31 | //! 0x 10002000 - 0x 30000000 unmapped 32 | //! 0x 30000000 - 0x 40000000 QEMU 33 | //! 0x 40000000 - 0x 80000000 QEMU VIRT_PCIE_MMIO 34 | //! 0x 80000000 - 0x 80200000 text segment 35 | //! 0x 80200000 - 0x 80400000 shared data 36 | //! 0x 80400000 - 0x 80600000 hart 0 data segment 37 | //! 0x 80600000 - 0x 80800000 hart 0 S-mode stack 38 | //! 0x 80800000 - 0x 80810000 hart 0 M-mode stack 39 | //! 0x 80810000 - 0x 80820000 hart 1 M-mode stack 40 | //! 0x 80820000 - 0x 80830000 hart 2 M-mode stack 41 | //! 0x 80830000 - 0x 80840000 hart 3 M-mode stack 42 | //! 0x 808xxxxx - 0x 808xxxxx ... 43 | //! 0x 808f0000 - 0x 80900000 hart 15 M-mode stack 44 | //! 0x c0000000 - 0x c0200000 hart 1 stack 45 | //! 0x c0200000 - 0x c0400000 hart 1 data segment 46 | //! 0x c0400000 - 0x c4000000 hart 1 heap 47 | //! 0x c2000000 - 0x c4000000 hart 1 page tables 48 | //! 0x c4000000 - 0x100000000 hart 1 guest memory 49 | //! 0x100000000 - 0x100200000 hart 2 stack 50 | //! 0x100200000 - 0x100400000 hart 2 data segment 51 | //! 0x100400000 - 0x104000000 hart 2 heap 52 | //! 0x102000000 - 0x104000000 hart 2 page tables 53 | //! 0x104000000 - 0x140000000 hart 2 guest memory 54 | //! 0x140000000 - 0x140200000 hart 3 stack 55 | //! 0x140200000 - 0x140400000 hart 3 data segment 56 | //! 0x140400000 - 0x144000000 hart 3 heap 57 | //! 0x142000000 - 0x144000000 hart 3 page tables 58 | //! 0x144000000 - 0x180000000 hart 3 guest memory 59 | //! ``` 60 | //! 61 | //! ## Initial supervisor virtual memory layout (boot page table) 62 | //! note: the Sv39 addressing mode is in use here 63 | //! ```text 64 | //! VIRTUAL START - VIRTUAL END PHYS START PHYS END MODE REGION 65 | //! 0x 00000000 - 0x 40000000 0x00000000 - 0x40000000 RWX QEMU memory sections 66 | //! 0x 80000000 - 0x c0000000 0x80000000 - 0xC0000000 RWX hypervisor memory 67 | //! 0xffffffffc0000000 - 0xffffffffffffffff 0x80000000 - 0xC0000000 RWX hypervisor memory 68 | //! ``` 69 | //! 70 | //! ## Linux address space layout (with Sv39 addressing) 71 | //! 72 | //! In this addressing mode, Linux does not reserve any address space for a hypervisor. However, the 73 | //! direct map region is 128GB (one quarter of the addres space) but physical memory takes up at 74 | //! most a handful of GBs and Linux never accesses any higher addresses. Thus rvirt is able to use 75 | //! the top 16GB of virtual addresses for its own code and data. 76 | //! 77 | //! ```text 78 | //! VIRTUAL START - VIRTUAL END REGION 79 | //! 0x0000000000000000 - 0x0000003fffffffff User memory 80 | //! 0xffffffbfffffffff - 0xffffffdfffffffff Kernel memory 81 | //! 0xffffffdfffffffff - 0xffffffffffffffff Direct map region 82 | //! ``` 83 | 84 | #![no_std] 85 | #![feature(asm)] 86 | #![feature(const_fn)] 87 | #![feature(const_raw_ptr_deref)] 88 | #![feature(global_asm)] 89 | #![feature(lang_items)] 90 | #![feature(linkage)] 91 | #![feature(naked_functions)] 92 | #![feature(proc_macro_hygiene)] 93 | #![feature(ptr_offset_from)] 94 | #![feature(start)] 95 | #![feature(try_blocks)] 96 | 97 | #[macro_use] 98 | pub mod riscv; 99 | #[macro_use] 100 | pub mod print; 101 | 102 | pub mod backtrace; 103 | pub mod constants; 104 | pub mod context; 105 | pub mod drivers; 106 | pub mod elf; 107 | pub mod fdt; 108 | pub mod memory_region; 109 | pub mod pfault; 110 | pub mod plic; 111 | pub mod pmap; 112 | pub mod statics; 113 | pub mod sum; 114 | pub mod trap; 115 | pub mod virtio; 116 | 117 | pub use core::sync::atomic::{AtomicBool, Ordering}; 118 | pub use constants::SYMBOL_PA2VA_OFFSET; 119 | pub use fdt::*; 120 | pub use riscv::bits::*; 121 | pub use pmap::{pa2va}; 122 | pub use statics::{__SHARED_STATICS_IMPL, IpiReason, SHARED_STATICS}; 123 | -------------------------------------------------------------------------------- /Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 3 | [[package]] 4 | name = "arr_macro" 5 | version = "0.1.2" 6 | source = "registry+https://github.com/rust-lang/crates.io-index" 7 | dependencies = [ 8 | "arr_macro_impl 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", 9 | "proc-macro-hack 0.5.7 (registry+https://github.com/rust-lang/crates.io-index)", 10 | ] 11 | 12 | [[package]] 13 | name = "arr_macro_impl" 14 | version = "0.1.2" 15 | source = "registry+https://github.com/rust-lang/crates.io-index" 16 | dependencies = [ 17 | "proc-macro-hack 0.5.7 (registry+https://github.com/rust-lang/crates.io-index)", 18 | "quote 0.6.12 (registry+https://github.com/rust-lang/crates.io-index)", 19 | "syn 0.15.34 (registry+https://github.com/rust-lang/crates.io-index)", 20 | ] 21 | 22 | [[package]] 23 | name = "arrayvec" 24 | version = "0.4.10" 25 | source = "registry+https://github.com/rust-lang/crates.io-index" 26 | dependencies = [ 27 | "nodrop 0.1.13 (registry+https://github.com/rust-lang/crates.io-index)", 28 | ] 29 | 30 | [[package]] 31 | name = "byteorder" 32 | version = "1.3.1" 33 | source = "registry+https://github.com/rust-lang/crates.io-index" 34 | 35 | [[package]] 36 | name = "nodrop" 37 | version = "0.1.13" 38 | source = "registry+https://github.com/rust-lang/crates.io-index" 39 | 40 | [[package]] 41 | name = "proc-macro-hack" 42 | version = "0.5.7" 43 | source = "registry+https://github.com/rust-lang/crates.io-index" 44 | dependencies = [ 45 | "proc-macro2 0.4.30 (registry+https://github.com/rust-lang/crates.io-index)", 46 | "quote 0.6.12 (registry+https://github.com/rust-lang/crates.io-index)", 47 | "syn 0.15.34 (registry+https://github.com/rust-lang/crates.io-index)", 48 | ] 49 | 50 | [[package]] 51 | name = "proc-macro2" 52 | version = "0.4.30" 53 | source = "registry+https://github.com/rust-lang/crates.io-index" 54 | dependencies = [ 55 | "unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", 56 | ] 57 | 58 | [[package]] 59 | name = "quote" 60 | version = "0.6.12" 61 | source = "registry+https://github.com/rust-lang/crates.io-index" 62 | dependencies = [ 63 | "proc-macro2 0.4.30 (registry+https://github.com/rust-lang/crates.io-index)", 64 | ] 65 | 66 | [[package]] 67 | name = "riscv-decode" 68 | version = "0.2.0" 69 | source = "registry+https://github.com/rust-lang/crates.io-index" 70 | 71 | [[package]] 72 | name = "rvirt" 73 | version = "0.1.0" 74 | dependencies = [ 75 | "arr_macro 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", 76 | "arrayvec 0.4.10 (registry+https://github.com/rust-lang/crates.io-index)", 77 | "byteorder 1.3.1 (registry+https://github.com/rust-lang/crates.io-index)", 78 | "riscv-decode 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", 79 | "spin 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)", 80 | ] 81 | 82 | [[package]] 83 | name = "spin" 84 | version = "0.5.0" 85 | source = "registry+https://github.com/rust-lang/crates.io-index" 86 | 87 | [[package]] 88 | name = "syn" 89 | version = "0.15.34" 90 | source = "registry+https://github.com/rust-lang/crates.io-index" 91 | dependencies = [ 92 | "proc-macro2 0.4.30 (registry+https://github.com/rust-lang/crates.io-index)", 93 | "quote 0.6.12 (registry+https://github.com/rust-lang/crates.io-index)", 94 | "unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", 95 | ] 96 | 97 | [[package]] 98 | name = "unicode-xid" 99 | version = "0.1.0" 100 | source = "registry+https://github.com/rust-lang/crates.io-index" 101 | 102 | [metadata] 103 | "checksum arr_macro 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "d262b83f2f573121554ad6e764cd444303df85d86e5fcebc81903ddcf8dd3a97" 104 | "checksum arr_macro_impl 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "8decbe97ffec939e44228d91e5d0829ceb1616c6ed0984c09df164b1e7ebaafc" 105 | "checksum arrayvec 0.4.10 (registry+https://github.com/rust-lang/crates.io-index)" = "92c7fb76bc8826a8b33b4ee5bb07a247a81e76764ab4d55e8f73e3a4d8808c71" 106 | "checksum byteorder 1.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "a019b10a2a7cdeb292db131fc8113e57ea2a908f6e7894b0c3c671893b65dbeb" 107 | "checksum nodrop 0.1.13 (registry+https://github.com/rust-lang/crates.io-index)" = "2f9667ddcc6cc8a43afc9b7917599d7216aa09c463919ea32c59ed6cac8bc945" 108 | "checksum proc-macro-hack 0.5.7 (registry+https://github.com/rust-lang/crates.io-index)" = "0c1dd4172a1e1f96f709341418f49b11ea6c2d95d53dca08c0f74cbd332d9cf3" 109 | "checksum proc-macro2 0.4.30 (registry+https://github.com/rust-lang/crates.io-index)" = "cf3d2011ab5c909338f7887f4fc896d35932e29146c12c8d01da6b22a80ba759" 110 | "checksum quote 0.6.12 (registry+https://github.com/rust-lang/crates.io-index)" = "faf4799c5d274f3868a4aae320a0a182cbd2baee377b378f080e16a23e9d80db" 111 | "checksum riscv-decode 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "920466101a1ec4ffac2ab9b72fa780eff62defa0ae9c4f77a07fa41dfd5450e6" 112 | "checksum spin 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "44363f6f51401c34e7be73db0db371c04705d35efbe9f7d6082e03a921a32c55" 113 | "checksum syn 0.15.34 (registry+https://github.com/rust-lang/crates.io-index)" = "a1393e4a97a19c01e900df2aec855a29f71cf02c402e2f443b8d2747c25c5dbe" 114 | "checksum unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "fc72304796d0818e357ead4e000d19c9c174ab23dc11093ac919054d20a6a7fc" 115 | -------------------------------------------------------------------------------- /src/plic.rs: -------------------------------------------------------------------------------- 1 | 2 | use crate::constants::MAX_GUEST_HARTS; 3 | 4 | /// Number of contexts for the PLIC. Value is twice the max number of harts because each hart will 5 | /// have one M-mode context and one S-mode context. 6 | const MAX_CONTEXTS: usize = MAX_GUEST_HARTS * 2; 7 | 8 | pub struct PlicState { 9 | base: u64, 10 | source_priority: [u32; 512], 11 | pending: [u32; 16], 12 | enable: [[u32; 32]; MAX_CONTEXTS], 13 | thresholds: [u32; MAX_CONTEXTS], 14 | claim_complete: [u32; MAX_CONTEXTS], 15 | } 16 | 17 | impl PlicState { 18 | pub const fn new() -> Self { 19 | Self { 20 | base: 0x0c000000, 21 | source_priority: [0; 512], 22 | pending: [0; 16], 23 | enable: [[0; 32]; MAX_CONTEXTS], 24 | thresholds: [0; MAX_CONTEXTS], 25 | claim_complete: [0; MAX_CONTEXTS], 26 | } 27 | } 28 | 29 | pub fn read_u32(&mut self, addr: u64) -> u32 { 30 | let offset = addr.wrapping_sub(self.base); 31 | if offset <= 0x800 { 32 | self.source_priority[offset as usize >> 2] 33 | } else if offset >= 0x1000 && offset <= 0x1014 { 34 | self.pending[offset as usize >> 2] 35 | } else if offset >= 0x2000 && offset < 0x2000 + 0x80 * MAX_CONTEXTS as u64 { 36 | let hart = (offset - 0x2000) / 0x80; 37 | let index = ((offset - 0x2000) & 0x7f) >> 2; 38 | if index <= 32 { 39 | self.enable[hart as usize][index as usize] 40 | } else { 41 | 0 42 | } 43 | } else if offset >= 0x200000 && offset < 0x200000 + 0x1000 * MAX_CONTEXTS as u64 { 44 | let hart = ((offset - 0x200000) / 0x1000) as usize; 45 | let index = ((offset - 0x200000) & 0xfff) >> 2; 46 | if index == 0 { 47 | self.thresholds[hart] 48 | } else if index == 1 { 49 | if self.claim_complete[hart] == 0 { 50 | let threshold = self.thresholds[hart]; 51 | let mut max_priority = threshold; 52 | for i in 0..self.pending.len() { 53 | if self.pending[i] == 0 { 54 | continue; 55 | } 56 | 57 | for j in 0..32 { 58 | if self.pending[i] & (1 << j) != 0 { 59 | let interrupt = i*32 + j; 60 | if self.source_priority[interrupt] > max_priority { 61 | max_priority = self.source_priority[interrupt]; 62 | self.claim_complete[hart] = interrupt as u32; 63 | } 64 | } 65 | } 66 | } 67 | } 68 | self.set_pending(self.claim_complete[hart], false); 69 | self.claim_complete[hart] 70 | } else { 71 | 0 72 | } 73 | } else { 74 | 0 75 | } 76 | } 77 | 78 | pub fn write_u32(&mut self, addr: u64, value: u32, clear_seip: &mut bool) { 79 | let offset = addr.wrapping_sub(self.base); 80 | if offset <= 0x800 { 81 | self.source_priority[offset as usize >> 2] = value; 82 | } else if offset >= 0x1000 && offset <= 0x1014 { 83 | self.pending[offset as usize >> 2] = value; 84 | } else if offset >= 0x2000 && offset < 0x2000 + 0x80 * MAX_CONTEXTS as u64 { 85 | let hart = (offset - 0x2000) / 0x80; 86 | let index = ((offset - 0x2000) & 0x7f) >> 2; 87 | 88 | if index <= 32 { 89 | self.enable[hart as usize][index as usize] = value; 90 | } 91 | } else if offset >= 0x200000 && offset < 0x200000 + 0x1000 * MAX_CONTEXTS as u64 { 92 | let hart = (offset - 0x200000) / 0x1000; 93 | let index = ((offset - 0x200000) & 0xfff) >> 2; 94 | if index == 0 { 95 | self.thresholds[hart as usize] = value; 96 | } else if index == 1 { 97 | if self.claim_complete[hart as usize] == value { 98 | self.set_pending(value, false); 99 | self.claim_complete[hart as usize] = 0; 100 | *clear_seip = true; 101 | } 102 | } 103 | } 104 | } 105 | 106 | pub fn set_pending(&mut self, interrupt: u32, value: bool) { 107 | let index = (interrupt / 32) as usize; 108 | let mask = 1 << (interrupt % 32); 109 | 110 | if value { 111 | self.pending[index] |= mask; 112 | } else { 113 | self.pending[index] &= !mask; 114 | } 115 | } 116 | 117 | pub fn interrupt_pending(&self) -> bool { 118 | const CONTEXT: usize = 1; // TODO: shouldn't be a constant 119 | 120 | let threshold = self.thresholds[CONTEXT]; 121 | for i in 0..self.pending.len() { 122 | if self.pending[i] == 0 { 123 | continue; 124 | } 125 | 126 | for j in 0..32 { 127 | if self.pending[i] & (1 << j) != 0 { 128 | if self.source_priority[i*32 + j] > threshold { 129 | return true; 130 | } 131 | } 132 | } 133 | } 134 | 135 | false 136 | } 137 | } 138 | -------------------------------------------------------------------------------- /src/memory_region.rs: -------------------------------------------------------------------------------- 1 | use core::mem; 2 | use core::ops::{Index, IndexMut}; 3 | use crate::pmap; 4 | 5 | pub struct MemoryRegion { 6 | ptr: *mut T, 7 | base_address: u64, 8 | length_bytes: u64, 9 | } 10 | 11 | unsafe impl Send for MemoryRegion {} 12 | 13 | impl MemoryRegion { 14 | pub unsafe fn new(address: u64, length: u64) -> Self { 15 | assert_eq!(length % mem::size_of::() as u64, 0); 16 | Self { 17 | ptr: address as *mut T, 18 | base_address: pmap::va2pa(address), 19 | length_bytes: length, 20 | } 21 | } 22 | 23 | pub unsafe fn with_base_address(address: u64, base_address: u64, length: u64) -> Self { 24 | assert_eq!(length % mem::size_of::() as u64, 0); 25 | Self { 26 | ptr: address as *mut T, 27 | base_address, 28 | length_bytes: length, 29 | } 30 | } 31 | 32 | pub fn get(&self, index: u64) -> Option { 33 | if index % mem::size_of::() as u64 != 0 || index < self.base_address { 34 | return None; 35 | } 36 | 37 | let offset = index - self.base_address; 38 | if offset >= self.length_bytes { 39 | return None; 40 | } 41 | 42 | unsafe { Some(*(self.ptr.add(offset as usize / mem::size_of::()))) } 43 | } 44 | 45 | pub fn base(&self) -> u64 { 46 | self.base_address 47 | } 48 | 49 | pub fn len(&self) -> u64 { 50 | self.length_bytes 51 | } 52 | 53 | pub fn in_region(&self, addr: u64) -> bool { 54 | addr >= self.base_address && addr < self.base_address + self.length_bytes 55 | } 56 | 57 | pub fn slice(&self, index: u64, len: u64) -> &[u8] { 58 | assert!(index >= self.base_address); 59 | 60 | let offset = index - self.base_address; 61 | assert!(offset < self.length_bytes); 62 | 63 | assert!(self.length_bytes - offset >= len); 64 | 65 | unsafe { 66 | core::slice::from_raw_parts((self.ptr as *mut u8).wrapping_add(offset as usize), 67 | len as usize) 68 | } 69 | } 70 | 71 | pub fn slice_mut(&mut self, index: u64, len: u64) -> &mut [u8] { 72 | assert!(index >= self.base_address); 73 | 74 | let offset = index - self.base_address; 75 | assert!(offset < self.length_bytes); 76 | 77 | assert!(self.length_bytes - offset >= len); 78 | 79 | unsafe { 80 | core::slice::from_raw_parts_mut((self.ptr as *mut u8).wrapping_add(offset as usize), 81 | len as usize) 82 | } 83 | } 84 | } 85 | 86 | impl Index for MemoryRegion { 87 | type Output = T; 88 | /// Return a reference to a u64 index many *bytes* into the memory region. The value of index 89 | /// must be divisible by sizeof(T). 90 | fn index(&self, index: u64) -> &T { 91 | assert_eq!(index % mem::size_of::() as u64, 0); 92 | assert!(index >= self.base_address); 93 | 94 | let offset = index - self.base_address; 95 | assert!(offset < self.length_bytes); 96 | 97 | unsafe { &*(self.ptr.add(offset as usize / mem::size_of::())) } 98 | } 99 | } 100 | 101 | impl IndexMut for MemoryRegion { 102 | /// Return a reference to a u64 index many *bytes* into the memory region. The value of index 103 | /// must be divisible by sizeof(T). 104 | fn index_mut(&mut self, index: u64) -> &mut T { 105 | assert_eq!(index % mem::size_of::() as u64, 0); 106 | assert!(index >= self.base_address); 107 | 108 | let offset = index - self.base_address; 109 | assert!(offset < self.length_bytes); 110 | 111 | unsafe { &mut *(self.ptr.add(offset as usize / mem::size_of::())) } 112 | } 113 | } 114 | 115 | /// Use to represent a region containing page tables. All addresses are in terms of *physical 116 | /// addresses* to simplify usage. 117 | pub struct PageTableRegion { 118 | region: MemoryRegion, 119 | end_pa: u64, 120 | } 121 | impl PageTableRegion { 122 | pub fn new(region: MemoryRegion) -> Self { 123 | assert_eq!((region.ptr as u64) % 4096, 0); 124 | assert_eq!(region.length_bytes % 4096, 0); 125 | 126 | let end_pa = pmap::va2pa(region.ptr as u64) + region.length_bytes; 127 | 128 | Self { 129 | region, 130 | end_pa, 131 | } 132 | } 133 | 134 | pub unsafe fn set_pte_unchecked(&mut self, pte_address: u64, pte_value: u64) { 135 | self.region[pte_address] = pte_value; 136 | } 137 | 138 | pub fn set_leaf_pte(&mut self, pte_address: u64, pte_value: u64) { 139 | assert!(pte_value & 0xf != 0x1); 140 | assert!(!self.inside_region(pte_value)); 141 | self.region[pte_address] = pte_value; 142 | } 143 | 144 | pub fn set_nonleaf_pte(&mut self, pte_address: u64, pte_value: u64) { 145 | assert_eq!(pte_value & 0xf, 0x1); 146 | assert!(self.inside_region(pte_value)); 147 | self.region[pte_address] = pte_value; 148 | } 149 | 150 | pub fn set_invalid_pte(&mut self, pte_address: u64, pte_value: u64) { 151 | assert_eq!(pte_value & 0x1, 0); 152 | self.region[pte_address] = pte_value; 153 | } 154 | 155 | // Returns a conservative answer of whether the pte could map some memory that overlapped this 156 | // region. 157 | fn inside_region(&self, pte: u64) -> bool { 158 | // since we don't know page size (and because we know all mappings will point to physical 159 | // addresses larger than the end of this region) we only check that the start of the page is 160 | // beyond the end of this region. 161 | ((pte >> 10) << 12) < self.end_pa 162 | } 163 | } 164 | 165 | impl Index for PageTableRegion { 166 | type Output = u64; 167 | /// Return a reference to the pte at physical address `address`. This must be divisible by 8 and 168 | /// inside the memory region. 169 | fn index(&self, address: u64) -> &u64 { 170 | &self.region[address] 171 | } 172 | } 173 | -------------------------------------------------------------------------------- /src/print.rs: -------------------------------------------------------------------------------- 1 | use core::{fmt, ptr}; 2 | use spin::MutexGuard; 3 | use crate::statics::SHARED_STATICS; 4 | use crate::fdt::UartType; 5 | use crate::pmap; 6 | 7 | // see https://github.com/riscv/riscv-pk/blob/master/machine/uart16550.c 8 | // see: https://os.phil-opp.com/printing-to-screen 9 | 10 | pub enum UartWriterInner { 11 | Ns16550a { initialized: bool }, 12 | SiFive, 13 | } 14 | 15 | pub struct UartWriter { 16 | pub pa: u64, 17 | pub inner: UartWriterInner, 18 | } 19 | 20 | impl UartWriterInner { 21 | #[inline(always)] 22 | unsafe fn initialize_ns16550a(base_address: *mut u8) { 23 | ptr::write_volatile(base_address.offset(1), 0x00); 24 | ptr::write_volatile(base_address.offset(3), 0x80); 25 | ptr::write_volatile(base_address.offset(0), 0x03); 26 | ptr::write_volatile(base_address.offset(1), 0x00); 27 | ptr::write_volatile(base_address.offset(3), 0x03); 28 | ptr::write_volatile(base_address.offset(2), 0xC7); 29 | } 30 | 31 | #[inline(always)] 32 | fn putchar(&mut self, base_address: u64, ch: u8) { 33 | unsafe { 34 | match *self { 35 | UartWriterInner::Ns16550a { ref mut initialized } => { 36 | let base_address = base_address as *mut u8; 37 | if !*initialized { 38 | Self::initialize_ns16550a(base_address); 39 | *initialized = true; 40 | } 41 | 42 | while ptr::read_volatile(base_address.offset(5)) & 0x20 == 0 { 43 | // do nothing 44 | } 45 | ptr::write_volatile(base_address, ch) 46 | } 47 | UartWriterInner::SiFive => { 48 | let base_address = base_address as *mut u32; 49 | while ptr::read_volatile(base_address) & 0x80000000 != 0 { 50 | // do nothing 51 | } 52 | ptr::write_volatile(base_address, ch as u32) 53 | } 54 | } 55 | } 56 | } 57 | 58 | #[inline(always)] 59 | fn getchar(&mut self, base_address: u64) -> Option { 60 | unsafe { 61 | match *self { 62 | UartWriterInner::Ns16550a { ref mut initialized } => { 63 | let base_address = base_address as *mut u8; 64 | if !*initialized { 65 | Self::initialize_ns16550a(base_address); 66 | *initialized = true; 67 | } 68 | 69 | if ptr::read_volatile(base_address.offset(5)) & 0x01 != 0 { 70 | Some(ptr::read_volatile(base_address)) 71 | } else { 72 | None 73 | } 74 | } 75 | UartWriterInner::SiFive => { 76 | let base_address = base_address as *mut u32; 77 | let rxdata = ptr::read_volatile(base_address); 78 | if rxdata & 0x80000000 != 0 { 79 | Some(rxdata as u8) 80 | } else { 81 | None 82 | } 83 | } 84 | } 85 | } 86 | } 87 | } 88 | impl UartWriter { 89 | #[cfg(not(feature = "physical_symbol_addresses"))] 90 | pub fn putchar(&mut self, ch: u8) { 91 | self.inner.putchar(pmap::pa2va(self.pa), ch); 92 | } 93 | 94 | #[cfg(feature = "physical_symbol_addresses")] 95 | pub fn putchar(&mut self, ch: u8) { 96 | self.inner.putchar(self.pa, ch); 97 | } 98 | 99 | pub fn getchar(&mut self) -> Option { 100 | self.inner.getchar(pmap::pa2va(self.pa)) 101 | } 102 | 103 | pub unsafe fn init(&mut self, address: u64, ty: UartType) { 104 | if let UartWriterInner::Ns16550a { initialized: true } = self.inner { 105 | assert_eq!(self.pa, address); 106 | assert_eq!(ty, UartType::Ns16550a); 107 | } else { 108 | self.inner = match ty { 109 | UartType::Ns16550a => UartWriterInner::Ns16550a { 110 | initialized: false, 111 | }, 112 | UartType::SiFive => UartWriterInner::SiFive, 113 | }; 114 | self.pa = address; 115 | } 116 | } 117 | } 118 | impl fmt::Write for UartWriter { 119 | fn write_str(&mut self, s: &str) -> fmt::Result { 120 | for byte in s.bytes() { 121 | self.putchar(byte); 122 | } 123 | Ok(()) 124 | } 125 | } 126 | unsafe impl Send for UartWriter {} 127 | 128 | #[macro_use] 129 | pub mod macros { 130 | #[macro_export] 131 | macro_rules! print { 132 | ($($arg:tt)*) => ({ 133 | use core::fmt::Write; 134 | use crate::SHARED_STATICS; 135 | let mut writer = SHARED_STATICS.uart_writer.lock(); 136 | if cfg!(feature = "physical_symbol_addresses") { 137 | writer.write_str("\u{1b}[31m").unwrap(); 138 | } else { 139 | writer.write_str("\u{1b}[33m").unwrap(); 140 | } 141 | writer.write_fmt(format_args!($($arg)*)).unwrap(); 142 | writer.write_str("\u{1b}[0m").unwrap(); 143 | }); 144 | } 145 | #[macro_export] 146 | macro_rules! println { 147 | ($fmt:expr) => (crate::print!(concat!($fmt, "\n"))); 148 | ($fmt:expr, $($arg:tt)*) => (crate::print!(concat!($fmt, "\n"), $($arg)*)); 149 | } 150 | } 151 | 152 | pub fn guest_println(guestid: u64, line: &[u8]) { 153 | use core::fmt::Write; 154 | let mut writer = SHARED_STATICS.uart_writer.lock(); 155 | match guestid { 156 | 1 => writer.write_str("\u{1b}[32m").unwrap(), 157 | 2 => writer.write_str("\u{1b}[34m").unwrap(), 158 | _ => writer.write_str("\u{1b}[33m").unwrap(), 159 | } 160 | writer.write_str("\u{1b}[1m").unwrap(); 161 | writer.write_fmt(format_args!("[{}] ", guestid)).unwrap(); 162 | writer.write_str("\u{1b}[0m").unwrap(); 163 | for &b in line { 164 | writer.putchar(b); 165 | } 166 | writer.write_str("\n").unwrap(); 167 | } 168 | 169 | pub fn mwriter<'a>() -> Option> { 170 | SHARED_STATICS.uart_writer.try_lock() 171 | } 172 | 173 | const QEMU_VENDOR_ID: u64 = 0x00000000; 174 | 175 | // guess whether we're likely a SiFive board or a QEMU board, for the sake of having early-boot 176 | // uart detection work correctly. 177 | pub fn early_guess_uart() { 178 | if csrr!(mvendorid) == QEMU_VENDOR_ID { 179 | let mut writer = SHARED_STATICS.uart_writer.lock(); 180 | *writer = UartWriter { 181 | pa: 0x10000000, 182 | inner: UartWriterInner::Ns16550a { initialized: false }, 183 | } 184 | } else { 185 | // probably SiFive; just use the value already configured. 186 | } 187 | } 188 | -------------------------------------------------------------------------------- /src/pfault.rs: -------------------------------------------------------------------------------- 1 | use crate::context::Context; 2 | use crate::riscv::bits::SATP_PPN; 3 | use crate::{pmap::*, riscv, virtio}; 4 | use riscv_decode::Instruction; 5 | 6 | /// Perform any handling required in response to a guest page fault. Returns true if the fault could 7 | /// be handled, or false if it should be forwarded on to the guest. 8 | pub fn handle_page_fault(state: &mut Context, cause: u64, instruction: Option) -> bool { 9 | let shadow = state.shadow(); 10 | if shadow == PageTableRoot::MPA { 11 | println!("Page fault without guest paging enabled?"); 12 | return false; 13 | } 14 | 15 | let guest_va = csrr!(stval); 16 | //assert!((guest_va & SV39_MASK) < (511 << 30)); 17 | 18 | let access = match cause { 19 | 12 => PTE_EXECUTE, 20 | 13 => PTE_READ, 21 | 15 => PTE_WRITE, 22 | _ => unreachable!(), 23 | }; 24 | 25 | let page = guest_va & !0xfff; 26 | if let Some(translation) = translate_guest_address(&state.guest_memory, (state.csrs.satp & SATP_PPN) << 12, page) { 27 | // Check R/W/X bits 28 | if translation.pte_value & access == 0 { 29 | return false; 30 | } 31 | 32 | // Check U bit 33 | match shadow { 34 | PageTableRoot::UVA => if translation.pte_value & PTE_USER == 0 { return false; } 35 | PageTableRoot::KVA => if translation.pte_value & PTE_USER != 0 { return false; } 36 | PageTableRoot::MVA => {} 37 | _ => unreachable!(), 38 | } 39 | 40 | if state.guest_memory.in_region(translation.guest_pa) { 41 | let host_pa = translation.guest_pa + state.guest_shift; 42 | 43 | // Set A and D bits 44 | let new_pte = if (translation.pte_value & PTE_DIRTY) == 0 && access == PTE_WRITE { 45 | translation.pte_value | PTE_DIRTY | PTE_ACCESSED 46 | } else if (translation.pte_value & PTE_ACCESSED) == 0 { 47 | translation.pte_value | PTE_ACCESSED 48 | } else { 49 | translation.pte_value 50 | }; 51 | 52 | if new_pte != translation.pte_value { 53 | // TODO: do this atomically 54 | state.guest_memory[translation.pte_addr] = new_pte; 55 | } 56 | 57 | let perm = if (new_pte & PTE_DIRTY) == 0 && access != PTE_WRITE { 58 | (new_pte & (PTE_READ | PTE_EXECUTE)) 59 | } else { 60 | (new_pte & (PTE_READ | PTE_WRITE | PTE_EXECUTE)) 61 | }; 62 | 63 | if virtio::is_queue_access(state, translation.guest_pa) { 64 | let guest_pa = (translation.guest_pa & !0xfff) | (guest_va & 0xfff); 65 | let host_pa = (host_pa & !0xfff) | (guest_va & 0xfff); 66 | let instruction = instruction.expect("attempted to execute code from virtio queue page"); 67 | return virtio::handle_queue_access(state, guest_pa, host_pa, instruction); 68 | } 69 | 70 | let reserved_bits = match translation.level { 71 | PageTableLevel::Level4KB => 0x000, 72 | PageTableLevel::Level2MB => 0x100, 73 | PageTableLevel::Level1GB => 0x200, 74 | }; 75 | 76 | let new_shadow_pte = (host_pa >> 2) | reserved_bits | perm | PTE_AD | PTE_USER | PTE_VALID; 77 | let old_shadow_pte = state.shadow_page_tables.rmw_mapping(shadow, page, new_shadow_pte); 78 | 79 | // Flushing the TLB entry for a virtual address can be very expensive and we only need 80 | // to do one here if the processor cache invalid TLB entries. The logic below attempts 81 | // to detect whether invalid PTEs are being cached, and if so sets a flag so that future 82 | // page faults will trigger a flush. 83 | if state.tlb_caches_invalid_ptes { 84 | riscv::sfence_vma_addr(guest_va); 85 | } else if new_shadow_pte == old_shadow_pte { 86 | state.consecutive_page_fault_count += 1; 87 | if state.consecutive_page_fault_count == 10 { 88 | state.tlb_caches_invalid_ptes = true; 89 | } 90 | } else { 91 | state.consecutive_page_fault_count = 1; 92 | } 93 | 94 | return true; 95 | } else if access != PTE_EXECUTE && state.smode { 96 | let pa = (translation.guest_pa & !0xfff) | (guest_va & 0xfff); 97 | if let Some(instruction) = instruction { 98 | if is_uart_access(pa) { 99 | return handle_uart_access(state, pa, instruction); 100 | } 101 | 102 | if is_plic_access(pa) { 103 | return handle_plic_access(state, pa, instruction) 104 | } 105 | 106 | if virtio::is_device_access(state, pa) { 107 | return virtio::handle_device_access(state, pa, instruction); 108 | } 109 | } 110 | } 111 | } 112 | 113 | false 114 | } 115 | 116 | #[inline(always)] 117 | fn is_uart_access(guest_pa: u64) -> bool { 118 | guest_pa >= 0x10000000 && guest_pa < 0x10000100 119 | } 120 | fn handle_uart_access(state: &mut Context, guest_pa: u64, instruction: u32) -> bool { 121 | match riscv_decode::decode(instruction).ok() { 122 | Some(Instruction::Lb(i)) => { 123 | let value = state.uart.read(&state.host_clint, guest_pa) as u64; 124 | state.saved_registers.set(i.rd(), value); 125 | } 126 | Some(Instruction::Sb(i)) => { 127 | let value = (state.saved_registers.get(i.rs2()) & 0xff) as u8; 128 | state.uart.write(&state.host_clint, guest_pa, value); 129 | } 130 | Some(instr) => { 131 | println!("UART: Instruction {:?} used to target addr {:#x} from pc {:#x}", instr, guest_pa, csrr!(sepc)); 132 | loop {} 133 | } 134 | _ => return false, 135 | } 136 | riscv::set_sepc(csrr!(sepc) + riscv_decode::instruction_length(instruction as u16) as u64); 137 | true 138 | } 139 | 140 | #[inline(always)] 141 | fn is_plic_access(guest_pa: u64) -> bool { 142 | guest_pa >= 0x0c000000 && guest_pa < 0x10000000 143 | } 144 | fn handle_plic_access(state: &mut Context, guest_pa: u64, instruction: u32) -> bool { 145 | match riscv_decode::decode(instruction).ok() { 146 | Some(Instruction::Lw(i)) => { 147 | let value = state.plic.read_u32(guest_pa) as i32 as i64 as u64; 148 | // println!("PLIC: Read value {:#x} at address {:#x}", value, guest_pa); 149 | state.saved_registers.set(i.rd(), value) 150 | } 151 | Some(Instruction::Sw(i)) => { 152 | let value = state.saved_registers.get(i.rs2()) as u32; 153 | // println!("PLIC: Writing {:#x} to address {:#x}", value, guest_pa); 154 | 155 | let mut clear_seip = false; 156 | state.plic.write_u32(guest_pa, value, &mut clear_seip); 157 | if clear_seip { 158 | state.csrs.sip &= !0x200; 159 | } 160 | state.no_interrupt = false; 161 | } 162 | Some(instr) => { 163 | println!("PLIC: Instruction {:?} used to target addr {:#x} from pc {:#x}", instr, guest_pa, csrr!(sepc)); 164 | loop {} 165 | } 166 | _ => { 167 | println!("Unrecognized instruction targetting PLIC {:#x} at {:#x}!", instruction, csrr!(sepc)); 168 | loop {} 169 | } 170 | } 171 | riscv::set_sepc(csrr!(sepc) + riscv_decode::instruction_length(instruction as u16) as u64); 172 | true 173 | } 174 | -------------------------------------------------------------------------------- /src/pmp.rs: -------------------------------------------------------------------------------- 1 | 2 | use rvirt::*; 3 | 4 | pub unsafe fn write_pmp_config(entry: u8, config: u8) { 5 | assert!(entry <= 15, "entry out of range"); 6 | 7 | let shift = (entry & 7) * 8; 8 | if entry < 8 { 9 | csrc!(pmpcfg0, (0xFF as u64) << shift); 10 | csrs!(pmpcfg0, (config as u64) << shift); 11 | } else { 12 | csrc!(pmpcfg2, (0xFF as u64) << shift); 13 | csrs!(pmpcfg2, (config as u64) << shift); 14 | } 15 | } 16 | 17 | pub fn read_pmp_config(entry: u8) -> u8 { 18 | assert!(entry <= 15, "entry out of range"); 19 | 20 | let shift = (entry & 7) * 8; 21 | let reg = if entry < 8 { 22 | csrr!(pmpcfg0) 23 | } else { 24 | csrr!(pmpcfg2) 25 | }; 26 | (reg >> shift) as u8 27 | } 28 | 29 | pub fn read_pmp_address(entry: u8) -> u64 { 30 | // come up with a better solution to this 31 | // (though apparently CSR instructions are hard-coded by CSR, so that might be hard?) 32 | match entry { 33 | 0 => csrr!(pmpaddr0), 34 | 1 => csrr!(pmpaddr1), 35 | 2 => csrr!(pmpaddr2), 36 | 3 => csrr!(pmpaddr3), 37 | 4 => csrr!(pmpaddr4), 38 | 5 => csrr!(pmpaddr5), 39 | 6 => csrr!(pmpaddr6), 40 | 7 => csrr!(pmpaddr7), 41 | 8 => csrr!(pmpaddr8), 42 | 9 => csrr!(pmpaddr9), 43 | 10 => csrr!(pmpaddr10), 44 | 11 => csrr!(pmpaddr11), 45 | 12 => csrr!(pmpaddr12), 46 | 13 => csrr!(pmpaddr13), 47 | 14 => csrr!(pmpaddr14), 48 | 15 => csrr!(pmpaddr15), 49 | _ => unreachable!("entry out of range"), 50 | } 51 | } 52 | 53 | pub unsafe fn write_pmp_address(entry: u8, address: u64) { 54 | // come up with a better solution to this 55 | // (though apparently CSR instructions are hard-coded by CSR, so that might be hard?) 56 | match entry { 57 | 0 => csrw!(pmpaddr0, address), 58 | 1 => csrw!(pmpaddr1, address), 59 | 2 => csrw!(pmpaddr2, address), 60 | 3 => csrw!(pmpaddr3, address), 61 | 4 => csrw!(pmpaddr4, address), 62 | 5 => csrw!(pmpaddr5, address), 63 | 6 => csrw!(pmpaddr6, address), 64 | 7 => csrw!(pmpaddr7, address), 65 | 8 => csrw!(pmpaddr8, address), 66 | 9 => csrw!(pmpaddr9, address), 67 | 10 => csrw!(pmpaddr10, address), 68 | 11 => csrw!(pmpaddr11, address), 69 | 12 => csrw!(pmpaddr12, address), 70 | 13 => csrw!(pmpaddr13, address), 71 | 14 => csrw!(pmpaddr14, address), 72 | 15 => csrw!(pmpaddr15, address), 73 | _ => unreachable!("entry out of range"), 74 | } 75 | } 76 | 77 | // note: these updates are not atomic. don't let interrupts happen during them! 78 | pub unsafe fn install_pmp(entry: u8, config: u8, address: u64) { 79 | assert!((read_pmp_config(entry) & LOCK) == 0, "attempt to modify locked PMP entry"); 80 | write_pmp_address(entry, address); 81 | write_pmp_config(entry, config); 82 | } 83 | 84 | pub unsafe fn install_pmp_napot(entry: u8, config: u8, address: u64, size: u64) { 85 | assert_eq!(address & 3, 0, "addresses must be 4-byte aligned"); 86 | 87 | if size == 4 { 88 | install_pmp(entry, config | MODE_NA4, address >> 2); 89 | } else { 90 | assert!(size.is_power_of_two(), "attempt to install not-power-of-two napot value"); 91 | assert_eq!(address & (size - 1), 0, "attempt to install unnaturally-aligned address"); 92 | assert!(size >= 8, "attempt to install too-small napot value"); 93 | 94 | install_pmp(entry, config | MODE_NAPOT, (address >> 2) + (size / 8 - 1)); 95 | } 96 | } 97 | 98 | // cover everything in memory 99 | pub unsafe fn install_pmp_allmem(entry: u8, config: u8) { 100 | // 0xFFFFFFFFFFFFFFFF is reserved as of priv-1.10, but fixed in an unreleased spec, and QEMU 101 | // interprets it correctly, so we're just going to go with it. 102 | install_pmp(entry, config | MODE_NAPOT, 0xFFFFFFFF_FFFFFFFF); 103 | } 104 | 105 | // returns (bits, remaining). 106 | fn extract_napot_bits(address: u64) -> (u8, u64) { 107 | let mut bits = 0; 108 | let mut shifted = address; 109 | while (shifted & 1) == 1 { 110 | bits += 1; 111 | shifted >>= 1; 112 | } 113 | (bits, shifted << bits) 114 | } 115 | 116 | // if this is the first entry, set lastconfig = lastaddressreg = 0 117 | // return value is [low, high) -- so low is inclusive and high is exclusive 118 | pub fn decode_pmp_range(config: u8, address: u64, _lastconfig: u8, lastaddress: u64) -> (u64, u64) { 119 | match (config >> PMP_A_SHIFT) & 3 { 120 | PMP_A_OFF => (0, 0), 121 | PMP_A_TOR => (lastaddress << 2, address << 2), 122 | PMP_A_NA4 => (address << 2, (address << 2) + 4), 123 | PMP_A_NAPOT => { 124 | if address == 0xFFFFFFFF_FFFFFFFF { 125 | // covers everything, both per latest unreleased spec and QEMU interpretation 126 | (0, 0) 127 | } else { 128 | let (bits, address) = extract_napot_bits(address); 129 | (address << 2, (address << 2) + (8 << bits)) 130 | } 131 | } 132 | _ => unreachable!() 133 | } 134 | } 135 | 136 | pub const READ: u8 = 0x1; 137 | pub const WRITE: u8 = 0x2; 138 | pub const EXEC: u8 = 0x4; 139 | // for decoding 140 | const PMP_A_SHIFT: u8 = 3; 141 | const PMP_A_OFF: u8 = 0x0; 142 | const PMP_A_TOR: u8 = 0x1; 143 | const PMP_A_NA4: u8 = 0x2; 144 | const PMP_A_NAPOT: u8 = 0x3; 145 | // for encoding 146 | #[allow(unused)] 147 | pub const MODE_OFF: u8 = PMP_A_OFF << PMP_A_SHIFT; 148 | #[allow(unused)] 149 | pub const MODE_TOR: u8 = PMP_A_TOR << PMP_A_SHIFT; 150 | pub const MODE_NA4: u8 = PMP_A_NA4 << PMP_A_SHIFT; 151 | pub const MODE_NAPOT: u8 = PMP_A_NAPOT << PMP_A_SHIFT; 152 | pub const RESERVED1: u8 = 0x20; 153 | pub const RESERVED2: u8 = 0x40; 154 | pub const LOCK: u8 = 0x80; 155 | 156 | /** prints out as much information on the PMP state as possible in M-mode */ 157 | pub fn debug_pmp() { 158 | let hart = csrr!(mhartid); 159 | println!("============================== PMP CONFIGURATION STATE (hart {}) =============================", hart); 160 | println!(" R W X AMODE RES1 RES2 LOCK ADDRESS (raw) ADDRESS (low) ADDRESS (high)"); 161 | let mut lastconfig= 0; 162 | let mut lastaddress = 0; 163 | for entry in 0..16 { 164 | let config = read_pmp_config(entry); 165 | let address = read_pmp_address(entry); 166 | print!("pmp{: <2}", entry); 167 | print!(" ==> "); 168 | if config & READ != 0 { 169 | print!("R "); 170 | } else { 171 | print!("- "); 172 | } 173 | if config & WRITE != 0 { 174 | print!("W "); 175 | } else { 176 | print!("- "); 177 | } 178 | if config & EXEC != 0 { 179 | print!("X "); 180 | } else { 181 | print!("- "); 182 | } 183 | let mode = (config >> PMP_A_SHIFT) & 3; 184 | match mode { 185 | PMP_A_OFF => print!(" OFF "), 186 | PMP_A_TOR => print!(" TOR "), 187 | PMP_A_NA4 => print!(" NA4 "), 188 | PMP_A_NAPOT => print!("NAPOT "), 189 | _ => unreachable!() 190 | }; 191 | if config & RESERVED1 != 0 { 192 | print!("res1 "); 193 | } else { 194 | print!(" "); 195 | } 196 | if config & RESERVED2 != 0 { 197 | print!("res2 "); 198 | } else { 199 | print!(" "); 200 | } 201 | if config & LOCK != 0 { 202 | print!("lock "); 203 | } else { 204 | print!(" "); 205 | } 206 | print!("{:016x}", address); 207 | if mode != PMP_A_OFF { 208 | let (low, high) = decode_pmp_range(config, address, lastconfig, lastaddress); 209 | print!(" {:016x} {:016x}", low, high.wrapping_sub(1)); 210 | } 211 | println!(""); 212 | lastconfig = config; 213 | lastaddress = address; 214 | } 215 | println!("================================== END CONFIGURATION STATE =================================="); 216 | } 217 | -------------------------------------------------------------------------------- /virtio-order.md: -------------------------------------------------------------------------------- 1 | How can we reorder virtio devices in QEMU? 2 | 3 | # How to inspect the virtio mapping in a guest VM 4 | 5 | $ mount -t sysfs sys /sys # only needed if using init=/bin/bash 6 | 7 | $ ls /sys/devices/platform/*.virtio_mmio 8 | /sys/devices/platform/10001000.virtio_mmio: 9 | driver_override modalias of_node subsystem uevent 10 | 11 | /sys/devices/platform/10002000.virtio_mmio: 12 | driver_override modalias of_node subsystem uevent 13 | 14 | /sys/devices/platform/10003000.virtio_mmio: 15 | driver_override modalias of_node subsystem uevent 16 | 17 | /sys/devices/platform/10004000.virtio_mmio: 18 | driver_override modalias of_node subsystem uevent 19 | 20 | /sys/devices/platform/10005000.virtio_mmio: 21 | driver_override modalias of_node subsystem uevent 22 | 23 | /sys/devices/platform/10006000.virtio_mmio: 24 | driver driver_override modalias of_node subsystem uevent virtio2 25 | 26 | /sys/devices/platform/10007000.virtio_mmio: 27 | driver driver_override modalias of_node subsystem uevent virtio1 28 | 29 | /sys/devices/platform/10008000.virtio_mmio: 30 | driver driver_override modalias of_node subsystem uevent virtio0 31 | 32 | $ ls -l /sys/devices/platform/*.virtio_mmio/virtio*/driver 33 | lrwxrwxrwx 1 root root 0 Jan 1 00:02 /sys/devices/platform/10006000.virtio_mmio/virtio2/driver -> ../../../../bus/virtio/drivers/virtio_net 34 | lrwxrwxrwx 1 root root 0 Jan 1 00:02 /sys/devices/platform/10007000.virtio_mmio/virtio1/driver -> ../../../../bus/virtio/drivers/virtio_blk 35 | 36 | # Inspecting the QEMU device tree 37 | 38 | Hit C-a C to enter the monitor. 39 | 40 | Now let's run the command to inspect the device tree: 41 | 42 | (qemu) info qtree 43 | bus: main-system-bus 44 | type System 45 | [ ... ] 46 | dev: virtio-mmio, id "" 47 | [ ... ] 48 | mmio 0000000010008000/0000000000000200 49 | bus: virtio-mmio-bus.7 50 | type virtio-mmio-bus 51 | dev: virtio-rng-device, id "" 52 | [ ... ] 53 | dev: virtio-mmio, id "" 54 | [ ... ] 55 | mmio 0000000010007000/0000000000000200 56 | bus: virtio-mmio-bus.6 57 | type virtio-mmio-bus 58 | dev: virtio-blk-device, id "" 59 | [ ... ] 60 | dev: virtio-mmio, id "" 61 | [ ... ] 62 | mmio 0000000010006000/0000000000000200 63 | bus: virtio-mmio-bus.5 64 | type virtio-mmio-bus 65 | dev: virtio-net-device, id "" 66 | [ ... ] 67 | dev: virtio-mmio, id "" 68 | [ ... ] 69 | mmio 0000000010005000/0000000000000200 70 | bus: virtio-mmio-bus.4 71 | type virtio-mmio-bus 72 | dev: virtio-mmio, id "" 73 | [ ... ] 74 | mmio 0000000010004000/0000000000000200 75 | bus: virtio-mmio-bus.3 76 | type virtio-mmio-bus 77 | dev: virtio-mmio, id "" 78 | [ ... ] 79 | mmio 0000000010003000/0000000000000200 80 | bus: virtio-mmio-bus.2 81 | type virtio-mmio-bus 82 | dev: virtio-mmio, id "" 83 | [ ... ] 84 | mmio 0000000010002000/0000000000000200 85 | bus: virtio-mmio-bus.1 86 | type virtio-mmio-bus 87 | dev: virtio-mmio, id "" 88 | [ ... ] 89 | mmio 0000000010001000/0000000000000200 90 | bus: virtio-mmio-bus.0 91 | type virtio-mmio-bus 92 | [ ... ] 93 | 94 | Note that the virtio-mmio-bus devices are in reverse order. This means that the command line entries are actually being 95 | placed into memory in the order from highest address to lowest address. The linux kernel considers higher-address 96 | devices as deserving lower numbers, so the first specified device will be virtio0, the second will be virtio1, and so 97 | on. 98 | 99 | Note that virtioN devices are numbered by order, and not by slot. If they were in the last three slots instead of the 100 | first three, they'd show up with the same names as far as Linux userland cares. (Except for the differing MMIO bus 101 | addresses in sysfs.) 102 | 103 | # How to map virtio devices 104 | 105 | The previously-shown mapping is for the following QEMU configuration: 106 | 107 | -object rng-random,filename=/dev/urandom,id=rng0 108 | -device virtio-rng-device,rng=rng0 109 | -device virtio-blk-device,drive=hd0 110 | -drive file=stage4-disk.img,format=raw,id=hd0 111 | -device virtio-net-device,netdev=usernet 112 | -netdev user,id=usernet,hostfwd=tcp::10000-:22 113 | 114 | We can force which virtio device each corresponds to by including a bus=virtio-mmio-bus.N property, corresponding to the 115 | names in the device tree shown previously. For example (not including the other required parameters): 116 | 117 | -device virtio-rng-device,rng=rng0,bus=virtio-mmio-bus.6 118 | -device virtio-blk-device,drive=hd0,bus=virtio-mmio-bus.1 119 | -device virtio-net-device,netdev=usernet,bus=virtio-mmio-bus.3 120 | 121 | This configuration would yield the following device tree: 122 | 123 | (qemu) info qtree 124 | bus: main-system-bus 125 | type System 126 | [ ... ] 127 | dev: virtio-mmio, id "" 128 | [ ... ] 129 | mmio 0000000010008000/0000000000000200 130 | bus: virtio-mmio-bus.7 131 | type virtio-mmio-bus 132 | dev: virtio-mmio, id "" 133 | [ ... ] 134 | mmio 0000000010007000/0000000000000200 135 | bus: virtio-mmio-bus.6 136 | type virtio-mmio-bus 137 | dev: virtio-rng-device, id "" 138 | [ ... ] 139 | dev: virtio-mmio, id "" 140 | [ ... ] 141 | mmio 0000000010006000/0000000000000200 142 | bus: virtio-mmio-bus.5 143 | type virtio-mmio-bus 144 | dev: virtio-mmio, id "" 145 | [ ... ] 146 | mmio 0000000010005000/0000000000000200 147 | bus: virtio-mmio-bus.4 148 | type virtio-mmio-bus 149 | dev: virtio-mmio, id "" 150 | [ ... ] 151 | mmio 0000000010004000/0000000000000200 152 | bus: virtio-mmio-bus.3 153 | type virtio-mmio-bus 154 | dev: virtio-net-device, id "" 155 | [ ... ] 156 | dev: virtio-mmio, id "" 157 | [ ... ] 158 | mmio 0000000010003000/0000000000000200 159 | bus: virtio-mmio-bus.2 160 | type virtio-mmio-bus 161 | dev: virtio-mmio, id "" 162 | [ ... ] 163 | mmio 0000000010002000/0000000000000200 164 | bus: virtio-mmio-bus.1 165 | type virtio-mmio-bus 166 | dev: virtio-blk-device, id "" 167 | [ ... ] 168 | dev: virtio-mmio, id "" 169 | [ ... ] 170 | mmio 0000000010001000/0000000000000200 171 | bus: virtio-mmio-bus.0 172 | type virtio-mmio-bus 173 | [ ... ] 174 | 175 | Note how the positions of the virtio devices have shifted to new buses in accordance with our configuration. 176 | 177 | Similarly, we can observe how this looks from Linux: 178 | 179 | $ ls /sys/devices/platform/*.virtio_mmio 180 | /sys/devices/platform/10001000.virtio_mmio: 181 | driver_override modalias of_node subsystem uevent 182 | 183 | /sys/devices/platform/10002000.virtio_mmio: 184 | driver driver_override modalias of_node subsystem uevent virtio2 185 | 186 | /sys/devices/platform/10003000.virtio_mmio: 187 | driver_override modalias of_node subsystem uevent 188 | 189 | /sys/devices/platform/10004000.virtio_mmio: 190 | driver driver_override modalias of_node subsystem uevent virtio1 191 | 192 | /sys/devices/platform/10005000.virtio_mmio: 193 | driver_override modalias of_node subsystem uevent 194 | 195 | /sys/devices/platform/10006000.virtio_mmio: 196 | driver_override modalias of_node subsystem uevent 197 | 198 | /sys/devices/platform/10007000.virtio_mmio: 199 | driver driver_override modalias of_node subsystem uevent virtio0 200 | 201 | /sys/devices/platform/10008000.virtio_mmio: 202 | driver_override modalias of_node subsystem uevent 203 | 204 | $ ls -l /sys/devices/platform/*.virtio_mmio/virtio*/driver 205 | lrwxrwxrwx 1 root root 0 Jan 1 00:00 /sys/devices/platform/10002000.virtio_mmio/virtio2/driver -> ../../../../bus/virtio/drivers/virtio_blk 206 | lrwxrwxrwx 1 root root 0 Jan 1 00:00 /sys/devices/platform/10004000.virtio_mmio/virtio1/driver -> ../../../../bus/virtio/drivers/virtio_net 207 | 208 | Note how the slots changed, and the ordering changed, but the devices are still numbered virtio0, virtio1, and virtio2. 209 | (Although which device is which of virtio0, virtio1, and virtio2 has indeed changed.) 210 | -------------------------------------------------------------------------------- /src/riscv/csr.rs: -------------------------------------------------------------------------------- 1 | #![allow(non_upper_case_globals)] 2 | #![allow(unused)] 3 | 4 | pub const ustatus: u64 = 0x000; 5 | pub const uie: u64 = 0x004; 6 | pub const utvec: u64 = 0x005; 7 | pub const uscratch: u64 = 0x040; 8 | pub const uepc: u64 = 0x041; 9 | pub const ucause: u64 = 0x042; 10 | pub const utval: u64 = 0x043; 11 | pub const uip: u64 = 0x044; 12 | pub const fflags: u64 = 0x001; 13 | pub const frm: u64 = 0x002; 14 | pub const fcsr: u64 = 0x003; 15 | pub const cycle: u64 = 0xc00; 16 | pub const time: u64 = 0xc01; 17 | pub const instret: u64 = 0xc02; 18 | pub const hpmcounter3: u64 = 0xc03; 19 | pub const hpmcounter4: u64 = 0xc04; 20 | pub const hpmcounter5: u64 = 0xc05; 21 | pub const hpmcounter6: u64 = 0xc06; 22 | pub const hpmcounter7: u64 = 0xc07; 23 | pub const hpmcounter8: u64 = 0xc08; 24 | pub const hpmcounter9: u64 = 0xc09; 25 | pub const hpmcounter10: u64 = 0xc0a; 26 | pub const hpmcounter11: u64 = 0xc0b; 27 | pub const hpmcounter12: u64 = 0xc0c; 28 | pub const hpmcounter13: u64 = 0xc0d; 29 | pub const hpmcounter14: u64 = 0xc0e; 30 | pub const hpmcounter15: u64 = 0xc0f; 31 | pub const hpmcounter16: u64 = 0xc10; 32 | pub const hpmcounter17: u64 = 0xc11; 33 | pub const hpmcounter18: u64 = 0xc12; 34 | pub const hpmcounter19: u64 = 0xc13; 35 | pub const hpmcounter20: u64 = 0xc14; 36 | pub const hpmcounter21: u64 = 0xc15; 37 | pub const hpmcounter22: u64 = 0xc16; 38 | pub const hpmcounter23: u64 = 0xc17; 39 | pub const hpmcounter24: u64 = 0xc18; 40 | pub const hpmcounter25: u64 = 0xc19; 41 | pub const hpmcounter26: u64 = 0xc1a; 42 | pub const hpmcounter27: u64 = 0xc1b; 43 | pub const hpmcounter28: u64 = 0xc1c; 44 | pub const hpmcounter29: u64 = 0xc1d; 45 | pub const hpmcounter30: u64 = 0xc1e; 46 | pub const hpmcounter31: u64 = 0xc1f; 47 | pub const cycleh: u64 = 0xc80; 48 | pub const timeh: u64 = 0xc81; 49 | pub const instreth: u64 = 0xc82; 50 | pub const hpmcounter3h: u64 = 0xc83; 51 | pub const hpmcounter4h: u64 = 0xc84; 52 | pub const hpmcounter5h: u64 = 0xc85; 53 | pub const hpmcounter6h: u64 = 0xc86; 54 | pub const hpmcounter7h: u64 = 0xc87; 55 | pub const hpmcounter8h: u64 = 0xc88; 56 | pub const hpmcounter9h: u64 = 0xc89; 57 | pub const hpmcounter10h: u64 = 0xc8a; 58 | pub const hpmcounter11h: u64 = 0xc8b; 59 | pub const hpmcounter12h: u64 = 0xc8c; 60 | pub const hpmcounter13h: u64 = 0xc8d; 61 | pub const hpmcounter14h: u64 = 0xc8e; 62 | pub const hpmcounter15h: u64 = 0xc8f; 63 | pub const hpmcounter16h: u64 = 0xc90; 64 | pub const hpmcounter17h: u64 = 0xc91; 65 | pub const hpmcounter18h: u64 = 0xc92; 66 | pub const hpmcounter19h: u64 = 0xc93; 67 | pub const hpmcounter20h: u64 = 0xc94; 68 | pub const hpmcounter21h: u64 = 0xc95; 69 | pub const hpmcounter22h: u64 = 0xc96; 70 | pub const hpmcounter23h: u64 = 0xc97; 71 | pub const hpmcounter24h: u64 = 0xc98; 72 | pub const hpmcounter25h: u64 = 0xc99; 73 | pub const hpmcounter26h: u64 = 0xc9a; 74 | pub const hpmcounter27h: u64 = 0xc9b; 75 | pub const hpmcounter28h: u64 = 0xc9c; 76 | pub const hpmcounter29h: u64 = 0xc9d; 77 | pub const hpmcounter30h: u64 = 0xc9e; 78 | pub const hpmcounter31h: u64 = 0xc9f; 79 | pub const mcycle: u64 = 0xb00; 80 | pub const minstret: u64 = 0xb02; 81 | pub const mcycleh: u64 = 0xb80; 82 | pub const minstreth: u64 = 0xb82; 83 | pub const mvendorid: u64 = 0xf11; 84 | pub const marchid: u64 = 0xf12; 85 | pub const mimpid: u64 = 0xf13; 86 | pub const mhartid: u64 = 0xf14; 87 | pub const mstatus: u64 = 0x300; 88 | pub const misa: u64 = 0x301; 89 | pub const medeleg: u64 = 0x302; 90 | pub const mideleg: u64 = 0x303; 91 | pub const mie: u64 = 0x304; 92 | pub const mtvec: u64 = 0x305; 93 | pub const mcounteren: u64 = 0x306; 94 | pub const mtvt: u64 = 0x307; 95 | pub const mucounteren: u64 = 0x320; 96 | pub const mscounteren: u64 = 0x321; 97 | pub const mscratch: u64 = 0x340; 98 | pub const mepc: u64 = 0x341; 99 | pub const mcause: u64 = 0x342; 100 | pub const mbadaddr: u64 = 0x343; 101 | pub const mtval: u64 = 0x343; 102 | pub const mip: u64 = 0x344; 103 | pub const mnxti: u64 = 0x345; 104 | pub const mintstatus: u64 = 0x346; 105 | pub const mscratchcsw: u64 = 0x348; 106 | pub const sstatus: u64 = 0x100; 107 | pub const sedeleg: u64 = 0x102; 108 | pub const sideleg: u64 = 0x103; 109 | pub const sie: u64 = 0x104; 110 | pub const stvec: u64 = 0x105; 111 | pub const scounteren: u64 = 0x106; 112 | pub const stvt: u64 = 0x107; 113 | pub const sscratch: u64 = 0x140; 114 | pub const sepc: u64 = 0x141; 115 | pub const scause: u64 = 0x142; 116 | pub const sbadaddr: u64 = 0x143; 117 | pub const stval: u64 = 0x143; 118 | pub const sip: u64 = 0x144; 119 | pub const snxti: u64 = 0x145; 120 | pub const sintstatus: u64 = 0x146; 121 | pub const sscratchcsw: u64 = 0x148; 122 | pub const sptbr: u64 = 0x180; 123 | pub const satp: u64 = 0x180; 124 | pub const pmpcfg0: u64 = 0x3a0; 125 | pub const pmpcfg1: u64 = 0x3a1; 126 | pub const pmpcfg2: u64 = 0x3a2; 127 | pub const pmpcfg3: u64 = 0x3a3; 128 | pub const pmpaddr0: u64 = 0x3b0; 129 | pub const pmpaddr1: u64 = 0x3b1; 130 | pub const pmpaddr2: u64 = 0x3b2; 131 | pub const pmpaddr3: u64 = 0x3b3; 132 | pub const pmpaddr4: u64 = 0x3b4; 133 | pub const pmpaddr5: u64 = 0x3b5; 134 | pub const pmpaddr6: u64 = 0x3b6; 135 | pub const pmpaddr7: u64 = 0x3b7; 136 | pub const pmpaddr8: u64 = 0x3b8; 137 | pub const pmpaddr9: u64 = 0x3b9; 138 | pub const pmpaddr10: u64 = 0x3ba; 139 | pub const pmpaddr11: u64 = 0x3bb; 140 | pub const pmpaddr12: u64 = 0x3bc; 141 | pub const pmpaddr13: u64 = 0x3bd; 142 | pub const pmpaddr14: u64 = 0x3be; 143 | pub const pmpaddr15: u64 = 0x3bf; 144 | pub const tselect: u64 = 0x7a0; 145 | pub const tdata1: u64 = 0x7a1; 146 | pub const tdata2: u64 = 0x7a2; 147 | pub const tdata3: u64 = 0x7a3; 148 | pub const dcsr: u64 = 0x7b0; 149 | pub const dpc: u64 = 0x7b1; 150 | pub const dscratch: u64 = 0x7b2; 151 | pub const mhpmcounter3: u64 = 0xb03; 152 | pub const mhpmcounter4: u64 = 0xb04; 153 | pub const mhpmcounter5: u64 = 0xb05; 154 | pub const mhpmcounter6: u64 = 0xb06; 155 | pub const mhpmcounter7: u64 = 0xb07; 156 | pub const mhpmcounter8: u64 = 0xb08; 157 | pub const mhpmcounter9: u64 = 0xb09; 158 | pub const mhpmcounter10: u64 = 0xb0a; 159 | pub const mhpmcounter11: u64 = 0xb0b; 160 | pub const mhpmcounter12: u64 = 0xb0c; 161 | pub const mhpmcounter13: u64 = 0xb0d; 162 | pub const mhpmcounter14: u64 = 0xb0e; 163 | pub const mhpmcounter15: u64 = 0xb0f; 164 | pub const mhpmcounter16: u64 = 0xb10; 165 | pub const mhpmcounter17: u64 = 0xb11; 166 | pub const mhpmcounter18: u64 = 0xb12; 167 | pub const mhpmcounter19: u64 = 0xb13; 168 | pub const mhpmcounter20: u64 = 0xb14; 169 | pub const mhpmcounter21: u64 = 0xb15; 170 | pub const mhpmcounter22: u64 = 0xb16; 171 | pub const mhpmcounter23: u64 = 0xb17; 172 | pub const mhpmcounter24: u64 = 0xb18; 173 | pub const mhpmcounter25: u64 = 0xb19; 174 | pub const mhpmcounter26: u64 = 0xb1a; 175 | pub const mhpmcounter27: u64 = 0xb1b; 176 | pub const mhpmcounter28: u64 = 0xb1c; 177 | pub const mhpmcounter29: u64 = 0xb1d; 178 | pub const mhpmcounter30: u64 = 0xb1e; 179 | pub const mhpmcounter31: u64 = 0xb1f; 180 | pub const mhpmevent3: u64 = 0x323; 181 | pub const mhpmevent4: u64 = 0x324; 182 | pub const mhpmevent5: u64 = 0x325; 183 | pub const mhpmevent6: u64 = 0x326; 184 | pub const mhpmevent7: u64 = 0x327; 185 | pub const mhpmevent8: u64 = 0x328; 186 | pub const mhpmevent9: u64 = 0x329; 187 | pub const mhpmevent10: u64 = 0x32a; 188 | pub const mhpmevent11: u64 = 0x32b; 189 | pub const mhpmevent12: u64 = 0x32c; 190 | pub const mhpmevent13: u64 = 0x32d; 191 | pub const mhpmevent14: u64 = 0x32e; 192 | pub const mhpmevent15: u64 = 0x32f; 193 | pub const mhpmevent16: u64 = 0x330; 194 | pub const mhpmevent17: u64 = 0x331; 195 | pub const mhpmevent18: u64 = 0x332; 196 | pub const mhpmevent19: u64 = 0x333; 197 | pub const mhpmevent20: u64 = 0x334; 198 | pub const mhpmevent21: u64 = 0x335; 199 | pub const mhpmevent22: u64 = 0x336; 200 | pub const mhpmevent23: u64 = 0x337; 201 | pub const mhpmevent24: u64 = 0x338; 202 | pub const mhpmevent25: u64 = 0x339; 203 | pub const mhpmevent26: u64 = 0x33a; 204 | pub const mhpmevent27: u64 = 0x33b; 205 | pub const mhpmevent28: u64 = 0x33c; 206 | pub const mhpmevent29: u64 = 0x33d; 207 | pub const mhpmevent30: u64 = 0x33e; 208 | pub const mhpmevent31: u64 = 0x33f; 209 | pub const mhpmcounter3h: u64 = 0xb83; 210 | pub const mhpmcounter4h: u64 = 0xb84; 211 | pub const mhpmcounter5h: u64 = 0xb85; 212 | pub const mhpmcounter6h: u64 = 0xb86; 213 | pub const mhpmcounter7h: u64 = 0xb87; 214 | pub const mhpmcounter8h: u64 = 0xb88; 215 | pub const mhpmcounter9h: u64 = 0xb89; 216 | pub const mhpmcounter10h: u64 = 0xb8a; 217 | pub const mhpmcounter11h: u64 = 0xb8b; 218 | pub const mhpmcounter12h: u64 = 0xb8c; 219 | pub const mhpmcounter13h: u64 = 0xb8d; 220 | pub const mhpmcounter14h: u64 = 0xb8e; 221 | pub const mhpmcounter15h: u64 = 0xb8f; 222 | pub const mhpmcounter16h: u64 = 0xb90; 223 | pub const mhpmcounter17h: u64 = 0xb91; 224 | pub const mhpmcounter18h: u64 = 0xb92; 225 | pub const mhpmcounter19h: u64 = 0xb93; 226 | pub const mhpmcounter20h: u64 = 0xb94; 227 | pub const mhpmcounter21h: u64 = 0xb95; 228 | pub const mhpmcounter22h: u64 = 0xb96; 229 | pub const mhpmcounter23h: u64 = 0xb97; 230 | pub const mhpmcounter24h: u64 = 0xb98; 231 | pub const mhpmcounter25h: u64 = 0xb99; 232 | pub const mhpmcounter26h: u64 = 0xb9a; 233 | pub const mhpmcounter27h: u64 = 0xb9b; 234 | pub const mhpmcounter28h: u64 = 0xb9c; 235 | pub const mhpmcounter29h: u64 = 0xb9d; 236 | pub const mhpmcounter30h: u64 = 0xb9e; 237 | pub const mhpmcounter31h: u64 = 0xb9f; 238 | -------------------------------------------------------------------------------- /src/pagedebug.rs: -------------------------------------------------------------------------------- 1 | 2 | use rvirt::*; 3 | use crate::pagedebug::PageWalkError::{ErrUnmapped}; 4 | use crate::riscv::bits::STATUS_SUM; 5 | 6 | pub const MODE_NONE: u8 = 0; 7 | pub const MODE_SV39: u8 = 8; 8 | pub const MODE_SV48: u8 = 9; 9 | pub const MODE_SV57_RES: u8 = 10; 10 | pub const MODE_SV64_RES: u8 = 11; 11 | 12 | global_asm!(include_str!("loadaddress.S")); 13 | 14 | fn mode_to_str(mode: u8) -> &'static str { 15 | match mode { 16 | MODE_NONE => "bare: no translation or protection", 17 | MODE_SV39 => "sv39: page-based 39-bit virtual addressing", 18 | MODE_SV48 => "sv48: page-based 48-bit virtual addressing", 19 | MODE_SV57_RES => "sv57: reserved for page-based 57-bit virtual addressing", 20 | MODE_SV64_RES => "sv64: reserved for page-based 64-bit virtual addressing", 21 | _ => "reserved" 22 | } 23 | } 24 | 25 | // returns (mode, asid, ppn) 26 | fn parse_satp(satp: u64) -> (u8, u16, u64) { 27 | ((satp >> 60) as u8, (satp >> 44) as u16, satp & 0xfff_ffff_ffff) 28 | } 29 | 30 | const FLAG_VALID: u8 = 0x01; 31 | const FLAG_READ: u8 = 0x02; 32 | const FLAG_WRITE: u8 = 0x04; 33 | const FLAG_EXEC: u8 = 0x08; 34 | const FLAG_USER: u8 = 0x10; 35 | const FLAG_GLOBAL: u8 = 0x20; 36 | const FLAG_ACCESSED: u8 = 0x40; 37 | const FLAG_DIRTY: u8 = 0x80; 38 | 39 | #[derive(Clone)] 40 | #[derive(Copy)] 41 | #[derive(PartialEq)] 42 | enum PageWalkError { 43 | ErrNone, 44 | ErrUnmapped, 45 | ErrReserved, 46 | ErrTooDeep, 47 | ErrMisalignedSuperpage, 48 | } 49 | 50 | fn pwe_to_str(err: PageWalkError) -> &'static str { 51 | match err { 52 | PageWalkError::ErrNone => "ok", 53 | PageWalkError::ErrUnmapped => "unmapped", 54 | PageWalkError::ErrReserved => "reserved bit pattern in use", 55 | PageWalkError::ErrTooDeep => "page table is too deep", 56 | PageWalkError::ErrMisalignedSuperpage => "superpage is misaligned", 57 | } 58 | } 59 | 60 | type PageWalkerCallback = fn(flags: u8, rsw: u8, va: u64, pa: u64, len: u64, err: PageWalkError, data: &mut Data); 61 | 62 | const LEVELS_SV39: u8 = 3; 63 | const PTESIZE_SV39: u64 = 8; 64 | const SIGN_BITS_SV39: u8 = 64 - 39; 65 | const VPN_BITS_EACH: u8 = 9; 66 | const PPN_BITS_EACH: u8 = 9; 67 | const PAGE_BITS: u8 = 12; 68 | const PAGE_SIZE: u64 = 1u64 << PAGE_BITS; 69 | 70 | fn sign_extend(v: u64, bits: u8) -> u64 { 71 | (((v << bits) as i64) >> bits) as u64 72 | } 73 | 74 | // TODO: handle getting blocked by PMP 75 | unsafe fn walk_page_table_iter(a: u64, i: u8, vabase: u64, cb: PageWalkerCallback, data: &mut Data) { 76 | for entry in 0..512u64 { 77 | let pte = *((a + entry * PTESIZE_SV39) as *const u64); 78 | let ppn = (pte >> 10) & 0xfff_ffff_ffff; // mask because higher bits are reserved as of priv-v1.10 79 | let pabase = ppn << PAGE_BITS; 80 | let valocal = sign_extend(vabase + (entry << PAGE_BITS + VPN_BITS_EACH * i), SIGN_BITS_SV39); 81 | let flags = pte as u8; 82 | let rsw = ((pte >> 8) & 0x3) as u8; 83 | let pagelen = PAGE_SIZE << (PPN_BITS_EACH * i); 84 | 85 | let err; 86 | 87 | if (flags & (FLAG_VALID | FLAG_READ | FLAG_WRITE | FLAG_EXEC)) == FLAG_VALID { 88 | if i == 0 { 89 | err = PageWalkError::ErrTooDeep; 90 | } else { 91 | walk_page_table_iter(pabase, i - 1, valocal, cb, data); 92 | continue; 93 | } 94 | } else { 95 | if (flags & FLAG_VALID) == 0 { 96 | err = PageWalkError::ErrUnmapped; 97 | } else if (flags & (FLAG_VALID | FLAG_READ | FLAG_WRITE)) == (FLAG_VALID | FLAG_WRITE) { 98 | err = PageWalkError::ErrReserved; 99 | } else if (pabase & (pagelen - 1)) != 0 { 100 | err = PageWalkError::ErrMisalignedSuperpage; 101 | } else { 102 | err = PageWalkError::ErrNone; 103 | } 104 | } 105 | cb(flags, rsw, valocal, pabase, pagelen, err, data); 106 | if entry == 255 { 107 | cb(0, 0, 0x4000000000, 0, 0xffffff8000000000, ErrUnmapped, data); 108 | } 109 | } 110 | } 111 | 112 | unsafe fn walk_page_table(root: u64, cb: PageWalkerCallback, data: &mut Data) { 113 | walk_page_table_iter(root, LEVELS_SV39 - 1, 0, cb, data); 114 | } 115 | 116 | fn flag(flags: u8, f: &str, flag: u8) { 117 | let mut spaces = 1; 118 | if (flags & flag) == flag { 119 | print!("{}", f); 120 | } else { 121 | spaces += f.len(); 122 | } 123 | for _ in 0..spaces { 124 | print!(" "); 125 | } 126 | } 127 | 128 | struct CompressionWalker<'data, Data> { 129 | cb: PageWalkerCallback, 130 | data: &'data mut Data, 131 | haslast: bool, 132 | lastflags: u8, 133 | lastrsw: u8, 134 | totallen: u64, 135 | endva: u64, 136 | endpa: u64, 137 | lasterr: PageWalkError, 138 | } 139 | 140 | fn compression_walk(flags: u8, rsw: u8, va: u64, pa: u64, len: u64, err: PageWalkError, walker: &mut CompressionWalker) { 141 | if walker.haslast && (flags != walker.lastflags || rsw != walker.lastrsw || va != walker.endva || (pa != walker.endpa && err != ErrUnmapped) || err != walker.lasterr) { 142 | /*if flags != walker.lastflags { 143 | print!("FLAGS\n"); 144 | } 145 | if rsw != walker.lastrsw { 146 | print!("RSW\n"); 147 | } 148 | if va != walker.endva { 149 | print!("VA\n"); 150 | } 151 | if pa != walker.endpa && err != ErrUnmapped { 152 | print!("PA\n"); 153 | } 154 | if err != walker.lasterr { 155 | print!("ERR\n"); 156 | } 157 | print!("RETIRED\n");*/ 158 | // retire last entry 159 | (walker.cb)(walker.lastflags, walker.lastrsw, walker.endva - walker.totallen, walker.endpa - walker.totallen, walker.totallen, walker.lasterr, walker.data); 160 | walker.haslast = false; 161 | } 162 | if walker.haslast { 163 | // extend last entry 164 | walker.totallen += len; 165 | walker.endva += len; 166 | walker.endpa += len; 167 | } else { 168 | // create new entry 169 | walker.haslast = true; 170 | walker.lastflags = flags; 171 | walker.lastrsw = rsw; 172 | walker.totallen = len; 173 | walker.endva = va + len; 174 | walker.endpa = pa + len; 175 | walker.lasterr = err; 176 | } 177 | if va + len == 0 { // last entry; retire because we won't be coming back 178 | (walker.cb)(walker.lastflags, walker.lastrsw, walker.endva - walker.totallen, walker.endpa - walker.totallen, walker.totallen, walker.lasterr, walker.data); 179 | walker.haslast = false; 180 | } 181 | } 182 | 183 | #[inline(never)] 184 | unsafe fn walk_page_table_compressed(root: u64, cb: PageWalkerCallback, data: &mut Data) { 185 | let mut ourdata = CompressionWalker{ 186 | cb, 187 | data, 188 | haslast: false, 189 | lastflags: 0, 190 | lastrsw: 0, 191 | totallen: 0, 192 | endva: 0, 193 | endpa: 0, 194 | lasterr: PageWalkError::ErrNone 195 | }; 196 | walk_page_table(root, compression_walk, &mut ourdata); 197 | } 198 | 199 | #[no_mangle] 200 | fn debug_walk(flags: u8, rsw: u8, va: u64, pa: u64, len: u64, err: PageWalkError, _: &mut ()) { 201 | flag(flags, "VALID", FLAG_VALID); 202 | flag(flags, "R", FLAG_READ); 203 | flag(flags, "W", FLAG_WRITE); 204 | flag(flags, "X", FLAG_EXEC); 205 | flag(flags, "USER", FLAG_USER); 206 | flag(flags, "GLOBAL", FLAG_GLOBAL); 207 | flag(flags, "ACC", FLAG_ACCESSED); 208 | flag(flags, "DIRTY", FLAG_DIRTY); 209 | print!(" {} {:#018x}-{:#018x}", rsw, va, va + len - 1); 210 | if err != ErrUnmapped { 211 | print!(" {:#018x}-{:#018x} ", pa, pa + len - 1); 212 | } else { 213 | print!(" ") 214 | } 215 | println!("{}", pwe_to_str(err)); 216 | } 217 | 218 | #[inline(never)] 219 | pub fn debug_paging() { 220 | let hart = csrr!(mhartid); 221 | println!("==================================================== PAGE TABLE STATE (hart {}) ===================================================", hart); 222 | let (mode, asid, ppn) = parse_satp(csrr!(satp)); 223 | let root = ppn << PAGE_BITS; 224 | 225 | println!("Paging mode: {}", mode_to_str(mode)); 226 | println!("ASID: {}", asid); 227 | println!("Page table address: {:#x}", root); 228 | 229 | if (csrr!(sstatus) & STATUS_SUM) != 0 { 230 | println!("Supervisor: can access user memory"); 231 | } else { 232 | println!("Supervisor: limited to supervisor memory"); 233 | } 234 | 235 | if mode != MODE_SV39 { 236 | println!("debugging not implemented for this paging mode.") 237 | } else { 238 | println!("VALID R W X USER GLOBAL ACC DIRTY RSW VIRTUAL (low) VIRTUAL (high) PHYSICAL (low) PHYSICAL (high) TRAVERSAL-ERROR"); 239 | 240 | unsafe { 241 | let debug_walk_ptr: u64; 242 | asm!("lla $0, debug_walk" : "=r"(debug_walk_ptr)); 243 | walk_page_table_compressed(root, core::mem::transmute(debug_walk_ptr), &mut ()); 244 | } 245 | println!("VALID R W X USER GLOBAL ACC DIRTY RSW VIRTUAL (low) VIRTUAL (high) PHYSICAL (low) PHYSICAL (high) TRAVERSAL-ERROR"); 246 | } 247 | println!("====================================================== END PAGE TABLE STATE ======================================================"); 248 | } 249 | -------------------------------------------------------------------------------- /interrupt-bug.md: -------------------------------------------------------------------------------- 1 | The bug manifested itself as a random guest crash due to a page fault, illegal 2 | exception fault, or other strange error and the cause wasn't consistent between 3 | runs. At first the only hint I had was that decreasing the timer clock frequency 4 | (causing fewer timer interrupts to be delivered) caused the guest to get much 5 | farther in its execution before crashing. 6 | 7 | I suspected that this might because caused by a subtle bug in the way timer 8 | interrupts were delivered to the Linux guest. If I was mistakenly leaving 9 | interrupts enabled when the shouldn't have been I might deliver an interrupt 10 | while Linux was inside a critical section. Vectoring to the wrong address, 11 | incorrectly setting the privilege mode, or any of numerous other spec violations 12 | could have caused Linux to behave erratically. Yet, going through my code line 13 | by line exactly matched the RISC-V specification. 14 | 15 | Next I tried running my code a bunch of times to see if I could find any 16 | patterns in where the traps occurred. Printing out the instruction address where 17 | each crash happened proved to be of little help. Nearly every time landed in a 18 | different place, and often in entirely different functions. 19 | 20 | By commenting out sections of hypervisor code, I was able to establish that 21 | masking timer interrupts for Linux made the problem go away. Yet, this wasn't 22 | conclusive: without timer interrupts Linux livelocked early in the boot process. 23 | 24 | Eventually I noticed that if I delivered timer interrupts somewhere between 25 | 10-100x slower than real time then the crash would always happen in exactly the 26 | same place and with the same error: "guest page table specified invalid physical 27 | address". This consistency would prove invaluable in tracking down exactly what 28 | was going on. 29 | 30 | Examining disassebled output of the particular function, I learned that the 31 | crash was happening in code generated from macros, meaning that no source was 32 | available, and the error message itself proved to be a red herring: on RISC-V 33 | Linux defaults to 128 GB of direct mapped physical addreses into the upper 34 | portion of the page table, regardless of how much RAM is actually present. The 35 | code had just happened to touch the first page past the end of RAM. 36 | 37 | I tried stepping through the code with GDB and eventually started to understand 38 | what was going on. The load instruction that triggered the fault was happening 39 | inside a loop, and at a random iteration the loop bound (stored on the stack) 40 | was being overwritten. Finally! This sort of memory corruption was consistent 41 | with the crashes I was seeing. 42 | 43 | It took me a while to notice that it wasn't actually the memory location that 44 | was changing. Rather a single nibble in the frame pointer register was changing 45 | so it went from 0xffffffe022057ce0 to 0xffffffe022057c70. Now it was simply a 46 | matter of finding where it was being overwritten. I grepped through the 47 | disassembly of my hypervisor and discovered that the only place were the frame 48 | pointer was touched was to save it on trap entry and restore it on exit. The 49 | compiler wasn't even emitting any code that used the frame pointer! 50 | 51 | No matter, maybe somewhere I was overwritting the saved value on the stack with 52 | another value? With some printf debugging I learned that this wasn't happening: 53 | the saved value did not change at any point during my hypervisor's interrupt 54 | handler. Then I noticed something even stranger, the frame pointer was already 55 | corrupt when entering the hypervisor. I turned my attention the M-mode stub I 56 | was using to forward interrupts to my hypervisor. Only 24 hand written assembly 57 | instructions long, I couldn't see anything that might explain the bug. There was 58 | no mention of the frame pointer register, and the few memory accesses were all 59 | to safe addresses. I confirmed with objdump that the assembly produced matched 60 | what I intended. GDB was initially uncooperative, but eventually I set a 61 | breakpoint at the start of the M-mode stub and confirmed that the frame pointer 62 | was already wrong even then! 63 | 64 | At this point I was starting to get suspicious of QEMU. How could a register 65 | change between two unrelated instructions? Running QEMU inside GDB I was able to 66 | set a watchpoint on the location of the fp register, and saw that it changed 67 | inside `cpu_tb_exec` while QEMU was running a translation block. The 68 | modification was also happening *before* the breakpoint I'd set on mtrap_entry, 69 | the start of my M-mode trap handler stub. This didn't make any sense, that 70 | breakpoint should have fired before executing any code after the interrupt, yet 71 | some code was being run that modified the register. 72 | 73 | Once I noticed that the interrupt vector wasn't actually being set to 74 | mtrap_entry I was able to start piecing together what was going on. I initialize 75 | `mtvec` during boot with this bit of code: 76 | 77 | ```rust 78 | asm!("auipc t0, 0 79 | c.addi t0, 18 80 | csrw 0x305, t0 // mtvec 81 | c.j continue 82 | 83 | .align 4 84 | mtrap_entry: 85 | csrw 0x340, sp // mscratch 86 | 87 | ... 88 | 89 | mret 90 | continue:" ::: "t0" : "volatile"); 91 | ... 92 | ``` 93 | 94 | Which the assembler expands to: 95 | 96 | ``` 97 | 8000006a: 00000297 auipc x5,0x0 98 | 8000006e: 02c1 addi x5,x5,18 99 | 80000070: 30529073 csrw mtvec,x5 100 | 80000074: a88d j 800000e6 101 | 80000076: 00000013 nop 102 | 8000007a: 00000013 nop 103 | 8000007e: 0001 nop 104 | 105 | 0000000080000080 : 106 | 80000080: 34011073 csrw mscratch,x2 107 | ... 108 | 00000000800000e6 : 109 | ``` 110 | 111 | This was intended to do was to set the machine mode trap vector, `mtvec`, to 112 | 0x80000080 (the address of mtrap_entry) and then jump over the code for the trap 113 | handler to continue on with initialization. Unfortunately, the address 114 | calculation was wrong and `mtvec` was actually initialized with the value 115 | 0x8000007c. At first glance this difference seems completely innocuous: that 116 | other address land right in the middle of a range of NOPs and execution should 117 | fall through to mtrap_entry. However, this is not what happens, because when the 118 | processor jumps to that address it actually in the middle of a NOP and thus sees 119 | a slightly different sequence of instructions: 120 | 121 | ``` 122 | 8000007c: 0000 c.unimp 123 | 8000007e: 0001 nop 124 | 80000080: 34011073 csrw mscratch,x2 125 | ``` 126 | 127 | When execution reached the `c.unimp` instruction, the processor should have 128 | triggered an illegal instruction exception and jumped back to the start of the 129 | M-mode trap handler where it would have encountered the same illegal instruction 130 | and looped forever. However, due to a bug in QEMU, the instruction sequence was 131 | actually decoded as: 132 | 133 | ``` 134 | 8000007c: 0000 c.addi4sp x8,0 135 | 8000007e: 0001 nop 136 | 80000080: 34011073 csrw mscratch,x2 137 | ``` 138 | 139 | Or, equivalently with aliases: 140 | 141 | ``` 142 | 8000007c: 0000 mv fp,sp 143 | 8000007e: 0001 nop 144 | 80000080: 34011073 csrw mscratch,sp 145 | ``` 146 | 147 | In other words, instead of trapping again, QEMU clobbered the frame pointer 148 | (with the contents of the stack pointer) and then resumed execution. The 149 | hypervisor trap handler then went on to save and restored this now invalid frame 150 | pointer, and was thus unaffected by this corruption. 151 | 152 | Linux was less lucky. Since the kernel was compiled with 153 | `-fno-omit-frame-pointer`, it made heavy use of the register. However, RISC-V 154 | has a large number of general purpose registers, so most of the time local 155 | variables do not spill out onto the stack and thus most accesses to the frame 156 | pointer only occur in function prologues/epilogues. 157 | 158 | One notable exception is the raid6_int2_xor_syndrome function which benchmarks a 159 | RAID algorithm that requires lots of registers. This function is also notable 160 | because Linux runs it repeatedly until a specific number of timer interrupts 161 | arrive (to measure how long it takes). This is also why I was able to 162 | consistently get Linux to crash in the function. Here, the loop bound is stored 163 | on the stack to free up a register: 164 | 165 | ``` 166 | ffffffe0002b8f70 : 167 | ... 168 | ffffffe0002b908a: 2ac1 addiw s5,s5,16 169 | ... 170 | ffffffe0002b90a6: f9843783 ld a5,-104(fp) 171 | ffffffe0002b90aa: ecfae3e3 bltu s5,a5,ffffffe0002b8f70 172 | ``` 173 | 174 | When a timer interrupt arrives while the program counter is in the body of the 175 | loop, the frame pointer gets set to the stack pointer and so during the next 176 | iteration instead of pulling off the correct loop bound it reads a garbage value 177 | off the stack. In this particular case, that garbage value happened to be a 178 | pointer to kernel memory (ie with MSB set) so instead of running `4096/16=256` 179 | iterations of the loop it actually tried to run nearly `2^64/16=2^60` iterations 180 | eventually running off the end of RAM. 181 | -------------------------------------------------------------------------------- /src/supervisor.rs: -------------------------------------------------------------------------------- 1 | #![no_std] 2 | #![feature(asm)] 3 | #![feature(const_fn)] 4 | #![feature(global_asm)] 5 | #![feature(lang_items)] 6 | #![feature(linkage)] 7 | #![feature(naked_functions)] 8 | #![feature(proc_macro_hygiene)] 9 | #![feature(ptr_offset_from)] 10 | #![feature(start)] 11 | #![feature(try_blocks)] 12 | 13 | use rvirt::*; 14 | 15 | // mandatory rust environment setup 16 | #[lang = "eh_personality"] extern fn eh_personality() {} 17 | #[panic_handler] fn panic(info: &::core::panic::PanicInfo) -> ! { println!("{}", info); loop {}} 18 | #[start] fn start(_argc: isize, _argv: *const *const u8) -> isize {0} 19 | #[no_mangle] fn abort() -> ! { println!("Abort!"); loop {}} 20 | 21 | static GUEST_DTB: &'static [u8] = include_bytes!("guest.dtb"); 22 | 23 | #[link_section = ".initrd"] 24 | #[cfg(feature = "embed_guest_kernel")] 25 | static GUEST_KERNEL: [u8; include_bytes!(env!("RVIRT_GUEST_KERNEL")).len()] = 26 | *include_bytes!(env!("RVIRT_GUEST_KERNEL")); 27 | 28 | #[cfg(not(feature = "embed_guest_kernel"))] 29 | static GUEST_KERNEL: [u8; 0] = []; 30 | 31 | global_asm!(include_str!("scode.S")); 32 | 33 | extern { 34 | fn hart_entry(); 35 | fn panic_trap_handler(); 36 | } 37 | 38 | //#[naked] 39 | #[no_mangle] 40 | #[inline(never)] 41 | unsafe fn sstart2(hartid: u64, device_tree_blob: u64, shared_segments_shift: u64) { 42 | csrci!(sstatus, riscv::bits::STATUS_SIE); 43 | if !SHARED_STATICS.hart_lottery.swap(false, Ordering::SeqCst) { 44 | csrw!(stvec, hart_entry as u64); 45 | csrw!(sscratch, hartid); 46 | csrw!(sie, 0x002); 47 | csrsi!(sstatus, riscv::bits::STATUS_SIE); 48 | loop { 49 | riscv::wfi(); 50 | } 51 | } 52 | 53 | csrw!(stvec, panic_trap_handler as *const () as u64); 54 | 55 | // Read and process host FDT. 56 | let mut fdt = Fdt::new(pa2va(device_tree_blob)); 57 | assert!(fdt.magic_valid()); 58 | assert!(fdt.version() >= 17 && fdt.last_comp_version() <= 17); 59 | assert!(fdt.total_size() < 64 * 1024); 60 | let machine = fdt.parse(); 61 | 62 | // Initialize UART 63 | if let Some(ty) = machine.uart_type { 64 | SHARED_STATICS.uart_writer.lock().init(machine.uart_address, ty); 65 | } 66 | 67 | // Do some sanity checks now that the UART is initialized and we have a better chance of 68 | // successfully printing output. 69 | assert!(machine.initrd_end <= machine.physical_memory_offset + pmap::HART_SEGMENT_SIZE); 70 | assert!(machine.initrd_end - machine.initrd_start <= pmap::HEAP_SIZE); 71 | assert!(machine.harts.iter().any(|h| h.hartid == hartid)); 72 | if !cfg!(feature = "embed_guest_kernel") && machine.initrd_end == 0 { 73 | println!("WARN: No guest kernel provided. Make sure to pass one with `-initrd or compile with --features embed_guest_kernel`"); 74 | } 75 | 76 | // Do not allow the __SHARED_STATICS_IMPL symbol to be optimized out. 77 | assert_eq!(&__SHARED_STATICS_IMPL as *const _ as u64, constants::SUPERVISOR_SHARED_STATIC_ADDRESS); 78 | 79 | // Program PLIC priorities 80 | for i in 1..127 { 81 | *(pa2va(machine.plic_address + i*4) as *mut u32) = 1; 82 | } 83 | 84 | let mut guest_harts = machine.harts.clone(); 85 | let single_hart = guest_harts.len() == 1; 86 | if !single_hart { 87 | guest_harts.retain(|h| h.hartid != hartid); 88 | } 89 | let single_guest = guest_harts.len() == 1; 90 | assert!(guest_harts.len() != 0); 91 | 92 | assert!(1 + guest_harts.len() as u64 <= (machine.physical_memory_size >> 30)); 93 | 94 | let mut guestid = 1; 95 | for hart in guest_harts { 96 | let hart_base_pa = machine.physical_memory_offset + pmap::HART_SEGMENT_SIZE * guestid; 97 | 98 | let mut irq_mask = 0; 99 | for j in 0..4 { 100 | let index = ((guestid-1) * 4 + j) as usize; 101 | if index < machine.virtio.len() { 102 | let irq = machine.virtio[index].irq; 103 | assert!(irq < 32); 104 | irq_mask |= 1u32 << irq; 105 | } 106 | } 107 | 108 | *(pa2va(machine.plic_address + 0x200000 + 0x1000 * hart.plic_context) as *mut u32) = 0; 109 | *(pa2va(machine.plic_address + 0x2000 + 0x80 * hart.plic_context) as *mut u32) = irq_mask; 110 | *(pa2va(machine.plic_address + 0x2000 + 0x80 * hart.plic_context + 4) as *mut u32) = 0; 111 | 112 | (*(pa2va(hart_base_pa) as *mut [u64; 1024])) = pmap::make_boot_page_table(hart_base_pa); 113 | for i in 512..1024 { 114 | *(pa2va(hart_base_pa + i * 8) as *mut u64) += shared_segments_shift >> 2; 115 | } 116 | 117 | core::ptr::copy(pa2va(device_tree_blob) as *const u8, 118 | pa2va(hart_base_pa + 4096*2) as *mut u8, 119 | fdt.total_size() as usize); 120 | if machine.initrd_start == machine.initrd_end { 121 | core::ptr::copy(&GUEST_KERNEL as *const _ as *const u8, 122 | pa2va(hart_base_pa + pmap::HEAP_OFFSET) as *mut u8, 123 | GUEST_KERNEL.len()); 124 | } else { 125 | core::ptr::copy(pa2va(machine.initrd_start) as *const u8, 126 | pa2va(hart_base_pa + pmap::HEAP_OFFSET) as *mut u8, 127 | (machine.initrd_end - machine.initrd_start) as usize); 128 | } 129 | 130 | let reason = IpiReason::TriggerHartEntry { 131 | a0: hart.hartid, 132 | a1: hart_base_pa + 4096*2, 133 | a2: shared_segments_shift, 134 | a3: hart_base_pa, 135 | a4: if !single_guest { guestid as u64 } else { u64::max_value() }, 136 | sp: hart_base_pa + (4<<20) + pmap::DIRECT_MAP_OFFSET, 137 | satp: 8 << 60 | (hart_base_pa >> 12), 138 | }; 139 | 140 | *SHARED_STATICS.ipi_reason_array[hart.hartid as usize].lock() = Some(reason); 141 | if single_hart { 142 | hart_entry2(hartid); 143 | } else { 144 | riscv::sbi::send_ipi_to_hart(hart.hartid); 145 | } 146 | 147 | guestid += 1; 148 | } 149 | 150 | loop {} 151 | } 152 | 153 | #[no_mangle] 154 | unsafe fn hart_entry2(hartid: u64) { 155 | let reason = { SHARED_STATICS.ipi_reason_array.get_unchecked(hartid as usize).lock().take() }; 156 | if let Some(IpiReason::TriggerHartEntry { a0, a1, a2, a3, a4, sp, satp }) = reason { 157 | csrw!(sie, 0x222); 158 | csrw!(satp, satp); 159 | hart_entry3(a0, a1, a2, a3, a4, sp); 160 | } else { 161 | unreachable!(); 162 | } 163 | } 164 | 165 | #[naked] 166 | #[no_mangle] 167 | #[inline(never)] 168 | unsafe fn hart_entry3(_hartid: u64, _device_tree_blob: u64, _shared_segments_shift: u64, 169 | _hart_base_pa: u64, _guestid: u64, _stack_pointer: u64) { 170 | asm!("mv sp, a5 171 | j hart_entry4" :::: "volatile"); 172 | } 173 | 174 | #[no_mangle] 175 | unsafe fn hart_entry4(hartid: u64, device_tree_blob: u64, shared_segments_shift: u64, 176 | hart_base_pa: u64, guestid: u64) { 177 | csrw!(stvec, trap::strap_entry as *const () as u64); 178 | csrw!(sie, 0x222); 179 | csrs!(sstatus, riscv::bits::STATUS_SUM); 180 | csrc!(sstatus, riscv::bits::STATUS_SPP); 181 | riscv::sbi::clear_ipi(); 182 | 183 | let guestid = if guestid == u64::max_value() { 184 | None 185 | } else { 186 | Some(guestid) 187 | }; 188 | 189 | // Read and process host FDT. 190 | let mut fdt = Fdt::new(pa2va(device_tree_blob)); 191 | assert!(fdt.magic_valid()); 192 | assert!(fdt.version() >= 17 && fdt.last_comp_version() <= 17); 193 | let machine = fdt.parse(); 194 | 195 | // Initialize memory subsystem. 196 | let (shadow_page_tables, guest_memory, guest_shift) = 197 | pmap::init(hart_base_pa, shared_segments_shift, &machine); 198 | 199 | // Load guest binary 200 | let (entry, max_addr) = sum::access_user_memory(||{ 201 | elf::load_elf(pa2va(hart_base_pa + pmap::HEAP_OFFSET) as *const u8, 202 | machine.physical_memory_offset as *mut u8) 203 | }); 204 | let guest_dtb = (max_addr | 0x1fffff) + 1; 205 | csrw!(sepc, entry); 206 | 207 | // Load guest FDT. 208 | let guest_machine = sum::access_user_memory(||{ 209 | core::ptr::copy(GUEST_DTB.as_ptr(), 210 | guest_dtb as *mut u8, 211 | GUEST_DTB.len()); 212 | let mut guest_fdt = Fdt::new(guest_dtb); 213 | guest_fdt.initialize_guest(guest_memory.len(), &machine.bootargs); 214 | guest_fdt.parse() 215 | }); 216 | 217 | // Initialize context 218 | context::initialize(&machine, &guest_machine, shadow_page_tables, guest_memory, guest_shift, hartid, guestid); 219 | 220 | // Jump into the guest kernel. 221 | asm!("mv a1, $0 // dtb = guest_dtb 222 | 223 | li ra, 0 224 | li sp, 0 225 | li gp, 0 226 | li tp, 0 227 | li t0, 0 228 | li t1, 0 229 | li t2, 0 230 | li s0, 0 231 | li s1, 0 232 | li a0, 0 // hartid = 0 233 | li a2, 0 234 | li a3, 0 235 | li a4, 0 236 | li a5, 0 237 | li a6, 0 238 | li a7, 0 239 | li s2, 0 240 | li s3, 0 241 | li s4, 0 242 | li s5, 0 243 | li s6, 0 244 | li s7, 0 245 | li s8, 0 246 | li s9, 0 247 | li s10, 0 248 | li s11, 0 249 | li t3, 0 250 | li t4, 0 251 | li t5, 0 252 | li t6, 0 253 | sret" :: "r"(guest_dtb) : "memory" : "volatile"); 254 | 255 | unreachable!(); 256 | } 257 | 258 | #[no_mangle] 259 | fn panic_trap_handler2() { 260 | println!("scause={}", csrr!(scause) as isize); 261 | println!("sepc={:x}", csrr!(sepc)); 262 | panic!("Got unexpected trap, panicking..."); 263 | } 264 | -------------------------------------------------------------------------------- /src/virtio.rs: -------------------------------------------------------------------------------- 1 | use byteorder::{NativeEndian, ByteOrder}; 2 | use riscv_decode::Instruction; 3 | use crate::context::Context; 4 | use crate::memory_region::MemoryRegion; 5 | use crate::drivers::macb::MacbDriver; 6 | use crate::{pmap, riscv, drivers}; 7 | 8 | pub const MAX_QUEUES: usize = 4; 9 | pub const MAX_DEVICES: usize = 4; 10 | 11 | #[derive(Copy, Clone)] 12 | pub struct Queue { 13 | /// Address guest thinks queue is mapped at 14 | guest_pa: u64, 15 | /// Address queue is actually mapped at 16 | host_pa: u64, 17 | /// Number of entries in queue 18 | size: u64, 19 | } 20 | 21 | pub enum Device { 22 | Passthrough { 23 | /// Virtual Queue Index, offset=0x30 24 | queue_sel: u32, 25 | queues: [Queue; MAX_QUEUES], 26 | device_registers: MemoryRegion, 27 | }, 28 | Unmapped, 29 | Macb(drivers::GuestDevice), 30 | } 31 | impl Device { 32 | pub unsafe fn new(host_base_address: u64) -> Self { 33 | Device::Passthrough { 34 | queue_sel: 0, 35 | queues: [Queue {guest_pa: 0, host_pa: 0, size: 0}; MAX_QUEUES], 36 | device_registers: MemoryRegion::with_base_address(pmap::pa2va(host_base_address), 0, 0x1000), 37 | } 38 | } 39 | } 40 | 41 | #[inline(always)] 42 | pub fn is_device_access(state: &mut Context, guest_pa: u64) -> bool { 43 | guest_pa >= 0x10001000 && guest_pa < 0x10001000 + 0x1000 * state.virtio.devices.len() as u64 44 | } 45 | 46 | pub fn handle_device_access(state: &mut Context, guest_pa: u64, instruction: u32) -> bool { 47 | let device = ((guest_pa - 0x10001000) / 0x1000) as usize; 48 | let offset = guest_pa & 0xfff; 49 | 50 | match state.virtio.devices[device] { 51 | Device::Passthrough { ref mut queue_sel, ref mut queues, ref mut device_registers } => { 52 | let mut current = device_registers[offset & !0x3]; 53 | if offset == 0x10 { 54 | current = current & !(1 << 28); // No VIRTIO_F_INDIRECT_DESC 55 | } else if offset == 0x34 { 56 | current = current.min(256); // ensure queues take up at most one page 57 | } 58 | 59 | match riscv_decode::decode(instruction).ok() { 60 | Some(Instruction::Lw(i)) => { 61 | state.saved_registers.set(i.rd(), current as u64) 62 | } 63 | Some(Instruction::Lb(i)) => { 64 | assert!(offset >= 0x100); 65 | let value = (current >> (8*(offset & 0x3))) & 0xff; 66 | state.saved_registers.set(i.rd(), value as u64) 67 | } 68 | Some(Instruction::Sw(i)) => { 69 | let mut value = state.saved_registers.get(i.rs2()) as u32; 70 | if offset == 0x30 { // QueueSel 71 | assert!(value < 4); 72 | *queue_sel = value; 73 | } else if offset == 0x38 { // QueueNum 74 | let queue = &mut queues[*queue_sel as usize]; 75 | queue.size = value as u64; 76 | 77 | // Linux never changes queue sizes, so this isn't supported. 78 | assert_eq!(queue.host_pa, 0); 79 | } else if offset == 0x40 { // QueuePFN 80 | let queue = &mut queues[*queue_sel as usize]; 81 | 82 | // Linux never releases queues, so this is currently unimplemented. 83 | assert_eq!(queue.host_pa, 0); 84 | 85 | if value != 0 { 86 | queue.guest_pa = (value as u64) << 12; 87 | value += (state.guest_shift >> 12) as u32; 88 | queue.host_pa = (value as u64) << 12; 89 | } else { 90 | unimplemented!(); 91 | } 92 | 93 | // Sad, but necessary because we don't know all the places this page is mapped. 94 | pmap::flush_shadow_page_table(&mut state.shadow_page_tables); 95 | 96 | state.virtio.queue_guest_pages.push(queue.guest_pa); 97 | for i in 0..queue.size { 98 | let value = &mut state.guest_memory[queue.guest_pa + i * 16]; 99 | *value = (*value).wrapping_add(state.guest_shift); 100 | } 101 | } 102 | device_registers[offset] = value; 103 | } 104 | Some(instr) => { 105 | println!("VIRTIO: Instruction {:?} used to target addr {:#x} from pc {:#x}", instr, guest_pa, csrr!(sepc)); 106 | loop {} 107 | } 108 | None => { 109 | println!("Unrecognized instruction targetting VIRTIO {:#x} at {:#x}!", instruction, csrr!(sepc)); 110 | loop {} 111 | } 112 | } 113 | } 114 | Device::Unmapped => { 115 | match riscv_decode::decode(instruction).ok() { 116 | Some(Instruction::Lw(i)) => state.saved_registers.set(i.rd(), 0), 117 | Some(Instruction::Lb(i)) => state.saved_registers.set(i.rd(), 0), 118 | Some(Instruction::Sw(_)) => {} 119 | Some(instr) => { 120 | println!("VIRTIO: Instruction {:?} used to target addr {:#x} from pc {:#x}", instr, guest_pa, csrr!(sepc)); 121 | loop {} 122 | } 123 | None => { 124 | println!("Unrecognized instruction targetting VIRTIO {:#x} at {:#x}!", instruction, csrr!(sepc)); 125 | loop {} 126 | } 127 | } 128 | } 129 | Device::Macb(ref mut macb) => match riscv_decode::decode(instruction).ok() { 130 | Some(Instruction::Lb(i)) => state.saved_registers.set(i.rd(), macb.read_u8(&mut state.guest_memory, offset) as u64), 131 | Some(Instruction::Lw(i)) => state.saved_registers.set(i.rd(), macb.read_u32(&mut state.guest_memory, offset) as u64), 132 | Some(Instruction::Sb(i)) => macb.write_u8(&mut state.guest_memory, offset, state.saved_registers.get(i.rs2()) as u8), 133 | Some(Instruction::Sw(i)) => macb.write_u32(&mut state.guest_memory, offset, state.saved_registers.get(i.rs2()) as u32), 134 | Some(_) | None => {} 135 | } 136 | } 137 | riscv::set_sepc(csrr!(sepc) + riscv_decode::instruction_length(instruction as u16) as u64); 138 | true 139 | } 140 | 141 | pub fn is_queue_access(state: &mut Context, guest_page: u64) -> bool { 142 | for i in 0..state.virtio.queue_guest_pages.len() { 143 | if state.virtio.queue_guest_pages[i] == guest_page { 144 | return true; 145 | } 146 | } 147 | false 148 | } 149 | 150 | pub fn handle_queue_access(state: &mut Context, guest_pa: u64, host_pa: u64, instruction: u32) -> bool { 151 | let mut hit_queue = false; 152 | for d in &state.virtio.devices { 153 | if let Device::Passthrough { ref queues, .. } = d { 154 | for q in queues { 155 | if guest_pa >= q.guest_pa && guest_pa < q.guest_pa + q.size * 16 && guest_pa & 0xf < 8 { 156 | hit_queue = true; 157 | } 158 | } 159 | } 160 | } 161 | 162 | let decoded = riscv_decode::decode(instruction); 163 | if let Err(err) = decoded { 164 | println!("Unrecognized instruction targetting VQUEUE {:#x} at {:#x} (error: {:?})!", 165 | instruction, csrr!(sepc), err); 166 | loop {} 167 | } 168 | 169 | if hit_queue { 170 | match decoded.unwrap() { 171 | Instruction::Ld(i) => { 172 | state.saved_registers.set(i.rd(), state.guest_memory[guest_pa].wrapping_sub(state.guest_shift)); 173 | } 174 | Instruction::Sd(i) => { 175 | let value = state.saved_registers.get(i.rs2()); 176 | if value == 0 { 177 | state.guest_memory[guest_pa] = 0; 178 | } else if state.guest_memory.in_region(value) { 179 | state.guest_memory[guest_pa] = value.wrapping_add(state.guest_shift); 180 | } else { 181 | loop {} 182 | } 183 | } 184 | instr => { 185 | println!("VQUEUE: Instruction {:?} used to target addr {:#x} from pc {:#x}", 186 | instr, host_pa, csrr!(sepc)); 187 | loop {} 188 | } 189 | } 190 | } else { 191 | let index = guest_pa & !0x7; 192 | let offset = (guest_pa % 8) as usize; 193 | let mut current = state.guest_memory[index].to_ne_bytes(); 194 | match decoded.as_ref().unwrap() { 195 | Instruction::Ld(i) => state.saved_registers.set(i.rd(), u64::from_ne_bytes(current)), 196 | Instruction::Lwu(i) => state.saved_registers.set(i.rd(), NativeEndian::read_u32(¤t[offset..]) as u64), 197 | Instruction::Lhu(i) => state.saved_registers.set(i.rd(), NativeEndian::read_u16(¤t[offset..]) as u64), 198 | Instruction::Lbu(i) => state.saved_registers.set(i.rd(), current[offset] as u64), 199 | Instruction::Lw(i) => state.saved_registers.set(i.rd(), NativeEndian::read_i32(¤t[offset..]) as i64 as u64), 200 | Instruction::Lh(i) => state.saved_registers.set(i.rd(), NativeEndian::read_i16(¤t[offset..]) as i64 as u64), 201 | Instruction::Lb(i) => state.saved_registers.set(i.rd(), current[offset] as i8 as i64 as u64), 202 | Instruction::Sd(i) => state.guest_memory[index] = state.saved_registers.get(i.rs2()), 203 | Instruction::Sw(i) => { 204 | NativeEndian::write_u32(&mut current[offset..], state.saved_registers.get(i.rs2()) as u32); 205 | state.guest_memory[index] = u64::from_ne_bytes(current); 206 | } 207 | Instruction::Sh(i) => { 208 | NativeEndian::write_u16(&mut current[offset..], state.saved_registers.get(i.rs2()) as u16); 209 | state.guest_memory[index] = u64::from_ne_bytes(current); 210 | } 211 | Instruction::Sb(i) => { 212 | current[offset] = state.saved_registers.get(i.rs2()) as u8; 213 | state.guest_memory[index] = u64::from_ne_bytes(current); 214 | } 215 | instr => { 216 | println!("VQUEUE: Instruction {:?} used to target addr {:#x} from pc {:#x}", 217 | instr, host_pa, csrr!(sepc)); 218 | loop {} 219 | } 220 | } 221 | } 222 | 223 | riscv::set_sepc(csrr!(sepc) + riscv_decode::instruction_length(instruction as u16) as u64); 224 | true 225 | } 226 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /src/drivers/mod.rs: -------------------------------------------------------------------------------- 1 | #![allow(unused)] 2 | 3 | use arrayvec::ArrayVec; 4 | use byteorder::{ByteOrder, LittleEndian}; 5 | use crate::memory_region::MemoryRegion; 6 | 7 | pub mod macb; 8 | 9 | #[allow(unused)] 10 | mod constants { 11 | pub const VENDOR_ID: u32 = 0x1af4; 12 | pub const MAGIC_VALUE: u32 = 0x74726976; 13 | 14 | pub const REG_MAGIC_VALUE: u64 = 0x0; 15 | pub const REG_VERSION: u64 = 0x004; 16 | pub const REG_DEVICE_ID: u64 = 0x008; 17 | pub const REG_VENDOR_ID: u64 = 0x00c; 18 | pub const REG_HOST_FEATURES: u64 = 0x010; 19 | pub const REG_HOST_FEATURES_SEL: u64 = 0x014; 20 | pub const REG_GUEST_FEATURES: u64 = 0x020; 21 | pub const REG_GUEST_FEATURES_SEL: u64 = 0x024; 22 | pub const REG_GUEST_PAGE_SIZE: u64 = 0x028; 23 | pub const REG_QUEUE_SEL: u64 = 0x030; 24 | pub const REG_QUEUE_NUM_MAX: u64 = 0x034; 25 | pub const REG_QUEUE_NUM: u64 = 0x038; 26 | pub const REG_QUEUE_ALIGN: u64 = 0x03c; 27 | pub const REG_QUEUE_PFN: u64 = 0x040; 28 | pub const REG_QUEUE_NOTIFY: u64 = 0x050; 29 | pub const REG_INTERRUPT_STATUS: u64 = 0x060; 30 | pub const REG_INTERRUPT_ACK: u64 = 0x064; 31 | pub const REG_STATUS: u64 = 0x070; 32 | 33 | pub const STATUS_ACKNOWLEDGE: u32 = 1; 34 | pub const STATUS_DRIVER: u32 = 2; 35 | pub const STATUS_FAILED: u32 = 128; 36 | pub const STATUS_FEATURES_OK: u32 = 8; 37 | pub const STATUS_DRIVER_OK: u32 = 4; 38 | pub const STATUS_NEEDS_RESET: u32 = 64; 39 | 40 | pub const VIRTIO_NET_F_MTU: u64 = 1 << 3; 41 | pub const VIRTIO_NET_F_MAC: u64 = 1 << 5; 42 | 43 | pub const VIRTQ_DESC_F_NEXT: u16 = 1; 44 | pub const VIRTQ_DESC_F_WRITE: u16 = 2; 45 | 46 | pub const MAX_QUEUES: usize = 4; 47 | } 48 | pub use constants::*; 49 | 50 | pub trait Driver: Sized { 51 | const DEVICE_ID: u32; 52 | const FEATURES: u64; 53 | const QUEUE_NUM_MAX: u32; 54 | 55 | fn interrupt(device: &mut GuestDevice, guest_memory: &mut MemoryRegion) -> bool; 56 | fn doorbell(device: &mut GuestDevice, guest_memory: &mut MemoryRegion, queue: u32); 57 | 58 | fn read_config_u8(device: &GuestDevice, guest_memory: &mut MemoryRegion, offset: u64) -> u8; 59 | fn read_config_u32(device: &GuestDevice, guest_memory: &mut MemoryRegion, offset: u64) -> u32 { 60 | u32::from_le_bytes([ 61 | Self::read_config_u8(device, guest_memory, offset), 62 | Self::read_config_u8(device, guest_memory, offset+1), 63 | Self::read_config_u8(device, guest_memory, offset+2), 64 | Self::read_config_u8(device, guest_memory, offset+3), 65 | ]) 66 | } 67 | fn write_config_u8(device: &mut GuestDevice, guest_memory: &mut MemoryRegion, offset: u64, value: u8); 68 | fn write_config_u32(device: &mut GuestDevice, guest_memory: &mut MemoryRegion, offset: u64, value: u32) { 69 | Self::write_config_u8(device, guest_memory, offset, value.to_le_bytes()[0]); 70 | Self::write_config_u8(device, guest_memory, offset+1, value.to_le_bytes()[1]); 71 | Self::write_config_u8(device, guest_memory, offset+2, value.to_le_bytes()[2]); 72 | Self::write_config_u8(device, guest_memory, offset+3, value.to_le_bytes()[3]); 73 | } 74 | 75 | fn reset(device: &mut GuestDevice, guest_memory: &mut MemoryRegion); 76 | } 77 | 78 | pub struct DescriptorTable<'a> { 79 | desc: &'a [u8], 80 | avail: &'a [u8], 81 | used: &'a mut [u8], 82 | queue_size: usize, 83 | } 84 | #[allow(unused)] 85 | impl<'a> DescriptorTable<'a> { 86 | fn desc_addr(&self, index: usize) -> u64 { LittleEndian::read_u64(&self.desc[16*index..]) } 87 | fn desc_len(&self, index: usize) -> u32 { LittleEndian::read_u32(&self.desc[8+16*index..]) } 88 | fn desc_flags(&self, index: usize) -> u16 { LittleEndian::read_u16(&self.desc[12+16*index..]) } 89 | fn desc_next(&self, index: usize) -> u16 { LittleEndian::read_u16(&self.desc[14+16*index..]) } 90 | 91 | fn avail_flags(&self) -> u16 { LittleEndian::read_u16(&self.avail) } 92 | fn avail_idx(&self) -> u16 { LittleEndian::read_u16(&self.avail[2..]) } 93 | fn avail_ring(&self, index: usize) -> u16 { LittleEndian::read_u16(&self.avail[4+2*index..]) } 94 | 95 | fn used_flags(&self) -> u16 { LittleEndian::read_u16(&self.used) } 96 | fn used_idx(&self) -> u16 { LittleEndian::read_u16(&self.used[2..]) } 97 | fn used_ring_id(&self, index: usize) -> u32 { LittleEndian::read_u32(&self.used[4+8*index..]) } 98 | fn used_ring_len(&self, index: usize) -> u32 { LittleEndian::read_u32(&self.used[8+8*index..]) } 99 | 100 | fn set_used_flags(&mut self, value: u16) { LittleEndian::write_u16(&mut self.used, value) } 101 | fn set_used_idx(&mut self, value: u16) { LittleEndian::write_u16(&mut self.used[2..], value) } 102 | fn set_used_ring_id(&mut self, index: usize, value: u32) { LittleEndian::write_u32(&mut self.used[4+8*index..], value) } 103 | fn set_used_ring_len(&mut self, index: usize, value: u32) { LittleEndian::write_u32(&mut self.used[8+8*index..], value) } 104 | } 105 | 106 | pub struct GuestDevice { 107 | host_features_sel: u32, 108 | 109 | guest_features_sel: u32, 110 | guest_features: u64, 111 | 112 | guest_page_size: u32, 113 | 114 | queue_sel: u32, 115 | queue_num: [u32; MAX_QUEUES], 116 | queue_align: [u32; MAX_QUEUES], 117 | queue_pfn: [u32; MAX_QUEUES], 118 | 119 | interrupt_status: u32, 120 | status: u32, 121 | 122 | host_driver: D, 123 | } 124 | 125 | impl GuestDevice { 126 | pub fn new(host_driver: D) -> Self { 127 | Self { 128 | host_features_sel: 0, 129 | guest_features_sel: 0, 130 | guest_features: 0, 131 | guest_page_size: 4096, 132 | queue_sel: 0, 133 | queue_num: [0; MAX_QUEUES], 134 | queue_align: [0; MAX_QUEUES], 135 | queue_pfn: [0; MAX_QUEUES], 136 | interrupt_status: 0, 137 | status: 0, 138 | host_driver, 139 | } 140 | } 141 | 142 | pub fn read_u8(&mut self, guest_memory: &mut MemoryRegion, offset: u64) -> u8 { 143 | if offset > 0x100 { 144 | D::read_config_u8(self, guest_memory, offset) 145 | } else { 146 | 0 147 | } 148 | } 149 | 150 | pub fn read_u32(&mut self, guest_memory: &mut MemoryRegion, offset: u64) -> u32 { 151 | if offset % 4 != 0 { 152 | return 0; 153 | } 154 | 155 | if offset > 0x100 { 156 | return D::read_config_u32(self, guest_memory, offset); 157 | } 158 | 159 | match offset { 160 | REG_MAGIC_VALUE => MAGIC_VALUE, 161 | REG_VERSION => 1, 162 | REG_DEVICE_ID => D::DEVICE_ID, 163 | REG_VENDOR_ID => VENDOR_ID, 164 | REG_HOST_FEATURES if self.host_features_sel == 0 => (D::FEATURES & 0xffffffff) as u32, 165 | REG_HOST_FEATURES if self.host_features_sel == 1 => ((D::FEATURES >> 32) & 0xffffffff) as u32, 166 | REG_HOST_FEATURES => 0, 167 | REG_HOST_FEATURES_SEL => self.host_features_sel, 168 | REG_GUEST_FEATURES => 0, 169 | REG_GUEST_FEATURES_SEL => self.guest_features_sel, 170 | REG_GUEST_PAGE_SIZE => self.guest_page_size, 171 | REG_QUEUE_SEL => self.queue_sel, 172 | REG_QUEUE_NUM_MAX => D::QUEUE_NUM_MAX, 173 | REG_QUEUE_NUM => self.queue_num[self.queue_sel as usize], 174 | REG_QUEUE_ALIGN => self.queue_align[self.queue_sel as usize], 175 | REG_QUEUE_PFN => self.queue_pfn[self.queue_sel as usize], 176 | REG_QUEUE_NOTIFY => 0, 177 | REG_INTERRUPT_STATUS => 0, 178 | REG_INTERRUPT_ACK => 0, 179 | REG_STATUS => self.status, 180 | _ => 0, 181 | } 182 | } 183 | 184 | pub fn write_u8(&mut self, guest_memory: &mut MemoryRegion, offset: u64, value: u8) { 185 | if offset > 0x100 { 186 | D::write_config_u8(self, guest_memory, offset, value); 187 | } 188 | } 189 | 190 | pub fn write_u32(&mut self, guest_memory: &mut MemoryRegion, offset: u64, value: u32) { 191 | if offset % 4 != 0 { 192 | return; 193 | } 194 | 195 | if offset > 0x100 { 196 | D::write_config_u32(self, guest_memory, offset, value); 197 | return; 198 | } 199 | 200 | match offset { 201 | REG_HOST_FEATURES_SEL => self.host_features_sel = value, 202 | REG_GUEST_FEATURES if self.guest_features_sel == 0 => self.guest_features = (self.guest_features & !0xffffffff) | value as u64, 203 | REG_GUEST_FEATURES if self.guest_features_sel == 1 => self.guest_features = (self.guest_features & 0xffffffff) | ((value as u64) << 32), 204 | REG_GUEST_FEATURES_SEL => self.guest_features_sel = value, 205 | REG_GUEST_PAGE_SIZE => self.guest_page_size = value, 206 | REG_QUEUE_SEL => self.queue_sel = value, 207 | REG_QUEUE_NUM => self.queue_num[self.queue_sel as usize] = value, 208 | REG_QUEUE_ALIGN => self.queue_align[self.queue_sel as usize] = value, 209 | REG_QUEUE_PFN => self.queue_pfn[self.queue_sel as usize] = value, 210 | REG_QUEUE_NOTIFY => D::doorbell(self, guest_memory, value), 211 | REG_INTERRUPT_ACK => self.interrupt_status &= !value, 212 | REG_STATUS => { 213 | if value == 0 { 214 | self.reset(); 215 | D::reset(self, guest_memory); 216 | } else { 217 | self.status = value; 218 | } 219 | } 220 | _ => {}, 221 | } 222 | } 223 | 224 | /// Returns true if the interrupt should be forwarded onto the guest, false otherwise. 225 | pub fn interrupt(&mut self, guest_memory: &mut MemoryRegion) -> bool { 226 | D::interrupt(self, guest_memory) 227 | } 228 | 229 | fn reset(&mut self) { 230 | self.host_features_sel = 0; 231 | self.guest_features_sel = 0; 232 | self.guest_features = 0; 233 | self.guest_page_size = 4096; 234 | 235 | self.queue_sel = 0; 236 | self.queue_num = [0; MAX_QUEUES]; 237 | self.queue_align = [0; MAX_QUEUES]; 238 | self.queue_pfn = [0; MAX_QUEUES]; 239 | 240 | self.interrupt_status = 0; 241 | } 242 | 243 | fn with_buffer Option>(&mut self, guest_memory: &mut MemoryRegion, queue: u32, f: F) { 244 | let dt = self.get_queue(guest_memory, queue); 245 | 246 | if dt.avail_idx() == dt.used_idx() { 247 | return; 248 | } 249 | 250 | let mut ranges = ArrayVec::<[(u64, u32); 16]>::new(); 251 | 252 | let idx = (dt.used_idx() as usize + 1) % dt.queue_size; 253 | let id = dt.avail_ring(idx) as usize; 254 | 255 | let mut flags = VIRTQ_DESC_F_NEXT; 256 | let mut next_id = id; 257 | while flags & VIRTQ_DESC_F_NEXT != 0 { 258 | let addr = dt.desc_addr(next_id); 259 | let len = dt.desc_len(next_id); 260 | flags = dt.desc_flags(next_id); 261 | next_id = dt.desc_next(next_id) as usize; 262 | 263 | ranges.push((addr, len)); 264 | } 265 | 266 | // Handling the borrow checker is a bit tricky here. At this point, we let the lifetime of 267 | // `dt` end so that its borrow of `guest_memory` ends. Then we borrow a bunch of slices from 268 | // `guest_memory` and pass them to `f`. Once that function returns, we have `buffers` go out 269 | // of scope so that we can borrow `guest_memory` again to make a DescriptorTable. 270 | let consume_buffers = { 271 | let mut buffers = ArrayVec::<[&[u8]; 16]>::new(); 272 | for (addr, len) in ranges { 273 | buffers.push(guest_memory.slice(addr, len as u64)); 274 | } 275 | 276 | f(&*buffers) 277 | }; 278 | 279 | if let Some(len) = consume_buffers { 280 | let mut dt = self.get_queue(guest_memory, queue); 281 | dt.set_used_ring_id(idx, id as u32); 282 | dt.set_used_ring_len(idx, len); 283 | dt.set_used_idx(dt.used_idx().wrapping_add(1)); 284 | } 285 | } 286 | 287 | fn get_queue<'a>(&'a mut self, guest_memory: &'a mut MemoryRegion, queue: u32) -> DescriptorTable<'a> { 288 | let pfn = self.queue_pfn[queue as usize]; 289 | let queue_size = self.queue_num[queue as usize] as usize; 290 | let align = self.queue_align[queue as usize] as usize; 291 | 292 | let desc_size = 16 * queue_size; 293 | let avail_size = 6 + 2 * queue_size; 294 | let used_size = 6 + 8 * queue_size; 295 | 296 | let used_start = ((desc_size + avail_size + (align - 1)) % align) - align; 297 | 298 | let slice = guest_memory.slice_mut(pfn as u64 * 4096, (used_start + used_size) as u64); 299 | let (desc, slice) = slice.split_at_mut(desc_size); 300 | let (avail, slice) = slice.split_at_mut(used_size); 301 | let (_, used) = slice.split_at_mut(used_start - desc_size - avail_size); 302 | 303 | DescriptorTable { 304 | desc, 305 | avail, 306 | used, 307 | queue_size 308 | } 309 | } 310 | } 311 | -------------------------------------------------------------------------------- /pmp-test-strategy.txt: -------------------------------------------------------------------------------- 1 | ======== REVIEW OF PMP SPECIFICATION ======== 2 | 3 | Based on v1.10 priv spec 4 | 5 | PMP configuration layout on RV64: 6 | 7 | 63 56 55 48 47 40 39 32 31 24 23 16 15 8 7 0 8 | | pmp7cfg | pmp6cfg | pmp5cfg | pmp4cfg | pmp3cfg | pmp2cfg | pmp1cfg | pmp0cfg | CSR pmpcfg0 9 | 63 56 55 48 47 40 39 32 31 24 23 16 15 8 7 0 10 | | pmp15cfg | pmp14cfg | pmp13cfg | pmp12cfg | pmp11cfg | pmp10cfg | pmp9cfg | pmp8cfg | CSR pmpcfg2 11 | 63 56 55 48 47 40 39 32 31 24 23 16 15 8 7 0 12 | 13 | PMP configuration sublayout: 14 | 15 | 7 6 5 4 3 2 1 0 16 | | L | reserved | A | X | W | R | pmp0cfg-pmp15cfg 17 | ^ ^ ^ ^ ^ 18 | | | | | \----- entry permits read 19 | | | | \--------- entry permits write 20 | | | \------------- entry permits execute 21 | | \----------------- address matching mode of entry 22 | \-------------------------------- PMP entry is locked until reset (also covers previous entry in TOR mode) 23 | (AND means that restrictions apply to M mode as well as other modes) 24 | 25 | PMP address matching modes: 26 | 27 | A = 0: (OFF) null region (disabled) 28 | 29 | No addresses are matched by this region. 30 | 31 | A = 1: (TOR) top-of-range region 32 | 33 | Address range is formed from the previous entry's address (pppp...pppp) and this entry's address (aaaa...aaaa). 34 | 35 | The range matched is pppp...pppp00 (inclusive) through aaaa...aaaa00 (exclusive). 36 | 37 | If the first PMP region is set to TOR mode, the previous address is 0000...0000. 38 | 39 | A = 2: (NA4) naturally-aligned four-byte region 40 | A = 3: (NAPOT) naturally-aligned power-of-two region, >= 8 bytes 41 | 42 | These two modes encode regions of size 2^N for 2 <= N <= 56 43 | 44 | Encodings: 45 | 46 | address[55:2] mode length matching range 47 | aaaa...aaaa NA4 4 bytes aaaa...aaaa00 through aaaa...aaaa11 48 | aaaa...aaa0 NAPOT 8 bytes aaaa...aaa000 through aaaa...aaa111 49 | aaaa...aa01 NAPOT 16 bytes aaaa...aa0000 through aaaa...aa1111 50 | aaaa...a011 NAPOT 32 bytes aaaa...a00000 through aaaa...a11111 51 | aaaa...0111 NAPOT 64 bytes aaaa...000000 through aaaa...111111 52 | . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 53 | aa01...1111 NAPOT 2^54 bytes aa00...000000 through aa11...111111 54 | a011...1111 NAPOT 2^55 bytes a000...000000 through a111...111111 55 | 0111...1111 NAPOT 2^56 bytes 0000...000000 through 1111...111111 56 | 57 | PMP address register format on RV64: 58 | 59 | 63 54 53 0 60 | | reserved | address[55:2] | pmpaddr0-pmpaddr15 61 | 62 | PMP prioritization: 63 | 64 | The earliest PMP entry that matches any byte of an access determines whether that access succeeds or fails. 65 | 66 | If no PMP entry matches, and the privilege mode is M, the access succeeds. 67 | If no PMP entry matches, and the privilege mode is S or U, the access fails. 68 | 69 | Multi-byte accesses that straddle a border between PMP regions fail, even if there is no conflict between the 70 | permissions of the two regions. 71 | 72 | If the L bit is clear, and the privilege mode is M, the access succeeds. 73 | Otherwise, the access only succeeds if the access's RWX type has its corresponding bit set. 74 | 75 | ======== TESTING STRATEGY FOR PMP ======== 76 | 77 | NOTE: many of these tests will be unnecessary to actually implement, because we don't care about verifying functionality 78 | to the extent that I was thinking when I originally wrote this. So please don't take this as a plan for the actual full 79 | scope of what I'm going to do. 80 | 81 | Elements to verify: 82 | - OFF regions do not restrict access for M 83 | - OFF regions do not unrestrict access for S/U 84 | - OFF regions have no collateral effects 85 | - The same permissions should be present for both S and U. 86 | - TOR byte boundaries should be correct (inclusive, exclusive) 87 | - Byte accesses and aligned word accesses should fail on any edge dereference 88 | - NAPOT regions should handle low and high bits correctly 89 | - NAPOT regions should be the right size 90 | - Region priority-order should be respected. 91 | - NAPOT should function for every power of two from 2^2 to 2^56 92 | - M should not be restricted unless L is on 93 | - L should prevent rewriting of any config or address bits 94 | - L should prevent rewriting of the previous entry IFF in TOR mode 95 | - Restricts should function the same regardless of the entry number used 96 | - R, W, and X should be respected correctly in S/U mode 97 | - R, W, and X should be respected correctly in M mode 98 | - PMP violations are trapped in S/U mode 99 | - PMP violations are trapped in M mode 100 | - PMP violations are trapped precisely 101 | - Address ranges for a first-entry TOR are correct 102 | 103 | As a note, the following tests should verify, for a denial, that the correct KIND of exception was generated, and 104 | that the exception was generated from the correct ADDRESS, not just that SOME exception was generated. 105 | 106 | For the sake of not testing too exhaustively, only a subset of these tests will actually be implemented. The tests 107 | planned to be implemented have a # symbol at the left. 108 | 109 | Tests: 110 | # 1. Simple functionality check: enable a set of regions with R or W or X and confirm that U/S can R/W/X them. 111 | VARIATIONS: U/S, verify each of R/W/X 112 | 1.1 -> R: S,U 113 | 1.2 -> W: S,U 114 | 1.3 -> X: S,U 115 | # 2. Simple rejection check: enable a set of regions without R or W or X and confirm that U/S cannot R/W/X them. 116 | VARIATIONS: U/S, verify each of R/W/X 117 | 2.1 -> R: S,U 118 | 2.2 -> W: S,U 119 | 2.3 -> X: S,U 120 | # 3. Machine functionality check: enable a set of locked regions with R or W or X, and confirm that M can R/W/X 121 | them. 122 | VARIATIONS: verify each of R/W/X 123 | 3.1 -> R 124 | 3.2 -> W 125 | 3.3 -> X 126 | # 4. Machine rejection check: enable a set of locked regions without R or W or X, and confirm that M cannot R/W/X 127 | them. 128 | VARIATIONS: verify each of R/W/X 129 | 4.1 -> R 130 | 4.2 -> W 131 | 4.3 -> X 132 | # 5. Machine irreverence check: enable a set of regions without R or W or X and confirm that M can still R/W/X 133 | them. 134 | VARIATIONS: verify each of R/W/X 135 | 5.1 -> R 136 | 5.2 -> W 137 | 5.3 -> X 138 | 6. Configure a bunch of locked OFF regions with no RWX; verify that these don't prevent M mode from 139 | reading/writing/executing anything. 140 | VARIATIONS: verify each of R/W/X 141 | 6.1 -> R 142 | 6.2 -> W 143 | 6.3 -> X 144 | 7. Configure a bunch of OFF regions (and only a single X region) with all RWX enabled; verify that U/S code 145 | running in the X region cannot read or write anything, or execute anything else outside of the region. 146 | VARIATIONS: U/S, verify each of R/W/X 147 | 7.1 -> R: S,U 148 | 7.2 -> W: S,U 149 | 7.3 -> X: S,U 150 | 8. Configure all of the high-priority OFF regions to include key addresses; confirm that these don't mess with 151 | resolution for a lowest-priority region 152 | VARIATIONS: M/S/U, L/!L 153 | 8.1 -> L: M,S,U 154 | 8.2 -> !L: M,S,U 155 | 9. Configure consecutive TOR regions over an entire array, either L or !L, and confirm that M/S/U can perform 156 | or not perform by byte/halfword/word/doubleword accesses on the entire range. 157 | VARIATIONS: M/S/U, L/!L, R/W/X, permit/deny 158 | 9.1 -> LR-permit: M,S,U 159 | 9.2 -> LR-deny: M,S,U 160 | 9.3 -> LW-permit: M,S,U 161 | 9.4 -> LW-deny: M,S,U 162 | 9.5 -> LX-permit: M,S,U 163 | 9.6 -> LX-deny: M,S,U 164 | 9.7 -> !LR-permit: M,S,U 165 | 9.8 -> !LR-deny: M,S,U 166 | 9.9 -> !LW-permit: M,S,U 167 | 9.10 -> !LW-deny: M,S,U 168 | 9.11 -> !LX-permit: M,S,U 169 | 9.12 -> !LX-deny: M,S,U 170 | 10. Configure consecutive TOR regions over an entire array, either L or !L, and confirm that accesses to region 171 | edges are rejected, despite matching permissions. 172 | VARIATIONS: M/S/U, L/!L 173 | 10.1 -> L: M,S,U 174 | 10.2 -> !L: M,S,U 175 | 11. Confirm that M mode can perform edge accesses on !L regions 176 | VARIATIONS: R/W/X, permit/deny 177 | 11.1 -> R-permit 178 | 11.2 -> R-deny 179 | 11.3 -> W-permit 180 | 11.4 -> W-deny 181 | 11.5 -> X-permit 182 | 11.6 -> X-deny 183 | # 12. Check smallest (4, 8), medium (2^32), and largest (2^56) NAPOT regions for correct range handling. 184 | VARIATIONS: M/S/U, L/!L, R/W/X, permit/deny, 4/8/2^32/2^56 185 | 12.1 -> LR-permit-4: M,S,U 186 | 12.2 -> LR-permit-8: M,S,U 187 | 12.3 -> LR-permit-2^32: M,S,U 188 | 12.4 -> LR-permit-2^56: M,S,U 189 | 12.5 -> LR-deny-4: M,S,U 190 | 12.6 -> LR-deny-8: M,S,U 191 | 12.7 -> LR-deny-2^32: M,S,U 192 | 12.8 -> LR-deny-2^56: M,S,U 193 | 12.9 -> LW-permit-4: M,S,U 194 | [...] 195 | 12.16 -> LW-deny-2^56: M,S,U 196 | 12.17 -> LX-permit-4: M,S,U 197 | [...] 198 | 12.24 -> LX-deny-2^56: M,S,U 199 | 12.25 -> !LR-permit-4: M,S,U 200 | [...] 201 | 12.48 -> !LX-deny-2^56: M,S,U 202 | # 13. Check contiguous (buddy-block-style) NAPOT ranges for permitted accesses over the entire array, both with 203 | byte and word accesses. 204 | VARIATIONS: M/S/U, L/!L, R/W/X, permit/deny, byte/word 205 | 13.1 -> LR-permit-byte: M,S,U 206 | 13.2 -> LR-permit-word: M,S,U 207 | 13.3 -> LR-deny-byte: M,S,U 208 | 13.4 -> LR-deny-word: M,S,U 209 | 13.5 -> LW-permit-byte: M,S,U 210 | [...] 211 | 13.8 -> LW-deny-word: M,S,U 212 | 13.9 -> LX-permit-byte: M,S,U 213 | [...] 214 | 13.12 -> LX-deny-word: M,S,U 215 | 13.13 -> !LR-permit-byte: M,S,U 216 | [...] 217 | 13.24 -> !LX-deny-word: M,S,U 218 | 14. Confirm that contiguous NAPOT ranges will cause exceptions on edge accesses, regardless of permissions 219 | VARIATIONS: M/S/U, L/!L, R/W/X, 4/8/2^32/2^56 220 | [...] 221 | # 15. Build forward-pyramid and reverse-pyramid configurations from NAPOT; sample different points on the pyramid 222 | to confirm that priority order is respected. 223 | VARIATIONS: M/S/U, L/!L, R/W/X, left-edge/middle/right-edge pyramid 224 | 15.1 -> LR-left: M,S,U 225 | 15.2 -> LR-middle: M,S,U 226 | 15.3 -> LR-right: M,S,U 227 | 15.4 -> LW-left: M,S,U 228 | 15.5 -> LW-middle: M,S,U 229 | 15.6 -> LW-right: M,S,U 230 | 15.7 -> LX-left: M,S,U 231 | 15.8 -> LX-middle: M,S,U 232 | 15.9 -> LX-right: M,S,U 233 | 15.10 -> !LR-left: M,S,U 234 | 15.11 -> !LR-middle: M,S,U 235 | 15.12 -> !LR-right: M,S,U 236 | 15.13 -> !LW-left: M,S,U 237 | 15.14 -> !LW-middle: M,S,U 238 | 15.15 -> !LW-right: M,S,U 239 | 15.16 -> !LX-left: M,S,U 240 | 15.17 -> !LX-middle: M,S,U 241 | 15.18 -> !LX-right: M,S,U 242 | 16. Stick TOR ranges on top of NAPOT ranges; confirm that TOR permissions override NAPOT permissions. 243 | VARIATIONS: M/S/U, L/!L, R/W/X, override-add/override-subtract 244 | [...] 245 | 17. Stick NAPOT ranges on top of TOR ranges; confirm that NAPOT permissions override TOR permissions. 246 | VARIATIONS: M/S/U, L/!L, R/W/X, override-add/override-subtract 247 | [...] 248 | 18. Confirm that U-mode and S-mode cannot modify the CSRs 249 | VARIATIONS: U/S, L/!L, R/W/X, A=[0-3] 250 | [...] 251 | 19. Confirm that M-mode can always modify CSRs, when L is unset. 252 | VARIATIONS: R/W/X, A=[0-3], entry=[0-15] 253 | [...] 254 | # 20. Confirm that M-mode cannot ever modify CSRs, when L is set. 255 | VARIATIONS: R/W/X, A=[0-3], entry=[0-15] 256 | 20.1 -> R-A0-E0 257 | [...] 258 | 20.16 -> R-A0-E15 259 | 20.17 -> R-A3-E0 260 | [...] 261 | 20.64 -> R-A3-E15 262 | 20.65 -> W-A0-E0 263 | 20.128 -> W-A3-E15 264 | 20.129 -> X-A0-E0 265 | 20.192 -> X-A3-E15 266 | 21. Recap simple/machine functionality/rejection/irreverence checks from tests 1-5; confirm that these are the 267 | same regardless of which entry is used. 268 | VARIATIONS: M/S/U, R/W/X, entry=[0-15] 269 | 22. Configure TOR in first entry, confirm that it starts at the correct position 270 | VARIATIONS: M/S/U, R/W/X 271 | -------------------------------------------------------------------------------- /src/trap.rs: -------------------------------------------------------------------------------- 1 | use riscv_decode::Instruction; 2 | use crate::context::{Context, CONTEXT, IrqMapping}; 3 | use crate::riscv::bits::*; 4 | use crate::{pfault, pmap, riscv, sum, virtio}; 5 | 6 | pub trait U64Bits { 7 | fn get(&self, mask: Self) -> bool; 8 | fn set(&mut self, mask: Self, value: bool); 9 | } 10 | impl U64Bits for u64 { 11 | #[inline(always)] 12 | fn get(&self, mask: Self) -> bool { 13 | *self & mask != 0 14 | } 15 | #[inline(always)] 16 | fn set(&mut self, mask: Self, value: bool) { 17 | if value { 18 | *self |= mask; 19 | } else { 20 | *self &= !mask; 21 | } 22 | } 23 | } 24 | 25 | #[naked] 26 | #[no_mangle] 27 | pub unsafe fn strap_entry() -> ! { 28 | asm!(".align 4 29 | csrw sscratch, sp // Save stack pointer in sscratch 30 | li sp, $0 // Set stack pointer 31 | 32 | // Save registers 33 | sd ra, 1*8(sp) 34 | sd gp, 3*8(sp) 35 | sd tp, 4*8(sp) 36 | sd t0, 5*8(sp) 37 | sd t1, 6*8(sp) 38 | sd t2, 7*8(sp) 39 | sd s0, 8*8(sp) 40 | sd s1, 9*8(sp) 41 | sd a0, 10*8(sp) 42 | sd a1, 11*8(sp) 43 | sd a2, 12*8(sp) 44 | sd a3, 13*8(sp) 45 | sd a4, 14*8(sp) 46 | sd a5, 15*8(sp) 47 | sd a6, 16*8(sp) 48 | sd a7, 17*8(sp) 49 | sd s2, 18*8(sp) 50 | sd s3, 19*8(sp) 51 | sd s4, 20*8(sp) 52 | sd s5, 21*8(sp) 53 | sd s6, 22*8(sp) 54 | sd s7, 23*8(sp) 55 | sd s8, 24*8(sp) 56 | sd s9, 25*8(sp) 57 | sd s10, 26*8(sp) 58 | sd s11, 27*8(sp) 59 | sd t3, 28*8(sp) 60 | sd t4, 29*8(sp) 61 | sd t5, 30*8(sp) 62 | sd t6, 31*8(sp) 63 | 64 | jal ra, strap // Call `strap` 65 | li sp, $0 // Reset stack pointer, just to be safe 66 | 67 | // Restore registers 68 | ld ra, 1*8(sp) 69 | ld gp, 3*8(sp) 70 | ld tp, 4*8(sp) 71 | ld t0, 5*8(sp) 72 | ld t1, 6*8(sp) 73 | ld t2, 7*8(sp) 74 | ld s0, 8*8(sp) 75 | ld s1, 9*8(sp) 76 | ld a0, 10*8(sp) 77 | ld a1, 11*8(sp) 78 | ld a2, 12*8(sp) 79 | ld a3, 13*8(sp) 80 | ld a4, 14*8(sp) 81 | ld a5, 15*8(sp) 82 | ld a6, 16*8(sp) 83 | ld a7, 17*8(sp) 84 | ld s2, 18*8(sp) 85 | ld s3, 19*8(sp) 86 | ld s4, 20*8(sp) 87 | ld s5, 21*8(sp) 88 | ld s6, 22*8(sp) 89 | ld s7, 23*8(sp) 90 | ld s8, 24*8(sp) 91 | ld s9, 25*8(sp) 92 | ld s10, 26*8(sp) 93 | ld s11, 27*8(sp) 94 | ld t3, 28*8(sp) 95 | ld t4, 29*8(sp) 96 | ld t5, 30*8(sp) 97 | ld t6, 31*8(sp) 98 | 99 | // Restore stack pointer and return 100 | csrr sp, sscratch 101 | sret" :: "i"(SSTACK_BASE) : "memory" : "volatile"); 102 | 103 | unreachable!() 104 | } 105 | 106 | #[no_mangle] 107 | pub fn strap() { 108 | let cause = csrr!(scause); 109 | let status = csrr!(sstatus); 110 | 111 | if status.get(STATUS_SPP) { 112 | println!("Trap from within hypervisor?!"); 113 | println!("sepc = {:#x}", csrr!(sepc)); 114 | println!("stval = {:#x}", csrr!(stval)); 115 | println!("cause = {}", cause); 116 | 117 | // No other threads could be accessing CONTEXT here, and even if we interrupted a critical 118 | // section, we're about to crash anyway so it doesn't matter that much. 119 | unsafe { CONTEXT.force_unlock() } 120 | let state = CONTEXT.lock(); 121 | let state = (&*state).as_ref().unwrap(); 122 | 123 | println!("reg ra = {:#x}", state.saved_registers.get(1)); 124 | println!("reg sp = {:#x}", state.saved_registers.get(2)); 125 | for i in 3..32 { 126 | println!("reg x{} = {:#x}", i, state.saved_registers.get(i)); 127 | } 128 | 129 | loop {} 130 | } 131 | 132 | let mut state = CONTEXT.lock(); 133 | let mut state = (&mut *state).as_mut().unwrap(); 134 | 135 | // For the processor to have generated a load/store page fault or an illegal instruction fault, 136 | // the processor must have been able to load the relevant instruction (or else an access fault 137 | // or instruction page fault would have been triggered). Thus, it is safe to access memory 138 | // pointed to by `sepc`. 139 | let instruction = match cause { 140 | SCAUSE_LOAD_PAGE_FAULT | 141 | SCAUSE_STORE_PAGE_FAULT | 142 | SCAUSE_ILLEGAL_INSN => unsafe { 143 | Some(load_instruction_at_address(&mut state, csrr!(sepc))) 144 | } 145 | _ => None, 146 | }; 147 | 148 | if (cause as isize) < 0 { 149 | handle_interrupt(&mut state, cause); 150 | maybe_forward_interrupt(&mut state, csrr!(sepc)); 151 | } else if cause == SCAUSE_INSN_PAGE_FAULT || cause == SCAUSE_LOAD_PAGE_FAULT || cause == SCAUSE_STORE_PAGE_FAULT { 152 | let pc = csrr!(sepc); 153 | if pfault::handle_page_fault(&mut state, cause, instruction.map(|i|i.0)) { 154 | maybe_forward_interrupt(&mut state, pc); 155 | } else { 156 | forward_exception(&mut state, cause, pc); 157 | } 158 | } else if cause == SCAUSE_ILLEGAL_INSN && state.smode { 159 | let pc = csrr!(sepc); 160 | let (instruction, len) = instruction.unwrap(); 161 | let mut advance_pc = true; 162 | match riscv_decode::decode(instruction).ok() { 163 | Some(Instruction::Sret) => { 164 | if !state.csrs.sstatus.get(STATUS_SIE) && state.csrs.sstatus.get(STATUS_SPIE) { 165 | state.no_interrupt = false; 166 | } 167 | state.csrs.pop_sie(); 168 | state.smode = state.csrs.sstatus.get(STATUS_SPP); 169 | state.csrs.sstatus.set(STATUS_SPP, false); 170 | riscv::set_sepc(state.csrs.sepc); 171 | advance_pc = false; 172 | 173 | if !state.smode { 174 | state.no_interrupt = false; 175 | } 176 | } 177 | Some(Instruction::SfenceVma(rtype)) => pmap::handle_sfence_vma(&mut state, rtype), 178 | Some(Instruction::Csrrw(i)) => if let Some(prev) = state.get_csr(i.csr()) { 179 | let value = state.saved_registers.get(i.rs1()); 180 | state.set_csr(i.csr(), value); 181 | state.saved_registers.set(i.rd(), prev); 182 | } 183 | Some(Instruction::Csrrs(i)) => if let Some(prev) = state.get_csr(i.csr()) { 184 | let mask = state.saved_registers.get(i.rs1()); 185 | if mask != 0 { 186 | state.set_csr(i.csr(), prev | mask); 187 | } 188 | state.saved_registers.set(i.rd(), prev); 189 | } 190 | Some(Instruction::Csrrc(i)) => if let Some(prev) = state.get_csr(i.csr()) { 191 | let mask = state.saved_registers.get(i.rs1()); 192 | if mask != 0 { 193 | state.set_csr(i.csr(), prev & !mask); 194 | } 195 | state.saved_registers.set(i.rd(), prev); 196 | } 197 | Some(Instruction::Csrrwi(i)) => if let Some(prev) = state.get_csr(i.csr()) { 198 | state.set_csr(i.csr(), i.zimm() as u64); 199 | state.saved_registers.set(i.rd(), prev); 200 | } 201 | Some(Instruction::Csrrsi(i)) => if let Some(prev) = state.get_csr(i.csr()) { 202 | let mask = i.zimm() as u64; 203 | if mask != 0 { 204 | state.set_csr(i.csr(), prev | mask); 205 | } 206 | state.saved_registers.set(i.rd(), prev); 207 | } 208 | Some(Instruction::Csrrci(i)) => if let Some(prev) = state.get_csr(i.csr()) { 209 | let mask = i.zimm() as u64; 210 | if mask != 0 { 211 | state.set_csr(i.csr(), prev & !mask); 212 | } 213 | state.saved_registers.set(i.rd(), prev); 214 | } 215 | Some(Instruction::Wfi) => {} 216 | Some(decoded) => { 217 | println!("Unrecognized instruction! {:?} @ pc={:#x}", decoded, pc); 218 | forward_exception(&mut state, cause, pc); 219 | advance_pc = false; 220 | } 221 | None => { 222 | println!("Unrecognized instruction {:#x} @ pc={:#x}", instruction, pc); 223 | forward_exception(&mut state, cause, pc); 224 | advance_pc = false; 225 | } 226 | } 227 | 228 | if advance_pc { 229 | riscv::set_sepc(pc + len); 230 | } 231 | maybe_forward_interrupt(&mut state, csrr!(sepc)); 232 | } else if cause == SCAUSE_ENV_CALL && state.smode { 233 | match state.saved_registers.get(17) { 234 | 0 => { 235 | state.csrs.sip.set(IP_STIP, false); 236 | state.csrs.mtimecmp = state.saved_registers.get(10); 237 | riscv::sbi::set_timer(state.csrs.mtimecmp); 238 | } 239 | 1 => { 240 | let value = state.saved_registers.get(10) as u8; 241 | state.uart.output_byte(value) 242 | } 243 | 5 => riscv::fence_i(), 244 | 6 | 7 => { 245 | // Current versions of the Linux kernel pass wrong arguments to these SBI calls. As 246 | // a result, this function ignores the arguments and just does a global fence. This 247 | // will eventually be fixed by https://patchwork.kernel.org/patch/10872353. 248 | pmap::flush_shadow_page_table(&mut state.shadow_page_tables); 249 | } 250 | 8 => { 251 | if let Some(ref mut finisher) = state.test_finisher { 252 | finisher.pass(); 253 | } 254 | loop {} 255 | } 256 | i => { 257 | println!("Got ecall from guest function={}!", i); 258 | loop {} 259 | } 260 | } 261 | riscv::set_sepc(csrr!(sepc) + 4); 262 | } else { 263 | if cause != SCAUSE_ENV_CALL { // no need to print anything for guest syscalls... 264 | println!("Forward exception (cause = {}, smode={})!", cause, state.smode); 265 | } 266 | forward_exception(&mut state, cause, csrr!(sepc)); 267 | } 268 | 269 | state.shadow_page_tables.install_root(state.shadow()); 270 | } 271 | 272 | fn handle_interrupt(state: &mut Context, cause: u64) { 273 | let interrupt = cause & 0xff; 274 | match interrupt { 275 | 0x1 => { 276 | // Software interrupt 277 | unreachable!(); 278 | } 279 | 0x5 => { 280 | // Timer interrupt 281 | let time = state.host_clint.get_mtime(); 282 | let mut next = time + 1_000_000; 283 | 284 | crate::context::Uart::timer(state, time); 285 | if state.csrs.mtimecmp <= time { 286 | state.csrs.sip |= IP_STIP; 287 | state.no_interrupt = false; 288 | } else { 289 | next = next.min(state.csrs.mtimecmp); 290 | } 291 | 292 | if state.uart.next_interrupt_time > time { 293 | next = next.min(state.uart.next_interrupt_time); 294 | } 295 | riscv::sbi::set_timer(next); 296 | } 297 | 0x9 => { 298 | // External 299 | let host_irq = state.host_plic.claim_and_clear(); 300 | let guest_irq = state.irq_map[host_irq as usize]; 301 | match guest_irq { 302 | IrqMapping::Virtio { device_index, guest_irq } => { 303 | let forward = match state.virtio.devices[device_index as usize] { 304 | virtio::Device::Passthrough { .. } => true, 305 | virtio::Device::Unmapped => false, 306 | virtio::Device::Macb(ref mut macb) => macb.interrupt(&mut state.guest_memory), 307 | }; 308 | 309 | if forward { 310 | state.plic.set_pending(guest_irq as u32, true); 311 | 312 | // Guest might have masked out this interrupt 313 | if state.plic.interrupt_pending() { 314 | state.no_interrupt = false; 315 | state.csrs.sip |= IP_SEIP; 316 | } else { 317 | assert_eq!(state.csrs.sip & IP_SEIP, 0); 318 | } 319 | } 320 | } 321 | IrqMapping::Ignored => {} 322 | } 323 | 324 | } 325 | i => { 326 | println!("Got interrupt #{}", i); 327 | unreachable!() 328 | } 329 | } 330 | } 331 | 332 | fn maybe_forward_interrupt(state: &mut Context, sepc: u64) { 333 | if state.no_interrupt { 334 | return; 335 | } 336 | 337 | if !state.csrs.sip.get(IP_SEIP) && state.plic.interrupt_pending() { 338 | state.csrs.sip.set(IP_SEIP, true); 339 | } 340 | 341 | if (!state.smode || state.csrs.sstatus.get(STATUS_SIE)) && (state.csrs.sie & state.csrs.sip != 0) { 342 | let cause = if state.csrs.sip.get(IP_SEIP) { 343 | 9 344 | } else if state.csrs.sip.get(IP_STIP) { 345 | 5 346 | } else if state.csrs.sip.get(IP_SSIP) { 347 | 1 348 | } else { 349 | unreachable!() 350 | }; 351 | 352 | // println!("||> Forwarding timer interrupt! (state.smode={}, sepc={:#x})", state.smode, sepc); 353 | // forward interrupt 354 | state.csrs.push_sie(); 355 | state.csrs.sepc = sepc; 356 | state.csrs.scause = (1 << 63) | cause; 357 | state.csrs.sstatus.set(STATUS_SPP, state.smode); 358 | state.csrs.stval = 0; 359 | state.smode = true; 360 | 361 | match state.csrs.stvec & TVEC_MODE { 362 | 0 => riscv::set_sepc(state.csrs.stvec & TVEC_BASE), 363 | 1 => riscv::set_sepc((state.csrs.stvec & TVEC_BASE) + 4 * cause), 364 | _ => unreachable!(), 365 | } 366 | } else { 367 | state.no_interrupt = true; 368 | } 369 | } 370 | 371 | fn forward_exception(state: &mut Context, cause: u64, sepc: u64) { 372 | // println!("||> Forward exception sepc={:#x}", sepc); 373 | state.csrs.push_sie(); 374 | state.csrs.sepc = sepc; 375 | state.csrs.scause = cause; 376 | state.csrs.sstatus.set(STATUS_SPP, state.smode); 377 | state.csrs.stval = csrr!(stval); 378 | state.smode = true; 379 | riscv::set_sepc(state.csrs.stvec & TVEC_BASE); 380 | } 381 | 382 | pub unsafe fn load_instruction_at_address(_state: &mut Context, guest_va: u64) -> (u32, u64) { 383 | let pc_ptr = guest_va as *const u16; 384 | sum::access_user_memory(||{ 385 | let il: u16 = *pc_ptr; 386 | match riscv_decode::instruction_length(il) { 387 | 2 => (il as u32, 2), 388 | 4 => (il as u32 | ((*pc_ptr.offset(1) as u32) << 16), 4), 389 | _ => unreachable!(), 390 | } 391 | }) 392 | } 393 | -------------------------------------------------------------------------------- /src/fdt.rs: -------------------------------------------------------------------------------- 1 | use arrayvec::{ArrayString, ArrayVec}; 2 | use byteorder::{BigEndian, ByteOrder}; 3 | use core::slice; 4 | 5 | const FDT_BEGIN_NODE: u32 = 0x01; 6 | const FDT_END_NODE: u32 = 0x02; 7 | const FDT_PROP: u32 = 0x03; 8 | const FDT_NOP: u32 = 0x04; 9 | const FDT_END: u32 = 0x09; 10 | 11 | #[derive(Default)] 12 | struct AddressMap(ArrayVec<[u64; Self::MAX_LEN]>); 13 | impl AddressMap { 14 | const MAX_LEN: usize = 16; 15 | fn index_of(&mut self, value: u64) -> usize { 16 | for i in 0..self.0.len() { 17 | if value == self.0[i] { 18 | return i; 19 | } 20 | } 21 | 22 | self.0.push(value); 23 | self.0.len() - 1 24 | } 25 | } 26 | 27 | 28 | #[derive(Copy, Clone, Debug, PartialEq, Eq)] 29 | pub enum UartType { 30 | Ns16550a, 31 | SiFive, 32 | } 33 | 34 | #[derive(Clone, Debug)] 35 | pub struct Device { 36 | pub base_address: u64, 37 | pub size: u64, 38 | pub irq: u64, 39 | } 40 | 41 | #[derive(Clone, Debug)] 42 | pub struct Hart { 43 | pub hartid: u64, 44 | pub plic_context: u64, 45 | } 46 | 47 | #[derive(Clone, Debug, Default)] 48 | pub struct MachineMeta { 49 | pub physical_memory_offset: u64, 50 | pub physical_memory_size: u64, 51 | 52 | pub harts: ArrayVec<[Hart; 16]>, 53 | 54 | pub uart_type: Option, 55 | pub uart_address: u64, 56 | 57 | pub plic_address: u64, 58 | pub clint_address: Option, 59 | 60 | pub test_finisher_address: Option, 61 | 62 | pub virtio: ArrayVec<[Device; 16]>, 63 | 64 | pub bootargs: ArrayString<[u8; 256]>, 65 | 66 | pub initrd_start: u64, 67 | pub initrd_end: u64, 68 | } 69 | 70 | #[repr(C)] 71 | struct FdtHeader { 72 | magic: u32, 73 | total_size: u32, 74 | off_dt_struct: u32, 75 | off_dt_strings: u32, 76 | off_mem_rsvmap: u32, 77 | version: u32, 78 | last_comp_version: u32, 79 | boot_cpuid_phys: u32, 80 | size_dt_strings: u32, 81 | size_dt_struct: u32, 82 | } 83 | 84 | pub struct Fdt<'a>{ 85 | header: &'a mut FdtHeader, 86 | strings: &'a [u8], 87 | nodes: &'a mut [u8], 88 | } 89 | 90 | #[allow(unused)] 91 | impl<'a> Fdt<'a> { 92 | pub unsafe fn new(addr: u64) -> Self { 93 | let header = &mut *(addr as *mut FdtHeader); 94 | let total_size = header.total_size.swap_bytes() as usize; 95 | 96 | let off_dt_strings = header.off_dt_strings.swap_bytes() as u64; 97 | let size_dt_strings = header.size_dt_strings.swap_bytes() as usize; 98 | assert!(off_dt_strings as usize + size_dt_strings <= total_size); 99 | 100 | let off_dt_struct = header.off_dt_struct.swap_bytes() as u64; 101 | let size_dt_struct = header.size_dt_struct.swap_bytes() as usize; 102 | assert!(off_dt_struct as usize + size_dt_struct <= total_size); 103 | 104 | let strings = slice::from_raw_parts_mut((addr + off_dt_strings) as *mut u8, size_dt_strings); 105 | let nodes = slice::from_raw_parts_mut((addr + off_dt_struct) as *mut u8, size_dt_struct); 106 | 107 | Self { 108 | header, 109 | strings, 110 | nodes, 111 | } 112 | } 113 | 114 | pub fn magic_valid(&self) -> bool { 115 | self.header.magic == 0xedfe0dd0 116 | } 117 | pub fn total_size(&self) -> u32 { self.header.total_size.swap_bytes() } 118 | pub fn off_dt_struct(&self) -> u32 { self.header.off_dt_struct.swap_bytes() } 119 | pub fn off_dt_strings(&self) -> u32 { self.header.off_dt_strings.swap_bytes() } 120 | pub fn off_mem_rsvmap(&self) -> u32 { self.header.off_mem_rsvmap.swap_bytes() } 121 | pub fn version(&self) -> u32 { self.header.version.swap_bytes() } 122 | pub fn last_comp_version(&self) -> u32 { self.header.last_comp_version.swap_bytes() } 123 | pub fn boot_cpuid_phys(&self) -> u32 { self.header.boot_cpuid_phys.swap_bytes() } 124 | pub fn size_dt_strings(&self) -> u32 { self.header.size_dt_strings.swap_bytes() } 125 | pub fn size_dt_struct(&self) -> u32 { self.header.size_dt_struct.swap_bytes() } 126 | 127 | pub fn get_string(strings: &[u8], offset: usize) -> &str { 128 | let mut end = offset; 129 | while end < strings.len() && strings[end] != 0 { 130 | end += 1; 131 | } 132 | 133 | core::str::from_utf8(&strings[offset..end]).expect("FDT contained invalid string") 134 | } 135 | 136 | pub fn print(&mut self) { 137 | self.walk(|path, unit_addresses, v| match v { 138 | FdtVisit::Property { name, prop } => { 139 | if path != "/" { 140 | let mut depth = 0; 141 | for ch in path.chars() { 142 | if ch == '/' { 143 | if let Some(a) = unit_addresses[depth] { 144 | print!("@{:x}", unit_addresses[depth].unwrap()); 145 | } 146 | depth += 1; 147 | } 148 | print!("{}", ch) 149 | } 150 | if let Some(unit_address) = unit_addresses[depth] { 151 | print!("@{:x}", unit_address) 152 | } 153 | print!(":{}", name); 154 | } else { 155 | print!("{}", name); 156 | } 157 | 158 | if prop.len() == 4 || prop.len() == 8 { 159 | println!("={:#x}", prop.read_int()); 160 | } else if prop.len() == 16 { 161 | let range = prop.read_range(); 162 | println!("={:x}:{:x}", range.0, range.1); 163 | } else if prop.len() != 0 { 164 | if let Some(value) = prop.value_str() { 165 | println!("=\"{}\"", value); 166 | } else { 167 | println!(" (value_len={})", prop.len()); 168 | } 169 | } else { 170 | println!(""); 171 | } 172 | } 173 | FdtVisit::Node { .. } => {} 174 | }); 175 | } 176 | 177 | pub fn parse(&mut self) -> MachineMeta { 178 | let mut initrd_start: Option = None; 179 | let mut initrd_end: Option = None; 180 | let mut plic: Option = None; 181 | 182 | let mut meta = MachineMeta::default(); 183 | 184 | let mut virtio_address_map = AddressMap::default(); 185 | let mut virtio = [(None, None); AddressMap::MAX_LEN]; 186 | 187 | // (hartid, phandle) 188 | let mut cpus = [(None, None); AddressMap::MAX_LEN]; 189 | let mut cpu_address_map = AddressMap::default(); 190 | 191 | // hart phandle for each plic S-mode context 192 | let mut plic_context_phandles = [None; 64]; 193 | 194 | self.walk(|path, unit_addresses, v| { 195 | match v { 196 | FdtVisit::Property { name, prop } => match (path, name) { 197 | ("/chosen", "linux,initrd-end") => initrd_end = Some(prop.read_int()), 198 | ("/chosen", "linux,initrd-start") => initrd_start = Some(prop.read_int()), 199 | ("/chosen", "bootargs") => { 200 | meta.bootargs.push_str(prop.value_str() 201 | .expect("Unable to parse bootargs string")) 202 | } 203 | ("/memory", "reg") => { 204 | let region = prop.read_range(); 205 | meta.physical_memory_offset = region.0; 206 | meta.physical_memory_size = region.1; 207 | } 208 | ("/uart", "reg") | 209 | ("/soc/uart", "reg") | 210 | ("/soc/serial", "reg") => if meta.uart_address == 0 { 211 | meta.uart_address = prop.read_range().0 212 | } 213 | ("/uart", "compatible") | 214 | ("/soc/uart", "compatible") | 215 | ("/soc/serial", "compatible") => if meta.uart_type.is_none() { 216 | match prop.value_str().map(|s| s.trim_end_matches('\0')) { 217 | Some("ns16550a") => meta.uart_type = Some(UartType::Ns16550a), 218 | Some("sifive,uart0") => meta.uart_type = Some(UartType::SiFive), 219 | _ => {}, 220 | } 221 | } 222 | ("/soc/clint", "reg") => meta.clint_address = Some(prop.read_range().0), 223 | ("/test", "reg") => meta.test_finisher_address = Some(prop.read_range().0), 224 | ("/soc/interrupt-controller", "reg") => plic = Some(prop.read_range().0), 225 | ("/soc/interrupt-controller", "interrupts-extended") => { 226 | let cells = prop.cells(); 227 | for i in (0..cells).step_by(2) { 228 | let irq = prop.read_cell(i + 1); 229 | if irq == 9 { 230 | plic_context_phandles[i/2] = Some(prop.read_cell(i)); 231 | } 232 | } 233 | } 234 | ("/virtio_mmio", "reg") => { 235 | let index = virtio_address_map.index_of(unit_addresses[1].unwrap_or(0)); 236 | virtio[index].0 = Some(prop.read_range()); 237 | } 238 | ("/virtio_mmio", "interrupts") => { 239 | let index = virtio_address_map.index_of(unit_addresses[1].unwrap_or(0)); 240 | virtio[index].1 = Some(prop.read_int()); 241 | } 242 | ("/cpus/cpu", "reg") => { 243 | let index = virtio_address_map.index_of(unit_addresses[2].unwrap_or(0)); 244 | cpus[index].0 = Some(prop.read_int()); 245 | } 246 | ("/cpus/cpu/interrupt-controller", "phandle") => { 247 | let index = virtio_address_map.index_of(unit_addresses[2].unwrap_or(0)); 248 | cpus[index].1 = Some(prop.read_int()); 249 | } 250 | _ => {}, 251 | } 252 | FdtVisit::Node { .. } => {} 253 | } 254 | }); 255 | 256 | if let (Some(start), Some(end)) = (initrd_start, initrd_end) { 257 | meta.initrd_start = start; 258 | meta.initrd_end = end; 259 | } 260 | 261 | meta.plic_address = plic.expect("PLIC address not specified"); 262 | 263 | for &c in cpus.iter() { 264 | if let (Some(hartid), Some(phandle)) = c { 265 | if let Some(plic_context) = plic_context_phandles.iter().position(|&p| p == Some(phandle as u32)) { 266 | meta.harts.push(Hart { 267 | hartid, 268 | plic_context: plic_context as u64, 269 | }) 270 | } 271 | } 272 | } 273 | meta.harts.sort_unstable_by_key(|h|h.hartid); 274 | 275 | for &v in virtio.iter().rev() { 276 | if let (Some((base_address, size)), Some(irq)) = v { 277 | meta.virtio.push(Device { 278 | base_address, 279 | size, 280 | irq 281 | }) 282 | } 283 | } 284 | meta.virtio.sort_unstable_by_key(|v| v.base_address); 285 | 286 | meta 287 | } 288 | 289 | pub fn initialize_guest(&mut self, guest_memory_size: u64, bootargs: &str) { 290 | self.walk(|path, unit_addresses, v| match v { 291 | FdtVisit::Property { name, prop } => match (path, name) { 292 | ("/chosen", "bootargs") => { 293 | let s = prop.value_slice(); 294 | assert!(s.len() >= bootargs.len()); 295 | 296 | for i in 0..bootargs.len() { 297 | s[i] = bootargs.as_bytes()[i]; 298 | } 299 | } 300 | ("/memory", "reg") => { 301 | let region = prop.read_range(); 302 | let mut new_region = [0; 16]; 303 | BigEndian::write_u64(&mut new_region, region.0); 304 | BigEndian::write_u64(&mut new_region[8..], guest_memory_size); 305 | prop.set(&new_region); 306 | } 307 | _ => {}, 308 | } 309 | FdtVisit::Node { .. } => {} 310 | }); 311 | } 312 | 313 | // Mask out entries from FDT and return some information about the machine. 314 | fn walk(&mut self, mut visit: F) where 315 | F: FnMut(&str, &[Option], FdtVisit), 316 | { 317 | let mut mask_node = 0; 318 | 319 | let mut path = ArrayString::<[_; 1024]>::new(); 320 | let mut unit_addresses = ArrayVec::<[Option; 32]>::new(); 321 | 322 | let mut i = 0; 323 | while i < self.nodes.len() { 324 | let old_i = i; 325 | assert_eq!(i % 4, 0); 326 | match BigEndian::read_u32(&self.nodes[i..]) { 327 | FDT_END => { 328 | break; 329 | } 330 | FDT_BEGIN_NODE => { 331 | i += 4; 332 | 333 | // Root node is weird: name will be empty so its children should not prepend 334 | // another slash. 335 | if path.len() != 1 { 336 | path.push('/'); 337 | } 338 | 339 | let mut full_name = ArrayString::<[_;48]>::new(); 340 | while self.nodes[i] != 0 { 341 | full_name.push(self.nodes[i] as char); 342 | i += 1; 343 | } 344 | i = round4(i); 345 | 346 | let mut name_parts = full_name.split('@'); 347 | path.push_str(name_parts.next().unwrap_or("")); 348 | unit_addresses.push(name_parts.next().and_then(|a| u64::from_str_radix(a, 16).ok())); 349 | 350 | if mask_node > 0 { 351 | mask_node += 1; 352 | } else { 353 | let mut mask = false; 354 | visit(&path, &unit_addresses, FdtVisit::Node { mask: &mut mask }); 355 | if mask { 356 | mask_node = 1; 357 | } 358 | } 359 | } 360 | FDT_END_NODE => { 361 | if mask_node > 0 { 362 | BigEndian::write_u32(&mut self.nodes[i..], FDT_NOP); 363 | mask_node = mask_node - 1; 364 | } 365 | 366 | let mut index = path.rfind('/').unwrap(); 367 | if index == 0 && path.len() > 1 { 368 | index = 1; 369 | } 370 | path.truncate(index); 371 | unit_addresses.pop(); 372 | i += 4; 373 | } 374 | FDT_PROP => { 375 | let mut prop = Property::from_slice(&mut self.nodes[i..]).0; 376 | let prop_name = Self::get_string(self.strings, prop.name_offset()); 377 | i += 12 + round4(prop.len()); 378 | visit(&path, &unit_addresses, FdtVisit::Property{ name: prop_name, prop: &mut prop }); 379 | } 380 | FDT_NOP | _ => { 381 | i += 4; 382 | } 383 | } 384 | 385 | if mask_node > 0 { 386 | for j in (old_i..i).step_by(4) { 387 | BigEndian::write_u32(&mut self.nodes[j..], FDT_NOP); 388 | } 389 | } 390 | } 391 | } 392 | } 393 | 394 | #[repr(C)] 395 | #[derive(Clone)] 396 | pub struct MemoryRegion([u8; 16]); 397 | impl MemoryRegion { 398 | pub fn offset(&self) -> u64 { 399 | BigEndian::read_u64(&self.0) 400 | } 401 | pub fn size(&self) -> u64 { 402 | BigEndian::read_u64(&self.0[8..]) 403 | } 404 | pub fn set_size(&mut self, size: u64) { 405 | BigEndian::write_u64(&mut self.0[8..], size) 406 | } 407 | } 408 | 409 | #[repr(C)] 410 | pub struct Property<'a>(&'a mut [u8]); 411 | impl<'a> Property<'a> { 412 | pub fn from_slice(s: &'a mut [u8]) -> (Self, &mut [u8]) { 413 | assert_eq!(BigEndian::read_u32(s), FDT_PROP); 414 | 415 | let len = 12 + round4(BigEndian::read_u32(&s[4..]) as usize); 416 | let split = s.split_at_mut(len as usize); 417 | 418 | (Self(split.0), split.1) 419 | } 420 | 421 | pub fn len(&self) -> usize { 422 | BigEndian::read_u32(&self.0[4..][..4]) as usize 423 | } 424 | pub fn name_offset(&self) -> usize { 425 | BigEndian::read_u32(&self.0[8..][..4]) as usize 426 | } 427 | 428 | pub fn read_int(&self) -> u64 { 429 | match self.len() { 430 | 4 => BigEndian::read_u32(&self.0[12..][..4]) as u64, 431 | 8 => BigEndian::read_u64(&self.0[12..][..8]), 432 | _ => unreachable!(), 433 | } 434 | } 435 | pub fn read_range(&self) -> (u64, u64) { 436 | assert_eq!(self.len(), 16); 437 | 438 | (BigEndian::read_u64(&self.0[12..20]), BigEndian::read_u64(&self.0[20..28])) 439 | } 440 | pub fn mask(&mut self) { 441 | for i in (0..self.0.len()).step_by(4) { 442 | BigEndian::write_u32(&mut self.0[i..], FDT_NOP); 443 | } 444 | } 445 | pub fn value_str(&mut self) -> Option<&str> { 446 | if self.len() == 0 { return Some(""); } 447 | 448 | for i in 0..(self.len() - 1) { 449 | let c = self.0[12 + i]; 450 | if c < 32 || c > 126 { 451 | return None; 452 | } 453 | } 454 | Some(core::str::from_utf8(&self.0[12..][..(self.len() - 1)]).unwrap()) 455 | } 456 | pub fn value_slice(&mut self) -> &mut [u8] { 457 | &mut self.0[12..] 458 | } 459 | 460 | pub fn cells(&self) -> usize { 461 | self.len() / 4 462 | } 463 | pub fn read_cell(&self, i: usize) -> u32 { 464 | BigEndian::read_u32(&self.0[(12 + 4*i)..]) 465 | } 466 | 467 | pub fn set(&mut self, value: &[u8]) { 468 | assert_eq!(value.len(), self.len()); 469 | self.0[12..].copy_from_slice(value); 470 | } 471 | } 472 | 473 | enum FdtVisit<'a> { 474 | Node { #[allow(unused)] mask: &'a mut bool }, 475 | Property { 476 | name: &'a str, 477 | prop: &'a mut Property<'a>, 478 | } 479 | } 480 | 481 | /// Round up to the next multiple of 4 482 | const fn round4(i: usize) -> usize { 483 | 4 * ((i + 3) / 4) 484 | } 485 | --------------------------------------------------------------------------------