├── .github └── workflows │ └── rust.yml ├── .gitignore ├── Cargo.toml ├── README.md ├── examples ├── hello_world.rs └── io_test.rs ├── rust-toolchain ├── setup-hugetlbfs.sh └── src ├── cmd.rs ├── lib.rs ├── memory.rs ├── nvme.rs ├── pci.rs └── queues.rs /.github/workflows/rust.yml: -------------------------------------------------------------------------------- 1 | name: Rust 2 | 3 | on: 4 | push: 5 | branches: [ "main" ] 6 | pull_request: 7 | branches: [ "main" ] 8 | 9 | env: 10 | CARGO_TERM_COLOR: always 11 | 12 | jobs: 13 | build: 14 | 15 | runs-on: self-hosted 16 | 17 | steps: 18 | - uses: actions/checkout@v3 19 | - name: Build 20 | run: cargo build --verbose 21 | - name: Run tests 22 | run: cargo test --verbose 23 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | Cargo.lock 3 | 4 | # rustfmt 5 | **/*.rs.bk 6 | 7 | # Jetbrains 8 | .idea/ 9 | *.iml 10 | 11 | # misc 12 | .DS_Store 13 | # Shakespeare 14 | *.txt 15 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "vroom" 3 | version = "0.1.0" 4 | edition = "2021" 5 | 6 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 7 | 8 | [dependencies] 9 | libc = "0.2" 10 | byteorder = "1" 11 | lazy_static = "1.4.0" 12 | rand = "0.8.5" 13 | 14 | [profile.release] 15 | debug = true 16 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # vroom 2 | vroom is a userspace NVMe driver written in Rust. It aims to be as fast as the SPDK NVMe driver, while minimizing unsafe code and offering a simplified API. vroom currently serves as a proof of concept. 3 | 4 | [My thesis](https://db.in.tum.de/people/sites/ellmann/theses/finished/24/pirhonen_writing_an_nvme_driver_in_rust.pdf) contains some details about the implementation. 5 | 6 | # Build instructions 7 | You will need Rust, as well as its package manager `cargo` which you can install with: 8 | ```bash 9 | curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh 10 | ``` 11 | 12 | Huge pages need to be enabled: 13 | ```bash 14 | cd vroom 15 | sudo ./setup-hugetlbfs.sh 16 | ``` 17 | 18 | To build the driver, as well as any examples run: 19 | ```bash 20 | cargo build --release --all-targets 21 | ``` 22 | 23 | e.g. to run the hello world example (root rights are needed for DMA): 24 | ``` 25 | sudo ./target/release/examples/hello_world 0000:00:07.0 26 | ``` 27 | 28 | # Disclaimer 29 | This is by no means production-ready. Do not use it in critical environments. DMA may corrupt memory. 
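# Example
A minimal usage sketch (mirroring `examples/hello_world.rs` below; the PCI address is a placeholder and root privileges are required for DMA):
```rust
fn main() -> Result<(), Box<dyn std::error::Error>> {
    let mut nvme = vroom::init("0000:00:07.0")?;

    // write a string to LBA 0 and read it back
    nvme.write_copied("hello world".as_bytes(), 0)?;

    let mut dest = [0u8; 12];
    nvme.read_copied(&mut dest, 0)?;
    println!("{}", std::str::from_utf8(&dest)?);

    Ok(())
}
```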
30 | 31 | # Related projects 32 | - [Redox's NVMe driver](https://gitlab.redox-os.org/redox-os/drivers/-/tree/master/storage/nvmed) 33 | -------------------------------------------------------------------------------- /examples/hello_world.rs: -------------------------------------------------------------------------------- 1 | use std::error::Error; 2 | use std::{env, process}; 3 | 4 | pub fn main() -> Result<(), Box<dyn Error>> { 5 | let mut args = env::args(); 6 | args.next(); 7 | 8 | let pci_addr = match args.next() { 9 | Some(arg) => arg, 10 | None => { 11 | eprintln!("Usage: cargo run --example hello_world <pci bus id>"); 12 | process::exit(1); 13 | } 14 | }; 15 | 16 | let mut nvme = vroom::init(&pci_addr)?; 17 | nvme.write_copied("hello world".as_bytes(), 0)?; 18 | 19 | let mut dest = [0u8; 12]; 20 | nvme.read_copied(&mut dest, 0)?; 21 | 22 | println!("{}", std::str::from_utf8(&dest)?); 23 | 24 | Ok(()) 25 | } 26 | -------------------------------------------------------------------------------- /examples/io_test.rs: -------------------------------------------------------------------------------- 1 | use rand::{thread_rng, Rng}; 2 | use std::error::Error; 3 | use std::sync::{Arc, Mutex}; 4 | use std::time::{Duration, Instant}; 5 | use std::{env, process, thread}; 6 | use vroom::memory::*; 7 | use vroom::{NvmeDevice, QUEUE_LENGTH}; 8 | 9 | #[allow(unused_variables, unused_mut)] 10 | pub fn main() -> Result<(), Box<dyn Error>> { 11 | let mut args = env::args(); 12 | args.next(); 13 | 14 | let pci_addr = match args.next() { 15 | Some(arg) => arg, 16 | None => { 17 | eprintln!("Usage: cargo run --example init <pci bus id> <duration in seconds>"); 18 | process::exit(1); 19 | } 20 | }; 21 | 22 | let duration = match args.next() { 23 | Some(secs) => Some(Duration::from_secs(secs.parse().expect( 24 | "Usage: cargo run --example init <pci bus id> <duration in seconds>", 25 | ))), 26 | None => None, 27 | }; 28 | 29 | let mut nvme = vroom::init(&pci_addr)?; 30 | 31 | let nvme = qd_n(nvme, 1, 0, false, 128, duration)?; 32 | let _ = qd_n(nvme, 1, 0, false, 256, duration)?; 33 | 34 | // let _ = qd1(nvme, 0, false, true, duration)?; 35 | 36 | Ok(()) 37 | } 38 | 39 | fn qd1( 40 | mut nvme: NvmeDevice, 41 | n: u64, 42 | write: bool, 43 | random: bool, 44 | time: Option<Duration>, 45 | ) -> Result<NvmeDevice, Box<dyn Error>> { 46 | let mut buffer: Dma<u8> = Dma::allocate(HUGE_PAGE_SIZE)?; 47 | 48 | let blocks = 8; 49 | let bytes = 512 * blocks; 50 | let ns_blocks = nvme.namespaces.get(&1).unwrap().blocks / blocks - 1; // - blocks - 1; 51 | 52 | let mut rng = thread_rng(); 53 | let seq = if random { 54 | (0..n) 55 | .map(|_| rng.gen_range(0..ns_blocks as u64)) 56 | .collect::<Vec<u64>>() 57 | } else { 58 | (0..n).map(|i| (i * 8) % ns_blocks).collect::<Vec<u64>>() 59 | }; 60 | 61 | let rand_block = &(0..bytes).map(|_| rand::random::<u8>()).collect::<Vec<u8>>()[..]; 62 | buffer[..rand_block.len()].copy_from_slice(rand_block); 63 | 64 | let mut total = Duration::ZERO; 65 | 66 | if let Some(time) = time { 67 | let mut ios = 0; 68 | let lba = 0; 69 | while total < time { 70 | let lba = if random { rng.gen_range(0..ns_blocks) } else { (lba + 1) % ns_blocks }; 71 | 72 | let before = Instant::now(); 73 | if write { 74 | nvme.write(&buffer.slice(0..bytes as usize), lba * blocks)?; 75 | } else { 76 | nvme.read(&buffer.slice(0..bytes as usize), lba * blocks)?; 77 | } 78 | let elapsed = before.elapsed(); 79 | total += elapsed; 80 | ios += 1; 81 | } 82 | println!( 83 | "IOP: {ios}, total {} iops: {:?}", 84 | if write { "write" } else { "read" }, 85 | ios as f64 / total.as_secs_f64() 86 | ); 87 | } else { 88 | for lba in seq { 89 | let before = Instant::now(); 90 | if write { 91 |
nvme.write(&buffer.slice(0..bytes as usize), lba * blocks)?; 92 | } else { 93 | nvme.read(&buffer.slice(0..bytes as usize), lba * blocks)?; 94 | } 95 | total += before.elapsed(); 96 | } 97 | println!( 98 | "n: {n}, total {} iops: {:?}", 99 | if write { "write" } else { "read" }, 100 | n as f64 / total.as_secs_f64() 101 | ); 102 | } 103 | Ok(nvme) 104 | } 105 | 106 | #[allow(unused)] 107 | fn qd_n( 108 | nvme: NvmeDevice, 109 | n_threads: u64, 110 | n: u64, 111 | write: bool, 112 | batch_size: usize, 113 | time: Option, 114 | ) -> Result> { 115 | let blocks = 8; 116 | let ns_blocks = nvme.namespaces.get(&1).unwrap().blocks / blocks; 117 | 118 | let nvme = Arc::new(Mutex::new(nvme)); 119 | let mut threads = Vec::new(); 120 | 121 | for i in 0..n_threads { 122 | let nvme = Arc::clone(&nvme); 123 | let range = (0, ns_blocks); 124 | 125 | let handle = thread::spawn(move || -> (u64, f64) { 126 | let mut rng = rand::thread_rng(); 127 | let bytes = 512 * blocks as usize; 128 | let mut total = std::time::Duration::ZERO; 129 | let mut buffer: Dma = Dma::allocate(HUGE_PAGE_SIZE).unwrap(); 130 | 131 | let mut qpair = nvme 132 | .lock() 133 | .unwrap() 134 | .create_io_queue_pair(QUEUE_LENGTH) 135 | .unwrap(); 136 | 137 | let rand_block = &(0..(32 * bytes)) 138 | .map(|_| rand::random::()) 139 | .collect::>()[..]; 140 | buffer[0..32 * bytes].copy_from_slice(rand_block); 141 | 142 | let mut ctr = 0; 143 | if let Some(time) = time { 144 | let mut ios = 0; 145 | while total < time { 146 | let lba = rng.gen_range(range.0..range.1); 147 | let before = Instant::now(); 148 | while let Some(_) = qpair.quick_poll() { 149 | ctr -= 1; 150 | ios += 1; 151 | } 152 | if ctr == batch_size { 153 | qpair.complete_io(1); 154 | ctr -= 1; 155 | ios += 1; 156 | } 157 | qpair.submit_io( 158 | &buffer.slice((ctr * bytes)..(ctr + 1) * bytes), 159 | lba * blocks, 160 | write, 161 | ); 162 | total += before.elapsed(); 163 | ctr += 1; 164 | } 165 | 166 | if ctr != 0 { 167 | let before = Instant::now(); 168 | qpair.complete_io(ctr); 169 | total += before.elapsed(); 170 | } 171 | ios += ctr as u64; 172 | assert!(qpair.sub_queue.is_empty()); 173 | nvme.lock().unwrap().delete_io_queue_pair(qpair).unwrap(); 174 | 175 | (ios, ios as f64 / total.as_secs_f64()) 176 | } else { 177 | let seq = &(0..n) 178 | .map(|_| rng.gen_range(range.0..range.1)) 179 | .collect::>()[..]; 180 | for &lba in seq { 181 | let before = Instant::now(); 182 | while let Some(_) = qpair.quick_poll() { 183 | ctr -= 1; 184 | } 185 | if ctr == 32 { 186 | qpair.complete_io(1); 187 | ctr -= 1; 188 | } 189 | qpair.submit_io( 190 | &buffer.slice((ctr * bytes)..(ctr + 1) * bytes), 191 | lba * blocks, 192 | write, 193 | ); 194 | total += before.elapsed(); 195 | ctr += 1; 196 | } 197 | if ctr != 0 { 198 | let before = Instant::now(); 199 | qpair.complete_io(ctr); 200 | total += before.elapsed(); 201 | } 202 | assert!(qpair.sub_queue.is_empty()); 203 | nvme.lock().unwrap().delete_io_queue_pair(qpair).unwrap(); 204 | (n, n as f64 / total.as_secs_f64()) 205 | } 206 | 207 | }); 208 | threads.push(handle); 209 | } 210 | 211 | let total = threads 212 | .into_iter() 213 | .fold((0, 0.), |acc, thread| { 214 | let res = thread 215 | .join() 216 | .expect("The thread creation or execution failed!"); 217 | ( 218 | acc.0 + res.0, 219 | acc.1 + res.1, 220 | ) 221 | }); 222 | println!( 223 | "n: {}, total {} iops: {:?}", 224 | total.0, 225 | if write { "write" } else { "read" }, 226 | total.1 227 | ); 228 | 229 | match Arc::try_unwrap(nvme) { 230 | Ok(mutex) => match mutex.into_inner() { 
231 | Ok(t) => Ok(t), 232 | Err(e) => Err(e.into()), 233 | }, 234 | Err(_) => Err("Arc::try_unwrap failed, not the last reference.".into()), 235 | } 236 | } 237 | 238 | fn fill_ns(nvme: &mut NvmeDevice) { 239 | let buffer: Dma = Dma::allocate(HUGE_PAGE_SIZE).unwrap(); 240 | let max_lba = nvme.namespaces.get(&1).unwrap().blocks - buffer.size as u64 / 512 - 1; 241 | let blocks = buffer.size as u64 / 512; 242 | let mut lba = 0; 243 | while lba < max_lba - 512 { 244 | nvme.write(&buffer, lba).unwrap(); 245 | lba += blocks; 246 | } 247 | } 248 | -------------------------------------------------------------------------------- /rust-toolchain: -------------------------------------------------------------------------------- 1 | nightly 2 | -------------------------------------------------------------------------------- /setup-hugetlbfs.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | mkdir -p /mnt/huge 3 | (mount | grep /mnt/huge) > /dev/null || mount -t hugetlbfs hugetlbfs /mnt/huge 4 | for i in {0..7} 5 | do 6 | if [[ -e "/sys/devices/system/node/node$i" ]] 7 | then 8 | echo 512 > /sys/devices/system/node/node$i/hugepages/hugepages-2048kB/nr_hugepages 9 | fi 10 | done 11 | -------------------------------------------------------------------------------- /src/cmd.rs: -------------------------------------------------------------------------------- 1 | /// NVMe Spec 4.2 2 | /// Submission queue entry 3 | #[derive(Clone, Copy, Debug, Default)] 4 | #[repr(C, packed)] 5 | pub struct NvmeCommand { 6 | /// Opcode 7 | pub opcode: u8, 8 | /// Flags; FUSE (2 bits) | Reserved (4 bits) | PSDT (2 bits) 9 | pub flags: u8, 10 | /// Command ID 11 | pub c_id: u16, 12 | /// Namespace ID 13 | pub ns_id: u32, 14 | /// Reserved 15 | pub _rsvd: u64, 16 | /// Metadata pointer 17 | pub md_ptr: u64, 18 | /// Data pointer 19 | pub d_ptr: [u64; 2], 20 | /// Command dword 10 21 | pub cdw10: u32, 22 | /// Command dword 11 23 | pub cdw11: u32, 24 | /// Command dword 12 25 | pub cdw12: u32, 26 | /// Command dword 13 27 | pub cdw13: u32, 28 | /// Command dword 14 29 | pub cdw14: u32, 30 | /// Command dword 15 31 | pub cdw15: u32, 32 | } 33 | 34 | impl NvmeCommand { 35 | pub fn create_io_completion_queue(c_id: u16, qid: u16, ptr: usize, size: u16) -> Self { 36 | Self { 37 | opcode: 5, 38 | flags: 0, 39 | c_id, 40 | ns_id: 0, 41 | _rsvd: 0, 42 | md_ptr: 0, 43 | d_ptr: [ptr as u64, 0], 44 | cdw10: ((size as u32) << 16) | (qid as u32), 45 | cdw11: 1, // Physically Contiguous 46 | cdw12: 0, 47 | cdw13: 0, 48 | cdw14: 0, 49 | cdw15: 0, 50 | } 51 | } 52 | 53 | pub fn create_io_submission_queue( 54 | c_id: u16, 55 | q_id: u16, 56 | ptr: usize, 57 | size: u16, 58 | cq_id: u16, 59 | ) -> Self { 60 | Self { 61 | opcode: 1, 62 | flags: 0, 63 | c_id, 64 | ns_id: 0, 65 | _rsvd: 0, 66 | md_ptr: 0, 67 | d_ptr: [ptr as u64, 0], 68 | cdw10: ((size as u32) << 16) | (q_id as u32), 69 | cdw11: ((cq_id as u32) << 16) | 1, /* Physically Contiguous */ 70 | //TODO: QPRIO 71 | cdw12: 0, //TODO: NVMSETID 72 | cdw13: 0, 73 | cdw14: 0, 74 | cdw15: 0, 75 | } 76 | } 77 | 78 | pub fn delete_io_submission_queue(c_id: u16, q_id: u16) -> Self { 79 | Self { 80 | opcode: 0, 81 | c_id, 82 | cdw10: q_id as u32, 83 | ..Default::default() 84 | } 85 | } 86 | 87 | pub fn delete_io_completion_queue(c_id: u16, q_id: u16) -> Self { 88 | Self { 89 | opcode: 4, 90 | c_id, 91 | cdw10: q_id as u32, 92 | ..Default::default() 93 | } 94 | } 95 | 96 | pub fn identify_namespace(c_id: u16, ptr: usize, ns_id: u32) -> Self { 97 | 
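// CNS selector in CDW10 (NVMe Identify command): 0 = Identify Namespace (this command), 1 = Identify Controller, 2 = active namespace ID list; the three identify_* constructors here set cdw10 accordingly.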
Self { 98 | opcode: 6, 99 | flags: 0, 100 | c_id, 101 | ns_id, 102 | _rsvd: 0, 103 | md_ptr: 0, 104 | d_ptr: [ptr as u64, 0], 105 | cdw10: 0, 106 | cdw11: 0, 107 | cdw12: 0, 108 | cdw13: 0, 109 | cdw14: 0, 110 | cdw15: 0, 111 | } 112 | } 113 | 114 | pub fn identify_controller(c_id: u16, ptr: usize) -> Self { 115 | Self { 116 | opcode: 6, 117 | flags: 0, 118 | c_id, 119 | ns_id: 0, 120 | _rsvd: 0, 121 | md_ptr: 0, 122 | d_ptr: [ptr as u64, 0], 123 | cdw10: 1, 124 | cdw11: 0, 125 | cdw12: 0, 126 | cdw13: 0, 127 | cdw14: 0, 128 | cdw15: 0, 129 | } 130 | } 131 | 132 | pub fn identify_namespace_list(c_id: u16, ptr: usize, base: u32) -> Self { 133 | Self { 134 | opcode: 6, 135 | flags: 0, 136 | c_id, 137 | ns_id: base, 138 | _rsvd: 0, 139 | md_ptr: 0, 140 | d_ptr: [ptr as u64, 0], 141 | cdw10: 2, 142 | cdw11: 0, 143 | cdw12: 0, 144 | cdw13: 0, 145 | cdw14: 0, 146 | cdw15: 0, 147 | } 148 | } 149 | 150 | pub fn get_features(c_id: u16, ptr: usize, fid: u8) -> Self { 151 | Self { 152 | opcode: 0xA, 153 | d_ptr: [ptr as u64, 0], 154 | cdw10: u32::from(fid), // TODO: SEL 155 | ..Default::default() 156 | } 157 | } 158 | 159 | pub fn io_read(c_id: u16, ns_id: u32, lba: u64, blocks_1: u16, ptr0: u64, ptr1: u64) -> Self { 160 | Self { 161 | opcode: 2, 162 | flags: 0, 163 | c_id, 164 | ns_id, 165 | _rsvd: 0, 166 | md_ptr: 0, 167 | d_ptr: [ptr0, ptr1], 168 | cdw10: lba as u32, 169 | cdw11: (lba >> 32) as u32, 170 | cdw12: blocks_1 as u32, 171 | cdw13: 0, 172 | cdw14: 0, 173 | cdw15: 0, 174 | } 175 | } 176 | 177 | pub fn io_write(c_id: u16, ns_id: u32, lba: u64, blocks_1: u16, ptr0: u64, ptr1: u64) -> Self { 178 | Self { 179 | opcode: 1, 180 | flags: 0, 181 | c_id, 182 | ns_id, 183 | _rsvd: 0, 184 | md_ptr: 0, 185 | d_ptr: [ptr0, ptr1], 186 | cdw10: lba as u32, 187 | cdw11: (lba >> 32) as u32, 188 | cdw12: blocks_1 as u32, 189 | cdw13: 0, 190 | cdw14: 0, 191 | cdw15: 0, 192 | } 193 | } 194 | 195 | pub(crate) fn format_nvm(c_id: u16, ns_id: u32) -> Self { 196 | Self { 197 | opcode: 0x80, 198 | flags: 0, 199 | c_id, 200 | ns_id, 201 | _rsvd: 0, 202 | md_ptr: 0, 203 | d_ptr: [0, 0], 204 | cdw10: 1 << 9, 205 | // TODO: dealloc and prinfo bits 206 | cdw11: 0, 207 | cdw12: 0, 208 | cdw13: 0, 209 | cdw14: 0, 210 | cdw15: 0, 211 | } 212 | } 213 | 214 | pub(crate) fn async_event_req(c_id: u16) -> Self { 215 | Self { 216 | opcode: 0xC, 217 | flags: 0, 218 | c_id, 219 | ns_id: 0, 220 | _rsvd: 0, 221 | md_ptr: 0, 222 | d_ptr: [0, 0], 223 | cdw10: 0, 224 | cdw11: 0, 225 | cdw12: 0, 226 | cdw13: 0, 227 | cdw14: 0, 228 | cdw15: 0, 229 | } 230 | } 231 | 232 | pub(crate) fn get_log_page( 233 | c_id: u16, 234 | numd: u32, 235 | ptr0: u64, 236 | ptr1: u64, 237 | lid: u8, 238 | lpid: u16, 239 | ) -> Self { 240 | Self { 241 | c_id, 242 | d_ptr: [ptr0, ptr1], 243 | cdw10: (numd << 16) | lid as u32, 244 | cdw11: ((lpid as u32) << 16) | numd >> 16, 245 | ..Self::default() 246 | } 247 | } 248 | 249 | // not supported by samsung 250 | pub fn write_zeroes(c_id: u16, ns_id: u32, slba: u64, nlb: u16, deac: bool) -> Self { 251 | Self { 252 | opcode: 8, 253 | flags: 0, 254 | c_id, 255 | ns_id, 256 | _rsvd: 0, 257 | md_ptr: 0, 258 | d_ptr: [0, 0], 259 | cdw10: slba as u32, 260 | // TODO: dealloc and prinfo bits 261 | cdw11: (slba >> 32) as u32, 262 | cdw12: ((deac as u32) << 25) | nlb as u32, 263 | cdw13: 0, 264 | cdw14: 0, 265 | cdw15: 0, 266 | } 267 | } 268 | } 269 | -------------------------------------------------------------------------------- /src/lib.rs: 
-------------------------------------------------------------------------------- 1 | #![cfg_attr(target_arch = "aarch64", feature(stdarch_arm_hints))] 2 | #[allow(unused)] 3 | mod cmd; 4 | #[allow(dead_code)] 5 | pub mod memory; 6 | #[allow(dead_code)] 7 | mod nvme; 8 | #[allow(dead_code)] 9 | mod pci; 10 | #[allow(dead_code)] 11 | mod queues; 12 | 13 | pub use memory::HUGE_PAGE_SIZE; 14 | pub use nvme::{NvmeDevice, NvmeQueuePair}; 15 | use pci::*; 16 | pub use queues::QUEUE_LENGTH; 17 | use std::error::Error; 18 | 19 | pub fn init(pci_addr: &str) -> Result<NvmeDevice, Box<dyn Error>> { 20 | let mut vendor_file = pci_open_resource_ro(pci_addr, "vendor").expect("wrong pci address"); 21 | let mut device_file = pci_open_resource_ro(pci_addr, "device").expect("wrong pci address"); 22 | let mut config_file = pci_open_resource_ro(pci_addr, "config").expect("wrong pci address"); 23 | 24 | let _vendor_id = read_hex(&mut vendor_file)?; 25 | let _device_id = read_hex(&mut device_file)?; 26 | let class_id = read_io32(&mut config_file, 8)? >> 16; 27 | 28 | // 0x01 -> mass storage device class id 29 | // 0x08 -> nvme subclass 30 | if class_id != 0x0108 { 31 | return Err(format!("device {} is not a block device", pci_addr).into()); 32 | } 33 | 34 | let mut nvme = NvmeDevice::init(pci_addr)?; 35 | nvme.identify_controller()?; 36 | let ns = nvme.identify_namespace_list(0); 37 | for n in ns { 38 | println!("ns_id: {n}"); 39 | nvme.identify_namespace(n); 40 | } 41 | Ok(nvme) 42 | } 43 | 44 | #[derive(Debug, Clone, Copy)] 45 | pub struct NvmeNamespace { 46 | pub id: u32, 47 | pub blocks: u64, 48 | pub block_size: u64, 49 | } 50 | 51 | #[derive(Debug, Clone, Default)] 52 | pub struct NvmeStats { 53 | pub completions: u64, 54 | pub submissions: u64, 55 | } 56 | -------------------------------------------------------------------------------- /src/memory.rs: -------------------------------------------------------------------------------- 1 | use lazy_static::lazy_static; 2 | use std::slice; 3 | // use std::rc::Rc; 4 | // use std::cell::RefCell; 5 | use std::collections::HashMap; 6 | use std::error::Error; 7 | use std::io::{self, Read, Seek}; 8 | use std::os::fd::{AsRawFd, RawFd}; 9 | use std::sync::atomic::{AtomicUsize, Ordering}; 10 | use std::sync::Mutex; 11 | use std::{fs, mem, process, ptr}; 12 | use std::ops::{Deref, DerefMut, Index, IndexMut, Range, RangeTo, RangeFull}; 13 | 14 | // from https://www.kernel.org/doc/Documentation/x86/x86_64/mm.txt 15 | const X86_VA_WIDTH: u8 = 47; 16 | 17 | const HUGE_PAGE_BITS: u32 = 21; 18 | pub const HUGE_PAGE_SIZE: usize = 1 << HUGE_PAGE_BITS; 19 | 20 | pub const IOVA_WIDTH: u8 = X86_VA_WIDTH; 21 | 22 | static HUGEPAGE_ID: AtomicUsize = AtomicUsize::new(0); 23 | 24 | pub(crate) static mut VFIO_CONTAINER_FILE_DESCRIPTOR: Option<RawFd> = None; 25 | 26 | lazy_static!
{ 27 | pub(crate) static ref VFIO_GROUP_FILE_DESCRIPTORS: Mutex> = 28 | Mutex::new(HashMap::new()); 29 | } 30 | 31 | pub struct Dma { 32 | pub virt: *mut T, 33 | pub phys: usize, 34 | pub size: usize, 35 | } 36 | 37 | // should be safe 38 | impl Deref for Dma { 39 | type Target = T; 40 | 41 | fn deref(&self) -> &Self::Target { 42 | unsafe { 43 | &*self.virt 44 | } 45 | } 46 | } 47 | 48 | impl DerefMut for Dma { 49 | fn deref_mut(&mut self) -> &mut Self::Target { 50 | unsafe { 51 | &mut *self.virt 52 | } 53 | } 54 | } 55 | 56 | // Trait for types that can be viewed as DMA slices 57 | pub trait DmaSlice { 58 | type Item; 59 | 60 | fn chunks(&self, bytes: usize) -> DmaChunks; 61 | fn slice(&self, range: Range) -> Self::Item; 62 | } 63 | 64 | // mildly overengineered lol 65 | pub struct DmaChunks<'a, T> { 66 | current_offset: usize, 67 | chunk_size: usize, 68 | dma: &'a Dma, 69 | } 70 | 71 | impl<'a, T> Iterator for DmaChunks<'a, T> { 72 | type Item = DmaChunk<'a, T>; 73 | 74 | fn next(&mut self) -> Option { 75 | if self.current_offset >= self.dma.size { 76 | None 77 | } else { 78 | let chunk_phys_addr = self.dma.phys + self.current_offset * std::mem::size_of::(); 79 | let offset_ptr = unsafe { self.dma.virt.add(self.current_offset) }; 80 | let len = std::cmp::min(self.chunk_size, (self.dma.size - self.current_offset) / std::mem::size_of::()); 81 | 82 | self.current_offset += len; 83 | 84 | Some(DmaChunk { 85 | phys_addr: chunk_phys_addr, 86 | slice: unsafe { std::slice::from_raw_parts_mut(offset_ptr, len) }, 87 | }) 88 | } 89 | } 90 | } 91 | 92 | // Represents a chunk obtained from a Dma, with physical address and slice. 93 | pub struct DmaChunk<'a, T> { 94 | pub phys_addr: usize, 95 | pub slice: &'a mut [T], 96 | } 97 | 98 | impl DmaSlice for Dma { 99 | type Item = Dma; 100 | fn chunks(&self, bytes: usize) -> DmaChunks { 101 | DmaChunks { 102 | current_offset: 0, 103 | chunk_size: bytes, 104 | dma: self, 105 | } 106 | } 107 | 108 | fn slice(&self, index: Range) -> Self::Item { 109 | assert!(index.end <= self.size, "Index out of bounds"); 110 | 111 | unsafe { 112 | Dma { 113 | virt: self.virt.add(index.start), 114 | phys: self.phys + index.start, 115 | size: (index.end - index.start) 116 | } 117 | } 118 | 119 | } 120 | } 121 | 122 | impl Index> for Dma { 123 | type Output = [u8]; 124 | 125 | fn index(&self, index: Range) -> &Self::Output { 126 | assert!(index.end <= self.size, "Index out of bounds"); 127 | 128 | unsafe { 129 | slice::from_raw_parts(self.virt.add(index.start), index.end - index.start) 130 | } 131 | } 132 | } 133 | 134 | impl IndexMut> for Dma { 135 | fn index_mut(&mut self, index: Range) -> &mut Self::Output { 136 | assert!(index.end <= self.size, "Index out of bounds"); 137 | unsafe { 138 | slice::from_raw_parts_mut(self.virt.add(index.start), index.end - index.start) 139 | } 140 | } 141 | } 142 | 143 | impl Index> for Dma { 144 | type Output = [u8]; 145 | 146 | fn index(&self, index: RangeTo) -> &Self::Output { 147 | &self[0..index.end] 148 | } 149 | } 150 | 151 | impl IndexMut> for Dma { 152 | fn index_mut(&mut self, index: RangeTo) -> &mut Self::Output { 153 | &mut self[0..index.end] 154 | } 155 | } 156 | 157 | impl Index for Dma { 158 | type Output = [u8]; 159 | 160 | fn index(&self, _: RangeFull) -> &Self::Output { 161 | &self[0..self.size] 162 | } 163 | } 164 | 165 | impl IndexMut for Dma { 166 | fn index_mut(&mut self, _: RangeFull) -> &mut Self::Output { 167 | let len = self.size; 168 | &mut self[0..len] 169 | 170 | } 171 | } 172 | 173 | impl Dma { 174 | /// Allocates 
DMA Memory on a huge page 175 | // TODO: vfio support? 176 | pub fn allocate(size: usize) -> Result, Box> { 177 | let size = if size % HUGE_PAGE_SIZE != 0 { 178 | ((size >> HUGE_PAGE_BITS) + 1) << HUGE_PAGE_BITS 179 | } else { 180 | size 181 | }; 182 | 183 | let id = HUGEPAGE_ID.fetch_add(1, Ordering::SeqCst); 184 | let path = format!("/mnt/huge/nvme-{}-{}", process::id(), id); 185 | 186 | match fs::OpenOptions::new() 187 | .read(true) 188 | .write(true) 189 | .create(true) 190 | .open(path.clone()) 191 | { 192 | Ok(f) => { 193 | let ptr = unsafe { 194 | libc::mmap( 195 | ptr::null_mut(), 196 | size, 197 | libc::PROT_READ | libc::PROT_WRITE, 198 | libc::MAP_SHARED | libc::MAP_HUGETLB, 199 | // libc::MAP_SHARED, 200 | f.as_raw_fd(), 201 | 0, 202 | ) 203 | }; 204 | if ptr == libc::MAP_FAILED { 205 | Err("failed to mmap huge page - are huge pages enabled and free?".into()) 206 | } else if unsafe { libc::mlock(ptr, size) } == 0 { 207 | let memory = Dma { 208 | // virt: NonNull::new(ptr as *mut T).expect("oops"), 209 | virt: ptr as *mut T, 210 | phys: virt_to_phys(ptr as usize)?, 211 | size 212 | }; 213 | Ok(memory) 214 | } else { 215 | Err("failed to memory lock huge page".into()) 216 | } 217 | } 218 | Err(ref e) if e.kind() == io::ErrorKind::NotFound => Err(Box::new(io::Error::new( 219 | e.kind(), 220 | format!( 221 | "huge page {} could not be created - huge pages enabled?", 222 | path 223 | ), 224 | ))), 225 | Err(e) => Err(Box::new(e)), 226 | } 227 | } 228 | } 229 | 230 | /// Translates a virtual address to its physical counterpart 231 | pub(crate) fn virt_to_phys(addr: usize) -> Result> { 232 | let pagesize = unsafe { libc::sysconf(libc::_SC_PAGESIZE) } as usize; 233 | 234 | let mut file = fs::OpenOptions::new() 235 | .read(true) 236 | .open("/proc/self/pagemap")?; 237 | 238 | file.seek(io::SeekFrom::Start( 239 | (addr / pagesize * mem::size_of::()) as u64, 240 | ))?; 241 | 242 | let mut buffer = [0; mem::size_of::()]; 243 | file.read_exact(&mut buffer)?; 244 | 245 | let phys = unsafe { mem::transmute::<[u8; mem::size_of::()], usize>(buffer) }; 246 | Ok((phys & 0x007F_FFFF_FFFF_FFFF) * pagesize + addr % pagesize) 247 | } 248 | 249 | #[allow(unused)] 250 | pub fn vfio_enabled() -> bool { 251 | unsafe { VFIO_CONTAINER_FILE_DESCRIPTOR.is_some() } 252 | } 253 | -------------------------------------------------------------------------------- /src/nvme.rs: -------------------------------------------------------------------------------- 1 | use crate::cmd::NvmeCommand; 2 | use crate::memory::{Dma, DmaSlice}; 3 | use crate::pci::pci_map_resource; 4 | use crate::queues::*; 5 | use crate::{NvmeNamespace, NvmeStats, HUGE_PAGE_SIZE}; 6 | use std::collections::HashMap; 7 | use std::error::Error; 8 | use std::hint::spin_loop; 9 | 10 | // clippy doesnt like this 11 | #[allow(unused, clippy::upper_case_acronyms)] 12 | #[derive(Copy, Clone, Debug)] 13 | pub enum NvmeRegs32 { 14 | VS = 0x8, // Version 15 | INTMS = 0xC, // Interrupt Mask Set 16 | INTMC = 0x10, // Interrupt Mask Clear 17 | CC = 0x14, // Controller Configuration 18 | CSTS = 0x1C, // Controller Status 19 | NSSR = 0x20, // NVM Subsystem Reset 20 | AQA = 0x24, // Admin Queue Attributes 21 | CMBLOC = 0x38, // Contoller Memory Buffer Location 22 | CMBSZ = 0x3C, // Controller Memory Buffer Size 23 | BPINFO = 0x40, // Boot Partition Info 24 | BPRSEL = 0x44, // Boot Partition Read Select 25 | BPMBL = 0x48, // Bood Partition Memory Location 26 | CMBSTS = 0x58, // Controller Memory Buffer Status 27 | PMRCAP = 0xE00, // PMem Capabilities 28 | PMRCTL = 
0xE04, // PMem Region Control 29 | PMRSTS = 0xE08, // PMem Region Status 30 | PMREBS = 0xE0C, // PMem Elasticity Buffer Size 31 | PMRSWTP = 0xE10, // PMem Sustained Write Throughput 32 | } 33 | 34 | #[allow(unused, clippy::upper_case_acronyms)] 35 | #[derive(Copy, Clone, Debug)] 36 | pub enum NvmeRegs64 { 37 | CAP = 0x0, // Controller Capabilities 38 | ASQ = 0x28, // Admin Submission Queue Base Address 39 | ACQ = 0x30, // Admin Completion Queue Base Address 40 | CMBMSC = 0x50, // Controller Memory Buffer Space Control 41 | PMRMSC = 0xE14, // Persistent Memory Buffer Space Control 42 | } 43 | 44 | #[allow(non_camel_case_types)] 45 | #[derive(Copy, Clone, Debug)] 46 | pub(crate) enum NvmeArrayRegs { 47 | SQyTDBL, 48 | CQyHDBL, 49 | } 50 | 51 | // who tf is abbreviating this stuff 52 | #[repr(C, packed)] 53 | #[derive(Debug, Clone, Copy)] 54 | #[allow(unused)] 55 | struct IdentifyNamespaceData { 56 | pub nsze: u64, 57 | pub ncap: u64, 58 | nuse: u64, 59 | nsfeat: u8, 60 | pub nlbaf: u8, 61 | pub flbas: u8, 62 | mc: u8, 63 | dpc: u8, 64 | dps: u8, 65 | nmic: u8, 66 | rescap: u8, 67 | fpi: u8, 68 | dlfeat: u8, 69 | nawun: u16, 70 | nawupf: u16, 71 | nacwu: u16, 72 | nabsn: u16, 73 | nabo: u16, 74 | nabspf: u16, 75 | noiob: u16, 76 | nvmcap: u128, 77 | npwg: u16, 78 | npwa: u16, 79 | npdg: u16, 80 | npda: u16, 81 | nows: u16, 82 | _rsvd1: [u8; 18], 83 | anagrpid: u32, 84 | _rsvd2: [u8; 3], 85 | nsattr: u8, 86 | nvmsetid: u16, 87 | endgid: u16, 88 | nguid: [u8; 16], 89 | eui64: u64, 90 | pub lba_format_support: [u32; 16], 91 | _rsvd3: [u8; 192], 92 | vendor_specific: [u8; 3712], 93 | } 94 | 95 | pub struct NvmeQueuePair { 96 | pub id: u16, 97 | pub sub_queue: NvmeSubQueue, 98 | comp_queue: NvmeCompQueue, 99 | } 100 | 101 | impl NvmeQueuePair { 102 | /// returns amount of requests pushed into submission queue 103 | pub fn submit_io(&mut self, data: &impl DmaSlice, mut lba: u64, write: bool) -> usize { 104 | let mut reqs = 0; 105 | // TODO: contruct PRP list? 
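// Each chunk covers at most two 4 KiB pages: PRP1 (d_ptr[0]) points to the first page and,
// for transfers larger than one page, PRP2 is set to addr + 4096; anything larger would need
// the PRP list that the TODO above refers to.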
106 | for chunk in data.chunks(2 * 4096) { 107 | let blocks = (chunk.slice.len() as u64 + 512 - 1) / 512; 108 | 109 | let addr = chunk.phys_addr as u64; 110 | let bytes = blocks * 512; 111 | let ptr1 = if bytes <= 4096 { 112 | 0 113 | } else { 114 | addr + 4096 // self.page_size 115 | }; 116 | 117 | let entry = if write { 118 | NvmeCommand::io_write( 119 | self.id << 11 | self.sub_queue.tail as u16, 120 | 1, 121 | lba, 122 | blocks as u16 - 1, 123 | addr, 124 | ptr1, 125 | ) 126 | } else { 127 | NvmeCommand::io_read( 128 | self.id << 11 | self.sub_queue.tail as u16, 129 | 1, 130 | lba, 131 | blocks as u16 - 1, 132 | addr, 133 | ptr1, 134 | ) 135 | }; 136 | 137 | if let Some(tail) = self.sub_queue.submit_checked(entry) { 138 | unsafe { 139 | std::ptr::write_volatile(self.sub_queue.doorbell as *mut u32, tail as u32); 140 | } 141 | } else { 142 | eprintln!("queue full"); 143 | return reqs; 144 | } 145 | 146 | lba += blocks; 147 | reqs += 1; 148 | } 149 | reqs 150 | } 151 | 152 | // TODO: maybe return result 153 | pub fn complete_io(&mut self, n: usize) -> Option { 154 | assert!(n > 0); 155 | let (tail, c_entry, _) = self.comp_queue.complete_n(n); 156 | unsafe { 157 | std::ptr::write_volatile(self.comp_queue.doorbell as *mut u32, tail as u32); 158 | } 159 | self.sub_queue.head = c_entry.sq_head as usize; 160 | let status = c_entry.status >> 1; 161 | if status != 0 { 162 | eprintln!( 163 | "Status: 0x{:x}, Status Code 0x{:x}, Status Code Type: 0x{:x}", 164 | status, 165 | status & 0xFF, 166 | (status >> 8) & 0x7 167 | ); 168 | eprintln!("{:?}", c_entry); 169 | return None; 170 | } 171 | Some(c_entry.sq_head) 172 | } 173 | 174 | pub fn quick_poll(&mut self) -> Option<()> { 175 | if let Some((tail, c_entry, _)) = self.comp_queue.complete() { 176 | unsafe { 177 | std::ptr::write_volatile(self.comp_queue.doorbell as *mut u32, tail as u32); 178 | } 179 | self.sub_queue.head = c_entry.sq_head as usize; 180 | let status = c_entry.status >> 1; 181 | if status != 0 { 182 | eprintln!( 183 | "Status: 0x{:x}, Status Code 0x{:x}, Status Code Type: 0x{:x}", 184 | status, 185 | status & 0xFF, 186 | (status >> 8) & 0x7 187 | ); 188 | eprintln!("{:?}", c_entry); 189 | } 190 | return Some(()); 191 | } 192 | None 193 | } 194 | } 195 | 196 | #[allow(unused)] 197 | pub struct NvmeDevice { 198 | pci_addr: String, 199 | addr: *mut u8, 200 | len: usize, 201 | // Doorbell stride 202 | dstrd: u16, 203 | admin_sq: NvmeSubQueue, 204 | admin_cq: NvmeCompQueue, 205 | io_sq: NvmeSubQueue, 206 | io_cq: NvmeCompQueue, 207 | buffer: Dma, // 2MiB of buffer 208 | prp_list: Dma<[u64; 512]>, // Address of PRP's, devices doesn't necessarily support 2MiB page sizes; 8 Bytes * 512 = 4096 209 | pub namespaces: HashMap, 210 | pub stats: NvmeStats, 211 | q_id: u16, 212 | } 213 | 214 | // TODO 215 | unsafe impl Send for NvmeDevice {} 216 | unsafe impl Sync for NvmeDevice {} 217 | 218 | #[allow(unused)] 219 | impl NvmeDevice { 220 | pub fn init(pci_addr: &str) -> Result> { 221 | let (addr, len) = pci_map_resource(pci_addr)?; 222 | let mut dev = Self { 223 | pci_addr: pci_addr.to_string(), 224 | addr, 225 | dstrd: { 226 | unsafe { 227 | ((std::ptr::read_volatile( 228 | (addr as usize + NvmeRegs64::CAP as usize) as *const u64, 229 | ) >> 32) 230 | & 0b1111) as u16 231 | } 232 | }, 233 | len, 234 | admin_sq: NvmeSubQueue::new(QUEUE_LENGTH, 0)?, 235 | admin_cq: NvmeCompQueue::new(QUEUE_LENGTH, 0)?, 236 | io_sq: NvmeSubQueue::new(QUEUE_LENGTH, 0)?, 237 | io_cq: NvmeCompQueue::new(QUEUE_LENGTH, 0)?, 238 | buffer: 
Dma::allocate(crate::memory::HUGE_PAGE_SIZE)?, 239 | prp_list: Dma::allocate(8 * 512)?, 240 | namespaces: HashMap::new(), 241 | stats: NvmeStats::default(), 242 | q_id: 1, 243 | }; 244 | 245 | for i in 1..512 { 246 | dev.prp_list[i - 1] = (dev.buffer.phys + i * 4096) as u64; 247 | } 248 | 249 | println!("CAP: 0x{:x}", dev.get_reg64(NvmeRegs64::CAP as u64)); 250 | println!("VS: 0x{:x}", dev.get_reg32(NvmeRegs32::VS as u32)); 251 | println!("CC: 0x{:x}", dev.get_reg32(NvmeRegs32::CC as u32)); 252 | 253 | println!("Disabling controller"); 254 | // Set Enable bit to 0 255 | let ctrl_config = dev.get_reg32(NvmeRegs32::CC as u32) & 0xFFFF_FFFE; 256 | dev.set_reg32(NvmeRegs32::CC as u32, ctrl_config); 257 | 258 | // Wait for not ready 259 | loop { 260 | let csts = dev.get_reg32(NvmeRegs32::CSTS as u32); 261 | if csts & 1 == 1 { 262 | spin_loop(); 263 | } else { 264 | break; 265 | } 266 | } 267 | 268 | // Configure Admin Queues 269 | dev.set_reg64(NvmeRegs64::ASQ as u32, dev.admin_sq.get_addr() as u64); 270 | dev.set_reg64(NvmeRegs64::ACQ as u32, dev.admin_cq.get_addr() as u64); 271 | dev.set_reg32( 272 | NvmeRegs32::AQA as u32, 273 | (QUEUE_LENGTH as u32 - 1) << 16 | (QUEUE_LENGTH as u32 - 1), 274 | ); 275 | 276 | // Configure other stuff 277 | // TODO: check css values 278 | let mut cc = dev.get_reg32(NvmeRegs32::CC as u32); 279 | // mask out reserved stuff 280 | cc &= 0xFF00_000F; 281 | // Set Completion (2^4 = 16 Bytes) and Submission Entry (2^6 = 64 Bytes) sizes 282 | cc |= (4 << 20) | (6 << 16); 283 | 284 | // Set Memory Page Size 285 | // let mpsmax = ((dev.get_reg64(NvmeRegs64::CAP as u64) >> 52) & 0xF) as u32; 286 | // cc |= (mpsmax << 7); 287 | // println!("MPS {}", (cc >> 7) & 0xF); 288 | dev.set_reg32(NvmeRegs32::CC as u32, cc); 289 | 290 | // Enable the controller 291 | println!("Enabling controller"); 292 | let ctrl_config = dev.get_reg32(NvmeRegs32::CC as u32) | 1; 293 | dev.set_reg32(NvmeRegs32::CC as u32, ctrl_config); 294 | 295 | // wait for ready 296 | loop { 297 | let csts = dev.get_reg32(NvmeRegs32::CSTS as u32); 298 | if csts & 1 == 0 { 299 | spin_loop(); 300 | } else { 301 | break; 302 | } 303 | } 304 | 305 | let q_id = dev.q_id; 306 | let addr = dev.io_cq.get_addr(); 307 | println!("Requesting i/o completion queue"); 308 | let comp = dev.submit_and_complete_admin(|c_id, _| { 309 | NvmeCommand::create_io_completion_queue(c_id, q_id, addr, (QUEUE_LENGTH - 1) as u16) 310 | })?; 311 | let addr = dev.io_sq.get_addr(); 312 | println!("Requesting i/o submission queue"); 313 | let comp = dev.submit_and_complete_admin(|c_id, _| { 314 | NvmeCommand::create_io_submission_queue( 315 | c_id, 316 | q_id, 317 | addr, 318 | (QUEUE_LENGTH - 1) as u16, 319 | q_id, 320 | ) 321 | })?; 322 | dev.q_id += 1; 323 | 324 | Ok(dev) 325 | } 326 | 327 | pub fn identify_controller(&mut self) -> Result<(), Box> { 328 | println!("Trying to identify controller"); 329 | let _entry = self.submit_and_complete_admin(NvmeCommand::identify_controller); 330 | 331 | println!("Dumping identify controller"); 332 | let mut serial = String::new(); 333 | let data = &self.buffer; 334 | 335 | for &b in &data[4..24] { 336 | if b == 0 { 337 | break; 338 | } 339 | serial.push(b as char); 340 | } 341 | 342 | let mut model = String::new(); 343 | for &b in &data[24..64] { 344 | if b == 0 { 345 | break; 346 | } 347 | model.push(b as char); 348 | } 349 | 350 | let mut firmware = String::new(); 351 | for &b in &data[64..72] { 352 | if b == 0 { 353 | break; 354 | } 355 | firmware.push(b as char); 356 | } 357 | 358 | println!( 359 
| " - Model: {} Serial: {} Firmware: {}", 360 | model.trim(), 361 | serial.trim(), 362 | firmware.trim() 363 | ); 364 | 365 | Ok(()) 366 | } 367 | 368 | // 1 to 1 Submission/Completion Queue Mapping 369 | pub fn create_io_queue_pair(&mut self, len: usize) -> Result> { 370 | let q_id = self.q_id; 371 | println!("Requesting i/o queue pair with id {q_id}"); 372 | 373 | let offset = 0x1000 + ((4 << self.dstrd) * (2 * q_id + 1) as usize); 374 | assert!(offset <= self.len - 4, "SQ doorbell offset out of bounds"); 375 | 376 | let dbl = self.addr as usize + offset; 377 | 378 | let comp_queue = NvmeCompQueue::new(len, dbl)?; 379 | let comp = self.submit_and_complete_admin(|c_id, _| { 380 | NvmeCommand::create_io_completion_queue( 381 | c_id, 382 | q_id, 383 | comp_queue.get_addr(), 384 | (len - 1) as u16, 385 | ) 386 | })?; 387 | 388 | let dbl = self.addr as usize + 0x1000 + ((4 << self.dstrd) * (2 * q_id) as usize); 389 | let sub_queue = NvmeSubQueue::new(len, dbl)?; 390 | let comp = self.submit_and_complete_admin(|c_id, _| { 391 | NvmeCommand::create_io_submission_queue( 392 | c_id, 393 | q_id, 394 | sub_queue.get_addr(), 395 | (len - 1) as u16, 396 | q_id, 397 | ) 398 | })?; 399 | 400 | self.q_id += 1; 401 | Ok(NvmeQueuePair { 402 | id: q_id, 403 | sub_queue, 404 | comp_queue, 405 | }) 406 | } 407 | 408 | pub fn delete_io_queue_pair(&mut self, qpair: NvmeQueuePair) -> Result<(), Box> { 409 | println!("Deleting i/o queue pair with id {}", qpair.id); 410 | self.submit_and_complete_admin(|c_id, _| { 411 | NvmeCommand::delete_io_submission_queue(c_id, qpair.id) 412 | })?; 413 | self.submit_and_complete_admin(|c_id, _| { 414 | NvmeCommand::delete_io_completion_queue(c_id, qpair.id) 415 | })?; 416 | Ok(()) 417 | } 418 | 419 | pub fn identify_namespace_list(&mut self, base: u32) -> Vec { 420 | self.submit_and_complete_admin(|c_id, addr| { 421 | NvmeCommand::identify_namespace_list(c_id, addr, base) 422 | }); 423 | 424 | // TODO: idk bout this/don't hardcode len 425 | let data: &[u32] = 426 | unsafe { std::slice::from_raw_parts(self.buffer.virt as *const u32, 1024) }; 427 | 428 | data.iter() 429 | .copied() 430 | .take_while(|&id| id != 0) 431 | .collect::>() 432 | } 433 | 434 | pub fn identify_namespace(&mut self, id: u32) -> NvmeNamespace { 435 | self.submit_and_complete_admin(|c_id, addr| { 436 | NvmeCommand::identify_namespace(c_id, addr, id) 437 | }); 438 | 439 | let namespace_data: IdentifyNamespaceData = 440 | unsafe { *(self.buffer.virt as *const IdentifyNamespaceData) }; 441 | 442 | // let namespace_data = unsafe { *tmp_buff.virt }; 443 | let size = namespace_data.nsze; 444 | let blocks = namespace_data.ncap; 445 | 446 | // figure out block size 447 | let flba_idx = (namespace_data.flbas & 0xF) as usize; 448 | let flba_data = (namespace_data.lba_format_support[flba_idx] >> 16) & 0xFF; 449 | let block_size = if !(9..32).contains(&flba_data) { 450 | 0 451 | } else { 452 | 1 << flba_data 453 | }; 454 | 455 | // TODO: check metadata? 
456 | println!("Namespace {id}, Size: {size}, Blocks: {blocks}, Block size: {block_size}"); 457 | 458 | let namespace = NvmeNamespace { 459 | id, 460 | blocks, 461 | block_size, 462 | }; 463 | self.namespaces.insert(id, namespace); 464 | namespace 465 | } 466 | 467 | // TODO: currently namespace 1 is hardcoded 468 | pub fn write(&mut self, data: &impl DmaSlice, mut lba: u64) -> Result<(), Box> { 469 | for chunk in data.chunks(2 * 4096) { 470 | let blocks = (chunk.slice.len() as u64 + 512 - 1) / 512; 471 | self.namespace_io(1, blocks, lba, chunk.phys_addr as u64, true)?; 472 | lba += blocks; 473 | } 474 | 475 | Ok(()) 476 | } 477 | 478 | pub fn read(&mut self, dest: &impl DmaSlice, mut lba: u64) -> Result<(), Box> { 479 | // let ns = *self.namespaces.get(&1).unwrap(); 480 | for chunk in dest.chunks(2 * 4096) { 481 | let blocks = (chunk.slice.len() as u64 + 512 - 1) / 512; 482 | self.namespace_io(1, blocks, lba, chunk.phys_addr as u64, false)?; 483 | lba += blocks; 484 | } 485 | Ok(()) 486 | } 487 | 488 | pub fn write_copied(&mut self, data: &[u8], mut lba: u64) -> Result<(), Box> { 489 | let ns = *self.namespaces.get(&1).unwrap(); 490 | for chunk in data.chunks(128 * 4096) { 491 | self.buffer[..chunk.len()].copy_from_slice(chunk); 492 | let blocks = (chunk.len() as u64 + ns.block_size - 1) / ns.block_size; 493 | self.namespace_io(1, blocks, lba, self.buffer.phys as u64, true)?; 494 | lba += blocks; 495 | } 496 | 497 | Ok(()) 498 | } 499 | 500 | pub fn read_copied( 501 | &mut self, 502 | dest: &mut [u8], 503 | mut lba: u64, 504 | ) -> Result<(), Box> { 505 | let ns = *self.namespaces.get(&1).unwrap(); 506 | for chunk in dest.chunks_mut(128 * 4096) { 507 | let blocks = (chunk.len() as u64 + ns.block_size - 1) / ns.block_size; 508 | self.namespace_io(1, blocks, lba, self.buffer.phys as u64, false)?; 509 | lba += blocks; 510 | chunk.copy_from_slice(&self.buffer[..chunk.len()]); 511 | } 512 | Ok(()) 513 | } 514 | 515 | fn submit_io( 516 | &mut self, 517 | ns: &NvmeNamespace, 518 | addr: u64, 519 | blocks: u64, 520 | lba: u64, 521 | write: bool, 522 | ) -> Option { 523 | assert!(blocks > 0); 524 | assert!(blocks <= 0x1_0000); 525 | let q_id = 1; 526 | 527 | let bytes = blocks * ns.block_size; 528 | let ptr1 = if bytes <= 4096 { 529 | 0 530 | } else if bytes <= 8192 { 531 | addr + 4096 // self.page_size 532 | } else { 533 | // idk if this works 534 | let offset = (addr - self.buffer.phys as u64) / 8; 535 | self.prp_list.phys as u64 + offset 536 | }; 537 | 538 | let entry = if write { 539 | NvmeCommand::io_write( 540 | self.io_sq.tail as u16, 541 | ns.id, 542 | lba, 543 | blocks as u16 - 1, 544 | addr, 545 | ptr1, 546 | ) 547 | } else { 548 | NvmeCommand::io_read( 549 | self.io_sq.tail as u16, 550 | ns.id, 551 | lba, 552 | blocks as u16 - 1, 553 | addr, 554 | ptr1, 555 | ) 556 | }; 557 | self.io_sq.submit_checked(entry) 558 | } 559 | 560 | fn complete_io(&mut self, step: u64) -> Option { 561 | let q_id = 1; 562 | 563 | let (tail, c_entry, _) = self.io_cq.complete_n(step as usize); 564 | self.write_reg_idx(NvmeArrayRegs::CQyHDBL, q_id as u16, tail as u32); 565 | 566 | let status = c_entry.status >> 1; 567 | if status != 0 { 568 | eprintln!( 569 | "Status: 0x{:x}, Status Code 0x{:x}, Status Code Type: 0x{:x}", 570 | status, 571 | status & 0xFF, 572 | (status >> 8) & 0x7 573 | ); 574 | eprintln!("{:?}", c_entry); 575 | return None; 576 | } 577 | self.stats.completions += 1; 578 | Some(c_entry.sq_head) 579 | } 580 | 581 | pub fn batched_write( 582 | &mut self, 583 | ns_id: u32, 584 | data: &[u8], 585 | 
mut lba: u64, 586 | batch_len: u64, 587 | ) -> Result<(), Box> { 588 | let ns = *self.namespaces.get(&ns_id).unwrap(); 589 | let block_size = 512; 590 | let q_id = 1; 591 | 592 | for chunk in data.chunks(HUGE_PAGE_SIZE) { 593 | self.buffer[..chunk.len()].copy_from_slice(chunk); 594 | let tail = self.io_sq.tail; 595 | 596 | let batch_len = std::cmp::min(batch_len, chunk.len() as u64 / block_size); 597 | let batch_size = chunk.len() as u64 / batch_len; 598 | let blocks = batch_size / ns.block_size; 599 | 600 | for i in 0..batch_len { 601 | if let Some(tail) = self.submit_io( 602 | &ns, 603 | self.buffer.phys as u64 + i * batch_size, 604 | blocks, 605 | lba, 606 | true, 607 | ) { 608 | self.stats.submissions += 1; 609 | self.write_reg_idx(NvmeArrayRegs::SQyTDBL, q_id as u16, tail as u32); 610 | } else { 611 | eprintln!("tail: {tail}, batch_len: {batch_len}, batch_size: {batch_size}, blocks: {blocks}"); 612 | } 613 | lba += blocks; 614 | } 615 | self.io_sq.head = self.complete_io(batch_len).unwrap() as usize; 616 | } 617 | 618 | Ok(()) 619 | } 620 | 621 | pub fn batched_read( 622 | &mut self, 623 | ns_id: u32, 624 | data: &mut [u8], 625 | mut lba: u64, 626 | batch_len: u64, 627 | ) -> Result<(), Box> { 628 | let ns = *self.namespaces.get(&ns_id).unwrap(); 629 | let block_size = 512; 630 | let q_id = 1; 631 | 632 | for chunk in data.chunks_mut(HUGE_PAGE_SIZE) { 633 | let tail = self.io_sq.tail; 634 | 635 | let batch_len = std::cmp::min(batch_len, chunk.len() as u64 / block_size); 636 | let batch_size = chunk.len() as u64 / batch_len; 637 | let blocks = batch_size / ns.block_size; 638 | 639 | for i in 0..batch_len { 640 | if let Some(tail) = self.submit_io( 641 | &ns, 642 | self.buffer.phys as u64 + i * batch_size, 643 | blocks, 644 | lba, 645 | false, 646 | ) { 647 | self.stats.submissions += 1; 648 | self.write_reg_idx(NvmeArrayRegs::SQyTDBL, q_id as u16, tail as u32); 649 | } else { 650 | eprintln!("tail: {tail}, batch_len: {batch_len}, batch_size: {batch_size}, blocks: {blocks}"); 651 | } 652 | lba += blocks; 653 | } 654 | self.io_sq.head = self.complete_io(batch_len).unwrap() as usize; 655 | chunk.copy_from_slice(&self.buffer[..chunk.len()]); 656 | } 657 | Ok(()) 658 | } 659 | 660 | #[inline(always)] 661 | fn namespace_io( 662 | &mut self, 663 | ns_id: u32, 664 | blocks: u64, 665 | lba: u64, 666 | addr: u64, 667 | write: bool, 668 | ) -> Result<(), Box> { 669 | assert!(blocks > 0); 670 | assert!(blocks <= 0x1_0000); 671 | 672 | let q_id = 1; 673 | 674 | let bytes = blocks * 512; 675 | let ptr1 = if bytes <= 4096 { 676 | 0 677 | } else if bytes <= 8192 { 678 | // self.buffer.phys as u64 + 4096 // self.page_size 679 | addr + 4096 // self.page_size 680 | } else { 681 | self.prp_list.phys as u64 682 | }; 683 | 684 | let entry = if write { 685 | NvmeCommand::io_write( 686 | self.io_sq.tail as u16, 687 | ns_id, 688 | lba, 689 | blocks as u16 - 1, 690 | addr, 691 | ptr1, 692 | ) 693 | } else { 694 | NvmeCommand::io_read( 695 | self.io_sq.tail as u16, 696 | ns_id, 697 | lba, 698 | blocks as u16 - 1, 699 | addr, 700 | ptr1, 701 | ) 702 | }; 703 | 704 | let tail = self.io_sq.submit(entry); 705 | self.stats.submissions += 1; 706 | 707 | self.write_reg_idx(NvmeArrayRegs::SQyTDBL, q_id as u16, tail as u32); 708 | self.io_sq.head = self.complete_io(1).unwrap() as usize; 709 | Ok(()) 710 | } 711 | 712 | fn submit_and_complete_admin NvmeCommand>( 713 | &mut self, 714 | cmd_init: F, 715 | ) -> Result> { 716 | let cid = self.admin_sq.tail; 717 | let tail = self.admin_sq.submit(cmd_init(cid as u16, 
self.buffer.phys)); 718 | self.write_reg_idx(NvmeArrayRegs::SQyTDBL, 0, tail as u32); 719 | 720 | let (head, entry, _) = self.admin_cq.complete_spin(); 721 | self.write_reg_idx(NvmeArrayRegs::CQyHDBL, 0, head as u32); 722 | let status = entry.status >> 1; 723 | if status != 0 { 724 | eprintln!( 725 | "Status: 0x{:x}, Status Code 0x{:x}, Status Code Type: 0x{:x}", 726 | status, 727 | status & 0xFF, 728 | (status >> 8) & 0x7 729 | ); 730 | return Err("Requesting i/o completion queue failed".into()); 731 | } 732 | Ok(entry) 733 | } 734 | 735 | pub fn clear_namespace(&mut self, ns_id: Option) { 736 | let ns_id = if let Some(ns_id) = ns_id { 737 | assert!(self.namespaces.contains_key(&ns_id)); 738 | ns_id 739 | } else { 740 | 0xFFFF_FFFF 741 | }; 742 | self.submit_and_complete_admin(|c_id, _| NvmeCommand::format_nvm(c_id, ns_id)); 743 | } 744 | 745 | /// Sets Queue `qid` Tail Doorbell to `val` 746 | fn write_reg_idx(&self, reg: NvmeArrayRegs, qid: u16, val: u32) { 747 | match reg { 748 | NvmeArrayRegs::SQyTDBL => unsafe { 749 | std::ptr::write_volatile( 750 | (self.addr as usize + 0x1000 + ((4 << self.dstrd) * (2 * qid)) as usize) 751 | as *mut u32, 752 | val, 753 | ); 754 | }, 755 | NvmeArrayRegs::CQyHDBL => unsafe { 756 | std::ptr::write_volatile( 757 | (self.addr as usize + 0x1000 + ((4 << self.dstrd) * (2 * qid + 1)) as usize) 758 | as *mut u32, 759 | val, 760 | ); 761 | }, 762 | } 763 | } 764 | 765 | /// Sets the register at `self.addr` + `reg` to `value`. 766 | /// 767 | /// # Panics 768 | /// 769 | /// Panics if `self.addr` + `reg` does not belong to the mapped memory of the pci device. 770 | fn set_reg32(&self, reg: u32, value: u32) { 771 | assert!(reg as usize <= self.len - 4, "memory access out of bounds"); 772 | 773 | unsafe { 774 | std::ptr::write_volatile((self.addr as usize + reg as usize) as *mut u32, value); 775 | } 776 | } 777 | 778 | /// Returns the register at `self.addr` + `reg`. 779 | /// 780 | /// # Panics 781 | /// 782 | /// Panics if `self.addr` + `reg` does not belong to the mapped memory of the pci device. 783 | fn get_reg32(&self, reg: u32) -> u32 { 784 | assert!(reg as usize <= self.len - 4, "memory access out of bounds"); 785 | 786 | unsafe { std::ptr::read_volatile((self.addr as usize + reg as usize) as *mut u32) } 787 | } 788 | 789 | /// Sets the register at `self.addr` + `reg` to `value`. 790 | /// 791 | /// # Panics 792 | /// 793 | /// Panics if `self.addr` + `reg` does not belong to the mapped memory of the pci device. 794 | fn set_reg64(&self, reg: u32, value: u64) { 795 | assert!(reg as usize <= self.len - 8, "memory access out of bounds"); 796 | 797 | unsafe { 798 | std::ptr::write_volatile((self.addr as usize + reg as usize) as *mut u64, value); 799 | } 800 | } 801 | 802 | /// Returns the register at `self.addr` + `reg`. 803 | /// 804 | /// # Panics 805 | /// 806 | /// Panics if `self.addr` + `reg` does not belong to the mapped memory of the pci device. 
807 | fn get_reg64(&self, reg: u64) -> u64 { 808 | assert!(reg as usize <= self.len - 8, "memory access out of bounds"); 809 | 810 | unsafe { std::ptr::read_volatile((self.addr as usize + reg as usize) as *mut u64) } 811 | } 812 | } 813 | -------------------------------------------------------------------------------- /src/pci.rs: -------------------------------------------------------------------------------- 1 | use std::error::Error; 2 | use std::fs::{self, File, OpenOptions}; 3 | use std::io::{self, Read, Seek, SeekFrom, Write}; 4 | use std::os::unix::prelude::AsRawFd; 5 | use std::ptr; 6 | 7 | use byteorder::{NativeEndian, ReadBytesExt, WriteBytesExt}; 8 | 9 | // write to the command register (offset 4) in the PCIe config space 10 | pub const COMMAND_REGISTER_OFFSET: u64 = 4; 11 | // bit 2: "bus master enable", see PCIe 3.0 specification section 7.5.1.1 12 | pub const BUS_MASTER_ENABLE_BIT: u64 = 2; 13 | // bit 10: "interrupt disable" 14 | pub const INTERRUPT_DISABLE: u64 = 10; 15 | 16 | /// Unbinds the driver from the device at `pci_addr`. 17 | pub fn unbind_driver(pci_addr: &str) -> Result<(), Box> { 18 | let path = format!("/sys/bus/pci/devices/{}/driver/unbind", pci_addr); 19 | 20 | match fs::OpenOptions::new().write(true).open(path) { 21 | Ok(mut f) => { 22 | write!(f, "{}", pci_addr)?; 23 | Ok(()) 24 | } 25 | Err(ref e) if e.kind() == io::ErrorKind::NotFound => Ok(()), 26 | Err(e) => Err(Box::new(e)), 27 | } 28 | } 29 | 30 | /// Enables direct memory access for the device at `pci_addr`. 31 | pub fn enable_dma(pci_addr: &str) -> Result<(), Box> { 32 | let path = format!("/sys/bus/pci/devices/{}/config", pci_addr); 33 | let mut file = fs::OpenOptions::new().read(true).write(true).open(path)?; 34 | 35 | let mut dma = read_io16(&mut file, COMMAND_REGISTER_OFFSET)?; 36 | dma |= 1 << BUS_MASTER_ENABLE_BIT; 37 | write_io16(&mut file, dma, COMMAND_REGISTER_OFFSET)?; 38 | 39 | Ok(()) 40 | } 41 | 42 | /// Disable INTx interrupts for the device at `pci_addr`. 43 | pub fn disable_interrupts(pci_addr: &str) -> Result<(), Box> { 44 | let path = format!("/sys/bus/pci/devices/{}/config", pci_addr); 45 | let mut file = fs::OpenOptions::new().read(true).write(true).open(path)?; 46 | 47 | let mut dma = read_io16(&mut file, COMMAND_REGISTER_OFFSET)?; 48 | dma |= 1 << INTERRUPT_DISABLE; 49 | write_io16(&mut file, dma, COMMAND_REGISTER_OFFSET)?; 50 | 51 | Ok(()) 52 | } 53 | 54 | /// Mmaps a pci resource and returns a pointer to the mapped memory. 55 | pub fn pci_map_resource(pci_addr: &str) -> Result<(*mut u8, usize), Box> { 56 | let path = format!("/sys/bus/pci/devices/{}/resource0", pci_addr); 57 | 58 | unbind_driver(pci_addr)?; 59 | enable_dma(pci_addr)?; 60 | disable_interrupts(pci_addr)?; 61 | 62 | let file = fs::OpenOptions::new().read(true).write(true).open(&path)?; 63 | let len = fs::metadata(&path)?.len() as usize; 64 | 65 | let ptr = unsafe { 66 | libc::mmap( 67 | ptr::null_mut(), 68 | len, 69 | libc::PROT_READ | libc::PROT_WRITE, 70 | libc::MAP_SHARED, 71 | file.as_raw_fd(), 72 | 0, 73 | ) as *mut u8 74 | }; 75 | 76 | if ptr.is_null() || len == 0 { 77 | Err("pci mapping failed".into()) 78 | } else { 79 | Ok((ptr, len)) 80 | } 81 | } 82 | 83 | /// Opens a pci resource file at the given address. 84 | pub fn pci_open_resource(pci_addr: &str, resource: &str) -> Result> { 85 | let path = format!("/sys/bus/pci/devices/{}/{}", pci_addr, resource); 86 | Ok(OpenOptions::new().read(true).write(true).open(path)?) 87 | } 88 | 89 | /// Opens a pci resource file at the given address in read-only mode. 
90 | pub fn pci_open_resource_ro(pci_addr: &str, resource: &str) -> Result<File, Box<dyn Error>> { 91 | let path = format!("/sys/bus/pci/devices/{}/{}", pci_addr, resource); 92 | Ok(OpenOptions::new().read(true).write(false).open(path)?) 93 | } 94 | 95 | /// Reads and returns a u8 at `offset` in `file`. 96 | pub fn read_io8(file: &mut File, offset: u64) -> Result<u8, io::Error> { 97 | file.seek(SeekFrom::Start(offset))?; 98 | file.read_u8() 99 | } 100 | 101 | /// Reads and returns a u16 at `offset` in `file`. 102 | pub fn read_io16(file: &mut File, offset: u64) -> Result<u16, io::Error> { 103 | file.seek(SeekFrom::Start(offset))?; 104 | file.read_u16::<NativeEndian>() 105 | } 106 | 107 | /// Reads and returns a u32 at `offset` in `file`. 108 | pub fn read_io32(file: &mut File, offset: u64) -> Result<u32, io::Error> { 109 | file.seek(SeekFrom::Start(offset))?; 110 | file.read_u32::<NativeEndian>() 111 | } 112 | 113 | /// Reads and returns a u64 at `offset` in `file`. 114 | pub fn read_io64(file: &mut File, offset: u64) -> Result<u64, io::Error> { 115 | file.seek(SeekFrom::Start(offset))?; 116 | file.read_u64::<NativeEndian>() 117 | } 118 | 119 | /// Writes a u8 at `offset` in `file`. 120 | pub fn write_io8(file: &mut File, value: u8, offset: u64) -> Result<(), io::Error> { 121 | file.seek(SeekFrom::Start(offset))?; 122 | file.write_u8(value) 123 | } 124 | 125 | /// Writes a u16 at `offset` in `file`. 126 | pub fn write_io16(file: &mut File, value: u16, offset: u64) -> Result<(), io::Error> { 127 | file.seek(SeekFrom::Start(offset))?; 128 | file.write_u16::<NativeEndian>(value) 129 | } 130 | 131 | /// Writes a u32 at `offset` in `file`. 132 | pub fn write_io32(file: &mut File, value: u32, offset: u64) -> Result<(), io::Error> { 133 | file.seek(SeekFrom::Start(offset))?; 134 | file.write_u32::<NativeEndian>(value) 135 | } 136 | 137 | /// Writes a u64 at `offset` in `file`. 138 | pub fn write_io64(file: &mut File, value: u64, offset: u64) -> Result<(), io::Error> { 139 | file.seek(SeekFrom::Start(offset))?; 140 | file.write_u64::<NativeEndian>(value) 141 | } 142 | 143 | /// Reads a hex string from `file` and returns it as `u64`. 144 | pub fn read_hex(file: &mut File) -> Result<u64, Box<dyn Error>> { 145 | let mut buffer = String::new(); 146 | file.read_to_string(&mut buffer)?; 147 | 148 | Ok(u64::from_str_radix( 149 | buffer.trim().trim_start_matches("0x"), 150 | 16, 151 | )?)
152 | } 153 | -------------------------------------------------------------------------------- /src/queues.rs: -------------------------------------------------------------------------------- 1 | use crate::cmd::NvmeCommand; 2 | use crate::memory::*; 3 | use std::error::Error; 4 | use std::hint::spin_loop; 5 | 6 | /// NVMe spec 4.6 7 | /// Completion queue entry 8 | #[allow(dead_code)] 9 | #[derive(Clone, Copy, Debug, Default)] 10 | #[repr(C, packed)] 11 | pub struct NvmeCompletion { 12 | /// Command specific 13 | pub command_specific: u32, 14 | /// Reserved 15 | pub _rsvd: u32, 16 | // Submission queue head 17 | pub sq_head: u16, 18 | // Submission queue ID 19 | pub sq_id: u16, 20 | // Command ID 21 | pub c_id: u16, 22 | // Status field 23 | pub status: u16, 24 | } 25 | 26 | /// maximum amount of submission entries on a 2MiB huge page 27 | pub const QUEUE_LENGTH: usize = 1024; 28 | 29 | /// Submission queue 30 | pub struct NvmeSubQueue { 31 | // TODO: switch to mempool for larger queue 32 | commands: Dma<[NvmeCommand; QUEUE_LENGTH]>, 33 | pub head: usize, 34 | pub tail: usize, 35 | len: usize, 36 | pub doorbell: usize, 37 | } 38 | 39 | impl NvmeSubQueue { 40 | pub fn new(len: usize, doorbell: usize) -> Result> { 41 | Ok(Self { 42 | commands: Dma::allocate(crate::memory::HUGE_PAGE_SIZE)?, 43 | head: 0, 44 | tail: 0, 45 | len: len.min(QUEUE_LENGTH), 46 | doorbell, 47 | }) 48 | } 49 | 50 | pub fn is_empty(&self) -> bool { 51 | self.head == self.tail 52 | } 53 | 54 | pub fn is_full(&self) -> bool { 55 | self.head == (self.tail + 1) % self.len 56 | } 57 | 58 | pub fn submit_checked(&mut self, entry: NvmeCommand) -> Option { 59 | if self.is_full() { 60 | None 61 | } else { 62 | Some(self.submit(entry)) 63 | } 64 | } 65 | 66 | #[inline(always)] 67 | pub fn submit(&mut self, entry: NvmeCommand) -> usize { 68 | // println!("SUBMISSION ENTRY: {:?}", entry); 69 | self.commands[self.tail] = entry; 70 | 71 | self.tail = (self.tail + 1) % self.len; 72 | self.tail 73 | } 74 | 75 | pub fn get_addr(&self) -> usize { 76 | self.commands.phys 77 | } 78 | } 79 | 80 | /// Completion queue 81 | pub struct NvmeCompQueue { 82 | commands: Dma<[NvmeCompletion; QUEUE_LENGTH]>, 83 | head: usize, 84 | phase: bool, 85 | len: usize, 86 | pub doorbell: usize, 87 | } 88 | 89 | // TODO: error handling 90 | impl NvmeCompQueue { 91 | pub fn new(len: usize, doorbell: usize) -> Result> { 92 | Ok(Self { 93 | commands: Dma::allocate(crate::memory::HUGE_PAGE_SIZE)?, 94 | head: 0, 95 | phase: true, 96 | len: len.min(QUEUE_LENGTH), 97 | doorbell, 98 | }) 99 | } 100 | 101 | #[inline(always)] 102 | pub fn complete(&mut self) -> Option<(usize, NvmeCompletion, usize)> { 103 | let entry = &self.commands[self.head]; 104 | 105 | if ((entry.status & 1) == 1) == self.phase { 106 | let prev = self.head; 107 | self.head = (self.head + 1) % self.len; 108 | if self.head == 0 { 109 | self.phase = !self.phase; 110 | } 111 | Some((self.head, entry.clone(), prev)) 112 | } else { 113 | None 114 | } 115 | } 116 | 117 | /// 118 | #[inline(always)] 119 | pub fn complete_n(&mut self, commands: usize) -> (usize, NvmeCompletion, usize) { 120 | let prev = self.head; 121 | self.head += commands - 1; 122 | if self.head >= self.len { 123 | self.phase = !self.phase; 124 | } 125 | self.head %= self.len; 126 | 127 | let (head, entry, _) = self.complete_spin(); 128 | (head, entry, prev) 129 | } 130 | 131 | #[inline(always)] 132 | pub fn complete_spin(&mut self) -> (usize, NvmeCompletion, usize) { 133 | loop { 134 | if let Some(val) = self.complete() { 135 | 
return val; 136 | } 137 | spin_loop(); 138 | } 139 | } 140 | 141 | pub fn get_addr(&self) -> usize { 142 | self.commands.phys 143 | } 144 | } 145 | --------------------------------------------------------------------------------
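Usage note (not part of the repository): a minimal sketch of driving I/O through an explicit queue pair, following the pattern in examples/io_test.rs; the PCI address, LBAs and queue depth are placeholders, and root privileges are required for DMA.
```rust
use std::error::Error;
use vroom::memory::{Dma, DmaSlice, HUGE_PAGE_SIZE};
use vroom::QUEUE_LENGTH;

fn main() -> Result<(), Box<dyn Error>> {
    // Placeholder PCI address; pass the address of your NVMe device instead.
    let mut nvme = vroom::init("0000:00:07.0")?;

    // Dedicated I/O submission/completion queue pair plus a 2 MiB DMA buffer.
    let mut qpair = nvme.create_io_queue_pair(QUEUE_LENGTH)?;
    let buffer: Dma<u8> = Dma::allocate(HUGE_PAGE_SIZE)?;

    // Queue 32 sequential 4 KiB reads (8 blocks of 512 B each), then reap them.
    let mut outstanding = 0;
    for i in 0..32 {
        let slice = buffer.slice(i * 4096..(i + 1) * 4096);
        outstanding += qpair.submit_io(&slice, (i as u64) * 8, false);
    }
    while outstanding > 0 {
        // complete_io spins until a completion entry arrives; None signals an error status.
        if qpair.complete_io(1).is_none() {
            break;
        }
        outstanding -= 1;
    }

    nvme.delete_io_queue_pair(qpair)?;
    Ok(())
}
```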