├── .gitignore
├── Cargo.toml
├── README.md
├── echo-async
│   ├── Cargo.toml
│   └── src
│       ├── main.rs
│       ├── serve_tcp.rs
│       └── timeout.rs
├── io-uring-sys
│   ├── Cargo.toml
│   └── src
│       ├── lib.rs
│       ├── macros.rs
│       └── submission.rs
├── io-uring
│   ├── Cargo.toml
│   └── src
│       ├── lib.rs
│       └── mmap.rs
├── tokio-uring-reactor
│   ├── Cargo.toml
│   └── src
│       ├── io.rs
│       ├── lib.rs
│       ├── net.rs
│       ├── reactor.rs
│       ├── reactor
│       │   ├── async_poll.rs
│       │   ├── async_read.rs
│       │   └── async_write.rs
│       ├── registration.rs
│       └── unpark.rs
└── tokio-uring
    ├── Cargo.toml
    ├── examples
    │   └── echo.rs
    └── src
        └── lib.rs

/.gitignore:
--------------------------------------------------------------------------------
/target
**/*.rs.bk
Cargo.lock
--------------------------------------------------------------------------------
/Cargo.toml:
--------------------------------------------------------------------------------
[workspace]
members = [
	"io-uring-sys",
	"io-uring",
	"tokio-uring-reactor",
	"tokio-uring",
	"echo-async",
]
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Proof of Concept io-uring libraries, work in progress

See https://twitter.com/axboe/status/1114568873800491009 for what io-uring is about.

Also see the ["echo-server" example](tokio-uring/examples/echo.rs).

Crates:

- [`io-uring-sys`](io-uring-sys): Low-level wrapping of the kernel API and types
- [`io-uring`](io-uring): Fancy wrapping of the kernel API (especially submission and completion queue)
- [`tokio-uring-reactor`](tokio-uring-reactor): Reactor (IO handling) based on `io-uring` for tokio integration
- [`tokio-uring`](tokio-uring): tokio (current_thread) runtime based on `io-uring`

Right now only some very basic TCP operations (accept, read, write) are supported directly by the tokio integration, but you can do almost anything using the `async_*` functions provided by the reactor handle.
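
For a rough feel of the API, here is a condensed sketch of the per-connection loop from the [`echo-async`](echo-async/src/main.rs) example (buffer reuse, timeouts and error reporting trimmed; note that `read`/`write` consume and hand back both the socket and the buffer):

```rust
async fn echo(handle: Handle, mut con: TcpStream) -> io::Result<()> {
    loop {
        let (n, mut buf, c) = await!(con.read(&handle, vec![0u8; 512]))?;
        if n == 0 {
            return Ok(()); // peer closed the connection
        }
        buf.truncate(n);
        let (_n, _buf, c) = await!(c.write(&handle, buf))?;
        con = c;
    }
}
```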

--------------------------------------------------------------------------------
/echo-async/Cargo.toml:
--------------------------------------------------------------------------------
[package]
name = "echo-async"
version = "0.1.0"
authors = ["Stefan Bühler"]
edition = "2018"

[dependencies]
futures = "0.1.26"
tokio-current-thread = "0.1.6"
tokio-timer = "0.2.8"
tokio-executor = "0.1.7"
tokio-uring-reactor = { path = "../tokio-uring-reactor", features = ["nightly-async"] }
tokio-uring = { path = "../tokio-uring" }
env_logger = "0.6.1"

futures-core-preview = { version = "=0.3.0-alpha.15" }
futures-util-preview = { version = "=0.3.0-alpha.15", features = ["compat"] }
--------------------------------------------------------------------------------
/echo-async/src/main.rs:
--------------------------------------------------------------------------------
#![feature(async_await, await_macro)]

pub mod timeout;
pub mod serve_tcp;

use std::io;
use std::net;
use std::time::Duration;
use tokio_uring_reactor::{
	io::{
		SocketRead,
		SocketWrite,
	},
	Handle,
};
use crate::timeout::{TryFutureExt as _};

async fn handle_connection(handle: Handle, c: tokio_uring_reactor::net::TcpStream, a: net::SocketAddr) -> io::Result<()> {
	println!("Connection from {}", a);

	let mut storage: Option<Vec<u8>> = None;
	let mut connection = Some(c);

	loop {
		let mut buf = storage.take().unwrap_or_default();
		let con = connection.take().expect("connection missing");
		buf.resize_with(512, Default::default);

		let (n, mut buf, con) = await!(con.read(&handle, buf).timeout(Duration::from_secs(3)))?;
		if n == 0 {
			println!("Connection from {} closing", a);
			return Ok(())
		}
		buf.truncate(n);
		println!("Echoing: {:?}", buf);
		let (_n, buf, con) = await!(con.write(&handle, buf))?;

		// put values back for next round
		storage = Some(buf);
		connection = Some(con);
	}
}

pub fn main() {
	env_logger::init();

	println!("Starting echo server");

	let l = net::TcpListener::bind("[::]:22").expect("bind");

	let mut runtime = tokio_uring::Runtime::new().expect("new runtime");

	serve_tcp::serve(&mut runtime, l, handle_connection, async move |_, e, addr| {
		eprintln!("Connection from {} error: {}", addr, e);
	});
	runtime.run().expect("runtime run");
}
--------------------------------------------------------------------------------
/echo-async/src/serve_tcp.rs:
--------------------------------------------------------------------------------
use futures_util;
use futures_util::{
	compat::Compat,
	try_stream::TryStreamExt as _,
	try_future::TryFutureExt as _,
	future::FutureExt as _,
};
use std::future::Future;
use std::io;
use std::net;
use std::time::Duration;
use std::rc::Rc;
use tokio_uring_reactor::{
	Handle,
};
use crate::timeout::{TryFutureExt as _};

pub fn serve<H, HF, HE, HEF>(runtime: &mut tokio_uring::Runtime, l: net::TcpListener, handle_con: H, handle_err: HE)
where
	H: Fn(Handle, tokio_uring_reactor::net::TcpStream, net::SocketAddr) -> HF + 'static,
	HF: Future<Output = io::Result<()>> + 'static,
	HE: Fn(Handle, io::Error, net::SocketAddr) -> HEF + 'static,
	HEF: Future<Output = ()> + 'static,
{
	let handle = runtime.reactor_handle();
	runtime.spawn(Compat::new(Box::pin(async move {
		let handle_err = Rc::new(handle_err);

		if let Err::<(), io::Error>(e) = await!(async {
			let l = tokio_uring_reactor::net::TcpListener::from(l);
			let mut i = l.incoming(&handle);

			loop {
				let (con, addr) = await!(i.try_next().timeout(Duration::from_secs(30)))?.unwrap();
				let ehandle = handle.clone();
				let handle_err = handle_err.clone();
				tokio_current_thread::spawn(Compat::new(Box::pin(
					handle_con(handle.clone(), con, addr.clone())
					.or_else(move |e| handle_err(ehandle, e, addr).map(Ok))
				)))
			}
		}) {
			eprintln!("Serve error: {}", e);
		}

		Ok::<(), ()>(())
	})));
}
--------------------------------------------------------------------------------
/echo-async/src/timeout.rs:
--------------------------------------------------------------------------------
use futures_util::compat::Compat01As03;
use tokio_timer::Delay;

use std::{
	io,
	time::Duration,
	future::Future,
	pin::Pin,
	task::{
		Context,
		Poll,
	},
};

pub enum TimeoutError<E> {
	Timeout,
	Inner(E),
	Timer(tokio_timer::Error),
}

impl<E> From<TimeoutError<E>> for io::Error
where
	E: Into<io::Error>,
{
	fn from(e: TimeoutError<E>) -> io::Error {
		match e {
			TimeoutError::Timeout => io::Error::new(io::ErrorKind::TimedOut, "async operation timed out"),
			TimeoutError::Inner(e) => e.into(),
			TimeoutError::Timer(e) => io::Error::new(io::ErrorKind::Other, e),
		}
	}
}

pub struct Timeout<T> {
	timeout: Duration,
	delay: Option<Compat01As03<Delay>>,
	inner: T,
}

impl<T> Timeout<T> {
	pub fn new(inner: T, timeout: Duration) -> Self {
		Timeout {
			timeout,
			delay: None,
			inner,
		}
	}
}

impl<T, I, E> Future for Timeout<T>
where
	T: Future<Output = Result<I, E>>,
{
	type Output = Result<I, TimeoutError<E>>;

	fn poll(self: Pin<&mut Self>, ctx: &mut Context<'_>) -> Poll<Self::Output> {
		// must not move this.inner or this.delay
		let this = unsafe { self.get_unchecked_mut() };
		match unsafe { Pin::new_unchecked(&mut this.inner) }.poll(ctx) {
			Poll::Pending => {
				if this.delay.is_none() {
					let now = tokio_timer::clock::now();
					this.delay = Some(Compat01As03::new(Delay::new(now + this.timeout)));
				}
				match unsafe { Pin::new_unchecked(this.delay.as_mut().expect("delay")) }.poll(ctx) {
					Poll::Pending => Poll::Pending,
					Poll::Ready(Ok(())) => Poll::Ready(Err(TimeoutError::Timeout)),
					Poll::Ready(Err(e)) => Poll::Ready(Err(TimeoutError::Timer(e))),
				}
			},
			Poll::Ready(r) => {
				this.delay = None; // don't move, reset in-place
				Poll::Ready(r.map_err(TimeoutError::Inner))
			}
		}
	}
}

pub trait TryFutureExt<I, E>: Future<Output = Result<I, E>> + Sized {
	fn timeout(self, timeout: Duration) -> Timeout<Self> {
		Timeout::new(self, timeout)
	}
}

impl<T, I, E> TryFutureExt<I, E> for T
where
	T: Future<Output = Result<I, E>>
{
}
--------------------------------------------------------------------------------
/io-uring-sys/Cargo.toml:
--------------------------------------------------------------------------------
[package]
name = "io-uring-sys"
version = "0.1.0"
authors = ["Stefan Bühler"]
edition = "2018"

[dependencies]
bitflags = "1.0.4"
libc = "0.2.51"
--------------------------------------------------------------------------------
/io-uring-sys/src/lib.rs:
--------------------------------------------------------------------------------
#![no_std]

mod macros;

mod submission;
pub use submission::{
	IoPriorityLevel,
	IoPriority,
	FileDescriptor,
};

use bitflags::bitflags;

use core::mem::size_of;
use core::fmt;
use libc::{
	c_int,
	c_long,
	c_uint,
	c_void,
};

static_assert!(
	size_of::<SetupParameters>() == 120,
	size_of::<SubmissionQueueRingOffsets>() == 40,
	size_of::<CompletionQueueRingOffsets>() == 40,
	size_of::<SubmissionEntry>() == 64,
	size_of::<SubmissionEntryOperationFlags>() == 4,
	size_of::<SubmissionEntryExtraData>() == 24,
	size_of::<CompletionEntry>() == 16,
	true
);

#[cfg(all(target_os = "linux", any(target_arch = "x86", target_arch = "x86_64")))]
#[allow(non_upper_case_globals)]
mod syscalls {
	pub const SYS_io_uring_setup: libc::c_long = 425;
	pub const SYS_io_uring_enter: libc::c_long = 426;
	pub const SYS_io_uring_register: libc::c_long = 427;
}

pub unsafe fn io_uring_setup(entries: u32, params: *mut SetupParameters) -> c_int {
	libc::syscall(
		syscalls::SYS_io_uring_setup,
		entries as c_long,
		params as usize as c_long,
	) as c_int
}

pub unsafe fn io_uring_enter(fd: c_int, to_submit: c_uint, min_complete: c_uint, flags: c_uint, sig: *const libc::sigset_t) -> c_int {
	libc::syscall(
		syscalls::SYS_io_uring_enter,
		fd as c_long,
		to_submit as c_long,
		min_complete as c_long,
		flags as c_long,
		sig as usize as c_long,
		core::mem::size_of::<libc::sigset_t>() as c_long,
	) as c_int
}

pub unsafe fn io_uring_register(fd: c_int, opcode: c_uint, arg: *const c_void, nr_args: c_uint) -> c_int {
	libc::syscall(
		syscalls::SYS_io_uring_register,
		fd as c_long,
		opcode as c_long,
		arg as usize as c_long,
		nr_args as c_long,
	) as c_int
}

bitflags! {
	#[derive(Default)]
	pub struct EnterFlags: u32 {
		/// `IORING_ENTER_GETEVENTS`
		const GETEVENTS = (1 << 0);
		/// `IORING_ENTER_SQ_WAKEUP`
		const SQ_WAKEUP = (1 << 1);

		// don't truncate any bits
		#[doc(hidden)]
		const _ALL = !0;
	}
}

#[derive(Clone, Copy, Debug)]
pub struct RegisterOpCode(pub u32);

impl RegisterOpCode {
	/// `IORING_REGISTER_BUFFERS`
	pub const REGISTER_BUFFERS: Self = Self(0);
	/// `IORING_UNREGISTER_BUFFERS`
	pub const UNREGISTER_BUFFERS: Self = Self(1);
	/// `IORING_REGISTER_FILES`
	pub const REGISTER_FILES: Self = Self(2);
	/// `IORING_UNREGISTER_FILES`
	pub const UNREGISTER_FILES: Self = Self(3);
}
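
// Usage sketch for the raw wrappers above: they return the plain syscall
// result, negative on failure with errno set, e.g.:
//
//     let mut params = SetupParameters::new(SetupFlags::default());
//     let fd = unsafe { io_uring_setup(64, &mut params) };
//     assert!(fd >= 0, "io_uring_setup failed");
//     // params.sq_off / params.cq_off now describe the rings to mmap()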

/// Passed in for io_uring_setup(2). Copied back with updated info on
/// success
///
/// C: `struct io_uring_params`
#[derive(Clone, Copy, Default, Debug)]
#[repr(C)]
pub struct SetupParameters {
	/// (output) allocated entries in submission queue
	///
	/// (both ring index `array` and separate entry array at
	/// `SUBMISSION_QUEUE_ENTRIES_OFFSET`).
	pub sq_entries: u32,
	/// (output) allocated entries in completion queue
	pub cq_entries: u32,
	/// (input)
	pub flags: SetupFlags,
	/// (input) used if SQ_AFF and SQPOLL flags are active to pin poll
	/// thread to specific cpu
	///
	/// right now always checked in kernel for "possible cpu".
	pub sq_thread_cpu: u32,
	/// (input) used if SQPOLL flag is active; timeout in milliseconds
	/// until kernel poll thread goes to sleep.
	pub sq_thread_idle: u32,
	// reserved
	_reserved: [u32; 5],
	/// (output) submission queue ring data field offsets
	pub sq_off: SubmissionQueueRingOffsets,
	/// (output) completion queue ring data field offsets
	pub cq_off: CompletionQueueRingOffsets,
}

impl SetupParameters {
	/// `IORING_OFF_SQ_RING`: mmap offset for submission queue ring
	pub const SUBMISSION_QUEUE_RING_OFFSET: i64 = 0;
	/// `IORING_OFF_CQ_RING`: mmap offset for completion queue ring
	pub const COMPLETION_QUEUE_RING_OFFSET: i64 = 0x8000000;
	/// `IORING_OFF_SQES`: mmap offset for submission entries
	pub const SUBMISSION_QUEUE_ENTRIES_OFFSET: i64 = 0x10000000;

	pub fn new(flags: SetupFlags) -> Self {
		Self {
			flags,
			..Self::default()
		}
	}
}

bitflags! {
	/// io_uring_setup() flags
	#[derive(Default)]
	pub struct SetupFlags: u32 {
		/// `IORING_SETUP_IOPOLL`: io_context is polled
		const IOPOLL = (1 << 0);

		/// `IORING_SETUP_SQPOLL`: SQ poll thread
		const SQPOLL = (1 << 1);

		/// `IORING_SETUP_SQ_AFF`: sq_thread_cpu is valid
		const SQ_AFF = (1 << 2);

		// don't truncate any bits
		#[doc(hidden)]
		const _ALL = !0;
	}
}

/// Offset to various struct members in mmap() at offset
/// `SUBMISSION_QUEUE_RING_OFFSET`
///
/// C: `struct io_sqring_offsets`
#[derive(Clone, Copy, Default, Debug)]
#[repr(C)]
pub struct SubmissionQueueRingOffsets {
	/// member type: AtomicU32; index into `self.array` (after `self.ring_mask` is applied)
	///
	/// incremented by kernel after entry at `head` was processed.
	///
	/// pending submissions: [head..tail]
	pub head: u32,
	/// member type: AtomicU32; index into `self.array` (after `self.ring_mask` is applied)
	///
	/// modified by user space when new entry was queued; points to next
	/// entry user space is going to fill.
	pub tail: u32,
	/// member type: (const) u32
	///
	/// value `value_at(self.ring_entries) - 1`
	///
	/// mask for indices at `head` and `tail` (don't delete masked bits!
	/// `head` and `tail` can point to the same entry, but if they are
	/// not exactly equal it implies the ring is full, and if they are
	/// exactly equal the ring is empty.)
	pub ring_mask: u32,
	/// member type: (const) u32; value same as SetupParameters.sq_entries, power of 2.
	pub ring_entries: u32,
	/// member type: (atomic) SubmissionQueueFlags
	pub flags: u32,
	/// member type: AtomicU32
	///
	/// number of (invalid) entries that were dropped; entries are
	/// invalid if their index (in `self.array`) is out of bounds.
	pub dropped: u32,
	/// member type: [u32] (index array into array of `SubmissionEntry`s
	/// at offset `SUBMISSION_QUEUE_ENTRIES_OFFSET` in mmap())
	pub array: u32,
	// reserved
	_reserved: [u32; 3],
}
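
// A sketch of how these index fields are meant to be used, assuming `head`,
// `tail` and `ring_mask` hold the values the offsets above point at:
//
//     let slot = (tail & ring_mask) as usize; // where the next entry goes
//     let empty = head == tail;               // exactly equal: ring empty
//     let full = head != tail                 // not equal, but same masked
//         && (head & ring_mask) == (tail & ring_mask); // slot: ring full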

bitflags! {
	#[derive(Default)]
	pub struct SubmissionQueueFlags: u32 {
		/// `IORING_SQ_NEED_WAKEUP`: needs io_uring_enter wakeup
		///
		/// set by kernel poll thread when it goes sleeping, and reset
		/// on wakeup
		const NEED_WAKEUP = (1 << 0);

		// don't truncate any bits
		#[doc(hidden)]
		const _ALL = !0;
	}
}

/// Offset to various struct members in mmap() at offset
/// `COMPLETION_QUEUE_RING_OFFSET`
///
/// C: `struct io_cqring_offsets`
#[derive(Clone, Copy, Default, Debug)]
#[repr(C)]
pub struct CompletionQueueRingOffsets {
	/// member type: AtomicU32; index into `self.cqes` (after `self.ring_mask` is applied)
	///
	/// incremented by user space after entry at `head` was processed.
	///
	/// available entries for processing: [head..tail]
	pub head: u32,
	/// member type: AtomicU32; index into `self.cqes` (after `self.ring_mask` is applied)
	///
	/// modified by kernel when new entry was created; points to next
	/// entry kernel is going to fill.
	pub tail: u32,
	/// member type: (const) u32
	///
	/// value `value_at(self.ring_entries) - 1`
	///
	/// mask for indices at `head` and `tail` (don't delete masked bits!
	/// `head` and `tail` can point to the same entry, but if they are
	/// not exactly equal it implies the ring is full, and if they are
	/// exactly equal the ring is empty.)
	pub ring_mask: u32,
	/// member type: (const) u32; value same as SetupParameters.cq_entries, power of 2.
	pub ring_entries: u32,
	/// member type: AtomicU32
	///
	/// incremented by the kernel every time it failed to queue a
	/// completion event because the ring was full.
	pub overflow: u32,
	/// member type: [CompletionEntry; self.ring_entries]
	pub cqes: u32,
	// reserved
	_reserved: [u64; 2],
}

/// C: `struct io_uring_sqe`
#[repr(C)]
#[derive(Debug)]
pub struct SubmissionEntry {
	pub opcode: RawOperation,
	pub flags: SubmissionEntryFlags,
	pub ioprio: EncodedIoPriority,
	pub fd: i32,
	pub off: u64,
	pub addr: u64,
	pub len: u32,
	pub op_flags: SubmissionEntryOperationFlags,
	pub user_data: u64,
	pub extra: SubmissionEntryExtraData,
}

#[repr(u8)]
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]
// #[non_exhaustive]
pub enum Operation {
	Nop = 0,
	Readv = 1,
	Writev = 2,
	Fsync = 3,
	ReadFixed = 4,
	WriteFixed = 5,
	PollAdd = 6,
	PollRemove = 7,
}

impl Default for Operation {
	fn default() -> Self {
		Operation::Nop
	}
}

#[repr(C)]
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Default)]
pub struct RawOperation(pub u8);

impl RawOperation {
	pub fn decode(self) -> Option<Operation> {
		Some(match self.0 {
			0 => Operation::Nop,
			1 => Operation::Readv,
			2 => Operation::Writev,
			3 => Operation::Fsync,
			4 => Operation::ReadFixed,
			5 => Operation::WriteFixed,
			6 => Operation::PollAdd,
			7 => Operation::PollRemove,
			_ => return None,
		})
	}
}

impl From<Operation> for RawOperation {
	fn from(op: Operation) -> Self {
		RawOperation(op as u8)
	}
}

impl fmt::Debug for RawOperation {
	fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
		match self.decode() {
			Some(op) => op.fmt(f),
			None => f.debug_tuple("RawOperation").field(&self.0).finish(),
		}
	}
}

bitflags! {
	#[derive(Default)]
	pub struct SubmissionEntryFlags: u8 {
		/// IOSQE_FIXED_FILE: use fixed fileset
		///
		/// I.e. `SubmissionEntry.fd` is used as index into the
		/// registered fileset (array of fds) instead.
		const FIXED_FILE = (1 << 0);

		// don't truncate any bits
		#[doc(hidden)]
		const _ALL = !0;
	}
}

#[repr(C)]
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Debug, Default)]
pub struct EncodedIoPriority(pub u16);

#[repr(C)]
pub union SubmissionEntryOperationFlags {
	pub raw: u32,
	pub rw_flags: ReadWriteFlags,
	pub fsync_flags: FsyncFlags,
	pub poll_events: PollFlags,
}

impl fmt::Debug for SubmissionEntryOperationFlags {
	fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
		f.debug_struct("SubmissionEntryOperationFlags")
		.field("raw", unsafe { &self.raw })
		.field("rw_flags", unsafe { &self.rw_flags })
		.field("fsync_flags", unsafe { &self.fsync_flags })
		.field("poll_events", unsafe { &self.poll_events })
		.finish()
	}
}

impl From<u32> for SubmissionEntryOperationFlags {
	fn from(raw: u32) -> Self {
		Self { raw }
	}
}

impl From<ReadWriteFlags> for SubmissionEntryOperationFlags {
	fn from(rw_flags: ReadWriteFlags) -> Self {
		Self { rw_flags }
	}
}

impl From<FsyncFlags> for SubmissionEntryOperationFlags {
	fn from(fsync_flags: FsyncFlags) -> Self {
		Self { fsync_flags }
	}
}

impl From<PollFlags> for SubmissionEntryOperationFlags {
	fn from(poll_events: PollFlags) -> Self {
		Self { poll_events }
	}
}

bitflags! {
	#[derive(Default)]
	pub struct ReadWriteFlags: u32 {
		/// High priority read/write. Allows block-based filesystems to
		/// use polling of the device, which provides lower latency, but
		/// may use additional resources. (Currently, this feature is
		/// usable only on a file descriptor opened using the
		/// O_DIRECT flag.)
		///
		/// (since Linux 4.6)
		const HIPRI = 0x00000001;

		/// Provide a per-write equivalent of the O_DSYNC open(2) flag.
		/// This flag is meaningful only for pwritev2(), and its effect
		/// applies only to the data range written by the system call.
		///
		/// (since Linux 4.7)
		const DSYNC = 0x00000002;

		/// Provide a per-write equivalent of the O_SYNC open(2) flag.
		/// This flag is meaningful only for pwritev2(), and its effect
		/// applies only to the data range written by the system call.
		///
		/// (since Linux 4.7)
		const SYNC = 0x00000004;

		/// Do not wait for data which is not immediately available. If
		/// this flag is specified, the preadv2() system call will
		/// return instantly if it would have to read data from the
		/// backing storage or wait for a lock. If some data was
		/// successfully read, it will return the number of bytes read.
		/// If no bytes were read, it will return -1 and set errno to
		/// EAGAIN. Currently, this flag is meaningful only for
		/// preadv2().
		///
		/// (since Linux 4.14)
		const NOWAIT = 0x00000008;

		/// Provide a per-write equivalent of the O_APPEND open(2) flag.
		/// This flag is meaningful only for pwritev2(), and its effect
		/// applies only to the data range written by the system call.
		/// The offset argument does not affect the write operation; the
		/// data is always appended to the end of the file. However, if
		/// the offset argument is -1, the current file offset is
		/// updated.
		///
		/// (since Linux 4.16)
		const APPEND = 0x00000010;

		const SUPPORTED = 0
			| Self::HIPRI.bits
			| Self::DSYNC.bits
			| Self::SYNC.bits
			| Self::NOWAIT.bits
			| Self::APPEND.bits
			;

		// don't truncate any bits
		#[doc(hidden)]
		const _ALL = !0;
	}
}

bitflags! {
	#[derive(Default)]
	pub struct FsyncFlags: u32 {
		const DATASYNC = (1 << 0);

		// don't truncate any bits
		#[doc(hidden)]
		const _ALL = !0;
	}
}

bitflags! {
	#[derive(Default)]
	pub struct PollFlags: u16 {
		const IN = libc::POLLIN as u16;
		const OUT = libc::POLLOUT as u16;
		const PRI = libc::POLLPRI as u16;
		const ERR = libc::POLLERR as u16;
		const NVAL = libc::POLLNVAL as u16;
		const RDNORM = libc::POLLRDNORM as u16;
		const RDBAND = libc::POLLRDBAND as u16;
		const WRNORM = libc::POLLWRNORM as u16;
		const WRBAND = libc::POLLWRBAND as u16;
		const HUP = libc::POLLHUP as u16;
		const RDHUP = 0x2000; // sparc: 0x800; // TODO: libc::POLLRDHUP as u16;
		const MSG = 0x0400; // sparc: 0x200; // TODO: libc::POLLMSG as u16;

		// don't truncate any bits
		#[doc(hidden)]
		const _ALL = !0;
	}
}

#[repr(C)]
#[derive(Clone, Copy)]
pub union SubmissionEntryExtraData {
	pub fixed: SubmissionEntryFixedOp,
	_pad2: [u64; 3],
}

impl fmt::Debug for SubmissionEntryExtraData {
	fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
		f.debug_struct("SubmissionEntryExtraData")
		.field("fixed", unsafe { &self.fixed })
		.finish()
	}
}

#[derive(Clone, Copy, Default, Debug)]
#[repr(C)]
pub struct SubmissionEntryFixedOp {
	/// index into fixed buffers
	pub buf_index: u16,
}

/// C: `struct io_uring_cqe`
#[repr(C)]
#[derive(Clone, Copy, Default, Debug)]
pub struct CompletionEntry {
	pub user_data: u64,
	pub res: i32,
	pub flags: u32,
}
--------------------------------------------------------------------------------
/io-uring-sys/src/macros.rs:
--------------------------------------------------------------------------------
// Compile-time assertions: folds all expressions into one `&&` chain and
// declares an array of length `(expr as usize) - 1`, which underflows (and
// fails the build) when the chain evaluates to false.
#[macro_export]
macro_rules! static_assert {
	(let $e:expr; ) => (
		struct _ArrayForStaticAssert([i8; ($e) as usize - 1]);
	);

	(let $e:expr; $e1:expr $(, $ee:expr)*) => (
		static_assert!(let ($e) && ($e1); $($ee),*);
	);

	($e:expr $(, $ee:expr)*) => (
		static_assert!(let true && ($e); $($ee),*);
	);
}
--------------------------------------------------------------------------------
/io-uring-sys/src/submission.rs:
--------------------------------------------------------------------------------
// additional code that makes it easier to handle submissions

use crate::*;

#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]
#[repr(u8)]
pub enum IoPriorityLevel {
	Level0 = 0,
	Level1 = 1,
	Level2 = 2,
	Level3 = 3,
	Level4 = 4,
	Level5 = 5,
	Level6 = 6,
	Level7 = 7,
}

impl IoPriorityLevel {
	pub fn try_from(v: u8) -> Option<Self> {
		Some(match v {
			0 => IoPriorityLevel::Level0,
			1 => IoPriorityLevel::Level1,
			2 => IoPriorityLevel::Level2,
			3 => IoPriorityLevel::Level3,
			4 => IoPriorityLevel::Level4,
			5 => IoPriorityLevel::Level5,
			6 => IoPriorityLevel::Level6,
			7 => IoPriorityLevel::Level7,
			_ => return None,
		})
	}
}

#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]
pub enum IoPriority {
	None,
	Realtime(IoPriorityLevel),
	BestEffort(IoPriorityLevel),
	Idle,
}

impl IoPriority {
	pub fn try_from(e: EncodedIoPriority) -> Option<Self> {
		Some(match e.0 >> 13 {
			0 => IoPriority::None,
			1 => IoPriority::Realtime(IoPriorityLevel::try_from(e.0 as u8)?),
			2 => IoPriority::BestEffort(IoPriorityLevel::try_from(e.0 as u8)?),
			3 => IoPriority::Idle,
			_ => return None,
		})
	}
}

impl Default for IoPriority {
	fn default() -> Self {
		IoPriority::None
	}
}

impl Into<EncodedIoPriority> for IoPriority {
	fn into(self) -> EncodedIoPriority {
		EncodedIoPriority(match self {
			IoPriority::None => 0 << 13,
			IoPriority::Realtime(l) => (1 << 13) | ((l as u8) as u16),
			IoPriority::BestEffort(l) => (2 << 13) | ((l as u8) as u16),
			IoPriority::Idle => 3 << 13,
		})
	}
}
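
// Encoding sketch: the priority class lives in the top three bits, the level
// in the low bits, e.g.:
//
//     let enc: EncodedIoPriority = IoPriority::BestEffort(IoPriorityLevel::Level4).into();
//     assert_eq!(enc.0, (2 << 13) | 4);
//     assert_eq!(IoPriority::try_from(enc), Some(IoPriority::BestEffort(IoPriorityLevel::Level4)));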

#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]
pub enum FileDescriptor {
	/// standard file descriptor
	FD(i32),
	/// index into previously registered list of fds
	Fixed(u32),
}

impl SubmissionEntry {
	pub fn clear(&mut self) {
		unsafe {
			*self = core::mem::zeroed();
		}
	}

	fn iov(&mut self, op: Operation, prio: IoPriority, fd: FileDescriptor, offset: u64, flags: ReadWriteFlags, iov: *const [libc::iovec]) {
		self.opcode = op.into();
		self.flags = Default::default();
		self.ioprio = prio.into();
		match fd {
			FileDescriptor::FD(fd) => self.fd = fd,
			FileDescriptor::Fixed(ndx) => {
				self.flags |= SubmissionEntryFlags::FIXED_FILE;
				self.fd = ndx as i32;
			}
		}
		self.off = offset;
		self.addr = unsafe { (*iov).as_ptr() } as usize as u64;
		self.len = unsafe { (*iov).len() } as u32;
		self.op_flags = flags.into();
		unsafe {
			self.extra.fixed.buf_index = 0;
		}
	}

	// iov needs to live until operation is completed! (as the kernel might
	// submit the request "async")
	pub unsafe fn readv(&mut self, prio: IoPriority, fd: FileDescriptor, offset: u64, flags: ReadWriteFlags, iov: *const [libc::iovec]) {
		self.iov(Operation::Readv, prio, fd, offset, flags, iov);
	}

	// iov needs to live until operation is completed! (as the kernel might
	// submit the request "async")
	pub unsafe fn writev(&mut self, prio: IoPriority, fd: FileDescriptor, offset: u64, flags: ReadWriteFlags, iov: *const [libc::iovec]) {
		self.iov(Operation::Writev, prio, fd, offset, flags, iov);
	}

	fn io_fixed(&mut self, op: Operation, prio: IoPriority, fd: FileDescriptor, offset: u64, flags: ReadWriteFlags, buf_index: u16, buf: *const [u8]) {
		self.opcode = op.into();
		self.flags = Default::default();
		self.ioprio = prio.into();
		match fd {
			FileDescriptor::FD(fd) => self.fd = fd,
			FileDescriptor::Fixed(ndx) => {
				self.flags |= SubmissionEntryFlags::FIXED_FILE;
				self.fd = ndx as i32;
			}
		}
		self.off = offset;
		self.addr = unsafe { (*buf).as_ptr() } as usize as u64;
		self.len = unsafe { (*buf).len() } as u32;
		self.op_flags = flags.into();
		unsafe {
			self.extra.fixed.buf_index = buf_index;
		}
	}

	// buf must be a sub-slice of the buffer registered at the given index
	pub unsafe fn read_fixed(&mut self, prio: IoPriority, fd: FileDescriptor, offset: u64, flags: ReadWriteFlags, buf_index: u16, buf: *const [u8]) {
		self.io_fixed(Operation::ReadFixed, prio, fd, offset, flags, buf_index, buf);
	}

	// buf must be a sub-slice of the buffer registered at the given index
	pub unsafe fn write_fixed(&mut self, prio: IoPriority, fd: FileDescriptor, offset: u64, flags: ReadWriteFlags, buf_index: u16, buf: *const [u8]) {
		self.io_fixed(Operation::WriteFixed, prio, fd, offset, flags, buf_index, buf);
	}

	pub fn fsync_full(&mut self, fd: FileDescriptor, flags: FsyncFlags) {
		self.fsync(fd, flags, 0, 0);
	}

	// if offset + len == 0 it syncs until end of file
	// right now it seems to require FsyncFlags::DATASYNC to be set.
	pub fn fsync(&mut self, fd: FileDescriptor, flags: FsyncFlags, offset: u64, len: u32) {
		self.opcode = Operation::Fsync.into();
		self.flags = Default::default();
		self.ioprio = EncodedIoPriority(0);
		match fd {
			FileDescriptor::FD(fd) => self.fd = fd,
			FileDescriptor::Fixed(ndx) => {
				self.flags |= SubmissionEntryFlags::FIXED_FILE;
				self.fd = ndx as i32;
			}
		}
		self.off = offset;
		self.addr = 0;
		self.len = len;
		self.op_flags = flags.into();
		unsafe {
			self.extra.fixed.buf_index = 0;
		}
	}

	// The CQE `res` will contain the mask with "ready" event flags
	pub fn poll_add(&mut self, fd: FileDescriptor, flags: PollFlags) {
		self.opcode = Operation::PollAdd.into();
		self.flags = Default::default();
		self.ioprio = EncodedIoPriority(0);
		match fd {
			FileDescriptor::FD(fd) => self.fd = fd,
			FileDescriptor::Fixed(ndx) => {
				self.flags |= SubmissionEntryFlags::FIXED_FILE;
				self.fd = ndx as i32;
			}
		}
		self.off = 0;
		self.addr = 0;
		self.len = 0;
		self.op_flags = flags.into();
		unsafe {
			self.extra.fixed.buf_index = 0;
		}
	}

	// the PollRemove operation will still complete (possibly with an empty mask)
	pub fn poll_remove(&mut self, match_user_data: u64) {
		self.opcode = Operation::PollRemove.into();
		self.flags = Default::default();
		self.ioprio = EncodedIoPriority(0);
		self.fd = 0;
		self.off = 0;
		self.addr = match_user_data;
		self.len = 0;
		self.op_flags = 0u32.into();
		unsafe {
			self.extra.fixed.buf_index = 0;
		}
	}
}
--------------------------------------------------------------------------------
/io-uring/Cargo.toml:
--------------------------------------------------------------------------------
[package]
name = "io-uring"
version = "0.1.0"
authors = ["Stefan Bühler"]
edition = "2018"

[dependencies]
io-uring-sys = { path = "../io-uring-sys" }
libc = "0.2.51"
log = "0.4.6"
--------------------------------------------------------------------------------
/io-uring/src/lib.rs:
--------------------------------------------------------------------------------
mod mmap;

use std::sync::atomic::{
	Ordering,
	AtomicU32,
};
use std::io;
use std::os::unix::io::{
	AsRawFd,
	FromRawFd,
	IntoRawFd,
	RawFd,
};
use std::mem::size_of;

pub use io_uring_sys::*;
use crate::mmap::MappedMemory;

pub struct Uring {
	file: UringFile,
	sq: SubmissionQueue,
	cq: CompletionQueue,
}

impl Uring {
	pub fn new(entries: u32, mut params: SetupParameters) -> io::Result<Self> {
		let file = UringFile::new(entries, &mut params)?;
		let sq = SubmissionQueue::new(&file, &params.sq_off, params.sq_entries)?;
		let cq = CompletionQueue::new(&file, &params.cq_off, params.cq_entries)?;

		Ok(Uring {
			file,
			sq,
			cq,
		})
	}

	pub fn file(&mut self) -> &mut UringFile {
		&mut self.file
	}

	pub fn submission_queue(&mut self) -> &mut SubmissionQueue {
		&mut self.sq
	}

	pub fn completion_queue(&mut self) -> &mut CompletionQueue {
		&mut self.cq
	}
}

// the purpose of the indirection (index array -> sces) is not quite
// clear; we don't need it and will keep the index simple, i.e. entry i
// (masked) in ring will point at i in sces.
pub struct SubmissionQueue {
	_mmap: MappedMemory,

	// `head` is controlled by kernel. we only need to update it if it
	// seems the ring is full.
	k_head: &'static AtomicU32,
	cached_head: u32,
	// `tail` is controlled by us; we can stage multiple entries and
	// then flush them in one update
	k_tail: &'static AtomicU32,
	local_tail: u32,

	// `ring_mask` and `ring_entries` are const, so only read them once.
	ring_mask: u32,
	// k_ring_mask: *const u32,
	// ring_entries: u32,
	// k_ring_entries: *const u32,

	// `flags`: the only flag so far is controlled by the kernel
	k_flags: &'static AtomicU32, // SubmissionQueueFlags

	// `dropped` counter of invalid submissions (out-of-bound index in array)
	k_dropped: &'static AtomicU32,

	// index array; kernel only reads this, so we can init it once
	// k_array: *mut u32,

	// points to [SubmissionEntry; ring_entries]
	_mmap_entries: MappedMemory,
	sces: *mut SubmissionEntry,
}

impl SubmissionQueue {
	fn new(file: &UringFile, offsets: &SubmissionQueueRingOffsets, sq_entries: u32) -> io::Result<Self> {
		let mmap = MappedMemory::map(
			file.as_raw_fd(),
			SetupParameters::SUBMISSION_QUEUE_RING_OFFSET,
			(offsets.array as usize) + size_of::<u32>() * (sq_entries as usize),
		)?;
		let mmap_entries = MappedMemory::map(
			file.as_raw_fd(),
			SetupParameters::SUBMISSION_QUEUE_ENTRIES_OFFSET,
			size_of::<SubmissionEntry>() * (sq_entries as usize),
		)?;
		let k_head: &AtomicU32 = unsafe { &*mmap.get_field(offsets.head) };
		let cached_head = k_head.load(Ordering::Relaxed);
		let k_tail: &AtomicU32 = unsafe { &*mmap.get_field(offsets.tail) };
		let local_tail = k_tail.load(Ordering::Relaxed);
		let k_ring_mask: *mut u32 = mmap.get_field(offsets.ring_mask);
		let ring_mask = unsafe { *k_ring_mask };
		let k_ring_entries: *mut u32 = mmap.get_field(offsets.ring_entries);
		let ring_entries = unsafe { *k_ring_entries };
		let k_flags: &AtomicU32 = unsafe { &*mmap.get_field(offsets.flags) };
		let k_dropped: &AtomicU32 = unsafe { &*mmap.get_field(offsets.dropped) };
		let k_array: *mut u32 = mmap.get_field(offsets.array);
		let sces: *mut SubmissionEntry = mmap_entries.as_mut_ptr() as *mut SubmissionEntry;

		assert_eq!(sq_entries, ring_entries);
		assert!(ring_entries.is_power_of_two());
		assert_eq!(ring_mask, ring_entries - 1);

		// initialize index array to identity map: i -> i.
		for i in 0..ring_entries {
			unsafe { *k_array.add(i as usize) = i };
		}

		Ok(SubmissionQueue {
			_mmap: mmap,
			k_head,
			cached_head,
			k_tail,
			local_tail,
			ring_mask,
			// ring_entries,
			k_flags,
			k_dropped,
			_mmap_entries: mmap_entries,
			sces,
		})
	}

	fn flush_tail(&mut self) {
		log::trace!("SQ updating tail: {}", self.local_tail);
		self.k_tail.store(self.local_tail, Ordering::Release);
	}

	fn head(&mut self) -> u32 {
		if self.cached_head != self.local_tail {
			// ring not empty
			if 0 == ((self.cached_head ^ self.local_tail) & self.ring_mask) {
				// head and tail point to same entry, potentially full; refresh cache
				self.refresh_head();
			}
		}
		self.cached_head
	}

	fn refresh_head(&mut self) -> u32 {
		self.cached_head = self.k_head.load(Ordering::Acquire);
		log::trace!("Refreshed SQ head: {}", self.cached_head);
		self.cached_head
	}

	pub fn is_full(&mut self) -> bool {
		let head = self.head();
		(head != self.local_tail) // not empty
			&& 0 == ((self.cached_head ^ self.local_tail) & self.ring_mask) // point to same entry
	}

	pub fn bulk(&mut self) -> BulkSubmission {
		BulkSubmission(self)
	}

	pub fn flags(&self) -> SubmissionQueueFlags {
		SubmissionQueueFlags::from_bits_truncate(self.k_flags.load(Ordering::Relaxed))
	}

	pub fn dropped(&self) -> u32 {
		self.k_dropped.load(Ordering::Relaxed)
	}

	pub fn has_pending_submissions(&mut self) -> bool {
		self.refresh_head() != self.local_tail
	}

	pub fn pending_submissions(&mut self) -> u32 {
		self.local_tail - self.refresh_head()
	}
}

#[derive(Debug)]
pub enum SubmissionError<E> {
	QueueFull,
	FillError(E),
}

impl<E: std::fmt::Display> std::fmt::Display for SubmissionError<E> {
	fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
		match self {
			SubmissionError::QueueFull => write!(f, "submission error: queue full"),
			SubmissionError::FillError(e) => write!(f, "submission error: {}", e),
		}
	}
}

impl<E: std::error::Error + 'static> std::error::Error for SubmissionError<E> {
	fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
		match self {
			SubmissionError::QueueFull => None,
			SubmissionError::FillError(e) => Some(e),
		}
	}
}

pub struct BulkSubmission<'a>(&'a mut SubmissionQueue);

impl BulkSubmission<'_> {
	pub fn is_full(&mut self) -> bool {
		self.0.is_full()
	}

	pub fn submit_with<E, F>(&mut self, f: F) -> Result<(), SubmissionError<E>>
	where
		F: FnOnce(&mut SubmissionEntry) -> Result<(), E>
	{
		if self.is_full() { return Err(SubmissionError::QueueFull); }
		let ndx = self.0.local_tail;
		let entry = unsafe { &mut *self.0.sces.add((ndx & self.0.ring_mask) as usize) };
		entry.clear();
		f(entry).map_err(SubmissionError::FillError)?;
		log::debug!("Submit: @{} -> {:?}", ndx, entry);
		self.0.local_tail = ndx.wrapping_add(1);
		Ok(())
	}
}

impl Drop for BulkSubmission<'_> {
	fn drop(&mut self) {
		self.0.flush_tail();
	}
}
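
// Staging sketch: entries staged through a bulk handle become visible to the
// kernel when the handle drops (Drop calls flush_tail()):
//
//     let mut bulk = sq.bulk();
//     bulk.submit_with(|entry| {
//         entry.poll_add(FileDescriptor::FD(fd), PollFlags::IN);
//         entry.user_data = 42;
//         Ok::<_, std::convert::Infallible>(())
//     })?;
//     drop(bulk); // publish the new tail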

pub struct CompletionQueue {
	_mmap: MappedMemory,

	// updated by userspace; increment after entry at head was read; for
	// bulk reading only increment local_head and write back later
	k_head: &'static AtomicU32,
	local_head: u32,

	// updated by kernel; update cached value once (cached) head reaches
	// cached tail.
	k_tail: &'static AtomicU32,
	cached_tail: u32,

	// k_ring_mask: *mut u32,
	ring_mask: u32,

	// k_ring_entries: *mut u32,
	// ring_entries: u32,

	k_overflow: &'static AtomicU32,
	k_cqes: *mut CompletionEntry,
}

impl CompletionQueue {
	fn new(file: &UringFile, offsets: &CompletionQueueRingOffsets, cq_entries: u32) -> io::Result<Self> {
		let mmap = MappedMemory::map(
			file.as_raw_fd(),
			SetupParameters::COMPLETION_QUEUE_RING_OFFSET,
			(offsets.cqes as usize) + size_of::<CompletionEntry>() * (cq_entries as usize),
		)?;
		let k_head: &AtomicU32 = unsafe { &*mmap.get_field(offsets.head) };
		let local_head = k_head.load(Ordering::Relaxed);
		let k_tail: &AtomicU32 = unsafe { &*mmap.get_field(offsets.tail) };
		let cached_tail = k_tail.load(Ordering::Relaxed);
		let k_ring_mask: *mut u32 = mmap.get_field(offsets.ring_mask);
		let ring_mask = unsafe { *k_ring_mask };
		let k_ring_entries: *mut u32 = mmap.get_field(offsets.ring_entries);
		let ring_entries = unsafe { *k_ring_entries };
		let k_overflow: &AtomicU32 = unsafe { &*mmap.get_field(offsets.overflow) };
		let k_cqes: *mut CompletionEntry = mmap.get_field(offsets.cqes);

		assert_eq!(cq_entries, ring_entries);
		assert!(ring_entries.is_power_of_two());
		assert_eq!(ring_mask, ring_entries - 1);

		Ok(CompletionQueue {
			_mmap: mmap,
			k_head,
			local_head,
			k_tail,
			cached_tail,
			ring_mask,
			// ring_entries,
			k_overflow,
			k_cqes,
		})
	}

	fn flush_head(&mut self) {
		self.k_head.store(self.local_head, Ordering::Release);
	}

	fn refresh_tail(&mut self) -> u32 {
		self.cached_tail = self.k_tail.load(Ordering::Acquire);
		self.cached_tail
	}

	fn is_empty(&mut self) -> bool {
		if self.cached_tail == self.local_head {
			self.refresh_tail();
			self.cached_tail == self.local_head
		} else {
			false
		}
	}

	pub fn overflow(&mut self) -> u32 {
		self.k_overflow.load(Ordering::Relaxed)
	}
}

impl<'a> IntoIterator for &'a mut CompletionQueue {
	type Item = CompletionEntry;
	type IntoIter = BulkCompletion<'a>;

	fn into_iter(self) -> Self::IntoIter {
		BulkCompletion(self)
	}
}

pub struct BulkCompletion<'a>(&'a mut CompletionQueue);

impl Iterator for BulkCompletion<'_> {
	type Item = CompletionEntry;

	fn next(&mut self) -> Option<Self::Item> {
		if self.0.is_empty() { return None; }
		let ndx = self.0.local_head;
		let item = unsafe { &*self.0.k_cqes.add((ndx & self.0.ring_mask) as usize) }.clone();
		self.0.local_head = ndx.wrapping_add(1);
		log::debug!("Completed: @{} -> {:?}", ndx, item);
		Some(item)
	}
}

impl Drop for BulkCompletion<'_> {
	fn drop(&mut self) {
		self.0.flush_head();
	}
}
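
// Draining sketch: iterating consumes entries; the new head is written back
// to the kernel when the iterator drops (Drop calls flush_head()):
//
//     for cqe in uring.completion_queue() {
//         println!("completed: user_data={} res={}", cqe.user_data, cqe.res);
//     }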

pub struct UringFile(std::fs::File);

impl UringFile {
	pub fn new(entries: u32, params: &mut SetupParameters) -> io::Result<Self> {
		let res = unsafe {
			io_uring_setup(entries, params)
		};
		if res < 0 {
			return Err(io::Error::last_os_error());
		}
		Ok(unsafe { Self::from_raw_fd(res) })
	}

	pub fn enter(&mut self, to_submit: u32, min_complete: u32, flags: EnterFlags, sig: Option<&libc::sigset_t>) -> io::Result<()> {
		let sig = match sig {
			Some(sig) => sig as *const _,
			None => 0 as *const _,
		};
		if unsafe { io_uring_enter(self.as_raw_fd(), to_submit, min_complete, flags.bits(), sig) } < 0 {
			Err(io::Error::last_os_error())
		} else {
			Ok(())
		}
	}

	/// can only register one list of buffers at once; needs an explicit
	/// unregister before registering the next list.
	///
	/// unsafe because it passes raw pointers in the iovecs.
	pub unsafe fn register_buffers(&mut self, buffers: &[libc::iovec]) -> io::Result<()> {
		self.register(RegisterOpCode::REGISTER_BUFFERS, buffers.as_ptr() as *const _, buffers.len() as u32)
	}

	/// fails if there are currently no buffers registered.
	pub fn unregister_buffers(&mut self) -> io::Result<()> {
		unsafe {
			self.register(RegisterOpCode::UNREGISTER_BUFFERS, 0 as *const _, 0)
		}
	}

	/// can only register one list of fds at once; needs an explicit
	/// unregister before registering the next list.
	pub fn register_files(&mut self, fds: &[RawFd]) -> io::Result<()> {
		assert!(fds.len() <= u32::max_value() as usize);
		unsafe {
			self.register(RegisterOpCode::REGISTER_FILES, fds.as_ptr() as *const _, fds.len() as u32)
		}
	}

	/// fails if there is currently no fd set registered.
	pub fn unregister_files(&mut self) -> io::Result<()> {
		unsafe {
			self.register(RegisterOpCode::UNREGISTER_FILES, 0 as *const _, 0)
		}
	}

	pub unsafe fn register(&self, opcode: RegisterOpCode, arg: *const libc::c_void, nr_args: u32) -> io::Result<()> {
		if io_uring_register(self.as_raw_fd(), opcode.0, arg, nr_args) != 0 {
			Err(io::Error::last_os_error())
		} else {
			Ok(())
		}
	}
}

impl AsRawFd for UringFile {
	fn as_raw_fd(&self) -> RawFd {
		self.0.as_raw_fd()
	}
}

impl IntoRawFd for UringFile {
	fn into_raw_fd(self) -> RawFd {
		self.0.into_raw_fd()
	}
}

impl FromRawFd for UringFile {
	unsafe fn from_raw_fd(fd: RawFd) -> Self {
		UringFile(std::fs::File::from_raw_fd(fd))
	}
}
--------------------------------------------------------------------------------
/io-uring/src/mmap.rs:
--------------------------------------------------------------------------------
use std::fmt;
use std::io;

pub struct MappedMemory {
	addr: *mut libc::c_void,
	len: usize,
}

impl MappedMemory {
	pub fn map(fd: libc::c_int, offset: i64, len: usize) -> io::Result<Self> {
		let addr = unsafe { libc::mmap(
			0 as *mut _,
			len,
			libc::PROT_READ | libc::PROT_WRITE,
			libc::MAP_SHARED | libc::MAP_POPULATE,
			fd,
			offset,
		) };
		if addr == libc::MAP_FAILED {
			Err(io::Error::last_os_error())
		} else {
			Ok(MappedMemory {
				addr,
				len,
			})
		}
	}

	pub fn as_mut_ptr(&self) -> *mut libc::c_void {
		self.addr
	}

	/*
	pub fn len(&self) -> usize {
		self.len
	}
	*/

	pub fn get_field<T>(&self, offset: u32) -> *mut T {
		(self.addr as usize + (offset as usize)) as *mut T
	}
}

impl Drop for MappedMemory {
	fn drop(&mut self) {
		if self.len != 0 {
			if 0 != unsafe { libc::munmap(self.addr, self.len) } {
				log::error!(
					"munmap(0x{:x}, {}) failed: {}",
					self.addr as usize,
					self.len,
					std::io::Error::last_os_error(),
				);
			}
		}
	}
}

impl fmt::Debug for MappedMemory {
	fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
		write!(f, "MappedMemory{{addr: 0x{:x}, len: {}}}", self.addr as usize, self.len)
	}
}
--------------------------------------------------------------------------------
/tokio-uring-reactor/Cargo.toml:
--------------------------------------------------------------------------------
[package]
name = "tokio-uring-reactor"
version = "0.1.0"
authors = ["Stefan Bühler"]
edition = "2018"

[features]
nightly-async = ["futures-core-preview"]

[dependencies]
futures = "0.1.26"
tokio-executor = "0.1.7"

io-uring = { path = "../io-uring" }
libc = "0.2.51"
timerfd = "1.0.0"
log = "0.4.6"

futures-core-preview = { version = "=0.3.0-alpha.15", optional = true }
--------------------------------------------------------------------------------
/tokio-uring-reactor/src/io.rs:
--------------------------------------------------------------------------------
use std::os::unix::io::{AsRawFd, RawFd};
use std::rc::Rc;

use crate::reactor::{
	Handle,
	AsyncRead,
	AsyncWrite,
};

pub trait SocketRead: AsRawFd + Sized {
	fn read<T: AsMut<[u8]>>(self, handle: &Handle, buf: T) -> AsyncRead<Self, T> {
		handle.async_read(self, 0, buf)
	}

	fn split(self) -> (SplitRead<Self>, SplitWrite<Self>)
	where
		Self: SocketWrite,
	{
		split(self)
	}
}

pub trait SocketWrite: AsRawFd + Sized {
	fn write<T: AsRef<[u8]>>(self, handle: &Handle, buf: T) -> AsyncWrite<Self, T> {
		handle.async_write(self, 0, buf)
	}
}

pub fn split<T>(rw: T) -> (SplitRead<T>, SplitWrite<T>) {
	let rw = Rc::new(rw);
	(SplitRead(rw.clone()), SplitWrite(rw))
}

#[derive(Debug)]
pub struct SplitRead<T>(Rc<T>);

impl<T: AsRawFd> AsRawFd for SplitRead<T> {
	fn as_raw_fd(&self) -> RawFd {
		self.0.as_raw_fd()
	}
}

impl<T: AsRawFd> SocketRead for SplitRead<T> {
}

#[derive(Debug)]
pub struct SplitWrite<T>(Rc<T>);

impl<T: AsRawFd> AsRawFd for SplitWrite<T> {
	fn as_raw_fd(&self) -> RawFd {
		self.0.as_raw_fd()
	}
}

impl<T: AsRawFd> SocketWrite for SplitWrite<T> {
}
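
// Split usage sketch: the halves share the socket through an Rc, so reads and
// writes can be driven as independent futures on the same thread:
//
//     let (r, w) = stream.split();
//     let read_op = r.read(&handle, vec![0u8; 512]);
//     let write_op = w.write(&handle, b"hello".to_vec());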
--------------------------------------------------------------------------------
/tokio-uring-reactor/src/lib.rs:
--------------------------------------------------------------------------------
mod reactor;
mod registration;
mod unpark;
pub mod io;
pub mod net;

pub use crate::{
	reactor::{
		Reactor,
		Handle,
		Unpark,
	},
};

pub fn with_default<F, R>(_handle: &Handle, enter: &mut tokio_executor::Enter, f: F) -> R
where
	F: FnOnce(&mut tokio_executor::Enter) -> R,
{
	// TODO: some enter construction?
	f(enter)
}

fn set_non_block(fd: libc::c_int) {
	unsafe {
		let flags = libc::fcntl(fd, libc::F_GETFL, 0);
		if 0 == flags & libc::O_NONBLOCK {
			libc::fcntl(fd, libc::F_SETFL, flags | libc::O_NONBLOCK);
		}
	}
}
--------------------------------------------------------------------------------
/tokio-uring-reactor/src/net.rs:
--------------------------------------------------------------------------------
use std::net;
use std::io;
use std::os::unix::io::{RawFd, AsRawFd};

#[cfg(feature = "nightly-async")]
use std::{
	pin::Pin,
	future::Future,
	task::Poll,
	task::Context,
};
#[cfg(feature = "nightly-async")]
use futures_core;

use crate::Handle;

#[derive(Debug)]
pub struct TcpListener(net::TcpListener);

impl TcpListener {
	pub fn incoming(self, handle: &Handle) -> Incoming {
		let fd = self.0.as_raw_fd();
		Incoming {
			inner: self,
			blocked: true, // poll first
			poll: handle.async_poll(fd, io_uring::PollFlags::IN),
		}
	}
}

impl From<net::TcpListener> for TcpListener {
	fn from(l: net::TcpListener) -> Self {
		crate::set_non_block(l.as_raw_fd());
		TcpListener(l)
	}
}

#[must_use = "streams do nothing unless polled"]
#[derive(Debug)]
pub struct Incoming {
	inner: TcpListener,
	blocked: bool,
	poll: crate::reactor::AsyncPoll,
}

impl futures::Stream for Incoming {
	type Item = (TcpStream, net::SocketAddr);
	type Error = io::Error;

	fn poll(&mut self) -> futures::Poll<Option<Self::Item>, Self::Error> {
		loop {
			if !self.blocked {
				match self.inner.0.accept() {
					Ok((s, a)) => return Ok(futures::Async::Ready(Some((
						TcpStream(s),
						a,
					)))),
					Err(e) => {
						if e.kind() == io::ErrorKind::Interrupted {
							continue; // again
						} else if e.kind() == io::ErrorKind::WouldBlock {
							self.blocked = true;
						} else {
							return Err(e);
						}
					}
				}
			}
			match self.poll.poll()? {
				futures::Async::NotReady => return Ok(futures::Async::NotReady),
				futures::Async::Ready(None) => unreachable!(),
				futures::Async::Ready(Some(_events)) => {
					// println!("Incoming events: {:?}", _events);
					self.blocked = false;
					// try loop again
				},
			}
		}
	}
}

#[cfg(feature = "nightly-async")]
impl futures_core::Stream for Incoming {
	type Item = io::Result<(TcpStream, net::SocketAddr)>;

	fn poll_next(mut self: Pin<&mut Self>, ctx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
		loop {
			if !self.blocked {
				match self.inner.0.accept() {
					Ok((s, a)) => return Poll::Ready(Some(Ok((
						TcpStream(s),
						a,
					)))),
					Err(e) => {
						if e.kind() == io::ErrorKind::Interrupted {
							continue; // again
						} else if e.kind() == io::ErrorKind::WouldBlock {
							self.blocked = true;
						} else {
							return Poll::Ready(Some(Err(e)));
						}
					}
				}
			}
			match unsafe { Pin::new_unchecked(&mut self.poll) }.poll(ctx)? {
				Poll::Pending => return Poll::Pending,
				Poll::Ready(_events) => {
					// println!("Incoming events: {:?}", _events);
					self.blocked = false;
					// try loop again
				},
			}
		}
	}
}

#[derive(Debug)]
pub struct TcpStream(net::TcpStream);

impl AsRawFd for TcpStream {
	fn as_raw_fd(&self) -> RawFd {
		self.0.as_raw_fd()
	}
}

impl crate::io::SocketRead for TcpStream {}
impl crate::io::SocketWrite for TcpStream {}
--------------------------------------------------------------------------------
/tokio-uring-reactor/src/reactor.rs:
--------------------------------------------------------------------------------
mod async_poll;
mod async_read;
mod async_write;

use std::{
	cell::UnsafeCell,
	convert::Infallible,
	fmt,
	io,
	os::unix::io::{RawFd, AsRawFd},
	pin::Pin,
	rc::{Rc, Weak},
	time::Duration,
};

use crate::{
	registration::{
		RawRegistration,
		UringResult,
	},
	unpark,
};

pub use self::async_poll::AsyncPoll;
pub use self::async_read::AsyncRead;
pub use self::async_write::AsyncWrite;

fn iovec_from(data: &[u8]) -> libc::iovec {
	libc::iovec {
		iov_base: data.as_ptr() as *mut libc::c_void,
		iov_len: data.len(),
	}
}

fn iovec_empty() -> libc::iovec {
	libc::iovec {
		iov_base: 0 as *mut libc::c_void,
		iov_len: 0,
	}
}

fn sq_full_map_err(_error: io_uring::SubmissionError<Infallible>) -> io::Error {
	io::Error::new(io::ErrorKind::Other, "submission queue full")
}

pub struct Unpark(unpark::Unpark);

impl tokio_executor::park::Unpark for Unpark {
	fn unpark(&self) {
		self.0.unpark();
	}
}

// stuff we need to mutate during uring completion handling
struct CompletionState {
	requeue_timer: bool,
	timer_pending: bool,
	requeue_park: bool,
	active_wait: usize,
	park: unpark::Park,
}

impl CompletionState {
	// special events must be "odd" (registration pointers are even)
	const TIMER: u64 = 0x1;
	const PARK: u64 = 0x3;

	fn new() -> io::Result<Self> {
		Ok(CompletionState {
			requeue_timer: true,
			timer_pending: false,
			requeue_park: true,
			active_wait: 0,
			park: unpark::Park::new()?,
		})
	}

	fn handle_completion(&mut self, user_data: u64, result: UringResult) {
		if 0 == user_data {
			// fire-and-forget command (POLL_DEL)
			return;
		}
		self.active_wait -= 1;
		if 0 == user_data & 0x1 {
			let mut reg = unsafe { RawRegistration::from_user_data(user_data) };
			reg.notify(result);
		} else {
			match user_data {
				CompletionState::TIMER => {
					// wakeup by timer, just requeue read and rearm/disable next turn
					self.requeue_timer = true;
					self.timer_pending = true;
				},
				CompletionState::PARK => {
					// wakeup by park, just requeue read
					self.park.clear_event();
					self.requeue_park = true;
				},
				_ => panic!("unknown event: {}", user_data),
			}
		}
	}
}
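
// Dispatch sketch: user_data values multiplex three kinds of completions, and
// the odd/even distinction keeps internal events apart from registrations:
//
//     0            -> fire-and-forget, dropped
//     0x1 / 0x3    -> internal TIMER / PARK wakeups (odd tags)
//     even pointer -> RawRegistration::from_user_data() and notify()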
*at 107 | // least* internal operations before freeing memory 108 | uring: io_uring::Uring, 109 | completion_state: CompletionState, 110 | timerfd: timerfd::TimerFd, 111 | read_buf: [u8; 32], // for various wakeup mechanisms 112 | read_iovec: [libc::iovec; 1], 113 | } 114 | 115 | impl Inner { 116 | fn build() -> io::Result { 117 | let params = io_uring::SetupParameters::new(io_uring::SetupFlags::default()); 118 | 119 | Ok(Inner { 120 | uring: io_uring::Uring::new(4096, params)?, 121 | completion_state: CompletionState::new()?, 122 | timerfd: timerfd::TimerFd::new()?, 123 | read_buf: [0u8; 32], 124 | read_iovec: [ iovec_empty() ], 125 | }) 126 | } 127 | 128 | fn init(mut self: Pin<&mut Self>) { 129 | let iovec = iovec_from(&self.as_ref().read_buf); 130 | self.as_mut().read_iovec[0] = iovec; 131 | } 132 | 133 | // returns true if at least one completion was received 134 | fn check_completions(&mut self) -> bool { 135 | let mut received_completion: bool = false; 136 | 137 | for cqe in self.uring.completion_queue().into_iter() { 138 | received_completion = true; 139 | 140 | let result = UringResult { 141 | result: cqe.res, 142 | flags: cqe.flags, 143 | }; 144 | 145 | self.completion_state.handle_completion(cqe.user_data, result); 146 | } 147 | 148 | received_completion 149 | } 150 | 151 | fn park_inner(&mut self, mut wait: bool, timeout: Option) -> io::Result<()> { 152 | if self.check_completions() { 153 | // don't wait for new events below; we first need to handle this one 154 | wait = false; 155 | } 156 | 157 | // proper check later, but don't need to setup various things if 158 | // we already know we're not going to wait 159 | if self.completion_state.park.pending() { 160 | wait = false; 161 | } 162 | 163 | if wait { 164 | // set timer before we requeue it 165 | if let Some(timeout) = timeout { 166 | log::trace!("wait with timeout: {:?}", timeout); 167 | debug_assert!(timeout != Duration::new(0, 0)); // "zero" timer must trigger wait = false 168 | self.timerfd.set_state(timerfd::TimerState::Oneshot(timeout), timerfd::SetTimeFlags::Default); 169 | self.completion_state.timer_pending = false; 170 | } else { 171 | log::trace!("wait without timeout"); 172 | if self.completion_state.timer_pending { 173 | // disarm timer after it triggered (instead of reading it) 174 | self.timerfd.set_state(timerfd::TimerState::Disarmed, timerfd::SetTimeFlags::Default); 175 | self.completion_state.timer_pending = false; 176 | } 177 | } 178 | 179 | if self.completion_state.requeue_timer { 180 | if self.queue_timer_poll().is_err() { 181 | // never wait if submission queue is full and we couldn't insert timer 182 | wait = false; 183 | } else { 184 | self.completion_state.requeue_timer = false; 185 | } 186 | } 187 | } 188 | 189 | if wait && self.completion_state.requeue_park { 190 | if self.queue_park_read().is_err() { 191 | // never wait if submission queue is full and we couldn't insert park 192 | wait = false; 193 | } else { 194 | self.completion_state.requeue_park = false; 195 | } 196 | } 197 | 198 | let pending = self.uring.submission_queue().pending_submissions(); 199 | 200 | { 201 | let park_enter = self.completion_state.park.enter(); 202 | if !park_enter.allow_wait { 203 | wait = false; 204 | } 205 | 206 | if 0 == pending && !wait { 207 | log::trace!("nothing to submit and not waiting, not calling io_uring_enter"); 208 | return Ok(()); 209 | } 210 | 211 | let (min_complete, flags) = if wait { 212 | (1, io_uring::EnterFlags::GETEVENTS) 213 | } else { 214 | // submit only 215 | (0, 
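			// 0 completions to wait for, and no GETEVENTS flag: this
			// io_uring_enter(2) call only flushes pending submissions and
			// never blocks, unlike the (1, GETEVENTS) branch above.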
io_uring::EnterFlags::default()) 216 | }; 217 | 218 | log::trace!( 219 | "io_uring_enter: (to_submit = {}, min_complete = {}, flags = {:?}, sig = None)", 220 | pending, 221 | min_complete, 222 | flags, 223 | ); 224 | self.uring.file().enter(pending, min_complete, flags, None)?; 225 | } 226 | 227 | self.check_completions(); 228 | 229 | Ok(()) 230 | } 231 | 232 | fn park(&mut self) -> io::Result<()> { 233 | self.park_inner(true, None) 234 | } 235 | 236 | fn park_timeout(&mut self, duration: Duration) -> io::Result<()> { 237 | if duration == Duration::new(0, 0) { 238 | // don't wait at all 239 | self.park_inner(false, None) 240 | } else { 241 | self.park_inner(true, Some(duration)) 242 | } 243 | } 244 | 245 | fn queue_timer_poll(&mut self) -> Result<(), io_uring::SubmissionError> { 246 | let fd = self.timerfd.as_raw_fd(); 247 | self.uring.submission_queue().bulk().submit_with(|entry| { 248 | entry.poll_add( 249 | io_uring::FileDescriptor::FD(fd), 250 | io_uring::PollFlags::IN, 251 | ); 252 | entry.user_data = CompletionState::TIMER; 253 | Ok(()) 254 | })?; 255 | self.completion_state.active_wait += 1; 256 | Ok(()) 257 | } 258 | 259 | fn queue_park_read(&mut self) -> Result<(), io_uring::SubmissionError> { 260 | let fd = self.completion_state.park.as_raw_fd(); 261 | //let iovec = &self.read_iovec; 262 | self.uring.submission_queue().bulk().submit_with(|entry| { 263 | entry.poll_add( 264 | io_uring::FileDescriptor::FD(fd), 265 | io_uring::PollFlags::IN, 266 | ); 267 | 268 | /* 269 | unsafe { 270 | entry.readv( 271 | io_uring::IoPriority::None, 272 | io_uring::FileDescriptor::FD(fd), 273 | 0, 274 | io_uring::ReadWriteFlags::default(), 275 | iovec, 276 | ); 277 | } 278 | */ 279 | 280 | entry.user_data = CompletionState::PARK; 281 | Ok(()) 282 | })?; 283 | self.completion_state.active_wait += 1; 284 | Ok(()) 285 | } 286 | 287 | fn queue_async_read(&mut self, fd: RawFd, offset: u64, iovec: *const [libc::iovec], reg: RawRegistration) -> io::Result<()> { 288 | self.uring.submission_queue().bulk().submit_with(|entry| { 289 | unsafe { 290 | entry.readv( 291 | io_uring::IoPriority::None, 292 | io_uring::FileDescriptor::FD(fd), 293 | offset, 294 | io_uring::ReadWriteFlags::default(), 295 | iovec, 296 | ); 297 | entry.user_data = reg.into_user_data(); 298 | } 299 | Ok(()) 300 | }).map_err(sq_full_map_err)?; 301 | self.completion_state.active_wait += 1; 302 | Ok(()) 303 | } 304 | 305 | fn queue_async_write(&mut self, fd: RawFd, offset: u64, iovec: *const [libc::iovec], reg: RawRegistration) -> io::Result<()> { 306 | self.uring.submission_queue().bulk().submit_with(|entry| { 307 | unsafe { 308 | entry.writev( 309 | io_uring::IoPriority::None, 310 | io_uring::FileDescriptor::FD(fd), 311 | offset, 312 | io_uring::ReadWriteFlags::default(), 313 | iovec, 314 | ); 315 | entry.user_data = reg.into_user_data(); 316 | } 317 | Ok(()) 318 | }).map_err(sq_full_map_err)?; 319 | self.completion_state.active_wait += 1; 320 | Ok(()) 321 | } 322 | 323 | fn queue_async_poll(&mut self, fd: RawFd, flags: io_uring::PollFlags, reg: RawRegistration) -> io::Result<()> { 324 | self.uring.submission_queue().bulk().submit_with(|entry| { 325 | unsafe { 326 | entry.poll_add( 327 | io_uring::FileDescriptor::FD(fd), 328 | flags, 329 | ); 330 | entry.user_data = reg.into_user_data(); 331 | } 332 | Ok(()) 333 | }).map_err(sq_full_map_err)?; 334 | self.completion_state.active_wait += 1; 335 | Ok(()) 336 | } 337 | } 338 | 339 | struct InnerMut { 340 | inner: Rc>, 341 | } 342 | 343 | impl InnerMut { 344 | fn pinned(&mut self) -> Pin<&mut 
Inner> {
345 | 		let ptr: *mut Inner = self.inner.get();
346 | 		unsafe { Pin::new_unchecked(&mut *ptr) }
347 | 	}
348 | }
349 | 
350 | pub struct Reactor {
351 | 	inner: Rc<UnsafeCell<Inner>>,
352 | }
353 | 
354 | impl Reactor {
355 | 	fn build() -> io::Result<Reactor> {
356 | 		let inner = Rc::new(UnsafeCell::new(Inner::build()?));
357 | 		Ok(Reactor {
358 | 			inner,
359 | 		})
360 | 	}
361 | 
362 | 	pub fn new() -> io::Result<Reactor> {
363 | 		let reactor = Self::build()?;
364 | 		reactor.inner_mut().pinned().init();
365 | 		Ok(reactor)
366 | 	}
367 | 
368 | 	fn inner_mut(&self) -> InnerMut {
369 | 		let inner = self.inner.clone();
370 | 		InnerMut {
371 | 			inner
372 | 		}
373 | 	}
374 | 
375 | 	pub fn handle(&self) -> Handle {
376 | 		Handle(Rc::downgrade(&self.inner))
377 | 	}
378 | }
379 | 
380 | impl tokio_executor::park::Park for Reactor {
381 | 	type Unpark = Unpark;
382 | 
383 | 	type Error = io::Error;
384 | 
385 | 	fn unpark(&self) -> Self::Unpark {
386 | 		Unpark(self.inner_mut().pinned().completion_state.park.unpark())
387 | 	}
388 | 
389 | 	fn park(&mut self) -> Result<(), Self::Error> {
390 | 		self.inner_mut().pinned().park()
391 | 	}
392 | 
393 | 	fn park_timeout(&mut self, duration: std::time::Duration) -> Result<(), Self::Error> {
394 | 		self.inner_mut().pinned().park_timeout(duration)
395 | 	}
396 | }
397 | 
398 | impl fmt::Debug for Reactor {
399 | 	fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
400 | 		write!(f, "Reactor {{..}}")
401 | 	}
402 | }
403 | 
404 | 
405 | #[derive(Clone)]
406 | pub struct Handle(Weak<UnsafeCell<Inner>>);
407 | 
408 | impl Handle {
409 | 	fn inner_mut(&self) -> io::Result<InnerMut> {
410 | 		let inner = self.0.upgrade().ok_or_else(|| {
411 | 			io::Error::new(io::ErrorKind::Other, "uring reactor dead")
412 | 		})?;
413 | 
414 | 		Ok(InnerMut { inner })
415 | 	}
416 | 
417 | 	pub fn async_read<T, F>(&self, file: F, offset: u64, buf: T) -> AsyncRead<T, F>
418 | 	where
419 | 		T: AsMut<[u8]> + 'static,
420 | 		F: AsRawFd + 'static,
421 | 	{
422 | 		AsyncRead::new(self, file, offset, buf)
423 | 	}
424 | 
425 | 	pub fn async_write<T, F>(&self, file: F, offset: u64, buf: T) -> AsyncWrite<T, F>
426 | 	where
427 | 		T: AsRef<[u8]> + 'static,
428 | 		F: AsRawFd + 'static,
429 | 	{
430 | 		AsyncWrite::new(self, file, offset, buf)
431 | 	}
432 | 
433 | 	pub fn async_poll(&self, fd: RawFd, flags: io_uring::PollFlags) -> AsyncPoll {
434 | 		AsyncPoll::new(self, fd, flags)
435 | 	}
436 | }
437 | 
438 | impl fmt::Debug for Handle {
439 | 	fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
440 | 		write!(f, "Handle {{..}}")
441 | 	}
442 | }
443 | 
--------------------------------------------------------------------------------
/tokio-uring-reactor/src/reactor/async_poll.rs:
--------------------------------------------------------------------------------
1 | use std::{
2 | 	io,
3 | 	os::unix::io::RawFd,
4 | };
5 | 
6 | use crate::{
7 | 	reactor::{
8 | 		Handle,
9 | 	},
10 | 	registration::{
11 | 		Registration,
12 | 	},
13 | };
14 | 
15 | // TODO: Dropping AsyncPoll should trigger a POLL_DEL
16 | #[derive(Debug)]
17 | pub struct AsyncPoll {
18 | 	handle: Handle,
19 | 	fd: RawFd,
20 | 	active: bool,
21 | 	flags: io_uring::PollFlags,
22 | 	registration: Registration<()>,
23 | }
24 | 
25 | impl AsyncPoll {
26 | 	pub fn new(handle: &Handle, fd: RawFd, flags: io_uring::PollFlags) -> AsyncPoll {
27 | 		let registration = Registration::new(());
28 | 
29 | 		AsyncPoll {
30 | 			active: false,
31 | 			handle: handle.clone(),
32 | 			fd,
33 | 			flags,
34 | 			registration,
35 | 		}
36 | 	}
37 | }
38 | 
39 | impl futures::Stream for AsyncPoll {
40 | 	type Item = io_uring::PollFlags;
41 | 	type Error = io::Error;
42 | 
43 | 	fn poll(&mut self) -> futures::Poll<Option<Self::Item>, Self::Error> {
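		// Two-phase poll: the first call queues a POLL_ADD SQE and registers
		// interest; subsequent calls only check the registration for the
		// completion result.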
44 | 		if !self.active {
45 | 			// println!("Register fd {} for events {:?}", self.fd, self.flags);
46 | 			let mut im = self.handle.inner_mut()?;
47 | 			im.pinned().queue_async_poll(self.fd, self.flags, self.registration.to_raw())?;
48 | 			self.active = true;
49 | 			self.registration.track();
50 | 			return Ok(futures::Async::NotReady);
51 | 		}
52 | 		match self.registration.poll_stream_and_reset() {
53 | 			futures::Async::NotReady => Ok(futures::Async::NotReady),
54 | 			futures::Async::Ready(r) => {
55 | 				self.active = false;
56 | 				if r.result < 0 {
57 | 					return Err(io::Error::from_raw_os_error(-r.result));
58 | 				}
59 | 				let flags = io_uring::PollFlags::from_bits_truncate(r.result as u16);
60 | 				Ok(futures::Async::Ready(Some(flags)))
61 | 			}
62 | 		}
63 | 	}
64 | }
65 | 
66 | #[cfg(feature = "nightly-async")]
67 | use std::{
68 | 	pin::Pin,
69 | 	future::Future,
70 | 	task::Poll,
71 | 	task::Context,
72 | };
73 | 
74 | #[cfg(feature = "nightly-async")]
75 | impl Future for AsyncPoll {
76 | 	type Output = io::Result<io_uring::PollFlags>;
77 | 
78 | 	fn poll(mut self: Pin<&mut Self>, ctx: &mut Context<'_>) -> Poll<Self::Output> {
79 | 		if !self.active {
80 | 			// println!("Register fd {} for events {:?}", self.fd, self.flags);
81 | 			let mut im = self.handle.inner_mut()?;
82 | 			im.pinned().queue_async_poll(self.fd, self.flags, self.registration.to_raw())?;
83 | 			self.active = true;
84 | 			self.registration.track_async(ctx.waker());
85 | 			return Poll::Pending;
86 | 		}
87 | 		match self.registration.poll_stream_and_reset_async(ctx.waker()) {
88 | 			Poll::Pending => Poll::Pending,
89 | 			Poll::Ready(r) => {
90 | 				self.active = false;
91 | 				if r.result < 0 {
92 | 					return Poll::Ready(Err(io::Error::from_raw_os_error(-r.result)));
93 | 				}
94 | 				let flags = io_uring::PollFlags::from_bits_truncate(r.result as u16);
95 | 				Poll::Ready(Ok(flags))
96 | 			}
97 | 		}
98 | 	}
99 | }
100 | 
--------------------------------------------------------------------------------
/tokio-uring-reactor/src/reactor/async_read.rs:
--------------------------------------------------------------------------------
1 | use std::{
2 | 	fmt,
3 | 	io,
4 | 	os::unix::io::{AsRawFd},
5 | };
6 | 
7 | use crate::{
8 | 	reactor::{
9 | 		Handle,
10 | 		iovec_empty,
11 | 		iovec_from,
12 | 	},
13 | 	registration::{
14 | 		Registration,
15 | 	},
16 | };
17 | 
18 | // #[non_exhaustive] TODO ?
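// The error type below hands the buffer and the file back to the caller, so a
// failed read loses neither resource. Illustrative use only (hypothetical
// `handle`, `file`, `buf` bindings; `wait` is the blocking futures 0.1 combinator):
//
//     match handle.async_read(file, 0, buf).wait() {
//         Ok((n, buf, file)) => { /* the first n bytes of buf are valid */ }
//         Err(e) => { let (buf, file) = (e.buffer, e.file); /* retry */ }
//     }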
19 | pub struct AsyncReadError { 20 | pub error: io::Error, 21 | pub buffer: T, 22 | pub file: F, 23 | } 24 | 25 | impl From> for io::Error { 26 | fn from(e: AsyncReadError) -> io::Error { 27 | e.error 28 | } 29 | } 30 | 31 | struct Context { 32 | iovec: [libc::iovec; 1], 33 | buffer: T, 34 | file: F, 35 | } 36 | 37 | impl Context { 38 | fn with_error(self, error: io::Error) -> AsyncReadError { 39 | AsyncReadError { 40 | error, 41 | buffer: self.buffer, 42 | file: self.file, 43 | } 44 | } 45 | } 46 | 47 | enum State { 48 | Pending(Registration>), 49 | InitFailed(AsyncReadError), 50 | Closed, 51 | } 52 | 53 | impl fmt::Debug for State { 54 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 55 | match self { 56 | State::Pending(ref p) => f.debug_tuple("Pending").field(p).finish(), 57 | State::InitFailed(ref e) => f.debug_tuple("InitFailed").field(&e.error).finish(), 58 | State::Closed => f.debug_tuple("Closed").finish(), 59 | } 60 | } 61 | } 62 | 63 | pub struct AsyncRead(State); 64 | 65 | impl AsyncRead { 66 | pub(super) fn new(handle: &Handle, file: F, offset: u64, buffer: T) -> AsyncRead 67 | where 68 | T: AsMut<[u8]> + 'static, 69 | F: AsRawFd + 'static, 70 | { 71 | let fd = file.as_raw_fd(); 72 | let context = Context { 73 | iovec: [ iovec_empty() ], // fill below 74 | buffer, 75 | file, 76 | }; 77 | 78 | let mut im = match handle.inner_mut() { 79 | Err(e) => return AsyncRead(State::InitFailed(context.with_error(e))), 80 | Ok(im) => im, 81 | }; 82 | 83 | // this "pins" buf, as the data is boxed 84 | let mut reg = Registration::new(context); 85 | let queue_result = { 86 | let iovec = unsafe { 87 | let d = reg.data_mut(); 88 | d.iovec[0] = iovec_from(d.buffer.as_mut()); 89 | &d.iovec 90 | }; 91 | 92 | im.pinned().queue_async_read(fd, offset, iovec, reg.to_raw()) 93 | }; 94 | if let Err(e) = queue_result { 95 | let context = reg.abort().expect("registration context"); 96 | return AsyncRead(State::InitFailed(context.with_error(e))); 97 | } 98 | AsyncRead(State::Pending(reg)) 99 | } 100 | } 101 | 102 | impl fmt::Debug for AsyncRead { 103 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 104 | f.debug_tuple("AsyncRead").field(&self.0).finish() 105 | } 106 | } 107 | 108 | impl futures::Future for AsyncRead { 109 | type Item = (usize, T, F); 110 | type Error = AsyncReadError; 111 | 112 | fn poll(&mut self) -> futures::Poll { 113 | match self.0 { 114 | State::Pending(ref mut p) => { 115 | match p.poll() { 116 | futures::Async::NotReady => Ok(futures::Async::NotReady), 117 | futures::Async::Ready((r, context)) => { 118 | let result = if r.result < 0 { 119 | Err(context.with_error(io::Error::from_raw_os_error(-r.result))) 120 | } else { 121 | Ok(futures::Async::Ready((r.result as usize, context.buffer, context.file))) 122 | }; 123 | std::mem::replace(&mut self.0, State::Closed); 124 | result 125 | } 126 | } 127 | }, 128 | _ => { 129 | match std::mem::replace(&mut self.0, State::Closed) { 130 | State::Pending(_) => unreachable!(), 131 | State::InitFailed(e) => Err(e), 132 | State::Closed => panic!("already finished"), 133 | } 134 | } 135 | } 136 | } 137 | } 138 | 139 | #[cfg(feature = "nightly-async")] 140 | use std::{ 141 | pin::Pin, 142 | task, 143 | future::Future, 144 | task::Poll, 145 | }; 146 | 147 | #[cfg(feature = "nightly-async")] 148 | impl Future for AsyncRead { 149 | type Output = Result<(usize, T, F), AsyncReadError>; 150 | 151 | fn poll(mut self: Pin<&mut Self>, ctx: &mut task::Context<'_>) -> Poll { 152 | let this: &mut Self = &mut *self; 153 | match this.0 { 154 | 
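			// Same state machine as the futures 0.1 impl above: completion moves
			// the state to Closed, and polling again after Ready panics
			// ("already finished").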
State::Pending(ref mut p) => { 155 | match p.poll_async(ctx.waker()) { 156 | Poll::Pending => Poll::Pending, 157 | Poll::Ready((r, context)) => { 158 | let result = if r.result < 0 { 159 | Err(context.with_error(io::Error::from_raw_os_error(-r.result))) 160 | } else { 161 | Ok((r.result as usize, context.buffer, context.file)) 162 | }; 163 | std::mem::replace(&mut this.0, State::Closed); 164 | Poll::Ready(result) 165 | } 166 | } 167 | }, 168 | _ => { 169 | match std::mem::replace(&mut this.0, State::Closed) { 170 | State::Pending(_) => unreachable!(), 171 | State::InitFailed(e) => Poll::Ready(Err(e)), 172 | State::Closed => panic!("already finished"), 173 | } 174 | } 175 | } 176 | } 177 | } 178 | -------------------------------------------------------------------------------- /tokio-uring-reactor/src/reactor/async_write.rs: -------------------------------------------------------------------------------- 1 | use std::{ 2 | fmt, 3 | io, 4 | os::unix::io::{AsRawFd}, 5 | }; 6 | 7 | use crate::{ 8 | reactor::{ 9 | Handle, 10 | iovec_empty, 11 | iovec_from, 12 | }, 13 | registration::{ 14 | Registration, 15 | }, 16 | }; 17 | 18 | // #[non_exhaustive] TODO ? 19 | pub struct AsyncWriteError { 20 | pub error: io::Error, 21 | pub buffer: T, 22 | pub file: F, 23 | } 24 | 25 | impl From> for io::Error { 26 | fn from(e: AsyncWriteError) -> io::Error { 27 | e.error 28 | } 29 | } 30 | 31 | struct Context { 32 | iovec: [libc::iovec; 1], 33 | buffer: T, 34 | file: F, 35 | } 36 | 37 | impl Context { 38 | fn with_error(self, error: io::Error) -> AsyncWriteError { 39 | AsyncWriteError { 40 | error, 41 | buffer: self.buffer, 42 | file: self.file, 43 | } 44 | } 45 | } 46 | 47 | enum State { 48 | Pending(Registration>), 49 | InitFailed(AsyncWriteError), 50 | Closed, 51 | } 52 | 53 | impl fmt::Debug for State { 54 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 55 | match self { 56 | State::Pending(ref p) => f.debug_tuple("Pending").field(p).finish(), 57 | State::InitFailed(ref e) => f.debug_tuple("InitFailed").field(&e.error).finish(), 58 | State::Closed => f.debug_tuple("Closed").finish(), 59 | } 60 | } 61 | } 62 | 63 | pub struct AsyncWrite(State); 64 | 65 | impl AsyncWrite { 66 | pub(super) fn new(handle: &Handle, file: F, offset: u64, buffer: T) -> AsyncWrite 67 | where 68 | T: AsRef<[u8]> + 'static, 69 | F: AsRawFd + 'static, 70 | { 71 | let fd = file.as_raw_fd(); 72 | let context = Context { 73 | iovec: [ iovec_empty() ], // fill below 74 | buffer, 75 | file, 76 | }; 77 | 78 | let mut im = match handle.inner_mut() { 79 | Err(e) => return AsyncWrite(State::InitFailed(context.with_error(e))), 80 | Ok(im) => im, 81 | }; 82 | 83 | // this "pins" buf, as the data is boxed 84 | let mut reg = Registration::new(context); 85 | let queue_result = { 86 | let iovec = unsafe { 87 | let d = reg.data_mut(); 88 | d.iovec[0] = iovec_from(d.buffer.as_ref()); 89 | &d.iovec 90 | }; 91 | 92 | im.pinned().queue_async_write(fd, offset, iovec, reg.to_raw()) 93 | }; 94 | if let Err(e) = queue_result { 95 | let context = reg.abort().expect("registration context"); 96 | return AsyncWrite(State::InitFailed(context.with_error(e))); 97 | } 98 | AsyncWrite(State::Pending(reg)) 99 | } 100 | } 101 | 102 | impl fmt::Debug for AsyncWrite { 103 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 104 | f.debug_tuple("AsyncWrite").field(&self.0).finish() 105 | } 106 | } 107 | 108 | impl futures::Future for AsyncWrite { 109 | type Item = (usize, T, F); 110 | type Error = AsyncWriteError; 111 | 112 | fn poll(&mut self) -> 
futures::Poll { 113 | match self.0 { 114 | State::Pending(ref mut p) => { 115 | match p.poll() { 116 | futures::Async::NotReady => Ok(futures::Async::NotReady), 117 | futures::Async::Ready((r, context)) => { 118 | let result = if r.result < 0 { 119 | Err(context.with_error(io::Error::from_raw_os_error(-r.result))) 120 | } else { 121 | Ok(futures::Async::Ready((r.result as usize, context.buffer, context.file))) 122 | }; 123 | std::mem::replace(&mut self.0, State::Closed); 124 | result 125 | } 126 | } 127 | } 128 | _ => { 129 | match std::mem::replace(&mut self.0, State::Closed) { 130 | State::Pending(_) => unreachable!(), 131 | State::InitFailed(e) => Err(e), 132 | State::Closed => panic!("already finished"), 133 | } 134 | } 135 | } 136 | } 137 | } 138 | 139 | #[cfg(feature = "nightly-async")] 140 | use std::{ 141 | pin::Pin, 142 | task, 143 | future::Future, 144 | task::Poll, 145 | }; 146 | 147 | #[cfg(feature = "nightly-async")] 148 | impl Future for AsyncWrite { 149 | type Output = Result<(usize, T, F), AsyncWriteError>; 150 | 151 | fn poll(mut self: Pin<&mut Self>, ctx: &mut task::Context) -> Poll { 152 | let this: &mut Self = &mut *self; 153 | match this.0 { 154 | State::Pending(ref mut p) => { 155 | match p.poll_async(ctx.waker()) { 156 | Poll::Pending => Poll::Pending, 157 | Poll::Ready((r, context)) => { 158 | let result = if r.result < 0 { 159 | Err(context.with_error(io::Error::from_raw_os_error(-r.result))) 160 | } else { 161 | Ok((r.result as usize, context.buffer, context.file)) 162 | }; 163 | std::mem::replace(&mut this.0, State::Closed); 164 | Poll::Ready(result) 165 | } 166 | } 167 | }, 168 | _ => { 169 | match std::mem::replace(&mut this.0, State::Closed) { 170 | State::Pending(_) => unreachable!(), 171 | State::InitFailed(e) => Poll::Ready(Err(e)), 172 | State::Closed => panic!("already finished"), 173 | } 174 | } 175 | } 176 | } 177 | } 178 | -------------------------------------------------------------------------------- /tokio-uring-reactor/src/registration.rs: -------------------------------------------------------------------------------- 1 | use std::any::Any; 2 | use std::cell::UnsafeCell; 3 | use std::fmt; 4 | use std::marker::PhantomData; 5 | use std::rc::Rc; 6 | 7 | #[cfg(feature = "nightly-async")] 8 | use std::{ 9 | task::Waker, 10 | task::Poll, 11 | }; 12 | 13 | enum CompatWaker { 14 | Empty, 15 | Old(futures::task::Task), 16 | #[cfg(feature = "nightly-async")] 17 | New(Waker), 18 | } 19 | 20 | impl CompatWaker { 21 | fn notify(&mut self) { 22 | match std::mem::replace(self, CompatWaker::Empty) { 23 | CompatWaker::Empty => (), 24 | CompatWaker::Old(t) => t.notify(), 25 | #[cfg(feature = "nightly-async")] 26 | CompatWaker::New(w) => w.wake(), 27 | } 28 | } 29 | 30 | fn register_old(&mut self) { 31 | *self = CompatWaker::Old(futures::task::current()); 32 | } 33 | 34 | #[cfg(feature = "nightly-async")] 35 | fn register_new(&mut self, waker: &Waker) { 36 | *self = CompatWaker::New(waker.clone()); 37 | } 38 | } 39 | 40 | impl Default for CompatWaker { 41 | fn default() -> Self { 42 | CompatWaker::Empty 43 | } 44 | } 45 | 46 | #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Debug, Default)] 47 | pub struct UringResult { 48 | pub result: i32, 49 | pub flags: u32, 50 | } 51 | 52 | #[derive(Default)] 53 | struct Inner { 54 | result: UringResult, 55 | finished: bool, 56 | waker: UnsafeCell, 57 | data: Option>, 58 | } 59 | 60 | pub struct RawRegistration { 61 | inner: Rc>, 62 | } 63 | 64 | impl RawRegistration { 65 | pub fn notify(&mut self, result: 
UringResult) { 66 | let inner = unsafe { &mut *self.inner.get() }; 67 | assert!(!inner.finished); 68 | inner.finished = true; 69 | inner.result = result; 70 | let waker = unsafe { &mut *inner.waker.get() }; 71 | waker.notify(); 72 | } 73 | 74 | pub unsafe fn into_user_data(self) -> u64 { 75 | let user_data = Rc::into_raw(self.inner) as usize as u64; 76 | assert!(user_data != 0 && user_data & 0x1 == 0); 77 | user_data 78 | } 79 | 80 | pub unsafe fn from_user_data(data: u64) -> Self { 81 | RawRegistration { 82 | inner: Rc::from_raw(data as usize as *const UnsafeCell), 83 | } 84 | } 85 | } 86 | 87 | pub struct Registration { 88 | inner: Rc>, 89 | _data_type: PhantomData, 90 | } 91 | 92 | impl Registration { 93 | pub fn new(data: T) -> Self { 94 | let inner = Inner { 95 | data: Some(Box::new(data)), 96 | .. Inner::default() 97 | }; 98 | Registration { 99 | inner: Rc::new(UnsafeCell::new(inner)), 100 | _data_type: PhantomData, 101 | } 102 | } 103 | 104 | pub fn track(&mut self) { 105 | let inner = unsafe { &mut *self.inner.get() }; 106 | let waker = unsafe { &mut *inner.waker.get() }; 107 | waker.register_old(); 108 | } 109 | 110 | #[cfg(feature = "nightly-async")] 111 | pub fn track_async(&mut self, waker: &Waker) { 112 | let inner = unsafe { &mut *self.inner.get() }; 113 | let w = unsafe { &mut *inner.waker.get() }; 114 | w.register_new(waker); 115 | } 116 | 117 | pub fn poll(&mut self) -> futures::Async<(UringResult, T)> { 118 | let inner = unsafe { &mut *self.inner.get() }; 119 | if inner.data.is_none() { 120 | // or panic? can't become ready again 121 | return futures::Async::NotReady; 122 | } 123 | if inner.finished { 124 | let result = inner.result; 125 | let data = inner.data.take().expect("data").downcast::().expect("type"); 126 | futures::Async::Ready((result, *data)) 127 | } else { 128 | let waker = unsafe { &mut *inner.waker.get() }; 129 | waker.register_old(); 130 | futures::Async::NotReady 131 | } 132 | } 133 | 134 | #[cfg(feature = "nightly-async")] 135 | pub fn poll_async(&mut self, waker: &Waker) -> Poll<(UringResult, T)> { 136 | let inner = unsafe { &mut *self.inner.get() }; 137 | if inner.data.is_none() { 138 | // or panic? can't become ready again 139 | return Poll::Pending; 140 | } 141 | if inner.finished { 142 | let result = inner.result; 143 | let data = inner.data.take().expect("data").downcast::().expect("type"); 144 | Poll::Ready((result, *data)) 145 | } else { 146 | let w = unsafe { &mut *inner.waker.get() }; 147 | w.register_new(waker); 148 | Poll::Pending 149 | } 150 | } 151 | 152 | pub fn abort(self) -> Option { 153 | Some(*Rc::try_unwrap(self.inner).ok()?.into_inner().data.expect("data").downcast::().expect("type")) 154 | } 155 | 156 | pub fn user_data(&self) -> u64 { 157 | let user_data = &(*self.inner) as *const UnsafeCell as usize as u64; 158 | assert!(user_data != 0 && user_data & 0x1 == 0); 159 | user_data 160 | } 161 | 162 | pub fn to_raw(&self) -> RawRegistration { 163 | let inner = self.inner.clone(); 164 | RawRegistration { 165 | inner, 166 | } 167 | } 168 | 169 | pub unsafe fn data_mut(&mut self) -> &mut T { 170 | let inner = &mut *self.inner.get(); 171 | inner.data.as_mut().expect("data").downcast_mut::().expect("type") 172 | } 173 | } 174 | 175 | impl Registration<()> { 176 | // if there is no data we can easily reuse the registration; the 177 | // caller must track though whether the registration is active or 178 | // not. 
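	// `poll_stream_and_reset` below clears `finished` on every Ready result,
	// so a single Registration<()> can back an entire stream of poll
	// completions (AsyncPoll relies on this) without reallocating.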
179 | pub fn poll_stream_and_reset(&mut self) -> futures::Async { 180 | let inner = unsafe { &mut *self.inner.get() }; 181 | if inner.finished { 182 | inner.finished = false; // reset 183 | futures::Async::Ready(inner.result) 184 | } else { 185 | let waker = unsafe { &mut *inner.waker.get() }; 186 | waker.register_old(); 187 | futures::Async::NotReady 188 | } 189 | } 190 | 191 | #[cfg(feature = "nightly-async")] 192 | pub fn poll_stream_and_reset_async(&mut self, waker: &Waker) -> Poll { 193 | let inner = unsafe { &mut *self.inner.get() }; 194 | if inner.finished { 195 | inner.finished = false; // reset 196 | Poll::Ready(inner.result) 197 | } else { 198 | let w = unsafe { &mut *inner.waker.get() }; 199 | w.register_new(waker); 200 | Poll::Pending 201 | } 202 | } 203 | } 204 | 205 | impl fmt::Debug for Registration { 206 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 207 | let inner = unsafe { &mut *self.inner.get() }; 208 | 209 | let user_data = self.user_data(); 210 | 211 | f.debug_struct("Registration") 212 | .field("user_data", &user_data) 213 | .field("finished", &inner.finished) 214 | .field("result", &inner.result) 215 | .finish() 216 | } 217 | } 218 | -------------------------------------------------------------------------------- /tokio-uring-reactor/src/unpark.rs: -------------------------------------------------------------------------------- 1 | use std::io; 2 | use std::fs::File; 3 | use std::os::unix::io::{ 4 | FromRawFd, 5 | AsRawFd, 6 | RawFd, 7 | }; 8 | use std::sync::{ 9 | atomic::{ 10 | AtomicBool, 11 | Ordering, 12 | }, 13 | Arc, 14 | Weak, 15 | }; 16 | 17 | struct Shared { 18 | write_fd: File, 19 | pending: AtomicBool, 20 | entered: AtomicBool, 21 | } 22 | 23 | pub struct Unpark { 24 | shared: Weak, 25 | } 26 | 27 | impl Unpark { 28 | pub fn unpark(&self) { 29 | const DATA: &'static [u8] = b"u"; 30 | 31 | let shared = match self.shared.upgrade() { 32 | None => return, 33 | Some(shared) => shared, 34 | }; 35 | 36 | if shared.pending.swap(true, Ordering::AcqRel) { 37 | // already pending 38 | return; 39 | } 40 | if shared.entered.load(Ordering::SeqCst) { 41 | unsafe { 42 | libc::write( 43 | shared.write_fd.as_raw_fd(), 44 | DATA.as_ptr() as *const libc::c_void, 45 | DATA.len(), 46 | ); 47 | } 48 | } 49 | } 50 | } 51 | 52 | pub struct Park { 53 | shared: Arc, 54 | read_fd: File, 55 | } 56 | 57 | impl Park { 58 | pub fn new() -> io::Result { 59 | let mut fds = [-1i32; 2]; 60 | let res = unsafe { 61 | libc::pipe2( 62 | (&mut fds[..]).as_mut_ptr(), 63 | libc::O_CLOEXEC, // | libc::O_NONBLOCK, 64 | ) 65 | }; 66 | if 0 != res { 67 | return Err(io::Error::last_os_error()); 68 | } 69 | let read_fd = unsafe { File::from_raw_fd(fds[0]) }; 70 | let write_fd = unsafe { File::from_raw_fd(fds[1]) }; 71 | crate::set_non_block(fds[1]); // only write end non blocking 72 | Ok(Park { 73 | shared: Arc::new(Shared { 74 | write_fd, 75 | pending: AtomicBool::new(false), 76 | entered: AtomicBool::new(false), 77 | }), 78 | read_fd, 79 | }) 80 | } 81 | 82 | pub fn enter(&self) -> ParkEntered 83 | { 84 | self.shared.entered.store(true, Ordering::SeqCst); 85 | 86 | let allow_wait = !self.shared.pending.load(Ordering::SeqCst); 87 | 88 | ParkEntered { 89 | allow_wait, 90 | park: self, 91 | } 92 | } 93 | 94 | pub fn unpark(&self) -> Unpark { 95 | Unpark { 96 | shared: Arc::downgrade(&self.shared), 97 | } 98 | } 99 | 100 | pub fn pending(&self) -> bool { 101 | self.shared.pending.load(Ordering::Relaxed) 102 | } 103 | 104 | pub fn clear_unpark(&self) { 105 | self.shared.pending.store(false, 
Ordering::Relaxed); 106 | } 107 | 108 | pub fn clear_event(&self) { 109 | let mut buf = [0u8; 16]; 110 | 111 | unsafe { 112 | libc::read( 113 | self.read_fd.as_raw_fd(), 114 | buf[..].as_mut_ptr() as *mut libc::c_void, 115 | buf.len(), 116 | ); 117 | } 118 | } 119 | } 120 | 121 | pub struct ParkEntered<'a> { 122 | pub allow_wait: bool, 123 | park: &'a Park, 124 | } 125 | 126 | impl Drop for ParkEntered<'_> { 127 | fn drop(&mut self) { 128 | self.park.shared.entered.store(false, Ordering::Relaxed); 129 | self.park.clear_unpark(); 130 | } 131 | } 132 | 133 | impl AsRawFd for Park { 134 | fn as_raw_fd(&self) -> RawFd { 135 | self.read_fd.as_raw_fd() 136 | } 137 | } 138 | -------------------------------------------------------------------------------- /tokio-uring/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "tokio-uring" 3 | version = "0.1.0" 4 | authors = ["Stefan Bühler "] 5 | edition = "2018" 6 | 7 | [dependencies] 8 | futures = "0.1.26" 9 | tokio-current-thread = "0.1.6" 10 | tokio-timer = "0.2.8" 11 | tokio-executor = "0.1.7" 12 | 13 | tokio-uring-reactor = { path = "../tokio-uring-reactor" } 14 | 15 | [dev-dependencies] 16 | env_logger = "0.6.1" 17 | -------------------------------------------------------------------------------- /tokio-uring/examples/echo.rs: -------------------------------------------------------------------------------- 1 | use std::io; 2 | use std::net; 3 | use std::time::Duration; 4 | use futures::prelude::*; 5 | use tokio_uring_reactor::io::{ 6 | SocketRead, 7 | SocketWrite, 8 | }; 9 | use tokio_timer::Timeout; 10 | 11 | pub fn main() { 12 | env_logger::init(); 13 | 14 | println!("Starting echo server"); 15 | 16 | let l = net::TcpListener::bind("[::]:22").expect("bind"); 17 | let l = tokio_uring_reactor::net::TcpListener::from(l); 18 | 19 | let mut runtime = tokio_uring::Runtime::new().expect("new runtime"); 20 | 21 | let handle = runtime.reactor_handle(); 22 | let connection_handler = move |(c, a): (tokio_uring_reactor::net::TcpStream, net::SocketAddr)| { 23 | println!("Connection from {}", a); 24 | let mut buf: Vec = Vec::new(); 25 | let whandle = handle.clone(); 26 | buf.resize_with(512, Default::default); 27 | tokio_current_thread::spawn( 28 | Timeout::new(c.read(&handle, buf).from_err(), Duration::from_secs(3)) 29 | .map_err(|e| { 30 | eprintln!("timout/read error"); 31 | if e.is_inner() { 32 | e.into_inner().expect("inner") 33 | } else { 34 | io::Error::new(io::ErrorKind::TimedOut, "timeout") 35 | } 36 | }) 37 | .and_then(move |(n, mut buf, c)| { 38 | buf.truncate(n); 39 | println!("Echoing: {:?}", buf); 40 | c.write(&whandle, buf).from_err() 41 | }) 42 | .map(|(_,_,_)| println!("connection done")) 43 | .map_err(|e| eprintln!("Connection error: {}", e)) 44 | ); 45 | Ok(()) 46 | }; 47 | 48 | let handle = runtime.reactor_handle(); 49 | runtime.spawn( 50 | Timeout::new(l.incoming(&handle), Duration::from_secs(30)) 51 | .map_err(|e| { 52 | if e.is_inner() { 53 | panic!(e.into_inner().expect("inner")); 54 | } 55 | }) 56 | .for_each(connection_handler) 57 | .map(|()| eprintln!("listening done")) 58 | ); 59 | runtime.run().expect("runtime run"); 60 | } 61 | -------------------------------------------------------------------------------- /tokio-uring/src/lib.rs: -------------------------------------------------------------------------------- 1 | use std::cell::RefCell; 2 | use std::fmt; 3 | use std::io; 4 | use std::time::Duration; 5 | 6 | use futures::Future; 7 | use 
tokio_current_thread::{self, CurrentThread}; 8 | use tokio_executor::{self, Enter}; 9 | use tokio_uring_reactor::{self, Handle, Reactor}; 10 | use tokio_timer::{self, Timer}; 11 | 12 | 13 | pub struct Runtime { 14 | executor: CurrentThread>, 15 | } 16 | 17 | impl Runtime { 18 | /// Create new Runtime 19 | pub fn new() -> io::Result { 20 | let reactor = Reactor::new()?; 21 | let executor = CurrentThread::new_with_park(Timer::new(reactor)); 22 | 23 | Ok(Runtime { 24 | executor, 25 | }) 26 | } 27 | 28 | /// Spawn the future on the executor. 29 | /// 30 | /// This internally queues the future to be executed once `run` is called. 31 | pub fn spawn(&mut self, future: F) -> &mut Self 32 | where 33 | F: Future + 'static, 34 | { 35 | self.executor.spawn(future); 36 | self 37 | } 38 | 39 | /// Run the executor to completion, blocking the thread until **all** 40 | /// spawned futures have completed. 41 | pub fn run(&mut self) -> Result<(), tokio_current_thread::RunError> { 42 | let mut enter = tokio_executor::enter().unwrap(); 43 | self.enter(&mut enter).run() 44 | } 45 | 46 | /// Run the executor to completion, blocking the thread until all 47 | /// spawned futures have completed **or** `duration` time has elapsed. 48 | pub fn run_timeout( 49 | &mut self, 50 | duration: Duration, 51 | ) -> Result<(), tokio_current_thread::RunTimeoutError> { 52 | let mut enter = tokio_executor::enter().unwrap(); 53 | self.enter(&mut enter).run_timeout(duration) 54 | } 55 | 56 | /// Synchronously waits for the provided `future` to complete. 57 | /// 58 | /// Also waits for all other tasks to complete. 59 | /// 60 | /// The outer `Result` represents possible event loop errors; on success it 61 | /// will return the `Future`s result (which can have a different error). 62 | pub fn block_on_all( 63 | &mut self, 64 | future: F, 65 | ) -> Result> 66 | where 67 | F: Future, 68 | { 69 | let mut enter = tokio_executor::enter().unwrap(); 70 | self.enter(&mut enter).block_on_all(future) 71 | } 72 | 73 | /// Perform a single iteration of the event loop. 74 | /// 75 | /// This function blocks the current thread even if the executor is idle. 76 | pub fn turn( 77 | &mut self, 78 | duration: Option, 79 | ) -> Result { 80 | let mut enter = tokio_executor::enter().unwrap(); 81 | self.enter(&mut enter).turn(duration) 82 | } 83 | 84 | /// Returns `true` if the executor is currently idle. 85 | /// 86 | /// An idle executor is defined by not currently having any spawned tasks. 87 | /// 88 | /// Timers / IO-watchers that are not associated with a spawned task are 89 | /// ignored. 90 | pub fn is_idle(&self) -> bool { 91 | self.executor.is_idle() 92 | } 93 | 94 | /// Bind `Runtime` instance with an execution context. 
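	/// The `run*`, `turn` and `block_on_all` methods above are thin wrappers
	/// that create an `Enter` guard and delegate to the returned `Entered`.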
95 | pub fn enter<'a>(&'a mut self, enter: &'a mut Enter) -> Entered<'a> { 96 | Entered { 97 | runtime: self, 98 | enter, 99 | } 100 | } 101 | 102 | /// Get `Reactor` handle for this `Runtime` 103 | pub fn reactor_handle(&mut self) -> Handle { 104 | self.executor.get_park().get_park().handle() 105 | } 106 | 107 | /// Get `Timer` handle for this `Runtime` 108 | pub fn timer_handle(&mut self) -> tokio_timer::timer::Handle { 109 | self.executor.get_park().handle() 110 | } 111 | } 112 | 113 | impl fmt::Debug for Runtime { 114 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 115 | self.executor.get_park().fmt(f) 116 | } 117 | } 118 | 119 | // Handle::current doesn't offer a "failable" access, it always spawns a new 120 | // reactor if it can't find one 121 | thread_local!(static CURRENT_REACTOR: RefCell> = RefCell::new(None)); 122 | 123 | fn with_reactor_handle(handle: &Handle, enter: &mut Enter, f: F) -> R 124 | where 125 | F: FnOnce(&mut Enter) -> R, 126 | { 127 | struct Reset; 128 | 129 | impl Drop for Reset { 130 | fn drop(&mut self) { 131 | CURRENT_REACTOR.with(|current| { 132 | let mut current = current.borrow_mut(); 133 | *current = None; 134 | }); 135 | } 136 | } 137 | 138 | // make sure to always reset CURRENT_REACTOR 139 | let reset = Reset; 140 | 141 | CURRENT_REACTOR.with(|current| { 142 | let mut current = current.borrow_mut(); 143 | *current = Some(handle.clone()); 144 | }); 145 | 146 | // also use this handle for Handle::current() 147 | let result = tokio_uring_reactor::with_default(handle, enter, f); 148 | 149 | // Here we want to usually reset the CURRENT_REACTOR, and on unwinding it 150 | // will get dropped automatically too. 151 | drop(reset); 152 | 153 | result 154 | } 155 | 156 | /// Get a `Handle` to the `Reactor` of the currently running `Runtime`. 157 | /// 158 | /// Doesn't create a background thread with a new `Reactor` like 159 | /// `Handle::current` if there is no `Handle` available (i.e. when the current 160 | /// thread doesn't have an active `Runtime`). 161 | pub fn current_reactor_handle() -> Option { 162 | CURRENT_REACTOR.with(|current| current.borrow().clone()) 163 | } 164 | 165 | /// A `Runtime` instance bound to a supplied execution context. 166 | pub struct Entered<'a> { 167 | runtime: &'a mut Runtime, 168 | enter: &'a mut Enter, 169 | } 170 | 171 | impl<'a> Entered<'a> { 172 | fn with(&mut self, f: F) -> T 173 | where 174 | F: FnOnce(Borrow) -> T, 175 | { 176 | let (reactor_handle, timer_handle) = { 177 | let timer = self.runtime.executor.get_park(); 178 | ( 179 | timer.get_park().handle(), 180 | timer.handle(), 181 | ) 182 | }; 183 | let runtime = &mut self.runtime; 184 | with_reactor_handle(&reactor_handle, self.enter, |enter| { 185 | tokio_timer::with_default(&timer_handle, enter, |enter| { 186 | f(Borrow { 187 | executor: runtime.executor.enter(enter), 188 | }) 189 | }) 190 | }) 191 | } 192 | 193 | /// Spawn the future on the executor. 194 | /// 195 | /// This internally queues the future to be executed once `run` is called. 196 | pub fn spawn(&mut self, future: F) -> &mut Self 197 | where 198 | F: Future + 'static, 199 | { 200 | self.runtime.executor.spawn(future); 201 | self 202 | } 203 | 204 | /// Run the executor to completion, blocking the thread until **all** 205 | /// spawned futures have completed. 
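	/// While the executor is idle it parks on the timer/reactor stack, so
	/// uring completions keep being processed even when no task is runnable.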
206 | pub fn run(&mut self) -> Result<(), tokio_current_thread::RunError> { 207 | self.with(|mut borrow| { 208 | borrow 209 | .executor 210 | .run() 211 | }) 212 | } 213 | 214 | /// Run the executor to completion, blocking the thread until all 215 | /// spawned futures have completed **or** `duration` time has elapsed. 216 | pub fn run_timeout( 217 | &mut self, 218 | duration: Duration, 219 | ) -> Result<(), tokio_current_thread::RunTimeoutError> { 220 | self.with(|mut borrow| { 221 | borrow 222 | .executor 223 | .run_timeout(duration) 224 | }) 225 | } 226 | 227 | /// Synchronously waits for the provided `future` to complete. 228 | /// 229 | /// Also waits for all other tasks to complete. 230 | /// 231 | /// The outer `Result` represents possible event loop errors; on success it 232 | /// will return the `Future`s result (which can have a different error). 233 | pub fn block_on_all( 234 | &mut self, 235 | future: F, 236 | ) -> Result> 237 | where 238 | F: Future, 239 | { 240 | self.with(|mut borrow| { 241 | let ret = borrow.executor.block_on(future); 242 | borrow.executor.run().unwrap(); 243 | ret 244 | }) 245 | } 246 | 247 | /// Perform a single iteration of the event loop. 248 | /// 249 | /// This function blocks the current thread even if the executor is idle. 250 | pub fn turn( 251 | &mut self, 252 | duration: Option, 253 | ) -> Result { 254 | self.with(|mut borrow| { 255 | borrow 256 | .executor 257 | .turn(duration) 258 | }) 259 | } 260 | 261 | /// Returns `true` if the executor is currently idle. 262 | /// 263 | /// An idle executor is defined by not currently having any spawned tasks. 264 | /// 265 | /// Timers / IO-watchers that are not associated with a spawned task are 266 | /// ignored. 267 | pub fn is_idle(&self) -> bool { 268 | self.runtime.executor.is_idle() 269 | } 270 | } 271 | 272 | impl<'a> fmt::Debug for Entered<'a> { 273 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 274 | self.runtime.executor.get_park().fmt(f) 275 | } 276 | } 277 | 278 | struct Borrow<'a> { 279 | executor: tokio_current_thread::Entered<'a, Timer>, 280 | } 281 | --------------------------------------------------------------------------------
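
Usage sketch (not a file in this repository; illustrative only): the pieces
above compose as follows: build a `Runtime`, take its reactor `Handle`, queue
an `async_write`, and drive it with `block_on_all`. The path
`/tmp/uring-demo.txt` is an arbitrary example; `.ok()` drops the error detail
because the error type carries the buffer/file instead of implementing `Debug`.

    use std::fs::File;

    fn main() {
        let mut runtime = tokio_uring::Runtime::new().expect("new runtime");
        let handle = runtime.reactor_handle();

        let file = File::create("/tmp/uring-demo.txt").expect("create");
        // async_write takes ownership of file + buffer and returns both on completion
        let write = handle.async_write(file, 0, b"hello from io-uring\n".to_vec());

        let (n, _buf, _file) = runtime.block_on_all(write).ok().expect("write");
        println!("wrote {} bytes", n);
    }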