├── .cargo └── config.toml ├── .github └── workflows │ └── ci.yml ├── .gitignore ├── CHANGELOG.md ├── Cargo.toml ├── DESIGN.md ├── LICENSE ├── README.md ├── benches ├── criterion │ └── no_op.rs └── lai │ └── no_op.rs ├── examples ├── cat.rs ├── mix.rs ├── tcp_listener.rs ├── tcp_listener_fixed_buffers.rs ├── tcp_stream.rs ├── test_create_dir_all.rs ├── udp_socket.rs ├── unix_listener.rs ├── unix_stream.rs └── wrk-bench.rs ├── src ├── buf │ ├── bounded.rs │ ├── fixed │ │ ├── buffers.rs │ │ ├── handle.rs │ │ ├── mod.rs │ │ ├── plumbing │ │ │ ├── mod.rs │ │ │ ├── pool.rs │ │ │ └── registry.rs │ │ ├── pool.rs │ │ └── registry.rs │ ├── io_buf.rs │ ├── io_buf_mut.rs │ ├── mod.rs │ └── slice.rs ├── fs │ ├── create_dir_all.rs │ ├── directory.rs │ ├── file.rs │ ├── mod.rs │ ├── open_options.rs │ ├── statx.rs │ └── symlink.rs ├── future.rs ├── io │ ├── accept.rs │ ├── bind.rs │ ├── close.rs │ ├── connect.rs │ ├── fallocate.rs │ ├── fsync.rs │ ├── mkdir_at.rs │ ├── mod.rs │ ├── noop.rs │ ├── open.rs │ ├── pool.rs │ ├── read.rs │ ├── read_fixed.rs │ ├── readv.rs │ ├── recv_from.rs │ ├── recvmsg.rs │ ├── rename_at.rs │ ├── send_to.rs │ ├── send_zc.rs │ ├── sendmsg.rs │ ├── sendmsg_zc.rs │ ├── shared_fd.rs │ ├── socket.rs │ ├── statx.rs │ ├── symlink.rs │ ├── unlink_at.rs │ ├── util.rs │ ├── write.rs │ ├── write_fixed.rs │ ├── writev.rs │ └── writev_all.rs ├── lib.rs ├── net │ ├── mod.rs │ ├── tcp │ │ ├── listener.rs │ │ ├── mod.rs │ │ └── stream.rs │ ├── udp.rs │ └── unix │ │ ├── listener.rs │ │ ├── mod.rs │ │ └── stream.rs └── runtime │ ├── context.rs │ ├── driver │ ├── handle.rs │ ├── mod.rs │ └── op │ │ ├── mod.rs │ │ └── slab_list.rs │ └── mod.rs └── tests ├── buf.rs ├── driver.rs ├── fixed_buf.rs ├── fs_directory.rs ├── fs_file.rs ├── fs_symlink.rs └── runtime.rs /.cargo/config.toml: -------------------------------------------------------------------------------- 1 | [env] 2 | RUST_TEST_THREADS = { value = "1" } 3 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | pull_request: 5 | branches: 6 | - master 7 | push: 8 | branches: 9 | - master 10 | 11 | env: 12 | RUSTFLAGS: -Dwarnings 13 | RUST_BACKTRACE: 1 14 | 15 | jobs: 16 | # Depends on all actions that are required for a "successful" CI run. 
17 | # Based on the ci here: https://github.com/tokio-rs/tokio/blob/master/.github/workflows/ci.yml 18 | all-systems-go: 19 | runs-on: ubuntu-latest 20 | needs: 21 | - check 22 | - clippy 23 | - fmt 24 | - test 25 | - test-docs 26 | - docs 27 | steps: 28 | - run: exit 0 29 | 30 | bench: 31 | runs-on: ubuntu-latest 32 | steps: 33 | - uses: actions/checkout@v4 34 | - name: Install Rust 35 | run: rustup update stable 36 | - run: cargo bench --no-run 37 | 38 | check: 39 | runs-on: ubuntu-latest 40 | steps: 41 | - uses: actions/checkout@v4 42 | - name: Install Rust 43 | run: rustup update stable 44 | - run: cargo check 45 | 46 | clippy: 47 | runs-on: ubuntu-latest 48 | steps: 49 | - uses: actions/checkout@v4 50 | - name: Install Rust 51 | run: rustup update stable 52 | - run: cargo clippy 53 | 54 | test: 55 | runs-on: ubuntu-latest 56 | steps: 57 | - uses: actions/checkout@v4 58 | - name: Install Rust 59 | run: rustup update stable 60 | - run: cargo test 61 | 62 | test-docs: 63 | runs-on: ubuntu-latest 64 | steps: 65 | - uses: actions/checkout@v4 66 | - name: Install Rust 67 | run: rustup update stable 68 | - run: cargo test --doc 69 | 70 | fmt: 71 | runs-on: ubuntu-latest 72 | steps: 73 | - uses: actions/checkout@v4 74 | - name: Install Rust 75 | run: rustup update stable 76 | - run: cargo fmt -- --check 77 | 78 | docs: 79 | runs-on: ubuntu-latest 80 | steps: 81 | - uses: actions/checkout@v4 82 | - name: Install Rust 83 | run: rustup update nightly && rustup default nightly 84 | - run: cargo doc --no-deps --all-features 85 | env: 86 | RUSTDOCFLAGS: -Dwarnings 87 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | Cargo.lock 3 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # 0.4.0 (November 5th, 2022) 2 | 3 | ### Fixed 4 | 5 | - Fix panic in Deref/DerefMut for Slice extending into uninitialized part of the buffer ([#52]) 6 | - docs: all-features = true ([#84]) 7 | - fix fs unit tests to avoid parallelism ([#121]) 8 | - Box the socket address to allow moving the Connect future ([#126]) 9 | - rt: Fix data race ([#146]) 10 | 11 | ### Added 12 | 13 | - Implement fs::File::readv_at()/writev_at() ([#87]) 14 | - fs: implement FromRawFd for File ([#89]) 15 | - Implement `AsRawFd` for `TcpStream` ([#94]) 16 | - net: add TcpListener.local_addr method ([#107]) 17 | - net: add TcpStream.write_all ([#111]) 18 | - driver: add Builder API as an option to start ([#113]) 19 | - Socket and TcpStream shutdown ([#124]) 20 | - fs: implement fs::File::from_std ([#131]) 21 | - net: implement FromRawFd for TcpStream ([#132]) 22 | - fs: implement OpenOptionsExt for OpenOptions ([#133]) 23 | - Add NoOp support ([#134]) 24 | - Add writev to TcpStream ([#136]) 25 | - sync TcpStream, UnixStream and UdpSocket functionality ([#141]) 26 | - Add benchmarks for no-op submission ([#144]) 27 | - Expose runtime structure ([#148]) 28 | 29 | ### Changed 30 | 31 | - driver: batch submit requests and add benchmark ([#78]) 32 | - Depend on io-uring version ^0.5.8 ([#153]) 33 | 34 | ### Internal Improvements 35 | 36 | - chore: fix clippy lints ([#99]) 37 | - io: refactor post-op logic in ops into Completable ([#116]) 38 | - Support multi completion events: v2 ([#130]) 39 | - simplify driver operation futures ([#139]) 40 | - rt: refactor runtime to avoid Rc\> 
([#142]) 41 | - Remove unused dev-dependencies ([#143]) 42 | - chore: types and fields explicitly named ([#149]) 43 | - Ignore errors from uring while cleaning up ([#154]) 44 | - rt: drop runtime before driver during shutdown ([#155]) 45 | - rt: refactor drop logic ([#157]) 46 | - rt: fix error when calling block_on twice ([#162]) 47 | 48 | ### CI changes 49 | 50 | - chore: update actions/checkout action to v3 ([#90]) 51 | - chore: add all-systems-go ci check ([#98]) 52 | - chore: add clippy to ci ([#100]) 53 | - ci: run cargo test --doc ([#135]) 54 | 55 | 56 | [#52]: https://github.com/tokio-rs/tokio-uring/pull/52 57 | [#78]: https://github.com/tokio-rs/tokio-uring/pull/78 58 | [#84]: https://github.com/tokio-rs/tokio-uring/pull/84 59 | [#87]: https://github.com/tokio-rs/tokio-uring/pull/87 60 | [#89]: https://github.com/tokio-rs/tokio-uring/pull/89 61 | [#90]: https://github.com/tokio-rs/tokio-uring/pull/90 62 | [#94]: https://github.com/tokio-rs/tokio-uring/pull/94 63 | [#98]: https://github.com/tokio-rs/tokio-uring/pull/98 64 | [#99]: https://github.com/tokio-rs/tokio-uring/pull/99 65 | [#100]: https://github.com/tokio-rs/tokio-uring/pull/100 66 | [#107]: https://github.com/tokio-rs/tokio-uring/pull/107 67 | [#111]: https://github.com/tokio-rs/tokio-uring/pull/111 68 | [#113]: https://github.com/tokio-rs/tokio-uring/pull/113 69 | [#116]: https://github.com/tokio-rs/tokio-uring/pull/116 70 | [#121]: https://github.com/tokio-rs/tokio-uring/pull/121 71 | [#124]: https://github.com/tokio-rs/tokio-uring/pull/124 72 | [#126]: https://github.com/tokio-rs/tokio-uring/pull/126 73 | [#130]: https://github.com/tokio-rs/tokio-uring/pull/130 74 | [#131]: https://github.com/tokio-rs/tokio-uring/pull/131 75 | [#132]: https://github.com/tokio-rs/tokio-uring/pull/132 76 | [#133]: https://github.com/tokio-rs/tokio-uring/pull/133 77 | [#134]: https://github.com/tokio-rs/tokio-uring/pull/134 78 | [#135]: https://github.com/tokio-rs/tokio-uring/pull/135 79 | [#136]: https://github.com/tokio-rs/tokio-uring/pull/136 80 | [#139]: https://github.com/tokio-rs/tokio-uring/pull/139 81 | [#141]: https://github.com/tokio-rs/tokio-uring/pull/141 82 | [#142]: https://github.com/tokio-rs/tokio-uring/pull/142 83 | [#143]: https://github.com/tokio-rs/tokio-uring/pull/143 84 | [#144]: https://github.com/tokio-rs/tokio-uring/pull/144 85 | [#146]: https://github.com/tokio-rs/tokio-uring/pull/146 86 | [#148]: https://github.com/tokio-rs/tokio-uring/pull/148 87 | [#149]: https://github.com/tokio-rs/tokio-uring/pull/149 88 | [#153]: https://github.com/tokio-rs/tokio-uring/pull/153 89 | [#154]: https://github.com/tokio-rs/tokio-uring/pull/154 90 | [#155]: https://github.com/tokio-rs/tokio-uring/pull/155 91 | [#157]: https://github.com/tokio-rs/tokio-uring/pull/157 92 | [#162]: https://github.com/tokio-rs/tokio-uring/pull/162 93 | 94 | # 0.3.0 (March 2nd, 2022) 95 | ### Added 96 | - net: add unix stream & listener ([#74]) 97 | - net: add tcp and udp support ([#40]) 98 | 99 | [#74]: https://github.com/tokio-rs/tokio-uring/pull/74 100 | [#40]: https://github.com/tokio-rs/tokio-uring/pull/40 101 | 102 | # 0.2.0 (January 9th, 2022) 103 | 104 | ### Fixed 105 | - fs: fix error handling related to changes in rustc ([#69]) 106 | - op: fix 'already borrowed' panic ([#39]) 107 | 108 | ### Added 109 | - fs: add fs::remove_file ([#66]) 110 | - fs: implement Debug for File ([#65]) 111 | - fs: add remove_dir and unlink ([#63]) 112 | - buf: impl IoBuf/IoBufMut for bytes::Bytes/BytesMut ([#43]) 113 | 114 | [#69]: 
https://github.com/tokio-rs/tokio-uring/pull/69 115 | [#66]: https://github.com/tokio-rs/tokio-uring/pull/66 116 | [#65]: https://github.com/tokio-rs/tokio-uring/pull/65 117 | [#63]: https://github.com/tokio-rs/tokio-uring/pull/63 118 | [#39]: https://github.com/tokio-rs/tokio-uring/pull/39 119 | [#43]: https://github.com/tokio-rs/tokio-uring/pull/43 120 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "tokio-uring" 3 | version = "0.5.0" 4 | authors = ["Tokio Contributors "] 5 | edition = "2018" 6 | readme = "README.md" 7 | license = "MIT" 8 | documentation = "https://docs.rs/tokio-uring/0.5.0/tokio-uring" 9 | repository = "https://github.com/tokio-rs/tokio-uring" 10 | homepage = "https://tokio.rs" 11 | description = """ 12 | io-uring support for the Tokio asynchronous runtime. 13 | """ 14 | categories = ["asynchronous", "network-programming"] 15 | keywords = ["async", "fs", "io-uring"] 16 | 17 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 18 | 19 | [dependencies] 20 | tokio = { version = "1.2", features = ["net", "rt", "sync"] } 21 | slab = "0.4.2" 22 | libc = "0.2.80" 23 | io-uring = "0.6.0" 24 | socket2 = { version = "0.4.4", features = ["all"] } 25 | bytes = { version = "1.0", optional = true } 26 | futures-util = { version = "0.3.26", default-features = false, features = ["std"] } 27 | 28 | [dev-dependencies] 29 | tempfile = "3.2.0" 30 | tokio-test = "0.4.2" 31 | iai = "0.1.1" 32 | criterion = "0.4.0" 33 | # we use joinset in our tests 34 | tokio = "1.21.2" 35 | nix = "0.26.1" 36 | 37 | [package.metadata.docs.rs] 38 | all-features = true 39 | 40 | [profile.bench] 41 | debug = true 42 | 43 | [[bench]] 44 | name = "lai_no_op" 45 | path = "benches/lai/no_op.rs" 46 | harness = false 47 | 48 | [[bench]] 49 | name = "criterion_no_op" 50 | path = "benches/criterion/no_op.rs" 51 | harness = false 52 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2021 Carl Lerche 2 | 3 | Permission is hereby granted, free of charge, to any 4 | person obtaining a copy of this software and associated 5 | documentation files (the "Software"), to deal in the 6 | Software without restriction, including without 7 | limitation the rights to use, copy, modify, merge, 8 | publish, distribute, sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the Software 10 | is furnished to do so, subject to the following 11 | conditions: 12 | 13 | The above copyright notice and this permission notice 14 | shall be included in all copies or substantial portions 15 | of the Software. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF 18 | ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED 19 | TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 20 | PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT 21 | SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 22 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 23 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR 24 | IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 25 | DEALINGS IN THE SOFTWARE. 
26 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # tokio-uring
2 |
3 | This crate provides [`io-uring`] for [Tokio] by exposing a new runtime that is
4 | compatible with Tokio but can also drive [`io-uring`]-backed resources. Any
5 | library that works with [Tokio] also works with `tokio-uring`. The crate
6 | provides new resource types that work with [`io-uring`].
7 |
8 | [`io-uring`]: https://unixism.net/loti/
9 | [Tokio]: https://github.com/tokio-rs/tokio
10 | [`fs::File`]: https://docs.rs/tokio-uring/latest/tokio_uring/fs/struct.File.html
11 |
12 | [API Docs](https://docs.rs/tokio-uring/latest/tokio_uring) |
13 | [Chat](https://discord.gg/tokio)
14 |
15 | # Getting started
16 |
17 | Using `tokio-uring` requires starting a `tokio-uring` runtime. This
18 | runtime internally manages the main Tokio runtime and an `io-uring` driver.
19 |
20 | In your Cargo.toml:
21 | ```toml
22 | [dependencies]
23 | tokio-uring = { version = "0.5.0" }
24 | ```
25 | In your main.rs:
26 | ```rust
27 | use tokio_uring::fs::File;
28 |
29 | fn main() -> Result<(), Box<dyn std::error::Error>> {
30 |     tokio_uring::start(async {
31 |         // Open a file
32 |         let file = File::open("hello.txt").await?;
33 |
34 |         let buf = vec![0; 4096];
35 |         // Read some data. The buffer is passed by ownership and
36 |         // submitted to the kernel. When the operation completes,
37 |         // we get the buffer back.
38 |         let (res, buf) = file.read_at(buf, 0).await;
39 |         let n = res?;
40 |
41 |         // Display the contents
42 |         println!("{:?}", &buf[..n]);
43 |
44 |         Ok(())
45 |     })
46 | }
47 | ```
48 | ## Requirements
49 | `tokio-uring` requires a very recent Linux kernel; not even all kernels with io_uring support will work.
50 | In particular, `5.4.0` (the default on Ubuntu 20.04) does not work, while `5.11.0` (the Ubuntu HWE image) does.
51 |
52 | ## Project status
53 |
54 | The `tokio-uring` project is still very young. Currently, we are focusing on
55 | supporting filesystem and network operations. Eventually, we will add safe APIs for all
56 | io_uring-compatible operations.
57 |
58 | ## License
59 |
60 | This project is licensed under the [MIT license].
61 |
62 | [MIT license]: LICENSE
63 |
64 | ### Contribution
65 |
66 | Unless you explicitly state otherwise, any contribution intentionally submitted
67 | for inclusion in tokio-uring by you shall be licensed as MIT, without any
68 | additional terms or conditions.
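The benchmark sources that follow configure the runtime explicitly instead of calling `tokio_uring::start`. A minimal sketch of that builder pattern, assuming only the `builder()`/`uring_builder()` API exercised in those benchmarks (the queue sizes here are illustrative):

```rust
fn main() {
    // Tune the completion queue size on the io_uring builder...
    let mut ring_opts = tokio_uring::uring_builder();
    ring_opts.setup_cqsize(256);

    // ...then start a runtime with a chosen submission queue size.
    tokio_uring::builder()
        .entries(128)
        .uring_builder(&ring_opts)
        .start(async {
            // Submit a no-op to the ring and wait for its completion.
            tokio_uring::no_op().await.unwrap();
        });
}
```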
69 | -------------------------------------------------------------------------------- /benches/criterion/no_op.rs: -------------------------------------------------------------------------------- 1 | use criterion::{ 2 | criterion_group, criterion_main, BenchmarkId, Criterion, SamplingMode, Throughput, 3 | }; 4 | use std::time::{Duration, Instant}; 5 | 6 | use tokio::task::JoinSet; 7 | 8 | #[derive(Clone)] 9 | struct Options { 10 | iterations: usize, 11 | concurrency: usize, 12 | sq_size: usize, 13 | cq_size: usize, 14 | } 15 | 16 | impl Default for Options { 17 | fn default() -> Self { 18 | Self { 19 | iterations: 100000, 20 | concurrency: 1, 21 | sq_size: 128, 22 | cq_size: 256, 23 | } 24 | } 25 | } 26 | 27 | fn run_no_ops(opts: &Options, count: u64) -> Duration { 28 | let mut ring_opts = tokio_uring::uring_builder(); 29 | ring_opts.setup_cqsize(opts.cq_size as _); 30 | 31 | let mut m = Duration::ZERO; 32 | 33 | // Run the required number of iterations 34 | for _ in 0..count { 35 | m += tokio_uring::builder() 36 | .entries(opts.sq_size as _) 37 | .uring_builder(&ring_opts) 38 | .start(async move { 39 | let mut js = JoinSet::new(); 40 | 41 | for _ in 0..opts.iterations { 42 | js.spawn_local(tokio_uring::no_op()); 43 | } 44 | 45 | let start = Instant::now(); 46 | 47 | while let Some(res) = js.join_next().await { 48 | res.unwrap().unwrap(); 49 | } 50 | 51 | start.elapsed() 52 | }) 53 | } 54 | m 55 | } 56 | 57 | fn bench(c: &mut Criterion) { 58 | let mut group = c.benchmark_group("no_op"); 59 | let mut opts = Options::default(); 60 | for concurrency in [1, 32, 64, 256].iter() { 61 | opts.concurrency = *concurrency; 62 | 63 | // We perform long running benchmarks: this is the best mode 64 | group.sampling_mode(SamplingMode::Flat); 65 | 66 | group.throughput(Throughput::Elements(opts.iterations as u64)); 67 | group.bench_with_input( 68 | BenchmarkId::from_parameter(concurrency), 69 | &opts, 70 | |b, opts| { 71 | // Custom iterator used because we don't expose access to runtime, 72 | // which is required to do async benchmarking with criterion 73 | b.iter_custom(move |iter| run_no_ops(opts, iter)); 74 | }, 75 | ); 76 | } 77 | group.finish(); 78 | } 79 | 80 | criterion_group!(benches, bench); 81 | criterion_main!(benches); 82 | -------------------------------------------------------------------------------- /benches/lai/no_op.rs: -------------------------------------------------------------------------------- 1 | use iai::black_box; 2 | use tokio::task::JoinSet; 3 | 4 | #[derive(Clone)] 5 | struct Options { 6 | iterations: usize, 7 | concurrency: usize, 8 | sq_size: usize, 9 | cq_size: usize, 10 | } 11 | 12 | impl Default for Options { 13 | fn default() -> Self { 14 | Self { 15 | iterations: 100000, 16 | concurrency: 1, 17 | sq_size: 64, 18 | cq_size: 256, 19 | } 20 | } 21 | } 22 | 23 | fn runtime_only() -> Result<(), Box> { 24 | let opts = Options::default(); 25 | let mut ring_opts = tokio_uring::uring_builder(); 26 | ring_opts.setup_cqsize(opts.cq_size as _); 27 | 28 | tokio_uring::builder() 29 | .entries(opts.sq_size as _) 30 | .uring_builder(&ring_opts) 31 | .start(async move { black_box(Ok(())) }) 32 | } 33 | 34 | fn run_no_ops(opts: Options) -> Result<(), Box> { 35 | let mut ring_opts = tokio_uring::uring_builder(); 36 | ring_opts.setup_cqsize(opts.cq_size as _); 37 | 38 | tokio_uring::builder() 39 | .entries(opts.sq_size as _) 40 | .uring_builder(&ring_opts) 41 | .start(async move { 42 | let mut js = JoinSet::new(); 43 | 44 | for _ in 0..opts.iterations { 45 | 
js.spawn_local(tokio_uring::no_op()); 46 | } 47 | 48 | while let Some(res) = js.join_next().await { 49 | res.unwrap().unwrap(); 50 | } 51 | 52 | Ok(()) 53 | }) 54 | } 55 | 56 | // This provides a baseline for estimating op overhead on top of this 57 | fn no_op_x1() -> Result<(), Box> { 58 | let opts = Options::default(); 59 | run_no_ops(black_box(opts)) 60 | } 61 | 62 | fn no_op_x32() -> Result<(), Box> { 63 | let mut opts = Options::default(); 64 | opts.concurrency = 32; 65 | run_no_ops(black_box(opts)) 66 | } 67 | 68 | fn no_op_x64() -> Result<(), Box> { 69 | let mut opts = Options::default(); 70 | opts.concurrency = 64; 71 | run_no_ops(black_box(opts)) 72 | } 73 | 74 | fn no_op_x256() -> Result<(), Box> { 75 | let mut opts = Options::default(); 76 | opts.concurrency = 256; 77 | run_no_ops(black_box(opts)) 78 | } 79 | 80 | iai::main!(runtime_only, no_op_x1, no_op_x32, no_op_x64, no_op_x256); 81 | -------------------------------------------------------------------------------- /examples/cat.rs: -------------------------------------------------------------------------------- 1 | use std::{ 2 | io::Write, 3 | {env, io}, 4 | }; 5 | 6 | use tokio_uring::fs::File; 7 | 8 | fn main() { 9 | // The file to `cat` is passed as a CLI argument 10 | let args: Vec<_> = env::args().collect(); 11 | 12 | if args.len() <= 1 { 13 | panic!("no path specified"); 14 | } 15 | 16 | let path = &args[1]; 17 | 18 | // Lock stdout 19 | let out = io::stdout(); 20 | let mut out = out.lock(); 21 | 22 | tokio_uring::start(async { 23 | // Open the file without blocking 24 | let file = File::open(path).await.unwrap(); 25 | let mut buf = vec![0; 16 * 1_024]; 26 | 27 | // Track the current position in the file; 28 | let mut pos = 0; 29 | 30 | loop { 31 | // Read a chunk 32 | let (res, b) = file.read_at(buf, pos).await; 33 | let n = res.unwrap(); 34 | 35 | if n == 0 { 36 | break; 37 | } 38 | 39 | out.write_all(&b[..n]).unwrap(); 40 | pos += n as u64; 41 | 42 | buf = b; 43 | } 44 | 45 | // Include a new line 46 | println!(); 47 | }); 48 | } 49 | -------------------------------------------------------------------------------- /examples/mix.rs: -------------------------------------------------------------------------------- 1 | //! Shows how use Tokio types from the `tokio-uring` runtime. 2 | //! 3 | //! 
Serve a single file over TCP 4 | 5 | use std::env; 6 | 7 | use tokio_uring::{fs::File, net::TcpListener}; 8 | 9 | fn main() { 10 | // The file to serve over TCP is passed as a CLI argument 11 | let args: Vec<_> = env::args().collect(); 12 | 13 | if args.len() <= 1 { 14 | panic!("no path specified"); 15 | } 16 | 17 | tokio_uring::start(async { 18 | // Start a TCP listener 19 | let listener = TcpListener::bind("0.0.0.0:8080".parse().unwrap()).unwrap(); 20 | 21 | // Accept new sockets 22 | loop { 23 | let (socket, _) = listener.accept().await.unwrap(); 24 | let path = args[1].clone(); 25 | 26 | // Spawn a task to send the file back to the socket 27 | tokio_uring::spawn(async move { 28 | // Open the file without blocking 29 | let file = File::open(path).await.unwrap(); 30 | let mut buf = vec![0; 16 * 1_024]; 31 | 32 | // Track the current position in the file; 33 | let mut pos = 0; 34 | 35 | loop { 36 | // Read a chunk 37 | let (res, b) = file.read_at(buf, pos).await; 38 | let n = res.unwrap(); 39 | 40 | if n == 0 { 41 | break; 42 | } 43 | 44 | let (res, b) = socket.write(b).submit().await; 45 | pos += res.unwrap() as u64; 46 | 47 | buf = b; 48 | } 49 | }); 50 | } 51 | }); 52 | } 53 | -------------------------------------------------------------------------------- /examples/tcp_listener.rs: -------------------------------------------------------------------------------- 1 | use std::{env, net::SocketAddr}; 2 | 3 | use tokio_uring::net::TcpListener; 4 | 5 | fn main() { 6 | let args: Vec<_> = env::args().collect(); 7 | 8 | let socket_addr = if args.len() <= 1 { 9 | "127.0.0.1:0" 10 | } else { 11 | args[1].as_ref() 12 | }; 13 | let socket_addr: SocketAddr = socket_addr.parse().unwrap(); 14 | 15 | tokio_uring::start(async { 16 | let listener = TcpListener::bind(socket_addr).unwrap(); 17 | 18 | println!("Listening on {}", listener.local_addr().unwrap()); 19 | 20 | loop { 21 | let (stream, socket_addr) = listener.accept().await.unwrap(); 22 | tokio_uring::spawn(async move { 23 | // implement ping-pong loop 24 | 25 | use tokio_uring::buf::BoundedBuf; // for slice() 26 | 27 | println!("{} connected", socket_addr); 28 | let mut n = 0; 29 | 30 | let mut buf = vec![0u8; 4096]; 31 | loop { 32 | let (result, nbuf) = stream.read(buf).await; 33 | buf = nbuf; 34 | let read = result.unwrap(); 35 | if read == 0 { 36 | println!("{} closed, {} total ping-ponged", socket_addr, n); 37 | break; 38 | } 39 | 40 | let (res, slice) = stream.write_all(buf.slice(..read)).await; 41 | res.unwrap(); 42 | buf = slice.into_inner(); 43 | println!("{} all {} bytes ping-ponged", socket_addr, read); 44 | n += read; 45 | } 46 | }); 47 | } 48 | }); 49 | } 50 | -------------------------------------------------------------------------------- /examples/tcp_listener_fixed_buffers.rs: -------------------------------------------------------------------------------- 1 | // An example of an echo server using fixed buffers for reading and writing TCP streams. 2 | // A buffer registry size of two is created, to allow a maximum of two simultaneous connections. 3 | 4 | use std::{env, iter, net::SocketAddr}; 5 | 6 | use tokio_uring::{ 7 | buf::{fixed::FixedBufRegistry, BoundedBuf, IoBufMut}, 8 | net::{TcpListener, TcpStream}, 9 | }; // BoundedBuf for slice method 10 | 11 | // A contrived example, where just two fixed buffers are created. 
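// In outline: the registry below is created from two 4096-byte buffers, `register()`
// maps them with the kernel, `check_out(i)` yields a `FixedBuf` that is passed by value
// to `read_fixed`/`write_fixed_all`, and dropping the `FixedBuf` returns its index to
// the registry so the next connection can reuse it.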
12 | const POOL_SIZE: usize = 2; 13 | 14 | fn main() { 15 | let args: Vec<_> = env::args().collect(); 16 | 17 | let socket_addr = if args.len() <= 1 { 18 | "127.0.0.1:0" 19 | } else { 20 | args[1].as_ref() 21 | }; 22 | let socket_addr: SocketAddr = socket_addr.parse().unwrap(); 23 | 24 | tokio_uring::start(accept_loop(socket_addr)); 25 | } 26 | 27 | // Bind to address and accept connections, spawning an echo handler for each connection. 28 | async fn accept_loop(listen_addr: SocketAddr) { 29 | let listener = TcpListener::bind(listen_addr).unwrap(); 30 | 31 | println!( 32 | "Listening on {}, fixed buffer pool size only {POOL_SIZE}", 33 | listener.local_addr().unwrap() 34 | ); 35 | 36 | // Other iterators may be passed to FixedBufRegistry::new also. 37 | let registry = FixedBufRegistry::new(iter::repeat(vec![0; 4096]).take(POOL_SIZE)); 38 | 39 | // Register the buffers with the kernel, asserting the syscall passed. 40 | 41 | registry.register().unwrap(); 42 | 43 | loop { 44 | let (stream, peer) = listener.accept().await.unwrap(); 45 | 46 | tokio_uring::spawn(echo_handler(stream, peer, registry.clone())); 47 | } 48 | } 49 | 50 | // A loop that echoes input to output. Use one fixed buffer for receiving and sending the response 51 | // back. Once the connection is closed, the function returns and the fixed buffer is dropped, 52 | // getting the fixed buffer index returned to the available pool kept by the registry. 53 | async fn echo_handler( 54 | stream: TcpStream, 55 | peer: SocketAddr, 56 | registry: FixedBufRegistry, 57 | ) { 58 | println!("peer {} connected", peer); 59 | 60 | // Get one of the two fixed buffers. 61 | // If neither is unavailable, print reason and return immediately, dropping this connection; 62 | // be nice and shutdown the connection before dropping it so the client sees the connection is 63 | // closed immediately. 64 | 65 | let mut fbuf = registry.check_out(0); 66 | if fbuf.is_none() { 67 | fbuf = registry.check_out(1); 68 | }; 69 | if fbuf.is_none() { 70 | let _ = stream.shutdown(std::net::Shutdown::Write); 71 | println!("peer {} closed, no fixed buffers available", peer); 72 | return; 73 | }; 74 | 75 | let mut fbuf = fbuf.unwrap(); 76 | 77 | let mut n = 0; 78 | loop { 79 | // Each time through the loop, use fbuf and then get it back for the next 80 | // iteration. 81 | 82 | let (result, fbuf1) = stream.read_fixed(fbuf).await; 83 | fbuf = { 84 | let read = result.unwrap(); 85 | if read == 0 { 86 | break; 87 | } 88 | assert_eq!(4096, fbuf1.len()); // To prove a point. 89 | 90 | let (res, nslice) = stream.write_fixed_all(fbuf1.slice(..read)).await; 91 | 92 | res.unwrap(); 93 | println!("peer {} all {} bytes ping-ponged", peer, read); 94 | n += read; 95 | 96 | // Important. One of the points of this example. 97 | nslice.into_inner() // Return the buffer we started with. 
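            // (Dropping the slice here instead would check the buffer back in to the
            // registry, forcing a fresh check_out on the next iteration.)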
98 | }; 99 | } 100 | let _ = stream.shutdown(std::net::Shutdown::Write); 101 | println!("peer {} closed, {} total ping-ponged", peer, n); 102 | } 103 | -------------------------------------------------------------------------------- /examples/tcp_stream.rs: -------------------------------------------------------------------------------- 1 | use std::{env, net::SocketAddr}; 2 | 3 | use tokio_uring::net::TcpStream; 4 | 5 | fn main() { 6 | let args: Vec<_> = env::args().collect(); 7 | 8 | if args.len() <= 1 { 9 | panic!("no addr specified"); 10 | } 11 | 12 | let socket_addr: SocketAddr = args[1].parse().unwrap(); 13 | 14 | tokio_uring::start(async { 15 | let stream = TcpStream::connect(socket_addr).await.unwrap(); 16 | let buf = vec![1u8; 128]; 17 | 18 | let (result, buf) = stream.write(buf).submit().await; 19 | println!("written: {}", result.unwrap()); 20 | 21 | let (result, buf) = stream.read(buf).await; 22 | let read = result.unwrap(); 23 | println!("read: {:?}", &buf[..read]); 24 | }); 25 | } 26 | -------------------------------------------------------------------------------- /examples/test_create_dir_all.rs: -------------------------------------------------------------------------------- 1 | use std::io; 2 | use std::path::Path; 3 | use tokio_uring::fs; 4 | 5 | fn tests() -> std::slice::Iter<'static, Expected<'static>> { 6 | [ 7 | // 8 | // A number of Fail cases because of permissions (assuming not running as root). 9 | // 10 | Expected::Fail(Op::create_dir("/no-good")), 11 | Expected::Fail(Op::create_dir("/no-good/lots/more")), 12 | Expected::Fail(Op::create_dir_all("/no-good")), 13 | Expected::Fail(Op::create_dir_all("/no-good/lots/more")), 14 | Expected::Fail(Op::DirBuilder("/no-good")), 15 | Expected::Fail(Op::DirBuilder2("/no-good/lots/more", false, 0o777)), 16 | Expected::Fail(Op::DirBuilder2("/no-good/lots/more", true, 0o777)), 17 | // 18 | // A sequence of steps where assumption is /tmp exists and /tmp/test-good does not. 19 | // 20 | Expected::Pass(Op::create_dir("/tmp/test-good")), 21 | Expected::Pass(Op::statx("/tmp/test-good")), 22 | Expected::Pass(Op::StatxBuilder("/tmp/test-good")), 23 | Expected::Pass(Op::StatxBuilder2("/tmp", "test-good")), 24 | Expected::Pass(Op::StatxBuilder2("/tmp", "./test-good")), 25 | Expected::Pass(Op::StatxBuilder2("/tmp/", "./test-good")), 26 | Expected::Pass(Op::StatxBuilder2("/etc/", "/tmp/test-good")), 27 | Expected::Pass(Op::is_dir("/tmp/test-good")), 28 | Expected::Fail(Op::is_regfile("/tmp/test-good")), 29 | Expected::Pass(Op::create_dir("/tmp/test-good/x1")), 30 | Expected::Fail(Op::create_dir("/tmp/test-good/x1")), 31 | Expected::Pass(Op::remove_dir("/tmp/test-good/x1")), 32 | Expected::Fail(Op::remove_dir("/tmp/test-good/x1")), 33 | Expected::Pass(Op::remove_dir("/tmp/test-good")), 34 | Expected::Pass(Op::create_dir_all("/tmp/test-good/lots/lots/more")), 35 | Expected::Pass(Op::create_dir_all("/tmp/test-good/lots/lots/more")), 36 | Expected::Pass(Op::remove_dir("/tmp/test-good/lots/lots/more")), 37 | Expected::Pass(Op::remove_dir("/tmp/test-good/lots/lots")), 38 | Expected::Pass(Op::remove_dir("/tmp/test-good/lots")), 39 | Expected::Pass(Op::remove_dir("/tmp/test-good")), 40 | Expected::Fail(Op::statx("/tmp/test-good")), 41 | Expected::Fail(Op::StatxBuilder("/tmp/test-good")), 42 | // 43 | // A sequence that tests when mode is passed as 0, the directory can't be written to. 
44 | // 45 | Expected::Pass(Op::DirBuilder2("/tmp/test-good", true, 0)), 46 | Expected::Pass(Op::matches_mode("/tmp/test-good", 0)), 47 | Expected::Fail(Op::create_dir("/tmp/test-good/x1")), 48 | Expected::Pass(Op::remove_dir("/tmp/test-good")), 49 | // 50 | // A sequence that tests creation of a user rwx only directory 51 | // 52 | Expected::Pass(Op::DirBuilder2("/tmp/test-good", true, 0o700)), 53 | Expected::Pass(Op::matches_mode("/tmp/test-good", 0o700)), 54 | Expected::Pass(Op::create_dir("/tmp/test-good/x1")), 55 | Expected::Pass(Op::remove_dir("/tmp/test-good/x1")), 56 | Expected::Pass(Op::remove_dir("/tmp/test-good")), 57 | // 58 | // Same sequence but with recursive = false 59 | // 60 | Expected::Pass(Op::DirBuilder2("/tmp/test-good", false, 0)), 61 | Expected::Fail(Op::create_dir("/tmp/test-good/x1")), 62 | Expected::Pass(Op::remove_dir("/tmp/test-good")), 63 | // 64 | // Some file operations 65 | // 66 | Expected::Pass(Op::touch_file("/tmp/test-good-file")), 67 | Expected::Pass(Op::is_regfile("/tmp/test-good-file")), 68 | Expected::Fail(Op::is_dir("/tmp/test-good-file")), 69 | Expected::Pass(Op::remove_file("/tmp/test-good-file")), 70 | Expected::Fail(Op::is_regfile("/tmp/test-good-file")), 71 | Expected::Fail(Op::is_dir("/tmp/test-good-file")), 72 | ] 73 | .iter() 74 | } 75 | 76 | type OpPath<'a> = &'a str; 77 | 78 | #[allow(non_camel_case_types)] 79 | #[allow(dead_code)] 80 | #[derive(Debug)] 81 | enum Op<'a> { 82 | statx(OpPath<'a>), 83 | StatxBuilder(OpPath<'a>), 84 | StatxBuilder2(OpPath<'a>, OpPath<'a>), 85 | matches_mode(OpPath<'a>, u16), 86 | is_regfile(OpPath<'a>), 87 | is_dir(OpPath<'a>), 88 | touch_file(OpPath<'a>), 89 | create_dir(OpPath<'a>), 90 | create_dir_all(OpPath<'a>), 91 | DirBuilder(OpPath<'a>), 92 | DirBuilder2(OpPath<'a>, bool, u32), 93 | remove_file(OpPath<'a>), 94 | remove_dir(OpPath<'a>), 95 | } 96 | 97 | #[derive(Debug)] 98 | enum Expected<'a> { 99 | Pass(Op<'a>), 100 | Fail(Op<'a>), 101 | } 102 | 103 | async fn main1() -> io::Result<()> { 104 | let (mut as_expected, mut unexpected) = (0, 0); 105 | 106 | for test in tests() { 107 | let (expect_to_pass, op) = match test { 108 | Expected::Pass(op) => (true, op), 109 | Expected::Fail(op) => (false, op), 110 | }; 111 | let res = match op { 112 | Op::statx(path) => statx(path).await, 113 | Op::StatxBuilder(path) => statx_builder(path).await, 114 | Op::StatxBuilder2(path, rel_path) => statx_builder2(path, rel_path).await, 115 | Op::matches_mode(path, mode) => matches_mode(path, *mode).await, 116 | Op::is_regfile(path) => is_regfile(path).await, 117 | Op::is_dir(path) => is_dir(path).await, 118 | Op::touch_file(path) => touch_file(path).await, 119 | Op::create_dir(path) => fs::create_dir(path).await, 120 | Op::create_dir_all(path) => fs::create_dir_all(path).await, 121 | Op::DirBuilder(path) => fs::DirBuilder::new().create(path).await, 122 | Op::DirBuilder2(path, recursive, mode) => { 123 | fs::DirBuilder::new() 124 | .recursive(*recursive) 125 | .mode(*mode) 126 | .create(path) 127 | .await 128 | } 129 | Op::remove_file(path) => fs::remove_file(path).await, 130 | Op::remove_dir(path) => fs::remove_dir(path).await, 131 | }; 132 | 133 | let verbose = true; 134 | 135 | match res { 136 | Ok(_) => { 137 | if expect_to_pass { 138 | as_expected += 1; 139 | if verbose { 140 | println!("Success: {op:?} passed."); 141 | } 142 | } else { 143 | unexpected += 1; 144 | println!("Failure: {op:?} expected to fail but passed."); 145 | } 146 | } 147 | Err(e) => { 148 | if expect_to_pass { 149 | unexpected += 1; 150 | 
println!("Failure: {op:?} expected to pass but failed with error \"{e}\"."); 151 | } else { 152 | as_expected += 1; 153 | if verbose { 154 | println!("Success: {op:?} expected to fail and did with error \"{e}\"."); 155 | } 156 | } 157 | } 158 | } 159 | } 160 | 161 | println!("{as_expected} as_expected, {unexpected} unexpected"); 162 | 163 | if unexpected == 0 { 164 | Ok(()) 165 | } else { 166 | Err(std::io::Error::new( 167 | std::io::ErrorKind::Other, 168 | format!("{unexpected} unexpected result(s)"), 169 | )) 170 | } 171 | } 172 | 173 | async fn statx>(path: P) -> io::Result<()> { 174 | let _statx = tokio_uring::fs::statx(path).await?; 175 | Ok(()) 176 | } 177 | 178 | async fn statx_builder>(path: P) -> io::Result<()> { 179 | let _statx = tokio_uring::fs::StatxBuilder::new() 180 | .pathname(path)? 181 | .statx() 182 | .await?; 183 | Ok(()) 184 | } 185 | 186 | async fn statx_builder2>(dir_path: P, rel_path: P) -> io::Result<()> { 187 | // This shows the power of combining an open file, presumably a directory, and the relative 188 | // path to have the statx operation return the meta data for the child of the opened directory 189 | // descriptor. 190 | let f = tokio_uring::fs::File::open(dir_path).await?; 191 | 192 | // Fetch file metadata 193 | let res = f.statx_builder().pathname(rel_path)?.statx().await; 194 | 195 | // Close the file 196 | f.close().await?; 197 | 198 | res.map(|_| ()) 199 | } 200 | 201 | async fn matches_mode>(path: P, want_mode: u16) -> io::Result<()> { 202 | let statx = tokio_uring::fs::StatxBuilder::new() 203 | .mask(libc::STATX_MODE) 204 | .pathname(path)? 205 | .statx() 206 | .await?; 207 | let got_mode = statx.stx_mode & 0o7777; 208 | if want_mode == got_mode { 209 | Ok(()) 210 | } else { 211 | Err(std::io::Error::new( 212 | std::io::ErrorKind::Other, 213 | format!("want mode {want_mode:#o}, got mode {got_mode:#o}"), 214 | )) 215 | } 216 | } 217 | 218 | async fn touch_file>(path: P) -> io::Result<()> { 219 | let file = tokio_uring::fs::OpenOptions::new() 220 | .append(true) 221 | .create(true) 222 | .open(path) 223 | .await?; 224 | 225 | file.close().await 226 | } 227 | 228 | async fn is_regfile>(path: P) -> io::Result<()> { 229 | let (_is_dir, is_regfile) = tokio_uring::fs::is_dir_regfile(path).await; 230 | 231 | if is_regfile { 232 | Ok(()) 233 | } else { 234 | Err(std::io::Error::new( 235 | std::io::ErrorKind::Other, 236 | "not regular file", 237 | )) 238 | } 239 | } 240 | 241 | async fn is_dir>(path: P) -> io::Result<()> { 242 | let (is_dir, _is_regfile) = tokio_uring::fs::is_dir_regfile(path).await; 243 | 244 | if is_dir { 245 | Ok(()) 246 | } else { 247 | Err(std::io::Error::new( 248 | std::io::ErrorKind::Other, 249 | "not directory", 250 | )) 251 | } 252 | } 253 | 254 | fn main() { 255 | tokio_uring::start(async { 256 | if let Err(e) = main1().await { 257 | println!("error: {}", e); 258 | } 259 | }); 260 | } 261 | -------------------------------------------------------------------------------- /examples/udp_socket.rs: -------------------------------------------------------------------------------- 1 | use std::{env, net::SocketAddr}; 2 | use tokio_uring::net::UdpSocket; 3 | 4 | fn main() { 5 | let args: Vec<_> = env::args().collect(); 6 | 7 | if args.len() <= 1 { 8 | panic!("no addr specified"); 9 | } 10 | 11 | let socket_addr: SocketAddr = args[1].parse().unwrap(); 12 | 13 | tokio_uring::start(async { 14 | let socket = UdpSocket::bind(socket_addr).await.unwrap(); 15 | 16 | let buf = vec![0u8; 128]; 17 | 18 | let (result, mut buf) = 
socket.recv_from(buf).await; 19 | let (read, socket_addr) = result.unwrap(); 20 | buf.resize(read, 0); 21 | println!("received from {}: {:?}", socket_addr, &buf[..]); 22 | 23 | let (result, _buf) = socket.send_to(buf, socket_addr).await; 24 | println!("sent to {}: {}", socket_addr, result.unwrap()); 25 | }); 26 | } 27 | -------------------------------------------------------------------------------- /examples/unix_listener.rs: -------------------------------------------------------------------------------- 1 | use std::env; 2 | 3 | use tokio_uring::net::UnixListener; 4 | 5 | fn main() { 6 | let args: Vec<_> = env::args().collect(); 7 | 8 | if args.len() <= 1 { 9 | panic!("no addr specified"); 10 | } 11 | 12 | let socket_addr: String = args[1].clone(); 13 | 14 | tokio_uring::start(async { 15 | let listener = UnixListener::bind(&socket_addr).unwrap(); 16 | 17 | loop { 18 | let stream = listener.accept().await.unwrap(); 19 | let socket_addr = socket_addr.clone(); 20 | tokio_uring::spawn(async move { 21 | let buf = vec![1u8; 128]; 22 | 23 | let (result, buf) = stream.write(buf).submit().await; 24 | println!("written to {}: {}", &socket_addr, result.unwrap()); 25 | 26 | let (result, buf) = stream.read(buf).await; 27 | let read = result.unwrap(); 28 | println!("read from {}: {:?}", &socket_addr, &buf[..read]); 29 | }); 30 | } 31 | }); 32 | } 33 | -------------------------------------------------------------------------------- /examples/unix_stream.rs: -------------------------------------------------------------------------------- 1 | use std::env; 2 | 3 | use tokio_uring::net::UnixStream; 4 | 5 | fn main() { 6 | let args: Vec<_> = env::args().collect(); 7 | 8 | if args.len() <= 1 { 9 | panic!("no addr specified"); 10 | } 11 | 12 | let socket_addr: &String = &args[1]; 13 | 14 | tokio_uring::start(async { 15 | let stream = UnixStream::connect(socket_addr).await.unwrap(); 16 | let buf = vec![1u8; 128]; 17 | 18 | let (result, buf) = stream.write(buf).submit().await; 19 | println!("written: {}", result.unwrap()); 20 | 21 | let (result, buf) = stream.read(buf).await; 22 | let read = result.unwrap(); 23 | println!("read: {:?}", &buf[..read]); 24 | }); 25 | } 26 | -------------------------------------------------------------------------------- /examples/wrk-bench.rs: -------------------------------------------------------------------------------- 1 | use std::io; 2 | use std::rc::Rc; 3 | use tokio::task::JoinHandle; 4 | 5 | pub const RESPONSE: &[u8] = 6 | b"HTTP/1.1 200 OK\nContent-Type: text/plain\nContent-Length: 12\n\nHello world!"; 7 | 8 | pub const ADDRESS: &str = "127.0.0.1:8080"; 9 | 10 | fn main() -> io::Result<()> { 11 | tokio_uring::start(async { 12 | let mut tasks = Vec::with_capacity(16); 13 | let listener = Rc::new(tokio_uring::net::TcpListener::bind( 14 | ADDRESS.parse().unwrap(), 15 | )?); 16 | 17 | for _ in 0..16 { 18 | let listener = listener.clone(); 19 | let task: JoinHandle> = tokio::task::spawn_local(async move { 20 | loop { 21 | let (stream, _) = listener.accept().await?; 22 | 23 | tokio_uring::spawn(async move { 24 | let (result, _) = stream.write(RESPONSE).submit().await; 25 | 26 | if let Err(err) = result { 27 | eprintln!("Client connection failed: {}", err); 28 | } 29 | }); 30 | } 31 | }); 32 | tasks.push(task); 33 | } 34 | 35 | for t in tasks { 36 | t.await.unwrap()?; 37 | } 38 | 39 | Ok(()) 40 | }) 41 | } 42 | -------------------------------------------------------------------------------- /src/buf/bounded.rs: 
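The `BoundedBuf` trait defined next is what backs the `buf.slice(..n)` calls in the examples above. A minimal sketch of the ownership round-trip it enables, assuming the `write_all` signature used in the TCP listener example (`write_prefix` is a hypothetical helper, not a crate API):

```rust
use tokio_uring::buf::BoundedBuf; // for .slice()
use tokio_uring::net::TcpStream;

// Write only the first `len` bytes of `buf`, then hand the full Vec back to the caller.
async fn write_prefix(
    stream: &TcpStream,
    buf: Vec<u8>,
    len: usize,
) -> (std::io::Result<()>, Vec<u8>) {
    // Ownership of the bounded view moves into the operation...
    let (res, slice) = stream.write_all(buf.slice(..len)).await;
    // ...and the underlying buffer is recovered once the kernel is done with it.
    (res, slice.into_inner())
}
```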
-------------------------------------------------------------------------------- 1 | use super::{IoBuf, IoBufMut, Slice}; 2 | 3 | use std::ops; 4 | use std::ptr; 5 | 6 | /// A possibly bounded view into an owned [`IoBuf`] buffer. 7 | /// 8 | /// Because buffers are passed by ownership to the runtime, Rust's slice API 9 | /// (`&buf[..]`) cannot be used. Instead, `tokio-uring` provides an owned slice 10 | /// API: [`.slice()`]. The method takes ownership of the buffer and returns a 11 | /// [`Slice`] value that tracks the requested range. 12 | /// 13 | /// This trait provides a generic way to use buffers and `Slice` views 14 | /// into such buffers with `io-uring` operations. 15 | /// 16 | /// [`.slice()`]: BoundedBuf::slice 17 | pub trait BoundedBuf: Unpin + 'static { 18 | /// The type of the underlying buffer. 19 | type Buf: IoBuf; 20 | 21 | /// The type representing the range bounds of the view. 22 | type Bounds: ops::RangeBounds; 23 | 24 | /// Returns a view of the buffer with the specified range. 25 | /// 26 | /// This method is similar to Rust's slicing (`&buf[..]`), but takes 27 | /// ownership of the buffer. The range bounds are specified against 28 | /// the possibly offset beginning of the `self` view into the buffer 29 | /// and the end bound, if specified, must not exceed the view's total size. 30 | /// Note that the range may extend into the uninitialized part of the 31 | /// buffer, but it must start (if so bounded) in the initialized part 32 | /// or immediately adjacent to it. 33 | /// 34 | /// # Panics 35 | /// 36 | /// If the range is invalid with regard to the recipient's total size or 37 | /// the length of its initialized part, the implementation of this method 38 | /// should panic. 39 | /// 40 | /// # Examples 41 | /// 42 | /// ``` 43 | /// use tokio_uring::buf::BoundedBuf; 44 | /// 45 | /// let buf = b"hello world".to_vec(); 46 | /// let slice = buf.slice(5..10); 47 | /// assert_eq!(&slice[..], b" worl"); 48 | /// let slice = slice.slice(1..3); 49 | /// assert_eq!(&slice[..], b"wo"); 50 | /// ``` 51 | fn slice(self, range: impl ops::RangeBounds) -> Slice; 52 | 53 | /// Returns a `Slice` with the view's full range. 54 | /// 55 | /// This method is to be used by the `tokio-uring` runtime and it is not 56 | /// expected for users to call it directly. 57 | fn slice_full(self) -> Slice; 58 | 59 | /// Gets a reference to the underlying buffer. 60 | fn get_buf(&self) -> &Self::Buf; 61 | 62 | /// Returns the range bounds for this view. 63 | fn bounds(&self) -> Self::Bounds; 64 | 65 | /// Constructs a view from an underlying buffer and range bounds. 66 | fn from_buf_bounds(buf: Self::Buf, bounds: Self::Bounds) -> Self; 67 | 68 | /// Like [`IoBuf::stable_ptr`], 69 | /// but possibly offset to the view's starting position. 70 | fn stable_ptr(&self) -> *const u8; 71 | 72 | /// Number of initialized bytes available via this view. 73 | fn bytes_init(&self) -> usize; 74 | 75 | /// Total size of the view, including uninitialized memory, if any. 
76 | fn bytes_total(&self) -> usize; 77 | } 78 | 79 | impl BoundedBuf for T { 80 | type Buf = Self; 81 | type Bounds = ops::RangeFull; 82 | 83 | fn slice(self, range: impl ops::RangeBounds) -> Slice { 84 | use ops::Bound; 85 | 86 | let begin = match range.start_bound() { 87 | Bound::Included(&n) => n, 88 | Bound::Excluded(&n) => n.checked_add(1).expect("out of range"), 89 | Bound::Unbounded => 0, 90 | }; 91 | 92 | assert!(begin < self.bytes_total()); 93 | 94 | let end = match range.end_bound() { 95 | Bound::Included(&n) => n.checked_add(1).expect("out of range"), 96 | Bound::Excluded(&n) => n, 97 | Bound::Unbounded => self.bytes_total(), 98 | }; 99 | 100 | assert!(end <= self.bytes_total()); 101 | assert!(begin <= self.bytes_init()); 102 | 103 | Slice::new(self, begin, end) 104 | } 105 | 106 | fn slice_full(self) -> Slice { 107 | let end = self.bytes_total(); 108 | Slice::new(self, 0, end) 109 | } 110 | 111 | fn get_buf(&self) -> &Self { 112 | self 113 | } 114 | 115 | fn bounds(&self) -> Self::Bounds { 116 | .. 117 | } 118 | 119 | fn from_buf_bounds(buf: Self, _: ops::RangeFull) -> Self { 120 | buf 121 | } 122 | 123 | fn stable_ptr(&self) -> *const u8 { 124 | IoBuf::stable_ptr(self) 125 | } 126 | 127 | fn bytes_init(&self) -> usize { 128 | IoBuf::bytes_init(self) 129 | } 130 | 131 | fn bytes_total(&self) -> usize { 132 | IoBuf::bytes_total(self) 133 | } 134 | } 135 | 136 | /// A possibly bounded view into an owned [`IoBufMut`] buffer. 137 | /// 138 | /// This trait provides a generic way to use mutable buffers and `Slice` views 139 | /// into such buffers with `io-uring` operations. 140 | pub trait BoundedBufMut: BoundedBuf { 141 | /// The type of the underlying buffer. 142 | type BufMut: IoBufMut; 143 | 144 | /// Like [`IoBufMut::stable_mut_ptr`], 145 | /// but possibly offset to the view's starting position. 146 | fn stable_mut_ptr(&mut self) -> *mut u8; 147 | 148 | /// Like [`IoBufMut::set_init`], 149 | /// but the position is possibly offset to the view's starting position. 150 | /// 151 | /// # Safety 152 | /// 153 | /// The caller must ensure that all bytes starting at `stable_mut_ptr()` up 154 | /// to `pos` are initialized and owned by the buffer. 155 | unsafe fn set_init(&mut self, pos: usize); 156 | 157 | /// Copies the given byte slice into the buffer, starting at 158 | /// this view's offset. 159 | /// 160 | /// # Panics 161 | /// 162 | /// If the slice's length exceeds the destination's total capacity, 163 | /// this method panics. 164 | fn put_slice(&mut self, src: &[u8]) { 165 | assert!(self.bytes_total() >= src.len()); 166 | let dst = self.stable_mut_ptr(); 167 | 168 | // Safety: 169 | // dst pointer validity is ensured by stable_mut_ptr; 170 | // the length is checked to not exceed the view's total capacity; 171 | // src (immutable) and dst (mutable) cannot point to overlapping memory; 172 | // after copying the amount of bytes given by the slice, it's safe 173 | // to mark them as initialized in the buffer. 
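        // Note: the position passed to set_init below is relative to this view's
        // starting offset, matching the BoundedBufMut::set_init contract above.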
174 | unsafe { 175 | ptr::copy_nonoverlapping(src.as_ptr(), dst, src.len()); 176 | self.set_init(src.len()); 177 | } 178 | } 179 | } 180 | 181 | impl BoundedBufMut for T { 182 | type BufMut = T; 183 | 184 | fn stable_mut_ptr(&mut self) -> *mut u8 { 185 | IoBufMut::stable_mut_ptr(self) 186 | } 187 | 188 | unsafe fn set_init(&mut self, pos: usize) { 189 | IoBufMut::set_init(self, pos) 190 | } 191 | } 192 | -------------------------------------------------------------------------------- /src/buf/fixed/buffers.rs: -------------------------------------------------------------------------------- 1 | use libc::iovec; 2 | 3 | // Abstracts management of fixed buffers in a buffer registry. 4 | pub(crate) trait FixedBuffers { 5 | // Provides access to the raw buffers as a slice of iovec. 6 | fn iovecs(&self) -> &[iovec]; 7 | 8 | /// Sets the indexed buffer's state to free and records the updated length 9 | /// of its initialized part. 10 | /// 11 | /// # Panics 12 | /// 13 | /// The buffer addressed must be in the checked out state, 14 | /// otherwise this function may panic. 15 | /// 16 | /// # Safety 17 | /// 18 | /// While the implementation of this method typically does not need to 19 | /// do anything unsafe, the caller must ensure that the bytes in the buffer 20 | /// are initialized up to the specified length. 21 | unsafe fn check_in(&mut self, buf_index: u16, init_len: usize); 22 | } 23 | -------------------------------------------------------------------------------- /src/buf/fixed/handle.rs: -------------------------------------------------------------------------------- 1 | use super::FixedBuffers; 2 | use crate::buf::{IoBuf, IoBufMut}; 3 | 4 | use libc::iovec; 5 | use std::cell::RefCell; 6 | use std::fmt::{self, Debug}; 7 | use std::ops::{Deref, DerefMut}; 8 | use std::rc::Rc; 9 | 10 | // Data to construct a `FixedBuf` handle from. 11 | pub(crate) struct CheckedOutBuf { 12 | // Pointer and size of the buffer. 13 | pub iovec: iovec, 14 | // Length of the initialized part. 15 | pub init_len: usize, 16 | // Buffer index. 17 | pub index: u16, 18 | } 19 | 20 | /// A unique handle to a memory buffer that can be pre-registered with 21 | /// the kernel for `io-uring` operations. 22 | /// 23 | /// `FixedBuf` handles can be obtained from a collection of fixed buffers, 24 | /// either [`FixedBufRegistry`] or [`FixedBufPool`]. 25 | /// For each buffer, only a single `FixedBuf` handle can be either used by the 26 | /// application code or owned by an I/O operation at any given time, 27 | /// thus avoiding data races between `io-uring` operations in flight and 28 | /// the application accessing buffer data. 29 | /// 30 | /// [`FixedBufRegistry`]: super::FixedBufRegistry 31 | /// [`FixedBufPool`]: super::FixedBufPool 32 | /// 33 | pub struct FixedBuf { 34 | registry: Rc>, 35 | buf: CheckedOutBuf, 36 | } 37 | 38 | impl Drop for FixedBuf { 39 | fn drop(&mut self) { 40 | let mut registry = self.registry.borrow_mut(); 41 | // Safety: the length of the initialized data in the buffer has been 42 | // maintained accordingly to the safety contracts on 43 | // Self::new and IoBufMut. 44 | unsafe { 45 | registry.check_in(self.buf.index, self.buf.init_len); 46 | } 47 | } 48 | } 49 | 50 | impl FixedBuf { 51 | // Safety: Validity constraints must apply to CheckedOutBuf members: 52 | // - the array will not be deallocated until the buffer is checked in; 53 | // - the data in the array must be initialized up to the number of bytes 54 | // given in init_len. 
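    // - the index must identify this buffer within the registry, as Drop passes it
    //   back unchanged via FixedBuffers::check_in.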
55 | pub(super) unsafe fn new(registry: Rc>, buf: CheckedOutBuf) -> Self { 56 | FixedBuf { registry, buf } 57 | } 58 | 59 | /// Index of the underlying registry buffer 60 | pub fn buf_index(&self) -> u16 { 61 | self.buf.index 62 | } 63 | } 64 | 65 | unsafe impl IoBuf for FixedBuf { 66 | fn stable_ptr(&self) -> *const u8 { 67 | self.buf.iovec.iov_base as _ 68 | } 69 | 70 | fn bytes_init(&self) -> usize { 71 | self.buf.init_len 72 | } 73 | 74 | fn bytes_total(&self) -> usize { 75 | self.buf.iovec.iov_len 76 | } 77 | } 78 | 79 | unsafe impl IoBufMut for FixedBuf { 80 | fn stable_mut_ptr(&mut self) -> *mut u8 { 81 | self.buf.iovec.iov_base as _ 82 | } 83 | 84 | unsafe fn set_init(&mut self, pos: usize) { 85 | if self.buf.init_len < pos { 86 | self.buf.init_len = pos 87 | } 88 | } 89 | } 90 | 91 | impl Deref for FixedBuf { 92 | type Target = [u8]; 93 | 94 | fn deref(&self) -> &[u8] { 95 | // Safety: The iovec points to a slice held in self.buffers, to which no mutable reference exists. 96 | unsafe { std::slice::from_raw_parts(self.buf.iovec.iov_base as _, self.buf.init_len) } 97 | } 98 | } 99 | 100 | impl DerefMut for FixedBuf { 101 | fn deref_mut(&mut self) -> &mut [u8] { 102 | // Safety: The iovec points to a slice held in self.buffers, to which no other reference exists. 103 | unsafe { std::slice::from_raw_parts_mut(self.buf.iovec.iov_base as _, self.buf.init_len) } 104 | } 105 | } 106 | 107 | impl Debug for FixedBuf { 108 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 109 | let buf: &[u8] = self; 110 | f.debug_struct("FixedBuf") 111 | .field("buf", &buf) // as slice 112 | .field("index", &self.buf.index) 113 | .finish_non_exhaustive() 114 | } 115 | } 116 | -------------------------------------------------------------------------------- /src/buf/fixed/mod.rs: -------------------------------------------------------------------------------- 1 | //! Buffers pre-registered with the kernel. 2 | //! 3 | //! This module provides facilities for registering in-memory buffers with 4 | //! the `tokio-uring` runtime. Operations like [`File::read_fixed_at`][rfa] and 5 | //! [`File::write_fixed_at`][wfa] make use of buffers pre-mapped by 6 | //! the kernel to reduce per-I/O overhead. 7 | //! 8 | //! Two kinds of buffer collections are provided: [`FixedBufRegistry`] and 9 | //! [`FixedBufPool`], realizing two different patterns of buffer management. 10 | //! The `register` method on either of these types is used to register a 11 | //! collection of buffers with the kernel. It must be called before any of 12 | //! the [`FixedBuf`] handles to the collection's buffers can be used with 13 | //! I/O operations. 14 | //! 15 | //! [rfa]: crate::fs::File::read_fixed_at 16 | //! [wfa]: crate::fs::File::write_fixed_at 17 | 18 | mod handle; 19 | pub use handle::FixedBuf; 20 | 21 | mod buffers; 22 | pub(crate) use buffers::FixedBuffers; 23 | 24 | mod plumbing; 25 | 26 | pub mod pool; 27 | pub use pool::FixedBufPool; 28 | 29 | mod registry; 30 | pub use registry::FixedBufRegistry; 31 | -------------------------------------------------------------------------------- /src/buf/fixed/plumbing/mod.rs: -------------------------------------------------------------------------------- 1 | // Internal data structures shared between thread-local and thread-safe 2 | // fixed buffer collections. 
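// Registry keeps a flat, index-addressed set of buffers that are either free or
// checked out; Pool additionally threads free buffers into per-capacity lists and
// can notify tasks waiting for a buffer of a given size to be checked back in.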
3 | 4 | mod pool; 5 | pub(super) use pool::Pool; 6 | 7 | mod registry; 8 | pub(super) use registry::Registry; 9 | -------------------------------------------------------------------------------- /src/buf/fixed/plumbing/pool.rs: -------------------------------------------------------------------------------- 1 | use crate::buf::fixed::{handle::CheckedOutBuf, FixedBuffers}; 2 | use crate::buf::IoBufMut; 3 | 4 | use libc::{iovec, UIO_MAXIOV}; 5 | use tokio::sync::Notify; 6 | 7 | use std::cmp; 8 | use std::collections::HashMap; 9 | use std::mem; 10 | use std::ptr; 11 | use std::slice; 12 | use std::sync::Arc; 13 | 14 | // Internal state shared by FixedBufPool and FixedBuf handles. 15 | pub(crate) struct Pool { 16 | // Pointer to an allocated array of iovec records referencing 17 | // the allocated buffers. The number of initialized records is the 18 | // same as the length of the states array. 19 | raw_bufs: ptr::NonNull, 20 | // Original capacity of raw_bufs as a Vec. 21 | orig_cap: usize, 22 | // State information on the buffers. Indices in this array correspond to 23 | // the indices in the array at raw_bufs. 24 | states: Vec, 25 | // Table of head indices of the free buffer lists in each size bucket. 26 | free_buf_head_by_cap: HashMap, 27 | // Original buffers, kept until drop 28 | buffers: Vec, 29 | // Used to notify tasks pending on `next` 30 | notify_next_by_cap: HashMap>, 31 | } 32 | 33 | // State information of a buffer in the registry, 34 | enum BufState { 35 | // The buffer is not in use. 36 | Free { 37 | // This field records the length of the initialized part. 38 | init_len: usize, 39 | // Index of the next buffer of the same capacity in a free buffer list, if any. 40 | next: Option, 41 | }, 42 | // The buffer is checked out. 43 | // Its data are logically owned by the FixedBuf handle, 44 | // which also keeps track of the length of the initialized part. 45 | CheckedOut, 46 | } 47 | 48 | impl Pool { 49 | pub(crate) fn new(bufs: impl Iterator) -> Self { 50 | // Limit the number of buffers to the maximum allowable number. 51 | let bufs = bufs.take(cmp::min(UIO_MAXIOV as usize, u16::MAX as usize)); 52 | // Collect into `buffers`, which holds the backing buffers for 53 | // the lifetime of the pool. Using collect may allow 54 | // the compiler to apply collect in place specialization, 55 | // to avoid an allocation. 56 | let mut buffers = bufs.collect::>(); 57 | let mut iovecs = Vec::with_capacity(buffers.len()); 58 | let mut states = Vec::with_capacity(buffers.len()); 59 | let mut free_buf_head_by_cap = HashMap::new(); 60 | for (index, buf) in buffers.iter_mut().enumerate() { 61 | let cap = buf.bytes_total(); 62 | 63 | // Link the buffer as the head of the free list for its capacity. 64 | // This constructs the free buffer list to be initially retrieved 65 | // back to front, which should be of no difference to the user. 
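            // HashMap::insert returns the previous head for this capacity (if any),
            // which becomes this buffer's `next` link in the free list.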
66 | let next = free_buf_head_by_cap.insert(cap, index as u16); 67 | 68 | iovecs.push(iovec { 69 | iov_base: buf.stable_mut_ptr() as *mut _, 70 | iov_len: cap, 71 | }); 72 | states.push(BufState::Free { 73 | init_len: buf.bytes_init(), 74 | next, 75 | }); 76 | } 77 | debug_assert_eq!(iovecs.len(), states.len()); 78 | debug_assert_eq!(iovecs.len(), buffers.len()); 79 | 80 | // Safety: Vec::as_mut_ptr never returns null 81 | let raw_bufs = unsafe { ptr::NonNull::new_unchecked(iovecs.as_mut_ptr()) }; 82 | let orig_cap = iovecs.capacity(); 83 | mem::forget(iovecs); 84 | Pool { 85 | raw_bufs, 86 | orig_cap, 87 | states, 88 | free_buf_head_by_cap, 89 | buffers, 90 | notify_next_by_cap: HashMap::new(), 91 | } 92 | } 93 | 94 | // If the free buffer list for this capacity is not empty, checks out the first buffer 95 | // from the list and returns its data. Otherwise, returns None. 96 | pub(crate) fn try_next(&mut self, cap: usize) -> Option { 97 | let free_head = self.free_buf_head_by_cap.get_mut(&cap)?; 98 | let index = *free_head as usize; 99 | let state = &mut self.states[index]; 100 | 101 | let (init_len, next) = match *state { 102 | BufState::Free { init_len, next } => { 103 | *state = BufState::CheckedOut; 104 | (init_len, next) 105 | } 106 | BufState::CheckedOut => panic!("buffer is checked out"), 107 | }; 108 | 109 | // Update the head of the free list for this capacity. 110 | match next { 111 | Some(i) => { 112 | *free_head = i; 113 | } 114 | None => { 115 | self.free_buf_head_by_cap.remove(&cap); 116 | } 117 | } 118 | 119 | // Safety: the allocated array under the pointer is valid 120 | // for the lifetime of self, a free buffer index is inside the array, 121 | // as also asserted by the indexing operation on the states array 122 | // that has the same length. 123 | let iovec = unsafe { self.raw_bufs.as_ptr().add(index).read() }; 124 | debug_assert_eq!(iovec.iov_len, cap); 125 | Some(CheckedOutBuf { 126 | iovec, 127 | init_len, 128 | index: index as u16, 129 | }) 130 | } 131 | 132 | // Returns a `Notify` to use for waking up tasks awaiting a buffer of 133 | // the specified capacity. 134 | pub(crate) fn notify_on_next(&mut self, cap: usize) -> Arc { 135 | let notify = self.notify_next_by_cap.entry(cap).or_default(); 136 | Arc::clone(notify) 137 | } 138 | 139 | fn check_in_internal(&mut self, index: u16, init_len: usize) { 140 | let cap = self.iovecs()[index as usize].iov_len; 141 | let state = &mut self.states[index as usize]; 142 | debug_assert!( 143 | matches!(state, BufState::CheckedOut), 144 | "the buffer must be checked out" 145 | ); 146 | 147 | // Link the buffer as the new head of the free list for its capacity. 148 | // Recently checked in buffers will be first to be reused, 149 | // improving cache locality. 150 | let next = self.free_buf_head_by_cap.insert(cap, index); 151 | 152 | *state = BufState::Free { init_len, next }; 153 | 154 | if let Some(notify) = self.notify_next_by_cap.get(&cap) { 155 | // Wake up a single task pending on `next` 156 | notify.notify_one(); 157 | } 158 | } 159 | } 160 | 161 | impl FixedBuffers for Pool { 162 | fn iovecs(&self) -> &[iovec] { 163 | // Safety: the raw_bufs pointer is valid for the lifetime of self, 164 | // the length of the states array is also the length of buffers array 165 | // by construction. 
166 | unsafe { slice::from_raw_parts(self.raw_bufs.as_ptr(), self.states.len()) } 167 | } 168 | 169 | unsafe fn check_in(&mut self, index: u16, init_len: usize) { 170 | self.check_in_internal(index, init_len) 171 | } 172 | } 173 | 174 | impl Drop for Pool { 175 | fn drop(&mut self) { 176 | for (i, state) in self.states.iter().enumerate() { 177 | match state { 178 | BufState::Free { init_len, .. } => { 179 | // Update buffer initialization. 180 | // The buffer is about to dropped, but this may release it 181 | // from Registry ownership, rather than deallocate. 182 | unsafe { self.buffers[i].set_init(*init_len) }; 183 | } 184 | BufState::CheckedOut => unreachable!("all buffers must be checked in"), 185 | } 186 | } 187 | 188 | // Rebuild Vec, so it's dropped 189 | let _ = unsafe { 190 | Vec::from_raw_parts(self.raw_bufs.as_ptr(), self.states.len(), self.orig_cap) 191 | }; 192 | } 193 | } 194 | -------------------------------------------------------------------------------- /src/buf/fixed/plumbing/registry.rs: -------------------------------------------------------------------------------- 1 | use crate::buf::fixed::{handle::CheckedOutBuf, FixedBuffers}; 2 | use crate::buf::IoBufMut; 3 | 4 | use libc::{iovec, UIO_MAXIOV}; 5 | use std::cmp; 6 | use std::mem; 7 | use std::ptr; 8 | use std::slice; 9 | 10 | // Internal state shared by FixedBufRegistry and FixedBuf handles. 11 | pub(crate) struct Registry { 12 | // Pointer to an allocated array of iovec records referencing 13 | // the allocated buffers. The number of initialized records is the 14 | // same as the length of the states array. 15 | raw_bufs: ptr::NonNull, 16 | // Original capacity of raw_bufs as a Vec. 17 | orig_cap: usize, 18 | // State information on the buffers. Indices in this array correspond to 19 | // the indices in the array at raw_bufs. 20 | states: Vec, 21 | // The owned buffers are kept until Drop 22 | buffers: Vec, 23 | } 24 | 25 | // State information of a buffer in the registry, 26 | enum BufState { 27 | // The buffer is not in use. 28 | // The field records the length of the initialized part. 29 | Free { init_len: usize }, 30 | // The buffer is checked out. 31 | // Its data are logically owned by the FixedBuf handle, 32 | // which also keeps track of the length of the initialized part. 33 | CheckedOut, 34 | } 35 | 36 | impl Registry { 37 | pub(crate) fn new(bufs: impl Iterator) -> Self { 38 | // Limit the number of buffers to the maximum allowable number. 39 | let bufs = bufs.take(cmp::min(UIO_MAXIOV as usize, u16::MAX as usize)); 40 | // Collect into `buffers`, which holds the backing buffers for 41 | // the lifetime of the pool. Using collect may allow 42 | // the compiler to apply collect in place specialization, 43 | // to avoid an allocation. 
44 | let mut buffers = bufs.collect::>(); 45 | let mut iovecs = Vec::with_capacity(buffers.len()); 46 | let mut states = Vec::with_capacity(buffers.len()); 47 | for buf in buffers.iter_mut() { 48 | iovecs.push(iovec { 49 | iov_base: buf.stable_mut_ptr() as *mut _, 50 | iov_len: buf.bytes_total(), 51 | }); 52 | states.push(BufState::Free { 53 | init_len: buf.bytes_init(), 54 | }); 55 | } 56 | debug_assert_eq!(iovecs.len(), states.len()); 57 | debug_assert_eq!(iovecs.len(), buffers.len()); 58 | 59 | // Safety: Vec::as_mut_ptr never returns null 60 | let raw_bufs = unsafe { ptr::NonNull::new_unchecked(iovecs.as_mut_ptr()) }; 61 | let orig_cap = iovecs.capacity(); 62 | mem::forget(iovecs); 63 | Registry { 64 | raw_bufs, 65 | orig_cap, 66 | states, 67 | buffers, 68 | } 69 | } 70 | 71 | // If the indexed buffer is free, changes its state to checked out 72 | // and returns its data. 73 | // If the buffer is already checked out, returns None. 74 | pub(crate) fn check_out(&mut self, index: usize) -> Option { 75 | let state = self.states.get_mut(index)?; 76 | let BufState::Free { init_len } = *state else { 77 | return None; 78 | }; 79 | 80 | *state = BufState::CheckedOut; 81 | 82 | // Safety: the allocated array under the pointer is valid 83 | // for the lifetime of self, the index is inside the array 84 | // as checked by Vec::get_mut above, called on the array of 85 | // states that has the same length. 86 | let iovec = unsafe { self.raw_bufs.as_ptr().add(index).read() }; 87 | debug_assert!(index <= u16::MAX as usize); 88 | Some(CheckedOutBuf { 89 | iovec, 90 | init_len, 91 | index: index as u16, 92 | }) 93 | } 94 | 95 | fn check_in_internal(&mut self, index: u16, init_len: usize) { 96 | let state = self 97 | .states 98 | .get_mut(index as usize) 99 | .expect("invalid buffer index"); 100 | debug_assert!( 101 | matches!(state, BufState::CheckedOut), 102 | "the buffer must be checked out" 103 | ); 104 | *state = BufState::Free { init_len }; 105 | } 106 | } 107 | 108 | impl FixedBuffers for Registry { 109 | fn iovecs(&self) -> &[iovec] { 110 | // Safety: the raw_bufs pointer is valid for the lifetime of self, 111 | // the length of the states array is also the length of buffers array 112 | // by construction. 113 | unsafe { slice::from_raw_parts(self.raw_bufs.as_ptr(), self.states.len()) } 114 | } 115 | 116 | unsafe fn check_in(&mut self, index: u16, init_len: usize) { 117 | self.check_in_internal(index, init_len) 118 | } 119 | } 120 | 121 | impl Drop for Registry { 122 | fn drop(&mut self) { 123 | for (i, state) in self.states.iter().enumerate() { 124 | match state { 125 | BufState::Free { init_len, .. } => { 126 | // Update buffer initialization. 127 | // The buffer is about to be dropped, but this may release it 128 | // from Registry ownership, rather than deallocate. 
129 | unsafe { self.buffers[i].set_init(*init_len) }; 130 | } 131 | BufState::CheckedOut => unreachable!("all buffers must be checked in"), 132 | } 133 | } 134 | 135 | // Rebuild Vec, so it's dropped 136 | let _ = unsafe { 137 | Vec::from_raw_parts(self.raw_bufs.as_ptr(), self.states.len(), self.orig_cap) 138 | }; 139 | } 140 | } 141 | -------------------------------------------------------------------------------- /src/buf/fixed/registry.rs: -------------------------------------------------------------------------------- 1 | use super::plumbing; 2 | use super::FixedBuf; 3 | 4 | use crate::buf::IoBufMut; 5 | use crate::runtime::CONTEXT; 6 | use std::cell::RefCell; 7 | use std::io; 8 | use std::rc::Rc; 9 | 10 | /// An indexed collection of I/O buffers pre-registered with the kernel. 11 | /// 12 | /// `FixedBufRegistry` allows the application to manage a collection of buffers 13 | /// allocated in memory, that can be registered in the current `tokio-uring` 14 | /// context using the [`register`] method. The buffers are accessed by their 15 | /// indices using the [`check_out`] method. 16 | /// 17 | /// A `FixedBufRegistry` value is a lightweight handle for a collection of 18 | /// allocated buffers. Cloning of a `FixedBufRegistry` creates a new reference to 19 | /// the same collection of buffers. 20 | /// 21 | /// The buffers of the collection are not deallocated until: 22 | /// - all `FixedBufRegistry` references to the collection have been dropped; 23 | /// - all [`FixedBuf`] handles to individual buffers in the collection have 24 | /// been dropped, including the buffer handles owned by any I/O operations 25 | /// in flight; 26 | /// - The `tokio-uring` [`Runtime`] the buffers are registered with 27 | /// has been dropped. 28 | /// 29 | /// [`register`]: Self::register 30 | /// [`check_out`]: Self::check_out 31 | /// [`Runtime`]: crate::Runtime 32 | #[derive(Clone)] 33 | pub struct FixedBufRegistry { 34 | inner: Rc>>, 35 | } 36 | 37 | impl FixedBufRegistry { 38 | /// Creates a new collection of buffers from the provided allocated vectors. 39 | /// 40 | /// The buffers are assigned 0-based indices in the order of the iterable 41 | /// input parameter. The returned collection takes up to [`UIO_MAXIOV`] 42 | /// buffers from the input. Any items in excess of that amount are silently 43 | /// dropped, unless the input iterator produces the vectors lazily. 44 | /// 45 | /// [`UIO_MAXIOV`]: libc::UIO_MAXIOV 46 | /// 47 | /// # Examples 48 | /// 49 | /// When providing uninitialized vectors for the collection, take care to 50 | /// not replicate a vector with `.clone()` as that does not preserve the 51 | /// capacity and the resulting buffer pointer will be rejected by the kernel. 52 | /// This means that the following use of [`iter::repeat`] would not work: 53 | /// 54 | /// [`iter::repeat`]: std::iter::repeat 55 | /// 56 | /// ```should_panic 57 | /// use tokio_uring::buf::fixed::FixedBufRegistry; 58 | /// use std::iter; 59 | /// 60 | /// # #[allow(non_snake_case)] 61 | /// # fn main() -> Result<(), std::io::Error> { 62 | /// # use nix::sys::resource::{getrlimit, Resource}; 63 | /// # let (memlock_limit, _) = getrlimit(Resource::RLIMIT_MEMLOCK)?; 64 | /// # let NUM_BUFFERS = std::cmp::max(memlock_limit as usize / 4096 / 8, 1); 65 | /// # let BUF_SIZE = 4096; 66 | /// let registry = FixedBufRegistry::new( 67 | /// iter::repeat(Vec::with_capacity(BUF_SIZE)).take(NUM_BUFFERS) 68 | /// ); 69 | /// 70 | /// tokio_uring::start(async { 71 | /// registry.register()?; 72 | /// // ... 
73 | /// Ok(()) 74 | /// }) 75 | /// # } 76 | /// ``` 77 | /// 78 | /// Instead, create the vectors with requested capacity directly: 79 | /// 80 | /// ``` 81 | /// use tokio_uring::buf::fixed::FixedBufRegistry; 82 | /// use std::iter; 83 | /// 84 | /// # #[allow(non_snake_case)] 85 | /// # fn main() -> Result<(), std::io::Error> { 86 | /// # use nix::sys::resource::{getrlimit, Resource}; 87 | /// # let (memlock_limit, _) = getrlimit(Resource::RLIMIT_MEMLOCK)?; 88 | /// # let NUM_BUFFERS = std::cmp::max(memlock_limit as usize / 4096 / 8, 1); 89 | /// # let BUF_SIZE = 4096; 90 | /// let registry = FixedBufRegistry::new( 91 | /// iter::repeat_with(|| Vec::with_capacity(BUF_SIZE)).take(NUM_BUFFERS) 92 | /// ); 93 | /// 94 | /// tokio_uring::start(async { 95 | /// registry.register()?; 96 | /// // ... 97 | /// Ok(()) 98 | /// }) 99 | /// # } 100 | /// ``` 101 | pub fn new(bufs: impl IntoIterator) -> Self { 102 | FixedBufRegistry { 103 | inner: Rc::new(RefCell::new(plumbing::Registry::new(bufs.into_iter()))), 104 | } 105 | } 106 | 107 | /// Registers the buffers with the kernel. 108 | /// 109 | /// This method must be called in the context of a `tokio-uring` runtime. 110 | /// The registration persists for the lifetime of the runtime, unless 111 | /// revoked by the [`unregister`] method. Dropping the 112 | /// `FixedBufRegistry` instance this method has been called on does not revoke 113 | /// the registration or deallocate the buffers. 114 | /// 115 | /// [`unregister`]: Self::unregister 116 | /// 117 | /// This call can be blocked in the kernel to complete any operations 118 | /// in-flight on the same `io-uring` instance. The application is 119 | /// recommended to register buffers before starting any I/O operations. 120 | /// 121 | /// # Errors 122 | /// 123 | /// If a collection of buffers is currently registered in the context 124 | /// of the `tokio-uring` runtime this call is made in, the function returns 125 | /// an error. 126 | pub fn register(&self) -> io::Result<()> { 127 | CONTEXT.with(|x| { 128 | x.handle() 129 | .as_ref() 130 | .expect("Not in a runtime context") 131 | .register_buffers(Rc::clone(&self.inner) as _) 132 | }) 133 | } 134 | 135 | /// Unregisters this collection of buffers. 136 | /// 137 | /// This method must be called in the context of a `tokio-uring` runtime, 138 | /// where the buffers should have been previously registered. 139 | /// 140 | /// This operation invalidates any `FixedBuf` handles checked out from 141 | /// this registry instance. Continued use of such handles in I/O 142 | /// operations may result in an error. 143 | /// 144 | /// # Errors 145 | /// 146 | /// If another collection of buffers is currently registered in the context 147 | /// of the `tokio-uring` runtime this call is made in, the function returns 148 | /// an error. Calling `unregister` when no `FixedBufRegistry` is currently 149 | /// registered on this runtime also returns an error. 150 | pub fn unregister(&self) -> io::Result<()> { 151 | CONTEXT.with(|x| { 152 | x.handle() 153 | .as_ref() 154 | .expect("Not in a runtime context") 155 | .unregister_buffers(Rc::clone(&self.inner) as _) 156 | }) 157 | } 158 | 159 | /// Returns a buffer identified by the specified index for use by the 160 | /// application, unless the buffer is already in use. 161 | /// 162 | /// The buffer is released to be available again once the 163 | /// returned `FixedBuf` handle has been dropped. 
An I/O operation 164 | /// using the buffer takes ownership of it and returns it once completed, 165 | /// preventing shared use of the buffer while the operation is in flight. 166 | pub fn check_out(&self, index: usize) -> Option { 167 | let mut inner = self.inner.borrow_mut(); 168 | inner.check_out(index).map(|data| { 169 | let registry = Rc::clone(&self.inner); 170 | // Safety: the validity of buffer data is ensured by 171 | // plumbing::Registry::check_out 172 | unsafe { FixedBuf::new(registry, data) } 173 | }) 174 | } 175 | } 176 | -------------------------------------------------------------------------------- /src/buf/io_buf.rs: -------------------------------------------------------------------------------- 1 | /// An `io-uring` compatible buffer. 2 | /// 3 | /// The `IoBuf` trait is implemented by buffer types that can be used with 4 | /// io-uring operations. Users will not need to use this trait directly. 5 | /// The [`BoundedBuf`] trait provides some useful methods including `slice`. 6 | /// 7 | /// # Safety 8 | /// 9 | /// Buffers passed to `io-uring` operations must reference a stable memory 10 | /// region. While the runtime holds ownership to a buffer, the pointer returned 11 | /// by `stable_ptr` must remain valid even if the `IoBuf` value is moved. 12 | /// 13 | /// [`BoundedBuf`]: crate::buf::BoundedBuf 14 | pub unsafe trait IoBuf: Unpin + 'static { 15 | /// Returns a raw pointer to the vector’s buffer. 16 | /// 17 | /// This method is to be used by the `tokio-uring` runtime and it is not 18 | /// expected for users to call it directly. 19 | /// 20 | /// The implementation must ensure that, while the `tokio-uring` runtime 21 | /// owns the value, the pointer returned by `stable_ptr` **does not** 22 | /// change. 23 | fn stable_ptr(&self) -> *const u8; 24 | 25 | /// Number of initialized bytes. 26 | /// 27 | /// This method is to be used by the `tokio-uring` runtime and it is not 28 | /// expected for users to call it directly. 29 | /// 30 | /// For `Vec`, this is identical to `len()`. 31 | fn bytes_init(&self) -> usize; 32 | 33 | /// Total size of the buffer, including uninitialized memory, if any. 34 | /// 35 | /// This method is to be used by the `tokio-uring` runtime and it is not 36 | /// expected for users to call it directly. 37 | /// 38 | /// For `Vec`, this is identical to `capacity()`. 
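    /// Illustrative sketch (an added example, not a doctest from the original
    /// source): for a `Vec<u8>` with spare capacity the two byte counts differ.
    ///
    /// ```
    /// use tokio_uring::buf::IoBuf;
    ///
    /// let mut v: Vec<u8> = Vec::with_capacity(64);
    /// v.extend_from_slice(b"hello");
    /// assert_eq!(v.bytes_init(), 5);    // same as v.len()
    /// assert!(v.bytes_total() >= 64);   // same as v.capacity()
    /// ```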
39 | fn bytes_total(&self) -> usize; 40 | } 41 | 42 | unsafe impl IoBuf for Vec { 43 | fn stable_ptr(&self) -> *const u8 { 44 | self.as_ptr() 45 | } 46 | 47 | fn bytes_init(&self) -> usize { 48 | self.len() 49 | } 50 | 51 | fn bytes_total(&self) -> usize { 52 | self.capacity() 53 | } 54 | } 55 | 56 | unsafe impl IoBuf for &'static [u8] { 57 | fn stable_ptr(&self) -> *const u8 { 58 | self.as_ptr() 59 | } 60 | 61 | fn bytes_init(&self) -> usize { 62 | <[u8]>::len(self) 63 | } 64 | 65 | fn bytes_total(&self) -> usize { 66 | self.bytes_init() 67 | } 68 | } 69 | 70 | unsafe impl IoBuf for &'static str { 71 | fn stable_ptr(&self) -> *const u8 { 72 | self.as_ptr() 73 | } 74 | 75 | fn bytes_init(&self) -> usize { 76 | ::len(self) 77 | } 78 | 79 | fn bytes_total(&self) -> usize { 80 | self.bytes_init() 81 | } 82 | } 83 | 84 | #[cfg(feature = "bytes")] 85 | unsafe impl IoBuf for bytes::Bytes { 86 | fn stable_ptr(&self) -> *const u8 { 87 | self.as_ptr() 88 | } 89 | 90 | fn bytes_init(&self) -> usize { 91 | self.len() 92 | } 93 | 94 | fn bytes_total(&self) -> usize { 95 | self.len() 96 | } 97 | } 98 | 99 | #[cfg(feature = "bytes")] 100 | unsafe impl IoBuf for bytes::BytesMut { 101 | fn stable_ptr(&self) -> *const u8 { 102 | self.as_ptr() 103 | } 104 | 105 | fn bytes_init(&self) -> usize { 106 | self.len() 107 | } 108 | 109 | fn bytes_total(&self) -> usize { 110 | self.capacity() 111 | } 112 | } 113 | -------------------------------------------------------------------------------- /src/buf/io_buf_mut.rs: -------------------------------------------------------------------------------- 1 | use crate::buf::IoBuf; 2 | 3 | /// A mutable`io-uring` compatible buffer. 4 | /// 5 | /// The `IoBufMut` trait is implemented by buffer types that can be used with 6 | /// io-uring operations. Users will not need to use this trait directly. 7 | /// 8 | /// # Safety 9 | /// 10 | /// Buffers passed to `io-uring` operations must reference a stable memory 11 | /// region. While the runtime holds ownership to a buffer, the pointer returned 12 | /// by `stable_mut_ptr` must remain valid even if the `IoBufMut` value is moved. 13 | pub unsafe trait IoBufMut: IoBuf { 14 | /// Returns a raw mutable pointer to the vector’s buffer. 15 | /// 16 | /// This method is to be used by the `tokio-uring` runtime and it is not 17 | /// expected for users to call it directly. 18 | /// 19 | /// The implementation must ensure that, while the `tokio-uring` runtime 20 | /// owns the value, the pointer returned by `stable_mut_ptr` **does not** 21 | /// change. 22 | fn stable_mut_ptr(&mut self) -> *mut u8; 23 | 24 | /// Updates the number of initialized bytes. 25 | /// 26 | /// If the specified `pos` is greater than the value returned by 27 | /// [`IoBuf::bytes_init`], it becomes the new water mark as returned by 28 | /// `IoBuf::bytes_init`. 29 | /// 30 | /// # Safety 31 | /// 32 | /// The caller must ensure that all bytes starting at `stable_mut_ptr()` up 33 | /// to `pos` are initialized and owned by the buffer. 
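    /// Illustrative sketch (an added example, not a doctest from the original
    /// source): growing the initialized region of a `Vec<u8>` after writing
    /// through the raw pointer.
    ///
    /// ```
    /// use tokio_uring::buf::{IoBuf, IoBufMut};
    ///
    /// let mut v: Vec<u8> = Vec::with_capacity(8);
    /// unsafe {
    ///     v.stable_mut_ptr().write_bytes(0u8, 4); // initialize the first 4 bytes
    ///     v.set_init(4);
    /// }
    /// assert_eq!(v.bytes_init(), 4);
    /// ```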
34 | unsafe fn set_init(&mut self, pos: usize); 35 | } 36 | 37 | unsafe impl IoBufMut for Vec { 38 | fn stable_mut_ptr(&mut self) -> *mut u8 { 39 | self.as_mut_ptr() 40 | } 41 | 42 | unsafe fn set_init(&mut self, init_len: usize) { 43 | if self.len() < init_len { 44 | self.set_len(init_len); 45 | } 46 | } 47 | } 48 | 49 | #[cfg(feature = "bytes")] 50 | unsafe impl IoBufMut for bytes::BytesMut { 51 | fn stable_mut_ptr(&mut self) -> *mut u8 { 52 | self.as_mut_ptr() 53 | } 54 | 55 | unsafe fn set_init(&mut self, init_len: usize) { 56 | if self.len() < init_len { 57 | self.set_len(init_len); 58 | } 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /src/buf/mod.rs: -------------------------------------------------------------------------------- 1 | //! Utilities for working with buffers. 2 | //! 3 | //! `io-uring` APIs require passing ownership of buffers to the runtime. The 4 | //! crate defines [`IoBuf`] and [`IoBufMut`] traits which are implemented by buffer 5 | //! types that respect the `io-uring` contract. 6 | 7 | pub mod fixed; 8 | 9 | mod io_buf; 10 | pub use io_buf::IoBuf; 11 | 12 | mod io_buf_mut; 13 | pub use io_buf_mut::IoBufMut; 14 | 15 | mod slice; 16 | pub use slice::Slice; 17 | 18 | mod bounded; 19 | pub use bounded::{BoundedBuf, BoundedBufMut}; 20 | 21 | pub(crate) fn deref(buf: &impl IoBuf) -> &[u8] { 22 | // Safety: the `IoBuf` trait is marked as unsafe and is expected to be 23 | // implemented correctly. 24 | unsafe { std::slice::from_raw_parts(buf.stable_ptr(), buf.bytes_init()) } 25 | } 26 | 27 | pub(crate) fn deref_mut(buf: &mut impl IoBufMut) -> &mut [u8] { 28 | // Safety: the `IoBufMut` trait is marked as unsafe and is expected to be 29 | // implemented correct. 30 | unsafe { std::slice::from_raw_parts_mut(buf.stable_mut_ptr(), buf.bytes_init()) } 31 | } 32 | -------------------------------------------------------------------------------- /src/buf/slice.rs: -------------------------------------------------------------------------------- 1 | use super::{BoundedBuf, BoundedBufMut, IoBuf, IoBufMut}; 2 | 3 | use std::cmp; 4 | use std::ops; 5 | 6 | /// An owned view into a contiguous sequence of bytes. 7 | /// 8 | /// This is similar to Rust slices (`&buf[..]`) but owns the underlying buffer. 9 | /// This type is useful for performing io-uring read and write operations using 10 | /// a subset of a buffer. 11 | /// 12 | /// Slices are created using [`BoundedBuf::slice`]. 13 | /// 14 | /// # Examples 15 | /// 16 | /// Creating a slice 17 | /// 18 | /// ``` 19 | /// use tokio_uring::buf::BoundedBuf; 20 | /// 21 | /// let buf = b"hello world".to_vec(); 22 | /// let slice = buf.slice(..5); 23 | /// 24 | /// assert_eq!(&slice[..], b"hello"); 25 | /// ``` 26 | pub struct Slice { 27 | buf: T, 28 | begin: usize, 29 | end: usize, 30 | } 31 | 32 | impl Slice { 33 | pub(crate) fn new(buf: T, begin: usize, end: usize) -> Slice { 34 | Slice { buf, begin, end } 35 | } 36 | 37 | /// Offset in the underlying buffer at which this slice starts. 38 | /// 39 | /// # Examples 40 | /// 41 | /// ``` 42 | /// use tokio_uring::buf::BoundedBuf; 43 | /// 44 | /// let buf = b"hello world".to_vec(); 45 | /// let slice = buf.slice(1..5); 46 | /// 47 | /// assert_eq!(1, slice.begin()); 48 | /// ``` 49 | pub fn begin(&self) -> usize { 50 | self.begin 51 | } 52 | 53 | /// Ofset in the underlying buffer at which this slice ends. 
54 | /// 55 | /// # Examples 56 | /// 57 | /// ``` 58 | /// use tokio_uring::buf::BoundedBuf; 59 | /// 60 | /// let buf = b"hello world".to_vec(); 61 | /// let slice = buf.slice(1..5); 62 | /// 63 | /// assert_eq!(5, slice.end()); 64 | /// ``` 65 | pub fn end(&self) -> usize { 66 | self.end 67 | } 68 | 69 | /// Gets a reference to the underlying buffer. 70 | /// 71 | /// This method escapes the slice's view. 72 | /// 73 | /// # Examples 74 | /// 75 | /// ``` 76 | /// use tokio_uring::buf::BoundedBuf; 77 | /// 78 | /// let buf = b"hello world".to_vec(); 79 | /// let slice = buf.slice(..5); 80 | /// 81 | /// assert_eq!(slice.get_ref(), b"hello world"); 82 | /// assert_eq!(&slice[..], b"hello"); 83 | /// ``` 84 | pub fn get_ref(&self) -> &T { 85 | &self.buf 86 | } 87 | 88 | /// Gets a mutable reference to the underlying buffer. 89 | /// 90 | /// This method escapes the slice's view. 91 | /// 92 | /// # Examples 93 | /// 94 | /// ``` 95 | /// use tokio_uring::buf::BoundedBuf; 96 | /// 97 | /// let buf = b"hello world".to_vec(); 98 | /// let mut slice = buf.slice(..5); 99 | /// 100 | /// slice.get_mut()[0] = b'b'; 101 | /// 102 | /// assert_eq!(slice.get_mut(), b"bello world"); 103 | /// assert_eq!(&slice[..], b"bello"); 104 | /// ``` 105 | pub fn get_mut(&mut self) -> &mut T { 106 | &mut self.buf 107 | } 108 | 109 | /// Unwraps this `Slice`, returning the underlying buffer. 110 | /// 111 | /// # Examples 112 | /// 113 | /// ``` 114 | /// use tokio_uring::buf::BoundedBuf; 115 | /// 116 | /// let buf = b"hello world".to_vec(); 117 | /// let slice = buf.slice(..5); 118 | /// 119 | /// let buf = slice.into_inner(); 120 | /// assert_eq!(buf, b"hello world"); 121 | /// ``` 122 | pub fn into_inner(self) -> T { 123 | self.buf 124 | } 125 | } 126 | 127 | impl ops::Deref for Slice { 128 | type Target = [u8]; 129 | 130 | fn deref(&self) -> &[u8] { 131 | let buf_bytes = super::deref(&self.buf); 132 | let end = cmp::min(self.end, buf_bytes.len()); 133 | &buf_bytes[self.begin..end] 134 | } 135 | } 136 | 137 | impl ops::DerefMut for Slice { 138 | fn deref_mut(&mut self) -> &mut [u8] { 139 | let buf_bytes = super::deref_mut(&mut self.buf); 140 | let end = cmp::min(self.end, buf_bytes.len()); 141 | &mut buf_bytes[self.begin..end] 142 | } 143 | } 144 | 145 | impl BoundedBuf for Slice { 146 | type Buf = T; 147 | type Bounds = ops::Range; 148 | 149 | fn slice(self, range: impl ops::RangeBounds) -> Slice { 150 | use ops::Bound; 151 | 152 | let begin = match range.start_bound() { 153 | Bound::Included(&n) => self.begin.checked_add(n).expect("out of range"), 154 | Bound::Excluded(&n) => self 155 | .begin 156 | .checked_add(n) 157 | .and_then(|x| x.checked_add(1)) 158 | .expect("out of range"), 159 | Bound::Unbounded => self.begin, 160 | }; 161 | 162 | assert!(begin <= self.end); 163 | 164 | let end = match range.end_bound() { 165 | Bound::Included(&n) => self 166 | .begin 167 | .checked_add(n) 168 | .and_then(|x| x.checked_add(1)) 169 | .expect("out of range"), 170 | Bound::Excluded(&n) => self.begin.checked_add(n).expect("out of range"), 171 | Bound::Unbounded => self.end, 172 | }; 173 | 174 | assert!(end <= self.end); 175 | assert!(begin <= self.buf.bytes_init()); 176 | 177 | Slice::new(self.buf, begin, end) 178 | } 179 | 180 | fn slice_full(self) -> Slice { 181 | self 182 | } 183 | 184 | fn get_buf(&self) -> &T { 185 | &self.buf 186 | } 187 | 188 | fn bounds(&self) -> Self::Bounds { 189 | self.begin..self.end 190 | } 191 | 192 | fn from_buf_bounds(buf: T, bounds: Self::Bounds) -> Self { 193 | assert!(bounds.start 
<= buf.bytes_init()); 194 | assert!(bounds.end <= buf.bytes_total()); 195 | Slice::new(buf, bounds.start, bounds.end) 196 | } 197 | 198 | fn stable_ptr(&self) -> *const u8 { 199 | super::deref(&self.buf)[self.begin..].as_ptr() 200 | } 201 | 202 | fn bytes_init(&self) -> usize { 203 | ops::Deref::deref(self).len() 204 | } 205 | 206 | fn bytes_total(&self) -> usize { 207 | self.end - self.begin 208 | } 209 | } 210 | 211 | impl BoundedBufMut for Slice { 212 | type BufMut = T; 213 | 214 | fn stable_mut_ptr(&mut self) -> *mut u8 { 215 | super::deref_mut(&mut self.buf)[self.begin..].as_mut_ptr() 216 | } 217 | 218 | unsafe fn set_init(&mut self, pos: usize) { 219 | self.buf.set_init(self.begin + pos); 220 | } 221 | } 222 | -------------------------------------------------------------------------------- /src/fs/create_dir_all.rs: -------------------------------------------------------------------------------- 1 | use futures_util::future::LocalBoxFuture; 2 | use std::io; 3 | use std::path::Path; 4 | 5 | /// Recursively create a directory and all of its parent components if they are missing. 6 | /// 7 | /// # Examples 8 | /// 9 | /// ```no_run 10 | /// tokio_uring::start(async { 11 | /// tokio_uring::fs::create_dir_all("/some/dir").await.unwrap(); 12 | /// }); 13 | /// ``` 14 | pub async fn create_dir_all>(path: P) -> io::Result<()> { 15 | DirBuilder::new() 16 | .recursive(true) 17 | .create(path.as_ref()) 18 | .await 19 | } 20 | 21 | /// A builder used to create directories in various manners, based on uring async operations. 22 | /// 23 | /// This builder supports the Linux specific option `mode` and may support `at` in the future. 24 | #[derive(Debug)] 25 | pub struct DirBuilder { 26 | inner: fs_imp::DirBuilder, 27 | recursive: bool, 28 | } 29 | 30 | impl Default for DirBuilder { 31 | fn default() -> Self { 32 | Self::new() 33 | } 34 | } 35 | 36 | impl DirBuilder { 37 | /// Creates a new set of options with default mode/security settings for all 38 | /// platforms and also non-recursive. 39 | /// 40 | /// # Examples 41 | /// 42 | /// ``` 43 | /// let builder = tokio_uring::fs::DirBuilder::new(); 44 | /// ``` 45 | #[must_use] 46 | pub fn new() -> DirBuilder { 47 | DirBuilder { 48 | inner: fs_imp::DirBuilder::new(), 49 | recursive: false, 50 | } 51 | } 52 | 53 | /// Indicates that directories should be created recursively, creating all 54 | /// parent directories. Parents that do not exist are created with the same 55 | /// security and permissions settings. 56 | /// 57 | /// This option defaults to `false`. 58 | /// 59 | /// # Examples 60 | /// 61 | /// ``` 62 | /// let mut builder = tokio_uring::fs::DirBuilder::new(); 63 | /// builder.recursive(true); 64 | /// ``` 65 | #[must_use] 66 | pub fn recursive(&mut self, recursive: bool) -> &mut Self { 67 | self.recursive = recursive; 68 | self 69 | } 70 | 71 | /// Sets the mode to create new directories with. This option defaults to 0o777. 72 | /// 73 | /// This option defaults to 0o777. 74 | /// 75 | /// # Examples 76 | /// 77 | /// ``` 78 | /// let mut builder = tokio_uring::fs::DirBuilder::new(); 79 | /// builder.mode(0o700); 80 | /// ``` 81 | #[must_use] 82 | pub fn mode(&mut self, mode: u32) -> &mut Self { 83 | self.inner.set_mode(mode); 84 | self 85 | } 86 | 87 | /// Creates the specified directory with the options configured in this 88 | /// builder. 89 | /// 90 | /// It is considered an error if the directory already exists unless 91 | /// recursive mode is enabled. 
92 | /// 93 | /// # Examples 94 | /// 95 | /// ```no_run 96 | /// tokio_uring::start(async { 97 | /// let path = "/tmp/foo/bar/baz"; 98 | /// tokio_uring::fs::DirBuilder::new() 99 | /// .recursive(true) 100 | /// .mode(0o700) // user-only mode: drwx------ 101 | /// .create(path).await.unwrap(); 102 | /// 103 | /// // TODO change with tokio_uring version 104 | /// assert!(std::fs::metadata(path).unwrap().is_dir()); 105 | /// }) 106 | /// ``` 107 | pub async fn create>(&self, path: P) -> io::Result<()> { 108 | self._create(path.as_ref()).await 109 | } 110 | 111 | async fn _create(&self, path: &Path) -> io::Result<()> { 112 | if self.recursive { 113 | self.recurse_create_dir_all(path).await 114 | } else { 115 | self.inner.mkdir(path).await 116 | } 117 | } 118 | 119 | // This recursive function is very closely modeled after the std library version. 120 | // 121 | // A recursive async function requires a Boxed Future. TODO There may be an implementation that 122 | // is less costly in terms of heap allocations. Maybe a non-recursive version is possible given 123 | // we even know the path separator for Linux. Or maybe expand the first level to avoid 124 | // recursion when only the first level of the directory needs to be built. For now, this serves 125 | // its purpose. 126 | 127 | fn recurse_create_dir_all<'a>(&'a self, path: &'a Path) -> LocalBoxFuture> { 128 | Box::pin(async move { 129 | if path == Path::new("") { 130 | return Ok(()); 131 | } 132 | 133 | match self.inner.mkdir(path).await { 134 | Ok(()) => return Ok(()), 135 | Err(ref e) if e.kind() == io::ErrorKind::NotFound => {} 136 | Err(_) if is_dir(path).await => return Ok(()), 137 | Err(e) => return Err(e), 138 | } 139 | match path.parent() { 140 | Some(p) => self.recurse_create_dir_all(p).await?, 141 | None => { 142 | return Err(std::io::Error::new( 143 | std::io::ErrorKind::Other, 144 | "failed to create whole tree", 145 | )); 146 | /* TODO build own allocation free error some day like the std library does. 147 | return Err(io::const_io_error!( 148 | io::ErrorKind::Uncategorized, 149 | "failed to create whole tree", 150 | )); 151 | */ 152 | } 153 | } 154 | match self.inner.mkdir(path).await { 155 | Ok(()) => Ok(()), 156 | Err(_) if is_dir(path).await => Ok(()), 157 | Err(e) => Err(e), 158 | } 159 | }) 160 | } 161 | } 162 | 163 | // TODO this DirBuilder and this fs_imp module is modeled after the std library's. Here there is 164 | // only Linux supported so is it worth to continue this separation? 165 | 166 | mod fs_imp { 167 | use crate::runtime::driver::op::Op; 168 | use libc::mode_t; 169 | use std::path::Path; 170 | 171 | #[derive(Debug)] 172 | pub struct DirBuilder { 173 | mode: mode_t, 174 | } 175 | 176 | impl DirBuilder { 177 | pub fn new() -> DirBuilder { 178 | DirBuilder { mode: 0o777 } 179 | } 180 | 181 | pub async fn mkdir(&self, p: &Path) -> std::io::Result<()> { 182 | Op::make_dir(p, self.mode)?.await 183 | } 184 | 185 | pub fn set_mode(&mut self, mode: u32) { 186 | self.mode = mode as mode_t; 187 | } 188 | } 189 | } 190 | 191 | // Returns true if the path represents a directory. 192 | // 193 | // Uses one asynchronous uring call to determine this. 
194 | async fn is_dir>(path: P) -> bool { 195 | let mut builder = crate::fs::StatxBuilder::new(); 196 | if builder.mask(libc::STATX_TYPE).pathname(path).is_err() { 197 | return false; 198 | } 199 | 200 | let res = builder.statx().await; 201 | match res { 202 | Ok(statx) => (u32::from(statx.stx_mode) & libc::S_IFMT) == libc::S_IFDIR, 203 | Err(_) => false, 204 | } 205 | } 206 | -------------------------------------------------------------------------------- /src/fs/directory.rs: -------------------------------------------------------------------------------- 1 | use crate::runtime::driver::op::Op; 2 | use std::io; 3 | use std::path::Path; 4 | 5 | /// Creates a directory on the local filesystem. 6 | /// 7 | /// # Errors 8 | /// 9 | /// This function will return an error in the following situations, but is not 10 | /// limited to just these cases: 11 | /// 12 | /// * User lacks permissions to create a directory at `path` 13 | /// * [`io::ErrorKind`] would be set to `PermissionDenied` 14 | /// * A parent of the given path doesn't exist. 15 | /// * [`io::ErrorKind`] would be set to `NotFound` or `NotADirectory` 16 | /// * `path` already exists. 17 | /// * [`io::ErrorKind`] would be set to `AlreadyExists` 18 | /// 19 | /// [`ErrorKind`]: std::io::ErrorKind 20 | /// # Examples 21 | /// 22 | /// ```no_run 23 | /// use tokio_uring::fs::create_dir; 24 | /// 25 | /// fn main() -> Result<(), Box> { 26 | /// tokio_uring::start(async { 27 | /// create_dir("/some/dir").await?; 28 | /// Ok::<(), std::io::Error>(()) 29 | /// })?; 30 | /// Ok(()) 31 | /// } 32 | /// ``` 33 | pub async fn create_dir>(path: P) -> io::Result<()> { 34 | Op::make_dir(path.as_ref(), 0o777)?.await 35 | } 36 | 37 | /// Removes a directory on the local filesystem. 38 | /// 39 | /// This will only remove empty directories with no children. If you want to destroy the entire 40 | /// contents of a directory, you may try [`remove_dir_all`] which uses the standard Tokio executor. 41 | /// There currently is no implementation of `remove_dir_all` in tokio-uring. 42 | /// 43 | /// [`remove_dir_all`]: https://docs.rs/tokio/latest/tokio/fs/fn.remove_dir_all.html 44 | /// 45 | /// # Errors 46 | /// 47 | /// This function will return an error in the following situations, but is not 48 | /// limited to just these cases: 49 | /// 50 | /// * `path` doesn't exist. 51 | /// * [`io::ErrorKind`] would be set to `NotFound` 52 | /// * `path` isn't a directory. 53 | /// * [`io::ErrorKind`] would be set to `NotADirectory` 54 | /// * The user lacks permissions to modify/remove the directory at the provided `path`. 55 | /// * [`io::ErrorKind`] would be set to `PermissionDenied` 56 | /// * The directory isn't empty. 57 | /// * [`io::ErrorKind`] would be set to `DirectoryNotEmpty` 58 | /// 59 | /// # Examples 60 | /// 61 | /// ```no_run 62 | /// use tokio_uring::fs::remove_dir; 63 | /// 64 | /// fn main() -> Result<(), Box> { 65 | /// tokio_uring::start(async { 66 | /// remove_dir("/some/dir").await?; 67 | /// Ok::<(), std::io::Error>(()) 68 | /// })?; 69 | /// Ok(()) 70 | /// } 71 | /// ``` 72 | pub async fn remove_dir>(path: P) -> io::Result<()> { 73 | Op::unlink_dir(path.as_ref())?.await 74 | } 75 | -------------------------------------------------------------------------------- /src/fs/mod.rs: -------------------------------------------------------------------------------- 1 | //! Filesystem manipulation operations. 
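//!
//! Illustrative sketch (mirroring the examples on the individual functions):
//!
//! ```no_run
//! tokio_uring::start(async {
//!     tokio_uring::fs::create_dir("/some/dir").await.unwrap();
//!     tokio_uring::fs::remove_dir("/some/dir").await.unwrap();
//! });
//! ```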
2 | 3 | mod directory; 4 | pub use directory::create_dir; 5 | pub use directory::remove_dir; 6 | 7 | mod create_dir_all; 8 | pub use create_dir_all::create_dir_all; 9 | pub use create_dir_all::DirBuilder; 10 | 11 | mod file; 12 | pub use file::remove_file; 13 | pub use file::rename; 14 | pub use file::File; 15 | 16 | mod open_options; 17 | pub use open_options::OpenOptions; 18 | 19 | mod statx; 20 | pub use statx::is_dir_regfile; 21 | pub use statx::statx; 22 | pub use statx::StatxBuilder; 23 | 24 | mod symlink; 25 | pub use symlink::symlink; 26 | -------------------------------------------------------------------------------- /src/fs/symlink.rs: -------------------------------------------------------------------------------- 1 | use crate::runtime::driver::op::Op; 2 | use std::io; 3 | use std::path::Path; 4 | 5 | /// Creates a new symbolic link on the filesystem. 6 | /// The dst path will be a symbolic link pointing to the src path. 7 | /// This is an async version of std::os::unix::fs::symlink. 8 | pub async fn symlink, Q: AsRef>(src: P, dst: Q) -> io::Result<()> { 9 | Op::symlink(src, dst)?.await 10 | } 11 | -------------------------------------------------------------------------------- /src/future.rs: -------------------------------------------------------------------------------- 1 | // TODO see about removing or just commenting out. 2 | #[allow(unused_macros)] 3 | macro_rules! ready { 4 | ($e:expr $(,)?) => { 5 | match $e { 6 | std::task::Poll::Ready(t) => t, 7 | std::task::Poll::Pending => return std::task::Poll::Pending, 8 | } 9 | }; 10 | } 11 | -------------------------------------------------------------------------------- /src/io/accept.rs: -------------------------------------------------------------------------------- 1 | use crate::io::{SharedFd, Socket}; 2 | use crate::runtime::driver::op; 3 | use crate::runtime::driver::op::{Completable, Op}; 4 | use crate::runtime::CONTEXT; 5 | use std::net::SocketAddr; 6 | use std::{boxed::Box, io}; 7 | 8 | pub(crate) struct Accept { 9 | fd: SharedFd, 10 | pub(crate) socketaddr: Box<(libc::sockaddr_storage, libc::socklen_t)>, 11 | } 12 | 13 | impl Op { 14 | pub(crate) fn accept(fd: &SharedFd) -> io::Result> { 15 | use io_uring::{opcode, types}; 16 | 17 | let socketaddr = Box::new(( 18 | unsafe { std::mem::zeroed() }, 19 | std::mem::size_of::() as libc::socklen_t, 20 | )); 21 | CONTEXT.with(|x| { 22 | x.handle().expect("Not in a runtime context").submit_op( 23 | Accept { 24 | fd: fd.clone(), 25 | socketaddr, 26 | }, 27 | |accept| { 28 | opcode::Accept::new( 29 | types::Fd(accept.fd.raw_fd()), 30 | &mut accept.socketaddr.0 as *mut _ as *mut _, 31 | &mut accept.socketaddr.1, 32 | ) 33 | .flags(libc::O_CLOEXEC) 34 | .build() 35 | }, 36 | ) 37 | }) 38 | } 39 | } 40 | 41 | impl Completable for Accept { 42 | type Output = io::Result<(Socket, Option)>; 43 | 44 | fn complete(self, cqe: op::CqeResult) -> Self::Output { 45 | let fd = cqe.result?; 46 | let fd = SharedFd::new(fd as i32); 47 | let socket = Socket { fd }; 48 | let (_, addr) = unsafe { 49 | socket2::SockAddr::init(move |addr_storage, len| { 50 | self.socketaddr.0.clone_into(&mut *addr_storage); 51 | *len = self.socketaddr.1; 52 | Ok(()) 53 | })? 
54 | }; 55 | Ok((socket, addr.as_socket())) 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /src/io/bind.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tokio-rs/tokio-uring/7761222aa7f4bd48c559ca82e9535d47aac96d53/src/io/bind.rs -------------------------------------------------------------------------------- /src/io/close.rs: -------------------------------------------------------------------------------- 1 | use crate::runtime::driver::op; 2 | use crate::runtime::driver::op::{Completable, Op}; 3 | use crate::runtime::CONTEXT; 4 | use std::io; 5 | use std::os::unix::io::RawFd; 6 | 7 | pub(crate) struct Close { 8 | fd: RawFd, 9 | } 10 | 11 | impl Op { 12 | pub(crate) fn close(fd: RawFd) -> io::Result> { 13 | use io_uring::{opcode, types}; 14 | 15 | CONTEXT.with(|x| { 16 | x.handle() 17 | .expect("Not in a runtime context") 18 | .submit_op(Close { fd }, |close| { 19 | opcode::Close::new(types::Fd(close.fd)).build() 20 | }) 21 | }) 22 | } 23 | } 24 | 25 | impl Completable for Close { 26 | type Output = io::Result<()>; 27 | 28 | fn complete(self, cqe: op::CqeResult) -> Self::Output { 29 | let _ = cqe.result?; 30 | 31 | Ok(()) 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /src/io/connect.rs: -------------------------------------------------------------------------------- 1 | use crate::io::SharedFd; 2 | use crate::runtime::driver::op::{Completable, CqeResult, Op}; 3 | use crate::runtime::CONTEXT; 4 | use socket2::SockAddr; 5 | use std::io; 6 | 7 | /// Open a file 8 | pub(crate) struct Connect { 9 | fd: SharedFd, 10 | // this avoids a UAF (UAM?) if the future is moved, but not if the future is 11 | // dropped. no Op can be dropped before completion in tokio-uring land right now. 12 | socket_addr: Box, 13 | } 14 | 15 | impl Op { 16 | /// Submit a request to connect. 
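    ///
    /// The address is boxed so the pointer handed to the kernel stays valid
    /// even if this future is moved before completion (see the note on the
    /// `socket_addr` field above).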
17 | pub(crate) fn connect(fd: &SharedFd, socket_addr: SockAddr) -> io::Result> { 18 | use io_uring::{opcode, types}; 19 | 20 | CONTEXT.with(|x| { 21 | x.handle().expect("Not in a runtime context").submit_op( 22 | Connect { 23 | fd: fd.clone(), 24 | socket_addr: Box::new(socket_addr), 25 | }, 26 | |connect| { 27 | opcode::Connect::new( 28 | types::Fd(connect.fd.raw_fd()), 29 | connect.socket_addr.as_ptr(), 30 | connect.socket_addr.len(), 31 | ) 32 | .build() 33 | }, 34 | ) 35 | }) 36 | } 37 | } 38 | 39 | impl Completable for Connect { 40 | type Output = io::Result<()>; 41 | 42 | fn complete(self, cqe: CqeResult) -> Self::Output { 43 | cqe.result.map(|_| ()) 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /src/io/fallocate.rs: -------------------------------------------------------------------------------- 1 | use std::io; 2 | 3 | use io_uring::{opcode, types}; 4 | 5 | use crate::{ 6 | io::SharedFd, 7 | runtime::{ 8 | driver::op::{Completable, CqeResult, Op}, 9 | CONTEXT, 10 | }, 11 | }; 12 | 13 | pub(crate) struct Fallocate { 14 | fd: SharedFd, 15 | } 16 | 17 | impl Op { 18 | pub(crate) fn fallocate( 19 | fd: &SharedFd, 20 | offset: u64, 21 | len: u64, 22 | flags: i32, 23 | ) -> io::Result> { 24 | CONTEXT.with(|x| { 25 | x.handle().expect("not in a runtime context").submit_op( 26 | Fallocate { fd: fd.clone() }, 27 | |fallocate| { 28 | opcode::Fallocate::new(types::Fd(fallocate.fd.raw_fd()), len as _) 29 | .offset(offset as _) 30 | .mode(flags) 31 | .build() 32 | }, 33 | ) 34 | }) 35 | } 36 | } 37 | 38 | impl Completable for Fallocate { 39 | type Output = io::Result<()>; 40 | 41 | fn complete(self, cqe: CqeResult) -> Self::Output { 42 | cqe.result.map(|_| ()) 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /src/io/fsync.rs: -------------------------------------------------------------------------------- 1 | use std::io; 2 | 3 | use crate::io::SharedFd; 4 | use crate::runtime::driver::op::{Completable, CqeResult, Op}; 5 | use crate::runtime::CONTEXT; 6 | use io_uring::{opcode, types}; 7 | 8 | pub(crate) struct Fsync { 9 | fd: SharedFd, 10 | } 11 | 12 | impl Op { 13 | pub(crate) fn fsync(fd: &SharedFd) -> io::Result> { 14 | CONTEXT.with(|x| { 15 | x.handle() 16 | .expect("Not in a runtime context") 17 | .submit_op(Fsync { fd: fd.clone() }, |fsync| { 18 | opcode::Fsync::new(types::Fd(fsync.fd.raw_fd())).build() 19 | }) 20 | }) 21 | } 22 | 23 | pub(crate) fn datasync(fd: &SharedFd) -> io::Result> { 24 | CONTEXT.with(|x| { 25 | x.handle().expect("Not in a runtime context").submit_op( 26 | Fsync { fd: fd.clone() }, 27 | |fsync| { 28 | opcode::Fsync::new(types::Fd(fsync.fd.raw_fd())) 29 | .flags(types::FsyncFlags::DATASYNC) 30 | .build() 31 | }, 32 | ) 33 | }) 34 | } 35 | } 36 | 37 | impl Completable for Fsync { 38 | type Output = io::Result<()>; 39 | 40 | fn complete(self, cqe: CqeResult) -> Self::Output { 41 | cqe.result.map(|_| ()) 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /src/io/mkdir_at.rs: -------------------------------------------------------------------------------- 1 | use crate::runtime::driver::op::{Completable, CqeResult, Op}; 2 | use crate::runtime::CONTEXT; 3 | 4 | use super::util::cstr; 5 | 6 | use std::ffi::CString; 7 | use std::io; 8 | use std::path::Path; 9 | 10 | /// Create a directory at path relative to the current working directory 11 | /// of the caller's process. 
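/// The path is kept alive as a `CString` in the operation state so the pointer
/// passed to `MkDirAt` remains valid until the kernel has consumed it.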
12 | pub(crate) struct Mkdir { 13 | pub(crate) _path: CString, 14 | } 15 | 16 | impl Op { 17 | /// Submit a request to create a directory 18 | pub(crate) fn make_dir(path: &Path, mode: u32) -> io::Result> { 19 | use io_uring::{opcode, types}; 20 | 21 | let _path = cstr(path)?; 22 | 23 | CONTEXT.with(|x| { 24 | x.handle() 25 | .expect("Not in a runtime context") 26 | .submit_op(Mkdir { _path }, |mkdir| { 27 | let p_ref = mkdir._path.as_c_str().as_ptr(); 28 | 29 | opcode::MkDirAt::new(types::Fd(libc::AT_FDCWD), p_ref) 30 | .mode(mode) 31 | .build() 32 | }) 33 | }) 34 | } 35 | } 36 | 37 | impl Completable for Mkdir { 38 | type Output = io::Result<()>; 39 | 40 | fn complete(self, cqe: CqeResult) -> Self::Output { 41 | cqe.result.map(|_| ()) 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /src/io/mod.rs: -------------------------------------------------------------------------------- 1 | mod accept; 2 | 3 | mod close; 4 | 5 | mod connect; 6 | 7 | mod fallocate; 8 | 9 | mod fsync; 10 | 11 | mod mkdir_at; 12 | 13 | mod noop; 14 | pub(crate) use noop::NoOp; 15 | 16 | mod open; 17 | 18 | mod read; 19 | 20 | mod read_fixed; 21 | 22 | mod readv; 23 | 24 | mod recv_from; 25 | 26 | mod recvmsg; 27 | 28 | mod rename_at; 29 | 30 | mod send_to; 31 | 32 | mod send_zc; 33 | 34 | mod sendmsg; 35 | 36 | mod sendmsg_zc; 37 | 38 | mod shared_fd; 39 | pub(crate) use shared_fd::SharedFd; 40 | 41 | mod socket; 42 | pub(crate) use socket::Socket; 43 | 44 | mod statx; 45 | 46 | mod symlink; 47 | 48 | mod unlink_at; 49 | 50 | mod util; 51 | pub(crate) use util::cstr; 52 | 53 | pub(crate) mod write; 54 | 55 | mod write_fixed; 56 | 57 | mod writev; 58 | 59 | mod writev_all; 60 | pub(crate) use writev_all::writev_at_all; 61 | -------------------------------------------------------------------------------- /src/io/noop.rs: -------------------------------------------------------------------------------- 1 | use crate::runtime::driver::op::{Completable, CqeResult, Op}; 2 | use crate::runtime::CONTEXT; 3 | use std::io; 4 | 5 | /// No operation. Just posts a completion event, nothing else. 6 | /// 7 | /// Has a place in benchmarking. 8 | pub struct NoOp {} 9 | 10 | impl Op { 11 | pub fn no_op() -> io::Result> { 12 | use io_uring::opcode; 13 | 14 | CONTEXT.with(|x| { 15 | x.handle() 16 | .expect("Not in a runtime context") 17 | .submit_op(NoOp {}, |_| opcode::Nop::new().build()) 18 | }) 19 | } 20 | } 21 | 22 | impl Completable for NoOp { 23 | type Output = io::Result<()>; 24 | 25 | fn complete(self, cqe: CqeResult) -> Self::Output { 26 | cqe.result.map(|_| ()) 27 | } 28 | } 29 | 30 | #[cfg(test)] 31 | mod test { 32 | use crate as tokio_uring; 33 | 34 | #[test] 35 | fn perform_no_op() -> () { 36 | tokio_uring::start(async { 37 | tokio_uring::no_op().await.unwrap(); 38 | }) 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /src/io/open.rs: -------------------------------------------------------------------------------- 1 | use crate::fs::{File, OpenOptions}; 2 | use crate::io::SharedFd; 3 | 4 | use crate::runtime::driver::op::{Completable, CqeResult, Op}; 5 | use crate::runtime::CONTEXT; 6 | use std::ffi::CString; 7 | use std::io; 8 | use std::path::Path; 9 | 10 | /// Open a file 11 | #[allow(dead_code)] 12 | pub(crate) struct Open { 13 | pub(crate) path: CString, 14 | pub(crate) flags: libc::c_int, 15 | } 16 | 17 | impl Op { 18 | /// Submit a request to open a file. 
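    ///
    /// Illustrative sketch (hypothetical values): for
    /// `OpenOptions::new().read(true).write(true).create(true)` the submitted
    /// `OpenAt` flags are expected to compose as `O_CLOEXEC | O_RDWR | O_CREAT`,
    /// built from the options as shown below.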
19 | pub(crate) fn open(path: &Path, options: &OpenOptions) -> io::Result> { 20 | use io_uring::{opcode, types}; 21 | let path = super::util::cstr(path)?; 22 | let flags = libc::O_CLOEXEC 23 | | options.access_mode()? 24 | | options.creation_mode()? 25 | | (options.custom_flags & !libc::O_ACCMODE); 26 | 27 | CONTEXT.with(|x| { 28 | x.handle() 29 | .expect("Not in a runtime context") 30 | .submit_op(Open { path, flags }, |open| { 31 | // Get a reference to the memory. The string will be held by the 32 | // operation state and will not be accessed again until the operation 33 | // completes. 34 | let p_ref = open.path.as_c_str().as_ptr(); 35 | 36 | opcode::OpenAt::new(types::Fd(libc::AT_FDCWD), p_ref) 37 | .flags(flags) 38 | .mode(options.mode) 39 | .build() 40 | }) 41 | }) 42 | } 43 | } 44 | 45 | impl Completable for Open { 46 | type Output = io::Result; 47 | 48 | fn complete(self, cqe: CqeResult) -> Self::Output { 49 | Ok(File::from_shared_fd(SharedFd::new(cqe.result? as _))) 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /src/io/pool.rs: -------------------------------------------------------------------------------- 1 | use crate::driver; 2 | 3 | use io_uring::{opcode, IoUring}; 4 | use std::io; 5 | use std::mem::ManuallyDrop; 6 | 7 | /// Buffer pool shared with kernel 8 | pub(crate) struct Pool { 9 | mem: *mut u8, 10 | num: usize, 11 | size: usize, 12 | } 13 | 14 | pub(crate) struct ProvidedBuf { 15 | buf: ManuallyDrop>, 16 | driver: driver::Handle, 17 | } 18 | 19 | impl Pool { 20 | pub(super) fn new(num: usize, size: usize) -> Pool { 21 | let total = num * size; 22 | let mut mem = ManuallyDrop::new(Vec::::with_capacity(total)); 23 | 24 | assert_eq!(mem.capacity(), total); 25 | 26 | Pool { 27 | mem: mem.as_mut_ptr(), 28 | num, 29 | size, 30 | } 31 | } 32 | 33 | pub(super) fn provide_buffers(&self, uring: &mut IoUring) -> io::Result<()> { 34 | let op = opcode::ProvideBuffers::new(self.mem, self.size as _, self.num as _, 0, 0) 35 | .build() 36 | .user_data(0); 37 | 38 | // Scoped to ensure `sq` drops before trying to submit 39 | { 40 | let mut sq = uring.submission(); 41 | 42 | if unsafe { sq.push(&op) }.is_err() { 43 | unimplemented!("when is this hit?"); 44 | } 45 | } 46 | 47 | uring.submit_and_wait(1)?; 48 | 49 | let mut cq = uring.completion(); 50 | for cqe in &mut cq { 51 | assert_eq!(cqe.user_data(), 0); 52 | } 53 | 54 | Ok(()) 55 | } 56 | } 57 | 58 | impl ProvidedBuf {} 59 | 60 | impl Drop for ProvidedBuf { 61 | fn drop(&mut self) { 62 | let mut driver = self.driver.borrow_mut(); 63 | let pool = &driver.pool; 64 | 65 | let ptr = self.buf.as_mut_ptr(); 66 | let bid = (ptr as usize - pool.mem as usize) / pool.size; 67 | 68 | let op = opcode::ProvideBuffers::new(ptr, pool.size as _, 1, 0, bid as _) 69 | .build() 70 | .user_data(u64::MAX); 71 | 72 | let mut sq = driver.uring.submission(); 73 | 74 | if unsafe { sq.push(&op) }.is_err() { 75 | unimplemented!(); 76 | } 77 | } 78 | } 79 | -------------------------------------------------------------------------------- /src/io/read.rs: -------------------------------------------------------------------------------- 1 | use crate::buf::BoundedBufMut; 2 | use crate::io::SharedFd; 3 | use crate::BufResult; 4 | 5 | use crate::runtime::driver::op::{Completable, CqeResult, Op}; 6 | use crate::runtime::CONTEXT; 7 | use std::io; 8 | 9 | pub(crate) struct Read { 10 | /// Holds a strong ref to the FD, preventing the file from being closed 11 | /// while the operation is in-flight. 
12 | #[allow(dead_code)] 13 | fd: SharedFd, 14 | 15 | /// Reference to the in-flight buffer. 16 | pub(crate) buf: T, 17 | } 18 | 19 | impl Op> { 20 | pub(crate) fn read_at(fd: &SharedFd, buf: T, offset: u64) -> io::Result>> { 21 | use io_uring::{opcode, types}; 22 | 23 | CONTEXT.with(|x| { 24 | x.handle().expect("Not in a runtime context").submit_op( 25 | Read { 26 | fd: fd.clone(), 27 | buf, 28 | }, 29 | |read| { 30 | // Get raw buffer info 31 | let ptr = read.buf.stable_mut_ptr(); 32 | let len = read.buf.bytes_total(); 33 | opcode::Read::new(types::Fd(fd.raw_fd()), ptr, len as _) 34 | .offset(offset as _) 35 | .build() 36 | }, 37 | ) 38 | }) 39 | } 40 | } 41 | 42 | impl Completable for Read 43 | where 44 | T: BoundedBufMut, 45 | { 46 | type Output = BufResult; 47 | 48 | fn complete(self, cqe: CqeResult) -> Self::Output { 49 | // Convert the operation result to `usize` 50 | let res = cqe.result.map(|v| v as usize); 51 | // Recover the buffer 52 | let mut buf = self.buf; 53 | 54 | // If the operation was successful, advance the initialized cursor. 55 | if let Ok(n) = res { 56 | // Safety: the kernel wrote `n` bytes to the buffer. 57 | unsafe { 58 | buf.set_init(n); 59 | } 60 | } 61 | 62 | (res, buf) 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /src/io/read_fixed.rs: -------------------------------------------------------------------------------- 1 | use crate::buf::fixed::FixedBuf; 2 | use crate::buf::BoundedBufMut; 3 | use crate::io::SharedFd; 4 | use crate::runtime::driver::op::{self, Completable, Op}; 5 | use crate::BufResult; 6 | 7 | use crate::runtime::CONTEXT; 8 | use std::io; 9 | 10 | pub(crate) struct ReadFixed { 11 | /// Holds a strong ref to the FD, preventing the file from being closed 12 | /// while the operation is in-flight. 13 | #[allow(dead_code)] 14 | fd: SharedFd, 15 | 16 | /// The in-flight buffer. 17 | buf: T, 18 | } 19 | 20 | impl Op> 21 | where 22 | T: BoundedBufMut, 23 | { 24 | pub(crate) fn read_fixed_at( 25 | fd: &SharedFd, 26 | buf: T, 27 | offset: u64, 28 | ) -> io::Result>> { 29 | use io_uring::{opcode, types}; 30 | 31 | CONTEXT.with(|x| { 32 | x.handle().expect("Not in a runtime context").submit_op( 33 | ReadFixed { 34 | fd: fd.clone(), 35 | buf, 36 | }, 37 | |read_fixed| { 38 | // Get raw buffer info 39 | let ptr = read_fixed.buf.stable_mut_ptr(); 40 | let len = read_fixed.buf.bytes_total(); 41 | let buf_index = read_fixed.buf.get_buf().buf_index(); 42 | opcode::ReadFixed::new(types::Fd(fd.raw_fd()), ptr, len as _, buf_index) 43 | .offset(offset as _) 44 | .build() 45 | }, 46 | ) 47 | }) 48 | } 49 | } 50 | 51 | impl Completable for ReadFixed 52 | where 53 | T: BoundedBufMut, 54 | { 55 | type Output = BufResult; 56 | 57 | fn complete(self, cqe: op::CqeResult) -> Self::Output { 58 | // Convert the operation result to `usize` 59 | let res = cqe.result.map(|v| v as usize); 60 | // Recover the buffer 61 | let mut buf = self.buf; 62 | 63 | // If the operation was successful, advance the initialized cursor. 64 | if let Ok(n) = res { 65 | // Safety: the kernel wrote `n` bytes to the buffer. 
66 | unsafe { 67 | buf.set_init(n); 68 | } 69 | } 70 | 71 | (res, buf) 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /src/io/readv.rs: -------------------------------------------------------------------------------- 1 | use crate::buf::BoundedBufMut; 2 | use crate::BufResult; 3 | 4 | use crate::io::SharedFd; 5 | use crate::runtime::driver::op::{Completable, CqeResult, Op}; 6 | use crate::runtime::CONTEXT; 7 | use libc::iovec; 8 | use std::io; 9 | 10 | pub(crate) struct Readv { 11 | /// Holds a strong ref to the FD, preventing the file from being closed 12 | /// while the operation is in-flight. 13 | #[allow(dead_code)] 14 | fd: SharedFd, 15 | 16 | /// Reference to the in-flight buffer. 17 | pub(crate) bufs: Vec, 18 | /// Parameter for `io_uring::op::readv`, referring `bufs`. 19 | iovs: Vec, 20 | } 21 | 22 | impl Op> { 23 | pub(crate) fn readv_at( 24 | fd: &SharedFd, 25 | mut bufs: Vec, 26 | offset: u64, 27 | ) -> io::Result>> { 28 | use io_uring::{opcode, types}; 29 | 30 | // Build `iovec` objects referring the provided `bufs` for `io_uring::opcode::Readv`. 31 | let iovs: Vec = bufs 32 | .iter_mut() 33 | .map(|b| iovec { 34 | // Safety guaranteed by `BoundedBufMut`. 35 | iov_base: unsafe { b.stable_mut_ptr().add(b.bytes_init()) as *mut libc::c_void }, 36 | iov_len: b.bytes_total() - b.bytes_init(), 37 | }) 38 | .collect(); 39 | 40 | CONTEXT.with(|x| { 41 | x.handle().expect("Not in a runtime context").submit_op( 42 | Readv { 43 | fd: fd.clone(), 44 | bufs, 45 | iovs, 46 | }, 47 | |read| { 48 | opcode::Readv::new( 49 | types::Fd(fd.raw_fd()), 50 | read.iovs.as_ptr(), 51 | read.iovs.len() as u32, 52 | ) 53 | .offset(offset as _) 54 | .build() 55 | }, 56 | ) 57 | }) 58 | } 59 | } 60 | 61 | impl Completable for Readv 62 | where 63 | T: BoundedBufMut, 64 | { 65 | type Output = BufResult>; 66 | 67 | fn complete(self, cqe: CqeResult) -> Self::Output { 68 | // Convert the operation result to `usize` 69 | let res = cqe.result.map(|v| v as usize); 70 | // Recover the buffer 71 | let mut bufs = self.bufs; 72 | 73 | // If the operation was successful, advance the initialized cursor. 74 | if let Ok(n) = res { 75 | let mut count = n; 76 | for b in bufs.iter_mut() { 77 | let sz = std::cmp::min(count, b.bytes_total() - b.bytes_init()); 78 | let pos = b.bytes_init() + sz; 79 | // Safety: the kernel returns bytes written, and we have ensured that `pos` is 80 | // valid for current buffer. 
81 | unsafe { b.set_init(pos) }; 82 | count -= sz; 83 | if count == 0 { 84 | break; 85 | } 86 | } 87 | assert_eq!(count, 0); 88 | } 89 | 90 | (res, bufs) 91 | } 92 | } 93 | -------------------------------------------------------------------------------- /src/io/recv_from.rs: -------------------------------------------------------------------------------- 1 | use crate::runtime::driver::op::{Completable, CqeResult, Op}; 2 | use crate::runtime::CONTEXT; 3 | use crate::{buf::BoundedBufMut, io::SharedFd, BufResult}; 4 | use socket2::SockAddr; 5 | use std::{ 6 | io::IoSliceMut, 7 | {boxed::Box, io, net::SocketAddr}, 8 | }; 9 | 10 | #[allow(dead_code)] 11 | pub(crate) struct RecvFrom { 12 | fd: SharedFd, 13 | pub(crate) buf: T, 14 | io_slices: Vec>, 15 | pub(crate) socket_addr: Box, 16 | pub(crate) msghdr: Box, 17 | } 18 | 19 | impl Op> { 20 | pub(crate) fn recv_from(fd: &SharedFd, mut buf: T) -> io::Result>> { 21 | use io_uring::{opcode, types}; 22 | 23 | let mut io_slices = vec![IoSliceMut::new(unsafe { 24 | std::slice::from_raw_parts_mut(buf.stable_mut_ptr(), buf.bytes_total()) 25 | })]; 26 | 27 | let socket_addr = Box::new(unsafe { SockAddr::init(|_, _| Ok(()))?.1 }); 28 | 29 | let mut msghdr: Box = Box::new(unsafe { std::mem::zeroed() }); 30 | msghdr.msg_iov = io_slices.as_mut_ptr().cast(); 31 | msghdr.msg_iovlen = io_slices.len() as _; 32 | msghdr.msg_name = socket_addr.as_ptr() as *mut libc::c_void; 33 | msghdr.msg_namelen = socket_addr.len(); 34 | 35 | CONTEXT.with(|x| { 36 | x.handle().expect("Not in a runtime context").submit_op( 37 | RecvFrom { 38 | fd: fd.clone(), 39 | buf, 40 | io_slices, 41 | socket_addr, 42 | msghdr, 43 | }, 44 | |recv_from| { 45 | opcode::RecvMsg::new( 46 | types::Fd(recv_from.fd.raw_fd()), 47 | recv_from.msghdr.as_mut() as *mut _, 48 | ) 49 | .build() 50 | }, 51 | ) 52 | }) 53 | } 54 | } 55 | 56 | impl Completable for RecvFrom 57 | where 58 | T: BoundedBufMut, 59 | { 60 | type Output = BufResult<(usize, SocketAddr), T>; 61 | 62 | fn complete(self, cqe: CqeResult) -> Self::Output { 63 | // Convert the operation result to `usize` 64 | let res = cqe.result.map(|v| v as usize); 65 | // Recover the buffer 66 | let mut buf = self.buf; 67 | 68 | let socket_addr = (*self.socket_addr).as_socket(); 69 | 70 | let res = res.map(|n| { 71 | let socket_addr: SocketAddr = socket_addr.unwrap(); 72 | 73 | // Safety: the kernel wrote `n` bytes to the buffer. 
74 | unsafe { 75 | buf.set_init(n); 76 | } 77 | 78 | (n, socket_addr) 79 | }); 80 | 81 | (res, buf) 82 | } 83 | } 84 | -------------------------------------------------------------------------------- /src/io/recvmsg.rs: -------------------------------------------------------------------------------- 1 | use crate::runtime::driver::op::{Completable, CqeResult, Op}; 2 | use crate::runtime::CONTEXT; 3 | use crate::{buf::BoundedBufMut, io::SharedFd, BufResult}; 4 | use socket2::SockAddr; 5 | use std::{ 6 | io::IoSliceMut, 7 | {boxed::Box, io, net::SocketAddr}, 8 | }; 9 | 10 | pub(crate) struct RecvMsg { 11 | #[allow(dead_code)] 12 | fd: SharedFd, 13 | pub(crate) buf: Vec, 14 | #[allow(dead_code)] 15 | io_slices: Vec>, 16 | pub(crate) socket_addr: Box, 17 | pub(crate) msghdr: Box, 18 | } 19 | 20 | impl Op> { 21 | pub(crate) fn recvmsg(fd: &SharedFd, mut bufs: Vec) -> io::Result>> { 22 | use io_uring::{opcode, types}; 23 | 24 | let mut io_slices = Vec::with_capacity(bufs.len()); 25 | for buf in &mut bufs { 26 | io_slices.push(IoSliceMut::new(unsafe { 27 | std::slice::from_raw_parts_mut(buf.stable_mut_ptr(), buf.bytes_total()) 28 | })); 29 | } 30 | 31 | let socket_addr = Box::new(unsafe { SockAddr::init(|_, _| Ok(()))?.1 }); 32 | 33 | let mut msghdr: Box = Box::new(unsafe { std::mem::zeroed() }); 34 | msghdr.msg_iov = io_slices.as_mut_ptr().cast(); 35 | msghdr.msg_iovlen = io_slices.len() as _; 36 | msghdr.msg_name = socket_addr.as_ptr() as *mut libc::c_void; 37 | msghdr.msg_namelen = socket_addr.len(); 38 | 39 | CONTEXT.with(|x| { 40 | x.handle().expect("Not in a runtime context").submit_op( 41 | RecvMsg { 42 | fd: fd.clone(), 43 | buf: bufs, 44 | io_slices, 45 | socket_addr, 46 | msghdr, 47 | }, 48 | |recv_from| { 49 | opcode::RecvMsg::new( 50 | types::Fd(recv_from.fd.raw_fd()), 51 | recv_from.msghdr.as_mut() as *mut _, 52 | ) 53 | .build() 54 | }, 55 | ) 56 | }) 57 | } 58 | } 59 | 60 | impl Completable for RecvMsg 61 | where 62 | T: BoundedBufMut, 63 | { 64 | type Output = BufResult<(usize, SocketAddr), Vec>; 65 | 66 | fn complete(self, cqe: CqeResult) -> Self::Output { 67 | // Convert the operation result to `usize` 68 | let res = cqe.result.map(|v| v as usize); 69 | // Recover the buffers 70 | let mut bufs = self.buf; 71 | 72 | let socket_addr = (*self.socket_addr).as_socket(); 73 | 74 | let res = res.map(|n| { 75 | let socket_addr: SocketAddr = socket_addr.unwrap(); 76 | 77 | let mut bytes = n; 78 | for buf in &mut bufs { 79 | // Safety: the kernel wrote `n` bytes to the buffer. 80 | unsafe { 81 | buf.set_init(bytes); 82 | } 83 | let total = buf.bytes_total(); 84 | if bytes > total { 85 | bytes -= total; 86 | } else { 87 | // In the current API bytes_init is a watermark, 88 | // so remaining don't need zeroing. 89 | break; 90 | } 91 | } 92 | (n, socket_addr) 93 | }); 94 | 95 | (res, bufs) 96 | } 97 | } 98 | -------------------------------------------------------------------------------- /src/io/rename_at.rs: -------------------------------------------------------------------------------- 1 | use crate::runtime::driver::op::{Completable, CqeResult, Op}; 2 | use crate::runtime::CONTEXT; 3 | use std::ffi::CString; 4 | use std::io; 5 | use std::path::Path; 6 | 7 | /// Renames a file, moving it between directories if required. 8 | /// 9 | /// The given paths are interpreted relative to the current working directory 10 | /// of the calling process. 
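/// For example (illustrative, based on the `rename_at` constructor below), moving
/// `a.txt` into a subdirectory would be submitted as
/// `Op::rename_at(Path::new("a.txt"), Path::new("b/a.txt"), 0)?.await`.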
11 | pub(crate) struct RenameAt { 12 | pub(crate) from: CString, 13 | pub(crate) to: CString, 14 | } 15 | 16 | impl Op { 17 | /// Submit a request to rename a specified path to a new name with 18 | /// the provided flags. 19 | pub(crate) fn rename_at(from: &Path, to: &Path, flags: u32) -> io::Result> { 20 | use io_uring::{opcode, types}; 21 | 22 | let from = super::util::cstr(from)?; 23 | let to = super::util::cstr(to)?; 24 | 25 | CONTEXT.with(|x| { 26 | x.handle().expect("Not in a runtime context").submit_op( 27 | RenameAt { from, to }, 28 | |rename| { 29 | // Get a reference to the memory. The string will be held by the 30 | // operation state and will not be accessed again until the operation 31 | // completes. 32 | let from_ref = rename.from.as_c_str().as_ptr(); 33 | let to_ref = rename.to.as_c_str().as_ptr(); 34 | opcode::RenameAt::new( 35 | types::Fd(libc::AT_FDCWD), 36 | from_ref, 37 | types::Fd(libc::AT_FDCWD), 38 | to_ref, 39 | ) 40 | .flags(flags) 41 | .build() 42 | }, 43 | ) 44 | }) 45 | } 46 | } 47 | 48 | impl Completable for RenameAt { 49 | type Output = io::Result<()>; 50 | 51 | fn complete(self, cqe: CqeResult) -> Self::Output { 52 | cqe.result.map(|_| ()) 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /src/io/send_to.rs: -------------------------------------------------------------------------------- 1 | use crate::buf::BoundedBuf; 2 | use crate::io::SharedFd; 3 | use crate::runtime::driver::op::{Completable, CqeResult, Op}; 4 | use crate::runtime::CONTEXT; 5 | use crate::BufResult; 6 | use socket2::SockAddr; 7 | use std::io::IoSlice; 8 | use std::{boxed::Box, io, net::SocketAddr}; 9 | 10 | pub(crate) struct SendTo { 11 | #[allow(dead_code)] 12 | fd: SharedFd, 13 | pub(crate) buf: T, 14 | #[allow(dead_code)] 15 | io_slices: Vec>, 16 | #[allow(dead_code)] 17 | socket_addr: Option>, 18 | pub(crate) msghdr: Box, 19 | } 20 | 21 | impl Op> { 22 | pub(crate) fn send_to( 23 | fd: &SharedFd, 24 | buf: T, 25 | socket_addr: Option, 26 | ) -> io::Result>> { 27 | use io_uring::{opcode, types}; 28 | 29 | let io_slices = vec![IoSlice::new(unsafe { 30 | std::slice::from_raw_parts(buf.stable_ptr(), buf.bytes_init()) 31 | })]; 32 | 33 | let mut msghdr: Box = Box::new(unsafe { std::mem::zeroed() }); 34 | msghdr.msg_iov = io_slices.as_ptr() as *mut _; 35 | msghdr.msg_iovlen = io_slices.len() as _; 36 | 37 | let socket_addr = match socket_addr { 38 | Some(_socket_addr) => { 39 | let socket_addr = Box::new(SockAddr::from(_socket_addr)); 40 | msghdr.msg_name = socket_addr.as_ptr() as *mut libc::c_void; 41 | msghdr.msg_namelen = socket_addr.len(); 42 | Some(socket_addr) 43 | } 44 | None => { 45 | msghdr.msg_name = std::ptr::null_mut(); 46 | msghdr.msg_namelen = 0; 47 | None 48 | } 49 | }; 50 | 51 | CONTEXT.with(|x| { 52 | x.handle().expect("Not in a runtime context").submit_op( 53 | SendTo { 54 | fd: fd.clone(), 55 | buf, 56 | io_slices, 57 | socket_addr, 58 | msghdr, 59 | }, 60 | |send_to| { 61 | opcode::SendMsg::new( 62 | types::Fd(send_to.fd.raw_fd()), 63 | send_to.msghdr.as_ref() as *const _, 64 | ) 65 | .build() 66 | }, 67 | ) 68 | }) 69 | } 70 | } 71 | 72 | impl Completable for SendTo { 73 | type Output = BufResult; 74 | 75 | fn complete(self, cqe: CqeResult) -> Self::Output { 76 | // Convert the operation result to `usize` 77 | let res = cqe.result.map(|v| v as usize); 78 | // Recover the buffer 79 | let buf = self.buf; 80 | 81 | (res, buf) 82 | } 83 | } 84 | 
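// Editor's illustrative sketch (not part of the dumped sources): the SendTo,
// RecvFrom and RecvMsg operations above back the public `UdpSocket` wrappers.
// The snippet assumes the crate's `UdpSocket::bind`, `send_to` and `recv_from`
// methods and shows the buffer-ownership round trip they expose.
fn main() {
    tokio_uring::start(async {
        let peer: std::net::SocketAddr = "127.0.0.1:8081".parse().unwrap();
        let sock = tokio_uring::net::UdpSocket::bind("127.0.0.1:8080".parse().unwrap())
            .await
            .unwrap();

        // The buffer is moved into the operation and handed back on completion.
        let (res, _buf) = sock.send_to(b"ping".to_vec(), peer).await;
        res.unwrap();

        // recv_from resolves to the byte count, the peer address and the buffer.
        let (res, buf) = sock.recv_from(vec![0u8; 1500]).await;
        let (n, addr) = res.unwrap();
        println!("{n} bytes from {addr}: {:?}", &buf[..n]);
    });
}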
-------------------------------------------------------------------------------- /src/io/send_zc.rs: -------------------------------------------------------------------------------- 1 | use crate::runtime::driver::op::{Completable, CqeResult, MultiCQEFuture, Op, Updateable}; 2 | use crate::runtime::CONTEXT; 3 | use crate::{buf::BoundedBuf, io::SharedFd, BufResult}; 4 | use std::io; 5 | 6 | pub(crate) struct SendZc { 7 | /// Holds a strong ref to the FD, preventing the file from being closed 8 | /// while the operation is in-flight. 9 | #[allow(dead_code)] 10 | fd: SharedFd, 11 | 12 | pub(crate) buf: T, 13 | 14 | /// Hold the number of transmitted bytes 15 | bytes: usize, 16 | } 17 | 18 | impl Op, MultiCQEFuture> { 19 | pub(crate) fn send_zc(fd: &SharedFd, buf: T) -> io::Result { 20 | use io_uring::{opcode, types}; 21 | 22 | CONTEXT.with(|x| { 23 | x.handle().expect("Not in a runtime context").submit_op( 24 | SendZc { 25 | fd: fd.clone(), 26 | buf, 27 | bytes: 0, 28 | }, 29 | |send| { 30 | // Get raw buffer info 31 | let ptr = send.buf.stable_ptr(); 32 | let len = send.buf.bytes_init(); 33 | 34 | opcode::SendZc::new(types::Fd(fd.raw_fd()), ptr, len as _).build() 35 | }, 36 | ) 37 | }) 38 | } 39 | } 40 | 41 | impl Completable for SendZc { 42 | type Output = BufResult; 43 | 44 | fn complete(self, cqe: CqeResult) -> Self::Output { 45 | // Convert the operation result to `usize` 46 | let res = cqe.result.map(|v| self.bytes + v as usize); 47 | // Recover the buffer 48 | let buf = self.buf; 49 | (res, buf) 50 | } 51 | } 52 | 53 | impl Updateable for SendZc { 54 | fn update(&mut self, cqe: CqeResult) { 55 | // uring send_zc promises there will be no error on CQE's marked more 56 | self.bytes += *cqe.result.as_ref().unwrap() as usize; 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /src/io/sendmsg.rs: -------------------------------------------------------------------------------- 1 | use crate::buf::BoundedBuf; 2 | use crate::io::SharedFd; 3 | use crate::runtime::driver::op::{Completable, CqeResult, Op}; 4 | use crate::runtime::CONTEXT; 5 | use socket2::SockAddr; 6 | use std::io; 7 | use std::io::IoSlice; 8 | use std::net::SocketAddr; 9 | 10 | pub(crate) struct SendMsg { 11 | _fd: SharedFd, 12 | _io_bufs: Vec, 13 | _io_slices: Vec>, 14 | _socket_addr: Option>, 15 | msg_control: Option, 16 | msghdr: Box, 17 | } 18 | 19 | impl Op> { 20 | pub(crate) fn sendmsg( 21 | fd: &SharedFd, 22 | io_bufs: Vec, 23 | socket_addr: Option, 24 | msg_control: Option, 25 | ) -> io::Result { 26 | use io_uring::{opcode, types}; 27 | 28 | let mut msghdr: Box = Box::new(unsafe { std::mem::zeroed() }); 29 | 30 | let mut io_slices: Vec> = Vec::with_capacity(io_bufs.len()); 31 | 32 | for io_buf in &io_bufs { 33 | io_slices.push(IoSlice::new(unsafe { 34 | std::slice::from_raw_parts(io_buf.stable_ptr(), io_buf.bytes_init()) 35 | })) 36 | } 37 | 38 | msghdr.msg_iov = io_slices.as_ptr() as *mut _; 39 | msghdr.msg_iovlen = io_slices.len() as _; 40 | 41 | let socket_addr = match socket_addr { 42 | Some(_socket_addr) => { 43 | let socket_addr = Box::new(SockAddr::from(_socket_addr)); 44 | msghdr.msg_name = socket_addr.as_ptr() as *mut libc::c_void; 45 | msghdr.msg_namelen = socket_addr.len(); 46 | Some(socket_addr) 47 | } 48 | None => { 49 | msghdr.msg_name = std::ptr::null_mut(); 50 | msghdr.msg_namelen = 0; 51 | None 52 | } 53 | }; 54 | 55 | match msg_control { 56 | Some(ref _msg_control) => { 57 | msghdr.msg_control = _msg_control.stable_ptr() as *mut _; 58 | 
msghdr.msg_controllen = _msg_control.bytes_init(); 59 | } 60 | None => { 61 | msghdr.msg_control = std::ptr::null_mut(); 62 | msghdr.msg_controllen = 0_usize; 63 | } 64 | } 65 | 66 | CONTEXT.with(|x| { 67 | x.handle().expect("Not in a runtime context").submit_op( 68 | SendMsg { 69 | _fd: fd.clone(), 70 | _io_bufs: io_bufs, 71 | _socket_addr: socket_addr, 72 | _io_slices: io_slices, 73 | msg_control, 74 | msghdr, 75 | }, 76 | |sendmsg| { 77 | opcode::SendMsg::new( 78 | types::Fd(sendmsg._fd.raw_fd()), 79 | &*sendmsg.msghdr as *const _, 80 | ) 81 | .build() 82 | }, 83 | ) 84 | }) 85 | } 86 | } 87 | 88 | impl Completable for SendMsg { 89 | type Output = (io::Result, Vec, Option); 90 | 91 | fn complete(self, cqe: CqeResult) -> (io::Result, Vec, Option) { 92 | // Convert the operation result to `usize` 93 | let res = cqe.result.map(|n| n as usize); 94 | 95 | // Recover the data buffers. 96 | let io_bufs = self._io_bufs; 97 | 98 | // Recover the ancillary data buffer. 99 | let msg_control = self.msg_control; 100 | 101 | (res, io_bufs, msg_control) 102 | } 103 | } 104 | -------------------------------------------------------------------------------- /src/io/sendmsg_zc.rs: -------------------------------------------------------------------------------- 1 | use crate::buf::BoundedBuf; 2 | use crate::io::SharedFd; 3 | use crate::runtime::driver::op::{Completable, CqeResult, MultiCQEFuture, Op, Updateable}; 4 | use crate::runtime::CONTEXT; 5 | use socket2::SockAddr; 6 | use std::io; 7 | use std::io::IoSlice; 8 | use std::net::SocketAddr; 9 | 10 | pub(crate) struct SendMsgZc { 11 | #[allow(dead_code)] 12 | fd: SharedFd, 13 | #[allow(dead_code)] 14 | io_bufs: Vec, 15 | #[allow(dead_code)] 16 | io_slices: Vec>, 17 | #[allow(dead_code)] 18 | socket_addr: Option>, 19 | msg_control: Option, 20 | msghdr: Box, 21 | 22 | /// Hold the number of transmitted bytes 23 | bytes: usize, 24 | } 25 | 26 | impl Op, MultiCQEFuture> { 27 | pub(crate) fn sendmsg_zc( 28 | fd: &SharedFd, 29 | io_bufs: Vec, 30 | socket_addr: Option, 31 | msg_control: Option, 32 | ) -> io::Result { 33 | use io_uring::{opcode, types}; 34 | 35 | let mut msghdr: Box = Box::new(unsafe { std::mem::zeroed() }); 36 | 37 | let mut io_slices: Vec> = Vec::with_capacity(io_bufs.len()); 38 | 39 | for io_buf in &io_bufs { 40 | io_slices.push(IoSlice::new(unsafe { 41 | std::slice::from_raw_parts(io_buf.stable_ptr(), io_buf.bytes_init()) 42 | })) 43 | } 44 | 45 | msghdr.msg_iov = io_slices.as_ptr() as *mut _; 46 | msghdr.msg_iovlen = io_slices.len() as _; 47 | 48 | let socket_addr = match socket_addr { 49 | Some(_socket_addr) => { 50 | let socket_addr = Box::new(SockAddr::from(_socket_addr)); 51 | msghdr.msg_name = socket_addr.as_ptr() as *mut libc::c_void; 52 | msghdr.msg_namelen = socket_addr.len(); 53 | Some(socket_addr) 54 | } 55 | None => { 56 | msghdr.msg_name = std::ptr::null_mut(); 57 | msghdr.msg_namelen = 0; 58 | None 59 | } 60 | }; 61 | 62 | match msg_control { 63 | Some(ref _msg_control) => { 64 | msghdr.msg_control = _msg_control.stable_ptr() as *mut _; 65 | msghdr.msg_controllen = _msg_control.bytes_init(); 66 | } 67 | None => { 68 | msghdr.msg_control = std::ptr::null_mut(); 69 | msghdr.msg_controllen = 0_usize; 70 | } 71 | } 72 | 73 | CONTEXT.with(|x| { 74 | x.handle().expect("Not in a runtime context").submit_op( 75 | SendMsgZc { 76 | fd: fd.clone(), 77 | io_bufs, 78 | socket_addr, 79 | io_slices, 80 | msg_control, 81 | msghdr, 82 | bytes: 0, 83 | }, 84 | |sendmsg_zc| { 85 | opcode::SendMsgZc::new( 86 | types::Fd(sendmsg_zc.fd.raw_fd()), 87 
| sendmsg_zc.msghdr.as_mut() as *const _, 88 | ) 89 | .build() 90 | }, 91 | ) 92 | }) 93 | } 94 | } 95 | 96 | impl Completable for SendMsgZc { 97 | type Output = (io::Result, Vec, Option); 98 | 99 | fn complete(self, cqe: CqeResult) -> (io::Result, Vec, Option) { 100 | // Convert the operation result to `usize`, and add previous byte count 101 | let res = cqe.result.map(|v| self.bytes + v as usize); 102 | 103 | // Recover the data buffers. 104 | let io_bufs = self.io_bufs; 105 | 106 | // Recover the ancillary data buffer. 107 | let msg_control = self.msg_control; 108 | 109 | (res, io_bufs, msg_control) 110 | } 111 | } 112 | 113 | impl Updateable for SendMsgZc { 114 | fn update(&mut self, cqe: CqeResult) { 115 | // uring send_zc promises there will be no error on CQE's marked more 116 | self.bytes += *cqe.result.as_ref().unwrap() as usize; 117 | } 118 | } 119 | -------------------------------------------------------------------------------- /src/io/shared_fd.rs: -------------------------------------------------------------------------------- 1 | use std::future::poll_fn; 2 | 3 | use std::{ 4 | cell::RefCell, 5 | io, 6 | os::unix::io::{FromRawFd, RawFd}, 7 | rc::Rc, 8 | task::Waker, 9 | }; 10 | 11 | use crate::runtime::driver::op::Op; 12 | 13 | // Tracks in-flight operations on a file descriptor. Ensures all in-flight 14 | // operations complete before submitting the close. 15 | // 16 | // When closing the file descriptor because it is going out of scope, a synchronous close is 17 | // employed. 18 | // 19 | // The closed state is tracked so close calls after the first are ignored. 20 | // Only the first close call returns the true result of closing the file descriptor. 21 | #[derive(Clone)] 22 | pub(crate) struct SharedFd { 23 | inner: Rc, 24 | } 25 | 26 | struct Inner { 27 | // Open file descriptor 28 | fd: RawFd, 29 | 30 | // Track the sharing state of the file descriptor: 31 | // normal, being waited on to allow a close by the parent's owner, or already closed. 32 | state: RefCell, 33 | } 34 | 35 | enum State { 36 | /// Initial state 37 | Init, 38 | 39 | /// Waiting for the number of strong Rc pointers to drop to 1. 40 | WaitingForUniqueness(Waker), 41 | 42 | /// The close has been triggered by the parent owner. 43 | Closed, 44 | } 45 | 46 | impl SharedFd { 47 | pub(crate) fn new(fd: RawFd) -> SharedFd { 48 | SharedFd { 49 | inner: Rc::new(Inner { 50 | fd, 51 | state: RefCell::new(State::Init), 52 | }), 53 | } 54 | } 55 | 56 | /// Returns the RawFd 57 | pub(crate) fn raw_fd(&self) -> RawFd { 58 | self.inner.fd 59 | } 60 | 61 | /// An FD cannot be closed until all in-flight operation have completed. 62 | /// This prevents bugs where in-flight reads could operate on the incorrect 63 | /// file descriptor. 64 | /// 65 | pub(crate) async fn close(&mut self) -> io::Result<()> { 66 | loop { 67 | // Get a mutable reference to Inner, indicating there are no 68 | // in-flight operations on the FD. 69 | if let Some(inner) = Rc::get_mut(&mut self.inner) { 70 | // Wait for the close operation. 71 | return inner.async_close_op().await; 72 | } 73 | 74 | self.sharedfd_is_unique().await; 75 | } 76 | } 77 | 78 | /// Completes when the SharedFd's Inner Rc strong count is 1. 79 | /// Gets polled any time a SharedFd is dropped. 
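/// Together with `close()` above this forms a small retry loop: `close()` re-checks
/// `Rc::get_mut` each time this future resolves, and the `Drop` impl below wakes the
/// stored waker whenever a clone is dropped in the `WaitingForUniqueness` state.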
80 | async fn sharedfd_is_unique(&self) { 81 | use std::task::Poll; 82 | 83 | poll_fn(|cx| { 84 | if Rc::::strong_count(&self.inner) == 1 { 85 | return Poll::Ready(()); 86 | } 87 | 88 | let mut state = self.inner.state.borrow_mut(); 89 | 90 | match &mut *state { 91 | State::Init => { 92 | *state = State::WaitingForUniqueness(cx.waker().clone()); 93 | Poll::Pending 94 | } 95 | State::WaitingForUniqueness(waker) => { 96 | if !waker.will_wake(cx.waker()) { 97 | waker.clone_from(cx.waker()); 98 | } 99 | 100 | Poll::Pending 101 | } 102 | State::Closed => Poll::Ready(()), 103 | } 104 | }) 105 | .await; 106 | } 107 | } 108 | 109 | impl Inner { 110 | async fn async_close_op(&mut self) -> io::Result<()> { 111 | // &mut self implies there are no outstanding operations. 112 | // If state already closed, the user closed multiple times; simply return Ok. 113 | // Otherwise, set state to closed and then submit and await the uring close operation. 114 | { 115 | // Release state guard before await. 116 | let state = RefCell::get_mut(&mut self.state); 117 | 118 | if let State::Closed = *state { 119 | return Ok(()); 120 | } 121 | 122 | *state = State::Closed; 123 | } 124 | Op::close(self.fd)?.await 125 | } 126 | } 127 | 128 | impl Drop for SharedFd { 129 | fn drop(&mut self) { 130 | // If the SharedFd state is Waiting 131 | // The job of the SharedFd's drop is to possibly wake a task that is waiting for the 132 | // reference count to go down. 133 | use std::mem; 134 | 135 | let mut state = self.inner.state.borrow_mut(); 136 | if let State::WaitingForUniqueness(_) = *state { 137 | let state = &mut *state; 138 | if let State::WaitingForUniqueness(waker) = mem::replace(state, State::Init) { 139 | // Wake the task wanting to close this SharedFd and let it try again. If it finds 140 | // there are no more outstanding clones, it will succeed. Otherwise it will start a new 141 | // Future, waiting for another SharedFd to be dropped. 142 | waker.wake() 143 | } 144 | } 145 | } 146 | } 147 | 148 | impl Drop for Inner { 149 | fn drop(&mut self) { 150 | // If the inner state isn't `Closed`, the user hasn't called close().await 151 | // so do it synchronously. 152 | 153 | let state = self.state.borrow_mut(); 154 | 155 | if let State::Closed = *state { 156 | return; 157 | } 158 | let _ = unsafe { std::fs::File::from_raw_fd(self.fd) }; 159 | } 160 | } 161 | -------------------------------------------------------------------------------- /src/io/statx.rs: -------------------------------------------------------------------------------- 1 | use std::ffi::CString; 2 | use std::{ffi::CStr, io}; 3 | 4 | use io_uring::{opcode, types}; 5 | 6 | use crate::runtime::{ 7 | driver::op::{Completable, CqeResult, Op}, 8 | CONTEXT, 9 | }; 10 | 11 | use super::SharedFd; 12 | 13 | pub(crate) struct Statx { 14 | #[allow(dead_code)] 15 | fd: Option, 16 | #[allow(dead_code)] 17 | path: CString, 18 | 19 | // TODO consider returning this type when the operation is complete so the caller has the boxed value. 20 | // The builder could even recycle an old boxed value and pass it in here. 21 | statx: Box, 22 | } 23 | 24 | impl Op { 25 | // If we are passed a reference to a shared fd, clone it so we keep it live during the 26 | // Future. If we aren't, use the libc::AT_FDCWD value. 27 | // If Path is None, the flags is combined with libc::AT_EMPTY_PATH automatically. 
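    // Illustrative calls (editor's sketch, not repository code): statting an already-open
    // fd would look like `Op::statx(Some(fd.clone()), None, 0, libc::STATX_ALL)?.await`,
    // while statting a path relative to the CWD passes `None` for the fd and a CString path.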
28 | pub(crate) fn statx( 29 | fd: Option, 30 | path: Option, 31 | flags: i32, 32 | mask: u32, 33 | ) -> io::Result> { 34 | let raw = fd.as_ref().map_or(libc::AT_FDCWD, |fd| fd.raw_fd()); 35 | let mut flags = flags; 36 | let path = match path { 37 | Some(path) => path, 38 | None => { 39 | // If there is no path, add appropriate bit to flags. 40 | flags |= libc::AT_EMPTY_PATH; 41 | CStr::from_bytes_with_nul(b"\0").unwrap().into() // TODO Is there a constant CString we 42 | // could use here. 43 | } 44 | }; 45 | CONTEXT.with(|x| { 46 | x.handle().expect("not in a runtime context").submit_op( 47 | Statx { 48 | fd, 49 | path, 50 | statx: Box::new(unsafe { std::mem::zeroed() }), 51 | }, 52 | |statx| { 53 | opcode::Statx::new( 54 | types::Fd(raw), 55 | statx.path.as_ptr(), 56 | &mut *statx.statx as *mut libc::statx as *mut types::statx, 57 | ) 58 | .flags(flags) 59 | .mask(mask) 60 | .build() 61 | }, 62 | ) 63 | }) 64 | } 65 | } 66 | 67 | impl Completable for Statx { 68 | type Output = io::Result; 69 | 70 | fn complete(self, cqe: CqeResult) -> Self::Output { 71 | cqe.result?; 72 | Ok(*self.statx) 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /src/io/symlink.rs: -------------------------------------------------------------------------------- 1 | use crate::runtime::driver::op::{Completable, CqeResult, Op}; 2 | use crate::runtime::CONTEXT; 3 | 4 | use super::util::cstr; 5 | 6 | use std::ffi::CString; 7 | use std::io; 8 | use std::path::Path; 9 | 10 | pub(crate) struct Symlink { 11 | pub(crate) _from: CString, 12 | pub(crate) _to: CString, 13 | } 14 | 15 | impl Op { 16 | pub(crate) fn symlink, Q: AsRef>( 17 | from: P, 18 | to: Q, 19 | ) -> io::Result> { 20 | use io_uring::{opcode, types}; 21 | 22 | let _from = cstr(from.as_ref())?; 23 | let _to = cstr(to.as_ref())?; 24 | 25 | CONTEXT.with(|x| { 26 | x.handle().expect("Not in a runtime context").submit_op( 27 | Symlink { _from, _to }, 28 | |symlink| { 29 | let from_ref = symlink._from.as_c_str().as_ptr(); 30 | let to_ref = symlink._to.as_c_str().as_ptr(); 31 | 32 | opcode::SymlinkAt::new(types::Fd(libc::AT_FDCWD), from_ref, to_ref).build() 33 | }, 34 | ) 35 | }) 36 | } 37 | } 38 | 39 | impl Completable for Symlink { 40 | type Output = io::Result<()>; 41 | 42 | fn complete(self, cqe: CqeResult) -> Self::Output { 43 | cqe.result.map(|_| ()) 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /src/io/unlink_at.rs: -------------------------------------------------------------------------------- 1 | use crate::runtime::driver::op::{Completable, CqeResult, Op}; 2 | use crate::runtime::CONTEXT; 3 | use std::ffi::CString; 4 | use std::io; 5 | use std::path::Path; 6 | 7 | /// Unlink a path relative to the current working directory of the caller's process. 8 | pub(crate) struct Unlink { 9 | pub(crate) path: CString, 10 | } 11 | 12 | impl Op { 13 | /// Submit a request to unlink a directory with provided flags. 14 | pub(crate) fn unlink_dir(path: &Path) -> io::Result> { 15 | Self::unlink(path, libc::AT_REMOVEDIR) 16 | } 17 | 18 | /// Submit a request to unlink a file with provided flags. 19 | pub(crate) fn unlink_file(path: &Path) -> io::Result> { 20 | Self::unlink(path, 0) 21 | } 22 | 23 | /// Submit a request to unlink a specified path with provided flags. 
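    /// The returned future resolves to `io::Result<()>` once the kernel completes the
    /// underlying `UnlinkAt` submission (see the `Completable` impl below).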
24 | pub(crate) fn unlink(path: &Path, flags: i32) -> io::Result> { 25 | use io_uring::{opcode, types}; 26 | 27 | let path = super::util::cstr(path)?; 28 | 29 | CONTEXT.with(|x| { 30 | x.handle() 31 | .expect("Not in a runtime context") 32 | .submit_op(Unlink { path }, |unlink| { 33 | // Get a reference to the memory. The string will be held by the 34 | // operation state and will not be accessed again until the operation 35 | // completes. 36 | let p_ref = unlink.path.as_c_str().as_ptr(); 37 | opcode::UnlinkAt::new(types::Fd(libc::AT_FDCWD), p_ref) 38 | .flags(flags) 39 | .build() 40 | }) 41 | }) 42 | } 43 | } 44 | 45 | impl Completable for Unlink { 46 | type Output = io::Result<()>; 47 | 48 | fn complete(self, cqe: CqeResult) -> Self::Output { 49 | cqe.result.map(|_| ()) 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /src/io/util.rs: -------------------------------------------------------------------------------- 1 | use std::ffi::CString; 2 | use std::io; 3 | use std::path::Path; 4 | 5 | pub(crate) fn cstr(p: &Path) -> io::Result { 6 | use std::os::unix::ffi::OsStrExt; 7 | Ok(CString::new(p.as_os_str().as_bytes())?) 8 | } 9 | -------------------------------------------------------------------------------- /src/io/write.rs: -------------------------------------------------------------------------------- 1 | use crate::{buf::BoundedBuf, io::SharedFd, BufResult, OneshotOutputTransform, UnsubmittedOneshot}; 2 | use io_uring::cqueue::Entry; 3 | use std::io; 4 | use std::marker::PhantomData; 5 | 6 | /// An unsubmitted write operation. 7 | pub type UnsubmittedWrite = UnsubmittedOneshot, WriteTransform>; 8 | 9 | #[allow(missing_docs)] 10 | pub struct WriteData { 11 | /// Holds a strong ref to the FD, preventing the file from being closed 12 | /// while the operation is in-flight. 13 | _fd: SharedFd, 14 | 15 | buf: T, 16 | } 17 | 18 | #[allow(missing_docs)] 19 | pub struct WriteTransform { 20 | _phantom: PhantomData, 21 | } 22 | 23 | impl OneshotOutputTransform for WriteTransform { 24 | type Output = BufResult; 25 | type StoredData = WriteData; 26 | 27 | fn transform_oneshot_output(self, data: Self::StoredData, cqe: Entry) -> Self::Output { 28 | let res = if cqe.result() >= 0 { 29 | Ok(cqe.result() as usize) 30 | } else { 31 | Err(io::Error::from_raw_os_error(-cqe.result())) 32 | }; 33 | 34 | (res, data.buf) 35 | } 36 | } 37 | 38 | impl UnsubmittedWrite { 39 | pub(crate) fn write_at(fd: &SharedFd, buf: T, offset: u64) -> Self { 40 | use io_uring::{opcode, types}; 41 | 42 | // Get raw buffer info 43 | let ptr = buf.stable_ptr(); 44 | let len = buf.bytes_init(); 45 | 46 | Self::new( 47 | WriteData { 48 | _fd: fd.clone(), 49 | buf, 50 | }, 51 | WriteTransform { 52 | _phantom: PhantomData, 53 | }, 54 | opcode::Write::new(types::Fd(fd.raw_fd()), ptr, len as _) 55 | .offset(offset as _) 56 | .build(), 57 | ) 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /src/io/write_fixed.rs: -------------------------------------------------------------------------------- 1 | use crate::buf::fixed::FixedBuf; 2 | use crate::buf::BoundedBuf; 3 | use crate::io::SharedFd; 4 | use crate::runtime::driver::op::{self, Completable, Op}; 5 | use crate::BufResult; 6 | 7 | use crate::runtime::CONTEXT; 8 | use std::io; 9 | 10 | pub(crate) struct WriteFixed { 11 | /// Holds a strong ref to the FD, preventing the file from being closed 12 | /// while the operation is in-flight. 
13 | #[allow(dead_code)] 14 | fd: SharedFd, 15 | 16 | buf: T, 17 | } 18 | 19 | impl Op> 20 | where 21 | T: BoundedBuf, 22 | { 23 | pub(crate) fn write_fixed_at( 24 | fd: &SharedFd, 25 | buf: T, 26 | offset: u64, 27 | ) -> io::Result>> { 28 | use io_uring::{opcode, types}; 29 | 30 | CONTEXT.with(|x| { 31 | x.handle().expect("Not in a runtime context").submit_op( 32 | WriteFixed { 33 | fd: fd.clone(), 34 | buf, 35 | }, 36 | |write_fixed| { 37 | // Get raw buffer info 38 | let ptr = write_fixed.buf.stable_ptr(); 39 | let len = write_fixed.buf.bytes_init(); 40 | let buf_index = write_fixed.buf.get_buf().buf_index(); 41 | opcode::WriteFixed::new(types::Fd(fd.raw_fd()), ptr, len as _, buf_index) 42 | .offset(offset as _) 43 | .build() 44 | }, 45 | ) 46 | }) 47 | } 48 | } 49 | 50 | impl Completable for WriteFixed { 51 | type Output = BufResult; 52 | 53 | fn complete(self, cqe: op::CqeResult) -> Self::Output { 54 | // Convert the operation result to `usize` 55 | let res = cqe.result.map(|v| v as usize); 56 | // Recover the buffer 57 | let buf = self.buf; 58 | 59 | (res, buf) 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /src/io/writev.rs: -------------------------------------------------------------------------------- 1 | use crate::runtime::driver::op::{Completable, CqeResult, Op}; 2 | use crate::runtime::CONTEXT; 3 | use crate::{buf::BoundedBuf, io::SharedFd, BufResult}; 4 | use libc::iovec; 5 | use std::io; 6 | 7 | pub(crate) struct Writev { 8 | /// Holds a strong ref to the FD, preventing the file from being closed 9 | /// while the operation is in-flight. 10 | #[allow(dead_code)] 11 | fd: SharedFd, 12 | 13 | pub(crate) bufs: Vec, 14 | 15 | /// Parameter for `io_uring::op::readv`, referring `bufs`. 16 | iovs: Vec, 17 | } 18 | 19 | impl Op> { 20 | pub(crate) fn writev_at( 21 | fd: &SharedFd, 22 | mut bufs: Vec, 23 | offset: u64, 24 | ) -> io::Result>> { 25 | use io_uring::{opcode, types}; 26 | 27 | // Build `iovec` objects referring the provided `bufs` for `io_uring::opcode::Readv`. 
28 | let iovs: Vec = bufs 29 | .iter_mut() 30 | .map(|b| iovec { 31 | iov_base: b.stable_ptr() as *mut libc::c_void, 32 | iov_len: b.bytes_init(), 33 | }) 34 | .collect(); 35 | 36 | CONTEXT.with(|x| { 37 | x.handle().expect("Not in a runtime context").submit_op( 38 | Writev { 39 | fd: fd.clone(), 40 | bufs, 41 | iovs, 42 | }, 43 | |write| { 44 | opcode::Writev::new( 45 | types::Fd(fd.raw_fd()), 46 | write.iovs.as_ptr(), 47 | write.iovs.len() as u32, 48 | ) 49 | .offset(offset as _) 50 | .build() 51 | }, 52 | ) 53 | }) 54 | } 55 | } 56 | 57 | impl Completable for Writev 58 | where 59 | T: BoundedBuf, 60 | { 61 | type Output = BufResult>; 62 | 63 | fn complete(self, cqe: CqeResult) -> Self::Output { 64 | // Convert the operation result to `usize` 65 | let res = cqe.result.map(|v| v as usize); 66 | // Recover the buffer 67 | let buf = self.bufs; 68 | 69 | (res, buf) 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /src/io/writev_all.rs: -------------------------------------------------------------------------------- 1 | use crate::runtime::driver::op::{Completable, CqeResult, Op}; 2 | use crate::runtime::CONTEXT; 3 | use crate::{buf::BoundedBuf, io::SharedFd}; 4 | use libc::iovec; 5 | use std::io; 6 | 7 | // This provides a common write-all implementation for writev and is fairly efficient by allocating 8 | // the Vec just once, and computing the individual iovec entries just once, at the cost of 9 | // some unsafe calls to advance the iovec array pointer and the iovec_base pointer from time to 10 | // time when a further call to `writev` is necessary. 11 | // 12 | // The fd, bufs, and iovecs resources are passed to and from the operation's future to ensure they 13 | // stay live while the operation is active, even if the future returned by this call is cancelled. 14 | // The SharedFd is only cloned once but at the cost of also passing it back and forth within this 15 | // module. 16 | pub(crate) async fn writev_at_all( 17 | fd: &SharedFd, 18 | mut bufs: Vec, 19 | offset: Option, 20 | ) -> crate::BufResult> { 21 | // TODO decide if the function should return immediately if all the buffer lengths 22 | // were to sum to zero. That would save an allocation and one call into writev. 23 | 24 | // The fd is cloned once. 25 | let mut fd = fd.clone(); 26 | 27 | // iovs is allocated once. 28 | let mut iovs: Vec = bufs 29 | .iter_mut() 30 | .map(|b| iovec { 31 | iov_base: b.stable_ptr() as *mut libc::c_void, 32 | iov_len: b.bytes_init(), 33 | }) 34 | .collect(); 35 | 36 | let mut iovs_ptr = iovs.as_ptr(); 37 | let mut iovs_len: u32 = iovs.len() as _; 38 | 39 | let mut total: usize = 0; 40 | 41 | // Loop until all the bytes have been written or an error has been returned by the io_uring 42 | // device. 43 | 44 | loop { 45 | // If caller provided some offset, pass an updated offset to writev 46 | // else keep passing zero. 47 | let o = match offset { 48 | Some(m) => m + (total as u64), 49 | None => 0, 50 | }; 51 | 52 | // Call the Op that is internal to this module. 53 | let op = Op::writev_at_all2(fd, bufs, iovs, iovs_ptr, iovs_len, o).unwrap(); 54 | let res; 55 | (res, fd, bufs, iovs) = op.await; 56 | 57 | let mut n: usize = match res { 58 | Ok(m) => m, 59 | 60 | // On error, there is no indication how many bytes were written. This is standard. 61 | // The device doesn't tell us that either. 
62 | Err(e) => return (Err(e), bufs), 63 | }; 64 | 65 | // TODO if n is zero, while there was more data to be written, should this be interpreted 66 | // as the file is closed so an error should be returned? Otherwise we reach the 67 | // unreachable! panic below. 68 | // 69 | // if n == 0 { return Err(..); } 70 | 71 | total += n; 72 | 73 | // Consume n and iovs_len until one or the other is exhausted. 74 | while n != 0 && iovs_len > 0 { 75 | // safety: iovs_len > 0, so safe to dereference the const *. 76 | let mut iovec = unsafe { *iovs_ptr }; 77 | let iov_len = iovec.iov_len; 78 | if n >= iov_len { 79 | n -= iov_len; 80 | // safety: iovs_len > 0, so safe to add 1 as iovs_len is decremented by 1. 81 | iovs_ptr = unsafe { iovs_ptr.add(1) }; 82 | iovs_len -= 1; 83 | } else { 84 | // safety: n was found to be less than iov_len, so adding to base and keeping 85 | // iov_len updated by decrementing maintains the invariant of the iovec 86 | // representing how much of the buffer remains to be written to. 87 | iovec.iov_base = unsafe { (iovec.iov_base as *const u8).add(n) } as _; 88 | iovec.iov_len -= n; 89 | n = 0; 90 | } 91 | } 92 | 93 | // Assert that both n and iovs_len become exhausted simultaneously. 94 | 95 | if (iovs_len == 0 && n != 0) || (iovs_len > 0 && n == 0) { 96 | unreachable!(); 97 | } 98 | 99 | // We are done when n and iovs_len have been consumed. 100 | if n == 0 { 101 | break; 102 | } 103 | } 104 | (Ok(total), bufs) 105 | } 106 | 107 | struct WritevAll { 108 | /// Holds a strong ref to the FD, preventing the file from being closed 109 | /// while the operation is in-flight. 110 | fd: SharedFd, 111 | 112 | bufs: Vec, 113 | 114 | iovs: Vec, 115 | } 116 | 117 | impl Op> { 118 | fn writev_at_all2( 119 | // Three values to share to keep live. 120 | fd: SharedFd, 121 | bufs: Vec, 122 | iovs: Vec, 123 | 124 | // Three values to use for this invocation. 125 | iovs_ptr: *const iovec, 126 | iovs_len: u32, 127 | offset: u64, 128 | ) -> io::Result>> { 129 | use io_uring::{opcode, types}; 130 | 131 | CONTEXT.with(|x| { 132 | x.handle().expect("Not in a runtime context").submit_op( 133 | WritevAll { fd, bufs, iovs }, 134 | // So this wouldn't need to be a function. Just pass in the entry. 135 | |write| { 136 | opcode::Writev::new(types::Fd(write.fd.raw_fd()), iovs_ptr, iovs_len) 137 | .offset(offset as _) 138 | .build() 139 | }, 140 | ) 141 | }) 142 | } 143 | } 144 | 145 | impl Completable for WritevAll 146 | where 147 | T: BoundedBuf, 148 | { 149 | type Output = (Result, SharedFd, Vec, Vec); 150 | 151 | fn complete(self, cqe: CqeResult) -> Self::Output { 152 | // Convert the operation result to `usize` 153 | let res = cqe.result.map(|v| v as usize); 154 | 155 | (res, self.fd, self.bufs, self.iovs) 156 | } 157 | } 158 | -------------------------------------------------------------------------------- /src/net/mod.rs: -------------------------------------------------------------------------------- 1 | //! TCP/UDP bindings for `tokio-uring`. 2 | //! 3 | //! This module contains the TCP/UDP networking types, similar to the standard 4 | //! library, which can be used to implement networking protocols. 5 | //! 6 | //! # Organization 7 | //! 8 | //! * [`TcpListener`] and [`TcpStream`] provide functionality for communication over TCP 9 | //! * [`UdpSocket`] provides functionality for communication over UDP 10 | 11 | //! 12 | //! [`TcpListener`]: TcpListener 13 | //! [`TcpStream`]: TcpStream 14 | //! 
[`UdpSocket`]: UdpSocket 15 | 16 | mod tcp; 17 | mod udp; 18 | mod unix; 19 | 20 | pub use tcp::{TcpListener, TcpStream}; 21 | pub use udp::UdpSocket; 22 | pub use unix::{UnixListener, UnixStream}; 23 | -------------------------------------------------------------------------------- /src/net/tcp/listener.rs: -------------------------------------------------------------------------------- 1 | use super::TcpStream; 2 | use crate::io::{SharedFd, Socket}; 3 | use std::{ 4 | io, 5 | net::SocketAddr, 6 | os::unix::prelude::{AsRawFd, FromRawFd, RawFd}, 7 | }; 8 | 9 | /// A TCP socket server, listening for connections. 10 | /// 11 | /// You can accept a new connection by using the [`accept`](`TcpListener::accept`) 12 | /// method. 13 | /// 14 | /// # Examples 15 | /// 16 | /// ``` 17 | /// use tokio_uring::net::TcpListener; 18 | /// use tokio_uring::net::TcpStream; 19 | /// 20 | /// let listener = TcpListener::bind("127.0.0.1:2345".parse().unwrap()).unwrap(); 21 | /// 22 | /// tokio_uring::start(async move { 23 | /// let (tx_ch, rx_ch) = tokio::sync::oneshot::channel(); 24 | /// 25 | /// tokio_uring::spawn(async move { 26 | /// let (rx, _) = listener.accept().await.unwrap(); 27 | /// if let Err(_) = tx_ch.send(rx) { 28 | /// panic!("The receiver dropped"); 29 | /// } 30 | /// }); 31 | /// tokio::task::yield_now().await; // Ensure the listener.accept().await has been kicked off. 32 | /// 33 | /// let tx = TcpStream::connect("127.0.0.1:2345".parse().unwrap()).await.unwrap(); 34 | /// let rx = rx_ch.await.expect("The spawned task expected to send a TcpStream"); 35 | /// 36 | /// tx.write(b"test" as &'static [u8]).submit().await.0.unwrap(); 37 | /// 38 | /// let (_, buf) = rx.read(vec![0; 4]).await; 39 | /// 40 | /// assert_eq!(buf, b"test"); 41 | /// }); 42 | /// ``` 43 | pub struct TcpListener { 44 | inner: Socket, 45 | } 46 | 47 | impl TcpListener { 48 | /// Creates a new TcpListener, which will be bound to the specified address. 49 | /// 50 | /// The returned listener is ready for accepting connections. 51 | /// 52 | /// Binding with a port number of 0 will request that the OS assigns a port 53 | /// to this listener. 54 | pub fn bind(addr: SocketAddr) -> io::Result { 55 | let socket = Socket::bind(addr, libc::SOCK_STREAM)?; 56 | socket.listen(1024)?; 57 | Ok(TcpListener { inner: socket }) 58 | } 59 | 60 | /// Creates new `TcpListener` from a previously bound `std::net::TcpListener`. 61 | /// 62 | /// This function is intended to be used to wrap a TCP listener from the 63 | /// standard library in the tokio-uring equivalent. The conversion assumes nothing 64 | /// about the underlying socket; it is left up to the user to decide what socket 65 | /// options are appropriate for their use case. 66 | /// 67 | /// This can be used in conjunction with socket2's `Socket` interface to 68 | /// configure a socket before it's handed off, such as setting options like 69 | /// `reuse_address` or binding to multiple addresses. 
70 | /// 71 | /// # Example 72 | /// 73 | /// ``` 74 | /// tokio_uring::start(async { 75 | /// let address: std::net::SocketAddr = "[::0]:8443".parse().unwrap(); 76 | /// let socket = tokio::net::TcpSocket::new_v6().unwrap(); 77 | /// socket.set_reuseaddr(true).unwrap(); 78 | /// socket.set_reuseport(true).unwrap(); 79 | /// socket.bind(address).unwrap(); 80 | /// 81 | /// let listener = socket.listen(1024).unwrap(); 82 | /// 83 | /// let listener = tokio_uring::net::TcpListener::from_std(listener.into_std().unwrap()); 84 | /// }) 85 | /// ``` 86 | pub fn from_std(socket: std::net::TcpListener) -> Self { 87 | let inner = Socket::from_std(socket); 88 | Self { inner } 89 | } 90 | 91 | pub(crate) fn from_socket(inner: Socket) -> Self { 92 | Self { inner } 93 | } 94 | 95 | /// Returns the local address that this listener is bound to. 96 | /// 97 | /// This can be useful, for example, when binding to port 0 to 98 | /// figure out which port was actually bound. 99 | /// 100 | /// # Examples 101 | /// 102 | /// ``` 103 | /// use std::net::{Ipv4Addr, SocketAddr, SocketAddrV4}; 104 | /// use tokio_uring::net::TcpListener; 105 | /// 106 | /// let listener = TcpListener::bind("127.0.0.1:8080".parse().unwrap()).unwrap(); 107 | /// 108 | /// let addr = listener.local_addr().expect("Couldn't get local address"); 109 | /// assert_eq!(addr, SocketAddr::V4(SocketAddrV4::new(Ipv4Addr::new(127, 0, 0, 1), 8080))); 110 | /// ``` 111 | pub fn local_addr(&self) -> io::Result { 112 | let fd = self.inner.as_raw_fd(); 113 | // SAFETY: Our fd is the handle the kernel has given us for a TcpListener. 114 | // Create a std::net::TcpListener long enough to call its local_addr method 115 | // and then forget it so the socket is not closed here. 116 | let l = unsafe { std::net::TcpListener::from_raw_fd(fd) }; 117 | let local_addr = l.local_addr(); 118 | std::mem::forget(l); 119 | local_addr 120 | } 121 | 122 | /// Accepts a new incoming connection from this listener. 123 | /// 124 | /// This function will yield once a new TCP connection is established. When 125 | /// established, the corresponding [`TcpStream`] and the remote peer's 126 | /// address will be returned. 127 | /// 128 | /// [`TcpStream`]: struct@crate::net::TcpStream 129 | pub async fn accept(&self) -> io::Result<(TcpStream, SocketAddr)> { 130 | let (socket, socket_addr) = self.inner.accept().await?; 131 | let stream = TcpStream { inner: socket }; 132 | let socket_addr = socket_addr.ok_or_else(|| { 133 | io::Error::new(io::ErrorKind::Other, "Could not get socket IP address") 134 | })?; 135 | Ok((stream, socket_addr)) 136 | } 137 | } 138 | 139 | impl FromRawFd for TcpListener { 140 | unsafe fn from_raw_fd(fd: RawFd) -> Self { 141 | TcpListener::from_socket(Socket::from_shared_fd(SharedFd::new(fd))) 142 | } 143 | } 144 | 145 | impl AsRawFd for TcpListener { 146 | fn as_raw_fd(&self) -> RawFd { 147 | self.inner.as_raw_fd() 148 | } 149 | } 150 | -------------------------------------------------------------------------------- /src/net/tcp/mod.rs: -------------------------------------------------------------------------------- 1 | mod listener; 2 | pub use listener::TcpListener; 3 | 4 | mod stream; 5 | pub use stream::TcpStream; 6 | -------------------------------------------------------------------------------- /src/net/unix/listener.rs: -------------------------------------------------------------------------------- 1 | use super::UnixStream; 2 | use crate::io::Socket; 3 | use std::{io, path::Path}; 4 | 5 | /// A Unix socket server, listening for connections. 
6 | /// 7 | /// You can accept a new connection by using the [`accept`](`UnixListener::accept`) 8 | /// method. 9 | /// 10 | /// # Examples 11 | /// 12 | /// ``` 13 | /// use tokio_uring::net::UnixListener; 14 | /// use tokio_uring::net::UnixStream; 15 | /// 16 | /// let sock_file = "/tmp/tokio-uring-unix-test.sock"; 17 | /// let listener = UnixListener::bind(&sock_file).unwrap(); 18 | /// 19 | /// tokio_uring::start(async move { 20 | /// let (tx_ch, rx_ch) = tokio::sync::oneshot::channel(); 21 | /// 22 | /// tokio_uring::spawn(async move { 23 | /// let rx = listener.accept().await.unwrap(); 24 | /// if let Err(_) = tx_ch.send(rx) { 25 | /// panic!("The receiver dropped"); 26 | /// } 27 | /// }); 28 | /// tokio::task::yield_now().await; // Ensure the listener.accept().await has been kicked off. 29 | /// 30 | /// let tx = UnixStream::connect(&sock_file).await.unwrap(); 31 | /// let rx = rx_ch.await.expect("The spawned task expected to send a UnixStream"); 32 | /// 33 | /// tx.write(b"test" as &'static [u8]).submit().await.0.unwrap(); 34 | /// 35 | /// let (_, buf) = rx.read(vec![0; 4]).await; 36 | /// 37 | /// assert_eq!(buf, b"test"); 38 | /// }); 39 | /// 40 | /// std::fs::remove_file(&sock_file).unwrap(); 41 | /// ``` 42 | pub struct UnixListener { 43 | inner: Socket, 44 | } 45 | 46 | impl UnixListener { 47 | /// Creates a new UnixListener, which will be bound to the specified file path. 48 | /// The file path cannnot yet exist, and will be cleaned up upon dropping `UnixListener` 49 | pub fn bind>(path: P) -> io::Result { 50 | let socket = Socket::bind_unix(path, libc::SOCK_STREAM)?; 51 | socket.listen(1024)?; 52 | Ok(UnixListener { inner: socket }) 53 | } 54 | 55 | /// Returns the local address that this listener is bound to. 56 | /// 57 | /// # Examples 58 | /// 59 | /// ``` 60 | /// use tokio_uring::net::UnixListener; 61 | /// use std::path::Path; 62 | /// 63 | /// let sock_file = "/tmp/tokio-uring-unix-test.sock"; 64 | /// let listener = UnixListener::bind(&sock_file).unwrap(); 65 | /// 66 | /// let addr = listener.local_addr().expect("Couldn't get local address"); 67 | /// assert_eq!(addr.as_pathname(), Some(Path::new(sock_file))); 68 | /// 69 | /// std::fs::remove_file(&sock_file).unwrap(); 70 | /// ``` 71 | pub fn local_addr(&self) -> io::Result { 72 | use std::os::unix::io::{AsRawFd, FromRawFd}; 73 | 74 | let fd = self.inner.as_raw_fd(); 75 | // SAFETY: Our fd is the handle the kernel has given us for a UnixListener. 76 | // Create a std::net::UnixListener long enough to call its local_addr method 77 | // and then forget it so the socket is not closed here. 78 | let l = unsafe { std::os::unix::net::UnixListener::from_raw_fd(fd) }; 79 | let local_addr = l.local_addr(); 80 | std::mem::forget(l); 81 | local_addr 82 | } 83 | 84 | /// Accepts a new incoming connection from this listener. 85 | /// 86 | /// This function will yield once a new Unix domain socket connection 87 | /// is established. When established, the corresponding [`UnixStream`] and 88 | /// will be returned. 
89 | /// 90 | /// [`UnixStream`]: struct@crate::net::UnixStream 91 | pub async fn accept(&self) -> io::Result { 92 | let (socket, _) = self.inner.accept().await?; 93 | let stream = UnixStream { inner: socket }; 94 | Ok(stream) 95 | } 96 | } 97 | -------------------------------------------------------------------------------- /src/net/unix/mod.rs: -------------------------------------------------------------------------------- 1 | mod listener; 2 | pub use listener::UnixListener; 3 | 4 | mod stream; 5 | pub use stream::UnixStream; 6 | -------------------------------------------------------------------------------- /src/net/unix/stream.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | buf::fixed::FixedBuf, 3 | buf::{BoundedBuf, BoundedBufMut}, 4 | io::{SharedFd, Socket}, 5 | UnsubmittedWrite, 6 | }; 7 | use socket2::SockAddr; 8 | use std::{ 9 | io, 10 | os::unix::prelude::{AsRawFd, FromRawFd, RawFd}, 11 | path::Path, 12 | }; 13 | 14 | /// A Unix stream between two local sockets on a Unix OS. 15 | /// 16 | /// A Unix stream can either be created by connecting to an endpoint, via the 17 | /// [`connect`] method, or by [`accepting`] a connection from a [`listener`]. 18 | /// 19 | /// # Examples 20 | /// 21 | /// ```no_run 22 | /// use tokio_uring::net::UnixStream; 23 | /// use std::net::ToSocketAddrs; 24 | /// 25 | /// fn main() -> std::io::Result<()> { 26 | /// tokio_uring::start(async { 27 | /// // Connect to a peer 28 | /// let mut stream = UnixStream::connect("/tmp/tokio-uring-unix-test.sock").await?; 29 | /// 30 | /// // Write some data. 31 | /// let (result, _) = stream.write(b"hello world!".as_slice()).submit().await; 32 | /// result.unwrap(); 33 | /// 34 | /// Ok(()) 35 | /// }) 36 | /// } 37 | /// ``` 38 | /// 39 | /// [`connect`]: UnixStream::connect 40 | /// [`accepting`]: crate::net::UnixListener::accept 41 | /// [`listener`]: crate::net::UnixListener 42 | pub struct UnixStream { 43 | pub(super) inner: Socket, 44 | } 45 | 46 | impl UnixStream { 47 | /// Opens a Unix connection to the specified file path. There must be a 48 | /// `UnixListener` or equivalent listening on the corresponding Unix domain socket 49 | /// to successfully connect and return a `UnixStream`. 50 | pub async fn connect>(path: P) -> io::Result { 51 | let socket = Socket::new_unix(libc::SOCK_STREAM)?; 52 | socket.connect(SockAddr::unix(path)?).await?; 53 | let unix_stream = UnixStream { inner: socket }; 54 | Ok(unix_stream) 55 | } 56 | 57 | /// Creates new `UnixStream` from a previously bound `std::os::unix::net::UnixStream`. 58 | /// 59 | /// This function is intended to be used to wrap a TCP stream from the 60 | /// standard library in the tokio-uring equivalent. The conversion assumes nothing 61 | /// about the underlying socket; it is left up to the user to decide what socket 62 | /// options are appropriate for their use case. 63 | /// 64 | /// This can be used in conjunction with socket2's `Socket` interface to 65 | /// configure a socket before it's handed off, such as setting options like 66 | /// `reuse_address` or binding to multiple addresses. 67 | pub fn from_std(socket: std::os::unix::net::UnixStream) -> UnixStream { 68 | let inner = Socket::from_std(socket); 69 | Self { inner } 70 | } 71 | 72 | pub(crate) fn from_socket(inner: Socket) -> Self { 73 | Self { inner } 74 | } 75 | 76 | /// Read some data from the stream into the buffer, returning the original buffer and 77 | /// quantity of data read. 
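    ///
    /// # Example (illustrative sketch)
    ///
    /// ```no_run
    /// use tokio_uring::net::UnixStream;
    ///
    /// tokio_uring::start(async {
    ///     // The socket path here is hypothetical.
    ///     let stream = UnixStream::connect("/tmp/tokio-uring-unix-test.sock").await.unwrap();
    ///     // The buffer is moved into the read and returned with the byte count.
    ///     let (res, buf) = stream.read(vec![0u8; 4096]).await;
    ///     let n = res.unwrap();
    ///     println!("read {} bytes: {:?}", n, &buf[..n]);
    /// });
    /// ```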
78 | pub async fn read(&self, buf: T) -> crate::BufResult { 79 | self.inner.read(buf).await 80 | } 81 | 82 | /// Like [`read`], but using a pre-mapped buffer 83 | /// registered with [`FixedBufRegistry`]. 84 | /// 85 | /// [`read`]: Self::read 86 | /// [`FixedBufRegistry`]: crate::buf::fixed::FixedBufRegistry 87 | /// 88 | /// # Errors 89 | /// 90 | /// In addition to errors that can be reported by `read`, 91 | /// this operation fails if the buffer is not registered in the 92 | /// current `tokio-uring` runtime. 93 | pub async fn read_fixed(&self, buf: T) -> crate::BufResult 94 | where 95 | T: BoundedBufMut, 96 | { 97 | self.inner.read_fixed(buf).await 98 | } 99 | 100 | /// Write some data to the stream from the buffer, returning the original buffer and 101 | /// quantity of data written. 102 | pub fn write(&self, buf: T) -> UnsubmittedWrite { 103 | self.inner.write(buf) 104 | } 105 | 106 | /// Attempts to write an entire buffer to the stream. 107 | /// 108 | /// This method will continuously call [`write`] until there is no more data to be 109 | /// written or an error is returned. This method will not return until the entire 110 | /// buffer has been successfully written or an error has occurred. 111 | /// 112 | /// If the buffer contains no data, this will never call [`write`]. 113 | /// 114 | /// # Errors 115 | /// 116 | /// This function will return the first error that [`write`] returns. 117 | /// 118 | /// [`write`]: Self::write 119 | pub async fn write_all(&self, buf: T) -> crate::BufResult<(), T> { 120 | self.inner.write_all(buf).await 121 | } 122 | 123 | /// Like [`write`], but using a pre-mapped buffer 124 | /// registered with [`FixedBufRegistry`]. 125 | /// 126 | /// [`write`]: Self::write 127 | /// [`FixedBufRegistry`]: crate::buf::fixed::FixedBufRegistry 128 | /// 129 | /// # Errors 130 | /// 131 | /// In addition to errors that can be reported by `write`, 132 | /// this operation fails if the buffer is not registered in the 133 | /// current `tokio-uring` runtime. 134 | pub async fn write_fixed(&self, buf: T) -> crate::BufResult 135 | where 136 | T: BoundedBuf, 137 | { 138 | self.inner.write_fixed(buf).await 139 | } 140 | 141 | /// Attempts to write an entire buffer to the stream. 142 | /// 143 | /// This method will continuously call [`write_fixed`] until there is no more data to be 144 | /// written or an error is returned. This method will not return until the entire 145 | /// buffer has been successfully written or an error has occurred. 146 | /// 147 | /// If the buffer contains no data, this will never call [`write_fixed`]. 148 | /// 149 | /// # Errors 150 | /// 151 | /// This function will return the first error that [`write_fixed`] returns. 152 | /// 153 | /// [`write_fixed`]: Self::write 154 | pub async fn write_fixed_all(&self, buf: T) -> crate::BufResult<(), T> 155 | where 156 | T: BoundedBuf, 157 | { 158 | self.inner.write_fixed_all(buf).await 159 | } 160 | 161 | /// Write data from buffers into this socket returning how many bytes were 162 | /// written. 163 | /// 164 | /// This function will attempt to write the entire contents of `bufs`, but 165 | /// the entire write may not succeed, or the write may also generate an 166 | /// error. The bytes will be written starting at the specified offset. 167 | /// 168 | /// # Return 169 | /// 170 | /// The method returns the operation result and the same array of buffers 171 | /// passed in as an argument. 
/src/runtime/context.rs: -------------------------------------------------------------------------------- 1 | use crate::runtime::driver; 2 | use crate::runtime::driver::{Handle, WeakHandle}; 3 | use std::cell::RefCell; 4 | 5 | /// Owns the driver and resides in thread-local storage. 6 | pub(crate) struct RuntimeContext { 7 | driver: RefCell<Option<driver::Handle>>, 8 | } 9 | 10 | impl RuntimeContext { 11 | /// Construct the context with an uninitialized driver. 12 | pub(crate) const fn new() -> Self { 13 | Self { 14 | driver: RefCell::new(None), 15 | } 16 | } 17 | 18 | /// Initialize the driver. 19 | pub(crate) fn set_handle(&self, handle: Handle) { 20 | let mut guard = self.driver.borrow_mut(); 21 | 22 | assert!(guard.is_none(), "Attempted to initialize the driver twice"); 23 | 24 | *guard = Some(handle); 25 | } 26 | 27 | pub(crate) fn unset_driver(&self) { 28 | let mut guard = self.driver.borrow_mut(); 29 | 30 | assert!(guard.is_some(), "Attempted to clear nonexistent driver"); 31 | 32 | *guard = None; 33 | } 34 | 35 | /// Check if the driver is initialized. 36 | #[allow(dead_code)] 37 | pub(crate) fn is_set(&self) -> bool { 38 | self.driver 39 | .try_borrow() 40 | .map(|b| b.is_some()) 41 | .unwrap_or(false) 42 | } 43 | 44 | pub(crate) fn handle(&self) -> Option<Handle> { 45 | self.driver.borrow().clone() 46 | } 47 | 48 | #[allow(dead_code)] 49 | pub(crate) fn weak(&self) -> Option<WeakHandle> { 50 | self.driver.borrow().as_ref().map(Into::into) 51 | } 52 | } 53 | --------------------------------------------------------------------------------
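A reduced, standalone model of the thread-local pattern `RuntimeContext` uses above. `DriverHandle` is a stand-in type for illustration, not the crate's `driver::Handle`.

```rust
use std::cell::RefCell;

#[derive(Clone)]
struct DriverHandle(u32);

thread_local! {
    // One slot per thread, empty until a runtime installs its handle.
    static CONTEXT: RefCell<Option<DriverHandle>> = RefCell::new(None);
}

fn set_handle(h: DriverHandle) {
    CONTEXT.with(|c| {
        let mut guard = c.borrow_mut();
        assert!(guard.is_none(), "Attempted to initialize the driver twice");
        *guard = Some(h);
    });
}

fn handle() -> Option<DriverHandle> {
    // Cloning the handle out mirrors RuntimeContext::handle().
    CONTEXT.with(|c| c.borrow().clone())
}

fn main() {
    set_handle(DriverHandle(1));
    assert!(handle().is_some());
}
```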
/src/runtime/driver/handle.rs: -------------------------------------------------------------------------------- 1 | //! Internal, reference-counted handle to the driver. 2 | //! 3 | //! The driver was previously managed exclusively by thread-local context, but this proved 4 | //! untenable. 5 | //! 6 | //! The new system uses a handle which reference-counts the driver to track ownership and access to 7 | //! the driver. 8 | //! 9 | //! There are two handles. 10 | //! The strong handle is owning, and the weak handle is non-owning. 11 | //! This is important for avoiding reference cycles. 12 | //! The weak handle should be used by anything which is stored in the driver or does not need to 13 | //! keep the driver alive for its duration. 14 | 15 | use io_uring::{cqueue, squeue}; 16 | use std::cell::RefCell; 17 | use std::io; 18 | use std::ops::Deref; 19 | use std::os::unix::io::{AsRawFd, RawFd}; 20 | use std::rc::{Rc, Weak}; 21 | use std::task::{Context, Poll}; 22 | 23 | use crate::buf::fixed::FixedBuffers; 24 | use crate::runtime::driver::op::{Completable, MultiCQEFuture, Op, Updateable}; 25 | use crate::runtime::driver::Driver; 26 | 27 | #[derive(Clone)] 28 | pub(crate) struct Handle { 29 | pub(super) inner: Rc<RefCell<Driver>>, 30 | } 31 | 32 | #[derive(Clone)] 33 | pub(crate) struct WeakHandle { 34 | inner: Weak<RefCell<Driver>>, 35 | } 36 | 37 | impl Handle { 38 | pub(crate) fn new(b: &crate::Builder) -> io::Result<Self> { 39 | Ok(Self { 40 | inner: Rc::new(RefCell::new(Driver::new(b)?)), 41 | }) 42 | } 43 | 44 | pub(crate) fn dispatch_completions(&self) { 45 | self.inner.borrow_mut().dispatch_completions() 46 | } 47 | 48 | pub(crate) fn flush(&self) -> io::Result<usize> { 49 | self.inner.borrow_mut().uring.submit() 50 | } 51 | 52 | pub(crate) fn register_buffers( 53 | &self, 54 | buffers: Rc<RefCell<dyn FixedBuffers>>, 55 | ) -> io::Result<()> { 56 | self.inner.borrow_mut().register_buffers(buffers) 57 | } 58 | 59 | pub(crate) fn unregister_buffers( 60 | &self, 61 | buffers: Rc<RefCell<dyn FixedBuffers>>, 62 | ) -> io::Result<()> { 63 | self.inner.borrow_mut().unregister_buffers(buffers) 64 | } 65 | 66 | pub(crate) fn submit_op_2(&self, sqe: squeue::Entry) -> usize { 67 | self.inner.borrow_mut().submit_op_2(sqe) 68 | } 69 | 70 | pub(crate) fn submit_op<T, S, F>(&self, data: T, f: F) -> io::Result<Op<T, S>> 71 | where 72 | T: Completable, 73 | F: FnOnce(&mut T) -> squeue::Entry, 74 | { 75 | self.inner.borrow_mut().submit_op(data, f, self.into()) 76 | } 77 | 78 | pub(crate) fn poll_op<T>(&self, op: &mut Op<T>, cx: &mut Context<'_>) -> Poll<T::Output> 79 | where 80 | T: Unpin + 'static + Completable, 81 | { 82 | self.inner.borrow_mut().poll_op(op, cx) 83 | } 84 | 85 | pub(crate) fn poll_op_2(&self, index: usize, cx: &mut Context<'_>) -> Poll<cqueue::Entry> { 86 | self.inner.borrow_mut().poll_op_2(index, cx) 87 | } 88 | 89 | pub(crate) fn poll_multishot_op<T>( 90 | &self, 91 | op: &mut Op<T, MultiCQEFuture>, 92 | cx: &mut Context<'_>, 93 | ) -> Poll<T::Output> 94 | where 95 | T: Unpin + 'static + Completable + Updateable, 96 | { 97 | self.inner.borrow_mut().poll_multishot_op(op, cx) 98 | } 99 | 100 | pub(crate) fn remove_op<T, CqeType>(&self, op: &mut Op<T, CqeType>) { 101 | self.inner.borrow_mut().remove_op(op) 102 | } 103 | 104 | pub(crate) fn remove_op_2<T: 'static>(&self, index: usize, data: T) { 105 | self.inner.borrow_mut().remove_op_2(index, data) 106 | } 107 | } 108 | 109 | impl WeakHandle { 110 | pub(crate) fn upgrade(&self) -> Option<Handle> { 111 | Some(Handle { 112 | inner: self.inner.upgrade()?, 113 | }) 114 | } 115 | } 116 | 117 | impl AsRawFd for Handle { 118 | fn as_raw_fd(&self) -> RawFd { 119 | self.inner.borrow().uring.as_raw_fd() 120 | } 121 | } 122 | 123 | impl From<Driver> for Handle { 124 | fn from(driver: Driver) -> Self { 125 | Self { 126 | inner: Rc::new(RefCell::new(driver)), 127 | } 128 | } 129 | } 130 | 131 | impl<T> From<T> for WeakHandle 132 | where 133 | T: Deref<Target = Handle>, 134 | { 135 | fn from(handle: T) -> Self { 136 | Self { 137 | inner: Rc::downgrade(&handle.inner), 138 | } 139 | } 140 | } 141 | --------------------------------------------------------------------------------
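The strong/weak split described in the module docs above boils down to `Rc`/`Weak` upgrade semantics. A standalone illustration, with no tokio-uring types involved:

```rust
use std::cell::RefCell;
use std::rc::{Rc, Weak};

fn main() {
    // `strong` plays the role of Handle, `weak` the role of WeakHandle.
    let strong: Rc<RefCell<u64>> = Rc::new(RefCell::new(0));
    let weak: Weak<RefCell<u64>> = Rc::downgrade(&strong);

    // While a strong handle exists, the weak one can be upgraded and used.
    if let Some(h) = weak.upgrade() {
        *h.borrow_mut() += 1;
    }

    // Once the last strong handle is gone, upgrade() yields None instead of
    // keeping the value alive; this is what breaks reference cycles.
    drop(strong);
    assert!(weak.upgrade().is_none());
}
```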
/src/runtime/driver/op/slab_list.rs: -------------------------------------------------------------------------------- 1 | //! An indexed linked list, with entries held in slab storage. 2 | //! The slab may hold multiple independent lists concurrently. 3 | //! 4 | //! Each list is uniquely identified by a SlabListIndices, 5 | //! which holds the index of the first element of the list. 6 | //! It also holds the index of the last element, to support 7 | //! push operations without list traversal. 8 | use slab::Slab; 9 | use std::ops::{Deref, DerefMut}; 10 | 11 | /// A linked list backed by slab storage 12 | pub(crate) struct SlabList<'a, T> { 13 | index: SlabListIndices, 14 | slab: &'a mut Slab<SlabListEntry<T>>, 15 | } 16 | 17 | // Indices to the head and tail of a single list held within a SlabList 18 | #[derive(Clone)] 19 | pub(crate) struct SlabListIndices { 20 | start: usize, 21 | end: usize, 22 | } 23 | 24 | /// Multi-cycle operations may return an unbounded number of CQEs 25 | /// for a single SQE. 26 | /// 27 | /// These are held in an indexed linked list 28 | pub(crate) struct SlabListEntry<T> { 29 | entry: T, 30 | next: usize, 31 | } 32 | 33 | impl<T> Deref for SlabListEntry<T> { 34 | type Target = T; 35 | 36 | fn deref(&self) -> &Self::Target { 37 | &self.entry 38 | } 39 | } 40 | 41 | impl<T> DerefMut for SlabListEntry<T> { 42 | fn deref_mut(&mut self) -> &mut Self::Target { 43 | &mut self.entry 44 | } 45 | } 46 | 47 | impl SlabListIndices { 48 | pub(crate) fn new() -> Self { 49 | let start = usize::MAX; 50 | SlabListIndices { start, end: start } 51 | } 52 | 53 | pub(crate) fn into_list<T>(self, slab: &mut Slab<SlabListEntry<T>>) -> SlabList<'_, T> { 54 | SlabList::from_indices(self, slab) 55 | } 56 | } 57 | 58 | impl<'a, T> SlabList<'a, T> { 59 | pub(crate) fn from_indices( 60 | index: SlabListIndices, 61 | slab: &'a mut Slab<SlabListEntry<T>>, 62 | ) -> Self { 63 | SlabList { slab, index } 64 | } 65 | 66 | pub(crate) fn is_empty(&self) -> bool { 67 | self.index.start == usize::MAX 68 | } 69 | 70 | /// Peek at the end of the list (most recently pushed) 71 | /// This leaves the list unchanged 72 | pub(crate) fn peek_end(&mut self) -> Option<&T> { 73 | if self.index.end == usize::MAX { 74 | None 75 | } else { 76 | Some(&self.slab[self.index.end].entry) 77 | } 78 | } 79 | 80 | /// Pop from front of list 81 | pub(crate) fn pop(&mut self) -> Option<T> { 82 | self.slab 83 | .try_remove(self.index.start) 84 | .map(|SlabListEntry { next, entry, .. }| { 85 | if next == usize::MAX { 86 | self.index.end = usize::MAX; 87 | } 88 | self.index.start = next; 89 | entry 90 | }) 91 | } 92 | 93 | /// Push to the end of the list 94 | pub(crate) fn push(&mut self, entry: T) { 95 | let prev = self.index.end; 96 | let entry = SlabListEntry { 97 | entry, 98 | next: usize::MAX, 99 | }; 100 | self.index.end = self.slab.insert(entry); 101 | if prev != usize::MAX { 102 | self.slab[prev].next = self.index.end; 103 | } else { 104 | self.index.start = self.index.end; 105 | } 106 | } 107 | 108 | /// Consume the list, without dropping entries, returning just the start and end indices 109 | pub(crate) fn into_indices(mut self) -> SlabListIndices { 110 | std::mem::replace(&mut self.index, SlabListIndices::new()) 111 | } 112 | } 113 | 114 | impl<'a, T> Drop for SlabList<'a, T> { 115 | fn drop(&mut self) { 116 | while !self.is_empty() { 117 | let removed = self.slab.remove(self.index.start); 118 | self.index.start = removed.next; 119 | } 120 | } 121 | } 122 | 123 | impl<'a, T> Iterator for SlabList<'a, T> { 124 | type Item = T; 125 | 126 | fn next(&mut self) -> Option<Self::Item> { 127 | self.pop() 128 | } 129 | } 130 | 131 | #[cfg(test)] 132 | mod test { 133 | use super::*; 134 | 135 | #[test] 136 | fn push_pop() { 137 | let mut slab = Slab::with_capacity(8); 138 | let mut list = SlabListIndices::new().into_list(&mut slab); 139 | assert!(list.is_empty()); 140 | assert_eq!(list.pop(), None); 141 | for i in 0..5 { 142 | list.push(i); 143 | assert_eq!(list.peek_end(), Some(&i)); 144 | assert!(!list.is_empty()); 145 | assert!(!list.slab.is_empty()); 146 | } 147 | for i in 0..5 { 148 | assert_eq!(list.pop(), Some(i)) 149 | } 150 | assert!(list.is_empty()); 151 | assert!(list.slab.is_empty()); 152 | assert_eq!(list.pop(), None); 153 | } 154 | 155 | #[test] 156 | fn entries_freed_on_drop() { 157 | let mut slab = Slab::with_capacity(8); 158 | { 159 | let mut list = SlabListIndices::new().into_list(&mut slab); 160 | list.push(42); 161 | assert!(!list.is_empty()); 162 | } 163 | assert!(slab.is_empty()); 164 | } 165 | 166 | #[test] 167 | fn entries_kept_on_conversion_to_index() { 168 | let mut slab = Slab::with_capacity(8); 169 | { 170 | let mut list = SlabListIndices::new().into_list(&mut slab); 171 | list.push(42); 172 | assert!(!list.is_empty()); 173 | // This forgets the entries 174 | let _ = list.into_indices(); 175 | } 176 | assert!(!slab.is_empty()); 177 | } 178 | } 179 | --------------------------------------------------------------------------------
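A sketch of the "multiple independent lists in one slab" property claimed in the module docs above. Since the types are `pub(crate)`, this would only compile inside the crate, e.g. as an additional unit test in `slab_list.rs`; it is illustrative only.

```rust
#[test]
fn two_lists_share_one_slab() {
    let mut slab = Slab::with_capacity(8);

    let mut a = SlabListIndices::new();
    let mut b = SlabListIndices::new();

    // Entries from the two lists interleave inside the same slab, each list
    // threading through it via its own next-indices.
    for i in 0..3 {
        let mut list = a.into_list(&mut slab);
        list.push(i);
        a = list.into_indices();

        let mut list = b.into_list(&mut slab);
        list.push(i * 10);
        b = list.into_indices();
    }

    // Draining each list (SlabList implements Iterator via pop) yields its own
    // entries in FIFO order, untouched by the other list.
    assert_eq!(a.into_list(&mut slab).collect::<Vec<_>>(), vec![0, 1, 2]);
    assert_eq!(b.into_list(&mut slab).collect::<Vec<_>>(), vec![0, 10, 20]);
}
```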
/src/runtime/mod.rs: -------------------------------------------------------------------------------- 1 | use std::future::Future; 2 | use std::io; 3 | use std::mem::ManuallyDrop; 4 | use tokio::io::unix::AsyncFd; 5 | use tokio::task::LocalSet; 6 | 7 | mod context; 8 | pub(crate) mod driver; 9 | 10 | pub(crate) use context::RuntimeContext; 11 | 12 | thread_local! { 13 | pub(crate) static CONTEXT: RuntimeContext = RuntimeContext::new(); 14 | } 15 | 16 | /// The Runtime Executor 17 | /// 18 | /// This is the Runtime for `tokio-uring`. 19 | /// It wraps the default [`Runtime`] using the platform-specific Driver. 20 | /// 21 | /// This executes futures and tasks within the current-thread only. 22 | /// 23 | /// [`Runtime`]: tokio::runtime::Runtime 24 | pub struct Runtime { 25 | /// Tokio runtime, always current-thread 26 | tokio_rt: ManuallyDrop<tokio::runtime::Runtime>, 27 | 28 | /// LocalSet for !Send tasks 29 | local: ManuallyDrop<LocalSet>, 30 | 31 | /// Strong reference to the driver.
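/// (Not wrapped in `ManuallyDrop`: the handle is dropped implicitly after `Drop for Runtime` has torn down `local` and `tokio_rt`, so the driver outlives every spawned task.)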
32 | driver: driver::Handle, 33 | } 34 | 35 | /// Spawns a new asynchronous task, returning a [`JoinHandle`] for it. 36 | /// 37 | /// Spawning a task enables the task to execute concurrently to other tasks. 38 | /// There is no guarantee that a spawned task will execute to completion. When a 39 | /// runtime is shutdown, all outstanding tasks are dropped, regardless of the 40 | /// lifecycle of that task. 41 | /// 42 | /// This function must be called from the context of a `tokio-uring` runtime. 43 | /// 44 | /// [`JoinHandle`]: tokio::task::JoinHandle 45 | /// 46 | /// # Examples 47 | /// 48 | /// In this example, a server is started and `spawn` is used to start a new task 49 | /// that processes each received connection. 50 | /// 51 | /// ```no_run 52 | /// tokio_uring::start(async { 53 | /// let handle = tokio_uring::spawn(async { 54 | /// println!("hello from a background task"); 55 | /// }); 56 | /// 57 | /// // Let the task complete 58 | /// handle.await.unwrap(); 59 | /// }); 60 | /// ``` 61 | pub fn spawn(task: T) -> tokio::task::JoinHandle { 62 | tokio::task::spawn_local(task) 63 | } 64 | 65 | impl Runtime { 66 | /// Creates a new tokio_uring runtime on the current thread. 67 | /// 68 | /// This takes the tokio-uring [`Builder`](crate::Builder) as a parameter. 69 | pub fn new(b: &crate::Builder) -> io::Result { 70 | let rt = tokio::runtime::Builder::new_current_thread() 71 | .on_thread_park(|| { 72 | CONTEXT.with(|x| { 73 | let _ = x 74 | .handle() 75 | .expect("Internal error, driver context not present when invoking hooks") 76 | .flush(); 77 | }); 78 | }) 79 | .enable_all() 80 | .build()?; 81 | 82 | let tokio_rt = ManuallyDrop::new(rt); 83 | let local = ManuallyDrop::new(LocalSet::new()); 84 | let driver = driver::Handle::new(b)?; 85 | 86 | start_uring_wakes_task(&tokio_rt, &local, driver.clone()); 87 | 88 | Ok(Runtime { 89 | local, 90 | tokio_rt, 91 | driver, 92 | }) 93 | } 94 | 95 | /// Runs a future to completion on the tokio-uring runtime. This is the 96 | /// runtime's entry point. 97 | /// 98 | /// This runs the given future on the current thread, blocking until it is 99 | /// complete, and yielding its resolved result. Any tasks, futures, or timers 100 | /// which the future spawns internally will be executed on this runtime. 101 | /// 102 | /// Any spawned tasks will be suspended after `block_on` returns. Calling 103 | /// `block_on` again will resume previously spawned tasks. 104 | /// 105 | /// # Panics 106 | /// 107 | /// This function panics if the provided future panics, or if called within an 108 | /// asynchronous execution context. 109 | /// Runs a future to completion on the current runtime. 
110 | pub fn block_on(&self, future: F) -> F::Output 111 | where 112 | F: Future, 113 | { 114 | struct ContextGuard; 115 | 116 | impl Drop for ContextGuard { 117 | fn drop(&mut self) { 118 | CONTEXT.with(|cx| cx.unset_driver()); 119 | } 120 | } 121 | 122 | CONTEXT.with(|cx| cx.set_handle(self.driver.clone())); 123 | 124 | let _guard = ContextGuard; 125 | 126 | tokio::pin!(future); 127 | 128 | let res = self 129 | .tokio_rt 130 | .block_on(self.local.run_until(std::future::poll_fn(|cx| { 131 | // assert!(drive.as_mut().poll(cx).is_pending()); 132 | future.as_mut().poll(cx) 133 | }))); 134 | 135 | res 136 | } 137 | } 138 | 139 | impl Drop for Runtime { 140 | fn drop(&mut self) { 141 | // drop tasks in correct order 142 | unsafe { 143 | ManuallyDrop::drop(&mut self.local); 144 | ManuallyDrop::drop(&mut self.tokio_rt); 145 | } 146 | } 147 | } 148 | 149 | fn start_uring_wakes_task( 150 | tokio_rt: &tokio::runtime::Runtime, 151 | local: &LocalSet, 152 | driver: driver::Handle, 153 | ) { 154 | let _guard = tokio_rt.enter(); 155 | let async_driver_handle = AsyncFd::new(driver).unwrap(); 156 | 157 | local.spawn_local(drive_uring_wakes(async_driver_handle)); 158 | } 159 | 160 | async fn drive_uring_wakes(driver: AsyncFd) { 161 | loop { 162 | // Wait for read-readiness 163 | let mut guard = driver.readable().await.unwrap(); 164 | 165 | guard.get_inner().dispatch_completions(); 166 | 167 | guard.clear_ready(); 168 | } 169 | } 170 | 171 | #[cfg(test)] 172 | mod test { 173 | 174 | use super::*; 175 | use crate::builder; 176 | 177 | #[test] 178 | fn block_on() { 179 | let rt = Runtime::new(&builder()).unwrap(); 180 | rt.block_on(async move { () }); 181 | } 182 | 183 | #[test] 184 | fn block_on_twice() { 185 | let rt = Runtime::new(&builder()).unwrap(); 186 | rt.block_on(async move { () }); 187 | rt.block_on(async move { () }); 188 | } 189 | } 190 | -------------------------------------------------------------------------------- /tests/buf.rs: -------------------------------------------------------------------------------- 1 | use tokio_uring::buf::{BoundedBuf, BoundedBufMut, Slice}; 2 | 3 | use std::mem; 4 | use std::ops::RangeBounds; 5 | use std::slice::SliceIndex; 6 | 7 | #[test] 8 | fn test_vec() { 9 | let mut v = vec![]; 10 | 11 | assert_eq!(v.as_ptr(), v.stable_ptr()); 12 | assert_eq!(v.as_mut_ptr(), v.stable_mut_ptr()); 13 | assert_eq!(v.bytes_init(), 0); 14 | assert_eq!(v.bytes_total(), 0); 15 | 16 | v.reserve(100); 17 | 18 | assert_eq!(v.as_ptr(), v.stable_ptr()); 19 | assert_eq!(v.as_mut_ptr(), v.stable_mut_ptr()); 20 | assert_eq!(v.bytes_init(), 0); 21 | assert_eq!(v.bytes_total(), v.capacity()); 22 | 23 | v.extend(b"hello"); 24 | 25 | assert_eq!(v.as_ptr(), v.stable_ptr()); 26 | assert_eq!(v.as_mut_ptr(), v.stable_mut_ptr()); 27 | assert_eq!(v.bytes_init(), 5); 28 | assert_eq!(v.bytes_total(), v.capacity()); 29 | 30 | // Assume init does not go backwards 31 | unsafe { 32 | v.set_init(3); 33 | } 34 | assert_eq!(&v[..], b"hello"); 35 | 36 | // Initializing goes forward 37 | unsafe { 38 | std::ptr::copy(DATA.as_ptr(), v.stable_mut_ptr(), 10); 39 | v.set_init(10); 40 | } 41 | 42 | assert_eq!(&v[..], &DATA[..10]); 43 | } 44 | 45 | #[test] 46 | fn test_slice() { 47 | let v = &b""[..]; 48 | 49 | assert_eq!(v.as_ptr(), v.stable_ptr()); 50 | assert_eq!(v.bytes_init(), 0); 51 | assert_eq!(v.bytes_total(), 0); 52 | 53 | let v = &b"hello"[..]; 54 | 55 | assert_eq!(v.as_ptr(), v.stable_ptr()); 56 | assert_eq!(v.bytes_init(), 5); 57 | assert_eq!(v.bytes_total(), 5); 58 | } 59 | 60 | const DATA: &[u8] = 
b"abcdefghijklmnopqrstuvwxyz0123456789!?"; 61 | 62 | macro_rules! test_slice { 63 | ( 64 | $( $name:ident => $buf:expr; )* 65 | ) => { 66 | $( 67 | mod $name { 68 | use super::*; 69 | 70 | #[test] 71 | fn test_slice_read() { 72 | let buf = $buf; 73 | 74 | let slice = buf.slice(..); 75 | assert_eq!(slice.begin(), 0); 76 | assert_eq!(slice.end(), DATA.len()); 77 | 78 | assert_eq!(&slice[..], DATA); 79 | assert_eq!(&slice[5..], &DATA[5..]); 80 | assert_eq!(&slice[10..15], &DATA[10..15]); 81 | assert_eq!(&slice[..15], &DATA[..15]); 82 | 83 | let buf = slice.into_inner(); 84 | 85 | let slice = buf.slice(10..); 86 | assert_eq!(slice.begin(), 10); 87 | assert_eq!(slice.end(), DATA.len()); 88 | 89 | assert_eq!(&slice[..], &DATA[10..]); 90 | assert_eq!(&slice[10..], &DATA[20..]); 91 | assert_eq!(&slice[5..15], &DATA[15..25]); 92 | assert_eq!(&slice[..15], &DATA[10..25]); 93 | 94 | let buf = slice.into_inner(); 95 | 96 | let slice = buf.slice(5..15); 97 | assert_eq!(slice.begin(), 5); 98 | assert_eq!(slice.end(), 15); 99 | 100 | assert_eq!(&slice[..], &DATA[5..15]); 101 | assert_eq!(&slice[5..], &DATA[10..15]); 102 | assert_eq!(&slice[5..8], &DATA[10..13]); 103 | assert_eq!(&slice[..5], &DATA[5..10]); 104 | let buf = slice.into_inner(); 105 | 106 | let slice = buf.slice(..15); 107 | assert_eq!(slice.begin(), 0); 108 | assert_eq!(slice.end(), 15); 109 | 110 | assert_eq!(&slice[..], &DATA[..15]); 111 | assert_eq!(&slice[5..], &DATA[5..15]); 112 | assert_eq!(&slice[5..10], &DATA[5..10]); 113 | assert_eq!(&slice[..5], &DATA[..5]); 114 | } 115 | 116 | #[test] 117 | fn test_subslice_read() { 118 | let buf = $buf; 119 | 120 | let buf = test_subslice_read_case(buf.slice(..), DATA, ..); 121 | let buf = test_subslice_read_case(buf.slice(..), DATA, 10..); 122 | let buf = test_subslice_read_case(buf.slice(..), DATA, 5..15); 123 | let buf = test_subslice_read_case(buf.slice(..), DATA, ..15); 124 | 125 | let buf = test_subslice_read_case(buf.slice(5..), &DATA[5..], ..); 126 | let buf = test_subslice_read_case(buf.slice(5..), &DATA[5..], 5..); 127 | let buf = test_subslice_read_case(buf.slice(5..), &DATA[5..], 5..15); 128 | let buf = test_subslice_read_case(buf.slice(5..), &DATA[5..], ..10); 129 | 130 | let buf = test_subslice_read_case(buf.slice(5..25), &DATA[5..25], ..); 131 | let buf = test_subslice_read_case(buf.slice(5..25), &DATA[5..25], 5..); 132 | let buf = test_subslice_read_case(buf.slice(5..25), &DATA[5..25], 5..15); 133 | let buf = test_subslice_read_case(buf.slice(5..25), &DATA[5..25], ..10); 134 | 135 | let buf = test_subslice_read_case(buf.slice(..25), &DATA[..25], ..); 136 | let buf = test_subslice_read_case(buf.slice(..25), &DATA[..25], 5..); 137 | let buf = test_subslice_read_case(buf.slice(..25), &DATA[..25], 5..15); 138 | let ___ = test_subslice_read_case(buf.slice(..25), &DATA[..25], ..10); 139 | } 140 | } 141 | )* 142 | }; 143 | } 144 | 145 | fn test_subslice_read_case(slice: Slice, expected: &[u8], range: R) -> B 146 | where 147 | B: tokio_uring::buf::IoBuf, 148 | R: RangeBounds + SliceIndex<[u8], Output = [u8]> + Clone, 149 | { 150 | use std::ops::{Bound, Index}; 151 | 152 | let buf_ptr = slice.get_ref().stable_ptr(); 153 | let buf_total = slice.get_ref().bytes_total(); 154 | let buf_init = slice.get_ref().bytes_init(); 155 | 156 | let begin = slice.begin(); 157 | let end = slice.end(); 158 | let subslice = slice.slice(range.clone()); 159 | let data = expected.index(range.clone()); 160 | match range.start_bound() { 161 | Bound::Included(&n) => { 162 | assert_eq!(subslice.begin(), begin + 
n); 163 | } 164 | Bound::Excluded(&n) => { 165 | assert_eq!(subslice.begin(), begin + n + 1); 166 | } 167 | Bound::Unbounded => { 168 | assert_eq!(subslice.begin(), begin); 169 | } 170 | } 171 | match range.end_bound() { 172 | Bound::Included(&n) => { 173 | assert_eq!(subslice.end(), begin + n + 1); 174 | } 175 | Bound::Excluded(&n) => { 176 | assert_eq!(subslice.end(), begin + n); 177 | } 178 | Bound::Unbounded => { 179 | assert_eq!(subslice.end(), end); 180 | } 181 | } 182 | assert_eq!(&subslice[..], data); 183 | 184 | let buf = subslice.into_inner(); 185 | assert_eq!(buf.stable_ptr(), buf_ptr); 186 | assert_eq!(buf.bytes_init(), buf_init); 187 | assert_eq!(buf.bytes_total(), buf_total); 188 | buf 189 | } 190 | 191 | test_slice! { 192 | vec => Vec::from(DATA); 193 | slice => DATA; 194 | } 195 | 196 | #[test] 197 | fn can_deref_slice_into_uninit_buf() { 198 | let buf = Vec::with_capacity(10).slice(..); 199 | let _ = buf.stable_ptr(); 200 | assert_eq!(buf.bytes_init(), 0); 201 | assert_eq!(buf.bytes_total(), 10); 202 | assert!(buf[..].is_empty()); 203 | 204 | let mut v = Vec::with_capacity(10); 205 | v.push(42); 206 | let mut buf = v.slice(..); 207 | let _ = buf.stable_mut_ptr(); 208 | assert_eq!(buf.bytes_init(), 1); 209 | assert_eq!(buf.bytes_total(), 10); 210 | assert_eq!(mem::replace(&mut buf[0], 0), 42); 211 | buf.copy_from_slice(&[43]); 212 | assert_eq!(&buf[..], &[43]); 213 | } 214 | -------------------------------------------------------------------------------- /tests/driver.rs: -------------------------------------------------------------------------------- 1 | use tempfile::NamedTempFile; 2 | 3 | use tokio_uring::{buf::IoBuf, fs::File}; 4 | 5 | #[path = "../src/future.rs"] 6 | #[allow(warnings)] 7 | mod future; 8 | 9 | #[test] 10 | fn complete_ops_on_drop() { 11 | use std::sync::Arc; 12 | 13 | struct MyBuf { 14 | data: Vec, 15 | _ref_cnt: Arc<()>, 16 | } 17 | 18 | unsafe impl IoBuf for MyBuf { 19 | fn stable_ptr(&self) -> *const u8 { 20 | self.data.stable_ptr() 21 | } 22 | 23 | fn bytes_init(&self) -> usize { 24 | self.data.bytes_init() 25 | } 26 | 27 | fn bytes_total(&self) -> usize { 28 | self.data.bytes_total() 29 | } 30 | } 31 | 32 | unsafe impl tokio_uring::buf::IoBufMut for MyBuf { 33 | fn stable_mut_ptr(&mut self) -> *mut u8 { 34 | self.data.stable_mut_ptr() 35 | } 36 | 37 | unsafe fn set_init(&mut self, pos: usize) { 38 | self.data.set_init(pos); 39 | } 40 | } 41 | 42 | // Used to test if the buffer dropped. 
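// (A clone of this `Arc` rides inside `MyBuf` into the in-flight read; once the runtime shuts down and the operation is finished with the buffer, it is released and the strong count returns to 1, as asserted after `start` returns.)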
43 | let ref_cnt = Arc::new(()); 44 | 45 | let tempfile = tempfile(); 46 | 47 | let vec = vec![0; 50 * 1024 * 1024]; 48 | let mut file = std::fs::File::create(tempfile.path()).unwrap(); 49 | std::io::Write::write_all(&mut file, &vec).unwrap(); 50 | 51 | let file = tokio_uring::start(async { 52 | let file = File::create(tempfile.path()).await.unwrap(); 53 | poll_once(async { 54 | file.read_at( 55 | MyBuf { 56 | data: vec![0; 64 * 1024], 57 | _ref_cnt: ref_cnt.clone(), 58 | }, 59 | 25 * 1024 * 1024, 60 | ) 61 | .await 62 | .0 63 | .unwrap(); 64 | }) 65 | .await; 66 | 67 | file 68 | }); 69 | 70 | assert_eq!(Arc::strong_count(&ref_cnt), 1); 71 | 72 | // little sleep 73 | std::thread::sleep(std::time::Duration::from_millis(100)); 74 | 75 | drop(file); 76 | } 77 | 78 | #[test] 79 | fn too_many_submissions() { 80 | let tempfile = tempfile(); 81 | 82 | tokio_uring::start(async { 83 | let file = File::create(tempfile.path()).await.unwrap(); 84 | for _ in 0..600 { 85 | poll_once(async { 86 | file.write_at(b"hello world".to_vec(), 0) 87 | .submit() 88 | .await 89 | .0 90 | .unwrap(); 91 | }) 92 | .await; 93 | } 94 | }); 95 | } 96 | 97 | #[test] 98 | fn completion_overflow() { 99 | use std::process; 100 | use std::{thread, time}; 101 | use tokio::task::JoinSet; 102 | 103 | let spawn_cnt = 50; 104 | let squeue_entries = 2; 105 | let cqueue_entries = 2 * squeue_entries; 106 | 107 | std::thread::spawn(|| { 108 | thread::sleep(time::Duration::from_secs(8)); // 1000 times longer than it takes on a slow machine 109 | eprintln!("Timeout reached. The uring completions are hung."); 110 | process::exit(1); 111 | }); 112 | 113 | tokio_uring::builder() 114 | .entries(squeue_entries) 115 | .uring_builder(tokio_uring::uring_builder().setup_cqsize(cqueue_entries)) 116 | .start(async move { 117 | let mut js = JoinSet::new(); 118 | 119 | for _ in 0..spawn_cnt { 120 | js.spawn_local(tokio_uring::no_op()); 121 | } 122 | 123 | while let Some(res) = js.join_next().await { 124 | res.unwrap().unwrap(); 125 | } 126 | }); 127 | } 128 | 129 | fn tempfile() -> NamedTempFile { 130 | NamedTempFile::new().unwrap() 131 | } 132 | 133 | async fn poll_once(future: impl std::future::Future) { 134 | // use std::future::Future; 135 | use std::task::Poll; 136 | use tokio::pin; 137 | 138 | pin!(future); 139 | 140 | std::future::poll_fn(|cx| { 141 | assert!(future.as_mut().poll(cx).is_pending()); 142 | Poll::Ready(()) 143 | }) 144 | .await; 145 | } 146 | -------------------------------------------------------------------------------- /tests/fixed_buf.rs: -------------------------------------------------------------------------------- 1 | use tokio_test::assert_err; 2 | use tokio_uring::buf::fixed::{FixedBufPool, FixedBufRegistry}; 3 | use tokio_uring::buf::{BoundedBuf, BoundedBufMut}; 4 | use tokio_uring::fs::File; 5 | 6 | use std::fs::File as StdFile; 7 | use std::io::prelude::*; 8 | use std::iter; 9 | use std::mem; 10 | use tempfile::NamedTempFile; 11 | 12 | const HELLO: &[u8] = b"hello world..."; 13 | 14 | #[test] 15 | fn fixed_buf_turnaround() { 16 | tokio_uring::start(async { 17 | let mut tempfile = tempfile(); 18 | tempfile.write_all(HELLO).unwrap(); 19 | 20 | let file = File::open(tempfile.path()).await.unwrap(); 21 | 22 | let buffers = FixedBufRegistry::new([30, 20, 10].iter().map(|&n| Vec::with_capacity(n))); 23 | buffers.register().unwrap(); 24 | 25 | let fixed_buf = buffers.check_out(0).unwrap(); 26 | assert_eq!(fixed_buf.bytes_total(), 30); 27 | 28 | // Can't check out the same buffer twice. 
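// (`check_out(0)` hands out the single owning handle for that slot, so it keeps returning `None` until that handle, or a buffer an operation returns from it, has been dropped.)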
29 | assert!(buffers.check_out(0).is_none()); 30 | 31 | // Checking out another buffer from the same registry is possible, 32 | // but does not affect the status of the first buffer. 33 | let fixed_buf1 = buffers.check_out(1).unwrap(); 34 | assert_eq!(fixed_buf1.bytes_total(), 20); 35 | assert!(buffers.check_out(0).is_none()); 36 | mem::drop(fixed_buf1); 37 | assert!(buffers.check_out(0).is_none()); 38 | 39 | let op = file.read_fixed_at(fixed_buf, 0); 40 | 41 | // The buffer is used by the pending operation, can't check it out 42 | // for another instance. 43 | assert!(buffers.check_out(0).is_none()); 44 | 45 | let (res, buf) = op.await; 46 | let n = res.unwrap(); 47 | assert_eq!(n, HELLO.len()); 48 | 49 | // The buffer is owned by `buf`, can't check it out 50 | // for another instance. 51 | assert!(buffers.check_out(0).is_none()); 52 | 53 | mem::drop(buf); 54 | 55 | // The buffer has been released, check it out again. 56 | let fixed_buf = buffers.check_out(0).unwrap(); 57 | assert_eq!(fixed_buf.bytes_total(), 30); 58 | assert_eq!(fixed_buf.bytes_init(), HELLO.len()); 59 | }); 60 | } 61 | 62 | #[test] 63 | fn unregister_invalidates_checked_out_buffers() { 64 | tokio_uring::start(async { 65 | let mut tempfile = tempfile(); 66 | tempfile.write_all(HELLO).unwrap(); 67 | 68 | let file = File::open(tempfile.path()).await.unwrap(); 69 | 70 | let buffers = FixedBufRegistry::new([Vec::with_capacity(1024)]); 71 | buffers.register().unwrap(); 72 | 73 | let fixed_buf = buffers.check_out(0).unwrap(); 74 | 75 | // The checked out handle keeps the buffer allocation alive. 76 | // Meanwhile, we replace buffer registration in the kernel: 77 | buffers.unregister().unwrap(); 78 | let buffers = FixedBufRegistry::new([Vec::with_capacity(1024)]); 79 | buffers.register().unwrap(); 80 | 81 | // The old buffer's index no longer matches the memory area of the 82 | // currently registered buffer, so the read operation using the old 83 | // buffer's memory should fail. 84 | let (res, _) = file.read_fixed_at(fixed_buf, 0).await; 85 | assert_err!(res); 86 | 87 | let fixed_buf = buffers.check_out(0).unwrap(); 88 | let (res, buf) = file.read_fixed_at(fixed_buf, 0).await; 89 | let n = res.unwrap(); 90 | assert_eq!(n, HELLO.len()); 91 | assert_eq!(&buf[..], HELLO); 92 | }); 93 | } 94 | 95 | #[test] 96 | fn slicing() { 97 | tokio_uring::start(async { 98 | let mut tempfile = tempfile(); 99 | tempfile.write_all(HELLO).unwrap(); 100 | 101 | let file = File::from_std( 102 | StdFile::options() 103 | .read(true) 104 | .write(true) 105 | .open(tempfile.path()) 106 | .unwrap(), 107 | ); 108 | 109 | let buffers = FixedBufRegistry::new([Vec::with_capacity(1024)]); 110 | buffers.register().unwrap(); 111 | 112 | let fixed_buf = buffers.check_out(0).unwrap(); 113 | 114 | // Read no more than 8 bytes into the fixed buffer. 115 | let (res, slice) = file.read_fixed_at(fixed_buf.slice(..8), 3).await; 116 | let n = res.unwrap(); 117 | assert_eq!(n, 8); 118 | assert_eq!(slice[..], HELLO[3..11]); 119 | let fixed_buf = slice.into_inner(); 120 | 121 | // Write from the fixed buffer, starting at offset 1, 122 | // up to the end of the initialized bytes in the buffer. 
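// (The read above initialized 8 bytes, so the `1..` slice carries 7 bytes and the write below is expected to return `n == 7`.)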
123 | let (res, slice) = file 124 | .write_fixed_at(fixed_buf.slice(1..), HELLO.len() as u64) 125 | .await; 126 | let n = res.unwrap(); 127 | assert_eq!(n, 7); 128 | assert_eq!(slice[..], HELLO[4..11]); 129 | let fixed_buf = slice.into_inner(); 130 | 131 | // Read into the fixed buffer, overwriting bytes starting from offset 3 132 | // and then extending the initialized part with as many bytes as 133 | // the operation can read. 134 | let (res, slice) = file.read_fixed_at(fixed_buf.slice(3..), 0).await; 135 | let n = res.unwrap(); 136 | assert_eq!(n, HELLO.len() + 7); 137 | assert_eq!(slice[..HELLO.len()], HELLO[..]); 138 | assert_eq!(slice[HELLO.len()..], HELLO[4..11]); 139 | }) 140 | } 141 | 142 | #[test] 143 | fn pool_next_as_concurrency_limit() { 144 | tokio_uring::start(async move { 145 | const BUF_SIZE: usize = 80; 146 | 147 | let mut tempfile = tempfile(); 148 | let file = StdFile::options() 149 | .write(true) 150 | .open(tempfile.path()) 151 | .unwrap(); 152 | 153 | let buffers = FixedBufPool::new(iter::repeat_with(|| Vec::with_capacity(BUF_SIZE)).take(2)); 154 | buffers.register().unwrap(); 155 | 156 | let mut join_handles = vec![]; 157 | for i in 0..10 { 158 | let mut buf = buffers.next(BUF_SIZE).await; 159 | println!( 160 | "[main] iteration {}: obtained buffer {}", 161 | i, 162 | buf.buf_index() 163 | ); 164 | let cloned_file = file.try_clone().unwrap(); 165 | 166 | let handle = tokio_uring::spawn(async move { 167 | let file = File::from_std(cloned_file); 168 | let data = [b'0' + i as u8; BUF_SIZE]; 169 | buf.put_slice(&data); 170 | let (res, buf) = file.write_fixed_all_at(buf, BUF_SIZE as u64 * i).await; 171 | res.unwrap(); 172 | println!("[worker {}]: dropping buffer {}", i, buf.buf_index()); 173 | }); 174 | 175 | join_handles.push(handle); 176 | } 177 | for (i, handle) in join_handles.into_iter().enumerate() { 178 | handle 179 | .await 180 | .unwrap_or_else(|e| panic!("worker {} terminated abnormally: {}", i, e)); 181 | } 182 | 183 | mem::drop(file); 184 | let mut content = String::new(); 185 | tempfile.read_to_string(&mut content).unwrap(); 186 | println!("{}", content); 187 | }) 188 | } 189 | 190 | fn tempfile() -> NamedTempFile { 191 | NamedTempFile::new().unwrap() 192 | } 193 | -------------------------------------------------------------------------------- /tests/fs_directory.rs: -------------------------------------------------------------------------------- 1 | #[path = "../src/future.rs"] 2 | #[allow(warnings)] 3 | mod future; 4 | 5 | use tokio_test::assert_ok; 6 | use tokio_uring::fs; 7 | 8 | use tempfile::tempdir; 9 | 10 | #[test] 11 | fn basic_create_dir() { 12 | tokio_uring::start(async { 13 | let base_dir = tempdir().unwrap(); 14 | let new_dir = base_dir.path().join("foo"); 15 | let new_dir_2 = new_dir.clone(); 16 | 17 | assert_ok!(fs::create_dir(new_dir).await); 18 | 19 | assert!(new_dir_2.is_dir()); 20 | }); 21 | } 22 | 23 | #[test] 24 | fn basic_remove_dir() { 25 | tokio_uring::start(async { 26 | let temp_dir = tempfile::TempDir::new().unwrap(); 27 | tokio_uring::fs::remove_dir(temp_dir.path()).await.unwrap(); 28 | assert!(std::fs::metadata(temp_dir.path()).is_err()); 29 | }); 30 | } 31 | -------------------------------------------------------------------------------- /tests/fs_symlink.rs: -------------------------------------------------------------------------------- 1 | #[path = "../src/future.rs"] 2 | #[allow(warnings)] 3 | mod future; 4 | 5 | use std::io::Write; 6 | use tokio_test::assert_ok; 7 | use tokio_uring::fs; 8 | 9 | use tempfile::tempdir; 10 | 
use tempfile::NamedTempFile; 11 | 12 | const TEST_PAYLOAD: &[u8] = b"I am data in the source file"; 13 | 14 | #[test] 15 | fn test_create_symlink() { 16 | tokio_uring::start(async { 17 | let mut src_file = NamedTempFile::new().unwrap(); 18 | src_file.write_all(TEST_PAYLOAD).unwrap(); 19 | 20 | let dst_enclosing_dir = tempdir().unwrap(); 21 | 22 | assert_ok!(fs::symlink(src_file.path(), dst_enclosing_dir.path().join("abc")).await); 23 | 24 | let content = std::fs::read(dst_enclosing_dir.path().join("abc")).unwrap(); 25 | 26 | assert_eq!(content, TEST_PAYLOAD); 27 | }); 28 | } 29 | -------------------------------------------------------------------------------- /tests/runtime.rs: -------------------------------------------------------------------------------- 1 | use tokio::net::{TcpListener, TcpStream}; 2 | 3 | #[test] 4 | fn use_tokio_types_from_runtime() { 5 | tokio_uring::start(async { 6 | let listener = TcpListener::bind("0.0.0.0:0").await.unwrap(); 7 | let addr = listener.local_addr().unwrap(); 8 | 9 | let task = tokio::spawn(async move { 10 | let _socket = TcpStream::connect(addr).await.unwrap(); 11 | }); 12 | 13 | // Accept a connection 14 | let (_socket, _) = listener.accept().await.unwrap(); 15 | 16 | // Wait for the task to complete 17 | task.await.unwrap(); 18 | }); 19 | } 20 | 21 | #[test] 22 | fn spawn_a_task() { 23 | use std::cell::RefCell; 24 | use std::rc::Rc; 25 | 26 | tokio_uring::start(async { 27 | let cell = Rc::new(RefCell::new(1)); 28 | let c = cell.clone(); 29 | let handle = tokio_uring::spawn(async move { 30 | *c.borrow_mut() = 2; 31 | }); 32 | 33 | handle.await.unwrap(); 34 | assert_eq!(2, *cell.borrow()); 35 | }); 36 | } 37 | --------------------------------------------------------------------------------