├── .github └── workflows │ ├── ci.yml │ └── test.yml ├── .gitignore ├── Cargo.toml ├── LICENSE-APACHE ├── LICENSE-MIT ├── README.md ├── build.rs ├── ci ├── .gitignore ├── mkosi.build ├── mkosi.conf ├── mkosi.default.d │ ├── 10-ubdsrv.conf │ └── fedora │ │ └── 10-fedora.conf └── mkosi.extra │ └── etc │ └── modules-load.d │ └── ublk-drv.conf ├── examples ├── loop.rs ├── null.rs └── ramdisk.rs ├── src ├── ctrl.rs ├── helpers.rs ├── io.rs ├── lib.rs ├── sys.rs └── uring_async.rs ├── tests └── basic.rs ├── ublk_cmd.h └── utils ├── ublk_chown.sh └── ublk_user_id_rs.rs /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: Cargo Build 2 | 3 | on: 4 | push: 5 | pull_request: 6 | 7 | env: 8 | CARGO_TERM_COLOR: always 9 | 10 | jobs: 11 | build: 12 | name: Rust project - latest 13 | runs-on: ubuntu-latest 14 | strategy: 15 | matrix: 16 | toolchain: 17 | - stable 18 | - nightly 19 | target: 20 | - x86_64-unknown-linux-gnu 21 | - i686-unknown-linux-gnu 22 | steps: 23 | - uses: actions/checkout@v3 24 | - name: Install dependencies 25 | if: matrix.target == 'i686-unknown-linux-gnu' 26 | run: | 27 | sudo apt-get update 28 | sudo apt-get install -y gcc-multilib 29 | - run: rustup update ${{ matrix.toolchain }} && rustup default ${{ matrix.toolchain }} && rustup target add ${{ matrix.target }} 30 | - run: cargo build --verbose --target ${{ matrix.target }} 31 | - run: cargo build --features=fat_complete --verbose --target ${{ matrix.target }} 32 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Libublk Test 2 | 3 | on: 4 | workflow_dispatch: 5 | push: 6 | pull_request: 7 | 8 | env: 9 | CARGO_TERM_COLOR: always 10 | 11 | jobs: 12 | #build one Fedora image and run libublk-rs test 13 | test: 14 | runs-on: ubuntu-latest 15 | timeout-minutes: 20 16 | steps: 17 | - name: "install: mkosi + dependencies" 18 | shell: bash 19 | run: | 20 | sudo apt update -o Acquire::Retries=3 21 | sudo apt install -y dnf rpm systemd-container qemu-system-x86 ovmf e2fsprogs btrfs-progs 22 | rustup update stable && rustup default stable 23 | wget https://github.com/systemd/mkosi/archive/refs/tags/v14.tar.gz 24 | tar zxf v14.tar.gz 25 | cd mkosi-14 26 | sed -i '/gpgurl\ = \urllib.parse.urljoin/c \ gpgurl\ =\ \"https://fedoraproject.org/fedora.gpg\"' ./mkosi/__init__.py 27 | sed -i 's/gpgcheck = True/gpgcheck = False/g' ./mkosi/__init__.py 28 | python3 -m pip install --user ./ 29 | 30 | # Required for ssh'ing into VM 31 | - name: "setup: environment" 32 | run: | 33 | sudo systemctl enable --now systemd-networkd 34 | 35 | - name: "cache: os packages" 36 | uses: actions/cache@v3 37 | with: 38 | path: ~/mkosi.cache 39 | key: fedora-cache-v1 40 | 41 | - name: "acquire: libublk" 42 | uses: actions/checkout@v3 43 | 44 | - name: "build: fedora image" 45 | working-directory: ci 46 | run: | 47 | [ -d ~/mkosi.cache ] && ln -s mkosi.cache ~/mkosi.cache 48 | sudo $(which mkosi) build 49 | if [ ! -d ~/mkosi.cache ]; then cp -fr ./mkosi.cache ~/; fi 50 | 51 | - name: "start: boot fedora in qemu" 52 | working-directory: ci 53 | run: | 54 | RUNNER_TRACKING_ID="" && sudo $(which mkosi) qemu -serial none -monitor none -display none -device virtio-net-pci,netdev=network0 -netdev user,id=network0,hostfwd=tcp:127.0.0.1:5555-:22 | tee ${{ github.workspace }}/qemu.log & 55 | 56 | - name: "connect: check ssh connection" 57 | shell: bash 58 | timeout-minutes: 10 59 | working-directory: ci 60 | run: | 61 | until mkosi ssh uname -a; do 62 | echo "Retrying..." 63 | sleep 0.25 64 | done 65 | 66 | - name: "test: run libublk test" 67 | working-directory: ci 68 | run: | 69 | #sudo iptables --flush 70 | #sudo lscpu 71 | #sudo free -h 72 | #mkosi ssh lscpu 73 | #mkosi ssh free -h 74 | #mkosi ssh ifconfig -a 75 | mkosi ssh ls -l /usr/share/libublk-rs/target/debug/deps/libublk* 76 | mkosi ssh ls -l /usr/share/libublk-rs/target/debug/deps/basic* 77 | mkosi ssh ls -l /usr/share/libublk-rs/target/debug/ 78 | mkosi ssh /usr/share/libublk-rs/target/debug/test-libublk --nocapture 79 | mkosi ssh /usr/share/libublk-rs/target/debug/test-basic --nocapture 80 | mkosi ssh truncate -s 128M /tmp/test.img 81 | mkosi ssh /usr/share/libublk-rs/target/debug/examples/loop add --foreground --oneshot -f /tmp/test.img -a 82 | mkosi ssh /usr/share/libublk-rs/target/debug/examples/null add --foreground --oneshot -a 83 | mkosi ssh /usr/share/libublk-rs/target/debug/examples/null add --foreground --oneshot -a -u 84 | 85 | - name: "cleanup" 86 | if: always() 87 | continue-on-error: true 88 | run: | 89 | cat ${{ github.workspace }}/qemu.log 90 | sudo pkill -f qemu 91 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /Cargo.lock 3 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "libublk" 3 | version = "0.4.0" 4 | edition = "2021" 5 | description = "Library for building linux block device in userspace" 6 | authors = ["Ming Lei "] 7 | readme = "README.md" 8 | repository = "https://github.com/ublk-org/libublk-rs" 9 | license = "MIT OR Apache-2.0" 10 | keywords = ["ublk", "io_uring", "userspace", "block", "storage"] 11 | categories = [ "asynchronous", "filesystem" ] 12 | 13 | publish = true 14 | 15 | [badges] 16 | maintenance = { status = "actively-developed" } 17 | 18 | [features] 19 | fat_complete = [] 20 | 21 | [[bin]] 22 | name = "ublk_user_id" 23 | path = "utils/ublk_user_id_rs.rs" 24 | 25 | [package.metadata] 26 | scripts = ["utils/ublk_chown.sh"] 27 | 28 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 29 | 30 | [build-dependencies] 31 | pkg-config = "0.3" 32 | bindgen = "0.69" 33 | regex = "1.8.3" 34 | anyhow = {version = "1.0.66", features = ["default"]} 35 | 36 | [dependencies] 37 | libc = "0.2" 38 | io-uring = {version="0.7.2", features = ["bindgen", "overwrite"]} 39 | serde = {version = "1.0.99", features = ["derive"]} 40 | serde_json = "1.0.79" 41 | bitmaps = "3.2.0" 42 | log = {version = "0.4", features = ["release_max_level_off"]} 43 | thiserror = "1.0.43" 44 | futures = "0.3.31" 45 | env_logger = "0.9" 46 | smol = "2.0.2" 47 | slab = "0.4.9" 48 | derive_setters = "0.1" 49 | bitflags = "2.4.1" 50 | 51 | [dev-dependencies] 52 | block-utils = "0.11.0" 53 | tempfile = "3.6.0" 54 | regex = "1.8.4" 55 | anyhow = {version = "1.0.66", features = ["default"]} 56 | clap = "4.3" 57 | nix = "0.26.2" 58 | ilog = "1.0.1" 59 | async-std = {version = "1.12.0"} 60 | ctrlc = "3.4.0" 61 | daemonize = "0.5" 62 | -------------------------------------------------------------------------------- /LICENSE-APACHE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright 2023 Ming Lei 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /LICENSE-MIT: -------------------------------------------------------------------------------- 1 | Copyright 2023 Ming Lei 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a 4 | copy of this software and associated documentation files (the 5 | "Software"), to deal in the Software without restriction, including 6 | without limitation the rights to use, copy, modify, merge, publish, 7 | distribute, sublicense, and/or sell copies of the Software, and to 8 | permit persons to whom the Software is furnished to do so, subject to 9 | the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included 12 | in all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 15 | OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 17 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 18 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 19 | TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 20 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Libublk 2 | 3 | [![license](https://img.shields.io/badge/License-MIT-blue.svg)](https://github.com/ming1/libublk-rs/blob/master/LICENSE-MIT) 4 | [![license](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://github.com/ming1/libublk-rs/blob/master/LICENSE-APACHE) 5 | 6 | Rust library for building linux ublk target device, which talks with 7 | linux `ublk driver`[^1] for exposing standard linux block device, 8 | meantime all target IO logic is implemented in userspace. 9 | 10 | Linux kernel 6.0 starts to support ublk covered by config option of 11 | CONFIG_BLK_DEV_UBLK. 12 | 13 | ## Documentations 14 | 15 | [ublk doc 16 | links](https://github.com/ming1/ubdsrv/blob/master/doc/external_links.rst) 17 | 18 | [ublk 19 | introduction](https://github.com/ming1/ubdsrv/blob/master/doc/ublk_intro.pdf) 20 | 21 | ## Quick Start 22 | 23 | Follows one 2-queue ublk-null target which is built over libublk, ublk block 24 | device(/dev/ublkbN) is created after the code is run. And the device will be 25 | deleted after terminating this process by ctrl+C. 26 | 27 | ``` rust 28 | use libublk::{ctrl::UblkCtrlBuilder, io::UblkDev, io::UblkQueue}; 29 | 30 | // async/.await IO handling 31 | async fn handle_io_cmd(q: &UblkQueue<'_>, tag: u16) -> i32 { 32 | (q.get_iod(tag).nr_sectors << 9) as i32 33 | } 34 | 35 | // implement whole ublk IO level protocol 36 | async fn io_task(q: &UblkQueue<'_>, tag: u16) { 37 | // IO buffer for exchange data with /dev/ublkbN 38 | let buf_bytes = q.dev.dev_info.max_io_buf_bytes as usize; 39 | let buf = libublk::helpers::IoBuf::::new(buf_bytes); 40 | let mut cmd_op = libublk::sys::UBLK_U_IO_FETCH_REQ; 41 | let mut res = 0; 42 | 43 | // Register IO buffer, so that buffer pages can be discarded 44 | // when queue becomes idle 45 | q.register_io_buf(tag, &buf); 46 | loop { 47 | // Complete previous command with result and re-submit 48 | // IO command for fetching new IO request from /dev/ublkbN 49 | res = q.submit_io_cmd(tag, cmd_op, buf.as_mut_ptr(), res).await; 50 | if res == libublk::sys::UBLK_IO_RES_ABORT { 51 | break; 52 | } 53 | 54 | // Handle this incoming IO command 55 | res = handle_io_cmd(&q, tag).await; 56 | cmd_op = libublk::sys::UBLK_U_IO_COMMIT_AND_FETCH_REQ; 57 | } 58 | } 59 | 60 | fn q_fn(qid: u16, dev: &UblkDev) { 61 | let q_rc = std::rc::Rc::new(UblkQueue::new(qid as u16, &dev).unwrap()); 62 | let exe = smol::LocalExecutor::new(); 63 | let mut f_vec = Vec::new(); 64 | 65 | for tag in 0..dev.dev_info.queue_depth { 66 | let q = q_rc.clone(); 67 | 68 | f_vec.push(exe.spawn(async move { io_task(&q, tag).await })); 69 | } 70 | 71 | // Drive smol executor, won't exit until queue is dead 72 | libublk::uring_async::ublk_wait_and_handle_ios(&exe, &q_rc); 73 | smol::block_on(async { futures::future::join_all(f_vec).await }); 74 | } 75 | 76 | fn main() { 77 | // Create ublk device 78 | let ctrl = std::sync::Arc::new( 79 | UblkCtrlBuilder::default() 80 | .name("async_null") 81 | .nr_queues(2) 82 | .dev_flags(libublk::UblkFlags::UBLK_DEV_F_ADD_DEV) 83 | .build() 84 | .unwrap(), 85 | ); 86 | // Kill ublk device by handling "Ctrl + C" 87 | let ctrl_sig = ctrl.clone(); 88 | let _ = ctrlc::set_handler(move || { 89 | ctrl_sig.kill_dev().unwrap(); 90 | }); 91 | 92 | // Now start this ublk target 93 | ctrl.run_target( 94 | // target initialization 95 | |dev| { 96 | dev.set_default_params(250_u64 << 30); 97 | Ok(()) 98 | }, 99 | // queue IO logic 100 | |tag, dev| q_fn(tag, dev), 101 | // dump device after it is started 102 | |dev| dev.dump(), 103 | ) 104 | .unwrap(); 105 | 106 | // Usually device is deleted automatically when `ctrl` drops, but 107 | // here `ctrl` is leaked by the global sig handler closure actually, 108 | // so we have to delete it explicitly 109 | ctrl.del_dev().unwrap(); 110 | } 111 | ``` 112 | 113 | * [`examples/loop.rs`](examples/loop.rs): real example using 114 | async/await & io_uring. 115 | 116 | * [`examples/ramdisk.rs`](examples/ramdisk.rs): single thread & 117 | async/.await for both ctrl and IO, this technique will be extended to 118 | create multiple devices from single thread in future 119 | 120 | `rublk`[^4] is based on libublk, and supports null, loop, zoned & qcow2 targets so 121 | far. 122 | 123 | ## unprivileged ublk support 124 | 125 | In unprivileged mode(`UBLK_F_UNPRIVILEGED_DEV`), ublk device can be created 126 | in non-admin user session. For supporting this feature: 127 | 128 | - install udev rules 129 | 130 | ``` 131 | KERNEL=="ublk-control", MODE="0666", OPTIONS+="static_node=ublk-control" 132 | ACTION=="add",KERNEL=="ublk[bc]*",RUN+="/usr/local/sbin/ublk_chown.sh %k 'add' '%M' '%m'" 133 | ACTION=="remove",KERNEL=="ublk[bc]*",RUN+="/usr/local/sbin/ublk_chown.sh %k 'remove' '%M' '%m'" 134 | ``` 135 | 136 | - install utility and script 137 | 138 | `utils/ublk_chown.sh` and binary of `utils/ublk_user_id.rs` needs to be 139 | installed under /usr/local/sbin or other directory which has to match 140 | with the udev rules. 141 | 142 | 143 | ## Test 144 | 145 | You can run the test of the library with ```cargo test``` 146 | 147 | ## Performance 148 | 149 | When running fio `t/io_uring /dev/ublkb0`[^2], IOPS is basically same with 150 | running same test over ublk device created by blktests `miniublk`[^3], which 151 | is written by pure C. And the ublk device is null, which has 2 queues, each 152 | queue's depth is 64. 153 | 154 | ## Example 155 | 156 | ### loop 157 | 158 | cargo run \--example loop help 159 | 160 | ### null 161 | 162 | cargo run \--example null help 163 | 164 | ## License 165 | 166 | This project is licensed under either of Apache License, Version 2.0 or 167 | MIT license at your option. 168 | 169 | ## Contributing 170 | 171 | Any kinds of contributions are welcome! 172 | 173 | ## References 174 | 175 | [^1]: 176 | [^2]: 177 | [^3]: 178 | [^4]: 179 | -------------------------------------------------------------------------------- /build.rs: -------------------------------------------------------------------------------- 1 | extern crate bindgen; 2 | 3 | #[derive(Debug)] 4 | pub struct Fix753 {} 5 | impl bindgen::callbacks::ParseCallbacks for Fix753 { 6 | fn item_name(&self, original_item_name: &str) -> Option { 7 | Some(original_item_name.trim_start_matches("Fix753_").to_owned()) 8 | } 9 | } 10 | 11 | fn add_serialize(outdir: &std::path::Path) -> anyhow::Result { 12 | use std::fs::File; 13 | use std::io::Write; 14 | 15 | let res = std::fs::read_to_string(outdir.join("ublk_cmd.rs"))?; 16 | let data = format!( 17 | "use serde::{{Serialize, Deserialize}};\n{}", 18 | regex::Regex::new(r"#\s*\[\s*derive\s*\((?P[^)]+)\)\s*\]\s*pub\s*(?Pstruct|enum)")? 19 | .replace_all(&res, "#[derive($d, Serialize, Deserialize)] pub $s") 20 | ); 21 | let mut fd = File::create(outdir.join("ublk_cmd.rs"))?; 22 | fd.write_all(data.as_bytes())?; 23 | 24 | Ok(0) 25 | } 26 | 27 | fn main() { 28 | use std::env; 29 | use std::path::PathBuf; 30 | 31 | const INCLUDE: &str = r#" 32 | #include 33 | #include 34 | #include 35 | #include "ublk_cmd.h" 36 | 37 | #ifdef UBLK_F_CMD_IOCTL_ENCODE 38 | #define MARK_FIX_753(req_name) const unsigned int Fix753_##req_name = req_name; 39 | #else 40 | #define MARK_FIX_753(req_name) 41 | #endif 42 | MARK_FIX_753(UBLK_U_CMD_GET_QUEUE_AFFINITY); 43 | MARK_FIX_753(UBLK_U_CMD_GET_DEV_INFO); 44 | MARK_FIX_753(UBLK_U_CMD_ADD_DEV); 45 | MARK_FIX_753(UBLK_U_CMD_DEL_DEV); 46 | MARK_FIX_753(UBLK_U_CMD_START_DEV); 47 | MARK_FIX_753(UBLK_U_CMD_STOP_DEV); 48 | MARK_FIX_753(UBLK_U_CMD_SET_PARAMS); 49 | MARK_FIX_753(UBLK_U_CMD_GET_PARAMS); 50 | MARK_FIX_753(UBLK_U_CMD_START_USER_RECOVERY); 51 | MARK_FIX_753(UBLK_U_CMD_END_USER_RECOVERY); 52 | MARK_FIX_753(UBLK_U_CMD_GET_DEV_INFO2); 53 | MARK_FIX_753(UBLK_U_CMD_GET_FEATURES); 54 | MARK_FIX_753(UBLK_U_IO_FETCH_REQ); 55 | MARK_FIX_753(UBLK_U_IO_COMMIT_AND_FETCH_REQ); 56 | MARK_FIX_753(UBLK_U_IO_NEED_GET_DATA); 57 | MARK_FIX_753(UBLK_U_CMD_DEL_DEV_ASYNC); 58 | const int Fix753_UBLK_IO_RES_ABORT = UBLK_IO_RES_ABORT; 59 | "#; 60 | 61 | let outdir = PathBuf::from(env::var("OUT_DIR").unwrap()); 62 | let mut builder = bindgen::Builder::default(); 63 | builder = builder.header_contents("include-file.h", INCLUDE); 64 | 65 | builder 66 | .ctypes_prefix("libc") 67 | .prepend_enum_name(false) 68 | .derive_default(true) 69 | .generate_comments(true) 70 | .use_core() 71 | .allowlist_var("UBLKSRV_.*|UBLK_.*|UBLK_U_.*|Fix753_.*") 72 | .allowlist_type("ublksrv_.*|ublk_.*") 73 | .allowlist_var("BLK_ZONE_.*") 74 | .allowlist_type("blk_zone_.*") 75 | .parse_callbacks(Box::new(Fix753 {})) 76 | .generate() 77 | .unwrap() 78 | .write_to_file(outdir.join("ublk_cmd.rs")) 79 | .unwrap(); 80 | 81 | if let Err(error) = add_serialize(&outdir) { 82 | eprintln!("Error: {}", error) 83 | } 84 | } 85 | -------------------------------------------------------------------------------- /ci/.gitignore: -------------------------------------------------------------------------------- 1 | mkosi.builddir/ 2 | mkosi.output/ 3 | mkosi.cache/ 4 | -------------------------------------------------------------------------------- /ci/mkosi.build: -------------------------------------------------------------------------------- 1 | #!/usr/bin/bash 2 | 3 | # 4 | # This is a build script file for OS image generation using mkosi (https://github.com/systemd/mkosi). 5 | # It is invoked in a build environment, with the following set well-known variables: 6 | # 7 | # $SRCDIR 8 | # $DESTDIR 9 | # $BUILDDIR 10 | # 11 | # 12 | 13 | install_rust_1_81_to_path() { 14 | local target_dir="$1" 15 | 16 | if [[ -z "$target_dir" ]]; then 17 | echo "Usage: install_rust_1_81_to_path " 18 | return 1 19 | fi 20 | 21 | # Create the target directory if it doesn't exist 22 | mkdir -p "$target_dir" 23 | 24 | # Set environment variables for custom installation paths 25 | export CARGO_HOME="$target_dir" 26 | export RUSTUP_HOME="$target_dir" 27 | 28 | # Remove any system-installed Rust to avoid conflicts 29 | dnf remove -y rust 30 | 31 | # Install rustup in non-interactive mode 32 | curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y 33 | 34 | # Add the custom CARGO_HOME bin directory to PATH for this session 35 | export PATH="$CARGO_HOME/bin:$PATH" 36 | 37 | # Install Rust 1.81 38 | rustup install 1.81.0 39 | 40 | # Set Rust 1.81 as the default version globally (in the custom path) 41 | rustup default 1.81.0 42 | 43 | # Verify the installation 44 | if "$CARGO_HOME/bin/rustc" --version | grep -q "1.81.0"; then 45 | echo "Rust 1.81.0 has been installed successfully to $target_dir!" 46 | else 47 | echo "Rust installation failed." 48 | fi 49 | } 50 | 51 | create_libublk() 52 | { 53 | local src=$1 54 | local rs_dir=$2 55 | 56 | shift 2 57 | local para=$@ 58 | 59 | cp -fr $src $rs_dir 60 | 61 | cd $rs_dir 62 | cargo test $para -v --no-run 63 | cargo run --example loop -- help 64 | cargo run --example null -- help 65 | cd - 66 | 67 | TA=`find $rs_dir/target/debug/deps/ -type f -executable -name "libublk*"` 68 | TB=`find $rs_dir/target/debug/deps/ -type f -executable -name "basic*"` 69 | ln $TA $rs_dir/target/debug/test-libublk 70 | ln $TB $rs_dir/target/debug/test-basic 71 | ls -l $rs_dir/ci/ 72 | } 73 | 74 | set -eo pipefail 75 | 76 | mkdir -p $DESTDIR/root/ 77 | echo "systemctl restart systemd-networkd" >> $DESTDIR/root/.bash_profile 78 | echo "export PATH=/root/bin:\$PATH" >> $DESTDIR/root/.bash_profile 79 | chmod +x $DESTDIR/root/.bash_profile 80 | 81 | install_rust_1_81_to_path $DESTDIR/root/ 82 | 83 | # Copy tests into the dest 84 | mkdir -p $DESTDIR/usr/share 85 | 86 | create_libublk $SRCDIR $DESTDIR/usr/share/libublk-rs 87 | #create_libublk $SRCDIR $DESTDIR/usr/share/libublk-rs2 --features=fat_complete 88 | -------------------------------------------------------------------------------- /ci/mkosi.conf: -------------------------------------------------------------------------------- 1 | [Distribution] 2 | Distribution=fedora 3 | -------------------------------------------------------------------------------- /ci/mkosi.default.d/10-ubdsrv.conf: -------------------------------------------------------------------------------- 1 | # This is a settings file for OS image generation using mkosi (https://github.com/systemd/mkosi). 2 | 3 | [Output] 4 | Format=gpt_btrfs 5 | Bootable=yes 6 | HostonlyInitrd=yes 7 | OutputDirectory=mkosi.output 8 | QCow2=yes 9 | 10 | [Content] 11 | BuildDirectory=mkosi.builddir 12 | BuildSources=.. 13 | Cache=mkosi.cache 14 | SourceFileTransfer=copy-git-cached 15 | WithNetwork=yes 16 | 17 | [Host] 18 | QemuHeadless=yes 19 | Netdev=yes 20 | Ssh=yes 21 | SshTimeout=300 22 | QemuSmp=1 23 | QemuMem=4G 24 | QemuKvm=no 25 | 26 | [Validation] 27 | Password= 28 | Autologin=yes 29 | 30 | [Partitions] 31 | RootSize=6G 32 | -------------------------------------------------------------------------------- /ci/mkosi.default.d/fedora/10-fedora.conf: -------------------------------------------------------------------------------- 1 | # This is a settings file for OS image generation using mkosi (https://github.com/systemd/mkosi). 2 | 3 | [Distribution] 4 | Distribution=fedora 5 | Release=37 6 | 7 | [Content] 8 | BuildPackages= 9 | diffutils 10 | git 11 | libtool 12 | make 13 | Packages= 14 | fio 15 | fio-engine-libaio 16 | util-linux 17 | which 18 | clang 19 | clang-devel 20 | net-tools 21 | systemd-resolved 22 | dhcp-client 23 | libudev-devel 24 | e2fsprogs 25 | -------------------------------------------------------------------------------- /ci/mkosi.extra/etc/modules-load.d/ublk-drv.conf: -------------------------------------------------------------------------------- 1 | ublk_drv 2 | ext4 3 | -------------------------------------------------------------------------------- /examples/loop.rs: -------------------------------------------------------------------------------- 1 | use anyhow::Result; 2 | use bitflags::bitflags; 3 | use clap::{Arg, ArgAction, Command}; 4 | use ilog::IntLog; 5 | use io_uring::{opcode, squeue, types}; 6 | use libublk::helpers::IoBuf; 7 | use libublk::io::{UblkDev, UblkIOCtx, UblkQueue}; 8 | use libublk::uring_async::ublk_wait_and_handle_ios; 9 | use libublk::{ctrl::UblkCtrl, sys, UblkError, UblkFlags, UblkIORes}; 10 | use serde::Serialize; 11 | use std::os::unix::fs::FileTypeExt; 12 | use std::os::unix::io::AsRawFd; 13 | use std::rc::Rc; 14 | 15 | #[derive(Debug, Serialize)] 16 | struct LoJson { 17 | back_file_path: String, 18 | direct_io: i32, 19 | } 20 | 21 | struct LoopTgt { 22 | back_file_path: String, 23 | back_file: std::fs::File, 24 | direct_io: i32, 25 | } 26 | 27 | bitflags! { 28 | #[derive(Default)] 29 | struct LoFlags: u32 { 30 | const ASYNC = 0b00000001; 31 | const FOREGROUND = 0b00000010; 32 | const ONESHOT = 0b00001000; 33 | } 34 | } 35 | 36 | // Generate ioctl function 37 | const BLK_IOCTL_TYPE: u8 = 0x12; // Defined in linux/fs.h 38 | const BLKGETSIZE64_NR: u8 = 114; 39 | const BLKSSZGET_NR: u8 = 104; 40 | const BLKPBSZGET_NR: u8 = 123; 41 | 42 | nix::ioctl_read!(ioctl_blkgetsize64, BLK_IOCTL_TYPE, BLKGETSIZE64_NR, u64); 43 | nix::ioctl_read_bad!( 44 | ioctl_blksszget, 45 | nix::request_code_none!(BLK_IOCTL_TYPE, BLKSSZGET_NR), 46 | i32 47 | ); 48 | nix::ioctl_read_bad!( 49 | ioctl_blkpbszget, 50 | nix::request_code_none!(BLK_IOCTL_TYPE, BLKPBSZGET_NR), 51 | u32 52 | ); 53 | fn lo_file_size(f: &std::fs::File) -> Result<(u64, u8, u8)> { 54 | if let Ok(meta) = f.metadata() { 55 | if meta.file_type().is_block_device() { 56 | let fd = f.as_raw_fd(); 57 | let mut cap = 0_u64; 58 | let mut ssz = 0_i32; 59 | let mut pbsz = 0_u32; 60 | 61 | unsafe { 62 | let cap_ptr = &mut cap as *mut u64; 63 | let ssz_ptr = &mut ssz as *mut i32; 64 | let pbsz_ptr = &mut pbsz as *mut u32; 65 | 66 | ioctl_blkgetsize64(fd, cap_ptr).unwrap(); 67 | ioctl_blksszget(fd, ssz_ptr).unwrap(); 68 | ioctl_blkpbszget(fd, pbsz_ptr).unwrap(); 69 | } 70 | 71 | Ok((cap, ssz.log2() as u8, pbsz.log2() as u8)) 72 | } else if meta.file_type().is_file() { 73 | Ok((f.metadata().unwrap().len(), 9, 12)) 74 | } else { 75 | Err(anyhow::anyhow!("unsupported file")) 76 | } 77 | } else { 78 | Err(anyhow::anyhow!("no file meta got")) 79 | } 80 | } 81 | 82 | // setup loop target 83 | fn lo_init_tgt(dev: &mut UblkDev, lo: &LoopTgt) -> Result<(), UblkError> { 84 | log::info!("loop: init_tgt {}", dev.dev_info.dev_id); 85 | if lo.direct_io != 0 { 86 | unsafe { 87 | libc::fcntl(lo.back_file.as_raw_fd(), libc::F_SETFL, libc::O_DIRECT); 88 | } 89 | } 90 | 91 | let tgt = &mut dev.tgt; 92 | let nr_fds = tgt.nr_fds; 93 | tgt.fds[nr_fds as usize] = lo.back_file.as_raw_fd(); 94 | tgt.nr_fds = nr_fds + 1; 95 | 96 | let sz = { lo_file_size(&lo.back_file).unwrap() }; 97 | tgt.dev_size = sz.0; 98 | //todo: figure out correct block size 99 | tgt.params = libublk::sys::ublk_params { 100 | types: libublk::sys::UBLK_PARAM_TYPE_BASIC, 101 | basic: libublk::sys::ublk_param_basic { 102 | logical_bs_shift: sz.1, 103 | physical_bs_shift: sz.2, 104 | io_opt_shift: 12, 105 | io_min_shift: 9, 106 | max_sectors: dev.dev_info.max_io_buf_bytes >> 9, 107 | dev_sectors: tgt.dev_size >> 9, 108 | ..Default::default() 109 | }, 110 | ..Default::default() 111 | }; 112 | let val = serde_json::json!({"loop": LoJson { back_file_path: lo.back_file_path.clone(), direct_io: 1 } }); 113 | dev.set_target_json(val); 114 | 115 | Ok(()) 116 | } 117 | 118 | #[inline] 119 | fn __lo_prep_submit_io_cmd(iod: &libublk::sys::ublksrv_io_desc) -> i32 { 120 | let op = iod.op_flags & 0xff; 121 | 122 | match op { 123 | libublk::sys::UBLK_IO_OP_FLUSH 124 | | libublk::sys::UBLK_IO_OP_READ 125 | | libublk::sys::UBLK_IO_OP_WRITE => return 0, 126 | _ => return -libc::EINVAL, 127 | }; 128 | } 129 | 130 | #[inline] 131 | fn __lo_make_io_sqe(op: u32, off: u64, bytes: u32, buf_addr: *mut u8) -> io_uring::squeue::Entry { 132 | match op { 133 | libublk::sys::UBLK_IO_OP_FLUSH => opcode::SyncFileRange::new(types::Fixed(1), bytes) 134 | .offset(off) 135 | .build() 136 | .flags(squeue::Flags::FIXED_FILE), 137 | libublk::sys::UBLK_IO_OP_READ => opcode::Read::new(types::Fixed(1), buf_addr, bytes) 138 | .offset(off) 139 | .build() 140 | .flags(squeue::Flags::FIXED_FILE), 141 | libublk::sys::UBLK_IO_OP_WRITE => opcode::Write::new(types::Fixed(1), buf_addr, bytes) 142 | .offset(off) 143 | .build() 144 | .flags(squeue::Flags::FIXED_FILE), 145 | _ => panic!(), 146 | } 147 | } 148 | 149 | async fn lo_handle_io_cmd_async(q: &UblkQueue<'_>, tag: u16, buf_addr: *mut u8) -> i32 { 150 | let iod = q.get_iod(tag); 151 | let res = __lo_prep_submit_io_cmd(iod); 152 | if res < 0 { 153 | return res; 154 | } 155 | 156 | for _ in 0..4 { 157 | let op = iod.op_flags & 0xff; 158 | // either start to handle or retry 159 | let off = (iod.start_sector << 9) as u64; 160 | let bytes = (iod.nr_sectors << 9) as u32; 161 | 162 | let sqe = __lo_make_io_sqe(op, off, bytes, buf_addr); 163 | let res = q.ublk_submit_sqe(sqe).await; 164 | if res != -(libc::EAGAIN) { 165 | return res; 166 | } 167 | } 168 | 169 | return -libc::EAGAIN; 170 | } 171 | 172 | fn lo_handle_io_cmd_sync(q: &UblkQueue<'_>, tag: u16, i: &UblkIOCtx, buf_addr: *mut u8) { 173 | let iod = q.get_iod(tag); 174 | let op = iod.op_flags & 0xff; 175 | let data = UblkIOCtx::build_user_data(tag as u16, op, 0, true); 176 | if i.is_tgt_io() { 177 | let user_data = i.user_data(); 178 | let res = i.result(); 179 | let cqe_tag = UblkIOCtx::user_data_to_tag(user_data); 180 | 181 | assert!(cqe_tag == tag as u32); 182 | 183 | if res != -(libc::EAGAIN) { 184 | q.complete_io_cmd(tag, buf_addr, Ok(UblkIORes::Result(res))); 185 | return; 186 | } 187 | } 188 | 189 | let res = __lo_prep_submit_io_cmd(iod); 190 | if res < 0 { 191 | q.complete_io_cmd(tag, buf_addr, Ok(UblkIORes::Result(res))); 192 | } else { 193 | let op = iod.op_flags & 0xff; 194 | // either start to handle or retry 195 | let off = (iod.start_sector << 9) as u64; 196 | let bytes = (iod.nr_sectors << 9) as u32; 197 | let sqe = __lo_make_io_sqe(op, off, bytes, buf_addr).user_data(data); 198 | q.ublk_submit_sqe_sync(sqe).unwrap(); 199 | } 200 | } 201 | 202 | fn q_fn(qid: u16, dev: &UblkDev) { 203 | let bufs_rc = Rc::new(dev.alloc_queue_io_bufs()); 204 | let bufs = bufs_rc.clone(); 205 | let lo_io_handler = move |q: &UblkQueue, tag: u16, io: &UblkIOCtx| { 206 | let bufs = bufs_rc.clone(); 207 | 208 | lo_handle_io_cmd_sync(q, tag, io, bufs[tag as usize].as_mut_ptr()); 209 | }; 210 | 211 | UblkQueue::new(qid, dev) 212 | .unwrap() 213 | .regiser_io_bufs(Some(&bufs)) 214 | .submit_fetch_commands(Some(&bufs)) 215 | .wait_and_handle_io(lo_io_handler); 216 | } 217 | 218 | fn q_a_fn(qid: u16, dev: &UblkDev, depth: u16) { 219 | let q_rc = Rc::new(UblkQueue::new(qid as u16, &dev).unwrap()); 220 | let exe = smol::LocalExecutor::new(); 221 | let mut f_vec = Vec::new(); 222 | 223 | for tag in 0..depth { 224 | let q = q_rc.clone(); 225 | 226 | f_vec.push(exe.spawn(async move { 227 | let buf = IoBuf::::new(q.dev.dev_info.max_io_buf_bytes as usize); 228 | let buf_addr = buf.as_mut_ptr(); 229 | let mut cmd_op = sys::UBLK_U_IO_FETCH_REQ; 230 | let mut res = 0; 231 | 232 | q.register_io_buf(tag, &buf); 233 | loop { 234 | let cmd_res = q.submit_io_cmd(tag, cmd_op, buf_addr, res).await; 235 | if cmd_res == sys::UBLK_IO_RES_ABORT { 236 | break; 237 | } 238 | 239 | res = lo_handle_io_cmd_async(&q, tag, buf_addr).await; 240 | cmd_op = sys::UBLK_U_IO_COMMIT_AND_FETCH_REQ; 241 | } 242 | })); 243 | } 244 | ublk_wait_and_handle_ios(&exe, &q_rc); 245 | smol::block_on(async { futures::future::join_all(f_vec).await }); 246 | } 247 | 248 | fn __loop_add( 249 | id: i32, 250 | nr_queues: u32, 251 | depth: u16, 252 | buf_sz: u32, 253 | backing_file: &String, 254 | ctrl_flags: u64, 255 | lo_flags: LoFlags, 256 | ) { 257 | let aio = lo_flags.intersects(LoFlags::ASYNC); 258 | let oneshot = lo_flags.intersects(LoFlags::ONESHOT); 259 | // LooTgt has to live in the whole device lifetime 260 | let lo = LoopTgt { 261 | back_file: std::fs::OpenOptions::new() 262 | .read(true) 263 | .write(true) 264 | .open(&backing_file) 265 | .unwrap(), 266 | direct_io: 1, 267 | back_file_path: backing_file.clone(), 268 | }; 269 | let ctrl = libublk::ctrl::UblkCtrlBuilder::default() 270 | .name("example_loop") 271 | .id(id) 272 | .ctrl_flags(ctrl_flags) 273 | .nr_queues(nr_queues.try_into().unwrap()) 274 | .depth(depth) 275 | .io_buf_bytes(buf_sz) 276 | .dev_flags(UblkFlags::UBLK_DEV_F_ADD_DEV) 277 | .build() 278 | .unwrap(); 279 | let tgt_init = |dev: &mut UblkDev| lo_init_tgt(dev, &lo); 280 | let wh = move |d_ctrl: &UblkCtrl| { 281 | d_ctrl.dump(); 282 | if oneshot { 283 | d_ctrl.kill_dev().unwrap(); 284 | } 285 | }; 286 | 287 | if aio { 288 | ctrl.run_target(tgt_init, move |qid, dev: &_| q_a_fn(qid, dev, depth), wh) 289 | .unwrap(); 290 | } else { 291 | ctrl.run_target(tgt_init, move |qid, dev: &_| q_fn(qid, dev), wh) 292 | .unwrap(); 293 | } 294 | } 295 | 296 | fn loop_add( 297 | id: i32, 298 | nr_queues: u32, 299 | depth: u16, 300 | buf_sz: u32, 301 | backing_file: &String, 302 | ctrl_flags: u64, 303 | lo_flags: LoFlags, 304 | ) { 305 | if lo_flags.intersects(LoFlags::FOREGROUND) { 306 | __loop_add( 307 | id, 308 | nr_queues, 309 | depth, 310 | buf_sz, 311 | backing_file, 312 | ctrl_flags, 313 | lo_flags, 314 | ); 315 | } else { 316 | let daemonize = daemonize::Daemonize::new() 317 | .stdout(daemonize::Stdio::keep()) 318 | .stderr(daemonize::Stdio::keep()); 319 | 320 | match daemonize.start() { 321 | Ok(_) => __loop_add( 322 | id, 323 | nr_queues, 324 | depth, 325 | buf_sz, 326 | backing_file, 327 | ctrl_flags, 328 | lo_flags, 329 | ), 330 | Err(_) => panic!(), 331 | } 332 | } 333 | } 334 | 335 | fn main() { 336 | let matches = Command::new("ublk-loop-example") 337 | .subcommand_required(true) 338 | .arg_required_else_help(true) 339 | .subcommand( 340 | Command::new("add") 341 | .about("Add ublk device") 342 | .arg( 343 | Arg::new("number") 344 | .short('n') 345 | .long("number") 346 | .default_value("-1") 347 | .allow_hyphen_values(true) 348 | .help("device id, -1: auto-allocation") 349 | .action(ArgAction::Set), 350 | ) 351 | .arg( 352 | Arg::new("queues") 353 | .long("queues") 354 | .short('q') 355 | .default_value("1") 356 | .help("nr_hw_queues") 357 | .action(ArgAction::Set), 358 | ) 359 | .arg( 360 | Arg::new("depth") 361 | .long("depth") 362 | .short('d') 363 | .default_value("64") 364 | .help("queue depth: max in-flight io commands") 365 | .action(ArgAction::Set), 366 | ) 367 | .arg( 368 | Arg::new("buf_size") 369 | .long("buf_size") 370 | .short('b') 371 | .default_value("524288") 372 | .help("io buffer size") 373 | .action(ArgAction::Set), 374 | ) 375 | .arg( 376 | Arg::new("unprivileged") 377 | .long("unprivileged") 378 | .short('p') 379 | .action(ArgAction::SetTrue) 380 | .help("enable UBLK_F_UN_PRIVILEGED_DEV"), 381 | ) 382 | .arg( 383 | Arg::new("foreground") 384 | .long("foreground") 385 | .action(ArgAction::SetTrue) 386 | .help("run in foreground mode"), 387 | ) 388 | .arg( 389 | Arg::new("backing_file") 390 | .long("backing_file") 391 | .short('f') 392 | .required(true) 393 | .help("backing file") 394 | .action(ArgAction::Set), 395 | ) 396 | .arg( 397 | Arg::new("async") 398 | .long("async") 399 | .short('a') 400 | .action(ArgAction::SetTrue) 401 | .help("use async/await to handle IO command"), 402 | ) 403 | .arg( 404 | Arg::new("oneshot") 405 | .long("oneshot") 406 | .action(ArgAction::SetTrue) 407 | .help("create, dump and remove device automatically"), 408 | ), 409 | ) 410 | .subcommand( 411 | Command::new("del").about("Delete ublk device").arg( 412 | Arg::new("number") 413 | .short('n') 414 | .long("number") 415 | .required(true) 416 | .help("device id") 417 | .action(ArgAction::Set), 418 | ), 419 | ) 420 | .subcommand(Command::new("list").about("List ublk device")) 421 | .get_matches(); 422 | 423 | match matches.subcommand() { 424 | Some(("add", add_matches)) => { 425 | let id = add_matches 426 | .get_one::("number") 427 | .unwrap() 428 | .parse::() 429 | .unwrap_or(-1); 430 | let nr_queues = add_matches 431 | .get_one::("queues") 432 | .unwrap() 433 | .parse::() 434 | .unwrap_or(1); 435 | let depth = add_matches 436 | .get_one::("depth") 437 | .unwrap() 438 | .parse::() 439 | .unwrap_or(64); 440 | let buf_size = add_matches 441 | .get_one::("buf_size") 442 | .unwrap() 443 | .parse::() 444 | .unwrap_or(52288); 445 | let backing_file = add_matches.get_one::("backing_file").unwrap(); 446 | let mut lo_flags: LoFlags = Default::default(); 447 | 448 | if add_matches.get_flag("async") { 449 | lo_flags |= LoFlags::ASYNC; 450 | }; 451 | if add_matches.get_flag("foreground") { 452 | lo_flags |= LoFlags::FOREGROUND; 453 | }; 454 | if add_matches.get_flag("oneshot") { 455 | lo_flags |= LoFlags::ONESHOT; 456 | }; 457 | let ctrl_flags: u64 = if add_matches.get_flag("unprivileged") { 458 | libublk::sys::UBLK_F_UNPRIVILEGED_DEV as u64 459 | } else { 460 | 0 461 | }; 462 | loop_add( 463 | id, 464 | nr_queues, 465 | depth.try_into().unwrap(), 466 | buf_size, 467 | backing_file, 468 | ctrl_flags, 469 | lo_flags, 470 | ); 471 | } 472 | Some(("del", add_matches)) => { 473 | let id = add_matches 474 | .get_one::("number") 475 | .unwrap() 476 | .parse::() 477 | .unwrap_or(-1); 478 | UblkCtrl::new_simple(id).unwrap().del_dev().unwrap(); 479 | } 480 | Some(("list", _add_matches)) => UblkCtrl::for_each_dev_id(|dev_id| { 481 | UblkCtrl::new_simple(dev_id as i32).unwrap().dump(); 482 | }), 483 | _ => { 484 | println!("unsupported command"); 485 | } 486 | }; 487 | } 488 | -------------------------------------------------------------------------------- /examples/null.rs: -------------------------------------------------------------------------------- 1 | use bitflags::bitflags; 2 | use clap::{Arg, ArgAction, Command}; 3 | use libublk::helpers::IoBuf; 4 | use libublk::io::{UblkDev, UblkIOCtx, UblkQueue}; 5 | use libublk::uring_async::ublk_wait_and_handle_ios; 6 | use libublk::{ctrl::UblkCtrl, UblkFlags, UblkIORes}; 7 | use std::rc::Rc; 8 | 9 | bitflags! { 10 | #[derive(Default)] 11 | struct NullFlags: u32 { 12 | const ASYNC = 0b00000001; 13 | const FOREGROUND = 0b00000010; 14 | const ONESHOT = 0b00000100; 15 | } 16 | } 17 | 18 | #[inline] 19 | fn get_io_cmd_result(q: &UblkQueue, tag: u16) -> i32 { 20 | let iod = q.get_iod(tag); 21 | let bytes = (iod.nr_sectors << 9) as i32; 22 | 23 | bytes 24 | } 25 | 26 | #[inline] 27 | fn handle_io_cmd(q: &UblkQueue, tag: u16, buf_addr: *mut u8) { 28 | let bytes = get_io_cmd_result(q, tag); 29 | 30 | q.complete_io_cmd(tag, buf_addr, Ok(UblkIORes::Result(bytes))); 31 | } 32 | 33 | fn q_sync_fn(qid: u16, dev: &UblkDev, user_copy: bool) { 34 | let bufs_rc = Rc::new(dev.alloc_queue_io_bufs()); 35 | let bufs = bufs_rc.clone(); 36 | 37 | // logic for io handling 38 | let io_handler = move |q: &UblkQueue, tag: u16, _io: &UblkIOCtx| { 39 | let buf_addr = if user_copy { 40 | std::ptr::null_mut() 41 | } else { 42 | bufs[tag as usize].as_mut_ptr() 43 | }; 44 | handle_io_cmd(q, tag, buf_addr); 45 | }; 46 | 47 | UblkQueue::new(qid, dev) 48 | .unwrap() 49 | .regiser_io_bufs(if user_copy { None } else { Some(&bufs_rc) }) 50 | .submit_fetch_commands(if user_copy { None } else { Some(&bufs_rc) }) 51 | .wait_and_handle_io(io_handler); 52 | } 53 | 54 | fn q_async_fn(qid: u16, dev: &UblkDev, user_copy: bool) { 55 | let q_rc = Rc::new(UblkQueue::new(qid as u16, &dev).unwrap()); 56 | let exe = smol::LocalExecutor::new(); 57 | let mut f_vec = Vec::new(); 58 | 59 | for tag in 0..dev.dev_info.queue_depth as u16 { 60 | let q = q_rc.clone(); 61 | 62 | f_vec.push(exe.spawn(async move { 63 | let mut cmd_op = libublk::sys::UBLK_U_IO_FETCH_REQ; 64 | let mut res = 0; 65 | let (_buf, buf_addr) = if user_copy { 66 | (None, std::ptr::null_mut()) 67 | } else { 68 | let buf = IoBuf::::new(q.dev.dev_info.max_io_buf_bytes as usize); 69 | 70 | q.register_io_buf(tag, &buf); 71 | let addr = buf.as_mut_ptr(); 72 | (Some(buf), addr) 73 | }; 74 | 75 | loop { 76 | let cmd_res = q.submit_io_cmd(tag, cmd_op, buf_addr, res).await; 77 | if cmd_res == libublk::sys::UBLK_IO_RES_ABORT { 78 | break; 79 | } 80 | 81 | res = get_io_cmd_result(&q, tag); 82 | cmd_op = libublk::sys::UBLK_U_IO_COMMIT_AND_FETCH_REQ; 83 | } 84 | })); 85 | } 86 | ublk_wait_and_handle_ios(&exe, &q_rc); 87 | smol::block_on(async { futures::future::join_all(f_vec).await }); 88 | } 89 | 90 | fn __null_add( 91 | id: i32, 92 | nr_queues: u32, 93 | depth: u32, 94 | ctrl_flags: u64, 95 | buf_size: u32, 96 | flags: NullFlags, 97 | ) { 98 | let aio = flags.intersects(NullFlags::ASYNC); 99 | let oneshot = flags.intersects(NullFlags::ONESHOT); 100 | let ctrl = libublk::ctrl::UblkCtrlBuilder::default() 101 | .name("example_null") 102 | .id(id) 103 | .depth(depth.try_into().unwrap()) 104 | .nr_queues(nr_queues.try_into().unwrap()) 105 | .io_buf_bytes(buf_size) 106 | .ctrl_flags(ctrl_flags) 107 | .dev_flags(UblkFlags::UBLK_DEV_F_ADD_DEV) 108 | .build() 109 | .unwrap(); 110 | let tgt_init = |dev: &mut UblkDev| { 111 | dev.set_default_params(250_u64 << 30); 112 | Ok(()) 113 | }; 114 | let user_copy = (ctrl.dev_info().flags & libublk::sys::UBLK_F_USER_COPY as u64) != 0; 115 | let wh = move |d_ctrl: &UblkCtrl| { 116 | d_ctrl.dump(); 117 | if oneshot { 118 | d_ctrl.kill_dev().unwrap(); 119 | } 120 | }; 121 | 122 | // Now start this ublk target 123 | if aio { 124 | let q_async_handler = move |qid, dev: &_| q_async_fn(qid, dev, user_copy); 125 | ctrl.run_target(tgt_init, q_async_handler, wh).unwrap(); 126 | } else { 127 | let q_sync_handler = move |qid, dev: &_| q_sync_fn(qid, dev, user_copy); 128 | ctrl.run_target(tgt_init, q_sync_handler, wh).unwrap(); 129 | } 130 | } 131 | 132 | fn null_add(id: i32, nr_queues: u32, depth: u32, ctrl_flags: u64, buf_size: u32, flags: NullFlags) { 133 | if flags.intersects(NullFlags::FOREGROUND) { 134 | __null_add(id, nr_queues, depth, ctrl_flags, buf_size, flags); 135 | } else { 136 | let daemonize = daemonize::Daemonize::new() 137 | .stdout(daemonize::Stdio::keep()) 138 | .stderr(daemonize::Stdio::keep()); 139 | 140 | match daemonize.start() { 141 | Ok(_) => __null_add(id, nr_queues, depth, ctrl_flags, buf_size, flags), 142 | _ => panic!(), 143 | } 144 | } 145 | } 146 | 147 | fn main() { 148 | env_logger::builder() 149 | .format_target(false) 150 | .format_timestamp(None) 151 | .init(); 152 | let matches = Command::new("ublk-null-example") 153 | .subcommand_required(true) 154 | .arg_required_else_help(true) 155 | .subcommand( 156 | Command::new("add") 157 | .about("Add ublk device") 158 | .arg( 159 | Arg::new("number") 160 | .short('n') 161 | .long("number") 162 | .default_value("-1") 163 | .allow_hyphen_values(true) 164 | .help("device id, -1: auto-allocation") 165 | .action(ArgAction::Set), 166 | ) 167 | .arg( 168 | Arg::new("queues") 169 | .long("queues") 170 | .short('q') 171 | .default_value("1") 172 | .help("nr_hw_queues") 173 | .action(ArgAction::Set), 174 | ) 175 | .arg( 176 | Arg::new("depth") 177 | .long("depth") 178 | .short('d') 179 | .default_value("64") 180 | .help("queue depth: max in-flight io commands") 181 | .action(ArgAction::Set), 182 | ) 183 | .arg( 184 | Arg::new("buf_size") 185 | .long("buf_size") 186 | .short('b') 187 | .default_value("524288") 188 | .help("io buffer size") 189 | .action(ArgAction::Set), 190 | ) 191 | .arg( 192 | Arg::new("user_copy") 193 | .long("user_copy") 194 | .short('u') 195 | .action(ArgAction::SetTrue) 196 | .help("enable UBLK_F_USER_COPY"), 197 | ) 198 | .arg( 199 | Arg::new("unprivileged") 200 | .long("unprivileged") 201 | .short('p') 202 | .action(ArgAction::SetTrue) 203 | .help("enable UBLK_F_UN_PRIVILEGED_DEV"), 204 | ) 205 | .arg( 206 | Arg::new("foreground") 207 | .long("foreground") 208 | .action(ArgAction::SetTrue) 209 | .help("run in foreground mode"), 210 | ) 211 | .arg( 212 | Arg::new("oneshot") 213 | .long("oneshot") 214 | .action(ArgAction::SetTrue) 215 | .help("create, dump and remove device automatically"), 216 | ) 217 | .arg( 218 | Arg::new("async") 219 | .long("async") 220 | .short('a') 221 | .action(ArgAction::SetTrue) 222 | .help("use async/await to handle IO command"), 223 | ), 224 | ) 225 | .subcommand( 226 | Command::new("del").about("Delete ublk device").arg( 227 | Arg::new("number") 228 | .short('n') 229 | .long("number") 230 | .required(true) 231 | .help("device id") 232 | .action(ArgAction::Set), 233 | ), 234 | ) 235 | .subcommand( 236 | Command::new("list").about("List ublk device").arg( 237 | Arg::new("number") 238 | .short('n') 239 | .long("number") 240 | .default_value("-1") 241 | .help("device id") 242 | .action(ArgAction::Set), 243 | ), 244 | ) 245 | .get_matches(); 246 | 247 | match matches.subcommand() { 248 | Some(("add", add_matches)) => { 249 | let id = add_matches 250 | .get_one::("number") 251 | .unwrap() 252 | .parse::() 253 | .unwrap_or(-1); 254 | let nr_queues = add_matches 255 | .get_one::("queues") 256 | .unwrap() 257 | .parse::() 258 | .unwrap_or(1); 259 | let depth = add_matches 260 | .get_one::("depth") 261 | .unwrap() 262 | .parse::() 263 | .unwrap_or(64); 264 | let buf_size = add_matches 265 | .get_one::("buf_size") 266 | .unwrap() 267 | .parse::() 268 | .unwrap_or(52288); 269 | let mut flags: NullFlags = Default::default(); 270 | 271 | if add_matches.get_flag("async") { 272 | flags |= NullFlags::ASYNC; 273 | }; 274 | if add_matches.get_flag("foreground") { 275 | flags |= NullFlags::FOREGROUND; 276 | }; 277 | if add_matches.get_flag("oneshot") { 278 | flags |= NullFlags::ONESHOT; 279 | }; 280 | let ctrl_flags: u64 = if add_matches.get_flag("user_copy") { 281 | libublk::sys::UBLK_F_USER_COPY as u64 282 | } else { 283 | 0 284 | } | if add_matches.get_flag("unprivileged") { 285 | libublk::sys::UBLK_F_UNPRIVILEGED_DEV as u64 286 | } else { 287 | 0 288 | }; 289 | 290 | null_add(id, nr_queues, depth, ctrl_flags, buf_size, flags); 291 | } 292 | Some(("del", add_matches)) => { 293 | let id = add_matches 294 | .get_one::("number") 295 | .unwrap() 296 | .parse::() 297 | .unwrap_or(-1); 298 | UblkCtrl::new_simple(id).unwrap().del_dev().unwrap(); 299 | } 300 | Some(("list", add_matches)) => { 301 | let dev_id = add_matches 302 | .get_one::("number") 303 | .unwrap() 304 | .parse::() 305 | .unwrap_or(-1); 306 | if dev_id >= 0 { 307 | UblkCtrl::new_simple(dev_id as i32).unwrap().dump(); 308 | } else { 309 | UblkCtrl::for_each_dev_id(|dev_id| { 310 | UblkCtrl::new_simple(dev_id as i32).unwrap().dump(); 311 | }); 312 | } 313 | } 314 | _ => { 315 | println!("unsupported command"); 316 | } 317 | }; 318 | } 319 | -------------------------------------------------------------------------------- /examples/ramdisk.rs: -------------------------------------------------------------------------------- 1 | use libublk::ctrl::UblkCtrl; 2 | ///! # Example of ramdisk 3 | /// 4 | /// Serves for covering recovery test[`test_ublk_ramdisk_recovery`], 5 | /// 6 | /// Build ramdisk target in single-thread conext, and the same technique 7 | /// will be extended to create multiple devices in single thread 8 | /// 9 | use libublk::helpers::IoBuf; 10 | use libublk::io::{UblkDev, UblkQueue}; 11 | use libublk::uring_async::ublk_run_ctrl_task; 12 | use libublk::{UblkError, UblkFlags}; 13 | use std::io::{Error, ErrorKind}; 14 | use std::rc::Rc; 15 | use std::sync::Arc; 16 | 17 | fn handle_io(q: &UblkQueue, tag: u16, buf_addr: *mut u8, start: *mut u8) -> i32 { 18 | let iod = q.get_iod(tag); 19 | let off = (iod.start_sector << 9) as u64; 20 | let bytes = (iod.nr_sectors << 9) as i32; 21 | let op = iod.op_flags & 0xff; 22 | 23 | match op { 24 | libublk::sys::UBLK_IO_OP_READ => unsafe { 25 | libc::memcpy( 26 | buf_addr as *mut libc::c_void, 27 | start.wrapping_add(off.try_into().unwrap()) as *mut libc::c_void, 28 | bytes as usize, 29 | ); 30 | }, 31 | libublk::sys::UBLK_IO_OP_WRITE => unsafe { 32 | libc::memcpy( 33 | start.wrapping_add(off.try_into().unwrap()) as *mut libc::c_void, 34 | buf_addr as *mut libc::c_void, 35 | bytes as usize, 36 | ); 37 | }, 38 | libublk::sys::UBLK_IO_OP_FLUSH => {} 39 | _ => { 40 | return -libc::EINVAL; 41 | } 42 | } 43 | 44 | bytes 45 | } 46 | 47 | async fn io_task(q: &UblkQueue<'_>, tag: u16, dev_buf_addr: *mut u8) { 48 | let buf_size = q.dev.dev_info.max_io_buf_bytes as usize; 49 | let buffer = IoBuf::::new(buf_size); 50 | let addr = buffer.as_mut_ptr(); 51 | let mut cmd_op = libublk::sys::UBLK_U_IO_FETCH_REQ; 52 | let mut res = 0; 53 | 54 | loop { 55 | let cmd_res = q.submit_io_cmd(tag, cmd_op, addr, res).await; 56 | if cmd_res == libublk::sys::UBLK_IO_RES_ABORT { 57 | break; 58 | } 59 | 60 | res = handle_io(&q, tag, addr, dev_buf_addr); 61 | cmd_op = libublk::sys::UBLK_U_IO_COMMIT_AND_FETCH_REQ; 62 | } 63 | } 64 | 65 | /// Start device in async IO task, in which both control and io rings 66 | /// are driven in current context 67 | fn start_dev_fn( 68 | exe: &smol::LocalExecutor, 69 | ctrl_rc: &Rc, 70 | dev_arc: &Arc, 71 | q: &UblkQueue, 72 | ) -> Result { 73 | let ctrl_clone = ctrl_rc.clone(); 74 | let dev_clone = dev_arc.clone(); 75 | 76 | // Start device in one dedicated io task 77 | let task = exe.spawn(async move { 78 | let r = ctrl_clone.configure_queue(&dev_clone, 0, unsafe { libc::gettid() }); 79 | if r.is_err() { 80 | r 81 | } else { 82 | ctrl_clone.start_dev_async(&dev_clone).await 83 | } 84 | }); 85 | ublk_run_ctrl_task(exe, q, &task)?; 86 | smol::block_on(task) 87 | } 88 | 89 | fn write_dev_id(ctrl: &UblkCtrl, efd: i32) -> Result { 90 | // Can't write 0 to eventfd file, otherwise the read() side may 91 | // not be waken up 92 | let dev_id = ctrl.dev_info().dev_id as u64 + 1; 93 | let bytes = dev_id.to_le_bytes(); 94 | 95 | nix::unistd::write(efd, &bytes)?; 96 | Ok(0) 97 | } 98 | 99 | fn read_dev_id(efd: i32) -> Result { 100 | let mut buffer = [0; 8]; 101 | 102 | let bytes_read = nix::unistd::read(efd, &mut buffer)?; 103 | if bytes_read == 0 { 104 | return Err(Error::new(ErrorKind::InvalidInput, "invalid device id")); 105 | } 106 | return Ok((i64::from_le_bytes(buffer) - 1) as i32); 107 | } 108 | 109 | ///run this ramdisk ublk daemon completely in single context with 110 | ///async control command, no need Rust async any more 111 | fn rd_add_dev(dev_id: i32, buf_addr: *mut u8, size: u64, for_add: bool, efd: i32) { 112 | let dev_flags = if for_add { 113 | UblkFlags::UBLK_DEV_F_ADD_DEV 114 | } else { 115 | UblkFlags::UBLK_DEV_F_RECOVER_DEV 116 | }; 117 | let ctrl = Rc::new( 118 | libublk::ctrl::UblkCtrlBuilder::default() 119 | .name("example_ramdisk") 120 | .id(dev_id) 121 | .nr_queues(1_u16) 122 | .depth(128_u16) 123 | .dev_flags(dev_flags) 124 | .ctrl_flags(libublk::sys::UBLK_F_USER_RECOVERY as u64) 125 | .build() 126 | .unwrap(), 127 | ); 128 | 129 | let tgt_init = |dev: &mut UblkDev| { 130 | dev.set_default_params(size); 131 | Ok(()) 132 | }; 133 | let dev_arc = Arc::new(UblkDev::new(ctrl.get_name(), tgt_init, &ctrl).unwrap()); 134 | let dev_clone = dev_arc.clone(); 135 | let q_rc = Rc::new(UblkQueue::new(0, &dev_clone).unwrap()); 136 | let exec = smol::LocalExecutor::new(); 137 | 138 | // spawn async io tasks 139 | let mut f_vec = Vec::new(); 140 | for tag in 0..ctrl.dev_info().queue_depth as u16 { 141 | let q_clone = q_rc.clone(); 142 | 143 | f_vec.push(exec.spawn(async move { 144 | io_task(&q_clone, tag, buf_addr).await; 145 | })); 146 | } 147 | 148 | // start device via async task 149 | let res = start_dev_fn(&exec, &ctrl, &dev_arc, &q_rc); 150 | match res { 151 | Ok(_) => { 152 | write_dev_id(&ctrl, efd).expect("Failed to write dev_id"); 153 | 154 | libublk::uring_async::ublk_wait_and_handle_ios(&exec, &q_rc); 155 | } 156 | _ => eprintln!("device can't be started"), 157 | } 158 | smol::block_on(async { futures::future::join_all(f_vec).await }); 159 | } 160 | 161 | fn rd_get_device_size(ctrl: &UblkCtrl) -> u64 { 162 | if let Ok(tgt) = ctrl.get_target_from_json() { 163 | tgt.dev_size 164 | } else { 165 | 0 166 | } 167 | } 168 | 169 | fn test_add(recover: usize) { 170 | let dev_id: i32 = std::env::args() 171 | .nth(2) 172 | .unwrap_or_else(|| "-1".to_string()) 173 | .parse::() 174 | .unwrap(); 175 | let s = std::env::args().nth(3).unwrap_or_else(|| "32".to_string()); 176 | let mb = s.parse::().unwrap(); 177 | let efd = nix::sys::eventfd::eventfd(0, nix::sys::eventfd::EfdFlags::empty()).unwrap(); 178 | 179 | let daemonize = daemonize::Daemonize::new() 180 | .stdout(daemonize::Stdio::devnull()) 181 | .stderr(daemonize::Stdio::devnull()); 182 | match daemonize.execute() { 183 | daemonize::Outcome::Child(Ok(_)) => { 184 | let mut size = (mb << 20) as u64; 185 | 186 | if recover > 0 { 187 | assert!(dev_id >= 0); 188 | let ctrl = UblkCtrl::new_simple(dev_id).unwrap(); 189 | size = rd_get_device_size(&ctrl); 190 | 191 | ctrl.start_user_recover().unwrap(); 192 | } 193 | 194 | let buf = libublk::helpers::IoBuf::::new(size as usize); 195 | rd_add_dev(dev_id, buf.as_mut_ptr(), size, recover == 0, efd); 196 | } 197 | daemonize::Outcome::Parent(Ok(_)) => match read_dev_id(efd) { 198 | Ok(id) => UblkCtrl::new_simple(id).unwrap().dump(), 199 | _ => eprintln!("Failed to add ublk device"), 200 | }, 201 | _ => panic!(), 202 | } 203 | } 204 | 205 | fn test_del(async_del: bool) { 206 | let s = std::env::args().nth(2).unwrap_or_else(|| "0".to_string()); 207 | let dev_id = s.parse::().unwrap(); 208 | let ctrl = UblkCtrl::new_simple(dev_id as i32).unwrap(); 209 | 210 | if !async_del { 211 | ctrl.del_dev().expect("fail to del_dev_async"); 212 | } else { 213 | ctrl.del_dev_async().expect("fail to del_dev_async"); 214 | } 215 | } 216 | 217 | fn main() { 218 | env_logger::builder() 219 | .format_target(false) 220 | .format_timestamp(None) 221 | .init(); 222 | if let Some(cmd) = std::env::args().nth(1) { 223 | match cmd.as_str() { 224 | "add" => test_add(0), 225 | "recover" => test_add(1), 226 | "del" => test_del(false), 227 | "del_async" => test_del(true), 228 | _ => todo!(), 229 | } 230 | } 231 | } 232 | -------------------------------------------------------------------------------- /src/ctrl.rs: -------------------------------------------------------------------------------- 1 | use super::io::{UblkDev, UblkTgt}; 2 | use super::uring_async::UblkUringOpFuture; 3 | use super::{sys, UblkError, UblkFlags}; 4 | use bitmaps::Bitmap; 5 | use derive_setters::*; 6 | use io_uring::{opcode, squeue, types, IoUring}; 7 | use log::{error, trace}; 8 | use serde::Deserialize; 9 | use std::cell::RefCell; 10 | use std::os::unix::io::AsRawFd; 11 | use std::sync::{Arc, RwLock}; 12 | use std::{ 13 | fs, 14 | io::{Read, Write}, 15 | path::Path, 16 | }; 17 | 18 | const CTRL_PATH: &str = "/dev/ublk-control"; 19 | 20 | const MAX_BUF_SZ: u32 = 32_u32 << 20; 21 | 22 | // per-thread control uring 23 | // 24 | std::thread_local! { 25 | pub(crate) static CTRL_URING: RefCell> = 26 | RefCell::new(IoUring::::builder() 27 | .build(16).unwrap()); 28 | } 29 | 30 | /// Ublk per-queue CPU affinity 31 | /// 32 | /// Responsible for setting ublk queue pthread's affinity. 33 | /// 34 | #[derive(Debug, Default, Copy, Clone)] 35 | pub struct UblkQueueAffinity { 36 | affinity: Bitmap<1024>, 37 | } 38 | 39 | impl UblkQueueAffinity { 40 | pub fn new() -> UblkQueueAffinity { 41 | UblkQueueAffinity { 42 | affinity: Bitmap::new(), 43 | } 44 | } 45 | 46 | pub fn buf_len(&self) -> usize { 47 | 1024 / 8 48 | } 49 | 50 | pub fn addr(&self) -> *const u8 { 51 | self.affinity.as_bytes().as_ptr() 52 | } 53 | pub fn to_bits_vec(&self) -> Vec { 54 | self.affinity.into_iter().collect() 55 | } 56 | } 57 | 58 | #[repr(C)] 59 | union CtrlCmd { 60 | ctrl_cmd: sys::ublksrv_ctrl_cmd, 61 | buf: [u8; 80], 62 | } 63 | 64 | /// the max supported length of char device path, which 65 | /// is one implementation limit, and can be increased 66 | /// without breaking anything. 67 | const CTRL_UBLKC_PATH_MAX: usize = 32; 68 | const CTRL_CMD_HAS_DATA: u32 = 1; 69 | const CTRL_CMD_HAS_BUF: u32 = 2; 70 | /// this command need to read data back from device 71 | const CTRL_CMD_BUF_READ: u32 = 8; 72 | /// this command needn't to attach char device path for audit in 73 | /// case of unprivileged ublk, such as get_features(), add_dev(). 74 | const CTRL_CMD_NO_NEED_DEV_PATH: u32 = 16; 75 | 76 | #[derive(Debug, Default, Copy, Clone)] 77 | struct UblkCtrlCmdData { 78 | cmd_op: u32, 79 | flags: u32, 80 | data: u64, 81 | dev_path_len: u16, 82 | _pad: u16, 83 | _reserved: u32, 84 | 85 | addr: u64, 86 | len: u32, 87 | } 88 | 89 | impl UblkCtrlCmdData { 90 | fn prep_un_privileged_dev_path(&mut self, dev: &UblkCtrlInner) -> (u64, Option>) { 91 | // handle GET_DEV_INFO2 always with dev_path attached 92 | let cmd_op = self.cmd_op & 0xff; 93 | 94 | if cmd_op != sys::UBLK_CMD_GET_DEV_INFO2 95 | && (!dev.is_unprivileged() || (self.flags & CTRL_CMD_NO_NEED_DEV_PATH) != 0) 96 | { 97 | return (0, None); 98 | } 99 | 100 | let (buf, new_buf) = { 101 | let size = { 102 | if self.flags & CTRL_CMD_HAS_BUF != 0 { 103 | self.len as usize + CTRL_UBLKC_PATH_MAX 104 | } else { 105 | CTRL_UBLKC_PATH_MAX 106 | } 107 | }; 108 | let mut v = vec![0_u8; size]; 109 | 110 | (v.as_mut_ptr(), v) 111 | }; 112 | 113 | let path_str = dev.get_cdev_path().to_string(); 114 | assert!(path_str.len() <= CTRL_UBLKC_PATH_MAX); 115 | 116 | unsafe { 117 | libc::memset(buf as *mut libc::c_void, 0, CTRL_UBLKC_PATH_MAX); 118 | libc::memcpy( 119 | buf as *mut libc::c_void, 120 | path_str.as_ptr() as *const libc::c_void, 121 | path_str.len(), 122 | ); 123 | 124 | if self.flags & CTRL_CMD_HAS_BUF != 0 { 125 | libc::memcpy( 126 | (buf as u64 + CTRL_UBLKC_PATH_MAX as u64) as *mut libc::c_void, 127 | self.addr as *const libc::c_void, 128 | self.len as usize, 129 | ); 130 | } 131 | } 132 | 133 | self.flags |= CTRL_CMD_HAS_BUF | CTRL_CMD_HAS_DATA; 134 | self.len += CTRL_UBLKC_PATH_MAX as u32; 135 | self.dev_path_len = CTRL_UBLKC_PATH_MAX as u16; 136 | let addr = self.addr; 137 | self.addr = buf as u64; 138 | (addr, Some(new_buf)) 139 | } 140 | 141 | fn unprep_un_privileged_dev_path(&mut self, dev: &UblkCtrlInner, buf: u64) { 142 | let cmd_op = self.cmd_op & 0xff; 143 | 144 | if cmd_op != sys::UBLK_CMD_GET_DEV_INFO2 145 | && (!dev.is_unprivileged() || (self.flags & CTRL_CMD_NO_NEED_DEV_PATH) != 0) 146 | { 147 | return; 148 | } 149 | 150 | let addr = self.addr + CTRL_UBLKC_PATH_MAX as u64; 151 | let len = self.len - CTRL_UBLKC_PATH_MAX as u32; 152 | if self.flags & CTRL_CMD_BUF_READ != 0 { 153 | unsafe { 154 | libc::memcpy( 155 | buf as *mut libc::c_void, 156 | addr as *const libc::c_void, 157 | len as usize, 158 | ); 159 | } 160 | } 161 | } 162 | } 163 | 164 | #[derive(Debug, Deserialize)] 165 | struct QueueAffinityJson { 166 | affinity: Vec, 167 | qid: u32, 168 | tid: u32, 169 | } 170 | 171 | /// UblkSession: build one new ublk control device or recover the old one. 172 | /// 173 | /// High level API. 174 | /// 175 | /// One limit is that IO handling closure doesn't support FnMut, and low 176 | /// level API doesn't have such limit. 177 | /// 178 | #[derive(Setters, Debug, PartialEq, Eq)] 179 | pub struct UblkCtrlBuilder<'a> { 180 | /// target type, such as null, loop, ramdisk, or nbd,... 181 | name: &'a str, 182 | 183 | /// device id: -1 can only be used for adding one new device, 184 | /// and ublk driver will allocate one new ID for the created device; 185 | /// otherwise, we are asking driver to create or recover or list 186 | /// one device with specified ID 187 | id: i32, 188 | 189 | /// how many queues 190 | nr_queues: u16, 191 | 192 | /// each queue's IO depth 193 | depth: u16, 194 | 195 | /// max size of each IO buffer size, which will be converted to 196 | /// block layer's queue limit of max hw sectors 197 | io_buf_bytes: u32, 198 | 199 | /// passed to ublk driver via `sys::ublksrv_ctrl_dev_info.flags`, 200 | /// usually for adding or recovering device 201 | ctrl_flags: u64, 202 | 203 | /// store target flags in `sys::ublksrv_ctrl_dev_info.ublksrv_flags`, 204 | /// which is immutable in the whole device lifetime 205 | ctrl_target_flags: u64, 206 | 207 | /// libublk feature flags: UBLK_DEV_F_* 208 | dev_flags: UblkFlags, 209 | } 210 | 211 | impl Default for UblkCtrlBuilder<'_> { 212 | fn default() -> Self { 213 | UblkCtrlBuilder { 214 | name: "none", 215 | id: -1, 216 | nr_queues: 1, 217 | depth: 64, 218 | io_buf_bytes: 524288, 219 | ctrl_flags: 0, 220 | ctrl_target_flags: 0, 221 | dev_flags: UblkFlags::empty(), 222 | } 223 | } 224 | } 225 | 226 | impl UblkCtrlBuilder<'_> { 227 | /// create one pair of ublk devices, the 1st one is control device(`UblkCtrl`), 228 | /// and the 2nd one is data device(`UblkDev`) 229 | pub fn build(self) -> Result { 230 | UblkCtrl::new( 231 | Some(self.name.to_string()), 232 | self.id, 233 | self.nr_queues.into(), 234 | self.depth.into(), 235 | self.io_buf_bytes, 236 | self.ctrl_flags, 237 | self.ctrl_target_flags, 238 | self.dev_flags, 239 | ) 240 | } 241 | } 242 | 243 | /// ublk control device 244 | /// 245 | /// Responsible for controlling ublk device: 246 | /// 247 | /// 1) adding and removing ublk char device(/dev/ublkcN) 248 | /// 249 | /// 2) send all kinds of control commands(recover, list, set/get parameter, 250 | /// get queue affinity, ...) 251 | /// 252 | /// 3) exporting device as json file 253 | pub struct UblkCtrl { 254 | inner: RwLock, 255 | } 256 | 257 | struct UblkCtrlInner { 258 | name: Option, 259 | file: fs::File, 260 | dev_info: sys::ublksrv_ctrl_dev_info, 261 | json: serde_json::Value, 262 | features: Option, 263 | 264 | /// global flags, shared with UblkDev and UblkQueue 265 | dev_flags: UblkFlags, 266 | cmd_token: i32, 267 | queue_tids: Vec, 268 | nr_queues_configured: u16, 269 | } 270 | 271 | impl Drop for UblkCtrlInner { 272 | fn drop(&mut self) { 273 | let id = self.dev_info.dev_id; 274 | trace!("ctrl: device {} dropped", id); 275 | if self.for_add_dev() { 276 | if let Err(r) = self.del() { 277 | //Maybe deleted from other utilities, so no warn or error:w 278 | trace!("Delete char device {} failed {}", self.dev_info.dev_id, r); 279 | } 280 | } 281 | } 282 | } 283 | 284 | impl UblkCtrlInner { 285 | #[allow(clippy::too_many_arguments)] 286 | #[allow(clippy::uninit_vec)] 287 | fn new( 288 | name: Option, 289 | id: i32, 290 | nr_queues: u32, 291 | depth: u32, 292 | io_buf_bytes: u32, 293 | flags: u64, 294 | tgt_flags: u64, 295 | dev_flags: UblkFlags, 296 | ) -> Result { 297 | let info = sys::ublksrv_ctrl_dev_info { 298 | nr_hw_queues: nr_queues as u16, 299 | queue_depth: depth as u16, 300 | max_io_buf_bytes: io_buf_bytes, 301 | dev_id: id as u32, 302 | ublksrv_pid: unsafe { libc::getpid() } as i32, 303 | flags, 304 | ublksrv_flags: tgt_flags, 305 | ..Default::default() 306 | }; 307 | let fd = fs::OpenOptions::new() 308 | .read(true) 309 | .write(true) 310 | .open(CTRL_PATH)?; 311 | 312 | let mut dev = UblkCtrlInner { 313 | name, 314 | file: fd, 315 | dev_info: info, 316 | json: serde_json::json!({}), 317 | cmd_token: 0, 318 | queue_tids: { 319 | let mut tids = Vec::::with_capacity(nr_queues as usize); 320 | unsafe { 321 | tids.set_len(nr_queues as usize); 322 | } 323 | tids 324 | }, 325 | nr_queues_configured: 0, 326 | dev_flags, 327 | features: None, 328 | }; 329 | 330 | let features = match dev.__get_features() { 331 | Ok(f) => Some(f), 332 | _ => None, 333 | }; 334 | dev.features = features; 335 | 336 | //add cdev if the device is for adding device 337 | if dev.for_add_dev() { 338 | dev.add()?; 339 | } else if id >= 0 { 340 | let res = dev.reload_json(); 341 | if res.is_err() { 342 | eprintln!("device reload json failed"); 343 | } 344 | dev.read_dev_info()?; 345 | } 346 | 347 | log::info!( 348 | "ctrl: device {} flags {:x} created", 349 | dev.dev_info.dev_id, 350 | dev.dev_flags 351 | ); 352 | 353 | Ok(dev) 354 | } 355 | 356 | fn is_unprivileged(&self) -> bool { 357 | (self.dev_info.flags & (super::sys::UBLK_F_UNPRIVILEGED_DEV as u64)) != 0 358 | } 359 | 360 | fn get_cdev_path(&self) -> String { 361 | format!("{}{}", UblkCtrl::CDEV_PATH, self.dev_info.dev_id) 362 | } 363 | 364 | fn for_add_dev(&self) -> bool { 365 | self.dev_flags.intersects(UblkFlags::UBLK_DEV_F_ADD_DEV) 366 | } 367 | 368 | fn for_recover_dev(&self) -> bool { 369 | self.dev_flags.intersects(UblkFlags::UBLK_DEV_F_RECOVER_DEV) 370 | } 371 | 372 | fn dev_state_desc(&self) -> String { 373 | match self.dev_info.state as u32 { 374 | sys::UBLK_S_DEV_DEAD => "DEAD".to_string(), 375 | sys::UBLK_S_DEV_LIVE => "LIVE".to_string(), 376 | sys::UBLK_S_DEV_QUIESCED => "QUIESCED".to_string(), 377 | _ => "UNKNOWN".to_string(), 378 | } 379 | } 380 | 381 | fn store_queue_tid(&mut self, qid: u16, tid: i32) { 382 | self.queue_tids[qid as usize] = tid; 383 | } 384 | 385 | fn dump_from_json(&self) { 386 | if !Path::new(&self.run_path()).exists() { 387 | return; 388 | } 389 | let mut file = fs::File::open(self.run_path()).expect("Failed to open file"); 390 | let mut json_str = String::new(); 391 | 392 | file.read_to_string(&mut json_str) 393 | .expect("Failed to read file"); 394 | 395 | let json_value: serde_json::Value = 396 | serde_json::from_str(&json_str).expect("Failed to parse JSON"); 397 | let queues = &json_value["queues"]; 398 | 399 | for i in 0..self.dev_info.nr_hw_queues { 400 | let queue = &queues[i.to_string()]; 401 | let this_queue: Result = serde_json::from_value(queue.clone()); 402 | 403 | if let Ok(p) = this_queue { 404 | println!( 405 | "\tqueue {} tid: {} affinity({})", 406 | p.qid, 407 | p.tid, 408 | p.affinity 409 | .iter() 410 | .map(ToString::to_string) 411 | .collect::>() 412 | .join(" ") 413 | ); 414 | } 415 | } 416 | let tgt_val = &json_value["target"]; 417 | let tgt: Result = serde_json::from_value(tgt_val.clone()); 418 | if let Ok(p) = tgt { 419 | println!( 420 | "\ttarget {{\"dev_size\":{},\"name\":\"{}\",\"type\":0}}", 421 | p.dev_size, p.tgt_type 422 | ); 423 | } 424 | println!("\ttarget_data {}", &json_value["target_data"]); 425 | } 426 | 427 | /// Returned path of this device's exported json file 428 | /// 429 | fn run_path(&self) -> String { 430 | format!("{}/{:04}.json", UblkCtrl::run_dir(), self.dev_info.dev_id) 431 | } 432 | 433 | fn ublk_ctrl_prep_cmd( 434 | &mut self, 435 | fd: i32, 436 | dev_id: u32, 437 | data: &UblkCtrlCmdData, 438 | token: u64, 439 | ) -> squeue::Entry128 { 440 | let cmd = sys::ublksrv_ctrl_cmd { 441 | addr: if (data.flags & CTRL_CMD_HAS_BUF) != 0 { 442 | data.addr 443 | } else { 444 | 0 445 | }, 446 | len: if (data.flags & CTRL_CMD_HAS_BUF) != 0 { 447 | data.len as u16 448 | } else { 449 | 0 450 | }, 451 | data: if (data.flags & CTRL_CMD_HAS_DATA) != 0 { 452 | [data.data] 453 | } else { 454 | [0] 455 | }, 456 | dev_id, 457 | queue_id: u16::MAX, 458 | dev_path_len: data.dev_path_len, 459 | ..Default::default() 460 | }; 461 | let c_cmd = CtrlCmd { ctrl_cmd: cmd }; 462 | 463 | opcode::UringCmd80::new(types::Fd(fd), data.cmd_op) 464 | .cmd(unsafe { c_cmd.buf }) 465 | .build() 466 | .user_data(token) 467 | } 468 | 469 | fn ublk_submit_cmd_async(&mut self, data: &UblkCtrlCmdData) -> UblkUringOpFuture { 470 | let fd = self.file.as_raw_fd(); 471 | let dev_id = self.dev_info.dev_id; 472 | let f = UblkUringOpFuture::new(0); 473 | let sqe = self.ublk_ctrl_prep_cmd(fd, dev_id, data, f.user_data); 474 | 475 | unsafe { 476 | CTRL_URING.with(|refcell| { 477 | refcell.borrow_mut().submission().push(&sqe).unwrap(); 478 | }) 479 | } 480 | f 481 | } 482 | 483 | fn ublk_submit_cmd( 484 | &mut self, 485 | data: &UblkCtrlCmdData, 486 | to_wait: usize, 487 | ) -> Result { 488 | let fd = self.file.as_raw_fd(); 489 | let dev_id = self.dev_info.dev_id; 490 | 491 | // token is generated uniquely because '&mut self' is 492 | // passed in 493 | let token = { 494 | self.cmd_token += 1; 495 | self.cmd_token 496 | } as u64; 497 | let sqe = self.ublk_ctrl_prep_cmd(fd, dev_id, data, token); 498 | 499 | CTRL_URING.with(|refcell| { 500 | let mut r = refcell.borrow_mut(); 501 | 502 | unsafe { r.submission().push(&sqe).unwrap() }; 503 | let _ = r.submit_and_wait(to_wait); 504 | }); 505 | Ok(token) 506 | } 507 | 508 | /// check one control command and see if it is completed 509 | /// 510 | fn poll_cmd(&mut self, token: u64) -> i32 { 511 | CTRL_URING.with(|refcell| { 512 | let mut r = refcell.borrow_mut(); 513 | 514 | let res = match r.completion().next() { 515 | Some(cqe) => { 516 | if cqe.user_data() != token { 517 | -libc::EAGAIN 518 | } else { 519 | cqe.result() 520 | } 521 | } 522 | None => -libc::EAGAIN, 523 | }; 524 | 525 | res 526 | }) 527 | } 528 | 529 | fn ublk_ctrl_need_retry( 530 | new_data: &mut UblkCtrlCmdData, 531 | data: &UblkCtrlCmdData, 532 | res: i32, 533 | ) -> bool { 534 | let legacy_op = data.cmd_op & 0xff; 535 | 536 | // Needn't to retry: 537 | // 538 | // 1) command is completed successfully 539 | // 540 | // 2) this is new command which has been issued via ioctl encoding 541 | // already 542 | if res >= 0 || res == -libc::EBUSY || (legacy_op > sys::UBLK_CMD_GET_DEV_INFO2) { 543 | false 544 | } else { 545 | *new_data = *data; 546 | new_data.cmd_op = legacy_op; 547 | true 548 | } 549 | } 550 | 551 | fn ublk_err_to_result(res: i32) -> Result { 552 | if res >= 0 || res == -libc::EBUSY { 553 | Ok(res) 554 | } else { 555 | Err(UblkError::UringIOError(res)) 556 | } 557 | } 558 | 559 | async fn ublk_ctrl_cmd_async(&mut self, data: &UblkCtrlCmdData) -> Result { 560 | let mut new_data = *data; 561 | let mut res: i32 = 0; 562 | 563 | for _ in 0..2 { 564 | let (old_buf, _new) = new_data.prep_un_privileged_dev_path(self); 565 | res = self.ublk_submit_cmd_async(&new_data).await; 566 | new_data.unprep_un_privileged_dev_path(self, old_buf); 567 | 568 | trace!("ublk_ctrl_cmd_async: cmd {:x} res {}", data.cmd_op, res); 569 | if !Self::ublk_ctrl_need_retry(&mut new_data, data, res) { 570 | break; 571 | } 572 | } 573 | 574 | Self::ublk_err_to_result(res) 575 | } 576 | 577 | fn ublk_ctrl_cmd(&mut self, data: &UblkCtrlCmdData) -> Result { 578 | let mut new_data = *data; 579 | let mut res: i32 = 0; 580 | 581 | for _ in 0..2 { 582 | let (old_buf, _new) = new_data.prep_un_privileged_dev_path(self); 583 | let token = self.ublk_submit_cmd(&new_data, 1)?; 584 | res = self.poll_cmd(token); 585 | new_data.unprep_un_privileged_dev_path(self, old_buf); 586 | 587 | trace!("ublk_ctrl_cmd: cmd {:x} res {}", data.cmd_op, res); 588 | if !Self::ublk_ctrl_need_retry(&mut new_data, data, res) { 589 | break; 590 | } 591 | } 592 | 593 | Self::ublk_err_to_result(res) 594 | } 595 | 596 | fn add(&mut self) -> Result { 597 | let data: UblkCtrlCmdData = UblkCtrlCmdData { 598 | cmd_op: sys::UBLK_U_CMD_ADD_DEV, 599 | flags: CTRL_CMD_HAS_BUF | CTRL_CMD_NO_NEED_DEV_PATH, 600 | addr: std::ptr::addr_of!(self.dev_info) as u64, 601 | len: core::mem::size_of::() as u32, 602 | ..Default::default() 603 | }; 604 | 605 | self.ublk_ctrl_cmd(&data) 606 | } 607 | 608 | /// Remove this device 609 | /// 610 | fn del(&mut self) -> Result { 611 | let data: UblkCtrlCmdData = UblkCtrlCmdData { 612 | cmd_op: if self 613 | .dev_flags 614 | .intersects(UblkFlags::UBLK_DEV_F_DEL_DEV_ASYNC) 615 | { 616 | sys::UBLK_U_CMD_DEL_DEV_ASYNC 617 | } else { 618 | sys::UBLK_U_CMD_DEL_DEV 619 | }, 620 | ..Default::default() 621 | }; 622 | 623 | self.ublk_ctrl_cmd(&data) 624 | } 625 | 626 | /// Remove this device 627 | /// 628 | fn del_async(&mut self) -> Result { 629 | let data: UblkCtrlCmdData = UblkCtrlCmdData { 630 | cmd_op: sys::UBLK_U_CMD_DEL_DEV_ASYNC, 631 | ..Default::default() 632 | }; 633 | 634 | self.ublk_ctrl_cmd(&data) 635 | } 636 | 637 | fn __get_features(&mut self) -> Result { 638 | let features = 0_u64; 639 | let data: UblkCtrlCmdData = UblkCtrlCmdData { 640 | cmd_op: sys::UBLK_U_CMD_GET_FEATURES, 641 | flags: CTRL_CMD_HAS_BUF | CTRL_CMD_BUF_READ | CTRL_CMD_NO_NEED_DEV_PATH, 642 | addr: std::ptr::addr_of!(features) as u64, 643 | len: core::mem::size_of::() as u32, 644 | ..Default::default() 645 | }; 646 | 647 | self.ublk_ctrl_cmd(&data)?; 648 | 649 | Ok(features) 650 | } 651 | 652 | fn __read_dev_info(&mut self) -> Result { 653 | let data: UblkCtrlCmdData = UblkCtrlCmdData { 654 | cmd_op: sys::UBLK_U_CMD_GET_DEV_INFO, 655 | flags: CTRL_CMD_HAS_BUF | CTRL_CMD_BUF_READ, 656 | addr: std::ptr::addr_of!(self.dev_info) as u64, 657 | len: core::mem::size_of::() as u32, 658 | ..Default::default() 659 | }; 660 | 661 | self.ublk_ctrl_cmd(&data) 662 | } 663 | 664 | fn __read_dev_info2(&mut self) -> Result { 665 | let data: UblkCtrlCmdData = UblkCtrlCmdData { 666 | cmd_op: sys::UBLK_U_CMD_GET_DEV_INFO2, 667 | flags: CTRL_CMD_HAS_BUF | CTRL_CMD_BUF_READ, 668 | addr: std::ptr::addr_of!(self.dev_info) as u64, 669 | len: core::mem::size_of::() as u32, 670 | ..Default::default() 671 | }; 672 | 673 | self.ublk_ctrl_cmd(&data) 674 | } 675 | 676 | fn read_dev_info(&mut self) -> Result { 677 | let res = self.__read_dev_info2(); 678 | 679 | if res.is_err() { 680 | self.__read_dev_info() 681 | } else { 682 | res 683 | } 684 | } 685 | 686 | /// Start this device by sending command to ublk driver 687 | /// 688 | fn start(&mut self, pid: i32) -> Result { 689 | let data: UblkCtrlCmdData = UblkCtrlCmdData { 690 | cmd_op: sys::UBLK_U_CMD_START_DEV, 691 | flags: CTRL_CMD_HAS_DATA, 692 | data: pid as u64, 693 | ..Default::default() 694 | }; 695 | 696 | self.ublk_ctrl_cmd(&data) 697 | } 698 | 699 | /// Start this device by sending command to ublk driver 700 | /// 701 | async fn start_async(&mut self, pid: i32) -> Result { 702 | let data: UblkCtrlCmdData = UblkCtrlCmdData { 703 | cmd_op: sys::UBLK_U_CMD_START_DEV, 704 | flags: CTRL_CMD_HAS_DATA, 705 | data: pid as u64, 706 | ..Default::default() 707 | }; 708 | 709 | self.ublk_ctrl_cmd_async(&data).await 710 | } 711 | 712 | /// Stop this device by sending command to ublk driver 713 | /// 714 | fn stop(&mut self) -> Result { 715 | let data: UblkCtrlCmdData = UblkCtrlCmdData { 716 | cmd_op: sys::UBLK_U_CMD_STOP_DEV, 717 | ..Default::default() 718 | }; 719 | 720 | self.ublk_ctrl_cmd(&data) 721 | } 722 | 723 | /// Retrieve this device's parameter from ublk driver by 724 | /// sending command 725 | /// 726 | /// Can't pass params by reference(&mut), why? 727 | fn get_params(&mut self, params: &mut sys::ublk_params) -> Result { 728 | params.len = core::mem::size_of::() as u32; 729 | let data: UblkCtrlCmdData = UblkCtrlCmdData { 730 | cmd_op: sys::UBLK_U_CMD_GET_PARAMS, 731 | flags: CTRL_CMD_HAS_BUF | CTRL_CMD_BUF_READ, 732 | addr: params as *const sys::ublk_params as u64, 733 | len: params.len, 734 | ..Default::default() 735 | }; 736 | 737 | self.ublk_ctrl_cmd(&data) 738 | } 739 | 740 | /// Send this device's parameter to ublk driver 741 | /// 742 | /// Note: device parameter has to send to driver before starting 743 | /// this device 744 | fn set_params(&mut self, params: &sys::ublk_params) -> Result { 745 | let mut p = *params; 746 | 747 | p.len = core::mem::size_of::() as u32; 748 | let data: UblkCtrlCmdData = UblkCtrlCmdData { 749 | cmd_op: sys::UBLK_U_CMD_SET_PARAMS, 750 | flags: CTRL_CMD_HAS_BUF, 751 | addr: std::ptr::addr_of!(p) as u64, 752 | len: p.len, 753 | ..Default::default() 754 | }; 755 | 756 | self.ublk_ctrl_cmd(&data) 757 | } 758 | 759 | fn get_queue_affinity(&mut self, q: u32, bm: &mut UblkQueueAffinity) -> Result { 760 | let data: UblkCtrlCmdData = UblkCtrlCmdData { 761 | cmd_op: sys::UBLK_U_CMD_GET_QUEUE_AFFINITY, 762 | flags: CTRL_CMD_HAS_BUF | CTRL_CMD_HAS_DATA | CTRL_CMD_BUF_READ, 763 | addr: bm.addr() as u64, 764 | data: q as u64, 765 | len: bm.buf_len() as u32, 766 | ..Default::default() 767 | }; 768 | self.ublk_ctrl_cmd(&data) 769 | } 770 | 771 | fn __start_user_recover(&mut self) -> Result { 772 | let data: UblkCtrlCmdData = UblkCtrlCmdData { 773 | cmd_op: sys::UBLK_U_CMD_START_USER_RECOVERY, 774 | ..Default::default() 775 | }; 776 | 777 | self.ublk_ctrl_cmd(&data) 778 | } 779 | 780 | /// End user recover for this device, do similar thing done in start_dev() 781 | /// 782 | fn end_user_recover(&mut self, pid: i32) -> Result { 783 | let data: UblkCtrlCmdData = UblkCtrlCmdData { 784 | cmd_op: sys::UBLK_U_CMD_END_USER_RECOVERY, 785 | flags: CTRL_CMD_HAS_DATA, 786 | data: pid as u64, 787 | ..Default::default() 788 | }; 789 | 790 | self.ublk_ctrl_cmd(&data) 791 | } 792 | 793 | /// End user recover for this device, do similar thing done in start_dev() 794 | /// 795 | async fn end_user_recover_async(&mut self, pid: i32) -> Result { 796 | let data: UblkCtrlCmdData = UblkCtrlCmdData { 797 | cmd_op: sys::UBLK_U_CMD_END_USER_RECOVERY, 798 | flags: CTRL_CMD_HAS_DATA, 799 | data: pid as u64, 800 | ..Default::default() 801 | }; 802 | 803 | self.ublk_ctrl_cmd_async(&data).await 804 | } 805 | 806 | fn prep_start_dev(&mut self, dev: &UblkDev) -> Result { 807 | self.read_dev_info()?; 808 | if self.dev_info.state == sys::UBLK_S_DEV_LIVE as u16 { 809 | return Ok(0); 810 | } 811 | 812 | if self.dev_info.state != sys::UBLK_S_DEV_QUIESCED as u16 { 813 | self.set_params(&dev.tgt.params)?; 814 | self.flush_json()?; 815 | } else if self.for_recover_dev() { 816 | self.flush_json()?; 817 | } else { 818 | return Err(crate::UblkError::OtherError(-libc::EINVAL)); 819 | }; 820 | 821 | Ok(0) 822 | } 823 | 824 | fn set_path_permission(path: &Path, mode: u32) -> Result { 825 | use std::os::unix::fs::PermissionsExt; 826 | 827 | let metadata = fs::metadata(path)?; 828 | let mut permissions = metadata.permissions(); 829 | 830 | permissions.set_mode(mode); 831 | fs::set_permissions(path, permissions)?; 832 | 833 | Ok(0) 834 | } 835 | 836 | /// Flush this device's json info as file 837 | fn flush_json(&mut self) -> Result { 838 | if self.json == serde_json::json!({}) { 839 | return Ok(0); 840 | } 841 | 842 | // flushing json should only be done in case of adding new device 843 | // or recovering old device 844 | if !self.for_add_dev() && !self.for_recover_dev() { 845 | return Ok(0); 846 | } 847 | 848 | let run_path = self.run_path(); 849 | let json_path = Path::new(&run_path); 850 | 851 | if let Some(parent_dir) = json_path.parent() { 852 | if !Path::new(&parent_dir).exists() { 853 | fs::create_dir_all(parent_dir)?; 854 | 855 | // It is just fine to expose the running parent directory as 856 | // 777, and we will make sure every exported running json 857 | // file as 700. 858 | Self::set_path_permission(parent_dir, 0o777)?; 859 | } 860 | } 861 | let mut run_file = fs::File::create(json_path)?; 862 | 863 | // Each exported json file is only visible for the device owner. 864 | // In future, it can be relaxed, such as allowing group to access, 865 | // according to ublk use policy 866 | Self::set_path_permission(json_path, 0o700)?; 867 | 868 | run_file.write_all(self.json.to_string().as_bytes())?; 869 | Ok(0) 870 | } 871 | 872 | /// Build json info for this device 873 | /// 874 | /// # Arguments: 875 | /// 876 | /// * `dev`: this device's UblkDev instance 877 | /// * `affi`: queue affinity vector, in which each item stores the queue's affinity 878 | /// * `tids`: queue pthread tid vector, in which each item stores the queue's 879 | /// pthread tid 880 | /// 881 | fn build_json(&mut self, dev: &UblkDev) -> Result { 882 | // keep everything not changed except for queue tid 883 | if dev.dev_info.state == sys::UBLK_S_DEV_QUIESCED as u16 { 884 | if let Some(queues) = self.json.get_mut("queues") { 885 | for qid in 0..dev.dev_info.nr_hw_queues { 886 | let t = format!("{}", qid); 887 | if let Some(q) = queues.get_mut(t) { 888 | if let Some(tid) = q.get_mut("tid") { 889 | *tid = serde_json::json!(self.queue_tids[qid as usize]); 890 | } 891 | } 892 | } 893 | } 894 | return Ok(0); 895 | } 896 | 897 | let tgt_data = dev.get_target_json(); 898 | let mut map: serde_json::Map = serde_json::Map::new(); 899 | 900 | for qid in 0..dev.dev_info.nr_hw_queues { 901 | let mut affinity = self::UblkQueueAffinity::new(); 902 | self.get_queue_affinity(qid as u32, &mut affinity)?; 903 | 904 | map.insert( 905 | format!("{}", qid), 906 | serde_json::json!({ 907 | "qid": qid, 908 | "tid": self.queue_tids[qid as usize], 909 | "affinity": affinity.to_bits_vec(), 910 | }), 911 | ); 912 | } 913 | 914 | let mut json = serde_json::json!({ 915 | "dev_info": dev.dev_info, 916 | "target": dev.tgt, 917 | "target_flags": dev.flags.bits(), 918 | }); 919 | 920 | if let Some(val) = tgt_data { 921 | json["target_data"] = val.clone() 922 | } 923 | 924 | json["queues"] = serde_json::Value::Object(map); 925 | 926 | self.json = json; 927 | Ok(0) 928 | } 929 | 930 | /// Reload json info for this device 931 | /// 932 | fn reload_json(&mut self) -> Result { 933 | let mut file = fs::File::open(self.run_path())?; 934 | let mut json_str = String::new(); 935 | 936 | file.read_to_string(&mut json_str)?; 937 | self.json = serde_json::from_str(&json_str).map_err(UblkError::JsonError)?; 938 | 939 | Ok(0) 940 | } 941 | } 942 | 943 | impl UblkCtrl { 944 | /// char device and block device name may change according to system policy, 945 | /// such udev may rename it in its own namespaces. 946 | const CDEV_PATH: &'static str = "/dev/ublkc"; 947 | const BDEV_PATH: &'static str = "/dev/ublkb"; 948 | 949 | const UBLK_DRV_F_ALL: u64 = (sys::UBLK_F_SUPPORT_ZERO_COPY 950 | | sys::UBLK_F_URING_CMD_COMP_IN_TASK 951 | | sys::UBLK_F_NEED_GET_DATA 952 | | sys::UBLK_F_USER_RECOVERY 953 | | sys::UBLK_F_USER_RECOVERY_REISSUE 954 | | sys::UBLK_F_UNPRIVILEGED_DEV 955 | | sys::UBLK_F_CMD_IOCTL_ENCODE 956 | | sys::UBLK_F_USER_COPY 957 | | sys::UBLK_F_ZONED) as u64; 958 | 959 | fn get_inner(&self) -> std::sync::RwLockReadGuard { 960 | self.inner.read().unwrap() 961 | } 962 | 963 | fn get_inner_mut(&self) -> std::sync::RwLockWriteGuard { 964 | self.inner.write().unwrap() 965 | } 966 | 967 | pub fn get_name(&self) -> String { 968 | let inner = self.get_inner(); 969 | 970 | match &inner.name { 971 | Some(name) => name.clone(), 972 | None => "none".to_string(), 973 | } 974 | } 975 | 976 | pub(crate) fn get_dev_flags(&self) -> UblkFlags { 977 | self.get_inner().dev_flags 978 | } 979 | 980 | /// New one ublk control device 981 | /// 982 | /// # Arguments: 983 | /// 984 | /// * `id`: device id, or let driver allocate one if -1 is passed 985 | /// * `nr_queues`: how many hw queues allocated for this device 986 | /// * `depth`: each hw queue's depth 987 | /// * `io_buf_bytes`: max buf size for each IO 988 | /// * `flags`: flags for setting ublk device 989 | /// * `for_add`: is for adding new device 990 | /// * `dev_flags`: global flags as userspace side feature, will be 991 | /// shared with UblkDev and UblkQueue 992 | /// 993 | /// ublk control device is for sending command to driver, and maintain 994 | /// device exported json file, dump, or any misc management task. 995 | /// 996 | #[allow(clippy::too_many_arguments)] 997 | pub fn new( 998 | name: Option, 999 | id: i32, 1000 | nr_queues: u32, 1001 | depth: u32, 1002 | io_buf_bytes: u32, 1003 | flags: u64, 1004 | tgt_flags: u64, 1005 | dev_flags: UblkFlags, 1006 | ) -> Result { 1007 | if (flags & !Self::UBLK_DRV_F_ALL) != 0 { 1008 | return Err(UblkError::InvalidVal); 1009 | } 1010 | 1011 | if !Path::new(CTRL_PATH).exists() { 1012 | eprintln!("Please run `modprobe ublk_drv` first"); 1013 | return Err(UblkError::OtherError(-libc::ENOENT)); 1014 | } 1015 | 1016 | if dev_flags.intersects(UblkFlags::UBLK_DEV_F_INTERNAL_0) { 1017 | return Err(UblkError::InvalidVal); 1018 | } 1019 | 1020 | if id < 0 && id != -1 { 1021 | return Err(UblkError::InvalidVal); 1022 | } 1023 | 1024 | if nr_queues > sys::UBLK_MAX_NR_QUEUES { 1025 | return Err(UblkError::InvalidVal); 1026 | } 1027 | 1028 | if depth > sys::UBLK_MAX_QUEUE_DEPTH { 1029 | return Err(UblkError::InvalidVal); 1030 | } 1031 | 1032 | let page_sz = unsafe { libc::sysconf(libc::_SC_PAGESIZE) } as u32; 1033 | if io_buf_bytes > MAX_BUF_SZ || io_buf_bytes & (page_sz - 1) != 0 { 1034 | return Err(UblkError::InvalidVal); 1035 | } 1036 | 1037 | let inner = RwLock::new(UblkCtrlInner::new( 1038 | name, 1039 | id, 1040 | nr_queues, 1041 | depth, 1042 | io_buf_bytes, 1043 | flags, 1044 | tgt_flags, 1045 | dev_flags, 1046 | )?); 1047 | 1048 | Ok(UblkCtrl { inner }) 1049 | } 1050 | 1051 | /// Allocate one simple UblkCtrl device for delelting, listing, recovering,.., 1052 | /// and it can't be done for adding device 1053 | pub fn new_simple(id: i32) -> Result { 1054 | assert!(id >= 0); 1055 | Self::new(None, id, 0, 0, 0, 0, 0, UblkFlags::empty()) 1056 | } 1057 | 1058 | /// Return current device info 1059 | pub fn dev_info(&self) -> sys::ublksrv_ctrl_dev_info { 1060 | self.get_inner().dev_info 1061 | } 1062 | 1063 | /// Return ublk_driver's features 1064 | /// 1065 | /// Target code may need to query driver features runtime, so 1066 | /// cache it inside device 1067 | pub fn get_driver_features(&self) -> Option { 1068 | self.get_inner().features 1069 | } 1070 | 1071 | /// Return ublk char device path 1072 | pub fn get_cdev_path(&self) -> String { 1073 | self.get_inner().get_cdev_path() 1074 | } 1075 | 1076 | /// Return ublk block device path 1077 | pub fn get_bdev_path(&self) -> String { 1078 | format!("{}{}", Self::BDEV_PATH, self.get_inner().dev_info.dev_id) 1079 | } 1080 | 1081 | /// Get queue's pthread id from exported json file for this device 1082 | /// 1083 | /// # Arguments: 1084 | /// 1085 | /// * `qid`: queue id 1086 | /// 1087 | pub fn get_queue_tid(&self, qid: u32) -> Result { 1088 | let ctrl = self.get_inner_mut(); 1089 | let queues = &ctrl.json["queues"]; 1090 | let queue = &queues[qid.to_string()]; 1091 | let this_queue: Result = serde_json::from_value(queue.clone()); 1092 | 1093 | if let Ok(p) = this_queue { 1094 | Ok(p.tid as i32) 1095 | } else { 1096 | Err(UblkError::OtherError(-libc::EEXIST)) 1097 | } 1098 | } 1099 | 1100 | /// Get target flags from exported json file for this device 1101 | /// 1102 | pub fn get_target_flags_from_json(&self) -> Result { 1103 | let ctrl = self.get_inner_mut(); 1104 | let __tgt_flags = &ctrl.json["target_flags"]; 1105 | let tgt_flags: Result = serde_json::from_value(__tgt_flags.clone()); 1106 | 1107 | if let Ok(flags) = tgt_flags { 1108 | Ok(flags) 1109 | } else { 1110 | Err(UblkError::OtherError(-libc::EINVAL)) 1111 | } 1112 | } 1113 | 1114 | /// Get target from exported json file for this device 1115 | /// 1116 | pub fn get_target_from_json(&self) -> Result { 1117 | let tgt_val = &self.get_inner().json["target"]; 1118 | let tgt: Result = serde_json::from_value(tgt_val.clone()); 1119 | if let Ok(p) = tgt { 1120 | Ok(p) 1121 | } else { 1122 | Err(UblkError::OtherError(-libc::EINVAL)) 1123 | } 1124 | } 1125 | 1126 | /// Return target json data 1127 | /// 1128 | /// Should only be called after device is started, otherwise target data 1129 | /// won't be serialized out, and this API returns None 1130 | pub fn get_target_data_from_json(&self) -> Option { 1131 | let val = &self.get_inner().json["target_data"]; 1132 | if !val.is_null() { 1133 | Some(val.clone()) 1134 | } else { 1135 | None 1136 | } 1137 | } 1138 | 1139 | /// Get target type from exported json file for this device 1140 | /// 1141 | pub fn get_target_type_from_json(&self) -> Result { 1142 | if let Ok(tgt) = self.get_target_from_json() { 1143 | Ok(tgt.tgt_type) 1144 | } else { 1145 | Err(UblkError::OtherError(-libc::EINVAL)) 1146 | } 1147 | } 1148 | 1149 | /// Configure queue affinity and record queue tid 1150 | /// 1151 | /// # Arguments: 1152 | /// 1153 | /// * `qid`: queue id 1154 | /// * `tid`: tid of the queue's pthread context 1155 | /// * `pthread_id`: pthread handle for setting affinity 1156 | /// 1157 | /// Note: this method has to be called in queue daemon context 1158 | pub fn configure_queue(&self, dev: &UblkDev, qid: u16, tid: i32) -> Result { 1159 | let mut ctrl = self.get_inner_mut(); 1160 | 1161 | ctrl.store_queue_tid(qid, tid); 1162 | 1163 | ctrl.nr_queues_configured += 1; 1164 | 1165 | if ctrl.nr_queues_configured == ctrl.dev_info.nr_hw_queues { 1166 | ctrl.build_json(dev)?; 1167 | } 1168 | 1169 | Ok(0) 1170 | } 1171 | 1172 | /// Dump this device info 1173 | /// 1174 | /// The 1st part is from UblkCtrl.dev_info, and the 2nd part is 1175 | /// retrieved from device's exported json file 1176 | pub fn dump(&self) { 1177 | let mut ctrl = self.get_inner_mut(); 1178 | let mut p = sys::ublk_params { 1179 | ..Default::default() 1180 | }; 1181 | 1182 | if ctrl.read_dev_info().is_err() { 1183 | error!("Dump dev {} failed\n", ctrl.dev_info.dev_id); 1184 | return; 1185 | } 1186 | 1187 | if ctrl.get_params(&mut p).is_err() { 1188 | error!("Dump dev {} failed\n", ctrl.dev_info.dev_id); 1189 | return; 1190 | } 1191 | 1192 | let info = &ctrl.dev_info; 1193 | println!( 1194 | "\ndev id {}: nr_hw_queues {} queue_depth {} block size {} dev_capacity {}", 1195 | info.dev_id, 1196 | info.nr_hw_queues, 1197 | info.queue_depth, 1198 | 1 << p.basic.logical_bs_shift, 1199 | p.basic.dev_sectors 1200 | ); 1201 | println!( 1202 | "\tmax rq size {} daemon pid {} flags 0x{:x} state {}", 1203 | info.max_io_buf_bytes, 1204 | info.ublksrv_pid, 1205 | info.flags, 1206 | ctrl.dev_state_desc() 1207 | ); 1208 | println!( 1209 | "\tublkc: {}:{} ublkb: {}:{} owner: {}:{}", 1210 | p.devt.char_major, 1211 | p.devt.char_minor, 1212 | p.devt.disk_major, 1213 | p.devt.disk_minor, 1214 | info.owner_uid, 1215 | info.owner_gid 1216 | ); 1217 | 1218 | ctrl.dump_from_json(); 1219 | } 1220 | 1221 | pub fn run_dir() -> String { 1222 | String::from("/run/ublksrvd") 1223 | } 1224 | 1225 | /// Returned path of this device's exported json file 1226 | /// 1227 | pub fn run_path(&self) -> String { 1228 | self.get_inner().run_path() 1229 | } 1230 | 1231 | /// Retrieving supported UBLK FEATURES from ublk driver 1232 | /// 1233 | /// Supported since linux kernel v6.5 1234 | pub fn get_features() -> Option { 1235 | match Self::new(None, -1, 0, 0, 0, 0, 0, UblkFlags::empty()) { 1236 | Ok(ctrl) => ctrl.get_driver_features(), 1237 | _ => None, 1238 | } 1239 | } 1240 | 1241 | /// Retrieving device info from ublk driver 1242 | /// 1243 | pub fn read_dev_info(&self) -> Result { 1244 | self.get_inner_mut().read_dev_info() 1245 | } 1246 | 1247 | /// Retrieve this device's parameter from ublk driver by 1248 | /// sending command 1249 | /// 1250 | /// Can't pass params by reference(&mut), why? 1251 | pub fn get_params(&self, params: &mut sys::ublk_params) -> Result { 1252 | self.get_inner_mut().get_params(params) 1253 | } 1254 | 1255 | /// Send this device's parameter to ublk driver 1256 | /// 1257 | /// Note: device parameter has to send to driver before starting 1258 | /// this device 1259 | pub fn set_params(&self, params: &sys::ublk_params) -> Result { 1260 | self.get_inner_mut().set_params(params) 1261 | } 1262 | 1263 | /// Retrieving the specified queue's affinity from ublk driver 1264 | /// 1265 | pub fn get_queue_affinity(&self, q: u32, bm: &mut UblkQueueAffinity) -> Result { 1266 | self.get_inner_mut().get_queue_affinity(q, bm) 1267 | } 1268 | 1269 | /// Start user recover for this device 1270 | /// 1271 | pub fn start_user_recover(&self) -> Result { 1272 | let mut count = 0u32; 1273 | let unit = 100_u32; 1274 | 1275 | loop { 1276 | let res = self.get_inner_mut().__start_user_recover(); 1277 | if let Ok(r) = res { 1278 | if r == -libc::EBUSY { 1279 | std::thread::sleep(std::time::Duration::from_millis(unit as u64)); 1280 | count += unit; 1281 | if count < 30000 { 1282 | continue; 1283 | } 1284 | } 1285 | } 1286 | return res; 1287 | } 1288 | } 1289 | 1290 | /// Start ublk device 1291 | /// 1292 | /// # Arguments: 1293 | /// 1294 | /// * `dev`: ublk device 1295 | /// 1296 | /// Send parameter to driver, and flush json to storage, finally 1297 | /// send START command 1298 | /// 1299 | pub fn start_dev(&self, dev: &UblkDev) -> Result { 1300 | let mut ctrl = self.get_inner_mut(); 1301 | ctrl.prep_start_dev(dev)?; 1302 | 1303 | if ctrl.dev_info.state != sys::UBLK_S_DEV_QUIESCED as u16 { 1304 | ctrl.start(unsafe { libc::getpid() as i32 }) 1305 | } else if ctrl.for_recover_dev() { 1306 | ctrl.end_user_recover(unsafe { libc::getpid() as i32 }) 1307 | } else { 1308 | Err(crate::UblkError::OtherError(-libc::EINVAL)) 1309 | } 1310 | } 1311 | 1312 | /// Start ublk device in async/.await 1313 | /// 1314 | /// # Arguments: 1315 | /// 1316 | /// * `dev`: ublk device 1317 | /// 1318 | /// Send parameter to driver, and flush json to storage, finally 1319 | /// send START command 1320 | /// 1321 | pub async fn start_dev_async(&self, dev: &UblkDev) -> Result { 1322 | let mut ctrl = self.get_inner_mut(); 1323 | ctrl.prep_start_dev(dev)?; 1324 | 1325 | if ctrl.dev_info.state != sys::UBLK_S_DEV_QUIESCED as u16 { 1326 | ctrl.start_async(unsafe { libc::getpid() as i32 }).await 1327 | } else if ctrl.for_recover_dev() { 1328 | ctrl.end_user_recover_async(unsafe { libc::getpid() as i32 }) 1329 | .await 1330 | } else { 1331 | Err(crate::UblkError::OtherError(-libc::EINVAL)) 1332 | } 1333 | } 1334 | 1335 | /// Stop ublk device 1336 | /// 1337 | /// Remove json export, and send stop command to control device 1338 | /// 1339 | pub fn stop_dev(&self) -> Result { 1340 | let mut ctrl = self.get_inner_mut(); 1341 | let rp = ctrl.run_path(); 1342 | 1343 | if ctrl.for_add_dev() && Path::new(&rp).exists() { 1344 | fs::remove_file(rp)?; 1345 | } 1346 | ctrl.stop() 1347 | } 1348 | 1349 | /// Kill this device 1350 | /// 1351 | /// Preferred method for target code to stop & delete device, 1352 | /// which is safe and can avoid deadlock. 1353 | /// 1354 | /// But device may not be really removed yet, and the device ID 1355 | /// can still be in-use after kill_dev() returns. 1356 | /// 1357 | pub fn kill_dev(&self) -> Result { 1358 | self.get_inner_mut().stop() 1359 | } 1360 | 1361 | /// Remove this device and its exported json file 1362 | /// 1363 | /// Called when the user wants to remove one device really 1364 | /// 1365 | /// Be careful, this interface may cause deadlock if the 1366 | /// for-add control device is live, and it is always safe 1367 | /// to kill device via .kill_dev(). 1368 | /// 1369 | pub fn del_dev(&self) -> Result { 1370 | let mut ctrl = self.get_inner_mut(); 1371 | 1372 | ctrl.del()?; 1373 | if Path::new(&ctrl.run_path()).exists() { 1374 | fs::remove_file(ctrl.run_path())?; 1375 | } 1376 | Ok(0) 1377 | } 1378 | 1379 | /// Remove this device and its exported json file in async 1380 | /// way 1381 | pub fn del_dev_async(&self) -> Result { 1382 | let mut ctrl = self.get_inner_mut(); 1383 | 1384 | ctrl.del_async()?; 1385 | if Path::new(&ctrl.run_path()).exists() { 1386 | fs::remove_file(ctrl.run_path())?; 1387 | } 1388 | Ok(0) 1389 | } 1390 | 1391 | fn create_queue_handlers( 1392 | &self, 1393 | dev: &Arc, 1394 | q_fn: Q, 1395 | ) -> Vec> 1396 | where 1397 | Q: FnOnce(u16, &UblkDev) + Send + Sync + Clone + 'static, 1398 | { 1399 | use std::sync::mpsc; 1400 | 1401 | let mut q_threads = Vec::new(); 1402 | let nr_queues = dev.dev_info.nr_hw_queues; 1403 | 1404 | let (tx, rx) = mpsc::channel(); 1405 | 1406 | for q in 0..nr_queues { 1407 | let _dev = Arc::clone(dev); 1408 | let _tx = tx.clone(); 1409 | 1410 | let mut affinity = UblkQueueAffinity::new(); 1411 | self.get_queue_affinity(q as u32, &mut affinity).unwrap(); 1412 | let mut _q_fn = q_fn.clone(); 1413 | 1414 | q_threads.push(std::thread::spawn(move || { 1415 | //setup pthread affinity first, so that any allocation may 1416 | //be affine to cpu/memory 1417 | unsafe { 1418 | libc::pthread_setaffinity_np( 1419 | libc::pthread_self(), 1420 | affinity.buf_len(), 1421 | affinity.addr() as *const libc::cpu_set_t, 1422 | ); 1423 | } 1424 | _tx.send((q, unsafe { libc::gettid() })).unwrap(); 1425 | 1426 | unsafe { 1427 | const PR_SET_IO_FLUSHER: i32 = 57; //include/uapi/linux/prctl.h 1428 | libc::prctl(PR_SET_IO_FLUSHER, 0, 0, 0, 0); 1429 | }; 1430 | 1431 | _q_fn(q, &_dev); 1432 | })); 1433 | } 1434 | 1435 | for _q in 0..nr_queues { 1436 | let (qid, tid) = rx.recv().unwrap(); 1437 | if self.configure_queue(dev, qid, tid).is_err() { 1438 | println!( 1439 | "create_queue_handler: configure queue failed for {}-{}", 1440 | dev.dev_info.dev_id, qid 1441 | ); 1442 | } 1443 | } 1444 | 1445 | q_threads 1446 | } 1447 | 1448 | /// Run ublk daemon and kick off the ublk device, and `/dev/ublkbN` will be 1449 | /// created and exposed to userspace. 1450 | /// 1451 | /// # Arguments: 1452 | /// 1453 | /// * `tgt_fn`: target initialization handler 1454 | /// * `q_fn`: queue handler for setting up the queue and its handler, 1455 | /// all IO logical is implemented in queue handler 1456 | /// * `device_fn`: called after device is started, run in current 1457 | /// context 1458 | /// 1459 | /// This one is the preferred interface for creating ublk daemon, and 1460 | /// is friendly for user, such as, user can customize queue setup and 1461 | /// io handler, such as setup async/await for handling io command. 1462 | pub fn run_target(&self, tgt_fn: T, q_fn: Q, device_fn: W) -> Result 1463 | where 1464 | T: FnOnce(&mut UblkDev) -> Result<(), UblkError>, 1465 | Q: FnOnce(u16, &UblkDev) + Send + Sync + Clone + 'static, 1466 | W: FnOnce(&UblkCtrl) + Send + Sync + 'static, 1467 | { 1468 | let dev = &Arc::new(UblkDev::new(self.get_name(), tgt_fn, self)?); 1469 | let handles = self.create_queue_handlers(dev, q_fn); 1470 | 1471 | self.start_dev(dev)?; 1472 | 1473 | device_fn(self); 1474 | 1475 | for qh in handles { 1476 | qh.join().unwrap_or_else(|_| { 1477 | eprintln!("dev-{} join queue thread failed", dev.dev_info.dev_id) 1478 | }); 1479 | } 1480 | 1481 | //device may be deleted from another context, so it is normal 1482 | //to see -ENOENT failure here 1483 | let _ = self.stop_dev(); 1484 | 1485 | Ok(0) 1486 | } 1487 | 1488 | /// Iterator over each ublk device ID 1489 | pub fn for_each_dev_id(ops: T) 1490 | where 1491 | T: Fn(u32) + Clone + 'static, 1492 | { 1493 | if let Ok(entries) = std::fs::read_dir(UblkCtrl::run_dir()) { 1494 | for entry in entries.flatten() { 1495 | let f = entry.path(); 1496 | if f.is_file() { 1497 | if let Some(file_stem) = f.file_stem() { 1498 | if let Some(stem) = file_stem.to_str() { 1499 | if let Ok(num) = stem.parse::() { 1500 | ops(num); 1501 | } 1502 | } 1503 | } 1504 | } 1505 | } 1506 | } 1507 | } 1508 | } 1509 | 1510 | #[cfg(test)] 1511 | mod tests { 1512 | use crate::ctrl::UblkCtrlBuilder; 1513 | use crate::io::{UblkDev, UblkIOCtx, UblkQueue}; 1514 | use crate::UblkError; 1515 | use crate::{ctrl::UblkCtrl, UblkFlags, UblkIORes}; 1516 | use std::cell::Cell; 1517 | use std::path::Path; 1518 | use std::rc::Rc; 1519 | 1520 | #[test] 1521 | fn test_ublk_get_features() { 1522 | match UblkCtrl::get_features() { 1523 | Some(f) => eprintln!("features is {:04x}", f), 1524 | None => eprintln!("not support GET_FEATURES, require linux v6.5"), 1525 | } 1526 | } 1527 | 1528 | fn __test_add_ctrl_dev(del_async: bool) { 1529 | let ctrl = UblkCtrl::new( 1530 | None, 1531 | -1, 1532 | 1, 1533 | 64, 1534 | 512_u32 * 1024, 1535 | 0, 1536 | 0, 1537 | if del_async { 1538 | UblkFlags::UBLK_DEV_F_DEL_DEV_ASYNC 1539 | } else { 1540 | UblkFlags::empty() 1541 | } | UblkFlags::UBLK_DEV_F_ADD_DEV, 1542 | ) 1543 | .unwrap(); 1544 | let dev_path = ctrl.get_cdev_path(); 1545 | 1546 | std::thread::sleep(std::time::Duration::from_millis(500)); 1547 | assert!(Path::new(&dev_path).exists() == true); 1548 | } 1549 | #[test] 1550 | fn test_add_ctrl_dev_del_sync() { 1551 | __test_add_ctrl_dev(false); 1552 | } 1553 | 1554 | #[test] 1555 | fn test_add_ctrl_dev_del_async() { 1556 | __test_add_ctrl_dev(true); 1557 | } 1558 | 1559 | #[test] 1560 | fn test_add_ctrl_dev_del_async2() { 1561 | let ctrl = UblkCtrl::new( 1562 | None, 1563 | -1, 1564 | 1, 1565 | 64, 1566 | 512_u32 * 1024, 1567 | 0, 1568 | 0, 1569 | UblkFlags::UBLK_DEV_F_ADD_DEV, 1570 | ) 1571 | .unwrap(); 1572 | 1573 | match ctrl.del_dev_async() { 1574 | Ok(_res) => {} 1575 | Err(UblkError::UringIOError(res)) => { 1576 | /* -ENOSUPP */ 1577 | assert!(res == -524 || res == -libc::EOPNOTSUPP); 1578 | } 1579 | _ => assert!(false), 1580 | } 1581 | } 1582 | 1583 | /// minimized unprivileged ublk test, may just run in root privilege 1584 | #[test] 1585 | fn test_add_un_privileted_ublk() { 1586 | let ctrl = UblkCtrl::new( 1587 | None, 1588 | -1, 1589 | 1, 1590 | 64, 1591 | 512_u32 * 1024, 1592 | 0, 1593 | crate::sys::UBLK_F_UNPRIVILEGED_DEV as u64, 1594 | UblkFlags::UBLK_DEV_F_ADD_DEV, 1595 | ) 1596 | .unwrap(); 1597 | let dev_path = ctrl.get_cdev_path(); 1598 | 1599 | std::thread::sleep(std::time::Duration::from_millis(500)); 1600 | assert!(Path::new(&dev_path).exists() == true); 1601 | } 1602 | 1603 | #[test] 1604 | fn test_ublk_target_json() { 1605 | let ctrl = UblkCtrlBuilder::default() 1606 | .name("null") 1607 | .ctrl_target_flags(0xbeef as u64) 1608 | .dev_flags(UblkFlags::UBLK_DEV_F_ADD_DEV) 1609 | .build() 1610 | .unwrap(); 1611 | 1612 | let tgt_init = |dev: &mut UblkDev| { 1613 | dev.set_default_params(250_u64 << 30); 1614 | dev.set_target_json(serde_json::json!({"null": "test_data" })); 1615 | Ok(()) 1616 | }; 1617 | let dev = UblkDev::new(ctrl.get_name(), tgt_init, &ctrl).unwrap(); 1618 | 1619 | //not built & flushed out yet 1620 | assert!(ctrl.get_target_data_from_json().is_none()); 1621 | assert!(dev.get_target_json().is_some()); 1622 | assert!(dev.dev_info.ublksrv_flags == 0xbeef as u64); 1623 | assert!(ctrl.dev_info().ublksrv_flags == 0xbeef as u64); 1624 | } 1625 | 1626 | fn __test_ublk_session(w_fn: T) -> String 1627 | where 1628 | T: Fn(&UblkCtrl) + Send + Sync + Clone + 'static, 1629 | { 1630 | let ctrl = UblkCtrlBuilder::default() 1631 | .name("null") 1632 | .depth(16_u16) 1633 | .nr_queues(2_u16) 1634 | .dev_flags(UblkFlags::UBLK_DEV_F_ADD_DEV) 1635 | .build() 1636 | .unwrap(); 1637 | 1638 | let tgt_init = |dev: &mut UblkDev| { 1639 | dev.set_default_params(250_u64 << 30); 1640 | dev.set_target_json(serde_json::json!({"null": "test_data" })); 1641 | Ok(()) 1642 | }; 1643 | let q_fn = move |qid: u16, dev: &UblkDev| { 1644 | let bufs_rc = Rc::new(dev.alloc_queue_io_bufs()); 1645 | let bufs = bufs_rc.clone(); 1646 | 1647 | let io_handler = move |q: &UblkQueue, tag: u16, _io: &UblkIOCtx| { 1648 | let iod = q.get_iod(tag); 1649 | let bytes = (iod.nr_sectors << 9) as i32; 1650 | let bufs = bufs_rc.clone(); 1651 | let buf_addr = bufs[tag as usize].as_mut_ptr(); 1652 | 1653 | q.complete_io_cmd(tag, buf_addr, Ok(UblkIORes::Result(bytes))); 1654 | }; 1655 | 1656 | UblkQueue::new(qid, dev) 1657 | .unwrap() 1658 | .regiser_io_bufs(Some(&bufs)) 1659 | .submit_fetch_commands(Some(&bufs)) 1660 | .wait_and_handle_io(io_handler); 1661 | }; 1662 | 1663 | ctrl.run_target(tgt_init, q_fn, move |ctrl: &UblkCtrl| { 1664 | w_fn(ctrl); 1665 | }) 1666 | .unwrap(); 1667 | 1668 | // could be too strict because of udev 1669 | let bdev = ctrl.get_bdev_path(); 1670 | assert!(Path::new(&bdev).exists() == false); 1671 | 1672 | let cpath = ctrl.get_cdev_path(); 1673 | 1674 | cpath 1675 | } 1676 | 1677 | /// Covers basic ublk device creation and destroying by UblkSession 1678 | /// APIs 1679 | #[test] 1680 | fn test_ublk_session() { 1681 | let cdev = __test_ublk_session(|ctrl: &UblkCtrl| { 1682 | assert!(ctrl.get_target_data_from_json().is_some()); 1683 | ctrl.kill_dev().unwrap(); 1684 | }); 1685 | 1686 | // could be too strict because of udev 1687 | assert!(Path::new(&cdev).exists() == false); 1688 | } 1689 | /// test for_each_dev_id 1690 | #[test] 1691 | fn test_ublk_for_each_dev_id() { 1692 | // Create one ublk device 1693 | let handle = std::thread::spawn(|| { 1694 | let cdev = __test_ublk_session(|ctrl: &UblkCtrl| { 1695 | std::thread::sleep(std::time::Duration::from_millis(1000)); 1696 | ctrl.kill_dev().unwrap(); 1697 | }); 1698 | // could be too strict because of udev 1699 | assert!(Path::new(&cdev).exists() == false); 1700 | }); 1701 | 1702 | std::thread::sleep(std::time::Duration::from_millis(400)); 1703 | let cnt_arc = Rc::new(Cell::new(0)); 1704 | let cnt = cnt_arc.clone(); 1705 | 1706 | //count all existed ublk devices 1707 | UblkCtrl::for_each_dev_id(move |dev_id| { 1708 | let ctrl = UblkCtrl::new_simple(dev_id as i32).unwrap(); 1709 | cnt.set(cnt.get() + 1); 1710 | 1711 | let dev_path = ctrl.get_cdev_path(); 1712 | assert!(Path::new(&dev_path).exists() == true); 1713 | }); 1714 | 1715 | // we created one 1716 | assert!(cnt_arc.get() > 0); 1717 | 1718 | handle.join().unwrap(); 1719 | } 1720 | } 1721 | -------------------------------------------------------------------------------- /src/helpers.rs: -------------------------------------------------------------------------------- 1 | use std::ops::{Deref, DerefMut}; 2 | 3 | pub fn type_of_this(_: &T) -> String { 4 | std::any::type_name::().to_string() 5 | } 6 | 7 | /// Slice like buffer, which address is aligned with 4096. 8 | /// 9 | pub struct IoBuf { 10 | ptr: *mut T, 11 | size: usize, 12 | } 13 | 14 | // Users of IoBuf has to deal with Send & Sync 15 | unsafe impl Send for IoBuf {} 16 | unsafe impl Sync for IoBuf {} 17 | 18 | impl IoBuf { 19 | pub fn new(size: usize) -> Self { 20 | let layout = std::alloc::Layout::from_size_align(size, 4096).unwrap(); 21 | let ptr = unsafe { std::alloc::alloc(layout) } as *mut T; 22 | 23 | assert!(size != 0); 24 | 25 | IoBuf { ptr, size } 26 | } 27 | 28 | /// how many elements in this buffer 29 | #[allow(clippy::len_without_is_empty)] 30 | pub fn len(&self) -> usize { 31 | let elem_size = core::mem::size_of::(); 32 | self.size / elem_size 33 | } 34 | 35 | /// Return raw address of this buffer 36 | pub fn as_ptr(&self) -> *const T { 37 | self.ptr 38 | } 39 | 40 | /// Return mutable raw address of this buffer 41 | pub fn as_mut_ptr(&self) -> *mut T { 42 | self.ptr 43 | } 44 | 45 | /// fill zero for every bits of this buffer 46 | pub fn zero_buf(&mut self) { 47 | unsafe { 48 | std::ptr::write_bytes(self.as_mut_ptr(), 0, self.len()); 49 | } 50 | } 51 | } 52 | 53 | impl std::fmt::Debug for IoBuf { 54 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 55 | write!( 56 | f, 57 | "ptr {:?} size {} element type {}", 58 | self.ptr, 59 | self.size, 60 | type_of_this(unsafe { &*self.ptr }) 61 | ) 62 | } 63 | } 64 | 65 | /// Slice reference of this buffer 66 | impl Deref for IoBuf { 67 | type Target = [T]; 68 | fn deref(&self) -> &[T] { 69 | let elem_size = core::mem::size_of::(); 70 | unsafe { std::slice::from_raw_parts(self.ptr, self.size / elem_size) } 71 | } 72 | } 73 | 74 | /// Mutable slice reference of this buffer 75 | impl DerefMut for IoBuf { 76 | fn deref_mut(&mut self) -> &mut [T] { 77 | let elem_size = core::mem::size_of::(); 78 | unsafe { std::slice::from_raw_parts_mut(self.ptr, self.size / elem_size) } 79 | } 80 | } 81 | 82 | /// Free buffer with same alloc layout 83 | impl Drop for IoBuf { 84 | fn drop(&mut self) { 85 | let layout = std::alloc::Layout::from_size_align(self.size, 4096).unwrap(); 86 | unsafe { std::alloc::dealloc(self.ptr as *mut u8, layout) }; 87 | } 88 | } 89 | 90 | #[macro_export] 91 | macro_rules! zero_io_buf { 92 | ($buffer:expr) => {{ 93 | unsafe { 94 | std::ptr::write_bytes($buffer.as_mut_ptr(), 0, $buffer.len()); 95 | } 96 | }}; 97 | } 98 | -------------------------------------------------------------------------------- /src/io.rs: -------------------------------------------------------------------------------- 1 | use super::uring_async::UblkUringOpFuture; 2 | #[cfg(feature = "fat_complete")] 3 | use super::UblkFatRes; 4 | use super::{ctrl::UblkCtrl, sys, UblkError, UblkFlags, UblkIORes}; 5 | use crate::helpers::IoBuf; 6 | use io_uring::{cqueue, opcode, squeue, types, IoUring}; 7 | use serde::{Deserialize, Serialize}; 8 | use std::cell::RefCell; 9 | use std::fs; 10 | use std::os::unix::io::{AsRawFd, RawFd}; 11 | 12 | /// UblkIOCtx 13 | /// 14 | /// When any io_uring CQE is received, libublk lets the target code handle 15 | /// it by IO handling closure. This CQE may represents IO command from 16 | /// /dev/ublkbN, or plain io_uring IO submitted from ublk target code, still 17 | /// in the same IO handling closure. 18 | /// 19 | /// If target won't use io_uring to handle IO, eventfd needs to be sent from 20 | /// the real handler context to wakeup ublk queue/io_uring context for 21 | /// driving the machinery. Eventfd gets minimized support with 22 | /// `dev_flags::UBLK_DEV_F_COMP_BATCH`, and native & generic IO offloading will 23 | /// be added soon. 24 | /// 25 | /// UblkIOCtx & UblkQueue provide enough information for target code to 26 | /// handle this CQE and implement target IO handling logic. 27 | /// 28 | pub struct UblkIOCtx<'a>(&'a cqueue::Entry, u32); 29 | 30 | impl<'a> UblkIOCtx<'a> { 31 | const UBLK_IO_F_FIRST: u32 = 1u32 << 16; 32 | const UBLK_IO_F_LAST: u32 = 1u32 << 17; 33 | 34 | /// Return CQE's request of this IO, and used for handling target IO by 35 | /// io_uring. When the target IO is completed, its CQE is coming and we 36 | /// parse the IO result with result(). 37 | #[inline(always)] 38 | pub fn result(&self) -> i32 { 39 | self.0.result() 40 | } 41 | 42 | /// Get this IO's tag. 43 | /// 44 | /// tag is one core concept in libublk. 45 | /// 46 | /// Each IO command has its unique tag, which is in [0, depth), and the tag 47 | /// is originated from ublk driver actually. 48 | /// 49 | /// When target IO uses io_uring for handling IO, this tag should be inherited 50 | /// by passing `tag` via `Self::build_user_data()` 51 | #[inline(always)] 52 | pub fn get_tag(&self) -> u32 { 53 | UblkIOCtx::user_data_to_tag(self.0.user_data()) 54 | } 55 | 56 | /// Get this CQE's userdata 57 | /// 58 | #[inline(always)] 59 | pub fn user_data(&self) -> u64 { 60 | self.0.user_data() 61 | } 62 | 63 | /// Return false if it is one IO command from ublk driver, otherwise 64 | /// it is one target IO submitted from IO closure 65 | #[inline(always)] 66 | pub fn is_tgt_io(&self) -> bool { 67 | Self::is_target_io(self.0.user_data()) 68 | } 69 | 70 | /// if this IO represented by CQE is the last one in current batch 71 | #[inline(always)] 72 | pub fn is_last_cqe(&self) -> bool { 73 | (self.1 & Self::UBLK_IO_F_LAST) != 0 74 | } 75 | 76 | /// if this IO represented by CQE is the first one in current batch 77 | #[inline(always)] 78 | pub fn is_first_cqe(&self) -> bool { 79 | (self.1 & Self::UBLK_IO_F_FIRST) != 0 80 | } 81 | 82 | /// Build offset for read from or write to per-io-cmd buffer 83 | /// 84 | /// # Arguments: 85 | /// 86 | /// * `q_id`: queue id 87 | /// * `tag`: io command tag 88 | /// * `offset`: offset to this io-cmd buffer 89 | /// 90 | /// The built offset is passed to pread() or pwrite() on device of 91 | /// /dev/ublkcN for reading data from io command buffer, or writing 92 | /// data to io command buffer. 93 | /// 94 | /// Available if UBLK_F_USER_COPY is enabled. 95 | /// 96 | #[inline(always)] 97 | #[allow(arithmetic_overflow)] 98 | pub fn ublk_user_copy_pos(q_id: u16, tag: u16, offset: u32) -> u64 { 99 | assert!((offset & !sys::UBLK_IO_BUF_BITS_MASK) == 0); 100 | 101 | sys::UBLKSRV_IO_BUF_OFFSET as u64 102 | + ((((q_id as u64) << sys::UBLK_QID_OFF) as u64) 103 | | ((tag as u64) << sys::UBLK_TAG_OFF) as u64 104 | | offset as u64) 105 | } 106 | 107 | /// Build userdata for submitting io via io_uring 108 | /// 109 | /// # Arguments: 110 | /// 111 | /// * `tag`: io tag, length is 16bit 112 | /// * `op`: io operation code, length is 8bit 113 | /// * `tgt_data`: target specific data, at most 39bit (64 - 16 - 8 - 1) 114 | /// * `is_target_io`: if this userdata is for handling target io, false if 115 | /// if it is only for ublk io command 116 | /// 117 | /// The built userdata is passed to io_uring for parsing io result 118 | /// 119 | #[inline(always)] 120 | #[allow(arithmetic_overflow)] 121 | pub fn build_user_data(tag: u16, op: u32, tgt_data: u32, is_target_io: bool) -> u64 { 122 | assert!((tgt_data >> 16) == 0); 123 | 124 | let op = op & 0xff; 125 | tag as u64 | (op << 16) as u64 | (tgt_data << 24) as u64 | ((is_target_io as u64) << 63) 126 | } 127 | 128 | /// Build userdata for async io_uring OP 129 | /// 130 | /// # Arguments: 131 | /// * `tag`: io tag, length is 16bit 132 | /// * `op`: io operation code, length is 8bit 133 | /// * `op_id`: unique id in io task 134 | /// 135 | /// The built userdata has to be unique in this io task, so that 136 | /// our executor can figure out the exact submitted OP with 137 | /// completed cqe 138 | #[inline(always)] 139 | pub fn build_user_data_async(tag: u16, op: u32, op_id: u32) -> u64 { 140 | Self::build_user_data(tag, op, op_id, true) 141 | } 142 | 143 | /// Extract tag from userdata 144 | #[inline(always)] 145 | pub fn user_data_to_tag(user_data: u64) -> u32 { 146 | (user_data & 0xffff) as u32 147 | } 148 | 149 | /// Extract operation code from userdata 150 | #[inline(always)] 151 | pub fn user_data_to_op(user_data: u64) -> u32 { 152 | ((user_data >> 16) & 0xff) as u32 153 | } 154 | 155 | /// Check if this userdata is from target IO 156 | #[inline(always)] 157 | fn is_target_io(user_data: u64) -> bool { 158 | (user_data & (1_u64 << 63)) != 0 159 | } 160 | 161 | /// Check if this userdata is from IO command which is from 162 | /// ublk driver 163 | #[inline(always)] 164 | fn is_io_command(user_data: u64) -> bool { 165 | (user_data & (1_u64 << 63)) == 0 166 | } 167 | } 168 | 169 | #[derive(Debug, Clone, Default, Serialize, Deserialize)] 170 | pub struct UblkTgt { 171 | /// target type 172 | pub tgt_type: String, 173 | 174 | /// target device size, will be the actual size of /dev/ublkbN 175 | pub dev_size: u64, 176 | 177 | /// target specific io_ring flags, default is 0 178 | pub ring_flags: u64, 179 | 180 | /// uring SQ depth, default is queue depth 181 | pub sq_depth: u16, 182 | 183 | /// uring CQ depth, default is queue depth 184 | pub cq_depth: u16, 185 | 186 | /// extra io slots, usually for meta data handling or eventfd, 187 | /// default is 0 188 | pub extra_ios: u16, 189 | 190 | //const struct ublk_tgt_ops *ops; 191 | pub fds: [i32; 32], 192 | pub nr_fds: i32, 193 | 194 | /// could become bigger, is it one issue? 195 | pub params: sys::ublk_params, 196 | } 197 | 198 | /// For supporting ublk device IO path, and one thin layer of device 199 | /// abstract in handling IO level. Ublk device supports multiple queue(MQ), 200 | /// and each queue has its IO depth. 201 | /// 202 | /// The `tgt` field provides target code for customizing ublk device, such 203 | /// as defining target specific parameters, exporting its own json output, 204 | /// and so on. 205 | pub struct UblkDev { 206 | pub dev_info: sys::ublksrv_ctrl_dev_info, 207 | 208 | /// reserved for supporting new features 209 | pub flags: UblkFlags, 210 | 211 | //fds[0] points to /dev/ublkcN 212 | cdev_file: fs::File, 213 | 214 | pub tgt: UblkTgt, 215 | tgt_json: Option, 216 | } 217 | 218 | unsafe impl Send for UblkDev {} 219 | unsafe impl Sync for UblkDev {} 220 | 221 | impl UblkDev { 222 | /// New one ublk device 223 | /// 224 | /// # Arguments: 225 | /// 226 | /// * `ops`: target operation functions 227 | /// * `ctrl`: control device reference 228 | /// * `tgt_type`: target type, such as 'loop', 'null', ... 229 | /// 230 | /// ublk device is abstraction for target, and prepare for setting 231 | /// up target. Any target private data can be defined in the data 232 | /// structure which implements UblkTgtImpl. 233 | pub fn new(tgt_name: String, ops: F, ctrl: &UblkCtrl) -> Result 234 | where 235 | F: FnOnce(&mut UblkDev) -> Result<(), UblkError>, 236 | { 237 | let info = ctrl.dev_info(); 238 | let mut tgt = UblkTgt { 239 | tgt_type: tgt_name, 240 | sq_depth: info.queue_depth, 241 | cq_depth: info.queue_depth, 242 | fds: [0_i32; 32], 243 | ring_flags: 0, 244 | ..Default::default() 245 | }; 246 | let mut cnt = 0; 247 | let cdev_path = ctrl.get_cdev_path(); 248 | 249 | // ublk char device setup(udev event handling, ...) may not be done 250 | // successfully, so wait a while. And the timeout is set as 3sec now. 251 | let cdev_file = loop { 252 | let f_result = fs::OpenOptions::new() 253 | .read(true) 254 | .write(true) 255 | .open(&cdev_path); 256 | 257 | if let Ok(f) = f_result { 258 | break f; 259 | } 260 | 261 | cnt += 1; 262 | std::thread::sleep(std::time::Duration::from_millis(10)); 263 | if cnt >= 300 { 264 | return Err(UblkError::OtherError(-libc::EACCES)); 265 | } 266 | }; 267 | 268 | tgt.fds[0] = cdev_file.as_raw_fd(); 269 | tgt.nr_fds = 1; 270 | 271 | let mut dev = UblkDev { 272 | dev_info: info, 273 | cdev_file, 274 | tgt, 275 | flags: ctrl.get_dev_flags(), 276 | tgt_json: None, 277 | }; 278 | 279 | ops(&mut dev)?; 280 | log::info!("dev {} initialized", dev.dev_info.dev_id); 281 | 282 | Ok(dev) 283 | } 284 | 285 | //private method for drop 286 | fn deinit_cdev(&mut self) { 287 | let id = self.dev_info.dev_id; 288 | 289 | log::info!("dev {} deinitialized", id); 290 | } 291 | 292 | /// Allocate IoBufs for one queue 293 | pub fn alloc_queue_io_bufs(&self) -> Vec> { 294 | let depth = self.dev_info.queue_depth; 295 | let bytes = self.dev_info.max_io_buf_bytes as usize; 296 | let mut bvec = Vec::with_capacity(depth as usize); 297 | 298 | for _ in 0..depth { 299 | bvec.push(IoBuf::::new(bytes)); 300 | } 301 | 302 | bvec 303 | } 304 | 305 | pub fn set_default_params(&mut self, dev_size: u64) { 306 | let info = self.dev_info; 307 | 308 | self.tgt.dev_size = dev_size; 309 | self.tgt.params = super::sys::ublk_params { 310 | types: super::sys::UBLK_PARAM_TYPE_BASIC, 311 | basic: super::sys::ublk_param_basic { 312 | attrs: super::sys::UBLK_ATTR_VOLATILE_CACHE, 313 | logical_bs_shift: 9, 314 | physical_bs_shift: 12, 315 | io_opt_shift: 12, 316 | io_min_shift: 12, 317 | max_sectors: info.max_io_buf_bytes >> 9, 318 | dev_sectors: dev_size >> 9, 319 | ..Default::default() 320 | }, 321 | ..Default::default() 322 | }; 323 | } 324 | 325 | // Store target specific json data, json["target_data"] 326 | pub fn set_target_json(&mut self, val: serde_json::Value) { 327 | self.tgt_json = Some(val); 328 | } 329 | 330 | // Retrieve target specific json data 331 | pub fn get_target_json(&self) -> Option<&serde_json::Value> { 332 | match self.tgt_json.as_ref() { 333 | None => None, 334 | Some(val) => Some(val), 335 | } 336 | } 337 | 338 | /// Return how many io slots, which is usually same with executor's 339 | /// nr_tasks. 340 | #[inline] 341 | pub fn get_nr_ios(&self) -> u16 { 342 | self.dev_info.queue_depth + self.tgt.extra_ios 343 | } 344 | } 345 | 346 | impl Drop for UblkDev { 347 | fn drop(&mut self) { 348 | self.deinit_cdev(); 349 | } 350 | } 351 | 352 | #[derive(Debug, Clone, Default)] 353 | struct UblkQueueState { 354 | cmd_inflight: u32, 355 | state: u32, 356 | } 357 | 358 | impl UblkQueueState { 359 | const UBLK_QUEUE_STOPPING: u32 = 1_u32 << 0; 360 | const UBLK_QUEUE_IDLE: u32 = 1_u32 << 1; 361 | 362 | #[inline(always)] 363 | fn queue_is_quiesced(&self) -> bool { 364 | self.cmd_inflight == 0 365 | } 366 | 367 | #[inline(always)] 368 | fn queue_is_done(&self) -> bool { 369 | self.is_stopping() && self.queue_is_quiesced() 370 | } 371 | 372 | #[inline(always)] 373 | fn get_nr_cmd_inflight(&self) -> u32 { 374 | self.cmd_inflight 375 | } 376 | 377 | #[inline(always)] 378 | fn is_stopping(&self) -> bool { 379 | (self.state & Self::UBLK_QUEUE_STOPPING) != 0 380 | } 381 | 382 | #[inline(always)] 383 | fn is_idle(&self) -> bool { 384 | (self.state & Self::UBLK_QUEUE_IDLE) != 0 385 | } 386 | 387 | #[inline(always)] 388 | fn inc_cmd_inflight(&mut self) { 389 | self.cmd_inflight += 1; 390 | } 391 | 392 | #[inline(always)] 393 | fn dec_cmd_inflight(&mut self) { 394 | self.cmd_inflight -= 1; 395 | } 396 | 397 | fn mark_stopping(&mut self) { 398 | self.state |= Self::UBLK_QUEUE_STOPPING; 399 | } 400 | 401 | fn set_idle(&mut self, val: bool) { 402 | if val { 403 | self.state |= Self::UBLK_QUEUE_IDLE; 404 | } else { 405 | self.state &= !Self::UBLK_QUEUE_IDLE; 406 | } 407 | } 408 | } 409 | 410 | /// UBLK queue abstraction 411 | /// 412 | /// UblkQueue is the core part of the whole stack, which communicates with 413 | /// ublk driver via `io_uring cmd`. When any io command representing one 414 | /// block IO request originating from /dev/ublkbN comes, one uring_cmd CQE 415 | /// is received in ublk userspace side. Basically the whole stack is driven 416 | /// by io_uring CQE(uring_cmd or plain io_uring IO submitted from target 417 | /// code). Here target means the specific ublk device implementation, such 418 | /// as ublk-loop, ublk-zoned, ublk-nbd, ublk-qcow2, ... 419 | /// 420 | /// So far, each queue is handled by one its own io_uring. 421 | /// 422 | pub struct UblkQueue<'a> { 423 | flags: UblkFlags, 424 | q_id: u16, 425 | q_depth: u32, 426 | io_cmd_buf: u64, 427 | //ops: Box, 428 | pub dev: &'a UblkDev, 429 | bufs: RefCell>, 430 | state: RefCell, 431 | 432 | // call uring_op() and uring_op_mut() for manipulating 433 | // q_ring, and in future it is likely to change to 434 | // thread_local variable 435 | pub(crate) q_ring: RefCell>, 436 | } 437 | 438 | impl AsRawFd for UblkQueue<'_> { 439 | fn as_raw_fd(&self) -> RawFd { 440 | self.q_ring.borrow().as_raw_fd() 441 | } 442 | } 443 | 444 | impl Drop for UblkQueue<'_> { 445 | fn drop(&mut self) { 446 | let dev = self.dev; 447 | log::trace!("dev {} queue {} dropped", dev.dev_info.dev_id, self.q_id); 448 | 449 | if let Err(r) = self.q_ring.borrow_mut().submitter().unregister_files() { 450 | log::error!("unregister fixed files failed {}", r); 451 | } 452 | 453 | let depth = dev.dev_info.queue_depth as u32; 454 | let cmd_buf_sz = UblkQueue::cmd_buf_sz(depth) as usize; 455 | 456 | //unmap, otherwise our cdev won't be released 457 | unsafe { 458 | libc::munmap(self.io_cmd_buf as *mut libc::c_void, cmd_buf_sz); 459 | } 460 | } 461 | } 462 | 463 | #[inline(always)] 464 | fn round_up(val: u32, rnd: u32) -> u32 { 465 | (val + rnd - 1) & !(rnd - 1) 466 | } 467 | 468 | impl UblkQueue<'_> { 469 | const UBLK_QUEUE_IDLE_SECS: u32 = 20; 470 | const UBLK_QUEUE_IOCTL_ENCODE: UblkFlags = UblkFlags::UBLK_DEV_F_INTERNAL_0; 471 | 472 | #[inline(always)] 473 | fn cmd_buf_sz(depth: u32) -> u32 { 474 | let size = depth * core::mem::size_of::() as u32; 475 | let page_sz = unsafe { libc::sysconf(libc::_SC_PAGESIZE) } as u32; 476 | 477 | round_up(size, page_sz) 478 | } 479 | 480 | #[inline(always)] 481 | fn is_ioctl_encode(&self) -> bool { 482 | self.flags.intersects(Self::UBLK_QUEUE_IOCTL_ENCODE) 483 | } 484 | 485 | /// New one ublk queue 486 | /// 487 | /// # Arguments: 488 | /// 489 | /// * `q_id`: queue id, [0, nr_queues) 490 | /// * `dev`: ublk device reference 491 | /// 492 | ///ublk queue is handling IO from driver, so far we use dedicated 493 | ///io_uring for handling both IO command and IO 494 | #[allow(clippy::uninit_vec)] 495 | pub fn new(q_id: u16, dev: &UblkDev) -> Result { 496 | let tgt = &dev.tgt; 497 | let sq_depth = tgt.sq_depth; 498 | let cq_depth = tgt.cq_depth; 499 | 500 | let ring = IoUring::::builder() 501 | .setup_cqsize(cq_depth as u32) 502 | .setup_coop_taskrun() 503 | .build(sq_depth as u32)?; 504 | 505 | //todo: apply io_uring flags from tgt.ring_flags 506 | 507 | let depth = dev.dev_info.queue_depth as u32; 508 | let cdev_fd = dev.cdev_file.as_raw_fd(); 509 | let cmd_buf_sz = UblkQueue::cmd_buf_sz(depth) as usize; 510 | let max_cmd_buf_sz = UblkQueue::cmd_buf_sz(sys::UBLK_MAX_QUEUE_DEPTH) as libc::off_t; 511 | 512 | ring.submitter() 513 | .register_files(&tgt.fds[0..tgt.nr_fds as usize])?; 514 | 515 | let off = 516 | sys::UBLKSRV_CMD_BUF_OFFSET as libc::off_t + (q_id as libc::off_t) * max_cmd_buf_sz; 517 | let io_cmd_buf = unsafe { 518 | libc::mmap( 519 | std::ptr::null_mut::(), 520 | cmd_buf_sz, 521 | libc::PROT_READ, 522 | libc::MAP_SHARED | libc::MAP_POPULATE, 523 | cdev_fd, 524 | off, 525 | ) 526 | }; 527 | if io_cmd_buf == libc::MAP_FAILED { 528 | return Err(UblkError::IOError(std::io::Error::last_os_error())); 529 | } 530 | 531 | let nr_ios = depth + tgt.extra_ios as u32; 532 | let mut bufs = Vec::<*mut u8>::with_capacity(nr_ios as usize); 533 | unsafe { 534 | bufs.set_len(nr_ios as usize); 535 | } 536 | 537 | for i in 0..nr_ios { 538 | bufs[i as usize] = std::ptr::null_mut(); 539 | } 540 | 541 | assert!(!dev.flags.intersects(Self::UBLK_QUEUE_IOCTL_ENCODE)); 542 | 543 | let q = UblkQueue { 544 | flags: dev.flags 545 | | if (dev.dev_info.flags & (sys::UBLK_F_CMD_IOCTL_ENCODE as u64)) != 0 { 546 | Self::UBLK_QUEUE_IOCTL_ENCODE 547 | } else { 548 | UblkFlags::empty() 549 | }, 550 | q_id, 551 | q_depth: depth, 552 | io_cmd_buf: io_cmd_buf as u64, 553 | dev, 554 | state: RefCell::new(UblkQueueState { 555 | cmd_inflight: 0, 556 | state: 0, 557 | }), 558 | q_ring: RefCell::new(ring), 559 | bufs: RefCell::new(bufs), 560 | }; 561 | 562 | log::info!("dev {} queue {} started", dev.dev_info.dev_id, q_id); 563 | 564 | Ok(q) 565 | } 566 | 567 | // Return if queue is idle 568 | pub fn is_idle(&self) -> bool { 569 | self.state.borrow().is_idle() 570 | } 571 | 572 | // Return if queue is stopping 573 | pub fn is_stopping(&self) -> bool { 574 | self.state.borrow().is_stopping() 575 | } 576 | 577 | // Manipulate immutable queue uring 578 | pub fn uring_op(&self, op_handler: H) -> Result 579 | where 580 | H: Fn(&IoUring) -> Result, 581 | { 582 | let uring = self.q_ring.borrow(); 583 | 584 | op_handler(&uring) 585 | } 586 | 587 | // Manipulate mutable queue uring 588 | pub fn uring_op_mut(&self, op_handler: H) -> Result 589 | where 590 | H: Fn(&mut IoUring) -> Result, 591 | { 592 | let mut uring = self.q_ring.borrow_mut(); 593 | 594 | op_handler(&mut uring) 595 | } 596 | 597 | /// Return queue depth 598 | /// 599 | /// Queue depth decides the max count of inflight io command 600 | #[inline(always)] 601 | pub fn get_depth(&self) -> u32 { 602 | self.q_depth 603 | } 604 | 605 | /// Return queue id 606 | /// 607 | /// Queue id is aligned with blk-mq's queue_num 608 | #[inline(always)] 609 | pub fn get_qid(&self) -> u16 { 610 | self.q_id 611 | } 612 | 613 | /// Return IO command description info represented by `ublksrv_io_desc` 614 | /// 615 | /// # Arguments: 616 | /// 617 | /// * `tag`: io tag 618 | /// 619 | /// Returned `ublksrv_io_desc` data is readonly, and filled by ublk kernel 620 | /// driver 621 | /// 622 | #[inline(always)] 623 | pub fn get_iod(&self, tag: u16) -> &sys::ublksrv_io_desc { 624 | assert!((tag as u32) < self.q_depth); 625 | let iod = (self.io_cmd_buf + tag as u64 * 24) as *const sys::ublksrv_io_desc; 626 | unsafe { &*iod } 627 | } 628 | 629 | fn get_io_buf_addr(&self, tag: u16) -> *mut u8 { 630 | self.bufs.borrow()[tag as usize] 631 | } 632 | 633 | /// Register IO buffer, so that pages in this buffer can 634 | /// be discarded in case queue becomes idle 635 | pub fn register_io_buf(&self, tag: u16, buf: &IoBuf) { 636 | self.bufs.borrow_mut()[tag as usize] = buf.as_mut_ptr(); 637 | } 638 | 639 | /// Register IO buffer, so that pages in this buffer can 640 | /// be discarded in case queue becomes idle 641 | pub fn unregister_io_buf(&self, tag: u16) { 642 | self.bufs.borrow_mut()[tag as usize] = std::ptr::null_mut(); 643 | } 644 | 645 | /// unregister all io buffers 646 | pub(crate) fn unregister_io_bufs(&self) { 647 | for tag in 0..self.q_depth { 648 | self.unregister_io_buf(tag.try_into().unwrap()); 649 | } 650 | } 651 | 652 | /// Register Io buffers 653 | pub fn regiser_io_bufs(self, bufs: Option<&Vec>>) -> Self { 654 | if let Some(b) = bufs { 655 | for tag in 0..self.q_depth { 656 | self.register_io_buf(tag.try_into().unwrap(), &b[tag as usize]); 657 | } 658 | } 659 | 660 | self 661 | } 662 | 663 | #[inline(always)] 664 | #[cfg(feature = "fat_complete")] 665 | fn support_comp_batch(&self) -> bool { 666 | self.flags.intersects(UblkFlags::UBLK_DEV_F_COMP_BATCH) 667 | } 668 | 669 | #[inline(always)] 670 | fn __queue_io_cmd( 671 | &self, 672 | r: &mut IoUring, 673 | tag: u16, 674 | cmd_op: u32, 675 | buf_addr: u64, 676 | user_data: u64, 677 | res: i32, 678 | ) -> i32 { 679 | let mut state = self.state.borrow_mut(); 680 | if state.is_stopping() { 681 | return 0; 682 | } 683 | 684 | let io_cmd = sys::ublksrv_io_cmd { 685 | tag, 686 | addr: buf_addr, 687 | q_id: self.q_id, 688 | result: res, 689 | }; 690 | 691 | let cmd_op = if !self.is_ioctl_encode() { 692 | cmd_op & 0xff 693 | } else { 694 | cmd_op 695 | }; 696 | 697 | let sqe = opcode::UringCmd16::new(types::Fixed(0), cmd_op) 698 | .cmd(unsafe { core::mem::transmute::(io_cmd) }) 699 | .build() 700 | .user_data(user_data); 701 | 702 | loop { 703 | let res = unsafe { r.submission().push(&sqe) }; 704 | 705 | match res { 706 | Ok(_) => break, 707 | Err(_) => { 708 | log::debug!("__queue_io_cmd: flush submission and retry"); 709 | r.submit_and_wait(0).unwrap(); 710 | } 711 | } 712 | } 713 | 714 | state.inc_cmd_inflight(); 715 | 716 | log::trace!( 717 | "{}: (qid {} flags {:x} tag {} cmd_op {}) stopping {}", 718 | "queue_io_cmd", 719 | self.q_id, 720 | self.flags, 721 | tag, 722 | cmd_op, 723 | state.is_stopping(), 724 | ); 725 | 726 | 1 727 | } 728 | 729 | #[inline(always)] 730 | fn queue_io_cmd( 731 | &self, 732 | r: &mut IoUring, 733 | tag: u16, 734 | cmd_op: u32, 735 | buf_addr: u64, 736 | res: i32, 737 | ) -> i32 { 738 | let data = UblkIOCtx::build_user_data(tag, cmd_op, 0, false); 739 | self.__queue_io_cmd(r, tag, cmd_op, buf_addr, data, res) 740 | } 741 | 742 | #[inline(always)] 743 | fn commit_and_queue_io_cmd( 744 | &self, 745 | r: &mut IoUring, 746 | tag: u16, 747 | buf_addr: u64, 748 | io_cmd_result: i32, 749 | ) { 750 | self.queue_io_cmd( 751 | r, 752 | tag, 753 | sys::UBLK_U_IO_COMMIT_AND_FETCH_REQ, 754 | buf_addr, 755 | io_cmd_result, 756 | ); 757 | } 758 | 759 | /// Submit one io command. 760 | /// 761 | /// When it is called 1st time on this tag, the `cmd_op` has to be 762 | /// UBLK_U_IO_FETCH_REQ, otherwise it is UBLK_U_IO_COMMIT_AND_FETCH_REQ. 763 | /// 764 | /// UblkUringOpFuture is one Future object, so this function is actually 765 | /// one async function, and user can get result by submit_io_cmd().await 766 | /// 767 | /// Once result is returned, it means this command is completed and 768 | /// one ublk IO command is coming from ublk driver. 769 | /// 770 | /// In case of zoned, `buf_addr` can be the returned LBA for zone append 771 | /// command. 772 | #[inline] 773 | pub fn submit_io_cmd( 774 | &self, 775 | tag: u16, 776 | cmd_op: u32, 777 | buf_addr: *mut u8, 778 | result: i32, 779 | ) -> UblkUringOpFuture { 780 | let f = UblkUringOpFuture::new(0); 781 | let user_data = f.user_data | (tag as u64); 782 | let mut r = self.q_ring.borrow_mut(); 783 | self.__queue_io_cmd(&mut r, tag, cmd_op, buf_addr as u64, user_data, result); 784 | 785 | f 786 | } 787 | 788 | #[inline] 789 | pub fn ublk_submit_sqe(&self, sqe: io_uring::squeue::Entry) -> UblkUringOpFuture { 790 | let f = UblkUringOpFuture::new(1_u64 << 63); 791 | let sqe = sqe.user_data(f.user_data); 792 | 793 | loop { 794 | let res = unsafe { self.q_ring.borrow_mut().submission().push(&sqe) }; 795 | 796 | match res { 797 | Ok(_) => break, 798 | Err(_) => { 799 | log::debug!("ublk_submit_sqe: flush and retry"); 800 | self.q_ring.borrow().submit_and_wait(0).unwrap(); 801 | } 802 | } 803 | } 804 | 805 | f 806 | } 807 | 808 | #[inline] 809 | pub fn ublk_submit_sqe_sync(&self, sqe: io_uring::squeue::Entry) -> Result<(), UblkError> { 810 | loop { 811 | let res = unsafe { self.q_ring.borrow_mut().submission().push(&sqe) }; 812 | 813 | match res { 814 | Ok(_) => break, 815 | Err(_) => { 816 | log::debug!("ublk_submit_sqe: flush and retry"); 817 | self.q_ring.borrow().submit_and_wait(0)?; 818 | } 819 | } 820 | } 821 | 822 | Ok(()) 823 | } 824 | 825 | /// Submit all commands for fetching IO 826 | /// 827 | /// Only called during queue initialization. After queue is setup, 828 | /// COMMIT_AND_FETCH_REQ command is used for both committing io command 829 | /// result and fetching new incoming IO 830 | pub fn submit_fetch_commands(self, bufs: Option<&Vec>>) -> Self { 831 | for i in 0..self.q_depth { 832 | let buf_addr = match bufs { 833 | Some(b) => b[i as usize].as_mut_ptr(), 834 | None => std::ptr::null_mut(), 835 | }; 836 | 837 | assert!( 838 | ((self.dev.dev_info.flags & (crate::sys::UBLK_F_USER_COPY as u64)) != 0) 839 | == bufs.is_none() 840 | ); 841 | self.queue_io_cmd( 842 | &mut self.q_ring.borrow_mut(), 843 | i as u16, 844 | sys::UBLK_U_IO_FETCH_REQ, 845 | buf_addr as u64, 846 | -1, 847 | ); 848 | } 849 | self 850 | } 851 | fn __submit_fetch_commands(&self) { 852 | for i in 0..self.q_depth { 853 | let buf_addr = self.get_io_buf_addr(i as u16) as u64; 854 | self.queue_io_cmd( 855 | &mut self.q_ring.borrow_mut(), 856 | i as u16, 857 | sys::UBLK_U_IO_FETCH_REQ, 858 | buf_addr, 859 | -1, 860 | ); 861 | } 862 | } 863 | 864 | /// Complete one io command 865 | /// 866 | /// # Arguments: 867 | /// 868 | /// * `tag`: io command tag 869 | /// * `res`: io command result 870 | /// 871 | /// When calling this API, target code has to make sure that q_ring 872 | /// won't be borrowed. 873 | #[inline] 874 | pub fn complete_io_cmd(&self, tag: u16, buf_addr: *mut u8, res: Result) { 875 | let r = &mut self.q_ring.borrow_mut(); 876 | 877 | match res { 878 | Ok(UblkIORes::Result(res)) 879 | | Err(UblkError::OtherError(res)) 880 | | Err(UblkError::UringIOError(res)) => { 881 | self.commit_and_queue_io_cmd(r, tag, buf_addr as u64, res); 882 | } 883 | Err(UblkError::UringIoQueued) => {} 884 | #[cfg(feature = "fat_complete")] 885 | Ok(UblkIORes::FatRes(fat)) => match fat { 886 | UblkFatRes::BatchRes(ios) => { 887 | assert!(self.support_comp_batch()); 888 | for item in ios { 889 | let tag = item.0; 890 | self.commit_and_queue_io_cmd(r, tag, buf_addr as u64, item.1); 891 | } 892 | } 893 | UblkFatRes::ZonedAppendRes((res, lba)) => { 894 | self.commit_and_queue_io_cmd(r, tag, lba, res); 895 | } 896 | }, 897 | _ => {} 898 | }; 899 | } 900 | 901 | #[inline(always)] 902 | fn update_state(&self, cqe: &cqueue::Entry) { 903 | if !UblkIOCtx::is_target_io(cqe.user_data()) { 904 | let mut state = self.state.borrow_mut(); 905 | 906 | state.dec_cmd_inflight(); 907 | if cqe.result() == sys::UBLK_IO_RES_ABORT { 908 | state.mark_stopping(); 909 | } 910 | } 911 | } 912 | 913 | #[inline(always)] 914 | fn handle_cqe(&self, mut ops: F, e: &UblkIOCtx) 915 | where 916 | F: FnMut(&UblkQueue, u16, &UblkIOCtx), 917 | { 918 | let data = e.user_data(); 919 | let res = e.result(); 920 | let tag = UblkIOCtx::user_data_to_tag(data); 921 | let cmd_op = UblkIOCtx::user_data_to_op(data); 922 | 923 | { 924 | log::trace!( 925 | "{}: res {} (qid {} tag {} cmd_op {} target {}) state {:?}", 926 | "handle_cqe", 927 | res, 928 | self.q_id, 929 | tag, 930 | cmd_op, 931 | UblkIOCtx::is_target_io(data), 932 | self.state.borrow(), 933 | ); 934 | } 935 | 936 | if UblkIOCtx::is_target_io(data) { 937 | let res = e.result(); 938 | 939 | if res < 0 && res != -(libc::EAGAIN) { 940 | let data = e.user_data(); 941 | log::error!( 942 | "{}: failed tgt io: res {} qid {} tag {}, cmd_op {}\n", 943 | "handle_tgt_cqe", 944 | res, 945 | self.q_id, 946 | UblkIOCtx::user_data_to_tag(data), 947 | UblkIOCtx::user_data_to_op(data) 948 | ); 949 | } 950 | ops(self, tag as u16, e); 951 | return; 952 | } 953 | 954 | self.update_state(e.0); 955 | 956 | if res == sys::UBLK_IO_RES_OK as i32 { 957 | assert!(tag < self.q_depth); 958 | ops(self, tag as u16, e); 959 | } 960 | } 961 | 962 | #[inline(always)] 963 | fn reap_one_event(&self, ops: F, idx: i32, cnt: i32) -> usize 964 | where 965 | F: FnMut(&UblkQueue, u16, &UblkIOCtx), 966 | { 967 | if idx >= cnt { 968 | return 0; 969 | } 970 | 971 | let cqe = { 972 | match self.q_ring.borrow_mut().completion().next() { 973 | None => return 0, 974 | Some(r) => r, 975 | } 976 | }; 977 | 978 | let ctx = UblkIOCtx( 979 | &cqe, 980 | if idx == 0 { 981 | UblkIOCtx::UBLK_IO_F_FIRST 982 | } else { 983 | 0 984 | } | if idx + 1 == cnt { 985 | UblkIOCtx::UBLK_IO_F_LAST 986 | } else { 987 | 0 988 | }, 989 | ); 990 | self.handle_cqe(ops, &ctx); 991 | 992 | 1 993 | } 994 | 995 | fn discard_io_pages(&self) { 996 | let depth = self.q_depth; 997 | let buf_size = self.dev.dev_info.max_io_buf_bytes as usize; 998 | for i in 0..depth { 999 | let buf_addr = self.get_io_buf_addr(i as u16); 1000 | unsafe { libc::madvise(buf_addr as *mut libc::c_void, buf_size, libc::MADV_DONTNEED) }; 1001 | } 1002 | } 1003 | 1004 | fn enter_queue_idle(&self) { 1005 | let mut state = self.state.borrow_mut(); 1006 | let empty = self.q_ring.borrow_mut().submission().is_empty(); 1007 | 1008 | if empty && state.get_nr_cmd_inflight() == self.q_depth && !state.is_idle() { 1009 | log::debug!( 1010 | "dev {} queue {} becomes idle", 1011 | self.dev.dev_info.dev_id, 1012 | self.q_id 1013 | ); 1014 | state.set_idle(true); 1015 | self.discard_io_pages(); 1016 | } 1017 | } 1018 | 1019 | #[inline] 1020 | fn exit_queue_idle(&self) { 1021 | let idle = { self.state.borrow().is_idle() }; 1022 | 1023 | if idle { 1024 | log::debug!( 1025 | "dev {} queue {} becomes busy", 1026 | self.dev.dev_info.dev_id, 1027 | self.q_id 1028 | ); 1029 | self.state.borrow_mut().set_idle(false); 1030 | } 1031 | } 1032 | 1033 | /// Return inflight IOs being handled by target code 1034 | #[inline] 1035 | pub fn get_inflight_nr_io(&self) -> u32 { 1036 | self.q_depth - self.state.borrow().get_nr_cmd_inflight() 1037 | } 1038 | 1039 | #[inline] 1040 | fn __wait_ios(&self, to_wait: usize) -> Result { 1041 | let ts = types::Timespec::new().sec(Self::UBLK_QUEUE_IDLE_SECS as u64); 1042 | let args = types::SubmitArgs::new().timespec(&ts); 1043 | 1044 | let state = self.state.borrow(); 1045 | log::trace!( 1046 | "dev{}-q{}: to_submit {} inflight cmd {} stopping {}", 1047 | self.dev.dev_info.dev_id, 1048 | self.q_id, 1049 | 0, 1050 | state.get_nr_cmd_inflight(), 1051 | state.is_stopping(), 1052 | ); 1053 | 1054 | #[allow(clippy::collapsible_if)] 1055 | if state.queue_is_done() { 1056 | if self.q_ring.borrow_mut().submission().is_empty() { 1057 | return Err(UblkError::QueueIsDown); 1058 | } 1059 | } 1060 | 1061 | let mut r = self.q_ring.borrow_mut(); 1062 | let ret = r.submitter().submit_with_args(to_wait, &args); 1063 | match ret { 1064 | Err(ref err) if err.raw_os_error() == Some(libc::ETIME) => { 1065 | return Err(UblkError::UringTimeout); 1066 | } 1067 | Err(err) => return Err(UblkError::IOError(err)), 1068 | Ok(_) => {} 1069 | }; 1070 | 1071 | let nr_cqes = r.completion().len() as i32; 1072 | log::trace!( 1073 | "nr_cqes {} stop {} idle {}", 1074 | nr_cqes, 1075 | state.is_stopping(), 1076 | state.is_idle(), 1077 | ); 1078 | Ok(nr_cqes) 1079 | } 1080 | 1081 | #[inline] 1082 | fn wait_ios(&self, to_wait: usize) -> Result { 1083 | match self.__wait_ios(to_wait) { 1084 | Ok(nr_cqes) => { 1085 | if nr_cqes > 0 { 1086 | self.exit_queue_idle(); 1087 | } 1088 | Ok(nr_cqes) 1089 | } 1090 | Err(UblkError::UringTimeout) => { 1091 | self.enter_queue_idle(); 1092 | Ok(0) 1093 | } 1094 | Err(err) => Err(err), 1095 | } 1096 | } 1097 | 1098 | /// Process the incoming IOs(io commands & target IOs) from io_uring 1099 | /// 1100 | /// # Arguments: 1101 | /// 1102 | /// * `ops`: IO handling Closure 1103 | /// 1104 | /// * `to_wait`: passed to io_uring_enter(), wait until how many events are 1105 | /// available 1106 | /// 1107 | /// When either io command or target io is coming, we are called for 1108 | /// handling both. Basically the IO handling closure is called for 1109 | /// every incoming io_uring CQE. 1110 | /// 1111 | /// About IO handling Closure 1112 | /// 1113 | /// Target IO handling needs target code to implement the IO handling 1114 | /// closure. 1115 | /// 1116 | /// If IO is super fast to complete, such as ramdisk, this request can 1117 | /// be handled directly in the closure, and return `Ok(UblkIORes::Result)` 1118 | /// to complete the IO command originated from ublk driver. Another 1119 | /// example is null target(null.rs). 1120 | /// 1121 | /// Most of times, IO is slow, so it needs to be handled asynchronously. 1122 | /// The preferred way is to submit target IO by io_uring in IO handling 1123 | /// closure by using the same IO slot(represented by `tag`). After this 1124 | /// target IO is completed, one io_uring CQE will be received, and the 1125 | /// same IO closure is called for handling this target IO, which can be 1126 | /// checked by `UblkIOCtx::is_tgt_io()` method. Finally if the coming 1127 | /// target IO completion means the original IO command is done, 1128 | /// `Ok(UblkIORes::Result)` is returned for moving on, otherwise UblkError::IoQueued 1129 | /// can be returned and the IO handling closure can continue to submit IO 1130 | /// or whatever for driving its IO logic. 1131 | /// 1132 | /// Not all target IO logics can be done by io_uring, such as some 1133 | /// handling needs extra computation, which often require to offload IO 1134 | /// in another context. However, when target IO is done in remote offload 1135 | /// context, `Ok(UblkIORes::Result)` has to be returned from the queue/ 1136 | /// io_uring context. One approach is to use eventfd to wakeup & notify 1137 | /// ublk queue/io_uring. Here, eventfd can be thought as one special target 1138 | /// IO. Inside IO closure, eventfd is queued by io_uring opcode::PollAdd. 1139 | /// Once target IO handling is done, write(eventfd) can wakeup/notify ublk 1140 | /// queue & io_uring, then IO closure can get chance to handle all completed 1141 | /// IOs. Unfortunately, each IO command(originated from ublk driver) can 1142 | /// only use its own `UblkIOCtx` to complete itself. But one eventfd is 1143 | /// often reused for the whole queue, so normally multiple IOs are completed 1144 | /// when handling single eventfd CQE. Here IO completion batch feature is 1145 | /// provided, and target code can return UblkFatRes::BatchRes(batch) to 1146 | /// cover each completed IO(tag, result) in io closure. Then, all these 1147 | /// added IOs will be completed automatically. 1148 | pub(crate) fn process_ios(&self, mut ops: F, to_wait: usize) -> Result 1149 | where 1150 | F: FnMut(&UblkQueue, u16, &UblkIOCtx), 1151 | { 1152 | match self.wait_ios(to_wait) { 1153 | Err(r) => Err(r), 1154 | Ok(done) => { 1155 | for idx in 0..done { 1156 | self.reap_one_event(&mut ops, idx, done); 1157 | } 1158 | Ok(0) 1159 | } 1160 | } 1161 | } 1162 | 1163 | /// Wait and handle incoming IO 1164 | /// 1165 | /// # Arguments: 1166 | /// 1167 | /// * `ops`: IO handling closure 1168 | /// 1169 | /// Called in queue context. won't return unless error is observed. 1170 | /// Wait and handle any incoming cqe until queue is down. 1171 | /// 1172 | pub fn wait_and_handle_io(&self, mut ops: F) 1173 | where 1174 | F: FnMut(&UblkQueue, u16, &UblkIOCtx), 1175 | { 1176 | loop { 1177 | match self.process_ios(&mut ops, 1) { 1178 | Err(_) => break, 1179 | _ => continue, 1180 | } 1181 | } 1182 | 1183 | self.unregister_io_bufs(); 1184 | } 1185 | 1186 | /// Flush queued SQEs to io_uring, then wait and wake up io tasks 1187 | /// 1188 | /// # Arguments: 1189 | /// 1190 | /// * `wake_handler`: handler for wakeup io tasks pending on this uring 1191 | /// 1192 | /// * `to_wait`: passed to io_uring_enter(), wait until `to_wait` events 1193 | /// are available. It won't block in waiting for events if `to_wait` is 1194 | /// zero. 1195 | /// 1196 | /// Returns how many CQEs handled in this batch. 1197 | /// 1198 | /// This API is useful if user needs target specific batch handling. 1199 | pub fn flush_and_wake_io_tasks( 1200 | &self, 1201 | wake_handler: F, 1202 | to_wait: usize, 1203 | ) -> Result 1204 | where 1205 | F: Fn(u64, &cqueue::Entry, bool), 1206 | { 1207 | match self.wait_ios(to_wait) { 1208 | Err(r) => Err(r), 1209 | Ok(done) => { 1210 | for i in 0..done { 1211 | let cqe = { 1212 | match self.q_ring.borrow_mut().completion().next() { 1213 | None => return Err(UblkError::OtherError(-libc::EINVAL)), 1214 | Some(r) => r, 1215 | } 1216 | }; 1217 | let user_data = cqe.user_data(); 1218 | if UblkIOCtx::is_io_command(user_data) { 1219 | self.update_state(&cqe); 1220 | } 1221 | wake_handler(user_data, &cqe, i == done - 1); 1222 | } 1223 | Ok(done) 1224 | } 1225 | } 1226 | } 1227 | } 1228 | 1229 | #[cfg(test)] 1230 | mod tests { 1231 | use crate::ctrl::UblkCtrlBuilder; 1232 | use crate::io::{UblkDev, UblkQueue}; 1233 | use crate::{UblkError, UblkFlags}; 1234 | use io_uring::IoUring; 1235 | 1236 | fn __submit_uring_nop(ring: &mut IoUring) -> Result { 1237 | let nop_e = io_uring::opcode::Nop::new().build().user_data(0x42).into(); 1238 | 1239 | unsafe { 1240 | let mut queue = ring.submission(); 1241 | queue.push(&nop_e).expect("queue is full"); 1242 | } 1243 | 1244 | ring.submit_and_wait(1).map_err(UblkError::IOError) 1245 | } 1246 | 1247 | #[test] 1248 | fn test_queue_uring_op() { 1249 | let ctrl = UblkCtrlBuilder::default() 1250 | .dev_flags(UblkFlags::UBLK_DEV_F_ADD_DEV) 1251 | .build() 1252 | .unwrap(); 1253 | 1254 | let tgt_init = |dev: &mut _| { 1255 | let q = UblkQueue::new(0, dev)?; 1256 | 1257 | q.uring_op(|ring: &_| { 1258 | ring.submitter().unregister_files()?; 1259 | ring.submitter() 1260 | .register_files(&dev.tgt.fds) 1261 | .map_err(UblkError::IOError) 1262 | })?; 1263 | q.uring_op_mut(|ring: &mut _| -> Result { 1264 | __submit_uring_nop(ring) 1265 | })?; 1266 | 1267 | Ok(()) 1268 | }; 1269 | 1270 | UblkDev::new(ctrl.get_name(), tgt_init, &ctrl).unwrap(); 1271 | } 1272 | } 1273 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | //! # libublk 2 | //! 3 | //! A library for building linux ublk block device in userspace, see related 4 | //! docs in `` 5 | //! and introduction doc in 6 | //! `` 7 | 8 | use bitflags::bitflags; 9 | 10 | pub mod ctrl; 11 | pub mod helpers; 12 | pub mod io; 13 | pub mod sys; 14 | pub mod uring_async; 15 | 16 | bitflags! { 17 | #[derive(Default, Debug, PartialEq, Eq, Copy, Clone)] 18 | /// UblkFlags: top 8bits are reserved for internal use 19 | pub struct UblkFlags: u32 { 20 | /// feature: support IO batch completion from single IO tag, typical 21 | /// usecase is to complete IOs from eventfd CQE handler 22 | const UBLK_DEV_F_COMP_BATCH = 0b00000001; 23 | 24 | /// tell UblkCtrl that we are adding one new device 25 | const UBLK_DEV_F_ADD_DEV = 0b00000010; 26 | 27 | /// tell UblkCtrl that we are recovering one old device 28 | const UBLK_DEV_F_RECOVER_DEV = 0b00000100; 29 | 30 | /// tell UblkCtrl that we are deleted in async 31 | const UBLK_DEV_F_DEL_DEV_ASYNC = 0b00001000; 32 | 33 | const UBLK_DEV_F_INTERNAL_0 = 1_u32 << 31; 34 | } 35 | } 36 | 37 | /// Ublk Fat completion result 38 | pub enum UblkFatRes { 39 | /// Batch completion 40 | /// 41 | /// Vector is returned, and each element(`tag`, `result`) describes one 42 | /// io command completion result. 43 | BatchRes(Vec<(u16, i32)>), 44 | 45 | /// Zoned Append completion result 46 | /// 47 | /// (`result`, `returned lba`) is included in this result. 48 | ZonedAppendRes((i32, u64)), 49 | } 50 | 51 | /// Ublk IO completion result 52 | /// 53 | /// Ok() part of io command completion result `Result` 54 | pub enum UblkIORes { 55 | /// normal result 56 | /// 57 | /// Completion result of this io command 58 | Result(i32), 59 | 60 | /// Fat completion result 61 | #[cfg(feature = "fat_complete")] 62 | FatRes(UblkFatRes), 63 | } 64 | 65 | #[derive(thiserror::Error, Debug)] 66 | pub enum UblkError { 67 | #[error("uring submission timeout")] 68 | UringTimeout, 69 | 70 | #[error("IO Queued")] 71 | UringIoQueued, 72 | 73 | #[error("io_uring IO failure")] 74 | UringIOError(i32), 75 | 76 | #[error("json failure")] 77 | JsonError(#[from] serde_json::Error), 78 | 79 | #[error("queue down failure")] 80 | QueueIsDown, 81 | 82 | #[error("other IO failure")] 83 | IOError(#[from] std::io::Error), 84 | 85 | #[error("Invalid input")] 86 | InvalidVal, 87 | 88 | #[error("other failure")] 89 | OtherError(i32), 90 | } 91 | 92 | #[cfg(test)] 93 | mod libublk { 94 | use crate::{UblkError, UblkIORes}; 95 | 96 | #[cfg(not(feature = "fat_complete"))] 97 | #[test] 98 | fn test_feature_fat_complete() { 99 | let sz = core::mem::size_of::>(); 100 | assert!(sz == 16); 101 | } 102 | 103 | #[cfg(feature = "fat_complete")] 104 | #[test] 105 | fn test_feature_fat_complete() { 106 | let sz = core::mem::size_of::>(); 107 | assert!(sz == 32); 108 | } 109 | } 110 | -------------------------------------------------------------------------------- /src/sys.rs: -------------------------------------------------------------------------------- 1 | #![allow(non_upper_case_globals)] 2 | #![allow(non_camel_case_types)] 3 | #![allow(non_snake_case)] 4 | #![allow(dead_code)] 5 | 6 | include!(concat!(env!("OUT_DIR"), "/ublk_cmd.rs")); 7 | -------------------------------------------------------------------------------- /src/uring_async.rs: -------------------------------------------------------------------------------- 1 | use crate::io::UblkQueue; 2 | use crate::UblkError; 3 | use io_uring::{cqueue, opcode, squeue, types, IoUring}; 4 | use slab::Slab; 5 | use std::cell::RefCell; 6 | use std::os::fd::AsRawFd; 7 | use std::{ 8 | future::Future, 9 | pin::Pin, 10 | task::{Context, Poll, Waker}, 11 | }; 12 | 13 | struct FutureData { 14 | waker: Option, 15 | result: Option, 16 | } 17 | 18 | std::thread_local! { 19 | static MY_SLAB: RefCell> = RefCell::new(Slab::new()); 20 | } 21 | 22 | /// User code creates one future with user_data used for submitting 23 | /// uring OP, then future.await returns this uring OP's result. 24 | pub struct UblkUringOpFuture { 25 | pub user_data: u64, 26 | } 27 | 28 | impl UblkUringOpFuture { 29 | pub fn new(tgt_io: u64) -> Self { 30 | MY_SLAB.with(|refcell| { 31 | let mut map = refcell.borrow_mut(); 32 | 33 | let key = map.insert(FutureData { 34 | waker: None, 35 | result: None, 36 | }); 37 | let user_data = ((key as u32) << 16) as u64 | tgt_io; 38 | log::trace!("uring: new future {:x}", user_data); 39 | UblkUringOpFuture { user_data } 40 | }) 41 | } 42 | } 43 | 44 | impl Future for UblkUringOpFuture { 45 | type Output = i32; 46 | fn poll(self: Pin<&mut Self>, cx: &mut Context) -> Poll { 47 | MY_SLAB.with(|refcell| { 48 | let mut map = refcell.borrow_mut(); 49 | let key = ((self.user_data & !(1_u64 << 63)) >> 16) as usize; 50 | match map.get_mut(key) { 51 | None => { 52 | log::trace!("uring: null slab {:x}", self.user_data); 53 | Poll::Pending 54 | } 55 | Some(fd) => match fd.result { 56 | Some(result) => { 57 | map.remove(key); 58 | log::trace!("uring: uring io ready userdata {:x} ready", self.user_data); 59 | Poll::Ready(result) 60 | } 61 | None => { 62 | fd.waker = Some(cx.waker().clone()); 63 | log::trace!("uring: uring io pending userdata {:x}", self.user_data); 64 | Poll::Pending 65 | } 66 | }, 67 | } 68 | }) 69 | } 70 | } 71 | 72 | /// Wakeup the pending task, which will be marked as runnable 73 | /// by smol, and the task's future poll() will be run by smol 74 | /// executor's try_tick() 75 | #[inline] 76 | pub fn ublk_wake_task(data: u64, cqe: &cqueue::Entry) { 77 | MY_SLAB.with(|refcell| { 78 | let mut map = refcell.borrow_mut(); 79 | 80 | log::trace!( 81 | "ublk_wake_task: data {:x} user_data {:x} result {:x}", 82 | data, 83 | cqe.user_data(), 84 | cqe.result() 85 | ); 86 | let data = ((data & !(1_u64 << 63)) >> 16) as usize; 87 | if let Some(fd) = map.get_mut(data) { 88 | fd.result = Some(cqe.result()); 89 | if let Some(w) = &fd.waker { 90 | w.wake_by_ref(); 91 | } 92 | } 93 | }) 94 | } 95 | 96 | fn ublk_try_reap_cqe( 97 | ring: &mut IoUring, 98 | nr_waits: usize, 99 | ) -> Option { 100 | match ring.submit_and_wait(nr_waits) { 101 | Err(_) => None, 102 | _ => ring.completion().next(), 103 | } 104 | } 105 | 106 | fn ublk_process_queue_io( 107 | exe: &smol::LocalExecutor, 108 | q: &UblkQueue, 109 | nr_waits: usize, 110 | ) -> Result { 111 | let res = if !q.is_stopping() { 112 | q.flush_and_wake_io_tasks(|data, cqe, _| ublk_wake_task(data, cqe), nr_waits) 113 | } else { 114 | let mut r = q.q_ring.borrow_mut(); 115 | 116 | match ublk_try_reap_cqe(&mut r, nr_waits) { 117 | Some(cqe) => { 118 | let user_data = cqe.user_data(); 119 | ublk_wake_task(user_data, &cqe); 120 | Ok(1) 121 | } 122 | None => Ok(0), 123 | } 124 | }; 125 | while exe.try_tick() {} 126 | 127 | res 128 | } 129 | 130 | /// Run one task in this local Executor until the task is finished 131 | pub fn ublk_run_task( 132 | exe: &smol::LocalExecutor, 133 | task: &smol::Task, 134 | handler: F, 135 | ) -> Result<(), UblkError> 136 | where 137 | F: Fn(&smol::LocalExecutor) -> Result<(), UblkError>, 138 | { 139 | // make sure the spawned task is started by `try_tick()` 140 | while exe.try_tick() {} 141 | while !task.is_finished() { 142 | handler(exe)?; 143 | } 144 | Ok(()) 145 | } 146 | 147 | /// Run one IO task in this local Executor until the task is finished 148 | pub fn ublk_run_io_task( 149 | exe: &smol::LocalExecutor, 150 | task: &smol::Task, 151 | q: &UblkQueue, 152 | nr_waits: usize, 153 | ) -> Result<(), UblkError> { 154 | let handler = move |exe: &smol::LocalExecutor| -> Result<(), UblkError> { 155 | let _ = ublk_process_queue_io(exe, q, nr_waits)?; 156 | Ok(()) 157 | }; 158 | 159 | ublk_run_task(exe, task, handler) 160 | } 161 | 162 | /// Run one control task in this local Executor until the task is finished, 163 | /// control task is queued in the thread_local io_uring CTRL_URING. 164 | /// 165 | /// The current queue is passed in because some control command depends on 166 | /// IO command, such as START command, so ublk_run_ctrl_task() has to drive 167 | /// both data and control urings. 168 | /// 169 | /// Rust isn't friendly for using native poll or epoll, so use one dedicated 170 | /// uring for polling data and control urings. 171 | pub fn ublk_run_ctrl_task( 172 | exe: &smol::LocalExecutor, 173 | q: &UblkQueue, 174 | task: &smol::Task, 175 | ) -> Result<(), UblkError> { 176 | let mut pr: IoUring = IoUring::builder().build(4)?; 177 | let ctrl_fd = crate::ctrl::CTRL_URING.with(|refcell| refcell.borrow().as_raw_fd()); 178 | let q_fd = q.as_raw_fd(); 179 | let mut poll_q = true; 180 | let mut poll_ctrl = true; 181 | 182 | while exe.try_tick() {} 183 | while !task.is_finished() { 184 | log::debug!( 185 | "poll ring: submit and wait, ctrl_fd {} q_fd {}", 186 | ctrl_fd, 187 | q_fd 188 | ); 189 | 190 | if poll_q { 191 | let q_e = opcode::PollAdd::new(types::Fd(q_fd), (libc::POLLIN | libc::POLLOUT) as _); 192 | let _ = unsafe { pr.submission().push(&q_e.build().user_data(0x01)) }; 193 | poll_q = false; 194 | } 195 | if poll_ctrl { 196 | let ctrl_e = 197 | opcode::PollAdd::new(types::Fd(ctrl_fd), (libc::POLLIN | libc::POLLOUT) as _); 198 | let _ = unsafe { pr.submission().push(&ctrl_e.build().user_data(0x02)) }; 199 | poll_ctrl = false; 200 | } 201 | 202 | pr.submit_and_wait(1)?; 203 | let cqes: Vec = pr.completion().map(Into::into).collect(); 204 | for cqe in cqes { 205 | if cqe.user_data() == 0x1 { 206 | poll_q = true; 207 | } 208 | if cqe.user_data() == 0x2 { 209 | poll_ctrl = true; 210 | } 211 | } 212 | 213 | ublk_process_queue_io(exe, q, 0)?; 214 | let entry = 215 | crate::ctrl::CTRL_URING.with(|refcell| ublk_try_reap_cqe(&mut refcell.borrow_mut(), 0)); 216 | if let Some(cqe) = entry { 217 | ublk_wake_task(cqe.user_data(), &cqe); 218 | while exe.try_tick() {} 219 | } 220 | } 221 | //PollAdd will be canceled automatically 222 | 223 | Ok(()) 224 | } 225 | 226 | /// Wait and handle incoming IO command 227 | /// 228 | /// # Arguments: 229 | /// 230 | /// * `q`: UblkQueue instance 231 | /// * `exe`: Local async Executor 232 | /// 233 | /// Called in queue context. won't return unless error is observed. 234 | /// Wait and handle any incoming cqe until queue is down. 235 | /// 236 | /// This should be the only foreground thing done in queue thread. 237 | pub fn ublk_wait_and_handle_ios(exe: &smol::LocalExecutor, q: &UblkQueue) { 238 | loop { 239 | while exe.try_tick() {} 240 | if q.flush_and_wake_io_tasks(|data, cqe, _| ublk_wake_task(data, cqe), 1) 241 | .is_err() 242 | { 243 | break; 244 | } 245 | } 246 | q.unregister_io_bufs(); 247 | } 248 | -------------------------------------------------------------------------------- /tests/basic.rs: -------------------------------------------------------------------------------- 1 | #[cfg(test)] 2 | mod integration { 3 | use io_uring::opcode; 4 | use libublk::helpers::IoBuf; 5 | use libublk::io::{UblkDev, UblkIOCtx, UblkQueue}; 6 | use libublk::uring_async::ublk_wait_and_handle_ios; 7 | use libublk::{ctrl::UblkCtrl, ctrl::UblkCtrlBuilder, sys, UblkError, UblkFlags, UblkIORes}; 8 | use std::env; 9 | use std::io::{BufRead, BufReader}; 10 | use std::path::Path; 11 | use std::process::{Command, Stdio}; 12 | use std::rc::Rc; 13 | use std::sync::{Arc, Mutex}; 14 | 15 | fn run_ublk_disk_sanity_test(ctrl: &UblkCtrl, dev_flags: UblkFlags) { 16 | use std::os::unix::fs::PermissionsExt; 17 | let dev_path = ctrl.get_cdev_path(); 18 | 19 | std::thread::sleep(std::time::Duration::from_millis(500)); 20 | 21 | let tgt_flags = ctrl.get_target_flags_from_json().unwrap(); 22 | assert!(UblkFlags::from_bits(tgt_flags).unwrap() == dev_flags); 23 | 24 | //ublk block device should be observed now 25 | assert!(Path::new(&dev_path).exists() == true); 26 | 27 | //ublk exported json file should be observed 28 | let run_path = ctrl.run_path(); 29 | let json_path = Path::new(&run_path); 30 | assert!(json_path.exists() == true); 31 | 32 | let metadata = std::fs::metadata(json_path).unwrap(); 33 | let permissions = metadata.permissions(); 34 | assert!((permissions.mode() & 0o777) == 0o700); 35 | } 36 | 37 | fn read_ublk_disk(ctrl: &UblkCtrl) { 38 | let dev_path = ctrl.get_bdev_path(); 39 | let mut arg_list: Vec = Vec::new(); 40 | let if_dev = format!("if={}", &dev_path); 41 | 42 | arg_list.push(if_dev); 43 | arg_list.push("of=/dev/null".to_string()); 44 | arg_list.push("bs=4096".to_string()); 45 | arg_list.push("count=10k".to_string()); 46 | println!("{:?}", Command::new("dd").args(arg_list).output().unwrap()); 47 | } 48 | 49 | fn __test_ublk_null(dev_flags: UblkFlags, q_handler: fn(u16, &UblkDev)) { 50 | let ctrl = UblkCtrlBuilder::default() 51 | .name("null") 52 | .nr_queues(2) 53 | .dev_flags(dev_flags) 54 | .ctrl_flags(libublk::sys::UBLK_F_USER_COPY.into()) 55 | .build() 56 | .unwrap(); 57 | let tgt_init = |dev: &mut UblkDev| { 58 | dev.set_default_params(250_u64 << 30); 59 | Ok(()) 60 | }; 61 | 62 | let q_fn = move |qid: u16, _dev: &UblkDev| { 63 | q_handler(qid, _dev); 64 | }; 65 | 66 | ctrl.run_target(tgt_init, q_fn, move |ctrl: &UblkCtrl| { 67 | run_ublk_disk_sanity_test(ctrl, dev_flags); 68 | read_ublk_disk(ctrl); 69 | 70 | ctrl.kill_dev().unwrap(); 71 | }) 72 | .unwrap(); 73 | } 74 | 75 | /// make one ublk-null and test if /dev/ublkbN can be created successfully 76 | #[test] 77 | fn test_ublk_null() { 78 | /// called from queue_handler closure(), which supports Clone(), 79 | fn null_handle_queue(qid: u16, dev: &UblkDev) { 80 | let bufs_rc = Rc::new(dev.alloc_queue_io_bufs()); 81 | let user_copy = (dev.dev_info.flags & libublk::sys::UBLK_F_USER_COPY as u64) != 0; 82 | let bufs = bufs_rc.clone(); 83 | 84 | let io_handler = move |q: &UblkQueue, tag: u16, _io: &UblkIOCtx| { 85 | let iod = q.get_iod(tag); 86 | let bytes = (iod.nr_sectors << 9) as i32; 87 | 88 | let buf_addr = if user_copy { 89 | std::ptr::null_mut() 90 | } else { 91 | bufs[tag as usize].as_mut_ptr() 92 | }; 93 | q.complete_io_cmd(tag, buf_addr, Ok(UblkIORes::Result(bytes))); 94 | }; 95 | 96 | UblkQueue::new(qid, dev) 97 | .unwrap() 98 | .submit_fetch_commands(if user_copy { None } else { Some(&bufs_rc) }) 99 | .wait_and_handle_io(io_handler); 100 | } 101 | 102 | __test_ublk_null(UblkFlags::UBLK_DEV_F_ADD_DEV, null_handle_queue); 103 | } 104 | 105 | /// make one ublk-null and test if /dev/ublkbN can be created successfully 106 | #[cfg(feature = "fat_complete")] 107 | #[test] 108 | fn test_ublk_null_comp_batch() { 109 | use libublk::UblkFatRes; 110 | /// called from queue_handler closure(), which supports Clone(), 111 | fn null_handle_queue_batch(qid: u16, dev: &UblkDev) { 112 | let bufs_rc = Rc::new(dev.alloc_queue_io_bufs()); 113 | let user_copy = (dev.dev_info.flags & libublk::sys::UBLK_F_USER_COPY as u64) != 0; 114 | let bufs = bufs_rc.clone(); 115 | 116 | let io_handler = move |q: &UblkQueue, tag: u16, _io: &UblkIOCtx| { 117 | let iod = q.get_iod(tag); 118 | let bytes = (iod.nr_sectors << 9) as i32; 119 | 120 | let buf_addr = if user_copy { 121 | std::ptr::null_mut() 122 | } else { 123 | bufs[tag as usize].as_mut_ptr() 124 | }; 125 | 126 | let res = Ok(UblkIORes::FatRes(UblkFatRes::BatchRes(vec![(tag, bytes)]))); 127 | q.complete_io_cmd(tag, buf_addr, res); 128 | }; 129 | 130 | UblkQueue::new(qid, dev) 131 | .unwrap() 132 | .submit_fetch_commands(if user_copy { None } else { Some(&bufs_rc) }) 133 | .wait_and_handle_io(io_handler); 134 | } 135 | 136 | __test_ublk_null( 137 | UblkFlags::UBLK_DEV_F_ADD_DEV | UblkFlags::UBLK_DEV_F_COMP_BATCH, 138 | null_handle_queue_batch, 139 | ); 140 | } 141 | 142 | #[test] 143 | fn test_ublk_null_async() { 144 | // submit one io_uring Nop via io-uring crate and UringOpFuture, and 145 | // user_data has to unique among io tasks, also has to encode tag 146 | // info, so please build user_data by UblkIOCtx::build_user_data_async() 147 | async fn handle_io_cmd(q: &UblkQueue<'_>, tag: u16) -> i32 { 148 | let iod = q.get_iod(tag); 149 | let bytes = (iod.nr_sectors << 9) as i32; 150 | 151 | let res = q.ublk_submit_sqe(opcode::Nop::new().build()).await; 152 | bytes + res 153 | } 154 | 155 | //Device wide data shared among all queue context 156 | struct DevData { 157 | done: u64, 158 | } 159 | 160 | // submit one io_uring Nop via io-uring crate and UringOpFuture, and 161 | // user_data has to unique among io tasks, also has to encode tag 162 | // info, so please build user_data by UblkIOCtx::build_user_data_async() 163 | let dev_flags = UblkFlags::UBLK_DEV_F_ADD_DEV; 164 | let depth = 64_u16; 165 | let ctrl = UblkCtrlBuilder::default() 166 | .name("null") 167 | .nr_queues(2) 168 | .depth(depth) 169 | .id(-1) 170 | .dev_flags(dev_flags) 171 | .build() 172 | .unwrap(); 173 | 174 | let tgt_init = |dev: &mut UblkDev| { 175 | dev.set_default_params(250_u64 << 30); 176 | Ok(()) 177 | }; 178 | // device data is shared among all queue contexts 179 | let dev_data = Arc::new(Mutex::new(DevData { done: 0 })); 180 | let wh_dev_data = dev_data.clone(); 181 | 182 | // queue handler supports Clone(), so will be cloned in each 183 | // queue pthread context 184 | let q_fn = move |qid: u16, dev: &UblkDev| { 185 | let q_rc = Rc::new(UblkQueue::new(qid as u16, &dev).unwrap()); 186 | let exe = smol::LocalExecutor::new(); 187 | let mut f_vec = Vec::new(); 188 | 189 | // `q_fn` closure implements Clone() Trait, so the captured 190 | // `dev_data` is cloned to `q_fn` context. 191 | let _dev_data = Rc::new(dev_data); 192 | 193 | for tag in 0..depth { 194 | let q = q_rc.clone(); 195 | let __dev_data = _dev_data.clone(); 196 | 197 | f_vec.push(exe.spawn(async move { 198 | let mut cmd_op = sys::UBLK_U_IO_FETCH_REQ; 199 | let buf = IoBuf::::new(q.dev.dev_info.max_io_buf_bytes as usize); 200 | let mut res = 0; 201 | 202 | q.register_io_buf(tag, &buf); 203 | loop { 204 | let cmd_res = q.submit_io_cmd(tag, cmd_op, buf.as_mut_ptr(), res).await; 205 | if cmd_res == sys::UBLK_IO_RES_ABORT { 206 | break; 207 | } 208 | 209 | res = handle_io_cmd(&q, tag).await; 210 | cmd_op = sys::UBLK_U_IO_COMMIT_AND_FETCH_REQ; 211 | { 212 | let mut guard = __dev_data.lock().unwrap(); 213 | (*guard).done += 1; 214 | } 215 | } 216 | })); 217 | } 218 | 219 | ublk_wait_and_handle_ios(&exe, &q_rc); 220 | smol::block_on(async { futures::future::join_all(f_vec).await }); 221 | }; 222 | 223 | // kick off our targets 224 | ctrl.run_target(tgt_init, q_fn, move |ctrl: &UblkCtrl| { 225 | // run sanity and disk IO test after ublk disk is ready 226 | run_ublk_disk_sanity_test(ctrl, dev_flags); 227 | read_ublk_disk(ctrl); 228 | 229 | { 230 | let guard = wh_dev_data.lock().unwrap(); 231 | assert!((*guard).done > 0); 232 | } 233 | 234 | ctrl.kill_dev().unwrap(); 235 | }) 236 | .unwrap(); 237 | } 238 | 239 | fn rd_handle_io(q: &UblkQueue, tag: u16, _io: &UblkIOCtx, buf_addr: *mut u8, start: u64) { 240 | let iod = q.get_iod(tag); 241 | let off = (iod.start_sector << 9) as u64; 242 | let bytes = (iod.nr_sectors << 9) as u32; 243 | let op = iod.op_flags & 0xff; 244 | 245 | match op { 246 | sys::UBLK_IO_OP_FLUSH => {} 247 | sys::UBLK_IO_OP_READ => unsafe { 248 | libc::memcpy( 249 | buf_addr as *mut libc::c_void, 250 | (start + off) as *mut libc::c_void, 251 | bytes as usize, 252 | ); 253 | }, 254 | sys::UBLK_IO_OP_WRITE => unsafe { 255 | libc::memcpy( 256 | (start + off) as *mut libc::c_void, 257 | buf_addr as *mut libc::c_void, 258 | bytes as usize, 259 | ); 260 | }, 261 | _ => { 262 | q.complete_io_cmd(tag, buf_addr, Err(UblkError::OtherError(-libc::EINVAL))); 263 | return; 264 | } 265 | } 266 | 267 | let res = Ok(UblkIORes::Result(bytes as i32)); 268 | q.complete_io_cmd(tag, buf_addr, res); 269 | } 270 | 271 | fn ublk_ramdisk_tester(ctrl: &UblkCtrl, dev_flags: UblkFlags) { 272 | let dev_path = ctrl.get_bdev_path(); 273 | 274 | run_ublk_disk_sanity_test(&ctrl, dev_flags); 275 | 276 | //format as ext4 and mount over the created ublk-ramdisk 277 | { 278 | let ext4_options = block_utils::Filesystem::Ext4 { 279 | inode_size: 512, 280 | stride: Some(2), 281 | stripe_width: None, 282 | reserved_blocks_percentage: 10, 283 | }; 284 | block_utils::format_block_device(&Path::new(&dev_path), &ext4_options).unwrap(); 285 | 286 | let tmp_dir = tempfile::TempDir::new().unwrap(); 287 | let bdev = block_utils::get_device_info(Path::new(&dev_path)).unwrap(); 288 | 289 | block_utils::mount_device(&bdev, tmp_dir.path()).unwrap(); 290 | block_utils::unmount_device(tmp_dir.path()).unwrap(); 291 | } 292 | ctrl.kill_dev().unwrap(); 293 | } 294 | 295 | fn __test_ublk_ramdisk() { 296 | let size = 32_u64 << 20; 297 | let buf = libublk::helpers::IoBuf::::new(size as usize); 298 | let dev_addr = buf.as_mut_ptr() as u64; 299 | let dev_flags = UblkFlags::UBLK_DEV_F_ADD_DEV; 300 | let ctrl = UblkCtrlBuilder::default() 301 | .name("ramdisk") 302 | .id(-1) 303 | .nr_queues(1) 304 | .depth(128) 305 | .dev_flags(dev_flags) 306 | .build() 307 | .unwrap(); 308 | let tgt_init = |dev: &mut UblkDev| { 309 | dev.set_default_params(size); 310 | Ok(()) 311 | }; 312 | 313 | let q_fn = move |qid: u16, dev: &UblkDev| { 314 | let bufs_rc = Rc::new(dev.alloc_queue_io_bufs()); 315 | let bufs = bufs_rc.clone(); 316 | 317 | let io_handler = move |q: &UblkQueue, tag: u16, _io: &UblkIOCtx| { 318 | let buf_addr = bufs[tag as usize].as_mut_ptr(); 319 | 320 | rd_handle_io(q, tag, _io, buf_addr, dev_addr); 321 | }; 322 | 323 | UblkQueue::new(qid, dev) 324 | .unwrap() 325 | .regiser_io_bufs(Some(&bufs_rc)) 326 | .submit_fetch_commands(Some(&bufs_rc)) 327 | .wait_and_handle_io(io_handler); 328 | }; 329 | 330 | ctrl.run_target(tgt_init, q_fn, move |ctrl: &UblkCtrl| { 331 | ublk_ramdisk_tester(ctrl, dev_flags); 332 | }) 333 | .unwrap(); 334 | } 335 | 336 | /// make one ublk-ramdisk and test: 337 | /// - if /dev/ublkbN can be created successfully 338 | /// - if yes, then test format/mount/umount over this ublk-ramdisk 339 | #[test] 340 | fn test_ublk_ramdisk() { 341 | __test_ublk_ramdisk(); 342 | } 343 | 344 | /// make FnMut closure for IO handling 345 | #[test] 346 | fn test_fn_mut_io_closure() { 347 | /// called from queue_handler closure(), which supports Clone(), 348 | fn null_queue_mut_io(qid: u16, dev: &UblkDev) { 349 | let bufs_rc = Rc::new(dev.alloc_queue_io_bufs()); 350 | let user_copy = (dev.dev_info.flags & libublk::sys::UBLK_F_USER_COPY as u64) != 0; 351 | let bufs = bufs_rc.clone(); 352 | 353 | // modify this vector in io handling closure 354 | let mut q_vec = Vec::::new(); 355 | let io_handler = move |q: &UblkQueue, tag: u16, _io: &UblkIOCtx| { 356 | let iod = q.get_iod(tag); 357 | let res = Ok(UblkIORes::Result((iod.nr_sectors << 9) as i32)); 358 | 359 | { 360 | q_vec.push(tag as i32); 361 | if q_vec.len() >= 64 { 362 | q_vec.clear(); 363 | } 364 | } 365 | 366 | let buf_addr = if user_copy { 367 | std::ptr::null_mut() 368 | } else { 369 | let bufs = bufs_rc.clone(); 370 | bufs[tag as usize].as_mut_ptr() 371 | }; 372 | q.complete_io_cmd(tag, buf_addr, res); 373 | }; 374 | 375 | UblkQueue::new(qid, dev) 376 | .unwrap() 377 | .submit_fetch_commands(if user_copy { None } else { Some(&bufs) }) 378 | .wait_and_handle_io(io_handler); 379 | } 380 | 381 | __test_ublk_null(UblkFlags::UBLK_DEV_F_ADD_DEV, null_queue_mut_io); 382 | } 383 | 384 | /// run examples/ramdisk recovery test 385 | #[test] 386 | fn test_ublk_ramdisk_recovery() { 387 | fn get_curr_bin_dir() -> Option { 388 | if let Err(_current_exe) = env::current_exe() { 389 | None 390 | } else { 391 | env::current_exe().ok().map(|mut path| { 392 | path.pop(); 393 | if path.ends_with("deps") { 394 | path.pop(); 395 | } 396 | path 397 | }) 398 | } 399 | } 400 | 401 | fn ublk_state_wait_until(ctrl: &UblkCtrl, state: u16, timeout: u32) { 402 | let mut count = 0; 403 | let unit = 100_u32; 404 | loop { 405 | std::thread::sleep(std::time::Duration::from_millis(unit as u64)); 406 | 407 | ctrl.read_dev_info().unwrap(); 408 | if ctrl.dev_info().state == state { 409 | std::thread::sleep(std::time::Duration::from_millis(20)); 410 | break; 411 | } 412 | count += unit; 413 | assert!(count < timeout); 414 | } 415 | } 416 | 417 | let tgt_dir = get_curr_bin_dir().unwrap(); 418 | //println!("top dir: path {:?} {:?}", &tgt_dir, &file); 419 | let rd_path = tgt_dir.display().to_string() + &"/examples/ramdisk".to_string(); 420 | let mut cmd = Command::new(&rd_path) 421 | .args(["add", "-1", "32"]) 422 | .stdout(Stdio::piped()) 423 | .spawn() 424 | .expect("fail to add ublk ramdisk"); 425 | let stdout = cmd.stdout.take().expect("Failed to capture stdout"); 426 | let _ = cmd.wait().expect("Failed to wait on child"); 427 | 428 | let mut id = -1_i32; 429 | let mut tid = 0; 430 | let id_regx = regex::Regex::new(r"dev id (\d+)").unwrap(); 431 | let tid_regx = regex::Regex::new(r"queue 0 tid: (\d+)").unwrap(); 432 | for line in BufReader::new(stdout).lines() { 433 | match line { 434 | Ok(content) => { 435 | if let Some(c) = id_regx.captures(&content.as_str()) { 436 | id = c.get(1).unwrap().as_str().parse().unwrap(); 437 | } 438 | if let Some(c) = tid_regx.captures(&content.as_str()) { 439 | tid = c.get(1).unwrap().as_str().parse().unwrap(); 440 | } 441 | } 442 | Err(e) => eprintln!("Error reading line: {}", e), // Handle error 443 | } 444 | } 445 | assert!(tid != 0 && id >= 0); 446 | 447 | let ctrl = UblkCtrl::new_simple(id).unwrap(); 448 | ublk_state_wait_until(&ctrl, sys::UBLK_S_DEV_LIVE as u16, 2000); 449 | 450 | //ublk block device should be observed now 451 | let dev_path = ctrl.get_bdev_path(); 452 | assert!(Path::new(&dev_path).exists() == true); 453 | 454 | //simulate one panic by sending KILL to queue pthread 455 | unsafe { 456 | libc::kill(tid, libc::SIGKILL); 457 | } 458 | 459 | //wait device becomes quiesced 460 | ublk_state_wait_until(&ctrl, sys::UBLK_S_DEV_QUIESCED as u16, 6000); 461 | 462 | //recover device 463 | let mut cmd = Command::new(&rd_path) 464 | .args(["recover", &id.to_string().as_str()]) 465 | .stdout(Stdio::piped()) 466 | .spawn() 467 | .expect("fail to recover ramdisk"); 468 | cmd.wait().expect("Failed to wait on child"); 469 | ublk_state_wait_until(&ctrl, sys::UBLK_S_DEV_LIVE as u16, 20000); 470 | ctrl.del_dev().unwrap(); 471 | } 472 | } 473 | -------------------------------------------------------------------------------- /ublk_cmd.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ 2 | #ifndef USER_BLK_DRV_CMD_INC_H 3 | #define USER_BLK_DRV_CMD_INC_H 4 | 5 | #include 6 | 7 | /* ublk server command definition */ 8 | 9 | /* 10 | * Admin commands, issued by ublk server, and handled by ublk driver. 11 | * 12 | * Legacy command definition, don't use in new application, and don't 13 | * add new such definition any more 14 | */ 15 | #define UBLK_CMD_GET_QUEUE_AFFINITY 0x01 16 | #define UBLK_CMD_GET_DEV_INFO 0x02 17 | #define UBLK_CMD_ADD_DEV 0x04 18 | #define UBLK_CMD_DEL_DEV 0x05 19 | #define UBLK_CMD_START_DEV 0x06 20 | #define UBLK_CMD_STOP_DEV 0x07 21 | #define UBLK_CMD_SET_PARAMS 0x08 22 | #define UBLK_CMD_GET_PARAMS 0x09 23 | #define UBLK_CMD_START_USER_RECOVERY 0x10 24 | #define UBLK_CMD_END_USER_RECOVERY 0x11 25 | #define UBLK_CMD_GET_DEV_INFO2 0x12 26 | 27 | /* Any new ctrl command should encode by __IO*() */ 28 | #define UBLK_U_CMD_GET_QUEUE_AFFINITY \ 29 | _IOR('u', UBLK_CMD_GET_QUEUE_AFFINITY, struct ublksrv_ctrl_cmd) 30 | #define UBLK_U_CMD_GET_DEV_INFO \ 31 | _IOR('u', UBLK_CMD_GET_DEV_INFO, struct ublksrv_ctrl_cmd) 32 | #define UBLK_U_CMD_ADD_DEV \ 33 | _IOWR('u', UBLK_CMD_ADD_DEV, struct ublksrv_ctrl_cmd) 34 | #define UBLK_U_CMD_DEL_DEV \ 35 | _IOWR('u', UBLK_CMD_DEL_DEV, struct ublksrv_ctrl_cmd) 36 | #define UBLK_U_CMD_START_DEV \ 37 | _IOWR('u', UBLK_CMD_START_DEV, struct ublksrv_ctrl_cmd) 38 | #define UBLK_U_CMD_STOP_DEV \ 39 | _IOWR('u', UBLK_CMD_STOP_DEV, struct ublksrv_ctrl_cmd) 40 | #define UBLK_U_CMD_SET_PARAMS \ 41 | _IOWR('u', UBLK_CMD_SET_PARAMS, struct ublksrv_ctrl_cmd) 42 | #define UBLK_U_CMD_GET_PARAMS \ 43 | _IOR('u', UBLK_CMD_GET_PARAMS, struct ublksrv_ctrl_cmd) 44 | #define UBLK_U_CMD_START_USER_RECOVERY \ 45 | _IOWR('u', UBLK_CMD_START_USER_RECOVERY, struct ublksrv_ctrl_cmd) 46 | #define UBLK_U_CMD_END_USER_RECOVERY \ 47 | _IOWR('u', UBLK_CMD_END_USER_RECOVERY, struct ublksrv_ctrl_cmd) 48 | #define UBLK_U_CMD_GET_DEV_INFO2 \ 49 | _IOR('u', UBLK_CMD_GET_DEV_INFO2, struct ublksrv_ctrl_cmd) 50 | #define UBLK_U_CMD_GET_FEATURES \ 51 | _IOR('u', 0x13, struct ublksrv_ctrl_cmd) 52 | #define UBLK_U_CMD_DEL_DEV_ASYNC \ 53 | _IOR('u', 0x14, struct ublksrv_ctrl_cmd) 54 | 55 | /* 56 | * 64bits are enough now, and it should be easy to extend in case of 57 | * running out of feature flags 58 | */ 59 | #define UBLK_FEATURES_LEN 8 60 | 61 | /* 62 | * IO commands, issued by ublk server, and handled by ublk driver. 63 | * 64 | * FETCH_REQ: issued via sqe(URING_CMD) beforehand for fetching IO request 65 | * from ublk driver, should be issued only when starting device. After 66 | * the associated cqe is returned, request's tag can be retrieved via 67 | * cqe->userdata. 68 | * 69 | * COMMIT_AND_FETCH_REQ: issued via sqe(URING_CMD) after ublkserver handled 70 | * this IO request, request's handling result is committed to ublk 71 | * driver, meantime FETCH_REQ is piggyback, and FETCH_REQ has to be 72 | * handled before completing io request. 73 | * 74 | * NEED_GET_DATA: only used for write requests to set io addr and copy data 75 | * When NEED_GET_DATA is set, ublksrv has to issue UBLK_IO_NEED_GET_DATA 76 | * command after ublk driver returns UBLK_IO_RES_NEED_GET_DATA. 77 | * 78 | * It is only used if ublksrv set UBLK_F_NEED_GET_DATA flag 79 | * while starting a ublk device. 80 | */ 81 | 82 | /* 83 | * Legacy IO command definition, don't use in new application, and don't 84 | * add new such definition any more 85 | */ 86 | #define UBLK_IO_FETCH_REQ 0x20 87 | #define UBLK_IO_COMMIT_AND_FETCH_REQ 0x21 88 | #define UBLK_IO_NEED_GET_DATA 0x22 89 | 90 | /* Any new IO command should encode by __IOWR() */ 91 | #define UBLK_U_IO_FETCH_REQ \ 92 | _IOWR('u', UBLK_IO_FETCH_REQ, struct ublksrv_io_cmd) 93 | #define UBLK_U_IO_COMMIT_AND_FETCH_REQ \ 94 | _IOWR('u', UBLK_IO_COMMIT_AND_FETCH_REQ, struct ublksrv_io_cmd) 95 | #define UBLK_U_IO_NEED_GET_DATA \ 96 | _IOWR('u', UBLK_IO_NEED_GET_DATA, struct ublksrv_io_cmd) 97 | 98 | /* only ABORT means that no re-fetch */ 99 | #define UBLK_IO_RES_OK 0 100 | #define UBLK_IO_RES_NEED_GET_DATA 1 101 | #define UBLK_IO_RES_ABORT (-ENODEV) 102 | 103 | #define UBLKSRV_CMD_BUF_OFFSET 0 104 | #define UBLKSRV_IO_BUF_OFFSET 0x80000000 105 | 106 | /* tag bit is 16bit, so far limit at most 4096 IOs for each queue */ 107 | #define UBLK_MAX_QUEUE_DEPTH 4096 108 | 109 | /* single IO buffer max size is 32MB */ 110 | #define UBLK_IO_BUF_OFF 0 111 | #define UBLK_IO_BUF_BITS 25 112 | #define UBLK_IO_BUF_BITS_MASK ((1ULL << UBLK_IO_BUF_BITS) - 1) 113 | 114 | /* so at most 64K IOs for each queue */ 115 | #define UBLK_TAG_OFF UBLK_IO_BUF_BITS 116 | #define UBLK_TAG_BITS 16 117 | #define UBLK_TAG_BITS_MASK ((1ULL << UBLK_TAG_BITS) - 1) 118 | 119 | /* max 4096 queues */ 120 | #define UBLK_QID_OFF (UBLK_TAG_OFF + UBLK_TAG_BITS) 121 | #define UBLK_QID_BITS 12 122 | #define UBLK_QID_BITS_MASK ((1ULL << UBLK_QID_BITS) - 1) 123 | 124 | #define UBLK_MAX_NR_QUEUES (1U << UBLK_QID_BITS) 125 | 126 | #define UBLKSRV_IO_BUF_TOTAL_BITS (UBLK_QID_OFF + UBLK_QID_BITS) 127 | #define UBLKSRV_IO_BUF_TOTAL_SIZE (1ULL << UBLKSRV_IO_BUF_TOTAL_BITS) 128 | 129 | /* 130 | * zero copy requires 4k block size, and can remap ublk driver's io 131 | * request into ublksrv's vm space 132 | */ 133 | #define UBLK_F_SUPPORT_ZERO_COPY (1ULL << 0) 134 | 135 | /* 136 | * Force to complete io cmd via io_uring_cmd_complete_in_task so that 137 | * performance comparison is done easily with using task_work_add 138 | */ 139 | #define UBLK_F_URING_CMD_COMP_IN_TASK (1ULL << 1) 140 | 141 | /* 142 | * User should issue io cmd again for write requests to 143 | * set io buffer address and copy data from bio vectors 144 | * to the userspace io buffer. 145 | * 146 | * In this mode, task_work is not used. 147 | */ 148 | #define UBLK_F_NEED_GET_DATA (1UL << 2) 149 | 150 | #define UBLK_F_USER_RECOVERY (1UL << 3) 151 | 152 | #define UBLK_F_USER_RECOVERY_REISSUE (1UL << 4) 153 | 154 | /* 155 | * Unprivileged user can create /dev/ublkcN and /dev/ublkbN. 156 | * 157 | * /dev/ublk-control needs to be available for unprivileged user, and it 158 | * can be done via udev rule to make all control commands available to 159 | * unprivileged user. Except for the command of UBLK_CMD_ADD_DEV, all 160 | * other commands are only allowed for the owner of the specified device. 161 | * 162 | * When userspace sends UBLK_CMD_ADD_DEV, the device pair's owner_uid and 163 | * owner_gid are stored to ublksrv_ctrl_dev_info by kernel, so far only 164 | * the current user's uid/gid is stored, that said owner of the created 165 | * device is always the current user. 166 | * 167 | * We still need udev rule to apply OWNER/GROUP with the stored owner_uid 168 | * and owner_gid. 169 | * 170 | * Then ublk server can be run as unprivileged user, and /dev/ublkbN can 171 | * be accessed and managed by its owner represented by owner_uid/owner_gid. 172 | */ 173 | #define UBLK_F_UNPRIVILEGED_DEV (1UL << 5) 174 | 175 | /* use ioctl encoding for uring command */ 176 | #define UBLK_F_CMD_IOCTL_ENCODE (1UL << 6) 177 | 178 | /* Copy between request and user buffer by pread()/pwrite() */ 179 | #define UBLK_F_USER_COPY (1UL << 7) 180 | 181 | /* 182 | * User space sets this flag when setting up the device to request zoned storage support. Kernel may 183 | * deny the request by returning an error. 184 | */ 185 | #define UBLK_F_ZONED (1ULL << 8) 186 | 187 | /* device state */ 188 | #define UBLK_S_DEV_DEAD 0 189 | #define UBLK_S_DEV_LIVE 1 190 | #define UBLK_S_DEV_QUIESCED 2 191 | 192 | /* shipped via sqe->cmd of io_uring command */ 193 | struct ublksrv_ctrl_cmd { 194 | /* sent to which device, must be valid */ 195 | __u32 dev_id; 196 | 197 | /* sent to which queue, must be -1 if the cmd isn't for queue */ 198 | __u16 queue_id; 199 | /* 200 | * cmd specific buffer, can be IN or OUT. 201 | */ 202 | __u16 len; 203 | __u64 addr; 204 | 205 | /* inline data */ 206 | __u64 data[1]; 207 | 208 | /* 209 | * Used for UBLK_F_UNPRIVILEGED_DEV and UBLK_CMD_GET_DEV_INFO2 210 | * only, include null char 211 | */ 212 | __u16 dev_path_len; 213 | __u16 pad; 214 | __u32 reserved; 215 | }; 216 | 217 | struct ublksrv_ctrl_dev_info { 218 | __u16 nr_hw_queues; 219 | __u16 queue_depth; 220 | __u16 state; 221 | __u16 pad0; 222 | 223 | __u32 max_io_buf_bytes; 224 | __u32 dev_id; 225 | 226 | __s32 ublksrv_pid; 227 | __u32 pad1; 228 | 229 | __u64 flags; 230 | 231 | /* For ublksrv internal use, invisible to ublk driver */ 232 | __u64 ublksrv_flags; 233 | 234 | __u32 owner_uid; /* store by kernel */ 235 | __u32 owner_gid; /* store by kernel */ 236 | __u64 reserved1; 237 | __u64 reserved2; 238 | }; 239 | 240 | #define UBLK_IO_OP_READ 0 241 | #define UBLK_IO_OP_WRITE 1 242 | #define UBLK_IO_OP_FLUSH 2 243 | #define UBLK_IO_OP_DISCARD 3 244 | #define UBLK_IO_OP_WRITE_SAME 4 245 | #define UBLK_IO_OP_WRITE_ZEROES 5 246 | #define UBLK_IO_OP_ZONE_OPEN 10 247 | #define UBLK_IO_OP_ZONE_CLOSE 11 248 | #define UBLK_IO_OP_ZONE_FINISH 12 249 | #define UBLK_IO_OP_ZONE_APPEND 13 250 | #define UBLK_IO_OP_ZONE_RESET_ALL 14 251 | #define UBLK_IO_OP_ZONE_RESET 15 252 | /* 253 | * Construct a zone report. The report request is carried in `struct 254 | * ublksrv_io_desc`. The `start_sector` field must be the first sector of a zone 255 | * and shall indicate the first zone of the report. The `nr_zones` shall 256 | * indicate how many zones should be reported at most. The report shall be 257 | * delivered as a `struct blk_zone` array. To report fewer zones than requested, 258 | * zero the last entry of the returned array. 259 | * 260 | * Related definitions(blk_zone, blk_zone_cond, blk_zone_type, ...) in 261 | * include/uapi/linux/blkzoned.h are part of ublk UAPI. 262 | */ 263 | #define UBLK_IO_OP_REPORT_ZONES 18 264 | 265 | #define UBLK_IO_F_FAILFAST_DEV (1U << 8) 266 | #define UBLK_IO_F_FAILFAST_TRANSPORT (1U << 9) 267 | #define UBLK_IO_F_FAILFAST_DRIVER (1U << 10) 268 | #define UBLK_IO_F_META (1U << 11) 269 | #define UBLK_IO_F_FUA (1U << 13) 270 | #define UBLK_IO_F_NOUNMAP (1U << 15) 271 | #define UBLK_IO_F_SWAP (1U << 16) 272 | 273 | /* 274 | * io cmd is described by this structure, and stored in share memory, indexed 275 | * by request tag. 276 | * 277 | * The data is stored by ublk driver, and read by ublksrv after one fetch command 278 | * returns. 279 | */ 280 | struct ublksrv_io_desc { 281 | /* op: bit 0-7, flags: bit 8-31 */ 282 | __u32 op_flags; 283 | 284 | /* bindgen can't handle union well */ 285 | #if 0 286 | union { 287 | __u32 nr_sectors; 288 | __u32 nr_zones; /* for UBLK_IO_OP_REPORT_ZONES */ 289 | }; 290 | #else 291 | __u32 nr_sectors; 292 | #endif 293 | 294 | /* start sector for this io */ 295 | __u64 start_sector; 296 | 297 | /* buffer address in ublksrv daemon vm space, from ublk driver */ 298 | __u64 addr; 299 | }; 300 | 301 | static inline __u8 ublksrv_get_op(const struct ublksrv_io_desc *iod) 302 | { 303 | return iod->op_flags & 0xff; 304 | } 305 | 306 | static inline __u32 ublksrv_get_flags(const struct ublksrv_io_desc *iod) 307 | { 308 | return iod->op_flags >> 8; 309 | } 310 | 311 | /* issued to ublk driver via /dev/ublkcN */ 312 | struct ublksrv_io_cmd { 313 | __u16 q_id; 314 | 315 | /* for fetch/commit which result */ 316 | __u16 tag; 317 | 318 | /* io result, it is valid for COMMIT* command only */ 319 | __s32 result; 320 | 321 | #if 0 322 | union { 323 | /* 324 | * userspace buffer address in ublksrv daemon process, valid for 325 | * FETCH* command only 326 | * 327 | * `addr` should not be used when UBLK_F_USER_COPY is enabled, 328 | * because userspace handles data copy by pread()/pwrite() over 329 | * /dev/ublkcN. But in case of UBLK_F_ZONED, this union is 330 | * re-used to pass back the allocated LBA for 331 | * UBLK_IO_OP_ZONE_APPEND which actually depends on 332 | * UBLK_F_USER_COPY 333 | */ 334 | __u64 addr; 335 | __u64 zone_append_lba; 336 | }; 337 | #else 338 | __u64 addr; 339 | #endif 340 | }; 341 | 342 | struct ublk_param_basic { 343 | #define UBLK_ATTR_READ_ONLY (1 << 0) 344 | #define UBLK_ATTR_ROTATIONAL (1 << 1) 345 | #define UBLK_ATTR_VOLATILE_CACHE (1 << 2) 346 | #define UBLK_ATTR_FUA (1 << 3) 347 | __u32 attrs; 348 | __u8 logical_bs_shift; 349 | __u8 physical_bs_shift; 350 | __u8 io_opt_shift; 351 | __u8 io_min_shift; 352 | 353 | __u32 max_sectors; 354 | __u32 chunk_sectors; 355 | 356 | __u64 dev_sectors; 357 | __u64 virt_boundary_mask; 358 | }; 359 | 360 | struct ublk_param_discard { 361 | __u32 discard_alignment; 362 | 363 | __u32 discard_granularity; 364 | __u32 max_discard_sectors; 365 | 366 | __u32 max_write_zeroes_sectors; 367 | __u16 max_discard_segments; 368 | __u16 reserved0; 369 | }; 370 | 371 | /* 372 | * read-only, can't set via UBLK_CMD_SET_PARAMS, disk_devt is available 373 | * after device is started 374 | */ 375 | struct ublk_param_devt { 376 | __u32 char_major; 377 | __u32 char_minor; 378 | __u32 disk_major; 379 | __u32 disk_minor; 380 | }; 381 | 382 | struct ublk_param_zoned { 383 | __u32 max_open_zones; 384 | __u32 max_active_zones; 385 | __u32 max_zone_append_sectors; 386 | __u8 reserved[20]; 387 | }; 388 | 389 | struct ublk_params { 390 | /* 391 | * Total length of parameters, userspace has to set 'len' for both 392 | * SET_PARAMS and GET_PARAMS command, and driver may update len 393 | * if two sides use different version of 'ublk_params', same with 394 | * 'types' fields. 395 | */ 396 | __u32 len; 397 | #define UBLK_PARAM_TYPE_BASIC (1 << 0) 398 | #define UBLK_PARAM_TYPE_DISCARD (1 << 1) 399 | #define UBLK_PARAM_TYPE_DEVT (1 << 2) 400 | #define UBLK_PARAM_TYPE_ZONED (1 << 3) 401 | __u32 types; /* types of parameter included */ 402 | 403 | struct ublk_param_basic basic; 404 | struct ublk_param_discard discard; 405 | struct ublk_param_devt devt; 406 | struct ublk_param_zoned zoned; 407 | }; 408 | 409 | #endif 410 | -------------------------------------------------------------------------------- /utils/ublk_chown.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # SPDX-License-Identifier: MIT or Apache-2.0 3 | 4 | MY_DIR=$(cd "$(dirname "$0")";pwd) 5 | 6 | if ID=`${MY_DIR}/ublk_user_id $1 2>/dev/null`; then 7 | if [ "$2" == "add" ]; then 8 | /usr/bin/chown $ID /dev/$1 > /dev/null 2>&1 9 | fi 10 | fi 11 | -------------------------------------------------------------------------------- /utils/ublk_user_id_rs.rs: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: MIT or Apache-2.0 2 | 3 | fn main() { 4 | let s = std::env::args().nth(1).unwrap_or_else(|| "".to_string()); 5 | 6 | if s.len() >= 6 && (&s[0..5] == "ublkb" || &s[0..5] == "ublkc") { 7 | match s[5..].parse::() { 8 | Ok(id) => match libublk::ctrl::UblkCtrl::new_simple(id) { 9 | Ok(ctrl) => { 10 | let dinfo = ctrl.dev_info(); 11 | if (dinfo.flags & libublk::sys::UBLK_F_UNPRIVILEGED_DEV as u64) != 0 { 12 | println!("{}:{}", dinfo.owner_uid, dinfo.owner_gid); 13 | } 14 | std::process::exit(0); 15 | } 16 | _ => {} 17 | }, 18 | _ => {} 19 | } 20 | } 21 | std::process::exit(-1); 22 | } 23 | --------------------------------------------------------------------------------