├── .github └── workflows │ ├── build.yml │ └── ci.yml ├── .gitignore ├── COPYING ├── COPYING.LGPL ├── LICENSE ├── Makefile.am ├── README.rst ├── build_with_liburing_src ├── ci ├── .gitignore ├── mkosi.build ├── mkosi.conf ├── mkosi.default.d │ ├── 10-ubdsrv.conf │ └── fedora │ │ └── 10-fedora.conf └── mkosi.extra │ └── etc │ └── modules-load.d │ └── ublk-drv.conf ├── configure.ac ├── demo_event.c ├── demo_null.c ├── doc ├── Doxyfile ├── Makefile.am ├── external_links.rst ├── mainpage.dox ├── ublk.1 ├── ublk.1.xml └── ublk_intro.pdf ├── include ├── Makefile.am ├── nlohmann │ └── json.hpp ├── ublk_cmd.h ├── ublksrv.h ├── ublksrv_aio.h ├── ublksrv_priv.h └── ublksrv_utils.h ├── lib ├── Makefile.am ├── ublksrv.c ├── ublksrv_aio.c ├── ublksrv_cmd.c ├── ublksrv_json.cpp └── utils.c ├── m4 ├── ac_c_compile_flags.m4 └── ax_pthread.m4 ├── targets ├── include │ ├── ublksrv_tgt.h │ └── ublksrv_tgt_endian.h ├── nbd │ ├── README.rst │ ├── cliserv.c │ ├── cliserv.h │ ├── nbd-client.c │ ├── nbd-debug.h │ ├── nbd.h │ └── ublk.nbd.cpp ├── ublk.cpp ├── ublk.iscsi.cpp ├── ublk.loop.cpp ├── ublk.nfs.cpp ├── ublk.null.cpp └── ublksrv_tgt.cpp ├── tests ├── Makefile.am ├── common │ ├── fio_common │ ├── loop_common │ └── nbd_common ├── debug │ ├── test_dev │ └── ublk_docker ├── generic │ ├── 001 │ ├── 002 │ ├── 003 │ ├── 004 │ ├── 005 │ ├── 006 │ └── 007 ├── loop │ ├── 001 │ ├── 002 │ ├── 003 │ ├── 004 │ ├── 005 │ ├── 006 │ ├── 007 │ ├── 008 │ ├── 009 │ └── 010 ├── nbd │ ├── 001 │ ├── 002 │ ├── 003 │ ├── 021 │ ├── 022 │ ├── 023 │ ├── 041 │ ├── 042 │ └── 043 ├── null │ ├── 001 │ ├── 002 │ ├── 004 │ ├── 005 │ ├── 006 │ └── 007 └── run_test.sh ├── ublksrv.pc.in └── utils ├── genver.sh ├── ublk_chown.sh ├── ublk_chown_docker.sh ├── ublk_dev.rules └── ublk_user_id.c /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: Build CI 2 | 3 | on: 4 | workflow_dispatch: 5 | push: 6 | branches: [ "master"] 7 | pull_request: 8 | 
branches: [ "master" ] 9 | 10 | env: 11 | URING_VER: "2.5" 12 | 13 | jobs: 14 | build: 15 | runs-on: ubuntu-22.04 16 | steps: 17 | - name: "acquire: ubdsrv" 18 | uses: actions/checkout@v4 19 | with: 20 | path: ubdsrv 21 | 22 | - name: "acquire: liburing" 23 | run: | 24 | wget https://brick.kernel.dk/snaps/liburing-$URING_VER.tar.gz 25 | tar xzvf liburing-$URING_VER.tar.gz 26 | 27 | - name: "build: liburing" 28 | working-directory: liburing-${{ env.URING_VER }} 29 | shell: bash 30 | run: | 31 | ./configure 32 | make -j$(nproc) 33 | sudo make install 34 | 35 | - name: "build: ubdsrv with --enable-debug" 36 | working-directory: ubdsrv 37 | run: | 38 | LIBURING_DIR=${{ github.workspace }}/liburing-$URING_VER ./build_with_liburing_src --enable-debug 39 | 40 | - name: "build: ubdsrv" 41 | working-directory: ubdsrv 42 | run: | 43 | make distclean 44 | LIBURING_DIR=${{ github.workspace }}/liburing-$URING_VER ./build_with_liburing_src 45 | 46 | - name: "build: installable artifacts" 47 | working-directory: ubdsrv 48 | shell: bash 49 | run: | 50 | mkdir -p ${{ github.workspace }}/files 51 | make DESTDIR=${{ github.workspace }}/files install 52 | 53 | - name: "publish: installable artifacts" 54 | uses: actions/upload-artifact@v4 55 | with: 56 | name: ubdsrv 57 | if-no-files-found: error 58 | path: ${{ github.workspace }}/files/** 59 | 60 | - name: "publish: logs" 61 | uses: actions/upload-artifact@v4 62 | if: always() 63 | with: 64 | name: build-logs 65 | if-no-files-found: ignore 66 | path: ubdsrv/*.log 67 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: Build and Test CI 2 | 3 | on: 4 | workflow_dispatch: 5 | push: 6 | branches: [ "next" ] 7 | pull_request: 8 | branches: [ "next" ] 9 | 10 | env: 11 | URING_VER: "2.5" 12 | 13 | jobs: 14 | build: 15 | runs-on: ubuntu-22.04 16 | steps: 17 | - name: "acquire: ubdsrv" 18 | uses: 
actions/checkout@v4 19 | with: 20 | path: ubdsrv 21 | 22 | - name: "acquire: liburing" 23 | run: | 24 | wget https://brick.kernel.dk/snaps/liburing-$URING_VER.tar.gz 25 | tar xzvf liburing-$URING_VER.tar.gz 26 | 27 | - name: "build: liburing" 28 | working-directory: liburing-${{ env.URING_VER }} 29 | shell: bash 30 | run: | 31 | ./configure 32 | make -j$(nproc) 33 | sudo make install 34 | 35 | - name: "build: ubdsrv with --enable-debug" 36 | working-directory: ubdsrv 37 | run: | 38 | LIBURING_DIR=${{ github.workspace }}/liburing-$URING_VER ./build_with_liburing_src --enable-debug 39 | 40 | - name: "build: ubdsrv" 41 | working-directory: ubdsrv 42 | run: | 43 | make distclean 44 | LIBURING_DIR=${{ github.workspace }}/liburing-$URING_VER ./build_with_liburing_src 45 | 46 | - name: "build: installable artifacts" 47 | working-directory: ubdsrv 48 | shell: bash 49 | run: | 50 | mkdir -p ${{ github.workspace }}/files 51 | make DESTDIR=${{ github.workspace }}/files install 52 | 53 | - name: "publish: installable artifacts" 54 | uses: actions/upload-artifact@v4 55 | with: 56 | name: ubdsrv 57 | if-no-files-found: error 58 | path: ${{ github.workspace }}/files/** 59 | 60 | - name: "publish: logs" 61 | uses: actions/upload-artifact@v4 62 | if: always() 63 | with: 64 | name: build-logs 65 | if-no-files-found: ignore 66 | path: ubdsrv/*.log 67 | 68 | test: 69 | runs-on: ubuntu-22.04 70 | timeout-minutes: 120 71 | steps: 72 | - name: "install: mkosi + dependencies" 73 | shell: bash 74 | run: | 75 | sudo apt update -o Acquire::Retries=3 76 | sudo apt install -y dnf rpm systemd-container qemu-system-x86 ovmf e2fsprogs btrfs-progs 77 | #python3 -m pip install --user git+https://github.com/systemd/mkosi.git 78 | #python3 -m pip install --user https://github.com/systemd/mkosi/archive/refs/tags/v14.tar.gz 79 | wget https://github.com/systemd/mkosi/archive/refs/tags/v14.tar.gz 80 | tar zxf v14.tar.gz 81 | cd mkosi-14 82 | sed -i '/gpgurl\ = \urllib.parse.urljoin/c \ gpgurl\ =\ 
\"https://fedoraproject.org/fedora.gpg\"' ./mkosi/__init__.py 83 | sed -i 's/gpgcheck = True/gpgcheck = False/g' ./mkosi/__init__.py 84 | python3 -m pip install --user ./ 85 | 86 | # Required for ssh'ing into VM 87 | - name: "setup: environment" 88 | run: | 89 | sudo systemctl enable --now systemd-networkd 90 | 91 | - name: "cache: os packages" 92 | uses: actions/cache@v4 93 | with: 94 | path: ~/mkosi.cache 95 | key: fedora-cache-v2 96 | 97 | - name: "acquire: ubdsrv" 98 | uses: actions/checkout@v4 99 | 100 | - name: "build: fedora image" 101 | working-directory: ci 102 | run: | 103 | [ -d ~/mkosi.cache ] && ln -s ~/mkosi.cache mkosi.cache # expose the restored cache as ci/mkosi.cache so mkosi reuses it 104 | sudo $(which mkosi) build 105 | if [ ! -d ~/mkosi.cache ]; then cp -fr ./mkosi.cache ~/; fi 106 | 107 | - name: "start: boot fedora in qemu" 108 | working-directory: ci 109 | run: | 110 | RUNNER_TRACKING_ID="" && sudo $(which mkosi) qemu -serial none -monitor none -display none -device virtio-net-pci,netdev=network0 -netdev user,id=network0,hostfwd=tcp:127.0.0.1:5555-:22 | tee ${{ github.workspace }}/qemu.log & 111 | 112 | - name: "connect: check ssh connection" 113 | shell: bash 114 | timeout-minutes: 10 115 | working-directory: ci 116 | run: | 117 | until mkosi ssh uname -a; do 118 | echo "Retrying..." 
119 | sleep 0.25 120 | done 121 | 122 | - name: "test: run ublk" 123 | working-directory: ci 124 | run: | 125 | mkosi ssh ublk list 126 | 127 | - name: "test: run tests" 128 | working-directory: ci 129 | run: | 130 | mkosi ssh UBLK=ublk /usr/share/tests/run_test.sh all 10 tests/tmp/ 131 | 132 | - name: "cleanup" 133 | if: always() 134 | continue-on-error: true 135 | run: | 136 | cat ${{ github.workspace }}/qemu.log 137 | sudo pkill -f qemu 138 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.a 2 | *.exe 3 | *.la 4 | *.lo 5 | *.o 6 | *.so 7 | *.tar.gz 8 | *~ 9 | 10 | .deps 11 | .libs 12 | .dirstamp 13 | Makefile 14 | Makefile.in 15 | 16 | /TAGS 17 | /aclocal.m4 18 | /autom4te.cache/ 19 | /compile 20 | /config.guess 21 | /config.h.in 22 | /config.h 23 | /config.log 24 | /config.status 25 | /config.sub 26 | /configure 27 | /demo_event 28 | /demo_null 29 | /ublk_user_id 30 | /depcomp 31 | /install-sh 32 | /libtool 33 | /local* 34 | /ltmain.sh 35 | /m4/libtool.m4 36 | /m4/ltoptions.m4 37 | /m4/ltsugar.m4 38 | /m4/ltversion.m4 39 | /m4/lt~obsolete.m4 40 | /missing 41 | /stamp-h1 42 | /tags 43 | /tests/.fio_perf 44 | /tests/tmp/ 45 | /ublk 46 | /ublk.* 47 | /ublksrv.pc 48 | /doc/html 49 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2022 Ming Lei 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a 4 | copy of this software and associated documentation files (the 5 | "Software"), to deal in the Software without restriction, including 6 | without limitation the rights to use, copy, modify, merge, publish, 7 | distribute, sublicense, and/or sell copies of the Software, and to 8 | permit persons to whom the Software is furnished to do so, subject to 9 | the following conditions: 
10 | 11 | The above copyright notice and this permission notice shall be included 12 | in all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 15 | OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 17 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 18 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 19 | TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 20 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | -------------------------------------------------------------------------------- /Makefile.am: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: MIT or GPL-2.0-only 2 | 3 | ACLOCAL_AMFLAGS = -I m4 4 | 5 | EXTRA_DIST = \ 6 | COPYING.LGPL \ 7 | .gitignore \ 8 | LICENSE 9 | 10 | SUBDIRS = doc include lib tests 11 | 12 | AM_CXXFLAGS = -std=c++20 -Wunused 13 | 14 | TGT_DIR = targets 15 | TGT_INC = $(top_srcdir)/$(TGT_DIR)/include 16 | 17 | sbin_PROGRAMS = ublk ublk.null ublk.loop ublk.nbd ublk_user_id 18 | noinst_PROGRAMS = demo_null demo_event 19 | dist_sbin_SCRIPTS = utils/ublk_chown.sh utils/ublk_chown_docker.sh 20 | 21 | if HAVE_LIBNFS 22 | sbin_PROGRAMS += ublk.nfs 23 | endif 24 | 25 | if HAVE_LIBISCSI 26 | sbin_PROGRAMS += ublk.iscsi 27 | endif 28 | 29 | ublk_SOURCES = $(TGT_DIR)/ublk.cpp $(TGT_DIR)/ublksrv_tgt.cpp 30 | 31 | ublk_CFLAGS = $(WARNINGS_CFLAGS) $(LIBURING_CFLAGS) $(PTHREAD_CFLAGS) 32 | ublk_CPPFLAGS = $(ublk_CFLAGS) -I$(top_srcdir)/include -I$(TGT_INC) 33 | ublk_LDADD = lib/libublksrv.la $(LIBURING_LIBS) $(PTHREAD_LIBS) 34 | 35 | ublk_null_SOURCES = $(TGT_DIR)/ublk.null.cpp $(TGT_DIR)/ublksrv_tgt.cpp 36 | ublk_null_CFLAGS = $(WARNINGS_CFLAGS) $(LIBURING_CFLAGS) $(PTHREAD_CFLAGS) 37 | ublk_null_CPPFLAGS = $(ublk_null_CFLAGS) -I$(top_srcdir)/include 
-I$(TGT_INC) 38 | ublk_null_LDADD = lib/libublksrv.la $(LIBURING_LIBS) $(PTHREAD_LIBS) 39 | 40 | ublk_iscsi_SOURCES = $(TGT_DIR)/ublk.iscsi.cpp $(TGT_DIR)/ublksrv_tgt.cpp 41 | ublk_iscsi_CFLAGS = $(WARNINGS_CFLAGS) $(LIBURING_CFLAGS) $(PTHREAD_CFLAGS) 42 | ublk_iscsi_CPPFLAGS = $(ublk_iscsi_CFLAGS) -I$(top_srcdir)/include -I$(TGT_INC) 43 | ublk_iscsi_LDADD = lib/libublksrv.la $(LIBURING_LIBS) $(PTHREAD_LIBS) -liscsi 44 | 45 | ublk_loop_SOURCES = $(TGT_DIR)/ublk.loop.cpp $(TGT_DIR)/ublksrv_tgt.cpp 46 | ublk_loop_CFLAGS = $(WARNINGS_CFLAGS) $(LIBURING_CFLAGS) $(PTHREAD_CFLAGS) 47 | ublk_loop_CPPFLAGS = $(ublk_loop_CFLAGS) -I$(top_srcdir)/include -I$(TGT_INC) 48 | ublk_loop_LDADD = lib/libublksrv.la $(LIBURING_LIBS) $(PTHREAD_LIBS) 49 | 50 | ublk_nbd_SOURCES = $(TGT_DIR)/nbd/ublk.nbd.cpp $(TGT_DIR)/nbd/cliserv.c $(TGT_DIR)/nbd/nbd-client.c $(TGT_DIR)/ublksrv_tgt.cpp 51 | ublk_nbd_CFLAGS = $(WARNINGS_CFLAGS) $(LIBURING_CFLAGS) $(PTHREAD_CFLAGS) 52 | ublk_nbd_CPPFLAGS = $(ublk_nbd_CFLAGS) -I$(top_srcdir)/include -I$(TGT_INC) 53 | ublk_nbd_LDADD = lib/libublksrv.la $(LIBURING_LIBS) $(PTHREAD_LIBS) 54 | 55 | ublk_nfs_SOURCES = $(TGT_DIR)/ublk.nfs.cpp $(TGT_DIR)/ublksrv_tgt.cpp 56 | ublk_nfs_CFLAGS = $(WARNINGS_CFLAGS) $(LIBURING_CFLAGS) $(PTHREAD_CFLAGS) 57 | ublk_nfs_CPPFLAGS = $(ublk_nfs_CFLAGS) -I$(top_srcdir)/include -I$(TGT_INC) 58 | ublk_nfs_LDADD = lib/libublksrv.la $(LIBURING_LIBS) $(PTHREAD_LIBS) -lnfs 59 | 60 | demo_null_SOURCES = demo_null.c 61 | demo_null_CFLAGS = $(WARNINGS_CFLAGS) $(LIBURING_CFLAGS) $(PTHREAD_CFLAGS) 62 | demo_null_CPPFLAGS = $(demo_null_CFLAGS) -I$(top_srcdir)/include 63 | demo_null_LDADD = lib/libublksrv.la $(LIBURING_LIBS) $(PTHREAD_LIBS) 64 | 65 | demo_event_SOURCES = demo_event.c 66 | demo_event_CFLAGS = $(WARNINGS_CFLAGS) $(LIBURING_CFLAGS) $(PTHREAD_CFLAGS) 67 | demo_event_CPPFLAGS = $(demo_event_CFLAGS) -I$(top_srcdir)/include 68 | demo_event_LDADD = lib/libublksrv.la $(LIBURING_LIBS) $(PTHREAD_LIBS) 69 | 70 | ublk_user_id_SOURCES = 
utils/ublk_user_id.c 71 | ublk_user_id_CFLAGS = $(WARNINGS_CFLAGS) $(LIBURING_CFLAGS) $(PTHREAD_CFLAGS) 72 | ublk_user_id_CPPFLAGS = $(ublk_user_id_CFLAGS) -I$(top_srcdir)/include 73 | ublk_user_id_LDADD = lib/libublksrv.la $(LIBURING_LIBS) $(PTHREAD_LIBS) 74 | 75 | pkgconfigdir = $(libdir)/pkgconfig 76 | pkgconfig_DATA = ublksrv.pc 77 | 78 | CLEANFILES = *~ test cscope.* include/*~ *.d nbd/*~ utils/*~ doc/html/* 79 | 80 | R = 10 81 | D = tests/tmp/ 82 | test: $(sbin_PROGRAMS) $(noinst_PROGRAMS) 83 | make -C tests run T=${T} R=${R} D=${D} 84 | 85 | cscope: 86 | @cscope -b -R 87 | 88 | if HAVE_DOXYGEN 89 | doxygen_doc: 90 | doxygen doc/Doxyfile 91 | endif 92 | 93 | # Check no files are missing from EXTRA_DIST rules, and that all 94 | # generated files have been included in the tarball. (Note you must 95 | # have done 'make dist') 96 | maintainer-check-extra-dist: 97 | @zcat $(PACKAGE_NAME)-$(VERSION).tar.gz | tar tf - | sort | \ 98 | sed 's,^$(PACKAGE_NAME)-$(VERSION)/,,' > tarfiles 99 | @git ls-files | \ 100 | sort > gitfiles 101 | @comm -13 tarfiles gitfiles > comm-out 102 | @echo Checking for differences between EXTRA_DIST and git ... 103 | @cat comm-out 104 | @[ ! -s comm-out ] 105 | @rm tarfiles gitfiles comm-out 106 | @echo PASS: EXTRA_DIST tests 107 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | 2 | ============================ 3 | Userspace block driver(ublk) 4 | ============================ 5 | 6 | Introduction 7 | ============ 8 | 9 | This is the userspace daemon part(ublksrv) of the ublk framework, the other 10 | part is ``ublk driver`` [#ublk_driver]_ which supports multiple queues. 
11 | 12 | The two parts communicate by io_uring's IORING_OP_URING_CMD with one 13 | per-queue shared cmd buffer for storing io command, and the buffer is 14 | read only for ublksrv, each io command can be indexed by io request tag 15 | directly, and the command is written by ublk driver, and read by ublksrv 16 | after getting notification from ublk driver. 17 | 18 | For example, when one READ io request is submitted to ublk block driver, ublk 19 | driver stores the io command into cmd buffer first, then completes one 20 | IORING_OP_URING_CMD for notifying ublksrv, and the URING_CMD is issued to 21 | ublk driver beforehand by ublksrv for getting notification of any new io 22 | request, and each URING_CMD is associated with one io request by tag, 23 | so the depth for URING_CMD is the same as the queue depth of ublk block device. 24 | 25 | After ublksrv gets the io command, it translates and handles the ublk io 26 | request, such as, for the ublk-loop target, ublksrv translates the request 27 | into the same request on another file or disk, like the kernel loop block 28 | driver. In ublksrv's implementation, the io is still handled by io_uring, 29 | and shares the same ring with the IORING_OP_URING_CMD command. When the target io 30 | request is done, the same IORING_OP_URING_CMD is issued to ublk driver for 31 | both committing io request result and getting future notification of new 32 | io request. 33 | 34 | So far, the ublk driver needs to copy io request pages into userspace buffer 35 | (pages) first for write before notifying the request to ublksrv, and copy 36 | userspace buffer(pages) to the io request pages after ublksrv handles 37 | READ. Also, it looks like linux-mm can't support zero copy for this case yet. [#zero_copy]_ 38 | 39 | More ublk targets will be added with this framework in future even though only 40 | ublk-loop and ublk-null are implemented now. 41 | 42 | libublksrv is also generated, and it helps to integrate ublk into existing 43 | project. 
One example of demo_null is provided for how to make a ublk 44 | device over libublksrv. 45 | 46 | Quick start 47 | =========== 48 | 49 | how to build ublksrv: 50 | --------------------- 51 | 52 | .. code-block:: console 53 | 54 | autoreconf -i 55 | ./configure #pkg-config and libtool are usually needed 56 | make 57 | 58 | note: './configure' requires liburing 2.2 package installed, if liburing 2.2 59 | isn't available in your distribution, please configure via the following 60 | command, or refer to ``build_with_liburing_src`` [#build_with_liburing_src]_ 61 | 62 | .. code-block:: console 63 | 64 | PKG_CONFIG_PATH=${LIBURING_DIR} \ 65 | ./configure \ 66 | CFLAGS="-I${LIBURING_DIR}/src/include" \ 67 | CXXFLAGS="-I${LIBURING_DIR}/src/include" \ 68 | LDFLAGS="-L${LIBURING_DIR}/src" 69 | 70 | and LIBURING_DIR points to directory of liburing source code, and liburing 71 | needs to be built before running above commands. Also IORING_SETUP_SQE128 72 | has to be supported in the liburing source. 73 | 74 | c++20 is required for building ublk utility, but libublksrv and demo_null.c & 75 | demo_event.c can be built independently: 76 | 77 | - build libublksrv :: 78 | 79 | make -C lib/ 80 | 81 | - build demo_null && demo_event :: 82 | 83 | make -C lib/ 84 | make demo_null demo_event 85 | 86 | help 87 | ---- 88 | 89 | - ublk help 90 | 91 | add one ublk-null disk 92 | ---------------------- 93 | 94 | - ublk add -t null 95 | 96 | 97 | add one ublk-loop disk 98 | ---------------------- 99 | 100 | - ublk add -t loop -f /dev/vdb 101 | 102 | or 103 | 104 | - ublk add -t loop -f 1.img 105 | 106 | remove one ublk disk 107 | -------------------- 108 | 109 | - ublk del -n 0 #remove /dev/ublkb0 110 | 111 | - ublk del -a #remove all ublk devices 112 | 113 | list ublk devices 114 | --------------------- 115 | 116 | - ublk list 117 | 118 | - ublk list -v #with all device info dumped 119 | 120 | 121 | unprivileged mode 122 | ================== 123 | 124 | Typical use case is container 
[#stefan_container]_ in which user 125 | can manage its own devices not exposed to other containers. 126 | 127 | By default, controlling ublk device needs privileged user, since 128 | /dev/ublk-control is permitted for administrator only, and this 129 | is called privileged mode. 130 | 131 | For unprivileged mode, /dev/ublk-control needs to be allowed for 132 | all users, so the following udev rule needs to be added: :: 133 | 134 | KERNEL=="ublk-control", MODE="0666", OPTIONS+="static_node=ublk-control" 135 | 136 | Also when new ublk device is added, we need ublk to change device 137 | ownership to the device's real owner, so the following rules are 138 | needed: :: 139 | 140 | KERNEL=="ublkc*",RUN+="ublk_chown.sh %k" 141 | KERNEL=="ublkb*",RUN+="ublk_chown.sh %k" 142 | 143 | ``ublk_chown.sh`` can be found under ``utils/`` too. 144 | 145 | ``utils/ublk_dev.rules`` includes the above rules. 146 | 147 | With the above two administrator changes, unprivileged user can 148 | create/delete/list/use ublk device, also anyone who isn't permitted 149 | can't access and control these ublk devices(ublkc*/ublkb*) 150 | 151 | Unprivileged user can pass '--unprivileged' to 'ublk add' for creating 152 | unprivileged ublk device, then the created ublk device is only available 153 | for the owner and administrator. 154 | 155 | use unprivileged ublk in docker 156 | ------------------------------- 157 | 158 | - install the following udev rules in host machine: :: 159 | 160 | ACTION=="add",KERNEL=="ublk[bc]*",RUN+="/usr/local/sbin/ublk_chown_docker.sh %k 'add' '%M' '%m'" 161 | ACTION=="remove",KERNEL=="ublk[bc]*",RUN+="/usr/local/sbin/ublk_chown_docker.sh %k 'remove' '%M' '%m'" 162 | 163 | ``ublk_chown_docker.sh`` can be found under ``utils/``. 164 | 165 | - run one container and install ublk & its dependency packages 166 | 167 | .. 
code-block:: console 168 | 169 | docker run \ 170 | --name fedora \ 171 | --hostname=ublk-docker.example.com \ 172 | --device=/dev/ublk-control \ 173 | --device-cgroup-rule='a *:* rmw' \ 174 | --tmpfs /tmp \ 175 | --tmpfs /run \ 176 | --volume /sys/fs/cgroup:/sys/fs/cgroup:ro \ 177 | -ti \ 178 | fedora:38 179 | 180 | .. code-block:: console 181 | 182 | #run the following commands inside the above container 183 | dnf install -y git libtool automake autoconf g++ liburing-devel 184 | git clone https://github.com/ming1/ubdsrv.git 185 | cd ubdsrv 186 | autoreconf -i&& ./configure&& make -j 4&& make install 187 | 188 | - add/delete ublk device inside container by unprivileged user 189 | 190 | .. code-block:: console 191 | 192 | docker exec -u 1001:1001 -ti fedora /bin/bash 193 | 194 | .. code-block:: console 195 | 196 | #run the following commands inside the above container 197 | bash-5.2$ ublk add -t null --unprivileged 198 | dev id 0: nr_hw_queues 1 queue_depth 128 block size 512 dev_capacity 524288000 199 | max rq size 524288 daemon pid 178 flags 0x62 state LIVE 200 | ublkc: 237:0 ublkb: 259:1 owner: 1001:1001 201 | queue 0: tid 179 affinity(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 ) 202 | target {"dev_size":268435456000,"name":"null","type":0} 203 | 204 | bash-5.2$ ls -l /dev/ublk* 205 | crw-rw-rw-. 1 root root 10, 123 May 1 04:35 /dev/ublk-control 206 | brwx------. 1 1001 1001 259, 1 May 1 04:36 /dev/ublkb0 207 | crwx------. 1 1001 1001 237, 0 May 1 04:36 /dev/ublkc0 208 | 209 | bash-5.2$ ublk del -n 0 210 | bash-5.2$ ls -l /dev/ublk* 211 | crw-rw-rw-. 
1 root root 10, 123 May 1 04:35 /dev/ublk-control 212 | 213 | - example of ublk in docker: ``tests/debug/ublk_docker`` 214 | 215 | test 216 | ==== 217 | 218 | run all built tests 219 | ------------------- 220 | 221 | make test T=all 222 | 223 | 224 | run test group 225 | -------------- 226 | 227 | make test T=null 228 | 229 | make test T=loop 230 | 231 | make test T=generic 232 | 233 | 234 | run single test 235 | --------------- 236 | 237 | make test T=generic/001 238 | 239 | make test T=null/001 240 | 241 | make test T=loop/001 242 | ... 243 | 244 | run specified tests or test groups 245 | ---------------------------------- 246 | 247 | make test T=generic:loop/001:null 248 | 249 | 250 | Debug 251 | ===== 252 | 253 | ublksrv is running as one daemon process, so most of debug messages won't be 254 | shown in terminal. If any issue is observed, please collect log via command 255 | of "journalctl | grep ublksrvd" 256 | 257 | ``./configure --enable-debug`` can build a debug version of ublk which 258 | dumps lots of runtime debug messages, and can't be used in production 259 | environment, should be for debug purpose only. For debug version of 260 | ublksrv, 'ublk add --debug_mask=0x{MASK}' can control which kind of 261 | debug log dumped, see ``UBLK_DBG_*`` defined in include/ublksrv_utils.h 262 | for each kind of debug log. 263 | 264 | libublksrv API doc 265 | ================== 266 | 267 | API is documented in include/ublksrv.h, and doxygen doc can be generated 268 | by running 'make doxygen_doc', the generated html docs are in doc/html. 269 | 270 | Contributing 271 | ============ 272 | 273 | Any kind of contribution is welcome! 274 | 275 | Development is done over github. 276 | 277 | Maillist 278 | ======== 279 | 280 | A ublk mailing list is available at http://groups.google.com/group/ublk. 281 | 282 | License 283 | ======= 284 | 285 | nlohmann(include/nlohmann/json.hpp) is from [#nlohmann]_, which is covered 286 | by MIT license. 
287 | 288 | The library functions (all code in lib/ directory and include/ublksrv.h) 289 | are covered by dual licensed LGPL and MIT, see COPYING.LGPL and LICENSE. 290 | 291 | All other source code are covered by dual licensed GPL and MIT, see 292 | COPYING and LICENSE. 293 | 294 | References 295 | ========== 296 | 297 | .. [#ublk_driver] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/drivers/block/ublk_drv.c?h=v6.0 298 | .. [#zero_copy] https://lore.kernel.org/all/20220318095531.15479-1-xiaoguang.wang@linux.alibaba.com/ 299 | .. [#nlohmann] https://github.com/nlohmann/json 300 | .. [#build_with_liburing_src] https://github.com/ming1/ubdsrv/blob/master/build_with_liburing_src 301 | .. [#stefan_container] https://lore.kernel.org/linux-block/YoOr6jBfgVm8GvWg@stefanha-x1.localdomain/ 302 | -------------------------------------------------------------------------------- /build_with_liburing_src: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # build liburing first 4 | # 5 | # cd $LIBURING_DIR 6 | # ./configure && make && make install 7 | 8 | PARAM=$1 9 | 10 | set -eo pipefail 11 | set -x 12 | 13 | script_dir="$(dirname ${BASH_SOURCE[0]})" 14 | 15 | autoreconf -i "${script_dir}" 16 | 17 | OPTS="-g -O0 -Wall" 18 | : "${LIBURING_DIR:=/root/git/liburing}" #replace with your own liburing path 19 | PKG_CONFIG_PATH=${LIBURING_DIR} \ 20 | ${script_dir}/configure \ 21 | --enable-gcc-warnings $PARAM \ 22 | CFLAGS="-I${LIBURING_DIR}/src/include $OPTS" \ 23 | CXXFLAGS="-I${LIBURING_DIR}/src/include $OPTS" \ 24 | LDFLAGS="-L${LIBURING_DIR}/src" 25 | 26 | make -j$(nproc) --no-print-directory 27 | -------------------------------------------------------------------------------- /ci/.gitignore: -------------------------------------------------------------------------------- 1 | mkosi.builddir/ 2 | mkosi.output/ 3 | mkosi.cache/ 4 | 
-------------------------------------------------------------------------------- /ci/mkosi.build: -------------------------------------------------------------------------------- 1 | #!/usr/bin/bash 2 | 3 | # 4 | # This is a build script file for OS image generation using mkosi (https://github.com/systemd/mkosi). 5 | # It is invoked in a build environment, with the following set well-known variables: 6 | # 7 | # $SRCDIR 8 | # $DESTDIR 9 | # $BUILDDIR 10 | # 11 | 12 | set -eo pipefail 13 | 14 | mkdir -p $DESTDIR/root/ 15 | echo "systemctl restart systemd-networkd" >> $DESTDIR/root/.bash_profile 16 | chmod +x $DESTDIR/root/.bash_profile 17 | 18 | # Build newer version of liburing 19 | cd $SRCDIR 20 | git clone git://git.kernel.dk/liburing.git 21 | cd liburing 22 | ./configure 23 | make -j$(nproc) 24 | make DESTDIR= install 25 | 26 | # Build in BUILDDIR and install to DESTDIR 27 | mkdir -p $BUILDDIR/ubdsrv 28 | LIBURING_DIR=$SRCDIR/liburing $SRCDIR/build_with_liburing_src 29 | make install 30 | 31 | # Copy tests into the dest 32 | mkdir -p $DESTDIR/usr/share 33 | cp -r $SRCDIR/tests $DESTDIR/usr/share/ 34 | -------------------------------------------------------------------------------- /ci/mkosi.conf: -------------------------------------------------------------------------------- 1 | [Distribution] 2 | Distribution=fedora 3 | -------------------------------------------------------------------------------- /ci/mkosi.default.d/10-ubdsrv.conf: -------------------------------------------------------------------------------- 1 | # This is a settings file for OS image generation using mkosi (https://github.com/systemd/mkosi). 2 | 3 | [Output] 4 | Format=gpt_btrfs 5 | Bootable=yes 6 | HostonlyInitrd=yes 7 | OutputDirectory=mkosi.output 8 | QCow2=yes 9 | 10 | [Content] 11 | BuildDirectory=mkosi.builddir 12 | BuildSources=.. 
13 | Cache=mkosi.cache 14 | SourceFileTransfer=copy-git-cached 15 | WithNetwork=yes 16 | 17 | [Host] 18 | QemuHeadless=yes 19 | Netdev=yes 20 | Ssh=yes 21 | SshTimeout=300 22 | QemuSmp=1 23 | QemuMem=4G 24 | QemuKvm=no 25 | 26 | [Validation] 27 | Password= 28 | Autologin=yes 29 | 30 | [Partitions] 31 | RootSize=6G 32 | -------------------------------------------------------------------------------- /ci/mkosi.default.d/fedora/10-fedora.conf: -------------------------------------------------------------------------------- 1 | # This is a settings file for OS image generation using mkosi (https://github.com/systemd/mkosi). 2 | 3 | [Distribution] 4 | Distribution=fedora 5 | Release=37 6 | 7 | [Content] 8 | BuildPackages= 9 | autoconf 10 | autoconf-archive 11 | automake 12 | diffutils 13 | gcc 14 | gcc-g++ 15 | git 16 | libtool 17 | make 18 | Packages= 19 | fio 20 | fio-engine-libaio 21 | qemu-img 22 | util-linux 23 | which 24 | nbd 25 | nbdkit 26 | net-tools 27 | systemd-resolved 28 | dhcp-client 29 | -------------------------------------------------------------------------------- /ci/mkosi.extra/etc/modules-load.d/ublk-drv.conf: -------------------------------------------------------------------------------- 1 | ublk_drv 2 | -------------------------------------------------------------------------------- /configure.ac: -------------------------------------------------------------------------------- 1 | # ublksrv 2 | # Copyright (C) Ming Lei 3 | # SPDX-License-Identifier: MIT or GPL-2.0-only 4 | 5 | AC_INIT([ublksrv], 6 | m4_esyscmd(utils/genver.sh | tr -d '\n'), 7 | [https://github.com/ming1/ubdsrv],, 8 | [https://github.com/ming1/ubdsrv]) 9 | 10 | AC_CONFIG_MACRO_DIR([m4]) 11 | m4_ifdef([AC_USE_SYSTEM_EXTENSIONS],[], 12 | [m4_define([AC_USE_SYSTEM_EXTENSIONS],[])]) 13 | AC_USE_SYSTEM_EXTENSIONS 14 | AC_SYS_LARGEFILE 15 | 16 | AM_SILENT_RULES([yes]) 17 | 18 | dnl NB: Do not [quote] this parameter. 
19 | AM_INIT_AUTOMAKE([foreign subdir-objects]) 20 | LT_INIT 21 | 22 | AC_CANONICAL_HOST 23 | 24 | AC_PROG_SED 25 | 26 | dnl Check for basic C environment. 27 | AC_PROG_CC 28 | AC_PROG_INSTALL 29 | AC_PROG_CPP 30 | 31 | AC_C_PROTOTYPES 32 | test "x$U" != "x" && AC_MSG_ERROR([Compiler not ANSI compliant]) 33 | 34 | AM_PROG_CC_C_O 35 | 36 | AX_PTHREAD 37 | 38 | dnl Check for C++. 39 | AC_PROG_CXX 40 | 41 | AS_CASE([$CXX], 42 | [*clang++*], [ENABLE_CORO_FLAG=""], 43 | [*g++*], [ENABLE_CORO_FLAG="-fcoroutines"], 44 | [ENABLE_CORO_FLAG=""] 45 | ) 46 | 47 | CXXFLAGS="$CXXFLAGS $ENABLE_CORO_FLAG" 48 | 49 | 50 | dnl --enable-gcc-warnings to turn on GCC warnings (for developers). 51 | AC_ARG_ENABLE([gcc-warnings], 52 | [AS_HELP_STRING([--enable-gcc-warnings], 53 | [turn on lots of GCC warnings (for developers)])], 54 | [case $enableval in 55 | yes|no) ;; 56 | *) AC_MSG_ERROR([bad value $enableval for gcc-warnings option]) ;; 57 | esac 58 | gcc_warnings=$enableval], 59 | [gcc_warnings=no] 60 | ) 61 | if test "x$gcc_warnings" = "xyes"; then 62 | # Enable normal GCC warnings and a few more: 63 | # - Warn about variable length arrays on stack. 64 | # - Warn about large stack frames (since we may be used from threads). 65 | # 66 | # TODO: Address 'sign-compare' errors and remove -Wno-sign-compare 67 | # TODO: Address 'parentheses' errors and remove -Wno-parentheses 68 | WARNINGS_CFLAGS="-Wall -Werror -Wuninitialized -Wmaybe-uninitialized -Wno-sign-compare -Wno-parentheses" 69 | AC_C_COMPILE_FLAGS([WARNINGS_CFLAGS], 70 | [-Wvla -Wframe-larger-than=5000 -Wstack-usage=10000], 71 | [$CFLAGS -Werror]) 72 | AC_SUBST([WARNINGS_CFLAGS]) 73 | fi 74 | 75 | AC_ARG_ENABLE( 76 | debug, 77 | AS_HELP_STRING([--enable-debug],[Build a debug version of ublk(default disabled)]), 78 | [ 79 | if test "x$enableval" = "xyes"; then 80 | AC_DEFINE(DEBUG, 1, [Define to 1 if you want to build debug version]) 81 | fi 82 | ] 83 | ) 84 | 85 | dnl Check for liburing (required). 
86 | PKG_CHECK_MODULES([LIBURING], [liburing >= 2.2]) 87 | 88 | dnl Check if io_uring_prep_sendmsg_zc which isn't supported on 2.2 yet 89 | AC_MSG_CHECKING([for io_uring_prep_sendmsg_zc]) 90 | AC_LINK_IFELSE([AC_LANG_PROGRAM([[ 91 | #include 92 | ]], [[ 93 | int i = IORING_CQE_F_NOTIF; 94 | ]])], 95 | [AC_MSG_RESULT([yes]) 96 | AM_CONDITIONAL([HAVE_LIBURING_SEND_ZC], true) 97 | AC_DEFINE([HAVE_LIBURING_SEND_ZC], [1], [Define to 1 if liburing supports send zc])], 98 | [AC_MSG_RESULT([no]) 99 | AM_CONDITIONAL([HAVE_LIBURING_SEND_ZC], false)]) 100 | 101 | dnl Check for libnfs api v2 102 | AC_ARG_WITH([libnfs], 103 | [AS_HELP_STRING([--without-libnfs], 104 | [do not use libnfs])], 105 | [], 106 | [with_libnfs=check] 107 | ) 108 | if test "x$with_libnfs" != "xno"; then 109 | AC_MSG_CHECKING([for libnfs api v2]) 110 | AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[ 111 | #include 112 | #include 113 | ]], [[ 114 | #ifndef LIBNFS_API_V2 115 | does not compile 116 | #endif 117 | nfs_mt_service_thread_start(NULL); 118 | ]])], 119 | [AC_MSG_RESULT([yes]) 120 | AM_CONDITIONAL([HAVE_LIBNFS], true) 121 | AC_DEFINE([HAVE_LIBNFS], [1], [Define to 1 if libnfs api v2 is available])], 122 | [AC_MSG_RESULT([no]) 123 | AM_CONDITIONAL([HAVE_LIBNFS], false)]) 124 | else 125 | AM_CONDITIONAL([HAVE_LIBNFS], false) 126 | fi 127 | AM_CONDITIONAL([LIBNFS], [test "x$HAVE_LIBNFS" = "x1"]) 128 | 129 | dnl Check for libiscsi 130 | AC_ARG_WITH([libiscsi], 131 | [AS_HELP_STRING([--without-libiscsi], 132 | [do not use libiscsi])], 133 | [], 134 | [with_libiscsi=check] 135 | ) 136 | if test "x$with_libiscsi" != "xno"; then 137 | AC_MSG_CHECKING([for libiscsi]) 138 | AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[ 139 | #include 140 | #include 141 | ]], [[ 142 | iscsi_mt_service_thread_start(NULL); 143 | ]])], 144 | [AC_MSG_RESULT([yes]) 145 | AM_CONDITIONAL([HAVE_LIBISCSI], true) 146 | AC_DEFINE([HAVE_LIBISCSI], [1], [Define to 1 if libiscsi is available])], 147 | [AC_MSG_RESULT([no]) 148 | 
AM_CONDITIONAL([HAVE_LIBISCSI], false)]) 149 | else 150 | AM_CONDITIONAL([HAVE_LIBISCSI], false) 151 | fi 152 | AM_CONDITIONAL([LIBISCSI], [test "x$HAVE_LIBISCSI" = "x1"]) 153 | 154 | #gnutls is only for nbd target, borrowed from nbd-client project 155 | AC_ARG_WITH([gnutls], 156 | [AS_HELP_STRING([--without-gnutls], 157 | [do not use gnutls])], 158 | [], 159 | [with_gnutls=check] 160 | ) 161 | if test "x$with_gnutls" != "xno"; then 162 | PKG_CHECK_MODULES(GnuTLS, [gnutls >= 2.12.0], 163 | [HAVE_GNUTLS=1 164 | AC_DEFINE(HAVE_GNUTLS, 1, [Define to 1 if you have a GnuTLS version of 2.12 or above])], 165 | [if test "x$with_gnutls" = "xyes"; then 166 | AC_MSG_ERROR([--with-gnutls given but cannot find gnutls]) 167 | else 168 | HAVE_GNUTLS=0 169 | AC_DEFINE(HAVE_GNUTLS, 0) 170 | fi] 171 | ) 172 | else 173 | HAVE_GNUTLS=0 174 | AC_DEFINE(HAVE_GNUTLS, 0) 175 | fi 176 | AM_CONDITIONAL([GNUTLS], [test "x$HAVE_GNUTLS" = "x1"]) 177 | 178 | AC_ARG_ENABLE( 179 | sdp, 180 | AS_HELP_STRING([--enable-sdp],[Build a version of nbd-server with support for the Socket Direct Protocol (SDP). 
Requires you to build and install a kernel with the InfiniBand patches (default disabled)]), 181 | [ 182 | if test "x$enableval" = "xyes"; then 183 | AC_DEFINE(WITH_SDP, 1, [Define to 1 if you have and want support for the Socket Direct Protocol]) 184 | fi 185 | ] 186 | ) 187 | 188 | AC_ARG_ENABLE( 189 | lfs, 190 | AS_HELP_STRING([--disable-lfs],[Disable Large File Support (default on)]), 191 | [ 192 | if test "x$enableval" = "xyes" ; then 193 | NBD_LFS=1 194 | else 195 | NBD_LFS=0 196 | fi 197 | ], 198 | [NBD_LFS=1] 199 | ) 200 | AC_MSG_CHECKING([whether Large File Support should be enabled]) 201 | if test $NBD_LFS -eq 1; then 202 | AC_DEFINE(NBD_LFS,1, [Define to 1 if Large File Support should be enabled]) 203 | AC_MSG_RESULT([yes]) 204 | else 205 | AC_DEFINE(NBD_LFS,0) 206 | AC_MSG_RESULT([no]) 207 | fi 208 | 209 | ublk_control=/dev/ublk-control 210 | AC_ARG_WITH( 211 | [ublk_control], 212 | [AS_HELP_STRING([--with-ublk_control], [Set the ublk control device. Defaults to /dev/ublk-control.])], 213 | [ublk_control="$withval"] 214 | [], 215 | ) 216 | AC_DEFINE_UNQUOTED([UBLK_CONTROL], ["${ublk_control}"], [ublk control device.]) 217 | 218 | ublkc_prefix=/dev 219 | AC_ARG_WITH( 220 | [ublkc_prefix], 221 | [AS_HELP_STRING([--with-ublkc_prefix], [Set the directory prefix for ublkc devices. Defaults to /dev.])], 222 | [ublkc_prefix="$withval"] 223 | [], 224 | ) 225 | AC_DEFINE_UNQUOTED([UBLKC_PREFIX], ["${ublkc_prefix}"], [prefix for ublkc devices.]) 226 | 227 | AC_CHECK_PROGS([DOXYGEN], [doxygen]) 228 | if test -z "$DOXYGEN"; then 229 | AC_MSG_WARN([Doxygen not found - continue without Doxygen support]) 230 | fi 231 | AM_CONDITIONAL([HAVE_DOXYGEN], [test -n "$DOXYGEN"]) 232 | 233 | dnl Produce output files. 
234 | AC_CONFIG_HEADERS([config.h]) 235 | 236 | AC_CONFIG_FILES([Makefile 237 | doc/Makefile 238 | include/Makefile 239 | lib/Makefile 240 | tests/Makefile 241 | ublksrv.pc]) 242 | 243 | AC_OUTPUT 244 | -------------------------------------------------------------------------------- /demo_null.c: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: MIT or GPL-2.0-only 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | #include "ublksrv.h" 17 | #include "ublksrv_utils.h" 18 | 19 | struct demo_queue_info { 20 | const struct ublksrv_dev *dev; 21 | int qid; 22 | pthread_t thread; 23 | }; 24 | 25 | static struct ublksrv_ctrl_dev *this_dev; 26 | 27 | static void sig_handler(int sig) 28 | { 29 | fprintf(stderr, "got signal %d\n", sig); 30 | ublksrv_ctrl_stop_dev(this_dev); 31 | } 32 | 33 | /* 34 | * io handler for each ublkdev's queue 35 | * 36 | * Just for showing how to build ublksrv target's io handling, so callers 37 | * can apply these APIs in their own thread context for making one ublk 38 | * block device. 
39 | */ 40 | static void *demo_null_io_handler_fn(void *data) 41 | { 42 | struct demo_queue_info *info = (struct demo_queue_info *)data; 43 | const struct ublksrv_dev *dev = info->dev; 44 | const struct ublksrv_ctrl_dev *cdev = ublksrv_get_ctrl_dev(dev); 45 | const struct ublksrv_ctrl_dev_info *dinfo = ublksrv_ctrl_get_dev_info(cdev); 46 | unsigned dev_id = dinfo->dev_id; 47 | unsigned short q_id = info->qid; 48 | const struct ublksrv_queue *q; 49 | 50 | sched_setscheduler(getpid(), SCHED_RR, NULL); 51 | 52 | ublk_json_write_queue_info(cdev, q_id, ublksrv_gettid()); 53 | ublk_tgt_store_dev_data(dev); 54 | 55 | q = ublksrv_queue_init(dev, q_id, NULL); 56 | if (!q) { 57 | fprintf(stderr, "ublk dev %d queue %d init queue failed\n", 58 | dinfo->dev_id, q_id); 59 | return NULL; 60 | } 61 | 62 | fprintf(stdout, "tid %d: ublk dev %d queue %d started\n", 63 | ublksrv_gettid(), 64 | dev_id, q->q_id); 65 | do { 66 | if (ublksrv_process_io(q) < 0) 67 | break; 68 | } while (1); 69 | 70 | fprintf(stdout, "ublk dev %d queue %d exited\n", dev_id, q->q_id); 71 | ublksrv_queue_deinit(q); 72 | return NULL; 73 | } 74 | 75 | static void demo_null_set_parameters(struct ublksrv_ctrl_dev *cdev, 76 | const struct ublksrv_dev *dev) 77 | { 78 | const struct ublksrv_ctrl_dev_info *info = 79 | ublksrv_ctrl_get_dev_info(cdev); 80 | struct ublk_params p = { 81 | .types = UBLK_PARAM_TYPE_BASIC, 82 | .basic = { 83 | .logical_bs_shift = 9, 84 | .physical_bs_shift = 12, 85 | .io_opt_shift = 12, 86 | .io_min_shift = 9, 87 | .max_sectors = info->max_io_buf_bytes >> 9, 88 | .dev_sectors = dev->tgt.dev_size >> 9, 89 | }, 90 | }; 91 | int ret; 92 | 93 | ublk_json_write_params(cdev, &p); 94 | 95 | ret = ublksrv_ctrl_set_params(cdev, &p); 96 | if (ret) 97 | fprintf(stderr, "dev %d set basic parameter failed %d\n", 98 | info->dev_id, ret); 99 | } 100 | 101 | static int demo_null_io_handler(struct ublksrv_ctrl_dev *ctrl_dev) 102 | { 103 | int ret, i; 104 | const struct ublksrv_dev *dev; 105 | struct 
demo_queue_info *info_array; 106 | void *thread_ret; 107 | const struct ublksrv_ctrl_dev_info *dinfo = 108 | ublksrv_ctrl_get_dev_info(ctrl_dev); 109 | 110 | info_array = (struct demo_queue_info *) 111 | calloc(sizeof(struct demo_queue_info), dinfo->nr_hw_queues); 112 | if (!info_array) 113 | return -ENOMEM; 114 | 115 | dev = ublksrv_dev_init(ctrl_dev); 116 | if (!dev) { 117 | free(info_array); 118 | return -ENOMEM; 119 | } 120 | 121 | for (i = 0; i < dinfo->nr_hw_queues; i++) { 122 | info_array[i].dev = dev; 123 | info_array[i].qid = i; 124 | pthread_create(&info_array[i].thread, NULL, 125 | demo_null_io_handler_fn, 126 | &info_array[i]); 127 | } 128 | 129 | demo_null_set_parameters(ctrl_dev, dev); 130 | 131 | /* everything is fine now, start us */ 132 | ret = ublksrv_ctrl_start_dev(ctrl_dev, getpid()); 133 | if (ret < 0) 134 | goto fail; 135 | 136 | ublksrv_ctrl_get_info(ctrl_dev); 137 | ublk_ctrl_dump(ctrl_dev); 138 | 139 | /* wait until we are terminated */ 140 | for (i = 0; i < dinfo->nr_hw_queues; i++) 141 | pthread_join(info_array[i].thread, &thread_ret); 142 | fail: 143 | ublksrv_dev_deinit(dev); 144 | 145 | free(info_array); 146 | 147 | return ret; 148 | } 149 | 150 | static int null_start_daemon(struct ublksrv_ctrl_dev *ctrl_dev) 151 | { 152 | int ret; 153 | 154 | if (ublksrv_ctrl_get_affinity(ctrl_dev) < 0) 155 | return -1; 156 | 157 | ret = demo_null_io_handler(ctrl_dev); 158 | 159 | return ret; 160 | } 161 | 162 | 163 | 164 | static int demo_init_tgt(struct ublksrv_dev *dev, int type, int argc, 165 | char *argv[]) 166 | { 167 | const struct ublksrv_ctrl_dev *cdev = ublksrv_get_ctrl_dev(dev); 168 | const struct ublksrv_ctrl_dev_info *info = ublksrv_ctrl_get_dev_info(cdev); 169 | struct ublksrv_tgt_info *tgt = &dev->tgt; 170 | struct ublksrv_tgt_base_json tgt_json = { 171 | .type = type, 172 | }; 173 | strcpy(tgt_json.name, "null"); 174 | 175 | tgt_json.dev_size = tgt->dev_size = 250UL * 1024 * 1024 * 1024; 176 | tgt->tgt_ring_depth = info->queue_depth; 
177 | tgt->nr_fds = 0; 178 | 179 | ublk_json_write_dev_info(cdev); 180 | ublk_json_write_target_base(cdev, &tgt_json); 181 | 182 | return 0; 183 | } 184 | 185 | static int demo_handle_io_async(const struct ublksrv_queue *q, 186 | const struct ublk_io_data *data) 187 | { 188 | const struct ublksrv_io_desc *iod = data->iod; 189 | 190 | ublksrv_complete_io(q, data->tag, iod->nr_sectors << 9); 191 | 192 | return 0; 193 | } 194 | 195 | void *null_alloc_io_buf(const struct ublksrv_queue *q, int tag, int size) 196 | { 197 | return malloc(size); 198 | } 199 | 200 | void null_free_io_buf(const struct ublksrv_queue *q, void *buf, int tag) 201 | { 202 | free(buf); 203 | } 204 | 205 | static struct ublksrv_tgt_type demo_tgt_type = { 206 | .name = "demo_null", 207 | .init_tgt = demo_init_tgt, 208 | .handle_io_async = demo_handle_io_async, 209 | //.alloc_io_buf = null_alloc_io_buf, 210 | //.free_io_buf = null_free_io_buf, 211 | }; 212 | 213 | int main(int argc, char *argv[]) 214 | { 215 | struct ublksrv_dev_data data = { 216 | .dev_id = -1, 217 | .max_io_buf_bytes = DEF_BUF_SIZE, 218 | .nr_hw_queues = DEF_NR_HW_QUEUES, 219 | .queue_depth = DEF_QD, 220 | .tgt_type = "demo_null", 221 | .tgt_ops = &demo_tgt_type, 222 | .run_dir = ublksrv_get_pid_dir(), 223 | .flags = 0, 224 | }; 225 | struct ublksrv_ctrl_dev *dev; 226 | int ret; 227 | static const struct option longopts[] = { 228 | { "buf", 1, NULL, 'b' }, 229 | { "need_get_data", 1, NULL, 'g' }, 230 | { NULL } 231 | }; 232 | int opt; 233 | bool use_buf = false; 234 | 235 | while ((opt = getopt_long(argc, argv, ":bg", 236 | longopts, NULL)) != -1) { 237 | switch (opt) { 238 | case 'b': 239 | use_buf = true; 240 | break; 241 | case 'g': 242 | data.flags |= UBLK_F_NEED_GET_DATA; 243 | break; 244 | } 245 | } 246 | 247 | if (signal(SIGTERM, sig_handler) == SIG_ERR) 248 | error(EXIT_FAILURE, errno, "signal"); 249 | if (signal(SIGINT, sig_handler) == SIG_ERR) 250 | error(EXIT_FAILURE, errno, "signal"); 251 | 252 | if (use_buf) { 253 | 
demo_tgt_type.alloc_io_buf = null_alloc_io_buf; 254 | demo_tgt_type.free_io_buf = null_free_io_buf; 255 | } 256 | 257 | dev = ublksrv_ctrl_init(&data); 258 | if (!dev) 259 | error(EXIT_FAILURE, ENODEV, "ublksrv_ctrl_init"); 260 | /* ugly, but signal handler needs this_dev */ 261 | this_dev = dev; 262 | 263 | ret = ublksrv_ctrl_add_dev(dev); 264 | if (ret < 0) { 265 | error(0, -ret, "can't add dev %d", data.dev_id); 266 | goto fail; 267 | } 268 | 269 | ret = null_start_daemon(dev); 270 | if (ret < 0) { 271 | error(0, -ret, "can't start daemon"); 272 | goto fail_del_dev; 273 | } 274 | 275 | ublksrv_ctrl_del_dev(dev); 276 | ublksrv_ctrl_deinit(dev); 277 | exit(EXIT_SUCCESS); 278 | 279 | fail_del_dev: 280 | ublksrv_ctrl_del_dev(dev); 281 | fail: 282 | ublksrv_ctrl_deinit(dev); 283 | 284 | exit(EXIT_FAILURE); 285 | } 286 | -------------------------------------------------------------------------------- /doc/Makefile.am: -------------------------------------------------------------------------------- 1 | XSLTPROC = /usr/bin/xsltproc 2 | 3 | # Manpages 4 | man1_MANS = ublk.1 5 | 6 | %.1 : %.1.xml 7 | -test -z "$(XSLTPROC)" || $(XSLTPROC) -o $@ http://docbook.sourceforge.net/release/xsl/current/manpages/docbook.xsl $< 8 | -------------------------------------------------------------------------------- /doc/external_links.rst: -------------------------------------------------------------------------------- 1 | 2 | ======================= 3 | External ublk documents 4 | ======================= 5 | 6 | Tech 7 | ==== 8 | 9 | 2023 10 | ---- 11 | 12 | `libublk(Rust) ` 13 | 14 | `ublk: virtual block devices in userspace(DevConf.CZ 2023) ` 15 | 16 | `ublk: the new and improved way to serve SPDK storage locally! 
` 17 | 18 | `btrfs-ublk ` 19 | 20 | `Zero-copy I/O for ublk, three different ways ` 21 | 22 | `qemu-storage-daemon ublk export - ` 23 | 24 | `ublk Target - SPDK ` 25 | 26 | 2022 27 | ---- 28 | 29 | `LWN: An io_uring-based user-space block driver ` 30 | 31 | `LWN: Crash recovery for user-space block drivers ` 32 | 33 | `An NBD block device written using Linux ublk (user block device) ` 34 | 35 | `Hacker News: An io_uring-based user-space block driver (lwn.net) ` 36 | 37 | `io_uring in Android OTA ` 38 | 39 | `ublk user recovery design[Chinese] ` 40 | 41 | Uses 42 | ==== 43 | 44 | `How to use ublk on Oracle Linux 8 ` 45 | 46 | `ublk on Aliyun[Chinese] ` 47 | 48 | `Testing ublk on Ubuntu 22.04 ` 49 | 50 | 51 | News 52 | ==== 53 | 54 | 2023 55 | ---- 56 | 57 | `OpenAnolis: ANCK 5.10 has supported ublk[Chinese] ` 58 | 59 | `miniublk merged to blktests ` 60 | 61 | `OpenAnolis: ANCK 5.10 will support ublk[Chinese] ` 62 | 63 | -------------------------------------------------------------------------------- /doc/mainpage.dox: -------------------------------------------------------------------------------- 1 | /*! 2 | \mainpage libublksrv API documentation 3 | 4 | UBLK (block in Userspace) is an interface for building block device by 5 | userspace programs. The UBLK project consists 6 | of two components: the *ublk_drv* kernel module (maintained in the regular 7 | kernel repositories) and the *libublksrv* userspace library. libublksrv 8 | provides the reference implementation for communicating with the ublk_drv 9 | kernel module. 10 | 11 | A UBLK block device is typically implemented as a standalone application that 12 | links with libublksrv. libublksrv provides APIs for the application to 13 | handle IO logic by its business logic, also add/delete/recovery device. 14 | 15 | ublksrv_tgt is built on libublksrv for supporting generic & multiple ublk 16 | targets, and it isn't covered in this document. 
In the future, the two 17 | will be separated from each other, and become standalone project. 18 | 19 | ## Getting started ## 20 | 21 | The API that is primarily specified in ublksrv.h. 22 | 23 | ublksrv_aio.h provides APIs for offloading IO handling to another context, 24 | and it is optional. 25 | 26 | ## Examples ## 27 | 28 | A good starting point is demo_null.c and demo_event.c. 29 | 30 | */ 31 | -------------------------------------------------------------------------------- /doc/ublk.1: -------------------------------------------------------------------------------- 1 | '\" t 2 | .\" Title: ublk 3 | .\" Author: [FIXME: author] [see http://www.docbook.org/tdg5/en/html/author] 4 | .\" Generator: DocBook XSL Stylesheets vsnapshot 5 | .\" Date: 05/15/2025 6 | .\" Manual: ublk: manage ublk devices 7 | .\" Source: ublk 8 | .\" Language: English 9 | .\" 10 | .TH "UBLK" "1" "05/15/2025" "ublk" "ublk: manage ublk devices" 11 | .\" ----------------------------------------------------------------- 12 | .\" * Define some portability stuff 13 | .\" ----------------------------------------------------------------- 14 | .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 15 | .\" http://bugs.debian.org/507673 16 | .\" http://lists.gnu.org/archive/html/groff/2009-02/msg00013.html 17 | .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 18 | .ie \n(.g .ds Aq \(aq 19 | .el .ds Aq ' 20 | .\" ----------------------------------------------------------------- 21 | .\" * set default formatting 22 | .\" ----------------------------------------------------------------- 23 | .\" disable hyphenation 24 | .nh 25 | .\" disable justification (adjust text to left margin only) 26 | .ad l 27 | .\" ----------------------------------------------------------------- 28 | .\" * MAIN CONTENT STARTS HERE * 29 | .\" ----------------------------------------------------------------- 30 | .SH "NAME" 31 | ublk \- Utility to manage ublk devices 32 | .SH "SYNOPSIS" 33 | 
.HP \w'\fBublk\ \ []\ []\fR\ 'u 34 | \fBublk [] []\fR 35 | .SH "DESCRIPTION" 36 | .PP 37 | ublk is a utility that allows you to create, recover, view or delete user\-space block devices\&. 38 | .PP 39 | ublk by default comes with several different types of devices, such as iscsi, nbd, nfs, null and loop\&. 40 | .PP 41 | The following commands are supported: 42 | .SH "ADD COMMAND" 43 | .PP 44 | Command to add a ublk device\&. 45 | .PP 46 | \fB add {\-t, \-\-type} TYPE [{\-n, \-\-number} DEV_ID] [{\-q, \-\-queues} NR_HW_QUEUES] [{\-d, \-\-depth} QUEUE_DEPTH] [{\-u, \-\-uring_comp} URING_COMP] [{\-g, \-\-need\-get\-data} NEED_GET_DATA] [{\-r, \-\-user_recovery} {0|1}] [{\-i, \-\-user_recovery_reissue} {0|1}] [{\-e, \-\-user_recovery_fail_io} {0|1}] [\-\-debug_mask=0x{DBG_MASK}] [\-\-unprivileged] [\-\-usercopy] [{\-z, \-\-zerocopy}] [] \fR 47 | .PP 48 | \fB\-t, \-\-type\fR 49 | .RS 4 50 | Specifies the type of device to create\&. The five types of supported devices are iscsi, nbd, nfs, null and loop\&. 51 | .RE 52 | .PP 53 | \fB\-n, \-\-number\fR 54 | .RS 4 55 | Create a device with this id\&. The device node will be /dev/ublkb_n_\&. 56 | .RE 57 | .PP 58 | \fB\-q, \-\-queue\fR 59 | .RS 4 60 | Number of queues to create\&. Each queue is serviced by a dedicated child process\&. Default is 1\&. 61 | .RE 62 | .PP 63 | \fB\-d, \-\-depth\fR 64 | .RS 4 65 | Maximum queue\-depth for each queue\&. Default is 4096\&. 66 | .RE 67 | .PP 68 | \fB\-u, \-\-uring_comp\fR 69 | .RS 4 70 | Force to complete io cmd via io_uring_cmd_complete_in_task so that performance comparison is done easily with using task_work_add\&. 71 | .RE 72 | .PP 73 | \fB\-g, \-\-need_get_data\fR 74 | .RS 4 75 | User should issue io cmd again for write requests to set io buffer address and copy data from bio vectors to the userspace io buffer\&. 76 | .RE 77 | .PP 78 | \fB\-r, \-\-user_recovery\fR 79 | .RS 4 80 | Block devices are recoverable if ublk server exits and restarts\&.
Outstanding I/O when ublk server exits is met with errors\&. I/O issued while there is no ublk server is queued\&. 81 | .RE 82 | .PP 83 | \fB\-i, \-\-user_recovery_reissue\fR 84 | .RS 4 85 | Block devices are recoverable if ublk server exits and restarts\&. Outstanding I/O when ublk server exits is reissued\&. I/O issued while there is no ublk server is queued\&. 86 | .RE 87 | .PP 88 | \fB\-e, \-\-user_recovery_fail_io\fR 89 | .RS 4 90 | Block devices are recoverable if ublk server exits and restarts\&. Outstanding I/O when ublk server exits is met with errors\&. I/O issued while there is no ublk server is met with errors\&. 91 | .RE 92 | .PP 93 | \fB\-\-debug_mask\fR 94 | .RS 4 95 | Bitmask specifying which debug features to enable\&. 96 | .RE 97 | .PP 98 | \fB\-\-unprivileged\fR 99 | .RS 4 100 | Unprivileged user can create /dev/ublkcN and /dev/ublkbN\&. 101 | .sp 102 | /dev/ublk\-control needs to be available for unprivileged user, and it can be done via udev rule to make all control commands available to unprivileged user\&. Except for the command of UBLK_CMD_ADD_DEV, all other commands are only allowed for the owner of the specified device\&. 103 | .sp 104 | When userspace sends UBLK_CMD_ADD_DEV, the device pair\*(Aqs owner_uid and owner_gid are stored to ublksrv_ctrl_dev_info by kernel, so far only the current user\*(Aqs uid/gid is stored, that said owner of the created device is always the current user\&. 105 | .sp 106 | We still need udev rule to apply OWNER/GROUP with the stored owner_uid and owner_gid\&. 107 | .sp 108 | Then ublk server can be run as unprivileged user, and /dev/ublkbN can be accessed and managed by its owner represented by owner_uid/owner_gid\&.
109 | .RE 110 | .PP 111 | \fB\-\-user_copy\fR 112 | .RS 4 113 | Copy between request and user buffer by pread()/pwrite() 114 | .RE 115 | .PP 116 | \fB\-z, \-\-zerocopy\fR 117 | .RS 4 118 | Zero\-copy is based on io\-uring uring_cmd of REGISTER_IO_BUF & UNREGISTER_IO_BUF, which avoids data copy between ublk frontend request buffer and ublk server buffer, so memory bandwidth is saved, and throughput & latency improvement can be often observed on large I/O size 119 | .sp 120 | This requires Linux kernel 6\&.15 or later\&. 121 | .RE 122 | .SS "NULL" 123 | .PP 124 | The null device type does not take any extra options\&. 125 | .SS "LOOP" 126 | .PP 127 | Extra options for the loop device type: 128 | .PP 129 | \fB add \-t loop \&.\&.\&. {\-f, \-\-file} FILE [\-\-buffered_io] [\-o, \-\-offset OFFSET] \fR 130 | .PP 131 | \fB\-f, \-\-file\fR 132 | .RS 4 133 | File to use as backing storage for the loop device\&. 134 | .RE 135 | .PP 136 | \fB\-\-buffered_io\fR 137 | .RS 4 138 | Use buffered i/o for accessing the backing file\&. Default is direct i/o\&. 139 | .RE 140 | .PP 141 | \fB\-\-offset\fR 142 | .RS 4 143 | Offset skips first NUM sectors on backing file\&. 144 | .RE 145 | .PP 146 | Example: Create a loop block device 147 | .sp 148 | .if n \{\ 149 | .RS 4 150 | .\} 151 | .nf 152 | # ublk add \-t loop \-n 0 \-f 10M\&.raw 153 | 154 | .fi 155 | .if n \{\ 156 | .RE 157 | .\} 158 | .sp 159 | .SS "NBD" 160 | .PP 161 | Extra options for the nbd (Network Block Device) device type: 162 | .PP 163 | \fB add \-t nbd \&.\&.\&. {\-\-host HOST | \-\-unix UNIX_PATH} \-\-export_name EXP_NAME [\-\-send_zc] [\-\-read_only] \fR 164 | .PP 165 | \fB\-\-host\fR 166 | .RS 4 167 | Hostname of NBD server\&. 168 | .RE 169 | .PP 170 | \fB\-\-host\fR 171 | .RS 4 172 | Hostname of NBD server to use\&. 173 | .RE 174 | .PP 175 | \fB\-\-unix\fR 176 | .RS 4 177 | Path to unix domain socket to use to talk to NBD\&. 178 | .RE 179 | .PP 180 | \fB\-\-export_name\fR 181 | .RS 4 182 | Name of NBD export\&. 
183 | .RE 184 | .PP 185 | \fB\-\-send_zc\fR 186 | .RS 4 187 | Use Zero\-Copy\&. 188 | .RE 189 | .PP 190 | \fB\-\-read_only\fR 191 | .RS 4 192 | Read\-only device\&. 193 | .RE 194 | .SS "NFS" 195 | .PP 196 | Extra options for the nfs device type: 197 | .PP 198 | \fB add \-t nfs \&.\&.\&. \-\-nfs NFS\-URL \fR 199 | .PP 200 | \fB\-\-nfs NFS\-URL\fR 201 | .RS 4 202 | URL to the NFS file to use as the block device\&. 203 | .sp 204 | The NFS\-URL format is described in the libnfs README: https://github\&.com/sahlberg/libnfs/blob/9fa155bfa9d34347a669fbecf4a64259cc573724/README#L55 205 | .RE 206 | .PP 207 | Example: Create an NFS block device 208 | .sp 209 | .if n \{\ 210 | .RS 4 211 | .\} 212 | .nf 213 | # ublk add \-t nfs \-n 0 \-\-nfs nfs://10\&.0\&.0\&.1/export/10M\&.raw 214 | 215 | .fi 216 | .if n \{\ 217 | .RE 218 | .\} 219 | .sp 220 | .SS "iSCSI" 221 | .PP 222 | Extra options for the iSCSI device type\&. iSCSI support requires libiscsi 1\&.20\&.1 or later\&. 223 | .PP 224 | \fB add \-t iscsi \&.\&.\&. \-\-iscsi ISCSI\-URL \-\-initiator\-name NAME \fR 225 | .PP 226 | \fB\-\-iscsi ISCSI\-URL\fR 227 | .RS 4 228 | URL to the iSCSI device to use as the block device\&. 229 | .sp 230 | The ISCSI\-URL format is described in the libiscsi README: https://github\&.com/sahlberg/libiscsi/blob/eb19863f77e2bad4799ceb90e47fa3bc6205233e/README\&.md?plain=1#L37 231 | .RE 232 | .PP 233 | \fB\-\-initiator\-name NAME\fR 234 | .RS 4 235 | The initiator name to use when logging in to the target\&. 236 | .RE 237 | .PP 238 | Example: Create an iSCSI block device 239 | .sp 240 | .if n \{\ 241 | .RS 4 242 | .\} 243 | .nf 244 | # ublk add \-t iscsi \-n 0 \-\-iscsi iscsi://iscsi\-stgt/iqn\&.2001\-04\&.com\&.ronnie\&.sr0/1 \-\-initiator\-name iqn\&.ronnie\&.test 245 | 246 | .fi 247 | .if n \{\ 248 | .RE 249 | .\} 250 | .sp 251 | .SH "DEL COMMAND" 252 | .PP 253 | Command to delete a ublk device\&.
254 | .PP 255 | \fB del {\-n, \-\-number} DEV_ID [\-a, \-\-all] [\-\-async] \fR 256 | .PP 257 | \fB\-n, \-\-number\fR 258 | .RS 4 259 | Delete the device with this id\&. 260 | .RE 261 | .PP 262 | \fB\-a, \-\-all\fR 263 | .RS 4 264 | Delete all devices\&. 265 | .RE 266 | .PP 267 | Example: Deleting a loop block device 268 | .sp 269 | .if n \{\ 270 | .RS 4 271 | .\} 272 | .nf 273 | # ublk del \-n 0 274 | 275 | .fi 276 | .if n \{\ 277 | .RE 278 | .\} 279 | .sp 280 | .SH "SET_AFFINITY COMMAND" 281 | .PP 282 | Command to change queue affinity\&. 283 | .PP 284 | \fB set_affinity {\-n, \-\-number} DEV_ID [\-q, \-\-queue] QID \-\-cpuset="[SET]" \fR 285 | .PP 286 | \fB\-n, \-\-number\fR 287 | .RS 4 288 | Change the affinity on this device\&. 289 | .RE 290 | .PP 291 | \fB\-q, \-\-queue\fR 292 | .RS 4 293 | Which queue to change the affinity for\&. 294 | .RE 295 | .PP 296 | \fB\-\-cpuset="[SET]"\fR 297 | .RS 4 298 | The new cpuset for this device/queue\&. Format is a comma\-separated list of CPUs within square brackets\&. 299 | .RE 300 | .PP 301 | Example: Set affinity to core 7 for device 0, queue 1 302 | .sp 303 | .if n \{\ 304 | .RS 4 305 | .\} 306 | .nf 307 | # ublk set_affinity \-n 0 \-q 1 \-\-cpuset="[7]" 308 | 309 | .fi 310 | .if n \{\ 311 | .RE 312 | .\} 313 | .sp 314 | .SH "LIST COMMAND" 315 | .PP 316 | List one or all devices and show their configuration\&. 317 | .PP 318 | \fB list {\-n, \-\-number} DEV_ID [\-v, \-\-verbose] \fR 319 | .PP 320 | \fB\-n, \-\-number\fR 321 | .RS 4 322 | List the device with this id\&. If omitted, all devices will be listed\&. 323 | .RE 324 | .PP 325 | \fB\-v, \-\-verbose\fR 326 | .RS 4 327 | Verbose listing\&. Include the JSON device arguments in the output\&. 328 | .RE 329 | .SH "RECOVER COMMAND" 330 | .PP 331 | Recover a failed ublk device\&. 332 | .PP 333 | \fB recover {\-n, \-\-number} DEV_ID \fR 334 | .PP 335 | \fB\-n, \-\-number\fR 336 | .RS 4 337 | Device to recover\&.
338 | .RE 339 | .SH "FEATURES COMMAND" 340 | .PP 341 | Show supported features for the ublk driver\&. 342 | .PP 343 | \fB features \fR 344 | .SH "HELP COMMAND" 345 | .PP 346 | Show generic or type specific help\&. 347 | .PP 348 | \fB help [{\-t, \-\-type} TYPE] \fR 349 | .PP 350 | \fB\-t, \-\-type\fR 351 | .RS 4 352 | Show help page\&. If \-t is specified, show help page for the specific device type\&. 353 | .RE 354 | .SH "VERSION" 355 | .PP 356 | Show the ublk version\&. 357 | .PP 358 | \fB {\-v, \-\-version} \fR 359 | .SH "RECOVERY" 360 | .PP 361 | There are three arguments that control how ublk will behave in case of a failure, such as crashing\&. The default behavior is no recovery and the device will fail and be removed once the target exits\&. 362 | .PP 363 | To enable recovery mode set "\-\-user_recovery 1" on the command line\&. Then instead of removing the device upon failure it will instead become inactive in a quiesced state\&. 364 | .sp 365 | .if n \{\ 366 | .RS 4 367 | .\} 368 | .nf 369 | dev id 0: nr_hw_queues 1 queue_depth 128 block size 4096 dev_capacity 20480 370 | max rq size 524288 daemon pid 1239110 state QUIESCED 371 | flags 0x4a [ URING_CMD_COMP_IN_TASK RECOVERY CMD_IOCTL_ENCODE ] 372 | ublkc: 511:0 ublkb: 259:4 owner: 0:0 373 | queue 0: tid 1239112 affinity(0 1 2 3 4 5 6 7 ) 374 | target {"backing_file":"10M","dev_size":10485760,"direct_io":1,"name":"loop","offset":0,"type":0} 375 | 376 | .fi 377 | .if n \{\ 378 | .RE 379 | .\} 380 | .sp 381 | In this state the block device still exists but no I/O can be performed\&. 382 | .PP 383 | To recover a QUIESCED device you can use the recover command: 384 | \fB ublk recover \-n DEV_ID \fR 385 | .PP 386 | There are two additional flags that control how ublk will handle I/O that were in flight when a device is recovered\&. 387 | .SS "\-\-user_recovery_reissue 1" 388 | .PP 389 | When the device is recovered ublk will reissue any I/O that were in flight\&.
390 | .SS "\-\-user_recovery_fail_io 1" 391 | .PP 392 | When the device is recovered ublk will fail all I/O and return an error back to the application\&. 393 | .SH "SEE ALSO" 394 | .PP 395 | \m[blue]\fB\%http://github.com/ublk-org/ublksrv\fR\m[] 396 | -------------------------------------------------------------------------------- /doc/ublk_intro.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ublk-org/ublksrv/7fc74012e5ee8ab6d3ebda12cfeaf75d7b1b135a/doc/ublk_intro.pdf -------------------------------------------------------------------------------- /include/Makefile.am: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: MIT or GPL-2.0-only 2 | 3 | # Public headers. 4 | include_HEADERS = ublksrv.h ublk_cmd.h ublksrv_aio.h ublksrv_utils.h 5 | 6 | -------------------------------------------------------------------------------- /include/ublk_cmd.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) or MIT */ 2 | #ifndef USER_BLK_DRV_CMD_INC_H 3 | #define USER_BLK_DRV_CMD_INC_H 4 | 5 | #include 6 | 7 | /* ublk server command definition */ 8 | 9 | /* 10 | * Admin commands, issued by ublk server, and handled by ublk driver. 
11 | * 12 | * Legacy command definition, don't use in new application, and don't 13 | * add new such definition any more 14 | */ 15 | #define UBLK_CMD_GET_QUEUE_AFFINITY 0x01 16 | #define UBLK_CMD_GET_DEV_INFO 0x02 17 | #define UBLK_CMD_ADD_DEV 0x04 18 | #define UBLK_CMD_DEL_DEV 0x05 19 | #define UBLK_CMD_START_DEV 0x06 20 | #define UBLK_CMD_STOP_DEV 0x07 21 | #define UBLK_CMD_SET_PARAMS 0x08 22 | #define UBLK_CMD_GET_PARAMS 0x09 23 | #define UBLK_CMD_START_USER_RECOVERY 0x10 24 | #define UBLK_CMD_END_USER_RECOVERY 0x11 25 | #define UBLK_CMD_GET_DEV_INFO2 0x12 26 | 27 | /* Any new ctrl command should encode by __IO*() */ 28 | #define UBLK_U_CMD_GET_QUEUE_AFFINITY \ 29 | _IOR('u', UBLK_CMD_GET_QUEUE_AFFINITY, struct ublksrv_ctrl_cmd) 30 | #define UBLK_U_CMD_GET_DEV_INFO \ 31 | _IOR('u', UBLK_CMD_GET_DEV_INFO, struct ublksrv_ctrl_cmd) 32 | #define UBLK_U_CMD_ADD_DEV \ 33 | _IOWR('u', UBLK_CMD_ADD_DEV, struct ublksrv_ctrl_cmd) 34 | #define UBLK_U_CMD_DEL_DEV \ 35 | _IOWR('u', UBLK_CMD_DEL_DEV, struct ublksrv_ctrl_cmd) 36 | #define UBLK_U_CMD_START_DEV \ 37 | _IOWR('u', UBLK_CMD_START_DEV, struct ublksrv_ctrl_cmd) 38 | #define UBLK_U_CMD_STOP_DEV \ 39 | _IOWR('u', UBLK_CMD_STOP_DEV, struct ublksrv_ctrl_cmd) 40 | #define UBLK_U_CMD_SET_PARAMS \ 41 | _IOWR('u', UBLK_CMD_SET_PARAMS, struct ublksrv_ctrl_cmd) 42 | #define UBLK_U_CMD_GET_PARAMS \ 43 | _IOR('u', UBLK_CMD_GET_PARAMS, struct ublksrv_ctrl_cmd) 44 | #define UBLK_U_CMD_START_USER_RECOVERY \ 45 | _IOWR('u', UBLK_CMD_START_USER_RECOVERY, struct ublksrv_ctrl_cmd) 46 | #define UBLK_U_CMD_END_USER_RECOVERY \ 47 | _IOWR('u', UBLK_CMD_END_USER_RECOVERY, struct ublksrv_ctrl_cmd) 48 | #define UBLK_U_CMD_GET_DEV_INFO2 \ 49 | _IOR('u', UBLK_CMD_GET_DEV_INFO2, struct ublksrv_ctrl_cmd) 50 | #define UBLK_U_CMD_GET_FEATURES \ 51 | _IOR('u', 0x13, struct ublksrv_ctrl_cmd) 52 | #define UBLK_U_CMD_DEL_DEV_ASYNC \ 53 | _IOR('u', 0x14, struct ublksrv_ctrl_cmd) 54 | 55 | /* 56 | * 64bits are enough now, and it should be easy to extend 
in case of 57 | * running out of feature flags 58 | */ 59 | #define UBLK_FEATURES_LEN 8 60 | 61 | /* 62 | * IO commands, issued by ublk server, and handled by ublk driver. 63 | * 64 | * FETCH_REQ: issued via sqe(URING_CMD) beforehand for fetching IO request 65 | * from ublk driver, should be issued only when starting device. After 66 | * the associated cqe is returned, request's tag can be retrieved via 67 | * cqe->userdata. 68 | * 69 | * COMMIT_AND_FETCH_REQ: issued via sqe(URING_CMD) after ublkserver handled 70 | * this IO request, request's handling result is committed to ublk 71 | * driver, meantime FETCH_REQ is piggyback, and FETCH_REQ has to be 72 | * handled before completing io request. 73 | * 74 | * NEED_GET_DATA: only used for write requests to set io addr and copy data 75 | * When NEED_GET_DATA is set, ublksrv has to issue UBLK_IO_NEED_GET_DATA 76 | * command after ublk driver returns UBLK_IO_RES_NEED_GET_DATA. 77 | * 78 | * It is only used if ublksrv set UBLK_F_NEED_GET_DATA flag 79 | * while starting a ublk device. 
80 | */ 81 | 82 | /* 83 | * Legacy IO command definition, don't use in new application, and don't 84 | * add new such definition any more 85 | */ 86 | #define UBLK_IO_FETCH_REQ 0x20 87 | #define UBLK_IO_COMMIT_AND_FETCH_REQ 0x21 88 | #define UBLK_IO_NEED_GET_DATA 0x22 89 | 90 | /* Any new IO command should encode by __IOWR() */ 91 | #define UBLK_U_IO_FETCH_REQ \ 92 | _IOWR('u', UBLK_IO_FETCH_REQ, struct ublksrv_io_cmd) 93 | #define UBLK_U_IO_COMMIT_AND_FETCH_REQ \ 94 | _IOWR('u', UBLK_IO_COMMIT_AND_FETCH_REQ, struct ublksrv_io_cmd) 95 | #define UBLK_U_IO_NEED_GET_DATA \ 96 | _IOWR('u', UBLK_IO_NEED_GET_DATA, struct ublksrv_io_cmd) 97 | #define UBLK_U_IO_REGISTER_IO_BUF \ 98 | _IOWR('u', 0x23, struct ublksrv_io_cmd) 99 | #define UBLK_U_IO_UNREGISTER_IO_BUF \ 100 | _IOWR('u', 0x24, struct ublksrv_io_cmd) 101 | 102 | 103 | /* only ABORT means that no re-fetch */ 104 | #define UBLK_IO_RES_OK 0 105 | #define UBLK_IO_RES_NEED_GET_DATA 1 106 | #define UBLK_IO_RES_ABORT (-ENODEV) 107 | 108 | #define UBLKSRV_CMD_BUF_OFFSET 0 109 | #define UBLKSRV_IO_BUF_OFFSET 0x80000000 110 | 111 | /* tag bit is 16bit, so far limit at most 4096 IOs for each queue */ 112 | #define UBLK_MAX_QUEUE_DEPTH 4096 113 | 114 | /* single IO buffer max size is 32MB */ 115 | #define UBLK_IO_BUF_OFF 0 116 | #define UBLK_IO_BUF_BITS 25 117 | #define UBLK_IO_BUF_BITS_MASK ((1ULL << UBLK_IO_BUF_BITS) - 1) 118 | 119 | /* so at most 64K IOs for each queue */ 120 | #define UBLK_TAG_OFF UBLK_IO_BUF_BITS 121 | #define UBLK_TAG_BITS 16 122 | #define UBLK_TAG_BITS_MASK ((1ULL << UBLK_TAG_BITS) - 1) 123 | 124 | /* max 4096 queues */ 125 | #define UBLK_QID_OFF (UBLK_TAG_OFF + UBLK_TAG_BITS) 126 | #define UBLK_QID_BITS 12 127 | #define UBLK_QID_BITS_MASK ((1ULL << UBLK_QID_BITS) - 1) 128 | 129 | #define UBLK_MAX_NR_QUEUES (1U << UBLK_QID_BITS) 130 | 131 | #define UBLKSRV_IO_BUF_TOTAL_BITS (UBLK_QID_OFF + UBLK_QID_BITS) 132 | #define UBLKSRV_IO_BUF_TOTAL_SIZE (1ULL << UBLKSRV_IO_BUF_TOTAL_BITS) 133 | 134 | /* 135 | * 
zero copy requires 4k block size, and can remap ublk driver's io 136 | * request into ublksrv's vm space 137 | */ 138 | #define UBLK_F_SUPPORT_ZERO_COPY (1ULL << 0) 139 | 140 | /* 141 | * Force to complete io cmd via io_uring_cmd_complete_in_task so that 142 | * performance comparison is done easily with using task_work_add 143 | */ 144 | #define UBLK_F_URING_CMD_COMP_IN_TASK (1ULL << 1) 145 | 146 | /* 147 | * User should issue io cmd again for write requests to 148 | * set io buffer address and copy data from bio vectors 149 | * to the userspace io buffer. 150 | * 151 | * In this mode, task_work is not used. 152 | */ 153 | #define UBLK_F_NEED_GET_DATA (1UL << 2) 154 | 155 | /* 156 | * - Block devices are recoverable if ublk server exits and restarts 157 | * - Outstanding I/O when ublk server exits is met with errors 158 | * - I/O issued while there is no ublk server queues 159 | */ 160 | #define UBLK_F_USER_RECOVERY (1UL << 3) 161 | 162 | /* 163 | * - Block devices are recoverable if ublk server exits and restarts 164 | * - Outstanding I/O when ublk server exits is reissued 165 | * - I/O issued while there is no ublk server queues 166 | */ 167 | #define UBLK_F_USER_RECOVERY_REISSUE (1UL << 4) 168 | 169 | /* 170 | * Unprivileged user can create /dev/ublkcN and /dev/ublkbN. 171 | * 172 | * /dev/ublk-control needs to be available for unprivileged user, and it 173 | * can be done via udev rule to make all control commands available to 174 | * unprivileged user. Except for the command of UBLK_CMD_ADD_DEV, all 175 | * other commands are only allowed for the owner of the specified device. 176 | * 177 | * When userspace sends UBLK_CMD_ADD_DEV, the device pair's owner_uid and 178 | * owner_gid are stored to ublksrv_ctrl_dev_info by kernel, so far only 179 | * the current user's uid/gid is stored, that said owner of the created 180 | * device is always the current user. 
181 | * 182 | * We still need udev rule to apply OWNER/GROUP with the stored owner_uid 183 | * and owner_gid. 184 | * 185 | * Then ublk server can be run as unprivileged user, and /dev/ublkbN can 186 | * be accessed and managed by its owner represented by owner_uid/owner_gid. 187 | */ 188 | #define UBLK_F_UNPRIVILEGED_DEV (1UL << 5) 189 | 190 | /* use ioctl encoding for uring command */ 191 | #define UBLK_F_CMD_IOCTL_ENCODE (1UL << 6) 192 | 193 | /* Copy between request and user buffer by pread()/pwrite() */ 194 | #define UBLK_F_USER_COPY (1UL << 7) 195 | 196 | /* 197 | * User space sets this flag when setting up the device to request zoned storage support. Kernel may 198 | * deny the request by returning an error. 199 | */ 200 | #define UBLK_F_ZONED (1ULL << 8) 201 | 202 | /* 203 | * - Block devices are recoverable if ublk server exits and restarts 204 | * - Outstanding I/O when ublk server exits is met with errors 205 | * - I/O issued while there is no ublk server is met with errors 206 | */ 207 | #define UBLK_F_USER_RECOVERY_FAIL_IO (1ULL << 9) 208 | 209 | /* device state */ 210 | #define UBLK_S_DEV_DEAD 0 211 | #define UBLK_S_DEV_LIVE 1 212 | #define UBLK_S_DEV_QUIESCED 2 213 | #define UBLK_S_DEV_FAIL_IO 3 214 | 215 | /* shipped via sqe->cmd of io_uring command */ 216 | struct ublksrv_ctrl_cmd { 217 | /* sent to which device, must be valid */ 218 | __u32 dev_id; 219 | 220 | /* sent to which queue, must be -1 if the cmd isn't for queue */ 221 | __u16 queue_id; 222 | /* 223 | * cmd specific buffer, can be IN or OUT. 
224 | */ 225 | __u16 len; 226 | __u64 addr; 227 | 228 | /* inline data */ 229 | __u64 data[1]; 230 | 231 | /* 232 | * Used for UBLK_F_UNPRIVILEGED_DEV and UBLK_CMD_GET_DEV_INFO2 233 | * only, include null char 234 | */ 235 | __u16 dev_path_len; 236 | __u16 pad; 237 | __u32 reserved; 238 | }; 239 | 240 | struct ublksrv_ctrl_dev_info { 241 | __u16 nr_hw_queues; 242 | __u16 queue_depth; 243 | __u16 state; 244 | __u16 pad0; 245 | 246 | __u32 max_io_buf_bytes; 247 | __u32 dev_id; 248 | 249 | __s32 ublksrv_pid; 250 | __u32 pad1; 251 | 252 | __u64 flags; 253 | 254 | /* For ublksrv internal use, invisible to ublk driver */ 255 | __u64 ublksrv_flags; 256 | 257 | __u32 owner_uid; /* store by kernel */ 258 | __u32 owner_gid; /* store by kernel */ 259 | __u64 reserved1; 260 | __u64 reserved2; 261 | }; 262 | 263 | #define UBLK_IO_OP_READ 0 264 | #define UBLK_IO_OP_WRITE 1 265 | #define UBLK_IO_OP_FLUSH 2 266 | #define UBLK_IO_OP_DISCARD 3 267 | #define UBLK_IO_OP_WRITE_SAME 4 268 | #define UBLK_IO_OP_WRITE_ZEROES 5 269 | #define UBLK_IO_OP_ZONE_OPEN 10 270 | #define UBLK_IO_OP_ZONE_CLOSE 11 271 | #define UBLK_IO_OP_ZONE_FINISH 12 272 | #define UBLK_IO_OP_ZONE_APPEND 13 273 | #define UBLK_IO_OP_ZONE_RESET_ALL 14 274 | #define UBLK_IO_OP_ZONE_RESET 15 275 | /* 276 | * Construct a zone report. The report request is carried in `struct 277 | * ublksrv_io_desc`. The `start_sector` field must be the first sector of a zone 278 | * and shall indicate the first zone of the report. The `nr_zones` shall 279 | * indicate how many zones should be reported at most. The report shall be 280 | * delivered as a `struct blk_zone` array. To report fewer zones than requested, 281 | * zero the last entry of the returned array. 282 | * 283 | * Related definitions(blk_zone, blk_zone_cond, blk_zone_type, ...) in 284 | * include/uapi/linux/blkzoned.h are part of ublk UAPI. 
285 | */ 286 | #define UBLK_IO_OP_REPORT_ZONES 18 287 | 288 | #define UBLK_IO_F_FAILFAST_DEV (1U << 8) 289 | #define UBLK_IO_F_FAILFAST_TRANSPORT (1U << 9) 290 | #define UBLK_IO_F_FAILFAST_DRIVER (1U << 10) 291 | #define UBLK_IO_F_META (1U << 11) 292 | #define UBLK_IO_F_FUA (1U << 13) 293 | #define UBLK_IO_F_NOUNMAP (1U << 15) 294 | #define UBLK_IO_F_SWAP (1U << 16) 295 | 296 | /* 297 | * io cmd is described by this structure, and stored in share memory, indexed 298 | * by request tag. 299 | * 300 | * The data is stored by ublk driver, and read by ublksrv after one fetch command 301 | * returns. 302 | */ 303 | struct ublksrv_io_desc { 304 | /* op: bit 0-7, flags: bit 8-31 */ 305 | __u32 op_flags; 306 | 307 | union { 308 | __u32 nr_sectors; 309 | __u32 nr_zones; /* for UBLK_IO_OP_REPORT_ZONES */ 310 | }; 311 | 312 | /* start sector for this io */ 313 | __u64 start_sector; 314 | 315 | /* buffer address in ublksrv daemon vm space, from ublk driver */ 316 | __u64 addr; 317 | }; 318 | 319 | static inline __u8 ublksrv_get_op(const struct ublksrv_io_desc *iod) 320 | { 321 | return iod->op_flags & 0xff; 322 | } 323 | 324 | static inline __u32 ublksrv_get_flags(const struct ublksrv_io_desc *iod) 325 | { 326 | return iod->op_flags >> 8; 327 | } 328 | 329 | /* issued to ublk driver via /dev/ublkcN */ 330 | struct ublksrv_io_cmd { 331 | __u16 q_id; 332 | 333 | /* for fetch/commit which result */ 334 | __u16 tag; 335 | 336 | /* io result, it is valid for COMMIT* command only */ 337 | __s32 result; 338 | 339 | union { 340 | /* 341 | * userspace buffer address in ublksrv daemon process, valid for 342 | * FETCH* command only 343 | * 344 | * `addr` should not be used when UBLK_F_USER_COPY is enabled, 345 | * because userspace handles data copy by pread()/pwrite() over 346 | * /dev/ublkcN. 
But in case of UBLK_F_ZONED, this union is 347 | * re-used to pass back the allocated LBA for 348 | * UBLK_IO_OP_ZONE_APPEND which actually depends on 349 | * UBLK_F_USER_COPY 350 | */ 351 | __u64 addr; 352 | __u64 zone_append_lba; 353 | }; 354 | }; 355 | 356 | struct ublk_param_basic { 357 | #define UBLK_ATTR_READ_ONLY (1 << 0) 358 | #define UBLK_ATTR_ROTATIONAL (1 << 1) 359 | #define UBLK_ATTR_VOLATILE_CACHE (1 << 2) 360 | #define UBLK_ATTR_FUA (1 << 3) 361 | __u32 attrs; 362 | __u8 logical_bs_shift; 363 | __u8 physical_bs_shift; 364 | __u8 io_opt_shift; 365 | __u8 io_min_shift; 366 | 367 | __u32 max_sectors; 368 | __u32 chunk_sectors; 369 | 370 | __u64 dev_sectors; 371 | __u64 virt_boundary_mask; 372 | }; 373 | 374 | struct ublk_param_discard { 375 | __u32 discard_alignment; 376 | 377 | __u32 discard_granularity; 378 | __u32 max_discard_sectors; 379 | 380 | __u32 max_write_zeroes_sectors; 381 | __u16 max_discard_segments; 382 | __u16 reserved0; 383 | }; 384 | 385 | /* 386 | * read-only, can't set via UBLK_CMD_SET_PARAMS, disk_devt is available 387 | * after device is started 388 | */ 389 | struct ublk_param_devt { 390 | __u32 char_major; 391 | __u32 char_minor; 392 | __u32 disk_major; 393 | __u32 disk_minor; 394 | }; 395 | 396 | struct ublk_param_zoned { 397 | __u32 max_open_zones; 398 | __u32 max_active_zones; 399 | __u32 max_zone_append_sectors; 400 | __u8 reserved[20]; 401 | }; 402 | 403 | struct ublk_param_dma_align { 404 | __u32 alignment; 405 | __u8 pad[4]; 406 | }; 407 | 408 | struct ublk_params { 409 | /* 410 | * Total length of parameters, userspace has to set 'len' for both 411 | * SET_PARAMS and GET_PARAMS command, and driver may update len 412 | * if two sides use different version of 'ublk_params', same with 413 | * 'types' fields. 
414 | */ 415 | __u32 len; 416 | #define UBLK_PARAM_TYPE_BASIC (1 << 0) 417 | #define UBLK_PARAM_TYPE_DISCARD (1 << 1) 418 | #define UBLK_PARAM_TYPE_DEVT (1 << 2) 419 | #define UBLK_PARAM_TYPE_ZONED (1 << 3) 420 | #define UBLK_PARAM_TYPE_DMA_ALIGN (1 << 4) 421 | __u32 types; /* types of parameter included */ 422 | 423 | struct ublk_param_basic basic; 424 | struct ublk_param_discard discard; 425 | struct ublk_param_devt devt; 426 | struct ublk_param_zoned zoned; 427 | struct ublk_param_dma_align dma; 428 | }; 429 | 430 | #endif 431 | -------------------------------------------------------------------------------- /include/ublksrv_aio.h: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: MIT or LGPL-2.1-only 2 | 3 | #ifndef UBLKSRV_AIO_INC_H 4 | #define UBLKSRV_AIO_INC_H 5 | 6 | /* 7 | * APIs for offloading IO handling in non-ublksrv context, refer to 8 | * demo_event.c for how to use these APIs 9 | */ 10 | 11 | #ifdef __cplusplus 12 | extern "C" { 13 | #endif 14 | 15 | struct ublksrv_aio_ctx; 16 | struct ublksrv_aio; 17 | 18 | /* 19 | * return value: 20 | * 21 | * > 0 : the request is done 22 | * = 0 : submitted successfully, but not done 23 | * < 0 : submitted not successfully 24 | */ 25 | typedef int (ublksrv_aio_submit_fn)(struct ublksrv_aio_ctx *ctx, 26 | struct ublksrv_aio *req); 27 | 28 | #define ublksrv_aio_qid(val) ((val >> 13) & 0x7ff) 29 | #define ublksrv_aio_tag(val) (val & 0x1fff) 30 | 31 | static inline unsigned ublksrv_aio_pid_tag(unsigned qid, unsigned tag) 32 | { 33 | return tag | (qid << 13); 34 | } 35 | 36 | struct ublksrv_aio { 37 | struct ublksrv_io_desc io; 38 | union { 39 | int res; /* output */ 40 | int fd; /* input */ 41 | }; 42 | 43 | /* reserved 31 ~ 24, bit 23 ~ 13: qid, bit 12 ~ 0: tag */ 44 | unsigned id; 45 | struct ublksrv_aio *next; 46 | unsigned long data[0]; 47 | }; 48 | 49 | struct aio_list { 50 | struct ublksrv_aio *head, *tail; 51 | }; 52 | 53 | static inline void 
aio_list_init(struct aio_list *al) 54 | { 55 | al->head = al->tail = NULL; 56 | } 57 | 58 | static inline void aio_list_add(struct aio_list *al, struct ublksrv_aio *io) 59 | { 60 | io->next = NULL; 61 | 62 | if (al->tail) 63 | al->tail->next = io; 64 | else 65 | al->head = io; 66 | al->tail = io; 67 | } 68 | 69 | static inline void aio_list_splice(struct aio_list *n, 70 | struct aio_list *head) 71 | { 72 | if (!n->head) 73 | return; 74 | 75 | if (head->tail) 76 | head->tail->next = n->head; 77 | else 78 | head->head = n->head; 79 | 80 | head->tail = n->tail; 81 | 82 | aio_list_init(n); 83 | } 84 | 85 | static inline int aio_list_empty(const struct aio_list *al) 86 | { 87 | return al->head == NULL; 88 | } 89 | 90 | static inline struct ublksrv_aio *aio_list_pop(struct aio_list *al) 91 | { 92 | struct ublksrv_aio *io = al->head; 93 | 94 | if (io) { 95 | al->head = io->next; 96 | if (!al->head) 97 | al->tail = NULL; 98 | 99 | io->next = NULL; 100 | } 101 | 102 | return io; 103 | } 104 | 105 | struct ublksrv_aio_list { 106 | pthread_spinlock_t lock; 107 | struct aio_list list; 108 | }; 109 | 110 | static inline void ublksrv_aio_init_list(struct ublksrv_aio_list *l) 111 | { 112 | pthread_spin_init(&l->lock, PTHREAD_PROCESS_PRIVATE); 113 | aio_list_init(&l->list); 114 | } 115 | 116 | struct ublksrv_aio_ctx *ublksrv_aio_ctx_init(const struct ublksrv_dev *dev, 117 | unsigned flags); 118 | void ublksrv_aio_ctx_shutdown(struct ublksrv_aio_ctx *ctx); 119 | void ublksrv_aio_ctx_deinit(struct ublksrv_aio_ctx *ctx); 120 | struct ublksrv_aio *ublksrv_aio_alloc_req(struct ublksrv_aio_ctx *ctx, 121 | int payload_size); 122 | void ublksrv_aio_free_req(struct ublksrv_aio_ctx *ctx, struct ublksrv_aio *req); 123 | void ublksrv_aio_submit_req(struct ublksrv_aio_ctx *ctx, 124 | const struct ublksrv_queue *q, struct ublksrv_aio *req); 125 | void ublksrv_aio_get_completed_reqs(struct ublksrv_aio_ctx *ctx, 126 | const struct ublksrv_queue *q, 127 | struct aio_list *al); 128 | int 
ublksrv_aio_submit_worker(struct ublksrv_aio_ctx *ctx, 129 | ublksrv_aio_submit_fn *fn, struct aio_list *submitted); 130 | void ublksrv_aio_complete_worker(struct ublksrv_aio_ctx *ctx, 131 | struct aio_list *completed); 132 | void ublksrv_aio_handle_event(struct ublksrv_aio_ctx *ctx, 133 | const struct ublksrv_queue *q); 134 | int ublksrv_aio_get_efd(struct ublksrv_aio_ctx *ctx); 135 | void ublksrv_aio_set_ctx_data(struct ublksrv_aio_ctx *ctx, void *data); 136 | void *ublksrv_aio_get_ctx_data(struct ublksrv_aio_ctx *ctx); 137 | bool ublksrv_aio_ctx_dead(struct ublksrv_aio_ctx *ctx); 138 | const struct ublksrv_dev *ublksrv_aio_get_dev(struct ublksrv_aio_ctx *ctx); 139 | 140 | #ifdef __cplusplus 141 | } 142 | #endif 143 | #endif 144 | -------------------------------------------------------------------------------- /include/ublksrv_priv.h: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: MIT or GPL-2.0-only 2 | 3 | #if !defined(UBLKSRV_INTERNAL_H_) 4 | #error "Never include directly; use instead." 
5 | #endif 6 | 7 | #ifndef UBLKSRV_PRIVATE_INC_H 8 | #define UBLKSRV_PRIVATE_INC_H 9 | 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | 22 | #include "ublk_cmd.h" 23 | #include "ublksrv_utils.h" 24 | #include "ublksrv.h" 25 | #include "ublksrv_aio.h" 26 | 27 | 28 | /* todo: relace the hardcode name with /dev/char/maj:min */ 29 | #ifdef UBLKC_PREFIX 30 | #define UBLKC_DEV UBLKC_PREFIX "/ublkc" 31 | #else 32 | #define UBLKC_DEV "/dev/ublkc" 33 | #endif 34 | #define UBLKC_PATH_MAX 32 35 | 36 | #ifdef __cplusplus 37 | extern "C" { 38 | #endif 39 | 40 | struct ublksrv_tgt_jbuf { 41 | pthread_mutex_t lock; 42 | int jbuf_size; 43 | char *jbuf; 44 | }; 45 | 46 | struct ublksrv_ctrl_data { 47 | struct ublksrv_tgt_jbuf jbuf; 48 | bool recover; 49 | }; 50 | 51 | struct ublksrv_ctrl_dev { 52 | struct io_uring ring; 53 | 54 | int ctrl_fd; 55 | unsigned bs_shift; 56 | struct ublksrv_ctrl_dev_info dev_info; 57 | struct ublksrv_ctrl_data *data; 58 | 59 | const char *tgt_type; 60 | const struct ublksrv_tgt_type *tgt_ops; 61 | 62 | /* 63 | * default is UBLKSRV_RUN_DIR but can be specified via command line, 64 | * pid file will be saved there 65 | */ 66 | const char *run_dir; 67 | 68 | union { 69 | /* used by ->init_tgt() */ 70 | struct { 71 | int tgt_argc; 72 | char **tgt_argv; 73 | }; 74 | /* used by ->recovery_tgt(), tgt_argc == -1 */ 75 | struct { 76 | int padding; 77 | const char *recovery_jbuf; 78 | }; 79 | }; 80 | 81 | cpu_set_t *queues_cpuset; 82 | 83 | void *private_data; 84 | unsigned long reserved[3]; 85 | }; 86 | 87 | struct ublk_io { 88 | char *buf_addr; 89 | 90 | #define UBLKSRV_NEED_FETCH_RQ (1UL << 0) 91 | #define UBLKSRV_NEED_COMMIT_RQ_COMP (1UL << 1) 92 | #define UBLKSRV_IO_FREE (1UL << 2) 93 | #define UBLKSRV_NEED_GET_DATA (1UL << 3) 94 | unsigned int flags; 95 | 96 | /* result is updated after all target ios are done */ 97 | unsigned int result; 98 | 99 | 
struct ublk_io_data data; 100 | }; 101 | 102 | struct _ublksrv_queue { 103 | /********** part of API, can't change ************/ 104 | int q_id; 105 | int q_depth; 106 | 107 | struct io_uring *ring_ptr; 108 | struct _ublksrv_dev *dev; 109 | void *private_data; 110 | /*************************************************/ 111 | 112 | /* 113 | * Read only by ublksrv daemon, setup via mmap on /dev/ublkcN. 114 | * 115 | * ublksrv_io_desc(iod) is stored in this buffer, so iod 116 | * can be retrieved by request's tag directly. 117 | * 118 | * ublksrv writes the iod into this array, and notify ublksrv daemon 119 | * by issued io_uring command beforehand. 120 | * */ 121 | struct ublksrv_io_desc *io_cmd_buf; 122 | char *io_buf; 123 | 124 | unsigned cmd_inflight, tgt_io_inflight; //obsolete 125 | unsigned state; 126 | 127 | /* eventfd */ 128 | int efd; 129 | 130 | /* cache tgt ops */ 131 | const struct ublksrv_tgt_type *tgt_ops; 132 | 133 | /* 134 | * ring for submit io command to ublk driver, can only be issued 135 | * from ublksrv daemon. 136 | * 137 | * ring depth == dev_info->queue_depth. 
138 | */ 139 | struct io_uring ring; 140 | 141 | unsigned tid; 142 | 143 | #define UBLKSRV_NR_CTX_BATCH 4 144 | int nr_ctxs; 145 | struct ublksrv_aio_ctx *ctxs[UBLKSRV_NR_CTX_BATCH]; 146 | 147 | unsigned long reserved[8]; 148 | 149 | struct ublk_io ios[0]; 150 | }; 151 | 152 | struct _ublksrv_dev { 153 | //keep same with ublksrv_dev 154 | /********** part of API, can't change ************/ 155 | struct ublksrv_tgt_info tgt; 156 | /************************************************/ 157 | 158 | struct _ublksrv_queue *__queues[MAX_NR_HW_QUEUES]; 159 | char *io_buf_start; 160 | pthread_t *thread; 161 | int cdev_fd; 162 | int pid_file_fd; 163 | 164 | const struct ublksrv_ctrl_dev *ctrl_dev; 165 | void *target_data; 166 | int cq_depth; 167 | int pad; 168 | 169 | /* reserved isn't necessary any more */ 170 | unsigned long reserved[3]; 171 | }; 172 | 173 | #define local_to_tq(q) ((struct ublksrv_queue *)(q)) 174 | #define tq_to_local(q) ((struct _ublksrv_queue *)(q)) 175 | 176 | #define local_to_tdev(d) ((struct ublksrv_dev *)(d)) 177 | #define tdev_to_local(d) ((struct _ublksrv_dev *)(d)) 178 | 179 | struct ublksrv_tgt_jbuf *ublksrv_tgt_get_jbuf(const struct ublksrv_ctrl_dev *cdev); 180 | 181 | static inline struct ublksrv_ctrl_data *ublksrv_get_ctrl_data(const struct ublksrv_ctrl_dev *cdev) 182 | { 183 | return cdev->data; 184 | } 185 | 186 | static inline bool ublk_is_unprivileged(const struct ublksrv_ctrl_dev *ctrl_dev) 187 | { 188 | return !!(ctrl_dev->dev_info.flags & UBLK_F_UNPRIVILEGED_DEV); 189 | } 190 | 191 | static inline cpu_set_t *ublksrv_get_queue_affinity( 192 | const struct ublksrv_ctrl_dev *dev, int qid) 193 | { 194 | unsigned char *buf = (unsigned char *)&dev->queues_cpuset[qid]; 195 | 196 | if (ublk_is_unprivileged(dev)) 197 | return (cpu_set_t *)&buf[UBLKC_PATH_MAX]; 198 | 199 | return &dev->queues_cpuset[qid]; 200 | } 201 | 202 | static inline void ublksrv_mark_io_done(struct ublk_io *io, int res) 203 | { 204 | /* 205 | * mark io done by target, so that 
->ubq_daemon can commit its 206 | * result and fetch new request via io_uring command. 207 | */ 208 | io->flags |= (UBLKSRV_NEED_COMMIT_RQ_COMP | UBLKSRV_IO_FREE); 209 | 210 | io->result = res; 211 | } 212 | 213 | static inline struct io_uring_sqe *ublksrv_alloc_sqe(struct io_uring *r) 214 | { 215 | unsigned left = io_uring_sq_space_left(r); 216 | 217 | if (left < 1) 218 | io_uring_submit(r); 219 | return io_uring_get_sqe(r); 220 | } 221 | 222 | int create_pid_file(const char *pid_file, int *pid_fd); 223 | 224 | extern void ublksrv_build_cpu_str(char *buf, int len, const cpu_set_t *cpuset); 225 | 226 | /* bit63: target io, bit62: eventfd data */ 227 | static inline __u64 build_eventfd_data() 228 | { 229 | return 0x3ULL << 62; 230 | } 231 | 232 | static inline int is_eventfd_io(__u64 user_data) 233 | { 234 | return (user_data & (1ULL << 62)) != 0; 235 | } 236 | 237 | static inline int is_target_io(__u64 user_data) 238 | { 239 | return (user_data & (1ULL << 63)) != 0; 240 | } 241 | 242 | static inline void ublksrv_setup_ring_params(struct io_uring_params *p, 243 | int cq_depth, unsigned flags) 244 | { 245 | memset(p, 0, sizeof(*p)); 246 | p->flags = flags | IORING_SETUP_CQSIZE; 247 | p->cq_entries = cq_depth; 248 | } 249 | 250 | static inline struct io_uring_sqe *ublksrv_uring_get_sqe(struct io_uring *r, 251 | int idx, bool is_sqe128) 252 | { 253 | if (is_sqe128) 254 | return &r->sq.sqes[idx << 1]; 255 | return &r->sq.sqes[idx]; 256 | } 257 | 258 | static inline void *ublksrv_get_sqe_cmd(struct io_uring_sqe *sqe) 259 | { 260 | return (void *)&sqe->addr3; 261 | } 262 | 263 | static inline void ublksrv_set_sqe_cmd_op(struct io_uring_sqe *sqe, __u32 cmd_op) 264 | { 265 | __u32 *addr = (__u32 *)&sqe->off; 266 | 267 | addr[0] = cmd_op; 268 | addr[1] = 0; 269 | } 270 | 271 | /* 272 | * ublksrv_aio_ctx is used to offload IO handling from ublksrv io_uring 273 | * context. 
274 | * 275 | * ublksrv_aio_ctx is bound with one single pthread which has to belong 276 | * to same process of the io_uring where IO is originated, so we can 277 | * support to handle IO from multiple queues of the same device. At 278 | * default, ublksrv_aio_ctx supports to handle device wide aio or io 279 | * offloading except for UBLKSRV_AIO_QUEUE_WIDE. 280 | * 281 | * Meantime ublksrv_aio_ctx can be created per each queue, and only handle 282 | * IOs from this queue. 283 | * 284 | * The final io handling in the aio context depends on user's implementation, 285 | * either sync or async IO submitting is supported. 286 | */ 287 | struct ublksrv_aio_ctx { 288 | struct ublksrv_aio_list submit; 289 | 290 | /* per-queue completion list */ 291 | struct ublksrv_aio_list *complete; 292 | 293 | int efd; //for wakeup us 294 | 295 | #define UBLKSRV_AIO_QUEUE_WIDE (1U << 0) 296 | unsigned int flags; 297 | bool dead; 298 | 299 | const struct ublksrv_dev *dev; 300 | 301 | void *ctx_data; 302 | 303 | unsigned long reserved[8]; 304 | }; 305 | 306 | #define UBLK_TGT_MAX_JBUF_SZ 8192 307 | 308 | static inline bool tgt_realloc_jbuf(struct ublksrv_tgt_jbuf *j) 309 | { 310 | if (j->jbuf == NULL) 311 | j->jbuf_size = 512; 312 | else 313 | j->jbuf_size += 512; 314 | 315 | if (j->jbuf_size < UBLK_TGT_MAX_JBUF_SZ) { 316 | j->jbuf = (char *)realloc((void *)j->jbuf, j->jbuf_size); 317 | return true; 318 | } 319 | return false; 320 | } 321 | 322 | static inline void ublksrv_tgt_jbuf_init(struct ublksrv_ctrl_dev *cdev, 323 | struct ublksrv_tgt_jbuf *j, bool recover) 324 | { 325 | pthread_mutex_init(&j->lock, NULL); 326 | if (recover) { 327 | j->jbuf = ublksrv_tgt_get_dev_data(cdev); 328 | if (j->jbuf) 329 | j->jbuf_size = ublksrv_json_get_length(j->jbuf); 330 | } else { 331 | j->jbuf = NULL; 332 | j->jbuf_size = 0; 333 | tgt_realloc_jbuf(j); 334 | } 335 | } 336 | 337 | static inline void ublksrv_tgt_jbuf_exit(struct ublksrv_tgt_jbuf *jbuf) 338 | { 339 | free(jbuf->jbuf); 340 | } 341 | 342 | 
343 | #ifdef __cplusplus 344 | } 345 | #endif 346 | 347 | #endif 348 | -------------------------------------------------------------------------------- /include/ublksrv_utils.h: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: MIT or LGPL-2.1-only 2 | 3 | #ifndef UBLKSRV_UTILS_INC_H 4 | #define UBLKSRV_UTILS_INC_H 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #ifdef __cplusplus 12 | extern "C" { 13 | #endif 14 | 15 | cpu_set_t *ublk_make_cpuset(int num_sets, const char *cpuset); 16 | 17 | static inline int ublksrv_gettid(void) 18 | { 19 | return syscall(SYS_gettid); 20 | } 21 | 22 | /* The following two are obsolete, use new ublk_err/ublk_dbg/ublk_log */ 23 | static inline void ublksrv_log(int priority, const char *fmt, ...) 24 | __attribute__ ((format (printf, 2, 3))); 25 | static inline void ublksrv_printf(FILE *stream, const char *fmt, ...) 26 | __attribute__ ((format (printf, 2, 3))); 27 | 28 | #ifdef DEBUG 29 | static inline void ublksrv_log(int priority, const char *fmt, ...) 30 | { 31 | va_list ap; 32 | 33 | va_start(ap, fmt); 34 | vsyslog(priority, fmt, ap); 35 | } 36 | 37 | static inline void ublksrv_printf(FILE *stream, const char *fmt, ...) 38 | { 39 | va_list ap; 40 | 41 | va_start(ap, fmt); 42 | vfprintf(stream, fmt, ap); 43 | } 44 | #else 45 | static inline void ublksrv_log(int priority, const char *fmt, ...) { } 46 | static inline void ublksrv_printf(FILE *stream, const char *fmt, ...) {} 47 | #endif 48 | 49 | /* 32bit debug mask, high 16 bits are for target code, and low 16 bits for lib */ 50 | #define UBLK_DBG_DEV (1U << 0) 51 | #define UBLK_DBG_QUEUE (1U << 1) 52 | #define UBLK_DBG_IO_CMD (1U << 2) 53 | #define UBLK_DBG_IO (1U << 3) 54 | #define UBLK_DBG_CTRL_CMD (1U << 4) 55 | 56 | #ifdef DEBUG 57 | extern void ublk_dbg(int level, const char *fmt, ...) 58 | __attribute__ ((format (printf, 2, 3))); 59 | extern void ublk_ctrl_dbg(int level, const char *fmt, ...) 
60 | __attribute__ ((format (printf, 2, 3))); 61 | extern void ublk_set_debug_mask(unsigned mask); 62 | extern unsigned ublk_get_debug_mask(unsigned mask); 63 | #else 64 | static inline void ublk_dbg(int level, const char *fmt, ...) { } 65 | static inline void ublk_ctrl_dbg(int level, const char *fmt, ...) { } 66 | static inline void ublk_set_debug_mask(unsigned mask) {} 67 | static inline unsigned ublk_get_debug_mask(unsigned mask) { return 0;} 68 | #endif 69 | 70 | extern void ublk_log(const char *fmt, ...) 71 | __attribute__ ((format (printf, 1, 2))); 72 | extern void ublk_err(const char *fmt, ...) 73 | __attribute__ ((format (printf, 1, 2))); 74 | 75 | #define round_up(val, rnd) \ 76 | (((val) + ((rnd) - 1)) & ~((rnd) - 1)) 77 | 78 | #ifndef offsetof 79 | #define offsetof(TYPE, MEMBER) ((size_t)&((TYPE *)0)->MEMBER) 80 | #endif 81 | 82 | #ifndef container_of 83 | #define container_of(ptr, type, member) ({ \ 84 | unsigned long __mptr = (unsigned long)(ptr); \ 85 | ((type *)(__mptr - offsetof(type, member))); }) 86 | #endif 87 | 88 | #define ublk_ignore_result(x) ({ typeof(x) z = x; (void)sizeof z; }) 89 | 90 | #ifdef __cplusplus 91 | } 92 | #endif 93 | 94 | #endif 95 | -------------------------------------------------------------------------------- /lib/Makefile.am: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: MIT or LGPL-2.1-only 2 | 3 | lib_LTLIBRARIES = libublksrv.la 4 | 5 | libublksrv_la_SOURCES = \ 6 | ublksrv_cmd.c \ 7 | ublksrv_json.cpp \ 8 | ublksrv.c \ 9 | utils.c \ 10 | ublksrv_aio.c 11 | libublksrv_la_CFLAGS = \ 12 | $(WARNING_CFLAGS) \ 13 | $(LIBURING_CFLAGS) \ 14 | $(PTHREAD_CFLAGS) -DUBLKSRV_INTERNAL_H_ 15 | libublksrv_la_CPPFLAGS = \ 16 | -I$(top_srcdir)/include -DUBLKSRV_INTERNAL_H_ 17 | libublksrv_la_LIBADD = \ 18 | $(LIBURING_LIBS) \ 19 | $(PTHREAD_LIBS) 20 | libublksrv_la_LDFLAGS = \ 21 | -version-info 0:0:0 22 | 23 | CLEANFILES = *~ *.d 24 | 
-------------------------------------------------------------------------------- /lib/ublksrv_aio.c: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: MIT or LGPL-2.1-only 2 | 3 | #ifndef _GNU_SOURCE 4 | #define _GNU_SOURCE 5 | #endif 6 | 7 | #include "ublksrv_priv.h" 8 | 9 | #define aio_log ublk_log 10 | 11 | int ublksrv_aio_submit_worker(struct ublksrv_aio_ctx *ctx, 12 | ublksrv_aio_submit_fn *fn, struct aio_list *done) 13 | { 14 | struct ublksrv_aio *req = NULL; 15 | unsigned long long data; 16 | struct aio_list sl; 17 | int total = 0; 18 | bool more; 19 | 20 | aio_list_init(&sl); 21 | again: 22 | pthread_spin_lock(&ctx->submit.lock); 23 | aio_list_splice(&ctx->submit.list, &sl); 24 | pthread_spin_unlock(&ctx->submit.lock); 25 | 26 | while ((req = aio_list_pop(&sl))) { 27 | int ret = fn(ctx, req); 28 | 29 | /* 30 | * submission failed, so set result for this request, 31 | * otherwise it is user's responsibility to set correct 32 | * ->res after the request is completed 33 | */ 34 | if (ret < 0) { 35 | req->res = ret; 36 | aio_log("ublk aio submission fail, %d\n", ret); 37 | } 38 | total += 1; 39 | if (ret && done) 40 | aio_list_add(done, req); 41 | } 42 | 43 | ublk_ignore_result(read(ctx->efd, &data, 8)); 44 | 45 | pthread_spin_lock(&ctx->submit.lock); 46 | more = !aio_list_empty(&ctx->submit.list); 47 | pthread_spin_unlock(&ctx->submit.lock); 48 | if (more) 49 | goto again; 50 | 51 | return total; 52 | } 53 | 54 | static void move_to_queue_complete_list(struct ublksrv_aio_ctx *ctx, 55 | struct _ublksrv_queue *q, struct aio_list *list) 56 | { 57 | struct ublksrv_aio_list *compl; 58 | 59 | if (aio_list_empty(list)) 60 | return; 61 | 62 | compl = &ctx->complete[q->q_id]; 63 | pthread_spin_lock(&compl->lock); 64 | aio_list_splice(list, &compl->list); 65 | pthread_spin_unlock(&compl->lock); 66 | } 67 | 68 | void ublksrv_aio_complete_worker(struct ublksrv_aio_ctx *ctx, 69 | struct aio_list *completed) 70 
| { 71 | struct aio_list this, others; 72 | struct ublksrv_aio *req = NULL; 73 | struct _ublksrv_queue *this_q = NULL; 74 | 75 | if (aio_list_empty(completed)) 76 | return; 77 | 78 | if (ctx->flags & UBLKSRV_AIO_QUEUE_WIDE) { 79 | const struct ublksrv_queue *tq = ublksrv_get_queue(ctx->dev, 80 | ublksrv_aio_qid(completed->head->id)); 81 | 82 | this_q = tq_to_local(tq); 83 | move_to_queue_complete_list(ctx, this_q, completed); 84 | ublksrv_queue_send_event(tq); 85 | return; 86 | } 87 | 88 | aio_list_init(&this); 89 | aio_list_init(&others); 90 | 91 | while (!aio_list_empty(completed)) { 92 | const struct ublksrv_queue *tq = ublksrv_get_queue(ctx->dev, 93 | ublksrv_aio_qid(completed->head->id)); 94 | 95 | this_q = tq_to_local(tq); 96 | while ((req = aio_list_pop(completed))) { 97 | const struct ublksrv_queue *q = ublksrv_get_queue( 98 | ctx->dev, ublksrv_aio_qid(req->id)); 99 | 100 | if (q == local_to_tq(this_q)) 101 | aio_list_add(&this, req); 102 | else 103 | aio_list_add(&others, req); 104 | } 105 | 106 | move_to_queue_complete_list(ctx, this_q, &this); 107 | ublksrv_queue_send_event(tq); 108 | aio_list_splice(&others, completed); 109 | } 110 | } 111 | 112 | struct ublksrv_aio_ctx *ublksrv_aio_ctx_init(const struct ublksrv_dev *dev, 113 | unsigned flags) 114 | { 115 | unsigned nr_hw_queues = tdev_to_local(dev)->ctrl_dev->dev_info.nr_hw_queues; 116 | struct ublksrv_aio_ctx *ctx; 117 | int i; 118 | 119 | if (!(tdev_to_local(dev)->ctrl_dev->dev_info.ublksrv_flags & UBLKSRV_F_NEED_EVENTFD)) 120 | return NULL; 121 | 122 | ctx = calloc(1, sizeof(*ctx)); 123 | if (!ctx) 124 | return NULL; 125 | 126 | ctx->complete = malloc(nr_hw_queues * sizeof(struct ublksrv_aio_list)); 127 | if (!ctx->complete) { 128 | free(ctx); 129 | return NULL; 130 | } 131 | for (i = 0; i < nr_hw_queues; i++) 132 | ublksrv_aio_init_list(&ctx->complete[i]); 133 | 134 | ublksrv_aio_init_list(&ctx->submit); 135 | 136 | ctx->flags = flags; 137 | ctx->dev = dev; 138 | ctx->dead = false; 139 | ctx->efd = 
eventfd(0, O_NONBLOCK); 140 | 141 | return ctx; 142 | } 143 | 144 | /* called before pthread_join() of the pthread context */ 145 | void ublksrv_aio_ctx_shutdown(struct ublksrv_aio_ctx *ctx) 146 | { 147 | unsigned long long data = 1; 148 | int ret; 149 | 150 | ctx->dead = true; 151 | ret = write(ctx->efd, &data, 8); 152 | if (ret != 8) 153 | ublk_err("%s:%d write fail %d/%d\n", 154 | __func__, __LINE__, ret, 8); 155 | } 156 | 157 | /* called afer pthread_join() of the pthread context returns */ 158 | void ublksrv_aio_ctx_deinit(struct ublksrv_aio_ctx *ctx) 159 | { 160 | close(ctx->efd); 161 | free(ctx); 162 | } 163 | 164 | struct ublksrv_aio *ublksrv_aio_alloc_req(struct ublksrv_aio_ctx *ctx, 165 | int payload_size) 166 | { 167 | const int sz = (sizeof(struct ublksrv_aio) + payload_size + 7) & ~ 0x7; 168 | 169 | return (struct ublksrv_aio *)calloc(1, sz); 170 | } 171 | 172 | void ublksrv_aio_free_req(struct ublksrv_aio_ctx *ctx, struct ublksrv_aio *req) 173 | { 174 | free(req); 175 | } 176 | 177 | static bool ublksrv_aio_add_ctx_for_submit(struct _ublksrv_queue *q, 178 | struct ublksrv_aio_ctx *ctx) 179 | { 180 | int i = 0; 181 | 182 | for (i = 0; i < q->nr_ctxs; i++) { 183 | if (q->ctxs[i] == ctx) 184 | return true; 185 | } 186 | 187 | if (q->nr_ctxs < UBLKSRV_NR_CTX_BATCH - 1) { 188 | q->ctxs[q->nr_ctxs++] = ctx; 189 | return true; 190 | } 191 | 192 | return false; 193 | } 194 | 195 | void ublksrv_aio_submit_req(struct ublksrv_aio_ctx *ctx, 196 | const struct ublksrv_queue *tq, struct ublksrv_aio *req) 197 | { 198 | struct _ublksrv_queue *q = tq_to_local(tq); 199 | unsigned long long data = 1; 200 | 201 | pthread_spin_lock(&ctx->submit.lock); 202 | aio_list_add(&ctx->submit.list, req); 203 | pthread_spin_unlock(&ctx->submit.lock); 204 | 205 | if (!ublksrv_aio_add_ctx_for_submit(q, ctx)) { 206 | int ret = write(ctx->efd, &data, 8); 207 | 208 | if (ret != 8) 209 | ublk_err("%s:%d write fail %d/%d\n", 210 | __func__, __LINE__, ret, 8); 211 | } 212 | } 213 | 214 | 
void ublksrv_aio_get_completed_reqs(struct ublksrv_aio_ctx *ctx, 215 | const struct ublksrv_queue *q, 216 | struct aio_list *al) 217 | { 218 | struct ublksrv_aio_list *compl = &ctx->complete[q->q_id]; 219 | 220 | pthread_spin_lock(&compl->lock); 221 | aio_list_splice(&compl->list, al); 222 | pthread_spin_unlock(&compl->lock); 223 | } 224 | 225 | void ublksrv_aio_handle_event(struct ublksrv_aio_ctx *ctx, 226 | const struct ublksrv_queue *q) 227 | { 228 | struct ublksrv_aio_list *compl = &ctx->complete[q->q_id]; 229 | struct ublksrv_aio *req; 230 | struct aio_list al; 231 | 232 | aio_list_init(&al); 233 | pthread_spin_lock(&compl->lock); 234 | aio_list_splice(&compl->list, &al); 235 | ublksrv_queue_handled_event(q); 236 | pthread_spin_unlock(&compl->lock); 237 | 238 | while ((req = aio_list_pop(&al))) { 239 | ublksrv_complete_io(q, ublksrv_aio_tag(req->id), 240 | req->res); 241 | ublksrv_aio_free_req(ctx, req); 242 | } 243 | } 244 | 245 | int ublksrv_aio_get_efd(struct ublksrv_aio_ctx *ctx) 246 | { 247 | return ctx->efd; 248 | } 249 | 250 | void ublksrv_aio_set_ctx_data(struct ublksrv_aio_ctx *ctx, void *data) 251 | { 252 | ctx->ctx_data = data; 253 | } 254 | 255 | void *ublksrv_aio_get_ctx_data(struct ublksrv_aio_ctx *ctx) 256 | { 257 | return ctx->ctx_data; 258 | } 259 | 260 | bool ublksrv_aio_ctx_dead(struct ublksrv_aio_ctx *ctx) 261 | { 262 | return ctx->dead; 263 | } 264 | 265 | const struct ublksrv_dev *ublksrv_aio_get_dev(struct ublksrv_aio_ctx *ctx) 266 | { 267 | return ctx->dev; 268 | } 269 | -------------------------------------------------------------------------------- /lib/utils.c: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: MIT or LGPL-2.1-only 2 | 3 | #include 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include "ublksrv_priv.h" 11 | 12 | /* 13 | * We don't need to lock file since the device id is unique 14 | */ 15 | int create_pid_file(const char 
*pid_file, int *pid_fd) 16 | { 17 | #define PID_PATH_LEN 256 18 | char buf[PID_PATH_LEN]; 19 | int fd, ret; 20 | 21 | fd = open(pid_file, O_RDWR | O_CREAT | O_CLOEXEC, 22 | S_IRUSR | S_IWUSR); 23 | if (fd < 0) { 24 | ublk_err( "Fail to open file %s", pid_file); 25 | return fd; 26 | } 27 | 28 | ret = ftruncate(fd, 0); 29 | if (ret == -1) { 30 | ublk_err( "Could not truncate pid file %s, err %s", 31 | pid_file, strerror(errno)); 32 | goto fail; 33 | } 34 | 35 | snprintf(buf, PID_PATH_LEN, "%ld\n", (long) getpid()); 36 | if (write(fd, buf, strlen(buf)) != strlen(buf)) { 37 | ublk_err( "Fail to write %s to file %s", 38 | buf, pid_file); 39 | ret = -1; 40 | } else { 41 | *pid_fd = fd; 42 | } 43 | fail: 44 | if (ret) { 45 | close(fd); 46 | unlink(pid_file); 47 | } 48 | return ret; 49 | } 50 | /* Log a printf-style message to syslog at LOG_ERR priority. */ 51 | void ublk_err(const char *fmt, ...) 52 | { 53 | va_list ap; 54 | 55 | va_start(ap, fmt); 56 | vsyslog(LOG_ERR, fmt, ap); va_end(ap); /* every va_start() must be paired with va_end() */ 57 | } 58 | /* Log a printf-style message to syslog at LOG_INFO priority. */ 59 | void ublk_log(const char *fmt, ...) 60 | { 61 | va_list ap; 62 | 63 | va_start(ap, fmt); 64 | vsyslog(LOG_INFO, fmt, ap); va_end(ap); 65 | } 66 | 67 | #ifdef DEBUG 68 | static unsigned int ublk_debug_mask; /* Log to syslog (LOG_ERR) only when a bit of level is set in ublk_debug_mask. */ 69 | void ublk_dbg(int level, const char *fmt, ...) 70 | { 71 | if (level & ublk_debug_mask) { 72 | va_list ap; 73 | 74 | va_start(ap, fmt); 75 | vsyslog(LOG_ERR, fmt, ap); va_end(ap); 76 | } 77 | } 78 | 79 | void ublk_ctrl_dbg(int level, const char *fmt, ...) 
80 | { 81 | if (level & ublk_debug_mask) { 82 | va_list ap; 83 | 84 | va_start(ap, fmt); 85 | vfprintf(stdout, fmt, ap); va_end(ap); /* every va_start() must be paired with va_end() */ 86 | } 87 | } 88 | 89 | void ublk_set_debug_mask(unsigned mask) 90 | { 91 | ublk_debug_mask = mask; 92 | } 93 | 94 | unsigned ublk_get_debug_mask(unsigned mask) 95 | { 96 | return ublk_debug_mask; 97 | } 98 | #endif 99 | /* Parse a cpuset string made of num_sets bracketed groups, each a comma-separated list of decimal CPU ids (e.g. "[0,1][2]"). Returns a calloc'ed array of num_sets cpu_set_t that the caller must free, or NULL if cpuset is NULL, an allocation fails, or the number of "[...]" groups differs from num_sets. */ 100 | cpu_set_t *ublk_make_cpuset(int num_sets, const char *cpuset) 101 | { 102 | int i; 103 | char *ptr, *next, *tmp, *str = NULL; 104 | cpu_set_t *sets = NULL, *ret = NULL; 105 | 106 | if (!cpuset) 107 | return NULL; 108 | if (!(str = strdup(cpuset))) 109 | return NULL; 110 | /* count the complete "[...]" groups */ 111 | for(i = 0, ptr = str; (ptr = strchr(ptr, '[')) != NULL; i++) { 112 | if (!(ptr = strchr(ptr, ']'))) 113 | break; 114 | } 115 | if (i != num_sets) 116 | goto finished; 117 | 118 | if (!(sets = calloc(i, sizeof(cpu_set_t)))) 119 | goto finished; 120 | 121 | for (i = 0; i < num_sets; i++) { 122 | CPU_ZERO(&sets[i]); 123 | } 124 | for (i = 0, ptr = str; 125 | i < num_sets; 126 | i++, ptr = next) { 127 | if (!(ptr = strchr(ptr, '['))) 128 | goto finished; 129 | ptr++; 130 | if (!(next = strchr(ptr, ']'))) 131 | goto finished; 132 | *next++ = '\0'; 133 | /* 134 | * ptr -> end now contains the cpuset string for 135 | * the queue with index i 136 | */ 137 | 138 | do { 139 | tmp = strchr(ptr, ','); 140 | if (tmp) 141 | *tmp++ = '\0'; 142 | CPU_SET(strtol(ptr, NULL, 10), &sets[i]); 143 | ptr = tmp; 144 | } while (ptr && *ptr); 145 | } 146 | 147 | ret = sets; 148 | sets = NULL; 149 | 150 | finished: 151 | free(sets); 152 | free(str); 153 | return ret; 154 | } 155 | -------------------------------------------------------------------------------- /m4/ac_c_compile_flags.m4: -------------------------------------------------------------------------------- 1 | # This program is free software; you can redistribute it and/or modify 2 | # it under the terms of the GNU General Public License as published by 3 | # the Free Software Foundation; either version 2 of the License, or 4 
| # (at your option) any later version. 5 | # 6 | # This program is distributed in the hope that it will be useful, 7 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 8 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 9 | # GNU General Public License for more details. 10 | # 11 | # You should have received a copy of the GNU General Public License 12 | # along with this program; if not, write to the Free Software 13 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 14 | 15 | # Originally from: 16 | # https://github.com/WinterMute/prboom/blob/master/autotools/ac_c_compile_flags.m4 17 | # RWMJ: I adapted it to add the extra parameters and fixed a few bugs. 18 | 19 | # AC_C_COMPILE_FLAGS(VAR, FLAGS TO TEST, [CFLAGS_FOR_TEST = $CFLAGS]) 20 | # ---------------------------------------------------------- 21 | # Check if compiler flag $2 is supported, if so add it to $1. 22 | # Extra CFLAGS for the test can be passed in $3. 23 | AC_DEFUN([AC_C_COMPILE_FLAGS],[ 24 | CFLAGS_FOR_TEST="m4_default([$3], [$CFLAGS])" 25 | for flag in $2 26 | do 27 | AC_MSG_CHECKING(whether the compiler supports $flag) 28 | SAVED_CFLAGS="$CFLAGS" 29 | CFLAGS="$CFLAGS_FOR_TEST $flag" 30 | AC_COMPILE_IFELSE([AC_LANG_PROGRAM() 31 | ],[ 32 | AC_MSG_RESULT(yes) 33 | $1="${$1} $flag" 34 | ],[AC_MSG_RESULT(no)]) 35 | CFLAGS="$SAVED_CFLAGS" 36 | done 37 | ]) 38 | -------------------------------------------------------------------------------- /targets/include/ublksrv_tgt.h: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: MIT or GPL-2.0-only 2 | 3 | #ifndef UBLKSRV_TGT_INC_H 4 | #define UBLKSRV_TGT_INC_H 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | 23 | #include "ublksrv_utils.h" 24 | #include "ublksrv.h" 
25 | 26 | #define ublk_assert(x) do { \ 27 | if (!(x)) { \ 28 | ublk_err("%s %d: assert!\n", __func__, __LINE__); \ 29 | assert(x); \ 30 | } \ 31 | } while (0) 32 | 33 | static inline unsigned ilog2(unsigned x) 34 | { 35 | return sizeof(unsigned) * 8 - 1 - __builtin_clz(x); 36 | } 37 | 38 | #define MAX_NR_UBLK_DEVS 128 39 | 40 | /* 41 | * Our convention is to use this macro instead of raw `co_await` to make it 42 | * easy to log `tag` when debugging coroutine issues. 43 | */ 44 | #define co_await__suspend_always(tag) { \ 45 | static_assert(std::is_same::value, "tag not int");\ 46 | co_await std::suspend_always(); \ 47 | } 48 | 49 | using co_handle_type = std::coroutine_handle<>; 50 | struct co_io_job { 51 | struct promise_type { 52 | co_io_job get_return_object() { 53 | return {std::coroutine_handle::from_promise(*this)}; 54 | } 55 | std::suspend_never initial_suspend() { 56 | return {}; 57 | } 58 | std::suspend_never final_suspend() noexcept { 59 | return {}; 60 | } 61 | void return_void() {} 62 | void unhandled_exception() {} 63 | }; 64 | 65 | co_handle_type coro; 66 | 67 | co_io_job(co_handle_type h): coro(h) {} 68 | 69 | operator co_handle_type() const { return coro; } 70 | }; 71 | 72 | struct ublk_io_tgt { 73 | co_handle_type co; 74 | const struct io_uring_cqe *tgt_io_cqe; 75 | int queued_tgt_io; /* obsolete */ 76 | }; 77 | 78 | /* don't overlap with _IO_NR(UBLK_U_IO_*) and UBLK_IO_OP_* */ 79 | #define UBLK_USER_COPY_READ 0x80 80 | #define UBLK_USER_COPY_WRITE 0x81 81 | 82 | static inline struct ublk_io_tgt *__ublk_get_io_tgt_data(const struct ublk_io_data *io) 83 | { 84 | return (struct ublk_io_tgt *)io->private_data; 85 | } 86 | 87 | static inline struct ublk_io_tgt *ublk_get_io_tgt_data( 88 | const struct ublksrv_queue *q, int tag) 89 | { 90 | return (struct ublk_io_tgt *)ublksrv_io_private_data(q, tag); 91 | } 92 | 93 | static inline void ublksrv_tgt_set_io_data_size(struct ublksrv_tgt_info *tgt) 94 | { 95 | tgt->io_data_size = sizeof(struct ublk_io_tgt); 
96 | } 97 | 98 | //static_assert(sizeof(struct ublk_io_tgt) == sizeof(struct ublk_io), "ublk_io is defined as wrong"); 99 | 100 | enum { 101 | UBLK_UNIQUE_TAG_BITS = 16, 102 | UBLK_UNIQUE_TAG_MASK = (1 << UBLK_UNIQUE_TAG_BITS) - 1, 103 | }; 104 | 105 | static inline unsigned int ublk_unique_tag(unsigned short hwq, 106 | unsigned short tag) 107 | { 108 | return (hwq << UBLK_UNIQUE_TAG_BITS) | (tag & UBLK_UNIQUE_TAG_MASK); 109 | } 110 | 111 | static inline unsigned short ublk_unique_tag_to_hwq(unsigned int unique_tag) 112 | { 113 | return unique_tag >> UBLK_UNIQUE_TAG_BITS; 114 | } 115 | 116 | static inline unsigned short ublk_unique_tag_to_tag(unsigned int unique_tag) 117 | { 118 | return unique_tag & UBLK_UNIQUE_TAG_MASK; 119 | } 120 | 121 | static inline bool ublk_param_is_valid(const struct ublk_params *p) 122 | { 123 | if (p->basic.logical_bs_shift < 9 || p->basic.logical_bs_shift > 12) 124 | return false; 125 | if (p->basic.logical_bs_shift > p->basic.physical_bs_shift) 126 | return false; 127 | return true; 128 | } 129 | 130 | static inline int ublk_queue_alloc_sqes(const struct ublksrv_queue *q, 131 | struct io_uring_sqe *sqes[], int nr_sqes) 132 | { 133 | struct io_uring *r = q->ring_ptr; 134 | int i; 135 | 136 | if (io_uring_sq_space_left(r) < nr_sqes) 137 | io_uring_submit(r); 138 | 139 | for (i = 0; i < nr_sqes; i++) { 140 | sqes[i] = io_uring_get_sqe(r); 141 | if (!sqes[i]) 142 | return i; 143 | } 144 | 145 | return nr_sqes; 146 | } 147 | 148 | static inline enum io_uring_op ublk_to_uring_fs_op( 149 | const struct ublksrv_io_desc *iod, bool zc) 150 | { 151 | unsigned ublk_op = ublksrv_get_op(iod); 152 | 153 | if (ublk_op == UBLK_IO_OP_READ) 154 | return zc ? IORING_OP_READ_FIXED : IORING_OP_READ; 155 | else if (ublk_op == UBLK_IO_OP_WRITE) 156 | return zc ? 
IORING_OP_WRITE_FIXED : IORING_OP_WRITE; 157 | assert(0); 158 | } 159 | 160 | int ublksrv_tgt_send_dev_event(int evtfd, int dev_id); 161 | 162 | void ublksrv_print_std_opts(void); 163 | char *ublksrv_pop_cmd(int *argc, char *argv[]); 164 | int ublksrv_tgt_cmd_main(const struct ublksrv_tgt_type *tgt_type, int argc, char *argv[]); 165 | 166 | static inline unsigned short ublk_cmd_op_nr(unsigned int op) 167 | { 168 | return _IOC_NR(op); 169 | } 170 | 171 | /* if the OP is in the space of UBLK_IO_OP_* */ 172 | static inline int is_ublk_io_cmd(unsigned int op) 173 | { 174 | return op < UBLK_IO_FETCH_REQ; 175 | } 176 | 177 | /* called after one cqe is received */ 178 | static inline int ublksrv_tgt_process_cqe(const struct ublk_io_tgt *io, int *io_res) 179 | { 180 | const struct io_uring_cqe *cqe = io->tgt_io_cqe; 181 | 182 | assert(cqe); 183 | if (is_ublk_io_cmd(user_data_to_op(cqe->user_data))) 184 | *io_res = cqe->res; 185 | return cqe->res; 186 | } 187 | 188 | static inline void ublksrv_tgt_io_done(const struct ublksrv_queue *q, 189 | const struct ublk_io_data *data, 190 | const struct io_uring_cqe *cqe) 191 | { 192 | int tag = user_data_to_tag(cqe->user_data); 193 | struct ublk_io_tgt *io = __ublk_get_io_tgt_data(data); 194 | 195 | ublk_assert(tag == data->tag); 196 | io->tgt_io_cqe = cqe; 197 | io->co.resume(); 198 | } 199 | 200 | static inline void __set_sqe_cmd_op(struct io_uring_sqe *sqe, __u32 cmd_op) 201 | { 202 | __u32 *addr = (__u32 *)&sqe->off; 203 | 204 | addr[0] = cmd_op; 205 | addr[1] = 0; 206 | } 207 | 208 | static inline struct ublksrv_io_cmd *__get_sqe_cmd(struct io_uring_sqe *sqe) 209 | { 210 | return (struct ublksrv_io_cmd *)&sqe->addr3; 211 | } 212 | 213 | static inline void io_uring_prep_buf_register(struct io_uring_sqe *sqe, 214 | int dev_fd, int tag, int q_id, __u64 index) 215 | { 216 | struct ublksrv_io_cmd *cmd = __get_sqe_cmd(sqe); 217 | 218 | io_uring_prep_read(sqe, dev_fd, 0, 0, 0); 219 | sqe->opcode = IORING_OP_URING_CMD; 220 | sqe->flags 
= 0; 221 | __set_sqe_cmd_op(sqe, UBLK_U_IO_REGISTER_IO_BUF); 222 | 223 | cmd->tag = tag; 224 | cmd->addr = index; 225 | cmd->q_id = q_id; 226 | } 227 | 228 | static inline void io_uring_prep_buf_unregister(struct io_uring_sqe *sqe, 229 | int dev_fd, int tag, int q_id, __u64 index) 230 | { 231 | struct ublksrv_io_cmd *cmd = __get_sqe_cmd(sqe); 232 | 233 | io_uring_prep_read(sqe, dev_fd, 0, 0, 0); 234 | sqe->opcode = IORING_OP_URING_CMD; 235 | sqe->flags = 0; 236 | __set_sqe_cmd_op(sqe, UBLK_U_IO_UNREGISTER_IO_BUF); 237 | 238 | cmd->tag = tag; 239 | cmd->addr = index; 240 | cmd->q_id = q_id; 241 | } 242 | 243 | static inline bool ublksrv_tgt_queue_zc(const struct ublksrv_queue *q) 244 | { 245 | return ublksrv_queue_state(q) & UBLKSRV_ZERO_COPY; 246 | } 247 | 248 | #endif 249 | -------------------------------------------------------------------------------- /targets/include/ublksrv_tgt_endian.h: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: MIT or GPL-2.0-only 2 | #ifndef UBLK_TGT_ENDIAN_H 3 | #define UBLK_TGT_ENDIAN_H 4 | 5 | #include 6 | 7 | /* ublksrv target code private header, not for libublksrv user */ 8 | 9 | #define HOST_CONVERT(endian, size, type)\ 10 | static inline type endian ## size ## _to_cpu(type v)\ 11 | {\ 12 | return endian ## size ## toh(v); \ 13 | }\ 14 | \ 15 | static inline type cpu_to_ ## endian ## size(type v)\ 16 | {\ 17 | return hto ## endian ## size(v); \ 18 | }\ 19 | 20 | HOST_CONVERT(be, 16, uint16_t) 21 | HOST_CONVERT(be, 32, uint32_t) 22 | HOST_CONVERT(be, 64, uint64_t) 23 | 24 | HOST_CONVERT(le, 16, uint16_t) 25 | HOST_CONVERT(le, 32, uint32_t) 26 | HOST_CONVERT(le, 64, uint64_t) 27 | 28 | #endif 29 | -------------------------------------------------------------------------------- /targets/nbd/README.rst: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: GPL-2.0 2 | 3 | ======== 4 | ublk-nbd 5 | ======== 6 | 
7 | Motivation 8 | ========== 9 | 10 | As an attempt at using io_uring to implement network storage on top of the ublk 11 | framework, this target implements a basic nbd client with ublk/io_uring, which can 12 | replace the linux kernel nbd driver. 13 | 14 | Howto 15 | ===== 16 | 17 | ublk add -t nbd [-q $NR_QUEUES ] [ -d $QUEUE_DEPTH ] [--host $HOST_IP_OR_NAME | --unix $UNIX_SOCKET_PATH] [--send_zc] 18 | 19 | Like ``nbd-client`` [#nbd_client]_, ublk-nbd supports both tcp and unix sockets. 20 | 21 | ``--host $HOST_IP_OR_NAME`` points to the nbd server's IP address or domain name if 22 | a tcp socket is used. 23 | 24 | ``--unix $UNIX_SOCKET_PATH`` points to the unix socket path if a unix socket is used. 25 | 26 | The ``--send_zc`` option enables ``io_uring send zero copy`` 27 | [#io_uring_send_zc]_, which is only used for handling ublk write IO. 28 | 29 | Design 30 | ====== 31 | 32 | Handshake 33 | --------- 34 | 35 | The handshake code is borrowed from the ``nbd`` [#nbd]_ project. 36 | 37 | Transmission 38 | ------------ 39 | 40 | Traditionally the transmission phase is implemented in the kernel by the 41 | ``nbd driver`` [#nbd_driver]_. Now that we have the ublk framework, it is 42 | possible to move it out of the linux kernel. 43 | 44 | The NBD protocol [#nbd_protocol]_ is simple: for each block IO request, the 45 | nbd client sends a 28-byte request header, and the nbd server responds with one 46 | 16-byte nbd reply. For a READ request, the returned IO data follows the 47 | reply; ublk-nbd currently implements the nbd simple reply only, and doesn't 48 | support structured replies, which aren't implemented by the ``nbd driver`` 49 | [#nbd_driver]_ either. For a WRITE request, the IO data needs to follow the 50 | 28-byte request header. 
51 | 52 | For every IO request delivered from the ublk driver, the ublk-nbd target code 53 | handles this IO in one dedicated coroutine bound to the IO tag. The 54 | IO handling includes: 55 | 56 | - sending nbd request 57 | - sending WRITE data 58 | - reading nbd reply 59 | - reading nbd READ data 60 | 61 | One extra dedicated coroutine is responsible for reading the reply and 62 | the data in case of a READ request via an io_uring & recv(nonblocking) hybrid 63 | approach. recv(nonblocking) is always tried first: 64 | 65 | - if the whole reply or data in case of READ is read by recv, wake up the 66 | IO handling coroutine for completing this IO 67 | 68 | - if a partial reply or data is read, keep reading via recv(nonblocking) 69 | until the whole reply or data is read or the max tries are reached. 70 | 71 | - otherwise, io_uring is used for handling the remaining reply/data 72 | 73 | If io_uring is finally used for reading the reply or data, when the CQE is 74 | received, wake up the IO handling coroutine for completing the IO. 75 | 76 | Each IO's handling coroutine is responsible for sending the nbd request and 77 | the WRITE data in case of a WRITE request via io_uring SQE, then waiting for the 78 | reply or data in case of a READ request, which is notified from the recv 79 | coroutine. 80 | 81 | Even though everything is actually done asynchronously in a single pthread 82 | for each nbd queue, programming with coroutines still looks like every 83 | step is done one after another, so it becomes easier to write efficient async 84 | IO code with coroutines. Like other ublk targets, c++20 coroutines are used, 85 | which are stackless and efficient. 86 | 87 | Given that a stream socket is used by nbd, sending the request header and data to the 88 | socket has to be serialized, and io_uring's SQE chain is used with the 89 | help of IOSQE_IO_LINK. There are two chains: one is the current chain, the other 90 | is the next chain. Before each socket send IO in the current chain is sent 91 | to the socket, new IO requests are staggered into the next chain. 
After the whole 92 | current chain is done, the next chain is started to be submitted. And 93 | the chain stuff is handled in ublk target callback of ->handle_io_background(). 94 | 95 | Test 96 | ==== 97 | 98 | make test T=nbd 99 | 100 | 101 | TODO 102 | ==== 103 | 104 | TLS support 105 | ----------- 106 | 107 | Timeout handling 108 | ---------------- 109 | 110 | More NBD features 111 | ----------------- 112 | 113 | - structured replies 114 | 115 | References 116 | ========== 117 | 118 | .. [#nbd] https://github.com/NetworkBlockDevice/nbd 119 | .. [#nbd_client] https://github.com/NetworkBlockDevice/nbd/blob/master/nbd-client.c 120 | .. [#nbd_driver] https://github.com/torvalds/linux/blob/master/drivers/block/nbd.c 121 | .. [#nbd_protocol] https://github.com/NetworkBlockDevice/nbd/blob/master/doc/proto.md 122 | .. [#io_uring_send_zc] https://lwn.net/Articles/879724/ 123 | -------------------------------------------------------------------------------- /targets/nbd/cliserv.c: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: GPL-2.0 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include "cliserv.h" 11 | #include "nbd-debug.h" 12 | 13 | const u64 cliserv_magic = 0x00420281861253LL; 14 | const u64 opts_magic = 0x49484156454F5054LL; 15 | const u64 rep_magic = 0x3e889045565a9LL; 16 | 17 | /** 18 | * Set a socket to blocking or non-blocking 19 | * 20 | * @param fd The socket's FD 21 | * @param nb nonzero to set to non-blocking, else 0 to set to blocking 22 | * @return 0 - OK, -1 failed 23 | */ 24 | int set_nonblocking(int fd, int nb) { 25 | int sf = fcntl (fd, F_GETFL, 0); 26 | if (sf == -1) 27 | return -1; 28 | return fcntl (fd, F_SETFL, nb ? 
(sf | O_NONBLOCK) : (sf & ~O_NONBLOCK)); 29 | } 30 | 31 | 32 | void setmysockopt(int sock) { 33 | int size = 1; 34 | #if 0 35 | if (setsockopt(sock, SOL_SOCKET, SO_SNDBUF, &size, sizeof(int)) < 0) 36 | INFO("(no sockopt/1: %m)"); 37 | #endif 38 | #ifdef IPPROTO_TCP 39 | size = 1; 40 | if (setsockopt(sock, IPPROTO_TCP, TCP_NODELAY, &size, sizeof(int)) < 0) 41 | INFO("(no sockopt/2: %m)"); 42 | #endif 43 | #if 0 44 | size = 1024; 45 | if (setsockopt(sock, IPPROTO_TCP, TCP_MAXSEG, &size, sizeof(int)) < 0) 46 | INFO("(no sockopt/3: %m)"); 47 | #endif 48 | } 49 | 50 | void err_nonfatal(const char *s) { 51 | char s1[150], *s2; 52 | #pragma GCC diagnostic push 53 | #pragma GCC diagnostic ignored "-Wstringop-truncation" 54 | strncpy(s1, s, sizeof(s1)); 55 | #pragma GCC diagnostic pop 56 | if ((s2 = strstr(s, "%m"))) { 57 | strncpy(s1 + (s2 - s), strerror(errno), sizeof(s1) - (s2 - s)); 58 | s2 += 2; 59 | strncpy(s1 + strlen(s1), s2, sizeof(s1) - strlen(s1)); 60 | } 61 | #ifndef sun 62 | /* Solaris doesn't have %h in syslog */ 63 | else if ((s2 = strstr(s, "%h"))) { 64 | strncpy(s1 + (s2 - s), hstrerror(h_errno), sizeof(s1) - (s2 - s)); 65 | s2 += 2; 66 | strncpy(s1 + strlen(s1), s2, sizeof(s1) - strlen(s1)); 67 | } 68 | #endif 69 | 70 | s1[sizeof(s1)-1] = '\0'; 71 | #ifdef ISSERVER 72 | syslog(LOG_ERR, "%s", s1); 73 | syslog(LOG_ERR, "Exiting."); 74 | #endif 75 | fprintf(stderr, "Error: %s\n", s1); 76 | } 77 | 78 | void err(const char *s) { 79 | err_nonfatal(s); 80 | fprintf(stderr, "Exiting.\n"); 81 | exit(EXIT_FAILURE); 82 | } 83 | 84 | void logging(const char* name) { 85 | #ifdef ISSERVER 86 | openlog(name, LOG_PID, LOG_DAEMON); 87 | #endif 88 | setvbuf(stdout, NULL, _IONBF, 0); 89 | setvbuf(stderr, NULL, _IONBF, 0); 90 | } 91 | 92 | #ifndef ntohll 93 | #ifdef WORDS_BIGENDIAN 94 | uint64_t ntohll(uint64_t a) { 95 | return a; 96 | } 97 | #else 98 | uint64_t ntohll(uint64_t a) { 99 | u32 lo = a & 0xffffffff; 100 | u32 hi = a >> 32U; 101 | lo = ntohl(lo); 102 | hi = 
ntohl(hi); 103 | return ((uint64_t) lo) << 32U | hi; 104 | } 105 | #endif 106 | #endif 107 | 108 | /** 109 | * Read data from a file descriptor into a buffer 110 | * 111 | * @param f a file descriptor 112 | * @param buf a buffer 113 | * @param len the number of bytes to be read 114 | * @return 0 on completion, or -1 on failure 115 | **/ 116 | int readit(int f, void *buf, size_t len) { 117 | ssize_t res; 118 | while (len > 0) { 119 | NBD_DEBUG("*"); 120 | res = read(f, buf, len); 121 | if (res > 0) { 122 | len -= res; 123 | buf += res; 124 | } else if (res < 0) { 125 | if(errno != EAGAIN) { 126 | err_nonfatal("Read failed: %m"); 127 | return -1; 128 | } 129 | } else { 130 | errno = ECONNRESET; 131 | return -1; 132 | } 133 | } 134 | return 0; 135 | } 136 | 137 | /** 138 | * Write data from a buffer into a filedescriptor 139 | * 140 | * @param f a file descriptor 141 | * @param buf a buffer containing data 142 | * @param len the number of bytes to be written 143 | * @return 0 on success, or -1 if the socket was closed 144 | **/ 145 | int writeit(int f, void *buf, size_t len) { 146 | ssize_t res; 147 | while (len > 0) { 148 | NBD_DEBUG("+"); 149 | if ((res = write(f, buf, len)) <= 0) { 150 | switch(errno) { 151 | case EAGAIN: 152 | continue; /* retry the write; falling through would apply res == -1 to len and buf */ 153 | default: 154 | err_nonfatal("Send failed: %m"); 155 | return -1; 156 | } 157 | } 158 | len -= res; 159 | buf += res; 160 | } 161 | return 0; 162 | } 163 | -------------------------------------------------------------------------------- /targets/nbd/cliserv.h: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: GPL-2.0 2 | 3 | #ifndef UBLK_NBD_CLISERV_INC_H 4 | #define UBLK_NBD_CLISERV_INC_H 5 | 6 | /* This header file is shared by client & server. They really have 7 | * something to share... 
8 | * */ 9 | 10 | /* Client/server protocol is as follows: 11 | Send INIT_PASSWD 12 | Send 64-bit cliserv_magic 13 | Send 64-bit size of exported device 14 | Send 128 bytes of zeros (reserved for future use) 15 | */ 16 | 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | 24 | #ifdef __cplusplus 25 | extern "C" { 26 | #endif 27 | 28 | #define NOTLS 29 | 30 | #define u64 uint64_t 31 | #define u32 uint32_t 32 | #define u16 uint16_t 33 | #define u8 uint8_t 34 | 35 | #define s64 int64_t 36 | #define s32 int32_t 37 | #define s16 int16_t 38 | #define s8 int8_t 39 | 40 | #define __be32 u32 41 | #define __be64 u64 42 | #include "nbd.h" 43 | 44 | #ifndef HAVE_FDATASYNC 45 | #define fdatasync(arg) fsync(arg) 46 | #endif 47 | 48 | #if NBD_LFS==1 49 | /* /usr/include/features.h (included from /usr/include/sys/types.h) 50 | defines this when _GNU_SOURCE is defined 51 | */ 52 | #ifndef _LARGEFILE_SOURCE 53 | #define _LARGEFILE_SOURCE 54 | #endif 55 | #define _FILE_OFFSET_BITS 64 56 | #endif 57 | 58 | #ifndef G_GNUC_NORETURN 59 | #if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ > 4) 60 | #define G_GNUC_NORETURN __attribute__((__noreturn__)) 61 | #define G_GNUC_UNUSED __attribute__((unused)) 62 | #else 63 | #define G_GNUC_NORETURN 64 | #define G_GNUC_UNUSED 65 | #endif 66 | #endif 67 | 68 | extern const u64 cliserv_magic; 69 | extern const u64 opts_magic; 70 | extern const u64 rep_magic; 71 | 72 | #define INIT_PASSWD "NBDMAGIC" 73 | 74 | #define INFO(a) do { } while(0) 75 | 76 | int set_nonblocking(int fd, int nb); 77 | void setmysockopt(int sock); 78 | void err_nonfatal(const char *s); 79 | 80 | void nbd_err(const char *s) G_GNUC_NORETURN; 81 | #define err(S) nbd_err(S) 82 | 83 | void logging(const char* name); 84 | 85 | #ifndef ntohll 86 | uint64_t ntohll(uint64_t a); 87 | #endif 88 | #ifndef htonll 89 | #define htonll ntohll 90 | #endif 91 | 92 | int readit(int f, void *buf, size_t len); 93 | int writeit(int f, void *buf, size_t len); 94 
| 95 | #define NBD_DEFAULT_PORT "10809" /* Port on which named exports are 96 | * served */ 97 | 98 | /* Options that the client can select to the server */ 99 | #define NBD_OPT_EXPORT_NAME (1) /**< Client wants to select a named export (is followed by name of export) */ 100 | #define NBD_OPT_ABORT (2) /**< Client wishes to abort negotiation */ 101 | #define NBD_OPT_LIST (3) /**< Client request list of supported exports (not followed by data) */ 102 | #define NBD_OPT_STARTTLS (5) /**< Client wishes to initiate TLS */ 103 | #define NBD_OPT_INFO (6) /**< Client wants information about the given export */ 104 | #define NBD_OPT_GO (7) /**< Client wants to select the given and move to the transmission phase */ 105 | 106 | /* Replies the server can send during negotiation */ 107 | #define NBD_REP_ACK (1) /**< ACK a request. Data: option number to be acked */ 108 | #define NBD_REP_SERVER (2) /**< Reply to NBD_OPT_LIST (one of these per server; must be followed by NBD_REP_ACK to signal the end of the list */ 109 | #define NBD_REP_INFO (3) /**< Reply to NBD_OPT_INFO */ 110 | #define NBD_REP_FLAG_ERROR (1 << 31) /** If the high bit is set, the reply is an error */ 111 | #define NBD_REP_ERR_UNSUP (1U | NBD_REP_FLAG_ERROR) /**< Client requested an option not understood by this version of the server */ 112 | #define NBD_REP_ERR_POLICY (2U | NBD_REP_FLAG_ERROR) /**< Client requested an option not allowed by server configuration. 
(e.g., the option was disabled) */ 113 | #define NBD_REP_ERR_INVALID (3U | NBD_REP_FLAG_ERROR) /**< Client issued an invalid request */ 114 | #define NBD_REP_ERR_PLATFORM (4U | NBD_REP_FLAG_ERROR) /**< Option not supported on this platform */ 115 | #define NBD_REP_ERR_TLS_REQD (5U | NBD_REP_FLAG_ERROR) /**< TLS required */ 116 | #define NBD_REP_ERR_UNKNOWN (6U | NBD_REP_FLAG_ERROR) /**< NBD_OPT_INFO or ..._GO requested on unknown export */ 117 | #define NBD_REP_ERR_BLOCK_SIZE_REQD (8 | NBD_REP_FLAG_ERROR) /**< Server is not willing to serve the export without the block size being negotiated */ 118 | 119 | /* Global flags */ 120 | #define NBD_FLAG_FIXED_NEWSTYLE (1 << 0) /**< new-style export that actually supports extending */ 121 | #define NBD_FLAG_NO_ZEROES (1 << 1) /**< we won't send the 128 bits of zeroes if the client sends NBD_FLAG_C_NO_ZEROES */ 122 | /* Flags from client to server. */ 123 | #define NBD_FLAG_C_FIXED_NEWSTYLE NBD_FLAG_FIXED_NEWSTYLE 124 | #define NBD_FLAG_C_NO_ZEROES NBD_FLAG_NO_ZEROES 125 | 126 | /* Info types */ 127 | #define NBD_INFO_EXPORT (0) 128 | #define NBD_INFO_NAME (1) 129 | #define NBD_INFO_DESCRIPTION (2) 130 | #define NBD_INFO_BLOCK_SIZE (3) 131 | 132 | #ifdef __cplusplus 133 | } 134 | #endif 135 | #endif 136 | -------------------------------------------------------------------------------- /targets/nbd/nbd-client.c: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: GPL-2.0 2 | 3 | /* 4 | * Open connection for network block device 5 | * 6 | * Copyright 1997,1998 Pavel Machek, distribute under GPL 7 | * 8 | * Copyright (c) 2002 - 2011 Wouter Verhelst 9 | * 10 | * Version 1.0 - 64bit issues should be fixed, now 11 | * Version 1.1 - added bs (blocksize) option (Alexey Guzeev, aga@permonline.ru) 12 | * Version 1.2 - I added new option '-d' to send the disconnect request 13 | * Version 2.0 - Version synchronised with server 14 | * Version 2.1 - Check for disconnection 
before INIT_PASSWD is received 15 | * to make errormsg a bit more helpful in case the server can't 16 | * open the exported file. 17 | * 16/03/2010 - Add IPv6 support. 18 | * Kitt Tientanopajai 19 | * Neutron Soutmun 20 | * Suriya Soutmun 21 | */ 22 | 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | #include 36 | #include 37 | #include 38 | #include 39 | #include 40 | #include 41 | #include 42 | #include 43 | #include 44 | #include 45 | 46 | #include 47 | 48 | #define MY_NAME "ublk_nbd" 49 | #include "cliserv.h" 50 | 51 | #if HAVE_GNUTLS && !defined(NOTLS) 52 | #include "crypto-gnutls.h" 53 | #endif 54 | 55 | #define NBDC_DO_LIST 1 56 | 57 | int opennet(const char *name, const char* portstr, int sdp) { 58 | int sock; 59 | struct addrinfo hints; 60 | struct addrinfo *ai = NULL; 61 | struct addrinfo *rp = NULL; 62 | int e; 63 | 64 | memset(&hints,'\0',sizeof(hints)); 65 | hints.ai_family = AF_UNSPEC; 66 | hints.ai_socktype = SOCK_STREAM; 67 | hints.ai_flags = AI_ADDRCONFIG | AI_NUMERICSERV; 68 | hints.ai_protocol = IPPROTO_TCP; 69 | 70 | e = getaddrinfo(name, portstr, &hints, &ai); 71 | 72 | if(e != 0) { 73 | fprintf(stderr, "getaddrinfo failed: %s\n", gai_strerror(e)); 74 | freeaddrinfo(ai); 75 | return -1; 76 | } 77 | 78 | if(sdp) { 79 | #ifdef WITH_SDP 80 | if (ai->ai_family == AF_INET) 81 | ai->ai_family = AF_INET_SDP; 82 | else (ai->ai_family == AF_INET6) 83 | ai->ai_family = AF_INET6_SDP; 84 | #else 85 | err("Can't do SDP: I was not compiled with SDP support!"); 86 | #endif 87 | } 88 | 89 | for(rp = ai; rp != NULL; rp = rp->ai_next) { 90 | sock = socket(rp->ai_family, rp->ai_socktype, rp->ai_protocol); 91 | 92 | if(sock == -1) 93 | continue; /* error */ 94 | 95 | if(connect(sock, rp->ai_addr, rp->ai_addrlen) != -1) 96 | break; /* success */ 97 | 98 | close(sock); 99 | } 100 | 101 | if (rp == NULL) { 102 | err_nonfatal("Socket failed: %m"); 
103 | sock = -1; 104 | goto err; 105 | } 106 | 107 | setmysockopt(sock); 108 | err: 109 | freeaddrinfo(ai); 110 | return sock; 111 | } 112 | 113 | int openunix(const char *path) { 114 | int sock; 115 | struct sockaddr_un un_addr; 116 | memset(&un_addr, 0, sizeof(un_addr)); 117 | 118 | un_addr.sun_family = AF_UNIX; 119 | if (strnlen(path, sizeof(un_addr.sun_path)) == sizeof(un_addr.sun_path)) { 120 | err_nonfatal("UNIX socket path too long"); 121 | return -1; 122 | } 123 | 124 | strncpy(un_addr.sun_path, path, sizeof(un_addr.sun_path) - 1); 125 | 126 | if ((sock = socket(AF_UNIX, SOCK_STREAM, 0)) == -1) { 127 | err_nonfatal("SOCKET failed"); 128 | return -1; 129 | }; 130 | 131 | if (connect(sock, &un_addr, sizeof(un_addr)) == -1) { 132 | err_nonfatal("CONNECT failed"); 133 | close(sock); 134 | return -1; 135 | } 136 | return sock; 137 | } 138 | 139 | static void send_request(int sock, uint32_t opt, ssize_t datasize, void* data) { 140 | struct { 141 | uint64_t magic; 142 | uint32_t opt; 143 | uint32_t datasize; 144 | } __attribute__((packed)) header = { 145 | ntohll(opts_magic), 146 | ntohl(opt), 147 | ntohl(datasize), 148 | }; 149 | if(datasize < 0) { 150 | datasize = strlen((char*)data); 151 | header.datasize = htonl(datasize); 152 | } 153 | writeit(sock, &header, sizeof(header)); 154 | if(data != NULL) { 155 | writeit(sock, data, datasize); 156 | } 157 | } 158 | 159 | static void send_info_request(int sock, uint32_t opt, int n_reqs, 160 | uint16_t* reqs, char* name) { 161 | uint16_t rlen = htons(n_reqs); 162 | uint32_t nlen = htonl(strlen(name)); 163 | 164 | send_request(sock, opt, sizeof(uint32_t) + strlen(name) + sizeof(uint16_t) + n_reqs * sizeof(uint16_t), NULL); 165 | writeit(sock, &nlen, sizeof(nlen)); 166 | writeit(sock, name, strlen(name)); 167 | writeit(sock, &rlen, sizeof(rlen)); 168 | if(n_reqs > 0) { 169 | writeit(sock, reqs, n_reqs * sizeof(uint16_t)); 170 | } 171 | } 172 | 173 | struct reply { 174 | uint64_t magic; 175 | uint32_t opt; 176 | uint32_t 
reply_type; 177 | uint32_t datasize; 178 | char data[]; 179 | } __attribute__((packed)); 180 | 181 | static struct reply* read_reply(int sock) { 182 | struct reply *retval = (struct reply *)malloc(sizeof(struct reply)); 183 | readit(sock, retval, sizeof(*retval)); 184 | retval->magic = ntohll(retval->magic); 185 | retval->opt = ntohl(retval->opt); 186 | retval->reply_type = ntohl(retval->reply_type); 187 | retval->datasize = ntohl(retval->datasize); 188 | if (retval->magic != rep_magic) { 189 | fprintf(stderr, "E: received invalid negotiation magic %" PRIu64 " (expected %" PRIu64 ")", retval->magic, rep_magic); 190 | exit(EXIT_FAILURE); 191 | } 192 | if (retval->datasize > 0) { 193 | retval = (struct reply *)realloc(retval, sizeof(struct reply) + retval->datasize); 194 | readit(sock, &(retval->data), retval->datasize); 195 | } 196 | return retval; 197 | } 198 | 199 | static void ask_list(int sock) { 200 | uint32_t opt_server; 201 | uint32_t len; 202 | uint32_t lenn; 203 | uint32_t reptype; 204 | uint64_t magic; 205 | int rlen; 206 | #define BUF_SIZE 1024 207 | char buf[BUF_SIZE]; 208 | 209 | send_request(sock, NBD_OPT_LIST, 0, NULL); 210 | /* newline, move away from the "Negotiation:" line */ 211 | printf("\n"); 212 | do { 213 | memset(buf, 0, 1024); 214 | if(read(sock, &magic, sizeof(magic)) < 0) { 215 | err("Reading magic from server: %m"); 216 | } 217 | if(read(sock, &opt_server, sizeof(opt_server)) < 0) { 218 | err("Reading option: %m"); 219 | } 220 | if(read(sock, &reptype, sizeof(reptype)) <0) { 221 | err("Reading reply from server: %m"); 222 | } 223 | if(read(sock, &len, sizeof(len)) < 0) { 224 | err("Reading length from server: %m"); 225 | } 226 | magic=ntohll(magic); 227 | len=ntohl(len); 228 | reptype=ntohl(reptype); 229 | if(magic != rep_magic) { 230 | err("Not enough magic from server"); 231 | } 232 | if(reptype & NBD_REP_FLAG_ERROR) { 233 | switch(reptype) { 234 | case NBD_REP_ERR_POLICY: 235 | fprintf(stderr, "\nE: listing not allowed by server.\n"); 
236 | break; 237 | default: 238 | fprintf(stderr, "\nE: unexpected error from server.\n"); 239 | break; 240 | } 241 | if(len > 0 && len < BUF_SIZE) { 242 | if((rlen=read(sock, buf, len)) < 0) { 243 | fprintf(stderr, "\nE: could not read error message from server\n"); 244 | } else { 245 | buf[rlen] = '\0'; 246 | fprintf(stderr, "Server said: %s\n", buf); 247 | } 248 | } 249 | exit(EXIT_FAILURE); 250 | } else { 251 | if(reptype != NBD_REP_ACK) { 252 | if(reptype != NBD_REP_SERVER) { 253 | err("Server sent us a reply we don't understand!"); 254 | } 255 | if(read(sock, &lenn, sizeof(lenn)) < 0) { 256 | fprintf(stderr, "\nE: could not read export name length from server\n"); 257 | exit(EXIT_FAILURE); 258 | } 259 | lenn=ntohl(lenn); 260 | if (lenn >= BUF_SIZE) { 261 | fprintf(stderr, "\nE: export name on server too long\n"); 262 | exit(EXIT_FAILURE); 263 | } 264 | if(read(sock, buf, lenn) < 0) { 265 | fprintf(stderr, "\nE: could not read export name from server\n"); 266 | exit(EXIT_FAILURE); 267 | } 268 | buf[lenn] = 0; 269 | printf("%s", buf); 270 | len -= lenn; 271 | len -= sizeof(lenn); 272 | if(len > 0) { 273 | if(read(sock, buf, len) < 0) { 274 | fprintf(stderr, "\nE: could not read export description from server\n"); 275 | exit(EXIT_FAILURE); 276 | } 277 | buf[len] = 0; 278 | printf(": %s\n", buf); 279 | } else { 280 | printf("\n"); 281 | } 282 | } 283 | } 284 | } while(reptype != NBD_REP_ACK); 285 | send_request(sock, NBD_OPT_ABORT, 0, NULL); 286 | } 287 | 288 | static void parse_sizes(char *buf, uint64_t *size, uint16_t *flags) { 289 | memcpy(size, buf, sizeof(*size)); 290 | *size = ntohll(*size); 291 | buf += sizeof(*size); 292 | memcpy(flags, buf, sizeof(*flags)); 293 | *flags = ntohs(*flags); 294 | 295 | if ((*size>>12) > (uint64_t)~0UL) { 296 | printf("size = %luMB", (unsigned long)(*size>>20)); 297 | err("Exported device is too big for me. 
Get 64-bit machine :-(\n"); 298 | } else { 299 | printf("size = %luMB", (unsigned long)(*size>>20)); 300 | } 301 | printf("\n"); 302 | } 303 | 304 | static void send_opt_exportname(int sock, u64 *rsize64, uint16_t *flags, 305 | bool can_opt_go, char* name, uint16_t global_flags) { 306 | send_request(sock, NBD_OPT_EXPORT_NAME, -1, name); 307 | char b[sizeof(*flags) + sizeof(*rsize64)]; 308 | if(readit(sock, b, sizeof(b)) < 0 && can_opt_go) { 309 | err("E: server does not support NBD_OPT_GO and dropped connection after sending NBD_OPT_EXPORT_NAME. Try -g."); 310 | } 311 | parse_sizes(b, rsize64, flags); 312 | if(!(global_flags & NBD_FLAG_NO_ZEROES)) { 313 | char buf[125]; 314 | readit(sock, buf, 124); 315 | } 316 | } 317 | 318 | 319 | void negotiate(int *sockp, u64 *rsize64, uint16_t *flags, char* name, 320 | uint32_t needed_flags, uint32_t client_flags, uint32_t do_opts, 321 | char *certfile, char *keyfile, char *cacertfile, 322 | char *tlshostname, bool tls, bool can_opt_go) { 323 | u64 magic; 324 | uint16_t tmp; 325 | uint16_t global_flags; 326 | char buf[256] = "\0\0\0\0\0\0\0\0\0"; 327 | int sock = *sockp; 328 | 329 | printf("Negotiation: "); 330 | readit(sock, buf, 8); 331 | if (strcmp(buf, INIT_PASSWD)) 332 | err("INIT_PASSWD bad"); 333 | printf("."); 334 | readit(sock, &magic, sizeof(magic)); 335 | magic = ntohll(magic); 336 | if (magic != opts_magic) { 337 | if(magic == cliserv_magic) { 338 | err("It looks like you're trying to connect to an oldstyle server. This is no longer supported since nbd 3.10."); 339 | } 340 | } 341 | printf("."); 342 | readit(sock, &tmp, sizeof(uint16_t)); 343 | global_flags = ntohs(tmp); 344 | if((needed_flags & global_flags) != needed_flags) { 345 | /* There's currently really only one reason why this 346 | * check could possibly fail, but we may need to change 347 | * this error message in the future... 
*/ 348 | fprintf(stderr, "\nE: Server does not support listing exports\n"); 349 | exit(EXIT_FAILURE); 350 | } 351 | 352 | if (global_flags & NBD_FLAG_NO_ZEROES) { 353 | client_flags |= NBD_FLAG_C_NO_ZEROES; 354 | } 355 | client_flags = htonl(client_flags); 356 | if (write(sock, &client_flags, sizeof(client_flags)) < 0) 357 | err("Failed/2.1: %m"); 358 | 359 | #if HAVE_GNUTLS && !defined(NOTLS) 360 | /* TLS */ 361 | if (tls) { 362 | int plainfd[2]; // [0] is used by the proxy, [1] is used by NBD 363 | tlssession_t *s = NULL; 364 | int ret; 365 | uint32_t tmp32; 366 | uint64_t tmp64; 367 | 368 | send_request(sock, NBD_OPT_STARTTLS, 0, NULL); 369 | 370 | if (read(sock, &tmp64, sizeof(tmp64)) < 0) 371 | err("Could not read cliserv_magic: %m"); 372 | tmp64 = ntohll(tmp64); 373 | if (tmp64 != NBD_OPT_REPLY_MAGIC) { 374 | err("reply magic does not match"); 375 | } 376 | if (read(sock, &tmp32, sizeof(tmp32)) < 0) 377 | err("Could not read option type: %m"); 378 | tmp32 = ntohl(tmp32); 379 | if (tmp32 != NBD_OPT_STARTTLS) 380 | err("Reply to wrong option"); 381 | if (read(sock, &tmp32, sizeof(tmp32)) < 0) 382 | err("Could not read option reply type: %m"); 383 | tmp32 = ntohl(tmp32); 384 | if (tmp32 != NBD_REP_ACK) { 385 | err("Option reply type != NBD_REP_ACK"); 386 | } 387 | if (read(sock, &tmp32, sizeof(tmp32)) < 0) err( 388 | "Could not read option data length: %m"); 389 | tmp32 = ntohl(tmp32); 390 | if (tmp32 != 0) { 391 | err("Option reply data length != 0"); 392 | } 393 | s = tlssession_new(0, 394 | keyfile, 395 | certfile, 396 | cacertfile, 397 | tlshostname, 398 | !cacertfile || !tlshostname, // insecure flag 399 | #ifdef DODBG 400 | 1, // debug 401 | #else 402 | 0, // debug 403 | #endif 404 | NULL, // quitfn 405 | NULL, // erroutfn 406 | NULL // opaque 407 | ); 408 | if (!s) 409 | err("Cannot establish TLS session"); 410 | 411 | if (socketpair(AF_UNIX, SOCK_STREAM, 0, plainfd) < 0) 412 | err("Cannot get socket pair"); 413 | 414 | if (set_nonblocking(plainfd[0], 0) 
<0 || 415 | set_nonblocking(plainfd[1], 0) <0 || 416 | set_nonblocking(sock, 0) <0) { 417 | close(plainfd[0]); 418 | close(plainfd[1]); 419 | err("Cannot set socket options"); 420 | } 421 | 422 | ret = fork(); 423 | if (ret < 0) 424 | err("Could not fork"); 425 | else if (ret == 0) { 426 | // we are the child 427 | if (daemon(0, 0) < 0) { 428 | /* no one will see this */ 429 | fprintf(stderr, "Can't detach from the terminal"); 430 | exit(1); 431 | } 432 | signal (SIGPIPE, SIG_IGN); 433 | close(plainfd[1]); 434 | tlssession_mainloop(sock, plainfd[0], s); 435 | close(sock); 436 | close(plainfd[0]); 437 | exit(0); 438 | } 439 | close(plainfd[0]); 440 | close(sock); 441 | sock = plainfd[1]; /* use the decrypted FD from now on */ 442 | *sockp = sock; 443 | } 444 | #else 445 | if (keyfile) { 446 | err("TLS requested but support not compiled in"); 447 | } 448 | #endif 449 | 450 | if(do_opts & NBDC_DO_LIST) { 451 | ask_list(sock); 452 | exit(EXIT_SUCCESS); 453 | } 454 | 455 | struct reply *rep = NULL; 456 | 457 | if(!can_opt_go) { 458 | send_opt_exportname(sock, rsize64, flags, can_opt_go, name, global_flags); 459 | return; 460 | } 461 | 462 | send_info_request(sock, NBD_OPT_GO, 0, NULL, name); 463 | 464 | do { 465 | if(rep != NULL) free(rep); 466 | rep = read_reply(sock); 467 | if(rep && (rep->reply_type & NBD_REP_FLAG_ERROR)) { 468 | switch(rep->reply_type) { 469 | case NBD_REP_ERR_UNSUP: 470 | /* server doesn't support NBD_OPT_GO or NBD_OPT_INFO, 471 | * fall back to NBD_OPT_EXPORT_NAME */ 472 | send_opt_exportname(sock, rsize64, flags, can_opt_go, name, global_flags); 473 | free(rep); 474 | return; 475 | case NBD_REP_ERR_POLICY: 476 | if(rep->datasize > 0) { 477 | char errstr[1024]; 478 | snprintf(errstr, sizeof errstr, "Connection not allowed by server policy. 
Server said: %s", rep->data); 479 | err(errstr); 480 | } else { 481 | err("Connection not allowed by server policy."); 482 | } 483 | free(rep); 484 | exit(EXIT_FAILURE); 485 | default: 486 | if(rep->datasize > 0) { 487 | char errstr[1024]; 488 | snprintf(errstr, sizeof errstr, "Unknown error returned by server. Server said: %s", rep->data); 489 | err(errstr); 490 | } else { 491 | err("Unknown error returned by server."); 492 | } 493 | free(rep); 494 | exit(EXIT_FAILURE); 495 | } 496 | } 497 | uint16_t info_type; 498 | switch(rep->reply_type) { 499 | case NBD_REP_INFO: 500 | memcpy(&info_type, rep->data, 2); 501 | info_type = htons(info_type); 502 | switch(info_type) { 503 | case NBD_INFO_EXPORT: 504 | parse_sizes(rep->data + 2, rsize64, flags); 505 | break; 506 | default: 507 | // ignore these, don't need them 508 | break; 509 | } 510 | break; 511 | case NBD_REP_ACK: 512 | break; 513 | default: 514 | err_nonfatal("Unknown reply to NBD_OPT_GO received, ignoring"); 515 | } 516 | } while(rep->reply_type != NBD_REP_ACK); 517 | free(rep); 518 | } 519 | -------------------------------------------------------------------------------- /targets/nbd/nbd-debug.h: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: GPL-2.0 2 | 3 | #ifndef NBD_DEBUG_H 4 | #define NBD_DEBUG_H 5 | #include "config.h" 6 | 7 | #ifdef __cplusplus 8 | extern "C" { 9 | #endif 10 | 11 | /* Debugging macros */ 12 | #ifdef DODBG 13 | #define NBD_DEBUG(...) printf(__VA_ARGS__) 14 | #else 15 | #define NBD_DEBUG(...) 16 | #endif 17 | #ifndef PACKAGE_VERSION 18 | #define PACKAGE_VERSION "" 19 | #endif 20 | 21 | #ifdef __cplusplus 22 | } 23 | #endif 24 | #endif 25 | -------------------------------------------------------------------------------- /targets/nbd/nbd.h: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: GPL-2.0 2 | /* 3 | * 1999 Copyright (C) Pavel Machek, pavel@ucw.cz. 
This code is GPL. 4 | * 1999/11/04 Copyright (C) 1999 VMware, Inc. (Regis "HPReg" Duchesne) 5 | * Made nbd_end_request() use the io_request_lock 6 | * 2001 Copyright (C) Steven Whitehouse 7 | * New nbd_end_request() for compatibility with new linux block 8 | * layer code. 9 | * 2003/06/24 Louis D. Langholtz 10 | * Removed unneeded blksize_bits field from nbd_device struct. 11 | * Cleanup PARANOIA usage & code. 12 | * 2004/02/19 Paul Clements 13 | * Removed PARANOIA, plus various cleanup and comments 14 | */ 15 | 16 | #ifndef LINUX_NBD_H 17 | #define LINUX_NBD_H 18 | 19 | #include 20 | 21 | #ifdef __cplusplus 22 | extern "C" { 23 | #endif 24 | 25 | //#include 26 | 27 | #if 0 28 | #define NBD_SET_SOCK _IO( 0xab, 0 ) 29 | #define NBD_SET_BLKSIZE _IO( 0xab, 1 ) 30 | #define NBD_SET_SIZE _IO( 0xab, 2 ) 31 | #define NBD_DO_IT _IO( 0xab, 3 ) 32 | #define NBD_CLEAR_SOCK _IO( 0xab, 4 ) 33 | #define NBD_CLEAR_QUE _IO( 0xab, 5 ) 34 | #define NBD_PRINT_DEBUG _IO( 0xab, 6 ) 35 | #define NBD_SET_SIZE_BLOCKS _IO( 0xab, 7 ) 36 | #define NBD_DISCONNECT _IO( 0xab, 8 ) 37 | #define NBD_SET_TIMEOUT _IO( 0xab, 9 ) 38 | #define NBD_SET_FLAGS _IO( 0xab, 10 ) 39 | #endif 40 | 41 | enum { 42 | NBD_CMD_READ = 0, 43 | NBD_CMD_WRITE = 1, 44 | NBD_CMD_DISC = 2, 45 | NBD_CMD_FLUSH = 3, 46 | NBD_CMD_TRIM = 4, 47 | NBD_CMD_CACHE = 5, 48 | NBD_CMD_WRITE_ZEROES = 6, 49 | NBD_CMD_BLOCK_STATUS = 7, 50 | NBD_CMD_RESIZE = 8 51 | }; 52 | 53 | #define NBD_CMD_MASK_COMMAND 0x0000ffff 54 | #define NBD_CMD_SHIFT (16) 55 | #define NBD_CMD_FLAG_FUA ((1 << 0) << NBD_CMD_SHIFT) 56 | #define NBD_CMD_FLAG_NO_HOLE ((1 << 1) << NBD_CMD_SHIFT) 57 | 58 | /* values for flags field */ 59 | #define NBD_FLAG_HAS_FLAGS (1 << 0) /* Flags are there */ 60 | #define NBD_FLAG_READ_ONLY (1 << 1) /* Device is read-only */ 61 | #define NBD_FLAG_SEND_FLUSH (1 << 2) /* Send FLUSH */ 62 | #define NBD_FLAG_SEND_FUA (1 << 3) /* Send FUA (Force Unit Access) */ 63 | #define NBD_FLAG_ROTATIONAL (1 << 4) /* Use elevator algorithm - 
rotational media */ 64 | #define NBD_FLAG_SEND_TRIM (1 << 5) /* Send TRIM (discard) */ 65 | #define NBD_FLAG_SEND_WRITE_ZEROES (1 << 6) /* Send NBD_CMD_WRITE_ZEROES */ 66 | #define NBD_FLAG_CAN_MULTI_CONN (1 << 8) /* multiple connections are okay */ 67 | 68 | /* from libnbd/lib/nbd-protocol.h */ 69 | #define NBD_FLAG_SEND_DF (1 << 7) 70 | #define NBD_FLAG_SEND_CACHE (1 << 10) 71 | #define NBD_FLAG_SEND_FAST_ZERO (1 << 11) 72 | 73 | #define nbd_cmd(req) ((req)->cmd[0]) 74 | 75 | /* userspace doesn't need the nbd_device structure */ 76 | 77 | /* These are sent over the network in the request/reply magic fields */ 78 | 79 | #define NBD_REQUEST_MAGIC 0x25609513 80 | #define NBD_REPLY_MAGIC 0x67446698 81 | #define NBD_STRUCTURED_REPLY_MAGIC 0x668e33ef 82 | 83 | /* for the trace log, not part of the protocol, not sent over the wire */ 84 | #define NBD_TRACELOG_MAGIC 0x25609514 85 | 86 | #define NBD_OPT_REPLY_MAGIC 0x3e889045565a9LL 87 | 88 | /* 89 | * This is the packet used for communication between client and 90 | * server. All data are in network byte order. 91 | */ 92 | struct nbd_request { 93 | uint32_t magic; 94 | uint32_t type; /* == READ || == WRITE */ 95 | char handle[8]; 96 | uint64_t from; 97 | uint32_t len; 98 | } __attribute__ ((packed)); 99 | 100 | /* 101 | * This is the reply packet that nbd-server sends back to the client after 102 | * it has completed an I/O request (or an error occurs). 
103 | */ 104 | struct nbd_reply { 105 | uint32_t magic; 106 | uint32_t error; /* 0 = ok, else error */ 107 | char handle[8]; /* handle you got from request */ 108 | }; 109 | 110 | extern int opennet(const char *name, const char* portstr, int sdp); 111 | extern int openunix(const char *path); 112 | extern void negotiate(int *sockp, u64 *rsize64, uint16_t *flags, char* name, 113 | uint32_t needed_flags, uint32_t client_flags, uint32_t do_opts, 114 | char *certfile, char *keyfile, char *cacertfile, 115 | char *tlshostname, bool tls, bool can_opt_go); 116 | 117 | #ifdef __cplusplus 118 | } 119 | #endif 120 | #endif 121 | -------------------------------------------------------------------------------- /targets/ublk.cpp: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: MIT or GPL-2.0-only 2 | 3 | #include "config.h" 4 | #include "ublksrv_tgt.h" 5 | #include 6 | 7 | static int list_one_dev(int number, bool log, bool verbose); 8 | 9 | /* 10 | * returns 0 on success and -errno on failure 11 | */ 12 | static int ublksrv_execv_helper(const char *op, const char *type, int argc, char *argv[]) 13 | { 14 | char *cmd, *fp, **nargv, *evtfd_str; 15 | char full_path[256]; 16 | ssize_t fp_len; 17 | int daemon = strcmp(op, "help"); 18 | int res, i; 19 | int pfd[2] = { -1, -1}; 20 | 21 | asprintf(&cmd, "ublk.%s", type); 22 | 23 | /* 24 | * Create full path to the ublk. binary. It must be in the 25 | * same directory as the main ublk binary itself. 
26 | */ 27 | memset(full_path, 0, sizeof(full_path)); 28 | fp_len = readlink("/proc/self/exe", full_path, sizeof(full_path)); 29 | if (fp_len < 0 || fp_len >= sizeof(full_path)) 30 | return -EINVAL; 31 | asprintf(&fp, "%s.%s", full_path, type); 32 | 33 | nargv = (char **)calloc(argc + 4, sizeof(char *)); 34 | if (!nargv) 35 | return -ENOMEM; 36 | nargv[0] = cmd; 37 | nargv[1] = (char *)op; 38 | for (i = 1; i < argc; i++) { 39 | if (!strcmp(argv[i], "--eventfd")) 40 | return -EINVAL; 41 | nargv[i + 1] = argv[i]; 42 | } 43 | 44 | if (daemon) { 45 | if (pipe(pfd)) { 46 | fprintf(stderr, "Failed to create pipe %s\n", strerror(errno)); 47 | return -errno; 48 | } 49 | asprintf(&evtfd_str, "%d", pfd[1]); 50 | nargv[argc + 1] = strdup("--eventfd"); 51 | nargv[argc + 2] = evtfd_str; 52 | } 53 | 54 | if (!daemon) { 55 | exec: 56 | close(pfd[0]); 57 | execv(fp, nargv); 58 | 59 | /* only reach here is execve failed */ 60 | fprintf(stderr, "Failed to execve() %s. %s\n", fp, strerror(errno)); 61 | if (pfd[1] >= 0) 62 | ublksrv_tgt_send_dev_event(pfd[1], -1); 63 | return -errno; 64 | } 65 | 66 | setsid(); 67 | res = fork(); 68 | if (res == 0) 69 | goto exec; 70 | if (res > 0) { 71 | uint64_t id; 72 | 73 | close(pfd[1]); 74 | res = read(pfd[0], &id, sizeof(id)); 75 | close(pfd[0]); 76 | 77 | if (res == 0) 78 | res = -EINVAL; 79 | if (res == sizeof(id)) 80 | return list_one_dev(id - 1, false, false); 81 | return res; 82 | } 83 | if (res == -1) 84 | res = -errno; 85 | return res; 86 | } 87 | 88 | static void cmd_dev_add_usage(const char *cmd) 89 | { 90 | printf("%s add -t TYPE\n", cmd); 91 | ublksrv_print_std_opts(); 92 | printf("\tFor type specific options, run:\n"); 93 | printf("\t\tublk help -t \n"); 94 | } 95 | 96 | static int ublksrv_stop_io_daemon(const struct ublksrv_ctrl_dev *ctrl_dev) 97 | { 98 | int daemon_pid, cnt = 0; 99 | 100 | /* wait until daemon is exited, or timeout after 3 seconds */ 101 | do { 102 | daemon_pid = ublksrv_get_io_daemon_pid(ctrl_dev, false); 103 | if 
(daemon_pid > 0) { 104 | usleep(100000); 105 | cnt++; 106 | } 107 | } while (daemon_pid > 0 && cnt < 30); 108 | 109 | if (daemon_pid > 0) 110 | return -1; 111 | 112 | return 0; 113 | } 114 | 115 | static int __cmd_dev_del(int number, bool log, bool async) 116 | { 117 | struct ublksrv_ctrl_dev *dev; 118 | int ret; 119 | struct ublksrv_dev_data data = { 120 | .dev_id = number, 121 | .run_dir = ublksrv_get_pid_dir(), 122 | }; 123 | 124 | dev = ublksrv_ctrl_init(&data); 125 | if (!dev) { 126 | fprintf(stderr, "ublksrv_ctrl_init failed id %d\n", number); 127 | return -EOPNOTSUPP; 128 | } 129 | 130 | ret = ublksrv_ctrl_get_info(dev); 131 | if (ret < 0) { 132 | ret = 0; 133 | if (log) 134 | fprintf(stderr, "can't get dev info from %d: %d\n", number, ret); 135 | goto fail; 136 | } 137 | 138 | ret = ublksrv_ctrl_stop_dev(dev); 139 | if (ret < 0) { 140 | fprintf(stderr, "stop dev %d failed\n", number); 141 | goto fail; 142 | } 143 | 144 | ret = ublksrv_stop_io_daemon(dev); 145 | if (ret < 0) 146 | fprintf(stderr, "stop daemon %d failed\n", number); 147 | 148 | if (async) 149 | ret = ublksrv_ctrl_del_dev_async(dev); 150 | else 151 | ret = ublksrv_ctrl_del_dev(dev); 152 | if (ret < 0 && ret != -ENODEV) { 153 | fprintf(stderr, "delete dev %d failed %d\n", number, ret); 154 | goto fail; 155 | } 156 | 157 | fail: 158 | ublksrv_ctrl_deinit(dev); 159 | return ret; 160 | } 161 | 162 | static int cmd_dev_del(int argc, char *argv[]) 163 | { 164 | static const struct option longopts[] = { 165 | { "number", 1, NULL, 'n' }, 166 | { "all", 0, NULL, 'a' }, 167 | { "async", 0, NULL, 0 }, 168 | { NULL } 169 | }; 170 | int number = -1; 171 | int opt, ret, i; 172 | unsigned async = 0; 173 | int option_index = 0; 174 | 175 | while ((opt = getopt_long(argc, argv, "n:a", 176 | longopts, &option_index)) != -1) { 177 | switch (opt) { 178 | case 'a': 179 | break; 180 | 181 | case 'n': 182 | number = strtol(optarg, NULL, 10); 183 | break; 184 | case 0: 185 | if (!strcmp(longopts[option_index].name, 
"async")) 186 | async = 1; 187 | } 188 | } 189 | 190 | if (number >= 0) 191 | return __cmd_dev_del(number, true, async); 192 | 193 | for (i = 0; i < MAX_NR_UBLK_DEVS; i++) { 194 | ret = __cmd_dev_del(i, false, async); 195 | if (ret == -EOPNOTSUPP) 196 | return ret; 197 | } 198 | 199 | return ret; 200 | } 201 | 202 | static void cmd_dev_del_usage(const char *cmd) 203 | { 204 | printf("%s del -n DEV_ID [-a | --all]\n", cmd); 205 | } 206 | 207 | static int cmd_dev_set_affinity(int argc, char *argv[]) 208 | { 209 | static const struct option longopts[] = { 210 | { "number", 1, NULL, 'n' }, 211 | { "queue", 1, NULL, 'q' }, 212 | { "cpuset", 1, NULL, 0}, 213 | { NULL } 214 | }; 215 | int number = -1, qid = -1; 216 | int opt, ret; 217 | int option_index = 0; 218 | const char *cpuset = NULL; 219 | cpu_set_t *set = NULL; 220 | 221 | while ((opt = getopt_long(argc, argv, "n:q:", 222 | longopts, &option_index)) != -1) { 223 | switch (opt) { 224 | case 'n': 225 | number = strtol(optarg, NULL, 10); 226 | break; 227 | case 'q': 228 | qid = strtol(optarg, NULL, 10); 229 | break; 230 | case 0: 231 | if (!strcmp(longopts[option_index].name, "cpuset")) { 232 | cpuset = optarg; 233 | } 234 | } 235 | } 236 | 237 | if (number < 0) { 238 | fprintf(stderr, "Must specify -n / --number\n"); 239 | return -EINVAL; 240 | } 241 | if (qid < 0) { 242 | fprintf(stderr, "Must specify -q / --queue\n"); 243 | return -EINVAL; 244 | } 245 | if (!cpuset) { 246 | fprintf(stderr, "Must specify --cpuset\n"); 247 | return -EINVAL; 248 | } 249 | /* 250 | * The cpuset string for set_affinity is a single set 251 | */ 252 | set = ublk_make_cpuset(1, cpuset); 253 | 254 | ret = ublk_queue_set_affinity(number, qid, set); 255 | free(set); 256 | return ret; 257 | } 258 | 259 | static void cmd_dev_set_affinity_usage(const char *cmd) 260 | { 261 | printf("%s set_affinity [ -n | --number] DEV_ID [ -q | --queue] QID --cpuset SET\n", cmd); 262 | } 263 | 264 | static int list_one_dev(int number, bool log, bool verbose) 
265 | { 266 | struct ublksrv_dev_data data = { 267 | .dev_id = number, 268 | .run_dir = ublksrv_get_pid_dir(), 269 | }; 270 | struct ublksrv_ctrl_dev *dev = ublksrv_ctrl_init(&data); 271 | if (!dev) { 272 | fprintf(stderr, "can't init dev %d\n", data.dev_id); 273 | return -EOPNOTSUPP; 274 | } 275 | int ret; 276 | 277 | if (!dev) { 278 | fprintf(stderr, "ublksrv_ctrl_init failed id %d\n", number); 279 | return -EOPNOTSUPP; 280 | } 281 | ret = ublksrv_ctrl_get_info(dev); 282 | if (ret < 0) { 283 | if (log) 284 | fprintf(stderr, "can't get dev info from %d: %d\n", number, ret); 285 | } else { 286 | const char *buf = ublksrv_tgt_get_dev_data(dev); 287 | 288 | if (verbose && buf) 289 | ublksrv_json_dump(buf); 290 | else 291 | ublksrv_ctrl_dump(dev, buf); 292 | } 293 | 294 | ublksrv_ctrl_deinit(dev); 295 | 296 | return ret; 297 | } 298 | 299 | static int cmd_list_dev_info(int argc, char *argv[]) 300 | { 301 | static const struct option longopts[] = { 302 | { "number", 0, NULL, 'n' }, 303 | { "verbose", 0, NULL, 'v' }, 304 | { NULL } 305 | }; 306 | int number = -1; 307 | int opt, i; 308 | bool verbose = false; 309 | 310 | while ((opt = getopt_long(argc, argv, "n:v", 311 | longopts, NULL)) != -1) { 312 | switch (opt) { 313 | case 'n': 314 | number = strtol(optarg, NULL, 10); 315 | break; 316 | case 'v': 317 | verbose = 1; 318 | break; 319 | } 320 | } 321 | 322 | if (number >= 0) 323 | return list_one_dev(number, true, verbose); 324 | 325 | for (i = 0; i < MAX_NR_UBLK_DEVS; i++) { 326 | int ret = list_one_dev(i, false, verbose); 327 | 328 | if (ret == -EOPNOTSUPP) 329 | return ret; 330 | } 331 | 332 | return 0; 333 | } 334 | 335 | static void cmd_dev_list_usage(const char *cmd) 336 | { 337 | printf("%s list [-n DEV_ID]\n", cmd); 338 | } 339 | 340 | #define const_ilog2(x) (63 - __builtin_clzll(x)) 341 | 342 | static int cmd_dev_get_features(int argc, char *argv[]) 343 | { 344 | struct ublksrv_dev_data data = { 345 | .dev_id = -1, 346 | .run_dir = ublksrv_get_pid_dir(), 347 | 
}; 348 | struct ublksrv_ctrl_dev *dev = ublksrv_ctrl_init(&data); 349 | if (!dev) { 350 | fprintf(stderr, "can't init dev %d\n", data.dev_id); 351 | return -EOPNOTSUPP; 352 | } 353 | __u64 features = 0; 354 | int ret; 355 | static const char *feat_map[] = { 356 | [const_ilog2(UBLK_F_SUPPORT_ZERO_COPY)] = "ZERO_COPY", 357 | [const_ilog2(UBLK_F_URING_CMD_COMP_IN_TASK)] = "COMP_IN_TASK", 358 | [const_ilog2(UBLK_F_NEED_GET_DATA)] = "GET_DATA", 359 | [const_ilog2(UBLK_F_USER_RECOVERY)] = "USER_RECOVERY", 360 | [const_ilog2(UBLK_F_USER_RECOVERY_REISSUE)] = "RECOVERY_REISSUE", 361 | [const_ilog2(UBLK_F_UNPRIVILEGED_DEV)] = "UNPRIVILEGED_DEV", 362 | [const_ilog2(UBLK_F_CMD_IOCTL_ENCODE)] = "CMD_IOCTL_ENCODE", 363 | [const_ilog2(UBLK_F_USER_COPY)] = "USER_COPY", 364 | [const_ilog2(UBLK_F_ZONED)] = "ZONED", 365 | [const_ilog2(UBLK_F_USER_RECOVERY_FAIL_IO)] = "RECOVERY_FAIL_IO", 366 | }; 367 | 368 | if (!dev) { 369 | fprintf(stderr, "ublksrv_ctrl_init failed id\n"); 370 | return -EOPNOTSUPP; 371 | } 372 | 373 | ret = ublksrv_ctrl_get_features(dev, &features); 374 | if (!ret) { 375 | int i; 376 | 377 | printf("ublk_drv features: 0x%llx\n", features); 378 | 379 | for (i = 0; i < sizeof(features) * 8; i++) { 380 | const char *feat; 381 | 382 | if (!((1ULL << i) & features)) 383 | continue; 384 | if (i < sizeof(feat_map) / sizeof(feat_map[0])) 385 | feat = feat_map[i]; 386 | else 387 | feat = "unknown"; 388 | printf("\t%-20s: 0x%llx\n", feat, 1ULL << i); 389 | } 390 | } 391 | 392 | return ret; 393 | } 394 | 395 | static void cmd_dev_get_features_help(const char *cmd) 396 | { 397 | printf("%s features\n", cmd); 398 | } 399 | 400 | static void cmd_dev_recover_usage(const char *cmd) 401 | { 402 | printf("%s recover [-n DEV_ID]\n", cmd); 403 | } 404 | 405 | static void cmd_usage(const char *cmd) 406 | { 407 | cmd_dev_add_usage(cmd); 408 | cmd_dev_del_usage(cmd); 409 | cmd_dev_list_usage(cmd); 410 | cmd_dev_recover_usage(cmd); 411 | cmd_dev_set_affinity_usage(cmd); 412 | 
cmd_dev_get_features_help(cmd); 413 | 414 | printf("%s help -t \n", cmd); 415 | printf("%s -v [--version]\n", cmd); 416 | printf("%s -h [--help]\n", cmd); 417 | } 418 | 419 | static void args_parse_number_type(struct ublksrv_dev_data *data, int argc, char *argv[]) 420 | { 421 | static const struct option longopts[] = { 422 | { "number", 1, NULL, 'n' }, 423 | { "type", 1, NULL, 't' }, 424 | { NULL } 425 | }; 426 | int opt, option_index = 0; 427 | 428 | while ((opt = getopt_long(argc, argv, "-:n:t:", 429 | longopts, &option_index)) != -1) { 430 | switch (opt) { 431 | case 'n': 432 | data->dev_id = strtol(optarg, NULL, 10); 433 | break; 434 | case 't': 435 | data->tgt_type = optarg; 436 | break; 437 | } 438 | } 439 | } 440 | 441 | static int cmd_dev_add(int argc, char *argv[]) 442 | { 443 | struct ublksrv_dev_data data = {0}; 444 | 445 | args_parse_number_type(&data, argc, argv); 446 | 447 | if (data.tgt_type == NULL) { 448 | fprintf(stderr, "no dev type specified\n"); 449 | return -EINVAL; 450 | } 451 | return ublksrv_execv_helper("add", data.tgt_type, argc, argv); 452 | } 453 | 454 | static int cmd_dev_help(int argc, char *argv[]) 455 | { 456 | struct ublksrv_dev_data data = {0}; 457 | 458 | args_parse_number_type(&data, argc, argv); 459 | 460 | if (data.tgt_type == NULL) { 461 | cmd_usage("ublk"); 462 | return EXIT_SUCCESS; 463 | } 464 | 465 | return ublksrv_execv_helper("help", data.tgt_type, argc, argv); 466 | } 467 | 468 | static int cmd_dev_recover(int argc, char *argv[]) 469 | { 470 | struct ublksrv_ctrl_dev *dev; 471 | char tgt_type[32] = {0}; 472 | char *buf = NULL; 473 | struct ublksrv_dev_data data = { 474 | .dev_id = -1, 475 | .run_dir = ublksrv_get_pid_dir(), 476 | }; 477 | int ret; 478 | 479 | args_parse_number_type(&data, argc, argv); 480 | 481 | if (data.dev_id < 0) { 482 | fprintf(stderr, "wrong dev_id provided for recover\n"); 483 | return EXIT_FAILURE; 484 | } 485 | dev = ublksrv_ctrl_init(&data); 486 | if (!dev) { 487 | fprintf(stderr, "initialize 
ctrl dev %d failed\n", data.dev_id); 488 | return EXIT_FAILURE; 489 | } 490 | buf = ublksrv_tgt_get_dev_data(dev); 491 | if (!buf) { 492 | fprintf(stderr, "get dev %d data failed\n", data.dev_id); 493 | return EXIT_FAILURE; 494 | } 495 | 496 | ret = ublksrv_json_read_target_str_info(buf, 32, "name", tgt_type); 497 | if (ret < 0) { 498 | fprintf(stderr, "can't get target type for %d\n", data.dev_id); 499 | return EXIT_FAILURE; 500 | } 501 | 502 | free(buf); 503 | 504 | return ublksrv_execv_helper("recover", tgt_type, argc, argv); 505 | } 506 | 507 | int main(int argc, char *argv[]) 508 | { 509 | const char *prog_name = "ublk"; 510 | char *cmd; 511 | int ret; 512 | char exe[PATH_MAX]; 513 | 514 | strncpy(exe, argv[0], PATH_MAX - 1); 515 | 516 | setvbuf(stdout, NULL, _IOLBF, 0); 517 | 518 | cmd = ublksrv_pop_cmd(&argc, argv); 519 | if (cmd == NULL) { 520 | printf("%s: missing command\n", argv[0]); 521 | cmd_usage(prog_name); 522 | return EXIT_FAILURE; 523 | } 524 | 525 | if (!strcmp(cmd, "add")) 526 | ret = cmd_dev_add(argc, argv); 527 | else if (!strcmp(cmd, "del")) 528 | ret = cmd_dev_del(argc, argv); 529 | else if (!strcmp(cmd, "set_affinity")) 530 | ret = cmd_dev_set_affinity(argc, argv); 531 | else if (!strcmp(cmd, "help")) 532 | ret = cmd_dev_help(argc, argv); 533 | else if (!strcmp(cmd, "list")) 534 | ret = cmd_list_dev_info(argc, argv); 535 | else if (!strcmp(cmd, "recover")) 536 | ret = cmd_dev_recover(argc, argv); 537 | else if (!strcmp(cmd, "features")) 538 | ret = cmd_dev_get_features(argc, argv); 539 | else if (!strcmp(cmd, "help") || !strcmp(cmd, "-h") || !strcmp(cmd, "--help")) { 540 | cmd_usage(prog_name); 541 | ret = EXIT_SUCCESS; 542 | } else if (!strcmp(cmd, "-v") || !strcmp(cmd, "--version")) { 543 | fprintf(stdout, "%s\n", PACKAGE_STRING); 544 | ret = EXIT_SUCCESS; 545 | } else { 546 | fprintf(stderr, "unknown command: %s\n", cmd); 547 | cmd_usage(prog_name); 548 | ret = EXIT_FAILURE; 549 | } 550 | 551 | ublk_ctrl_dbg(UBLK_DBG_CTRL_CMD, "cmd %s: 
result %d\n", cmd, ret); 552 | 553 | return ret; 554 | } 555 | -------------------------------------------------------------------------------- /targets/ublk.nfs.cpp: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: MIT or GPL-2.0-only 2 | 3 | #include 4 | 5 | #include "ublksrv_tgt.h" 6 | #include 7 | 8 | 9 | struct nfs_tgt_data { 10 | char url[4096]; 11 | struct nfs_context *nfs; 12 | struct nfsfh *nfsfh; 13 | size_t capacity; 14 | }; 15 | 16 | typedef struct nfs_cb_data { 17 | struct nfs_cb_data *next; 18 | const struct ublksrv_queue *q; 19 | ssize_t count; 20 | } nfs_cb_data_t; 21 | 22 | struct nfs_queue_data { 23 | nfs_cb_data_t *io_list; 24 | pthread_spinlock_t io_list_lock; 25 | nfs_cb_data_t ios[]; 26 | }; 27 | 28 | static inline struct nfs_queue_data * 29 | nfs_get_queue_data(const struct ublksrv_queue *q) 30 | { 31 | return (struct nfs_queue_data *)q->private_data; 32 | } 33 | 34 | static inline int cb_data_to_tag(nfs_cb_data_t *cb) 35 | { 36 | struct nfs_queue_data *q_data = nfs_get_queue_data(cb->q); 37 | int tag; 38 | 39 | tag = ((unsigned long)cb - (unsigned long)&q_data->ios[0]) / sizeof(*cb); 40 | return tag; 41 | } 42 | 43 | static void nfs_handle_event(const struct ublksrv_queue *q) 44 | { 45 | struct nfs_queue_data *q_data = nfs_get_queue_data(q); 46 | nfs_cb_data_t *cb_data, *tmp; 47 | 48 | pthread_spin_lock(&q_data->io_list_lock); 49 | cb_data = q_data->io_list; 50 | q_data->io_list = NULL; 51 | pthread_spin_unlock(&q_data->io_list_lock); 52 | 53 | while (cb_data) { 54 | unsigned int tag; 55 | 56 | tmp = cb_data->next; 57 | 58 | tag = cb_data_to_tag(cb_data); 59 | ublksrv_complete_io(cb_data->q, tag, cb_data->count); 60 | cb_data = tmp; 61 | } 62 | 63 | ublksrv_queue_handled_event(q); 64 | } 65 | 66 | void rw_async_cb(int status, struct nfs_context *nfs, 67 | void *data, void *private_data) 68 | { 69 | nfs_cb_data_t *cb_data = (nfs_cb_data_t *)private_data; 70 | const struct 
ublksrv_queue *q = cb_data->q; 71 | struct nfs_queue_data *q_data = nfs_get_queue_data(q); 72 | 73 | if (status < 0) { 74 | fprintf(stderr, "pread/pwrite failed with \"%s\"\n", (char *)data); 75 | status = -EIO; 76 | } 77 | cb_data->count = status; 78 | 79 | pthread_spin_lock(&q_data->io_list_lock); 80 | cb_data->next = q_data->io_list; 81 | q_data->io_list = cb_data; 82 | pthread_spin_unlock(&q_data->io_list_lock); 83 | 84 | ublksrv_queue_send_event(q); 85 | } 86 | 87 | static int nfs_tgt_read(const struct ublksrv_queue *q, 88 | const struct ublksrv_io_desc *iod, int tag) 89 | { 90 | const struct ublksrv_dev *dev = q->dev; 91 | struct nfs_tgt_data *nfs_data = (struct nfs_tgt_data *)dev->tgt.tgt_data; 92 | struct nfs_queue_data *q_data = nfs_get_queue_data(q); 93 | nfs_cb_data_t *cb_data = &q_data->ios[tag]; 94 | 95 | if ((iod->nr_sectors + iod->start_sector) * 512 > nfs_data->capacity) { 96 | return -EINVAL; 97 | } 98 | 99 | if (nfs_pread_async(nfs_data->nfs, nfs_data->nfsfh, 100 | (void *)iod->addr, 101 | iod->nr_sectors * 512, iod->start_sector * 512, 102 | rw_async_cb, cb_data) < 0) { 103 | ublk_err("Failed to read from nfs file. %s\n", nfs_get_error(nfs_data->nfs)); 104 | return -ENOMEM; 105 | } 106 | 107 | return 0; 108 | } 109 | 110 | static int nfs_tgt_write(const struct ublksrv_queue *q, 111 | const struct ublksrv_io_desc *iod, int tag) 112 | { 113 | const struct ublksrv_dev *dev = q->dev; 114 | struct nfs_tgt_data *nfs_data = (struct nfs_tgt_data *)dev->tgt.tgt_data; 115 | struct nfs_queue_data *q_data = nfs_get_queue_data(q); 116 | nfs_cb_data_t *cb_data = &q_data->ios[tag]; 117 | 118 | if ((iod->nr_sectors + iod->start_sector) * 512 > nfs_data->capacity) { 119 | return -EINVAL; 120 | } 121 | 122 | if (nfs_pwrite_async(nfs_data->nfs, nfs_data->nfsfh, 123 | (void *)iod->addr, 124 | iod->nr_sectors * 512, iod->start_sector * 512, 125 | rw_async_cb, cb_data) < 0) { 126 | ublk_err("Failed to write to nfs file. 
%s\n", nfs_get_error(nfs_data->nfs)); 127 | return -ENOMEM; 128 | } 129 | 130 | return 0; 131 | } 132 | 133 | static int nfs_tgt_flush(const struct ublksrv_queue *q, 134 | const struct ublksrv_io_desc *iod, int tag) 135 | { 136 | const struct ublksrv_dev *dev = q->dev; 137 | struct nfs_tgt_data *nfs_data = (struct nfs_tgt_data *)dev->tgt.tgt_data; 138 | struct nfs_queue_data *q_data = nfs_get_queue_data(q); 139 | nfs_cb_data_t *cb_data = &q_data->ios[tag]; 140 | 141 | if (nfs_fsync_async(nfs_data->nfs, nfs_data->nfsfh, 142 | rw_async_cb, cb_data) < 0) { 143 | ublk_err("Failed to fsync nfs file. %s\n", nfs_get_error(nfs_data->nfs)); 144 | return -ENOMEM; 145 | } 146 | 147 | return 0; 148 | } 149 | 150 | static int nfs_handle_io_async(const struct ublksrv_queue *q, 151 | const struct ublk_io_data *data) 152 | { 153 | const struct ublksrv_io_desc *iod = data->iod; 154 | unsigned ublk_op = ublksrv_get_op(iod); 155 | int ret = -ENOTSUP; 156 | 157 | switch (ublk_op) { 158 | case UBLK_IO_OP_READ: 159 | ret = nfs_tgt_read(q, iod, data->tag); 160 | break; 161 | case UBLK_IO_OP_WRITE: 162 | ret = nfs_tgt_write(q, iod, data->tag); 163 | break; 164 | case UBLK_IO_OP_DISCARD: 165 | ublk_err("UBLK_IO_OP_DISCARD is not supported"); 166 | break; 167 | case UBLK_IO_OP_FLUSH: 168 | ret = nfs_tgt_flush(q, iod, data->tag); 169 | break; 170 | case UBLK_IO_OP_WRITE_ZEROES: 171 | ublk_err("UBLK_IO_OP_WRITE_ZEROS is not supported"); 172 | break; 173 | default: 174 | ret = -EINVAL; 175 | } 176 | 177 | if (ret) { 178 | ublksrv_complete_io(q, data->tag, ret); 179 | } 180 | return ret; 181 | } 182 | 183 | static struct nfs_tgt_data *nfs_init(const char *nfsurl) 184 | { 185 | struct nfs_tgt_data *nfs_data; 186 | struct nfs_url *url; 187 | struct nfs_stat_64 st; 188 | 189 | nfs_data = (struct nfs_tgt_data *)calloc(sizeof(struct nfs_tgt_data), 1); 190 | if (nfs_data == NULL) { 191 | ublk_err( "%s: failed to calloc tgt_data\n", __func__); 192 | return NULL; 193 | } 194 | 195 | 
strncpy(nfs_data->url, nfsurl, sizeof(nfs_data->url)); 196 | 197 | nfs_data->nfs = nfs_init_context(); 198 | if (nfs_data->nfs == NULL) { 199 | fprintf(stderr, "failed to init context\n"); 200 | goto fail_free; 201 | } 202 | 203 | url = nfs_parse_url_full(nfs_data->nfs, nfs_data->url); 204 | if (url == NULL) { 205 | fprintf(stderr, "%s\n", nfs_get_error(nfs_data->nfs)); 206 | goto fail_context; 207 | } 208 | 209 | if (nfs_mount(nfs_data->nfs, url->server, url->path) != 0) { 210 | fprintf(stderr, "Failed to mount nfs share : %s\n", 211 | nfs_get_error(nfs_data->nfs)); 212 | goto fail_url; 213 | } 214 | 215 | if (nfs_stat64(nfs_data->nfs, url->file, &st) < 0) { 216 | fprintf(stderr, "Failed to stat %s\n", url->file); 217 | goto fail_url; 218 | } 219 | nfs_data->capacity = st.nfs_size; 220 | 221 | if (nfs_open(nfs_data->nfs, url->file, O_RDWR, &nfs_data->nfsfh) != 0) { 222 | fprintf(stderr, "Failed to open nfs file : %s\n", 223 | nfs_get_error(nfs_data->nfs)); 224 | goto fail_url; 225 | } 226 | 227 | if (nfs_mt_service_thread_start(nfs_data->nfs)) { 228 | fprintf(stderr, "failed to start service thread\n"); 229 | goto fail_close; 230 | } 231 | 232 | nfs_destroy_url(url); 233 | return nfs_data; 234 | 235 | fail_close: 236 | nfs_close(nfs_data->nfs, nfs_data->nfsfh); 237 | fail_url: 238 | nfs_destroy_url(url); 239 | fail_context: 240 | nfs_destroy_context(nfs_data->nfs); 241 | fail_free: 242 | free(nfs_data); 243 | return NULL; 244 | } 245 | 246 | static void nfs_exit(struct nfs_tgt_data *nfs_data) 247 | { 248 | if (nfs_data) { 249 | nfs_close(nfs_data->nfs, nfs_data->nfsfh); 250 | nfs_mt_service_thread_stop(nfs_data->nfs); 251 | nfs_destroy_context(nfs_data->nfs); 252 | free(nfs_data); 253 | } 254 | } 255 | 256 | static int nfs_setup_tgt(struct ublksrv_dev *dev) 257 | { 258 | const struct ublksrv_ctrl_dev *cdev = ublksrv_get_ctrl_dev(dev); 259 | const struct ublksrv_ctrl_dev_info *info = ublksrv_ctrl_get_dev_info(cdev); 260 | struct ublksrv_tgt_info *tgt = &dev->tgt; 
261 | struct ublk_params p; 262 | int ret; 263 | 264 | ret = ublk_json_read_params(&p, cdev); 265 | if (ret) { 266 | ublk_err( "%s: read ublk params failed %d\n", 267 | __func__, ret); 268 | return ret; 269 | } 270 | 271 | tgt->dev_size = p.basic.dev_sectors << 9; 272 | tgt->tgt_ring_depth = info->queue_depth; 273 | tgt->nr_fds = 0; 274 | 275 | return 0; 276 | } 277 | 278 | static int nfs_recover_tgt(struct ublksrv_dev *dev, int type) 279 | { 280 | const struct ublksrv_ctrl_dev *cdev = ublksrv_get_ctrl_dev(dev); 281 | char url[PATH_MAX]; 282 | struct nfs_tgt_data *nfs_data = NULL; 283 | int ret; 284 | 285 | ret = ublk_json_read_target_str_info(cdev, "url", url); 286 | if (ret < 0) { 287 | ublk_err( "%s: backing file can't be retrieved from jbuf %d\n", 288 | __func__, ret); 289 | return ret; 290 | } 291 | 292 | nfs_data = nfs_init(url); 293 | dev->tgt.tgt_data = nfs_data; 294 | if (dev->tgt.tgt_data == NULL) { 295 | fprintf(stderr, "Failed to initialize nfs\n"); 296 | return -ENOMEM; 297 | } 298 | 299 | return nfs_setup_tgt(dev); 300 | } 301 | 302 | static int nfs_init_tgt(struct ublksrv_dev *dev, int type, int argc, 303 | char *argv[]) 304 | { 305 | const struct ublksrv_ctrl_dev *cdev = ublksrv_get_ctrl_dev(dev); 306 | const struct ublksrv_ctrl_dev_info *info = ublksrv_ctrl_get_dev_info(cdev); 307 | struct ublksrv_tgt_base_json tgt_json = { 0 }; 308 | struct ublk_params p = { 309 | .types = UBLK_PARAM_TYPE_BASIC, 310 | .basic = { 311 | .attrs = UBLK_ATTR_VOLATILE_CACHE, 312 | .logical_bs_shift = 9, 313 | .physical_bs_shift = 12, 314 | .io_opt_shift = 12, 315 | .io_min_shift = 9, 316 | }, 317 | }; 318 | static const struct option lo_longopts[] = { 319 | { "nfs", 1, NULL, 1024 }, 320 | { NULL } 321 | }; 322 | int opt; 323 | struct nfs_tgt_data *nfs_data = NULL; 324 | const char *nfsurl = NULL; 325 | 326 | if (info->flags & UBLK_F_UNPRIVILEGED_DEV) 327 | return -1; 328 | 329 | if (ublksrv_is_recovering(cdev)) 330 | return nfs_recover_tgt(dev, 0); 331 | 332 | 
strcpy(tgt_json.name, "nfs"); 333 | 334 | while ((opt = getopt_long(argc, argv, "-:", 335 | lo_longopts, NULL)) != -1) { 336 | switch (opt) { 337 | case 1024: 338 | nfsurl = optarg; 339 | break; 340 | } 341 | } 342 | 343 | if (nfsurl == NULL) { 344 | fprintf(stderr, "Must specify --nfs=NFS_URL\n"); 345 | return -EINVAL; 346 | } 347 | 348 | nfs_data = nfs_init(nfsurl); 349 | dev->tgt.tgt_data = nfs_data; 350 | if (dev->tgt.tgt_data == NULL) { 351 | fprintf(stderr, "Failed to initialize nfs\n"); 352 | return -ENOMEM; 353 | } 354 | 355 | tgt_json.dev_size = nfs_data->capacity; 356 | p.basic.dev_sectors = nfs_data->capacity >> 9; 357 | 358 | ublk_json_write_dev_info(cdev); 359 | ublk_json_write_target_base(cdev, &tgt_json); 360 | ublk_json_write_tgt_str(cdev, "url", nfs_data->url); 361 | ublk_json_write_params(cdev, &p); 362 | 363 | return nfs_setup_tgt(dev); 364 | } 365 | 366 | static void nfs_deinit_tgt(const struct ublksrv_dev *dev) 367 | { 368 | struct nfs_tgt_data *nfs_data = (struct nfs_tgt_data *)dev->tgt.tgt_data; 369 | 370 | nfs_exit(nfs_data); 371 | } 372 | 373 | static int nfs_init_queue(const struct ublksrv_queue *q, 374 | void **queue_data_ptr) 375 | { 376 | struct nfs_queue_data *data = (struct nfs_queue_data *)calloc(sizeof(*data) + 377 | sizeof(data->ios[0]) * q->q_depth, 1); 378 | int i; 379 | 380 | if (!data) 381 | return -ENOMEM; 382 | 383 | pthread_spin_init(&data->io_list_lock, PTHREAD_PROCESS_PRIVATE); 384 | for (i = 0; i < q->q_depth; i++) 385 | data->ios[i].q = q; 386 | 387 | *queue_data_ptr = (void *)data; 388 | return 0; 389 | } 390 | 391 | static void nfs_deinit_queue(const struct ublksrv_queue *q) 392 | { 393 | struct nfs_queue_data *data = nfs_get_queue_data(q); 394 | 395 | free(data); 396 | } 397 | 398 | static void nfs_cmd_usage() 399 | { 400 | printf("\t--nfs NFS-URL\n"); 401 | } 402 | 403 | static const struct ublksrv_tgt_type nfs_tgt_type = { 404 | .handle_io_async = nfs_handle_io_async, 405 | .handle_event = nfs_handle_event, 406 | 
.usage_for_add = nfs_cmd_usage, 407 | .init_tgt = nfs_init_tgt, 408 | .deinit_tgt = nfs_deinit_tgt, 409 | .ublksrv_flags = UBLKSRV_F_NEED_EVENTFD, 410 | .name = "nfs", 411 | .init_queue = nfs_init_queue, 412 | .deinit_queue = nfs_deinit_queue, 413 | }; 414 | 415 | int main(int argc, char *argv[]) 416 | { 417 | return ublksrv_tgt_cmd_main(&nfs_tgt_type, argc, argv); 418 | } 419 | -------------------------------------------------------------------------------- /targets/ublk.null.cpp: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: MIT or GPL-2.0-only 2 | 3 | #include 4 | 5 | #include "ublksrv_tgt.h" 6 | 7 | #ifndef IORING_NOP_INJECT_RESULT 8 | #define IORING_NOP_INJECT_RESULT (1U << 0) 9 | #endif 10 | 11 | #ifndef IORING_NOP_FIXED_BUFFER 12 | #define IORING_NOP_FIXED_BUFFER (1U << 3) 13 | #endif 14 | 15 | static int null_recover_tgt(struct ublksrv_dev *dev, int type); 16 | 17 | static int null_setup_tgt(struct ublksrv_dev *dev) 18 | { 19 | const struct ublksrv_ctrl_dev *cdev = ublksrv_get_ctrl_dev(dev); 20 | const struct ublksrv_ctrl_dev_info *info = ublksrv_ctrl_get_dev_info(cdev); 21 | struct ublksrv_tgt_info *tgt = &dev->tgt; 22 | struct ublk_params p; 23 | int ret; 24 | 25 | ret = ublk_json_read_params(&p, cdev); 26 | if (ret) { 27 | ublk_err( "%s: read ublk params failed %d\n", 28 | __func__, ret); 29 | return ret; 30 | } 31 | 32 | tgt->dev_size = p.basic.dev_sectors << 9; 33 | tgt->tgt_ring_depth = info->queue_depth; 34 | if (info->flags & UBLK_F_SUPPORT_ZERO_COPY) 35 | tgt->tgt_ring_depth *= 2; 36 | tgt->nr_fds = 0; 37 | ublksrv_tgt_set_io_data_size(tgt); 38 | 39 | return 0; 40 | } 41 | 42 | static int null_init_tgt(struct ublksrv_dev *dev, int type, int argc, 43 | char *argv[]) 44 | { 45 | const struct ublksrv_ctrl_dev *cdev = ublksrv_get_ctrl_dev(dev); 46 | const struct ublksrv_ctrl_dev_info *info = ublksrv_ctrl_get_dev_info(cdev); 47 | struct ublksrv_tgt_base_json tgt_json = { 0 }; 48 | 
unsigned long long dev_size = 250UL * 1024 * 1024 * 1024; 49 | struct ublk_params p = { 50 | .types = UBLK_PARAM_TYPE_BASIC, 51 | .basic = { 52 | .attrs = UBLK_ATTR_VOLATILE_CACHE, 53 | .logical_bs_shift = 9, 54 | .physical_bs_shift = 12, 55 | .io_opt_shift = 12, 56 | .io_min_shift = 9, 57 | .max_sectors = info->max_io_buf_bytes >> 9, 58 | .dev_sectors = dev_size >> 9, 59 | }, 60 | }; 61 | 62 | if (info->flags & UBLK_F_UNPRIVILEGED_DEV) 63 | return -1; 64 | 65 | if (ublksrv_is_recovering(cdev)) 66 | return null_recover_tgt(dev, 0); 67 | 68 | strcpy(tgt_json.name, "null"); 69 | 70 | ublk_json_write_dev_info(cdev); 71 | ublk_json_write_target_base(cdev, &tgt_json); 72 | ublk_json_write_params(cdev, &p); 73 | tgt_json.dev_size = dev_size; 74 | 75 | return null_setup_tgt(dev); 76 | } 77 | 78 | static int null_recover_tgt(struct ublksrv_dev *dev, int type) 79 | { 80 | return null_setup_tgt(dev); 81 | } 82 | 83 | static int null_submit_io(const struct ublksrv_queue *q, 84 | const struct ublk_io_data *data, int tag) 85 | { 86 | unsigned ublk_op = ublksrv_get_op(data->iod); 87 | struct io_uring_sqe *sqe[3]; 88 | 89 | if (!ublksrv_tgt_queue_zc(q)) 90 | return 0; 91 | 92 | ublk_queue_alloc_sqes(q, sqe, 3); 93 | 94 | io_uring_prep_buf_register(sqe[0], 0, tag, q->q_id, tag); 95 | sqe[0]->flags |= IOSQE_CQE_SKIP_SUCCESS | IOSQE_FIXED_FILE| IOSQE_IO_LINK; 96 | sqe[0]->user_data = build_user_data(tag, 97 | ublk_cmd_op_nr(UBLK_U_IO_REGISTER_IO_BUF), 98 | 0, 99 | 1); 100 | 101 | io_uring_prep_nop(sqe[1]); 102 | sqe[1]->buf_index = tag; 103 | sqe[1]->flags |= IOSQE_FIXED_FILE | IOSQE_IO_LINK; 104 | sqe[1]->rw_flags = IORING_NOP_FIXED_BUFFER | IORING_NOP_INJECT_RESULT; 105 | sqe[1]->len = data->iod->nr_sectors << 9; /* injected result */ 106 | sqe[1]->user_data = build_user_data(tag, ublk_op, 0, 1); 107 | 108 | io_uring_prep_buf_unregister(sqe[2], 0, tag, q->q_id, tag); 109 | sqe[2]->flags |= IOSQE_FIXED_FILE; 110 | sqe[2]->user_data = build_user_data(tag, 111 | 
ublk_cmd_op_nr(UBLK_U_IO_UNREGISTER_IO_BUF), 112 | 0, 113 | 1); 114 | 115 | // buf register is marked as IOSQE_CQE_SKIP_SUCCESS 116 | return 2; 117 | } 118 | 119 | static co_io_job __null_handle_io_async(const struct ublksrv_queue *q, 120 | const struct ublk_io_data *data, int tag) 121 | { 122 | struct ublk_io_tgt *io = __ublk_get_io_tgt_data(data); 123 | int ret; 124 | 125 | again: 126 | ret = null_submit_io(q, data, tag); 127 | if (ret >= 0) { 128 | int io_res = 0; 129 | while (ret-- > 0) { 130 | int res; 131 | 132 | co_await__suspend_always(tag); 133 | res = ublksrv_tgt_process_cqe(io, &io_res); 134 | if (res < 0 && io_res >= 0) 135 | io_res = res; 136 | } 137 | if (io_res == -EAGAIN) 138 | goto again; 139 | ublksrv_complete_io(q, tag, io_res); 140 | } else { 141 | ublk_err( "fail to queue io %d, ret %d\n", tag, ret); 142 | } 143 | co_return; 144 | } 145 | 146 | static int null_handle_io_async(const struct ublksrv_queue *q, 147 | const struct ublk_io_data *data) 148 | { 149 | struct ublk_io_tgt *io = __ublk_get_io_tgt_data(data); 150 | 151 | if (ublksrv_tgt_queue_zc(q)) 152 | io->co = __null_handle_io_async(q, data, data->tag); 153 | else 154 | ublksrv_complete_io(q, data->tag, data->iod->nr_sectors << 9); 155 | 156 | return 0; 157 | } 158 | 159 | static void null_tgt_io_done(const struct ublksrv_queue *q, 160 | const struct ublk_io_data *data, 161 | const struct io_uring_cqe *cqe) 162 | { 163 | ublksrv_tgt_io_done(q, data, cqe); 164 | } 165 | 166 | static const struct ublksrv_tgt_type null_tgt_type = { 167 | .handle_io_async = null_handle_io_async, 168 | .tgt_io_done = null_tgt_io_done, 169 | .init_tgt = null_init_tgt, 170 | .name = "null", 171 | }; 172 | 173 | 174 | int main(int argc, char *argv[]) 175 | { 176 | return ublksrv_tgt_cmd_main(&null_tgt_type, argc, argv); 177 | } 178 | -------------------------------------------------------------------------------- /tests/Makefile.am: -------------------------------------------------------------------------------- 
# Every test helper and test case must be listed here, otherwise it is left
# out of the `make dist` tarball.
EXTRA_DIST = \
	common/fio_common \
	common/loop_common \
	common/nbd_common \
	debug/test_dev \
	debug/ublk_docker \
	generic/001 \
	generic/002 \
	generic/003 \
	generic/004 \
	generic/005 \
	generic/006 \
	generic/007 \
	loop/001 \
	loop/002 \
	loop/003 \
	loop/004 \
	loop/005 \
	loop/006 \
	loop/007 \
	loop/008 \
	loop/009 \
	loop/010 \
	nbd/001 \
	nbd/002 \
	nbd/003 \
	nbd/021 \
	nbd/022 \
	nbd/023 \
	nbd/041 \
	nbd/042 \
	nbd/043 \
	null/001 \
	null/002 \
	null/004 \
	null/005 \
	null/006 \
	null/007 \
	run_test.sh
stdev"]=82 50 | ["write bandwidth min"]=83 51 | ["write bandwidth max"]=84 52 | ["write bandwidth %"]=85 53 | ["write bandwidth mean"]=86 54 | ["write bandwidth stdev"]=87 55 | 56 | # Trim status 57 | ["trim io"]=88 58 | ["trim bandwidth"]=89 59 | ["trim iops"]=90 60 | ["trim runtime"]=91 61 | ["trim slat min"]=92 62 | ["trim slat max"]=93 63 | ["trim slat mean"]=94 64 | ["trim slat stdev"]=95 65 | ["trim clat min"]=96 66 | ["trim clat max"]=97 67 | ["trim clat mean"]=98 68 | ["trim clat stdev"]=99 69 | # trim clat percentiles are 100-119 70 | ["trim lat min"]=120 71 | ["trim lat max"]=121 72 | ["trim lat mean"]=122 73 | ["trim lat stdev"]=123 74 | ["trim bandwidth min"]=124 75 | ["trim bandwidth max"]=125 76 | ["trim bandwidth %"]=126 77 | ["trim bandwidth mean"]=127 78 | ["trim bandwidth stdev"]=128 79 | 80 | # CPU usage 81 | ["user cpu"]=129 82 | ["system cpu"]=130 83 | ["context switches"]=131 84 | ["major page faults"]=132 85 | ["minor page faults"]=133 86 | 87 | # IO depth distribution 88 | ["io depth <=1"]=134 89 | ["io depth 2"]=135 90 | ["io depth 4"]=136 91 | ["io depth 8"]=137 92 | ["io depth 16"]=138 93 | ["io depth 32"]=139 94 | ["io depth >=64"]=140 95 | 96 | # IO latency distribution 97 | ["io latency <=2 us"]=141 98 | ["io latency 4 us"]=142 99 | ["io latency 10 us"]=143 100 | ["io latency 20 us"]=144 101 | ["io latency 50 us"]=145 102 | ["io latency 100 us"]=146 103 | ["io latency 250 us"]=147 104 | ["io latency 500 us"]=148 105 | ["io latency 750 us"]=149 106 | ["io latency 1000 us"]=150 107 | ["io latency <=2 ms"]=151 108 | ["io latency 4 ms"]=152 109 | ["io latency 10 ms"]=153 110 | ["io latency 20 ms"]=154 111 | ["io latency 50 ms"]=155 112 | ["io latency 100 ms"]=156 113 | ["io latency 250 ms"]=157 114 | ["io latency 500 ms"]=158 115 | ["io latency 750 ms"]=159 116 | ["io latency 1000 ms"]=160 117 | ["io latency 2000 ms"]=161 118 | ["io latency >=2000 ms"]=162 119 | 120 | # Disk utilization (11 fields per disk) 121 | ) 122 | 123 | 
# Copy the fields named in FIO_PERF_FIELDS from fio's terse output file
# ($FIO_OUTPUT) into TEST_RUN, keyed by "${FIO_PERF_PREFIX}<field name>".
# Field names are translated to terse column numbers via FIO_TERSE_FIELDS;
# unknown names are reported on stderr and skipped.
_fio_perf_report() {
	local label column value
	local nr_lines=$(wc -l < "$FIO_OUTPUT")

	# If there is more than one group, we don't know what to report.
	if [[ $nr_lines -gt 1 ]]; then
		echo "_fio_perf: too many terse lines" >&2
		return
	fi

	for label in "${FIO_PERF_FIELDS[@]}"; do
		column="${FIO_TERSE_FIELDS["$label"]}"
		if [[ -n $column ]]; then
			value="$(cut -d ';' -f "$column" "$FIO_OUTPUT")"
			TEST_RUN["$FIO_PERF_PREFIX$label"]="$value"
		else
			echo "_fio_perf: unknown fio terse field '$label'" >&2
		fi
	done
}
# Print the lowest id in 0..64 for which /dev/ublkc<id> does not exist,
# or "-" when all of them are taken. Exactly one line is printed.
__find_free_ublk_id()
{
	local id

	for id in `seq 0 64`; do
		if [ ! -c /dev/ublkc${id} ]; then
			echo $id
			# return right away so a free id 64 isn't also reported as "-"
			return 0
		fi
	done
	echo "-"
}
# kill the ublk daemon and return ublk device state
#
# $1: ublk block device, $2: state to wait for.
# Sends SIGKILL to the daemon once per second, for at most 30 seconds, until
# the device reports the expected state; prints the last state seen.
__ublk_kill_daemon()
{
	local dev=$1
	local exp_state=$2
	local daemon_pid=`__ublk_get_pid $dev`
	local state=`__ublk_get_dev_state $dev 0`
	local secs

	for ((secs = 0; secs < 30; secs++)); do
		[ "$state" == "$exp_state" ] && break
		kill -9 $daemon_pid > /dev/null 2>&1
		sleep 1
		state=`__ublk_get_dev_state $dev 0`
	done
	echo $state
}
# Run the standard fio workload set against an existing block device and
# print one summary line per workload.
#
# $1: device type label, $2: number of fio jobs, $3: block device.
# Workloads: randwrite/randread/randrw at 4k, then sequential rw at 64k and
# 512k. randwrite runs for a fixed 20 seconds, the others for $TRUNTIME.
__run_dev_perf_no_create()
{
	local TYPE=$1
	local JOBS=$2
	local DEV=$3
	local RT=$TRUNTIME
	local BS
	local FIO_PERF_FIELDS=("read iops" "write iops" "user cpu" "system cpu")
	local -a patterns=(randwrite randread randrw rw rw)
	local -a blocks=(4k 4k 4k 64k 512k)
	local idx secs summary

	for idx in "${!patterns[@]}"; do
		RW=${patterns[$idx]}
		BS=${blocks[$idx]}

		if [ "$RW" == "randwrite" ]; then
			secs=20
		else
			secs=$RT
		fi

		__run_fio_perf $DEV $BS $RW $JOBS $secs
		cpu_util=`__get_cpu_utils`

		case "$RW" in
		randwrite)
			summary="iops ${TEST_RUN["write iops"]}"
			;;
		randread)
			summary="iops ${TEST_RUN["read iops"]}"
			;;
		*)
			summary="iops read ${TEST_RUN["read iops"]} write ${TEST_RUN["write iops"]}"
			;;
		esac
		echo -e "\t$RW($BS): jobs $JOBS, $summary, $cpu_util"
	done

	echo ""
}
# Create a backing file for a loop target under $UBLK_TMP_DIR and print its
# path.
#
# $1: label embedded in the file name, $2: size as accepted by truncate(1).
# The file is sized with truncate and then filled with zeroes in 1 MiB
# blocks using direct I/O.
_create_loop_image() {
	local type=$1
	local my_size=$2
	local img bytes mib

	img=$(mktemp -p ${UBLK_TMP_DIR} ublk_loop_${type}_${my_size}_XXXXX)
	truncate -s ${my_size} ${img}

	# turn the human-readable size into a count of whole MiB blocks
	bytes=$(stat --printf="%s" ${img})
	mib=$(expr ${bytes} / 1048576)

	dd if=/dev/zero of=$img bs=1M count=${mib} oflag=direct > /dev/null 2>&1
	echo $img
}
# Kill the nbdkit server whose pid is stored in ${_NBDS_PID} and wait, for
# at most 32 seconds, until the process is gone. Does nothing when the pid
# file does not exist.
_kill_nbdkit() {
	if [ -f ${_NBDS_PID} ]; then
		local nbds_pid=`cat ${_NBDS_PID}`
		local tries

		kill -9 ${nbds_pid} > /dev/null 2>&1
		for ((tries = 0; tries < 32; tries++)); do
			# kill -0 only probes whether the process still exists
			kill -0 ${nbds_pid} > /dev/null 2>&1 || break
			sleep 1
		done
	fi
}
# Print "<uid>:<gid>" for user $1, creating the user (with an empty
# password) when it does not exist yet.
# Returns non-zero when the ids can't be resolved even after creating the
# user; nothing but the id pair is written to stdout.
get_user_id()
{
	local user=$1
	local my_uid
	local my_gid

	if ! my_uid=`id -u $user`; then
		# keep tool chatter off stdout: callers capture it as the result
		adduser --shell /bin/bash $user >&2
		passwd -d $user >&2
		# the first lookup failed, so query the uid of the new user
		my_uid=`id -u $user` || return 1
	fi

	my_gid=`id -g $user` || return 1
	echo "$my_uid:$my_gid"
}
"ublk add -t null -n 10 --unprivileged" >> $ublk_script 56 | echo "ublk list" >> $ublk_script 57 | echo "sleep 2" >> $ublk_script 58 | echo "ls -l /dev/ublk[bc]10" >> $ublk_script 59 | echo "dd if=/dev/zero of=/dev/ublkb10 bs=1M count=64" >> $ublk_script 60 | echo "ublk del -n 10" >> $ublk_script 61 | echo "sleep 2" >> $ublk_script 62 | echo "ls -l /dev/ublk[bc]10" >> $ublk_script 63 | 64 | chmod +x $ublk_script 65 | chown $ugid $ublk_script 66 | 67 | docker cp -a $ublk_script $container:/test.sh 68 | docker exec -u $ugid -i $container bash -c "cat /test.sh" 69 | docker exec -u $ugid -i $container bash -c ". /test.sh" 70 | rm -f $ublk_script 71 | } 72 | 73 | ublk_clean() 74 | { 75 | local container=$1 76 | local this_log=$2 77 | 78 | cat $this_log 79 | remove_container $container > /dev/null 2>&1 80 | rm "$this_log" 81 | exit -1 82 | } 83 | 84 | C="ublk_docker" 85 | UG="" 86 | ULOG=`mktemp` 87 | 88 | if ! UG=`get_user_id ublk_docker`; then 89 | echo "no ublk_docker user for running test" 90 | exit -1 91 | fi 92 | 93 | echo "setup container" 94 | if ! add_container $C >> "$ULOG"; then 95 | echo "add container failed" 96 | ublk_clean $C "$ULOG" 97 | fi 98 | 99 | echo "install ublk from github" 100 | if ! install_ublk $C >> "$ULOG" 2>&1; then 101 | echo "install ublk failed" 102 | ublk_clean $C "$ULOG" 103 | fi 104 | 105 | echo "add/del ublk inside container by user ublk_docker($UG)" 106 | if ! test_ublk $C $UG; then 107 | echo "test ublk in container failed" 108 | ublk_clean $C "$ULOG" 109 | fi 110 | 111 | echo "remove container" 112 | remove_container $C >> "$ULOG" 113 | rm "$ULOG" 114 | -------------------------------------------------------------------------------- /tests/generic/001: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # SPDX-License-Identifier: MIT or GPL-2.0-only 3 | 4 | . common/fio_common 5 | . 
# Create a ublk-loop device on top of a backing file, start fio against it
# in the background and delete the device while the I/O is in flight.
#   $1: iteration counter, only used in the progress message
#   $2: backing file passed to "ublk add -f"
# Reads QUEUES, URING_COMP, NEED_GET_DATA, BS, RW, JOBS and RT; sets RES
# and exports T_TYPE_PARAMS.
ublk_run_delete_test()
{
	local cnt=$1
	local file=$2

	export T_TYPE_PARAMS="-t loop -q $QUEUES -u $URING_COMP -g $NEED_GET_DATA -f $file"

	local dev=`__create_ublk_dev`
	echo -e "\trun fio on $dev(ublk add $T_TYPE_PARAMS) with deleting dev $cnt"
	# Was "2 >& 1": bash takes the detached "2" as an extra argument and
	# ">& 1" as a no-op duplication of stdout, so stderr was not silenced.
	__run_fio_libaio $dev $BS $RW $JOBS $RT > /dev/null 2>&1 &
	sleep 4
	RES=`__remove_ublk_dev_return $dev`
	if [ $RES -ne 0 ]; then
		echo -e "\tdelete $dev failed($RES)"
		# dump ublk threads and the block debugfs state for diagnosis
		ps -eLf | grep ublk
		(cd /sys/kernel/debug/block/`basename $dev` && find . -type f -exec grep -aH . {} \;)
		break
	fi
	# reap the background fio job
	wait
}
# Create a ublk-null device, run fio on it in the background, kill its
# daemon and check that the device is reported as DEAD before deleting it.
#   $1: iteration counter, only used in the progress message
# Reads QUEUES, URING_COMP, NEED_GET_DATA, BS, RW, JOBS and RT; sets RES
# and exports T_TYPE_PARAMS. Exits the script on any failure.
ublk_run_abort_test()
{
	local cnt=$1
	local state=""

	export T_TYPE_PARAMS="-t null -q $QUEUES -u $URING_COMP -g $NEED_GET_DATA"
	local dev=`__create_ublk_dev`

	echo -e "\trun fio with killing $dev(ublk add $T_TYPE_PARAMS) queue daemon $cnt"
	# Was "2 >& 1": bash takes the detached "2" as an extra argument and
	# ">& 1" as a no-op duplication of stdout, so stderr was not silenced.
	__run_fio_libaio $dev $BS $RW $JOBS $RT > /dev/null 2>&1 &
	sleep 2
	state=`__ublk_kill_daemon $dev "DEAD"`
	[ "$state" != "DEAD" ] && echo "device isn't dead($state) after killing daemon" && exit -1
	RES=`__remove_ublk_dev_return $dev`
	if [ $RES -ne 0 ]; then
		echo -e "\tdelete $dev failed"
		exit -1
	fi
	# reap the background fio job
	wait
}
# Run the mkfs/mount/umount check for one target type, once with
# NEED_GET_DATA=0 and once with NEED_GET_DATA=1.
#   $1: target type; "loop" uses 2 queues, anything else 1; "nbd" is
#       attached via --host $NBDSRV, other types via -f <image>
#   $2: mount point handed to ublk_run_mount_test
# Sets the global "file" and exports T_TYPE_PARAMS.
ublk_run_mount_type()
{
	local type=$1
	local mnt=$2
	local queues=1
	local dev=/dev/ublkb0

	if [ ${type} == "loop" ]; then
		queues=2
	fi

	file=$(_create_image $type "null" 1G)

	for NEED_GET_DATA in 0 1; do
		case "$type" in
		nbd)
			export T_TYPE_PARAMS="-t $type -q $queues -g $NEED_GET_DATA -d 127 --host $NBDSRV"
			;;
		*)
			export T_TYPE_PARAMS="-t $type -q $queues -g $NEED_GET_DATA -f $file"
			;;
		esac
		ublk_run_mount_test $mnt
	done

	_remove_image ${type} $file
}
# Create a ublk-loop device with the current recovery settings, run fio on
# it in the background, SIGKILL the thread serving queue 0 and then delete
# the device.
#   $1: iteration counter, only used in the progress message
# Reads QUEUES, URING_COMP, NEED_GET_DATA, RECOVERY, RECOVERY_REISSUE,
# file, BS, RW, JOBS and RT; sets queue_tid and RES and exports
# T_TYPE_PARAMS.
ublk_run_delete_test()
{
	local cnt=$1
	export T_TYPE_PARAMS="-t loop -q $QUEUES -u $URING_COMP -g $NEED_GET_DATA -r $RECOVERY -i $RECOVERY_REISSUE -f $file"
	local dev=`__create_ublk_dev`
	echo -e "\trun fio on $dev(ublk add $T_TYPE_PARAMS) with deleting dev $cnt"

	# Was "2 >& 1": bash takes the detached "2" as an extra argument and
	# ">& 1" as a no-op duplication of stdout, so stderr was not silenced.
	__run_fio_libaio $dev $BS $RW $JOBS $RT > /dev/null 2>&1 &
	sleep 2
	queue_tid=`__ublk_get_queue_tid $dev 0`
	kill -9 $queue_tid
	RES=`__remove_ublk_dev_return $dev`
	if [ $RES -ne 0 ]; then
		echo -e "\tdelete $dev failed"
		# dump ublk threads and the block debugfs state for diagnosis
		ps -eLf | grep ublk
		(cd /sys/kernel/debug/block/`basename $dev` && find . -type f -exec grep -aH . {} \;)
		break
	fi
	# reap the background fio job
	wait
}
# Recovery test, type 1: run fio on a fresh device, kill its daemon, expect
# the device to become QUIESCED, recover it, expect LIVE, then delete it.
#   $1: target type ("null" has no backing, "nbd" uses --host 127.0.0.1,
#       any other type uses -f <file>)
#   $2: backing file for file-based targets
#   $3: iteration counter, only used in the progress message
# Reads QUEUES, URING_COMP, NEED_GET_DATA, RECOVERY, RECOVERY_REISSUE, BS,
# RW, JOBS and RT; sets RES and exports T_TYPE_PARAMS.
ublk_run_recover_test()
{
	local type=$1
	local file=$2
	local cnt=$3
	local state=""

	if [ "$type" == "null" ]; then
		local backing=""
	elif [ "$type" == "nbd" ]; then
		local backing="--host 127.0.0.1"
	else
		local backing="-f $file"
	fi

	export T_TYPE_PARAMS="-t $type -q $QUEUES -u $URING_COMP -g $NEED_GET_DATA -r $RECOVERY -i $RECOVERY_REISSUE $backing"
	local dev=`__create_ublk_dev`
	echo -e "\trun fio with killing $dev(ublk add $T_TYPE_PARAMS) queue daemon $cnt"

	# Was "2 >& 1": bash takes the detached "2" as an extra argument and
	# ">& 1" as a no-op duplication of stdout, so stderr was not silenced.
	__run_fio_libaio $dev $BS $RW $JOBS $RT > /dev/null 2>&1 &
	sleep 2

	state=`__ublk_kill_daemon $dev "QUIESCED"`
	if [ "$state" == "QUIESCED" ]; then
		state=`recover_ublk_dev_and_wait $dev`
		if [ "$state" != "LIVE" ]; then
			echo "device isn't recovered($state)"
		fi
	else
		echo "device isn't changed to quiesced state($state)"
	fi

	RES=`__remove_ublk_dev_return $dev`
	if [ $RES -ne 0 ]; then
		echo -e "\tdelete $dev failed($RES)"
		break
	fi
	# reap the background fio job
	wait
}
# Recovery test, type 2: run fio on a ublk-null device, kill its daemon,
# expect QUIESCED, recover it, expect LIVE, then kill the new daemon as
# well and delete the device.
#   $1: iteration counter, only used in the progress message
# Reads QUEUES, URING_COMP, NEED_GET_DATA, RECOVERY, RECOVERY_REISSUE, BS,
# RW, JOBS and RT; sets pid2 and RES and exports T_TYPE_PARAMS.
ublk_run_recover_kill_test()
{
	local state=""
	local cnt=$1

	export T_TYPE_PARAMS="-t null -q $QUEUES -u $URING_COMP -g $NEED_GET_DATA -r $RECOVERY -i $RECOVERY_REISSUE"
	local dev=`__create_ublk_dev`

	echo -e "\trun fio with killing $dev(ublk add $T_TYPE_PARAMS) queue daemon $cnt"
	# Was "2 >& 1": bash takes the detached "2" as an extra argument and
	# ">& 1" as a no-op duplication of stdout, so stderr was not silenced.
	__run_fio_libaio $dev $BS $RW $JOBS $RT > /dev/null 2>&1 &
	sleep 2

	state=`__ublk_kill_daemon $dev "QUIESCED"`
	if [ "$state" == "QUIESCED" ]; then
		state=`recover_ublk_dev_and_wait $dev`
		if [ "$state" != "LIVE" ]; then
			echo "device isn't recovered($state)"
		fi
	else
		echo "device isn't changed to quiesced state($state)"
	fi

	# second kill: take down the daemon that served the recovered device
	pid2=`__ublk_get_pid $dev`
	kill -9 $pid2 > /dev/null 2>&1
	sleep 2
	RES=`__remove_ublk_dev_return $dev`
	if [ $RES -ne 0 ]; then
		echo -e "\tdelete $dev failed($RES)"
		break
	fi
	# reap the background fio job
	wait
}
# Exercise one combination of recovery options on a ublk-null device and
# check the I/O behavior before the server is killed, while it is gone
# ("nosrv"), and after a recovery attempt.
#
# Inputs (globals): RECOVERY, RECOVERY_REISSUE, RECOVERY_FAIL_IO, passed to
# "ublk add" as -r, -i and -e.
# Sets DEV, RES, pid1, pid2 and secs, and exports T_TYPE_PARAMS.
# Returns 1 as soon as an expectation is violated; the device is only
# deleted on the paths that reach del_dev.
#
# RES after check_io_status: 0 = I/O succeeded, 1 = I/O error,
# 2 = I/O still queued.
ublk_run_recovery_test()
{
	export T_TYPE_PARAMS="-t null -r $RECOVERY -i $RECOVERY_REISSUE -e $RECOVERY_FAIL_IO"
	echo -e "\trunning with params: $T_TYPE_PARAMS"
	DEV=`__create_ublk_dev`

	# phase 1: server alive, a read must simply succeed
	echo -e "\t\tcheck behavior before nosrv - expect no error"
	submit_io $DEV
	check_io_status
	RES=$?
	if [ $RES -ne 0 ]; then
		echo -e "\t\tI/O error while ublk server still up!"
		return 1
	fi

	# phase 2: kill the server, then see what happens to a new read
	pid1=`__ublk_get_pid $DEV`
	kill -9 $pid1
	sleep 2
	echo -ne "\t\tcheck behavior during nosrv - "
	submit_io $DEV
	check_io_status
	RES=$?
	# RECOVERY_FAIL_IO takes precedence over RECOVERY when picking the
	# expected outcome
	if [ $RECOVERY_FAIL_IO -ne 0 ]; then
		echo "expect I/O error"
		if [ $RES -ne 1 ]; then
			echo -e "\t\tincorrect nosrv behavior!"
			echo -e "\t\texpected io error, got $RES"
			return 1
		fi
	elif [ $RECOVERY -ne 0 ]; then
		echo "expect I/O queued"
		if [ $RES -ne 2 ]; then
			echo -e "\t\tincorrect nosrv behavior!"
			echo -e "\t\texpected queued io, got $RES"
			return 1
		fi
	else
		echo "expect I/O error" # because device should be gone
		if [ $RES -ne 1 ]; then
			echo -e "\t\tincorrect nosrv behavior!"
			echo -e "\t\texpected io error, got $RES"
			return 1
		fi
	fi

	# phase 3: retry the recovery up to 10 times, one second apart
	echo -e "\t\ttry to recover the device"
	secs=0
	while [ $secs -lt 10 ]; do
		RES=`__recover_ublk_dev $DEV`
		[ $RES -eq 0 ] && break
		sleep 1
		let secs++
	done
	if [ $RES -ne 0 ]; then
		echo -e "\t\tfailed to recover device!"
		if [ $RECOVERY -ne 0 ]; then
			return 1
		else
			# without recovery enabled a failed recovery is the
			# expected result, so this run counts as a pass
			echo -e "\t\tforgiving expected recovery failure"
			del_dev $DEV
			echo
			return 0
		fi
	else
		if [ $RECOVERY -eq 0 ]; then
			echo -e "\t\trecovery unexpectedly succeeded!"
			return 1
		fi
	fi

	# if I/O queued before, make sure it completes now
	# (check_io_status re-examines the dd started in phase 2 via DD_PID)
	if [ $RECOVERY_FAIL_IO -eq 0 ] && [ $RECOVERY -ne 0 ]; then
		echo -e "\t\tchecking that I/O completed after recovery"
		check_io_status
		RES=$?
		if [ $RES -ne 0 ]; then
			echo -e "\t\tpreviously queued I/O did not succeed!"
			echo -e "\t\texpected success got $RES"
			return 1
		fi
	fi

	# phase 4: a fresh read on the recovered device must succeed
	echo -e "\t\tcheck behavior after recovery - expect no error"
	submit_io $DEV
	check_io_status
	RES=$?
	if [ $RES -ne 0 ]; then
		echo -e "\t\tI/O error after recovery!"
		return 1
	fi

	# cleanup
	pid2=`__ublk_get_pid $DEV`
	kill -9 $pid2
	del_dev $DEV

	echo
}
common/loop_common 6 | 7 | file=`_create_loop_image "data" $LO_IMG_SZ` 8 | 9 | export T_TYPE="losetup" 10 | 11 | # '-f $file' has to be the last one 12 | export T_TYPE_PARAMS="--direct-io=on -f $file" 13 | 14 | __run_loop_dev_perf 1 15 | 16 | _remove_loop_image $file 17 | -------------------------------------------------------------------------------- /tests/loop/004: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # SPDX-License-Identifier: MIT or GPL-2.0-only 3 | 4 | . common/fio_common 5 | . common/loop_common 6 | 7 | file=`_create_loop_image "data" $LO_IMG_SZ` 8 | export T_TYPE_PARAMS="-t loop -q 1 -u 1 -f $file" 9 | 10 | __run_dev_perf 1 11 | 12 | _remove_loop_image $file 13 | -------------------------------------------------------------------------------- /tests/loop/005: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # SPDX-License-Identifier: MIT or GPL-2.0-only 3 | 4 | . common/fio_common 5 | . common/loop_common 6 | 7 | modprobe -r null_blk 8 | modprobe null_blk 9 | file="/dev/nullb0" 10 | export T_TYPE_PARAMS="-t loop -q 1 -f $file" 11 | 12 | # ublk-loop over block device 13 | __run_dev_perf 1 14 | 15 | modprobe -r null_blk 16 | -------------------------------------------------------------------------------- /tests/loop/006: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | . common/fio_common 4 | . common/loop_common 5 | 6 | file=`_create_loop_image "data" $LO_IMG_SZ` 7 | export T_TYPE_PARAMS="-t loop -q 1 -g 1 -f $file" 8 | 9 | __run_dev_perf 1 10 | 11 | _remove_loop_image $file 12 | -------------------------------------------------------------------------------- /tests/loop/007: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # SPDX-License-Identifier: MIT or GPL-2.0-only 3 | 4 | . common/fio_common 5 | . 
common/loop_common 6 | 7 | file=`_create_loop_image "data" $LO_IMG_SZ` 8 | export T_TYPE_PARAMS="-t loop -q 1 --buffered_io -f $file" 9 | 10 | __run_dev_perf 1 11 | 12 | _remove_loop_image $file 13 | -------------------------------------------------------------------------------- /tests/loop/008: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # SPDX-License-Identifier: MIT or GPL-2.0-only 3 | 4 | . common/fio_common 5 | . common/loop_common 6 | 7 | echo "run loop usercopy test" 8 | file=`_create_loop_image "data" $LO_IMG_SZ` 9 | export T_TYPE_PARAMS="-t loop -q 1 --usercopy -f $file" 10 | 11 | __run_dev_perf 1 12 | 13 | _remove_loop_image $file 14 | -------------------------------------------------------------------------------- /tests/loop/009: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # SPDX-License-Identifier: MIT or GPL-2.0-only 3 | 4 | . common/fio_common 5 | . common/loop_common 6 | 7 | file=`_create_loop_image "data" $LO_IMG_SZ` 8 | export T_TYPE_PARAMS="-t loop -q 1 --buffered_io -f $file -z" 9 | 10 | __run_dev_perf 1 11 | 12 | _remove_loop_image $file 13 | -------------------------------------------------------------------------------- /tests/loop/010: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # SPDX-License-Identifier: MIT or GPL-2.0-only 3 | 4 | . common/fio_common 5 | . common/loop_common 6 | 7 | echo "run loop zerocopy test" 8 | file=`_create_loop_image "data" $LO_IMG_SZ` 9 | export T_TYPE_PARAMS="-t loop -q 1 -f $file -z" 10 | 11 | __run_dev_perf 1 12 | 13 | _remove_loop_image $file 14 | -------------------------------------------------------------------------------- /tests/nbd/001: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # SPDX-License-Identifier: MIT or GPL-2.0 3 | 4 | . common/fio_common 5 | . 
common/nbd_common 6 | 7 | echo "run perf test via nbd-client(nbd server: $NBDSRV:nbdkit memory $NBD_SIZE)" 8 | 9 | nbd_dev=/dev/nbd11 10 | 11 | modprobe nbd 12 | 13 | file=`_create_image "nbd" "none" $NBD_SIZE` 14 | 15 | nbd-client -d $nbd_dev 16 | nbd-client $NBDSRV $nbd_dev 17 | 18 | __run_dev_perf_no_create "nbd" 1 $nbd_dev 19 | 20 | nbd-client -d $nbd_dev 21 | _remove_image "nbd" $file 22 | -------------------------------------------------------------------------------- /tests/nbd/002: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # SPDX-License-Identifier: MIT or GPL-2.0 3 | 4 | . common/fio_common 5 | . common/nbd_common 6 | 7 | echo "run perf test via nbd-client(nbd server: $NBDSRV:nbdkit file $NBD_SIZE)" 8 | 9 | nbd_dev=/dev/nbd11 10 | 11 | modprobe nbd 12 | 13 | file=`_create_image "nbd" "data" $NBD_SIZE` 14 | 15 | nbd-client -d $nbd_dev 16 | nbd-client $NBDSRV $nbd_dev 17 | 18 | __run_dev_perf_no_create "nbd" 1 $nbd_dev 19 | 20 | nbd-client -d $nbd_dev 21 | _remove_image "nbd" $file 22 | -------------------------------------------------------------------------------- /tests/nbd/003: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # SPDX-License-Identifier: MIT or GPL-2.0 3 | 4 | . common/fio_common 5 | . 
common/nbd_common 6 | 7 | echo "run perf test via nbd-client(2 queues, 2 jobs) (nbd server: $NBDSRV:nbdkit memory $NBD_SIZE)" 8 | 9 | nbd_dev=/dev/nbd11 10 | 11 | modprobe nbd 12 | 13 | file=`_create_image "nbd" "none" $NBD_SIZE` 14 | 15 | nbd-client -d $nbd_dev 16 | nbd-client $NBDSRV $nbd_dev -C 2 17 | 18 | __run_dev_perf_no_create "nbd" 2 $nbd_dev 19 | 20 | nbd-client -d $nbd_dev 21 | _remove_image "nbd" $file 22 | -------------------------------------------------------------------------------- /tests/nbd/021: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # SPDX-License-Identifier: MIT or GPL-2.0 3 | 4 | . common/fio_common 5 | . common/nbd_common 6 | 7 | echo "run perf test via ublk-nbd(nbd server: $NBDSRV:nbdkit memory $NBD_SIZE)" 8 | 9 | file=`_create_image "nbd" "none" $NBD_SIZE` 10 | 11 | export T_TYPE_PARAMS="-t nbd -q 1 -d 127 --host $NBDSRV" 12 | __run_dev_perf 1 13 | 14 | _remove_image "nbd" $file 15 | 16 | -------------------------------------------------------------------------------- /tests/nbd/022: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # SPDX-License-Identifier: MIT or GPL-2.0 3 | 4 | . common/fio_common 5 | . common/nbd_common 6 | 7 | echo "run perf test via ublk-nbd(nbd server: $NBDSRV:nbdkit file $NBD_SIZE)" 8 | 9 | file=`_create_image "nbd" "data" $NBD_SIZE` 10 | 11 | export T_TYPE_PARAMS="-t nbd -q 1 -d 127 --host $NBDSRV" 12 | __run_dev_perf 1 13 | 14 | _remove_image "nbd" $file 15 | -------------------------------------------------------------------------------- /tests/nbd/023: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # SPDX-License-Identifier: MIT or GPL-2.0 3 | 4 | . common/fio_common 5 | . 
common/nbd_common 6 | 7 | echo "run perf test via ublk-nbd(queues: 2, jobs: 2, nbd server: $NBDSRV:nbdkit memory $NBD_SIZE)" 8 | 9 | file=`_create_image "nbd" "none" $NBD_SIZE` 10 | 11 | export T_TYPE_PARAMS="-t nbd -q 2 -d 127 --host $NBDSRV" 12 | __run_dev_perf 2 13 | 14 | _remove_image "nbd" $file 15 | -------------------------------------------------------------------------------- /tests/nbd/041: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # SPDX-License-Identifier: MIT or GPL-2.0 3 | 4 | . common/fio_common 5 | . common/nbd_common 6 | 7 | echo "run perf test via ublk-nbd(send_zc, nbd server: $NBDSRV:nbdkit memory $NBD_SIZE)" 8 | 9 | file=`_create_image "nbd" "none" $NBD_SIZE` 10 | 11 | export T_TYPE_PARAMS="-t nbd -q 1 -d 127 --host $NBDSRV --send_zc" 12 | __run_dev_perf 1 13 | 14 | _remove_image "nbd" $file 15 | -------------------------------------------------------------------------------- /tests/nbd/042: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # SPDX-License-Identifier: MIT or GPL-2.0 3 | 4 | . common/fio_common 5 | . common/nbd_common 6 | 7 | echo "run perf test via ublk-nbd(with send_zc nbd server: $NBDSRV:nbdkit file $NBD_SIZE)" 8 | 9 | file=`_create_image "nbd" "data" $NBD_SIZE` 10 | 11 | export T_TYPE_PARAMS="-t nbd -q 1 -d 127 --host $NBDSRV --send_zc" 12 | __run_dev_perf 1 13 | 14 | _remove_image "nbd" $file 15 | -------------------------------------------------------------------------------- /tests/nbd/043: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # SPDX-License-Identifier: MIT or GPL-2.0 3 | 4 | . common/fio_common 5 | . 
common/nbd_common 6 | 7 | echo "run perf test via ublk-nbd(with send_zc, nbd server: $NBDSRV:nbdkit memory $NBD_SIZE)" 8 | 9 | file=`_create_image "nbd" "none" $NBD_SIZE` 10 | 11 | export T_TYPE_PARAMS="-t nbd -q 2 -d 127 --host $NBDSRV --send_zc" 12 | __run_dev_perf 2 13 | 14 | _remove_image "nbd" $file 15 | -------------------------------------------------------------------------------- /tests/null/001: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # SPDX-License-Identifier: MIT or GPL-2.0-only 3 | 4 | . common/fio_common 5 | 6 | export T_TYPE_PARAMS="-t null -q 1" 7 | 8 | __run_dev_perf 1 9 | -------------------------------------------------------------------------------- /tests/null/002: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # SPDX-License-Identifier: MIT or GPL-2.0-only 3 | 4 | . common/fio_common 5 | 6 | export T_TYPE_PARAMS="-t null -q 2" 7 | 8 | __run_dev_perf 2 9 | -------------------------------------------------------------------------------- /tests/null/004: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # SPDX-License-Identifier: MIT or GPL-2.0-only 3 | 4 | . common/fio_common 5 | 6 | export T_TYPE_PARAMS="-t null -q 1 -u 1" 7 | 8 | __run_dev_perf 1 9 | -------------------------------------------------------------------------------- /tests/null/005: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # SPDX-License-Identifier: MIT or GPL-2.0-only 3 | 4 | . common/fio_common 5 | 6 | export T_TYPE_PARAMS="-t null -q 2 -u 1" 7 | 8 | __run_dev_perf 2 9 | -------------------------------------------------------------------------------- /tests/null/006: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | . 
# Run the single test script $1 in the background and poll it once per
# second for up to 600 seconds. Announces the test on stdout and in
# /dev/kmsg; when the limit is reached only a "timeout" line is printed.
# Sets the globals TS, TMP, NAME and GRP.
run_test() {
	TS=$1

	TMP=$(dirname $TS)
	NAME=$(basename $TS)
	GRP=$(basename $TMP)

	local timeout=600
	local count=0
	local TPID

	echo "running $GRP/$NAME" | tee /dev/kmsg
	sh -c $TS &
	TPID=$!

	until [ $count -ge $timeout ]; do
		sleep 1
		# kill -0 only probes whether the test process still exists
		if ! kill -0 $TPID > /dev/null 2>&1; then
			break
		fi
		count=$((count + 1))
	done

	[ $count -ge $timeout ] && echo "test $GRP/$NAME timeout"
}
#!/bin/sh
# SPDX-License-Identifier: MIT or GPL-2.0-only
#
# Print the version string taken from "git describe --dirty" with a
# leading "v" removed, or "0.unknown" when git yields nothing.

# The stderr redirect belongs to git: it used to sit on sed, so git's
# error message leaked whenever this ran outside a git checkout.
GITDESC=$(git describe --dirty 2>/dev/null | sed -e 's/^v//')

if [ -z "$GITDESC" ]; then
	GITDESC="0.unknown"
fi

echo $GITDESC
#!/bin/bash
# SPDX-License-Identifier: MIT or GPL-2.0-only
#
# Called with: <kernel name> <add|remove> <major> <minor>.
# Mirrors ublk device nodes into running docker containers and, on "add",
# chowns the host node to the owner reported by ublk_user_id.

# Create /dev/$1 in container $5 as a char ("ublkc") or block ("ublkb")
# node with major $2 / minor $3, owned by $4 and mode 700. Other names
# are ignored.
ublk_docker_add()
{
	local name=$1
	local maj=$2
	local min=$3
	local uid=$4
	local container=$5
	local node_type

	#echo "docker add $name" >> /tmp/udev_docker_udev.log
	case "$name" in
	*ublkc*)
		node_type=c
		;;
	*ublkb*)
		node_type=b
		;;
	*)
		return 0
		;;
	esac

	docker exec -u 0 $container mknod /dev/$name $node_type $maj $min
	docker exec -u 0 $container chown $uid /dev/$name
	docker exec -u 0 $container chmod 700 /dev/$name
}

# Remove /dev/$1 from container $5 when the name contains "ublkc" or
# "ublkb"; $2-$4 are accepted for symmetry with ublk_docker_add.
ublk_docker_remove()
{
	local name=$1
	local maj=$2
	local min=$3
	local uid=$4
	local container=$5

	#echo "docker remove $name" >> /tmp/udev_docker_udev.log
	case "$name" in
	*ublkc*|*ublkb*)
		docker exec -u 0 $container rm /dev/$name
		;;
	esac
}

MY_DIR=$(cd "$(dirname "$0")";pwd)
DEV=$1
ID=$(${MY_DIR}/ublk_user_id $1)

#echo $@ >> /tmp/udev_docker_udev.log

case "$2" in
add)
	#add ublk devices for interested containers
	if CONTAINERS=$(docker ps --format "{{.Names}}"); then
		#echo $CONTAINERS >> /tmp/udev_docker_udev.log
		for C in $CONTAINERS; do
			if ps -ax | grep docker | grep $ID > /dev/null 2>&1; then
				ublk_docker_add $DEV $3 $4 $ID $C
			fi
		done
	fi
	;;
remove)
	if CONTAINERS=$(docker ps --format "{{.Names}}"); then
		#echo $CONTAINERS >> /tmp/udev_docker_udev.log
		for C in $CONTAINERS; do
			ublk_docker_remove $DEV $3 $4 $ID $C
		done
	fi
	;;
esac

# host side: "-1:-1" means there is no owner to hand the node to
if [ "$2" == "add" ]; then
	if [ "${ID}" != "-1:-1" ]; then
		/usr/bin/chown $ID /dev/$1 > /dev/null 2>&1
	fi
fi
-------------------------------------------------------------------------------- /utils/ublk_dev.rules: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: MIT or GPL-2.0-only 2 | # Udev rules for ublk devices. 3 | 4 | KERNEL=="ublk-control", MODE="0666", OPTIONS+="static_node=ublk-control" 5 | ACTION=="add",KERNEL=="ublk[bc]*",RUN+="/usr/local/sbin/ublk_chown.sh %k 'add' '%M' '%m'" 6 | ACTION=="remove",KERNEL=="ublk[bc]*",RUN+="/usr/local/sbin/ublk_chown.sh %k 'remove' '%M' '%m'" 7 | -------------------------------------------------------------------------------- /utils/ublk_user_id.c: -------------------------------------------------------------------------------- 1 | // SPDX-License-Identifier: MIT or GPL-2.0-only 2 | 3 | #include 4 | #include 5 | #include "ublksrv.h" 6 | 7 | static int print_dev_owner_id(int number) 8 | { 9 | struct ublksrv_dev_data data = { 10 | .dev_id = number, 11 | }; 12 | struct ublksrv_ctrl_dev *dev = ublksrv_ctrl_init(&data); 13 | if (!dev) { 14 | fprintf(stderr, "can't init dev %d\n", data.dev_id); 15 | return -EOPNOTSUPP; 16 | } 17 | int ret = ublksrv_ctrl_get_info(dev); 18 | 19 | if (ret >= 0) { 20 | const struct ublksrv_ctrl_dev_info *dinfo = 21 | ublksrv_ctrl_get_dev_info(dev); 22 | 23 | if (dinfo->flags & UBLK_F_UNPRIVILEGED_DEV) 24 | printf("%d:%d\n", dinfo->owner_uid, dinfo->owner_gid); 25 | else 26 | printf("%d:%d\n", -1, -1); 27 | } else { 28 | printf("%d:%d\n", -1, -1); 29 | } 30 | 31 | ublksrv_ctrl_deinit(dev); 32 | 33 | return ret; 34 | } 35 | 36 | int main(int argc, char *argv[]) 37 | { 38 | long number; 39 | char *path; 40 | 41 | if (argc != 2) 42 | return -1; 43 | 44 | path = argv[1]; 45 | 46 | /* 47 | * ublkcN or ublkbN since it is called when 48 | * udev add event is received 49 | */ 50 | number = strtol(&path[5], NULL, 10); 51 | 52 | if (number >= 0) 53 | print_dev_owner_id(number); 54 | 55 | return 0; 56 | } 57 | 
--------------------------------------------------------------------------------