├── .gitignore ├── LICENSE ├── Makefile ├── README.md ├── benchmarks ├── benchmarks.md └── io_uring_vs_epoll.png └── io_uring_echo_server.c /.gitignore: -------------------------------------------------------------------------------- 1 | .out 2 | /io_uring_echo_server 3 | /.vscode 4 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Hielke de Vries 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | CCFLAGS ?= -Wall -O2 -D_GNU_SOURCE -luring 2 | all_targets = io_uring_echo_server 3 | 4 | .PHONY: liburing io_uring_echo_server 5 | 6 | all: $(all_targets) 7 | 8 | clean: 9 | rm -f $(all_targets) 10 | 11 | liburing: 12 | +$(MAKE) -C ./liburing 13 | 14 | io_uring_echo_server: 15 | $(CC) io_uring_echo_server.c -o ./io_uring_echo_server ${CCFLAGS} 16 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # io_uring bare minimum echo server 2 | * uses an event loop created with io_uring 3 | * uses liburing https://github.com/axboe/liburing 4 | * __Linux 5.7 or higher with IORING_FEAT_FAST_POLL and IORING_OP_PROVIDE_BUFFERS required__ 5 | 6 | 7 | ## Install and run 8 | #### First install the latest liburing: 9 | 10 | `git clone https://github.com/axboe/liburing` 11 | 12 | `./configure` 13 | 14 | `make` 15 | 16 | `make install` 17 | 18 | 19 | #### Then install io_uring-echo-server: 20 | 21 | `git clone https://github.com/frevib/io_uring-echo-server.git` 22 | 23 | `make io_uring_echo_server` 24 | 25 | `./io_uring_echo_server [port_number]` 26 | 27 | ## compare with epoll echo server 28 | https://github.com/frevib/epoll-echo-server 29 | 30 | 31 | ## Benchmarks (without buffer selection) 32 | https://github.com/frevib/io_uring-echo-server/blob/io-uring-feat-fast-poll/benchmarks/benchmarks.md 33 | 34 | 35 | 36 | ## Versions 37 | 38 | ### v1.7 39 | * Use latest liburing from https://github.com/axboe/liburing 40 | 41 | ### v1.6 42 | * Use IORING_OP_PROVIDE_BUFFERS 43 | 44 | ### v1.5 45 | * Use IORING_FEAT_FAST_POLL, which increases performance a lot 46 | * Remove all polling, this is now handled by IORING_FEAT_FAST_POLL 47 | 48 | ### v1.4 49 | Fix bug that massively 
overstated the performance. 50 | 51 | ### v1.3 52 | Use pre-allocated `sqe->user_data` instead of dynamically allocating memory. 53 | 54 | ### v1.1 55 | Fix memory leak. 56 | 57 | ### v1.0 58 | Working release. 59 | -------------------------------------------------------------------------------- /benchmarks/benchmarks.md: -------------------------------------------------------------------------------- 1 | # io_uring echo server benchmarks 2 | 3 | ## requirements to run the benchmarks 4 | * __Linux 5.6 or higher with IORING_FEAT_FAST_POLL required__ (available in https://git.kernel.dk/cgit/linux-block/?h=io_uring-task-poll). [here][kernel_compile] is how to compile a Linux kernel. Tip: clone Linux kernel from Github (much faster), then merge the io_uring-task-poll branch. 5 | 6 | ## programs under test 7 | * echo server using an event loop created with __io_uring__ : https://github.com/frevib/io_uring-echo-server/tree/io-uring-feat-fast-poll 8 | * echo server using an event loop created with __epoll__ : https://github.com/frevib/epoll-echo-server 9 | 10 | ## system specs for the benchmarks 11 | * Macbook pro 6-core, 32GB RAM, 2,6 GHz 12 | * Vmware Ubuntu 18.04, 8GB RAM, 6 vcores (3 physical cores) 13 | * Linux ubuntu 5.6.0-rc1+ x86_64 with IORING_FEAT_FAST_POLL 14 | * 2 vcores (one physical) isolated for the echo server with `isolcpus=0,1`. 
15 | * Echo server is assigned a dedicated CPU with `taskset -cp 0 [pid]` 16 | 17 | 18 | ## benchmark tool 19 | * Rust echo bench: https://github.com/haraldh/rust_echo_bench 20 | * `cargo run --release -- --address "localhost:6666" --number [number of clients] --duration [duration in secs] --length [msg size]` 21 | * 2 runs for each combination of 128, 512 and 1000 bytes message size with 1, 50, 150, 300, 500 and 1000 clients 22 | 23 | 24 | 25 | ## benchmark results, requests/second 26 | 27 | **io_uring with IORING_FEAT_FAST_POLL** 28 | 29 | | clients | 1 | 50 | 150 | 300 | 500 | 1000 | 30 | |:----------:|:-----:|:------:|:------:|:------:|:------:|:------:| 31 | | 128 bytes | 13093 | 147078 | 190054 | 216637 | 211280 | 173343 | 32 | | 512 bytes | 13140 | 150444 | 193019 | 203360 | 194701 | 156880 | 33 | | 1000 bytes | 14024 | 140248 | 178638 | 200853 | 183845 | 143810 | 34 | 35 | 36 | 37 | **epoll** 38 | 39 | | clients | 1 | 50 | 150 | 300 | 500 | 1000 | 40 | |:--------------:|:-----:|:------:|:------:|:------:|:------:|:------:| 41 | | 128 bytes | 13177 | 139863 | 152561 | 145517 | 125402 | 108380 | 42 | | 512 bytes | 13190 | 135973 | 147153 | 142518 | 124584 | 107257 | 43 | | 1000 bytes | 13172 | 131773 | 142481 | 131748 | 123287 | 102474 | 44 | 45 | 46 | **scatter plot for the tables above** 47 | 48 | io_uring vs epoll benchmarks 49 | 50 | 51 | ## extra info 52 | * Testing with many more clients (> 2000) causes both echo servers to crash. 53 | * When running many clients for a period of time, `io_uring_echo_server` becomes unresponsive in an uninterruptible sleep state. So for this echo server the 128-byte and 512-byte benchmarks are run sequentially first, then the echo server is restarted and the 1000-byte benchmark is run. I'm not sure what is happening here. There are no problems with the epoll echo server. 54 | * io_uring_echo_server needs a separate buffer per connection. Each buffer is indexed by its file descriptor number, like `bufs[fd_number]`. 
So if you have many connections you could have a segfault when the fd_number is too high. There is a fix for that in progress: https://lore.kernel.org/io-uring/20200228203053.25023-1-axboe@kernel.dk/T/#u 55 | * the following script is used to run the benchmarks from the Rust echo bench directory: 56 | 57 | 58 | ```bash 59 | #!/bin/bash 60 | echo $(uname -a) 61 | 62 | if [ "$#" -ne 1 ]; then 63 | echo "Please give port where echo server is running: $0 [port]" 64 | exit 65 | fi 66 | 67 | PID=$(lsof -itcp:$1 | sed -n -e 2p | awk '{print $2}') 68 | taskset -cp 0 $PID 69 | 70 | for bytes in 1 128 512 1000 71 | do 72 | for connections in 1 50 150 300 500 1000 73 | do 74 | cargo run --release -- --address "localhost:$1" --number $connections --duration 60 --length $bytes 75 | sleep 4 76 | done 77 | done 78 | 79 | ``` 80 | 81 | 82 | 83 | 84 | [kernel_compile]: https://www.cyberciti.biz/tips/compiling-linux-kernel-26.html 85 | [benchmark_plot]: io_uring_vs_epoll.png =250x -------------------------------------------------------------------------------- /benchmarks/io_uring_vs_epoll.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/frevib/io_uring-echo-server/dc00940baeec2ac577410dfa4d881d0852b01a08/benchmarks/io_uring_vs_epoll.png -------------------------------------------------------------------------------- /io_uring_echo_server.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #include "liburing.h" 13 | 14 | #define MAX_CONNECTIONS 4096 15 | #define BACKLOG 512 16 | #define MAX_MESSAGE_LEN 2048 17 | #define BUFFERS_COUNT MAX_CONNECTIONS 18 | 19 | void add_accept(struct io_uring *ring, int fd, struct sockaddr *client_addr, socklen_t *client_len, unsigned flags); 20 | void add_socket_read(struct io_uring *ring, int fd, unsigned gid, 
size_t size, unsigned flags); 21 | void add_socket_write(struct io_uring *ring, int fd, __u16 bid, size_t size, unsigned flags); 22 | void add_provide_buf(struct io_uring *ring, __u16 bid, unsigned gid); 23 | 24 | enum { 25 | ACCEPT, 26 | READ, 27 | WRITE, 28 | PROV_BUF, 29 | }; 30 | 31 | typedef struct conn_info { 32 | __u32 fd; 33 | __u16 type; 34 | __u16 bid; 35 | } conn_info; 36 | 37 | char bufs[BUFFERS_COUNT][MAX_MESSAGE_LEN] = {0}; 38 | int group_id = 1337; 39 | 40 | int main(int argc, char *argv[]) { 41 | if (argc < 2) { 42 | printf("Please give a port number: ./io_uring_echo_server [port]\n"); 43 | exit(0); 44 | } 45 | 46 | // some variables we need 47 | int portno = strtol(argv[1], NULL, 10); 48 | struct sockaddr_in serv_addr, client_addr; 49 | socklen_t client_len = sizeof(client_addr); 50 | 51 | // setup socket 52 | int sock_listen_fd = socket(AF_INET, SOCK_STREAM, 0); 53 | const int val = 1; 54 | setsockopt(sock_listen_fd, SOL_SOCKET, SO_REUSEADDR, &val, sizeof(val)); 55 | 56 | memset(&serv_addr, 0, sizeof(serv_addr)); 57 | serv_addr.sin_family = AF_INET; 58 | serv_addr.sin_port = htons(portno); 59 | serv_addr.sin_addr.s_addr = INADDR_ANY; 60 | 61 | // bind and listen 62 | if (bind(sock_listen_fd, (struct sockaddr *)&serv_addr, sizeof(serv_addr)) < 0) { 63 | perror("Error binding socket...\n"); 64 | exit(1); 65 | } 66 | if (listen(sock_listen_fd, BACKLOG) < 0) { 67 | perror("Error listening on socket...\n"); 68 | exit(1); 69 | } 70 | printf("io_uring echo server listening for connections on port: %d\n", portno); 71 | 72 | // initialize io_uring 73 | struct io_uring_params params; 74 | struct io_uring ring; 75 | memset(¶ms, 0, sizeof(params)); 76 | 77 | if (io_uring_queue_init_params(2048, &ring, ¶ms) < 0) { 78 | perror("io_uring_init_failed...\n"); 79 | exit(1); 80 | } 81 | 82 | // check if IORING_FEAT_FAST_POLL is supported 83 | if (!(params.features & IORING_FEAT_FAST_POLL)) { 84 | printf("IORING_FEAT_FAST_POLL not available in the kernel, 
quiting...\n"); 85 | exit(0); 86 | } 87 | 88 | // check if buffer selection is supported 89 | struct io_uring_probe *probe; 90 | probe = io_uring_get_probe_ring(&ring); 91 | if (!probe || !io_uring_opcode_supported(probe, IORING_OP_PROVIDE_BUFFERS)) { 92 | printf("Buffer select not supported, skipping...\n"); 93 | exit(0); 94 | } 95 | 96 | io_uring_free_probe(probe); 97 | 98 | // register buffers for buffer selection 99 | struct io_uring_sqe *sqe; 100 | struct io_uring_cqe *cqe; 101 | 102 | sqe = io_uring_get_sqe(&ring); 103 | io_uring_prep_provide_buffers(sqe, bufs, MAX_MESSAGE_LEN, BUFFERS_COUNT, group_id, 0); 104 | 105 | io_uring_submit(&ring); 106 | io_uring_wait_cqe(&ring, &cqe); 107 | if (cqe->res < 0) { 108 | printf("cqe->res = %d\n", cqe->res); 109 | exit(1); 110 | } 111 | io_uring_cqe_seen(&ring, cqe); 112 | 113 | // add first accept SQE to monitor for new incoming connections 114 | add_accept(&ring, sock_listen_fd, (struct sockaddr *)&client_addr, &client_len, 0); 115 | 116 | // start event loop 117 | while (1) { 118 | io_uring_submit_and_wait(&ring, 1); 119 | struct io_uring_cqe *cqe; 120 | unsigned head; 121 | unsigned count = 0; 122 | 123 | // go through all CQEs 124 | io_uring_for_each_cqe(&ring, head, cqe) { 125 | ++count; 126 | struct conn_info conn_i; 127 | memcpy(&conn_i, &cqe->user_data, sizeof(conn_i)); 128 | 129 | int type = conn_i.type; 130 | if (cqe->res == -ENOBUFS) { 131 | fprintf(stdout, "bufs in automatic buffer selection empty, this should not happen...\n"); 132 | fflush(stdout); 133 | exit(1); 134 | } else if (type == PROV_BUF) { 135 | if (cqe->res < 0) { 136 | printf("cqe->res = %d\n", cqe->res); 137 | exit(1); 138 | } 139 | } else if (type == ACCEPT) { 140 | int sock_conn_fd = cqe->res; 141 | // only read when there is no error, >= 0 142 | if (sock_conn_fd >= 0) { 143 | add_socket_read(&ring, sock_conn_fd, group_id, MAX_MESSAGE_LEN, IOSQE_BUFFER_SELECT); 144 | } 145 | 146 | // new connected client; read data from socket and re-add 
accept to monitor for new connections 147 | add_accept(&ring, sock_listen_fd, (struct sockaddr *)&client_addr, &client_len, 0); 148 | } else if (type == READ) { 149 | int bytes_read = cqe->res; 150 | int bid = cqe->flags >> 16; 151 | if (cqe->res <= 0) { 152 | // read failed, re-add the buffer 153 | add_provide_buf(&ring, bid, group_id); 154 | // connection closed or error 155 | close(conn_i.fd); 156 | } else { 157 | // bytes have been read into bufs, now add write to socket sqe 158 | add_socket_write(&ring, conn_i.fd, bid, bytes_read, 0); 159 | } 160 | } else if (type == WRITE) { 161 | // write has been completed, first re-add the buffer 162 | add_provide_buf(&ring, conn_i.bid, group_id); 163 | // add a new read for the existing connection 164 | add_socket_read(&ring, conn_i.fd, group_id, MAX_MESSAGE_LEN, IOSQE_BUFFER_SELECT); 165 | } 166 | } 167 | 168 | io_uring_cq_advance(&ring, count); 169 | } 170 | } 171 | 172 | void add_accept(struct io_uring *ring, int fd, struct sockaddr *client_addr, socklen_t *client_len, unsigned flags) { 173 | struct io_uring_sqe *sqe = io_uring_get_sqe(ring); 174 | io_uring_prep_accept(sqe, fd, client_addr, client_len, 0); 175 | io_uring_sqe_set_flags(sqe, flags); 176 | 177 | conn_info conn_i = { 178 | .fd = fd, 179 | .type = ACCEPT, 180 | }; 181 | memcpy(&sqe->user_data, &conn_i, sizeof(conn_i)); 182 | } 183 | 184 | void add_socket_read(struct io_uring *ring, int fd, unsigned gid, size_t message_size, unsigned flags) { 185 | struct io_uring_sqe *sqe = io_uring_get_sqe(ring); 186 | io_uring_prep_recv(sqe, fd, NULL, message_size, 0); 187 | io_uring_sqe_set_flags(sqe, flags); 188 | sqe->buf_group = gid; 189 | 190 | conn_info conn_i = { 191 | .fd = fd, 192 | .type = READ, 193 | }; 194 | memcpy(&sqe->user_data, &conn_i, sizeof(conn_i)); 195 | } 196 | 197 | void add_socket_write(struct io_uring *ring, int fd, __u16 bid, size_t message_size, unsigned flags) { 198 | struct io_uring_sqe *sqe = io_uring_get_sqe(ring); 199 | 
io_uring_prep_send(sqe, fd, &bufs[bid], message_size, 0); 200 | io_uring_sqe_set_flags(sqe, flags); 201 | 202 | conn_info conn_i = { 203 | .fd = fd, 204 | .type = WRITE, 205 | .bid = bid, 206 | }; 207 | memcpy(&sqe->user_data, &conn_i, sizeof(conn_i)); 208 | } 209 | 210 | void add_provide_buf(struct io_uring *ring, __u16 bid, unsigned gid) { 211 | struct io_uring_sqe *sqe = io_uring_get_sqe(ring); 212 | io_uring_prep_provide_buffers(sqe, bufs[bid], MAX_MESSAGE_LEN, 1, gid, bid); 213 | 214 | conn_info conn_i = { 215 | .fd = 0, 216 | .type = PROV_BUF, 217 | }; 218 | memcpy(&sqe->user_data, &conn_i, sizeof(conn_i)); 219 | } 220 | --------------------------------------------------------------------------------