├── .gitignore
├── LICENSE
├── Makefile
├── README.md
├── benchmarks
├── benchmarks.md
└── io_uring_vs_epoll.png
└── io_uring_echo_server.c
/.gitignore:
--------------------------------------------------------------------------------
1 | .out
2 | /io_uring_echo_server
3 | /.vscode
4 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2020 Hielke de Vries
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
# Compiler and linker flags for the echo server. Override from the command
# line if needed, e.g. `make CCFLAGS="-Wall -O0 -g -D_GNU_SOURCE -luring"`.
CCFLAGS ?= -Wall -O2 -D_GNU_SOURCE -luring
all_targets = io_uring_echo_server

# `all` and `clean` are commands rather than files: declaring them phony keeps
# them working even if a file with the same name appears in this directory.
# `io_uring_echo_server` stays phony so that it is rebuilt on every invocation.
.PHONY: all clean liburing io_uring_echo_server

all: $(all_targets)

# Remove the built binary.
clean:
	rm -f $(all_targets)

# Build a liburing checkout located in ./liburing (if one is present).
liburing:
	+$(MAKE) -C ./liburing

# -luring is part of CCFLAGS and therefore ends up after the source file,
# which is where the linker needs it.
io_uring_echo_server:
	$(CC) io_uring_echo_server.c -o ./io_uring_echo_server ${CCFLAGS}
16 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # io_uring bare minimum echo server
2 | * uses an event loop created with io_uring
3 | * uses liburing https://github.com/axboe/liburing
4 | * __Linux 5.7 or higher with IORING_FEAT_FAST_POLL and IORING_OP_PROVIDE_BUFFERS required__
5 |
6 |
7 | ## Install and run
8 | #### First install the latest liburing:
9 |
10 | `git clone https://github.com/axboe/liburing`
11 |
12 | `./configure`
13 |
14 | `make`
15 |
16 | `make install`
17 |
18 |
19 | #### Then install io_uring-echo-server:
20 |
21 | `git clone https://github.com/frevib/io_uring-echo-server.git`
22 |
23 | `make io_uring_echo_server`
24 |
25 | `./io_uring_echo_server [port_number]`
26 |
27 | ## compare with epoll echo server
28 | https://github.com/frevib/epoll-echo-server
29 |
30 |
31 | ## Benchmarks (without buffer selection)
32 | https://github.com/frevib/io_uring-echo-server/blob/io-uring-feat-fast-poll/benchmarks/benchmarks.md
33 |
34 |
35 |
36 | ## Versions
37 |
38 | ### v1.7
39 | * Use latest liburing from https://github.com/axboe/liburing
40 |
41 | ### v1.6
42 | * Use IORING_OP_PROVIDE_BUFFERS
43 |
44 | ### v1.5
45 | * Use IORING_FEAT_FAST_POLL, which increases performance a lot
46 | * Remove all polling, this is now handled by IORING_FEAT_FAST_POLL
47 |
48 | ### v1.4
49 | Fix bug that massively overstated the performance.
50 |
51 | ### v1.3
52 | Use pre-allocated `sqe->user_data` instead of dynamically allocating memory.
53 |
54 | ### v1.1
55 | Fix memory leak.
56 |
57 | ### v1.0
58 | Working release.
59 |
--------------------------------------------------------------------------------
/benchmarks/benchmarks.md:
--------------------------------------------------------------------------------
1 | # io_uring echo server benchmarks
2 |
3 | ## requirements to run the benchmarks
4 | * __Linux 5.6 or higher with IORING_FEAT_FAST_POLL required__ (available in https://git.kernel.dk/cgit/linux-block/?h=io_uring-task-poll). See [here][kernel_compile] for how to compile a Linux kernel. Tip: clone the Linux kernel from GitHub (much faster), then merge the io_uring-task-poll branch.
5 |
6 | ## programs under test
7 | * echo server using an event loop created with __io_uring__ : https://github.com/frevib/io_uring-echo-server/tree/io-uring-feat-fast-poll
8 | * echo server using an event loop created with __epoll__ : https://github.com/frevib/epoll-echo-server
9 |
10 | ## system specs for the benchmarks
11 | * MacBook Pro 6-core, 32GB RAM, 2.6 GHz
12 | * Vmware Ubuntu 18.04, 8GB RAM, 6 vcores (3 physical cores)
13 | * Linux ubuntu 5.6.0-rc1+ x86_64 with IORING_FEAT_FAST_POLL
14 | * 2 vcores (one physical) isolated for the echo server with `isolcpus=0,1`.
15 | * Echo server is assigned a dedicated CPU with `taskset -cp 0 [pid]`
16 |
17 |
18 | ## benchmark tool
19 | * Rust echo bench: https://github.com/haraldh/rust_echo_bench
20 | * `cargo run --release -- --address "localhost:6666" --number [number of clients] --duration [duration in secs] --length [msg size]`
21 | * 2 runs for each combination of 128, 512 and 1000 bytes message size with 1, 50, 150, 300, 500 and 1000 clients
22 |
23 |
24 |
25 | ## benchmark results, requests/second
26 |
27 | **io_uring with IORING_FEAT_FAST_POLL**
28 |
29 | | clients | 1 | 50 | 150 | 300 | 500 | 1000 |
30 | |:----------:|:-----:|:------:|:------:|:------:|:------:|:------:|
31 | | 128 bytes | 13093 | 147078 | 190054 | 216637 | 211280 | 173343 |
32 | | 512 bytes | 13140 | 150444 | 193019 | 203360 | 194701 | 156880 |
33 | | 1000 bytes | 14024 | 140248 | 178638 | 200853 | 183845 | 143810 |
34 |
35 |
36 |
37 | **epoll**
38 |
39 | | clients | 1 | 50 | 150 | 300 | 500 | 1000 |
40 | |:--------------:|:-----:|:------:|:------:|:------:|:------:|:------:|
41 | | 128 bytes | 13177 | 139863 | 152561 | 145517 | 125402 | 108380 |
42 | | 512 bytes | 13190 | 135973 | 147153 | 142518 | 124584 | 107257 |
43 | | 1000 bytes | 13172 | 131773 | 142481 | 131748 | 123287 | 102474 |
44 |
45 |
46 | **scatter plot for the tables above**
47 |
48 | ![io_uring vs epoll benchmark results][benchmark_plot]
49 |
50 |
51 | ## extra info
52 | * Testing with many more, > 2000 clients, causes both echo servers to crash.
53 | * When running many clients for a period of time, `io_uring_echo_server` becomes unresponsive in an uninterruptible sleep state. So for this echo server the 128-byte and 512-byte benchmarks are run sequentially first, then the echo server is restarted and the 1000-byte benchmark is run. I'm not sure what is happening here. There are no problems with the epoll echo server.
54 | * io_uring_echo_server needs a separate buffer per connection. Each buffer is indexed by its file descriptor number, like `bufs[fd_number]`. So if you have many connections you could have a segfault when the fd_number is too high. There is a fix for that in progress: https://lore.kernel.org/io-uring/20200228203053.25023-1-axboe@kernel.dk/T/#u
55 | * the following script is used to run the benchmarks from the Rust echo bench directory:
56 |
57 |
58 | ```bash
59 | #!/bin/bash
60 | echo $(uname -a)
61 |
62 | if [ "$#" -ne 1 ]; then
63 | echo "Please give port where echo server is running: $0 [port]"
64 | exit
65 | fi
66 |
67 | PID=$(lsof -itcp:$1 | sed -n -e 2p | awk '{print $2}')
68 | taskset -cp 0 $PID
69 |
70 | for bytes in 1 128 512 1000
71 | do
72 | for connections in 1 50 150 300 500 1000
73 | do
74 | cargo run --release -- --address "localhost:$1" --number $connections --duration 60 --length $bytes
75 | sleep 4
76 | done
77 | done
78 |
79 | ```
80 |
81 |
82 |
83 |
84 | [kernel_compile]: https://www.cyberciti.biz/tips/compiling-linux-kernel-26.html
85 | [benchmark_plot]: io_uring_vs_epoll.png =250x
--------------------------------------------------------------------------------
/benchmarks/io_uring_vs_epoll.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/frevib/io_uring-echo-server/dc00940baeec2ac577410dfa4d881d0852b01a08/benchmarks/io_uring_vs_epoll.png
--------------------------------------------------------------------------------
/io_uring_echo_server.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 | #include
5 | #include
6 | #include
7 | #include
8 | #include
9 | #include
10 | #include
11 |
12 | #include "liburing.h"
13 |
// Listen queue length passed to listen().
#define BACKLOG 512

// Size in bytes of one receive buffer, and the most a single recv may return.
#define MAX_MESSAGE_LEN 2048

// Number of receive buffers in the pool: one per connection we plan for.
#define MAX_CONNECTIONS 4096
#define BUFFERS_COUNT MAX_CONNECTIONS
18 |
19 | void add_accept(struct io_uring *ring, int fd, struct sockaddr *client_addr, socklen_t *client_len, unsigned flags);
20 | void add_socket_read(struct io_uring *ring, int fd, unsigned gid, size_t size, unsigned flags);
21 | void add_socket_write(struct io_uring *ring, int fd, __u16 bid, size_t size, unsigned flags);
22 | void add_provide_buf(struct io_uring *ring, __u16 bid, unsigned gid);
23 |
// Kind of request a completion belongs to. The value is stored in
// conn_info.type when the request is queued and read back from the
// completion's user_data in the event loop.
enum {
    ACCEPT = 0,   // accept on the listening socket
    READ = 1,     // receive from a client socket
    WRITE = 2,    // send to a client socket
    PROV_BUF = 3, // hand a buffer back to the buffer group
};
30 |
31 | typedef struct conn_info {
32 | __u32 fd;
33 | __u16 type;
34 | __u16 bid;
35 | } conn_info;
36 |
37 | char bufs[BUFFERS_COUNT][MAX_MESSAGE_LEN] = {0};
38 | int group_id = 1337;
39 |
40 | int main(int argc, char *argv[]) {
41 | if (argc < 2) {
42 | printf("Please give a port number: ./io_uring_echo_server [port]\n");
43 | exit(0);
44 | }
45 |
46 | // some variables we need
47 | int portno = strtol(argv[1], NULL, 10);
48 | struct sockaddr_in serv_addr, client_addr;
49 | socklen_t client_len = sizeof(client_addr);
50 |
51 | // setup socket
52 | int sock_listen_fd = socket(AF_INET, SOCK_STREAM, 0);
53 | const int val = 1;
54 | setsockopt(sock_listen_fd, SOL_SOCKET, SO_REUSEADDR, &val, sizeof(val));
55 |
56 | memset(&serv_addr, 0, sizeof(serv_addr));
57 | serv_addr.sin_family = AF_INET;
58 | serv_addr.sin_port = htons(portno);
59 | serv_addr.sin_addr.s_addr = INADDR_ANY;
60 |
61 | // bind and listen
62 | if (bind(sock_listen_fd, (struct sockaddr *)&serv_addr, sizeof(serv_addr)) < 0) {
63 | perror("Error binding socket...\n");
64 | exit(1);
65 | }
66 | if (listen(sock_listen_fd, BACKLOG) < 0) {
67 | perror("Error listening on socket...\n");
68 | exit(1);
69 | }
70 | printf("io_uring echo server listening for connections on port: %d\n", portno);
71 |
72 | // initialize io_uring
73 | struct io_uring_params params;
74 | struct io_uring ring;
75 | memset(¶ms, 0, sizeof(params));
76 |
77 | if (io_uring_queue_init_params(2048, &ring, ¶ms) < 0) {
78 | perror("io_uring_init_failed...\n");
79 | exit(1);
80 | }
81 |
82 | // check if IORING_FEAT_FAST_POLL is supported
83 | if (!(params.features & IORING_FEAT_FAST_POLL)) {
84 | printf("IORING_FEAT_FAST_POLL not available in the kernel, quiting...\n");
85 | exit(0);
86 | }
87 |
88 | // check if buffer selection is supported
89 | struct io_uring_probe *probe;
90 | probe = io_uring_get_probe_ring(&ring);
91 | if (!probe || !io_uring_opcode_supported(probe, IORING_OP_PROVIDE_BUFFERS)) {
92 | printf("Buffer select not supported, skipping...\n");
93 | exit(0);
94 | }
95 |
96 | io_uring_free_probe(probe);
97 |
98 | // register buffers for buffer selection
99 | struct io_uring_sqe *sqe;
100 | struct io_uring_cqe *cqe;
101 |
102 | sqe = io_uring_get_sqe(&ring);
103 | io_uring_prep_provide_buffers(sqe, bufs, MAX_MESSAGE_LEN, BUFFERS_COUNT, group_id, 0);
104 |
105 | io_uring_submit(&ring);
106 | io_uring_wait_cqe(&ring, &cqe);
107 | if (cqe->res < 0) {
108 | printf("cqe->res = %d\n", cqe->res);
109 | exit(1);
110 | }
111 | io_uring_cqe_seen(&ring, cqe);
112 |
113 | // add first accept SQE to monitor for new incoming connections
114 | add_accept(&ring, sock_listen_fd, (struct sockaddr *)&client_addr, &client_len, 0);
115 |
116 | // start event loop
117 | while (1) {
118 | io_uring_submit_and_wait(&ring, 1);
119 | struct io_uring_cqe *cqe;
120 | unsigned head;
121 | unsigned count = 0;
122 |
123 | // go through all CQEs
124 | io_uring_for_each_cqe(&ring, head, cqe) {
125 | ++count;
126 | struct conn_info conn_i;
127 | memcpy(&conn_i, &cqe->user_data, sizeof(conn_i));
128 |
129 | int type = conn_i.type;
130 | if (cqe->res == -ENOBUFS) {
131 | fprintf(stdout, "bufs in automatic buffer selection empty, this should not happen...\n");
132 | fflush(stdout);
133 | exit(1);
134 | } else if (type == PROV_BUF) {
135 | if (cqe->res < 0) {
136 | printf("cqe->res = %d\n", cqe->res);
137 | exit(1);
138 | }
139 | } else if (type == ACCEPT) {
140 | int sock_conn_fd = cqe->res;
141 | // only read when there is no error, >= 0
142 | if (sock_conn_fd >= 0) {
143 | add_socket_read(&ring, sock_conn_fd, group_id, MAX_MESSAGE_LEN, IOSQE_BUFFER_SELECT);
144 | }
145 |
146 | // new connected client; read data from socket and re-add accept to monitor for new connections
147 | add_accept(&ring, sock_listen_fd, (struct sockaddr *)&client_addr, &client_len, 0);
148 | } else if (type == READ) {
149 | int bytes_read = cqe->res;
150 | int bid = cqe->flags >> 16;
151 | if (cqe->res <= 0) {
152 | // read failed, re-add the buffer
153 | add_provide_buf(&ring, bid, group_id);
154 | // connection closed or error
155 | close(conn_i.fd);
156 | } else {
157 | // bytes have been read into bufs, now add write to socket sqe
158 | add_socket_write(&ring, conn_i.fd, bid, bytes_read, 0);
159 | }
160 | } else if (type == WRITE) {
161 | // write has been completed, first re-add the buffer
162 | add_provide_buf(&ring, conn_i.bid, group_id);
163 | // add a new read for the existing connection
164 | add_socket_read(&ring, conn_i.fd, group_id, MAX_MESSAGE_LEN, IOSQE_BUFFER_SELECT);
165 | }
166 | }
167 |
168 | io_uring_cq_advance(&ring, count);
169 | }
170 | }
171 |
172 | void add_accept(struct io_uring *ring, int fd, struct sockaddr *client_addr, socklen_t *client_len, unsigned flags) {
173 | struct io_uring_sqe *sqe = io_uring_get_sqe(ring);
174 | io_uring_prep_accept(sqe, fd, client_addr, client_len, 0);
175 | io_uring_sqe_set_flags(sqe, flags);
176 |
177 | conn_info conn_i = {
178 | .fd = fd,
179 | .type = ACCEPT,
180 | };
181 | memcpy(&sqe->user_data, &conn_i, sizeof(conn_i));
182 | }
183 |
184 | void add_socket_read(struct io_uring *ring, int fd, unsigned gid, size_t message_size, unsigned flags) {
185 | struct io_uring_sqe *sqe = io_uring_get_sqe(ring);
186 | io_uring_prep_recv(sqe, fd, NULL, message_size, 0);
187 | io_uring_sqe_set_flags(sqe, flags);
188 | sqe->buf_group = gid;
189 |
190 | conn_info conn_i = {
191 | .fd = fd,
192 | .type = READ,
193 | };
194 | memcpy(&sqe->user_data, &conn_i, sizeof(conn_i));
195 | }
196 |
197 | void add_socket_write(struct io_uring *ring, int fd, __u16 bid, size_t message_size, unsigned flags) {
198 | struct io_uring_sqe *sqe = io_uring_get_sqe(ring);
199 | io_uring_prep_send(sqe, fd, &bufs[bid], message_size, 0);
200 | io_uring_sqe_set_flags(sqe, flags);
201 |
202 | conn_info conn_i = {
203 | .fd = fd,
204 | .type = WRITE,
205 | .bid = bid,
206 | };
207 | memcpy(&sqe->user_data, &conn_i, sizeof(conn_i));
208 | }
209 |
210 | void add_provide_buf(struct io_uring *ring, __u16 bid, unsigned gid) {
211 | struct io_uring_sqe *sqe = io_uring_get_sqe(ring);
212 | io_uring_prep_provide_buffers(sqe, bufs[bid], MAX_MESSAGE_LEN, 1, gid, bid);
213 |
214 | conn_info conn_i = {
215 | .fd = 0,
216 | .type = PROV_BUF,
217 | };
218 | memcpy(&sqe->user_data, &conn_i, sizeof(conn_i));
219 | }
220 |
--------------------------------------------------------------------------------