├── 01_basic-client-server ├── Makefile ├── client.c └── server.c ├── 02_read-write ├── Makefile ├── rdma-client.c ├── rdma-common.c ├── rdma-common.h └── rdma-server.c ├── 03_file-transfer ├── rdma-file-transfer │ ├── Makefile │ ├── client.c │ ├── common.c │ ├── common.h │ ├── messages.h │ └── server.c ├── results.txt └── sequence.txt ├── LICENSE.txt └── README.md /01_basic-client-server/Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: clean 2 | 3 | CFLAGS := -Wall -g 4 | LDLIBS := ${LDLIBS} -lrdmacm -libverbs -lpthread 5 | 6 | APPS := server client 7 | 8 | all: ${APPS} 9 | 10 | 11 | clean: 12 | rm -f ${APPS} 13 | 14 | -------------------------------------------------------------------------------- /01_basic-client-server/client.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #define TEST_NZ(x) do { if ( (x)) die("error: " #x " failed (returned non-zero)." ); } while (0) 9 | #define TEST_Z(x) do { if (!(x)) die("error: " #x " failed (returned zero/null)."); } while (0) 10 | 11 | const int BUFFER_SIZE = 1024; 12 | const int TIMEOUT_IN_MS = 500; /* ms */ 13 | 14 | struct context { 15 | struct ibv_context *ctx; 16 | struct ibv_pd *pd; 17 | struct ibv_cq *cq; 18 | struct ibv_comp_channel *comp_channel; 19 | 20 | pthread_t cq_poller_thread; 21 | }; 22 | 23 | struct connection { 24 | struct rdma_cm_id *id; 25 | struct ibv_qp *qp; 26 | 27 | struct ibv_mr *recv_mr; 28 | struct ibv_mr *send_mr; 29 | 30 | char *recv_region; 31 | char *send_region; 32 | 33 | int num_completions; 34 | }; 35 | 36 | static void die(const char *reason); 37 | 38 | static void build_context(struct ibv_context *verbs); 39 | static void build_qp_attr(struct ibv_qp_init_attr *qp_attr); 40 | static void * poll_cq(void *); 41 | static void post_receives(struct connection *conn); 42 | static void register_memory(struct connection *conn); 43 | 44 | static int on_addr_resolved(struct rdma_cm_id *id); 45 | static void on_completion(struct ibv_wc *wc); 46 | static int on_connection(void *context); 47 | static int on_disconnect(struct rdma_cm_id *id); 48 | static int on_event(struct rdma_cm_event *event); 49 | static int on_route_resolved(struct rdma_cm_id *id); 50 | 51 | static struct context *s_ctx = NULL; 52 | 53 | int main(int argc, char **argv) 54 | { 55 | struct addrinfo *addr; 56 | struct rdma_cm_event *event = NULL; 57 | struct rdma_cm_id *conn= NULL; 58 | struct rdma_event_channel *ec = NULL; 59 | 60 | if (argc != 3) 61 | die("usage: client "); 62 | 63 | TEST_NZ(getaddrinfo(argv[1], argv[2], NULL, &addr)); 64 | 65 | TEST_Z(ec = rdma_create_event_channel()); 66 | TEST_NZ(rdma_create_id(ec, &conn, NULL, RDMA_PS_TCP)); 67 | TEST_NZ(rdma_resolve_addr(conn, NULL, addr->ai_addr, TIMEOUT_IN_MS)); 68 | 69 | freeaddrinfo(addr); 70 | 71 | while (rdma_get_cm_event(ec, &event) == 0) { 72 | struct rdma_cm_event event_copy; 73 | 74 | memcpy(&event_copy, event, sizeof(*event)); 75 | rdma_ack_cm_event(event); 76 | 77 | if (on_event(&event_copy)) 78 | break; 79 | } 80 | 81 | rdma_destroy_event_channel(ec); 82 | 83 | return 0; 84 | } 85 | 86 | void die(const char *reason) 87 | { 88 | fprintf(stderr, "%s\n", reason); 89 | exit(EXIT_FAILURE); 90 | } 91 | 92 | void build_context(struct ibv_context *verbs) 93 | { 94 | if (s_ctx) { 95 | if (s_ctx->ctx != verbs) 96 | die("cannot handle events in more than one context."); 97 | 98 | return; 99 | } 100 | 101 | s_ctx = (struct context *)malloc(sizeof(struct context)); 102 | 103 | s_ctx->ctx = verbs; 104 | 105 | TEST_Z(s_ctx->pd = ibv_alloc_pd(s_ctx->ctx)); 106 | TEST_Z(s_ctx->comp_channel = ibv_create_comp_channel(s_ctx->ctx)); 107 | TEST_Z(s_ctx->cq = ibv_create_cq(s_ctx->ctx, 10, NULL, s_ctx->comp_channel, 0)); /* cqe=10 is arbitrary */ 108 | TEST_NZ(ibv_req_notify_cq(s_ctx->cq, 0)); 109 | 110 | TEST_NZ(pthread_create(&s_ctx->cq_poller_thread, NULL, poll_cq, NULL)); 111 | } 112 | 113 | void build_qp_attr(struct ibv_qp_init_attr *qp_attr) 114 | { 115 | memset(qp_attr, 0, sizeof(*qp_attr)); 116 | 117 | qp_attr->send_cq = s_ctx->cq; 118 | qp_attr->recv_cq = s_ctx->cq; 119 | qp_attr->qp_type = IBV_QPT_RC; 120 | 121 | qp_attr->cap.max_send_wr = 10; 122 | qp_attr->cap.max_recv_wr = 10; 123 | qp_attr->cap.max_send_sge = 1; 124 | qp_attr->cap.max_recv_sge = 1; 125 | } 126 | 127 | void * poll_cq(void *ctx) 128 | { 129 | struct ibv_cq *cq; 130 | struct ibv_wc wc; 131 | 132 | while (1) { 133 | TEST_NZ(ibv_get_cq_event(s_ctx->comp_channel, &cq, &ctx)); 134 | ibv_ack_cq_events(cq, 1); 135 | TEST_NZ(ibv_req_notify_cq(cq, 0)); 136 | 137 | while (ibv_poll_cq(cq, 1, &wc)) 138 | on_completion(&wc); 139 | } 140 | 141 | return NULL; 142 | } 143 | 144 | void post_receives(struct connection *conn) 145 | { 146 | struct ibv_recv_wr wr, *bad_wr = NULL; 147 | struct ibv_sge sge; 148 | 149 | wr.wr_id = (uintptr_t)conn; 150 | wr.next = NULL; 151 | wr.sg_list = &sge; 152 | wr.num_sge = 1; 153 | 154 | sge.addr = (uintptr_t)conn->recv_region; 155 | sge.length = BUFFER_SIZE; 156 | sge.lkey = conn->recv_mr->lkey; 157 | 158 | TEST_NZ(ibv_post_recv(conn->qp, &wr, &bad_wr)); 159 | } 160 | 161 | void register_memory(struct connection *conn) 162 | { 163 | conn->send_region = malloc(BUFFER_SIZE); 164 | conn->recv_region = malloc(BUFFER_SIZE); 165 | 166 | TEST_Z(conn->send_mr = ibv_reg_mr( 167 | s_ctx->pd, 168 | conn->send_region, 169 | BUFFER_SIZE, 170 | 0)); 171 | 172 | TEST_Z(conn->recv_mr = ibv_reg_mr( 173 | s_ctx->pd, 174 | conn->recv_region, 175 | BUFFER_SIZE, 176 | IBV_ACCESS_LOCAL_WRITE)); 177 | } 178 | 179 | int on_addr_resolved(struct rdma_cm_id *id) 180 | { 181 | struct ibv_qp_init_attr qp_attr; 182 | struct connection *conn; 183 | 184 | printf("address resolved.\n"); 185 | 186 | build_context(id->verbs); 187 | build_qp_attr(&qp_attr); 188 | 189 | TEST_NZ(rdma_create_qp(id, s_ctx->pd, &qp_attr)); 190 | 191 | id->context = conn = (struct connection *)malloc(sizeof(struct connection)); 192 | 193 | conn->id = id; 194 | conn->qp = id->qp; 195 | conn->num_completions = 0; 196 | 197 | register_memory(conn); 198 | post_receives(conn); 199 | 200 | TEST_NZ(rdma_resolve_route(id, TIMEOUT_IN_MS)); 201 | 202 | return 0; 203 | } 204 | 205 | void on_completion(struct ibv_wc *wc) 206 | { 207 | struct connection *conn = (struct connection *)(uintptr_t)wc->wr_id; 208 | 209 | if (wc->status != IBV_WC_SUCCESS) 210 | die("on_completion: status is not IBV_WC_SUCCESS."); 211 | 212 | if (wc->opcode & IBV_WC_RECV) 213 | printf("received message: %s\n", conn->recv_region); 214 | else if (wc->opcode == IBV_WC_SEND) 215 | printf("send completed successfully.\n"); 216 | else 217 | die("on_completion: completion isn't a send or a receive."); 218 | 219 | if (++conn->num_completions == 2) 220 | rdma_disconnect(conn->id); 221 | } 222 | 223 | int on_connection(void *context) 224 | { 225 | struct connection *conn = (struct connection *)context; 226 | struct ibv_send_wr wr, *bad_wr = NULL; 227 | struct ibv_sge sge; 228 | 229 | snprintf(conn->send_region, BUFFER_SIZE, "message from active/client side with pid %d", getpid()); 230 | 231 | printf("connected. posting send...\n"); 232 | 233 | memset(&wr, 0, sizeof(wr)); 234 | 235 | wr.wr_id = (uintptr_t)conn; 236 | wr.opcode = IBV_WR_SEND; 237 | wr.sg_list = &sge; 238 | wr.num_sge = 1; 239 | wr.send_flags = IBV_SEND_SIGNALED; 240 | 241 | sge.addr = (uintptr_t)conn->send_region; 242 | sge.length = BUFFER_SIZE; 243 | sge.lkey = conn->send_mr->lkey; 244 | 245 | TEST_NZ(ibv_post_send(conn->qp, &wr, &bad_wr)); 246 | 247 | return 0; 248 | } 249 | 250 | int on_disconnect(struct rdma_cm_id *id) 251 | { 252 | struct connection *conn = (struct connection *)id->context; 253 | 254 | printf("disconnected.\n"); 255 | 256 | rdma_destroy_qp(id); 257 | 258 | ibv_dereg_mr(conn->send_mr); 259 | ibv_dereg_mr(conn->recv_mr); 260 | 261 | free(conn->send_region); 262 | free(conn->recv_region); 263 | 264 | free(conn); 265 | 266 | rdma_destroy_id(id); 267 | 268 | return 1; /* exit event loop */ 269 | } 270 | 271 | int on_event(struct rdma_cm_event *event) 272 | { 273 | int r = 0; 274 | 275 | if (event->event == RDMA_CM_EVENT_ADDR_RESOLVED) 276 | r = on_addr_resolved(event->id); 277 | else if (event->event == RDMA_CM_EVENT_ROUTE_RESOLVED) 278 | r = on_route_resolved(event->id); 279 | else if (event->event == RDMA_CM_EVENT_ESTABLISHED) 280 | r = on_connection(event->id->context); 281 | else if (event->event == RDMA_CM_EVENT_DISCONNECTED) 282 | r = on_disconnect(event->id); 283 | else 284 | die("on_event: unknown event."); 285 | 286 | return r; 287 | } 288 | 289 | int on_route_resolved(struct rdma_cm_id *id) 290 | { 291 | struct rdma_conn_param cm_params; 292 | 293 | printf("route resolved.\n"); 294 | 295 | memset(&cm_params, 0, sizeof(cm_params)); 296 | TEST_NZ(rdma_connect(id, &cm_params)); 297 | 298 | return 0; 299 | } 300 | -------------------------------------------------------------------------------- /01_basic-client-server/server.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #define TEST_NZ(x) do { if ( (x)) die("error: " #x " failed (returned non-zero)." ); } while (0) 8 | #define TEST_Z(x) do { if (!(x)) die("error: " #x " failed (returned zero/null)."); } while (0) 9 | 10 | const int BUFFER_SIZE = 1024; 11 | 12 | struct context { 13 | struct ibv_context *ctx; 14 | struct ibv_pd *pd; 15 | struct ibv_cq *cq; 16 | struct ibv_comp_channel *comp_channel; 17 | 18 | pthread_t cq_poller_thread; 19 | }; 20 | 21 | struct connection { 22 | struct ibv_qp *qp; 23 | 24 | struct ibv_mr *recv_mr; 25 | struct ibv_mr *send_mr; 26 | 27 | char *recv_region; 28 | char *send_region; 29 | }; 30 | 31 | static void die(const char *reason); 32 | 33 | static void build_context(struct ibv_context *verbs); 34 | static void build_qp_attr(struct ibv_qp_init_attr *qp_attr); 35 | static void * poll_cq(void *); 36 | static void post_receives(struct connection *conn); 37 | static void register_memory(struct connection *conn); 38 | 39 | static void on_completion(struct ibv_wc *wc); 40 | static int on_connect_request(struct rdma_cm_id *id); 41 | static int on_connection(void *context); 42 | static int on_disconnect(struct rdma_cm_id *id); 43 | static int on_event(struct rdma_cm_event *event); 44 | 45 | static struct context *s_ctx = NULL; 46 | 47 | int main(int argc, char **argv) 48 | { 49 | #if _USE_IPV6 50 | struct sockaddr_in6 addr; 51 | #else 52 | struct sockaddr_in addr; 53 | #endif 54 | struct rdma_cm_event *event = NULL; 55 | struct rdma_cm_id *listener = NULL; 56 | struct rdma_event_channel *ec = NULL; 57 | uint16_t port = 0; 58 | 59 | memset(&addr, 0, sizeof(addr)); 60 | #if _USE_IPV6 61 | addr.sin6_family = AF_INET6; 62 | #else 63 | addr.sin_family = AF_INET; 64 | #endif 65 | 66 | TEST_Z(ec = rdma_create_event_channel()); 67 | TEST_NZ(rdma_create_id(ec, &listener, NULL, RDMA_PS_TCP)); 68 | TEST_NZ(rdma_bind_addr(listener, (struct sockaddr *)&addr)); 69 | TEST_NZ(rdma_listen(listener, 10)); /* backlog=10 is arbitrary */ 70 | 71 | port = ntohs(rdma_get_src_port(listener)); 72 | 73 | printf("listening on port %d.\n", port); 74 | 75 | while (rdma_get_cm_event(ec, &event) == 0) { 76 | struct rdma_cm_event event_copy; 77 | 78 | memcpy(&event_copy, event, sizeof(*event)); 79 | rdma_ack_cm_event(event); 80 | 81 | if (on_event(&event_copy)) 82 | break; 83 | } 84 | 85 | rdma_destroy_id(listener); 86 | rdma_destroy_event_channel(ec); 87 | 88 | return 0; 89 | } 90 | 91 | void die(const char *reason) 92 | { 93 | fprintf(stderr, "%s\n", reason); 94 | exit(EXIT_FAILURE); 95 | } 96 | 97 | void build_context(struct ibv_context *verbs) 98 | { 99 | if (s_ctx) { 100 | if (s_ctx->ctx != verbs) 101 | die("cannot handle events in more than one context."); 102 | 103 | return; 104 | } 105 | 106 | s_ctx = (struct context *)malloc(sizeof(struct context)); 107 | 108 | s_ctx->ctx = verbs; 109 | 110 | TEST_Z(s_ctx->pd = ibv_alloc_pd(s_ctx->ctx)); 111 | TEST_Z(s_ctx->comp_channel = ibv_create_comp_channel(s_ctx->ctx)); 112 | TEST_Z(s_ctx->cq = ibv_create_cq(s_ctx->ctx, 10, NULL, s_ctx->comp_channel, 0)); /* cqe=10 is arbitrary */ 113 | TEST_NZ(ibv_req_notify_cq(s_ctx->cq, 0)); 114 | 115 | TEST_NZ(pthread_create(&s_ctx->cq_poller_thread, NULL, poll_cq, NULL)); 116 | } 117 | 118 | void build_qp_attr(struct ibv_qp_init_attr *qp_attr) 119 | { 120 | memset(qp_attr, 0, sizeof(*qp_attr)); 121 | 122 | qp_attr->send_cq = s_ctx->cq; 123 | qp_attr->recv_cq = s_ctx->cq; 124 | qp_attr->qp_type = IBV_QPT_RC; 125 | 126 | qp_attr->cap.max_send_wr = 10; 127 | qp_attr->cap.max_recv_wr = 10; 128 | qp_attr->cap.max_send_sge = 1; 129 | qp_attr->cap.max_recv_sge = 1; 130 | } 131 | 132 | void * poll_cq(void *ctx) 133 | { 134 | struct ibv_cq *cq; 135 | struct ibv_wc wc; 136 | 137 | while (1) { 138 | TEST_NZ(ibv_get_cq_event(s_ctx->comp_channel, &cq, &ctx)); 139 | ibv_ack_cq_events(cq, 1); 140 | TEST_NZ(ibv_req_notify_cq(cq, 0)); 141 | 142 | while (ibv_poll_cq(cq, 1, &wc)) 143 | on_completion(&wc); 144 | } 145 | 146 | return NULL; 147 | } 148 | 149 | void post_receives(struct connection *conn) 150 | { 151 | struct ibv_recv_wr wr, *bad_wr = NULL; 152 | struct ibv_sge sge; 153 | 154 | wr.wr_id = (uintptr_t)conn; 155 | wr.next = NULL; 156 | wr.sg_list = &sge; 157 | wr.num_sge = 1; 158 | 159 | sge.addr = (uintptr_t)conn->recv_region; 160 | sge.length = BUFFER_SIZE; 161 | sge.lkey = conn->recv_mr->lkey; 162 | 163 | TEST_NZ(ibv_post_recv(conn->qp, &wr, &bad_wr)); 164 | } 165 | 166 | void register_memory(struct connection *conn) 167 | { 168 | conn->send_region = malloc(BUFFER_SIZE); 169 | conn->recv_region = malloc(BUFFER_SIZE); 170 | 171 | TEST_Z(conn->send_mr = ibv_reg_mr( 172 | s_ctx->pd, 173 | conn->send_region, 174 | BUFFER_SIZE, 175 | 0)); 176 | 177 | TEST_Z(conn->recv_mr = ibv_reg_mr( 178 | s_ctx->pd, 179 | conn->recv_region, 180 | BUFFER_SIZE, 181 | IBV_ACCESS_LOCAL_WRITE)); 182 | } 183 | 184 | void on_completion(struct ibv_wc *wc) 185 | { 186 | if (wc->status != IBV_WC_SUCCESS) 187 | die("on_completion: status is not IBV_WC_SUCCESS."); 188 | 189 | if (wc->opcode & IBV_WC_RECV) { 190 | struct connection *conn = (struct connection *)(uintptr_t)wc->wr_id; 191 | 192 | printf("received message: %s\n", conn->recv_region); 193 | 194 | } else if (wc->opcode == IBV_WC_SEND) { 195 | printf("send completed successfully.\n"); 196 | } 197 | } 198 | 199 | int on_connect_request(struct rdma_cm_id *id) 200 | { 201 | struct ibv_qp_init_attr qp_attr; 202 | struct rdma_conn_param cm_params; 203 | struct connection *conn; 204 | 205 | printf("received connection request.\n"); 206 | 207 | build_context(id->verbs); 208 | build_qp_attr(&qp_attr); 209 | 210 | TEST_NZ(rdma_create_qp(id, s_ctx->pd, &qp_attr)); 211 | 212 | id->context = conn = (struct connection *)malloc(sizeof(struct connection)); 213 | conn->qp = id->qp; 214 | 215 | register_memory(conn); 216 | post_receives(conn); 217 | 218 | memset(&cm_params, 0, sizeof(cm_params)); 219 | TEST_NZ(rdma_accept(id, &cm_params)); 220 | 221 | return 0; 222 | } 223 | 224 | int on_connection(void *context) 225 | { 226 | struct connection *conn = (struct connection *)context; 227 | struct ibv_send_wr wr, *bad_wr = NULL; 228 | struct ibv_sge sge; 229 | 230 | snprintf(conn->send_region, BUFFER_SIZE, "message from passive/server side with pid %d", getpid()); 231 | 232 | printf("connected. posting send...\n"); 233 | 234 | memset(&wr, 0, sizeof(wr)); 235 | 236 | wr.opcode = IBV_WR_SEND; 237 | wr.sg_list = &sge; 238 | wr.num_sge = 1; 239 | wr.send_flags = IBV_SEND_SIGNALED; 240 | 241 | sge.addr = (uintptr_t)conn->send_region; 242 | sge.length = BUFFER_SIZE; 243 | sge.lkey = conn->send_mr->lkey; 244 | 245 | TEST_NZ(ibv_post_send(conn->qp, &wr, &bad_wr)); 246 | 247 | return 0; 248 | } 249 | 250 | int on_disconnect(struct rdma_cm_id *id) 251 | { 252 | struct connection *conn = (struct connection *)id->context; 253 | 254 | printf("peer disconnected.\n"); 255 | 256 | rdma_destroy_qp(id); 257 | 258 | ibv_dereg_mr(conn->send_mr); 259 | ibv_dereg_mr(conn->recv_mr); 260 | 261 | free(conn->send_region); 262 | free(conn->recv_region); 263 | 264 | free(conn); 265 | 266 | rdma_destroy_id(id); 267 | 268 | return 0; 269 | } 270 | 271 | int on_event(struct rdma_cm_event *event) 272 | { 273 | int r = 0; 274 | 275 | if (event->event == RDMA_CM_EVENT_CONNECT_REQUEST) 276 | r = on_connect_request(event->id); 277 | else if (event->event == RDMA_CM_EVENT_ESTABLISHED) 278 | r = on_connection(event->id->context); 279 | else if (event->event == RDMA_CM_EVENT_DISCONNECTED) 280 | r = on_disconnect(event->id); 281 | else 282 | die("on_event: unknown event."); 283 | 284 | return r; 285 | } 286 | 287 | -------------------------------------------------------------------------------- /02_read-write/Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: clean 2 | 3 | CFLAGS := -Wall -Werror -g 4 | LD := gcc 5 | LDLIBS := ${LDLIBS} -lrdmacm -libverbs -lpthread 6 | 7 | APPS := rdma-client rdma-server 8 | 9 | all: ${APPS} 10 | 11 | rdma-client: rdma-common.o rdma-client.o 12 | ${LD} -o $@ $^ ${LDLIBS} 13 | 14 | rdma-server: rdma-common.o rdma-server.o 15 | ${LD} -o $@ $^ ${LDLIBS} 16 | 17 | clean: 18 | rm -f *.o ${APPS} 19 | 20 | -------------------------------------------------------------------------------- /02_read-write/rdma-client.c: -------------------------------------------------------------------------------- 1 | #include "rdma-common.h" 2 | 3 | const int TIMEOUT_IN_MS = 500; /* ms */ 4 | 5 | static int on_addr_resolved(struct rdma_cm_id *id); 6 | static int on_connection(struct rdma_cm_id *id); 7 | static int on_disconnect(struct rdma_cm_id *id); 8 | static int on_event(struct rdma_cm_event *event); 9 | static int on_route_resolved(struct rdma_cm_id *id); 10 | static void usage(const char *argv0); 11 | 12 | int main(int argc, char **argv) 13 | { 14 | struct addrinfo *addr; 15 | struct rdma_cm_event *event = NULL; 16 | struct rdma_cm_id *conn= NULL; 17 | struct rdma_event_channel *ec = NULL; 18 | 19 | if (argc != 4) 20 | usage(argv[0]); 21 | 22 | if (strcmp(argv[1], "write") == 0) 23 | set_mode(M_WRITE); 24 | else if (strcmp(argv[1], "read") == 0) 25 | set_mode(M_READ); 26 | else 27 | usage(argv[0]); 28 | 29 | TEST_NZ(getaddrinfo(argv[2], argv[3], NULL, &addr)); 30 | 31 | TEST_Z(ec = rdma_create_event_channel()); 32 | TEST_NZ(rdma_create_id(ec, &conn, NULL, RDMA_PS_TCP)); 33 | TEST_NZ(rdma_resolve_addr(conn, NULL, addr->ai_addr, TIMEOUT_IN_MS)); 34 | 35 | freeaddrinfo(addr); 36 | 37 | while (rdma_get_cm_event(ec, &event) == 0) { 38 | struct rdma_cm_event event_copy; 39 | 40 | memcpy(&event_copy, event, sizeof(*event)); 41 | rdma_ack_cm_event(event); 42 | 43 | if (on_event(&event_copy)) 44 | break; 45 | } 46 | 47 | rdma_destroy_event_channel(ec); 48 | 49 | return 0; 50 | } 51 | 52 | int on_addr_resolved(struct rdma_cm_id *id) 53 | { 54 | printf("address resolved.\n"); 55 | 56 | build_connection(id); 57 | sprintf(get_local_message_region(id->context), "message from active/client side with pid %d", getpid()); 58 | TEST_NZ(rdma_resolve_route(id, TIMEOUT_IN_MS)); 59 | 60 | return 0; 61 | } 62 | 63 | int on_connection(struct rdma_cm_id *id) 64 | { 65 | on_connect(id->context); 66 | send_mr(id->context); 67 | 68 | return 0; 69 | } 70 | 71 | int on_disconnect(struct rdma_cm_id *id) 72 | { 73 | printf("disconnected.\n"); 74 | 75 | destroy_connection(id->context); 76 | return 1; /* exit event loop */ 77 | } 78 | 79 | int on_event(struct rdma_cm_event *event) 80 | { 81 | int r = 0; 82 | 83 | if (event->event == RDMA_CM_EVENT_ADDR_RESOLVED) 84 | r = on_addr_resolved(event->id); 85 | else if (event->event == RDMA_CM_EVENT_ROUTE_RESOLVED) 86 | r = on_route_resolved(event->id); 87 | else if (event->event == RDMA_CM_EVENT_ESTABLISHED) 88 | r = on_connection(event->id); 89 | else if (event->event == RDMA_CM_EVENT_DISCONNECTED) 90 | r = on_disconnect(event->id); 91 | else { 92 | fprintf(stderr, "on_event: %d\n", event->event); 93 | die("on_event: unknown event."); 94 | } 95 | 96 | return r; 97 | } 98 | 99 | int on_route_resolved(struct rdma_cm_id *id) 100 | { 101 | struct rdma_conn_param cm_params; 102 | 103 | printf("route resolved.\n"); 104 | build_params(&cm_params); 105 | TEST_NZ(rdma_connect(id, &cm_params)); 106 | 107 | return 0; 108 | } 109 | 110 | void usage(const char *argv0) 111 | { 112 | fprintf(stderr, "usage: %s \n mode = \"read\", \"write\"\n", argv0); 113 | exit(1); 114 | } 115 | -------------------------------------------------------------------------------- /02_read-write/rdma-common.c: -------------------------------------------------------------------------------- 1 | #include "rdma-common.h" 2 | 3 | static const int RDMA_BUFFER_SIZE = 1024; 4 | 5 | struct message { 6 | enum { 7 | MSG_MR, 8 | MSG_DONE 9 | } type; 10 | 11 | union { 12 | struct ibv_mr mr; 13 | } data; 14 | }; 15 | 16 | struct context { 17 | struct ibv_context *ctx; 18 | struct ibv_pd *pd; 19 | struct ibv_cq *cq; 20 | struct ibv_comp_channel *comp_channel; 21 | 22 | pthread_t cq_poller_thread; 23 | }; 24 | 25 | struct connection { 26 | struct rdma_cm_id *id; 27 | struct ibv_qp *qp; 28 | 29 | int connected; 30 | 31 | struct ibv_mr *recv_mr; 32 | struct ibv_mr *send_mr; 33 | struct ibv_mr *rdma_local_mr; 34 | struct ibv_mr *rdma_remote_mr; 35 | 36 | struct ibv_mr peer_mr; 37 | 38 | struct message *recv_msg; 39 | struct message *send_msg; 40 | 41 | char *rdma_local_region; 42 | char *rdma_remote_region; 43 | 44 | enum { 45 | SS_INIT, 46 | SS_MR_SENT, 47 | SS_RDMA_SENT, 48 | SS_DONE_SENT 49 | } send_state; 50 | 51 | enum { 52 | RS_INIT, 53 | RS_MR_RECV, 54 | RS_DONE_RECV 55 | } recv_state; 56 | }; 57 | 58 | static void build_context(struct ibv_context *verbs); 59 | static void build_qp_attr(struct ibv_qp_init_attr *qp_attr); 60 | static char * get_peer_message_region(struct connection *conn); 61 | static void on_completion(struct ibv_wc *); 62 | static void * poll_cq(void *); 63 | static void post_receives(struct connection *conn); 64 | static void register_memory(struct connection *conn); 65 | static void send_message(struct connection *conn); 66 | 67 | static struct context *s_ctx = NULL; 68 | static enum mode s_mode = M_WRITE; 69 | 70 | void die(const char *reason) 71 | { 72 | fprintf(stderr, "%s\n", reason); 73 | exit(EXIT_FAILURE); 74 | } 75 | 76 | void build_connection(struct rdma_cm_id *id) 77 | { 78 | struct connection *conn; 79 | struct ibv_qp_init_attr qp_attr; 80 | 81 | build_context(id->verbs); 82 | build_qp_attr(&qp_attr); 83 | 84 | TEST_NZ(rdma_create_qp(id, s_ctx->pd, &qp_attr)); 85 | 86 | id->context = conn = (struct connection *)malloc(sizeof(struct connection)); 87 | 88 | conn->id = id; 89 | conn->qp = id->qp; 90 | 91 | conn->send_state = SS_INIT; 92 | conn->recv_state = RS_INIT; 93 | 94 | conn->connected = 0; 95 | 96 | register_memory(conn); 97 | post_receives(conn); 98 | } 99 | 100 | void build_context(struct ibv_context *verbs) 101 | { 102 | if (s_ctx) { 103 | if (s_ctx->ctx != verbs) 104 | die("cannot handle events in more than one context."); 105 | 106 | return; 107 | } 108 | 109 | s_ctx = (struct context *)malloc(sizeof(struct context)); 110 | 111 | s_ctx->ctx = verbs; 112 | 113 | TEST_Z(s_ctx->pd = ibv_alloc_pd(s_ctx->ctx)); 114 | TEST_Z(s_ctx->comp_channel = ibv_create_comp_channel(s_ctx->ctx)); 115 | TEST_Z(s_ctx->cq = ibv_create_cq(s_ctx->ctx, 10, NULL, s_ctx->comp_channel, 0)); /* cqe=10 is arbitrary */ 116 | TEST_NZ(ibv_req_notify_cq(s_ctx->cq, 0)); 117 | 118 | TEST_NZ(pthread_create(&s_ctx->cq_poller_thread, NULL, poll_cq, NULL)); 119 | } 120 | 121 | void build_params(struct rdma_conn_param *params) 122 | { 123 | memset(params, 0, sizeof(*params)); 124 | 125 | params->initiator_depth = params->responder_resources = 1; 126 | params->rnr_retry_count = 7; /* infinite retry */ 127 | } 128 | 129 | void build_qp_attr(struct ibv_qp_init_attr *qp_attr) 130 | { 131 | memset(qp_attr, 0, sizeof(*qp_attr)); 132 | 133 | qp_attr->send_cq = s_ctx->cq; 134 | qp_attr->recv_cq = s_ctx->cq; 135 | qp_attr->qp_type = IBV_QPT_RC; 136 | 137 | qp_attr->cap.max_send_wr = 10; 138 | qp_attr->cap.max_recv_wr = 10; 139 | qp_attr->cap.max_send_sge = 1; 140 | qp_attr->cap.max_recv_sge = 1; 141 | } 142 | 143 | void destroy_connection(void *context) 144 | { 145 | struct connection *conn = (struct connection *)context; 146 | 147 | rdma_destroy_qp(conn->id); 148 | 149 | ibv_dereg_mr(conn->send_mr); 150 | ibv_dereg_mr(conn->recv_mr); 151 | ibv_dereg_mr(conn->rdma_local_mr); 152 | ibv_dereg_mr(conn->rdma_remote_mr); 153 | 154 | free(conn->send_msg); 155 | free(conn->recv_msg); 156 | free(conn->rdma_local_region); 157 | free(conn->rdma_remote_region); 158 | 159 | rdma_destroy_id(conn->id); 160 | 161 | free(conn); 162 | } 163 | 164 | void * get_local_message_region(void *context) 165 | { 166 | if (s_mode == M_WRITE) 167 | return ((struct connection *)context)->rdma_local_region; 168 | else 169 | return ((struct connection *)context)->rdma_remote_region; 170 | } 171 | 172 | char * get_peer_message_region(struct connection *conn) 173 | { 174 | if (s_mode == M_WRITE) 175 | return conn->rdma_remote_region; 176 | else 177 | return conn->rdma_local_region; 178 | } 179 | 180 | void on_completion(struct ibv_wc *wc) 181 | { 182 | struct connection *conn = (struct connection *)(uintptr_t)wc->wr_id; 183 | 184 | if (wc->status != IBV_WC_SUCCESS) 185 | die("on_completion: status is not IBV_WC_SUCCESS."); 186 | 187 | if (wc->opcode & IBV_WC_RECV) { 188 | conn->recv_state++; 189 | 190 | if (conn->recv_msg->type == MSG_MR) { 191 | memcpy(&conn->peer_mr, &conn->recv_msg->data.mr, sizeof(conn->peer_mr)); 192 | post_receives(conn); /* only rearm for MSG_MR */ 193 | 194 | if (conn->send_state == SS_INIT) /* received peer's MR before sending ours, so send ours back */ 195 | send_mr(conn); 196 | } 197 | 198 | } else { 199 | conn->send_state++; 200 | printf("send completed successfully.\n"); 201 | } 202 | 203 | if (conn->send_state == SS_MR_SENT && conn->recv_state == RS_MR_RECV) { 204 | struct ibv_send_wr wr, *bad_wr = NULL; 205 | struct ibv_sge sge; 206 | 207 | if (s_mode == M_WRITE) 208 | printf("received MSG_MR. writing message to remote memory...\n"); 209 | else 210 | printf("received MSG_MR. reading message from remote memory...\n"); 211 | 212 | memset(&wr, 0, sizeof(wr)); 213 | 214 | wr.wr_id = (uintptr_t)conn; 215 | wr.opcode = (s_mode == M_WRITE) ? IBV_WR_RDMA_WRITE : IBV_WR_RDMA_READ; 216 | wr.sg_list = &sge; 217 | wr.num_sge = 1; 218 | wr.send_flags = IBV_SEND_SIGNALED; 219 | wr.wr.rdma.remote_addr = (uintptr_t)conn->peer_mr.addr; 220 | wr.wr.rdma.rkey = conn->peer_mr.rkey; 221 | 222 | sge.addr = (uintptr_t)conn->rdma_local_region; 223 | sge.length = RDMA_BUFFER_SIZE; 224 | sge.lkey = conn->rdma_local_mr->lkey; 225 | 226 | TEST_NZ(ibv_post_send(conn->qp, &wr, &bad_wr)); 227 | 228 | conn->send_msg->type = MSG_DONE; 229 | send_message(conn); 230 | 231 | } else if (conn->send_state == SS_DONE_SENT && conn->recv_state == RS_DONE_RECV) { 232 | printf("remote buffer: %s\n", get_peer_message_region(conn)); 233 | rdma_disconnect(conn->id); 234 | } 235 | } 236 | 237 | void on_connect(void *context) 238 | { 239 | ((struct connection *)context)->connected = 1; 240 | } 241 | 242 | void * poll_cq(void *ctx) 243 | { 244 | struct ibv_cq *cq; 245 | struct ibv_wc wc; 246 | 247 | while (1) { 248 | TEST_NZ(ibv_get_cq_event(s_ctx->comp_channel, &cq, &ctx)); 249 | ibv_ack_cq_events(cq, 1); 250 | TEST_NZ(ibv_req_notify_cq(cq, 0)); 251 | 252 | while (ibv_poll_cq(cq, 1, &wc)) 253 | on_completion(&wc); 254 | } 255 | 256 | return NULL; 257 | } 258 | 259 | void post_receives(struct connection *conn) 260 | { 261 | struct ibv_recv_wr wr, *bad_wr = NULL; 262 | struct ibv_sge sge; 263 | 264 | wr.wr_id = (uintptr_t)conn; 265 | wr.next = NULL; 266 | wr.sg_list = &sge; 267 | wr.num_sge = 1; 268 | 269 | sge.addr = (uintptr_t)conn->recv_msg; 270 | sge.length = sizeof(struct message); 271 | sge.lkey = conn->recv_mr->lkey; 272 | 273 | TEST_NZ(ibv_post_recv(conn->qp, &wr, &bad_wr)); 274 | } 275 | 276 | void register_memory(struct connection *conn) 277 | { 278 | conn->send_msg = malloc(sizeof(struct message)); 279 | conn->recv_msg = malloc(sizeof(struct message)); 280 | 281 | conn->rdma_local_region = malloc(RDMA_BUFFER_SIZE); 282 | conn->rdma_remote_region = malloc(RDMA_BUFFER_SIZE); 283 | 284 | TEST_Z(conn->send_mr = ibv_reg_mr( 285 | s_ctx->pd, 286 | conn->send_msg, 287 | sizeof(struct message), 288 | 0)); 289 | 290 | TEST_Z(conn->recv_mr = ibv_reg_mr( 291 | s_ctx->pd, 292 | conn->recv_msg, 293 | sizeof(struct message), 294 | IBV_ACCESS_LOCAL_WRITE)); 295 | 296 | TEST_Z(conn->rdma_local_mr = ibv_reg_mr( 297 | s_ctx->pd, 298 | conn->rdma_local_region, 299 | RDMA_BUFFER_SIZE, 300 | ((s_mode == M_WRITE) ? 0 : IBV_ACCESS_LOCAL_WRITE))); 301 | 302 | TEST_Z(conn->rdma_remote_mr = ibv_reg_mr( 303 | s_ctx->pd, 304 | conn->rdma_remote_region, 305 | RDMA_BUFFER_SIZE, 306 | ((s_mode == M_WRITE) ? (IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE) : IBV_ACCESS_REMOTE_READ))); 307 | } 308 | 309 | void send_message(struct connection *conn) 310 | { 311 | struct ibv_send_wr wr, *bad_wr = NULL; 312 | struct ibv_sge sge; 313 | 314 | memset(&wr, 0, sizeof(wr)); 315 | 316 | wr.wr_id = (uintptr_t)conn; 317 | wr.opcode = IBV_WR_SEND; 318 | wr.sg_list = &sge; 319 | wr.num_sge = 1; 320 | wr.send_flags = IBV_SEND_SIGNALED; 321 | 322 | sge.addr = (uintptr_t)conn->send_msg; 323 | sge.length = sizeof(struct message); 324 | sge.lkey = conn->send_mr->lkey; 325 | 326 | while (!conn->connected); 327 | 328 | TEST_NZ(ibv_post_send(conn->qp, &wr, &bad_wr)); 329 | } 330 | 331 | void send_mr(void *context) 332 | { 333 | struct connection *conn = (struct connection *)context; 334 | 335 | conn->send_msg->type = MSG_MR; 336 | memcpy(&conn->send_msg->data.mr, conn->rdma_remote_mr, sizeof(struct ibv_mr)); 337 | 338 | send_message(conn); 339 | } 340 | 341 | void set_mode(enum mode m) 342 | { 343 | s_mode = m; 344 | } 345 | -------------------------------------------------------------------------------- /02_read-write/rdma-common.h: -------------------------------------------------------------------------------- 1 | #ifndef RDMA_COMMON_H 2 | #define RDMA_COMMON_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #define TEST_NZ(x) do { if ( (x)) die("error: " #x " failed (returned non-zero)." ); } while (0) 12 | #define TEST_Z(x) do { if (!(x)) die("error: " #x " failed (returned zero/null)."); } while (0) 13 | 14 | enum mode { 15 | M_WRITE, 16 | M_READ 17 | }; 18 | 19 | void die(const char *reason); 20 | 21 | void build_connection(struct rdma_cm_id *id); 22 | void build_params(struct rdma_conn_param *params); 23 | void destroy_connection(void *context); 24 | void * get_local_message_region(void *context); 25 | void on_connect(void *context); 26 | void send_mr(void *context); 27 | void set_mode(enum mode m); 28 | 29 | #endif 30 | -------------------------------------------------------------------------------- /02_read-write/rdma-server.c: -------------------------------------------------------------------------------- 1 | #include "rdma-common.h" 2 | 3 | static int on_connect_request(struct rdma_cm_id *id); 4 | static int on_connection(struct rdma_cm_id *id); 5 | static int on_disconnect(struct rdma_cm_id *id); 6 | static int on_event(struct rdma_cm_event *event); 7 | static void usage(const char *argv0); 8 | 9 | int main(int argc, char **argv) 10 | { 11 | struct sockaddr_in6 addr; 12 | struct rdma_cm_event *event = NULL; 13 | struct rdma_cm_id *listener = NULL; 14 | struct rdma_event_channel *ec = NULL; 15 | uint16_t port = 0; 16 | 17 | if (argc != 2) 18 | usage(argv[0]); 19 | 20 | if (strcmp(argv[1], "write") == 0) 21 | set_mode(M_WRITE); 22 | else if (strcmp(argv[1], "read") == 0) 23 | set_mode(M_READ); 24 | else 25 | usage(argv[0]); 26 | 27 | memset(&addr, 0, sizeof(addr)); 28 | addr.sin6_family = AF_INET6; 29 | 30 | TEST_Z(ec = rdma_create_event_channel()); 31 | TEST_NZ(rdma_create_id(ec, &listener, NULL, RDMA_PS_TCP)); 32 | TEST_NZ(rdma_bind_addr(listener, (struct sockaddr *)&addr)); 33 | TEST_NZ(rdma_listen(listener, 10)); /* backlog=10 is arbitrary */ 34 | 35 | port = ntohs(rdma_get_src_port(listener)); 36 | 37 | printf("listening on port %d.\n", port); 38 | 39 | while (rdma_get_cm_event(ec, &event) == 0) { 40 | struct rdma_cm_event event_copy; 41 | 42 | memcpy(&event_copy, event, sizeof(*event)); 43 | rdma_ack_cm_event(event); 44 | 45 | if (on_event(&event_copy)) 46 | break; 47 | } 48 | 49 | rdma_destroy_id(listener); 50 | rdma_destroy_event_channel(ec); 51 | 52 | return 0; 53 | } 54 | 55 | int on_connect_request(struct rdma_cm_id *id) 56 | { 57 | struct rdma_conn_param cm_params; 58 | 59 | printf("received connection request.\n"); 60 | build_connection(id); 61 | build_params(&cm_params); 62 | sprintf(get_local_message_region(id->context), "message from passive/server side with pid %d", getpid()); 63 | TEST_NZ(rdma_accept(id, &cm_params)); 64 | 65 | return 0; 66 | } 67 | 68 | int on_connection(struct rdma_cm_id *id) 69 | { 70 | on_connect(id->context); 71 | 72 | return 0; 73 | } 74 | 75 | int on_disconnect(struct rdma_cm_id *id) 76 | { 77 | printf("peer disconnected.\n"); 78 | 79 | destroy_connection(id->context); 80 | return 0; 81 | } 82 | 83 | int on_event(struct rdma_cm_event *event) 84 | { 85 | int r = 0; 86 | 87 | if (event->event == RDMA_CM_EVENT_CONNECT_REQUEST) 88 | r = on_connect_request(event->id); 89 | else if (event->event == RDMA_CM_EVENT_ESTABLISHED) 90 | r = on_connection(event->id); 91 | else if (event->event == RDMA_CM_EVENT_DISCONNECTED) 92 | r = on_disconnect(event->id); 93 | else 94 | die("on_event: unknown event."); 95 | 96 | return r; 97 | } 98 | 99 | void usage(const char *argv0) 100 | { 101 | fprintf(stderr, "usage: %s \n mode = \"read\", \"write\"\n", argv0); 102 | exit(1); 103 | } 104 | -------------------------------------------------------------------------------- /03_file-transfer/rdma-file-transfer/Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: clean 2 | 3 | CFLAGS := -Wall -Werror -g 4 | LD := gcc 5 | LDLIBS := ${LDLIBS} -lrdmacm -libverbs -lpthread 6 | 7 | APPS := client server 8 | 9 | all: ${APPS} 10 | 11 | client: common.o client.o 12 | ${LD} -o $@ $^ ${LDLIBS} 13 | 14 | server: common.o server.o 15 | ${LD} -o $@ $^ ${LDLIBS} 16 | 17 | clean: 18 | rm -f *.o ${APPS} 19 | 20 | -------------------------------------------------------------------------------- /03_file-transfer/rdma-file-transfer/client.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "common.h" 5 | #include "messages.h" 6 | 7 | struct client_context 8 | { 9 | char *buffer; 10 | struct ibv_mr *buffer_mr; 11 | 12 | struct message *msg; 13 | struct ibv_mr *msg_mr; 14 | 15 | uint64_t peer_addr; 16 | uint32_t peer_rkey; 17 | 18 | int fd; 19 | const char *file_name; 20 | }; 21 | 22 | static void write_remote(struct rdma_cm_id *id, uint32_t len) 23 | { 24 | struct client_context *ctx = (struct client_context *)id->context; 25 | 26 | struct ibv_send_wr wr, *bad_wr = NULL; 27 | struct ibv_sge sge; 28 | 29 | memset(&wr, 0, sizeof(wr)); 30 | 31 | wr.wr_id = (uintptr_t)id; 32 | wr.opcode = IBV_WR_RDMA_WRITE_WITH_IMM; 33 | wr.send_flags = IBV_SEND_SIGNALED; 34 | wr.imm_data = htonl(len); 35 | wr.wr.rdma.remote_addr = ctx->peer_addr; 36 | wr.wr.rdma.rkey = ctx->peer_rkey; 37 | 38 | if (len) { 39 | wr.sg_list = &sge; 40 | wr.num_sge = 1; 41 | 42 | sge.addr = (uintptr_t)ctx->buffer; 43 | sge.length = len; 44 | sge.lkey = ctx->buffer_mr->lkey; 45 | } 46 | 47 | TEST_NZ(ibv_post_send(id->qp, &wr, &bad_wr)); 48 | } 49 | 50 | static void post_receive(struct rdma_cm_id *id) 51 | { 52 | struct client_context *ctx = (struct client_context *)id->context; 53 | 54 | struct ibv_recv_wr wr, *bad_wr = NULL; 55 | struct ibv_sge sge; 56 | 57 | memset(&wr, 0, sizeof(wr)); 58 | 59 | wr.wr_id = (uintptr_t)id; 60 | wr.sg_list = &sge; 61 | wr.num_sge = 1; 62 | 63 | sge.addr = (uintptr_t)ctx->msg; 64 | sge.length = sizeof(*ctx->msg); 65 | sge.lkey = ctx->msg_mr->lkey; 66 | 67 | TEST_NZ(ibv_post_recv(id->qp, &wr, &bad_wr)); 68 | } 69 | 70 | static void send_next_chunk(struct rdma_cm_id *id) 71 | { 72 | struct client_context *ctx = (struct client_context *)id->context; 73 | 74 | ssize_t size = 0; 75 | 76 | size = read(ctx->fd, ctx->buffer, BUFFER_SIZE); 77 | 78 | if (size == -1) 79 | rc_die("read() failed\n"); 80 | 81 | write_remote(id, size); 82 | } 83 | 84 | static void send_file_name(struct rdma_cm_id *id) 85 | { 86 | struct client_context *ctx = (struct client_context *)id->context; 87 | 88 | strcpy(ctx->buffer, ctx->file_name); 89 | 90 | write_remote(id, strlen(ctx->file_name) + 1); 91 | } 92 | 93 | static void on_pre_conn(struct rdma_cm_id *id) 94 | { 95 | struct client_context *ctx = (struct client_context *)id->context; 96 | 97 | posix_memalign((void **)&ctx->buffer, sysconf(_SC_PAGESIZE), BUFFER_SIZE); 98 | TEST_Z(ctx->buffer_mr = ibv_reg_mr(rc_get_pd(), ctx->buffer, BUFFER_SIZE, 0)); 99 | 100 | posix_memalign((void **)&ctx->msg, sysconf(_SC_PAGESIZE), sizeof(*ctx->msg)); 101 | TEST_Z(ctx->msg_mr = ibv_reg_mr(rc_get_pd(), ctx->msg, sizeof(*ctx->msg), IBV_ACCESS_LOCAL_WRITE)); 102 | 103 | post_receive(id); 104 | } 105 | 106 | static void on_completion(struct ibv_wc *wc) 107 | { 108 | struct rdma_cm_id *id = (struct rdma_cm_id *)(uintptr_t)(wc->wr_id); 109 | struct client_context *ctx = (struct client_context *)id->context; 110 | 111 | if (wc->opcode & IBV_WC_RECV) { 112 | if (ctx->msg->id == MSG_MR) { 113 | ctx->peer_addr = ctx->msg->data.mr.addr; 114 | ctx->peer_rkey = ctx->msg->data.mr.rkey; 115 | 116 | printf("received MR, sending file name\n"); 117 | send_file_name(id); 118 | } else if (ctx->msg->id == MSG_READY) { 119 | printf("received READY, sending chunk\n"); 120 | send_next_chunk(id); 121 | } else if (ctx->msg->id == MSG_DONE) { 122 | printf("received DONE, disconnecting\n"); 123 | rc_disconnect(id); 124 | return; 125 | } 126 | 127 | post_receive(id); 128 | } 129 | } 130 | 131 | int main(int argc, char **argv) 132 | { 133 | struct client_context ctx; 134 | 135 | if (argc != 3) { 136 | fprintf(stderr, "usage: %s \n", argv[0]); 137 | return 1; 138 | } 139 | 140 | ctx.file_name = basename(argv[2]); 141 | ctx.fd = open(argv[2], O_RDONLY); 142 | 143 | if (ctx.fd == -1) { 144 | fprintf(stderr, "unable to open input file \"%s\"\n", ctx.file_name); 145 | return 1; 146 | } 147 | 148 | rc_init( 149 | on_pre_conn, 150 | NULL, // on connect 151 | on_completion, 152 | NULL); // on disconnect 153 | 154 | rc_client_loop(argv[1], DEFAULT_PORT, &ctx); 155 | 156 | close(ctx.fd); 157 | 158 | return 0; 159 | } 160 | 161 | -------------------------------------------------------------------------------- /03_file-transfer/rdma-file-transfer/common.c: -------------------------------------------------------------------------------- 1 | #include "common.h" 2 | 3 | const int TIMEOUT_IN_MS = 500; 4 | 5 | struct context { 6 | struct ibv_context *ctx; 7 | struct ibv_pd *pd; 8 | struct ibv_cq *cq; 9 | struct ibv_comp_channel *comp_channel; 10 | 11 | pthread_t cq_poller_thread; 12 | }; 13 | 14 | static struct context *s_ctx = NULL; 15 | static pre_conn_cb_fn s_on_pre_conn_cb = NULL; 16 | static connect_cb_fn s_on_connect_cb = NULL; 17 | static completion_cb_fn s_on_completion_cb = NULL; 18 | static disconnect_cb_fn s_on_disconnect_cb = NULL; 19 | 20 | static void build_context(struct ibv_context *verbs); 21 | static void build_qp_attr(struct ibv_qp_init_attr *qp_attr); 22 | static void event_loop(struct rdma_event_channel *ec, int exit_on_disconnect); 23 | static void * poll_cq(void *); 24 | 25 | void build_connection(struct rdma_cm_id *id) 26 | { 27 | struct ibv_qp_init_attr qp_attr; 28 | 29 | build_context(id->verbs); 30 | build_qp_attr(&qp_attr); 31 | 32 | TEST_NZ(rdma_create_qp(id, s_ctx->pd, &qp_attr)); 33 | } 34 | 35 | void build_context(struct ibv_context *verbs) 36 | { 37 | if (s_ctx) { 38 | if (s_ctx->ctx != verbs) 39 | rc_die("cannot handle events in more than one context."); 40 | 41 | return; 42 | } 43 | 44 | s_ctx = (struct context *)malloc(sizeof(struct context)); 45 | 46 | s_ctx->ctx = verbs; 47 | 48 | TEST_Z(s_ctx->pd = ibv_alloc_pd(s_ctx->ctx)); 49 | TEST_Z(s_ctx->comp_channel = ibv_create_comp_channel(s_ctx->ctx)); 50 | TEST_Z(s_ctx->cq = ibv_create_cq(s_ctx->ctx, 10, NULL, s_ctx->comp_channel, 0)); /* cqe=10 is arbitrary */ 51 | TEST_NZ(ibv_req_notify_cq(s_ctx->cq, 0)); 52 | 53 | TEST_NZ(pthread_create(&s_ctx->cq_poller_thread, NULL, poll_cq, NULL)); 54 | } 55 | 56 | void build_params(struct rdma_conn_param *params) 57 | { 58 | memset(params, 0, sizeof(*params)); 59 | 60 | params->initiator_depth = params->responder_resources = 1; 61 | params->rnr_retry_count = 7; /* infinite retry */ 62 | } 63 | 64 | void build_qp_attr(struct ibv_qp_init_attr *qp_attr) 65 | { 66 | memset(qp_attr, 0, sizeof(*qp_attr)); 67 | 68 | qp_attr->send_cq = s_ctx->cq; 69 | qp_attr->recv_cq = s_ctx->cq; 70 | qp_attr->qp_type = IBV_QPT_RC; 71 | 72 | qp_attr->cap.max_send_wr = 10; 73 | qp_attr->cap.max_recv_wr = 10; 74 | qp_attr->cap.max_send_sge = 1; 75 | qp_attr->cap.max_recv_sge = 1; 76 | } 77 | 78 | void event_loop(struct rdma_event_channel *ec, int exit_on_disconnect) 79 | { 80 | struct rdma_cm_event *event = NULL; 81 | struct rdma_conn_param cm_params; 82 | 83 | build_params(&cm_params); 84 | 85 | while (rdma_get_cm_event(ec, &event) == 0) { 86 | struct rdma_cm_event event_copy; 87 | 88 | memcpy(&event_copy, event, sizeof(*event)); 89 | rdma_ack_cm_event(event); 90 | 91 | if (event_copy.event == RDMA_CM_EVENT_ADDR_RESOLVED) { 92 | build_connection(event_copy.id); 93 | 94 | if (s_on_pre_conn_cb) 95 | s_on_pre_conn_cb(event_copy.id); 96 | 97 | TEST_NZ(rdma_resolve_route(event_copy.id, TIMEOUT_IN_MS)); 98 | 99 | } else if (event_copy.event == RDMA_CM_EVENT_ROUTE_RESOLVED) { 100 | TEST_NZ(rdma_connect(event_copy.id, &cm_params)); 101 | 102 | } else if (event_copy.event == RDMA_CM_EVENT_CONNECT_REQUEST) { 103 | build_connection(event_copy.id); 104 | 105 | if (s_on_pre_conn_cb) 106 | s_on_pre_conn_cb(event_copy.id); 107 | 108 | TEST_NZ(rdma_accept(event_copy.id, &cm_params)); 109 | 110 | } else if (event_copy.event == RDMA_CM_EVENT_ESTABLISHED) { 111 | if (s_on_connect_cb) 112 | s_on_connect_cb(event_copy.id); 113 | 114 | } else if (event_copy.event == RDMA_CM_EVENT_DISCONNECTED) { 115 | rdma_destroy_qp(event_copy.id); 116 | 117 | if (s_on_disconnect_cb) 118 | s_on_disconnect_cb(event_copy.id); 119 | 120 | rdma_destroy_id(event_copy.id); 121 | 122 | if (exit_on_disconnect) 123 | break; 124 | 125 | } else { 126 | rc_die("unknown event\n"); 127 | } 128 | } 129 | } 130 | 131 | void * poll_cq(void *ctx) 132 | { 133 | struct ibv_cq *cq; 134 | struct ibv_wc wc; 135 | 136 | while (1) { 137 | TEST_NZ(ibv_get_cq_event(s_ctx->comp_channel, &cq, &ctx)); 138 | ibv_ack_cq_events(cq, 1); 139 | TEST_NZ(ibv_req_notify_cq(cq, 0)); 140 | 141 | while (ibv_poll_cq(cq, 1, &wc)) { 142 | if (wc.status == IBV_WC_SUCCESS) 143 | s_on_completion_cb(&wc); 144 | else 145 | rc_die("poll_cq: status is not IBV_WC_SUCCESS"); 146 | } 147 | } 148 | 149 | return NULL; 150 | } 151 | 152 | void rc_init(pre_conn_cb_fn pc, connect_cb_fn conn, completion_cb_fn comp, disconnect_cb_fn disc) 153 | { 154 | s_on_pre_conn_cb = pc; 155 | s_on_connect_cb = conn; 156 | s_on_completion_cb = comp; 157 | s_on_disconnect_cb = disc; 158 | } 159 | 160 | void rc_client_loop(const char *host, const char *port, void *context) 161 | { 162 | struct addrinfo *addr; 163 | struct rdma_cm_id *conn = NULL; 164 | struct rdma_event_channel *ec = NULL; 165 | struct rdma_conn_param cm_params; 166 | 167 | TEST_NZ(getaddrinfo(host, port, NULL, &addr)); 168 | 169 | TEST_Z(ec = rdma_create_event_channel()); 170 | TEST_NZ(rdma_create_id(ec, &conn, NULL, RDMA_PS_TCP)); 171 | TEST_NZ(rdma_resolve_addr(conn, NULL, addr->ai_addr, TIMEOUT_IN_MS)); 172 | 173 | freeaddrinfo(addr); 174 | 175 | conn->context = context; 176 | 177 | build_params(&cm_params); 178 | 179 | event_loop(ec, 1); // exit on disconnect 180 | 181 | rdma_destroy_event_channel(ec); 182 | } 183 | 184 | void rc_server_loop(const char *port) 185 | { 186 | struct sockaddr_in6 addr; 187 | struct rdma_cm_id *listener = NULL; 188 | struct rdma_event_channel *ec = NULL; 189 | 190 | memset(&addr, 0, sizeof(addr)); 191 | addr.sin6_family = AF_INET6; 192 | addr.sin6_port = htons(atoi(port)); 193 | 194 | TEST_Z(ec = rdma_create_event_channel()); 195 | TEST_NZ(rdma_create_id(ec, &listener, NULL, RDMA_PS_TCP)); 196 | TEST_NZ(rdma_bind_addr(listener, (struct sockaddr *)&addr)); 197 | TEST_NZ(rdma_listen(listener, 10)); /* backlog=10 is arbitrary */ 198 | 199 | event_loop(ec, 0); // don't exit on disconnect 200 | 201 | rdma_destroy_id(listener); 202 | rdma_destroy_event_channel(ec); 203 | } 204 | 205 | void rc_disconnect(struct rdma_cm_id *id) 206 | { 207 | rdma_disconnect(id); 208 | } 209 | 210 | void rc_die(const char *reason) 211 | { 212 | fprintf(stderr, "%s\n", reason); 213 | exit(EXIT_FAILURE); 214 | } 215 | 216 | struct ibv_pd * rc_get_pd() 217 | { 218 | return s_ctx->pd; 219 | } 220 | -------------------------------------------------------------------------------- /03_file-transfer/rdma-file-transfer/common.h: -------------------------------------------------------------------------------- 1 | #ifndef RDMA_COMMON_H 2 | #define RDMA_COMMON_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #define TEST_NZ(x) do { if ( (x)) rc_die("error: " #x " failed (returned non-zero)." ); } while (0) 12 | #define TEST_Z(x) do { if (!(x)) rc_die("error: " #x " failed (returned zero/null)."); } while (0) 13 | 14 | typedef void (*pre_conn_cb_fn)(struct rdma_cm_id *id); 15 | typedef void (*connect_cb_fn)(struct rdma_cm_id *id); 16 | typedef void (*completion_cb_fn)(struct ibv_wc *wc); 17 | typedef void (*disconnect_cb_fn)(struct rdma_cm_id *id); 18 | 19 | void rc_init(pre_conn_cb_fn, connect_cb_fn, completion_cb_fn, disconnect_cb_fn); 20 | void rc_client_loop(const char *host, const char *port, void *context); 21 | void rc_disconnect(struct rdma_cm_id *id); 22 | void rc_die(const char *message); 23 | struct ibv_pd * rc_get_pd(); 24 | void rc_server_loop(const char *port); 25 | 26 | #endif 27 | -------------------------------------------------------------------------------- /03_file-transfer/rdma-file-transfer/messages.h: -------------------------------------------------------------------------------- 1 | #ifndef RDMA_MESSAGES_H 2 | #define RDMA_MESSAGES_H 3 | 4 | const char *DEFAULT_PORT = "12345"; 5 | const size_t BUFFER_SIZE = 10 * 1024 * 1024; 6 | 7 | enum message_id 8 | { 9 | MSG_INVALID = 0, 10 | MSG_MR, 11 | MSG_READY, 12 | MSG_DONE 13 | }; 14 | 15 | struct message 16 | { 17 | int id; 18 | 19 | union 20 | { 21 | struct 22 | { 23 | uint64_t addr; 24 | uint32_t rkey; 25 | } mr; 26 | } data; 27 | }; 28 | 29 | #endif 30 | -------------------------------------------------------------------------------- /03_file-transfer/rdma-file-transfer/server.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "common.h" 5 | #include "messages.h" 6 | 7 | #define MAX_FILE_NAME 256 8 | 9 | struct conn_context 10 | { 11 | char *buffer; 12 | struct ibv_mr *buffer_mr; 13 | 14 | struct message *msg; 15 | struct ibv_mr *msg_mr; 16 | 17 | int fd; 18 | char file_name[MAX_FILE_NAME]; 19 | }; 20 | 21 | static void send_message(struct rdma_cm_id *id) 22 | { 23 | struct conn_context *ctx = (struct conn_context *)id->context; 24 | 25 | struct ibv_send_wr wr, *bad_wr = NULL; 26 | struct ibv_sge sge; 27 | 28 | memset(&wr, 0, sizeof(wr)); 29 | 30 | wr.wr_id = (uintptr_t)id; 31 | wr.opcode = IBV_WR_SEND; 32 | wr.sg_list = &sge; 33 | wr.num_sge = 1; 34 | wr.send_flags = IBV_SEND_SIGNALED; 35 | 36 | sge.addr = (uintptr_t)ctx->msg; 37 | sge.length = sizeof(*ctx->msg); 38 | sge.lkey = ctx->msg_mr->lkey; 39 | 40 | TEST_NZ(ibv_post_send(id->qp, &wr, &bad_wr)); 41 | } 42 | 43 | static void post_receive(struct rdma_cm_id *id) 44 | { 45 | struct ibv_recv_wr wr, *bad_wr = NULL; 46 | 47 | memset(&wr, 0, sizeof(wr)); 48 | 49 | wr.wr_id = (uintptr_t)id; 50 | wr.sg_list = NULL; 51 | wr.num_sge = 0; 52 | 53 | TEST_NZ(ibv_post_recv(id->qp, &wr, &bad_wr)); 54 | } 55 | 56 | static void on_pre_conn(struct rdma_cm_id *id) 57 | { 58 | struct conn_context *ctx = (struct conn_context *)malloc(sizeof(struct conn_context)); 59 | 60 | id->context = ctx; 61 | 62 | ctx->file_name[0] = '\0'; // take this to mean we don't have the file name 63 | 64 | posix_memalign((void **)&ctx->buffer, sysconf(_SC_PAGESIZE), BUFFER_SIZE); 65 | TEST_Z(ctx->buffer_mr = ibv_reg_mr(rc_get_pd(), ctx->buffer, BUFFER_SIZE, IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE)); 66 | 67 | posix_memalign((void **)&ctx->msg, sysconf(_SC_PAGESIZE), sizeof(*ctx->msg)); 68 | TEST_Z(ctx->msg_mr = ibv_reg_mr(rc_get_pd(), ctx->msg, sizeof(*ctx->msg), 0)); 69 | 70 | post_receive(id); 71 | } 72 | 73 | static void on_connection(struct rdma_cm_id *id) 74 | { 75 | struct conn_context *ctx = (struct conn_context *)id->context; 76 | 77 | ctx->msg->id = MSG_MR; 78 | ctx->msg->data.mr.addr = (uintptr_t)ctx->buffer_mr->addr; 79 | ctx->msg->data.mr.rkey = ctx->buffer_mr->rkey; 80 | 81 | send_message(id); 82 | } 83 | 84 | static void on_completion(struct ibv_wc *wc) 85 | { 86 | struct rdma_cm_id *id = (struct rdma_cm_id *)(uintptr_t)wc->wr_id; 87 | struct conn_context *ctx = (struct conn_context *)id->context; 88 | 89 | if (wc->opcode == IBV_WC_RECV_RDMA_WITH_IMM) { 90 | uint32_t size = ntohl(wc->imm_data); 91 | 92 | if (size == 0) { 93 | ctx->msg->id = MSG_DONE; 94 | send_message(id); 95 | 96 | // don't need post_receive() since we're done with this connection 97 | 98 | } else if (ctx->file_name[0]) { 99 | ssize_t ret; 100 | 101 | printf("received %i bytes.\n", size); 102 | 103 | ret = write(ctx->fd, ctx->buffer, size); 104 | 105 | if (ret != size) 106 | rc_die("write() failed"); 107 | 108 | post_receive(id); 109 | 110 | ctx->msg->id = MSG_READY; 111 | send_message(id); 112 | 113 | } else { 114 | size = (size > MAX_FILE_NAME) ? MAX_FILE_NAME : size; 115 | memcpy(ctx->file_name, ctx->buffer, size); 116 | ctx->file_name[size - 1] = '\0'; 117 | 118 | printf("opening file %s\n", ctx->file_name); 119 | 120 | ctx->fd = open(ctx->file_name, O_WRONLY | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); 121 | 122 | if (ctx->fd == -1) 123 | rc_die("open() failed"); 124 | 125 | post_receive(id); 126 | 127 | ctx->msg->id = MSG_READY; 128 | send_message(id); 129 | } 130 | } 131 | } 132 | 133 | static void on_disconnect(struct rdma_cm_id *id) 134 | { 135 | struct conn_context *ctx = (struct conn_context *)id->context; 136 | 137 | close(ctx->fd); 138 | 139 | ibv_dereg_mr(ctx->buffer_mr); 140 | ibv_dereg_mr(ctx->msg_mr); 141 | 142 | free(ctx->buffer); 143 | free(ctx->msg); 144 | 145 | printf("finished transferring %s\n", ctx->file_name); 146 | 147 | free(ctx); 148 | } 149 | 150 | int main(int argc, char **argv) 151 | { 152 | rc_init( 153 | on_pre_conn, 154 | on_connection, 155 | on_completion, 156 | on_disconnect); 157 | 158 | printf("waiting for connections. interrupt (^C) to exit.\n"); 159 | 160 | rc_server_loop(DEFAULT_PORT); 161 | 162 | return 0; 163 | } 164 | -------------------------------------------------------------------------------- /03_file-transfer/results.txt: -------------------------------------------------------------------------------- 1 | ib-host-1$ ./server 2 | waiting for connections. interrupt (^C) to exit. 3 | opening file test-file 4 | received 10485760 bytes. 5 | received 10485760 bytes. 6 | received 5242880 bytes. 7 | finished transferring test-file 8 | ^C 9 | 10 | ib-host-1$ md5sum test-file 11 | 5815ed31a65c5da9745764c887f5f777 test-file 12 | 13 | ib-host-2$ dd if=/dev/urandom of=test-file bs=1048576 count=25 14 | 25+0 records in 15 | 25+0 records out 16 | 26214400 bytes (26 MB) copied, 3.11979 seconds, 8.4 MB/s 17 | 18 | ib-host-2$ md5sum test-file 19 | 5815ed31a65c5da9745764c887f5f777 test-file 20 | 21 | ib-host-2$ ./client ib-host-1 test-file 22 | received MR, sending file name 23 | received READY, sending chunk 24 | received READY, sending chunk 25 | received READY, sending chunk 26 | received READY, sending chunk 27 | received DONE, disconnecting 28 | -------------------------------------------------------------------------------- /03_file-transfer/sequence.txt: -------------------------------------------------------------------------------- 1 | title File Transfer 2 | 3 | note left of Client: Post receive 4 | Client->Server: Initiate connection 5 | note right of Server: Post receive 6 | Server->Client: Accept connection 7 | Server->Client: Send memory region details (MSG_MR) 8 | note left of Client: Re-post receive 9 | Client->Server: RDMA-write file name 10 | note right of Server: Open file descriptor 11 | note right of Server: Re-post receive 12 | Server->Client: Send ready-to-receive (MSG_READY) 13 | note left of Client: Re-post receive 14 | note left of Client: Read chunk from file 15 | Client->Server: RDMA-write chunk 16 | note right of Server: Write chunk to file 17 | note right of Server: Re-post receive 18 | Server->Client: Send ready-to-receive (MSG_READY) 19 | note left of Client: Re-post receive 20 | Client->Server: RDMA-write zero bytes 21 | Server->Client: Send done (MSG_DONE) 22 | Client->Server: Close connection 23 | note right of Server: Close file descriptor 24 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2013, Tarick Bedeir. 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of 4 | this software and associated documentation files (the "Software"), to deal in 5 | the Software without restriction, including without limitation the rights to 6 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 7 | the Software, and to permit persons to whom the Software is furnished to do so, 8 | subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 15 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 16 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 17 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 18 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 19 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | the-geek-in-the-corner 2 | ====================== 3 | 4 | Sample code from thegeekinthecorner.com 5 | --------------------------------------------------------------------------------