├── 01 ├── Makefile ├── README.md └── service.c └── README.md /01/Makefile: -------------------------------------------------------------------------------- 1 | service: service.c 2 | cc service.c -o service -g -libverbs 3 | clean: 4 | rm -rf ./*.o ./service 5 | -------------------------------------------------------------------------------- /01/README.md: -------------------------------------------------------------------------------- 1 | # Mellanox RDMA文档中的样例 2 | 编译库的需求:`libibverbs ` 3 | 编译参数:GCC <文件名> -o service -libverbs 4 | 运行方式: 5 | 1. 有IB网络支持: 6 | 服务端:./service 7 | 客户端:./service 服务端IP 8 | 2. 走ROCE: 9 | 服务端:./service -g 0 10 | 客户端:./service -g 0 服务端IP 11 | -------------------------------------------------------------------------------- /01/service.c: -------------------------------------------------------------------------------- 1 | /* 2 | * BUILD COMMAND: 3 | * gcc -Wall -I/usr/local/ofed/include -O2 -o RDMA_RC_example -L/usr/local/ofed/lib64 -L/usr/local/ofed/lib - 4 | libverbs RDMA_RC_example.c 5 | * 6 | */ 7 | /****************************************************************************** 8 | * 9 | * RDMA Aware Networks Programming Example 10 | * 11 | * This code demonstrates how to perform the following operations using the * VPI Verbs API: 12 | * 13 | * Send 14 | * Receive 15 | * RDMA Read 16 | * RDMA Write 17 | * 18 | *****************************************************************************/ 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | /* poll CQ timeout in millisec (2 seconds) */ 36 | #define MAX_POLL_CQ_TIMEOUT 2000 37 | #define MSG "SEND operation " 38 | #define RDMAMSGR "RDMA read operation " 39 | #define RDMAMSGW "RDMA write operation" 40 | #define MSG_SIZE (strlen(MSG) + 1) 41 | #if __BYTE_ORDER == __LITTLE_ENDIAN 42 | static inline uint64_t htonll(uint64_t x) { return bswap_64(x); } 43 | static inline uint64_t ntohll(uint64_t x) { return bswap_64(x); } 44 | #elif __BYTE_ORDER == __BIG_ENDIAN 45 | static inline uint64_t htonll(uint64_t x) { return x; } 46 | static inline uint64_t ntohll(uint64_t x) { return x; } 47 | #else 48 | #error __BYTE_ORDER is neither __LITTLE_ENDIAN nor __BIG_ENDIAN 49 | #endif 50 | /* structure of test parameters */ 51 | struct config_t 52 | { 53 | const char *dev_name; /* IB device name */ 54 | char *server_name; /* server host name */ 55 | u_int32_t tcp_port; /* server TCP port */ 56 | int ib_port; /* local IB port to work with */ 57 | int gid_idx; /* gid index to use */ 58 | }; 59 | /* structure to exchange data which is needed to connect the QPs */ 60 | struct cm_con_data_t 61 | { 62 | uint64_t addr; /* Buffer address */ 63 | uint32_t rkey; /* Remote key */ 64 | uint32_t qp_num; /* QP number */ 65 | uint16_t lid; /* LID of the IB port */ 66 | uint8_t gid[16]; /* gid */ 67 | } __attribute__((packed)); 68 | 69 | /* structure of system resources */ 70 | struct resources 71 | { 72 | struct ibv_device_attr 73 | device_attr; 74 | /* Device attributes */ 75 | struct ibv_port_attr port_attr; /* IB port attributes */ 76 | struct cm_con_data_t remote_props; /* values to connect to remote side */ 77 | struct ibv_context *ib_ctx; /* device handle */ 78 | struct ibv_pd *pd; /* PD handle */ 79 | struct ibv_cq *cq; /* CQ handle */ 80 | struct ibv_qp *qp; /* QP handle */ 81 | struct ibv_mr *mr; /* MR handle for buf */ 82 | char *buf; /* memory buffer pointer, used for RDMA and send 83 | ops */ 84 | int sock; /* TCP socket file descriptor */ 85 | }; 86 | struct config_t config = { 87 | NULL, /* dev_name */ 88 | NULL, /* server_name */ 89 | 19875, /* tcp_port */ 90 | 1, /* ib_port */ 91 | -1 /* gid_idx */}; 92 | 93 | /****************************************************************************** 94 | Socket operations 95 | For simplicity, the example program uses TCP sockets to exchange control 96 | information. If a TCP/IP stack/connection is not available, connection manager 97 | (CM) may be used to pass this information. Use of CM is beyond the scope of 98 | this example 99 | ******************************************************************************/ 100 | /****************************************************************************** 101 | * Function: sock_connect 102 | * 103 | * Input 104 | * servername URL of server to connect to (NULL for server mode) 105 | * port port of service 106 | * 107 | * Output 108 | * none 109 | * 110 | * Returns 111 | * socket (fd) on success, negative error code on failure 112 | * 113 | * Description 114 | * Connect a socket. If servername is specified a client connection will be 115 | * initiated to the indicated server and port. Otherwise listen on the 116 | * indicated port for an incoming connection. 117 | * 118 | ******************************************************************************/ 119 | static int sock_connect(const char *servername, int port) 120 | { 121 | struct addrinfo *resolved_addr = NULL; 122 | struct addrinfo *iterator; 123 | char service[6]; 124 | int sockfd = -1; 125 | int listenfd = 0; 126 | int tmp; 127 | struct addrinfo hints = 128 | { 129 | .ai_flags = AI_PASSIVE, 130 | .ai_family = AF_INET, 131 | .ai_socktype = SOCK_STREAM}; 132 | if (sprintf(service, "%d", port) < 0) 133 | goto sock_connect_exit; 134 | /* Resolve DNS address, use sockfd as temp storage */ 135 | sockfd = getaddrinfo(servername, service, &hints, &resolved_addr); 136 | if (sockfd < 0) 137 | { 138 | fprintf(stderr, "%s for %s:%d\n", gai_strerror(sockfd), servername, port); 139 | goto sock_connect_exit; 140 | } 141 | /* Search through results and find the one we want */ 142 | for (iterator = resolved_addr; iterator; iterator = iterator->ai_next) 143 | { 144 | sockfd = socket(iterator->ai_family, iterator->ai_socktype, iterator->ai_protocol); 145 | if (sockfd >= 0) 146 | { 147 | if (servername){ 148 | /* Client mode. Initiate connection to remote */ 149 | if ((tmp = connect(sockfd, iterator->ai_addr, iterator->ai_addrlen))) 150 | { 151 | fprintf(stdout, "failed connect \n"); 152 | close(sockfd); 153 | sockfd = -1; 154 | } 155 | } 156 | else 157 | { 158 | /* Server mode. Set up listening socket an accept a connection */ 159 | listenfd = sockfd; 160 | sockfd = -1; 161 | if (bind(listenfd, iterator->ai_addr, iterator->ai_addrlen)) 162 | goto sock_connect_exit; 163 | listen(listenfd, 1); 164 | sockfd = accept(listenfd, NULL, 0); 165 | } 166 | } 167 | } 168 | sock_connect_exit: 169 | if (listenfd) 170 | close(listenfd); 171 | if (resolved_addr) 172 | freeaddrinfo(resolved_addr); 173 | if (sockfd < 0) 174 | { 175 | if (servername) 176 | fprintf(stderr, "Couldn't connect to %s:%d\n", servername, port); 177 | else 178 | { 179 | perror("server accept"); 180 | fprintf(stderr, "accept() failed\n"); 181 | } 182 | } 183 | return sockfd; 184 | } 185 | /****************************************************************************** 186 | * Function: sock_sync_data 187 | * 188 | * Input 189 | * sock socket to transfer data on 190 | * xfer_size size of data to transfer 191 | * local_data pointer to data to be sent to remote 192 | * 193 | * Output 194 | * remote_data pointer to buffer to receive remote data 195 | * 196 | * Returns 197 | * 0 on success, negative error code on failure 198 | * 199 | * Description 200 | * Sync data across a socket. The indicated local data will be sent to the 201 | * remote. It will then wait for the remote to send its data back. It is 202 | * assumed that the two sides are in sync and call this function in the proper 203 | * order. Chaos will ensue if they are not. :) 204 | * 205 | * Also note this is a blocking function and will wait for the full data to be 206 | * received from the remote. 207 | * 208 | ******************************************************************************/ 209 | int sock_sync_data(int sock, int xfer_size, char *local_data, char *remote_data) 210 | { 211 | int rc; 212 | int read_bytes = 0; 213 | int total_read_bytes = 0; 214 | rc = write(sock, local_data, xfer_size); 215 | if (rc < xfer_size) 216 | fprintf(stderr, "Failed writing data during sock_sync_data\n"); 217 | else 218 | rc = 0; 219 | while (!rc && total_read_bytes < xfer_size) 220 | { 221 | read_bytes = read(sock, remote_data, xfer_size); 222 | if (read_bytes > 0) 223 | total_read_bytes += read_bytes; 224 | else 225 | rc = read_bytes; 226 | } 227 | return rc; 228 | } 229 | /****************************************************************************** 230 | End of socket operations 231 | ******************************************************************************/ 232 | /* poll_completion */ 233 | /****************************************************************************** 234 | * Function: poll_completion 235 | * 236 | * Input 237 | * res pointer to resources structure 238 | * 239 | * Output 240 | * none 241 | * 242 | * Returns 243 | * 0 on success, 1 on failure 244 | * 245 | * Description 246 | * Poll the completion queue for a single event. This function will continue to 247 | * poll the queue until MAX_POLL_CQ_TIMEOUT milliseconds have passed. 248 | * 249 | ******************************************************************************/ 250 | static int poll_completion(struct resources *res) 251 | { 252 | struct ibv_wc wc; 253 | unsigned long start_time_msec; 254 | unsigned long cur_time_msec; 255 | struct timeval cur_time; 256 | int poll_result; 257 | int rc = 0; 258 | /* poll the completion for a while before giving up of doing it .. */ 259 | gettimeofday(&cur_time, NULL); 260 | start_time_msec = (cur_time.tv_sec * 1000) + (cur_time.tv_usec / 1000); 261 | do 262 | { 263 | poll_result = ibv_poll_cq(res->cq, 1, &wc); 264 | gettimeofday(&cur_time, NULL); 265 | cur_time_msec = (cur_time.tv_sec * 1000) + (cur_time.tv_usec / 1000); 266 | } while ((poll_result == 0) && ((cur_time_msec - start_time_msec) < MAX_POLL_CQ_TIMEOUT)); 267 | if (poll_result < 0) 268 | { 269 | /* poll CQ failed */ 270 | fprintf(stderr, "poll CQ failed\n"); 271 | rc = 1; 272 | } 273 | else if (poll_result == 0) 274 | { /* the CQ is empty */ 275 | fprintf(stderr, "completion wasn't found in the CQ after timeout\n"); 276 | rc = 1; 277 | } 278 | else 279 | { 280 | /* CQE found */ 281 | fprintf(stdout, "completion was found in CQ with status 0x%x\n", wc.status); 282 | /* check the completion status (here we don't care about the completion opcode */ 283 | if (wc.status != IBV_WC_SUCCESS) 284 | { 285 | fprintf(stderr, "got bad completion with status: 0x%x, vendor syndrome: 0x%x\n", wc.status, 286 | wc.vendor_err); 287 | rc = 1; 288 | } 289 | } 290 | return rc; 291 | } 292 | /****************************************************************************** 293 | * Function: post_send 294 | * 295 | * Input 296 | * res pointer to resources structure 297 | * opcode IBV_WR_SEND, IBV_WR_RDMA_READ or IBV_WR_RDMA_WRITE 298 | * 299 | * Output 300 | * none 301 | * 302 | * Returns 303 | * 0 on success, error code on failure 304 | * 305 | * Description 306 | * This function will create and post a send work request 307 | ******************************************************************************/ 308 | static int post_send(struct resources *res, int opcode) 309 | { 310 | struct ibv_send_wr sr; 311 | struct ibv_sge sge; 312 | struct ibv_send_wr *bad_wr = NULL; 313 | int rc; 314 | /* prepare the scatter/gather entry */ 315 | memset(&sge, 0, sizeof(sge)); 316 | sge.addr = (uintptr_t)res->buf; 317 | sge.length = MSG_SIZE; 318 | sge.lkey = res->mr->lkey; 319 | /* prepare the send work request */ 320 | memset(&sr, 0, sizeof(sr)); 321 | sr.next = NULL; 322 | sr.wr_id = 0; 323 | sr.sg_list = &sge; 324 | sr.num_sge = 1; 325 | sr.opcode = opcode; 326 | sr.send_flags = IBV_SEND_SIGNALED; 327 | if (opcode != IBV_WR_SEND) 328 | { 329 | sr.wr.rdma.remote_addr = res->remote_props.addr; 330 | sr.wr.rdma.rkey = res->remote_props.rkey; 331 | } 332 | /* there is a Receive Request in the responder side, so we won't get any into RNR flow */ 333 | rc = ibv_post_send(res->qp, &sr, &bad_wr); 334 | if (rc) 335 | fprintf(stderr, "failed to post SR\n"); 336 | else 337 | { 338 | switch (opcode) 339 | { 340 | case IBV_WR_SEND: 341 | fprintf(stdout, "Send Request was posted\n"); 342 | break; 343 | case IBV_WR_RDMA_READ: 344 | fprintf(stdout, "RDMA Read Request was posted\n"); 345 | break; 346 | case IBV_WR_RDMA_WRITE: 347 | fprintf(stdout, "RDMA Write Request was posted\n"); 348 | break; 349 | default: 350 | fprintf(stdout, "Unknown Request was posted\n"); 351 | break; 352 | } 353 | } 354 | return rc; 355 | } 356 | /****************************************************************************** 357 | * Function: post_receive 358 | * 359 | * Input 360 | * res pointer to resources structure 361 | * 362 | * Output 363 | * none 364 | * 365 | * Returns 366 | * 0 on success, error code on failure 367 | * 368 | * Description 369 | * 370 | ******************************************************************************/ 371 | static int post_receive(struct resources *res) 372 | { 373 | struct ibv_recv_wr rr; 374 | struct ibv_sge sge; 375 | struct ibv_recv_wr *bad_wr; 376 | int rc; 377 | /* prepare the scatter/gather entry */ 378 | memset(&sge, 0, sizeof(sge)); 379 | sge.addr = (uintptr_t)res->buf; 380 | sge.length = MSG_SIZE; 381 | sge.lkey = res->mr->lkey; 382 | /* prepare the receive work request */ 383 | memset(&rr, 0, sizeof(rr)); 384 | rr.next = NULL; 385 | rr.wr_id = 0; 386 | rr.sg_list = &sge; 387 | rr.num_sge = 1; 388 | /* post the Receive Request to the RQ */ 389 | rc = ibv_post_recv(res->qp, &rr, &bad_wr); 390 | if (rc) 391 | fprintf(stderr, "failed to post RR\n"); 392 | else 393 | fprintf(stdout, "Receive Request was posted\n"); 394 | return rc; 395 | } 396 | /****************************************************************************** 397 | * Function: resources_init 398 | * 399 | * Input 400 | * res pointer to resources structure 401 | * 402 | * Output 403 | * res is initialized 404 | * 405 | * Returns 406 | * none 407 | * 408 | * Description 409 | * res is initialized to default values 410 | ******************************************************************************/ 411 | static void resources_init(struct resources *res) 412 | { 413 | memset(res, 0, sizeof *res); 414 | res->sock = -1; 415 | } 416 | /****************************************************************************** 417 | * Function: resources_create 418 | * 419 | * Input 420 | * res pointer to resources structure to be filled in 421 | * 422 | * Output 423 | * res filled in with resources 424 | * 425 | * Returns 426 | * 0 on success, 1 on failure 427 | * 428 | * Description 429 | * 430 | * This function creates and allocates all necessary system resources. These 431 | * are stored in res. 432 | *****************************************************************************/ 433 | static int resources_create(struct resources *res) 434 | { 435 | struct ibv_device **dev_list = NULL; 436 | struct ibv_qp_init_attr qp_init_attr; 437 | struct ibv_device *ib_dev = NULL; 438 | size_t size; 439 | int i; 440 | int mr_flags = 0; 441 | int cq_size = 0; 442 | int num_devices; 443 | int rc = 0; 444 | /* if client side */ 445 | if (config.server_name) 446 | { 447 | res->sock = sock_connect(config.server_name, config.tcp_port); 448 | if (res->sock < 0) 449 | { 450 | fprintf(stderr, "failed to establish TCP connection to server %s, port %d\n", 451 | config.server_name, config.tcp_port); 452 | rc = -1; 453 | goto resources_create_exit; 454 | } 455 | } 456 | else 457 | { 458 | fprintf(stdout, "waiting on port %d for TCP connection\n", config.tcp_port); 459 | res->sock = sock_connect(NULL, config.tcp_port); 460 | if (res->sock < 0) 461 | { 462 | fprintf(stderr, "failed to establish TCP connection with client on port %d\n", 463 | config.tcp_port); 464 | rc = -1; 465 | goto resources_create_exit; 466 | } 467 | } 468 | fprintf(stdout, "TCP connection was established\n"); 469 | fprintf(stdout, "searching for IB devices in host\n"); 470 | /* get device names in the system */ 471 | dev_list = ibv_get_device_list(&num_devices); 472 | if (!dev_list) 473 | { 474 | fprintf(stderr, "failed to get IB devices list\n"); 475 | rc = 1; 476 | goto resources_create_exit; 477 | } 478 | /* if there isn't any IB device in host */ 479 | if (!num_devices) 480 | { 481 | fprintf(stderr, "found %d device(s)\n", num_devices); 482 | rc = 1; 483 | goto resources_create_exit; 484 | } 485 | fprintf(stdout, "found %d device(s)\n", num_devices); 486 | /* search for the specific device we want to work with */ 487 | for (i = 0; i < num_devices; i++) 488 | { 489 | if (!config.dev_name) 490 | { 491 | config.dev_name = strdup(ibv_get_device_name(dev_list[i])); 492 | fprintf(stdout, "device not specified, using first one found: %s\n", config.dev_name); 493 | } 494 | if (!strcmp(ibv_get_device_name(dev_list[i]), config.dev_name)) 495 | { 496 | ib_dev = dev_list[i]; 497 | break; 498 | } 499 | } 500 | /* if the device wasn't found in host */ 501 | if (!ib_dev) 502 | { 503 | fprintf(stderr, "IB device %s wasn't found\n", config.dev_name); 504 | rc = 1; 505 | goto resources_create_exit; 506 | } 507 | /* get device handle */ 508 | res->ib_ctx = ibv_open_device(ib_dev); 509 | if (!res->ib_ctx) 510 | { 511 | fprintf(stderr, "failed to open device %s\n", config.dev_name); 512 | rc = 1; 513 | goto resources_create_exit; 514 | } 515 | /* We are now done with device list, free it */ 516 | ibv_free_device_list(dev_list); 517 | dev_list = NULL; 518 | ib_dev = NULL; 519 | /* query port properties */ 520 | if (ibv_query_port(res->ib_ctx, config.ib_port, &res->port_attr)) 521 | { 522 | fprintf(stderr, "ibv_query_port on port %u failed\n", config.ib_port); 523 | rc = 1; 524 | goto resources_create_exit; 525 | } 526 | /* allocate Protection Domain */ 527 | res->pd = ibv_alloc_pd(res->ib_ctx); 528 | if (!res->pd) 529 | { 530 | fprintf(stderr, "ibv_alloc_pd failed\n"); 531 | rc = 1; 532 | goto resources_create_exit; 533 | } 534 | /* each side will send only one WR, so Completion Queue with 1 entry is enough */ 535 | cq_size = 1; 536 | res->cq = ibv_create_cq(res->ib_ctx, cq_size, NULL, NULL, 0); 537 | if (!res->cq) 538 | { 539 | fprintf(stderr, "failed to create CQ with %u entries\n", cq_size); 540 | rc = 1; 541 | goto resources_create_exit; 542 | } 543 | /* allocate the memory buffer that will hold the data */ 544 | size = MSG_SIZE; 545 | res->buf = (char *)malloc(size); 546 | if (!res->buf) 547 | { 548 | fprintf(stderr, "failed to malloc %Zu bytes to memory buffer\n", size); 549 | rc = 1; 550 | goto resources_create_exit; 551 | } 552 | memset(res->buf, 0, size); 553 | /* only in the server side put the message in the memory buffer */ 554 | if (!config.server_name) 555 | { 556 | strcpy(res->buf, MSG); 557 | fprintf(stdout, "going to send the message: '%s'\n", res->buf); 558 | } 559 | else 560 | memset(res->buf, 0, size); 561 | /* register the memory buffer */ 562 | mr_flags = IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_READ | IBV_ACCESS_REMOTE_WRITE; 563 | res->mr = ibv_reg_mr(res->pd, res->buf, size, mr_flags); 564 | if (!res->mr) 565 | { 566 | fprintf(stderr, "ibv_reg_mr failed with mr_flags=0x%x\n", mr_flags); 567 | rc = 1; 568 | goto resources_create_exit; 569 | } 570 | fprintf(stdout, "MR was registered with addr=%p, lkey=0x%x, rkey=0x%x, flags=0x%x\n", 571 | res->buf, res->mr->lkey, res->mr->rkey, mr_flags); 572 | /* create the Queue Pair */ 573 | memset(&qp_init_attr, 0, sizeof(qp_init_attr)); 574 | qp_init_attr.qp_type = IBV_QPT_RC; 575 | qp_init_attr.sq_sig_all = 1; 576 | qp_init_attr.send_cq = res->cq; 577 | qp_init_attr.recv_cq = res->cq; 578 | qp_init_attr.cap.max_send_wr = 1; 579 | qp_init_attr.cap.max_recv_wr = 1; 580 | qp_init_attr.cap.max_send_sge = 1; 581 | qp_init_attr.cap.max_recv_sge = 1; 582 | res->qp = ibv_create_qp(res->pd, &qp_init_attr); 583 | if (!res->qp) 584 | { 585 | fprintf(stderr, "failed to create QP\n"); 586 | rc = 1; 587 | goto resources_create_exit; 588 | } 589 | fprintf(stdout, "QP was created, QP number=0x%x\n", res->qp->qp_num); 590 | resources_create_exit: 591 | if (rc) 592 | { 593 | /* Error encountered, cleanup */ 594 | if (res->qp) 595 | { 596 | ibv_destroy_qp(res->qp); 597 | res->qp = NULL; 598 | } 599 | if (res->mr) 600 | { 601 | ibv_dereg_mr(res->mr); 602 | res->mr = NULL; 603 | } 604 | if (res->buf) 605 | { 606 | free(res->buf); 607 | res->buf = NULL; 608 | } 609 | if (res->cq) 610 | { 611 | ibv_destroy_cq(res->cq); 612 | res->cq = NULL; 613 | } 614 | if (res->pd) 615 | { 616 | ibv_dealloc_pd(res->pd); 617 | res->pd = NULL; 618 | } 619 | if (res->ib_ctx) 620 | { 621 | ibv_close_device(res->ib_ctx); 622 | res->ib_ctx = NULL; 623 | } 624 | if (dev_list) 625 | { 626 | ibv_free_device_list(dev_list); 627 | dev_list = NULL; 628 | } 629 | if (res->sock >= 0) 630 | { 631 | if (close(res->sock)) 632 | fprintf(stderr, "failed to close socket\n"); 633 | res->sock = -1; 634 | } 635 | } 636 | return rc; 637 | } 638 | /****************************************************************************** 639 | * Function: modify_qp_to_init 640 | * 641 | * Input 642 | * qp QP to transition 643 | * 644 | * Output 645 | * none 646 | * 647 | * Returns 648 | * 0 on success, ibv_modify_qp failure code on failure 649 | * 650 | * Description 651 | * Transition a QP from the RESET to INIT state 652 | ******************************************************************************/ 653 | static int modify_qp_to_init(struct ibv_qp *qp) 654 | { 655 | struct ibv_qp_attr attr; 656 | int flags; 657 | int rc; 658 | memset(&attr, 0, sizeof(attr)); 659 | attr.qp_state = IBV_QPS_INIT; 660 | attr.port_num = config.ib_port; 661 | attr.pkey_index = 0; 662 | attr.qp_access_flags = IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_READ | IBV_ACCESS_REMOTE_WRITE; 663 | flags = IBV_QP_STATE | IBV_QP_PKEY_INDEX | IBV_QP_PORT | IBV_QP_ACCESS_FLAGS; 664 | rc = ibv_modify_qp(qp, &attr, flags); 665 | if (rc) 666 | fprintf(stderr, "failed to modify QP state to INIT\n"); 667 | return rc; 668 | } 669 | /****************************************************************************** 670 | * Function: modify_qp_to_rtr 671 | * 672 | * Input 673 | * qp QP to transition 674 | * remote_qpn remote QP number 675 | * dlid destination LID 676 | * dgid destination GID (mandatory for RoCEE) 677 | * 678 | * Output 679 | * none 680 | * 681 | * Returns 682 | * 0 on success, ibv_modify_qp failure code on failure 683 | * 684 | * Description 685 | * Transition a QP from the INIT to RTR state, using the specified QP number 686 | ******************************************************************************/ 687 | static int modify_qp_to_rtr(struct ibv_qp *qp, uint32_t remote_qpn, uint16_t dlid, uint8_t *dgid) 688 | { 689 | struct ibv_qp_attr attr; 690 | int flags; 691 | int rc; 692 | memset(&attr, 0, sizeof(attr)); 693 | attr.qp_state = IBV_QPS_RTR; 694 | attr.path_mtu = IBV_MTU_256; 695 | attr.dest_qp_num = remote_qpn; 696 | attr.rq_psn = 0; 697 | attr.max_dest_rd_atomic = 1; 698 | attr.min_rnr_timer = 0x12; 699 | attr.ah_attr.is_global = 0; 700 | attr.ah_attr.dlid = dlid; 701 | attr.ah_attr.sl = 0; 702 | attr.ah_attr.src_path_bits = 0; 703 | attr.ah_attr.port_num = config.ib_port; 704 | if (config.gid_idx >= 0) 705 | { 706 | attr.ah_attr.is_global = 1; 707 | attr.ah_attr.port_num = 1; 708 | memcpy(&attr.ah_attr.grh.dgid, dgid, 16); 709 | attr.ah_attr.grh.flow_label = 0; 710 | attr.ah_attr.grh.hop_limit = 1; 711 | attr.ah_attr.grh.sgid_index = config.gid_idx; 712 | attr.ah_attr.grh.traffic_class = 0; 713 | } 714 | flags = IBV_QP_STATE | IBV_QP_AV | IBV_QP_PATH_MTU | IBV_QP_DEST_QPN | 715 | IBV_QP_RQ_PSN | IBV_QP_MAX_DEST_RD_ATOMIC | IBV_QP_MIN_RNR_TIMER; 716 | rc = ibv_modify_qp(qp, &attr, flags); 717 | if (rc) 718 | fprintf(stderr, "failed to modify QP state to RTR\n"); 719 | return rc; 720 | } 721 | /****************************************************************************** 722 | * Function: modify_qp_to_rts 723 | * 724 | * Input 725 | * qp QP to transition 726 | * 727 | * Output 728 | * none 729 | * 730 | * Returns 731 | * 0 on success, ibv_modify_qp failure code on failure 732 | * 733 | * Description 734 | * Transition a QP from the RTR to RTS state 735 | ******************************************************************************/ 736 | static int modify_qp_to_rts(struct ibv_qp *qp) 737 | { 738 | struct ibv_qp_attr attr; 739 | int flags; 740 | int rc; 741 | memset(&attr, 0, sizeof(attr)); 742 | attr.qp_state = IBV_QPS_RTS; 743 | attr.timeout = 0x12; 744 | attr.retry_cnt = 6; 745 | attr.rnr_retry = 0; 746 | attr.sq_psn = 0; 747 | attr.max_rd_atomic = 1; 748 | flags = IBV_QP_STATE | IBV_QP_TIMEOUT | IBV_QP_RETRY_CNT | 749 | IBV_QP_RNR_RETRY | IBV_QP_SQ_PSN | IBV_QP_MAX_QP_RD_ATOMIC; 750 | rc = ibv_modify_qp(qp, &attr, flags); 751 | if (rc) 752 | fprintf(stderr, "failed to modify QP state to RTS\n"); 753 | return rc; 754 | } 755 | /****************************************************************************** 756 | * Function: connect_qp 757 | * 758 | * Input 759 | * res pointer to resources structure 760 | * 761 | * Output 762 | * none 763 | * 764 | * Returns 765 | * 0 on success, error code on failure 766 | * 767 | * Description 768 | * Connect the QP. Transition the server side to RTR, sender side to RTS 769 | ******************************************************************************/ 770 | static int connect_qp(struct resources *res) 771 | { 772 | struct cm_con_data_t local_con_data; 773 | struct cm_con_data_t remote_con_data; 774 | struct cm_con_data_t tmp_con_data; 775 | int rc = 0; 776 | char temp_char; 777 | union ibv_gid my_gid; 778 | if (config.gid_idx >= 0) 779 | { 780 | rc = ibv_query_gid(res->ib_ctx, config.ib_port, config.gid_idx, &my_gid); 781 | if (rc) 782 | { 783 | fprintf(stderr, "could not get gid for port %d, index %d\n", config.ib_port, config.gid_idx); 784 | return rc; 785 | } 786 | } 787 | else 788 | memset(&my_gid, 0, sizeof my_gid); 789 | /* exchange using TCP sockets info required to connect QPs */ 790 | local_con_data.addr = htonll((uintptr_t)res->buf); 791 | local_con_data.rkey = htonl(res->mr->rkey); 792 | local_con_data.qp_num = htonl(res->qp->qp_num); 793 | local_con_data.lid = htons(res->port_attr.lid); 794 | memcpy(local_con_data.gid, &my_gid, 16); 795 | fprintf(stdout, "\nLocal LID = 0x%x\n", res->port_attr.lid); 796 | if (sock_sync_data(res->sock, sizeof(struct cm_con_data_t), (char *)&local_con_data, (char *)&tmp_con_data) < 0) 797 | { 798 | fprintf(stderr, "failed to exchange connection data between sides\n"); 799 | rc = 1; 800 | goto connect_qp_exit; 801 | } 802 | remote_con_data.addr = ntohll(tmp_con_data.addr); 803 | remote_con_data.rkey = ntohl(tmp_con_data.rkey); 804 | remote_con_data.qp_num = ntohl(tmp_con_data.qp_num); 805 | remote_con_data.lid = ntohs(tmp_con_data.lid); 806 | memcpy(remote_con_data.gid, tmp_con_data.gid, 16); 807 | /* save the remote side attributes, we will need it for the post SR */ 808 | res->remote_props = remote_con_data; 809 | fprintf(stdout, "Remote address = 0x%" PRIx64 "\n", remote_con_data.addr); 810 | fprintf(stdout, "Remote rkey = 0x%x\n", remote_con_data.rkey); 811 | fprintf(stdout, "Remote QP number = 0x%x\n", remote_con_data.qp_num); 812 | fprintf(stdout, "Remote LID = 0x%x\n", remote_con_data.lid); 813 | if (config.gid_idx >= 0) 814 | { 815 | uint8_t *p = remote_con_data.gid; 816 | fprintf(stdout, "Remote GID =%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x\n ",p[0], 817 | p[1], p[2], p[3], p[4], p[5], p[6], p[7], p[8], p[9], p[10], p[11], p[12], p[13], p[14], p[15]); 818 | } 819 | /* modify the QP to init */ 820 | rc = modify_qp_to_init(res->qp); 821 | if (rc) 822 | { 823 | fprintf(stderr, "change QP state to INIT failed\n"); 824 | goto connect_qp_exit; 825 | } 826 | /* let the client post RR to be prepared for incoming messages */ 827 | if (config.server_name) 828 | { 829 | rc = post_receive(res); 830 | if (rc) 831 | { 832 | fprintf(stderr, "failed to post RR\n"); 833 | goto connect_qp_exit; 834 | } 835 | } 836 | /* modify the QP to RTR */ 837 | rc = modify_qp_to_rtr(res->qp, remote_con_data.qp_num, remote_con_data.lid, remote_con_data.gid); 838 | if (rc) 839 | { 840 | fprintf(stderr, "failed to modify QP state to RTR\n"); 841 | goto connect_qp_exit; 842 | } 843 | rc = modify_qp_to_rts(res->qp); 844 | if (rc) 845 | { 846 | fprintf(stderr, "failed to modify QP state to RTR\n"); 847 | goto connect_qp_exit; 848 | } 849 | fprintf(stdout, "QP state was change to RTS\n"); 850 | /* sync to make sure that both sides are in states that they can connect to prevent packet loose */ 851 | if (sock_sync_data(res->sock, 1, "Q", &temp_char)) /* just send a dummy char back and forth */ 852 | { 853 | fprintf(stderr, "sync error after QPs are were moved to RTS\n"); 854 | rc = 1; 855 | } 856 | connect_qp_exit: 857 | return rc; 858 | } 859 | /****************************************************************************** 860 | * Function: resources_destroy 861 | * 862 | * Input 863 | * res pointer to resources structure 864 | * 865 | * Output 866 | * none 867 | * 868 | * Returns 869 | * 0 on success, 1 on failure 870 | * 871 | * Description 872 | * Cleanup and deallocate all resources used 873 | ******************************************************************************/ 874 | static int resources_destroy(struct resources *res) 875 | { 876 | int rc = 0; 877 | if (res->qp) 878 | if (ibv_destroy_qp(res->qp)) 879 | { 880 | fprintf(stderr, "failed to destroy QP\n"); 881 | rc = 1; 882 | } 883 | if (res->mr) 884 | if (ibv_dereg_mr(res->mr)) 885 | { 886 | fprintf(stderr, "failed to deregister MR\n"); 887 | rc = 1; 888 | } 889 | if (res->buf) 890 | free(res->buf); 891 | if (res->cq) 892 | if (ibv_destroy_cq(res->cq)) 893 | { 894 | fprintf(stderr, "failed to destroy CQ\n"); 895 | rc = 1; 896 | } 897 | if (res->pd) 898 | if (ibv_dealloc_pd(res->pd)) 899 | { 900 | fprintf(stderr, "failed to deallocate PD\n"); 901 | rc = 1; 902 | } 903 | if (res->ib_ctx) 904 | if (ibv_close_device(res->ib_ctx)) 905 | { 906 | fprintf(stderr, "failed to close device context\n"); 907 | rc = 1; 908 | } 909 | if (res->sock >= 0) 910 | if (close(res->sock)) 911 | { 912 | fprintf(stderr, "failed to close socket\n"); 913 | rc = 1; 914 | } 915 | return rc; 916 | } 917 | /****************************************************************************** 918 | * Function: print_config 919 | * 920 | * Input 921 | * none 922 | * 923 | * Output 924 | * none 925 | * 926 | * Returns 927 | * none 928 | * 929 | * Description 930 | * Print out config information 931 | ******************************************************************************/ 932 | static void print_config(void) 933 | { 934 | fprintf(stdout, " ------------------------------------------------\n"); 935 | fprintf(stdout, " Device name : \"%s\"\n", config.dev_name); 936 | fprintf(stdout, " IB port : %u\n", config.ib_port); 937 | if (config.server_name) 938 | fprintf(stdout, " IP : %s\n", config.server_name); 939 | fprintf(stdout, " TCP port : %u\n", config.tcp_port); 940 | if (config.gid_idx >= 0) 941 | fprintf(stdout, " GID index : %u\n", config.gid_idx); 942 | fprintf(stdout, " ------------------------------------------------\n\n"); 943 | } 944 | 945 | /****************************************************************************** 946 | * Function: usage 947 | * 948 | * Input 949 | * argv0 command line arguments 950 | * 951 | * Output 952 | * none 953 | * 954 | * Returns 955 | * none 956 | * 957 | * Description 958 | * print a description of command line syntax 959 | ******************************************************************************/ 960 | static void usage(const char *argv0) 961 | { 962 | fprintf(stdout, "Usage:\n"); 963 | fprintf(stdout, " %s start a server and wait for connection\n", argv0); 964 | fprintf(stdout, " %s connect to server at \n", argv0); 965 | fprintf(stdout, "\n"); 966 | fprintf(stdout, "Options:\n"); 967 | fprintf(stdout, " -p, --port listen on/connect to port (default 18515)\n"); 968 | fprintf(stdout, " -d, --ib-dev use IB device (default first device found)\n"); 969 | fprintf(stdout, " -i, --ib-port use port of IB device (default 1)\n"); 970 | fprintf(stdout, " -g, --gid_idx gid index to be used in GRH (default not used)\n"); 971 | } 972 | /****************************************************************************** 973 | * Function: main 974 | * 975 | * Input 976 | * argc number of items in argv 977 | * argv command line parameters 978 | * 979 | * Output 980 | * none 981 | * 982 | * Returns 983 | * 0 on success, 1 on failure 984 | * 985 | * Description 986 | * Main program code 987 | ******************************************************************************/ 988 | int main(int argc, char *argv[]) 989 | { 990 | struct resources res; 991 | int rc = 1; 992 | char temp_char; 993 | /* parse the command line parameters */ 994 | while (1) 995 | { 996 | int c; 997 | static struct option long_options[] = { 998 | {.name = "port", .has_arg = 1, .val = 'p'}, 999 | {.name = "ib-dev", .has_arg = 1, .val = 'd'}, 1000 | {.name = "ib-port", .has_arg = 1, .val = 'i'}, 1001 | {.name = "gid-idx", .has_arg = 1, .val = 'g'}, 1002 | {.name = NULL, .has_arg = 0, .val = '\0'} 1003 | }; 1004 | c = getopt_long(argc, argv, "p:d:i:g:", long_options, NULL); 1005 | if (c == -1) 1006 | break; 1007 | switch (c) 1008 | { 1009 | case 'p': 1010 | config.tcp_port = strtoul(optarg, NULL, 0); 1011 | break; 1012 | case 'd': 1013 | config.dev_name = strdup(optarg); 1014 | break; 1015 | case 'i': 1016 | config.ib_port = strtoul(optarg, NULL, 0); 1017 | if (config.ib_port < 0) 1018 | { 1019 | usage(argv[0]); 1020 | return 1; 1021 | } 1022 | break; 1023 | case 'g': 1024 | config.gid_idx = strtoul(optarg, NULL, 0); 1025 | if (config.gid_idx < 0) 1026 | { 1027 | usage(argv[0]); 1028 | return 1; 1029 | } 1030 | break; 1031 | default: 1032 | usage(argv[0]); 1033 | return 1; 1034 | } 1035 | } 1036 | /* parse the last parameter (if exists) as the server name */ 1037 | if (optind == argc - 1) 1038 | config.server_name = argv[optind]; 1039 | if(config.server_name){ 1040 | printf("servername=%s\n",config.server_name); 1041 | } 1042 | else if (optind < argc) 1043 | { 1044 | usage(argv[0]); 1045 | return 1; 1046 | } 1047 | /* print the used parameters for info*/ 1048 | print_config(); 1049 | /* init all of the resources, so cleanup will be easy */ 1050 | resources_init(&res); 1051 | /* create resources before using them */ 1052 | if (resources_create(&res)) 1053 | { 1054 | fprintf(stderr, "failed to create resources\n"); 1055 | goto main_exit; 1056 | } 1057 | /* connect the QPs */ 1058 | if (connect_qp(&res)) 1059 | { 1060 | fprintf(stderr, "failed to connect QPs\n"); 1061 | goto main_exit; 1062 | } 1063 | /* let the server post the sr */ 1064 | if (!config.server_name) 1065 | if (post_send(&res, IBV_WR_SEND)) 1066 | { 1067 | fprintf(stderr, "failed to post sr\n"); 1068 | goto main_exit; 1069 | } 1070 | /* in both sides we expect to get a completion */ 1071 | if (poll_completion(&res)) 1072 | { 1073 | fprintf(stderr, "poll completion failed\n"); 1074 | goto main_exit; 1075 | } 1076 | /* after polling the completion we have the message in the client buffer too */ 1077 | if (config.server_name) 1078 | fprintf(stdout, "Message is: '%s'\n", res.buf); 1079 | else 1080 | { 1081 | /* setup server buffer with read message */ 1082 | strcpy(res.buf, RDMAMSGR); 1083 | } 1084 | /* Sync so we are sure server side has data ready before client tries to read it */ 1085 | if (sock_sync_data(res.sock, 1, "R", &temp_char)) /* just send a dummy char back and forth */ 1086 | { 1087 | fprintf(stderr, "sync error before RDMA ops\n"); 1088 | rc = 1; 1089 | goto main_exit; 1090 | } 1091 | /* Now the client performs an RDMA read and then write on server. 1092 | Note that the server has no idea these events have occured */ 1093 | if (config.server_name) 1094 | { 1095 | /* First we read contens of server's buffer */ 1096 | if (post_send(&res, IBV_WR_RDMA_READ)) 1097 | { 1098 | fprintf(stderr, "failed to post SR 2\n"); 1099 | rc = 1; 1100 | goto main_exit; 1101 | } 1102 | if (poll_completion(&res)) 1103 | { 1104 | fprintf(stderr, "poll completion failed 2\n"); 1105 | rc = 1; 1106 | goto main_exit; 1107 | } 1108 | fprintf(stdout, "Contents of server's buffer: '%s'\n", res.buf); 1109 | /* Now we replace what's in the server's buffer */ 1110 | strcpy(res.buf, RDMAMSGW); 1111 | fprintf(stdout, "Now replacing it with: '%s'\n", res.buf); 1112 | if (post_send(&res, IBV_WR_RDMA_WRITE)) 1113 | { 1114 | fprintf(stderr, "failed to post SR 3\n"); 1115 | rc = 1; 1116 | goto main_exit; 1117 | } 1118 | if (poll_completion(&res)) 1119 | { 1120 | fprintf(stderr, "poll completion failed 3\n"); 1121 | rc = 1; 1122 | goto main_exit; 1123 | } 1124 | } 1125 | /* Sync so server will know that client is done mucking with its memory */ 1126 | if (sock_sync_data(res.sock, 1, "W", &temp_char)) /* just send a dummy char back and forth */ 1127 | { 1128 | fprintf(stderr, "sync error after RDMA ops\n"); 1129 | rc = 1; 1130 | goto main_exit; 1131 | } 1132 | if (!config.server_name) 1133 | fprintf(stdout, "Contents of server buffer: '%s'\n", res.buf); 1134 | rc = 0; 1135 | main_exit: 1136 | if (resources_destroy(&res)) 1137 | { 1138 | fprintf(stderr, "failed to destroy resources\n"); 1139 | rc = 1; 1140 | } 1141 | if (config.dev_name) 1142 | free((char *)config.dev_name); 1143 | fprintf(stdout, "\ntest result is %d\n", rc); 1144 | return rc; 1145 | } 1146 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # RDMA-EXAMPLE 2 | RDMA学习 3 | --------------------------------------------------------------------------------