├── bf ├── Makefile ├── bf_context.cpp ├── bf_context.hpp ├── bf_server.cpp ├── bf_server.hpp ├── bf_server_exe.cpp └── run_server.sh ├── bf_host ├── Makefile ├── bf_host.cu ├── bf_host.cu.hpp └── gpu_define.cu.h └── common ├── setup.cpp └── setup.hpp /bf/Makefile: -------------------------------------------------------------------------------- 1 | CC := g++ -std=c++11 -O3 2 | CFLAGS=-Wall -g 3 | LIBS := -libverbs -lrt -lpthread -lboost_filesystem -lboost_system 4 | 5 | ######################################################################## 6 | 7 | all: bf_server_exe 8 | 9 | # link 10 | bf_server_exe: setup.o bf_context.o bf_server.o bf_server_exe.o 11 | $(CC) $(LDFLAGS) -o $@ $^ $(LIBS) 12 | 13 | ######################################################################## 14 | 15 | # compile 16 | bf_server_exe.o: ../common/setup.hpp bf_context.hpp bf_context.cpp bf_server_exe.cpp 17 | $(CC) $(CFLAGS) -c bf_server_exe.cpp 18 | 19 | bf_server.o: ../common/setup.hpp bf_context.hpp bf_server.cpp 20 | $(CC) $(CFLAGS) -c bf_server.cpp 21 | 22 | bf_context.o: ../common/setup.hpp bf_context.hpp bf_context.cpp 23 | $(CC) $(CFLAGS) -c bf_context.cpp 24 | 25 | setup.o: ../common/setup.hpp ../common/setup.cpp 26 | $(CC) $(CFLAGS) -c ../common/setup.cpp 27 | 28 | ######################################################################## 29 | clean: 30 | \rm -f *.o bf_server_exe 31 | 32 | -------------------------------------------------------------------------------- /bf/bf_context.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Maroun Tork, Lina Maudlej and Mark Silberstein 3 | * All rights reserved. 
4 | * If used, please cite: PAPER NAME, AUTHORS, CONFERENCE WHERE PUBLISHED 5 | * 6 | * Redistribution and use in source and binary forms, with or without modification, 7 | * are permitted provided that the following conditions are met: 8 | * 9 | * Redistributions of source code must retain the above copyright notice, this 10 | * list of conditions and the following disclaimer. 11 | * 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | * this list of conditions and the following disclaimer in the documentation and/or 14 | * other materials provided with the distribution. 15 | * 16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' 17 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR 20 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 21 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 22 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 23 | * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 25 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
26 | */ 27 | 28 | 29 | #include "bf_context.hpp" 30 | 31 | 32 | //#define __MEASURE_GPU_RTT__ 33 | #ifdef __MEASURE_GPU_RTT__ 34 | #define G_N 100000 35 | double g_stats[G_N]; 36 | int g_index = 0; 37 | bool g_start = false; 38 | #endif 39 | 40 | struct recv_wr_t { 41 | struct ibv_sge* recv_sg_list; 42 | struct ibv_send_wr* rdma_write_wr_list; 43 | }; 44 | 45 | struct send_wr_t { 46 | ibv_sge* rdma_read_sg_list; 47 | ibv_send_wr* rdma_read_wr_list; 48 | 49 | ibv_sge* read_sg; 50 | ibv_send_wr* read_wr; 51 | 52 | ibv_sge* write_sg; 53 | ibv_send_wr* write_wr; 54 | }; 55 | 56 | struct client_md { 57 | bool _valid; 58 | struct sockaddr_in _client_addr; 59 | socklen_t _client_addr_len; 60 | #ifdef __MEASURE_GPU_RTT__ 61 | double _time_stamp; 62 | char padding[64 - sizeof(bool) - sizeof(struct sockaddr_in) - sizeof(socklen_t) - sizeof(double)]; 63 | #else 64 | char padding[64 - sizeof(bool) - sizeof(struct sockaddr_in) - sizeof(socklen_t)]; 65 | #endif 66 | }; 67 | 68 | 69 | double static inline get_time_msec(void) { 70 | struct timeval t; 71 | gettimeofday(&t, NULL); 72 | return t.tv_sec * 1e+3 + t.tv_usec * 1e-3; 73 | } 74 | 75 | inline void BFContext::copy_data_to_host(ib_resources_t* host_ib_resources, unsigned int wr_id, ibv_send_wr* rdma_write_wr_list) { 76 | if (ibv_post_send(host_ib_resources->qp,&(rdma_write_wr_list[MOD(wr_id, BF_MAX_SEND_WQES)]), NULL)) { 77 | std::cerr << "ibv_post_send() failed (line" << __LINE__ << ")" << std::endl; 78 | exit(1); 79 | } 80 | 81 | int max_ncqes = 4; 82 | struct ibv_wc wc[max_ncqes]; 83 | int ncqes = ibv_poll_cq(host_ib_resources->send_cq, max_ncqes, wc); 84 | if (ncqes < 0) { 85 | std::cerr << "ibv_poll_cq() failed" << std::endl; 86 | exit(1); 87 | } 88 | } 89 | 90 | int BFContext::poll_request_from_client(client_md* client_md_rbuf, ib_resources_t* client_ib_resources, unsigned int rbuf_index) { 91 | switch(_connection_type) { 92 | case UDP_CONNECTION: 93 | return poll_udp_request_from_client(client_md_rbuf, 
client_ib_resources, rbuf_index); 94 | /* case TCP_CONNECTION: 95 | return poll_tcp_request_from_client(); 96 | case IB_CONNECTION: 97 | return poll_ib_request_from_client();*/ 98 | default: 99 | std::cerr << "Unknown Connection Type: " << _connection_type << std::endl; 100 | exit(1); 101 | } 102 | return -1; 103 | } 104 | 105 | /* Posts write_wr on the notify QP and exits on failure; then clears resp_sent, counts the posted WQE, and on every second call advances wrap_around modulo _workers_num (update_wrap_around is the toggle). */ 106 | inline void BFContext::notify_host(ib_resources_t* notify_ib_resources, ibv_send_wr* write_wr){ 107 | struct ibv_send_wr *bad_wr; 108 | if (ibv_post_send(notify_ib_resources->qp, write_wr, &bad_wr)) { 109 | std::cerr << "ibv_post_send() failed (line " << __LINE__ << ")" << std::endl; 110 | exit(1); 111 | } 112 | notify_ib_resources->resp_sent = 0; 113 | notify_ib_resources->posted_wqes++; 114 | if(notify_ib_resources->update_wrap_around) { 115 | notify_ib_resources->update_wrap_around = false; 116 | notify_ib_resources->wrap_around = (notify_ib_resources->wrap_around + 1) % _workers_num; 117 | } else { 118 | notify_ib_resources->update_wrap_around = true; 119 | } 120 | } 121 | 122 | /* Computes how many slots are available for worker_id and, when any are, stores MOD(ci_val + _workers_num, BF_MAX_SEND_WQES) in *wr_id. Every return path yields available_slots > 0. */ 123 | inline bool BFContext::pull_notification_from_host(ib_resources_t* notify_ib_resources, ibv_send_wr* read_wr, unsigned int *wr_id, unsigned int worker_id) { 124 | /* pi_val is read from lrecv_buf[worker_id] and ci_val from lsend_buf[_workers_num + worker_id], both viewed as int arrays (presumably producer/consumer indices - confirm). */ 125 | int pi_val = *(((int*)notify_ib_resources->lrecv_buf) + worker_id); 126 | int ci_val = *(((int*)notify_ib_resources->lsend_buf + _workers_num) + worker_id); 127 | //int ci_val = *wr_id; 128 | 129 | int available_slots = HAS_REQUEST(pi_val, ci_val, BF_MAX_RECV_WQES); 130 | available_slots = available_slots / _workers_num; 131 | // if(worker_id % ( (_workers_num-1)/2 + 1) != 0) { 132 | // if(worker_id == ( _workers_num - 1) || (worker_id != 0 && worker_id != 4 && worker_id != 8 && worker_id != 12) ){ 133 | // if((_workers_num != 1 && worker_id == (_workers_num - 1)) || (worker_id != 0 && worker_id != (_workers_num/2 + 1)) ) { 134 | // if((worker_id != 0 && worker_id%8 == 0) || worker_id != _workers_num - 1) { 135 | if(available_slots > 0) { 136 | *wr_id = MOD(ci_val + _workers_num,
BF_MAX_SEND_WQES); 137 | } 138 | if(worker_id != _workers_num - 1) { 139 | // if(worker_id != 0 && worker_id != _workers_num - 1) { 140 | // if(worker_id % 2 != 0) { 141 | return available_slots > 0; 142 | } 143 | /* Only the last worker (worker_id == _workers_num - 1) gets past this point; it may post read_wr on the notify QP below. */ 144 | // std::cout << "worker_gid " << worker_id << " available slots " << available_slots << " pi_val " << pi_val << " ci_val " << ci_val << std::endl; 145 | 146 | /* 147 | if(available_slots > 0){ 148 | std::cout << "available_slots : " << available_slots << std::endl; 149 | std::cout << "pi = " << pi_val << " ci_val " << ci_val << std::endl; 150 | } 151 | 152 | */ 153 | unsigned int* _load_factor = &notify_ib_resources->load_factor[worker_id]; 154 | // if(*_load_factor != 0) std::cout << "load factor " << *_load_factor << std::endl; 155 | if( (2 * (*_load_factor)) < available_slots) { 156 | *_load_factor = available_slots / 2; 157 | } 158 | /* Continue only when available_slots equals the stored load factor; the factor is then set to half of available_slots again. */ 159 | if(available_slots != *_load_factor) { 160 | return available_slots > 0; 161 | } 162 | 163 | *_load_factor = available_slots/2; 164 | /* Reap completions of earlier notify WQEs so posted_wqes counts only what is still outstanding. */ 165 | if(notify_ib_resources->posted_wqes > 0) { 166 | int max_ncqes = 8; 167 | struct ibv_wc wc[max_ncqes]; 168 | int ncqes = ibv_poll_cq(notify_ib_resources->send_cq, max_ncqes, wc); 169 | if (ncqes < 0) { 170 | std::cerr << "ibv_poll_cq() failed" << std::endl; 171 | exit(1); 172 | } 173 | notify_ib_resources->posted_wqes -= ncqes; 174 | } 175 | if(notify_ib_resources->posted_wqes >= 5) { // the rest for posting rdma_write to update host_ci 176 | return available_slots > 0; 177 | } 178 | struct ibv_send_wr* bad_wr; 179 | if (ibv_post_send(notify_ib_resources->qp, read_wr, &bad_wr)) { 180 | std::cerr << "ibv_post_send() failed (line " << __LINE__ << ")" << std::endl; 181 | exit(1); 182 | } 183 | notify_ib_resources->posted_wqes++; 184 | return available_slots > 0; 185 | } 186 | /* Advances the entry at lsend_buf[_workers_num + worker_id] by _workers_num, wrapped with MOD(..., BF_MAX_RECV_WQES). */ 187 | inline void BFContext::update_ci(ib_resources_t* notify_ib_resources, int worker_id) { 188 | // std::cout << "worker_id " << worker_id << " ci = " << *(((unsigned
int*)(notify_ib_resources->lsend_buf)) + _workers_num + worker_id) << std::endl; 189 | *(((unsigned int*)(notify_ib_resources->lsend_buf)) + _workers_num + worker_id) = MOD(*(((unsigned int*)(notify_ib_resources->lsend_buf)) + _workers_num + worker_id) + _workers_num, BF_MAX_RECV_WQES); 190 | // *(((unsigned int*)(notify_ib_resources->lsend_buf)) + _workers_num) = MOD(*(((unsigned int*)(notify_ib_resources->lsend_buf)) + _workers_num) + requests_num, BF_MAX_RECV_WQES); 191 | } 192 | 193 | /* 194 | inline void BFContext::push_ci_to_host() { 195 | ibv_send_wr* bad_wr; 196 | int max_ncqes = 8; 197 | ibv_wc wc[max_ncqes]; 198 | int ncqes = 0; 199 | 200 | if (ibv_post_send(host_notify_ib_resources->qp, write_wr, &bad_wr)) { 201 | std::cerr << "ibv_post_send() failed (line " << __LINE__ << ")" << std::endl; 202 | exit(1); 203 | } 204 | 205 | do { 206 | ncqes = ibv_poll_cq(host_notify_ib_resources->send_cq, max_ncqes, wc); 207 | } while (0); 208 | if (ncqes < 0) { 209 | std::cerr << "ibv_poll_cq() failed" << std::endl; 210 | exit(1); 211 | } 212 | // if (wc.status != IBV_WC_SUCCESS) { 213 | // std::cerr << "got CQE with error " << wc.status << " (line " << __LINE__ << ")" << std::endl; 214 | // exit(1); 215 | // } 216 | // assert(wc.opcode == IBV_WC_RDMA_WRITE); 217 | 218 | } 219 | 220 | */ 221 | inline void BFContext::send_response(client_md* client_md_rbuf, ib_resources_t* host_ib_resources, ib_resources_t* notify_ib_resources, ib_resources_t* client_ib_resources, send_wr_t* send_wr, unsigned int wr_id, unsigned int last_wr_id, unsigned int worker_id,bool post_rdma) { 222 | switch(_connection_type) { 223 | case UDP_CONNECTION: 224 | send_udp_response(client_md_rbuf, host_ib_resources, notify_ib_resources, client_ib_resources, send_wr, wr_id, last_wr_id, worker_id,post_rdma); 225 | break; 226 | // case TCP_CONNECTION: 227 | // send_tcp_response(wr_id); 228 | // break; 229 | // case IB_CONNECTION: 230 | // send_ib_response(wr_id); 231 | // break; 232 | defaulf: 233 | 
std::cerr << "Unknown Connection Type: " << _connection_type << std::endl; 234 | exit(1); 235 | } 236 | } 237 | 238 | /* Chooses the worker that owns rbuf_index. First calls notify_host() once resp_sent has reached RECV_WQES_NUM/2, then increments resp_sent. The candidate computed from the current wrap_around is kept if its lsend_buf[_workers_num + id] entry plus _workers_num (MOD BF_MAX_RECV_WQES) equals rbuf_index; otherwise the candidate for the previous wrap_around is tried, and the one for the next wrap_around is used without a check. */ 239 | inline unsigned int BFContext::get_worker_id_and_notify_host(ib_resources_t* notify_ib_resources, send_wr_t* send_wr, unsigned int rbuf_index) { 240 | // notify_ib_resources->resp_sent++; 241 | if(notify_ib_resources->resp_sent >= RECV_WQES_NUM/2) { 242 | // std::cout << "notify host resp_sent " << notify_ib_resources->resp_sent << std::endl; 243 | notify_host(notify_ib_resources, send_wr->write_wr); 244 | } 245 | notify_ib_resources->resp_sent++; 246 | int worker_id = (rbuf_index + notify_ib_resources->wrap_around * BF_MAX_RECV_WQES) % _workers_num; 247 | if( MOD(*(((unsigned int*)(notify_ib_resources->lsend_buf)) + _workers_num + worker_id) + _workers_num , BF_MAX_RECV_WQES) != rbuf_index ) { 248 | unsigned int worker_id_1 = (rbuf_index + (notify_ib_resources->wrap_around == 0 ? _workers_num - 1 : notify_ib_resources->wrap_around - 1) * BF_MAX_RECV_WQES) % _workers_num; 249 | unsigned int worker_id_2 = (rbuf_index + (notify_ib_resources->wrap_around + 1) * BF_MAX_RECV_WQES) % _workers_num; 250 | if(MOD(*(((unsigned int*)(notify_ib_resources->lsend_buf)) + _workers_num + worker_id_1) + _workers_num , BF_MAX_RECV_WQES) == rbuf_index ) { 251 | worker_id = worker_id_1; 252 | } else { 253 | worker_id = worker_id_2; 254 | } 255 | } 256 | // std::cout << "wrap_around= " << notify_ib_resources->wrap_around << " rbuf_index " << rbuf_index << " worker_id = " << worker_id << " last ci " << *(((unsigned int*)(notify_ib_resources->lsend_buf)) + _workers_num + worker_id) << std::endl; 257 | // assert(MOD(*(((unsigned int*)(notify_ib_resources->lsend_buf)) + _workers_num + worker_id) + _workers_num , BF_MAX_RECV_WQES) == rbuf_index); 258 | return worker_id; 259 | } 260 | /* With post_rdma set, posts rdma_read_wr_list[wr_id] on the host QP if wr_id differs from last_wr_id and returns. Otherwise polls the host send CQ and, for each RDMA-read completion, updates the owning worker's index and sends the reply to the stored client address with sendto(). */ 261 | inline void BFContext::send_udp_response(client_md* client_md_rbuf, ib_resources_t* host_ib_resources, ib_resources_t* notify_ib_resources, ib_resources_t* client_ib_resources,
send_wr_t* send_wr, unsigned int wr_id, unsigned int last_wr_id, unsigned int worker_id,bool post_rdma) { 262 | if(post_rdma) { 263 | ibv_send_wr* bad_wr; 264 | ibv_send_wr* rdma_read_wr_list = send_wr->rdma_read_wr_list; 265 | /* NOTE(review): wrap_around below is only referenced from commented-out code. */ 266 | unsigned int wrap_around = notify_ib_resources->wrap_around; 267 | if(wr_id != last_wr_id) { 268 | if (ibv_post_send(host_ib_resources->qp, &(rdma_read_wr_list[wr_id]), &bad_wr)) { 269 | std::cerr << "ibv_post_send() failed (line " << __LINE__ << ")" << std::endl; 270 | exit(1); 271 | } 272 | // std::cout << "worker_gid " << worker_id << " requests msg id= " << wr_id << std::endl; 273 | //last_wr_id = wr_id; 274 | } 275 | return; 276 | } 277 | /* Response path: poll up to 16 completions from the host send CQ. */ 278 | int max_ncqes = 16; 279 | struct ibv_wc wc[max_ncqes]; 280 | int ncqes = ibv_poll_cq(host_ib_resources->send_cq, max_ncqes, wc); 281 | if (ncqes < 0) { 282 | std::cerr << "ibv_poll_cq() failed" << std::endl; 283 | exit(1); 284 | } 285 | 286 | // if(ncqes) std::cout << "ncqes= " << ncqes << std::endl; 287 | 288 | int requests_num = 0; 289 | for(int i = 0 ; i < ncqes ; i++) { 290 | assert(wc[i].status == IBV_WC_SUCCESS); 291 | if(wc[i].opcode == IBV_WC_RDMA_WRITE) { 292 | continue; 293 | } 294 | /* RDMA-write completions are skipped above; anything else is asserted to be an RDMA read. */ 295 | // std::cerr << "got CQE with error " << wc[i].status << " (line " << __LINE__ << ")" << std::endl; 296 | // exit(1); 297 | // } 298 | assert(wc[i].opcode == IBV_WC_RDMA_READ); 299 | /* The completion's wr_id is used as the index into the client send buffer and client_md_rbuf. */ 300 | // std::cout << "sending msg wc[i].wr_id " << wc[i].wr_id << std::endl; 301 | int rbuf_index = wc[i].wr_id; 302 | char* send_buf = client_ib_resources->lsend_buf + rbuf_index * BF_H2C_MSG_SIZE; 303 | // std::cout << "rbuf_index " << rbuf_index << std::endl; 304 | assert(client_md_rbuf[rbuf_index]._valid == true); 305 | // if(rbuf_index == 1) std::cout << "rbuf_index to be released " << rbuf_index << "worker_id " << (rbuf_index + wrap_around * BF_MAX_RECV_WQES) % _workers_num << std::endl; 306 | int worker_id = get_worker_id_and_notify_host(notify_ib_resources, send_wr, rbuf_index); /* this local shadows the worker_id parameter */ 307 |
update_ci(notify_ib_resources,worker_id); 308 | // update_ci(notify_ib_resources,(rbuf_index + wrap_around * BF_MAX_RECV_WQES) % _workers_num); 309 | // notify_ib_resources->resp_sent += requests_num; 310 | /* Optional round-trip statistics, compiled only when __MEASURE_GPU_RTT__ is defined. */ 311 | #ifdef __MEASURE_GPU_RTT__ 312 | if(g_start) { 313 | g_stats[g_index++] = (get_time_msec() - client_md_rbuf[rbuf_index]._time_stamp) * 1000; 314 | if(g_index == G_N) { 315 | double min_val = 10000, max_val = 0, sum_val = 0; 316 | for(int i = 0 ; i < G_N ; i++) { 317 | sum_val += g_stats[i]; 318 | min_val = min_val < g_stats[i] ? min_val : g_stats[i]; 319 | max_val = max_val > g_stats[i] ? max_val : g_stats[i]; 320 | std::cout << g_stats[i] <<" usec." << std::endl; 321 | } 322 | std::cout << "min: " << min_val << " usec." << std::endl; 323 | std::cout << "max: " << max_val << " usec." << std::endl; 324 | std::cout << "avg: " << sum_val / G_N << " usec." << std::endl; 325 | } /* NOTE(review): g_index is not reset after this summary, so the next sample would be stored at g_stats[G_N] - confirm. */ 326 | } else { 327 | if(++g_index == G_N) { 328 | g_index = 0; 329 | g_start = true; 330 | } 331 | } 332 | #endif 333 | // std::cout << "sending msg wc[i].wr_id " << wc[i].wr_id << std::endl; 334 | /* for(int i = 0 ; i < HOST_SEND_MSG_SIZE ; i++) { 335 | printf("%x ",send_buf[i]); 336 | } 337 | printf("\n");*/ 338 | int ret=sendto(client_ib_resources->client_fd, send_buf, HOST_SEND_MSG_SIZE, MSG_CONFIRM, (struct sockaddr *)&(client_md_rbuf[rbuf_index]._client_addr), client_md_rbuf[rbuf_index]._client_addr_len); 339 | if (ret < 0) { 340 | perror("send udp response"); 341 | exit(1); 342 | } 343 | client_md_rbuf[rbuf_index]._valid = false; 344 | requests_num++; 345 | } 346 | /* NOTE(review): requests_num is only consumed by the commented-out block below. */ 347 | //printf("sent %d bytes\n", ret); 348 | //update ci in GPU 349 | /* if(requests_num > 0) { 350 | // update_ci(notify_ib_resources, requests_num); 351 | notify_ib_resources->resp_sent += requests_num; 352 | }*/ 353 | } 354 | 355 | /* Builds the notification QP on the client's device context and protection domain: registers two buffers of 2 * _workers_num unsigned ints, creates the CQs and an RC QP, exchanges ib_info_t with the peer over sfd (recv, then send), and moves the QP through INIT, RTR and RTS. Exits on any failure; returns a malloc'ed ib_resources_t. */ 356 | ib_resources_t* BFContext::setup_notifyQP_from_Host(ib_resources_t* client_ib_resources, int sfd) { 357 | ibv_context* context = client_ib_resources->context; 358 | ibv_pd* pd =
client_ib_resources->pd; 359 | struct ib_resources_t *ib_resources = (struct ib_resources_t *)malloc(sizeof(struct ib_resources_t)); 360 | /* Receive-side buffer: two consecutive arrays of _workers_num unsigned ints, entry i of each initialised to BF_MAX_RECV_WQES - _workers_num + i. NOTE(review): malloc() results in this function are used without a NULL check. */ 361 | struct ibv_mr *mr_recv; 362 | char *recv_buf = (char*) malloc(2 * _workers_num * sizeof(unsigned int)); 363 | for(int i = 0 ; i < _workers_num ; i++) { 364 | *((unsigned int*)recv_buf + i) = BF_MAX_RECV_WQES - _workers_num + i; 365 | } 366 | for(int i = 0 ; i < _workers_num ; i++) { 367 | *((unsigned int*)recv_buf + _workers_num + i) = BF_MAX_RECV_WQES - _workers_num + i; 368 | } 369 | 370 | mr_recv = ibv_reg_mr(pd, recv_buf, 2 * _workers_num * sizeof(unsigned int), IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE); 371 | if (!mr_recv) { 372 | std::cerr << "ibv_reg_mr() failed for data_from_host" << std::endl; 373 | exit(1); 374 | } 375 | /* Send-side buffer with the same layout, initialised from BF_MAX_SEND_WQES instead and registered for remote read. */ 376 | struct ibv_mr *mr_send; 377 | char *send_buf = (char*) malloc(2 * _workers_num * sizeof(unsigned int)); 378 | for(int i = 0 ; i < _workers_num ; i++) { 379 | *((unsigned int*)send_buf + i) = BF_MAX_SEND_WQES - _workers_num + i; 380 | } 381 | for(int i = 0 ; i < _workers_num ; i++) { 382 | *((unsigned int*)send_buf + _workers_num + i) = BF_MAX_SEND_WQES - _workers_num + i; 383 | } 384 | mr_send = ibv_reg_mr(pd, send_buf, 2 * _workers_num * sizeof(unsigned int), IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_READ); 385 | if (!mr_send) { 386 | std::cerr << "ibv_reg_mr() failed for data_for_host" << std::endl; 387 | exit(1); 388 | } 389 | /* Separate receive and send completion queues for this QP. */ 390 | struct ibv_cq *recv_cq = ibv_create_cq(context, BF_RECV_CQ_SIZE, NULL, NULL, 0); 391 | if (!recv_cq) { 392 | std::cerr << "ibv_create_cq() failed" << std::endl; 393 | exit(1); 394 | } 395 | 396 | struct ibv_cq *send_cq = ibv_create_cq(context, BF_SEND_CQ_SIZE, NULL, NULL, 0); 397 | if (!send_cq) { 398 | std::cerr << "ibv_create_cq() failed" << std::endl; 399 | exit(1); 400 | } 401 | /* Reliable-connection QP with NOTIFY_WQES_NUM send WRs and no receive WRs. */ 402 | struct ibv_qp_init_attr qp_init_attr; 403 | memset(&qp_init_attr, 0, sizeof(struct ibv_qp_init_attr)); 404 | qp_init_attr.send_cq = send_cq; 405 |
qp_init_attr.recv_cq = recv_cq; 406 | qp_init_attr.qp_type = IBV_QPT_RC; 407 | qp_init_attr.cap.max_send_wr = NOTIFY_WQES_NUM; 408 | qp_init_attr.cap.max_recv_wr = 0; 409 | qp_init_attr.cap.max_send_sge = 1; 410 | qp_init_attr.cap.max_recv_sge = 0; 411 | //qp_init_attr.cap.max_inline_data = 32; 412 | struct ibv_qp *qp = ibv_create_qp(pd, &qp_init_attr); 413 | if (!qp) { 414 | std::cerr << "ibv_create_qp() failed errno=" << errno << std::endl; 415 | exit(1); 416 | } 417 | /* Receive the peer's ib_info_t first. Only a negative return is treated as failure; a short read is not detected. */ 418 | struct ib_info_t server_info; 419 | int ret; 420 | ret = recv(sfd, &server_info, sizeof(struct ib_info_t), 0); 421 | if (ret < 0) { 422 | std::cerr << "recv" << std::endl; 423 | exit(1); 424 | } 425 | ib_resources->rmr_recv_key = server_info.mkey_data_buffer; 426 | ib_resources->rmr_recv_addr = server_info.addr_data_buffer; 427 | ib_resources->rmr_send_key = server_info.mkey_response_buffer; 428 | ib_resources->rmr_send_addr = server_info.addr_response_buffer; 429 | /* Local port attributes supply the LID advertised to the peer. */ 430 | struct ibv_port_attr port_attr; 431 | ret = ibv_query_port(context, PORT_NUM, &port_attr); 432 | if (ret) { 433 | std::cerr << "ibv_query_port() failed" << std::endl; 434 | exit(1); 435 | } 436 | struct ib_info_t my_info; 437 | my_info.lid = port_attr.lid; 438 | my_info.qpn = qp->qp_num; 439 | int gid_index = get_gid_index(context); /* NOTE(review): the port argument below is the literal 1 while ibv_query_port() above uses PORT_NUM - confirm they agree */ 440 | if (ibv_query_gid(context, 1, gid_index, &(my_info.gid) )) { 441 | std::cerr << "ibv_query_gid failed for gid " << gid_index << std::endl; 442 | exit(1); 443 | } 444 | /* my_info carries only lid, qpn and gid at this point; its other fields are sent as they are. */ 445 | ret = send(sfd, &my_info, sizeof(struct ib_info_t), 0); 446 | if (ret < 0) { 447 | perror("send"); 448 | std::cerr << "send" << std::endl; 449 | exit(1); 450 | } 451 | /* Move the QP to INIT with qp_access_flags = 0. */ 452 | struct ibv_qp_attr qp_attr; 453 | memset(&qp_attr, 0, sizeof(struct ibv_qp_attr)); 454 | qp_attr.qp_state = IBV_QPS_INIT; 455 | qp_attr.pkey_index = 0; 456 | qp_attr.port_num = PORT_NUM; 457 | qp_attr.qp_access_flags = 0; 458 | ret = ibv_modify_qp(qp, &qp_attr, IBV_QP_STATE | IBV_QP_PKEY_INDEX | IBV_QP_PORT | IBV_QP_ACCESS_FLAGS); 459 | if (ret) {
460 | std::cerr << "ibv_modify_qp() to INIT failed" << std::endl; 461 | exit(1); 462 | } 463 | /* INIT to RTR: address the peer's QP using the lid, gid and qpn received in server_info. */ 464 | memset(&qp_attr, 0, sizeof(struct ibv_qp_attr)); 465 | qp_attr.qp_state = IBV_QPS_RTR; 466 | qp_attr.path_mtu = IBV_MTU_4096; 467 | qp_attr.dest_qp_num = server_info.qpn; 468 | qp_attr.rq_psn = 0 ; 469 | qp_attr.max_dest_rd_atomic = 1; 470 | qp_attr.min_rnr_timer = 12; 471 | qp_attr.ah_attr.is_global = 1; 472 | qp_attr.ah_attr.grh.dgid = server_info.gid; 473 | qp_attr.ah_attr.grh.sgid_index = get_gid_index(context); 474 | qp_attr.ah_attr.grh.flow_label = 0; 475 | qp_attr.ah_attr.grh.hop_limit = 1; 476 | qp_attr.ah_attr.grh.traffic_class = 0; 477 | qp_attr.ah_attr.dlid = server_info.lid; 478 | qp_attr.ah_attr.sl = 0; 479 | qp_attr.ah_attr.src_path_bits = 0; 480 | qp_attr.ah_attr.port_num = PORT_NUM; 481 | ret = ibv_modify_qp(qp, &qp_attr, IBV_QP_STATE | IBV_QP_AV | IBV_QP_PATH_MTU| IBV_QP_DEST_QPN | IBV_QP_RQ_PSN | IBV_QP_MAX_DEST_RD_ATOMIC | IBV_QP_MIN_RNR_TIMER); 482 | if (ret) { 483 | std::cerr << "ibv_modify_qp() to RTR failed ret= " << ret << std::endl; 484 | exit(1); 485 | } 486 | /* RTR to RTS: sender-side PSN, timeout and retry parameters. */ 487 | memset(&qp_attr, 0, sizeof(struct ibv_qp_attr)); 488 | qp_attr.qp_state = IBV_QPS_RTS; 489 | qp_attr.sq_psn = 0; 490 | qp_attr.timeout = 14; 491 | qp_attr.retry_cnt = 7; 492 | qp_attr.rnr_retry = 7; 493 | qp_attr.max_rd_atomic = 1; 494 | ret = ibv_modify_qp(qp, &qp_attr, IBV_QP_STATE | IBV_QP_TIMEOUT | IBV_QP_RETRY_CNT | IBV_QP_RNR_RETRY | IBV_QP_SQ_PSN | IBV_QP_MAX_QP_RD_ATOMIC); 495 | if (ret) { 496 | std::cerr << "ibv_modify_qp() to RTS failed" << std::endl; 497 | exit(1); 498 | } 499 | /* Publish everything created above through the returned ib_resources_t and zero its counters. */ 500 | ib_resources->context = context; 501 | ib_resources->pd = pd; 502 | ib_resources->qp = qp; 503 | ib_resources->recv_cq = recv_cq; 504 | ib_resources->send_cq = send_cq; 505 | ib_resources->lrecv_buf = recv_buf; 506 | ib_resources->lmr_recv = mr_recv; 507 | ib_resources->lsend_buf = send_buf; 508 | ib_resources->lmr_send = mr_send; 509 | ib_resources->posted_wqes = 0; 510 |
ib_resources->resp_sent = 0; 511 | ib_resources->wrap_around = 0; 512 | ib_resources->update_wrap_around = false; 513 | /* One load_factor counter per worker, initialised to zero. */ 514 | // ib_resources->load_factor = 0; 515 | ib_resources->load_factor = (unsigned int*) malloc(sizeof(unsigned int) * _workers_num); 516 | for(int i = 0 ; i < _workers_num ; i++) { 517 | ib_resources->load_factor[i] = 0; 518 | } 519 | 520 | return ib_resources; 521 | } 522 | 523 | /* Creates an RC QP on the client's context and protection domain that reuses the client's receive buffer and memory region, sends its own ib_info_t over sfd, receives the peer's, and moves the QP through INIT, RTR and RTS. Exits on failure; returns a malloc'ed ib_resources_t. */ 524 | ib_resources_t* BFContext::setup_writeQP_to_Host(ib_resources_t* client_ib_resources, int sfd) { 525 | ibv_context* context = client_ib_resources->context; 526 | ibv_pd* pd = client_ib_resources->pd; 527 | 528 | struct ib_resources_t *ib_resources = (struct ib_resources_t *)malloc(sizeof(struct ib_resources_t)); 529 | /* No new registration: the client's lmr_recv / lrecv_buf are reused. */ 530 | struct ibv_mr *mr_recv = client_ib_resources->lmr_recv; 531 | char *recv_buf = client_ib_resources->lrecv_buf; 532 | /* A single CQ serves as both the send and the receive CQ of this QP. */ 533 | struct ibv_cq *send_cq = ibv_create_cq(context, BF_SEND_CQ_SIZE, NULL, NULL, 0); 534 | if (!send_cq) { 535 | std::cerr << "ERROR: ibv_create_cq() failed" << std::endl; 536 | exit(1); 537 | } 538 | 539 | struct ibv_qp_init_attr qp_init_attr; 540 | memset(&qp_init_attr, 0, sizeof(struct ibv_qp_init_attr)); 541 | qp_init_attr.send_cq = send_cq; 542 | qp_init_attr.recv_cq = send_cq; 543 | qp_init_attr.qp_type = IBV_QPT_RC; 544 | qp_init_attr.cap.max_send_wr = BF_MAX_SEND_WQES; 545 | qp_init_attr.cap.max_recv_wr = 0; 546 | qp_init_attr.cap.max_send_sge = 1; 547 | qp_init_attr.cap.max_recv_sge = 0; 548 | // qp_init_attr.cap.max_inline_data = 32; 549 | struct ibv_qp *qp = ibv_create_qp(pd, &qp_init_attr); 550 | if (!qp) { 551 | std::cerr << "ibv_create_qp() failed errno=" << errno << std::endl; 552 | exit(1); 553 | } 554 | 555 | struct ibv_port_attr port_attr; 556 | int ret = ibv_query_port(context, PORT_NUM, &port_attr); 557 | if (ret) { 558 | std::cerr << "ibv_query_port() failed ret= " << ret << std::endl; 559 | exit(1); 560 | } 561 | /* Advertise our lid, qpn, gid and the rkey/address of the reused memory region to the peer. */ 562 | struct ib_info_t my_info; 563 | my_info.lid = port_attr.lid; 564 | my_info.qpn =
qp->qp_num; 565 | my_info.mkey_data_buffer = mr_recv->rkey; 566 | my_info.addr_data_buffer = (uintptr_t)mr_recv->addr; 567 | int gid_index = get_gid_index(context); 568 | if (ibv_query_gid(context, 1, gid_index, &(my_info.gid) )) { 569 | std::cerr << "ibv_query_gid failed for gid " << gid_index << std::endl; 570 | exit(1); 571 | } 572 | ret = send(sfd, &my_info, sizeof(struct ib_info_t), 0); 573 | if (ret < 0) { 574 | perror("setup_writeQP_to_Host send"); 575 | exit(1); 576 | } 577 | /* Receive the peer's ib_info_t. The result is stored in ret so the check below tests this recv() rather than the earlier send(). */ 578 | struct ib_info_t client_info; 579 | ret = recv(sfd, &client_info, sizeof(struct ib_info_t), 0); 580 | if (ret < 0) { 581 | perror("setup_writeQP_to_Host recv"); 582 | exit(1); 583 | } 584 | /* Remember the peer's remote keys and addresses. */ 585 | ib_resources->rmr_recv_key = client_info.mkey_data_buffer; 586 | ib_resources->rmr_recv_addr = client_info.addr_data_buffer; 587 | ib_resources->rmr_send_key = client_info.mkey_response_buffer; 588 | ib_resources->rmr_send_addr = client_info.addr_response_buffer; 589 | /* Move the QP to INIT, allowing remote write and read. */ 590 | struct ibv_qp_attr qp_attr; 591 | memset(&qp_attr, 0, sizeof(struct ibv_qp_attr)); 592 | qp_attr.qp_state = IBV_QPS_INIT; 593 | qp_attr.pkey_index = 0; 594 | qp_attr.port_num = PORT_NUM; 595 | qp_attr.qp_access_flags = IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_READ; /* we'll allow client to RDMA write and read on this QP */ 596 | ret = ibv_modify_qp(qp, &qp_attr, IBV_QP_STATE | IBV_QP_PKEY_INDEX | IBV_QP_PORT | IBV_QP_ACCESS_FLAGS); 597 | if (ret) { 598 | std::cerr << "ibv_modify_qp() to INIT failed" << std::endl; 599 | exit(1); 600 | } 601 | /* INIT to RTR: address the peer's QP using the lid, gid and qpn received in client_info. */ 602 | memset(&qp_attr, 0, sizeof(struct ibv_qp_attr)); 603 | qp_attr.qp_state = IBV_QPS_RTR; 604 | qp_attr.path_mtu = IBV_MTU_4096; 605 | qp_attr.dest_qp_num = client_info.qpn; /* qp number of client */ 606 | qp_attr.rq_psn = 0 ; 607 | qp_attr.max_dest_rd_atomic = 1; /* max in-flight RDMA reads */ 608 | qp_attr.min_rnr_timer = 12; 609 | qp_attr.ah_attr.is_global = 1; 610 | qp_attr.ah_attr.grh.dgid = client_info.gid; 611 | qp_attr.ah_attr.grh.sgid_index = get_gid_index(context); 612
| qp_attr.ah_attr.grh.flow_label = 0; 613 | qp_attr.ah_attr.grh.hop_limit = 1; 614 | qp_attr.ah_attr.grh.traffic_class = 0; 615 | qp_attr.ah_attr.dlid = client_info.lid; /* LID (L2 Address) of client */ 616 | qp_attr.ah_attr.sl = 0; 617 | qp_attr.ah_attr.src_path_bits = 0; 618 | qp_attr.ah_attr.port_num = PORT_NUM; 619 | ret = ibv_modify_qp(qp, &qp_attr, IBV_QP_STATE | IBV_QP_AV | IBV_QP_PATH_MTU | IBV_QP_DEST_QPN | IBV_QP_RQ_PSN | IBV_QP_MAX_DEST_RD_ATOMIC | IBV_QP_MIN_RNR_TIMER); 620 | if (ret) { 621 | std::cerr << "ibv_modify_qp() to RTR failed ret= " << ret << std::endl; 622 | exit(1); 623 | } 624 | memset(&qp_attr, 0, sizeof(struct ibv_qp_attr)); 625 | qp_attr.qp_state = IBV_QPS_RTS; 626 | qp_attr.sq_psn = 0; 627 | qp_attr.timeout = 14; 628 | qp_attr.retry_cnt = 7; 629 | qp_attr.rnr_retry = 7; 630 | qp_attr.max_rd_atomic = 1; 631 | ret = ibv_modify_qp(qp, &qp_attr, IBV_QP_STATE | IBV_QP_TIMEOUT | IBV_QP_RETRY_CNT | IBV_QP_RNR_RETRY | IBV_QP_SQ_PSN | IBV_QP_MAX_QP_RD_ATOMIC); 632 | if (ret) { 633 | std::cerr << "ibv_modify_qp() to RTS failed" << std::endl; 634 | exit(1); 635 | } 636 | /* Only these fields of the returned ib_resources_t are filled in here. */ 637 | ib_resources->context = context; 638 | ib_resources->pd = pd; 639 | ib_resources->qp = qp; 640 | ib_resources->recv_cq = send_cq; 641 | ib_resources->send_cq = send_cq; 642 | ib_resources->lrecv_buf = recv_buf; 643 | ib_resources->lmr_recv = mr_recv; 644 | 645 | return ib_resources; 646 | } 647 | 648 | 649 | /* Same handshake and state transitions as setup_writeQP_to_Host, but built on the client's send buffer and send memory region (lsend_buf / lmr_send). Exits on failure; returns a malloc'ed ib_resources_t. */ 650 | ib_resources_t* BFContext::setup_readQP_from_Host(ib_resources_t* client_ib_resources, int sfd) { 651 | ibv_context* context = client_ib_resources->context; 652 | ibv_pd* pd = client_ib_resources->pd; 653 | 654 | struct ib_resources_t *ib_resources = (struct ib_resources_t *)malloc(sizeof(struct ib_resources_t)); 655 | 656 | struct ibv_mr *mr_recv = client_ib_resources->lmr_send; 657 | char *recv_buf = client_ib_resources->lsend_buf; 658 | 659 | struct ibv_cq *send_cq = ibv_create_cq(context, BF_SEND_CQ_SIZE, NULL, NULL, 0); 660 | if (!send_cq) { 661 | std::cerr <<
"ERROR: ibv_create_cq() failed" << std::endl; 662 | exit(1); 663 | } 664 | /* RC QP whose send and receive sides share send_cq; no receive WRs. */ 665 | struct ibv_qp_init_attr qp_init_attr; 666 | memset(&qp_init_attr, 0, sizeof(struct ibv_qp_init_attr)); 667 | qp_init_attr.send_cq = send_cq; 668 | qp_init_attr.recv_cq = send_cq; 669 | qp_init_attr.qp_type = IBV_QPT_RC; 670 | qp_init_attr.cap.max_send_wr = BF_MAX_SEND_WQES; 671 | qp_init_attr.cap.max_recv_wr = 0; 672 | qp_init_attr.cap.max_send_sge = 1; 673 | qp_init_attr.cap.max_recv_sge = 0; 674 | struct ibv_qp *qp = ibv_create_qp(pd, &qp_init_attr); 675 | if (!qp) { 676 | std::cerr << "ibv_create_qp() failed errno=" << errno << std::endl; 677 | exit(1); 678 | } 679 | 680 | int ret; 681 | struct ibv_port_attr port_attr; 682 | ret = ibv_query_port(context, PORT_NUM, &port_attr); 683 | if (ret) { 684 | std::cerr << "ibv_query_port() failed ret= " << ret << std::endl; 685 | exit(1); 686 | } 687 | /* Advertise our lid, qpn, gid and the rkey/address of the reused memory region to the peer. */ 688 | struct ib_info_t my_info; 689 | my_info.lid = port_attr.lid; 690 | my_info.qpn = qp->qp_num; 691 | my_info.mkey_data_buffer = mr_recv->rkey; 692 | my_info.addr_data_buffer = (uintptr_t)mr_recv->addr; 693 | int gid_index = get_gid_index(context); 694 | if (ibv_query_gid(context, 1, gid_index, &(my_info.gid) )) { 695 | std::cerr << "ibv_query_gid failed for gid " << gid_index << std::endl; 696 | exit(1); 697 | } 698 | ret = send(sfd, &my_info, sizeof(struct ib_info_t), 0); 699 | if (ret < 0) { 700 | perror("setup_dQP send"); 701 | exit(1); 702 | } 703 | /* Receive the peer's ib_info_t. The result is stored in ret so the check below tests this recv() rather than the earlier send(). */ 704 | struct ib_info_t client_info; 705 | ret = recv(sfd, &client_info, sizeof(struct ib_info_t), 0); 706 | if (ret < 0) { 707 | perror("recv"); 708 | exit(1); 709 | } 710 | 711 | ib_resources->rmr_recv_key = client_info.mkey_data_buffer; 712 | ib_resources->rmr_recv_addr = client_info.addr_data_buffer; 713 | ib_resources->rmr_send_key = client_info.mkey_response_buffer; 714 | ib_resources->rmr_send_addr = client_info.addr_response_buffer; 715 | 716 | struct ibv_qp_attr qp_attr; 717 | memset(&qp_attr, 0, sizeof(struct ibv_qp_attr)); 718 |
qp_attr.qp_state = IBV_QPS_INIT; 719 | qp_attr.pkey_index = 0; 720 | qp_attr.port_num = PORT_NUM; 721 | qp_attr.qp_access_flags = IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_READ; // we'll allow client to RDMA write and read on this QP 722 | ret = ibv_modify_qp(qp, &qp_attr, IBV_QP_STATE | IBV_QP_PKEY_INDEX | IBV_QP_PORT | IBV_QP_ACCESS_FLAGS); 723 | if (ret) { 724 | std::cerr << "ibv_modify_qp() to INIT failed" << std::endl; 725 | exit(1); 726 | } 727 | /* INIT to RTR: address the peer's QP using the lid, gid and qpn received in client_info. */ 728 | memset(&qp_attr, 0, sizeof(struct ibv_qp_attr)); 729 | qp_attr.qp_state = IBV_QPS_RTR; 730 | qp_attr.path_mtu = IBV_MTU_4096; 731 | qp_attr.dest_qp_num = client_info.qpn; // qp number of client 732 | qp_attr.rq_psn = 0 ; 733 | qp_attr.max_dest_rd_atomic = 1; // max in-flight RDMA reads 734 | qp_attr.min_rnr_timer = 12; 735 | qp_attr.ah_attr.is_global = 1; 736 | qp_attr.ah_attr.grh.dgid = client_info.gid; 737 | qp_attr.ah_attr.grh.sgid_index = get_gid_index(context); 738 | qp_attr.ah_attr.grh.flow_label = 0; 739 | qp_attr.ah_attr.grh.hop_limit = 1; 740 | qp_attr.ah_attr.grh.traffic_class = 0; 741 | qp_attr.ah_attr.dlid = client_info.lid; // LID (L2 Address) of client 742 | qp_attr.ah_attr.sl = 0; 743 | qp_attr.ah_attr.src_path_bits = 0; 744 | qp_attr.ah_attr.port_num = PORT_NUM; 745 | ret = ibv_modify_qp(qp, &qp_attr, IBV_QP_STATE | IBV_QP_AV | IBV_QP_PATH_MTU | IBV_QP_DEST_QPN | IBV_QP_RQ_PSN | IBV_QP_MAX_DEST_RD_ATOMIC | IBV_QP_MIN_RNR_TIMER); 746 | if (ret) { 747 | std::cerr << "ibv_modify_qp() to RTR failed ret= " << ret << std::endl; 748 | exit(1); 749 | } 750 | /* RTR to RTS: sender-side PSN, timeout and retry parameters. */ 751 | memset(&qp_attr, 0, sizeof(struct ibv_qp_attr)); 752 | qp_attr.qp_state = IBV_QPS_RTS; 753 | qp_attr.sq_psn = 0; 754 | qp_attr.timeout = 14; 755 | qp_attr.retry_cnt = 7; 756 | qp_attr.rnr_retry = 7; 757 | qp_attr.max_rd_atomic = 1; 758 | ret = ibv_modify_qp(qp, &qp_attr, IBV_QP_STATE | IBV_QP_TIMEOUT | IBV_QP_RETRY_CNT | IBV_QP_RNR_RETRY | IBV_QP_SQ_PSN | IBV_QP_MAX_QP_RD_ATOMIC); 759 | if (ret) { 760 | std::cerr << "ibv_modify_qp() to RTS
failed" << std::endl; 761 | exit(1); 762 | } 763 | 764 | ib_resources->context = context; 765 | ib_resources->pd = pd; 766 | ib_resources->qp = qp; 767 | ib_resources->recv_cq = send_cq; 768 | ib_resources->send_cq = send_cq; 769 | ib_resources->lrecv_buf = recv_buf; 770 | ib_resources->lmr_recv = mr_recv; 771 | 772 | return ib_resources; 773 | } 774 | 775 | 776 | ib_resources_t* BFContext::setup_connection_with_client(CONNECTION_TYPE connection_type, const string& interface, unsigned int port) { 777 | struct ib_resources_t* client_ib_resources; 778 | switch(connection_type) { 779 | case RECV_UDP_CONNECTION: 780 | client_ib_resources = setup_recv_udp_connection_with_client(interface, port); 781 | break; 782 | case SEND_UDP_CONNECTION: 783 | client_ib_resources = setup_send_udp_connection_with_client(interface, port); 784 | break; 785 | /* case TCP_CONNECTION: 786 | client_ib_resources = setup_tcp_connection_with_client(interface); 787 | break; 788 | case IB_CONNECTION: 789 | client_ib_resources = setup_ib_connection_with_client(interface); 790 | break;*/ 791 | default: 792 | std::cerr << "Unknown Connection Type: " << connection_type << std::endl; 793 | exit(1); 794 | } 795 | return client_ib_resources; 796 | } 797 | 798 | 799 | ib_resources_t* BFContext::setup_recv_udp_connection_with_client(const string& interface, unsigned int udp_port) { 800 | struct ib_resources_t *ib_resources = (struct ib_resources_t *)malloc(sizeof(struct ib_resources_t)); 801 | int lfd; 802 | lfd = socket(AF_INET, SOCK_DGRAM, 0); 803 | fcntl(lfd, F_SETFL, O_NONBLOCK); 804 | if (lfd < 0) { 805 | std::cerr << "socket" << std::endl; 806 | exit(1); 807 | } 808 | 809 | struct sockaddr_in server_addr; 810 | memset(&server_addr, 0, sizeof(struct sockaddr_in)); 811 | server_addr.sin_family = AF_INET; 812 | server_addr.sin_addr.s_addr = INADDR_ANY; 813 | server_addr.sin_port = htons(udp_port); 814 | 815 | if (bind(lfd, (struct sockaddr *)&server_addr, sizeof(struct sockaddr_in)) < 0) { 816 | 
std::cerr << "bind" << std::endl; 817 | exit(1); 818 | } 819 | 820 | listen(lfd, 1); 821 | std::cout << "UDP Server is listening on port " << udp_port << std::endl; 822 | 823 | string device_name = ib_device_from_netdev(interface.c_str()); 824 | struct ibv_context *context = ibv_open_device_by_name(device_name); 825 | 826 | struct ibv_pd *pd = ibv_alloc_pd(context); 827 | if (!pd) { 828 | std::cerr << "ibv_alloc_pd() failed" << std::endl; 829 | exit(1); 830 | } 831 | 832 | struct ibv_mr *mr_recv; 833 | char* recv_buf = (char*) malloc(BF_TOTAL_DATA_TO_HOST_SIZE); 834 | /*memset(recv_buf,0,BF_TOTAL_DATA_TO_HOST_SIZE); 835 | for(int i = 0 ; i < BF_MAX_RECV_WQES ; i++) { 836 | *(int*)(recv_buf + i * BF_C2H_MSG_SIZE + HOST_RECV_MSG_SIZE) = 1; 837 | }*/ 838 | mr_recv = ibv_reg_mr(pd, recv_buf, BF_TOTAL_DATA_TO_HOST_SIZE, IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE); 839 | if (!mr_recv) { 840 | std::cerr << "ibv_reg_mr() failed for recv_buf" << std::endl; 841 | exit(1); 842 | } 843 | 844 | ib_resources->client_fd = lfd; 845 | ib_resources->recv_buf_offset = 0; 846 | ib_resources->context = context; 847 | ib_resources->pd = pd; 848 | ib_resources->lrecv_buf = recv_buf; 849 | ib_resources->lmr_recv = mr_recv; 850 | 851 | return ib_resources; 852 | } 853 | 854 | 855 | 856 | ib_resources_t* BFContext::setup_send_udp_connection_with_client(const string& interface, unsigned int udp_port) { 857 | struct ib_resources_t *ib_resources = (struct ib_resources_t *)malloc(sizeof(struct ib_resources_t)); 858 | int lfd; 859 | lfd = socket(AF_INET, SOCK_DGRAM, 0); 860 | fcntl(lfd, F_SETFL, O_NONBLOCK); 861 | if (lfd < 0) { 862 | std::cerr << "socket" << std::endl; 863 | exit(1); 864 | } 865 | 866 | struct sockaddr_in server_addr; 867 | memset(&server_addr, 0, sizeof(struct sockaddr_in)); 868 | server_addr.sin_family = AF_INET; 869 | server_addr.sin_addr.s_addr = INADDR_ANY; 870 | server_addr.sin_port = htons(udp_port); 871 | 872 | if (bind(lfd, (struct sockaddr *)&server_addr, 
sizeof(struct sockaddr_in)) < 0) { 873 | std::cerr << "bind" << std::endl; 874 | exit(1); 875 | } 876 | 877 | listen(lfd, 1); 878 | std::cout << "UDP Server is listening on port " << udp_port << std::endl; 879 | 880 | string device_name = ib_device_from_netdev(interface.c_str()); 881 | struct ibv_context *context = ibv_open_device_by_name(device_name); 882 | 883 | struct ibv_pd *pd = ibv_alloc_pd(context); 884 | if (!pd) { 885 | std::cerr << "ibv_alloc_pd() failed" << std::endl; 886 | exit(1); 887 | } 888 | 889 | struct ibv_mr *mr_send; 890 | char* send_buf = (char*) malloc(BF_TOTAL_DATA_FROM_HOST_SIZE); 891 | mr_send = ibv_reg_mr(pd, send_buf, BF_TOTAL_DATA_FROM_HOST_SIZE, IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_READ); 892 | if (!send_buf) { 893 | std::cerr << "ibv_reg_mr() failed for send_buf" << std::endl; 894 | exit(1); 895 | } 896 | 897 | ib_resources->client_fd = lfd; 898 | ib_resources->context = context; 899 | ib_resources->pd = pd; 900 | ib_resources->lsend_buf = send_buf; 901 | ib_resources->lmr_send = mr_send; 902 | 903 | return ib_resources; 904 | } 905 | 906 | 907 | 908 | /* 909 | ib_resources_t* BFContext::setup_udp_connection_with_client(const string& interface, unsigned int udp_port) { 910 | 911 | struct ib_resources_t *ib_resources = (struct ib_resources_t *)malloc(sizeof(struct ib_resources_t)); 912 | int lfd, sfd; 913 | lfd = socket(AF_INET, SOCK_DGRAM, 0); 914 | fcntl(lfd, F_SETFL, O_NONBLOCK); 915 | if (lfd < 0) { 916 | std::cerr << "socket" << std::endl; 917 | exit(1); 918 | } 919 | 920 | struct sockaddr_in server_addr; 921 | memset(&server_addr, 0, sizeof(struct sockaddr_in)); 922 | server_addr.sin_family = AF_INET; 923 | server_addr.sin_addr.s_addr = INADDR_ANY; 924 | server_addr.sin_port = htons(udp_port); 925 | 926 | if (bind(lfd, (struct sockaddr *)&server_addr, sizeof(struct sockaddr_in)) < 0) { 927 | std::cerr << "bind" << std::endl; 928 | exit(1); 929 | } 930 | 931 | listen(lfd, 1); 932 | 933 | std::cout << "UDP Server is waiting on 
port " << udp_port << ". Client can connect" << std::endl; 934 | sfd = lfd; 935 | 936 | string device_name = ib_device_from_netdev(interface.c_str()); 937 | struct ibv_context *context = ibv_open_device_by_name(device_name); 938 | 939 | struct ibv_pd *pd = ibv_alloc_pd(context); 940 | if (!pd) { 941 | std::cerr << "ibv_alloc_pd() failed" << std::endl; 942 | exit(1); 943 | } 944 | 945 | struct ibv_mr *mr_recv; 946 | char* recv_buf = (char*) malloc(BF_TOTAL_DATA_TO_HOST_SIZE); 947 | memset(recv_buf,0,BF_TOTAL_DATA_TO_HOST_SIZE); 948 | for(int i = 0 ; i < BF_MAX_RECV_WQES ; i++) { 949 | *(int*)(recv_buf + i * BF_C2H_MSG_SIZE + HOST_RECV_MSG_SIZE) = 1; 950 | } 951 | mr_recv = ibv_reg_mr(pd, recv_buf, BF_TOTAL_DATA_TO_HOST_SIZE, IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE); 952 | if (!mr_recv) { 953 | std::cerr << "ibv_reg_mr() failed for recv_buf" << std::endl; 954 | exit(1); 955 | } 956 | 957 | 958 | struct ibv_mr *mr_send; 959 | char* send_buf = (char*) malloc(BF_TOTAL_DATA_FROM_HOST_SIZE); 960 | memset(recv_buf,0,BF_TOTAL_DATA_FROM_HOST_SIZE); 961 | mr_send = ibv_reg_mr(pd, send_buf, BF_TOTAL_DATA_FROM_HOST_SIZE, IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_READ); 962 | if (!send_buf) { 963 | std::cerr << "ibv_reg_mr() failed for send_buf" << std::endl; 964 | exit(1); 965 | } 966 | 967 | _client_sfd = sfd; 968 | _recv_buf_offset = 0; 969 | ib_resources->context = context; 970 | ib_resources->pd = pd; 971 | ib_resources->lrecv_buf = recv_buf; 972 | ib_resources->lmr_recv = mr_recv; 973 | ib_resources->lsend_buf = send_buf; 974 | ib_resources->lmr_send = mr_send; 975 | 976 | return ib_resources; 977 | } 978 | 979 | 980 | ib_resources_t* BFContext::setup_tcp_connection_with_client(const string& interface) { 981 | 982 | struct ib_resources_t *ib_resources = (struct ib_resources_t *)malloc(sizeof(struct ib_resources_t)); 983 | int lfd, sfd; 984 | lfd = socket(AF_INET, SOCK_STREAM, 0); 985 | if (lfd < 0) { 986 | std::cerr << "socket" << std::endl; 987 | exit(1); 988 | 
} 989 | 990 | int tcp_port = TCP_PORT_NUM; 991 | struct sockaddr_in server_addr; 992 | memset(&server_addr, 0, sizeof(struct sockaddr_in)); 993 | server_addr.sin_family = AF_INET; 994 | server_addr.sin_addr.s_addr = INADDR_ANY; 995 | server_addr.sin_port = htons(tcp_port); 996 | 997 | if (bind(lfd, (struct sockaddr *)&server_addr, sizeof(struct sockaddr_in)) < 0) { 998 | std::cerr << "bind" << std::endl; 999 | exit(1); 1000 | } 1001 | 1002 | listen(lfd, 1); 1003 | 1004 | std::cout << "TCP Server is waiting on port " << tcp_port << ". Client can connect" << std::endl; 1005 | sfd = accept(lfd, NULL, NULL); 1006 | if (sfd < 0) { 1007 | std::cerr << "accept" << std::endl; 1008 | exit(1); 1009 | } 1010 | std::cout << "client is connected" << std::endl; 1011 | 1012 | string device_name = ib_device_from_netdev(interface.c_str()); 1013 | struct ibv_context *context = ibv_open_device_by_name(device_name); 1014 | 1015 | struct ibv_pd *pd = ibv_alloc_pd(context); 1016 | if (!pd) { 1017 | std::cerr << "ibv_alloc_pd() failed" << std::endl; 1018 | exit(1); 1019 | } 1020 | 1021 | struct ibv_mr *mr_recv; 1022 | char* recv_buf = (char*) malloc(BF_TOTAL_DATA_TO_HOST_SIZE); 1023 | mr_recv = ibv_reg_mr(pd, recv_buf, BF_TOTAL_DATA_TO_HOST_SIZE, IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE); 1024 | if (!mr_recv) { 1025 | std::cerr << "ibv_reg_mr() failed for recv_buf" << std::endl; 1026 | exit(1); 1027 | } 1028 | 1029 | struct ibv_mr *mr_send; 1030 | char* send_buf = (char*) malloc(BF_TOTAL_DATA_FROM_HOST_SIZE); 1031 | mr_send = ibv_reg_mr(pd, send_buf, BF_TOTAL_DATA_FROM_HOST_SIZE, IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_READ); 1032 | if (!send_buf) { 1033 | std::cerr << "ibv_reg_mr() failed for send_buf" << std::endl; 1034 | exit(1); 1035 | } 1036 | 1037 | _client_sfd = sfd; 1038 | _recv_buf_offset = 0; 1039 | ib_resources->context = context; 1040 | ib_resources->pd = pd; 1041 | ib_resources->lrecv_buf = recv_buf; 1042 | ib_resources->lmr_recv = mr_recv; 1043 | 
ib_resources->lsend_buf = send_buf; 1044 | ib_resources->lmr_send = mr_send; 1045 | 1046 | return ib_resources; 1047 | } 1048 | 1049 | 1050 | ib_resources_t* BFContext::setup_ib_connection_with_client(const string& interface) { 1051 | 1052 | struct ib_resources_t *ib_resources = (struct ib_resources_t *)malloc(sizeof(struct ib_resources_t)); 1053 | int lfd, sfd; 1054 | lfd = socket(AF_INET, SOCK_STREAM, 0); 1055 | if (lfd < 0) { 1056 | std::cerr << "socket" << std::endl; 1057 | exit(1); 1058 | } 1059 | 1060 | int tcp_port = TCP_PORT_NUM; 1061 | struct sockaddr_in server_addr; 1062 | memset(&server_addr, 0, sizeof(struct sockaddr_in)); 1063 | server_addr.sin_family = AF_INET; 1064 | server_addr.sin_addr.s_addr = INADDR_ANY; 1065 | server_addr.sin_port = htons(tcp_port); 1066 | 1067 | if (bind(lfd, (struct sockaddr *)&server_addr, sizeof(struct sockaddr_in)) < 0) { 1068 | std::cerr << "bind" << std::endl; 1069 | exit(1); 1070 | } 1071 | 1072 | listen(lfd, 1); 1073 | 1074 | std::cout << "IB Server is waiting on port " << tcp_port << ". 
Client can connect" << std::endl; 1075 | sfd = accept(lfd, NULL, NULL); 1076 | if (sfd < 0) { 1077 | std::cerr << "accept" << std::endl; 1078 | exit(1); 1079 | } 1080 | std::cout << "client is connected" << std::endl; 1081 | 1082 | string device_name = ib_device_from_netdev(interface.c_str()); 1083 | struct ibv_context *context = ibv_open_device_by_name(device_name); 1084 | 1085 | struct ibv_pd *pd = ibv_alloc_pd(context); 1086 | if (!pd) { 1087 | std::cerr << "ibv_alloc_pd() failed" << std::endl; 1088 | exit(1); 1089 | } 1090 | 1091 | struct ibv_mr *mr_recv; 1092 | char* recv_buf = (char*) malloc(BF_TOTAL_DATA_TO_HOST_SIZE); 1093 | memset(recv_buf,0,BF_TOTAL_DATA_TO_HOST_SIZE); 1094 | for(int i = 0 ; i < BF_MAX_RECV_WQES ; i++) { 1095 | *(int*)(recv_buf + i * BF_C2H_MSG_SIZE + BF_C2H_MSG_SIZE - sizeof(unsigned int)) = 1; 1096 | } 1097 | mr_recv = ibv_reg_mr(pd, recv_buf, BF_TOTAL_DATA_TO_HOST_SIZE, IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE); 1098 | if (!mr_recv) { 1099 | std::cerr << "ibv_reg_mr() failed for recv_buf" << std::endl; 1100 | exit(1); 1101 | } 1102 | 1103 | struct ibv_mr *mr_send; 1104 | char* send_buf = (char*) malloc(BF_TOTAL_DATA_FROM_HOST_SIZE); 1105 | mr_send = ibv_reg_mr(pd, send_buf, BF_TOTAL_DATA_FROM_HOST_SIZE, IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_READ); 1106 | if (!send_buf) { 1107 | std::cerr << "ibv_reg_mr() failed for send_buf" << std::endl; 1108 | exit(1); 1109 | } 1110 | 1111 | struct ibv_cq *recv_cq = ibv_create_cq(context, BF_RECV_CQ_SIZE, NULL, NULL, 0); // create a CQ with place for 100 CQEs 1112 | if (!recv_cq) { 1113 | std::cerr << "ibv_create_cq() failed" << std::endl; 1114 | exit(1); 1115 | } 1116 | 1117 | struct ibv_cq *send_cq = ibv_create_cq(context, BF_SEND_CQ_SIZE, NULL, NULL, 0); // create a CQ with place for 100 CQEs 1118 | if (!send_cq) { 1119 | std::cerr << "ibv_create_cq() failed" << std::endl; 1120 | exit(1); 1121 | } 1122 | 1123 | struct ibv_qp_init_attr qp_init_attr; 1124 | memset(&qp_init_attr, 0, 
sizeof(struct ibv_qp_init_attr)); 1125 | qp_init_attr.send_cq = send_cq; 1126 | qp_init_attr.recv_cq = recv_cq; 1127 | qp_init_attr.qp_type = IBV_QPT_RC; // we'll use RC transport service, which supports RDMA 1128 | qp_init_attr.cap.max_send_wr = BF_MAX_SEND_WQES; // max of 1 WQE in-flight in SQ. that's enough for us 1129 | qp_init_attr.cap.max_recv_wr = BF_MAX_RECV_WQES; // max of 8 WQE's in-flight in RQ. that's more than enough for us 1130 | qp_init_attr.cap.max_send_sge = 1; // 1 SGE in each send WQE 1131 | qp_init_attr.cap.max_recv_sge = 1; // 1 SGE in each recv WQE 1132 | struct ibv_qp *qp = ibv_create_qp(pd, &qp_init_attr); 1133 | if (!qp) { 1134 | std::cerr << "ibv_create_qp() failed" << std::endl; 1135 | exit(1); 1136 | } 1137 | 1138 | int ret; 1139 | struct ibv_port_attr port_attr; 1140 | ret = ibv_query_port(context, PORT_NUM, &port_attr); 1141 | if (ret) { 1142 | std::cerr << "ERROR: ibv_query_port() failed ret= " << ret << std::endl; 1143 | exit(1); 1144 | } 1145 | 1146 | struct ib_info_t my_info; 1147 | my_info.lid = port_attr.lid; 1148 | my_info.qpn = qp->qp_num; 1149 | my_info.mkey_data_buffer = mr_recv->rkey; 1150 | my_info.addr_data_buffer = (uintptr_t)mr_recv->addr; 1151 | my_info.mkey_response_buffer = mr_send->rkey; 1152 | my_info.addr_response_buffer = (uintptr_t)mr_send->addr; 1153 | int gid_index = get_gid_index(context); 1154 | if (ibv_query_gid(context, 1, gid_index, &(my_info.gid) )) { 1155 | std::cerr << "ibv_query_gid failed for gid " << gid_index << std::endl; 1156 | exit(1); 1157 | } 1158 | 1159 | ret = send(sfd, &my_info, sizeof(struct ib_info_t), 0); 1160 | if (ret < 0) { 1161 | perror("send"); 1162 | std::cerr << "send" << std::endl; 1163 | exit(1); 1164 | } 1165 | 1166 | struct ib_info_t client_info; 1167 | ret = recv(sfd, &client_info, sizeof(struct ib_info_t), 0); 1168 | if (ret < 0) { 1169 | std::cerr << "recv" << std::endl; 1170 | exit(1); 1171 | } 1172 | 1173 | ib_resources->rmr_recv_key = client_info.mkey_data_buffer; 1174 | 
ib_resources->rmr_recv_addr = client_info.addr_data_buffer; 1175 | ib_resources->rmr_send_key = client_info.mkey_response_buffer; 1176 | ib_resources->rmr_send_addr = client_info.addr_response_buffer; 1177 | 1178 | close(sfd); 1179 | close(lfd); 1180 | 1181 | struct ibv_qp_attr qp_attr; 1182 | memset(&qp_attr, 0, sizeof(struct ibv_qp_attr)); 1183 | qp_attr.qp_state = IBV_QPS_INIT; 1184 | qp_attr.pkey_index = 0; 1185 | qp_attr.port_num = PORT_NUM; 1186 | qp_attr.qp_access_flags = IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_READ; // we'll allow client to RDMA write and read on this QP 1187 | ret = ibv_modify_qp(qp, &qp_attr, IBV_QP_STATE | IBV_QP_PKEY_INDEX | IBV_QP_PORT | IBV_QP_ACCESS_FLAGS); 1188 | if (ret) { 1189 | std::cerr << "ibv_modify_qp() to INIT failed" << std::endl; 1190 | exit(1); 1191 | } 1192 | 1193 | memset(&qp_attr, 0, sizeof(struct ibv_qp_attr)); 1194 | qp_attr.qp_state = IBV_QPS_RTR; 1195 | qp_attr.path_mtu = IBV_MTU_4096; 1196 | qp_attr.dest_qp_num = client_info.qpn; // qp number of client 1197 | qp_attr.rq_psn = 0 ; 1198 | qp_attr.max_dest_rd_atomic = 1; // max in-flight RDMA reads 1199 | qp_attr.min_rnr_timer = 12; 1200 | qp_attr.ah_attr.is_global = 1; 1201 | qp_attr.ah_attr.grh.dgid = client_info.gid; 1202 | qp_attr.ah_attr.grh.sgid_index = get_gid_index(context); 1203 | qp_attr.ah_attr.grh.flow_label = 0; 1204 | qp_attr.ah_attr.grh.hop_limit = 1; 1205 | qp_attr.ah_attr.grh.traffic_class = 0; 1206 | qp_attr.ah_attr.dlid = client_info.lid; // LID (L2 Address) of client 1207 | qp_attr.ah_attr.sl = 0; 1208 | qp_attr.ah_attr.src_path_bits = 0; 1209 | qp_attr.ah_attr.port_num = PORT_NUM; 1210 | ret = ibv_modify_qp(qp, &qp_attr, IBV_QP_STATE | IBV_QP_AV | IBV_QP_PATH_MTU | IBV_QP_DEST_QPN | IBV_QP_RQ_PSN | IBV_QP_MAX_DEST_RD_ATOMIC | IBV_QP_MIN_RNR_TIMER); 1211 | if (ret) { 1212 | std::cerr << "bv_modify_qp() to RTR failed ret= " << ret << std::endl; 1213 | exit(1); 1214 | } 1215 | 1216 | memset(&qp_attr, 0, sizeof(struct ibv_qp_attr)); 1217 | 
qp_attr.qp_state = IBV_QPS_RTS; 1218 | qp_attr.sq_psn = 0; 1219 | qp_attr.timeout = 14; 1220 | qp_attr.retry_cnt = 7; 1221 | qp_attr.rnr_retry = 7; 1222 | qp_attr.max_rd_atomic = 1; 1223 | ret = ibv_modify_qp(qp, &qp_attr, IBV_QP_STATE | IBV_QP_TIMEOUT | IBV_QP_RETRY_CNT | IBV_QP_RNR_RETRY | IBV_QP_SQ_PSN | IBV_QP_MAX_QP_RD_ATOMIC); 1224 | if (ret) { 1225 | std::cerr << "ibv_modify_qp() to RTS failed" << std::endl; 1226 | exit(1); 1227 | } 1228 | 1229 | ib_resources->context = context; 1230 | ib_resources->pd = pd; 1231 | ib_resources->qp = qp; 1232 | ib_resources->recv_cq = recv_cq; 1233 | ib_resources->send_cq = send_cq; 1234 | ib_resources->lrecv_buf = recv_buf; 1235 | ib_resources->lmr_recv = mr_recv; 1236 | ib_resources->lsend_buf = send_buf; 1237 | ib_resources->lmr_send = mr_send; 1238 | 1239 | return ib_resources; 1240 | } 1241 | 1242 | */ 1243 | struct send_wr_t* BFContext::prepare_send_qps(ib_resources_t* host_ib_resources, ib_resources_t* notify_ib_resources, ib_resources_t* client_ib_resources) { 1244 | 1245 | struct send_wr_t* send_wr = (struct send_wr_t*) malloc(sizeof(send_wr_t)); 1246 | 1247 | send_wr->read_sg = (ibv_sge*)malloc(sizeof(ibv_sge)); 1248 | send_wr->read_wr = (ibv_send_wr*)malloc(sizeof(ibv_send_wr)); 1249 | memset(send_wr->read_sg, 0, sizeof(struct ibv_sge)); 1250 | memset(send_wr->read_wr, 0, sizeof(struct ibv_send_wr)); 1251 | 1252 | send_wr->read_sg->addr = (uintptr_t)notify_ib_resources->lmr_recv->addr; 1253 | send_wr->read_sg->length = 2 * _workers_num * sizeof(unsigned int); 1254 | send_wr->read_sg->lkey = notify_ib_resources->lmr_recv->lkey; 1255 | 1256 | send_wr->read_wr->wr_id = 0; //This could be a problem when we need to debug 1257 | send_wr->read_wr->sg_list = send_wr->read_sg; 1258 | send_wr->read_wr->num_sge = 1; 1259 | send_wr->read_wr->opcode = IBV_WR_RDMA_READ; 1260 | send_wr->read_wr->send_flags = IBV_SEND_SIGNALED; 1261 | send_wr->read_wr->wr.rdma.remote_addr = notify_ib_resources->rmr_send_addr; 1262 | 
send_wr->read_wr->wr.rdma.rkey = notify_ib_resources->rmr_send_key; 1263 | 1264 | send_wr->write_sg = (ibv_sge*)malloc(sizeof(ibv_sge)); 1265 | send_wr->write_wr = (ibv_send_wr*)malloc(sizeof(ibv_send_wr)); 1266 | memset(send_wr->write_sg, 0, sizeof(struct ibv_sge)); 1267 | memset(send_wr->write_wr, 0, sizeof(struct ibv_send_wr)); 1268 | 1269 | send_wr->write_sg->addr = (uintptr_t)notify_ib_resources->lmr_send->addr + _workers_num * sizeof(unsigned int); 1270 | send_wr->write_sg->length = _workers_num * sizeof(unsigned int); 1271 | send_wr->write_sg->lkey = notify_ib_resources->lmr_send->lkey; 1272 | 1273 | send_wr->write_wr->wr_id = 0; //This could be a problem when we need to debug 1274 | send_wr->write_wr->sg_list = send_wr->write_sg; 1275 | send_wr->write_wr->num_sge = 1; 1276 | send_wr->write_wr->opcode = IBV_WR_RDMA_WRITE; 1277 | send_wr->write_wr->send_flags = IBV_SEND_SIGNALED; 1278 | send_wr->write_wr->wr.rdma.remote_addr = notify_ib_resources->rmr_recv_addr; 1279 | send_wr->write_wr->wr.rdma.rkey = notify_ib_resources->rmr_recv_key; 1280 | 1281 | 1282 | send_wr->rdma_read_sg_list = (ibv_sge*) malloc(BF_MAX_SEND_WQES * sizeof(ibv_sge)); 1283 | memset(send_wr->rdma_read_sg_list, 0, BF_MAX_SEND_WQES * sizeof(struct ibv_sge)); 1284 | send_wr->rdma_read_wr_list = (ibv_send_wr*)malloc(BF_MAX_SEND_WQES * sizeof(ibv_send_wr)); 1285 | memset(send_wr->rdma_read_wr_list, 0, BF_MAX_SEND_WQES * sizeof(struct ibv_send_wr)); 1286 | 1287 | for(int i = 0 ; i < BF_MAX_SEND_WQES ; i++) { 1288 | send_wr->rdma_read_sg_list[i].addr = (uintptr_t)client_ib_resources->lmr_send->addr + i * BF_H2C_MSG_SIZE; 1289 | send_wr->rdma_read_sg_list[i].length = BF_H2C_MSG_SIZE; 1290 | send_wr->rdma_read_sg_list[i].lkey = client_ib_resources->lmr_send->lkey; 1291 | } 1292 | 1293 | for (int i = 0; i < BF_MAX_SEND_WQES; i++) { 1294 | send_wr->rdma_read_wr_list[i].wr_id = i; 1295 | send_wr->rdma_read_wr_list[i].sg_list = &(send_wr->rdma_read_sg_list[i]); 1296 | 
send_wr->rdma_read_wr_list[i].num_sge = 1; 1297 | send_wr->rdma_read_wr_list[i].opcode = IBV_WR_RDMA_READ; 1298 | send_wr->rdma_read_wr_list[i].send_flags = IBV_SEND_SIGNALED; 1299 | send_wr->rdma_read_wr_list[i].wr.rdma.remote_addr = host_ib_resources->rmr_send_addr + i * BF_H2C_MSG_SIZE; 1300 | send_wr->rdma_read_wr_list[i].wr.rdma.rkey = host_ib_resources->rmr_send_key; 1301 | } 1302 | 1303 | return send_wr; 1304 | } 1305 | 1306 | struct recv_wr_t* BFContext::prepare_recv_qps(ib_resources_t* host_ib_resources, ib_resources_t* client_ib_resources) { 1307 | struct recv_wr_t* recv_wr = (struct recv_wr_t*) malloc(sizeof(recv_wr_t)); 1308 | recv_wr->recv_sg_list = (ibv_sge*) malloc(BF_MAX_RECV_WQES * sizeof(ibv_sge)); 1309 | memset(recv_wr->recv_sg_list, 0, BF_MAX_RECV_WQES * sizeof(struct ibv_sge)); 1310 | 1311 | for (int i = 0; i < BF_MAX_RECV_WQES; i++) { 1312 | recv_wr->recv_sg_list[i].addr = (uintptr_t)client_ib_resources->lmr_recv->addr + i * BF_C2H_MSG_SIZE; 1313 | recv_wr->recv_sg_list[i].length = BF_C2H_MSG_SIZE; 1314 | recv_wr->recv_sg_list[i].lkey = client_ib_resources->lmr_recv->lkey; 1315 | } 1316 | 1317 | recv_wr->rdma_write_wr_list = (ibv_send_wr*)malloc(BF_MAX_RECV_WQES * sizeof(ibv_send_wr)); 1318 | memset(recv_wr->rdma_write_wr_list, 0, BF_MAX_RECV_WQES * sizeof(struct ibv_send_wr)); 1319 | 1320 | for (int i = 0; i < BF_MAX_RECV_WQES; i++) { 1321 | recv_wr->rdma_write_wr_list[i].wr_id = i; 1322 | recv_wr->rdma_write_wr_list[i].sg_list = &(recv_wr->recv_sg_list[i]); /* we could have used response_sg_list[i] */ 1323 | recv_wr->rdma_write_wr_list[i].num_sge = 1; 1324 | recv_wr->rdma_write_wr_list[i].opcode = IBV_WR_RDMA_WRITE; 1325 | recv_wr->rdma_write_wr_list[i].send_flags = IBV_SEND_SIGNALED; 1326 | recv_wr->rdma_write_wr_list[i].wr.rdma.remote_addr = host_ib_resources->rmr_recv_addr + i * BF_C2H_MSG_SIZE; 1327 | recv_wr->rdma_write_wr_list[i].wr.rdma.rkey = host_ib_resources->rmr_recv_key; 1328 | } 1329 | return recv_wr; 1330 | } 1331 | 1332 | /* 
1333 | int BFContext::get_host_sfd() { 1334 | return _host_sfd; 1335 | } 1336 | 1337 | int BFContext::get_client_sfd() { 1338 | return _client_sfd; 1339 | } 1340 | 1341 | 1342 | ib_resources_t* BFContext::get_nQP_ib_resources() { 1343 | return host_notify_ib_resources; 1344 | } 1345 | 1346 | 1347 | void BFContext::set_nQP_ib_resources(ib_resources_t* ib_resources) { 1348 | host_notify_ib_resources = ib_resources; 1349 | } 1350 | */ 1351 | 1352 | BFContext::BFContext(CONNECTION_TYPE connection_type, unsigned int host_port_num, unsigned int client_port_num, unsigned int workers_num) : _connection_type(connection_type), _host_port_num(host_port_num), _client_port_num(client_port_num), _workers_num(workers_num) { 1353 | switch(_connection_type) { 1354 | case UDP_CONNECTION: 1355 | _recv_connection_type = RECV_UDP_CONNECTION; 1356 | _send_connection_type = SEND_UDP_CONNECTION; 1357 | break; 1358 | default: 1359 | std::cout << "unknown connection type " << connection_type << std::endl; 1360 | } 1361 | }; 1362 | 1363 | 1364 | void BFContext::run_all() { 1365 | client_md client_md_rbuf[BF_MAX_RECV_WQES]; 1366 | memset(client_md_rbuf, 0, BF_MAX_RECV_WQES * sizeof(client_md)); 1367 | 1368 | thread tsend(&BFContext::send_thread, this, _connection_type, _host_port_num, _client_port_num + 50, client_md_rbuf); 1369 | recv_thread(_connection_type, _host_port_num, _client_port_num, client_md_rbuf); 1370 | 1371 | tsend.join(); 1372 | } 1373 | 1374 | BFContext::~BFContext() {} 1375 | 1376 | /* 1377 | BFContext::BFContext(bool first_connection, bool last_connection, unsigned int connection_base_id, unsigned int connection_id, CONNECTION_TYPE connection_type, unsigned int connections_num, int sfd) : _first_connection(first_connection), _last_connection(last_connection), _connection_base_id(connection_base_id), _connection_id(connection_id), _connection_type(connection_type), _connections_num(connections_num) { 1378 | 1379 | std::cout << "Connection type: " << _connection_type << 
std::endl; 1380 | std::cout << "wait for client to connect in order to create QPs between BlueField <==> Host" << std::endl; 1381 | client_ib_resources = setup_connection_with_client("enp3s0f0", UDP_PORT_NUM + _connection_base_id + _connection_id); 1382 | std::cout << "Client is connected" << std::endl; 1383 | 1384 | _last_wr_id = 1; 1385 | _load_factor = 0; 1386 | 1387 | _host_sfd = sfd; 1388 | if(_first_connection) { 1389 | _host_sfd = socket(AF_INET, SOCK_STREAM, 0); 1390 | if (_host_sfd < 0) { 1391 | std::cerr << "socket" << std::endl; 1392 | exit(1); 1393 | } 1394 | struct sockaddr_in server_addr; 1395 | server_addr.sin_addr.s_addr = inet_addr("192.168.0.20"); 1396 | server_addr.sin_family = AF_INET; 1397 | server_addr.sin_port = htons(TCP_PORT_NUM + _connection_base_id + _connection_id); 1398 | if (connect(_host_sfd, (struct sockaddr *)&server_addr, sizeof(struct sockaddr_in)) < 0) { 1399 | std::cerr << "connect" << std::endl; 1400 | exit(1); 1401 | } 1402 | std::cout << "BlueField is connected to Host" << std::endl; 1403 | } 1404 | 1405 | if(_first_connection) { 1406 | host_notify_ib_resources = setup_nQP_Host(_host_sfd); 1407 | std::cout << "Host notify QP is established" << std::endl; 1408 | } else { 1409 | host_notify_ib_resources = NULL; 1410 | std::cout << "Host notify QP is already established" << std::endl; 1411 | } 1412 | 1413 | host_data_ib_resources = setup_dQP_Host(_host_sfd); 1414 | std::cout << "Host data QP is established" << std::endl; 1415 | 1416 | 1417 | if(_last_connection) { //last one 1418 | close(_host_sfd); 1419 | } 1420 | 1421 | prepare_qps(); 1422 | 1423 | } 1424 | 1425 | 1426 | BFContext::~BFContext() { 1427 | close(_client_sfd); 1428 | ibv_destroy_qp(client_ib_resources->qp); 1429 | ibv_destroy_qp(host_data_ib_resources->qp); 1430 | if(host_notify_ib_resources != NULL) { 1431 | ibv_destroy_qp(host_notify_ib_resources->qp); 1432 | ibv_destroy_cq(host_notify_ib_resources->recv_cq); 1433 | 
ibv_destroy_cq(host_notify_ib_resources->send_cq); 1434 | ibv_dereg_mr(host_notify_ib_resources->lmr_recv); 1435 | ibv_dereg_mr(host_notify_ib_resources->lmr_send); 1436 | free(host_notify_ib_resources->lrecv_buf); 1437 | free(host_notify_ib_resources->lsend_buf); 1438 | free(host_notify_ib_resources); 1439 | free(read_wr); 1440 | free(read_sg); 1441 | free(write_sg); 1442 | free(write_wr); 1443 | } 1444 | ibv_destroy_cq(client_ib_resources->recv_cq); 1445 | ibv_destroy_cq(host_data_ib_resources->recv_cq); 1446 | ibv_destroy_cq(client_ib_resources->send_cq); 1447 | ibv_destroy_cq(host_data_ib_resources->send_cq); 1448 | ibv_dereg_mr(client_ib_resources->lmr_recv); 1449 | ibv_dereg_mr(client_ib_resources->lmr_send); 1450 | free(client_ib_resources->lrecv_buf); 1451 | free(client_ib_resources->lsend_buf); 1452 | ibv_dealloc_pd(client_ib_resources->pd); 1453 | ibv_close_device(client_ib_resources->context); 1454 | free(client_ib_resources); 1455 | free(host_data_ib_resources); 1456 | 1457 | free(recv_sg_list); 1458 | free(recv_wr_list); 1459 | free(rdma_read_sg_list); 1460 | free(rdma_read_wr_list); 1461 | free(response_wr_list); 1462 | free(rdma_write_wr_list); 1463 | } 1464 | */ 1465 | 1466 | 1467 | void BFContext::recv_thread(CONNECTION_TYPE connection_type, unsigned int host_port_num, unsigned int client_port_num, client_md* client_md_rbuf) { 1468 | std::cout << "*** recv_thread ***" << std::endl; 1469 | 1470 | std::cout << "Client connection type: " << connection_type << std::endl; 1471 | std::cout << "wait for client to connect in order to create QPs between BlueField <==> Host" << std::endl; 1472 | ib_resources_t* client_ib_resources = setup_connection_with_client(_recv_connection_type, "enp3s0f0", client_port_num); 1473 | 1474 | std::cout << "Connect to Host" << std::endl; 1475 | int host_fd = socket(AF_INET, SOCK_STREAM, 0); 1476 | if (host_fd < 0) { 1477 | perror("socket"); 1478 | exit(1); 1479 | } 1480 | 1481 | struct sockaddr_in server_addr; 1482 | 
server_addr.sin_addr.s_addr = inet_addr("192.168.0.20"); 1483 | server_addr.sin_family = AF_INET; 1484 | server_addr.sin_port = htons(host_port_num); 1485 | if (connect(host_fd, (struct sockaddr *)&server_addr, sizeof(struct sockaddr_in)) < 0) { 1486 | perror("connect"); 1487 | exit(1); 1488 | } 1489 | std::cout << "BlueField is connected to Host,Data can be exchanged." << std::endl; 1490 | 1491 | ib_resources_t* host_ib_resources = setup_writeQP_to_Host(client_ib_resources, host_fd); 1492 | std::cout << "Host write data QP is established" << std::endl; 1493 | 1494 | std::cout << "closing connection with Host" << std::endl; 1495 | close(host_fd); 1496 | 1497 | recv_wr_t* recv_wr = prepare_recv_qps(host_ib_resources, client_ib_resources); 1498 | 1499 | recv_loop(client_md_rbuf, host_ib_resources, client_ib_resources, recv_wr->rdma_write_wr_list); 1500 | //clean before exit 1501 | } 1502 | 1503 | 1504 | inline void BFContext::mark_owner_int(int* owner_int) { 1505 | *owner_int = 1; 1506 | } 1507 | 1508 | 1509 | inline bool BFContext::can_push_to_host(client_md* client_md_rbuf, unsigned int rbuf_index) { 1510 | bool ret = !client_md_rbuf[rbuf_index]._valid; 1511 | // if(ret == false) std::cout << "recv_thread rbuf_index " << rbuf_index << std::endl; 1512 | return ret; 1513 | } 1514 | 1515 | 1516 | inline int BFContext::recvfrom_client(client_md* client_md_rbuf, ib_resources_t* client_ib_resources, unsigned int rbuf_index) { 1517 | char* recv_buf = client_ib_resources->lrecv_buf + client_ib_resources->recv_buf_offset * BF_C2H_MSG_SIZE; 1518 | int ret = recvfrom(client_ib_resources->client_fd, recv_buf, HOST_RECV_MSG_SIZE, MSG_WAITALL, (struct sockaddr *)&(client_md_rbuf[rbuf_index]._client_addr), &(client_md_rbuf[rbuf_index]._client_addr_len)); 1519 | if(ret == -1) { 1520 | return 0; 1521 | } 1522 | /* std::cout << "recevied packet rbuf_index " << std::endl; 1523 | for(int i = 0 ; i < ret ; i++) { 1524 | printf("%x ",recv_buf[i]); 1525 | } 1526 | printf("\n");*/ 1527 | 
assert(ret == HOST_RECV_MSG_SIZE); 1528 | // std::cout << "recv packet" << std::endl; 1529 | #ifdef __MEASURE_GPU_RTT__ 1530 | client_md_rbuf[rbuf_index]._time_stamp = get_time_msec(); 1531 | #endif 1532 | client_md_rbuf[rbuf_index]._valid = true; 1533 | // std::cout << "assign to rbuf_index " << rbuf_index << std::endl; 1534 | mark_owner_int((int*)(recv_buf + ret)); 1535 | 1536 | client_ib_resources->recv_buf_offset += 1; 1537 | if(client_ib_resources->recv_buf_offset == BF_MAX_RECV_WQES) { 1538 | client_ib_resources->recv_buf_offset = 0; 1539 | } 1540 | return 1; 1541 | } 1542 | 1543 | 1544 | int BFContext::poll_udp_request_from_client(client_md* client_md_rbuf, ib_resources_t* client_ib_resources, unsigned int rbuf_index) { 1545 | if(!can_push_to_host(client_md_rbuf, rbuf_index)) { 1546 | return 0; 1547 | } 1548 | 1549 | return recvfrom_client(client_md_rbuf, client_ib_resources, rbuf_index); 1550 | } 1551 | 1552 | 1553 | static unsigned int _worker_id = 0; 1554 | inline void BFContext::inc_worker_id() { 1555 | _worker_id++; 1556 | if(_worker_id == _workers_num){ 1557 | _worker_id = 0; 1558 | } 1559 | } 1560 | inline unsigned int BFContext::get_free_worker() { 1561 | return _worker_id; 1562 | } 1563 | 1564 | void BFContext::recv_loop(client_md* client_md_rbuf, ib_resources_t* host_ib_resources, ib_resources_t* client_ib_resources, ibv_send_wr* rdma_write_wr_list) { 1565 | int rbuf_index[_workers_num]; 1566 | for(int i = 0 ; i < _workers_num ; i++) { 1567 | rbuf_index[i] = i; 1568 | } 1569 | while(true) { 1570 | unsigned int worker_id = get_free_worker(); 1571 | if(poll_request_from_client(client_md_rbuf, client_ib_resources, rbuf_index[worker_id]) == 0) { 1572 | continue; 1573 | } 1574 | copy_data_to_host(host_ib_resources,rbuf_index[worker_id], rdma_write_wr_list); 1575 | rbuf_index[worker_id] = rbuf_index[worker_id] + _workers_num; 1576 | if(rbuf_index[worker_id] >= BF_MAX_RECV_WQES) { 1577 | rbuf_index[worker_id] = rbuf_index[worker_id] % BF_MAX_RECV_WQES; 
1578 | } 1579 | inc_worker_id(); 1580 | } 1581 | } 1582 | 1583 | 1584 | void BFContext::send_thread(CONNECTION_TYPE connection_type, unsigned int host_port_num, unsigned int client_port_num, client_md* client_md_rbuf) { 1585 | sleep(1); 1586 | std::cout << "*** send_thread ***" << std::endl; 1587 | 1588 | std::cout << "Client connection type: " << connection_type << std::endl; 1589 | std::cout << "BF responses to clients on port " << client_port_num << std::endl; 1590 | ib_resources_t* client_ib_resources = setup_connection_with_client(_send_connection_type, "enp3s0f0", client_port_num); 1591 | 1592 | std::cout << "Connect to Host" << std::endl; 1593 | int host_fd = socket(AF_INET, SOCK_STREAM, 0); 1594 | if (host_fd < 0) { 1595 | perror("socket"); 1596 | exit(1); 1597 | } 1598 | 1599 | struct sockaddr_in server_addr; 1600 | server_addr.sin_addr.s_addr = inet_addr("192.168.0.20"); 1601 | server_addr.sin_family = AF_INET; 1602 | server_addr.sin_port = htons(host_port_num); 1603 | if (connect(host_fd, (struct sockaddr *)&server_addr, sizeof(struct sockaddr_in)) < 0) { 1604 | perror("connect"); 1605 | exit(1); 1606 | } 1607 | std::cout << "BlueField is connected to Host,Data can be exchanged." 
<< std::endl; 1608 | 1609 | ib_resources_t* host_ib_resources = setup_readQP_from_Host(client_ib_resources, host_fd); 1610 | std::cout << "Host read data QP is established" << std::endl; 1611 | ib_resources_t* notify_ib_resources = setup_notifyQP_from_Host(client_ib_resources, host_fd); 1612 | std::cout << "Host notify QP is established" << std::endl; 1613 | 1614 | std::cout << "closing connection with Host" << std::endl; 1615 | close(host_fd); 1616 | 1617 | send_wr_t* send_wr = prepare_send_qps(host_ib_resources, notify_ib_resources, client_ib_resources); 1618 | 1619 | send_loop(client_md_rbuf, host_ib_resources, notify_ib_resources, client_ib_resources, send_wr); 1620 | //clean before exit 1621 | } 1622 | 1623 | 1624 | void BFContext::send_loop(client_md* client_md_rbuf, ib_resources_t* host_ib_resources, ib_resources_t* notify_ib_resources, ib_resources_t* client_ib_resources, send_wr_t* send_wr) { 1625 | unsigned int wr_id[_workers_num]; 1626 | unsigned int last_wr_id[_workers_num]; 1627 | for(int i = 0 ; i < _workers_num ; i++) { 1628 | wr_id[i] = BF_MAX_SEND_WQES - _workers_num + i; 1629 | last_wr_id[i] = wr_id[i]; 1630 | } 1631 | while(true) { 1632 | double add_delay = get_time_msec(); //do not delete/ somehow it improves the performance! 
1633 | for(int i = 0 ; i < _workers_num ; i++) { 1634 | bool should_send = pull_notification_from_host(notify_ib_resources, send_wr->read_wr, &wr_id[i], i); 1635 | // if(should_send) std::cout << "worker_id = " << i << " send id " << wr_id[i] << " wrap_around " << notify_ib_resources->wrap_around << std::endl; 1636 | send_response(client_md_rbuf, host_ib_resources, notify_ib_resources, client_ib_resources, send_wr, wr_id[i], last_wr_id[i], i,true); 1637 | last_wr_id[i] = wr_id[i]; 1638 | } 1639 | send_response(client_md_rbuf, host_ib_resources, notify_ib_resources, client_ib_resources, send_wr, 0, 0, 0, false); 1640 | 1641 | /* 1642 | // std::cout << "resp_sent " << notify_ib_resources->resp_sent << std::endl; 1643 | if(notify_ib_resources->resp_sent >= RECV_WQES_NUM/2) { 1644 | // std::cout << "notify host resp_sent " << notify_ib_resources->resp_sent << std::endl; 1645 | notify_host(notify_ib_resources, send_wr->write_wr); 1646 | } 1647 | */ 1648 | } 1649 | } 1650 | 1651 | -------------------------------------------------------------------------------- /bf/bf_context.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Maroun Tork, Lina Maudlej and Mark Silberstein 3 | * All rights reserved. 4 | * If used, please cite: PAPER NAME, AUTHORS, CONFERENCE WHERE PUBLISHED 5 | * 6 | * Redistribution and use in source and binary forms, with or without modification, 7 | * are permitted provided that the following conditions are met: 8 | * 9 | * Redistributions of source code must retain the above copyright notice, this 10 | * list of conditions and the following disclaimer. 11 | * 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | * this list of conditions and the following disclaimer in the documentation and/or 14 | * other materials provided with the distribution. 
// Kind of transport used between the BlueField and its clients.
typedef enum {
	UDP_CONNECTION = 0,
	RECV_UDP_CONNECTION = 1,
	SEND_UDP_CONNECTION = 2,
	TCP_CONNECTION = 3,
	IB_CONNECTION = 4
} CONNECTION_TYPE;

// Defined elsewhere; only pointers to these are used in this header.
struct recv_wr_t;
struct send_wr_t;
struct client_md;

// Per-connection context running on the BlueField. Everything except the
// constructor, destructor and run_all() is private.
class BFContext {
/*	int _host_sfd;
	bool _first_connection;
	bool _last_connection;
	unsigned int _connection_base_id;
	unsigned int _connection_id;*/
	unsigned int _workers_num;               // number of workers served by this context
	CONNECTION_TYPE _connection_type;
	CONNECTION_TYPE _recv_connection_type;
	CONNECTION_TYPE _send_connection_type;   // passed to setup_connection_with_client() by send_thread()
	unsigned int _host_port_num;
	unsigned int _client_port_num;
/*
	unsigned int _last_wr_id;
	unsigned int _load_factor;

	// FOR TCP/UDP connections
	int _client_sfd;
	struct sockaddr_in _client_addr;
	socklen_t _client_addr_len;
	unsigned int _recv_buf_offset;

	ib_resources_t* client_ib_resources;
	ib_resources_t* host_notify_ib_resources;
	ib_resources_t* host_data_ib_resources;

	unsigned int polled_wqes;
	unsigned int next_client_wqes_batch_id;
	ibv_recv_wr* recv_wr_list_batch[BF_NUM_OF_WQE_LISTS];
	ibv_recv_wr* recv_wr_list;
	ibv_sge* recv_sg_list;
	ibv_sge* read_sg;
	ibv_send_wr* read_wr;
	ibv_sge* write_sg;
	ibv_send_wr* write_wr;
	ibv_sge* rdma_read_sg_list;
	ibv_send_wr* rdma_read_wr_list;
	ibv_send_wr* rdma_write_wr_list;
	ibv_send_wr* response_wr_list;
*/
	// --- client connection setup ---
	ib_resources_t* setup_connection_with_client(CONNECTION_TYPE connection_type, const string& interface, unsigned int port);
//	ib_resources_t* setup_ib_connection_with_client(const string& interface);
	ib_resources_t* setup_recv_udp_connection_with_client(const string& interface, unsigned int udp_port);
	ib_resources_t* setup_send_udp_connection_with_client(const string& interface, unsigned int udp_port);
	//ib_resources_t* setup_udp_connection_with_client(const string&, unsigned int);
//	ib_resources_t* setup_tcp_connection_with_client(const string& interface);

	// --- receiving requests from the client ---
	int poll_request_from_client(client_md* client_md_rbuf, ib_resources_t* ib_resources, unsigned int rbuf_index = 0);
//	int poll_ib_request_from_client();
	int poll_udp_request_from_client(client_md* client_md_rbuf, ib_resources_t* ib_resources, unsigned int rbuf_index);
//	int poll_tcp_request_from_client();

	// --- sending responses to the client ---
	inline void send_response(client_md* client_md_rbuf, ib_resources_t* host_ib_resources, ib_resources_t* notify_ib_resources, ib_resources_t* client_ib_resources, send_wr_t* send_wr, unsigned int wr_id, unsigned int last_wr_id, unsigned int worker_id,bool post_rdma);
	inline void send_udp_response(client_md* client_md_rbuf, ib_resources_t* host_ib_resources, ib_resources_t* notify_ib_resources, ib_resources_t* client_ib_resources, send_wr_t* send_wr, unsigned int wr_id, unsigned int last_wr_id, unsigned int worker_id,bool post_rdma);
//	void send_ib_response(unsigned int wr_id);
//	void send_tcp_response(unsigned int wr_id);

	// --- QP setup with the host; `sfd` is the TCP socket used for the handshake ---
	ib_resources_t* setup_writeQP_to_Host(ib_resources_t* client_ib_resources, int sfd);
	ib_resources_t* setup_notifyQP_from_Host(ib_resources_t* client_ib_resources, int sfd);
	ib_resources_t* setup_readQP_from_Host(ib_resources_t* client_ib_resources, int sfd);

	struct recv_wr_t* prepare_recv_qps(ib_resources_t* host_ib_resources, ib_resources_t* client_ib_resources);
	struct send_wr_t* prepare_send_qps(ib_resources_t* host_ib_resources, ib_resources_t* notify_ib_resources, ib_resources_t* client_ib_resources);

//	void post_client_wqes_batch();
	void copy_data_to_host(ib_resources_t* host_ib_resources, unsigned int wr_id, ibv_send_wr* rdma_write_wr_list);
//	inline int get_free_slots_num();
	inline void notify_host(ib_resources_t* notify_ib_resources, ibv_send_wr* write_wr);
	inline bool pull_notification_from_host(ib_resources_t* notify_ib_resources, ibv_send_wr* read_wr, unsigned int *wr_id, unsigned int worker_id);
	inline void update_ci(ib_resources_t* notify_ib_resources, int requests_num);
//	inline void push_ci_to_host();
//	inline void update_ci(int requests_num);

	// --- thread entry points and their main loops ---
	void recv_loop(client_md* client_md_rbuf, ib_resources_t* host_ib_resources, ib_resources_t* client_ib_resources, ibv_send_wr* rdma_write_wr_list);
	void recv_thread(CONNECTION_TYPE connection_type, unsigned int host_port_num, unsigned int client_port_num, client_md* client_md_rbuf);
	void send_loop(client_md* client_md_rbuf, ib_resources_t* host_ib_resources, ib_resources_t* notify_ib_resources, ib_resources_t* client_ib_resources, send_wr_t* send_wr);
	void send_thread(CONNECTION_TYPE connection_type, unsigned int host_port_num, unsigned int client_port_num, client_md* client_md_rbuf);

	// --- small helpers used by the receive path ---
	inline void mark_owner_int(int* owner_int);
	inline bool can_push_to_host(client_md* client_md_rbuf, unsigned int rbuf_index);
	inline int recvfrom_client(client_md* client_md_rbuf, ib_resources_t* ib_resources, unsigned int rbuf_index);

	// --- round-robin worker selection ---
	inline unsigned int get_worker_id_and_notify_host(ib_resources_t* notify_ib_resources, send_wr_t* send_wr, unsigned int rbuf_index);
	inline unsigned int get_free_worker();
	inline void inc_worker_id();

public:

	// workers_num defaults to 1 when omitted.
	BFContext(CONNECTION_TYPE connection_type, unsigned int host_port_num, unsigned int client_port_num, unsigned int workers_num = 1);
	~BFContext();
	// NOTE(review): presumably starts the receive and send threads - defined outside this header, confirm.
	void run_all();
//	void set_nQP_ib_resources(ib_resources_t* ib_resources);
//	ib_resources_t* get_nQP_ib_resources();
//	int get_host_sfd();
//	int get_client_sfd();

//	void recv_and_send_udp();

};
15 | * 16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' 17 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR 20 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 21 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 22 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 23 | * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 25 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | */ 27 | 28 | 29 | #include "bf_server.hpp" 30 | 31 | BFServer::BFServer(unsigned int base_connection_id, unsigned int connections_num, CONNECTION_TYPE connection_type) : _connections_num(connections_num) , _connection_type(connection_type){ 32 | /* _bf_ctxs = (BFContext**) malloc(_connections_num * sizeof(BFContext*)); 33 | int sfd = -1; 34 | for(int i = 0 ; i < _connections_num ; i++) { 35 | bool first_connection = i == 0; 36 | bool last_connection = i == (_connections_num - 1); 37 | _bf_ctxs[i] = new BFContext(first_connection, last_connection, base_connection_id, i, _connection_type, _connections_num, sfd); 38 | sfd = _bf_ctxs[i]->get_host_sfd(); 39 | if(i > 0) { 40 | _bf_ctxs[i]->set_nQP_ib_resources(_bf_ctxs[0]->get_nQP_ib_resources()); 41 | } 42 | }*/ 43 | } 44 | 45 | 46 | BFServer::~BFServer() { 47 | for(int i = 0 ; i < _connections_num ; i++) { 48 | delete(_bf_ctxs[i]); 49 | } 50 | free(_bf_ctxs); 51 | } 52 | 53 | void BFServer::doLoop() { 54 | for(int i = 0 ; i < _connections_num ; i++) { 55 | //_bf_ctxs[i]->doLoop(); 56 | } 57 | } 58 | -------------------------------------------------------------------------------- 
/bf/bf_server.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Maroun Tork, Lina Maudlej and Mark Silberstein 3 | * All rights reserved. 4 | * If used, please cite: PAPER NAME, AUTHORS, CONFERENCE WHERE PUBLISHED 5 | * 6 | * Redistribution and use in source and binary forms, with or without modification, 7 | * are permitted provided that the following conditions are met: 8 | * 9 | * Redistributions of source code must retain the above copyright notice, this 10 | * list of conditions and the following disclaimer. 11 | * 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | * this list of conditions and the following disclaimer in the documentation and/or 14 | * other materials provided with the distribution. 15 | * 16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' 17 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR 20 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 21 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 22 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 23 | * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 25 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
26 | */ 27 | 28 | 29 | #ifndef __BF_SERVER_H__ 30 | #define __BF_SERVER_H__ 31 | 32 | #include "bf_context.hpp" 33 | 34 | class BFServer { 35 | unsigned int _connections_num; 36 | CONNECTION_TYPE _connection_type; 37 | BFContext** _bf_ctxs; 38 | 39 | public: 40 | 41 | BFServer(unsigned int connections_base_id = 0, unsigned int connections_num = 1, CONNECTION_TYPE = IB_CONNECTION); 42 | ~BFServer(); 43 | void doLoop(); 44 | 45 | }; 46 | 47 | #endif 48 | -------------------------------------------------------------------------------- /bf/bf_server_exe.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Maroun Tork, Lina Maudlej and Mark Silberstein 3 | * All rights reserved. 4 | * If used, please cite: PAPER NAME, AUTHORS, CONFERENCE WHERE PUBLISHED 5 | * 6 | * Redistribution and use in source and binary forms, with or without modification, 7 | * are permitted provided that the following conditions are met: 8 | * 9 | * Redistributions of source code must retain the above copyright notice, this 10 | * list of conditions and the following disclaimer. 11 | * 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | * this list of conditions and the following disclaimer in the documentation and/or 14 | * other materials provided with the distribution. 15 | * 16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' 17 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19 | * DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR 20 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 21 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 22 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 23 | * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 25 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | */ 27 | 28 | 29 | #include "bf_server.hpp" 30 | 31 | CONNECTION_TYPE parse_connection_type(char* str) { 32 | if(!strcmp(str,"IB_CONNECTION")) { 33 | return IB_CONNECTION; 34 | } 35 | if(!strcmp(str,"UDP_CONNECTION")) { 36 | return UDP_CONNECTION; 37 | } 38 | if(!strcmp(str,"TCP_CONNECTION")) { 39 | return TCP_CONNECTION; 40 | } 41 | 42 | std::cerr << "unknown CONNECTION TYPE: " << str << std::endl; 43 | exit(1); 44 | } 45 | 46 | 47 | int main(int argc, char *argv[]) { 48 | unsigned int client_port = 5000; 49 | unsigned int host_port = 5000; 50 | CONNECTION_TYPE connection_type = IB_CONNECTION; 51 | unsigned int workers_num = 1; 52 | switch(argc) { 53 | case 1: 54 | std::cout << "No arguments were passed, use default values: " << std::endl; 55 | break; 56 | case 2: 57 | connection_type = parse_connection_type(argv[1]); 58 | break; 59 | case 3: 60 | connection_type = parse_connection_type(argv[1]); 61 | host_port = atoi(argv[2]); 62 | break; 63 | case 4: 64 | connection_type = parse_connection_type(argv[1]); 65 | host_port = atoi(argv[2]); 66 | client_port = atoi(argv[3]); 67 | break; 68 | case 5: 69 | connection_type = parse_connection_type(argv[1]); 70 | host_port = atoi(argv[2]); 71 | client_port = atoi(argv[3]); 72 | workers_num = atoi(argv[4]); 73 | break; 74 | default: 75 | std::cerr << "Too many arguments: (" << argc - 1 << ") while expecting (max 5)." 
<< std::endl; 76 | exit(1); 77 | } 78 | 79 | std::cout << "connection type:" << connection_type << " host port: " << host_port << " client port: " << client_port << "workers number: " << workers_num << std::endl; 80 | 81 | BFContext bf_ctx(connection_type, host_port, client_port, workers_num); 82 | bf_ctx.run_all(); 83 | 84 | printf("Done\n"); 85 | 86 | return 0; 87 | } 88 | 89 | -------------------------------------------------------------------------------- /bf/run_server.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | LIBVMA=/root/libvma/src/vma/.libs/libvma.so 4 | 5 | if [[ $1 == "-h" ]]; then 6 | echo "Usage: " $0 connection_type connections_num host_port client_port total_workers_num 7 | echo " connection_type: UDP_CONNECTION [dfault = UDP_CONNECTION]" 8 | echo " connections_num: [defualt = 1]" 9 | echo " host port: [default = 5000]" 10 | echo " client port: [default = 5000]" 11 | echo " total workers number: [default = connections_num]" 12 | exit 1 13 | fi 14 | 15 | connection_type=UDP_CONNECTION 16 | connections_num=1 17 | host_port=5000 18 | client_port=5000 19 | total_workers_num=$connections_num 20 | 21 | if [ $# -gt 0 ] ; then 22 | connection_type=$1; shift 23 | fi 24 | if [ $# -gt 0 ] ; then 25 | connections_num=$1; shift 26 | fi 27 | if [ $# -gt 0 ] ; then 28 | host_port=$1; shift 29 | fi 30 | if [ $# -gt 0 ] ; then 31 | client_port=$1; shift 32 | fi 33 | if [ $# -gt 0 ] ; then 34 | total_workers_num=$1; shift 35 | if [[ $total_workers_num -lt $connections_num ]]; then 36 | echo "number of workers is less than number of connections.." 
# Build rules for the host-side GPU applications.
# Every application links the shared objects setup.o and bf_host.o with its own
# <app>.o using nvcc.

CUDA_PATH := /usr/local/cuda
NVCC := $(CUDA_PATH)/bin/nvcc -Xptxas="-v" #-lineinfo
CC := g++

CFLAGS=-Wall -g
# --device-c: relocatable device code, objects are device-linked at link time
NVCCFLAGS := -arch=sm_35 --device-c -g

NVCCLFLAGS := -arch=sm_35

# Was -L$(CUDAPATH)/lib64: CUDAPATH is undefined, which expanded to -L/lib64.
LDFLAGS := -I$(CUDA_PATH)/include -L$(CUDA_PATH)/lib64 #-I~/gdrcopy

LIBPATH := $(CUDA_PATH)/lib64
SOLIBS := $(LIBPATH)/libcublas.so $(LIBPATH)/libcudart.so #~/gdrcopy/libgdrapi.so

LIBS := -libverbs -lrt -lpthread -lboost_filesystem -lboost_system

########################################################################

# all and clean are commands, not files
.PHONY: all clean

all: echo matrix_mul hello_kernel lenet busy_wait

busy_wait: setup.o bf_host.o busy_wait.o
	$(NVCC) $(NVCCLFLAGS) $(LDFLAGS) -o $@ $^ $(SOLIBS) $(LIBS)

echo: setup.o bf_host.o echo.o
	$(NVCC) $(NVCCLFLAGS) $(LDFLAGS) -o $@ $^ $(SOLIBS) $(LIBS)

matrix_mul: setup.o bf_host.o matrix_mul.o
	$(NVCC) $(NVCCLFLAGS) $(LDFLAGS) -o $@ $^ $(SOLIBS) $(LIBS)

hello_kernel: setup.o bf_host.o hello_kernel.o
	$(NVCC) $(NVCCLFLAGS) $(LDFLAGS) -o $@ $^ $(SOLIBS) $(LIBS)

lenet: setup.o bf_host.o lenet.o
	$(NVCC) $(NVCCLFLAGS) $(LDFLAGS) -o $@ $^ $(SOLIBS) $(LIBS)

########################################################################

# compile

busy_wait.o: gpu_define.cu.h ../common/setup.hpp busy_wait.cu
	$(NVCC) $(NVCCFLAGS) -c busy_wait.cu

echo.o: gpu_define.cu.h ../common/setup.hpp echo.cu
	$(NVCC) $(NVCCFLAGS) -c echo.cu

matrix_mul.o: gpu_define.cu.h ../common/setup.hpp matrix_mul.cu
	$(NVCC) $(NVCCFLAGS) -c matrix_mul.cu

hello_kernel.o: gpu_define.cu.h ../common/setup.hpp hello_kernel.cu
	$(NVCC) $(NVCCFLAGS) -c hello_kernel.cu

bf_host.o: gpu_define.cu.h ../common/setup.hpp bf_host.cu.hpp bf_host.cu
	$(NVCC) $(NVCCFLAGS) -c bf_host.cu

setup.o: ../common/setup.hpp ../common/setup.cpp
	$(CC) $(CFLAGS) -c ../common/setup.cpp

lenet.o: gpu_define.cu.h ../common/setup.hpp lenet.cu
	$(NVCC) $(NVCCFLAGS) -c lenet.cu


########################################################################
# busy_wait is built by `all`, so it is removed here as well
clean:
	\rm -f *.o matrix_mul hello_kernel echo lenet busy_wait
4 | * If used, please cite: PAPER NAME, AUTHORS, CONFERENCE WHERE PUBLISHED 5 | * 6 | * Redistribution and use in source and binary forms, with or without modification, 7 | * are permitted provided that the following conditions are met: 8 | * 9 | * Redistributions of source code must retain the above copyright notice, this 10 | * list of conditions and the following disclaimer. 11 | * 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | * this list of conditions and the following disclaimer in the documentation and/or 14 | * other materials provided with the distribution. 15 | * 16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' 17 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR 20 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 21 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 22 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 23 | * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 25 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
26 | */ 27 | 28 | #include "bf_host.cu.hpp" 29 | 30 | void hostContext::teardown_connection(ib_resources_t* ib_resources) { 31 | ibv_destroy_qp(ib_resources->qp); 32 | ibv_destroy_cq(ib_resources->recv_cq); 33 | ibv_destroy_cq(ib_resources->send_cq); 34 | ibv_dereg_mr(ib_resources->lmr_recv); 35 | ibv_dereg_mr(ib_resources->lmr_send); 36 | free(ib_resources->lrecv_buf); 37 | free(ib_resources->lsend_buf); 38 | ibv_dealloc_pd(ib_resources->pd); 39 | ibv_close_device(ib_resources->context); 40 | free(ib_resources); 41 | } 42 | 43 | 44 | ib_resources_t* hostContext::setup_notify_connection(const string& interface, int sfd) { 45 | ib_resources_t *ib_resources = (struct ib_resources_t *)malloc(sizeof(struct ib_resources_t)); 46 | struct ibv_device **device_list = ibv_get_device_list(NULL); 47 | if (!device_list) { 48 | std::cerr << "ibv_get_device_list failed" << std::endl; 49 | exit(1); 50 | } 51 | 52 | string device_name = ib_device_from_netdev(interface.c_str()); 53 | struct ibv_context *context = ibv_open_device_by_name(device_name); 54 | 55 | struct ibv_pd *pd = ibv_alloc_pd(context); 56 | if (!pd) { 57 | std::cerr << "ibv_alloc_pd() failed" << std::endl; 58 | exit(1); 59 | } 60 | 61 | struct ibv_mr *mr_recv; 62 | char* recv_buf; 63 | CUDA_CHECK(cudaMalloc(&recv_buf, _workers_num * sizeof(unsigned int))); 64 | unsigned int recv_arr_size = _workers_num; 65 | unsigned int recv_init[recv_arr_size]; 66 | for(int i = 0 ; i < recv_arr_size ; i++) { 67 | recv_init[i] = HOST_MAX_RECV_WQES - _workers_num + i; 68 | } 69 | CUDA_CHECK(cudaMemcpy(recv_buf, recv_init, _workers_num * sizeof(unsigned int), cudaMemcpyHostToDevice)); 70 | mr_recv = ibv_reg_mr(pd, recv_buf, _workers_num * sizeof(unsigned int), IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE); 71 | if (!mr_recv) { 72 | std::cerr << "ibv_reg_mr() failed for data_for_host" << std::endl; 73 | exit(1); 74 | } 75 | 76 | struct ibv_mr *mr_send; 77 | char *send_buf; 78 | CUDA_CHECK(cudaMalloc(&send_buf,2 * _workers_num * 
sizeof(unsigned int))); 79 | unsigned int send_arr_size = 2 * _workers_num; 80 | unsigned int send_init[send_arr_size]; 81 | for(int i = 0 ; i < _workers_num ; i++) { 82 | send_init[i] = HOST_MAX_SEND_WQES - _workers_num + i; 83 | } 84 | for(int i = 0 ; i < _workers_num ; i++) { 85 | send_init[_workers_num + i] = HOST_MAX_SEND_WQES - 2 * _workers_num + i; //will be inc. when calling grecv 86 | } 87 | 88 | /* for(int i = 0 ; i < send_arr_size ; i++) { 89 | if( i < send_arr_size/2 ) { // PI part 90 | send_init[i] = HOST_MAX_SEND_WQES - 1;//0; 91 | } else { // CI part 92 | send_init[i] = HOST_MAX_SEND_WQES - 2; // will be inc. when calling grecv 93 | } 94 | }*/ 95 | CUDA_CHECK(cudaMemcpy(send_buf, send_init, 2 * _workers_num * sizeof(unsigned int), cudaMemcpyHostToDevice)); 96 | mr_send = ibv_reg_mr(pd, send_buf, 2 * _workers_num * sizeof(unsigned int), IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_READ); 97 | if (!mr_send) { 98 | std::cerr << "ibv_reg_mr() failed for data_from_host" << std::endl; 99 | exit(1); 100 | } 101 | 102 | struct ibv_cq *recv_cq = ibv_create_cq(context, HOST_RECV_CQ_SIZE, NULL, NULL, 0); 103 | if (!recv_cq) { 104 | std::cerr << "ibv_create_cq() failed" << std::endl; 105 | exit(1); 106 | } 107 | 108 | struct ibv_cq *send_cq = ibv_create_cq(context, HOST_SEND_CQ_SIZE, NULL, NULL, 0); 109 | if (!send_cq) { 110 | std::cerr << "ibv_create_cq() failed" << std::endl; 111 | exit(1); 112 | } 113 | 114 | struct ibv_qp_init_attr qp_init_attr; 115 | memset(&qp_init_attr, 0, sizeof(struct ibv_qp_init_attr)); 116 | qp_init_attr.send_cq = send_cq; 117 | qp_init_attr.recv_cq = recv_cq; 118 | qp_init_attr.qp_type = IBV_QPT_RC; 119 | qp_init_attr.cap.max_send_wr = 0; 120 | qp_init_attr.cap.max_recv_wr = HOST_MAX_RECV_WQES; 121 | qp_init_attr.cap.max_send_sge = 0; 122 | qp_init_attr.cap.max_recv_sge = 1; 123 | // qp_init_attr.cap.max_inline_data = 512; 124 | struct ibv_qp *qp = ibv_create_qp(pd, &qp_init_attr); 125 | if (!qp) { 126 | std::cerr << "ibv_create_qp() 
failed errno= " << errno << std::endl; 127 | exit(1); 128 | } 129 | 130 | int ret; 131 | struct ibv_port_attr port_attr; 132 | ret = ibv_query_port(context, PORT_NUM, &port_attr); 133 | if (ret) { 134 | std::cerr << "ibv_query_port() failed ret= " << ret << std::endl; 135 | exit(1); 136 | } 137 | 138 | struct ib_info_t my_info; 139 | my_info.lid = port_attr.lid; 140 | my_info.qpn = qp->qp_num; 141 | my_info.mkey_data_buffer = mr_recv->rkey; 142 | my_info.addr_data_buffer = (uintptr_t)mr_recv->addr; 143 | my_info.mkey_response_buffer = mr_send->rkey; 144 | my_info.addr_response_buffer = (uintptr_t)mr_send->addr; 145 | int gid_index = get_gid_index(context); 146 | if (ibv_query_gid(context, 1, gid_index, &(my_info.gid) )) { 147 | std::cerr << "ibv_query_gid failed for gid " << gid_index << std::endl; 148 | exit(1); 149 | } 150 | ret = send(sfd, &my_info, sizeof(struct ib_info_t), 0); 151 | if (ret < 0) { 152 | std::cerr << "send" << std::endl; 153 | exit(1); 154 | } 155 | 156 | struct ib_info_t client_info; 157 | recv(sfd, &client_info, sizeof(struct ib_info_t), 0); 158 | if (ret < 0) { 159 | std::cerr << "recv" << std::endl; 160 | exit(1); 161 | } 162 | 163 | ib_resources->rmr_recv_key = client_info.mkey_data_buffer; 164 | ib_resources->rmr_recv_addr = client_info.addr_data_buffer; 165 | ib_resources->rmr_send_key = client_info.mkey_response_buffer; 166 | ib_resources->rmr_send_addr = client_info.addr_response_buffer; 167 | 168 | struct ibv_qp_attr qp_attr; 169 | memset(&qp_attr, 0, sizeof(struct ibv_qp_attr)); 170 | qp_attr.qp_state = IBV_QPS_INIT; 171 | qp_attr.pkey_index = 0; 172 | qp_attr.port_num = PORT_NUM; 173 | qp_attr.qp_access_flags = IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_READ; 174 | ret = ibv_modify_qp(qp, &qp_attr, IBV_QP_STATE | IBV_QP_PKEY_INDEX | IBV_QP_PORT | IBV_QP_ACCESS_FLAGS); 175 | if (ret) { 176 | std::cerr << "ibv_modify_qp() to INIT failed" << std::endl; 177 | exit(1); 178 | } 179 | 180 | memset(&qp_attr, 0, sizeof(struct ibv_qp_attr)); 
181 | qp_attr.qp_state = IBV_QPS_RTR; 182 | qp_attr.path_mtu = IBV_MTU_4096; 183 | qp_attr.dest_qp_num = client_info.qpn; 184 | qp_attr.rq_psn = 0 ; 185 | qp_attr.max_dest_rd_atomic = 1; 186 | qp_attr.min_rnr_timer = 12; 187 | qp_attr.ah_attr.is_global = 1; 188 | qp_attr.ah_attr.grh.dgid = client_info.gid; 189 | qp_attr.ah_attr.grh.sgid_index = get_gid_index(context); 190 | qp_attr.ah_attr.grh.flow_label = 0; 191 | qp_attr.ah_attr.grh.hop_limit = 1; 192 | qp_attr.ah_attr.grh.traffic_class = 0; 193 | qp_attr.ah_attr.dlid = client_info.lid; 194 | qp_attr.ah_attr.sl = 0; 195 | qp_attr.ah_attr.src_path_bits = 0; 196 | qp_attr.ah_attr.port_num = PORT_NUM; 197 | ret = ibv_modify_qp(qp, &qp_attr, IBV_QP_STATE | IBV_QP_AV | IBV_QP_PATH_MTU | IBV_QP_DEST_QPN | IBV_QP_RQ_PSN | IBV_QP_MAX_DEST_RD_ATOMIC | IBV_QP_MIN_RNR_TIMER); 198 | if (ret) { 199 | std::cerr << "ibv_modify_qp() to RTR failed ret= " << ret<< std::endl; 200 | exit(1); 201 | } 202 | 203 | memset(&qp_attr, 0, sizeof(struct ibv_qp_attr)); 204 | qp_attr.qp_state = IBV_QPS_RTS; 205 | qp_attr.sq_psn = 0; 206 | qp_attr.timeout = 14; 207 | qp_attr.retry_cnt = 7; 208 | qp_attr.rnr_retry = 7; 209 | qp_attr.max_rd_atomic = 1; 210 | ret = ibv_modify_qp(qp, &qp_attr, IBV_QP_STATE | IBV_QP_TIMEOUT | IBV_QP_RETRY_CNT | IBV_QP_RNR_RETRY | IBV_QP_SQ_PSN | IBV_QP_MAX_QP_RD_ATOMIC); 211 | if (ret) { 212 | std::cerr << "ibv_modify_qp() to RTS failed" << std::endl; 213 | exit(1); 214 | } 215 | 216 | ib_resources->context = context; 217 | ib_resources->pd = pd; 218 | ib_resources->qp = qp; 219 | ib_resources->recv_cq = recv_cq; 220 | ib_resources->send_cq = send_cq; 221 | ib_resources->lrecv_buf = recv_buf; 222 | ib_resources->lmr_recv = mr_recv; 223 | ib_resources->lsend_buf = send_buf; 224 | ib_resources->lmr_send = mr_send; 225 | 226 | return ib_resources; 227 | } 228 | 229 | 230 | ib_resources_t* hostContext::setup_recv_data_connection(const string& interface, int sfd) { 231 | ib_resources_t *ib_resources = (ib_resources_t 
*)malloc(sizeof(struct ib_resources_t)); 232 | 233 | ibv_device **device_list = ibv_get_device_list(NULL); 234 | if (!device_list) { 235 | std::cerr << "ERROR: ibv_get_device_list failed" << std::endl; 236 | exit(1); 237 | } 238 | 239 | string device_name = ib_device_from_netdev(interface.c_str()); 240 | struct ibv_context *context = ibv_open_device_by_name(device_name); 241 | struct ibv_pd *pd = ibv_alloc_pd(context); 242 | if (!pd) { 243 | std::cerr << "ibv_alloc_pd() failed" << std::endl; 244 | exit(1); 245 | } 246 | 247 | struct ibv_mr *mr_recv; 248 | char *recv_buf; 249 | CUDA_CHECK(cudaMalloc(&recv_buf,HOST_TOTAL_DATA_FROM_CLIENT_SIZE)); 250 | CUDA_CHECK(cudaMemset(recv_buf, 0, HOST_TOTAL_DATA_FROM_CLIENT_SIZE)); 251 | // printf("ib_resources Data: recv_buf=%p size=%d\n",recv_buf,HOST_TOTAL_DATA_FROM_CLIENT_SIZE); 252 | mr_recv = ibv_reg_mr(pd, recv_buf, HOST_TOTAL_DATA_FROM_CLIENT_SIZE, IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE); 253 | if (!mr_recv) { 254 | std::cerr << "ibv_reg_mr() failed for data_for_host" << std::endl; 255 | exit(1); 256 | } 257 | 258 | struct ibv_cq *recv_cq = ibv_create_cq(context, HOST_RECV_CQ_SIZE, NULL, NULL, 0); 259 | if (!recv_cq) { 260 | printf("ERROR: ibv_create_cq() failed\n"); 261 | exit(1); 262 | } 263 | 264 | struct ibv_qp_init_attr qp_init_attr; 265 | memset(&qp_init_attr, 0, sizeof(struct ibv_qp_init_attr)); 266 | qp_init_attr.send_cq = recv_cq; 267 | qp_init_attr.recv_cq = recv_cq; 268 | qp_init_attr.qp_type = IBV_QPT_RC; 269 | qp_init_attr.cap.max_send_wr = 0; 270 | qp_init_attr.cap.max_recv_wr = HOST_MAX_RECV_WQES; 271 | qp_init_attr.cap.max_send_sge = 0; 272 | qp_init_attr.cap.max_recv_sge = 1; 273 | struct ibv_qp *qp = ibv_create_qp(pd, &qp_init_attr); 274 | if (!qp) { 275 | std::cerr << "ibv_create_qp() failed errno= " << errno << std::endl; 276 | exit(1); 277 | } 278 | 279 | struct ibv_port_attr port_attr; 280 | int ret = ibv_query_port(context, PORT_NUM, &port_attr); 281 | if (ret) { 282 | std::cerr << 
"ibv_query_port() failed ret=" << ret << std::endl; 283 | exit(1); 284 | } 285 | 286 | struct ib_info_t my_info; 287 | my_info.lid = port_attr.lid; 288 | my_info.qpn = qp->qp_num; 289 | my_info.mkey_data_buffer = mr_recv->rkey; 290 | my_info.addr_data_buffer = (uintptr_t)mr_recv->addr; 291 | int gid_index = get_gid_index(context); 292 | if (ibv_query_gid(context, 1, gid_index, &(my_info.gid) )) { 293 | std::cerr << "ibv_query_gid failed for gid " << gid_index << std::endl; 294 | exit(1); 295 | } 296 | 297 | ret = send(sfd, &my_info, sizeof(struct ib_info_t), 0); 298 | if (ret < 0) { 299 | std::cerr << "send" << std::endl; 300 | exit(1); 301 | } 302 | 303 | struct ib_info_t client_info; 304 | recv(sfd, &client_info, sizeof(struct ib_info_t), 0); 305 | if (ret < 0) { 306 | std::cerr << "recv" << std::endl; 307 | exit(1); 308 | } 309 | 310 | struct ibv_qp_attr qp_attr; 311 | memset(&qp_attr, 0, sizeof(struct ibv_qp_attr)); 312 | qp_attr.qp_state = IBV_QPS_INIT; 313 | qp_attr.pkey_index = 0; 314 | qp_attr.port_num = PORT_NUM; 315 | qp_attr.qp_access_flags = IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_READ; 316 | ret = ibv_modify_qp(qp, &qp_attr, IBV_QP_STATE | IBV_QP_PKEY_INDEX | IBV_QP_PORT | IBV_QP_ACCESS_FLAGS); 317 | if (ret) { 318 | std::cerr << "ibv_modify_qp() to INIT failed" << std::endl; 319 | exit(1); 320 | } 321 | 322 | memset(&qp_attr, 0, sizeof(struct ibv_qp_attr)); 323 | qp_attr.qp_state = IBV_QPS_RTR; 324 | qp_attr.path_mtu = IBV_MTU_4096; 325 | qp_attr.dest_qp_num = client_info.qpn; 326 | qp_attr.rq_psn = 0 ; 327 | qp_attr.max_dest_rd_atomic = 1; 328 | qp_attr.min_rnr_timer = 12; 329 | qp_attr.ah_attr.is_global = 1; 330 | qp_attr.ah_attr.grh.dgid = client_info.gid; 331 | qp_attr.ah_attr.grh.sgid_index = get_gid_index(context); 332 | qp_attr.ah_attr.grh.flow_label = 0; 333 | qp_attr.ah_attr.grh.hop_limit = 1; 334 | qp_attr.ah_attr.grh.traffic_class = 0; 335 | qp_attr.ah_attr.dlid = client_info.lid; 336 | qp_attr.ah_attr.sl = 0; 337 | 
qp_attr.ah_attr.src_path_bits = 0; 338 | qp_attr.ah_attr.port_num = PORT_NUM; 339 | ret = ibv_modify_qp(qp, &qp_attr, IBV_QP_STATE | IBV_QP_AV | IBV_QP_PATH_MTU | IBV_QP_DEST_QPN | IBV_QP_RQ_PSN | IBV_QP_MAX_DEST_RD_ATOMIC | IBV_QP_MIN_RNR_TIMER); 340 | if (ret) { 341 | std::cerr << "ibv_modify_qp() to RTR failed ret= " << ret << std::endl; 342 | exit(1); 343 | } 344 | memset(&qp_attr, 0, sizeof(struct ibv_qp_attr)); 345 | qp_attr.qp_state = IBV_QPS_RTS; 346 | qp_attr.sq_psn = 0; 347 | qp_attr.timeout = 14; 348 | qp_attr.retry_cnt = 7; 349 | qp_attr.rnr_retry = 7; 350 | qp_attr.max_rd_atomic = 1; 351 | ret = ibv_modify_qp(qp, &qp_attr, IBV_QP_STATE | IBV_QP_TIMEOUT | IBV_QP_RETRY_CNT | IBV_QP_RNR_RETRY | IBV_QP_SQ_PSN | IBV_QP_MAX_QP_RD_ATOMIC); 352 | if (ret) { 353 | std::cerr << "ibv_modify_qp() to RTS failed" << std::endl; 354 | exit(1); 355 | } 356 | 357 | ib_resources->context = context; 358 | ib_resources->pd = pd; 359 | ib_resources->qp = qp; 360 | ib_resources->recv_cq = recv_cq; 361 | ib_resources->lrecv_buf = recv_buf; 362 | ib_resources->lmr_recv = mr_recv; 363 | 364 | return ib_resources; 365 | } 366 | 367 | 368 | 369 | ib_resources_t* hostContext::setup_send_data_connection(const string& interface, int sfd) { 370 | ib_resources_t *ib_resources = (ib_resources_t *)malloc(sizeof(struct ib_resources_t)); 371 | 372 | ibv_device **device_list = ibv_get_device_list(NULL); 373 | if (!device_list) { 374 | std::cerr << "ERROR: ibv_get_device_list failed" << std::endl; 375 | exit(1); 376 | } 377 | 378 | string device_name = ib_device_from_netdev(interface.c_str()); 379 | struct ibv_context *context = ibv_open_device_by_name(device_name); 380 | struct ibv_pd *pd = ibv_alloc_pd(context); 381 | if (!pd) { 382 | std::cerr << "ibv_alloc_pd() failed" << std::endl; 383 | exit(1); 384 | } 385 | 386 | struct ibv_mr *mr_send; 387 | char *send_buf; 388 | CUDA_CHECK(cudaMalloc(&send_buf, HOST_TOTAL_DATA_TO_CLIENT_SIZE)); 389 | mr_send = ibv_reg_mr(pd, send_buf, 
HOST_TOTAL_DATA_TO_CLIENT_SIZE, IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_READ); 390 | if (!mr_send) { 391 | std::cerr << "ibv_reg_mr() failed for data_from_host" << std::endl; 392 | exit(1); 393 | } 394 | 395 | struct ibv_cq *recv_cq = ibv_create_cq(context, HOST_RECV_CQ_SIZE, NULL, NULL, 0); 396 | if (!recv_cq) { 397 | printf("ERROR: ibv_create_cq() failed\n"); 398 | exit(1); 399 | } 400 | 401 | struct ibv_qp_init_attr qp_init_attr; 402 | memset(&qp_init_attr, 0, sizeof(struct ibv_qp_init_attr)); 403 | qp_init_attr.send_cq = recv_cq; 404 | qp_init_attr.recv_cq = recv_cq; 405 | qp_init_attr.qp_type = IBV_QPT_RC; 406 | qp_init_attr.cap.max_send_wr = 0; 407 | qp_init_attr.cap.max_recv_wr = HOST_MAX_RECV_WQES; 408 | qp_init_attr.cap.max_send_sge = 0; 409 | qp_init_attr.cap.max_recv_sge = 1; 410 | // qp_init_attr.cap.max_inline_data = 0; 411 | struct ibv_qp *qp = ibv_create_qp(pd, &qp_init_attr); 412 | if (!qp) { 413 | std::cerr << "ibv_create_qp() failed errno= " << errno << std::endl; 414 | exit(1); 415 | } 416 | 417 | int ret; 418 | struct ibv_port_attr port_attr; 419 | ret = ibv_query_port(context, PORT_NUM, &port_attr); 420 | if (ret) { 421 | std::cerr << "ibv_query_port() failed ret=" << ret << std::endl; 422 | exit(1); 423 | } 424 | 425 | struct ib_info_t my_info; 426 | my_info.lid = port_attr.lid; 427 | my_info.qpn = qp->qp_num; 428 | my_info.mkey_response_buffer = mr_send->rkey; 429 | my_info.addr_response_buffer = (uintptr_t)mr_send->addr; 430 | int gid_index = get_gid_index(context); 431 | if (ibv_query_gid(context, 1, gid_index, &(my_info.gid) )) { 432 | std::cerr << "ibv_query_gid failed for gid " << gid_index << std::endl; 433 | exit(1); 434 | } 435 | 436 | ret = send(sfd, &my_info, sizeof(struct ib_info_t), 0); 437 | if (ret < 0) { 438 | std::cerr << "send" << std::endl; 439 | exit(1); 440 | } 441 | 442 | struct ib_info_t client_info; 443 | recv(sfd, &client_info, sizeof(struct ib_info_t), 0); 444 | if (ret < 0) { 445 | std::cerr << "recv" << 
std::endl; 446 | exit(1); 447 | } 448 | 449 | struct ibv_qp_attr qp_attr; 450 | memset(&qp_attr, 0, sizeof(struct ibv_qp_attr)); 451 | qp_attr.qp_state = IBV_QPS_INIT; 452 | qp_attr.pkey_index = 0; 453 | qp_attr.port_num = PORT_NUM; 454 | qp_attr.qp_access_flags = IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_READ; 455 | ret = ibv_modify_qp(qp, &qp_attr, IBV_QP_STATE | IBV_QP_PKEY_INDEX | IBV_QP_PORT | IBV_QP_ACCESS_FLAGS); 456 | if (ret) { 457 | std::cerr << "ibv_modify_qp() to INIT failed" << std::endl; 458 | exit(1); 459 | } 460 | 461 | memset(&qp_attr, 0, sizeof(struct ibv_qp_attr)); 462 | qp_attr.qp_state = IBV_QPS_RTR; 463 | qp_attr.path_mtu = IBV_MTU_4096; 464 | qp_attr.dest_qp_num = client_info.qpn; 465 | qp_attr.rq_psn = 0 ; 466 | qp_attr.max_dest_rd_atomic = 1; 467 | qp_attr.min_rnr_timer = 12; 468 | qp_attr.ah_attr.is_global = 1; 469 | qp_attr.ah_attr.grh.dgid = client_info.gid; 470 | qp_attr.ah_attr.grh.sgid_index = get_gid_index(context); 471 | qp_attr.ah_attr.grh.flow_label = 0; 472 | qp_attr.ah_attr.grh.hop_limit = 1; 473 | qp_attr.ah_attr.grh.traffic_class = 0; 474 | qp_attr.ah_attr.dlid = client_info.lid; 475 | qp_attr.ah_attr.sl = 0; 476 | qp_attr.ah_attr.src_path_bits = 0; 477 | qp_attr.ah_attr.port_num = PORT_NUM; 478 | ret = ibv_modify_qp(qp, &qp_attr, IBV_QP_STATE | IBV_QP_AV | IBV_QP_PATH_MTU | IBV_QP_DEST_QPN | IBV_QP_RQ_PSN | IBV_QP_MAX_DEST_RD_ATOMIC | IBV_QP_MIN_RNR_TIMER); 479 | if (ret) { 480 | std::cerr << "ibv_modify_qp() to RTR failed ret= " << ret << std::endl; 481 | exit(1); 482 | } 483 | memset(&qp_attr, 0, sizeof(struct ibv_qp_attr)); 484 | qp_attr.qp_state = IBV_QPS_RTS; 485 | qp_attr.sq_psn = 0; 486 | qp_attr.timeout = 14; 487 | qp_attr.retry_cnt = 7; 488 | qp_attr.rnr_retry = 7; 489 | qp_attr.max_rd_atomic = 1; 490 | ret = ibv_modify_qp(qp, &qp_attr, IBV_QP_STATE | IBV_QP_TIMEOUT | IBV_QP_RETRY_CNT | IBV_QP_RNR_RETRY | IBV_QP_SQ_PSN | IBV_QP_MAX_QP_RD_ATOMIC); 491 | if (ret) { 492 | std::cerr << "ibv_modify_qp() to RTS failed" 
<< std::endl; 493 | exit(1); 494 | } 495 | 496 | ib_resources->context = context; 497 | ib_resources->pd = pd; 498 | ib_resources->qp = qp; 499 | ib_resources->recv_cq = recv_cq; 500 | ib_resources->send_cq = recv_cq; 501 | ib_resources->lsend_buf = send_buf; 502 | ib_resources->lmr_send = mr_send; 503 | 504 | return ib_resources; 505 | } 506 | 507 | 508 | hostContext::hostContext(const string& interface, unsigned int workers_num, unsigned int tcp_port) : _workers_num(workers_num) { 509 | 510 | int lfd, sfd; 511 | int server_tcp_port = tcp_port; 512 | lfd = socket(AF_INET, SOCK_STREAM, 0); 513 | if (lfd < 0) { 514 | std::cerr << "socket" << std::endl; 515 | exit(1); 516 | } 517 | struct sockaddr_in server_addr; 518 | memset(&server_addr, 0, sizeof(struct sockaddr_in)); 519 | server_addr.sin_family = AF_INET; 520 | server_addr.sin_addr.s_addr = INADDR_ANY; 521 | server_addr.sin_port = htons(server_tcp_port); 522 | 523 | if (bind(lfd, (struct sockaddr *)&server_addr, sizeof(struct sockaddr_in)) < 0) { 524 | std::cerr << "bind lfd" << std::endl; 525 | exit(1); 526 | } 527 | listen(lfd, 1); 528 | 529 | std::cout << "Host is waiting on port " << server_tcp_port << " to establish RX Queue. BlueField can connect." << std::endl; 530 | sfd = accept(lfd, NULL, NULL); 531 | if (sfd < 0) { 532 | std::cerr << "accept sfd1" << std::endl; 533 | exit(1); 534 | } 535 | std::cout << "BlueField is connected" << std::endl; 536 | std::cout << "create RX Queue " << std::endl; 537 | recv_data_ib_resources = setup_recv_data_connection(interface,sfd); 538 | close(sfd); 539 | 540 | std::cout << "Host is waiting on port " << server_tcp_port << " to establish TX Queue. BlueField can connect." 
<< std::endl; 541 | sfd = accept(lfd, NULL, NULL); 542 | if(sfd < 0) { 543 | std::cerr << "accept sfd" << std::endl; 544 | exit(1); 545 | } 546 | std::cout << "create TX Queue " << std::endl; 547 | send_data_ib_resources = setup_send_data_connection(interface,sfd); 548 | 549 | std::cout << "create Side Channel Notification " << std::endl; 550 | notify_ib_resources = setup_notify_connection(interface,sfd); 551 | close(sfd); 552 | 553 | close(lfd); 554 | 555 | _d_req_base_addresses = NULL; 556 | _d_resp_base_addresses = NULL; 557 | 558 | } 559 | 560 | 561 | hostContext::~hostContext() { 562 | std::cout << "kill hostcontext" << std::endl; 563 | teardown_connection(notify_ib_resources); 564 | teardown_connection(recv_data_ib_resources); 565 | teardown_connection(send_data_ib_resources); 566 | free(recv_data_ib_resources); 567 | free(send_data_ib_resources); 568 | if(_d_req_base_addresses != NULL){ 569 | CUDA_CHECK(cudaFree(_d_req_base_addresses)); 570 | } 571 | if(_d_resp_base_addresses != NULL){ 572 | CUDA_CHECK(cudaFree(_d_resp_base_addresses)); 573 | } 574 | } 575 | 576 | 577 | void* hostContext::getRequestBaseAddress() { 578 | return recv_data_ib_resources->lrecv_buf; 579 | } 580 | 581 | void* hostContext::getResponseBaseAddress() { 582 | return send_data_ib_resources->lsend_buf; 583 | } 584 | 585 | 586 | unsigned int* hostContext::getRequestCIBaseAddress() { 587 | return (unsigned int*) (notify_ib_resources->lsend_buf) + _workers_num; 588 | } 589 | 590 | 591 | unsigned int* hostContext::getResponsePIBaseAddress() { 592 | return (unsigned int*) notify_ib_resources->lsend_buf; 593 | } 594 | 595 | 596 | unsigned int* hostContext::getResponseCIBaseAddress() { 597 | return (unsigned int*) (notify_ib_resources->lrecv_buf); 598 | } 599 | 600 | 601 | void** hostContext::getDeviceReqBaseAddresses() { 602 | void *req_base_addresses = getRequestBaseAddress(); 603 | CUDA_CHECK(cudaMalloc(&_d_req_base_addresses, sizeof(void*))); 604 | 
CUDA_CHECK(cudaMemcpy(_d_req_base_addresses,&req_base_addresses, sizeof(void*), cudaMemcpyHostToDevice)); 605 | return _d_req_base_addresses; 606 | } 607 | 608 | 609 | void** hostContext::getDeviceRespBaseAddresses() { 610 | void *resp_base_addresses = getResponseBaseAddress(); 611 | CUDA_CHECK(cudaMalloc(&_d_resp_base_addresses, sizeof(void*))); 612 | CUDA_CHECK(cudaMemcpy(_d_resp_base_addresses, &resp_base_addresses, sizeof(void*), cudaMemcpyHostToDevice)); 613 | return _d_resp_base_addresses; 614 | } 615 | 616 | void hostContext::waitDevice(){ 617 | CUDA_CHECK(cudaDeviceSynchronize()); 618 | } 619 | -------------------------------------------------------------------------------- /bf_host/bf_host.cu.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Maroun Tork, Lina Maudlej and Mark Silberstein 3 | * All rights reserved. 4 | * If used, please cite: PAPER NAME, AUTHORS, CONFERENCE WHERE PUBLISHED 5 | * 6 | * Redistribution and use in source and binary forms, with or without modification, 7 | * are permitted provided that the following conditions are met: 8 | * 9 | * Redistributions of source code must retain the above copyright notice, this 10 | * list of conditions and the following disclaimer. 11 | * 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | * this list of conditions and the following disclaimer in the documentation and/or 14 | * other materials provided with the distribution. 15 | * 16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' 17 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19 | * DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR 20 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 21 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 22 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 23 | * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 25 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | */ 27 | 28 | 29 | #ifndef __BF_HOST_H__ 30 | #define __BF_HOST_H__ 31 | 32 | #include "../common/setup.hpp" 33 | #include "gpu_define.cu.h" 34 | 35 | class hostContext { 36 | unsigned int _workers_num; 37 | void** _d_req_base_addresses; 38 | void** _d_resp_base_addresses; 39 | 40 | ib_resources_t* recv_data_ib_resources; 41 | ib_resources_t* send_data_ib_resources; 42 | ib_resources_t* notify_ib_resources; 43 | 44 | void teardown_connection(ib_resources_t* ib_resources); 45 | ib_resources_t* setup_recv_data_connection(const string& interface, int sfd); 46 | ib_resources_t* setup_send_data_connection(const string& interface, int sfd); 47 | ib_resources_t* setup_notify_connection(const string& interface, int sfd); 48 | 49 | public: 50 | hostContext(const string& interface, unsigned int workers_num = 1, unsigned int tcp_port = TCP_PORT_NUM); 51 | ~hostContext(); 52 | 53 | void* getRequestBaseAddress(); 54 | void* getResponseBaseAddress(); 55 | unsigned int* getRequestCIBaseAddress(); 56 | unsigned int* getResponsePIBaseAddress(); 57 | unsigned int* getResponseCIBaseAddress(); 58 | void** getDeviceReqBaseAddresses(); 59 | void** getDeviceRespBaseAddresses(); 60 | void waitDevice(); 61 | }; 62 | 63 | 64 | #endif 65 | -------------------------------------------------------------------------------- /bf_host/gpu_define.cu.h: -------------------------------------------------------------------------------- 1 | /* 2 | * 
Copyright (c) Maroun Tork, Lina Maudlej and Mark Silberstein 3 | * All rights reserved. 4 | * If used, please cite: PAPER NAME, AUTHORS, CONFERENCE WHERE PUBLISHED 5 | * 6 | * Redistribution and use in source and binary forms, with or without modification, 7 | * are permitted provided that the following conditions are met: 8 | * 9 | * Redistributions of source code must retain the above copyright notice, this 10 | * list of conditions and the following disclaimer. 11 | * 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | * this list of conditions and the following disclaimer in the documentation and/or 14 | * other materials provided with the distribution. 15 | * 16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' 17 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR 20 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 21 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 22 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 23 | * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 25 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | */ 27 | 28 | #ifndef __GPU_DEFINE_H__ 29 | #define __GPU_DEFINE_H__ 30 | 31 | #include "../common/setup.hpp" 32 | 33 | 34 | //#define DEBUG_PRINTF( ... ) printf( __VA_ARGS__ ) 35 | #define DEBUG_PRINTF( ... 
) 36 | 37 | 38 | #define FIRST_THREAD_IN_BLOCK() ((threadIdx.x + threadIdx.y + threadIdx.z) == 0) 39 | #define BEGIN_SINGLE_THREAD __syncthreads(); if(FIRST_THREAD_IN_BLOCK()) { do { 40 | #define END_SINGLE_THREAD } while(0); } __syncthreads(); 41 | #define getGlobalID() blockIdx.x + blockIdx.y * gridDim.x + gridDim.x * gridDim.y * blockIdx.z 42 | 43 | //assumption 1D threadBlocks 44 | #define worker_init_send_recv(qp_recv_addr, qp_send_addr, recv_ci_addr, send_pi_addr, send_ci_addr) \ 45 | unsigned int worker_gid = getGlobalID();\ 46 | int stride = gridDim.x;\ 47 | volatile unsigned int *recv_ci_ptr = recv_ci_addr + worker_gid;\ 48 | volatile unsigned int *send_pi_ptr = send_pi_addr + worker_gid;\ 49 | volatile unsigned int *send_ci_ptr = send_ci_addr + worker_gid;\ 50 | volatile void* qp_recv_base_addr = qp_recv_addr;\ 51 | volatile void* qp_send_base_addr = qp_send_addr;\ 52 | if(threadIdx.x * blockDim.x + threadIdx.y == 0) send_buf = send_buf + worker_gid * HOST_SEND_MSG_SIZE;\ 53 | 54 | 55 | #define grecv() \ 56 | {\ 57 | BEGIN_SINGLE_THREAD\ 58 | DEBUG_PRINTF("----- GRECV -----\n");\ 59 | volatile unsigned int ci_val = MOD( *(recv_ci_ptr) + stride,HOST_MAX_RECV_WQES);\ 60 | *(recv_ci_ptr) = ci_val;\ 61 | *(int*)((char*)qp_recv_base_addr + (ci_val * (HOST_RECV_MSG_SIZE + OWNER_INT_SIZE)) + HOST_RECV_MSG_SIZE) = 0;\ 62 | recv_buf = (char*)qp_recv_base_addr + (MOD(ci_val+stride,HOST_MAX_RECV_WQES) * (HOST_RECV_MSG_SIZE + OWNER_INT_SIZE));\ 63 | volatile unsigned int* ptr = (unsigned int*) (recv_buf + HOST_RECV_MSG_SIZE);\ 64 | while(*ptr == 0) {}\ 65 | DEBUG_PRINTF("gid=%d after loop: ci_val=%d stride=%d\n",worker_gid, ci_val,stride);\ 66 | END_SINGLE_THREAD\ 67 | } 68 | 69 | 70 | 71 | //do we need threadfence? before updating send_buf? 
72 | #define gsend() \ 73 | {\ 74 | BEGIN_SINGLE_THREAD\ 75 | unsigned int ci_val = *send_ci_ptr;\ 76 | unsigned int pi_val = *send_pi_ptr;\ 77 | DEBUG_PRINTF("----- GSEND -----\n");\ 78 | DEBUG_PRINTF("before loop: pi_val=%d ci_val=%d\n",pi_val, ci_val);\ 79 | while(!CAN_PUSH(pi_val,ci_val,HOST_MAX_SEND_WQES)) {\ 80 | ci_val = *send_ci_ptr;\ 81 | }\ 82 | pi_val = MOD(pi_val + stride,HOST_MAX_SEND_WQES);\ 83 | *(send_pi_ptr) = pi_val;\ 84 | DEBUG_PRINTF("after loop: new_pi_val=%d ci_val=%d\n",*(send_pi_ptr), ci_val);\ 85 | send_buf = (char*)qp_send_base_addr + (MOD(pi_val + stride, HOST_MAX_SEND_WQES) * HOST_SEND_MSG_SIZE);\ 86 | END_SINGLE_THREAD\ 87 | } 88 | 89 | 90 | #ifdef SOCKPERF 91 | #define copy_sockperf_header()\ 92 | {\ 93 | for(int i = threadIdx.x ; i < SOCKPERF_HEADER/4 ; i += blockDim.x) {\ 94 | *(int*)((int*)send_buf + i) = *(int*)((int*)recv_buf + i);\ 95 | if(i == 2) *(int*)((int*)send_buf + i) = *(int*)((int*)send_buf + i) & 0xFFFFFFFF00000000;\ 96 | }\ 97 | } 98 | #else 99 | #define copy_sockperf_header() 100 | #endif 101 | 102 | #endif 103 | -------------------------------------------------------------------------------- /common/setup.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Maroun Tork, Lina Maudlej and Mark Silberstein 3 | * All rights reserved. 4 | * If used, please cite: PAPER NAME, AUTHORS, CONFERENCE WHERE PUBLISHED 5 | * 6 | * Redistribution and use in source and binary forms, with or without modification, 7 | * are permitted provided that the following conditions are met: 8 | * 9 | * Redistributions of source code must retain the above copyright notice, this 10 | * list of conditions and the following disclaimer. 11 | * 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | * this list of conditions and the following disclaimer in the documentation and/or 14 | * other materials provided with the distribution. 
15 | * 16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' 17 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR 20 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 21 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 22 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 23 | * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 25 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | */ 27 | 28 | #include "setup.hpp" 29 | 30 | double static inline get_time_msec(void) { 31 | struct timeval t; 32 | gettimeofday(&t, NULL); 33 | return t.tv_sec * 1e+3 + t.tv_usec * 1e-3; 34 | } 35 | 36 | 37 | struct ibv_context *ibv_open_device_by_name(const std::string& device_name) 38 | { 39 | int num_devices = 0; 40 | struct ibv_device **devices_list = ibv_get_device_list(&num_devices); 41 | if(!devices_list){ 42 | printf("ERROR: ibv_get_device_list() failed\n"); 43 | exit(1); 44 | } 45 | std::cout << "ibv_open_device_by_name: " << num_devices << " devices were found." << std::endl; 46 | std::cout << "device_name is " << device_name << std::endl; 47 | for (int i = 0; i < num_devices; ++i) { 48 | string cur_name = ibv_get_device_name(devices_list[i]); 49 | std::cout << "device[" << i << "] name: " << cur_name << std::endl; 50 | if (device_name == cur_name){ 51 | std::cout << device_name << " found." 
<< std::endl; 52 | return ibv_open_device(devices_list[i]); 53 | } 54 | } 55 | 56 | printf("ERROR: device named '%s' not found\n", device_name.c_str()); 57 | return NULL; 58 | } 59 | 60 | string ib_device_from_netdev(const string& netdev) 61 | { 62 | fs::path dir = "/sys/class/net/" + netdev + "/device/infiniband"; 63 | fs::directory_iterator end; 64 | for (fs::directory_iterator dir_itr(dir); dir_itr != end; ++dir_itr) { 65 | return dir_itr->path().filename().string(); 66 | } 67 | 68 | printf("Could not find IB device of netdev '%s'\n", netdev.c_str()); 69 | abort(); 70 | } 71 | 72 | int get_gid_index(ibv_context* dev) 73 | { 74 | for (int i = 0; i < 0xffff; ++i) { 75 | ibv_gid gid; 76 | 77 | if (ibv_query_gid(dev, 1, i, &gid)) { 78 | printf("ibv_query_gid failed for gid %d", i); 79 | exit(1); 80 | } 81 | 82 | /* Check for IPv4 */ 83 | if (gid.global.subnet_prefix != 0ull || 84 | (gid.global.interface_id & 0xffffffff) != 0xffff0000ull) 85 | continue; 86 | 87 | char gid_type_str[7]; 88 | int len = ibv_read_sysfs_file("/sys/class/infiniband/mlx5_0/ports/1/gid_attrs/types", 89 | boost::lexical_cast(i).c_str(), gid_type_str, sizeof(gid_type_str)); 90 | if (len < 0) { 91 | printf("cannot read gid type for gid %d", i); 92 | return -1; 93 | } 94 | 95 | if (strncasecmp(gid_type_str, "RoCE v2", len) != 0) 96 | continue; 97 | 98 | /* TODO check also the netdev matches */ 99 | return i; 100 | } 101 | return -1; 102 | } 103 | 104 | -------------------------------------------------------------------------------- /common/setup.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Maroun Tork, Lina Maudlej and Mark Silberstein 3 | * All rights reserved. 
4 | * If used, please cite: PAPER NAME, AUTHORS, CONFERENCE WHERE PUBLISHED 5 | * 6 | * Redistribution and use in source and binary forms, with or without modification, 7 | * are permitted provided that the following conditions are met: 8 | * 9 | * Redistributions of source code must retain the above copyright notice, this 10 | * list of conditions and the following disclaimer. 11 | * 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | * this list of conditions and the following disclaimer in the documentation and/or 14 | * other materials provided with the distribution. 15 | * 16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' 17 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR 20 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 21 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 22 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 23 | * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 25 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#ifndef __SETUP_H__
#define __SETUP_H__

// NOTE(review): every #include directive below is bare in this copy of the
// file (the header names are missing) — restore them from the upstream
// repository before building.
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

extern "C" {
#include
}

#define N 10

// IB port number used for every port, QP and GID query.
#define PORT_NUM 1
#define DATA_QP_SIZE (32*1024)
#define CONTROL_QP_SIZE 4

// Default port of the TCP connection-setup handshake.
#define TCP_PORT_NUM 5000
#define UDP_PORT_NUM 5000


// When SOCKPERF is defined every message carries a SOCKPERF_HEADER-byte header
// in addition to the payload.
#define SOCKPERF

#ifdef SOCKPERF
#define SOCKPERF_HEADER 16
#else
#define SOCKPERF_HEADER 0
#endif


using boost::asio::local::stream_protocol;
using std::string;
namespace fs = boost::filesystem;

// Payload bytes per message.
#define ACTUAL_PAYLOAD (28*28)

#define RECV_WQES_NUM (8*1024)
#define NOTIFY_WQES_NUM (32)
// Size of the per-slot owner word that follows each received message.
#define OWNER_INT_SIZE (sizeof(unsigned int))

// Client-side queue depths and message sizes.
#define CLIENT_MAX_RECV_WQES RECV_WQES_NUM
#define CLIENT_MAX_SEND_WQES CLIENT_MAX_RECV_WQES
#define CLIENT_RECV_CQ_SIZE CLIENT_MAX_RECV_WQES
#define CLIENT_SEND_CQ_SIZE CLIENT_MAX_SEND_WQES
#define CLIENT_SEND_MSG_SIZE (ACTUAL_PAYLOAD + SOCKPERF_HEADER)
#define CLIENT_RECV_MSG_SIZE (ACTUAL_PAYLOAD + SOCKPERF_HEADER)
#define CLIENT_TOTAL_DATA_TO_SERVER_SIZE (CLIENT_MAX_SEND_WQES * CLIENT_SEND_MSG_SIZE)
#define CLIENT_TOTAL_DATA_FROM_SERVER_SIZE (CLIENT_MAX_RECV_WQES * CLIENT_RECV_MSG_SIZE)
#define CLIENT_NUM_OF_WQE_LISTS (2)

// Host-side queue depths and message sizes.  Each slot of the host receive
// buffer is one message plus an owner word, hence the
// (HOST_RECV_MSG_SIZE + OWNER_INT_SIZE) term in the total.
#define HOST_MAX_RECV_WQES RECV_WQES_NUM
#define HOST_MAX_SEND_WQES HOST_MAX_RECV_WQES
#define HOST_RECV_CQ_SIZE HOST_MAX_RECV_WQES
#define HOST_SEND_CQ_SIZE HOST_MAX_SEND_WQES
#define HOST_SEND_MSG_SIZE CLIENT_RECV_MSG_SIZE
#define HOST_RECV_MSG_SIZE CLIENT_SEND_MSG_SIZE
#define HOST_TOTAL_DATA_TO_CLIENT_SIZE (HOST_MAX_SEND_WQES * HOST_SEND_MSG_SIZE)
#define HOST_TOTAL_DATA_FROM_CLIENT_SIZE (HOST_MAX_RECV_WQES * (HOST_RECV_MSG_SIZE + OWNER_INT_SIZE))

//H2C - Host to Client
// BlueField-side queue depths and message sizes.
#define BF_MAX_RECV_WQES RECV_WQES_NUM
#define BF_MAX_SEND_WQES HOST_MAX_RECV_WQES
#define BF_RECV_CQ_SIZE BF_MAX_RECV_WQES
#define BF_SEND_CQ_SIZE BF_MAX_SEND_WQES
#define BF_C2H_MSG_SIZE (HOST_RECV_MSG_SIZE + OWNER_INT_SIZE) // owner int - it should be owner bit (alignment in GPU issues)
#define BF_H2C_MSG_SIZE HOST_SEND_MSG_SIZE
#define BF_TOTAL_DATA_TO_HOST_SIZE (BF_MAX_SEND_WQES * BF_C2H_MSG_SIZE)
#define BF_TOTAL_DATA_FROM_HOST_SIZE (BF_MAX_RECV_WQES * BF_H2C_MSG_SIZE)
#define BF_NUM_OF_WQE_LISTS (2)
#define BF_MAX_POLL_CQES (1)

// Evaluates the CUDA call `f`; on any result other than cudaSuccess prints the
// file, line and error string and exits the process.
#define CUDA_CHECK(f) do { \
	cudaError_t e = f; \
	if (e != cudaSuccess) { \
		printf("Cuda failure %s:%d: '%s'\n", __FILE__, __LINE__, cudaGetErrorString(e)); \
		exit(1); \
	} \
} while (0)



#define MOD(X,Y) ((X) & ((Y) - 1)) // Y has to be power of 2
//#define HAS_REQUEST(pi_val, ci_val, stride) ( ((ci_val <= pi_val)&&(ci_val+stride <= pi_val)) || \
	((ci_val > pi_val)&&((ci_val+stride < MAX_WQES_NUMBER_PER_QP)||(MOD(ci_val+stride,MAX_WQES_NUMBER_PER_QP)<=pi_val))) )
//#define CAN_PUSH(pi_val, ci_val) ( MOD(pi_val+1,MAX_WQES_NUMBER_PER_QP) != ci_val)

// Ring-buffer predicates over a producer index `pi`, a consumer index `ci` and
// a ring of N slots.  FREE_SLOTS yields N - 1 when pi == ci, i.e. one slot is
// always kept unused.
#define CAN_PUSH(pi, ci, N) (FREE_SLOTS(pi, ci, N) > 0)
//#define HAS_REQUEST(pi, ci, N) (OCCUPIED_SLOTS(pi, ci, N) > 0)
#define HAS_REQUEST(pi, ci, N) (OCCUPIED_SLOTS(pi, ci, N))

#define FREE_SLOTS(pi, ci, N) ( ((pi) >= (ci)) ? ( (N) - ( (pi) - (ci) + 1 ) ) : ( (ci) - (pi) - 1) )
#define OCCUPIED_SLOTS(pi, ci, N) ( (ci) > (pi) ? ( (N) + (pi) - (ci) ) : ( (pi) - (ci)) )

// Everything that belongs to one established IB connection.
struct ib_resources_t {
	// Bookkeeping counters/flags; they are not touched by the connection-setup
	// code (presumably maintained by the data path — confirm against users).
	unsigned int posted_wqes;
	unsigned int* load_factor;
	unsigned int resp_sent;
	unsigned int client_fd;
	unsigned int recv_buf_offset;
	unsigned int wrap_around;
	bool update_wrap_around;

	struct ibv_context *context;
	struct ibv_pd *pd;

	struct ibv_qp *qp;
	struct ibv_cq *recv_cq;
	struct ibv_cq *send_cq;

	// Local receive/send buffers and their memory regions.
	char* lrecv_buf;
	struct ibv_mr* lmr_recv;
	char* lsend_buf;
	struct ibv_mr* lmr_send;

	// Keys and addresses of the peer's buffers, taken from its ib_info_t.
	int rmr_recv_key;
	long long rmr_recv_addr;
	int rmr_send_key;
	long long rmr_send_addr;
};

// Connection parameters the two sides exchange during setup.  The struct is
// transmitted as raw bytes with send()/recv(), so both ends must agree on its
// layout.
struct ib_info_t {
	int lid;                          // port LID
	int qpn;                          // queue pair number
	int mkey_data_buffer;             // rkey of the data (receive) buffer
	long long addr_data_buffer;       // address of the data (receive) buffer
	int mkey_response_buffer;         // rkey of the response (send) buffer
	long long addr_response_buffer;   // address of the response (send) buffer
	ibv_gid gid;                      // GID of the sender's port
};


int get_gid_index(ibv_context* dev);
struct ibv_context *ibv_open_device_by_name(const std::string& device_name);
string ib_device_from_netdev(const string& netdev);
// NOTE(review): declared `static inline` here without a body; the definition
// visible in setup.cpp is internal to that translation unit.
double static inline get_time_msec();
#endif