├── .gitignore ├── LICENSE ├── README.md ├── client ├── Makefile ├── main.cc ├── p4ml_manager.cc └── p4ml_manager.h ├── common ├── Rogue.cc ├── Rogue.h ├── ThreadPool.h ├── dma_common.cc ├── dma_common.h ├── mlx5_defs.h ├── packet.cc ├── packet.h ├── utils.h ├── window_manager.cc └── window_manager.h ├── docs └── benchmark.md ├── p4src ├── includes │ ├── actions.p4 │ ├── headers.p4 │ ├── parser.p4 │ ├── registers.p4 │ └── tables.p4 └── switchml.p4 ├── ptf └── ptfTest.py └── run_pd_rpc └── swithml_setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | .vscode 3 | *.tar 4 | log 5 | *.o 6 | app 7 | .DS_Store -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 NetLabIIIS and WISR 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ATP-SwitchML 2 | 3 | A SwitchML implementation by the ATP group, for experimental purposes. 4 | 5 | # Benchmark 6 | To run the benchmark, please see [benchmark](docs/benchmark.md). 7 | 8 | # Related Publications 9 | 10 | - [NSDI'21] "[ATP: In-network Aggregation for Multi-tenant Learning](https://www.usenix.org/conference/nsdi21/presentation/lao)". ChonLam Lao, Yanfang Le, Kshiteej Mahajan, Yixi Chen, Wenfei Wu, Aditya Akella, Michael Swift. 11 | 12 | # Contact 13 | 14 | Any questions? Please feel free to reach us at inatpcontact@gmail.com. You are more likely to receive a helpful response if your question is specific, self-contained and concise. 
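# Build and Run (Example)

A minimal quick-start sketch rather than an official recipe: it assumes a Linux host with g++, libibverbs, and librdmacm installed, and simply mirrors `client/Makefile` and the usage string in `client/main.cc` (`Usage ./app [MyID] [Num of Worker] [AppID]`); the worker count, AppID, and flag values below are illustrative.

```bash
cd client
make                  # builds the `app` binary with g++ (see client/Makefile)
./app 0 2 1           # MyID=0, 2 workers, AppID=1
./app 0 2 1 -a 800    # optional: set the max number of aggregators per thread
./app 0 2 1 -f 0.25   # optional: force-forward packets at the given rate
```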
15 | -------------------------------------------------------------------------------- /client/Makefile: -------------------------------------------------------------------------------- 1 | # CFLAGS := -O3 -g 2 | # LD := g++ 3 | # LDFLAGS := ${LDFLAGS} -lrdmacm -libverbs -lrt -lpthread -lm 4 | 5 | # ROCE_COMMON_PATH = ../common/ 6 | # INCLUDES = -I${ROCE_COMMON_PATH} 7 | # CFLAGS := ${CFLAGS} ${INCLUDES} 8 | # SOURCES := $(wildcard *.c *.h ${ROCE_COMMON_PATH}*.c ${ROCE_COMMON_PATH}*.h) 9 | 10 | 11 | # all: app 12 | # app: main.o p4ml_manager.o ${ROCE_COMMON_PATH}packet.o ${ROCE_COMMON_PATH}dma_common.o ${ROCE_COMMON_PATH}window_manager.o 13 | # ${LD} $(CFLAGS) -o $@ $^ ${LDFLAGS} 14 | 15 | 16 | # # Clean Target 17 | # clean: 18 | # rm *.o ../common/*.o 19 | # rm app 20 | 21 | all: 22 | g++ -std=c++11 -g -O3 -c -o main.o main.cc 23 | g++ -std=c++11 -g -O3 -c -o p4ml_manager.o p4ml_manager.cc 24 | g++ -std=c++11 -g -O3 -c -o ../common/packet.o ../common/packet.cc 25 | g++ -std=c++11 -g -O3 -c -o ../common/dma_common.o ../common/dma_common.cc 26 | g++ -std=c++11 -g -O3 -c -o ../common/window_manager.o ../common/window_manager.cc 27 | g++ -std=c++11 -g -O3 -c -o ../common/Rogue.o ../common/Rogue.cc 28 | g++ -std=c++11 -g -O3 -I../common/ -o app main.o p4ml_manager.o ../common/Rogue.o ../common/packet.o ../common/dma_common.o ../common/window_manager.o -lrdmacm -libverbs -lrt -lpthread -lm 29 | 30 | clean: 31 | rm *.o 32 | rm app 33 | -------------------------------------------------------------------------------- /client/main.cc: -------------------------------------------------------------------------------- 1 | #include "p4ml_manager.h" 2 | 3 | uint32_t* init_model(int size) { 4 | uint32_t* tmp = new uint32_t[size]; 5 | for (int i = 0; i < size; i++) 6 | tmp[i] = i+1; 7 | return tmp; 8 | } 9 | 10 | std::shared_ptr _p4ml_manager; 11 | 12 | int main(int argc, char *argv[]) 13 | { 14 | bindingCPU(0); 15 | 16 | if (argc < 4) { 17 | printf("\nUsage %s [MyID] [Num of Worker] [AppID]\n\n", argv[0]); 18 | exit(1); 19 | } 20 | 21 | int host = atoi(argv[1]); 22 | int num_worker = atoi(argv[2]); 23 | int appID = atoi(argv[3]); 24 | 25 | //int host = 0; 26 | // int num_worker = 2; 27 | // int appID = 1; 28 | 29 | _p4ml_manager = std::shared_ptr(new P4mlManager(host, num_worker, appID)); 30 | 31 | /* Here for int size to send per thread */ 32 | /* ex. 25600 = 32*800 = 1 Round */ 33 | int size = 1024000; 34 | int thread_to_use = 12; 35 | int loop_time = 1000; 36 | 37 | if (argc > 4) { 38 | std::string option = argv[4]; 39 | if (option == "-a") { 40 | int num_agtr = atoi(argv[5]); 41 | _p4ml_manager->SetMaxAgtrSizePerThread(num_agtr); 42 | } 43 | if (option == "-f") { 44 | float forward_rate = atof(argv[5]); 45 | _p4ml_manager->SetForceForward(forward_rate); 46 | } 47 | } 48 | 49 | /* (40) Threads in thread pool */ 50 | /* MAX_AGTR (32000) / 40 = 800 Agtr per thread */ 51 | _p4ml_manager->init_threadPool(thread_to_use); 52 | 53 | // _p4ml_manager->SetForceForward(0.25); 54 | // _p4ml_manager->SetMaxAgtrSizePerThread(50); 55 | 56 | int finish_counter = loop_time * thread_to_use; 57 | uint32_t** tensor = new uint32_t*[thread_to_use * loop_time]; 58 | 59 | printf("\nModel initializing...\n"); 60 | // for (int i = 0; i < loop_time * thread_to_use; i++) 61 | tensor[0] = init_model(size); 62 | printf("\nModel initialized completed. 
Start sending...\n\n"); 63 | 64 | for (int j = 0; j < loop_time; j++) { 65 | /* thread to use */ 66 | for (int i = 0; i < thread_to_use; i++) { 67 | uint64_t key = _p4ml_manager->GetNewKey(); 68 | _p4ml_manager->PushPull(key, (char*) tensor[0], size, 1); 69 | } 70 | } 71 | 72 | std::chrono::time_point timer = std::chrono::high_resolution_clock::now(); 73 | 74 | std::chrono::high_resolution_clock::time_point t1 = std::chrono::high_resolution_clock::now(); 75 | 76 | int total_sent = 0; 77 | 78 | while (finish_counter > 0) { 79 | int64_t tmp_key = _p4ml_manager->GetFinishKey(); 80 | if (tmp_key >= 0) { 81 | finish_counter--; 82 | total_sent++; 83 | } 84 | 85 | // std::chrono::time_point current_time = 86 | // std::chrono::high_resolution_clock::now(); 87 | // std::chrono::duration time_span = 88 | // std::chrono::duration_cast>(current_time - timer); 89 | // std::chrono::duration total_time = 90 | // std::chrono::duration_cast>(current_time - t1); 91 | // if (time_span.count() >= 1.0) { 92 | // // printf("Tensor left: %d, ", finish_counter); 93 | // // printf("total send %" PRIu64 " bytes, time %lf, throughput: %lf\n", total_sent * 32000 * 194, total_time, total_sent * 6062.5 / 1024.0 / 1024.0 * 8.0 / 1.0); 94 | // printf("%lf\n", total_sent * 6062.5 / 1024.0 / 1024.0 * 8.0 / 1.0); 95 | // total_sent = 0; 96 | // timer = current_time; 97 | // } 98 | } 99 | std::chrono::high_resolution_clock::time_point t2 = std::chrono::high_resolution_clock::now(); 100 | std::chrono::duration time_span = std::chrono::duration_cast>(t2 - t1); 101 | double transmit_size_in_m = (double)((double)size * loop_time * thread_to_use / (float)MAX_ENTRIES_PER_PACKET) * P4ML_PACKET_SIZE / 1024 / 1024; 102 | double total_time = time_span.count(); 103 | double throughput = (transmit_size_in_m / 1024 * 8 ) / total_time; 104 | printf("Finish all %d Tensors,\n Time = %lf s,\n Total Size = %lf MB,\n Throughput: %lf Gbps\n\n", thread_to_use * loop_time, total_time, transmit_size_in_m, throughput); 105 | } 106 | -------------------------------------------------------------------------------- /client/p4ml_manager.cc: -------------------------------------------------------------------------------- 1 | #include "p4ml_manager.h" 2 | 3 | #define CC_ENABLE false 4 | #define LOSS_RECOVERY_ENABLE false 5 | #define LOSS_RECOVERY_LOG true 6 | 7 | uint32_t P4mlManager::host; 8 | uint8_t P4mlManager::num_worker; 9 | uint16_t P4mlManager::appID; 10 | uint64_t P4mlManager::p4mlKey; 11 | int P4mlManager::max_agtr_size_per_thread = 0; 12 | int P4mlManager::_num_thread; 13 | 14 | std::chrono::time_point P4mlManager::start; 15 | 16 | ThreadInfo** P4mlManager::threadInfoQueue; 17 | DMAcontext** P4mlManager::dmaContextQueue; 18 | std::thread** P4mlManager::threadQueue; 19 | std::queue *P4mlManager::jobQueue; 20 | std::queue *P4mlManager::pendingQueue; 21 | uint64_t* P4mlManager::weightQueue; 22 | std::queue P4mlManager::finishQueue; 23 | WindowManager* P4mlManager::window_manager; 24 | 25 | uint16_t* P4mlManager::hash_map; 26 | 27 | int finish_thread = 0; 28 | int loop_times[56] = {0}; 29 | 30 | int random_array[20][1024000] = {0}; 31 | bool isLoss[20][1024001] = {0}; 32 | bool isSeen[20][1024001] = {0}; 33 | int forwardCounter[20] = {0}; 34 | 35 | float mean[1000]; 36 | float median[1000]; 37 | 38 | bool P4mlManager::isForceForward = false; 39 | int P4mlManager::forwardFrequency; 40 | float P4mlManager::forwardRate; 41 | 42 | std::mutex P4mlManager::_P4MLKey_mutex; 43 | std::mutex P4mlManager::_print_mutex; 44 | std::mutex 
P4mlManager::_queuePush_mutex; 45 | 46 | std::chrono::high_resolution_clock::time_point recv_time; 47 | 48 | P4mlManager::P4mlManager(uint32_t host, int num_worker, int appID) { 49 | srand(time(NULL)); 50 | this->host = host; 51 | this->p4mlKey = 0; 52 | this->appID = (uint16_t) appID; 53 | this->num_worker = (uint8_t) num_worker; 54 | } 55 | 56 | void P4mlManager::updateModel(agghdr *p4ml_header, int32_t *data) { 57 | uint16_t *p_seq = &p4ml_header->seq_num; 58 | uint32_t *tensor_len = &p4ml_header->len_tensor; 59 | int32_t *p_model = p4ml_header->vector; 60 | uint32_t offset = (*p_seq - 1) * MAX_ENTRIES_PER_PACKET; 61 | if (offset < *tensor_len) { 62 | if (offset + MAX_ENTRIES_PER_PACKET > *tensor_len) 63 | memcpy(data + offset, p_model, sizeof(int32_t) * (*tensor_len % MAX_ENTRIES_PER_PACKET)); 64 | else 65 | memcpy(data + offset, p_model, sizeof(int32_t) * MAX_ENTRIES_PER_PACKET); 66 | } 67 | } 68 | 69 | uint64_t P4mlManager::GetNewKey() { 70 | std::lock_guard lock(_P4MLKey_mutex); 71 | return p4mlKey++; 72 | } 73 | 74 | int64_t P4mlManager::GetFinishKey() { 75 | if (!finishQueue.empty()) { 76 | std::lock_guard lock(_queuePush_mutex); 77 | uint64_t tmp_key = finishQueue.front(); 78 | finishQueue.pop(); 79 | // printf("return completed task %d to outside\n", tmp_key); 80 | return tmp_key; 81 | } else { 82 | return -1; 83 | } 84 | } 85 | 86 | void P4mlManager::SetForceForward(float forward_rate) { 87 | isForceForward = true; 88 | forwardRate = forward_rate; 89 | if (forward_rate == 0.75) { 90 | forwardFrequency = 4; 91 | printf("\n No 0.75 supported, exit.\n"); 92 | exit(1); 93 | } else { 94 | forwardFrequency = 1 / forward_rate; 95 | printf("\nSet force forward, frequency = %d\n\n", forwardFrequency); 96 | } 97 | } 98 | 99 | void P4mlManager::SetMaxAgtrSizePerThread(int agtr) { 100 | max_agtr_size_per_thread = agtr; 101 | printf("\nSet max_agtr_size_per_thread to %d...\n\n", agtr); 102 | } 103 | 104 | void P4mlManager::main_receive_packet_loop(DMAcontext *dma_context, 105 | int32_t *data, 106 | int my_id) { 107 | int msgs_completed; 108 | int total_resent = 0; 109 | std::queue resent_queue; 110 | std::queue trigger_queue; 111 | std::queue loss_queue; 112 | int total_loss = 0; 113 | int total_dup_packet = 0; 114 | int total_last_tensor_packet = 0; 115 | int this_pos_to_send = max_agtr_size_per_thread; 116 | int resend_pos_to_send = dma_context->my_send_queue_length / 2; 117 | int total_packet = window_manager[my_id].total_ACK; 118 | int rand_index = 0; 119 | 120 | int window = max_agtr_size_per_thread; 121 | /* Loss simulation */ 122 | int loss = 0; 123 | int resend_loss = 0; 124 | int timeout_loss = 0; 125 | 126 | int send_pointer = max_agtr_size_per_thread; 127 | 128 | int last_pending_front = 0; 129 | int pending_front_stuck_time = 0; 130 | 131 | bool resend_waiting = false; 132 | 133 | // int packet_processing_time_count = 0; 134 | // int packet_processing_time[35000]; 135 | // float total_time = 0.0; 136 | 137 | memset(&isLoss[my_id], 0, sizeof(bool) * 32001); 138 | 139 | loop_times[my_id]++; 140 | 141 | int finish_window_seq = max_agtr_size_per_thread; 142 | Rogue rogue(max_agtr_size_per_thread); 143 | 144 | if(loop_times[my_id] % 1000 == 0) 145 | fprintf(stderr, "loop_times[ %d ] %d finished\n", my_id, loop_times[my_id]); 146 | 147 | // if(loop_times[my_id] %100 == 0) { 148 | // for (int i = 0; i < _num_thread; i++) 149 | // while (loop_times[i] < loop_times[my_id] - 50) { 150 | // usleep(1); 151 | // } 152 | // } 153 | 154 | char *send_region = (char *)dma_context->send_region; 
155 | 156 | while (window_manager[my_id].last_ACK < window_manager[my_id].total_ACK) { 157 | cqe_snapshot_t cur_snapshot; 158 | // printf("window_manager[my_id].last_ACK: %d \n", window_manager[my_id].last_ACK); 159 | // struct ibv_wc wc_recv[POLLING_SIZE * 2]; 160 | msgs_completed = 0; 161 | std::chrono::high_resolution_clock::time_point t1 = std::chrono::high_resolution_clock::now(); 162 | while(1) { 163 | snapshot_cqe(&dma_context->mp_cqe_arr[dma_context->cqe_idx], cur_snapshot); 164 | const size_t delta = get_cycle_delta(dma_context->prev_snapshot, cur_snapshot); 165 | 166 | // msgs_completed = ibv_poll_cq(dma_context->receive_cq, POLLING_SIZE * 2, wc_recv); 167 | std::chrono::high_resolution_clock::time_point t2 = std::chrono::high_resolution_clock::now(); 168 | std::chrono::duration time_span = std::chrono::duration_cast>(t2 - t1); 169 | if (!(delta == 0 || delta >= kAppNumRingEntries)) { 170 | msgs_completed = delta; 171 | // printf("[%d] msgs_completed:%d, break\n", my_id, msgs_completed); 172 | recv_time = std::chrono::high_resolution_clock::now(); 173 | break; 174 | } 175 | // printf("msgs_completed: %d, %f\n", msgs_completed, time_span.count()); 176 | // printf("[%d] loop, msgs_completed:%d, dma_context->total_received:%d, time_span:%f\n", my_id, msgs_completed, dma_context->total_received, time_span); 177 | if (LOSS_RECOVERY_ENABLE) { 178 | if(time_span.count() > 0.05 && msgs_completed == 0) { 179 | uint16_t timeout_seq = window_manager[my_id].last_ACK + 1; 180 | if (LOSS_RECOVERY_LOG) 181 | printf("[thread %d] Timeout, send %d\n", my_id, timeout_seq); 182 | // exit(1); 183 | int offset = (timeout_seq - 1) * MAX_ENTRIES_PER_PACKET; 184 | uint16_t switch_agtr_pos = threadInfoQueue[my_id]->agtr_start_pos + ((timeout_seq - 1) % max_agtr_size_per_thread); 185 | if (timeout_seq <= total_packet) { 186 | // if(my_id == 0) 187 | // printf("Seq %d trigger %d Resend!\n", p4ml_header->seq_num, timeout_seq); 188 | // set Terminated if last packet 189 | if (timeout_seq != total_packet) { 190 | // for packet which integer < 32 191 | 192 | /* Packet Loss simulation */ 193 | rand_index++; 194 | if (random_array[my_id][rand_index] < timeout_loss) { 195 | total_loss++; 196 | rand_index++; 197 | // printf("[Timeout] %d loss.\n", timeout_seq); 198 | isLoss[my_id][timeout_seq] = true; 199 | loss_queue.push(timeout_seq); 200 | } else { 201 | if (offset + MAX_ENTRIES_PER_PACKET > jobQueue[my_id].front()->len) { 202 | int32_t *tmp = new int32_t[MAX_ENTRIES_PER_PACKET](); 203 | memcpy(tmp, data + offset, sizeof(int32_t) * (jobQueue[my_id].front()->len % MAX_ENTRIES_PER_PACKET)); 204 | make_p4ml_layer_and_copy_to(send_region + (resend_pos_to_send * P4ML_LAYER_SIZE), &jobQueue[my_id].front()->key, &jobQueue[my_id].front()->len, &host, &num_worker, &appID, &switch_agtr_pos, &timeout_seq, tmp, false, true); 205 | } else { 206 | make_p4ml_layer_and_copy_to(send_region + (resend_pos_to_send * P4ML_LAYER_SIZE), &jobQueue[my_id].front()->key, &jobQueue[my_id].front()->len, &host, &num_worker, &appID, &switch_agtr_pos, &timeout_seq, data + offset, false, true); 207 | } 208 | } 209 | } else { 210 | // for packet which integer < 32 211 | if (offset + MAX_ENTRIES_PER_PACKET > jobQueue[my_id].front()->len) { 212 | int32_t *tmp = new int32_t[MAX_ENTRIES_PER_PACKET](); 213 | memcpy(tmp, data + offset, sizeof(int32_t) * (jobQueue[my_id].front()->len % MAX_ENTRIES_PER_PACKET)); 214 | make_p4ml_layer_and_copy_to(send_region + (resend_pos_to_send * P4ML_LAYER_SIZE), &jobQueue[my_id].front()->key, 
&jobQueue[my_id].front()->len, &host, &num_worker, &appID, &switch_agtr_pos, &timeout_seq, tmp, true, true); 215 | } else { 216 | make_p4ml_layer_and_copy_to(send_region + (resend_pos_to_send * P4ML_LAYER_SIZE), &jobQueue[my_id].front()->key, &jobQueue[my_id].front()->len, &host, &num_worker, &appID, &switch_agtr_pos, &timeout_seq, data + offset, true, true); 217 | } 218 | } 219 | 220 | // p4ml_header_print(p4ml_header, "RESEND TRIGGER"); 221 | 222 | send_packet(dma_context, P4ML_LAYER_SIZE, resend_pos_to_send); 223 | resend_pos_to_send++; 224 | total_resent++; 225 | if (resend_pos_to_send == dma_context->my_send_queue_length - 1) 226 | resend_pos_to_send = dma_context->my_send_queue_length / 2 + 1; 227 | resent_queue.push(timeout_seq); 228 | trigger_queue.push(timeout_seq); 229 | } 230 | t1 = std::chrono::high_resolution_clock::now(); 231 | } 232 | } 233 | 234 | if(time_span.count() > 20.0 && msgs_completed == 0 && dma_context->total_received > 0) { 235 | fprintf(stderr, "Timeout happened this thread thread_id=%d, total_received=%d, total_sent=%d, total_loss=%d, total_resent=%d, last_ACK=%d, total_dup_recv=%d, total_last_tensor_packet_recv=%d, loop_time=%d\n", \ 236 | my_id, dma_context->total_received, dma_context->total_sent, total_loss, total_resent, window_manager[my_id].last_ACK, total_dup_packet, total_last_tensor_packet, loop_times[my_id]); 237 | fprintf(stderr, "%d is simulate loss packet [%d]\n", window_manager[my_id].last_ACK + 1, isLoss[my_id][window_manager[my_id].last_ACK+1]); 238 | 239 | printf("\nLoss\n"); 240 | for (int i = 0 ; i < 32001; i++) 241 | if (isLoss[my_id][i]) 242 | printf("%d ", i); 243 | printf("\nResend\n"); 244 | 245 | while (!resent_queue.empty()) { 246 | printf("%d[%d] ", resent_queue.front(), trigger_queue.front()); 247 | resent_queue.pop(); 248 | trigger_queue.pop(); 249 | } 250 | printf("\nPendingQueue\n"); 251 | while (!pendingQueue[my_id].empty()) { 252 | printf("%d ", pendingQueue[my_id].front().p4ml_header->seq_num); 253 | pendingQueue[my_id].pop(); 254 | } 255 | printf("\nlast_ACK: %d \n", window_manager[my_id].last_ACK); 256 | 257 | 258 | for(int i=0; i< _num_thread; i++) 259 | fprintf(stderr, "Timeout happened thread_id=%d, total_received=%d, total_sent=%d, loop_time=%d\n", i, dmaContextQueue[i]->total_received, dmaContextQueue[i]->total_sent, loop_times[i]); 260 | exit(-1); 261 | } 262 | } 263 | 264 | /* circle alignment */ 265 | if (this_pos_to_send + max_agtr_size_per_thread + max_agtr_size_per_thread > dma_context->my_send_queue_length / 2) 266 | this_pos_to_send = 0; 267 | 268 | int to_be_sent = 0; 269 | 270 | dma_context->total_received += msgs_completed; 271 | 272 | // printf("msgs_completed: %d, dma_context->total_received: %d\n", msgs_completed, dma_context->total_received); 273 | 274 | for (int msg = 0; msg < msgs_completed; msg++) { 275 | // std::chrono::high_resolution_clock::time_point packet_start = std::chrono::high_resolution_clock::now(); 276 | uint8_t* buf = &dma_context->mp_recv_ring[dma_context->ring_head * kAppRingMbufSize]; 277 | 278 | agghdr *p4ml_header = reinterpret_cast(buf + IP_ETH_UDP_HEADER_SIZE); 279 | // p4ml_header_print_h(p4ml_header, "RECEIVE"); 280 | p4ml_header_ntoh_without_data(p4ml_header); 281 | bool is_resend_packet = p4ml_header->flag & 0x04; 282 | bool is_ecn_mark_packet = p4ml_header->flag & 0x08; 283 | 284 | // If that is resend packet from last tensor, ignore it 285 | if (p4ml_header->key != jobQueue[my_id].front()->key) { 286 | total_last_tensor_packet++; 287 | dma_context->total_received--; 288 | 289 
| dma_context->ring_head = (dma_context->ring_head + 1) % kAppNumRingEntries; 290 | dma_context->nb_rx_rolling++; 291 | if (dma_context->nb_rx_rolling == kAppStridesPerWQE) { 292 | dma_context->nb_rx_rolling = 0; 293 | int ret = dma_context->mp_wq_family->recv_burst(dma_context->mp_wq, &dma_context->mp_sge[dma_context->sge_idx], 1); 294 | rt_assert(ret == 0); 295 | dma_context->sge_idx = (dma_context->sge_idx + 1) % kAppRQDepth; 296 | } 297 | continue; 298 | } 299 | // If that is duplicate resend packet, ignore it 300 | if (window_manager[my_id].isACKed[p4ml_header->seq_num] && is_resend_packet) { 301 | total_dup_packet++; 302 | dma_context->total_received--; 303 | 304 | dma_context->ring_head = (dma_context->ring_head + 1) % kAppNumRingEntries; 305 | dma_context->nb_rx_rolling++; 306 | if (dma_context->nb_rx_rolling == kAppStridesPerWQE) { 307 | dma_context->nb_rx_rolling = 0; 308 | int ret = dma_context->mp_wq_family->recv_burst(dma_context->mp_wq, &dma_context->mp_sge[dma_context->sge_idx], 1); 309 | rt_assert(ret == 0); 310 | dma_context->sge_idx = (dma_context->sge_idx + 1) % kAppRQDepth; 311 | } 312 | continue; 313 | } 314 | 315 | // printf("packet %d receive\n", p4ml_header->seq_num); 316 | 317 | /* Receive Normal Packet */ 318 | if (!window_manager[my_id].isACKed[p4ml_header->seq_num]) { 319 | dma_context->receive_time[p4ml_header->seq_num] = recv_time; 320 | /* Update Model */ 321 | for (int i = 0; i < MAX_ENTRIES_PER_PACKET; i++) 322 | p4ml_header->vector[i] = ntohl(p4ml_header->vector[i]); 323 | updateModel(p4ml_header, data); 324 | 325 | /* Update Window */ 326 | if (window_manager[my_id].UpdateWindow(&p4ml_header->seq_num)) { 327 | resend_waiting = false; 328 | } 329 | 330 | /* If not Ready for next Seq sending, Enqueue */ 331 | bool isEnqueue = false; 332 | uint16_t next_seq_num = p4ml_header->seq_num + window; 333 | int next_offset = (next_seq_num - 1) * MAX_ENTRIES_PER_PACKET; 334 | // printf("next_seq_num: %d\n", next_seq_num); 335 | 336 | if (next_seq_num > window_manager[my_id].last_ACK + window) { 337 | // printf("%d: next seq_num: %d enqueue! 
[%d ~ %d] \n", p4ml_header->seq_num, next_seq_num, window_manager[my_id].last_ACK, window_manager[my_id].last_ACK + window); 338 | agghdr* enqueue_header = (agghdr*) malloc(sizeof(agghdr)); 339 | memcpy(enqueue_header, p4ml_header, sizeof(agghdr)); 340 | PacketObject pending_obj = { 341 | .p4ml_header = enqueue_header, 342 | // .wc_recv_id = wc_recv_id 343 | }; 344 | isEnqueue = true; 345 | pendingQueue[my_id].push(pending_obj); 346 | // } 347 | } 348 | 349 | /* Send Next Packet */ 350 | if (next_seq_num <= total_packet && next_seq_num <= window_manager[my_id].last_ACK + window && next_seq_num > send_pointer) { 351 | // printf("next_seq_num: %d, send_pointer: %d\n", next_seq_num, send_pointer); 352 | 353 | bool ForceForward = false; 354 | if (isForceForward) { 355 | if (forwardCounter[my_id] == forwardFrequency) { 356 | ForceForward = true; 357 | forwardCounter[my_id] = 0; 358 | } else { 359 | forwardCounter[my_id]++; 360 | } 361 | } 362 | 363 | int packet_to_process = abs(next_seq_num - send_pointer); 364 | // printf("packet to process: %d\n", packet_to_process); 365 | // send more packet if window change 366 | for (int i = packet_to_process - 1; i >= 0; i--) { 367 | uint16_t process_next_seq_num = next_seq_num - i; 368 | 369 | // printf("[%d] next_seq_num: %d, send_pointer: %d\n", p4ml_header->seq_num, process_next_seq_num, send_pointer); 370 | 371 | uint16_t switch_agtr_pos = threadInfoQueue[my_id]->agtr_start_pos + ((process_next_seq_num - 1) % max_agtr_size_per_thread); 372 | // set Terminated if last packet 373 | /* We don't loss Terminate packet here */ 374 | if (process_next_seq_num != total_packet) { 375 | /* Packet Loss simulation */ 376 | rand_index++; 377 | if (random_array[my_id][rand_index] < loss) { 378 | // if (process_next_seq_num == 477 || process_next_seq_num == 478) { 379 | total_loss++; 380 | rand_index++; 381 | // printf("%d loss.\n", process_next_seq_num); 382 | isLoss[my_id][process_next_seq_num] = true; 383 | to_be_sent--; 384 | loss_queue.push(process_next_seq_num); 385 | } else { 386 | // for packet which integer < 32 387 | if (next_offset + MAX_ENTRIES_PER_PACKET > p4ml_header->len_tensor) { 388 | int32_t *tmp = new int32_t[MAX_ENTRIES_PER_PACKET](); 389 | memcpy(tmp, data + next_offset, sizeof(int32_t) * (p4ml_header->len_tensor % MAX_ENTRIES_PER_PACKET)); 390 | make_p4ml_layer_and_copy_to(send_region + (this_pos_to_send + to_be_sent) * P4ML_LAYER_SIZE, &p4ml_header->key, &p4ml_header->len_tensor, &host, &num_worker, &appID, &switch_agtr_pos, &process_next_seq_num, tmp, false, ForceForward); 391 | } else { 392 | make_p4ml_layer_and_copy_to(send_region + (this_pos_to_send + to_be_sent) * P4ML_LAYER_SIZE, &p4ml_header->key, &p4ml_header->len_tensor, &host, &num_worker, &appID, &switch_agtr_pos, &process_next_seq_num, data + next_offset, false, ForceForward); 393 | } 394 | } 395 | 396 | } else { 397 | // for packet which integer < 32 398 | if (next_offset + MAX_ENTRIES_PER_PACKET > p4ml_header->len_tensor) { 399 | int32_t *tmp = new int32_t[MAX_ENTRIES_PER_PACKET](); 400 | memcpy(tmp, data + next_offset, sizeof(int32_t) * (p4ml_header->len_tensor % MAX_ENTRIES_PER_PACKET)); 401 | make_p4ml_layer_and_copy_to(send_region + (this_pos_to_send + to_be_sent) * P4ML_LAYER_SIZE, &p4ml_header->key, &p4ml_header->len_tensor, &host, &num_worker, &appID, &switch_agtr_pos, &process_next_seq_num, tmp, true, ForceForward); 402 | } else { 403 | make_p4ml_layer_and_copy_to(send_region + (this_pos_to_send + to_be_sent) * P4ML_LAYER_SIZE, &p4ml_header->key, &p4ml_header->len_tensor, 
&host, &num_worker, &appID, &switch_agtr_pos, &process_next_seq_num, data + next_offset, true, ForceForward); 404 | } 405 | } 406 | 407 | to_be_sent++; 408 | } 409 | send_pointer = next_seq_num; 410 | // printf("send_pointer: %d\n", send_pointer); 411 | } 412 | 413 | int i = 0; 414 | /* Check If packet in Pending Queue is Ready to send */ 415 | while (!pendingQueue[my_id].empty()) { 416 | // printf("p4ml_header->seq_num: %d\n", p4ml_header->seq_num); 417 | PacketObject pendingObj = pendingQueue[my_id].front(); 418 | 419 | i++; 420 | agghdr* pending_p4ml_header = pendingObj.p4ml_header; 421 | 422 | if (window_manager[my_id].last_ACK < pending_p4ml_header->seq_num) { 423 | // printf("%d Get %d from pending queue and window_manager[my_id].last_ACK = %d, quit\n", p4ml_header->seq_num, pending_p4ml_header->seq_num, window_manager[my_id].last_ACK); 424 | break; 425 | } 426 | 427 | uint16_t next_seq_num = pending_p4ml_header->seq_num + window; 428 | int next_offset = (next_seq_num - 1) * MAX_ENTRIES_PER_PACKET; 429 | // printf("[pending] org: %d, next_number: %d\n", pending_p4ml_header->seq_num, next_seq_num); 430 | if (next_seq_num <= window_manager[my_id].last_ACK + window && next_seq_num > send_pointer) { 431 | 432 | bool ForceForward = false; 433 | if (isForceForward) { 434 | if (forwardCounter[my_id] == forwardFrequency) { 435 | ForceForward = true; 436 | forwardCounter[my_id] = 0; 437 | } else { 438 | forwardCounter[my_id]++; 439 | } 440 | } 441 | 442 | /* Copy to Send Region */ 443 | if (next_seq_num <= total_packet) { 444 | 445 | int packet_to_process = abs(next_seq_num - send_pointer); 446 | // printf("[pending] packet to process: %d\n", packet_to_process); 447 | 448 | for (int i = packet_to_process - 1; i >= 0; i--) { 449 | uint16_t process_next_seq_num = next_seq_num - i; 450 | // printf("[Pending] seq_num trigger %d send next seq_num: %d\n", p4ml_header->seq_num, next_seq_num); 451 | uint16_t switch_agtr_pos = threadInfoQueue[my_id]->agtr_start_pos + ((process_next_seq_num - 1) % max_agtr_size_per_thread); 452 | // set Terminated if last packet 453 | if (process_next_seq_num != total_packet) { 454 | /* Packet Loss simulation */ 455 | rand_index++; 456 | if (random_array[my_id][rand_index] < loss) { 457 | total_loss++; 458 | rand_index++; 459 | // printf("[Pending] %d loss.\n", process_next_seq_num); 460 | isLoss[my_id][process_next_seq_num] = true; 461 | to_be_sent--; 462 | loss_queue.push(process_next_seq_num); 463 | } else { 464 | // for packet which integer < 32 465 | if (next_offset + MAX_ENTRIES_PER_PACKET > pending_p4ml_header->len_tensor) { 466 | int32_t *tmp = new int32_t[MAX_ENTRIES_PER_PACKET](); 467 | memcpy(tmp, data + next_offset, sizeof(int32_t) * (pending_p4ml_header->len_tensor % MAX_ENTRIES_PER_PACKET)); 468 | make_p4ml_layer_and_copy_to(send_region + (this_pos_to_send + to_be_sent) * P4ML_LAYER_SIZE, &pending_p4ml_header->key, &pending_p4ml_header->len_tensor, &host, &num_worker, &appID, &switch_agtr_pos, &process_next_seq_num, tmp, false, ForceForward); 469 | } else { 470 | make_p4ml_layer_and_copy_to(send_region + (this_pos_to_send + to_be_sent) * P4ML_LAYER_SIZE, &pending_p4ml_header->key, &pending_p4ml_header->len_tensor, &host, &num_worker, &appID, &switch_agtr_pos, &process_next_seq_num, data + next_offset, false, ForceForward); 471 | } 472 | } 473 | } else { 474 | // for packet which integer < 32 475 | if (next_offset + MAX_ENTRIES_PER_PACKET > pending_p4ml_header->len_tensor) { 476 | int32_t *tmp = new int32_t[MAX_ENTRIES_PER_PACKET](); 477 | memcpy(tmp, data 
+ next_offset, sizeof(int32_t) * (pending_p4ml_header->len_tensor % MAX_ENTRIES_PER_PACKET)); 478 | make_p4ml_layer_and_copy_to(send_region + (this_pos_to_send + to_be_sent) * P4ML_LAYER_SIZE, &pending_p4ml_header->key, &pending_p4ml_header->len_tensor, &host, &num_worker, &appID, &switch_agtr_pos, &process_next_seq_num, tmp, true, ForceForward); 479 | } else { 480 | make_p4ml_layer_and_copy_to(send_region + (this_pos_to_send + to_be_sent) * P4ML_LAYER_SIZE, &pending_p4ml_header->key, &pending_p4ml_header->len_tensor, &host, &num_worker, &appID, &switch_agtr_pos, &process_next_seq_num, data + next_offset, true, ForceForward); 481 | } 482 | } 483 | 484 | to_be_sent++; 485 | } 486 | send_pointer = next_seq_num; 487 | // printf("[pending] send_pointer: %d\n", send_pointer); 488 | } 489 | // printf("%d: next seq_num: %d dequeue! [%d ~ %d] \n", pending_p4ml_header->seq_num, next_seq_num, window_manager[my_id].last_ACK, window_manager[my_id].last_ACK + window); 490 | free(pending_p4ml_header); 491 | pendingQueue[my_id].pop(); 492 | } else { 493 | // printf("%d: next seq_num: %d dequeue! [%d ~ %d] \n", pending_p4ml_header->seq_num, next_seq_num, window_manager[my_id].last_ACK, window_manager[my_id].last_ACK + window); 494 | free(pending_p4ml_header); 495 | pendingQueue[my_id].pop(); 496 | } 497 | } 498 | 499 | 500 | /* If force forward is configurated, expect will not packet loss */ 501 | if (!isForceForward && LOSS_RECOVERY_ENABLE) { 502 | if (!resend_waiting) { 503 | // printf("resend_processed\n"); 504 | uint16_t resend_seq = 0; 505 | 506 | if (!pendingQueue[my_id].empty()) { 507 | PacketObject pendingObj = pendingQueue[my_id].front(); 508 | agghdr* pending_p4ml_header = pendingObj.p4ml_header; 509 | // printf("%d, %d\n", pending_p4ml_header->seq_num, window_manager[my_id].last_ACK + 1); 510 | if (window_manager[my_id].last_ACK < pending_p4ml_header->seq_num) { 511 | resend_seq = window_manager[my_id].last_ACK + 1; 512 | } 513 | } 514 | 515 | bool isResendLoss = false; 516 | if (resend_seq) { 517 | resend_waiting = true; 518 | int offset = (resend_seq - 1) * MAX_ENTRIES_PER_PACKET; 519 | uint16_t switch_agtr_pos = hash_map[threadInfoQueue[my_id]->agtr_start_pos + ((resend_seq - 1) % max_agtr_size_per_thread)]; 520 | if (resend_seq <= total_packet) { 521 | if (LOSS_RECOVERY_LOG) 522 | printf("[Thread %d] Seq %d trigger %d Resend!\n", my_id, p4ml_header->seq_num, resend_seq); 523 | // exit(1); 524 | // set Terminated if last packet 525 | if (resend_seq != total_packet) { 526 | // for packet which integer < 32 527 | 528 | /* Packet Loss simulation */ 529 | rand_index++; 530 | if (random_array[my_id][rand_index] < resend_loss) { 531 | total_loss++; 532 | rand_index++; 533 | // printf("[Resend] %d loss.\n", resend_seq); 534 | isLoss[my_id][resend_seq] = true; 535 | loss_queue.push(resend_seq); 536 | isResendLoss = true; 537 | } else { 538 | if (offset + MAX_ENTRIES_PER_PACKET > jobQueue[my_id].front()->len) { 539 | int32_t *tmp = new int32_t[MAX_ENTRIES_PER_PACKET](); 540 | memcpy(tmp, data + offset, sizeof(int32_t) * (jobQueue[my_id].front()->len % MAX_ENTRIES_PER_PACKET)); 541 | make_p4ml_layer_and_copy_to(send_region + (resend_pos_to_send * P4ML_LAYER_SIZE), &jobQueue[my_id].front()->key, &jobQueue[my_id].front()->len, &host, &num_worker, &appID, &switch_agtr_pos, &resend_seq, tmp, false, true); 542 | } else { 543 | make_p4ml_layer_and_copy_to(send_region + (resend_pos_to_send * P4ML_LAYER_SIZE), &jobQueue[my_id].front()->key, &jobQueue[my_id].front()->len, &host, &num_worker, &appID, 
&switch_agtr_pos, &resend_seq, data + offset, false, true); 544 | } 545 | } 546 | } else { 547 | // for packet which integer < 32 548 | if (offset + MAX_ENTRIES_PER_PACKET > jobQueue[my_id].front()->len) { 549 | int32_t *tmp = new int32_t[MAX_ENTRIES_PER_PACKET](); 550 | memcpy(tmp, data + offset, sizeof(int32_t) * (jobQueue[my_id].front()->len % MAX_ENTRIES_PER_PACKET)); 551 | make_p4ml_layer_and_copy_to(send_region + (resend_pos_to_send * P4ML_LAYER_SIZE), &jobQueue[my_id].front()->key, &jobQueue[my_id].front()->len, &host, &num_worker, &appID, &switch_agtr_pos, &resend_seq, tmp, true, true); 552 | } else { 553 | make_p4ml_layer_and_copy_to(send_region + (resend_pos_to_send * P4ML_LAYER_SIZE), &jobQueue[my_id].front()->key, &jobQueue[my_id].front()->len, &host, &num_worker, &appID, &switch_agtr_pos, &resend_seq, data + offset, true, true); 554 | } 555 | } 556 | // p4ml_header_print(p4ml_header, "RESEND TRIGGER"); 557 | 558 | if (!isResendLoss) { 559 | // int stop; 560 | // scanf("%d", &stop); 561 | send_packet(dma_context, P4ML_LAYER_SIZE, resend_pos_to_send); 562 | resend_pos_to_send++; 563 | total_resent++; 564 | if (resend_pos_to_send == dma_context->my_send_queue_length - 1) 565 | resend_pos_to_send = dma_context->my_send_queue_length / 2 + 1; 566 | } 567 | resent_queue.push(resend_seq); 568 | trigger_queue.push(p4ml_header->seq_num); 569 | 570 | // if (!isLoss[my_id][resend_seq]) { 571 | // printf("%d Not suppose to be lost\n", resend_seq); 572 | // printf("\nLoss\n"); 573 | // for (int i = 0 ; i < 32000; i++) 574 | // if (isLoss[my_id][i]) 575 | // printf("%d ", i); 576 | // printf("\nResend\n"); 577 | 578 | // while (!resent_queue.empty()) { 579 | // printf("%d[%d] ", resent_queue.front(), trigger_queue.front()); 580 | // resent_queue.pop(); 581 | // trigger_queue.pop(); 582 | // } 583 | // printf("\nPendingQueue\n"); 584 | // while (!pendingQueue[my_id].empty()) { 585 | // printf("%d ", pendingQueue[my_id].front().p4ml_header->seq_num); 586 | // pendingQueue[my_id].pop(); 587 | // } 588 | // printf("\n"); 589 | 590 | // printf("last_ACK: %d\n", window_manager[my_id].last_ACK); 591 | // exit(1); 592 | // } 593 | 594 | } 595 | } 596 | } 597 | } 598 | 599 | if (CC_ENABLE) { 600 | if (p4ml_header->seq_num == finish_window_seq) { 601 | // if (is_ecn_mark_packet) 602 | // printf("ECN mark found, adjust window.\n"); 603 | 604 | // rogue.packets_acked(time_in_us); 605 | // int new_window = rogue.cc_avoid(); 606 | int new_window = rogue.adjustWindow(is_ecn_mark_packet); 607 | // if (my_id == 0 && new_window != 100) 608 | // printf("new_window: %d\n", new_window); 609 | 610 | if (send_pointer + new_window > window_manager[my_id].total_ACK) 611 | window = window_manager[my_id].total_ACK - send_pointer; 612 | else 613 | window = new_window; 614 | //TODO: ADJUST WINDOW 615 | finish_window_seq += window; 616 | } 617 | } 618 | } 619 | 620 | dma_context->ring_head = (dma_context->ring_head + 1) % kAppNumRingEntries; 621 | dma_context->nb_rx_rolling++; 622 | if (dma_context->nb_rx_rolling == kAppStridesPerWQE) { 623 | dma_context->nb_rx_rolling = 0; 624 | int ret = dma_context->mp_wq_family->recv_burst(dma_context->mp_wq, &dma_context->mp_sge[dma_context->sge_idx], 1); 625 | rt_assert(ret == 0); 626 | dma_context->sge_idx = (dma_context->sge_idx + 1) % kAppRQDepth; 627 | } 628 | // std::chrono::high_resolution_clock::time_point packet_end = std::chrono::high_resolution_clock::now(); 629 | // std::chrono::duration time_span = std::chrono::duration_cast>(packet_end - packet_start); 630 | // double 
time = time_span.count(); 631 | // packet_processing_time[packet_processing_time_count] = time; 632 | // packet_processing_time_count++; 633 | // total_time += time; 634 | // if (packet_processing_time_count == 32000) { 635 | // std::sort(packet_processing_time, packet_processing_time+32000); 636 | // float mean = total_time/32000.0; 637 | // float median = packet_processing_time[16000]; 638 | // printf("mean:%.10f, madian: %.10f.\n", mean, median); 639 | // } 640 | } 641 | 642 | dma_context->prev_snapshot = cur_snapshot; 643 | dma_context->cqe_idx = (dma_context->cqe_idx + 1) % kAppRecvCQDepth; 644 | 645 | if (msgs_completed < 0) { 646 | printf("Polling error\n"); 647 | exit(1); 648 | } 649 | 650 | if (msgs_completed > 0 && to_be_sent) { 651 | send_packet(dma_context, P4ML_LAYER_SIZE * to_be_sent, this_pos_to_send); 652 | this_pos_to_send += to_be_sent; 653 | } 654 | 655 | } 656 | 657 | // if (my_id == 0) { 658 | // printf("packet_processing_time_count: %d\n", packet_processing_time_count); 659 | // for (int i = 0; i < packet_processing_time_count; i++) 660 | // printf("%d\n", packet_processing_time[i]); 661 | // // if (packet_processing_time_count == 32000) { 662 | // // std::sort(packet_processing_time, packet_processing_time+32000); 663 | // // float mean = total_time/32000.0; 664 | // // float median = packet_processing_time[16000]; 665 | // // printf("mean:%.10f, madian: %.10f.\n", mean, median); 666 | // // } 667 | // } 668 | dma_context->total_sent = 0; 669 | dma_context->total_received = 0; 670 | 671 | if (!pendingQueue[my_id].empty()){ 672 | printf("PENDING QUEUE NOT EMPTY AFTER DONE.\n"); 673 | while (!pendingQueue[my_id].empty()) { 674 | printf("%d ", pendingQueue[my_id].front().p4ml_header->seq_num); 675 | pendingQueue[my_id].pop(); 676 | } 677 | printf("pendingQueue[my_id].size: %zu\n", pendingQueue[my_id].size()); 678 | exit(1); 679 | } 680 | 681 | 682 | // printf("\nLoss\n"); 683 | // for (int i = 0 ; i < 32000; i++) 684 | // if (isLoss[my_id][i]) 685 | // printf("%d ", i); 686 | // printf("\nResend\n"); 687 | 688 | // while (!resent_queue.empty()) { 689 | // printf("%d[%d] ", resent_queue.front(), trigger_queue.front()); 690 | // resent_queue.pop(); 691 | // trigger_queue.pop(); 692 | // } 693 | // printf("\nPendingQueue\n"); 694 | // while (!pendingQueue[my_id].empty()) { 695 | // printf("%d ", pendingQueue[my_id].front().p4ml_header->seq_num); 696 | // pendingQueue[my_id].pop(); 697 | // } 698 | // printf("\n"); 699 | 700 | 701 | // if (my_id == 0) 702 | // fprintf(stderr, "[Finish log] thread_id=%d, total_received=%d, total_sent=%d, total_loss=%d, total_resent=%d, last_ACK=%d, total_dup_recv=%d, total_last_tensor_packet_recv=%d\n", \ 703 | my_id, dma_context->total_received, dma_context->total_sent, total_loss, total_resent, window_manager[my_id].last_ACK, total_dup_packet, total_last_tensor_packet); 704 | } 705 | 706 | void P4mlManager::PushPull(uint64_t key, char *data, int len, int cmd) { 707 | Job* job = new Job { 708 | .key = key, 709 | .data = (int32_t*) data, 710 | .len = (uint32_t) len, 711 | .cmd = cmd 712 | }; 713 | 714 | /* Load Balance */ 715 | uint64_t smallestWeight = weightQueue[0]; 716 | int queueToGo = 0; 717 | for (int i = 1; i < _num_thread; i++) { 718 | if (weightQueue[i] < smallestWeight) { 719 | smallestWeight = weightQueue[i]; 720 | queueToGo = i; 721 | } 722 | } 723 | 724 | /* If someone overflow, all minus the smallest one */ 725 | if (weightQueue[queueToGo] > UINT64_MAX - len) 726 | for (int i = 0; i < _num_thread; i++) 727 | weightQueue[i] = 
weightQueue[i] - weightQueue[queueToGo]; 728 | 729 | 730 | 731 | weightQueue[queueToGo] += len; 732 | // printf("% "PRIu64" \n", weightQueue[queueToGo]); 733 | // printf("Job %d Get, Send to Queue %d.\n", key, queueToGo); 734 | jobQueue[queueToGo].push(job); 735 | } 736 | 737 | 738 | void P4mlManager::init_threadPool(int num_thread) { 739 | _num_thread = num_thread; 740 | // max_agtr_size_per_thread = MAX_AGTR_COUNT / num_thread; 741 | /* Let fix each thread use 800 agtr */ 742 | if (!max_agtr_size_per_thread) 743 | max_agtr_size_per_thread = 100; 744 | printf("max_agtr_size_per_thread: %d\n", max_agtr_size_per_thread); 745 | threadInfoQueue = new ThreadInfo*[num_thread]; 746 | dmaContextQueue = new DMAcontext*[num_thread]; 747 | weightQueue = new uint64_t[num_thread](); 748 | threadQueue = new std::thread*[num_thread]; 749 | jobQueue = new std::queue[num_thread]; 750 | pendingQueue = new std::queue[num_thread]; 751 | window_manager = new WindowManager[num_thread]; 752 | hash_map = new uint16_t[num_thread * max_agtr_size_per_thread]; 753 | 754 | printf("Genereating random numbers...\n"); 755 | for (int i = 0; i < num_thread; i++) 756 | for (int j = 0; j < 1024000; j++) 757 | random_array[i][j] = rand() % 10000; 758 | 759 | // Start from zero 760 | for (int i = 0; i < num_thread * max_agtr_size_per_thread; i++) 761 | hash_map[i] = hash_by(i, appID); 762 | 763 | struct ibv_device **dev_list; 764 | struct ibv_device *ib_dev; 765 | dev_list = ibv_get_device_list(NULL); 766 | if (!dev_list) { 767 | perror("Failed to get devices list"); 768 | exit(1); 769 | } 770 | 771 | ib_dev = dev_list[1]; 772 | if (!ib_dev) { 773 | fprintf(stderr, "IB device not found\n"); 774 | exit(1); 775 | } 776 | 777 | for (int i = 0; i < num_thread; i++) { 778 | threadInfoQueue[i] = new ThreadInfo { 779 | .thread_id = i, 780 | .agtr_start_pos = max_agtr_size_per_thread * i, 781 | }; 782 | dmaContextQueue[i] = DMA_create(ib_dev, max_agtr_size_per_thread, i); 783 | threadQueue[i] = new std::thread(Start, i); 784 | window_manager[i].isACKed = new bool[MAX_TENSOR_SIZE / MAX_ENTRIES_PER_PACKET + 1]; 785 | dmaContextQueue[i]->isMarkTimeStamp = false; 786 | dmaContextQueue[i]->isSent = new bool[MAX_TENSOR_SIZE / MAX_ENTRIES_PER_PACKET + 1]; 787 | dmaContextQueue[i]->send_time = new std::chrono::high_resolution_clock::time_point[MAX_TENSOR_SIZE / MAX_ENTRIES_PER_PACKET + 1]; 788 | dmaContextQueue[i]->receive_time = new std::chrono::high_resolution_clock::time_point[MAX_TENSOR_SIZE / MAX_ENTRIES_PER_PACKET + 1]; 789 | } 790 | 791 | printf("using: %s\n", ibv_get_device_name(ib_dev)); 792 | 793 | } 794 | 795 | void P4mlManager::Start(int thread_id) { 796 | bindingCPU(thread_id + 16); 797 | ThreadInfo *thread_info = threadInfoQueue[thread_id]; 798 | DMAcontext *dma_context = dmaContextQueue[thread_id]; 799 | int agtr_size = max_agtr_size_per_thread; 800 | int my_id = thread_id; 801 | int agtr_start_pos = thread_info->agtr_start_pos; 802 | char *send_region = (char *)dma_context->send_region; 803 | 804 | while (1) { 805 | if (!jobQueue[thread_id].empty()) { 806 | Job* job = jobQueue[thread_id].front(); 807 | uint64_t key = job->key; 808 | int32_t* data = job->data; 809 | uint32_t tensor_len = job->len; 810 | // fprintf(stderr, "%lld: thread_id=%d, tensor_len=%d, agg_size=%d\n", key, thread_id, tensor_len, agtr_size); 811 | 812 | int total_packet = ceil((float)tensor_len / MAX_ENTRIES_PER_PACKET); 813 | window_manager[thread_id].Reset(total_packet); 814 | memset(dma_context->isSent, 0, sizeof(bool) * window_manager[my_id].total_ACK 
+ 1); 815 | 816 | for (int i = 0; i < tensor_len; i++) 817 | data[i] = htonl(data[i]); 818 | 819 | // SEQ number start from 1 820 | uint16_t seq_num = 0; 821 | 822 | int num_first_time_sending; 823 | if (max_agtr_size_per_thread * MAX_ENTRIES_PER_PACKET > tensor_len) 824 | num_first_time_sending = ceil((float)tensor_len / MAX_ENTRIES_PER_PACKET); 825 | else 826 | num_first_time_sending = max_agtr_size_per_thread; 827 | 828 | // the first round sending 829 | for (int i = 0; i < num_first_time_sending; i++) { 830 | // usleep(rand() % 10 + 1); 831 | seq_num++; 832 | int offset = (seq_num - 1) * MAX_ENTRIES_PER_PACKET; 833 | uint16_t switch_agtr_pos = hash_map[agtr_start_pos + i]; 834 | // This thread have first time sending 835 | if (seq_num <= total_packet) { 836 | 837 | bool ForceForward = false; 838 | if (isForceForward) { 839 | if (forwardRate == 0.75) { 840 | if (!(seq_num % forwardFrequency == 0)) { 841 | ForceForward = true; 842 | } 843 | } else { 844 | if (seq_num % forwardFrequency == 0) { 845 | ForceForward = true; 846 | } 847 | } 848 | } 849 | 850 | // set Terminated if last packet 851 | if (seq_num != total_packet) { 852 | // for packet which integer < 32 853 | if (offset + MAX_ENTRIES_PER_PACKET > tensor_len) { 854 | int32_t *tmp = new int32_t[MAX_ENTRIES_PER_PACKET](); 855 | memcpy(tmp, data + offset, sizeof(int32_t) * (tensor_len % MAX_ENTRIES_PER_PACKET)); 856 | make_p4ml_layer_and_copy_to(send_region + P4ML_LAYER_SIZE * i, &key, &tensor_len, &host, &num_worker, &appID, &switch_agtr_pos, &seq_num, tmp, false, ForceForward); 857 | } else { 858 | make_p4ml_layer_and_copy_to(send_region + P4ML_LAYER_SIZE * i, &key, &tensor_len, &host, &num_worker, &appID, &switch_agtr_pos, &seq_num, data + offset, false, ForceForward); 859 | } 860 | } else { 861 | // for packet which integer < 32 862 | if (offset + MAX_ENTRIES_PER_PACKET > tensor_len) { 863 | int32_t *tmp = new int32_t[MAX_ENTRIES_PER_PACKET](); 864 | memcpy(tmp, data + offset, sizeof(int32_t) * (tensor_len % MAX_ENTRIES_PER_PACKET)); 865 | make_p4ml_layer_and_copy_to(send_region + P4ML_LAYER_SIZE * i, &key, &tensor_len, &host, &num_worker, &appID, &switch_agtr_pos, &seq_num, tmp, true, ForceForward); 866 | } else { 867 | make_p4ml_layer_and_copy_to(send_region + P4ML_LAYER_SIZE * i, &key, &tensor_len, &host, &num_worker, &appID, &switch_agtr_pos, &seq_num, data + offset, true, ForceForward); 868 | } 869 | } 870 | } else { 871 | } 872 | } 873 | 874 | for(int j = 0;j < num_first_time_sending; j++){ 875 | send_packet(dma_context, P4ML_LAYER_SIZE, j); 876 | } 877 | 878 | // send_packet(dma_context, P4ML_LAYER_SIZE * num_first_time_sending, 0); 879 | 880 | main_receive_packet_loop(dma_context, data, my_id); 881 | 882 | // for (int i = 0; i < tensor_len / 32; i++) { 883 | // printf("%d: ", i); 884 | // for (int j = 0; j < 32; j++) { 885 | // printf("%d ", data[i*32 + j]); 886 | // } 887 | // printf("\n"); 888 | // } 889 | 890 | /* For Per Packet */ 891 | // if (thread_id == 0) { 892 | // // printf("######################## %d ##########################\n", loop_times[my_id]); 893 | // // // printf("Last 32 int: \n"); 894 | // // // // for (int i = tensor_len - 32; i < tensor_len; i++) 895 | // for (int i = 0; i < tensor_len; i++) 896 | // printf("%d ", data[i]); 897 | // // // printf("\n"); 898 | // double total_time = 0.0; 899 | // double time[32001] = {0.0}; 900 | // double min_value = 100000.0; 901 | // if (thread_id == 0) { 902 | // for (int i = 1; i <= total_packet; i++) { 903 | // std::chrono::duration time_span = 
std::chrono::duration_cast>(dma_context->receive_time[i] - dma_context->send_time[i]); 904 | // time[i] = time_span.count(); 905 | // printf("%d: %lf\n", i, time[i]); 906 | // total_time += time[i]; 907 | // if (time[i] < min_value) 908 | // min_value = time[i]; 909 | // // if (isLoss[my_id][i]) 910 | // // printf(" [Loss Packet]\n"); 911 | // // else 912 | // // printf("\n"); 913 | // // // printf("totaltime: %lf\n", total_time); 914 | // } 915 | // std::sort(time+1, time+32001); 916 | // // // printf("\n###################################################\n"); 917 | // // // for (int i = 1; i <= total_packet; i++) { 918 | // // // printf("%lf ", time[i]); 919 | // // // } 920 | // // // printf("\n"); 921 | // mean[loop_times[my_id]] = total_time/32000.0; 922 | // median[loop_times[my_id]] = time[16000]; 923 | // printf("mean: %lf, median: %lf, min_value: %lf\n", total_time/32000.0, time[16000], min_value); 924 | // } 925 | // if (loop_times[my_id] == 1000) 926 | // printf("median: %lf\n", median[500]); 927 | // } 928 | 929 | /* For Throughput */ 930 | // if (thread_id == 0) { 931 | // printf("######################## %d ##########################\n", loop_times[my_id]); 932 | // // // printf("Last 32 int: \n"); 933 | // // // // for (int i = tensor_len - 32; i < tensor_len; i++) 934 | // // // for (int i = 0; i < tensor_len; i++) 935 | // // // printf("%d ", data[i]); 936 | // // // printf("\n"); 937 | // double total_time = 0.0; 938 | // double time[32001] = {0.0}; 939 | // for (int i = 1; i <= total_packet; i++) { 940 | // std::chrono::duration time_span = std::chrono::duration_cast>(window_manager[my_id].receive_time[i] - start_time); 941 | // time[i] = time_span.count(); 942 | // printf("%d: %lf, Throughput: %lf", i, time[i], ((float)P4ML_PACKET_SIZE * i / 1024 / 1024 / 1024 * 8) / time[i]); 943 | // total_time += time[i]; 944 | // if (isLoss[my_id][i]) 945 | // printf(" [Loss Packet]\n"); 946 | // else 947 | // printf("\n"); 948 | // // printf("totaltime: %lf\n", total_time); 949 | // } 950 | // std::sort(time+1, time+32001); 951 | // printf("\n###################################################\n"); 952 | // // for (int i = 1; i <= total_packet; i++) { 953 | // // printf("%lf ", time[i]); 954 | // // } 955 | // printf("\n"); 956 | // printf("mean: %lf, median: %lf\n", total_time/32000.0, time[16000]); 957 | // } 958 | 959 | jobQueue[thread_id].pop(); 960 | { 961 | std::lock_guard lock(_queuePush_mutex); 962 | // printf("%d to Finish Queue\n", key); 963 | finishQueue.push(key); 964 | } 965 | } 966 | usleep(1); 967 | } 968 | 969 | } 970 | -------------------------------------------------------------------------------- /client/p4ml_manager.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef P4ML_MANAGER_H 3 | #define P4ML_MANAGER_H 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include "../common/utils.h" 23 | #include "../common/dma_common.h" 24 | #include "../common/packet.h" 25 | #include "../common/window_manager.h" 26 | 27 | #define P4ML_KEY_TOTAL 500000 28 | #define MAX_TENSOR_SIZE 1024000 29 | 30 | struct ThreadInfo { 31 | int thread_id; 32 | int agtr_start_pos; 33 | }; 34 | 35 | struct Job { 36 | uint64_t key; 37 | int32_t* data; 38 | uint32_t len; 39 | int cmd; 40 | }; 41 | 42 | struct PacketObject { 43 | agghdr* p4ml_header; 44 | }; 45 
| 46 | class P4mlManager { 47 | public: 48 | 49 | P4mlManager(uint32_t host, int num_worker, int appID); 50 | // ~P4mlManager(); 51 | 52 | void init_threadPool(int num_thread); 53 | void PushPull(uint64_t key, char* data, int len, int cmd); 54 | static void Start(int thread_id); 55 | 56 | uint64_t GetNewKey(); 57 | int64_t GetFinishKey(); 58 | void SetForceForward(float forward_rate); 59 | void SetMaxAgtrSizePerThread(int max_agtr_size_per_thread); 60 | 61 | private: 62 | static uint32_t host; 63 | static uint8_t num_worker; 64 | static uint16_t appID; 65 | static uint64_t p4mlKey; 66 | static int max_agtr_size_per_thread; 67 | static int _num_thread; 68 | static std::chrono::time_point start; 69 | static ThreadInfo** threadInfoQueue; 70 | static DMAcontext** dmaContextQueue; 71 | static std::thread** threadQueue; 72 | static std::queue *jobQueue; 73 | static WindowManager* window_manager; 74 | static std::queue finishQueue; 75 | static std::queue *pendingQueue; 76 | static uint64_t* weightQueue; 77 | 78 | static uint16_t* hash_map; 79 | 80 | static bool isForceForward; 81 | static int forwardFrequency; 82 | static float forwardRate; 83 | 84 | static std::mutex Resource_mutex; 85 | static std::mutex _P4MLKey_mutex; 86 | static std::mutex _print_mutex; 87 | static std::mutex _queuePush_mutex; 88 | 89 | static void main_receive_packet_loop(DMAcontext* dma_context, int32_t* data, int my_id); 90 | static void updateModel(agghdr* p4ml_header, int32_t* data); 91 | }; 92 | 93 | #endif //P4ML_MANAGER_H -------------------------------------------------------------------------------- /common/Rogue.cc: -------------------------------------------------------------------------------- 1 | #include "Rogue.h" 2 | #define INIT_SS_THRESH 100 * P4ML_PACKET_SIZE 3 | #define BASE_RTT (6) 4 | 5 | #define MAX_BYTES 100 * P4ML_PACKET_SIZE 6 | 7 | static int alpha = 2; 8 | static int beta = 4; 9 | static int gamma_vegas = 1; 10 | 11 | Rogue::Rogue(int init_window) 12 | { 13 | baseRTT = 0x7fffffff; 14 | minRTT = 0x7fffffff; 15 | ss_thresh = INIT_SS_THRESH; 16 | cwnd_bytes = init_window; 17 | update_state = 0; 18 | } 19 | 20 | void Rogue::packets_acked(int sample_rtt) 21 | { 22 | if (sample_rtt <= BASE_RTT) 23 | sample_rtt = BASE_RTT; 24 | 25 | /* Filter to find propagation delay: */ 26 | baseRTT = GET_MIN(sample_rtt, baseRTT); 27 | 28 | minRTT = sample_rtt; //GET_MIN(minRTT, sample_rtt); 29 | } 30 | 31 | int Rogue::tcp_in_slow_start() 32 | { 33 | if (cwnd_bytes < ss_thresh) 34 | return 1; 35 | else 36 | return 0; 37 | } 38 | 39 | int Rogue::tcp_vegas_ssthresh() 40 | { 41 | return GET_MIN(ss_thresh, cwnd_bytes); 42 | } 43 | 44 | int Rogue::cc_avoid() 45 | { 46 | uint32_t rtt; 47 | uint64_t target_cwnd; 48 | rtt = minRTT; 49 | target_cwnd = (uint64_t)cwnd_bytes * baseRTT / rtt; 50 | diff = (cwnd_bytes) / P4ML_PACKET_SIZE * (rtt - baseRTT) / baseRTT; 51 | 52 | if (diff > gamma_vegas && tcp_in_slow_start()) { 53 | cwnd_bytes = GET_MIN(cwnd_bytes, (uint64_t)(target_cwnd + P4ML_PACKET_SIZE)); 54 | ss_thresh = tcp_vegas_ssthresh(); 55 | update_state = 0; 56 | 57 | } else if (rtt > 5000) { 58 | cwnd_bytes = P4ML_PACKET_SIZE; 59 | update_state = 9; 60 | 61 | } else if (tcp_in_slow_start()) { 62 | cwnd_bytes += cwnd_bytes; 63 | update_state = 1; 64 | 65 | } else if (rtt < 30) { 66 | cwnd_bytes += GET_MAX(P4ML_PACKET_SIZE, cwnd_bytes); 67 | update_state = 8; 68 | 69 | } else if (diff > beta) { 70 | cwnd_bytes = cwnd_bytes / 2; 71 | ss_thresh = tcp_vegas_ssthresh(); 72 | update_state = 2; 73 | 74 | } else if (diff < alpha) { 
75 | cwnd_bytes += P4ML_PACKET_SIZE; 76 | update_state = 4; 77 | 78 | } else { 79 | update_state = 5; 80 | } 81 | 82 | if (cwnd_bytes < P4ML_PACKET_SIZE) 83 | cwnd_bytes = P4ML_PACKET_SIZE; 84 | if (cwnd_bytes > 100 * P4ML_PACKET_SIZE) 85 | cwnd_bytes = 100 * P4ML_PACKET_SIZE; 86 | if (cwnd_bytes > P4ML_PACKET_SIZE) 87 | cwnd_bytes = (cwnd_bytes / P4ML_PACKET_SIZE) * P4ML_PACKET_SIZE; 88 | return cwnd_bytes / P4ML_PACKET_SIZE; 89 | } 90 | 91 | /* Delete these function will lead to 6Gbps decrease... */ 92 | int Rogue:: tcp_current_ssthresh() 93 | { 94 | return GET_MAX(ss_thresh, 95 | ((cwnd_bytes >> 1) + 96 | (cwnd_bytes >> 2))); 97 | } 98 | void Rogue::PrintStats() 99 | { 100 | cout<<" cwnd_bytes=" << cwnd_bytes <<" minRTT="<b?a:b) 19 | 20 | class Rogue{ 21 | 22 | public: 23 | Rogue(int init_window); 24 | int cc_avoid(); 25 | void packets_acked(int sample_rtt); 26 | int tcp_in_slow_start(); 27 | uint64_t GetCwndBytes(){ return cwnd_bytes;} 28 | void PrintStats(); 29 | int tcp_current_ssthresh(); 30 | int tcp_vegas_ssthresh(); 31 | int no_packet_drop_cnt; 32 | int adjustWindow(bool isECN); 33 | private: 34 | uint16_t cntRTT; /* # of RTTs measured within last RTT */ 35 | uint32_t minRTT; /* min of RTTs measured within last RTT (in usec) */ 36 | uint32_t baseRTT; /* the min of all Vegas RTT measurements seen (in usec) */ 37 | uint32_t baseRTT_update_pacing; 38 | uint64_t cwnd_bytes; 39 | uint32_t ss_thresh; 40 | int diff; 41 | int update_state; 42 | int uid; 43 | }; 44 | 45 | #endif -------------------------------------------------------------------------------- /common/ThreadPool.h: -------------------------------------------------------------------------------- 1 | #ifndef THREAD_POOL_H 2 | #define THREAD_POOL_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | class ThreadPool { 15 | public: 16 | template ThreadPool(size_t, F callback); 17 | template 18 | auto enqueue(F&& f, Args&&... args) 19 | -> std::future::type>; 20 | ~ThreadPool(); 21 | private: 22 | // need to keep track of threads so we can join them 23 | std::vector< std::thread > workers; 24 | // the task queue 25 | std::queue< std::function > tasks; 26 | 27 | // synchronization 28 | std::mutex queue_mutex; 29 | std::condition_variable condition; 30 | bool stop; 31 | }; 32 | 33 | // the constructor just launches some amount of workers 34 | template 35 | inline ThreadPool::ThreadPool(size_t threads, F callback) 36 | : stop(false) 37 | { 38 | for(size_t i = 0;i task; 45 | 46 | { 47 | std::unique_lock lock(this->queue_mutex); 48 | this->condition.wait(lock, 49 | [this]{ return this->stop || !this->tasks.empty(); }); 50 | if(this->stop && this->tasks.empty()) 51 | return; 52 | task = std::move(this->tasks.front()); 53 | this->tasks.pop(); 54 | } 55 | 56 | task(); 57 | callback(); 58 | } 59 | } 60 | ); 61 | } 62 | 63 | // add new work item to the pool 64 | template 65 | auto ThreadPool::enqueue(F&& f, Args&&... args) 66 | -> std::future::type> 67 | { 68 | using return_type = typename std::result_of::type; 69 | 70 | auto task = std::make_shared< std::packaged_task >( 71 | std::bind(std::forward(f), std::forward(args)...) 
72 | ); 73 | 74 | std::future res = task->get_future(); 75 | { 76 | std::unique_lock lock(queue_mutex); 77 | 78 | // don't allow enqueueing after stopping the pool 79 | if(stop) 80 | throw std::runtime_error("enqueue on stopped ThreadPool"); 81 | 82 | tasks.emplace([task](){ (*task)(); }); 83 | } 84 | condition.notify_one(); 85 | return res; 86 | } 87 | 88 | // the destructor joins all threads 89 | inline ThreadPool::~ThreadPool() 90 | { 91 | { 92 | std::unique_lock lock(queue_mutex); 93 | stop = true; 94 | } 95 | condition.notify_all(); 96 | for(std::thread &worker: workers) 97 | worker.join(); 98 | } 99 | 100 | #endif 101 | -------------------------------------------------------------------------------- /common/dma_common.cc: -------------------------------------------------------------------------------- 1 | #define __USE_GNU 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include "dma_common.h" 16 | 17 | std::mutex ___print_mutex; 18 | int counter = 0; 19 | int my_send_queue_length = 0; 20 | int my_recv_queue_length = 0; 21 | 22 | DMAcontext* DMA_create(ibv_device* ib_dev, int queue_length, int thread_id) 23 | { 24 | my_send_queue_length = 2048; 25 | my_recv_queue_length = my_send_queue_length * 8; 26 | 27 | ibv_context* context = ibv_open_device(ib_dev); 28 | if (!context) { 29 | fprintf(stderr, "Couldn't get context for %s\n", 30 | ibv_get_device_name(ib_dev)); 31 | exit(1); 32 | } 33 | ibv_pd* pd = ibv_alloc_pd(context); 34 | if (!pd) { 35 | fprintf(stderr, "Couldn't allocate PD\n"); 36 | exit(1); 37 | } 38 | 39 | struct ibv_cq* rec_cq = ibv_create_cq(context, my_recv_queue_length + 1, NULL, NULL, 0); 40 | if (!rec_cq) { 41 | fprintf(stderr, "Couldn't create CQ %d\n", errno); 42 | exit(1); 43 | } 44 | 45 | struct ibv_cq* snd_cq = ibv_create_cq(context, my_send_queue_length + 1, NULL, NULL, 0); 46 | if (!snd_cq) { 47 | fprintf(stderr, "Couldn't create CQ %d\n", errno); 48 | exit(1); 49 | } 50 | 51 | struct ibv_qp* qp; 52 | struct ibv_exp_qp_init_attr* qp_init_attr = (struct ibv_exp_qp_init_attr*)malloc(sizeof(struct ibv_exp_qp_init_attr)); 53 | 54 | memset(qp_init_attr, 0, sizeof(*qp_init_attr)); 55 | qp_init_attr->comp_mask = IBV_EXP_QP_INIT_ATTR_PD | IBV_EXP_QP_INIT_ATTR_MAX_TSO_HEADER | IBV_EXP_QP_INIT_ATTR_INL_RECV; 56 | qp_init_attr->send_cq = snd_cq; 57 | qp_init_attr->recv_cq = rec_cq; 58 | qp_init_attr->qp_type = IBV_QPT_RAW_PACKET; 59 | 60 | qp_init_attr->pd = pd; 61 | qp_init_attr->cap.max_send_wr = my_send_queue_length + 1; 62 | qp_init_attr->cap.max_recv_wr = my_recv_queue_length + 1; 63 | qp_init_attr->cap.max_inline_data = 512; 64 | qp_init_attr->cap.max_send_sge = 1; 65 | qp_init_attr->cap.max_recv_sge = 1; 66 | qp_init_attr->max_tso_header = IP_ETH_UDP_HEADER_SIZE; 67 | qp_init_attr->max_inl_recv = 512; 68 | 69 | qp = ibv_exp_create_qp(context, qp_init_attr); 70 | //qp = ibv_create_qp(pd, qp_init_attr); 71 | if (!qp) { 72 | fprintf(stderr, "Couldn't create RSS QP\n"); 73 | exit(1); 74 | } 75 | 76 | struct ibv_qp_attr qp_attr; 77 | int qp_flags; 78 | int ret; 79 | memset(&qp_attr, 0, sizeof(qp_attr)); 80 | qp_flags = IBV_QP_STATE | IBV_QP_PORT; 81 | qp_attr.qp_state = IBV_QPS_INIT; 82 | qp_attr.port_num = 1; 83 | ret = ibv_modify_qp(qp, &qp_attr, qp_flags); 84 | if (ret < 0) { 85 | fprintf(stderr, "failed modify qp to init\n"); 86 | exit(1); 87 | } 88 | memset(&qp_attr, 0, sizeof(qp_attr)); 89 | 90 | /* a. 
Move ring state to ready to receive, this is needed to be able to move ring to ready to send even if receive queue is not enabled */ 91 | 92 | qp_flags = IBV_QP_STATE; 93 | qp_attr.qp_state = IBV_QPS_RTR; 94 | ret = ibv_modify_qp(qp, &qp_attr, qp_flags); 95 | if (ret < 0) { 96 | fprintf(stderr, "failed modify qp to receive\n"); 97 | exit(1); 98 | } 99 | 100 | /* b. Move the ring to ready to send */ 101 | 102 | qp_flags = IBV_QP_STATE; 103 | qp_attr.qp_state = IBV_QPS_RTS; 104 | ret = ibv_modify_qp(qp, &qp_attr, qp_flags); 105 | if (ret < 0) { 106 | fprintf(stderr, "failed modify qp to send\n"); 107 | exit(1); 108 | } 109 | 110 | int send_buf_size = P4ML_PACKET_SIZE * my_recv_queue_length; 111 | 112 | void* send_buf; 113 | 114 | //send_buf = malloc(send_buf_size); 115 | send_buf = alloc_raw_pages(send_buf_size / EACH_HUGEPAGE_SIZE + 1, EACH_HUGEPAGE_SIZE); 116 | if (!send_buf) { 117 | fprintf(stderr, "Coudln't allocate send memory\n"); 118 | exit(1); 119 | } 120 | 121 | struct ibv_mr* send_mr; 122 | send_mr = ibv_reg_mr(pd, send_buf, send_buf_size, IBV_ACCESS_LOCAL_WRITE); 123 | if (!send_mr) { 124 | fprintf(stderr, "Couldn't register recv mr\n"); 125 | exit(1); 126 | } 127 | 128 | // Init CQ. Its size MUST be one so that we get two CQEs in mlx5. 129 | struct ibv_exp_cq_init_attr cq_init_attr; 130 | memset(&cq_init_attr, 0, sizeof(cq_init_attr)); 131 | struct ibv_cq* mp_recv_cq = ibv_exp_create_cq(context, kAppRecvCQDepth / 2, nullptr, nullptr, 0, &cq_init_attr); 132 | assert(mp_recv_cq != nullptr); 133 | 134 | // Modify the RECV CQ to ignore overrun 135 | struct ibv_exp_cq_attr cq_attr; 136 | memset(&cq_attr, 0, sizeof(cq_attr)); 137 | cq_attr.comp_mask = IBV_EXP_CQ_ATTR_CQ_CAP_FLAGS; 138 | cq_attr.cq_cap_flags = IBV_EXP_CQ_IGNORE_OVERRUN; 139 | rt_assert(ibv_exp_modify_cq(mp_recv_cq, &cq_attr, IBV_EXP_CQ_CAP_FLAGS) == 0); 140 | 141 | struct ibv_exp_wq_init_attr wq_init_attr; 142 | memset(&wq_init_attr, 0, sizeof(wq_init_attr)); 143 | 144 | wq_init_attr.wq_type = IBV_EXP_WQT_RQ; 145 | wq_init_attr.max_recv_wr = kAppRQDepth; 146 | wq_init_attr.max_recv_sge = 1; 147 | wq_init_attr.pd = pd; 148 | wq_init_attr.cq = mp_recv_cq; 149 | 150 | wq_init_attr.comp_mask |= IBV_EXP_CREATE_WQ_MP_RQ; 151 | wq_init_attr.mp_rq.use_shift = IBV_EXP_MP_RQ_NO_SHIFT; 152 | wq_init_attr.mp_rq.single_wqe_log_num_of_strides = kAppLogNumStrides; 153 | wq_init_attr.mp_rq.single_stride_log_num_of_bytes = kAppLogStrideBytes; 154 | struct ibv_exp_wq* mp_wq = ibv_exp_create_wq(context, &wq_init_attr); 155 | assert(mp_wq != nullptr); 156 | 157 | // Change WQ to ready state 158 | struct ibv_exp_wq_attr wq_attr; 159 | memset(&wq_attr, 0, sizeof(wq_attr)); 160 | wq_attr.attr_mask = IBV_EXP_WQ_ATTR_STATE; 161 | wq_attr.wq_state = IBV_EXP_WQS_RDY; 162 | rt_assert(ibv_exp_modify_wq(mp_wq, &wq_attr) == 0); 163 | 164 | // Get the RQ burst function 165 | enum ibv_exp_query_intf_status intf_status = IBV_EXP_INTF_STAT_OK; 166 | struct ibv_exp_query_intf_params query_intf_params; 167 | memset(&query_intf_params, 0, sizeof(query_intf_params)); 168 | query_intf_params.intf_scope = IBV_EXP_INTF_GLOBAL; 169 | query_intf_params.intf = IBV_EXP_INTF_WQ; 170 | query_intf_params.obj = mp_wq; 171 | struct ibv_exp_wq_family* mp_wq_family = reinterpret_cast( 172 | ibv_exp_query_intf(context, &query_intf_params, &intf_status)); 173 | assert(mp_wq_family != nullptr); 174 | 175 | // Create indirect table 176 | struct ibv_exp_rwq_ind_table_init_attr rwq_ind_table_init_attr; 177 | memset(&rwq_ind_table_init_attr, 0, sizeof(rwq_ind_table_init_attr)); 
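    // The indirection table created below has a single entry (log_ind_tbl_size = 0)
    // pointing at the one multi-packet WQ, so the Toeplitz RSS hash configured
    // afterwards always resolves to the same receive ring regardless of the hash value.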
178 | rwq_ind_table_init_attr.pd = pd; 179 | rwq_ind_table_init_attr.log_ind_tbl_size = 0; // Ignore hash 180 | rwq_ind_table_init_attr.ind_tbl = &mp_wq; // Pointer to RECV work queue 181 | rwq_ind_table_init_attr.comp_mask = 0; 182 | struct ibv_exp_rwq_ind_table* mp_ind_tbl = ibv_exp_create_rwq_ind_table(context, &rwq_ind_table_init_attr); 183 | assert(mp_ind_tbl != nullptr); 184 | 185 | // Create rx_hash_conf and indirection table for the QP 186 | uint8_t toeplitz_key[] = { 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 187 | 0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 188 | 0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4, 189 | 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c, 190 | 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa }; 191 | const int TOEPLITZ_RX_HASH_KEY_LEN = sizeof(toeplitz_key) / sizeof(toeplitz_key[0]); 192 | 193 | struct ibv_exp_rx_hash_conf rx_hash_conf; 194 | memset(&rx_hash_conf, 0, sizeof(rx_hash_conf)); 195 | rx_hash_conf.rx_hash_function = IBV_EXP_RX_HASH_FUNC_TOEPLITZ; 196 | rx_hash_conf.rx_hash_key_len = TOEPLITZ_RX_HASH_KEY_LEN; 197 | rx_hash_conf.rx_hash_key = toeplitz_key; 198 | rx_hash_conf.rx_hash_fields_mask = IBV_EXP_RX_HASH_DST_PORT_UDP; 199 | rx_hash_conf.rwq_ind_tbl = mp_ind_tbl; 200 | 201 | struct ibv_exp_qp_init_attr mp_qp_init_attr; 202 | memset(&mp_qp_init_attr, 0, sizeof(mp_qp_init_attr)); 203 | mp_qp_init_attr.comp_mask = IBV_EXP_QP_INIT_ATTR_CREATE_FLAGS | IBV_EXP_QP_INIT_ATTR_PD | IBV_EXP_QP_INIT_ATTR_RX_HASH; 204 | mp_qp_init_attr.rx_hash_conf = &rx_hash_conf; 205 | mp_qp_init_attr.pd = pd; 206 | mp_qp_init_attr.qp_type = IBV_QPT_RAW_PACKET; 207 | 208 | // Create the QP 209 | struct ibv_qp* mp_recv_qp = ibv_exp_create_qp(context, &mp_qp_init_attr); 210 | assert(mp_recv_qp != nullptr); 211 | 212 | size_t tx_ring_size = P4ML_LAYER_SIZE * kAppMaxPostlist; 213 | uint8_t* mp_send_ring = (uint8_t*)malloc(tx_ring_size); 214 | rt_assert(mp_send_ring != nullptr); 215 | memset(mp_send_ring, 0, tx_ring_size); 216 | 217 | struct ibv_mr* mp_send_mr = ibv_reg_mr(pd, mp_send_ring, tx_ring_size, IBV_ACCESS_LOCAL_WRITE); 218 | rt_assert(mp_send_mr != nullptr); 219 | 220 | // Register RX ring memory 221 | uint8_t* mp_recv_ring = (uint8_t*)malloc(kAppRingSize); 222 | rt_assert(mp_recv_ring != nullptr); 223 | memset(mp_recv_ring, 0, kAppRingSize); 224 | 225 | struct ibv_mr* mp_mr = ibv_reg_mr(pd, mp_recv_ring, kAppRingSize, IBV_ACCESS_LOCAL_WRITE); 226 | rt_assert(mp_mr != nullptr); 227 | ///////////////////////////////////////////////////////////////////////////////////// 228 | // install_flow_rule(mp_recv_qp, 30720 + thread_id); 229 | install_flow_rule(mp_recv_qp, thread_id); 230 | // This cast works for mlx5 where ibv_cq is the first member of mlx5_cq. 231 | auto* _mlx5_cq = reinterpret_cast(mp_recv_cq); 232 | rt_assert(kAppRecvCQDepth == std::pow(2, _mlx5_cq->cq_log_size)); 233 | rt_assert(_mlx5_cq->buf_a.buf != nullptr); 234 | 235 | auto* mp_cqe_arr = reinterpret_cast(_mlx5_cq->buf_a.buf); 236 | 237 | // Initialize the CQEs as if we received the last (kAppRecvCQDepth) packets 238 | // in the CQE cycle. 
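    // Worked example with the constants from dma_common.h: kAppRecvCQDepth = 8 and
    // kAppStridesPerWQE = 2^9 = 512, so kAppCQESnapshotCycle = 65536 * 512. CQE i
    // (i = 0..7) is seeded below with wqe_id = 0xffff and wqe_counter = 512 - (8 - i),
    // which puts the last CQE at snapshot index kAppCQESnapshotCycle - 1, exactly as
    // if the previous eight packets of the cycle had already arrived.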
239 | static_assert(kAppStridesPerWQE >= kAppRecvCQDepth, ""); 240 | for (size_t i = 0; i < kAppRecvCQDepth; i++) { 241 | mp_cqe_arr[i].wqe_id = htons(std::numeric_limits::max()); 242 | // Last CQE gets 243 | // * wqe_counter = (kAppStridesPerWQE - 1) 244 | // * snapshot_cycle_idx = (kAppCQESnapshotCycle - 1) 245 | mp_cqe_arr[i].wqe_counter = htons(kAppStridesPerWQE - (kAppRecvCQDepth - i)); 246 | 247 | cqe_snapshot_t snapshot; 248 | snapshot_cqe(&mp_cqe_arr[i], snapshot); 249 | rt_assert(snapshot.get_cqe_snapshot_cycle_idx() == kAppCQESnapshotCycle - (kAppRecvCQDepth - i)); 250 | } 251 | 252 | // The multi-packet RECVs. This must be done after we've initialized the CQE. 253 | struct ibv_sge* mp_sge = reinterpret_cast(malloc(sizeof(struct ibv_sge) * kAppRQDepth)); 254 | fprintf(stderr, "kAppRingMbufSize=%lu, kAppStridesPerWQE=%lu, kAppRingSize=%lu, kAppRQDepth=%lu\n", kAppRingMbufSize, kAppStridesPerWQE, kAppRingSize, kAppRQDepth); 255 | for (size_t i = 0; i < kAppRQDepth; i++) { 256 | size_t mpwqe_offset = i * (kAppRingMbufSize * kAppStridesPerWQE); 257 | mp_sge[i].addr = reinterpret_cast(&mp_recv_ring[mpwqe_offset]); 258 | mp_sge[i].lkey = mp_mr->lkey; 259 | mp_sge[i].length = kAppRingMbufSize * kAppStridesPerWQE; //kAppRingSize; 260 | mp_wq_family->recv_burst(mp_wq, &mp_sge[i], 1); 261 | } 262 | 263 | printf("[Thread %d] Finish created QP with length: %d\n", thread_id, queue_length); 264 | 265 | auto* cqe_arr = mp_cqe_arr; 266 | cqe_snapshot_t prev_snapshot; 267 | snapshot_cqe(&cqe_arr[kAppRecvCQDepth - 1], prev_snapshot); 268 | 269 | return new DMAcontext{ 270 | .pd = pd, 271 | .ctx = context, 272 | .receive_cq = rec_cq, 273 | .send_cq = snd_cq, 274 | .send_mr = send_mr, 275 | .send_region = send_buf, 276 | .data_qp = qp, 277 | 278 | .mp_recv_qp = mp_recv_qp, 279 | .mp_recv_cq = mp_recv_cq, 280 | .mp_wq = mp_wq, 281 | .mp_wq_family = mp_wq_family, 282 | .mp_ind_tbl = mp_ind_tbl, 283 | .mp_cqe_arr = mp_cqe_arr, 284 | .mp_sge = mp_sge, 285 | .mp_recv_ring = mp_recv_ring, 286 | .mp_send_ring = mp_send_ring, 287 | .mp_send_mr = mp_send_mr, 288 | 289 | .id = thread_id, 290 | .total_received = 0, 291 | .total_sent = 0, 292 | .my_send_queue_length = my_send_queue_length, 293 | .my_recv_queue_length = my_recv_queue_length, 294 | 295 | .ring_head = 0, 296 | .nb_rx_rolling = 0, 297 | .sge_idx = 0, 298 | .cqe_idx = 0, 299 | .prev_snapshot = prev_snapshot, 300 | .isMarkTimeStamp = false, 301 | }; 302 | } 303 | 304 | void send_packet(DMAcontext* dma_context, int chunk_size, uint64_t offset) 305 | { 306 | int ret; 307 | 308 | struct ibv_sge sg; 309 | struct ibv_exp_send_wr wr, *bad_wr; 310 | // struct ibv_send_wr wr; 311 | // struct ibv_send_wr *bad_wr; 312 | 313 | memset(&sg, 0, sizeof(sg)); 314 | sg.addr = (uintptr_t)((char*)dma_context->send_region + offset * P4ML_LAYER_SIZE); 315 | // printf("%d\n", sg.addr); 316 | sg.length = chunk_size; 317 | sg.lkey = dma_context->send_mr->lkey; 318 | 319 | counter++; 320 | 321 | memset(&wr, 0, sizeof(wr)); 322 | wr.wr_id = 0; 323 | wr.sg_list = &sg; 324 | wr.num_sge = 1; 325 | // wr.opcode = IBV_WR_SEND; 326 | wr.exp_opcode = IBV_EXP_WR_TSO; 327 | wr.tso.mss = P4ML_LAYER_SIZE; // Maximum Segment Size example 328 | wr.tso.hdr_sz = IP_ETH_UDP_HEADER_SIZE; // ETH/IPv4/TCP header example 329 | 330 | char hdr[IP_ETH_UDP_HEADER_SIZE]; // ETH/IPv4/TCP header example 331 | memcpy(hdr, IP_ETH_UDP_HEADER, IP_ETH_UDP_HEADER_SIZE); // Assuming that the header buffer was define before. 
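    // One TSO post covers chunk_size = n * P4ML_LAYER_SIZE (156 B) of payload; the NIC
    // segments it into n packets, each prefixed with this 34-byte header, so every wire
    // packet is P4ML_PACKET_SIZE (190) bytes. The last destination-MAC byte is
    // overwritten with the thread id below, matching the per-thread flow rule installed
    // by install_flow_rule(), which appears to be how each thread's receive ring sees
    // only its own replies.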
332 | 333 | hdr[5] = dma_context->id; 334 | // hdr[37] = dma_context->id; 335 | wr.tso.hdr = hdr; // There is no need to use malloc operation in this case, local definition of hdr is ok. 336 | //wr.exp_send_flags = IBV_SEND_INLINE; 337 | wr.exp_send_flags |= IBV_SEND_SIGNALED; 338 | 339 | 340 | // for (int i = 0; i < chunk_size / P4ML_LAYER_SIZE; i++) { 341 | // agghdr* p4ml_header = (agghdr*)((char *)sg.addr + i * P4ML_LAYER_SIZE); 342 | // // if (ntohs(p4ml_header->seq_num) == 32000) { 343 | // // if (p4ml_header->flag & 2 && ntohs(p4ml_header->seq_num) != 32000) { 344 | // p4ml_header_print_h((agghdr*)((char *)sg.addr + i * P4ML_LAYER_SIZE), "SEND"); 345 | // // printf("p4ml_header->seq_num: %d\n", ntohs(p4ml_header->seq_num)); 346 | // // // exit(1); 347 | // // } 348 | // } 349 | // // } 350 | 351 | // mark first time sending timestamp 352 | if (dma_context->isMarkTimeStamp) { 353 | std::chrono::high_resolution_clock::time_point current_time = std::chrono::high_resolution_clock::now(); 354 | for (int i = 0; i < chunk_size / P4ML_LAYER_SIZE; i++) { 355 | agghdr* p4ml_header = (agghdr*)((char *)sg.addr + i * P4ML_LAYER_SIZE); 356 | if (!dma_context->isSent[ntohs(p4ml_header->seq_num)]) { 357 | dma_context->isSent[ntohs(p4ml_header->seq_num)] = true; 358 | dma_context->send_time[ntohs(p4ml_header->seq_num)] = current_time; 359 | } else { 360 | // printf("something wrong\n"); 361 | } 362 | } 363 | } 364 | 365 | // we dont need to wait cq cause received represent sent 366 | ret = ibv_exp_post_send(dma_context->data_qp, &wr, &bad_wr); 367 | if (ret < 0) { 368 | fprintf(stderr, "failed in post send\n"); 369 | exit(1); 370 | } 371 | 372 | struct ibv_wc wc_send_cq[POLLING_SIZE]; 373 | ibv_poll_cq(dma_context->send_cq, POLLING_SIZE, wc_send_cq); 374 | dma_context->total_sent += chunk_size / P4ML_LAYER_SIZE; 375 | } 376 | 377 | const char* ibv_wc_opcode_str(enum ibv_wc_opcode opcode) 378 | { 379 | switch (opcode) { 380 | case IBV_EXP_WC_SEND: 381 | return "IBV_WC_SEND"; 382 | case IBV_EXP_WC_RDMA_WRITE: 383 | return "IBV_WC_RDMA_WRITE"; 384 | case IBV_EXP_WC_RDMA_READ: 385 | return "IBV_WC_RDMA_READ"; 386 | case IBV_WC_COMP_SWAP: 387 | return "IBV_WC_COMP_SWAP"; 388 | case IBV_WC_FETCH_ADD: 389 | return "IBV_WC_FETCH_ADD"; 390 | case IBV_WC_BIND_MW: 391 | return "IBV_WC_BIND_MW"; 392 | /* receive-side: inbound completion */ 393 | case IBV_EXP_WC_RECV: 394 | return "IBV_WC_RECV"; 395 | case IBV_EXP_WC_RECV_RDMA_WITH_IMM: 396 | return "IBV_WC_RECV_RDMA_WITH_IMM"; 397 | default: 398 | return "IBV_WC_UNKNOWN"; 399 | } 400 | } 401 | 402 | // Install a flow rule 403 | void install_flow_rule(struct ibv_qp* qp, uint16_t thread_id) 404 | { 405 | static constexpr size_t rule_sz = sizeof(ibv_exp_flow_attr) + sizeof(ibv_exp_flow_spec_eth) + sizeof(ibv_exp_flow_spec_ipv4_ext); 406 | 407 | uint8_t* flow_rule = new uint8_t[rule_sz]; 408 | memset(flow_rule, 0, rule_sz); 409 | uint8_t* buf = flow_rule; 410 | 411 | auto* flow_attr = reinterpret_cast(flow_rule); 412 | flow_attr->type = IBV_EXP_FLOW_ATTR_NORMAL; 413 | flow_attr->size = rule_sz; 414 | flow_attr->priority = 0; 415 | flow_attr->num_of_specs = 1; 416 | flow_attr->port = 1; 417 | flow_attr->flags = 0; 418 | flow_attr->reserved = 0; 419 | buf += sizeof(struct ibv_exp_flow_attr); 420 | 421 | // Ethernet - all wildcard 422 | auto* eth_spec = reinterpret_cast(buf); 423 | eth_spec->type = IBV_EXP_FLOW_SPEC_ETH; 424 | eth_spec->size = sizeof(struct ibv_exp_flow_spec_eth); 425 | buf += sizeof(struct ibv_exp_flow_spec_eth); 426 | 427 | const unsigned char 
R_SRC_MAC[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; 428 | unsigned char R_DST_MAC[] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05 }; 429 | R_DST_MAC[5] = thread_id; 430 | const unsigned char R_SRC_MAC_MASK[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; 431 | const unsigned char R_DST_MAC_MASK[] = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF }; 432 | memcpy(eth_spec->val.dst_mac, R_DST_MAC, sizeof(R_DST_MAC)); 433 | memcpy(eth_spec->val.src_mac, R_SRC_MAC, sizeof(R_SRC_MAC)); 434 | memcpy(eth_spec->mask.dst_mac, R_DST_MAC_MASK, sizeof(R_DST_MAC_MASK)); 435 | memcpy(eth_spec->mask.src_mac, R_SRC_MAC_MASK, sizeof(R_SRC_MAC_MASK)); 436 | eth_spec->val.vlan_tag = 0; 437 | eth_spec->mask.ether_type = 0; 438 | 439 | rt_assert(ibv_exp_create_flow(qp, flow_attr) != nullptr); 440 | } 441 | 442 | 443 | 444 | // Install a UDP destination port--based flow rule 445 | void install_udp_flow_rule(struct ibv_qp* qp, uint16_t dst_port) 446 | { 447 | static constexpr size_t rule_sz = sizeof(ibv_exp_flow_attr) + sizeof(ibv_exp_flow_spec_eth) + sizeof(ibv_exp_flow_spec_ipv4_ext) + sizeof(ibv_exp_flow_spec_tcp_udp); 448 | 449 | uint8_t* flow_rule = new uint8_t[rule_sz]; 450 | memset(flow_rule, 0, rule_sz); 451 | uint8_t* buf = flow_rule; 452 | 453 | auto* flow_attr = reinterpret_cast(flow_rule); 454 | flow_attr->type = IBV_EXP_FLOW_ATTR_NORMAL; 455 | flow_attr->size = rule_sz; 456 | flow_attr->priority = 0; 457 | flow_attr->num_of_specs = 1; 458 | flow_attr->port = 1; 459 | flow_attr->flags = 0; 460 | flow_attr->reserved = 0; 461 | buf += sizeof(struct ibv_exp_flow_attr); 462 | 463 | // Ethernet - all wildcard 464 | auto* eth_spec = reinterpret_cast(buf); 465 | eth_spec->type = IBV_EXP_FLOW_SPEC_ETH; 466 | eth_spec->size = sizeof(struct ibv_exp_flow_spec_eth); 467 | buf += sizeof(struct ibv_exp_flow_spec_eth); 468 | 469 | // IPv4 - all wildcard 470 | auto* spec_ipv4 = reinterpret_cast(buf); 471 | spec_ipv4->type = IBV_EXP_FLOW_SPEC_IPV4_EXT; 472 | spec_ipv4->size = sizeof(struct ibv_exp_flow_spec_ipv4_ext); 473 | buf += sizeof(struct ibv_exp_flow_spec_ipv4_ext); 474 | 475 | // UDP - match dst port 476 | auto* udp_spec = reinterpret_cast(buf); 477 | udp_spec->type = IBV_EXP_FLOW_SPEC_UDP; 478 | udp_spec->size = sizeof(struct ibv_exp_flow_spec_tcp_udp); 479 | udp_spec->val.dst_port = htons(dst_port); 480 | udp_spec->mask.dst_port = 0xffffu; 481 | udp_spec->mask.dst_port = 0; 482 | 483 | rt_assert(ibv_exp_create_flow(qp, flow_attr) != nullptr); 484 | } 485 | 486 | 487 | void snapshot_cqe(volatile mlx5_cqe64* cqe, cqe_snapshot_t& cqe_snapshot) 488 | { 489 | while (true) { 490 | uint16_t wqe_id_0 = cqe->wqe_id; 491 | uint16_t wqe_counter_0 = cqe->wqe_counter; 492 | memory_barrier(); 493 | uint16_t wqe_id_1 = cqe->wqe_id; 494 | 495 | if (likely(wqe_id_0 == wqe_id_1)) { 496 | cqe_snapshot.wqe_id = ntohs(wqe_id_0); 497 | cqe_snapshot.wqe_counter = ntohs(wqe_counter_0); 498 | return; 499 | } 500 | } 501 | } 502 | 503 | size_t get_cycle_delta(const cqe_snapshot_t& prev, const cqe_snapshot_t& cur) 504 | { 505 | size_t prev_idx = prev.get_cqe_snapshot_cycle_idx(); 506 | size_t cur_idx = cur.get_cqe_snapshot_cycle_idx(); 507 | assert(prev_idx < kAppCQESnapshotCycle && cur_idx < kAppCQESnapshotCycle); 508 | 509 | return ((cur_idx + kAppCQESnapshotCycle) - prev_idx) % kAppCQESnapshotCycle; 510 | } 511 | -------------------------------------------------------------------------------- /common/dma_common.h: -------------------------------------------------------------------------------- 1 | #ifndef DMA_COMMON_H 2 | #define DMA_COMMON_H 3 | 4 | 
#include //ifreq 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include "packet.h" 21 | #include "utils.h" 22 | #include "mlx5_defs.h" 23 | 24 | #define POLLING_SIZE 400 25 | #define ENTRY_SIZE 256 /* maximum size of each buffer */ 26 | #define PORT_NUM 1 27 | 28 | static constexpr size_t kAppRecvCQDepth = 8; 29 | static constexpr size_t kAppRQDepth = 4; // Multi-packet RQ depth 30 | 31 | static constexpr size_t kAppLogNumStrides = 9; 32 | static constexpr size_t kAppLogStrideBytes = 9; 33 | static constexpr size_t kAppMaxPostlist = 512; 34 | 35 | static constexpr bool kAppVerbose = false; 36 | static constexpr bool kAppCheckContents = true; // Check buffer contents 37 | 38 | /// Size of one ring message buffer 39 | static constexpr size_t kAppRingMbufSize = (1ull << kAppLogStrideBytes); 40 | 41 | /// Number of strides in one multi-packet RECV WQE 42 | static constexpr size_t kAppStridesPerWQE = (1ull << kAppLogNumStrides); 43 | 44 | 45 | /// Packets after which the CQE snapshot cycles 46 | static constexpr size_t kAppCQESnapshotCycle = 65536 * kAppStridesPerWQE; 47 | 48 | /// Total number of entries in the RX ring 49 | static constexpr size_t kAppNumRingEntries = (kAppStridesPerWQE * kAppRQDepth); 50 | 51 | static constexpr size_t kAppRingSize = (kAppNumRingEntries * kAppRingMbufSize); 52 | 53 | /// A consistent snapshot of CQE fields in host endian format 54 | struct cqe_snapshot_t { 55 | uint16_t wqe_id; 56 | uint16_t wqe_counter; 57 | 58 | /// Return this packet's index in the CQE snapshot cycle 59 | size_t get_cqe_snapshot_cycle_idx() const { 60 | return wqe_id * kAppStridesPerWQE + wqe_counter; 61 | } 62 | 63 | std::string to_string() { 64 | std::ostringstream ret; 65 | ret << "[ID " << std::to_string(wqe_id) << ", counter " 66 | << std::to_string(wqe_counter) << "]"; 67 | return ret.str(); 68 | } 69 | }; 70 | 71 | struct DMAcontext { 72 | struct ibv_pd *pd; 73 | struct ibv_context *ctx; 74 | struct ibv_cq *receive_cq; 75 | struct ibv_cq *send_cq; 76 | struct ibv_mr *send_mr; 77 | void *send_region; 78 | struct ibv_qp *data_qp; 79 | 80 | struct ibv_qp* mp_recv_qp; 81 | struct ibv_cq* mp_recv_cq; 82 | struct ibv_exp_wq* mp_wq; 83 | struct ibv_exp_wq_family* mp_wq_family; 84 | struct ibv_exp_rwq_ind_table* mp_ind_tbl; 85 | volatile mlx5_cqe64* mp_cqe_arr; 86 | struct ibv_sge* mp_sge; 87 | uint8_t* mp_recv_ring; 88 | uint8_t* mp_send_ring; 89 | struct ibv_mr* mp_send_mr; 90 | 91 | // for connection 92 | int id; 93 | int total_received; 94 | int total_sent; 95 | int my_send_queue_length; 96 | int my_recv_queue_length; 97 | 98 | size_t ring_head; 99 | size_t nb_rx_rolling; 100 | size_t sge_idx; 101 | size_t cqe_idx; 102 | 103 | cqe_snapshot_t prev_snapshot; 104 | 105 | // // For window adjustment 106 | bool isMarkTimeStamp; 107 | bool* isSent; 108 | std::chrono::high_resolution_clock::time_point* send_time; 109 | std::chrono::high_resolution_clock::time_point* receive_time; 110 | }; 111 | 112 | DMAcontext* DMA_create(ibv_device* ib_dev, int queue_length, int thread_id); 113 | const char *ibv_wc_opcode_str(enum ibv_wc_opcode opcode); 114 | void send_packet(DMAcontext *dma_context, int packet_size, uint64_t offset); 115 | void dma_context_print(DMAcontext* dma_context, const char* caption); 116 | 117 | 118 | // Install a UDP destination port--based flow rule 119 | void install_flow_rule(struct ibv_qp* qp, uint16_t thread_id); 120 
| void install_udp_flow_rule(struct ibv_qp* qp, uint16_t dst_port); 121 | void snapshot_cqe(volatile mlx5_cqe64* cqe, cqe_snapshot_t& cqe_snapshot); 122 | size_t get_cycle_delta(const cqe_snapshot_t& prev, const cqe_snapshot_t& cur); 123 | #endif 124 | -------------------------------------------------------------------------------- /common/mlx5_defs.h: -------------------------------------------------------------------------------- 1 | #ifndef MLX5_DEFS_H 2 | #define MLX5_DEFS_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | enum mlx5_alloc_type { 10 | MLX5_ALLOC_TYPE_ANON, 11 | MLX5_ALLOC_TYPE_HUGE, 12 | MLX5_ALLOC_TYPE_CONTIG, 13 | MLX5_ALLOC_TYPE_PEER_DIRECT, 14 | MLX5_ALLOC_TYPE_PREFER_HUGE, 15 | MLX5_ALLOC_TYPE_PREFER_CONTIG, 16 | MLX5_ALLOC_TYPE_ALL 17 | }; 18 | 19 | enum mlx5_lock_type { 20 | MLX5_SPIN_LOCK = 0, 21 | MLX5_MUTEX = 1, 22 | }; 23 | 24 | enum mlx5_lock_state { MLX5_USE_LOCK, MLX5_LOCKED, MLX5_UNLOCKED }; 25 | 26 | struct mlx5_lock { 27 | pthread_mutex_t mutex; 28 | pthread_spinlock_t slock; 29 | enum mlx5_lock_state state; 30 | enum mlx5_lock_type type; 31 | }; 32 | 33 | struct mlx5_numa_req { 34 | int valid; 35 | int numa_id; 36 | }; 37 | 38 | struct mlx5_peer_direct_mem { 39 | uint32_t dir; 40 | uint64_t va_id; 41 | struct ibv_exp_peer_buf *pb; 42 | struct ibv_exp_peer_direct_attr *ctx; 43 | }; 44 | 45 | struct mlx5_buf { 46 | void *buf; 47 | size_t length; 48 | int base; 49 | struct mlx5_hugetlb_mem *hmem; 50 | struct mlx5_peer_direct_mem peer; 51 | enum mlx5_alloc_type type; 52 | struct mlx5_numa_req numa_req; 53 | int numa_alloc; 54 | }; 55 | 56 | struct mlx5_mini_cqe8 { 57 | union { 58 | uint32_t rx_hash_result; 59 | uint32_t checksum; 60 | struct { 61 | uint16_t wqe_counter; 62 | uint8_t s_wqe_opcode; 63 | uint8_t reserved; 64 | } s_wqe_info; 65 | }; 66 | uint32_t byte_cnt; 67 | }; 68 | 69 | enum { MLX5_MINI_ARR_SIZE = 8 }; 70 | 71 | struct mlx5_tm_cqe { 72 | uint32_t success; 73 | uint32_t hw_phase_cnt; 74 | uint8_t rsvd0[10]; 75 | }; 76 | 77 | struct mlx5_cqe64 { 78 | uint8_t rsvd0[2]; 79 | /* 80 | * wqe_id is valid only for 81 | * Striding RQ (Multi-Packet RQ). 82 | * It provides the WQE index inside the RQ. 
83 | */ 84 | uint16_t wqe_id; 85 | uint8_t rsvd4[8]; 86 | uint32_t rx_hash_res; 87 | uint8_t rx_hash_type; 88 | uint8_t ml_path; 89 | uint8_t rsvd20[2]; 90 | uint16_t checksum; 91 | uint16_t slid; 92 | uint32_t flags_rqpn; 93 | uint8_t hds_ip_ext; 94 | uint8_t l4_hdr_type_etc; 95 | __be16 vlan_info; 96 | uint32_t srqn_uidx; 97 | uint32_t imm_inval_pkey; 98 | uint8_t app; 99 | uint8_t app_op; 100 | uint16_t app_info; 101 | uint32_t byte_cnt; 102 | __be64 timestamp; 103 | union { 104 | uint32_t sop_drop_qpn; 105 | struct { 106 | uint8_t sop; 107 | uint8_t qpn[3]; 108 | } sop_qpn; 109 | }; 110 | /* 111 | * In Striding RQ (Multi-Packet RQ) wqe_counter provides 112 | * the WQE stride index (to calc pointer to start of the message) 113 | */ 114 | uint16_t wqe_counter; 115 | uint8_t signature; 116 | uint8_t op_own; 117 | }; 118 | 119 | struct mlx5_cq { 120 | struct ibv_cq ibv_cq; 121 | uint32_t creation_flags; 122 | uint32_t pattern; 123 | struct mlx5_buf buf_a; 124 | struct mlx5_buf buf_b; 125 | struct mlx5_buf *active_buf; 126 | struct mlx5_buf *resize_buf; 127 | int resize_cqes; 128 | int active_cqes; 129 | struct mlx5_lock lock; 130 | uint32_t cqn; 131 | uint32_t cons_index; 132 | uint32_t wait_index; 133 | uint32_t wait_count; 134 | volatile uint32_t *dbrec; 135 | int arm_sn; 136 | int cqe_sz; 137 | int resize_cqe_sz; 138 | int stall_next_poll; 139 | int stall_enable; 140 | uint64_t stall_last_count; 141 | int stall_adaptive_enable; 142 | int stall_cycles; 143 | uint8_t model_flags; /* use mlx5_cq_model_flags */ 144 | uint16_t cqe_comp_max_num; 145 | uint8_t cq_log_size; 146 | /* Compressed CQE data */ 147 | struct mlx5_cqe64 next_decomp_cqe64; 148 | struct mlx5_resource *compressed_rsc; 149 | uint16_t compressed_left; 150 | uint16_t compressed_wqe_cnt; 151 | uint8_t compressed_req; 152 | uint8_t compressed_mp_rq; 153 | uint8_t mini_arr_idx; 154 | struct mlx5_mini_cqe8 mini_array[MLX5_MINI_ARR_SIZE]; 155 | /* peer-direct data */ 156 | int peer_enabled; 157 | struct ibv_exp_peer_direct_attr *peer_ctx; 158 | struct mlx5_buf peer_buf; 159 | struct mlx5_peek_entry **peer_peek_table; 160 | struct mlx5_peek_entry *peer_peek_free; 161 | }; 162 | 163 | #endif // MLX5_DEFS_H 164 | -------------------------------------------------------------------------------- /common/packet.cc: -------------------------------------------------------------------------------- 1 | #include "packet.h" 2 | std::mutex _print_mutex; 3 | 4 | void make_packet_and_copy_to(void* payload, uint64_t* key, uint32_t* len_tensor, uint32_t* workerID, uint8_t* num_worker, uint16_t* appID, uint16_t* agtr, uint16_t* seq_num, int32_t* data, bool isTerminated, bool isResend, int thread_id) { 5 | char* eth_ip_header = (char*) payload; 6 | memcpy(payload, IP_ETH_UDP_HEADER, sizeof(IP_ETH_UDP_HEADER)); 7 | eth_ip_header[5] = thread_id; 8 | make_p4ml_layer_and_copy_to((char*)payload + sizeof(IP_ETH_UDP_HEADER), key, len_tensor, workerID, num_worker, appID, agtr, seq_num, data, isTerminated, isResend); 9 | } 10 | 11 | void make_p4ml_layer_and_copy_to(void* payload, uint64_t* key, uint32_t* len_tensor, uint32_t* workerID, uint8_t* num_worker, uint16_t* appID, uint16_t* agtr, uint16_t* seq_num, int32_t* data, bool isTerminated, bool isResend) { 12 | agghdr* agg_header = (agghdr*) payload; 13 | agghdr* p4ml_header = agg_header; 14 | agg_header->key = *key; 15 | agg_header->len_tensor = htonl(*len_tensor); 16 | agg_header->bitmap = htonl(1 << (*workerID)); 17 | agg_header->num_worker = *num_worker; 18 | agg_header->appID = htons(*appID); 19 | 
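    /* Flag bit layout, matching the bit fields of p4ml_t in p4src/includes/headers.p4:
       0x01 isACK, 0x02 isTerminated, 0x04 isResend, 0x08 ECN, 0x10 dataIndex,
       0x20 version, 0x40 isFirstTime, 0x80 reserved. The two `|= 32` lines below
       flip the version bit every 100 sequence numbers (seq_num % 200). */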
agg_header->flag = 0; 20 | agg_header->agtr = htons(*agtr); 21 | agg_header->seq_num = htons(*seq_num); 22 | agg_header->versionIndex = htonl(*agtr * *num_worker + *workerID); 23 | // agg_header->last_ack = 0; 24 | 25 | // version = 1 26 | if (((*seq_num) % 200) > 100) 27 | agg_header->flag |= 32; 28 | if ((*seq_num) % 200 == 0) 29 | agg_header->flag |= 32; 30 | if (isTerminated) 31 | agg_header->flag |= 2; 32 | if (isResend) 33 | agg_header->flag |= 4; 34 | memcpy(agg_header->vector, data, sizeof(uint32_t) * (MAX_ENTRIES_PER_PACKET)); 35 | // p4ml_header_print_h(agg_header, "Make"); 36 | } 37 | 38 | void p4ml_header_ntoh(agghdr* p_p4ml) { 39 | p_p4ml->len_tensor = ntohl(p_p4ml->len_tensor); 40 | p_p4ml->bitmap = ntohl(p_p4ml->bitmap); 41 | p_p4ml->seq_num = ntohs(p_p4ml->seq_num); 42 | p_p4ml->agtr = ntohs(p_p4ml->agtr); 43 | p_p4ml->appID = ntohs(p_p4ml->appID); 44 | p_p4ml->versionIndex = ntohl(p_p4ml->versionIndex); 45 | // // p_p4ml->last_ack = ntohl(p_p4ml->last_ack); 46 | int32_t* p_model = p_p4ml->vector; 47 | for (int i = 0; i < MAX_ENTRIES_PER_PACKET; i++) 48 | p_model[i] = ntohl(p_model[i]); 49 | } 50 | 51 | void p4ml_header_ntoh_without_data(agghdr* p_p4ml) { 52 | p_p4ml->len_tensor = ntohl(p_p4ml->len_tensor); 53 | p_p4ml->bitmap = ntohl(p_p4ml->bitmap); 54 | p_p4ml->seq_num = ntohs(p_p4ml->seq_num); 55 | p_p4ml->agtr = ntohs(p_p4ml->agtr); 56 | p_p4ml->appID = ntohs(p_p4ml->appID); 57 | p_p4ml->versionIndex = ntohl(p_p4ml->versionIndex); 58 | // // p_p4ml->last_ack = ntohl(p_p4ml->last_ack); 59 | int32_t* p_model = p_p4ml->vector; 60 | } 61 | 62 | void p4ml_header_hton_without_data(agghdr* p_p4ml) { 63 | p_p4ml->len_tensor = htonl(p_p4ml->len_tensor); 64 | p_p4ml->bitmap = htonl(p_p4ml->bitmap); 65 | p_p4ml->seq_num = htons(p_p4ml->seq_num); 66 | p_p4ml->agtr = htons(p_p4ml->agtr); 67 | p_p4ml->appID = htons(p_p4ml->appID); 68 | p_p4ml->versionIndex = htonl(p_p4ml->versionIndex); 69 | // // p_p4ml->last_ack = htonl(p_p4ml->last_ack); 70 | } 71 | 72 | void p4ml_header_setACK(agghdr* p4ml_header) { 73 | p4ml_header->flag |= 1; 74 | } 75 | 76 | void p4ml_header_setTerminated(agghdr* p4ml_header) { 77 | p4ml_header->flag |= 2; 78 | } 79 | 80 | void p4ml_header_resetIndex(agghdr* p4ml_header) { 81 | p4ml_header->flag &= ~(16); 82 | } 83 | 84 | void p4ml_header_print(agghdr* p4ml_header, char* caption) { 85 | std::lock_guard lock(_print_mutex); 86 | printf("[%s] \n key: %" PRIu64 ", len_tensor: %u, " 87 | "bitmap: " BYTE_TO_BINARY_PATTERN ", num_worker: %u, appID: %u, " 88 | "agtr: %u seq_num: %u, versionIndex: %u, isACK: %d, version: %d," 89 | "isResend: %d, data: ", caption, p4ml_header->key, p4ml_header->len_tensor, \ 90 | BYTE_TO_BINARY(p4ml_header->bitmap), p4ml_header->num_worker, p4ml_header->appID, 91 | p4ml_header->agtr, p4ml_header->seq_num, p4ml_header->versionIndex, \ 92 | p4ml_header->flag & 1?1:0, p4ml_header->flag & 32?1:0, p4ml_header->flag & 4?1:0); 93 | for (int i = 0; i < MAX_ENTRIES_PER_PACKET; i++) 94 | printf("%d ", p4ml_header->vector[i]); 95 | printf("\n"); 96 | } 97 | 98 | void p4ml_header_print_h(agghdr* p4ml_header, char* caption) { 99 | std::lock_guard lock(_print_mutex); 100 | printf("[%s] \n key: %" PRIu64 ", len_tensor: %u, " 101 | "bitmap: " BYTE_TO_BINARY_PATTERN ", num_worker: %u, appID: %u, " 102 | "agtr: %u seq_num: %u, versionIndex: %u, isACK: %d, version: %d," 103 | "isResend: %d, data: ", caption, p4ml_header->key, ntohl(p4ml_header->len_tensor), \ 104 | BYTE_TO_BINARY(ntohl(p4ml_header->bitmap)), p4ml_header->num_worker, 
ntohs(p4ml_header->appID), \ 105 | ntohs(p4ml_header->agtr), ntohs(p4ml_header->seq_num), ntohl(p4ml_header->versionIndex), \ 106 | p4ml_header->flag & 1?1:0, p4ml_header->flag & 32?1:0, p4ml_header->flag & 4?1:0); 107 | for (int i = 0; i < MAX_ENTRIES_PER_PACKET; i++) 108 | printf("%d ", ntohl(p4ml_header->vector[i])); 109 | printf("\n"); 110 | } 111 | -------------------------------------------------------------------------------- /common/packet.h: -------------------------------------------------------------------------------- 1 | #ifndef PACKET_P4ML_H 2 | #define PACKET_P4ML_H 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include "utils.h" 14 | 15 | #define DST_MAC 0x00, 0x01, 0x02, 0x03, 0x04, 0x05 16 | // #define SRC_MAC 0xb8, 0x59, 0x9f, 0x1d, 0x04, 0xf2 17 | #define SRC_MAC 0xe4, 0x1d, 0x2d, 0xf3, 0xdd, 0xcc 18 | // #define DST_MAC 0xb8, 0x59, 0x9f, 0x0b, 0x30, 0x72 19 | 20 | #define ETH_TYPE 0x07, 0x00 21 | 22 | #define IP_HDRS 0x45, 0x00, 0x00, 0x54, 0x00, 0x00, 0x40, 0x00, 0x40, 0x01, 0xaf, 0xb6 23 | 24 | #define SRC_IP 0x0d, 0x07, 0x38, 0x66 25 | 26 | #define DST_IP 0x0d, 0x07, 0x38, 0x7f 27 | 28 | #define SRC_PORT 0x67, 0x67 29 | 30 | #define DST_PORT 0x78, 0x78 31 | 32 | #define UDP_HDRS 0x00, 0x00, 0x00, 0x00 33 | 34 | // Only a template, DST_IP will be modified soon 35 | const unsigned char IP_ETH_UDP_HEADER[] = { DST_MAC, SRC_MAC, ETH_TYPE, IP_HDRS, SRC_IP, DST_IP }; 36 | 37 | // P4ML_PACKET_SIZE = IP_ETH_HEADER_SIZE + P4ML_HEADER_SIZE + P4ML_DATA_SIZE 38 | #define P4ML_PACKET_SIZE 190 39 | #define P4ML_DATA_SIZE 128 40 | #define P4ML_HEADER_SIZE 28 41 | #define P4ML_LAYER_SIZE 156 42 | #define IP_ETH_UDP_HEADER_SIZE 34 43 | 44 | #define MAX_ENTRIES_PER_PACKET 32 45 | 46 | #define BYTE_TO_BINARY_PATTERN "%c%c%c%c%c%c%c%c" 47 | #define BYTE_TO_BINARY(byte) \ 48 | (byte & 0x80 ? '1' : '0'), \ 49 | (byte & 0x40 ? '1' : '0'), \ 50 | (byte & 0x20 ? '1' : '0'), \ 51 | (byte & 0x10 ? '1' : '0'), \ 52 | (byte & 0x08 ? '1' : '0'), \ 53 | (byte & 0x04 ? '1' : '0'), \ 54 | (byte & 0x02 ? '1' : '0'), \ 55 | (byte & 0x01 ? 
'1' : '0') 56 | 57 | #pragma pack(push, 1) 58 | struct agghdr { 59 | uint64_t key; 60 | uint32_t len_tensor; 61 | uint32_t bitmap; 62 | uint8_t num_worker; 63 | uint8_t flag; 64 | uint16_t agtr; 65 | uint16_t appID; 66 | uint16_t seq_num; 67 | uint32_t versionIndex; 68 | int32_t vector[MAX_ENTRIES_PER_PACKET]; 69 | }; 70 | #pragma pack(pop) 71 | 72 | void make_packet_and_copy_to(void* payload, uint64_t* key, uint32_t* len_tensor, uint32_t* workerID, uint8_t* agtr_time, uint16_t* appID, uint16_t* agtr, uint16_t* seq_num, int32_t* data, bool isTerminated, bool isResend, int thread_id); 73 | void make_p4ml_layer_and_copy_to(void* payload, uint64_t* key, uint32_t* len_tensor, uint32_t* workerID, uint8_t* agtr_time, uint16_t* appID, uint16_t* agtr, uint16_t* seq_num, int32_t* data, bool isTerminated, bool isResend); 74 | void p4ml_header_ntoh(agghdr* p_p4ml); 75 | void p4ml_header_ntoh_without_data(agghdr* p_p4ml); 76 | void p4ml_header_hton_without_data(agghdr* p_p4ml); 77 | void p4ml_header_setACK(agghdr* p_p4ml); 78 | void p4ml_header_setTerminated(agghdr* p4ml_header); 79 | void p4ml_header_setLastDstMAC(int filter_id); 80 | void p4ml_header_print(agghdr* p4ml_header,char* caption); 81 | void p4ml_header_print_h(agghdr* p4ml_header, char* caption); 82 | 83 | 84 | #endif 85 | -------------------------------------------------------------------------------- /common/utils.h: -------------------------------------------------------------------------------- 1 | #ifndef UTILS_H 2 | #define UTILS_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | #define MAX_AGTR_COUNT 32000 16 | #define AGTR_TO_USE_PER_APPLICATION 2800 17 | 18 | #define EACH_HUGEPAGE_SIZE (2048*1024) 19 | 20 | #define likely(x) __builtin_expect(!!(x), 1) 21 | #define unlikely(x) __builtin_expect(!!(x), 0) 22 | 23 | #define KB(x) (static_cast(x) << 10) 24 | #define KB_(x) (KB(x) - 1) 25 | #define MB(x) (static_cast(x) << 20) 26 | #define MB_(x) (MB(x) - 1) 27 | 28 | static void memory_barrier() { asm volatile("" ::: "memory"); } 29 | static void lfence() { asm volatile("lfence" ::: "memory"); } 30 | static void sfence() { asm volatile("sfence" ::: "memory"); } 31 | static void mfence() { asm volatile("mfence" ::: "memory"); } 32 | static void clflush(volatile void* p) { asm volatile("clflush (%0)" ::"r"(p)); } 33 | static void cpuid(unsigned int* eax, unsigned int* ebx, unsigned int* ecx, 34 | unsigned int* edx) { 35 | asm volatile("cpuid" 36 | : "=a"(*eax), "=b"(*ebx), "=c"(*ecx), "=d"(*edx) 37 | : "0"(*eax), "2"(*ecx)); 38 | } 39 | 40 | inline void bindingCPU(int num) { 41 | int result; 42 | cpu_set_t mask; 43 | CPU_ZERO(&mask); 44 | CPU_SET(num, &mask); 45 | result = sched_setaffinity(0, sizeof(mask), &mask); 46 | if (result < 0) { 47 | printf("binding CPU fails\n"); 48 | exit(1); 49 | } 50 | } 51 | 52 | /// Check a condition at runtime. If the condition is false, throw exception. 53 | static inline void rt_assert(bool condition) { 54 | if (unlikely(!condition)) throw std::runtime_error(""); 55 | } 56 | 57 | 58 | /* allocate the huge pages. */ 59 | inline char *alloc_raw_pages(int cnt, int size) { 60 | /* 61 | * Don't touch the page since then allocator would not allocate the page 62 | * right now. 
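   * (mmap with MAP_ANONYMOUS below only reserves the virtual range; the physical
   * pages, 2 MB hugepages when MAP_HUGETLB is set, are faulted in on first touch.)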
63 | */ 64 | int flag = MAP_SHARED | MAP_ANONYMOUS; 65 | if (size == EACH_HUGEPAGE_SIZE) flag |= MAP_HUGETLB; 66 | char *ptr = 67 | (char *)mmap(NULL, (int64_t)cnt * size, PROT_READ | PROT_WRITE, flag, -1, 0); 68 | if (ptr == (char *)-1) { 69 | perror("alloc_raw_pages"); 70 | return NULL; 71 | } 72 | return ptr; 73 | } 74 | 75 | inline uint16_t hash_by(uint16_t agtr, uint8_t appID) { 76 | // return (agtr + appID) % MAX_AGTR_COUNT; 77 | return agtr % MAX_AGTR_COUNT; 78 | } 79 | 80 | // /* Returns the MAC Address Params: int iNetType - 0: ethernet, 1: Wifi char chMAC[6] - MAC Address in binary format Returns: 0: success -1: Failure */ 81 | // int getMACAddress(char chMAC[6]) 82 | // { 83 | // struct ifreq ifr; 84 | // int sock; 85 | // char* ifname = "enp178s0f0"; 86 | // sock = socket(AF_INET, SOCK_DGRAM, 0); 87 | // strcpy(ifr.ifr_name, ifname); 88 | // ifr.ifr_addr.sa_family = AF_INET; 89 | // if (ioctl(sock, SIOCGIFHWADDR, &ifr) < 0) { 90 | // return -1; 91 | // } 92 | // memcpy(chMAC, ifr.ifr_hwaddr.sa_data, 6); 93 | // close(sock); 94 | // return 0; 95 | // } 96 | 97 | // /* Returns the interface IP Address Params: int iNetType - 0: ethernet, 1: Wifi char *chIP - IP Address string Return: 0: success / -1: Failure */ 98 | // int getIpAddress(char chIP[16]) 99 | // { 100 | // struct ifreq ifr; 101 | // int sock = 0; 102 | // sock = socket(AF_INET, SOCK_DGRAM, 0); 103 | // strcpy(ifr.ifr_name, "enp178s0f0"); 104 | // if (ioctl(sock, SIOCGIFADDR, &ifr) < 0) { 105 | // strcpy(chIP, "0.0.0.0"); 106 | // return -1; 107 | // } 108 | // sprintf(chIP, "%s", inet_ntoa(((struct sockaddr_in*)&(ifr.ifr_addr))->sin_addr)); 109 | // close(sock); 110 | // return 0; 111 | // } 112 | 113 | #endif 114 | -------------------------------------------------------------------------------- /common/window_manager.cc: -------------------------------------------------------------------------------- 1 | #include "window_manager.h" 2 | 3 | WindowManager::WindowManager() { 4 | last_ACK = 0; 5 | } 6 | 7 | bool WindowManager::UpdateWindow(uint16_t *seq_num){ 8 | bool isLastAckUpdated = false; 9 | isACKed[*seq_num] = true; 10 | while (isACKed[last_ACK + 1]) { 11 | last_ACK++; 12 | isLastAckUpdated = true; 13 | } 14 | return isLastAckUpdated; 15 | } 16 | 17 | int WindowManager::Reset(int packet_total) { 18 | last_ACK = 0; 19 | total_ACK = packet_total; 20 | memset(isACKed, 0, sizeof(bool) * packet_total + 1); 21 | } -------------------------------------------------------------------------------- /common/window_manager.h: -------------------------------------------------------------------------------- 1 | #ifndef SLIDING_W_H 2 | #define SLIDING_W_H 3 | 4 | #include "packet.h" 5 | #include "Rogue.h" 6 | #define RESEND_TRIGGER 1 7 | 8 | class WindowManager { 9 | public: 10 | bool* isACKed; 11 | /* This three variable is completely useless, but 12 | when deleting it, the performance will drop from 46Gbps to 40Gbps.. 
*/ 13 | bool* isSent; 14 | std::chrono::high_resolution_clock::time_point* send_time; 15 | std::chrono::high_resolution_clock::time_point* receive_time; 16 | /* */ 17 | int total_ACK; 18 | int last_ACK; 19 | 20 | WindowManager(); 21 | bool UpdateWindow(uint16_t *num); 22 | int Reset(int packet_total); 23 | 24 | }; 25 | 26 | #endif -------------------------------------------------------------------------------- /docs/benchmark.md: -------------------------------------------------------------------------------- 1 | # Benchmark 2 | 3 | 4 | ## Requirement - Run with a Single Switch 5 | 6 | In this experiment, 2 physical workers and 1 switch are used. 7 | 8 | ## Getting Started 9 | ``` 10 | $ git clone https://github.com/ATP-NSDI/switchML.git 11 | ``` 12 | 13 | ### Run Tofino Switch 14 | 15 | #### Compile P4 Program and Start the Tofino Model (Terminal1) 16 | If you are using a physical switch, compile the switch program and then jump to Terminal 2 directly. 17 | ``` 18 | $ $TOOLS/p4_build.sh $REPO_SWITCHML/p4src/switchml.p4 19 | ``` 20 | ``` 21 | # (Optional) for the software Tofino behavior model 22 | $ cd $SDE && ./run_tofino_model.sh -p switchml 23 | ``` 24 | #### Load Specified Switch Program (Terminal2) 25 | ``` 26 | $ cd $SDE 27 | ``` 28 | ``` 29 | $ $SDE/run_switchd.sh -p switchml 30 | ``` 31 | #### Enable Ports and Install Entries (Terminal3) 32 | ``` 33 | $ $SDE/run_p4_tests.sh -t $REPO_SWITCHML/ptf/ -p switchml 34 | ``` 35 | ``` 36 | $ $TOOLS/run_pd_rpc.py -p switchml $REPO_SWITCHML/run_pd_rpc/setup.py 37 | ``` 38 | 39 | ### Compile and Run Workers 40 | ``` 41 | $ cd $REPO_SWITCHML/client/ 42 | ``` 43 | ``` 44 | $ make 45 | ``` 46 | #### Run Worker1 (Terminal4) 47 | ``` 48 | # Usage: ./app [MyID] [Num of Worker] [AppID] 49 | $ sudo ./app 0 2 1 50 | ``` 51 | #### Run Worker2 (Terminal5) 52 | ``` 53 | # Usage: ./app [MyID] [Num of Worker] [AppID] 54 | $ sudo ./app 1 2 1 55 | ``` 56 | 57 | Then you can switch to Terminal 4/5 to see the bandwidth report.
58 | -------------------------------------------------------------------------------- /p4src/includes/actions.p4: -------------------------------------------------------------------------------- 1 | action processentry1() { 2 | write_data_entry1.execute_stateful_alu(p4ml.agtr); 3 | } 4 | 5 | action processentry1andWriteToPacket() { 6 | write_read_data_entry1.execute_stateful_alu(p4ml.agtr); 7 | } 8 | 9 | action entry1WriteToPacket() { 10 | read_data_entry1.execute_stateful_alu(p4ml.agtr); 11 | } 12 | 13 | action processentry2() { 14 | write_data_entry2.execute_stateful_alu(p4ml.agtr); 15 | } 16 | 17 | action processentry2andWriteToPacket() { 18 | write_read_data_entry2.execute_stateful_alu(p4ml.agtr); 19 | } 20 | 21 | action entry2WriteToPacket() { 22 | read_data_entry2.execute_stateful_alu(p4ml.agtr); 23 | } 24 | 25 | action processentry3() { 26 | write_data_entry3.execute_stateful_alu(p4ml.agtr); 27 | } 28 | 29 | action processentry3andWriteToPacket() { 30 | write_read_data_entry3.execute_stateful_alu(p4ml.agtr); 31 | } 32 | 33 | action entry3WriteToPacket() { 34 | read_data_entry3.execute_stateful_alu(p4ml.agtr); 35 | } 36 | 37 | action processentry4() { 38 | write_data_entry4.execute_stateful_alu(p4ml.agtr); 39 | } 40 | 41 | action processentry4andWriteToPacket() { 42 | write_read_data_entry4.execute_stateful_alu(p4ml.agtr); 43 | } 44 | 45 | action entry4WriteToPacket() { 46 | read_data_entry4.execute_stateful_alu(p4ml.agtr); 47 | } 48 | 49 | action processentry5() { 50 | write_data_entry5.execute_stateful_alu(p4ml.agtr); 51 | } 52 | 53 | action processentry5andWriteToPacket() { 54 | write_read_data_entry5.execute_stateful_alu(p4ml.agtr); 55 | } 56 | 57 | action entry5WriteToPacket() { 58 | read_data_entry5.execute_stateful_alu(p4ml.agtr); 59 | } 60 | 61 | action processentry6() { 62 | write_data_entry6.execute_stateful_alu(p4ml.agtr); 63 | } 64 | 65 | action processentry6andWriteToPacket() { 66 | write_read_data_entry6.execute_stateful_alu(p4ml.agtr); 67 | } 68 | 69 | action entry6WriteToPacket() { 70 | read_data_entry6.execute_stateful_alu(p4ml.agtr); 71 | } 72 | 73 | action processentry7() { 74 | write_data_entry7.execute_stateful_alu(p4ml.agtr); 75 | } 76 | 77 | action processentry7andWriteToPacket() { 78 | write_read_data_entry7.execute_stateful_alu(p4ml.agtr); 79 | } 80 | 81 | action entry7WriteToPacket() { 82 | read_data_entry7.execute_stateful_alu(p4ml.agtr); 83 | } 84 | 85 | action processentry8() { 86 | write_data_entry8.execute_stateful_alu(p4ml.agtr); 87 | } 88 | 89 | action processentry8andWriteToPacket() { 90 | write_read_data_entry8.execute_stateful_alu(p4ml.agtr); 91 | } 92 | 93 | action entry8WriteToPacket() { 94 | read_data_entry8.execute_stateful_alu(p4ml.agtr); 95 | } 96 | 97 | action processentry9() { 98 | write_data_entry9.execute_stateful_alu(p4ml.agtr); 99 | } 100 | 101 | action processentry9andWriteToPacket() { 102 | write_read_data_entry9.execute_stateful_alu(p4ml.agtr); 103 | } 104 | 105 | action entry9WriteToPacket() { 106 | read_data_entry9.execute_stateful_alu(p4ml.agtr); 107 | } 108 | 109 | action processentry10() { 110 | write_data_entry10.execute_stateful_alu(p4ml.agtr); 111 | } 112 | 113 | action processentry10andWriteToPacket() { 114 | write_read_data_entry10.execute_stateful_alu(p4ml.agtr); 115 | } 116 | 117 | action entry10WriteToPacket() { 118 | read_data_entry10.execute_stateful_alu(p4ml.agtr); 119 | } 120 | 121 | action processentry11() { 122 | write_data_entry11.execute_stateful_alu(p4ml.agtr); 123 | } 124 | 125 | action 
processentry11andWriteToPacket() { 126 | write_read_data_entry11.execute_stateful_alu(p4ml.agtr); 127 | } 128 | 129 | action entry11WriteToPacket() { 130 | read_data_entry11.execute_stateful_alu(p4ml.agtr); 131 | } 132 | 133 | action processentry12() { 134 | write_data_entry12.execute_stateful_alu(p4ml.agtr); 135 | } 136 | 137 | action processentry12andWriteToPacket() { 138 | write_read_data_entry12.execute_stateful_alu(p4ml.agtr); 139 | } 140 | 141 | action entry12WriteToPacket() { 142 | read_data_entry12.execute_stateful_alu(p4ml.agtr); 143 | } 144 | 145 | action processentry13() { 146 | write_data_entry13.execute_stateful_alu(p4ml.agtr); 147 | } 148 | 149 | action processentry13andWriteToPacket() { 150 | write_read_data_entry13.execute_stateful_alu(p4ml.agtr); 151 | } 152 | 153 | action entry13WriteToPacket() { 154 | read_data_entry13.execute_stateful_alu(p4ml.agtr); 155 | } 156 | 157 | action processentry14() { 158 | write_data_entry14.execute_stateful_alu(p4ml.agtr); 159 | } 160 | 161 | action processentry14andWriteToPacket() { 162 | write_read_data_entry14.execute_stateful_alu(p4ml.agtr); 163 | } 164 | 165 | action entry14WriteToPacket() { 166 | read_data_entry14.execute_stateful_alu(p4ml.agtr); 167 | } 168 | 169 | action processentry15() { 170 | write_data_entry15.execute_stateful_alu(p4ml.agtr); 171 | } 172 | 173 | action processentry15andWriteToPacket() { 174 | write_read_data_entry15.execute_stateful_alu(p4ml.agtr); 175 | } 176 | 177 | action entry15WriteToPacket() { 178 | read_data_entry15.execute_stateful_alu(p4ml.agtr); 179 | } 180 | 181 | action processentry16() { 182 | write_data_entry16.execute_stateful_alu(p4ml.agtr); 183 | } 184 | 185 | action processentry16andWriteToPacket() { 186 | write_read_data_entry16.execute_stateful_alu(p4ml.agtr); 187 | } 188 | 189 | action entry16WriteToPacket() { 190 | read_data_entry16.execute_stateful_alu(p4ml.agtr); 191 | } 192 | 193 | action processentry17() { 194 | write_data_entry17.execute_stateful_alu(p4ml.agtr); 195 | } 196 | 197 | action processentry17andWriteToPacket() { 198 | write_read_data_entry17.execute_stateful_alu(p4ml.agtr); 199 | } 200 | 201 | action entry17WriteToPacket() { 202 | read_data_entry17.execute_stateful_alu(p4ml.agtr); 203 | } 204 | 205 | action processentry18() { 206 | write_data_entry18.execute_stateful_alu(p4ml.agtr); 207 | } 208 | 209 | action processentry18andWriteToPacket() { 210 | write_read_data_entry18.execute_stateful_alu(p4ml.agtr); 211 | } 212 | 213 | action entry18WriteToPacket() { 214 | read_data_entry18.execute_stateful_alu(p4ml.agtr); 215 | } 216 | 217 | action processentry19() { 218 | write_data_entry19.execute_stateful_alu(p4ml.agtr); 219 | } 220 | 221 | action processentry19andWriteToPacket() { 222 | write_read_data_entry19.execute_stateful_alu(p4ml.agtr); 223 | } 224 | 225 | action entry19WriteToPacket() { 226 | read_data_entry19.execute_stateful_alu(p4ml.agtr); 227 | } 228 | 229 | action processentry20() { 230 | write_data_entry20.execute_stateful_alu(p4ml.agtr); 231 | } 232 | 233 | action processentry20andWriteToPacket() { 234 | write_read_data_entry20.execute_stateful_alu(p4ml.agtr); 235 | } 236 | 237 | action entry20WriteToPacket() { 238 | read_data_entry20.execute_stateful_alu(p4ml.agtr); 239 | } 240 | 241 | action processentry21() { 242 | write_data_entry21.execute_stateful_alu(p4ml.agtr); 243 | } 244 | 245 | action processentry21andWriteToPacket() { 246 | write_read_data_entry21.execute_stateful_alu(p4ml.agtr); 247 | } 248 | 249 | action entry21WriteToPacket() { 250 | 
read_data_entry21.execute_stateful_alu(p4ml.agtr); 251 | } 252 | 253 | action processentry22() { 254 | write_data_entry22.execute_stateful_alu(p4ml.agtr); 255 | } 256 | 257 | action processentry22andWriteToPacket() { 258 | write_read_data_entry22.execute_stateful_alu(p4ml.agtr); 259 | } 260 | 261 | action entry22WriteToPacket() { 262 | read_data_entry22.execute_stateful_alu(p4ml.agtr); 263 | } 264 | 265 | action processentry23() { 266 | write_data_entry23.execute_stateful_alu(p4ml.agtr); 267 | } 268 | 269 | action processentry23andWriteToPacket() { 270 | write_read_data_entry23.execute_stateful_alu(p4ml.agtr); 271 | } 272 | 273 | action entry23WriteToPacket() { 274 | read_data_entry23.execute_stateful_alu(p4ml.agtr); 275 | } 276 | 277 | action processentry24() { 278 | write_data_entry24.execute_stateful_alu(p4ml.agtr); 279 | } 280 | 281 | action processentry24andWriteToPacket() { 282 | write_read_data_entry24.execute_stateful_alu(p4ml.agtr); 283 | } 284 | 285 | action entry24WriteToPacket() { 286 | read_data_entry24.execute_stateful_alu(p4ml.agtr); 287 | } 288 | 289 | action processentry25() { 290 | write_data_entry25.execute_stateful_alu(p4ml.agtr); 291 | } 292 | 293 | action processentry25andWriteToPacket() { 294 | write_read_data_entry25.execute_stateful_alu(p4ml.agtr); 295 | } 296 | 297 | action entry25WriteToPacket() { 298 | read_data_entry25.execute_stateful_alu(p4ml.agtr); 299 | } 300 | 301 | action processentry26() { 302 | write_data_entry26.execute_stateful_alu(p4ml.agtr); 303 | } 304 | 305 | action processentry26andWriteToPacket() { 306 | write_read_data_entry26.execute_stateful_alu(p4ml.agtr); 307 | } 308 | 309 | action entry26WriteToPacket() { 310 | read_data_entry26.execute_stateful_alu(p4ml.agtr); 311 | } 312 | 313 | action processentry27() { 314 | write_data_entry27.execute_stateful_alu(p4ml.agtr); 315 | } 316 | 317 | action processentry27andWriteToPacket() { 318 | write_read_data_entry27.execute_stateful_alu(p4ml.agtr); 319 | } 320 | 321 | action entry27WriteToPacket() { 322 | read_data_entry27.execute_stateful_alu(p4ml.agtr); 323 | } 324 | 325 | action processentry28() { 326 | write_data_entry28.execute_stateful_alu(p4ml.agtr); 327 | } 328 | 329 | action processentry28andWriteToPacket() { 330 | write_read_data_entry28.execute_stateful_alu(p4ml.agtr); 331 | } 332 | 333 | action entry28WriteToPacket() { 334 | read_data_entry28.execute_stateful_alu(p4ml.agtr); 335 | } 336 | 337 | action processentry29() { 338 | write_data_entry29.execute_stateful_alu(p4ml.agtr); 339 | } 340 | 341 | action processentry29andWriteToPacket() { 342 | write_read_data_entry29.execute_stateful_alu(p4ml.agtr); 343 | } 344 | 345 | action entry29WriteToPacket() { 346 | read_data_entry29.execute_stateful_alu(p4ml.agtr); 347 | } 348 | 349 | action processentry30() { 350 | write_data_entry30.execute_stateful_alu(p4ml.agtr); 351 | } 352 | 353 | action processentry30andWriteToPacket() { 354 | write_read_data_entry30.execute_stateful_alu(p4ml.agtr); 355 | } 356 | 357 | action entry30WriteToPacket() { 358 | read_data_entry30.execute_stateful_alu(p4ml.agtr); 359 | } 360 | 361 | action processentry31() { 362 | write_data_entry31.execute_stateful_alu(p4ml.agtr); 363 | } 364 | 365 | action processentry31andWriteToPacket() { 366 | write_read_data_entry31.execute_stateful_alu(p4ml.agtr); 367 | } 368 | 369 | action entry31WriteToPacket() { 370 | read_data_entry31.execute_stateful_alu(p4ml.agtr); 371 | } 372 | 373 | action processentry32() { 374 | write_data_entry32.execute_stateful_alu(p4ml.agtr); 375 | } 376 | 377 | 
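// Pattern shared by all 32 vector entries (the stateful ALUs are declared in registers.p4):
//   processentryN                 - if this is the first contribution of a round
//                                   (mdata.bitmap == 0) overwrite registerN with dataN-1,
//                                   otherwise add to it;
//   processentryNandWriteToPacket - same update, and also write the running sum back
//                                   into the packet field;
//   entryNWriteToPacket           - only read the stored value back into the packet.
// Every variant indexes its register with p4ml.agtr, the aggregator slot.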
action processentry32andWriteToPacket() { 378 | write_read_data_entry32.execute_stateful_alu(p4ml.agtr); 379 | } 380 | 381 | action entry32WriteToPacket() { 382 | read_data_entry32.execute_stateful_alu(p4ml.agtr); 383 | } 384 | 385 | -------------------------------------------------------------------------------- /p4src/includes/headers.p4: -------------------------------------------------------------------------------- 1 | #define MAX_ENTRIES_PER_PACKET 32 2 | /************************************************************************* 3 | *********************** H E A D E R S ********************************* 4 | *************************************************************************/ 5 | header_type ethernet_t { 6 | fields { 7 | dstAddr : 48; 8 | srcAddr : 48; 9 | etherType : 16; 10 | } 11 | } 12 | 13 | header_type ipv4_t { 14 | fields { 15 | version : 4; 16 | ihl : 4; 17 | dscp : 6; 18 | ecn : 2; 19 | totalLen : 16; 20 | identification : 16; 21 | flags : 3; 22 | fragOffset : 13; 23 | ttl : 8; 24 | protocol : 8; 25 | hdrChecksum : 16; 26 | srcAddr : 32; 27 | dstAddr : 32; 28 | } 29 | } 30 | 31 | header_type udp_t { 32 | fields { 33 | srcPort : 16; 34 | dstPort : 16; 35 | length_ : 16; 36 | checksum : 16; 37 | } 38 | } 39 | 40 | header_type p4ml_t { 41 | fields { 42 | key : 64; 43 | len_tensor : 32; 44 | bitmap : 32; 45 | agtr_time : 8; 46 | reserved : 1; 47 | isFirstTime : 1; 48 | version : 1; 49 | dataIndex : 1; 50 | ECN : 1; 51 | isResend : 1; 52 | isTerminated : 1; 53 | isACK : 1; 54 | agtr : 16; 55 | appIDandSeqNum : 32; 56 | versionIndex : 32; //in switchml.p4: this is used to find the bit location 57 | } 58 | } 59 | 60 | header_type entry_t { 61 | fields { 62 | data0 : 32 (signed); 63 | data1 : 32 (signed); 64 | data2 : 32 (signed); 65 | data3 : 32 (signed); 66 | data4 : 32 (signed); 67 | data5 : 32 (signed); 68 | data6 : 32 (signed); 69 | data7 : 32 (signed); 70 | data8 : 32 (signed); 71 | data9 : 32 (signed); 72 | data10 : 32 (signed); 73 | data11 : 32 (signed); 74 | data12 : 32 (signed); 75 | data13 : 32 (signed); 76 | data14 : 32 (signed); 77 | data15 : 32 (signed); 78 | data16 : 32 (signed); 79 | data17 : 32 (signed); 80 | data18 : 32 (signed); 81 | data19 : 32 (signed); 82 | data20 : 32 (signed); 83 | data21 : 32 (signed); 84 | data22 : 32 (signed); 85 | data23 : 32 (signed); 86 | data24 : 32 (signed); 87 | data25 : 32 (signed); 88 | data26 : 32 (signed); 89 | data27 : 32 (signed); 90 | data28 : 32 (signed); 91 | data29 : 32 (signed); 92 | data30 : 32 (signed); 93 | data31 : 32 (signed); 94 | } 95 | } 96 | 97 | // header_type entry_t { 98 | // fields { 99 | // data : 32 (signed); 100 | // } 101 | // } 102 | 103 | /************************************************************************* 104 | *********************** M E T A D A T A ******************************* 105 | *************************************************************************/ 106 | 107 | header_type p4ml_meta_t { 108 | fields { 109 | ack : 1; 110 | isResend : 32; 111 | isMyAppIDandMyCurrentSeq : 32; 112 | bitmap : 32; 113 | current_counter : 8; 114 | isAggregate : 8; 115 | qdepth : 32; 116 | position : 8; 117 | isDrop : 32; 118 | } 119 | } 120 | -------------------------------------------------------------------------------- /p4src/includes/parser.p4: -------------------------------------------------------------------------------- 1 | 2 | 3 | metadata p4ml_meta_t mdata; 4 | 5 | header ethernet_t ethernet; 6 | header ipv4_t ipv4; 7 | header udp_t udp; 8 | header p4ml_t p4ml; 9 | header p4ml_t p4ml_bg; 10 | 11 
| // header entry_t entry[MAX_ENTRIES_PER_PACKET]; 12 | header entry_t p4ml_entries; 13 | 14 | /************************************************************************* 15 | *********************** P A R S E R *********************************** 16 | *************************************************************************/ 17 | 18 | parser start { 19 | extract(ethernet); 20 | return select(ethernet.etherType) { 21 | 0x0700 : parse_ipv4; 22 | 0x0800 : parse_rdma; 23 | 0x0900 : parse_bg; 24 | default : ingress; 25 | } 26 | } 27 | 28 | parser parse_ipv4 { 29 | extract(ipv4); 30 | return parse_p4ml; 31 | } 32 | 33 | parser parse_udp { 34 | extract(udp); 35 | return parse_p4ml; 36 | } 37 | 38 | parser parse_p4ml { 39 | extract(p4ml); 40 | set_metadata(mdata.qdepth, 0); 41 | // return ingress; 42 | return parse_entry; 43 | } 44 | 45 | parser parse_entry { 46 | extract(p4ml_entries); 47 | return ingress; 48 | } 49 | 50 | /* RDMA */ 51 | parser parse_rdma { 52 | extract(ipv4); 53 | return ingress; 54 | } 55 | 56 | /* BG */ 57 | parser parse_bg { 58 | extract(ipv4); 59 | return parse_udp_bg; 60 | } 61 | 62 | parser parse_udp_bg { 63 | extract(udp); 64 | return parse_p4ml_bg; 65 | } 66 | 67 | parser parse_p4ml_bg { 68 | extract(p4ml_bg); 69 | set_metadata(mdata.qdepth, 0); 70 | // return ingress; 71 | return ingress; 72 | } 73 | -------------------------------------------------------------------------------- /p4src/includes/registers.p4: -------------------------------------------------------------------------------- 1 | 2 | register dqueue_alert_threshold { 3 | width : 32; 4 | instance_count : 1; 5 | } 6 | 7 | // last_ack x appID 8 | register loss_counter { 9 | width : 32; 10 | instance_count : 1; 11 | } 12 | 13 | register bitmap { 14 | width : 32; 15 | instance_count : 10000; 16 | } 17 | 18 | register appID_and_Seq { 19 | width : 32; 20 | instance_count : 10000; 21 | } 22 | 23 | register agtr_time { 24 | width : 32; 25 | instance_count : 10000; 26 | } 27 | 28 | register register1 { 29 | width : 32; 30 | instance_count : 10000; 31 | attributes: signed; 32 | // 32 * 32 33 | } 34 | 35 | register register2 { 36 | width : 32; 37 | instance_count : 10000; 38 | attributes: signed; 39 | // 32 * 32 40 | } 41 | 42 | register register3 { 43 | width : 32; 44 | instance_count : 10000; 45 | attributes: signed; 46 | // 32 * 32 47 | } 48 | 49 | register register4 { 50 | width : 32; 51 | instance_count : 10000; 52 | attributes: signed; 53 | // 32 * 32 54 | } 55 | 56 | register register5 { 57 | width : 32; 58 | instance_count : 10000; 59 | attributes: signed; 60 | // 32 * 32 61 | } 62 | 63 | register register6 { 64 | width : 32; 65 | instance_count : 10000; 66 | attributes: signed; 67 | // 32 * 32 68 | } 69 | 70 | register register7 { 71 | width : 32; 72 | instance_count : 10000; 73 | attributes: signed; 74 | // 32 * 32 75 | } 76 | 77 | register register8 { 78 | width : 32; 79 | instance_count : 10000; 80 | attributes: signed; 81 | // 32 * 32 82 | } 83 | 84 | register register9 { 85 | width : 32; 86 | instance_count : 10000; 87 | attributes: signed; 88 | // 32 * 32 89 | } 90 | 91 | register register10 { 92 | width : 32; 93 | instance_count : 10000; 94 | attributes: signed; 95 | // 32 * 32 96 | } 97 | 98 | register register11 { 99 | width : 32; 100 | instance_count : 10000; 101 | attributes: signed; 102 | // 32 * 32 103 | } 104 | 105 | register register12 { 106 | width : 32; 107 | instance_count : 10000; 108 | attributes: signed; 109 | // 32 * 32 110 | } 111 | 112 | register register13 { 113 | width : 32; 114 | 
instance_count : 10000; 115 | attributes: signed; 116 | // 32 * 32 117 | } 118 | 119 | register register14 { 120 | width : 32; 121 | instance_count : 10000; 122 | attributes: signed; 123 | // 32 * 32 124 | } 125 | 126 | register register15 { 127 | width : 32; 128 | instance_count : 10000; 129 | attributes: signed; 130 | // 32 * 32 131 | } 132 | 133 | register register16 { 134 | width : 32; 135 | instance_count : 10000; 136 | attributes: signed; 137 | // 32 * 32 138 | } 139 | 140 | register register17 { 141 | width : 32; 142 | instance_count : 10000; 143 | attributes: signed; 144 | // 32 * 32 145 | } 146 | 147 | register register18 { 148 | width : 32; 149 | instance_count : 10000; 150 | attributes: signed; 151 | // 32 * 32 152 | } 153 | 154 | register register19 { 155 | width : 32; 156 | instance_count : 10000; 157 | attributes: signed; 158 | // 32 * 32 159 | } 160 | 161 | register register20 { 162 | width : 32; 163 | instance_count : 10000; 164 | attributes: signed; 165 | // 32 * 32 166 | } 167 | 168 | register register21 { 169 | width : 32; 170 | instance_count : 10000; 171 | attributes: signed; 172 | // 32 * 32 173 | } 174 | 175 | register register22 { 176 | width : 32; 177 | instance_count : 10000; 178 | attributes: signed; 179 | // 32 * 32 180 | } 181 | 182 | register register23 { 183 | width : 32; 184 | instance_count : 10000; 185 | attributes: signed; 186 | // 32 * 32 187 | } 188 | 189 | register register24 { 190 | width : 32; 191 | instance_count : 10000; 192 | attributes: signed; 193 | // 32 * 32 194 | } 195 | 196 | register register25 { 197 | width : 32; 198 | instance_count : 10000; 199 | attributes: signed; 200 | // 32 * 32 201 | } 202 | 203 | register register26 { 204 | width : 32; 205 | instance_count : 10000; 206 | attributes: signed; 207 | // 32 * 32 208 | } 209 | 210 | register register27 { 211 | width : 32; 212 | instance_count : 10000; 213 | attributes: signed; 214 | // 32 * 32 215 | } 216 | 217 | register register28 { 218 | width : 32; 219 | instance_count : 10000; 220 | attributes: signed; 221 | // 32 * 32 222 | } 223 | 224 | register register29 { 225 | width : 32; 226 | instance_count : 10000; 227 | attributes: signed; 228 | // 32 * 32 229 | } 230 | 231 | register register30 { 232 | width : 32; 233 | instance_count : 10000; 234 | attributes: signed; 235 | // 32 * 32 236 | } 237 | 238 | register register31 { 239 | width : 32; 240 | instance_count : 10000; 241 | attributes: signed; 242 | // 32 * 32 243 | } 244 | 245 | register register32 { 246 | width : 32; 247 | instance_count : 10000; 248 | attributes: signed; 249 | // 32 * 32 250 | } 251 | 252 | // Agg and write to packet 253 | blackbox stateful_alu write_data_entry1 { 254 | reg: register1; 255 | // if a new bitmap is coming, clear the register 256 | condition_lo : mdata.bitmap == 0; 257 | 258 | update_lo_1_predicate : condition_lo; 259 | update_lo_1_value : p4ml_entries.data0; 260 | 261 | update_lo_2_predicate : not condition_lo; 262 | update_lo_2_value : register_lo + p4ml_entries.data0; 263 | 264 | } 265 | 266 | // Write to packet 267 | blackbox stateful_alu write_read_data_entry1 { 268 | reg: register1; 269 | 270 | condition_lo : mdata.bitmap == 0; 271 | 272 | update_lo_1_predicate : condition_lo; 273 | update_lo_1_value : p4ml_entries.data0; 274 | 275 | update_lo_2_predicate : not condition_lo; 276 | update_lo_2_value : register_lo + p4ml_entries.data0; 277 | 278 | output_dst : p4ml_entries.data0; 279 | output_value : alu_lo; 280 | } 281 | 282 | blackbox stateful_alu read_data_entry1 { 283 | reg: register1; 284 | 
285 | output_dst : p4ml_entries.data0; 286 | output_value : register_lo; 287 | } 288 | 289 | blackbox stateful_alu write_data_entry2 { 290 | reg: register2; 291 | 292 | condition_lo : mdata.bitmap == 0; 293 | update_lo_1_predicate : condition_lo; 294 | update_lo_1_value : p4ml_entries.data1; 295 | 296 | update_lo_2_predicate : not condition_lo; 297 | update_lo_2_value : register_lo + p4ml_entries.data1; 298 | } 299 | 300 | blackbox stateful_alu write_read_data_entry2 { 301 | reg: register2; 302 | 303 | condition_lo : mdata.bitmap == 0; 304 | 305 | update_lo_1_predicate : condition_lo; 306 | update_lo_1_value : p4ml_entries.data1; 307 | 308 | update_lo_2_predicate : not condition_lo; 309 | update_lo_2_value : register_lo + p4ml_entries.data1; 310 | 311 | output_dst : p4ml_entries.data1; 312 | output_value : alu_lo; 313 | } 314 | 315 | blackbox stateful_alu read_data_entry2 { 316 | reg: register2; 317 | 318 | output_dst : p4ml_entries.data1; 319 | output_value : register_lo; 320 | } 321 | 322 | blackbox stateful_alu write_data_entry3 { 323 | reg: register3; 324 | 325 | condition_lo : mdata.bitmap == 0; 326 | update_lo_1_predicate : condition_lo; 327 | update_lo_1_value : p4ml_entries.data2; 328 | 329 | update_lo_2_predicate : not condition_lo; 330 | update_lo_2_value : register_lo + p4ml_entries.data2; 331 | } 332 | 333 | blackbox stateful_alu write_read_data_entry3 { 334 | reg: register3; 335 | 336 | condition_lo : mdata.bitmap == 0; 337 | 338 | update_lo_1_predicate : condition_lo; 339 | update_lo_1_value : p4ml_entries.data2; 340 | 341 | update_lo_2_predicate : not condition_lo; 342 | update_lo_2_value : register_lo + p4ml_entries.data2; 343 | 344 | output_dst : p4ml_entries.data2; 345 | output_value : alu_lo; 346 | } 347 | 348 | blackbox stateful_alu read_data_entry3 { 349 | reg: register3; 350 | 351 | output_dst : p4ml_entries.data2; 352 | output_value : register_lo; 353 | } 354 | 355 | blackbox stateful_alu write_data_entry4 { 356 | reg: register4; 357 | 358 | condition_lo : mdata.bitmap == 0; 359 | update_lo_1_predicate : condition_lo; 360 | update_lo_1_value : p4ml_entries.data3; 361 | 362 | update_lo_2_predicate : not condition_lo; 363 | update_lo_2_value : register_lo + p4ml_entries.data3; 364 | } 365 | 366 | blackbox stateful_alu write_read_data_entry4 { 367 | reg: register4; 368 | 369 | condition_lo : mdata.bitmap == 0; 370 | 371 | update_lo_1_predicate : condition_lo; 372 | update_lo_1_value : p4ml_entries.data3; 373 | 374 | update_lo_2_predicate : not condition_lo; 375 | update_lo_2_value : register_lo + p4ml_entries.data3; 376 | 377 | output_dst : p4ml_entries.data3; 378 | output_value : alu_lo; 379 | } 380 | 381 | blackbox stateful_alu read_data_entry4 { 382 | reg: register4; 383 | 384 | output_dst : p4ml_entries.data3; 385 | output_value : register_lo; 386 | } 387 | 388 | blackbox stateful_alu write_data_entry5 { 389 | reg: register5; 390 | 391 | condition_lo : mdata.bitmap == 0; 392 | update_lo_1_predicate : condition_lo; 393 | update_lo_1_value : p4ml_entries.data4; 394 | 395 | update_lo_2_predicate : not condition_lo; 396 | update_lo_2_value : register_lo + p4ml_entries.data4; 397 | } 398 | 399 | blackbox stateful_alu write_read_data_entry5 { 400 | reg: register5; 401 | 402 | condition_lo : mdata.bitmap == 0; 403 | 404 | update_lo_1_predicate : condition_lo; 405 | update_lo_1_value : p4ml_entries.data4; 406 | 407 | update_lo_2_predicate : not condition_lo; 408 | update_lo_2_value : register_lo + p4ml_entries.data4; 409 | 410 | output_dst : p4ml_entries.data4; 411 | 
output_value : alu_lo; 412 | } 413 | 414 | blackbox stateful_alu read_data_entry5 { 415 | reg: register5; 416 | 417 | output_dst : p4ml_entries.data4; 418 | output_value : register_lo; 419 | } 420 | 421 | blackbox stateful_alu write_data_entry6 { 422 | reg: register6; 423 | 424 | condition_lo : mdata.bitmap == 0; 425 | update_lo_1_predicate : condition_lo; 426 | update_lo_1_value : p4ml_entries.data5; 427 | 428 | update_lo_2_predicate : not condition_lo; 429 | update_lo_2_value : register_lo + p4ml_entries.data5; 430 | } 431 | 432 | blackbox stateful_alu write_read_data_entry6 { 433 | reg: register6; 434 | 435 | condition_lo : mdata.bitmap == 0; 436 | 437 | update_lo_1_predicate : condition_lo; 438 | update_lo_1_value : p4ml_entries.data5; 439 | 440 | update_lo_2_predicate : not condition_lo; 441 | update_lo_2_value : register_lo + p4ml_entries.data5; 442 | 443 | output_dst : p4ml_entries.data5; 444 | output_value : alu_lo; 445 | } 446 | 447 | blackbox stateful_alu read_data_entry6 { 448 | reg: register6; 449 | 450 | output_dst : p4ml_entries.data5; 451 | output_value : register_lo; 452 | } 453 | 454 | blackbox stateful_alu write_data_entry7 { 455 | reg: register7; 456 | 457 | condition_lo : mdata.bitmap == 0; 458 | update_lo_1_predicate : condition_lo; 459 | update_lo_1_value : p4ml_entries.data6; 460 | 461 | update_lo_2_predicate : not condition_lo; 462 | update_lo_2_value : register_lo + p4ml_entries.data6; 463 | } 464 | 465 | blackbox stateful_alu write_read_data_entry7 { 466 | reg: register7; 467 | 468 | condition_lo : mdata.bitmap == 0; 469 | 470 | update_lo_1_predicate : condition_lo; 471 | update_lo_1_value : p4ml_entries.data6; 472 | 473 | update_lo_2_predicate : not condition_lo; 474 | update_lo_2_value : register_lo + p4ml_entries.data6; 475 | 476 | output_dst : p4ml_entries.data6; 477 | output_value : alu_lo; 478 | } 479 | 480 | blackbox stateful_alu read_data_entry7 { 481 | reg: register7; 482 | 483 | output_dst : p4ml_entries.data6; 484 | output_value : register_lo; 485 | } 486 | 487 | blackbox stateful_alu write_data_entry8 { 488 | reg: register8; 489 | 490 | condition_lo : mdata.bitmap == 0; 491 | update_lo_1_predicate : condition_lo; 492 | update_lo_1_value : p4ml_entries.data7; 493 | 494 | update_lo_2_predicate : not condition_lo; 495 | update_lo_2_value : register_lo + p4ml_entries.data7; 496 | } 497 | 498 | blackbox stateful_alu write_read_data_entry8 { 499 | reg: register8; 500 | 501 | condition_lo : mdata.bitmap == 0; 502 | 503 | update_lo_1_predicate : condition_lo; 504 | update_lo_1_value : p4ml_entries.data7; 505 | 506 | update_lo_2_predicate : not condition_lo; 507 | update_lo_2_value : register_lo + p4ml_entries.data7; 508 | 509 | output_dst : p4ml_entries.data7; 510 | output_value : alu_lo; 511 | } 512 | 513 | blackbox stateful_alu read_data_entry8 { 514 | reg: register8; 515 | 516 | output_dst : p4ml_entries.data7; 517 | output_value : register_lo; 518 | } 519 | 520 | blackbox stateful_alu write_data_entry9 { 521 | reg: register9; 522 | 523 | condition_lo : mdata.bitmap == 0; 524 | update_lo_1_predicate : condition_lo; 525 | update_lo_1_value : p4ml_entries.data8; 526 | 527 | update_lo_2_predicate : not condition_lo; 528 | update_lo_2_value : register_lo + p4ml_entries.data8; 529 | } 530 | 531 | blackbox stateful_alu write_read_data_entry9 { 532 | reg: register9; 533 | 534 | condition_lo : mdata.bitmap == 0; 535 | 536 | update_lo_1_predicate : condition_lo; 537 | update_lo_1_value : p4ml_entries.data8; 538 | 539 | update_lo_2_predicate : not condition_lo; 540 | 
update_lo_2_value : register_lo + p4ml_entries.data8; 541 | 542 | output_dst : p4ml_entries.data8; 543 | output_value : alu_lo; 544 | } 545 | 546 | blackbox stateful_alu read_data_entry9 { 547 | reg: register9; 548 | 549 | output_dst : p4ml_entries.data8; 550 | output_value : register_lo; 551 | } 552 | 553 | blackbox stateful_alu write_data_entry10 { 554 | reg: register10; 555 | 556 | condition_lo : mdata.bitmap == 0; 557 | update_lo_1_predicate : condition_lo; 558 | update_lo_1_value : p4ml_entries.data9; 559 | 560 | update_lo_2_predicate : not condition_lo; 561 | update_lo_2_value : register_lo + p4ml_entries.data9; 562 | } 563 | 564 | blackbox stateful_alu write_read_data_entry10 { 565 | reg: register10; 566 | 567 | condition_lo : mdata.bitmap == 0; 568 | 569 | update_lo_1_predicate : condition_lo; 570 | update_lo_1_value : p4ml_entries.data9; 571 | 572 | update_lo_2_predicate : not condition_lo; 573 | update_lo_2_value : register_lo + p4ml_entries.data9; 574 | 575 | output_dst : p4ml_entries.data9; 576 | output_value : alu_lo; 577 | } 578 | 579 | blackbox stateful_alu read_data_entry10 { 580 | reg: register10; 581 | 582 | output_dst : p4ml_entries.data9; 583 | output_value : register_lo; 584 | } 585 | 586 | blackbox stateful_alu write_data_entry11 { 587 | reg: register11; 588 | 589 | condition_lo : mdata.bitmap == 0; 590 | update_lo_1_predicate : condition_lo; 591 | update_lo_1_value : p4ml_entries.data10; 592 | 593 | update_lo_2_predicate : not condition_lo; 594 | update_lo_2_value : register_lo + p4ml_entries.data10; 595 | } 596 | 597 | blackbox stateful_alu write_read_data_entry11 { 598 | reg: register11; 599 | 600 | condition_lo : mdata.bitmap == 0; 601 | 602 | update_lo_1_predicate : condition_lo; 603 | update_lo_1_value : p4ml_entries.data10; 604 | 605 | update_lo_2_predicate : not condition_lo; 606 | update_lo_2_value : register_lo + p4ml_entries.data10; 607 | 608 | output_dst : p4ml_entries.data10; 609 | output_value : alu_lo; 610 | } 611 | 612 | blackbox stateful_alu read_data_entry11 { 613 | reg: register11; 614 | 615 | output_dst : p4ml_entries.data10; 616 | output_value : register_lo; 617 | } 618 | 619 | blackbox stateful_alu write_data_entry12 { 620 | reg: register12; 621 | 622 | condition_lo : mdata.bitmap == 0; 623 | update_lo_1_predicate : condition_lo; 624 | update_lo_1_value : p4ml_entries.data11; 625 | 626 | update_lo_2_predicate : not condition_lo; 627 | update_lo_2_value : register_lo + p4ml_entries.data11; 628 | } 629 | 630 | blackbox stateful_alu write_read_data_entry12 { 631 | reg: register12; 632 | 633 | condition_lo : mdata.bitmap == 0; 634 | 635 | update_lo_1_predicate : condition_lo; 636 | update_lo_1_value : p4ml_entries.data11; 637 | 638 | update_lo_2_predicate : not condition_lo; 639 | update_lo_2_value : register_lo + p4ml_entries.data11; 640 | 641 | output_dst : p4ml_entries.data11; 642 | output_value : alu_lo; 643 | } 644 | 645 | blackbox stateful_alu read_data_entry12 { 646 | reg: register12; 647 | 648 | output_dst : p4ml_entries.data11; 649 | output_value : register_lo; 650 | } 651 | 652 | blackbox stateful_alu write_data_entry13 { 653 | reg: register13; 654 | 655 | condition_lo : mdata.bitmap == 0; 656 | update_lo_1_predicate : condition_lo; 657 | update_lo_1_value : p4ml_entries.data12; 658 | 659 | update_lo_2_predicate : not condition_lo; 660 | update_lo_2_value : register_lo + p4ml_entries.data12; 661 | } 662 | 663 | blackbox stateful_alu write_read_data_entry13 { 664 | reg: register13; 665 | 666 | condition_lo : mdata.bitmap == 0; 667 | 668 | 
update_lo_1_predicate : condition_lo; 669 | update_lo_1_value : p4ml_entries.data12; 670 | 671 | update_lo_2_predicate : not condition_lo; 672 | update_lo_2_value : register_lo + p4ml_entries.data12; 673 | 674 | output_dst : p4ml_entries.data12; 675 | output_value : alu_lo; 676 | } 677 | 678 | blackbox stateful_alu read_data_entry13 { 679 | reg: register13; 680 | 681 | output_dst : p4ml_entries.data12; 682 | output_value : register_lo; 683 | } 684 | 685 | blackbox stateful_alu write_data_entry14 { 686 | reg: register14; 687 | 688 | condition_lo : mdata.bitmap == 0; 689 | update_lo_1_predicate : condition_lo; 690 | update_lo_1_value : p4ml_entries.data13; 691 | 692 | update_lo_2_predicate : not condition_lo; 693 | update_lo_2_value : register_lo + p4ml_entries.data13; 694 | } 695 | 696 | blackbox stateful_alu write_read_data_entry14 { 697 | reg: register14; 698 | 699 | condition_lo : mdata.bitmap == 0; 700 | 701 | update_lo_1_predicate : condition_lo; 702 | update_lo_1_value : p4ml_entries.data13; 703 | 704 | update_lo_2_predicate : not condition_lo; 705 | update_lo_2_value : register_lo + p4ml_entries.data13; 706 | 707 | output_dst : p4ml_entries.data13; 708 | output_value : alu_lo; 709 | } 710 | 711 | blackbox stateful_alu read_data_entry14 { 712 | reg: register14; 713 | 714 | output_dst : p4ml_entries.data13; 715 | output_value : register_lo; 716 | } 717 | 718 | blackbox stateful_alu write_data_entry15 { 719 | reg: register15; 720 | 721 | condition_lo : mdata.bitmap == 0; 722 | update_lo_1_predicate : condition_lo; 723 | update_lo_1_value : p4ml_entries.data14; 724 | 725 | update_lo_2_predicate : not condition_lo; 726 | update_lo_2_value : register_lo + p4ml_entries.data14; 727 | } 728 | 729 | blackbox stateful_alu write_read_data_entry15 { 730 | reg: register15; 731 | 732 | condition_lo : mdata.bitmap == 0; 733 | 734 | update_lo_1_predicate : condition_lo; 735 | update_lo_1_value : p4ml_entries.data14; 736 | 737 | update_lo_2_predicate : not condition_lo; 738 | update_lo_2_value : register_lo + p4ml_entries.data14; 739 | 740 | output_dst : p4ml_entries.data14; 741 | output_value : alu_lo; 742 | } 743 | 744 | blackbox stateful_alu read_data_entry15 { 745 | reg: register15; 746 | 747 | output_dst : p4ml_entries.data14; 748 | output_value : register_lo; 749 | } 750 | 751 | blackbox stateful_alu write_data_entry16 { 752 | reg: register16; 753 | 754 | condition_lo : mdata.bitmap == 0; 755 | update_lo_1_predicate : condition_lo; 756 | update_lo_1_value : p4ml_entries.data15; 757 | 758 | update_lo_2_predicate : not condition_lo; 759 | update_lo_2_value : register_lo + p4ml_entries.data15; 760 | } 761 | 762 | blackbox stateful_alu write_read_data_entry16 { 763 | reg: register16; 764 | 765 | condition_lo : mdata.bitmap == 0; 766 | 767 | update_lo_1_predicate : condition_lo; 768 | update_lo_1_value : p4ml_entries.data15; 769 | 770 | update_lo_2_predicate : not condition_lo; 771 | update_lo_2_value : register_lo + p4ml_entries.data15; 772 | 773 | output_dst : p4ml_entries.data15; 774 | output_value : alu_lo; 775 | } 776 | 777 | blackbox stateful_alu read_data_entry16 { 778 | reg: register16; 779 | 780 | output_dst : p4ml_entries.data15; 781 | output_value : register_lo; 782 | } 783 | 784 | blackbox stateful_alu write_data_entry17 { 785 | reg: register17; 786 | 787 | condition_lo : mdata.bitmap == 0; 788 | update_lo_1_predicate : condition_lo; 789 | update_lo_1_value : p4ml_entries.data16; 790 | 791 | update_lo_2_predicate : not condition_lo; 792 | update_lo_2_value : register_lo + 
p4ml_entries.data16; 793 | } 794 | 795 | blackbox stateful_alu write_read_data_entry17 { 796 | reg: register17; 797 | 798 | condition_lo : mdata.bitmap == 0; 799 | 800 | update_lo_1_predicate : condition_lo; 801 | update_lo_1_value : p4ml_entries.data16; 802 | 803 | update_lo_2_predicate : not condition_lo; 804 | update_lo_2_value : register_lo + p4ml_entries.data16; 805 | 806 | output_dst : p4ml_entries.data16; 807 | output_value : alu_lo; 808 | } 809 | 810 | blackbox stateful_alu read_data_entry17 { 811 | reg: register17; 812 | 813 | output_dst : p4ml_entries.data16; 814 | output_value : register_lo; 815 | } 816 | 817 | blackbox stateful_alu write_data_entry18 { 818 | reg: register18; 819 | 820 | condition_lo : mdata.bitmap == 0; 821 | update_lo_1_predicate : condition_lo; 822 | update_lo_1_value : p4ml_entries.data17; 823 | 824 | update_lo_2_predicate : not condition_lo; 825 | update_lo_2_value : register_lo + p4ml_entries.data17; 826 | } 827 | 828 | blackbox stateful_alu write_read_data_entry18 { 829 | reg: register18; 830 | 831 | condition_lo : mdata.bitmap == 0; 832 | 833 | update_lo_1_predicate : condition_lo; 834 | update_lo_1_value : p4ml_entries.data17; 835 | 836 | update_lo_2_predicate : not condition_lo; 837 | update_lo_2_value : register_lo + p4ml_entries.data17; 838 | 839 | output_dst : p4ml_entries.data17; 840 | output_value : alu_lo; 841 | } 842 | 843 | blackbox stateful_alu read_data_entry18 { 844 | reg: register18; 845 | 846 | output_dst : p4ml_entries.data17; 847 | output_value : register_lo; 848 | } 849 | 850 | blackbox stateful_alu write_data_entry19 { 851 | reg: register19; 852 | 853 | condition_lo : mdata.bitmap == 0; 854 | update_lo_1_predicate : condition_lo; 855 | update_lo_1_value : p4ml_entries.data18; 856 | 857 | update_lo_2_predicate : not condition_lo; 858 | update_lo_2_value : register_lo + p4ml_entries.data18; 859 | } 860 | 861 | blackbox stateful_alu write_read_data_entry19 { 862 | reg: register19; 863 | 864 | condition_lo : mdata.bitmap == 0; 865 | 866 | update_lo_1_predicate : condition_lo; 867 | update_lo_1_value : p4ml_entries.data18; 868 | 869 | update_lo_2_predicate : not condition_lo; 870 | update_lo_2_value : register_lo + p4ml_entries.data18; 871 | 872 | output_dst : p4ml_entries.data18; 873 | output_value : alu_lo; 874 | } 875 | 876 | blackbox stateful_alu read_data_entry19 { 877 | reg: register19; 878 | 879 | output_dst : p4ml_entries.data18; 880 | output_value : register_lo; 881 | } 882 | 883 | blackbox stateful_alu write_data_entry20 { 884 | reg: register20; 885 | 886 | condition_lo : mdata.bitmap == 0; 887 | update_lo_1_predicate : condition_lo; 888 | update_lo_1_value : p4ml_entries.data19; 889 | 890 | update_lo_2_predicate : not condition_lo; 891 | update_lo_2_value : register_lo + p4ml_entries.data19; 892 | } 893 | 894 | blackbox stateful_alu write_read_data_entry20 { 895 | reg: register20; 896 | 897 | condition_lo : mdata.bitmap == 0; 898 | 899 | update_lo_1_predicate : condition_lo; 900 | update_lo_1_value : p4ml_entries.data19; 901 | 902 | update_lo_2_predicate : not condition_lo; 903 | update_lo_2_value : register_lo + p4ml_entries.data19; 904 | 905 | output_dst : p4ml_entries.data19; 906 | output_value : alu_lo; 907 | } 908 | 909 | blackbox stateful_alu read_data_entry20 { 910 | reg: register20; 911 | 912 | output_dst : p4ml_entries.data19; 913 | output_value : register_lo; 914 | } 915 | 916 | blackbox stateful_alu write_data_entry21 { 917 | reg: register21; 918 | 919 | condition_lo : mdata.bitmap == 0; 920 | update_lo_1_predicate : 
condition_lo; 921 | update_lo_1_value : p4ml_entries.data20; 922 | 923 | update_lo_2_predicate : not condition_lo; 924 | update_lo_2_value : register_lo + p4ml_entries.data20; 925 | } 926 | 927 | blackbox stateful_alu write_read_data_entry21 { 928 | reg: register21; 929 | 930 | condition_lo : mdata.bitmap == 0; 931 | 932 | update_lo_1_predicate : condition_lo; 933 | update_lo_1_value : p4ml_entries.data20; 934 | 935 | update_lo_2_predicate : not condition_lo; 936 | update_lo_2_value : register_lo + p4ml_entries.data20; 937 | 938 | output_dst : p4ml_entries.data20; 939 | output_value : alu_lo; 940 | } 941 | 942 | blackbox stateful_alu read_data_entry21 { 943 | reg: register21; 944 | 945 | output_dst : p4ml_entries.data20; 946 | output_value : register_lo; 947 | } 948 | 949 | blackbox stateful_alu write_data_entry22 { 950 | reg: register22; 951 | 952 | condition_lo : mdata.bitmap == 0; 953 | update_lo_1_predicate : condition_lo; 954 | update_lo_1_value : p4ml_entries.data21; 955 | 956 | update_lo_2_predicate : not condition_lo; 957 | update_lo_2_value : register_lo + p4ml_entries.data21; 958 | } 959 | 960 | blackbox stateful_alu write_read_data_entry22 { 961 | reg: register22; 962 | 963 | condition_lo : mdata.bitmap == 0; 964 | 965 | update_lo_1_predicate : condition_lo; 966 | update_lo_1_value : p4ml_entries.data21; 967 | 968 | update_lo_2_predicate : not condition_lo; 969 | update_lo_2_value : register_lo + p4ml_entries.data21; 970 | 971 | output_dst : p4ml_entries.data21; 972 | output_value : alu_lo; 973 | } 974 | 975 | blackbox stateful_alu read_data_entry22 { 976 | reg: register22; 977 | 978 | output_dst : p4ml_entries.data21; 979 | output_value : register_lo; 980 | } 981 | 982 | blackbox stateful_alu write_data_entry23 { 983 | reg: register23; 984 | 985 | condition_lo : mdata.bitmap == 0; 986 | update_lo_1_predicate : condition_lo; 987 | update_lo_1_value : p4ml_entries.data22; 988 | 989 | update_lo_2_predicate : not condition_lo; 990 | update_lo_2_value : register_lo + p4ml_entries.data22; 991 | } 992 | 993 | blackbox stateful_alu write_read_data_entry23 { 994 | reg: register23; 995 | 996 | condition_lo : mdata.bitmap == 0; 997 | 998 | update_lo_1_predicate : condition_lo; 999 | update_lo_1_value : p4ml_entries.data22; 1000 | 1001 | update_lo_2_predicate : not condition_lo; 1002 | update_lo_2_value : register_lo + p4ml_entries.data22; 1003 | 1004 | output_dst : p4ml_entries.data22; 1005 | output_value : alu_lo; 1006 | } 1007 | 1008 | blackbox stateful_alu read_data_entry23 { 1009 | reg: register23; 1010 | 1011 | output_dst : p4ml_entries.data22; 1012 | output_value : register_lo; 1013 | } 1014 | 1015 | blackbox stateful_alu write_data_entry24 { 1016 | reg: register24; 1017 | 1018 | condition_lo : mdata.bitmap == 0; 1019 | update_lo_1_predicate : condition_lo; 1020 | update_lo_1_value : p4ml_entries.data23; 1021 | 1022 | update_lo_2_predicate : not condition_lo; 1023 | update_lo_2_value : register_lo + p4ml_entries.data23; 1024 | } 1025 | 1026 | blackbox stateful_alu write_read_data_entry24 { 1027 | reg: register24; 1028 | 1029 | condition_lo : mdata.bitmap == 0; 1030 | 1031 | update_lo_1_predicate : condition_lo; 1032 | update_lo_1_value : p4ml_entries.data23; 1033 | 1034 | update_lo_2_predicate : not condition_lo; 1035 | update_lo_2_value : register_lo + p4ml_entries.data23; 1036 | 1037 | output_dst : p4ml_entries.data23; 1038 | output_value : alu_lo; 1039 | } 1040 | 1041 | blackbox stateful_alu read_data_entry24 { 1042 | reg: register24; 1043 | 1044 | output_dst : 
p4ml_entries.data23; 1045 | output_value : register_lo; 1046 | } 1047 | 1048 | blackbox stateful_alu write_data_entry25 { 1049 | reg: register25; 1050 | 1051 | condition_lo : mdata.bitmap == 0; 1052 | update_lo_1_predicate : condition_lo; 1053 | update_lo_1_value : p4ml_entries.data24; 1054 | 1055 | update_lo_2_predicate : not condition_lo; 1056 | update_lo_2_value : register_lo + p4ml_entries.data24; 1057 | } 1058 | 1059 | blackbox stateful_alu write_read_data_entry25 { 1060 | reg: register25; 1061 | 1062 | condition_lo : mdata.bitmap == 0; 1063 | 1064 | update_lo_1_predicate : condition_lo; 1065 | update_lo_1_value : p4ml_entries.data24; 1066 | 1067 | update_lo_2_predicate : not condition_lo; 1068 | update_lo_2_value : register_lo + p4ml_entries.data24; 1069 | 1070 | output_dst : p4ml_entries.data24; 1071 | output_value : alu_lo; 1072 | } 1073 | 1074 | blackbox stateful_alu read_data_entry25 { 1075 | reg: register25; 1076 | 1077 | output_dst : p4ml_entries.data24; 1078 | output_value : register_lo; 1079 | } 1080 | 1081 | blackbox stateful_alu write_data_entry26 { 1082 | reg: register26; 1083 | 1084 | condition_lo : mdata.bitmap == 0; 1085 | update_lo_1_predicate : condition_lo; 1086 | update_lo_1_value : p4ml_entries.data25; 1087 | 1088 | update_lo_2_predicate : not condition_lo; 1089 | update_lo_2_value : register_lo + p4ml_entries.data25; 1090 | } 1091 | 1092 | blackbox stateful_alu write_read_data_entry26 { 1093 | reg: register26; 1094 | 1095 | condition_lo : mdata.bitmap == 0; 1096 | 1097 | update_lo_1_predicate : condition_lo; 1098 | update_lo_1_value : p4ml_entries.data25; 1099 | 1100 | update_lo_2_predicate : not condition_lo; 1101 | update_lo_2_value : register_lo + p4ml_entries.data25; 1102 | 1103 | output_dst : p4ml_entries.data25; 1104 | output_value : alu_lo; 1105 | } 1106 | 1107 | blackbox stateful_alu read_data_entry26 { 1108 | reg: register26; 1109 | 1110 | output_dst : p4ml_entries.data25; 1111 | output_value : register_lo; 1112 | } 1113 | 1114 | blackbox stateful_alu write_data_entry27 { 1115 | reg: register27; 1116 | 1117 | condition_lo : mdata.bitmap == 0; 1118 | update_lo_1_predicate : condition_lo; 1119 | update_lo_1_value : p4ml_entries.data26; 1120 | 1121 | update_lo_2_predicate : not condition_lo; 1122 | update_lo_2_value : register_lo + p4ml_entries.data26; 1123 | } 1124 | 1125 | blackbox stateful_alu write_read_data_entry27 { 1126 | reg: register27; 1127 | 1128 | condition_lo : mdata.bitmap == 0; 1129 | 1130 | update_lo_1_predicate : condition_lo; 1131 | update_lo_1_value : p4ml_entries.data26; 1132 | 1133 | update_lo_2_predicate : not condition_lo; 1134 | update_lo_2_value : register_lo + p4ml_entries.data26; 1135 | 1136 | output_dst : p4ml_entries.data26; 1137 | output_value : alu_lo; 1138 | } 1139 | 1140 | blackbox stateful_alu read_data_entry27 { 1141 | reg: register27; 1142 | 1143 | output_dst : p4ml_entries.data26; 1144 | output_value : register_lo; 1145 | } 1146 | 1147 | blackbox stateful_alu write_data_entry28 { 1148 | reg: register28; 1149 | 1150 | condition_lo : mdata.bitmap == 0; 1151 | update_lo_1_predicate : condition_lo; 1152 | update_lo_1_value : p4ml_entries.data27; 1153 | 1154 | update_lo_2_predicate : not condition_lo; 1155 | update_lo_2_value : register_lo + p4ml_entries.data27; 1156 | } 1157 | 1158 | blackbox stateful_alu write_read_data_entry28 { 1159 | reg: register28; 1160 | 1161 | condition_lo : mdata.bitmap == 0; 1162 | 1163 | update_lo_1_predicate : condition_lo; 1164 | update_lo_1_value : p4ml_entries.data27; 1165 | 1166 | 
update_lo_2_predicate : not condition_lo; 1167 | update_lo_2_value : register_lo + p4ml_entries.data27; 1168 | 1169 | output_dst : p4ml_entries.data27; 1170 | output_value : alu_lo; 1171 | } 1172 | 1173 | blackbox stateful_alu read_data_entry28 { 1174 | reg: register28; 1175 | 1176 | output_dst : p4ml_entries.data27; 1177 | output_value : register_lo; 1178 | } 1179 | 1180 | blackbox stateful_alu write_data_entry29 { 1181 | reg: register29; 1182 | 1183 | condition_lo : mdata.bitmap == 0; 1184 | update_lo_1_predicate : condition_lo; 1185 | update_lo_1_value : p4ml_entries.data28; 1186 | 1187 | update_lo_2_predicate : not condition_lo; 1188 | update_lo_2_value : register_lo + p4ml_entries.data28; 1189 | } 1190 | 1191 | blackbox stateful_alu write_read_data_entry29 { 1192 | reg: register29; 1193 | 1194 | condition_lo : mdata.bitmap == 0; 1195 | 1196 | update_lo_1_predicate : condition_lo; 1197 | update_lo_1_value : p4ml_entries.data28; 1198 | 1199 | update_lo_2_predicate : not condition_lo; 1200 | update_lo_2_value : register_lo + p4ml_entries.data28; 1201 | 1202 | output_dst : p4ml_entries.data28; 1203 | output_value : alu_lo; 1204 | } 1205 | 1206 | blackbox stateful_alu read_data_entry29 { 1207 | reg: register29; 1208 | 1209 | output_dst : p4ml_entries.data28; 1210 | output_value : register_lo; 1211 | } 1212 | 1213 | blackbox stateful_alu write_data_entry30 { 1214 | reg: register30; 1215 | 1216 | condition_lo : mdata.bitmap == 0; 1217 | update_lo_1_predicate : condition_lo; 1218 | update_lo_1_value : p4ml_entries.data29; 1219 | 1220 | update_lo_2_predicate : not condition_lo; 1221 | update_lo_2_value : register_lo + p4ml_entries.data29; 1222 | } 1223 | 1224 | blackbox stateful_alu write_read_data_entry30 { 1225 | reg: register30; 1226 | 1227 | condition_lo : mdata.bitmap == 0; 1228 | 1229 | update_lo_1_predicate : condition_lo; 1230 | update_lo_1_value : p4ml_entries.data29; 1231 | 1232 | update_lo_2_predicate : not condition_lo; 1233 | update_lo_2_value : register_lo + p4ml_entries.data29; 1234 | 1235 | output_dst : p4ml_entries.data29; 1236 | output_value : alu_lo; 1237 | } 1238 | 1239 | blackbox stateful_alu read_data_entry30 { 1240 | reg: register30; 1241 | 1242 | output_dst : p4ml_entries.data29; 1243 | output_value : register_lo; 1244 | } 1245 | 1246 | blackbox stateful_alu write_data_entry31 { 1247 | reg: register31; 1248 | 1249 | condition_lo : mdata.bitmap == 0; 1250 | update_lo_1_predicate : condition_lo; 1251 | update_lo_1_value : p4ml_entries.data30; 1252 | 1253 | update_lo_2_predicate : not condition_lo; 1254 | update_lo_2_value : register_lo + p4ml_entries.data30; 1255 | } 1256 | 1257 | blackbox stateful_alu write_read_data_entry31 { 1258 | reg: register31; 1259 | 1260 | condition_lo : mdata.bitmap == 0; 1261 | 1262 | update_lo_1_predicate : condition_lo; 1263 | update_lo_1_value : p4ml_entries.data30; 1264 | 1265 | update_lo_2_predicate : not condition_lo; 1266 | update_lo_2_value : register_lo + p4ml_entries.data30; 1267 | 1268 | output_dst : p4ml_entries.data30; 1269 | output_value : alu_lo; 1270 | } 1271 | 1272 | blackbox stateful_alu read_data_entry31 { 1273 | reg: register31; 1274 | 1275 | output_dst : p4ml_entries.data30; 1276 | output_value : register_lo; 1277 | } 1278 | 1279 | blackbox stateful_alu write_data_entry32 { 1280 | reg: register32; 1281 | 1282 | condition_lo : mdata.bitmap == 0; 1283 | update_lo_1_predicate : condition_lo; 1284 | update_lo_1_value : p4ml_entries.data31; 1285 | 1286 | update_lo_2_predicate : not condition_lo; 1287 | update_lo_2_value : 
register_lo + p4ml_entries.data31; 1288 | } 1289 | 1290 | blackbox stateful_alu write_read_data_entry32 { 1291 | reg: register32; 1292 | 1293 | condition_lo : mdata.bitmap == 0; 1294 | 1295 | update_lo_1_predicate : condition_lo; 1296 | update_lo_1_value : p4ml_entries.data31; 1297 | 1298 | update_lo_2_predicate : not condition_lo; 1299 | update_lo_2_value : register_lo + p4ml_entries.data31; 1300 | 1301 | output_dst : p4ml_entries.data31; 1302 | output_value : alu_lo; 1303 | } 1304 | 1305 | blackbox stateful_alu read_data_entry32 { 1306 | reg: register32; 1307 | 1308 | output_dst : p4ml_entries.data31; 1309 | output_value : register_lo; 1310 | } 1311 | -------------------------------------------------------------------------------- /p4src/includes/tables.p4: -------------------------------------------------------------------------------- 1 | 2 | @pragma stage 4 3 | table processEntry1 { 4 | actions { 5 | processentry1; 6 | } 7 | default_action : processentry1(); 8 | size : 1; 9 | } 10 | 11 | @pragma stage 4 12 | table Entry1WriteToPacket { 13 | actions { 14 | entry1WriteToPacket; 15 | } 16 | default_action : entry1WriteToPacket(); 17 | size : 1; 18 | } 19 | 20 | @pragma stage 4 21 | table processEntry1andWriteToPacket { 22 | actions { 23 | processentry1andWriteToPacket; 24 | } 25 | default_action : processentry1andWriteToPacket(); 26 | size : 1; 27 | } 28 | 29 | table processEntry2 { 30 | actions { 31 | processentry2; 32 | } 33 | default_action : processentry2(); 34 | size : 1; 35 | } 36 | 37 | table Entry2WriteToPacket { 38 | actions { 39 | entry2WriteToPacket; 40 | } 41 | default_action : entry2WriteToPacket(); 42 | size : 1; 43 | } 44 | 45 | table processEntry2andWriteToPacket { 46 | actions { 47 | processentry2andWriteToPacket; 48 | } 49 | default_action : processentry2andWriteToPacket(); 50 | size : 1; 51 | } 52 | 53 | table processEntry3 { 54 | actions { 55 | processentry3; 56 | } 57 | default_action : processentry3(); 58 | size : 1; 59 | } 60 | 61 | table Entry3WriteToPacket { 62 | actions { 63 | entry3WriteToPacket; 64 | } 65 | default_action : entry3WriteToPacket(); 66 | size : 1; 67 | } 68 | 69 | table processEntry3andWriteToPacket { 70 | actions { 71 | processentry3andWriteToPacket; 72 | } 73 | default_action : processentry3andWriteToPacket(); 74 | size : 1; 75 | } 76 | 77 | table processEntry4 { 78 | actions { 79 | processentry4; 80 | } 81 | default_action : processentry4(); 82 | size : 1; 83 | } 84 | 85 | table Entry4WriteToPacket { 86 | actions { 87 | entry4WriteToPacket; 88 | } 89 | default_action : entry4WriteToPacket(); 90 | size : 1; 91 | } 92 | 93 | table processEntry4andWriteToPacket { 94 | actions { 95 | processentry4andWriteToPacket; 96 | } 97 | default_action : processentry4andWriteToPacket(); 98 | size : 1; 99 | } 100 | 101 | table processEntry5 { 102 | actions { 103 | processentry5; 104 | } 105 | default_action : processentry5(); 106 | size : 1; 107 | } 108 | 109 | table Entry5WriteToPacket { 110 | actions { 111 | entry5WriteToPacket; 112 | } 113 | default_action : entry5WriteToPacket(); 114 | size : 1; 115 | } 116 | 117 | table processEntry5andWriteToPacket { 118 | actions { 119 | processentry5andWriteToPacket; 120 | } 121 | default_action : processentry5andWriteToPacket(); 122 | size : 1; 123 | } 124 | 125 | table processEntry6 { 126 | actions { 127 | processentry6; 128 | } 129 | default_action : processentry6(); 130 | size : 1; 131 | } 132 | 133 | table Entry6WriteToPacket { 134 | actions { 135 | entry6WriteToPacket; 136 | } 137 | default_action : 
entry6WriteToPacket(); 138 | size : 1; 139 | } 140 | 141 | table processEntry6andWriteToPacket { 142 | actions { 143 | processentry6andWriteToPacket; 144 | } 145 | default_action : processentry6andWriteToPacket(); 146 | size : 1; 147 | } 148 | 149 | table processEntry7 { 150 | actions { 151 | processentry7; 152 | } 153 | default_action : processentry7(); 154 | size : 1; 155 | } 156 | 157 | table Entry7WriteToPacket { 158 | actions { 159 | entry7WriteToPacket; 160 | } 161 | default_action : entry7WriteToPacket(); 162 | size : 1; 163 | } 164 | 165 | table processEntry7andWriteToPacket { 166 | actions { 167 | processentry7andWriteToPacket; 168 | } 169 | default_action : processentry7andWriteToPacket(); 170 | size : 1; 171 | } 172 | 173 | table processEntry8 { 174 | actions { 175 | processentry8; 176 | } 177 | default_action : processentry8(); 178 | size : 1; 179 | } 180 | 181 | table Entry8WriteToPacket { 182 | actions { 183 | entry8WriteToPacket; 184 | } 185 | default_action : entry8WriteToPacket(); 186 | size : 1; 187 | } 188 | 189 | table processEntry8andWriteToPacket { 190 | actions { 191 | processentry8andWriteToPacket; 192 | } 193 | default_action : processentry8andWriteToPacket(); 194 | size : 1; 195 | } 196 | 197 | table processEntry9 { 198 | actions { 199 | processentry9; 200 | } 201 | default_action : processentry9(); 202 | size : 1; 203 | } 204 | 205 | table Entry9WriteToPacket { 206 | actions { 207 | entry9WriteToPacket; 208 | } 209 | default_action : entry9WriteToPacket(); 210 | size : 1; 211 | } 212 | 213 | table processEntry9andWriteToPacket { 214 | actions { 215 | processentry9andWriteToPacket; 216 | } 217 | default_action : processentry9andWriteToPacket(); 218 | size : 1; 219 | } 220 | 221 | table processEntry10 { 222 | actions { 223 | processentry10; 224 | } 225 | default_action : processentry10(); 226 | size : 1; 227 | } 228 | 229 | table Entry10WriteToPacket { 230 | actions { 231 | entry10WriteToPacket; 232 | } 233 | default_action : entry10WriteToPacket(); 234 | size : 1; 235 | } 236 | 237 | table processEntry10andWriteToPacket { 238 | actions { 239 | processentry10andWriteToPacket; 240 | } 241 | default_action : processentry10andWriteToPacket(); 242 | size : 1; 243 | } 244 | 245 | table processEntry11 { 246 | actions { 247 | processentry11; 248 | } 249 | default_action : processentry11(); 250 | size : 1; 251 | } 252 | 253 | table Entry11WriteToPacket { 254 | actions { 255 | entry11WriteToPacket; 256 | } 257 | default_action : entry11WriteToPacket(); 258 | size : 1; 259 | } 260 | 261 | table processEntry11andWriteToPacket { 262 | actions { 263 | processentry11andWriteToPacket; 264 | } 265 | default_action : processentry11andWriteToPacket(); 266 | size : 1; 267 | } 268 | 269 | table processEntry12 { 270 | actions { 271 | processentry12; 272 | } 273 | default_action : processentry12(); 274 | size : 1; 275 | } 276 | 277 | table Entry12WriteToPacket { 278 | actions { 279 | entry12WriteToPacket; 280 | } 281 | default_action : entry12WriteToPacket(); 282 | size : 1; 283 | } 284 | 285 | table processEntry12andWriteToPacket { 286 | actions { 287 | processentry12andWriteToPacket; 288 | } 289 | default_action : processentry12andWriteToPacket(); 290 | size : 1; 291 | } 292 | 293 | table processEntry13 { 294 | actions { 295 | processentry13; 296 | } 297 | default_action : processentry13(); 298 | size : 1; 299 | } 300 | 301 | table Entry13WriteToPacket { 302 | actions { 303 | entry13WriteToPacket; 304 | } 305 | default_action : entry13WriteToPacket(); 306 | size : 1; 307 | } 308 | 309 
| table processEntry13andWriteToPacket { 310 | actions { 311 | processentry13andWriteToPacket; 312 | } 313 | default_action : processentry13andWriteToPacket(); 314 | size : 1; 315 | } 316 | 317 | table processEntry14 { 318 | actions { 319 | processentry14; 320 | } 321 | default_action : processentry14(); 322 | size : 1; 323 | } 324 | 325 | table Entry14WriteToPacket { 326 | actions { 327 | entry14WriteToPacket; 328 | } 329 | default_action : entry14WriteToPacket(); 330 | size : 1; 331 | } 332 | 333 | table processEntry14andWriteToPacket { 334 | actions { 335 | processentry14andWriteToPacket; 336 | } 337 | default_action : processentry14andWriteToPacket(); 338 | size : 1; 339 | } 340 | 341 | table processEntry15 { 342 | actions { 343 | processentry15; 344 | } 345 | default_action : processentry15(); 346 | size : 1; 347 | } 348 | 349 | table Entry15WriteToPacket { 350 | actions { 351 | entry15WriteToPacket; 352 | } 353 | default_action : entry15WriteToPacket(); 354 | size : 1; 355 | } 356 | 357 | table processEntry15andWriteToPacket { 358 | actions { 359 | processentry15andWriteToPacket; 360 | } 361 | default_action : processentry15andWriteToPacket(); 362 | size : 1; 363 | } 364 | 365 | table processEntry16 { 366 | actions { 367 | processentry16; 368 | } 369 | default_action : processentry16(); 370 | size : 1; 371 | } 372 | 373 | table Entry16WriteToPacket { 374 | actions { 375 | entry16WriteToPacket; 376 | } 377 | default_action : entry16WriteToPacket(); 378 | size : 1; 379 | } 380 | 381 | table processEntry16andWriteToPacket { 382 | actions { 383 | processentry16andWriteToPacket; 384 | } 385 | default_action : processentry16andWriteToPacket(); 386 | size : 1; 387 | } 388 | 389 | table processEntry17 { 390 | actions { 391 | processentry17; 392 | } 393 | default_action : processentry17(); 394 | size : 1; 395 | } 396 | 397 | table Entry17WriteToPacket { 398 | actions { 399 | entry17WriteToPacket; 400 | } 401 | default_action : entry17WriteToPacket(); 402 | size : 1; 403 | } 404 | 405 | table processEntry17andWriteToPacket { 406 | actions { 407 | processentry17andWriteToPacket; 408 | } 409 | default_action : processentry17andWriteToPacket(); 410 | size : 1; 411 | } 412 | 413 | table processEntry18 { 414 | actions { 415 | processentry18; 416 | } 417 | default_action : processentry18(); 418 | size : 1; 419 | } 420 | 421 | table Entry18WriteToPacket { 422 | actions { 423 | entry18WriteToPacket; 424 | } 425 | default_action : entry18WriteToPacket(); 426 | size : 1; 427 | } 428 | 429 | table processEntry18andWriteToPacket { 430 | actions { 431 | processentry18andWriteToPacket; 432 | } 433 | default_action : processentry18andWriteToPacket(); 434 | size : 1; 435 | } 436 | 437 | table processEntry19 { 438 | actions { 439 | processentry19; 440 | } 441 | default_action : processentry19(); 442 | size : 1; 443 | } 444 | 445 | table Entry19WriteToPacket { 446 | actions { 447 | entry19WriteToPacket; 448 | } 449 | default_action : entry19WriteToPacket(); 450 | size : 1; 451 | } 452 | 453 | table processEntry19andWriteToPacket { 454 | actions { 455 | processentry19andWriteToPacket; 456 | } 457 | default_action : processentry19andWriteToPacket(); 458 | size : 1; 459 | } 460 | 461 | table processEntry20 { 462 | actions { 463 | processentry20; 464 | } 465 | default_action : processentry20(); 466 | size : 1; 467 | } 468 | 469 | table Entry20WriteToPacket { 470 | actions { 471 | entry20WriteToPacket; 472 | } 473 | default_action : entry20WriteToPacket(); 474 | size : 1; 475 | } 476 | 477 | table 
processEntry20andWriteToPacket { 478 | actions { 479 | processentry20andWriteToPacket; 480 | } 481 | default_action : processentry20andWriteToPacket(); 482 | size : 1; 483 | } 484 | 485 | table processEntry21 { 486 | actions { 487 | processentry21; 488 | } 489 | default_action : processentry21(); 490 | size : 1; 491 | } 492 | 493 | table Entry21WriteToPacket { 494 | actions { 495 | entry21WriteToPacket; 496 | } 497 | default_action : entry21WriteToPacket(); 498 | size : 1; 499 | } 500 | 501 | table processEntry21andWriteToPacket { 502 | actions { 503 | processentry21andWriteToPacket; 504 | } 505 | default_action : processentry21andWriteToPacket(); 506 | size : 1; 507 | } 508 | 509 | table processEntry22 { 510 | actions { 511 | processentry22; 512 | } 513 | default_action : processentry22(); 514 | size : 1; 515 | } 516 | 517 | table Entry22WriteToPacket { 518 | actions { 519 | entry22WriteToPacket; 520 | } 521 | default_action : entry22WriteToPacket(); 522 | size : 1; 523 | } 524 | 525 | table processEntry22andWriteToPacket { 526 | actions { 527 | processentry22andWriteToPacket; 528 | } 529 | default_action : processentry22andWriteToPacket(); 530 | size : 1; 531 | } 532 | 533 | table processEntry23 { 534 | actions { 535 | processentry23; 536 | } 537 | default_action : processentry23(); 538 | size : 1; 539 | } 540 | 541 | table Entry23WriteToPacket { 542 | actions { 543 | entry23WriteToPacket; 544 | } 545 | default_action : entry23WriteToPacket(); 546 | size : 1; 547 | } 548 | 549 | table processEntry23andWriteToPacket { 550 | actions { 551 | processentry23andWriteToPacket; 552 | } 553 | default_action : processentry23andWriteToPacket(); 554 | size : 1; 555 | } 556 | 557 | table processEntry24 { 558 | actions { 559 | processentry24; 560 | } 561 | default_action : processentry24(); 562 | size : 1; 563 | } 564 | 565 | table Entry24WriteToPacket { 566 | actions { 567 | entry24WriteToPacket; 568 | } 569 | default_action : entry24WriteToPacket(); 570 | size : 1; 571 | } 572 | 573 | table processEntry24andWriteToPacket { 574 | actions { 575 | processentry24andWriteToPacket; 576 | } 577 | default_action : processentry24andWriteToPacket(); 578 | size : 1; 579 | } 580 | 581 | table processEntry25 { 582 | actions { 583 | processentry25; 584 | } 585 | default_action : processentry25(); 586 | size : 1; 587 | } 588 | 589 | table Entry25WriteToPacket { 590 | actions { 591 | entry25WriteToPacket; 592 | } 593 | default_action : entry25WriteToPacket(); 594 | size : 1; 595 | } 596 | 597 | table processEntry25andWriteToPacket { 598 | actions { 599 | processentry25andWriteToPacket; 600 | } 601 | default_action : processentry25andWriteToPacket(); 602 | size : 1; 603 | } 604 | 605 | table processEntry26 { 606 | actions { 607 | processentry26; 608 | } 609 | default_action : processentry26(); 610 | size : 1; 611 | } 612 | 613 | table Entry26WriteToPacket { 614 | actions { 615 | entry26WriteToPacket; 616 | } 617 | default_action : entry26WriteToPacket(); 618 | size : 1; 619 | } 620 | 621 | table processEntry26andWriteToPacket { 622 | actions { 623 | processentry26andWriteToPacket; 624 | } 625 | default_action : processentry26andWriteToPacket(); 626 | size : 1; 627 | } 628 | 629 | table processEntry27 { 630 | actions { 631 | processentry27; 632 | } 633 | default_action : processentry27(); 634 | size : 1; 635 | } 636 | 637 | table Entry27WriteToPacket { 638 | actions { 639 | entry27WriteToPacket; 640 | } 641 | default_action : entry27WriteToPacket(); 642 | size : 1; 643 | } 644 | 645 | table 
processEntry27andWriteToPacket { 646 | actions { 647 | processentry27andWriteToPacket; 648 | } 649 | default_action : processentry27andWriteToPacket(); 650 | size : 1; 651 | } 652 | 653 | table processEntry28 { 654 | actions { 655 | processentry28; 656 | } 657 | default_action : processentry28(); 658 | size : 1; 659 | } 660 | 661 | table Entry28WriteToPacket { 662 | actions { 663 | entry28WriteToPacket; 664 | } 665 | default_action : entry28WriteToPacket(); 666 | size : 1; 667 | } 668 | 669 | table processEntry28andWriteToPacket { 670 | actions { 671 | processentry28andWriteToPacket; 672 | } 673 | default_action : processentry28andWriteToPacket(); 674 | size : 1; 675 | } 676 | 677 | table processEntry29 { 678 | actions { 679 | processentry29; 680 | } 681 | default_action : processentry29(); 682 | size : 1; 683 | } 684 | 685 | table Entry29WriteToPacket { 686 | actions { 687 | entry29WriteToPacket; 688 | } 689 | default_action : entry29WriteToPacket(); 690 | size : 1; 691 | } 692 | 693 | table processEntry29andWriteToPacket { 694 | actions { 695 | processentry29andWriteToPacket; 696 | } 697 | default_action : processentry29andWriteToPacket(); 698 | size : 1; 699 | } 700 | 701 | table processEntry30 { 702 | actions { 703 | processentry30; 704 | } 705 | default_action : processentry30(); 706 | size : 1; 707 | } 708 | 709 | table Entry30WriteToPacket { 710 | actions { 711 | entry30WriteToPacket; 712 | } 713 | default_action : entry30WriteToPacket(); 714 | size : 1; 715 | } 716 | 717 | table processEntry30andWriteToPacket { 718 | actions { 719 | processentry30andWriteToPacket; 720 | } 721 | default_action : processentry30andWriteToPacket(); 722 | size : 1; 723 | } 724 | 725 | table processEntry31 { 726 | actions { 727 | processentry31; 728 | } 729 | default_action : processentry31(); 730 | size : 1; 731 | } 732 | 733 | table Entry31WriteToPacket { 734 | actions { 735 | entry31WriteToPacket; 736 | } 737 | default_action : entry31WriteToPacket(); 738 | size : 1; 739 | } 740 | 741 | table processEntry31andWriteToPacket { 742 | actions { 743 | processentry31andWriteToPacket; 744 | } 745 | default_action : processentry31andWriteToPacket(); 746 | size : 1; 747 | } 748 | 749 | table processEntry32 { 750 | actions { 751 | processentry32; 752 | } 753 | default_action : processentry32(); 754 | size : 1; 755 | } 756 | 757 | table Entry32WriteToPacket { 758 | actions { 759 | entry32WriteToPacket; 760 | } 761 | default_action : entry32WriteToPacket(); 762 | size : 1; 763 | } 764 | 765 | table processEntry32andWriteToPacket { 766 | actions { 767 | processentry32andWriteToPacket; 768 | } 769 | default_action : processentry32andWriteToPacket(); 770 | size : 1; 771 | } 772 | -------------------------------------------------------------------------------- /p4src/switchml.p4: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "includes/headers.p4" 5 | #include "includes/registers.p4" 6 | #include "includes/parser.p4" 7 | #include "includes/tables.p4" 8 | #include "includes/actions.p4" 9 | 10 | /* 11 | * P4PS 12 | * / 13 | 14 | /************************************************************************* 15 | *********************** R E G I S T E R ******************************* 16 | *************************************************************************/ 17 | 18 | blackbox stateful_alu _check_counter_ { 19 | reg: agtr_time; 20 | // bitmap = 0 imply =, !=0 imply += 21 | condition_lo : register_lo + 1 == p4ml.agtr_time; 22 | 
output_dst : mdata.bitmap; 23 | 24 | update_lo_1_predicate : condition_lo; 25 | update_lo_1_value : 0; 26 | 27 | update_lo_2_predicate : not condition_lo; 28 | update_lo_2_value : register_lo + 1; 29 | 30 | output_value : register_lo; 31 | } 32 | 33 | blackbox stateful_alu _read_counter_ { 34 | reg: agtr_time; 35 | 36 | output_dst : mdata.bitmap; 37 | output_value : register_lo; 38 | } 39 | 40 | 41 | /************************************************************************* 42 | ************** I N G R E S S P R O C E S S I N G ******************* 43 | *************************************************************************/ 44 | 45 | /* 46 | * Actions 47 | */ 48 | 49 | 50 | action multicast(group) { 51 | modify_field(ig_intr_md_for_tm.mcast_grp_a, group); 52 | } 53 | 54 | 55 | action _check_counter() { 56 | _check_counter_.execute_stateful_alu(p4ml.agtr); 57 | // agtr = ith Agtr 58 | } 59 | 60 | action _read_counter(){ 61 | _read_counter_.execute_stateful_alu(p4ml.agtr); 62 | } 63 | 64 | table check_counter { 65 | actions { 66 | _check_counter;//mdata.seen == 0; 67 | } 68 | default_action: _check_counter; 69 | size : 1; 70 | } 71 | table read_counter { 72 | actions { 73 | _read_counter; //mdata.seen == 1; 74 | } 75 | default_action: _read_counter; 76 | size : 1; 77 | } 78 | 79 | table drop_table { 80 | actions { 81 | drop_pkt; 82 | } 83 | default_action: drop_pkt(); 84 | } 85 | 86 | action drop_pkt() { 87 | drop(); 88 | } 89 | 90 | action increase_counter(){ 91 | add(mdata.current_counter, mdata.bitmap, 1); 92 | } 93 | 94 | table increase_counter_table { 95 | actions { 96 | increase_counter; //mdata.seen == 1; 97 | } 98 | default_action: increase_counter; 99 | size : 1; 100 | } 101 | 102 | action set_ack(){ 103 | modify_field(p4ml.isACK, 1); 104 | } 105 | table set_ack_table { 106 | actions { 107 | set_ack; 108 | } 109 | default_action: set_ack(); 110 | size : 1; 111 | } 112 | 113 | register version { 114 | width : 8; 115 | instance_count : 80000; 116 | } 117 | 118 | blackbox stateful_alu check_version { 119 | reg: version; 120 | condition_hi : register_lo != mdata.position; 121 | 122 | update_lo_1_predicate : condition_hi; 123 | update_lo_1_value : mdata.position; 124 | output_predicate : condition_hi; 125 | output_dst : mdata.isAggregate; 126 | output_value : mdata.position; 127 | } 128 | 129 | action check_current_version(){ 130 | check_version.execute_stateful_alu(p4ml.versionIndex); 131 | // versionIndex = Number of worker * Number of Agtr + ith worker 132 | } 133 | 134 | table check_current_agtr_version { 135 | actions { 136 | check_current_version; 137 | } 138 | default_action: check_current_version(); 139 | size : 1; 140 | } 141 | 142 | // if version == 0, value = 2b 01 143 | // if version == 1, value = 2b 10 144 | action set_position(value){ 145 | modify_field(mdata.position, value); 146 | modify_field(mdata.isAggregate, 0); 147 | } 148 | 149 | table set_position_by_version{ 150 | reads{ 151 | p4ml.version: exact; 152 | } 153 | actions { 154 | set_position; 155 | } 156 | default_action: nop; 157 | size : 8; 158 | 159 | } 160 | 161 | table outPort_table { 162 | reads { 163 | // useless here, just can't use default action for variable 164 | p4ml.isACK : exact; 165 | } 166 | actions { 167 | set_egr; 168 | nop; 169 | } 170 | } 171 | 172 | action echo_back(){ 173 | modify_field(ig_intr_md_for_tm.ucast_egress_port, ig_intr_md.ingress_port); 174 | } 175 | 176 | table echo_back_table{ 177 | actions{ 178 | echo_back; 179 | } 180 | default_action: echo_back; 181 | size: 1; 182 | } 183 | 
table multicast_table { 184 | actions { 185 | multicast; 186 | } 187 | // refer to run_pd_rpc/swithml_setup.py 188 | default_action: multicast(999); 189 | } 190 | 191 | action nop() 192 | { 193 | } 194 | 195 | action set_egr(egress_spec) { 196 | modify_field(ig_intr_md_for_tm.ucast_egress_port, egress_spec); 197 | // increase_p4ml_counter.execute_stateful_alu(ig_intr_md.ingress_port); 198 | } 199 | 200 | 201 | table forward { 202 | reads { 203 | ethernet.dstAddr : exact; 204 | } 205 | actions { 206 | set_egr; nop; 207 | } 208 | } 209 | 210 | control ingress 211 | { 212 | 213 | 214 | if (valid(p4ml_entries)) { 215 | apply(set_position_by_version); 216 | apply(check_current_agtr_version); 217 | // If aggregation is needed 218 | if(mdata.isAggregate == mdata.position) { 219 | apply(check_counter); 220 | apply(increase_counter_table); 221 | // All expected updates for this aggregator have arrived: write the aggregate back into the packet and multicast it 222 | if (mdata.current_counter == p4ml.agtr_time) { 223 | apply(processEntry1andWriteToPacket); 224 | apply(processEntry2andWriteToPacket); 225 | apply(processEntry3andWriteToPacket); 226 | apply(processEntry4andWriteToPacket); 227 | apply(processEntry5andWriteToPacket); 228 | apply(processEntry6andWriteToPacket); 229 | apply(processEntry7andWriteToPacket); 230 | apply(processEntry8andWriteToPacket); 231 | apply(processEntry9andWriteToPacket); 232 | apply(processEntry10andWriteToPacket); 233 | apply(processEntry11andWriteToPacket); 234 | apply(processEntry12andWriteToPacket); 235 | apply(processEntry13andWriteToPacket); 236 | apply(processEntry14andWriteToPacket); 237 | apply(processEntry15andWriteToPacket); 238 | apply(processEntry16andWriteToPacket); 239 | apply(processEntry17andWriteToPacket); 240 | apply(processEntry18andWriteToPacket); 241 | apply(processEntry19andWriteToPacket); 242 | apply(processEntry20andWriteToPacket); 243 | apply(processEntry21andWriteToPacket); 244 | apply(processEntry22andWriteToPacket); 245 | apply(processEntry23andWriteToPacket); 246 | apply(processEntry24andWriteToPacket); 247 | apply(processEntry25andWriteToPacket); 248 | apply(processEntry26andWriteToPacket); 249 | apply(processEntry27andWriteToPacket); 250 | apply(processEntry28andWriteToPacket); 251 | apply(processEntry29andWriteToPacket); 252 | apply(processEntry30andWriteToPacket); 253 | apply(processEntry31andWriteToPacket); 254 | apply(processEntry32andWriteToPacket); 255 | /* Multicast Back */ 256 | // apply(set_ack_table); 257 | apply(multicast_table); 258 | 259 | } else { // not the last expected update yet: accumulate into the registers and drop the packet 260 | apply(processEntry1); 261 | apply(processEntry2); 262 | apply(processEntry3); 263 | apply(processEntry4); 264 | apply(processEntry5); 265 | apply(processEntry6); 266 | apply(processEntry7); 267 | apply(processEntry8); 268 | apply(processEntry9); 269 | apply(processEntry10); 270 | apply(processEntry11); 271 | apply(processEntry12); 272 | apply(processEntry13); 273 | apply(processEntry14); 274 | apply(processEntry15); 275 | apply(processEntry16); 276 | apply(processEntry17); 277 | apply(processEntry18); 278 | apply(processEntry19); 279 | apply(processEntry20); 280 | apply(processEntry21); 281 | apply(processEntry22); 282 | apply(processEntry23); 283 | apply(processEntry24); 284 | apply(processEntry25); 285 | apply(processEntry26); 286 | apply(processEntry27); 287 | apply(processEntry28); 288 | apply(processEntry29); 289 | apply(processEntry30); 290 | apply(processEntry31); 291 | apply(processEntry32); 292 | apply(drop_table); 293 | } 294 | } else { // this version was already recorded for the aggregator slot (e.g. a resend), so do not aggregate again 295 | apply(read_counter); 296 | 297 | if(mdata.bitmap == 0){ // stored counter is 0, i.e. the round already completed: return the stored result 298 | apply(Entry1WriteToPacket); 299 | 
apply(Entry2WriteToPacket); 300 | apply(Entry3WriteToPacket); 301 | apply(Entry4WriteToPacket); 302 | apply(Entry5WriteToPacket); 303 | apply(Entry6WriteToPacket); 304 | apply(Entry7WriteToPacket); 305 | apply(Entry8WriteToPacket); 306 | apply(Entry9WriteToPacket); 307 | apply(Entry10WriteToPacket); 308 | apply(Entry11WriteToPacket); 309 | apply(Entry12WriteToPacket); 310 | apply(Entry13WriteToPacket); 311 | apply(Entry14WriteToPacket); 312 | apply(Entry15WriteToPacket); 313 | apply(Entry16WriteToPacket); 314 | apply(Entry17WriteToPacket); 315 | apply(Entry18WriteToPacket); 316 | apply(Entry19WriteToPacket); 317 | apply(Entry20WriteToPacket); 318 | apply(Entry21WriteToPacket); 319 | apply(Entry22WriteToPacket); 320 | apply(Entry23WriteToPacket); 321 | apply(Entry24WriteToPacket); 322 | apply(Entry25WriteToPacket); 323 | apply(Entry26WriteToPacket); 324 | apply(Entry27WriteToPacket); 325 | apply(Entry28WriteToPacket); 326 | apply(Entry29WriteToPacket); 327 | apply(Entry30WriteToPacket); 328 | apply(Entry31WriteToPacket); 329 | apply(Entry32WriteToPacket); 330 | 331 | apply(echo_back_table); 332 | } 333 | } 334 | } else { 335 | apply(forward); 336 | } 337 | } 338 | 339 | control egress 340 | { 341 | } 342 | 343 | -------------------------------------------------------------------------------- /ptf/ptfTest.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import pd_base_tests 4 | import pltfm_pm_rpc 5 | import pal_rpc 6 | import random 7 | import sys 8 | import time 9 | import unittest 10 | import re 11 | from envTest.p4_pd_rpc.ttypes import * 12 | from pltfm_pm_rpc.ttypes import * 13 | from pal_rpc.ttypes import * 14 | from ptf import config 15 | from ptf.testutils import * 16 | from ptf.thriftutils import * 17 | from res_pd_rpc.ttypes import * 18 | 19 | this_dir = os.path.dirname(os.path.abspath(__file__)) 20 | 21 | fp_ports = ["9/0","10/0","11/0","12/0","13/0","14/0","15/0","16/0","17/0"] 22 | 23 | class L2Test(pd_base_tests.ThriftInterfaceDataPlane): 24 | def __init__(self): 25 | pd_base_tests.ThriftInterfaceDataPlane.__init__(self, 26 | ["switchml"]) 27 | 28 | # The setUp() method is used to prepare the test fixture. Typically 29 | # you would use it to establish a connection to the Thrift server. 30 | # 31 | # You can also put the initial device configuration there. 
However, 32 | # if during this process an error is encountered, it will be considered 33 | # as a test error (meaning the test is incorrect), 34 | # rather than a test failure. 35 | def setUp(self): 36 | # initialize the connection 37 | pd_base_tests.ThriftInterfaceDataPlane.setUp(self) 38 | self.sess_hdl = self.conn_mgr.client_init() 39 | self.dev_tgt = DevTarget_t(0, hex_to_i16(0xFFFF)) 40 | self.devPorts = [] 41 | 42 | self.platform_type = "mavericks" 43 | board_type = self.pltfm_pm.pltfm_pm_board_type_get() 44 | if re.search("0x0234|0x1234|0x4234|0x5234", hex(board_type)): 45 | self.platform_type = "mavericks" 46 | elif re.search("0x2234|0x3234", hex(board_type)): 47 | self.platform_type = "montara" 48 | 49 | # get the device ports from front panel ports 50 | try: 51 | for fpPort in fp_ports: 52 | port, chnl = fpPort.split("/") 53 | devPort = \ 54 | self.pal.pal_port_front_panel_port_to_dev_port_get(0, 55 | int(port), 56 | int(chnl)) 57 | self.devPorts.append(devPort) 58 | 59 | if test_param_get('setup') == True or (test_param_get('setup') != True 60 | and test_param_get('cleanup') != True): 61 | 62 | # add and enable the platform ports 63 | for i in self.devPorts: 64 | self.pal.pal_port_add(0, i, 65 | pal_port_speed_t.BF_SPEED_100G, 66 | pal_fec_type_t.BF_FEC_TYP_REED_SOLOMON) 67 | self.pal.pal_port_an_set(0, i, 2) 68 | self.pal.pal_port_enable(0, i) 69 | self.conn_mgr.complete_operations(self.sess_hdl) 70 | except Exception as e: 71 | print "Error in port init: %s" % str(e) 72 | 73 | def runTest(self): 74 | print "runTest" 75 | # Use this method to return the DUT to the initial state by cleaning 76 | # all the configuration and clearing up the connection 77 | def tearDown(self): 78 | print "tearDown" 79 | -------------------------------------------------------------------------------- /run_pd_rpc/swithml_setup.py: -------------------------------------------------------------------------------- 1 | clear_all() 2 | 3 | p4_pd.register_reset_all_version() 4 | p4_pd.register_reset_all_agtr_time() 5 | p4_pd.register_reset_all_register1() 6 | p4_pd.register_reset_all_register2() 7 | p4_pd.register_reset_all_register3() 8 | p4_pd.register_reset_all_register4() 9 | p4_pd.register_reset_all_register5() 10 | p4_pd.register_reset_all_register6() 11 | p4_pd.register_reset_all_register7() 12 | p4_pd.register_reset_all_register8() 13 | p4_pd.register_reset_all_register9() 14 | p4_pd.register_reset_all_register10() 15 | p4_pd.register_reset_all_register11() 16 | p4_pd.register_reset_all_register12() 17 | p4_pd.register_reset_all_register13() 18 | p4_pd.register_reset_all_register14() 19 | p4_pd.register_reset_all_register15() 20 | p4_pd.register_reset_all_register16() 21 | p4_pd.register_reset_all_register17() 22 | p4_pd.register_reset_all_register18() 23 | p4_pd.register_reset_all_register19() 24 | p4_pd.register_reset_all_register20() 25 | p4_pd.register_reset_all_register21() 26 | p4_pd.register_reset_all_register22() 27 | p4_pd.register_reset_all_register23() 28 | p4_pd.register_reset_all_register24() 29 | p4_pd.register_reset_all_register25() 30 | p4_pd.register_reset_all_register26() 31 | p4_pd.register_reset_all_register27() 32 | p4_pd.register_reset_all_register28() 33 | p4_pd.register_reset_all_register29() 34 | p4_pd.register_reset_all_register30() 35 | p4_pd.register_reset_all_register31() 36 | p4_pd.register_reset_all_register32() 37 | 38 | 39 | PS = "98:03:9b:03:54:20" 40 | w1 = "b8:59:9f:1d:04:f2" 41 | w2 = "b8:59:9f:0b:30:72" 42 | w3 = "98:03:9b:03:46:50" 43 | w4 = "b8:59:9f:02:0d:14" 44 | w5
= "b8:59:9f:b0:2d:50" 45 | w6 = "b8:59:9f:b0:2b:b0" 46 | w7 = "b8:59:9f:b0:2b:b8" 47 | w8 = "b8:59:9f:b0:2d:18" 48 | w9 = "b8:59:9f:b0:2d:58" 49 | 50 | p4_pd.forward_table_add_with_set_egr( 51 | p4_pd.forward_match_spec_t(macAddr_to_string(w1)), 52 | p4_pd.set_egr_action_spec_t(56) 53 | ) 54 | 55 | p4_pd.forward_table_add_with_set_egr( 56 | p4_pd.forward_match_spec_t(macAddr_to_string(w2)), 57 | p4_pd.set_egr_action_spec_t(48) 58 | ) 59 | 60 | p4_pd.forward_table_add_with_set_egr( 61 | p4_pd.forward_match_spec_t(macAddr_to_string(w3)), 62 | p4_pd.set_egr_action_spec_t(40) 63 | ) 64 | 65 | p4_pd.forward_table_add_with_set_egr( 66 | p4_pd.forward_match_spec_t(macAddr_to_string(w4)), 67 | p4_pd.set_egr_action_spec_t(32) 68 | ) 69 | 70 | p4_pd.forward_table_add_with_set_egr( 71 | p4_pd.forward_match_spec_t(macAddr_to_string(w5)), 72 | p4_pd.set_egr_action_spec_t(24) 73 | ) 74 | 75 | p4_pd.forward_table_add_with_set_egr( 76 | p4_pd.forward_match_spec_t(macAddr_to_string(w6)), 77 | p4_pd.set_egr_action_spec_t(16) 78 | ) 79 | 80 | p4_pd.forward_table_add_with_set_egr( 81 | p4_pd.forward_match_spec_t(macAddr_to_string(w7)), 82 | p4_pd.set_egr_action_spec_t(8) 83 | ) 84 | 85 | p4_pd.forward_table_add_with_set_egr( 86 | p4_pd.forward_match_spec_t(macAddr_to_string(w8)), 87 | p4_pd.set_egr_action_spec_t(0) 88 | ) 89 | 90 | p4_pd.forward_table_add_with_set_egr( 91 | p4_pd.forward_match_spec_t(macAddr_to_string(w9)), 92 | p4_pd.set_egr_action_spec_t(4) 93 | ) 94 | 95 | p4_pd.set_position_by_version_table_add_with_set_position( 96 | p4_pd.set_position_by_version_match_spec_t(0), 97 | p4_pd.set_position_action_spec_t(1), 98 | ) 99 | 100 | p4_pd.set_position_by_version_table_add_with_set_position( 101 | p4_pd.set_position_by_version_match_spec_t(1), 102 | p4_pd.set_position_action_spec_t(2), 103 | ) 104 | 105 | 106 | try: 107 | # TODO: understand it 108 | # dont know why, but if group = input port, 109 | # then the packet followed by that packet will execute multicast 110 | # therefore make it 20, no 20th port is used. 111 | mcg1 = mc.mgrp_create(999) 112 | except: 113 | print """ 114 | clean_all() does not yet support cleaning the PRE programming. 115 | You need to restart the driver before running this script for the second time 116 | """ 117 | quit() 118 | 119 | node1 = mc.node_create( 120 | rid=999, 121 | #port_map=devports_to_mcbitmap([188]), 122 | port_map=devports_to_mcbitmap([56,48,40,32,24,16,8,0]), 123 | # port_map=devports_to_mcbitmap([56, 24]), 124 | lag_map=lags_to_mcbitmap(([])) 125 | ) 126 | mc.associate_node(mcg1, node1, xid=0, xid_valid=False) 127 | 128 | conn_mgr.complete_operations() 129 | --------------------------------------------------------------------------------